From 7aff006aed66b6ec202ccb9f35bd29d958b1c981 Mon Sep 17 00:00:00 2001 From: gurkenhabicht Date: Thu, 11 Jun 2020 01:08:16 +0200 Subject: [PATCH] introduced ip_raw(vlan) encapsulation type parsing, clean up --- src/configure/mod.rs | 3 +- src/main.rs | 4 +- src/parser.json | 2 +- src/parser/mod.rs | 229 ++++++++++++++++++++++++++----------------- 4 files changed, 145 insertions(+), 93 deletions(-) diff --git a/src/configure/mod.rs b/src/configure/mod.rs index 9b6206d..5f7e555 100644 --- a/src/configure/mod.rs +++ b/src/configure/mod.rs @@ -1,5 +1,5 @@ // Init of configuration files could also be done via Config crate. -// But at this point of development it seems like unjustified overhead. +// But at this point of development it seems like this overhead is unjustified. extern crate serde_json; use std::fs::File; @@ -88,6 +88,7 @@ fn bytes_from_file( entry: std::path::PathBuf ) -> Result<([u8;4], u16, u16), st } pub fn map_pcap_dir ( pcap_dir: &str ) -> Option> { + // Well, this conditional intermezzo seems to be best practice. 
See std::fs doc let mut pcap_map = HashMap::new(); if let Ok(entries) = fs::read_dir(pcap_dir) { for entry in entries { diff --git a/src/main.rs b/src/main.rs index 09c440d..d72cd36 100644 --- a/src/main.rs +++ b/src/main.rs @@ -56,10 +56,10 @@ async fn main() -> Result<(), Error> { /* device or file input */ match config.is_device { - false => for _pcap_file in pcap_map.keys() { + false => for (_pcap_file, _pcap_info) in pcap_map.iter() { println!("{:?}",&_pcap_file); // TODO: Tuning vector capacity according to mean average & std dev of packet size - let v: Vec = parser::parse(&_pcap_file, &config.filter, &config.regex_filter); + let v: Vec = parser::parse(&_pcap_file, &config.filter, &config.regex_filter, _pcap_info.encapsulation_type); //let mut v = Vec::::with_capacity(35536); //v.extend(parser::parse(&_pcap_file, &config.filter)); diff --git a/src/parser.json b/src/parser.json index 16ef13b..75461fa 100644 --- a/src/parser.json +++ b/src/parser.json @@ -1,6 +1,6 @@ { "insert_max": 20000, - "filter": " !ip6 && tcp || udp", + "filter": "ip6 && tcp ", "regex_filter": "(?:http|https)[[::punct::]]//([[::word::]]+\\.)*", "from_device": false, "parse_device": "enp7s0", diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 9020c89..a095abc 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -26,6 +26,15 @@ fn build_ether() -> packet_handler::EtherHeader { } } +/* + QryData could be written in the sense of QryData{ ... frame: .., packet: .., segment:.. } + On the one hand, only the actual type of frame/packet/segment would be contained in the resulting struct. + This could reduce serialization/CPU time, since less data would need to be serialized, depending on layout. + On the other hand, each datagram type would need to implement traits that are dynamically dispatched, because a single function per OSI layer would return any of these types. The result would be a performance decrease. 
+ See: https://doc.rust-lang.org/book/ch17-02-trait-objects.html#trait-objects-perform-dynamic-dispatch + Then again, parser logic would be fewer lines + more unified using the latter method. Maybe better optimizable as well? Maybe this is a nice tradeoff? + TODO: Implement and benchmark dynamically dispatched packet data in conjunction with restructured QryData. +*/ #[derive(Debug, Clone)] pub struct QryData { pub id: i32, @@ -57,22 +66,129 @@ fn init_qrydata( ) -> Result { } -//fn link_layer_protocol ( packet_data: &[u8] ) -> Option { -// Some(packet_handler::ethernet_handler(packet_data)) //this needs some love, obviously -//} -// -//fn network_layer_protocol ( packet_data: &[u8], prot_type: usize ) -> Option { -//} -// -//fn transport_layer_protocol ( packet_data: &[u8], prot_type: usize ) -> Option { -//} +enum EncapsulationType { + ETHER = 1, + RAWIP = 107, +} + +impl QryData { +// This is not cool! +// This will get modularized into subfunctions + fn encap_ether( packet_data: &[u8] ) -> Option { + let mut pkg: QryData = init_qrydata().unwrap(); + pkg.ether_header = packet_handler::ethernet_handler(packet_data); + match pkg.ether_header.ether_type as usize { + ETH_P_IP => { + //pkg.ipv6_header = None::; + pkg.ipv4_header = Some(packet_handler::ip_handler(packet_data)).unwrap(); + match pkg.ipv4_header.unwrap().ip_protocol as usize { + TCP => { + // pkg.udp_header = None::; + pkg.tcp_header = Some(packet_handler::tcp_handler( + pkg.ipv4_header.unwrap().ip_ihl, + packet_data, + )) + .unwrap(); + pkg.data = Some(packet_handler::payload_handler( + pkg.ipv4_header.unwrap().ip_ihl, + pkg.tcp_header.unwrap().data_offset, + packet_data, + )).unwrap(); + } + UDP => { + // pkg.tcp_header = None::; + pkg.udp_header = Some(packet_handler::udp_handler( + pkg.ipv4_header.unwrap().ip_ihl, + packet_data, + )) + .unwrap(); + pkg.data = Some(packet_handler::payload_handler( + pkg.ipv4_header.unwrap().ip_ihl, + 7, + packet_data, + )).unwrap(); + } + _ => println!("Transport 
layer protocol not implemented"), + } + } + ETH_P_IPV6 => { + //pkg.ipv4_header = None::; + pkg.ipv6_header = Some(packet_handler::ipv6_handler(packet_data)).unwrap(); + match pkg.ipv6_header.unwrap().next_header as usize { + TCP => { + // pkg.udp_header = None::; + pkg.tcp_header = Some(packet_handler::tcp_handler(10, packet_data)).unwrap(); + pkg.data = Some(packet_handler::payload_handler( + 10, + pkg.tcp_header.unwrap().data_offset, + packet_data, + )).unwrap(); + } + UDP => { + // pkg.tcp_header = None::; + pkg.udp_header = Some(packet_handler::udp_handler(10, packet_data)).unwrap(); + pkg.data = Some(packet_handler::payload_handler(10, 7, packet_data)).unwrap(); + } + _ => println!("Transport layer protocol not implemented"), + } + } + ETH_P_ARP | ETH_P_RARP => { + pkg.arp_header = Some(packet_handler::arp_handler(packet_data)).unwrap(); + pkg.data = None; + } + _ => println!("Network protocol not implemented"), + } + Some(pkg) + } + + fn encap_rawip ( packet_data: &[u8] ) -> Option { + let mut pkg: QryData = init_qrydata().unwrap(); + //pkg.ether_header = None::; + //pkg.ipv6_header = None::; + pkg.ipv4_header = Some(packet_handler::ip_handler(packet_data)).unwrap(); + match pkg.ipv4_header.unwrap().ip_protocol as usize { + TCP => { + pkg.udp_header = None::; + pkg.tcp_header = Some(packet_handler::tcp_handler( + pkg.ipv4_header.unwrap().ip_ihl, + packet_data, + )) + .unwrap(); + pkg.data = Some(packet_handler::payload_handler( + pkg.ipv4_header.unwrap().ip_ihl, + pkg.tcp_header.unwrap().data_offset, + packet_data, + )).unwrap(); + } + UDP => { + pkg.tcp_header = None::; + pkg.udp_header = Some(packet_handler::udp_handler( + pkg.ipv4_header.unwrap().ip_ihl, + packet_data, + )) + .unwrap(); + pkg.data = Some(packet_handler::payload_handler( + pkg.ipv4_header.unwrap().ip_ihl, + 7, + packet_data, + )).unwrap(); + } + _ => println!("Transport layer protocol not implemented"), + + } + Some(pkg) + } +} /* Regex parse _complete_ package */ fn flag_carnage(re: 
&Regex, payload: &[u8]) -> Option { let mut flags: String = String::new(); for mat in re.find_iter(payload) { - flags.push_str(&format!("{} ",std::str::from_utf8(mat.as_bytes()).unwrap())); - //flags.push_str(" "); + // TODO: Test benchmark format! vs. push_str() + // flags.push_str(&format!("{} ",std::str::from_utf8(mat.as_bytes()).unwrap())); + // See: https://github.com/hoodie/concatenation_benchmarks-rs + flags.push_str(std::str::from_utf8(mat.as_bytes()).unwrap()); + flags.push_str(";"); } match 0 < flags.len() { false => None, @@ -80,7 +196,7 @@ fn flag_carnage(re: &Regex, payload: &[u8]) -> Option { } } -pub fn parse(parse_file: &std::path::Path, filter_str: &str, regex_filter: &str) -> Vec { +pub fn parse(parse_file: &std::path::Path, filter_str: &str, regex_filter: &str, encap: u16) -> Vec { let mut me: QryData = init_qrydata().unwrap(); let mut v: Vec = Vec::new(); @@ -88,83 +204,18 @@ pub fn parse(parse_file: &std::path::Path, filter_str: &str, regex_filter: &str) Capture::filter(&mut cap, &filter_str).unwrap(); let re = Regex::new(regex_filter).unwrap(); while let Ok(packet) = cap.next() { + + match encap { + // Syntax is clunky, but no num_derive + num_traits dependencies. 
+ encap if encap == EncapsulationType::ETHER as u16 => me = QryData::encap_ether(packet.data).unwrap(), + encap if encap == EncapsulationType::RAWIP as u16 => me = QryData::encap_rawip(packet.data).unwrap(), + _ => (), + }; + me.time = (packet.header.ts.tv_usec as f64 / 1000000.0) + packet.header.ts.tv_sec as f64; - me.data = Some(packet.data.to_vec()); - me.reg_res = flag_carnage(&re, packet.data); - me.ether_header = packet_handler::ethernet_handler(packet.data); - match me.ether_header.ether_type as usize { - ETH_P_IP => { - me.ipv6_header = None::; - me.ipv4_header = Some(packet_handler::ip_handler(packet.data)).unwrap(); - match me.ipv4_header.unwrap().ip_protocol as usize { - TCP => { - me.udp_header = None::; - me.tcp_header = Some(packet_handler::tcp_handler( - me.ipv4_header.unwrap().ip_ihl, - packet.data, - )) - .unwrap(); - me.data = Some(packet_handler::payload_handler( - me.ipv4_header.unwrap().ip_ihl, - me.tcp_header.unwrap().data_offset, - packet.data, - )).unwrap(); - } - UDP => { - me.tcp_header = None::; - me.udp_header = Some(packet_handler::udp_handler( - me.ipv4_header.unwrap().ip_ihl, - packet.data, - )) - .unwrap(); - me.data = Some(packet_handler::payload_handler( - me.ipv4_header.unwrap().ip_ihl, - 7, - packet.data, - )).unwrap(); - } - _ => println!("network protocol not implemented"), - } - } - ETH_P_IPV6 => { - me.ipv4_header = None::; - me.ipv6_header = Some(packet_handler::ipv6_handler(packet.data)).unwrap(); - match me.ipv6_header.unwrap().next_header as usize { - TCP => { - me.udp_header = None::; - me.tcp_header = Some(packet_handler::tcp_handler(10, packet.data)).unwrap(); - me.data = Some(packet_handler::payload_handler( - 10, - me.tcp_header.unwrap().data_offset, - packet.data, - )).unwrap(); - } - UDP => { - me.tcp_header = None::; - me.udp_header = Some(packet_handler::udp_handler(10, packet.data)).unwrap(); - me.data = Some(packet_handler::payload_handler(10, 7, packet.data)).unwrap(); - } - _ => println!("network protocol not 
implemented"), - } - } - ETH_P_ARP | ETH_P_RARP => { - me.arp_header = Some(packet_handler::arp_handler(packet.data)).unwrap(); - me.data = None; - } - _ => println!("network protocol not implemented"), - } - v.push(QryData { - id: 0, - time: me.time, - data: me.data, - ether_header: me.ether_header, - ipv4_header: me.ipv4_header, - ipv6_header: me.ipv6_header, - tcp_header: me.tcp_header, - udp_header: me.udp_header, - arp_header: me.arp_header, - reg_res: me.reg_res, - }); + //me.data = Some(packet.data.to_vec()); + me.reg_res = flag_carnage(&re, packet.data); // Regex overhead is between 4-9% --single threaded-- on complete packet [u8] data + v.push(me.clone()); } v } @@ -179,7 +230,7 @@ pub fn parse_device(parse_device: &str, filter_str: &str, insert_max: &usize, re let re = Regex::new(regex_filter).unwrap(); 'parse: while let Ok(packet) = cap.next() { - me.time = (packet.header.ts.tv_usec as f64 / 1000000.0) + packet.header.ts.tv_sec as f64; + me.time = (packet.header.ts.tv_usec as f64 / 1000000.0) + packet.header.ts.tv_sec as f64; me.data = Some(packet.data.to_vec()); me.reg_res = flag_carnage(&re, packet.data); me.ether_header = packet_handler::ethernet_handler(packet.data);