introduced ip_raw(vlan) encapsulation type parsing, clean up

This commit is contained in:
gurkenhabicht 2020-06-11 01:08:16 +02:00
parent 8a9f819e68
commit 7aff006aed
4 changed files with 145 additions and 93 deletions

View File

@ -1,5 +1,5 @@
// Init of configuration files could also be done via Config crate.
// But at this point of development it seems like unjustified overhead.
// But at this point of development this overhead seems unjustified.
extern crate serde_json;
use std::fs::File;
@ -88,6 +88,7 @@ fn bytes_from_file( entry: std::path::PathBuf ) -> Result<([u8;4], u16, u16), st
}
pub fn map_pcap_dir ( pcap_dir: &str ) -> Option<std::collections::HashMap<std::path::PathBuf, FileInfo>> {
// Well, this conditional intermezzo seems to be best practice. See std::fs doc
let mut pcap_map = HashMap::new();
if let Ok(entries) = fs::read_dir(pcap_dir) {
for entry in entries {

View File

@ -56,10 +56,10 @@ async fn main() -> Result<(), Error> {
/* device or file input */
match config.is_device {
false => for _pcap_file in pcap_map.keys() {
false => for (_pcap_file, _pcap_info) in pcap_map.iter() {
println!("{:?}",&_pcap_file);
// TODO: Tuning vector capacity according to mean average & std dev of packet size
let v: Vec<parser::QryData> = parser::parse(&_pcap_file, &config.filter, &config.regex_filter);
let v: Vec<parser::QryData> = parser::parse(&_pcap_file, &config.filter, &config.regex_filter, _pcap_info.encapsulation_type);
//let mut v = Vec::<parser::QryData>::with_capacity(35536);
//v.extend(parser::parse(&_pcap_file, &config.filter));

View File

@ -1,6 +1,6 @@
{
"insert_max": 20000,
"filter": " !ip6 && tcp || udp",
"filter": "ip6 && tcp ",
"regex_filter": "(?:http|https)[[::punct::]]//([[::word::]]+\\.)*",
"from_device": false,
"parse_device": "enp7s0",

View File

@ -26,6 +26,15 @@ fn build_ether() -> packet_handler::EtherHeader {
}
}
/*
QryData could instead be modelled as QryData{ ... frame: .., packet: .., segment: .. }.
On the one hand, the resulting struct would then contain only the actual type of frame/packet/segment.
Depending on the layout, less data would have to be serialized, which would save serialization/CPU time.
On the other hand, each datagram::type would need to implement traits that are dynamically dispatched, because a single function per ISO/OSI layer would have to return any of these types. The result would be a performance decrease.
See: https://doc.rust-lang.org/book/ch17-02-trait-objects.html#trait-objects-perform-dynamic-dispatch
Then again, the parser logic would need fewer lines and be more unified with the latter method. It may be easier to optimize as well. Maybe this is a nice tradeoff?
TODO: Implement and benchmark dynamically dispatched packet data in conjunction with restructured QryData.
*/
#[derive(Debug, Clone)]
pub struct QryData {
pub id: i32,
@ -57,22 +66,129 @@ fn init_qrydata( ) -> Result<QryData, core::fmt::Error> {
}
//fn link_layer_protocol <T> ( packet_data: &[u8] ) -> Option<T> {
// Some(packet_handler::ethernet_handler(packet_data)) //this needs some love, obviously
//}
//
//fn network_layer_protocol <T> ( packet_data: &[u8], prot_type: usize ) -> Option<T> {
//}
//
//fn transport_layer_protocol <T> ( packet_data: &[u8], prot_type: usize ) -> Option<T> {
//}
/// Link-layer encapsulation types that `parse` knows how to decode.
///
/// The discriminants are compared against the `u16` encapsulation value handed
/// to `parse` (`encap == EncapsulationType::X as u16`), so the representation is
/// pinned to `u16` to keep that cast lossless by construction.
//
// NOTE(review): the tcpdump link-type registry lists LINKTYPE_RAW as 101 and
// 107 as LINKTYPE_FRELAY; confirm that 107 really is the value read from the
// capture file header for raw-IP captures.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u16)]
enum EncapsulationType {
    /// Frames that start with an Ethernet header (handled by `encap_ether`).
    ETHER = 1,
    /// Packets that start directly with the IP header (handled by `encap_rawip`).
    RAWIP = 107,
}
impl QryData {
// This is not cool!
// This will get modularized into subfunctions
fn encap_ether( packet_data: &[u8] ) -> Option<QryData> {
let mut pkg: QryData = init_qrydata().unwrap();
pkg.ether_header = packet_handler::ethernet_handler(packet_data);
match pkg.ether_header.ether_type as usize {
ETH_P_IP => {
//pkg.ipv6_header = None::<packet_handler::IpV6Header>;
pkg.ipv4_header = Some(packet_handler::ip_handler(packet_data)).unwrap();
match pkg.ipv4_header.unwrap().ip_protocol as usize {
TCP => {
// pkg.udp_header = None::<packet_handler::UdpHeader>;
pkg.tcp_header = Some(packet_handler::tcp_handler(
pkg.ipv4_header.unwrap().ip_ihl,
packet_data,
))
.unwrap();
pkg.data = Some(packet_handler::payload_handler(
pkg.ipv4_header.unwrap().ip_ihl,
pkg.tcp_header.unwrap().data_offset,
packet_data,
)).unwrap();
}
UDP => {
// pkg.tcp_header = None::<packet_handler::TcpHeader>;
pkg.udp_header = Some(packet_handler::udp_handler(
pkg.ipv4_header.unwrap().ip_ihl,
packet_data,
))
.unwrap();
pkg.data = Some(packet_handler::payload_handler(
pkg.ipv4_header.unwrap().ip_ihl,
7,
packet_data,
)).unwrap();
}
_ => println!("Transport layer protocol not implemented"),
}
}
ETH_P_IPV6 => {
//pkg.ipv4_header = None::<packet_handler::IpV4Header>;
pkg.ipv6_header = Some(packet_handler::ipv6_handler(packet_data)).unwrap();
match pkg.ipv6_header.unwrap().next_header as usize {
TCP => {
// pkg.udp_header = None::<packet_handler::UdpHeader>;
pkg.tcp_header = Some(packet_handler::tcp_handler(10, packet_data)).unwrap();
pkg.data = Some(packet_handler::payload_handler(
10,
pkg.tcp_header.unwrap().data_offset,
packet_data,
)).unwrap();
}
UDP => {
// pkg.tcp_header = None::<packet_handler::TcpHeader>;
pkg.udp_header = Some(packet_handler::udp_handler(10, packet_data)).unwrap();
pkg.data = Some(packet_handler::payload_handler(10, 7, packet_data)).unwrap();
}
_ => println!("Transport layer protocol not implemented"),
}
}
ETH_P_ARP | ETH_P_RARP => {
pkg.arp_header = Some(packet_handler::arp_handler(packet_data)).unwrap();
pkg.data = None;
}
_ => println!("Network protocol not implemented"),
}
Some(pkg)
}
fn encap_rawip ( packet_data: &[u8] ) -> Option<QryData> {
let mut pkg: QryData = init_qrydata().unwrap();
//pkg.ether_header = None::<packet_handler::EtherHeader>;
//pkg.ipv6_header = None::<packet_handler::IpV6Header>;
pkg.ipv4_header = Some(packet_handler::ip_handler(packet_data)).unwrap();
match pkg.ipv4_header.unwrap().ip_protocol as usize {
TCP => {
pkg.udp_header = None::<packet_handler::UdpHeader>;
pkg.tcp_header = Some(packet_handler::tcp_handler(
pkg.ipv4_header.unwrap().ip_ihl,
packet_data,
))
.unwrap();
pkg.data = Some(packet_handler::payload_handler(
pkg.ipv4_header.unwrap().ip_ihl,
pkg.tcp_header.unwrap().data_offset,
packet_data,
)).unwrap();
}
UDP => {
pkg.tcp_header = None::<packet_handler::TcpHeader>;
pkg.udp_header = Some(packet_handler::udp_handler(
pkg.ipv4_header.unwrap().ip_ihl,
packet_data,
))
.unwrap();
pkg.data = Some(packet_handler::payload_handler(
pkg.ipv4_header.unwrap().ip_ihl,
7,
packet_data,
)).unwrap();
}
_ => println!("Transport layer protocol not implemented"),
}
Some(pkg)
}
}
/* Regex parse _complete_ package */
fn flag_carnage(re: &Regex, payload: &[u8]) -> Option<String> {
let mut flags: String = String::new();
for mat in re.find_iter(payload) {
flags.push_str(&format!("{} ",std::str::from_utf8(mat.as_bytes()).unwrap()));
//flags.push_str(" ");
// TODO: Test benchmark format! vs. push_str()
// flags.push_str(&format!("{} ",std::str::from_utf8(mat.as_bytes()).unwrap()));
// See: https://github.com/hoodie/concatenation_benchmarks-rs
flags.push_str(std::str::from_utf8(mat.as_bytes()).unwrap());
flags.push_str(";");
}
match 0 < flags.len() {
false => None,
@ -80,7 +196,7 @@ fn flag_carnage(re: &Regex, payload: &[u8]) -> Option<String> {
}
}
pub fn parse(parse_file: &std::path::Path, filter_str: &str, regex_filter: &str) -> Vec<QryData> {
pub fn parse(parse_file: &std::path::Path, filter_str: &str, regex_filter: &str, encap: u16) -> Vec<QryData> {
let mut me: QryData = init_qrydata().unwrap();
let mut v: Vec<QryData> = Vec::new();
@ -88,83 +204,18 @@ pub fn parse(parse_file: &std::path::Path, filter_str: &str, regex_filter: &str)
Capture::filter(&mut cap, &filter_str).unwrap();
let re = Regex::new(regex_filter).unwrap();
while let Ok(packet) = cap.next() {
match encap {
// Syntax is clunky, but no num_derive + num_traits dependencies.
encap if encap == EncapsulationType::ETHER as u16 => me = QryData::encap_ether(packet.data).unwrap(),
encap if encap == EncapsulationType::RAWIP as u16 => me = QryData::encap_rawip(packet.data).unwrap(),
_ => (),
};
me.time = (packet.header.ts.tv_usec as f64 / 1000000.0) + packet.header.ts.tv_sec as f64;
me.data = Some(packet.data.to_vec());
me.reg_res = flag_carnage(&re, packet.data);
me.ether_header = packet_handler::ethernet_handler(packet.data);
match me.ether_header.ether_type as usize {
ETH_P_IP => {
me.ipv6_header = None::<packet_handler::IpV6Header>;
me.ipv4_header = Some(packet_handler::ip_handler(packet.data)).unwrap();
match me.ipv4_header.unwrap().ip_protocol as usize {
TCP => {
me.udp_header = None::<packet_handler::UdpHeader>;
me.tcp_header = Some(packet_handler::tcp_handler(
me.ipv4_header.unwrap().ip_ihl,
packet.data,
))
.unwrap();
me.data = Some(packet_handler::payload_handler(
me.ipv4_header.unwrap().ip_ihl,
me.tcp_header.unwrap().data_offset,
packet.data,
)).unwrap();
}
UDP => {
me.tcp_header = None::<packet_handler::TcpHeader>;
me.udp_header = Some(packet_handler::udp_handler(
me.ipv4_header.unwrap().ip_ihl,
packet.data,
))
.unwrap();
me.data = Some(packet_handler::payload_handler(
me.ipv4_header.unwrap().ip_ihl,
7,
packet.data,
)).unwrap();
}
_ => println!("network protocol not implemented"),
}
}
ETH_P_IPV6 => {
me.ipv4_header = None::<packet_handler::IpV4Header>;
me.ipv6_header = Some(packet_handler::ipv6_handler(packet.data)).unwrap();
match me.ipv6_header.unwrap().next_header as usize {
TCP => {
me.udp_header = None::<packet_handler::UdpHeader>;
me.tcp_header = Some(packet_handler::tcp_handler(10, packet.data)).unwrap();
me.data = Some(packet_handler::payload_handler(
10,
me.tcp_header.unwrap().data_offset,
packet.data,
)).unwrap();
}
UDP => {
me.tcp_header = None::<packet_handler::TcpHeader>;
me.udp_header = Some(packet_handler::udp_handler(10, packet.data)).unwrap();
me.data = Some(packet_handler::payload_handler(10, 7, packet.data)).unwrap();
}
_ => println!("network protocol not implemented"),
}
}
ETH_P_ARP | ETH_P_RARP => {
me.arp_header = Some(packet_handler::arp_handler(packet.data)).unwrap();
me.data = None;
}
_ => println!("network protocol not implemented"),
}
v.push(QryData {
id: 0,
time: me.time,
data: me.data,
ether_header: me.ether_header,
ipv4_header: me.ipv4_header,
ipv6_header: me.ipv6_header,
tcp_header: me.tcp_header,
udp_header: me.udp_header,
arp_header: me.arp_header,
reg_res: me.reg_res,
});
//me.data = Some(packet.data.to_vec());
me.reg_res = flag_carnage(&re, packet.data); // Regex overhead is between 4-9% --single threaded-- on complete packet [u8] data
v.push(me.clone());
}
v
}