This commit is contained in:
gurkenhabicht 2020-06-15 01:08:40 +02:00
parent 4e86759c68
commit ff1da0de84
9 changed files with 61879 additions and 2944 deletions

File diff suppressed because it is too large Load Diff

29254
src/callgrind.out Normal file

File diff suppressed because it is too large Load Diff

30058
src/callgrind.out.18324 Normal file

File diff suppressed because it is too large Load Diff

View File

@ -30,6 +30,7 @@ pub struct FileInfo {
pub encapsulation_type: u16, pub encapsulation_type: u16,
pub file_size: u64, pub file_size: u64,
pub metadata: std::fs::Metadata, pub metadata: std::fs::Metadata,
// std::fs::Metadata::FileType + FilePermission return identical values?!
} }
impl FileInfo { impl FileInfo {

View File

@ -8,6 +8,10 @@ mod serializer;
use tokio_postgres::types::ToSql; use tokio_postgres::types::ToSql;
use tokio_postgres::{Error, NoTls}; use tokio_postgres::{Error, NoTls};
/* conditionals */
const FROM_FILE: bool = false;
const FROM_DEVICE: bool = true;
fn query_string(insert_max: &usize) -> String { fn query_string(insert_max: &usize) -> String {
let mut insert_template = String::with_capacity(insert_max * 8 + 43); let mut insert_template = String::with_capacity(insert_max * 8 + 43);
insert_template.push_str("INSERT INTO json_dump (packet) Values "); insert_template.push_str("INSERT INTO json_dump (packet) Values ");
@ -25,22 +29,21 @@ async fn main() -> Result<(), Error> {
/* Init values from file */ /* Init values from file */
let config: configure::Config = configure::from_json_file().unwrap(); let config: configure::Config = configure::from_json_file().unwrap();
// TODO: hash file metadata, so its state is comparable at times and can be written to a db table (e.g. after system crash)
// This db table should include UUIDs so it can be joined effectively
let pcap_map = configure::map_pcap_dir(&config.pcap_dir).unwrap(); let pcap_map = configure::map_pcap_dir(&config.pcap_dir).unwrap();
println!("{:?}", pcap_map.iter()); println!("{:?}", pcap_map.iter());
// TODO: Create db table with pcap file hashes
// TODO: hash file metadata, so its state is comparable at times and can be written to a db table (and read e.g. after system crash)
// This db table should include UUIDs so it can be joined effectively with former runs
// TODO: Use inotify crate to update pcap_map according to files created while parser is running
/* db connection */ /* db connection */
let (client, connection) = tokio_postgres::connect(&config.connection, NoTls).await?; let (client, connection) = tokio_postgres::connect(&config.connection, NoTls).await?;
tokio::spawn(async move { tokio::spawn(async move {
if let Err(e) = connection.await { if let Err(e) = connection.await {
eprintln!("connection error: {}", e); eprintln!("connection error: {}", e);
} }
}); });
client client
.execute("DROP TABLE IF EXISTS json_dump", &[]) .execute("DROP TABLE IF EXISTS json_dump", &[])
.await?; .await?;
@ -52,22 +55,21 @@ async fn main() -> Result<(), Error> {
.await?; .await?;
/* device or file input */ /* device or file input */
match config.is_device { match config.is_device {
false => { FROM_FILE => {
for (_pcap_file, _pcap_info) in pcap_map.iter() { for (_pcap_file, _pcap_info) in pcap_map.iter() {
println!("{:?}", &_pcap_file); println!("{:?}", &_pcap_file);
// TODO: Tuning vector capacity according to mean average & std dev of packet size // TODO: Tuning vector capacity according to mean average & std dev of packet size
let v: Vec<parser::QryData> = let v: Vec<parser::QryData> =
parser::parse(&_pcap_file, &config.filter, &config.regex_filter); parser::parse(&_pcap_file, &config.filter, &config.regex_filter);
//let mut v = Vec::<parser::QryData>::with_capacity(35536); // let mut v = Vec::<parser::QryData>::with_capacity(100000);
//v.extend(parser::parse(&_pcap_file, &config.filter)); // v.extend(parser::parse(&_pcap_file, &config.filter, &config.regex_filter));
let packets_serialized = serializer::serialize_packets(v); let packets_serialized = serializer::serialize_packets(v);
//let mut packets_serialized = Vec::<serde_json::Value>::with_capacity(35536); // let mut packets_serialized = Vec::<serde_json::Value>::with_capacity(100000);
//packets_serialized.extend(serializer::serialize_packets(v)); // packets_serialized.extend(serializer::serialize_packets(v));
/* Query */ /* Do chunks and query data */
let chunk_count = packets_serialized.len() / config.insert_max; let chunk_count = packets_serialized.len() / config.insert_max;
let remainder: usize = packets_serialized.len() % config.insert_max; let remainder: usize = packets_serialized.len() % config.insert_max;
let chunker = &packets_serialized.len() < &config.insert_max; let chunker = &packets_serialized.len() < &config.insert_max;
@ -97,7 +99,7 @@ async fn main() -> Result<(), Error> {
.await?; .await?;
} }
if remainder > 0 { if 0 < remainder {
let rem_str = query_string(&remainder); let rem_str = query_string(&remainder);
let statement_remainder = client.prepare(&rem_str).await?; let statement_remainder = client.prepare(&rem_str).await?;
let (_garbage, _input) = let (_garbage, _input) =
@ -113,7 +115,7 @@ async fn main() -> Result<(), Error> {
} }
} }
} }
true => { FROM_DEVICE => {
let insert_str = query_string(&config.insert_max); let insert_str = query_string(&config.insert_max);
let statement = client.prepare(&insert_str).await?; let statement = client.prepare(&insert_str).await?;
loop { loop {

View File

@ -1,7 +1,7 @@
{ {
"insert_max": 20000, "insert_max": 20000,
"filter": "ip6 && tcp", "filter": "!ip6 && tcp",
"regex_filter": "(?:http|https)[[::punct::]]//([[::word::]]+\\.)*", "regex_filter": "http://",
"from_device": false, "from_device": false,
"parse_device": "enp7s0", "parse_device": "enp7s0",
"pcap_file": "", "pcap_file": "",

View File

@ -22,7 +22,7 @@ const ETHER_HDRLEN: usize = 0xE;
const NO_PREDECESSOR: usize = 0x0; const NO_PREDECESSOR: usize = 0x0;
const IPV6_HDRLEN: u32 = 0xA; // I know, this will get changed. It works for now. const IPV6_HDRLEN: u32 = 0xA; // I know, this will get changed. It works for now.
/* random constants */ /* conditionals */
const IPV4: usize = 0x4; const IPV4: usize = 0x4;
const IPV6: usize = 0x6; const IPV6: usize = 0x6;
@ -59,8 +59,8 @@ enum EncapsulationType {
impl QryData { impl QryData {
// This is not cool! // This is not cool!
// Implementing objectoriented is slower by 3-10%. Variance is all over the place. It's awful but modular! // I don't know if object oriented is the way to go here.It's awful but modular!
// Guess I'll do a rollback and do a different design // Maybe I'll do a rollback and do a different design
fn new() -> QryData { fn new() -> QryData {
QryData { QryData {
@ -212,8 +212,8 @@ pub fn parse(parse_file: &std::path::Path, filter_str: &str, regex_filter: &str)
while let Ok(packet) = cap.next() { while let Ok(packet) = cap.next() {
let mut me = QryData::new(); let mut me = QryData::new();
match linktype { match linktype {
Linktype(1) => me.encap_en10mb(packet.data).unwrap(), //me = QryData::encap_en10mb(packet.data).unwrap(), // EN10MB Linktype(1) => me.encap_en10mb(packet.data).unwrap(), // I reversed encapsulation/linktype bytes in pcap/pcapng file by looking at https://www.tcpdump.org/linktypes.html
Linktype(12) => me.encap_raw(packet.data).unwrap(), //me = QryData::encap_raw(packet.data).unwrap(), // RAW Linktype(12) => me.encap_raw(packet.data).unwrap(), // Either this source + my implementation is wrong or pcap crate sucks
_ => (), _ => (),
}; };
@ -243,7 +243,6 @@ pub fn parse_device(
insert_max: &usize, insert_max: &usize,
regex_filter: &str, regex_filter: &str,
) -> Vec<QryData> { ) -> Vec<QryData> {
//let mut me: QryData = QryData::new ( );
let mut v: Vec<QryData> = Vec::new(); let mut v: Vec<QryData> = Vec::new();
let mut cap = Capture::from_device(parse_device).unwrap().open().unwrap(); let mut cap = Capture::from_device(parse_device).unwrap().open().unwrap();
Capture::filter(&mut cap, &filter_str).unwrap(); Capture::filter(&mut cap, &filter_str).unwrap();
@ -253,7 +252,7 @@ pub fn parse_device(
let mut me = QryData::new(); let mut me = QryData::new();
match linktype { match linktype {
Linktype(1) => me.encap_en10mb(packet.data).unwrap(), //me = QryData::encap_en10mb(packet.data).unwrap(), Linktype(1) => me.encap_en10mb(packet.data).unwrap(), //me = QryData::encap_en10mb(packet.data).unwrap(),
Linktype(101) => me.encap_raw(packet.data).unwrap(), //me = QryData::encap_raw(packet.data).unwrap(), Linktype(12) => me.encap_raw(packet.data).unwrap(), //me = QryData::encap_raw(packet.data).unwrap(),
_ => (), _ => (),
}; };

View File

@ -1,268 +0,0 @@
extern crate bitfield;
extern crate byteorder;
extern crate eui48;
mod packet_handler;
use pcap::{Capture, Linktype};
use regex::bytes::Regex;
use std::convert::TryInto;
use std::str;
//use std::thread::{spawn, JoinHandle};
//use std::sync::mpsc::{channel, Receiver};
/* protocol ids, LittleEndian */
// NOTE: these are the wire-order (big-endian) EtherType / IP protocol numbers
// with their bytes swapped, so they compare directly against the raw
// little-endian reads done in packet_handler (e.g. IPv6 0x86DD -> 0xDD86).
const ETH_P_IPV6: usize = 0xDD86; // IPv6 EtherType 0x86DD, byte-swapped
const ETH_P_IP: usize = 0x08; // IPv4 EtherType 0x0800, byte-swapped (0x0008)
const TCP: usize = 0x06; // IP protocol number for TCP (single byte, no swap)
const UDP: usize = 0x11; // IP protocol number for UDP
const ETH_P_ARP: usize = 0x0608; // ARP EtherType 0x0806, byte-swapped
const ETH_P_RARP: usize = 0x3580; // RARP EtherType 0x8035, byte-swapped
/* Protocol header sizes */
const ETHER_HDRLEN: usize = 14; // Ethernet II header length in bytes
const NO_PREDECESSOR: usize = 0; // no layer-2 header before the IP header (raw captures)
const IPV6_HDRLEN: u32 = 10; // I know, this will get changed. It works for now.
// NOTE(review): presumably 10 = fixed IPv6 header length in 32-bit words
// (40 bytes); the unit expected by packet_handler — TODO confirm.
/*
QryData could be written in the sense of QryData{ ... frame: .., packet: .., segment:.. }
On the one hand, only the actual type of frame/packet/segment would be contained in the resulting struct.
So, increased benefit in serialization/cpu time, could result in less data to be serialized, depending on layout.
On the other hand, each datagram::type needs to implement traits which would need to be dynamically dispatched by returning any of these types per iso level from a single function each. The result would be a performance decrease.
See: https://doc.rust-lang.org/book/ch10-02-traits.html#returning-types-that-implement-traits
See: https://doc.rust-lang.org/book/ch17-02-trait-objects.html#trait-objects-perform-dynamic-dispatch
Then again, parser logic would be fewer lines + more unified using the latter method. Maybe better optimizable as well? Maybe this is a nice tradeoff?
TODO: Implement and benchmark dynamically dispatched packet data in conjunction with restructured QryData.
*/
/// One parsed capture record: timestamp, optional raw payload, and one
/// optional header struct per protocol layer that was actually found in the
/// packet. Layers that did not occur stay `None`.
#[derive(Debug, Clone)]
pub struct QryData {
    pub id: i32, // row-id placeholder; always 0 here — presumably assigned by the db layer, TODO confirm
    pub time: f64, // capture timestamp as sec.usec (see QryData::time())
    pub data: Option<Vec<u8>>, // transport-layer payload bytes when TCP/UDP was parsed
    pub ether_header: Option<packet_handler::EtherHeader>, // link layer (EN10MB only)
    pub ipv4_header: Option<packet_handler::IpV4Header>,
    pub ipv6_header: Option<packet_handler::IpV6Header>,
    pub tcp_header: Option<packet_handler::TcpHeader>,
    pub udp_header: Option<packet_handler::UdpHeader>,
    pub arp_header: Option<packet_handler::ArpHeader>, // ARP/RARP, mutually exclusive with the IP headers
    pub reg_res: Option<String>, // ';'-terminated regex matches over the whole packet (see flag_carnage)
}
/// Link-layer (DLT) values this parser understands; mirrors the numeric
/// values matched via `Linktype(..)` in parse()/parse_device().
#[allow(dead_code)]
enum EncapsulationType {
    // pcap::Linktype::get_name() is unsafe.
    EN10MB = 1, // See: https://docs.rs/pcap/0.7.0/src/pcap/lib.rs.html#247-261
    RAW = 101,  // Would this be an issue?
}
impl QryData {
    // This is not cool!
    // Implementing it object-oriented is slower by 3-10%. Variance is all over
    // the place. It's awful but modular! Guess I'll do a rollback and do a
    // different approach.

    /// Empty record: id/time zeroed, every header slot `None`.
    fn new() -> QryData {
        QryData {
            id: 0,
            time: 0.0,
            data: None,
            ether_header: None::<packet_handler::EtherHeader>,
            ipv4_header: None::<packet_handler::IpV4Header>,
            ipv6_header: None::<packet_handler::IpV6Header>,
            tcp_header: None::<packet_handler::TcpHeader>,
            udp_header: None::<packet_handler::UdpHeader>,
            arp_header: None::<packet_handler::ArpHeader>,
            reg_res: None::<String>,
        }
    }

    /// Parse an EN10MB (Ethernet II) frame: fill `ether_header`, then
    /// dispatch on the EtherType to the IPv4/IPv6/ARP handlers and, for IP,
    /// on to the transport layer. Consumes and returns `self`.
    ///
    /// NOTE(review): `Some(x).unwrap()` is an identity and only type-checks
    /// for whatever the packet_handler functions return — presumably they
    /// already return `Option<..>` matching the fields; TODO confirm.
    fn encap_en10mb(mut self, packet_data: &[u8]) -> Self {
        //let mut pkg: QryData = new().unwrap();
        self.ether_header = Some(packet_handler::ethernet_handler(packet_data)).unwrap();
        // NOTE(review): `.unwrap()` on the freshly set Option field — assumes
        // ethernet_handler always yields a header; also assumes EtherHeader is
        // Copy (or this moves out of the field) — TODO confirm.
        match self.ether_header.unwrap().ether_type as usize {
            ETH_P_IP => {
                self.ipv4_header = Some(packet_handler::ip_handler(packet_data, ETHER_HDRLEN)).unwrap();
                // IPv4: protocol id and header length (ihl) come from the parsed header.
                self.transport_layer(packet_data, self.ipv4_header.unwrap().ip_protocol as usize, self.ipv4_header.unwrap().ip_ihl, ETHER_HDRLEN)
                    .unwrap();
                self
            }
            ETH_P_IPV6 => {
                self.ipv6_header = Some(packet_handler::ipv6_handler(packet_data, ETHER_HDRLEN)).unwrap();
                // IPv6: fixed header length, next_header selects the transport protocol.
                self.transport_layer(packet_data, self.ipv6_header.unwrap().next_header as usize, IPV6_HDRLEN, ETHER_HDRLEN)
                    .unwrap();
                self
            }
            ETH_P_ARP | ETH_P_RARP => {
                self.arp_header = Some(packet_handler::arp_handler(packet_data, ETHER_HDRLEN)).unwrap();
                self
            }
            // Unknown EtherType: leave the record as-is (only ether_header set).
            _ => self
        }
    }

    /// Parse a RAW capture (no link-layer header): sniff the IP version from
    /// the first nibble and dispatch like encap_en10mb, with offset 0.
    fn encap_raw(mut self, packet_data: &[u8]) -> Self {
        // High nibble of byte 0 is the IP version field (4 or 6).
        let ip_version: usize = ((packet_data[0] & 0xf0) >> 4).try_into().unwrap();
        match ip_version {
            4 => {
                self.ipv4_header = Some(packet_handler::ip_handler(packet_data, NO_PREDECESSOR)).unwrap();
                self.transport_layer(packet_data, self.ipv4_header.unwrap().ip_protocol as usize, self.ipv4_header.unwrap().ip_ihl, NO_PREDECESSOR)
                    .unwrap();
                self
            }
            6 => {
                self.ipv6_header = Some(packet_handler::ipv6_handler(packet_data, NO_PREDECESSOR)).unwrap();
                self.transport_layer(packet_data, self.ipv6_header.unwrap().next_header as usize, IPV6_HDRLEN, NO_PREDECESSOR)
                    .unwrap();
                self
            }
            // Not IPv4/IPv6: return the record untouched.
            _ => self
        }
    }

    // TODO: impl correct Err type and use in Result
    /// Fill tcp/udp header + payload for the given IP protocol number.
    /// `l3_header_length` is the network-layer header length in the unit
    /// packet_handler expects; `ether_hdrlen` is the link-layer offset
    /// (0 for RAW captures). Unknown protocols are logged and skipped.
    fn transport_layer(
        &mut self,
        packet_data: &[u8],
        protocol_type: usize,
        l3_header_length: u32,
        ether_hdrlen: usize,
    ) -> Result<(), core::fmt::Error> {
        match protocol_type {
            TCP => {
                self.tcp_header =
                    Some(packet_handler::tcp_handler(l3_header_length, packet_data, ether_hdrlen)).unwrap();
                // Payload starts after the TCP header; data_offset comes from the parsed header.
                self.data = Some(packet_handler::payload_handler(
                    l3_header_length,
                    self.tcp_header.unwrap().data_offset,
                    packet_data,
                    ether_hdrlen
                ))
                .unwrap();
            }
            UDP => {
                self.udp_header =
                    Some(packet_handler::udp_handler(l3_header_length, packet_data, ether_hdrlen)).unwrap();
                self.data = Some(packet_handler::payload_handler(
                    l3_header_length,
                    // NOTE(review): magic 7 stands in for the UDP "data offset" —
                    // presumably the fixed-size UDP header in whatever unit
                    // payload_handler expects; TODO confirm (UDP header is 8 bytes).
                    7,
                    packet_data,
                    ether_hdrlen
                ))
                .unwrap();
            }
            _ => println!("Transport layer protocol not implemented"),
        }
        Ok(())
    }

    /// Run the precompiled regex over the whole raw packet and store the
    /// ';'-joined matches (or None) in `reg_res`. Never actually errors.
    fn regex_parse(&mut self, re: &Regex, packet_data: &[u8]) -> Result<(), regex::Error> {
        self.reg_res = flag_carnage(&re, packet_data);
        Ok(())
    }

    /// Set the capture timestamp from a pcap timeval (sec + usec/1e6).
    /// Consumes and returns `self` (builder style).
    fn time(mut self, tv_usec: f64, tv_sec: f64) -> Self {
        self.time = (tv_usec as f64 / 1000000.0) + tv_sec as f64;
        self
    }
}
/* Regex parse _complete_ packet */
/// Run `re` over the raw packet bytes and collect every match into one
/// ';'-terminated string. Returns `None` when nothing matched.
///
/// Fix: `regex::bytes` matches arbitrary byte ranges, so a match is not
/// guaranteed to be valid UTF-8 — the old `str::from_utf8(..).unwrap()`
/// could panic on binary payloads. `from_utf8_lossy` substitutes U+FFFD
/// for invalid sequences instead of panicking.
fn flag_carnage(re: &Regex, payload: &[u8]) -> Option<String> {
    let mut flags: String = String::new();
    for mat in re.find_iter(payload) {
        // TODO: Test benchmark format! vs. push_str()
        // See: https://github.com/hoodie/concatenation_benchmarks-rs
        flags.push_str(&String::from_utf8_lossy(mat.as_bytes()));
        flags.push(';');
    }
    if flags.is_empty() { None } else { Some(flags) }
}
/// Parse a whole pcap file into a vector of QryData records.
///
/// Applies the BPF `filter_str` to the capture and runs `regex_filter` over
/// every packet's raw bytes. Panics (unwrap) on an unreadable file, an
/// invalid filter, or an invalid regex — consistent with the rest of this
/// module's error style.
///
/// Fix: `encap_en10mb`/`encap_raw`/`time` consume `self` and return the
/// updated `Self`, but the old code discarded the match result and kept
/// using the moved-from `me` (use-after-move; the parsed headers and
/// timestamp were lost). Results are now rebound. The needless
/// `me.clone()` on push is also gone — `me` is not used afterwards.
pub fn parse(parse_file: &std::path::Path, filter_str: &str, regex_filter: &str) -> Vec<QryData> {
    let mut v: Vec<QryData> = Vec::new();
    let mut cap = Capture::from_file(parse_file).unwrap();
    Capture::filter(&mut cap, &filter_str).unwrap();
    let linktype = cap.get_datalink();
    println!("{:?}", &linktype);
    let re = Regex::new(regex_filter).unwrap();
    while let Ok(packet) = cap.next() {
        let me = match linktype {
            Linktype(1) => QryData::new().encap_en10mb(packet.data), // EN10MB
            Linktype(12) => QryData::new().encap_raw(packet.data),   // RAW
            _ => QryData::new(), // unknown link type: keep an empty record
        };
        let mut me = me.time(packet.header.ts.tv_usec as f64, packet.header.ts.tv_sec as f64);
        // Regex overhead is between 4-9% --single threaded-- on complete packet [u8] data
        me.regex_parse(&re, packet.data).unwrap();
        v.push(me);
    }
    v
}
/* This could need some love */
/// Capture live from `parse_device` until `insert_max` packets have been
/// collected, applying the BPF `filter_str` and `regex_filter` exactly like
/// `parse()`. Panics (unwrap) on an unopenable device, an invalid filter,
/// or an invalid regex.
///
/// Fixes mirror `parse()`: `encap_*`/`time` consume `self` and return the
/// updated `Self`, so their results must be rebound (the old code discarded
/// them and used the moved-from `me`). The duplicate timestamp write
/// (`me.time = ...` followed by a discarded `me.time(...)` call) is
/// collapsed into one, and the needless `me.clone()` on push is removed.
pub fn parse_device(
    parse_device: &str,
    filter_str: &str,
    insert_max: &usize,
    regex_filter: &str,
) -> Vec<QryData> {
    let mut v: Vec<QryData> = Vec::new();
    let mut cap = Capture::from_device(parse_device).unwrap().open().unwrap();
    Capture::filter(&mut cap, &filter_str).unwrap();
    let linktype = cap.get_datalink();
    let re = Regex::new(regex_filter).unwrap();
    'parse: while let Ok(packet) = cap.next() {
        let me = match linktype {
            Linktype(1) => QryData::new().encap_en10mb(packet.data), // EN10MB
            Linktype(12) => QryData::new().encap_raw(packet.data),   // RAW
            _ => QryData::new(), // unknown link type: keep an empty record
        };
        let mut me = me.time(packet.header.ts.tv_usec as f64, packet.header.ts.tv_sec as f64);
        me.regex_parse(&re, packet.data).unwrap();
        v.push(me);
        // Stop once a full insert batch is collected.
        if v.len() >= *insert_max {
            break 'parse;
        }
    }
    v
}