clean up
This commit is contained in:
parent
4e86759c68
commit
ff1da0de84
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -30,6 +30,7 @@ pub struct FileInfo {
|
|||
pub encapsulation_type: u16,
|
||||
pub file_size: u64,
|
||||
pub metadata: std::fs::Metadata,
|
||||
// std::fs::Metadata::FileType + FilePermission return identical values?!
|
||||
}
|
||||
|
||||
impl FileInfo {
|
||||
|
|
30
src/main.rs
30
src/main.rs
|
@ -8,6 +8,10 @@ mod serializer;
|
|||
use tokio_postgres::types::ToSql;
|
||||
use tokio_postgres::{Error, NoTls};
|
||||
|
||||
/* conditionals */
|
||||
const FROM_FILE: bool = false;
|
||||
const FROM_DEVICE: bool = true;
|
||||
|
||||
fn query_string(insert_max: &usize) -> String {
|
||||
let mut insert_template = String::with_capacity(insert_max * 8 + 43);
|
||||
insert_template.push_str("INSERT INTO json_dump (packet) Values ");
|
||||
|
@ -25,22 +29,21 @@ async fn main() -> Result<(), Error> {
|
|||
/* Init values from file */
|
||||
|
||||
let config: configure::Config = configure::from_json_file().unwrap();
|
||||
|
||||
// TODO: hash file metadata so its state can be compared over time and written to a db table (e.g. after a system crash)
|
||||
// This db table should include UUIDs so it can be joined effectively
|
||||
let pcap_map = configure::map_pcap_dir(&config.pcap_dir).unwrap();
|
||||
|
||||
println!("{:?}", pcap_map.iter());
|
||||
// TODO: Create db table with pcap file hashes
|
||||
// TODO: hash file metadata so its state can be compared over time and written to a db table (and read back, e.g. after a system crash)
|
||||
// This db table should include UUIDs so it can be joined effectively with former runs
|
||||
// TODO: Use inotify crate to update pcap_map according to files created while parser is running
|
||||
|
||||
|
||||
/* db connection */
|
||||
let (client, connection) = tokio_postgres::connect(&config.connection, NoTls).await?;
|
||||
|
||||
tokio::spawn(async move {
|
||||
if let Err(e) = connection.await {
|
||||
eprintln!("connection error: {}", e);
|
||||
}
|
||||
});
|
||||
|
||||
client
|
||||
.execute("DROP TABLE IF EXISTS json_dump", &[])
|
||||
.await?;
|
||||
|
@ -52,22 +55,21 @@ async fn main() -> Result<(), Error> {
|
|||
.await?;
|
||||
|
||||
/* device or file input */
|
||||
|
||||
match config.is_device {
|
||||
false => {
|
||||
FROM_FILE => {
|
||||
for (_pcap_file, _pcap_info) in pcap_map.iter() {
|
||||
println!("{:?}", &_pcap_file);
|
||||
// TODO: Tune vector capacity according to mean & std dev of packet size
|
||||
let v: Vec<parser::QryData> =
|
||||
parser::parse(&_pcap_file, &config.filter, &config.regex_filter);
|
||||
//let mut v = Vec::<parser::QryData>::with_capacity(35536);
|
||||
//v.extend(parser::parse(&_pcap_file, &config.filter));
|
||||
// let mut v = Vec::<parser::QryData>::with_capacity(100000);
|
||||
// v.extend(parser::parse(&_pcap_file, &config.filter, &config.regex_filter));
|
||||
|
||||
let packets_serialized = serializer::serialize_packets(v);
|
||||
//let mut packets_serialized = Vec::<serde_json::Value>::with_capacity(35536);
|
||||
// let mut packets_serialized = Vec::<serde_json::Value>::with_capacity(100000);
|
||||
// packets_serialized.extend(serializer::serialize_packets(v));
|
||||
|
||||
/* Query */
|
||||
/* Do chunks and query data */
|
||||
let chunk_count = packets_serialized.len() / config.insert_max;
|
||||
let remainder: usize = packets_serialized.len() % config.insert_max;
|
||||
let chunker = &packets_serialized.len() < &config.insert_max;
|
||||
|
@ -97,7 +99,7 @@ async fn main() -> Result<(), Error> {
|
|||
.await?;
|
||||
}
|
||||
|
||||
if remainder > 0 {
|
||||
if 0 < remainder {
|
||||
let rem_str = query_string(&remainder);
|
||||
let statement_remainder = client.prepare(&rem_str).await?;
|
||||
let (_garbage, _input) =
|
||||
|
@ -113,7 +115,7 @@ async fn main() -> Result<(), Error> {
|
|||
}
|
||||
}
|
||||
}
|
||||
true => {
|
||||
FROM_DEVICE => {
|
||||
let insert_str = query_string(&config.insert_max);
|
||||
let statement = client.prepare(&insert_str).await?;
|
||||
loop {
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
{
|
||||
"insert_max": 20000,
|
||||
"filter": "ip6 && tcp",
|
||||
"regex_filter": "(?:http|https)[[::punct::]]//([[::word::]]+\\.)*",
|
||||
"filter": "!ip6 && tcp",
|
||||
"regex_filter": "http://",
|
||||
"from_device": false,
|
||||
"parse_device": "enp7s0",
|
||||
"pcap_file": "",
|
||||
|
|
|
@ -22,7 +22,7 @@ const ETHER_HDRLEN: usize = 0xE;
|
|||
const NO_PREDECESSOR: usize = 0x0;
|
||||
const IPV6_HDRLEN: u32 = 0xA; // I know, this will get changed. It works for now.
|
||||
|
||||
/* random constants */
|
||||
/* conditionals */
|
||||
const IPV4: usize = 0x4;
|
||||
const IPV6: usize = 0x6;
|
||||
|
||||
|
@ -59,8 +59,8 @@ enum EncapsulationType {
|
|||
|
||||
impl QryData {
|
||||
// This is not cool!
|
||||
// Implementing this object-oriented is slower by 3-10%. Variance is all over the place. It's awful but modular!
|
||||
// Guess I'll do a rollback and try a different design
|
||||
// I don't know if object-oriented is the way to go here. It's awful but modular!
|
||||
// Maybe I'll do a rollback and try a different design
|
||||
|
||||
fn new() -> QryData {
|
||||
QryData {
|
||||
|
@ -212,8 +212,8 @@ pub fn parse(parse_file: &std::path::Path, filter_str: &str, regex_filter: &str)
|
|||
while let Ok(packet) = cap.next() {
|
||||
let mut me = QryData::new();
|
||||
match linktype {
|
||||
Linktype(1) => me.encap_en10mb(packet.data).unwrap(), //me = QryData::encap_en10mb(packet.data).unwrap(), // EN10MB
|
||||
Linktype(12) => me.encap_raw(packet.data).unwrap(), //me = QryData::encap_raw(packet.data).unwrap(), // RAW
|
||||
Linktype(1) => me.encap_en10mb(packet.data).unwrap(), // I reversed encapsulation/linktype bytes in pcap/pcapng file by looking at https://www.tcpdump.org/linktypes.html
|
||||
Linktype(12) => me.encap_raw(packet.data).unwrap(), // Either this source + my implementation is wrong or pcap crate sucks
|
||||
_ => (),
|
||||
};
|
||||
|
||||
|
@ -243,7 +243,6 @@ pub fn parse_device(
|
|||
insert_max: &usize,
|
||||
regex_filter: &str,
|
||||
) -> Vec<QryData> {
|
||||
//let mut me: QryData = QryData::new ( );
|
||||
let mut v: Vec<QryData> = Vec::new();
|
||||
let mut cap = Capture::from_device(parse_device).unwrap().open().unwrap();
|
||||
Capture::filter(&mut cap, &filter_str).unwrap();
|
||||
|
@ -253,7 +252,7 @@ pub fn parse_device(
|
|||
let mut me = QryData::new();
|
||||
match linktype {
|
||||
Linktype(1) => me.encap_en10mb(packet.data).unwrap(), //me = QryData::encap_en10mb(packet.data).unwrap(),
|
||||
Linktype(101) => me.encap_raw(packet.data).unwrap(), //me = QryData::encap_raw(packet.data).unwrap(),
|
||||
Linktype(12) => me.encap_raw(packet.data).unwrap(), //me = QryData::encap_raw(packet.data).unwrap(),
|
||||
_ => (),
|
||||
};
|
||||
|
||||
|
|
|
@ -1,268 +0,0 @@
|
|||
extern crate bitfield;
|
||||
extern crate byteorder;
|
||||
extern crate eui48;
|
||||
mod packet_handler;
|
||||
use pcap::{Capture, Linktype};
|
||||
use regex::bytes::Regex;
|
||||
use std::convert::TryInto;
|
||||
use std::str;
|
||||
//use std::thread::{spawn, JoinHandle};
|
||||
//use std::sync::mpsc::{channel, Receiver};
|
||||
|
||||
/* protocol ids, LittleEndian */
|
||||
const ETH_P_IPV6: usize = 0xDD86;
|
||||
const ETH_P_IP: usize = 0x08;
|
||||
const TCP: usize = 0x06;
|
||||
const UDP: usize = 0x11;
|
||||
const ETH_P_ARP: usize = 0x0608;
|
||||
const ETH_P_RARP: usize = 0x3580;
|
||||
|
||||
/* Protocol header sizes */
|
||||
const ETHER_HDRLEN: usize = 14;
|
||||
const NO_PREDECESSOR: usize = 0;
|
||||
const IPV6_HDRLEN: u32 = 10; // I know, this will get changed. It works for now.
|
||||
|
||||
/*
|
||||
QryData could be written in the sense of QryData{ ... frame: .., packet: .., segment:.. }
|
||||
On the one hand, only the actual type of frame/packet/segment would be contained in the resulting struct.
|
||||
So there is a potential benefit in serialization/CPU time: less data might need to be serialized, depending on layout.
|
||||
On the other hand, each datagram type would need to implement traits that are dynamically dispatched, because a single function per ISO/OSI layer would have to return any of these types. The result would be a performance decrease.
|
||||
See: https://doc.rust-lang.org/book/ch10-02-traits.html#returning-types-that-implement-traits
|
||||
See: https://doc.rust-lang.org/book/ch17-02-trait-objects.html#trait-objects-perform-dynamic-dispatch
|
||||
Then again, parser logic would be fewer lines + more unified using the latter method. Maybe better optimizable as well? Maybe this is a nice tradeoff?
|
||||
TODO: Implement and benchmark dynamically dispatched packet data in conjunction with restructured QryData.
|
||||
*/
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct QryData {
|
||||
pub id: i32,
|
||||
pub time: f64,
|
||||
pub data: Option<Vec<u8>>,
|
||||
pub ether_header: Option<packet_handler::EtherHeader>,
|
||||
pub ipv4_header: Option<packet_handler::IpV4Header>,
|
||||
pub ipv6_header: Option<packet_handler::IpV6Header>,
|
||||
pub tcp_header: Option<packet_handler::TcpHeader>,
|
||||
pub udp_header: Option<packet_handler::UdpHeader>,
|
||||
pub arp_header: Option<packet_handler::ArpHeader>,
|
||||
pub reg_res: Option<String>,
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
enum EncapsulationType {
|
||||
// pcap::Linktype::get_name() is unsafe.
|
||||
EN10MB = 1, // See: https://docs.rs/pcap/0.7.0/src/pcap/lib.rs.html#247-261
|
||||
RAW = 101, // Would this be an issue?
|
||||
}
|
||||
|
||||
impl QryData {
|
||||
// This is not cool!
|
||||
// Implementing this object-oriented is slower by 3-10%. Variance is all over the place. It's awful but modular!
|
||||
// Guess I'll do a rollback and try a different approach
|
||||
|
||||
fn new() -> QryData {
|
||||
QryData {
|
||||
id: 0,
|
||||
time: 0.0,
|
||||
data: None,
|
||||
ether_header: None::<packet_handler::EtherHeader>,
|
||||
ipv4_header: None::<packet_handler::IpV4Header>,
|
||||
ipv6_header: None::<packet_handler::IpV6Header>,
|
||||
tcp_header: None::<packet_handler::TcpHeader>,
|
||||
udp_header: None::<packet_handler::UdpHeader>,
|
||||
arp_header: None::<packet_handler::ArpHeader>,
|
||||
reg_res: None::<String>,
|
||||
}
|
||||
}
|
||||
|
||||
fn encap_en10mb(mut self, packet_data: &[u8]) -> Self {
|
||||
//let mut pkg: QryData = new().unwrap();
|
||||
self.ether_header = Some(packet_handler::ethernet_handler(packet_data)).unwrap();
|
||||
match self.ether_header.unwrap().ether_type as usize {
|
||||
ETH_P_IP => {
|
||||
self.ipv4_header = Some(packet_handler::ip_handler(packet_data, ETHER_HDRLEN)).unwrap();
|
||||
self.transport_layer(packet_data, self.ipv4_header.unwrap().ip_protocol as usize, self.ipv4_header.unwrap().ip_ihl, ETHER_HDRLEN)
|
||||
.unwrap();
|
||||
self
|
||||
}
|
||||
ETH_P_IPV6 => {
|
||||
self.ipv6_header = Some(packet_handler::ipv6_handler(packet_data, ETHER_HDRLEN)).unwrap();
|
||||
self.transport_layer(packet_data, self.ipv6_header.unwrap().next_header as usize, IPV6_HDRLEN, ETHER_HDRLEN)
|
||||
.unwrap();
|
||||
self
|
||||
}
|
||||
ETH_P_ARP | ETH_P_RARP => {
|
||||
self.arp_header = Some(packet_handler::arp_handler(packet_data, ETHER_HDRLEN)).unwrap();
|
||||
self
|
||||
}
|
||||
_ => self
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
fn encap_raw(mut self, packet_data: &[u8]) -> Self {
|
||||
let ip_version: usize = ((packet_data[0] & 0xf0) >> 4).try_into().unwrap();
|
||||
match ip_version {
|
||||
4 => {
|
||||
self.ipv4_header = Some(packet_handler::ip_handler(packet_data, NO_PREDECESSOR)).unwrap();
|
||||
self.transport_layer(packet_data, self.ipv4_header.unwrap().ip_protocol as usize, self.ipv4_header.unwrap().ip_ihl, NO_PREDECESSOR)
|
||||
.unwrap();
|
||||
self
|
||||
}
|
||||
6 => {
|
||||
self.ipv6_header = Some(packet_handler::ipv6_handler(packet_data, NO_PREDECESSOR)).unwrap();
|
||||
self.transport_layer(packet_data, self.ipv6_header.unwrap().next_header as usize, IPV6_HDRLEN, NO_PREDECESSOR)
|
||||
.unwrap();
|
||||
self
|
||||
}
|
||||
_ => self
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: impl correct Err type and use in Result
|
||||
fn transport_layer(
|
||||
&mut self,
|
||||
packet_data: &[u8],
|
||||
protocol_type: usize,
|
||||
l3_header_length: u32,
|
||||
ether_hdrlen: usize,
|
||||
) -> Result<(), core::fmt::Error> {
|
||||
match protocol_type {
|
||||
TCP => {
|
||||
self.tcp_header =
|
||||
Some(packet_handler::tcp_handler(l3_header_length, packet_data, ether_hdrlen)).unwrap();
|
||||
self.data = Some(packet_handler::payload_handler(
|
||||
l3_header_length,
|
||||
self.tcp_header.unwrap().data_offset,
|
||||
packet_data,
|
||||
ether_hdrlen
|
||||
))
|
||||
.unwrap();
|
||||
}
|
||||
UDP => {
|
||||
self.udp_header =
|
||||
Some(packet_handler::udp_handler(l3_header_length, packet_data, ether_hdrlen)).unwrap();
|
||||
self.data = Some(packet_handler::payload_handler(
|
||||
l3_header_length,
|
||||
7,
|
||||
packet_data,
|
||||
ether_hdrlen
|
||||
))
|
||||
.unwrap();
|
||||
}
|
||||
_ => println!("Transport layer protocol not implemented"),
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn regex_parse(&mut self, re: &Regex, packet_data: &[u8]) -> Result<(), regex::Error> {
|
||||
self.reg_res = flag_carnage(&re, packet_data);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
fn time(mut self, tv_usec: f64, tv_sec: f64) -> Self {
|
||||
self.time = (tv_usec as f64 / 1000000.0) + tv_sec as f64;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/* Regex parse _complete_ packet */
|
||||
fn flag_carnage(re: &Regex, payload: &[u8]) -> Option<String> {
|
||||
let mut flags: String = String::new();
|
||||
for mat in re.find_iter(payload) {
|
||||
// TODO: Test benchmark format! vs. push_str()
|
||||
// flags.push_str(&format!("{} ",std::str::from_utf8(mat.as_bytes()).unwrap()));
|
||||
// See: https://github.com/hoodie/concatenation_benchmarks-rs
|
||||
flags.push_str(std::str::from_utf8(mat.as_bytes()).unwrap());
|
||||
flags.push_str(";");
|
||||
}
|
||||
match 0 < flags.len() {
|
||||
false => None,
|
||||
true => Some(flags),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse(parse_file: &std::path::Path, filter_str: &str, regex_filter: &str) -> Vec<QryData> {
|
||||
let mut v: Vec<QryData> = Vec::new();
|
||||
|
||||
let mut cap = Capture::from_file(parse_file).unwrap();
|
||||
Capture::filter(&mut cap, &filter_str).unwrap();
|
||||
let linktype = cap.get_datalink();
|
||||
println!("{:?}", &linktype);
|
||||
let re = Regex::new(regex_filter).unwrap();
|
||||
while let Ok(packet) = cap.next() {
|
||||
let mut me = QryData::new();
|
||||
match linktype {
|
||||
Linktype(1) => me.encap_en10mb(packet.data), //me = QryData::encap_en10mb(packet.data).unwrap(), // EN10MB
|
||||
Linktype(12) => me.encap_raw(packet.data), //me = QryData::encap_raw(packet.data).unwrap(), // RAW
|
||||
_ => QryData::new(),
|
||||
};
|
||||
|
||||
//me.time = (packet.header.ts.tv_usec as f64 / 1000000.0) + packet.header.ts.tv_sec as f64;
|
||||
//me.reg_res = flag_carnage(&re, packet.data).unwrap(); // Regex overhead is between 4-9% --single threaded-- on complete packet [u8] data
|
||||
me.time(packet.header.ts.tv_usec as f64, packet.header.ts.tv_sec as f64);
|
||||
me.regex_parse(&re, packet.data).unwrap();
|
||||
|
||||
v.push(me.clone());
|
||||
// v.push(QryData {
|
||||
// id: 0,
|
||||
// time: me.time,
|
||||
// data: me.data,
|
||||
// ether_header: me.ether_header,
|
||||
// ipv4_header: me.ipv4_header,
|
||||
// ipv6_header: me.ipv6_header,
|
||||
// tcp_header: me.tcp_header,
|
||||
// udp_header: me.udp_header,
|
||||
// arp_header: me.arp_header,
|
||||
// reg_res: me.reg_res,
|
||||
// });
|
||||
}
|
||||
v
|
||||
}
|
||||
|
||||
/* This could need some love */
|
||||
pub fn parse_device(
|
||||
parse_device: &str,
|
||||
filter_str: &str,
|
||||
insert_max: &usize,
|
||||
regex_filter: &str,
|
||||
) -> Vec<QryData> {
|
||||
//let mut me: QryData = QryData::new ( );
|
||||
let mut v: Vec<QryData> = Vec::new();
|
||||
let mut cap = Capture::from_device(parse_device).unwrap().open().unwrap();
|
||||
Capture::filter(&mut cap, &filter_str).unwrap();
|
||||
let linktype = cap.get_datalink();
|
||||
let re = Regex::new(regex_filter).unwrap();
|
||||
'parse: while let Ok(packet) = cap.next() {
|
||||
let mut me = QryData::new();
|
||||
match linktype {
|
||||
Linktype(1) => me.encap_en10mb(packet.data), //me = QryData::encap_en10mb(packet.data).unwrap(),
|
||||
Linktype(12) => me.encap_raw(packet.data), //me = QryData::encap_raw(packet.data).unwrap(),
|
||||
_ => QryData::new(),
|
||||
};
|
||||
|
||||
me.time = (packet.header.ts.tv_usec as f64 / 1000000.0) + packet.header.ts.tv_sec as f64;
|
||||
// &mut me.reg_res = flag_carnage(&re, packet.data).unwrap();
|
||||
me.time(packet.header.ts.tv_usec as f64, packet.header.ts.tv_sec as f64);
|
||||
me.regex_parse(&re, packet.data).unwrap();
|
||||
|
||||
v.push(me.clone());
|
||||
|
||||
// v.push(QryData {
|
||||
// id: 0,
|
||||
// time: me.time,
|
||||
// data: me.data,
|
||||
// ether_header: me.ether_header,
|
||||
// ipv4_header: me.ipv4_header,
|
||||
// ipv6_header: me.ipv6_header,
|
||||
// tcp_header: me.tcp_header,
|
||||
// udp_header: me.udp_header,
|
||||
// arp_header: me.arp_header,
|
||||
// reg_res: me.reg_res,
|
||||
// });
|
||||
if &v.len() >= insert_max {
|
||||
break 'parse;
|
||||
}
|
||||
}
|
||||
v
|
||||
}
|
Loading…
Reference in New Issue