tpcpr/src/parser/mod.rs

322 lines
12 KiB
Rust

mod packet_handler;
use pcap::{Capture, Linktype};
use regex::bytes::Regex;
use serde::Serialize;
use std::convert::TryInto;
use std::str;
use std::sync::mpsc::{sync_channel, Receiver};
use std::thread::{spawn, JoinHandle};
//use tokio::sync::mpsc;
//use tokio::task;
/* Protocol ids.
 * The EtherType constants are written byte-swapped relative to their usual
 * network-order notation (IPv6 0x86DD -> 0xDD86, IPv4 0x0800 -> 0x08,
 * ARP 0x0806 -> 0x0608, RARP 0x8035 -> 0x3580). encap_en10mb() compares them
 * against the ether_type field of the parsed Ethernet header, cast to usize.
 * NOTE(review): presumably packet_handler decodes ether_type little-endian -- confirm. */
const ETH_P_IPV6: usize = 0xDD86;
const ETH_P_IP: usize = 0x08;
// IP protocol numbers, matched in transport_layer().
const TCP: usize = 0x06;
const UDP: usize = 0x11;
const ETH_P_ARP: usize = 0x0608;
const ETH_P_RARP: usize = 0x3580;
/* Protocol header sizes */
// Passed to the packet_handler functions for Ethernet-framed packets (0xE = 14).
const ETHER_HDRLEN: usize = 0xE;
// Passed in place of ETHER_HDRLEN by encap_raw(), where no link-layer header precedes the IP header.
const NO_PREDECESSOR: usize = 0x0;
// Used as the layer-3 header length for IPv6, in the argument slot where IPv4 passes its ip_ihl value.
// NOTE(review): 0xA = 10, presumably counted in 32-bit words (40 bytes) -- confirm the unit against packet_handler.
const IPV6_HDRLEN: u32 = 0xA; // I know, this will get changed. It works for now.
/* conditionals */
// IP version values matched in encap_raw() against the high nibble of the first packet byte.
const IPV4: usize = 0x4;
const IPV6: usize = 0x6;
/// One decoded packet record, serializable through serde.
///
/// Every `Option` field is left out of the serialized output when it is `None`,
/// so a record only carries the headers that were actually decoded.
#[derive(Debug, Clone, Serialize)]
pub struct QryData {
/// Record id. `QryData::new()` sets it to 0; the parsers visible in this file never assign it.
pub id: i32,
/// Capture timestamp, computed by the parsers as `tv_sec + tv_usec / 1_000_000`.
pub time: f64,
// Importing the serde_with crate just for a single struct decorator is not worth it,
// so the `skip_serializing_if` attribute is repeated on each of the eight optional fields.
/// Payload bytes returned by `packet_handler::payload_handler` (set for TCP and UDP only).
#[serde(skip_serializing_if = "Option::is_none")]
pub data: Option<Vec<u8>>,
/// Ethernet header; set by `encap_en10mb`.
#[serde(skip_serializing_if = "Option::is_none")]
pub ether_header: Option<packet_handler::EtherHeader>,
/// IPv4 header; set when the packet is identified as IPv4.
#[serde(skip_serializing_if = "Option::is_none")]
pub ipv4_header: Option<packet_handler::IpV4Header>,
/// IPv6 header; set when the packet is identified as IPv6.
#[serde(skip_serializing_if = "Option::is_none")]
pub ipv6_header: Option<packet_handler::IpV6Header>,
/// TCP header; set by `transport_layer` for protocol id `TCP`.
#[serde(skip_serializing_if = "Option::is_none")]
pub tcp_header: Option<packet_handler::TcpHeader>,
/// UDP header; set by `transport_layer` for protocol id `UDP`.
#[serde(skip_serializing_if = "Option::is_none")]
pub udp_header: Option<packet_handler::UdpHeader>,
/// ARP header; set by `encap_en10mb` for the ARP and RARP EtherTypes.
#[serde(skip_serializing_if = "Option::is_none")]
pub arp_header: Option<packet_handler::ArpHeader>,
/// Regex matches found in the raw packet by `flag_carnage`, each followed by `;`.
#[serde(skip_serializing_if = "Option::is_none")]
pub reg_res: Option<String>,
}
// Not constructed anywhere in the visible part of this file, hence the lint suppression.
#[allow(dead_code)]
/// Numeric link-layer encapsulation ids, kept as a possible replacement for
/// matching on `pcap::Linktype` values directly.
///
/// NOTE(review): the parse functions in this file match raw-IP captures on
/// `Linktype(12)`, not on 101 as `RAW` is defined here -- confirm which value applies.
enum EncapsulationType {
// pcap::Linktype::get_name() is unsafe. That is why this safe data structure would be an alternative.
EN10MB = 1, // See: https://docs.rs/pcap/0.7.0/src/pcap/lib.rs.html#247-261
RAW = 101, // Would this be an issue?
}
impl QryData {
    /// Creates an empty record: `id` 0, `time` 0.0 and every optional field `None`.
    fn new() -> QryData {
        QryData {
            id: 0,
            time: 0.0,
            data: None,
            ether_header: None,
            ipv4_header: None,
            ipv6_header: None,
            tcp_header: None,
            udp_header: None,
            arp_header: None,
            reg_res: None,
        }
    }

    /// Decodes an Ethernet-framed packet into this record.
    ///
    /// Parses the Ethernet header, then dispatches on its `ether_type`:
    /// IPv4 and IPv6 continue into [`QryData::transport_layer`], ARP/RARP only
    /// store the ARP header, and anything else prints a notice and is left
    /// undecoded.
    ///
    /// # Errors
    /// Returns `core::fmt::Error` when a `packet_handler` function yields no
    /// header for a layer that is required to continue, instead of panicking
    /// inside this method.
    fn encap_en10mb(&mut self, packet_data: &[u8]) -> Result<(), core::fmt::Error> {
        self.ether_header = packet_handler::ethernet_handler(packet_data);
        let ether_type = self.ether_header.ok_or(core::fmt::Error)?.ether_type as usize;
        match ether_type {
            ETH_P_IP => {
                self.ipv4_header = packet_handler::ip_handler(packet_data, ETHER_HDRLEN);
                let ipv4 = self.ipv4_header.ok_or(core::fmt::Error)?;
                self.transport_layer(
                    packet_data,
                    ipv4.ip_protocol as usize,
                    ipv4.ip_ihl,
                    ETHER_HDRLEN,
                )?;
            }
            ETH_P_IPV6 => {
                self.ipv6_header = packet_handler::ipv6_handler(packet_data, ETHER_HDRLEN);
                let ipv6 = self.ipv6_header.ok_or(core::fmt::Error)?;
                self.transport_layer(
                    packet_data,
                    ipv6.next_header as usize,
                    IPV6_HDRLEN,
                    ETHER_HDRLEN,
                )?;
            }
            ETH_P_ARP | ETH_P_RARP => {
                self.arp_header = packet_handler::arp_handler(packet_data, ETHER_HDRLEN);
            }
            _ => println!("Network protocol not implemented"),
        }
        Ok(())
    }

    /// Decodes a packet that starts directly with an IP header (no link layer).
    ///
    /// The IP version is read from the high nibble of the first byte; IPv4 and
    /// IPv6 continue into [`QryData::transport_layer`] with `NO_PREDECESSOR` in
    /// place of the Ethernet header length. Other versions print a notice.
    ///
    /// # Errors
    /// Returns `core::fmt::Error` for an empty packet (previously an
    /// out-of-bounds panic) or when the IP handler yields no header.
    fn encap_raw(&mut self, packet_data: &[u8]) -> Result<(), core::fmt::Error> {
        let first_byte = *packet_data.first().ok_or(core::fmt::Error)?;
        // u8 -> usize cannot fail; the conversion only exists so the value can be
        // matched against the usize version constants.
        let ip_version: usize = ((first_byte & 0xf0) >> 4)
            .try_into()
            .map_err(|_| core::fmt::Error)?;
        match ip_version {
            IPV4 => {
                self.ipv4_header = packet_handler::ip_handler(packet_data, NO_PREDECESSOR);
                let ipv4 = self.ipv4_header.ok_or(core::fmt::Error)?;
                self.transport_layer(
                    packet_data,
                    ipv4.ip_protocol as usize,
                    ipv4.ip_ihl,
                    NO_PREDECESSOR,
                )?;
            }
            IPV6 => {
                self.ipv6_header = packet_handler::ipv6_handler(packet_data, NO_PREDECESSOR);
                let ipv6 = self.ipv6_header.ok_or(core::fmt::Error)?;
                self.transport_layer(
                    packet_data,
                    ipv6.next_header as usize,
                    IPV6_HDRLEN,
                    NO_PREDECESSOR,
                )?;
            }
            _ => println!("Network Protocol not implemented"),
        }
        Ok(())
    }

    // TODO: impl correct Err type and use as Result
    /// Decodes the transport-layer header and payload.
    ///
    /// * `protocol_type` - protocol id taken from the IP header (`TCP` or `UDP`
    ///   are decoded; anything else prints a notice).
    /// * `l3_header_length` - layer-3 header length as reported by the caller
    ///   (`ip_ihl` for IPv4, `IPV6_HDRLEN` for IPv6).
    /// * `ether_hdrlen` - size of the link-layer header preceding the IP header
    ///   (`ETHER_HDRLEN` or `NO_PREDECESSOR`).
    ///
    /// # Errors
    /// Returns `core::fmt::Error` when the TCP handler yields no header, since
    /// its `data_offset` is needed to locate the payload.
    fn transport_layer(
        &mut self,
        packet_data: &[u8],
        protocol_type: usize,
        l3_header_length: u32,
        ether_hdrlen: usize,
    ) -> Result<(), core::fmt::Error> {
        match protocol_type {
            TCP => {
                self.tcp_header =
                    packet_handler::tcp_handler(l3_header_length, packet_data, ether_hdrlen);
                let tcp = self.tcp_header.ok_or(core::fmt::Error)?;
                self.data = packet_handler::payload_handler(
                    l3_header_length,
                    tcp.data_offset,
                    packet_data,
                    ether_hdrlen,
                );
            }
            UDP => {
                self.udp_header =
                    packet_handler::udp_handler(l3_header_length, packet_data, ether_hdrlen);
                // NOTE(review): 7 is passed in the slot where TCP passes its data_offset;
                // confirm against payload_handler that this is the intended UDP value.
                self.data = packet_handler::payload_handler(
                    l3_header_length,
                    7,
                    packet_data,
                    ether_hdrlen,
                );
            }
            _ => println!("Transport layer protocol not implemented"),
        }
        Ok(())
    }
}
/* Regex parse _complete_ packet */
/// Runs `re` over the raw bytes of a whole packet and collects every match.
///
/// Each match is appended to the result followed by a `;` separator. Matched
/// bytes that are not valid UTF-8 are converted lossily (invalid sequences
/// become U+FFFD) rather than aborting the whole parse with a panic, because
/// a byte-oriented regex over packet data can match arbitrary binary content.
///
/// Returns `None` when the pattern is empty or nothing was collected.
fn flag_carnage(re: &Regex, payload: &[u8]) -> Option<String> {
    // An empty pattern means "no regex filter"; skip the scan entirely.
    if re.as_str().is_empty() {
        return None;
    }
    let mut flags = String::new();
    for mat in re.find_iter(payload) {
        flags.push_str(&String::from_utf8_lossy(mat.as_bytes()));
        flags.push(';');
    }
    if flags.is_empty() {
        None
    } else {
        Some(flags)
    }
}
#[allow(dead_code)]
/// Reads a pcap file and decodes every packet that passes `filter_str`.
///
/// * `parse_file` - path of the capture file.
/// * `filter_str` - capture filter program applied to the capture.
/// * `regex_filter` - pattern run over each raw packet; matches end up in
///   `QryData::reg_res`.
///
/// Reading stops at the first `Err` returned by the capture. Packets with a
/// link type other than 1 (Ethernet) or 12 (handled as raw IP) are recorded
/// with timestamp and regex result only.
///
/// # Panics
/// Panics if the file cannot be opened, the filter or the regex is invalid,
/// or a packet fails to decode.
pub fn parse(parse_file: &std::path::Path, filter_str: &str, regex_filter: &str) -> Vec<QryData> {
    let mut v: Vec<QryData> = Vec::new();
    let mut cap = Capture::from_file(parse_file).unwrap();
    Capture::filter(&mut cap, filter_str).unwrap();
    let linktype = cap.get_datalink();
    println!("{:?}", &linktype);
    let re = Regex::new(regex_filter).unwrap();
    while let Ok(packet) = cap.next() {
        let mut me = QryData::new();
        match linktype {
            // Link type values taken from https://www.tcpdump.org/linktypes.html
            Linktype(1) => me.encap_en10mb(packet.data).unwrap(),
            Linktype(12) => me.encap_raw(packet.data).unwrap(),
            _ => (),
        };
        me.time = (packet.header.ts.tv_usec as f64 / 1000000.0) + packet.header.ts.tv_sec as f64;
        // Regex parser on complete packet [u8] data
        me.reg_res = flag_carnage(&re, packet.data);
        // The record is moved into the vector; cloning it would copy the payload for nothing.
        v.push(me);
    }
    v
}
/* This could need some love */
/// Captures live packets from a network device and decodes at most
/// `insert_max` of them.
///
/// * `parse_device` - name of the capture device.
/// * `filter_str` - capture filter program applied to the capture.
/// * `insert_max` - upper bound on the number of records returned; with 0 the
///   function returns an empty vector without waiting for a packet.
/// * `regex_filter` - pattern run over each raw packet; matches end up in
///   `QryData::reg_res`.
///
/// Capturing also stops at the first `Err` returned by the capture.
///
/// # Panics
/// Panics if the device cannot be opened, the filter or the regex is invalid,
/// or a packet fails to decode.
pub fn parse_device(
    // Device parsing lives in its own function so that pcap-file parsing does not
    // pay for the per-packet limit check (the author's guess at saving CPU cycles).
    parse_device: &str,
    filter_str: &str,
    insert_max: &usize,
    regex_filter: &str,
) -> Vec<QryData> {
    let mut v: Vec<QryData> = Vec::new();
    let mut cap = Capture::from_device(parse_device).unwrap().open().unwrap();
    Capture::filter(&mut cap, filter_str).unwrap();
    let linktype = cap.get_datalink();
    let re = Regex::new(regex_filter).unwrap();
    // The limit is checked before each read, so a limit of 0 never blocks on the device.
    while v.len() < *insert_max {
        let packet = match cap.next() {
            Ok(packet) => packet,
            Err(_) => break,
        };
        let mut me = QryData::new();
        match linktype {
            Linktype(1) => me.encap_en10mb(packet.data).unwrap(),
            Linktype(12) => me.encap_raw(packet.data).unwrap(),
            _ => (),
        };
        me.time = (packet.header.ts.tv_usec as f64 / 1000000.0) + packet.header.ts.tv_sec as f64;
        me.reg_res = flag_carnage(&re, packet.data);
        // The record is moved into the vector; cloning it would copy the payload for nothing.
        v.push(me);
    }
    v
}
#[allow(dead_code)]
/// Parses a pcap file on a background thread and streams the records.
///
/// Returns the receiving end of a bounded channel (capacity 100) together
/// with the worker's join handle. The worker stops when the capture returns
/// an `Err` or when the receiver has been dropped.
///
/// The worker thread panics if the file cannot be opened, the filter or the
/// regex is invalid, or a packet fails to decode.
pub fn mpsc_parser(
    parse_file: std::path::PathBuf,
    filter_str: String,
    regex_filter: String,
) -> (Receiver<QryData>, JoinHandle<()>) {
    let (tx, rx) = sync_channel(100);
    let worker = spawn(move || {
        let mut capture = Capture::from_file(parse_file).unwrap();
        Capture::filter(&mut capture, &filter_str).unwrap();
        let link = capture.get_datalink();
        let pattern = Regex::new(&regex_filter).unwrap();
        loop {
            let packet = match capture.next() {
                Ok(packet) => packet,
                Err(_) => break,
            };
            let mut record = QryData::new();
            // Link type values taken from https://www.tcpdump.org/linktypes.html
            match link {
                Linktype(1) => record.encap_en10mb(packet.data).unwrap(),
                Linktype(12) => record.encap_raw(packet.data).unwrap(),
                _ => (),
            };
            let stamp = &packet.header.ts;
            record.time = (stamp.tv_usec as f64 / 1000000.0) + stamp.tv_sec as f64;
            // Regex parser on complete packet [u8] data
            record.reg_res = flag_carnage(&pattern, packet.data);
            // A send error means the receiver is gone; nobody is listening any more.
            if tx.send(record).is_err() {
                break;
            }
        }
    });
    (rx, worker)
}
#[allow(dead_code)]
/// Async-callable variant of the pcap-file parser.
///
/// Decodes every packet of `parse_file` that passes `filter_str` and runs
/// `regex_filter` over each raw packet. Reading stops at the first `Err`
/// returned by the capture.
///
/// NOTE: the body contains no `.await`; all file reading and decoding happens
/// synchronously inside a single poll of the returned future.
///
/// # Panics
/// Panics if the file cannot be opened, the filter or the regex is invalid,
/// or a packet fails to decode.
pub async fn tokio_parse<'a>(
    parse_file: std::path::PathBuf,
    filter_str: &'a str,
    regex_filter: &'a str,
) -> Vec<QryData> {
    let mut v: Vec<QryData> = Vec::new();
    let mut cap = Capture::from_file(parse_file).unwrap();
    Capture::filter(&mut cap, filter_str).unwrap();
    let linktype = cap.get_datalink();
    let re = Regex::new(regex_filter).unwrap();
    while let Ok(packet) = cap.next() {
        let mut me = QryData::new();
        match linktype {
            // Link type values taken from https://www.tcpdump.org/linktypes.html
            Linktype(1) => me.encap_en10mb(packet.data).unwrap(),
            Linktype(12) => me.encap_raw(packet.data).unwrap(),
            _ => (),
        };
        me.time = (packet.header.ts.tv_usec as f64 / 1000000.0) + packet.header.ts.tv_sec as f64;
        // Regex parser on complete packet [u8] data
        me.reg_res = flag_carnage(&re, packet.data);
        // The record is moved into the vector; cloning it would copy the payload for nothing.
        v.push(me);
    }
    v
}