gurkenhabicht 2020-06-16 01:48:59 +02:00
parent 7dc1788597
commit 6a2dc9cff6
7 changed files with 18 additions and 64465 deletions

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -77,14 +77,14 @@ pub fn from_json_file() -> Option<Config> {
 File signature and encapsulation type from file
 See: https://www.tcpdump.org/linktypes.html
 */
-// Futher:file.len() is included in metadata() but only shows up if called explicitly. Maybe this is not needed at all in the end
+// Further:file.len() is included in metadata() but only shows up if called explicitly. Maybe this is not needed at all in the end
 // This would be needed for comparability over time. print metadata and you will see
 fn bytes_from_file(entry: std::path::PathBuf) -> Result<([u8; 4], u16, u16), std::io::Error> {
     let mut magic_number: [u8; 4] = [0; 4];
     let mut buffer: [u8; 32] = [0; 32];
     let mut _file = File::open(entry.to_owned())?;
     _file.read_exact(&mut buffer)?;
-    magic_number.clone_from_slice(&buffer[0..4]);
+    magic_number.copy_from_slice(&buffer[0..4]);
     let enc_pcap: u16 = LittleEndian::read_u16(&buffer[20..22]);
     let enc_pcapng: u16 = LittleEndian::read_u16(&buffer[12..14]);
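The clone_from_slice to copy_from_slice swap above is a no-cost cleanup, since u8 is Copy. A minimal sketch of the same header probe, assuming the byteorder crate as in the surrounding code; the path and function name are illustrative, not the repo's:

// Minimal sketch of the header probe in bytes_from_file, assuming the
// byteorder crate; path and function name are illustrative.
use byteorder::{ByteOrder, LittleEndian};
use std::fs::File;
use std::io::Read;

fn probe_capture_header(path: &std::path::Path) -> std::io::Result<([u8; 4], u16)> {
    let mut buffer = [0u8; 32];
    File::open(path)?.read_exact(&mut buffer)?;
    let mut magic_number = [0u8; 4];
    // copy_from_slice is the idiomatic call for Copy elements such as u8;
    // clone_from_slice also compiles but implies Clone semantics.
    magic_number.copy_from_slice(&buffer[0..4]);
    // Reads the low half of classic pcap's 4-byte link-type field at
    // offset 20, little-endian, as the original does.
    let linktype = LittleEndian::read_u16(&buffer[20..22]);
    Ok((magic_number, linktype))
}

fn main() -> std::io::Result<()> {
    let (magic, linktype) = probe_capture_header(std::path::Path::new("capture.pcap"))?;
    println!("magic: {:02x?}, linktype: {}", magic, linktype);
    Ok(())
}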

View File

@@ -11,6 +11,8 @@ use tokio_postgres::{Error, NoTls};
 /* conditionals */
 const FROM_FILE: bool = false;
 const FROM_DEVICE: bool = true;
+const NON_CHUNKED: bool = true;
+const CHUNKED: bool = false;

 fn query_string(insert_max: &usize) -> String {
     let mut insert_template = String::with_capacity(insert_max * 8 + 43);
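The two new constants exist so the match further down reads as intent rather than bare booleans. A small sketch of the pattern; the constant names mirror the commit, the helper itself is illustrative:

// Sketch of the named-constant match introduced above.
const NON_CHUNKED: bool = true;
const CHUNKED: bool = false;

fn insert_mode(row_count: usize, insert_max: usize) -> &'static str {
    // The consts act as ordinary patterns; the match stays exhaustive
    // because both bool values are covered.
    match row_count < insert_max {
        NON_CHUNKED => "single INSERT",
        CHUNKED => "chunked INSERTs",
    }
}

fn main() {
    assert_eq!(insert_mode(15_000, 20_000), "single INSERT");
    assert_eq!(insert_mode(45_000, 20_000), "chunked INSERTs");
}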
@@ -59,13 +61,13 @@ async fn main() -> Result<(), Error> {
         FROM_FILE => {
             for (_pcap_file, _pcap_info) in pcap_map.iter() {
                 println!("{:?}", &_pcap_file);
-                // TODO: Tuning vector capacity according to mean average & std dev of packet size
+                // TODO: Tuning vector capacity according to mean average & std dev of packet sizes
                 let v: Vec<parser::QryData> =
                     parser::parse(&_pcap_file, &config.filter, &config.regex_filter);
+                let packets_serialized = serializer::serialize_packets(v);
                 // let mut v = Vec::<parser::QryData>::with_capacity(100000);
                 // v.extend(parser::parse(&_pcap_file, &config.filter, &config.regex_filter));
-                let packets_serialized = serializer::serialize_packets(v);
                 // let mut packets_serialized = Vec::<serde_json::Value>::with_capacity(100000);
                 // packets_serialized.extend(serializer::serialize_packets(v));
@@ -74,18 +76,17 @@ async fn main() -> Result<(), Error> {
                 let remainder: usize = packets_serialized.len() % config.insert_max;
                 let chunker = &packets_serialized.len() < &config.insert_max;
                 match chunker {
-                    true => {
+                    NON_CHUNKED => {
                         let insert_str = query_string(&packets_serialized.len());
-                        let statement_false = client.prepare(&insert_str).await?;
+                        let statement = client.prepare(&insert_str).await?;
                         client
                             .query_raw(
-                                &statement_false,
+                                &statement,
                                 packets_serialized.iter().map(|p| p as &dyn ToSql),
                             )
                             .await?;
                     }
-                    false => {
+                    CHUNKED => {
                         let insert_str = query_string(&config.insert_max);
                         let statement = client.prepare(&insert_str).await?;
@@ -98,15 +99,14 @@ async fn main() -> Result<(), Error> {
                             )
                             .await?;
                         }
                         if 0 < remainder {
                             let rem_str = query_string(&remainder);
-                            let statement_remainder = client.prepare(&rem_str).await?;
+                            let statement = client.prepare(&rem_str).await?;
                             let (_garbage, _input) =
                                 packets_serialized.split_at(packets_serialized.len() - remainder);
                             client
                                 .query_raw(
-                                    &statement_remainder,
+                                    &statement,
                                     _input.to_vec().iter().map(|p| p as &dyn ToSql),
                                 )
                                 .await?;
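The chunk/remainder arithmetic driving this hunk, as a detached sketch with no database involved; insert_max matches the config value, the row vector is made up:

// Sketch of the chunk/remainder split used by the CHUNKED arm.
fn chunk_bounds(len: usize, insert_max: usize) -> (usize, usize) {
    (len / insert_max, len % insert_max) // (full chunks, leftover rows)
}

fn main() {
    let rows = vec![0u32; 45_000];
    let insert_max = 20_000;
    let (chunks, remainder) = chunk_bounds(rows.len(), insert_max);
    assert_eq!((chunks, remainder), (2, 5_000));
    // Mirrors the split_at call in the hunk: the tail holds the remainder.
    let (_bulk, tail) = rows.split_at(rows.len() - remainder);
    assert_eq!(tail.len(), remainder);
}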

View File

@ -1,7 +1,7 @@
{ {
"insert_max": 20000, "insert_max": 20000,
"filter": "!ip6 && tcp", "filter": "ip6 && tcp",
"regex_filter": "http://", "regex_filter": "(?:http|https)[[::punct::]]//([[::word::]]+\\.)*",
"from_device": false, "from_device": false,
"parse_device": "enp7s0", "parse_device": "enp7s0",
"pcap_file": "", "pcap_file": "",

View File

@@ -12,7 +12,7 @@ impl Serialize for parser::QryData {
     where
         S: Serializer,
     {
-        let mut state = serializer.serialize_struct("parser::QryData", 11)?;
+        let mut state = serializer.serialize_struct("parser::QryData", 9)?;
        state.serialize_field("time", &self.time)?;
        state.serialize_field("ether_header", &self.ether_header)?;
        state.serialize_field("ipv4_header", &self.ipv4_header)?;
@@ -28,12 +28,12 @@ impl Serialize for parser::QryData {
 pub fn serialize_packets(v: Vec<parser::QryData>) -> Vec<serde_json::Value> {
     /* rayon parallelized */
-    // TODO: Benchmark. As far as I tested, this reaps no benefit.
+    // TODO: Benchmark. As far as I've tested, using rayon reaps no benefit nor does it run any slower. I leave it in for now.
     let packets_serialized = v
         .par_iter()
         .map(|x| serde_json::to_value(x).unwrap())
         .collect();
-    //let packets_serialized: Vec<serde_json::Value> = v.par_iter().map(|x| json!(x)).collect();
+    // let packets_serialized: Vec<serde_json::Value> = v.par_iter().map(|x| json!(x)).collect();
     packets_serialized
 }
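A self-contained sketch of the rayon-parallel serialization kept above, with Packet standing in for parser::QryData; assumes rayon, serde (with derive), and serde_json as dependencies:

// Sketch of the par_iter-based JSON serialization.
use rayon::prelude::*;
use serde::Serialize;

#[derive(Serialize)]
struct Packet {
    time: f64,
    data: Vec<u8>,
}

fn serialize_packets(v: Vec<Packet>) -> Vec<serde_json::Value> {
    // par_iter fans the serde_json conversion across the thread pool; for
    // small batches a sequential iter() performs about the same.
    v.par_iter()
        .map(|x| serde_json::to_value(x).unwrap())
        .collect()
}

fn main() {
    let packets = vec![Packet { time: 1.0, data: vec![0xde, 0xad] }];
    println!("{:?}", serialize_packets(packets));
}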
@@ -50,12 +50,3 @@ pub fn serialize_packets_as_string(v: Vec<parser::QryData>) -> Vec<serde_json::Value> {
     packets_serialized
 }
-
-#[allow(dead_code)]
-fn smallest_prime_divisor(remainder: usize) -> usize {
-    let smallest_divisor: usize = (2..(remainder / 2))
-        .into_par_iter()
-        .find_first(|x| remainder % x == 0)
-        .unwrap();
-    smallest_divisor
-}