This commit is contained in:
gurkenhabicht 2020-06-16 01:48:59 +02:00
parent 7dc1788597
commit 6a2dc9cff6
7 changed files with 18 additions and 64465 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -77,14 +77,14 @@ pub fn from_json_file() -> Option<Config> {
File signature and encapsulation type from file
See: https://www.tcpdump.org/linktypes.html
*/
// Futher:file.len() is included in metadata() but only shows up if called explicitly. Maybe this is not needed at all in the end
// Further:file.len() is included in metadata() but only shows up if called explicitly. Maybe this is not needed at all in the end
// This would be needed for comparability over time. Print the metadata and you will see it.
fn bytes_from_file(entry: std::path::PathBuf) -> Result<([u8; 4], u16, u16), std::io::Error> {
let mut magic_number: [u8; 4] = [0; 4];
let mut buffer: [u8; 32] = [0; 32];
let mut _file = File::open(entry.to_owned())?;
_file.read_exact(&mut buffer)?;
magic_number.clone_from_slice(&buffer[0..4]);
magic_number.copy_from_slice(&buffer[0..4]);
let enc_pcap: u16 = LittleEndian::read_u16(&buffer[20..22]);
let enc_pcapng: u16 = LittleEndian::read_u16(&buffer[12..14]);

View File

@ -11,6 +11,8 @@ use tokio_postgres::{Error, NoTls};
/* conditionals */
// Named boolean constants used as `match` arm patterns in `main`, so the arms
// read as intent instead of bare `true` / `false`.
// `FROM_FILE` labels the arm that walks the pcap file map and parses each file.
const FROM_FILE: bool = false;
// NOTE(review): presumably the live-capture counterpart of `FROM_FILE`; its use
// is not visible in this section — confirm against `main`.
const FROM_DEVICE: bool = true;
// `NON_CHUNKED` labels the arm taken when the number of serialized packets is
// below `config.insert_max`: a single statement sized to the packet count.
const NON_CHUNKED: bool = true;
// `CHUNKED` labels the other arm: a statement sized by `config.insert_max`,
// followed by a separate statement for any remainder.
const CHUNKED: bool = false;
fn query_string(insert_max: &usize) -> String {
let mut insert_template = String::with_capacity(insert_max * 8 + 43);
@ -59,13 +61,13 @@ async fn main() -> Result<(), Error> {
FROM_FILE => {
for (_pcap_file, _pcap_info) in pcap_map.iter() {
println!("{:?}", &_pcap_file);
// TODO: Tuning vector capacity according to mean average & std dev of packet size
// TODO: Tuning vector capacity according to mean average & std dev of packet sizes
let v: Vec<parser::QryData> =
parser::parse(&_pcap_file, &config.filter, &config.regex_filter);
let packets_serialized = serializer::serialize_packets(v);
// let mut v = Vec::<parser::QryData>::with_capacity(100000);
// v.extend(parser::parse(&_pcap_file, &config.filter, &config.regex_filter));
let packets_serialized = serializer::serialize_packets(v);
// let mut packets_serialized = Vec::<serde_json::Value>::with_capacity(100000);
// packets_serialized.extend(serializer::serialize_packets(v));
@ -74,18 +76,17 @@ async fn main() -> Result<(), Error> {
let remainder: usize = packets_serialized.len() % config.insert_max;
let chunker = &packets_serialized.len() < &config.insert_max;
match chunker {
true => {
NON_CHUNKED => {
let insert_str = query_string(&packets_serialized.len());
let statement_false = client.prepare(&insert_str).await?;
let statement = client.prepare(&insert_str).await?;
client
.query_raw(
&statement_false,
&statement,
packets_serialized.iter().map(|p| p as &dyn ToSql),
)
.await?;
}
false => {
CHUNKED => {
let insert_str = query_string(&config.insert_max);
let statement = client.prepare(&insert_str).await?;
@ -98,15 +99,14 @@ async fn main() -> Result<(), Error> {
)
.await?;
}
if 0 < remainder {
let rem_str = query_string(&remainder);
let statement_remainder = client.prepare(&rem_str).await?;
let statement = client.prepare(&rem_str).await?;
let (_garbage, _input) =
packets_serialized.split_at(packets_serialized.len() - remainder);
client
.query_raw(
&statement_remainder,
&statement,
_input.to_vec().iter().map(|p| p as &dyn ToSql),
)
.await?;

View File

@ -1,7 +1,7 @@
{
"insert_max": 20000,
"filter": "!ip6 && tcp",
"regex_filter": "http://",
"filter": "ip6 && tcp",
"regex_filter": "(?:http|https)[[::punct::]]//([[::word::]]+\\.)*",
"from_device": false,
"parse_device": "enp7s0",
"pcap_file": "",

View File

@ -12,7 +12,7 @@ impl Serialize for parser::QryData {
where
S: Serializer,
{
let mut state = serializer.serialize_struct("parser::QryData", 11)?;
let mut state = serializer.serialize_struct("parser::QryData", 9)?;
state.serialize_field("time", &self.time)?;
state.serialize_field("ether_header", &self.ether_header)?;
state.serialize_field("ipv4_header", &self.ipv4_header)?;
@ -28,12 +28,12 @@ impl Serialize for parser::QryData {
/// Turns every parsed packet in `v` into a `serde_json::Value`.
///
/// The conversion runs through rayon's `par_iter()`.
///
/// # Panics
///
/// Panics if `serde_json::to_value` returns an error for any packet.
pub fn serialize_packets(v: Vec<parser::QryData>) -> Vec<serde_json::Value> {
    // TODO: Benchmark. In tests so far rayon was neither faster nor slower than
    // a sequential iterator, so it stays in for now.
    // An equivalent formulation would map each packet through `json!(packet)`.
    v.par_iter()
        .map(|packet| serde_json::to_value(packet).unwrap())
        .collect()
}
@ -50,12 +50,3 @@ pub fn serialize_packets_as_string(v: Vec<parser::QryData>) -> Vec<serde_json::V
packets_serialized
}
/// Returns the smallest divisor of `remainder` that is greater than 1.
///
/// The smallest such divisor of any integer is necessarily prime. Inputs that
/// have no divisor strictly between 1 and themselves (primes, and also `0` and
/// `1`) are returned unchanged instead of panicking.
///
/// Only candidates up to the square root of `remainder` are tried: if no
/// divisor exists in that range, none exists below `remainder` at all.
#[allow(dead_code)] // no caller is visible in this section of the file
fn smallest_prime_divisor(remainder: usize) -> usize {
    (2..)
        // `candidate <= remainder / candidate` is `candidate^2 <= remainder`
        // written so that it cannot overflow for large inputs.
        .take_while(|candidate| *candidate <= remainder / *candidate)
        .find(|candidate| remainder % candidate == 0)
        .unwrap_or(remainder)
}