From 2e2c8c5e5b2777b0388bd00c3c410402e2dff860 Mon Sep 17 00:00:00 2001
From: gurkenhabicht
Date: Fri, 19 Jun 2020 02:03:28 +0200
Subject: [PATCH] solved logic bug, clean up

---
 README.md       | 7 +++++--
 src/main.rs     | 2 +-
 src/parser.json | 6 +++---
 3 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index ee1f128..ceca250 100644
--- a/README.md
+++ b/README.md
@@ -1,9 +1,9 @@
 # This is experimental
 
-The software is written in Rust (2018, safe mode only). At the current state I have some fun writing and testing language features. The code should be modular enough to change any function you deem awfull enough.
+The software is written in Rust (2018, safe mode only). At the current state I am having fun writing in Rust and testing language features. The code should be modular enough to change any function you deem awful enough.
 Error handling is subpar at the moment. There is no real unit testing to speak of since switching to asynchronous functionality. Testing will come back.
 
-This version is a successor of the _POSIX_C_SOURCE 200809L implementation in which all data parsed from a cap/pcapng files is written as a single and simple query. The ingestion time is rather fast (tested writes: 100*10^3 tcp packets in ~1.8 sec) but may be insecure. See the other repository.
+This version is a successor of the _POSIX_C_SOURCE 200809L implementation in which all data parsed from cap/pcapng files is written as a single, simple query. The ingestion time is rather fast (tested writes: 100*10^3 tcp packets in ~1.8 sec) but may be insecure. See the other repository for more information.
 The idea of this iteration is to use a prepared statement and chunk the data according to maximum input. Postgres databases have a custom maximum limit on each insert query of prepared statements. Said chunk size is initialized through the config/interface file called parser.json as `insert_max`.
 Data can be read from PCAP/PCAPNG files, as well as network devices.
 Process is as follows:
@@ -27,6 +27,9 @@ Speaking of serialization: After profiling it turns out that ~20% of cpu time is
 
 Another subgoal was the ability to compile a static binary, which --last time I tested-- works without dependencies except for libpcap itself. It even executes on Oracle Linux after linking directly against the elf64 interpreter. If you ever had the pleasure of using this derivative, that may come as a surprise to you. The key is to compile via the `x86_64-unknown-linux-musl` target. See: https://doc.rust-lang.org/edition-guide/rust-2018/platform-and-target-support/musl-support-for-fully-static-binaries.html
 
+Caveats: Regex syntax is limited at the moment, because the pattern is not compiled from a raw string but from a common one. Escaping does not work properly; character classes do. I still have to work out the correct syntactical way to get the pattern out of the JSON file and into a raw string. For the regular expression syntax that is already supported see https://docs.rs/regex/1.3.9/regex/#syntax, and the example in `parser.json`.
+Transmitting the data of the previously described test table layout results in a rather large table. HDD space has not been an issue so far.
+
 If this whole thing turns out to be viable, some future features may be:
 
 - Database containing file hash map to compare file status/sizes after the parser may have crashed, or to join a complete overview of any existing PCAP files.
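For context, the chunking scheme the README hunk describes (batching rows so each prepared INSERT stays below the Postgres parameter limit set by `insert_max`) can be sketched as below. This is a minimal illustration, not the crate's actual code; `chunk_for_insert` is a hypothetical helper name.

```rust
use std::slice::Chunks;

/// Split serialized packets into batches of at most `insert_max` rows,
/// so each prepared INSERT stays below the Postgres parameter limit.
fn chunk_for_insert<T>(packets: &[T], insert_max: usize) -> Chunks<'_, T> {
    // `slice::chunks` panics on a chunk size of 0, so clamp to at least 1.
    packets.chunks(insert_max.max(1))
}

fn main() {
    let packets: Vec<u32> = (0..45_000).collect();
    for batch in chunk_for_insert(&packets, 20_000) {
        // In the real parser, one prepared-statement execution per batch.
        println!("would insert {} rows", batch.len());
    }
}
```

`chunks` yields full-sized batches plus one final remainder batch, so the same loop covers both the chunked case and a capture smaller than `insert_max`.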
diff --git a/src/main.rs b/src/main.rs
index cec8bcb..00a452d 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -74,7 +74,7 @@ async fn main() -> Result<(), Error> {
     // packets_serialized.extend(serializer::serialize_packets(v));
 
     /* Do chunks and query data */
-    let chunker = &packets_serialized.len() < &config.insert_max;
+    let chunker = (&packets_serialized.len() < &config.insert_max) && (0 < packets_serialized.len());
     match chunker {
         NON_CHUNKED => {
             let insert_str = query_string(&packets_serialized.len(), &config.tablename);
diff --git a/src/parser.json b/src/parser.json
index 17a2be2..caa7be3 100644
--- a/src/parser.json
+++ b/src/parser.json
@@ -1,11 +1,11 @@
 {
-    "insert_max": 16000,
-    "filter": "!ip6 && tcp",
+    "insert_max": 20000,
+    "filter": "tcp",
     "regex_filter": "(?:http|https)[[:punct:]]+[[:alnum:]]+[[:punct:]][[:alnum:]]+[[:punct:]](?:com|de|org)",
     "from_device": false,
     "parse_device": "enp7s0",
     "pcap_file": "not in use right now",
-    "pcap_dir": "../target/files",
+    "pcap_dir": "../target",
     "database_tablename": "json_dump",
     "database_user": "postgres",
     "database_host": "localhost",
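A note on the `src/main.rs` change: the condition compares references (`&packets_serialized.len() < &config.insert_max`), which works because `PartialOrd` on references delegates to the pointed-to values, but a plain value comparison reads more clearly. Assuming `NON_CHUNKED` is the `true` arm of the `match`, the fixed guard is equivalent to the sketch below; `non_chunked` is a hypothetical name for illustration.

```rust
/// Value-level version of the fixed guard: a capture takes the
/// single-query path only if it is non-empty and fits under the
/// configured `insert_max`. Before the fix, an empty capture
/// satisfied `len < insert_max` alone and took the wrong branch.
fn non_chunked(len: usize, insert_max: usize) -> bool {
    0 < len && len < insert_max
}

fn main() {
    assert!(!non_chunked(0, 20_000));      // empty capture: no single query
    assert!(non_chunked(4_500, 20_000));   // small capture: one prepared insert
    assert!(!non_chunked(60_000, 20_000)); // large capture: chunked path
}
```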