From 0299eb846a233592d2ba24f0b8582924723113d6 Mon Sep 17 00:00:00 2001 From: Tobias Dussa Date: Fri, 19 Mar 2021 08:34:56 +0100 Subject: [PATCH 1/2] Added compact database scheme. --- README.md | 32 ++++++++++++++ src/addrwatch.c | 4 ++ src/addrwatch.h | 2 + src/output_sqlite.c | 101 +++++++++++++++++++++++++++++++++++++++----- 4 files changed, 129 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 190b4e7..97dff5f 100644 --- a/README.md +++ b/README.md @@ -274,3 +274,35 @@ Ethernet/ip pairing discovery can be triggered by these types of events: * ND_DAD - Duplicate Address Detection packet. Source MAC (Ethernet header) and target address (NS header) is saved. +Compact database +---------------- + +This version of `addrwatch` features a compact database mode for SQL. This +is meant to minimize the amount of storage required by storing not all data +sets, but only summaries. The ordinary database scheme stores the +following information (in parentheses, their SQL column identifiers are +given): + * timestamp (`timestamp`) + * interface (`interface`) + * VLAN (`vlan_tag`) + * MAC address (`mac_address`) + * IP address (`ip_address`) + * event type (`origin`) +One such entry is created per received event. In contrast, in compact +mode, the following information is stored: + * first-seen timestamp (`timestamp_first`) + * last-seen timestamp (`timestamp`) + * interface (`interface`) + * VLAN (`vlan_tag`) + * MAC address (`mac_address`) + * IP address (`ip_address`) + * event type (`origin`) + * count (`count`) +The quintuple `(interface, VLAN, MAC address, IP addres, event type)` is +used as a unique identifier. If the quintuple has not been seen before, a +new data row is inserted; if it _has_ been seen before, then just the +last-seen timestamp and the count fields are updated accordingly. + +The SQL column names of the original (non-compact) are deliberately left +unchanged so that any tools that work with the non-compact database format +will likely still work (albeit with less information). diff --git a/src/addrwatch.c b/src/addrwatch.c index e0d5a46..985f4f2 100644 --- a/src/addrwatch.c +++ b/src/addrwatch.c @@ -42,6 +42,7 @@ static struct argp_option options[] = { { 0, 0, 0, 0, "Options for data output:" { "quiet", 'q', 0, 0, "Suppress any output to stdout and stderr.", 0 }, { "verbose", 'v', 0, 0, "Enable debug messages.", 0 }, #if HAVE_LIBSQLITE3 + { "compact", 'c', 0, 0, "Use compact database scheme.", 0 }, { "sqlite3", 's', "FILE", 0, "Output data to sqlite3 database FILE.", 0 }, { "sqlite3-table", 2, "TBL", 0, "Use sqlite table TBL (default: " PACKAGE ").", 0 }, #endif @@ -119,6 +120,9 @@ static error_t parse_opt(int key, char *arg, struct argp_state *state) } break; #if HAVE_LIBSQLITE3 + case 'c': + cfg.sqlite_compact = 1; + break; case 's': cfg.sqlite_file = arg; break; diff --git a/src/addrwatch.h b/src/addrwatch.h index ebdea61..ff454bc 100644 --- a/src/addrwatch.h +++ b/src/addrwatch.h @@ -67,10 +67,12 @@ struct addrwatch_config { } shm_data; #if HAVE_LIBSQLITE3 + int sqlite_compact; char *sqlite_file; char *sqlite_table; sqlite3 *sqlite_conn; sqlite3_stmt *sqlite_stmt; + sqlite3_stmt *sqlite_stmt2; #endif struct event_base *eb; #if HAVE_LIBEVENT2 diff --git a/src/output_sqlite.c b/src/output_sqlite.c index a97193e..9ad69fe 100644 --- a/src/output_sqlite.c +++ b/src/output_sqlite.c @@ -13,7 +13,21 @@ ip_address varchar(42), \ origin TINYINT\ );"; +static const char sqlite_create_template_compact[] = "\ +CREATE TABLE IF NOT EXISTS %s(\ +timestamp_first UNSIGNED BIG INT, \ +timestamp UNSIGNED BIG INT, \ +interface varchar(16), \ +vlan_tag UNSIGNED INT, \ +mac_address varchar(17), \ +ip_address varchar(42), \ +origin TINYINT,\ +count UNSIGNED BIG INT \ +);"; + static const char sqlite_insert_template[] = "INSERT INTO %s VALUES(?, ?, ?, ?, ?, ?);"; +static const char sqlite_insert_template_compact[] = "INSERT INTO %s VALUES(?, ?, ?, ?, ?, ?, ?, 1);"; +static const char sqlite_update_template_compact[] = "UPDATE %s SET timestamp=?, count=count+1 WHERE interface=? AND vlan_tag=? AND mac_address=? AND ip_address=? AND origin=?;"; void output_sqlite_init() { @@ -21,6 +35,9 @@ void output_sqlite_init() int rc; char create_query[sizeof(sqlite_create_template) + 64]; char insert_query[sizeof(sqlite_insert_template) + 64]; + char create_query_compact[sizeof(sqlite_create_template_compact) + 64]; + char insert_query_compact[sizeof(sqlite_insert_template_compact) + 64]; + char update_query_compact[sizeof(sqlite_update_template_compact) + 64]; if (!cfg.sqlite_file) { return; @@ -28,23 +45,49 @@ void output_sqlite_init() snprintf(create_query, sizeof(create_query), sqlite_create_template, cfg.sqlite_table); snprintf(insert_query, sizeof(insert_query), sqlite_insert_template, cfg.sqlite_table); + snprintf(update_query_compact, sizeof(update_query_compact), sqlite_update_template_compact, cfg.sqlite_table); + snprintf(create_query_compact, sizeof(create_query_compact), sqlite_create_template_compact, cfg.sqlite_table); + snprintf(insert_query_compact, sizeof(insert_query_compact), sqlite_insert_template_compact, cfg.sqlite_table); rc = sqlite3_open(cfg.sqlite_file, &cfg.sqlite_conn); if (rc) { log_msg(LOG_ERR, "Unable to open sqlite3 database file %s", cfg.sqlite_file); } - log_msg(LOG_DEBUG, "Using sqlite create query: %s", create_query); - rc = sqlite3_exec(cfg.sqlite_conn, create_query, 0, 0, 0); - if (rc) { - log_msg(LOG_ERR, "Error creating table `addrwatch` in sqlite3 database"); - } - log_msg(LOG_DEBUG, "Using sqlite insert query: %s", insert_query); - rc = sqlite3_prepare_v2(cfg.sqlite_conn, insert_query, - sizeof(insert_query), &cfg.sqlite_stmt, NULL); - if (rc) { - log_msg(LOG_ERR, "Error preparing sqlite insert statement"); + if (cfg.sqlite_compact) { + log_msg(LOG_DEBUG, "Using sqlite create query: %s", create_query_compact); + rc = sqlite3_exec(cfg.sqlite_conn, create_query_compact, 0, 0, 0); + if (rc) { + log_msg(LOG_ERR, "Error creating table `addrwatch` in sqlite3 database"); + } + + log_msg(LOG_DEBUG, "Using sqlite insert query: %s", insert_query_compact); + rc = sqlite3_prepare_v2(cfg.sqlite_conn, insert_query_compact, + sizeof(insert_query_compact), &cfg.sqlite_stmt2, NULL); + if (rc) { + log_msg(LOG_ERR, "Error preparing sqlite insert statement"); + } + + log_msg(LOG_DEBUG, "Using sqlite update query: %s", update_query_compact); + rc = sqlite3_prepare_v2(cfg.sqlite_conn, update_query_compact, + sizeof(update_query_compact), &cfg.sqlite_stmt, NULL); + if (rc) { + log_msg(LOG_ERR, "Error preparing sqlite update statement"); + } + } else { + log_msg(LOG_DEBUG, "Using sqlite create query: %s", create_query); + rc = sqlite3_exec(cfg.sqlite_conn, create_query, 0, 0, 0); + if (rc) { + log_msg(LOG_ERR, "Error creating table `addrwatch` in sqlite3 database"); + } + + log_msg(LOG_DEBUG, "Using sqlite insert query: %s", insert_query); + rc = sqlite3_prepare_v2(cfg.sqlite_conn, insert_query, + sizeof(insert_query), &cfg.sqlite_stmt, NULL); + if (rc) { + log_msg(LOG_ERR, "Error preparing sqlite insert statement"); + } } sqlite3_busy_timeout(cfg.sqlite_conn, 100); @@ -94,6 +137,41 @@ void output_sqlite_save(struct pkt *p, char *mac_str, char *ip_str) if (rc && rc != SQLITE_BUSY) { log_msg(LOG_ERR, "Error reseting sqlite prepared statement (%d)", rc); } + + if (cfg.sqlite_compact) { + // If no rows have bee affected by the update statement, + // insert a new row + if (sqlite3_changes(cfg.sqlite_conn) == 0) { + rc = sqlite3_bind_int64(cfg.sqlite_stmt2, 1, p->pcap_header->ts.tv_sec); + rc += sqlite3_bind_int64(cfg.sqlite_stmt2, 2, p->pcap_header->ts.tv_sec); + rc += sqlite3_bind_text(cfg.sqlite_stmt2, 3, p->ifc->name, -1, NULL); + rc += sqlite3_bind_int(cfg.sqlite_stmt2, 4, p->vlan_tag); + rc += sqlite3_bind_text(cfg.sqlite_stmt2, 5, mac_str, -1, NULL); + rc += sqlite3_bind_text(cfg.sqlite_stmt2, 6, ip_str, -1, NULL); + rc += sqlite3_bind_int(cfg.sqlite_stmt2, 7, p->origin); + if (rc) { + log_msg(LOG_ERR, "Unable to bind values to sql statement"); + } + + rc = sqlite3_step(cfg.sqlite_stmt2); + switch (rc) { + case SQLITE_DONE: + break; + case SQLITE_BUSY: + log_msg(LOG_WARNING, "Unable to execute sqlite prepared statement, database is locked (%ld, %s, %s, %s)", + p->pcap_header->ts.tv_sec, p->ifc->name, mac_str, ip_str); + break; + default: + log_msg(LOG_ERR, "Error executing sqlite prepared statement (%d)", rc); + break; + } + + rc = sqlite3_reset(cfg.sqlite_stmt2); + if (rc && rc != SQLITE_BUSY) { + log_msg(LOG_ERR, "Error reseting sqlite prepared statement (%d)", rc); + } + } + } #endif } @@ -102,6 +180,9 @@ void output_sqlite_close() #if HAVE_LIBSQLITE3 if (cfg.sqlite_conn) { sqlite3_finalize(cfg.sqlite_stmt); + if (cfg.sqlite_compact) { + sqlite3_finalize(cfg.sqlite_stmt2); + } sqlite3_close(cfg.sqlite_conn); } #endif From 67edd5d5a5927c6419577e7241af325cfa743990 Mon Sep 17 00:00:00 2001 From: Tobias Dussa Date: Fri, 19 Mar 2021 08:47:17 +0100 Subject: [PATCH 2/2] Update README.md --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 97dff5f..dc71fc7 100644 --- a/README.md +++ b/README.md @@ -282,14 +282,17 @@ is meant to minimize the amount of storage required by storing not all data sets, but only summaries. The ordinary database scheme stores the following information (in parentheses, their SQL column identifiers are given): + * timestamp (`timestamp`) * interface (`interface`) * VLAN (`vlan_tag`) * MAC address (`mac_address`) * IP address (`ip_address`) * event type (`origin`) + One such entry is created per received event. In contrast, in compact mode, the following information is stored: + * first-seen timestamp (`timestamp_first`) * last-seen timestamp (`timestamp`) * interface (`interface`) @@ -298,6 +301,7 @@ mode, the following information is stored: * IP address (`ip_address`) * event type (`origin`) * count (`count`) + The quintuple `(interface, VLAN, MAC address, IP addres, event type)` is used as a unique identifier. If the quintuple has not been seen before, a new data row is inserted; if it _has_ been seen before, then just the