From add5fb8c41df76bfd02f23317b138e073297a977 Mon Sep 17 00:00:00 2001 From: Christian Egger Date: Wed, 14 Sep 2016 14:36:39 +0200 Subject: [PATCH 01/13] Added a Sampling Module Added a module that allows the output to be sampled. With the new -q option, the user is now able to decide whether the output should be sampled as follows: Only every nth query is output and only responses that correspond to one of these sampled queries will be output. e.g.: dnscap -r ./file.pcap -g -q 10 will output only every 10th query and these queries' corresponding responses. --- src/dnscap.c | 100 ++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 96 insertions(+), 4 deletions(-) diff --git a/src/dnscap.c b/src/dnscap.c index 7e03072..720f064 100644 --- a/src/dnscap.c +++ b/src/dnscap.c @@ -128,6 +128,7 @@ static const char version_fmt[] = "V1.0-OARC-r%d (%s)"; #include #include #include +#include "../uthash/uthash.h" //for the hash used in the sample module #ifdef __linux__ extern char *strptime(const char *, const char *, struct tm *); @@ -287,6 +288,22 @@ struct plugin { }; LIST(struct plugin) plugins; +typedef struct +{ + UT_hash_handle hh; //makes the structure hashable using "uthash/uthash.h" + unsigned from; //these next 5 fields make up the compound key + int sport,dport,transaction_id; + unsigned to; + +}samplePacket ; + +typedef struct +{ + unsigned from; + int sport,dport,transaction_id; + unsigned to; +}sample_lookup_key; + /* Forward. */ static void setsig(int, int); @@ -384,6 +401,12 @@ static unsigned long long mem_limit = (unsigned) MEM_MAX; // process memory li static int mem_limit_set = 1; // Should be configurable const char DROPTOUSER[] = "nobody"; static pcap_thread_t pcap_thread = PCAP_THREAD_T_INIT; +static int sample = FALSE; +static unsigned sampleAmount; +static unsigned querycount; +static samplePacket *allSampleQueries = NULL; +static int chooseSidesResponse = FALSE; +static unsigned keylen; /* Public. 
*/ @@ -694,7 +717,8 @@ help_1(void) { "\t[-w [-W ] [-k ]] [-t ] [-c ] [-C ]\n" "\t[-x ]+ [-X ]+\n" "\t[-B ] [-E ]\n" - "\t[-P plugin.so] [-U ]\n", + "\t[-P plugin.so] [-U ]\n" + "\t[-q ]\n", ProgramName); } @@ -755,6 +779,8 @@ help_2(void) { "\t-E end collecting at this date and time\n" "\t-M set monitor mode on interfaces\n" "\t-D set immediate mode on interfaces\n" + "\t-q output only every nth DNS query and only output responses\n \ + if they correspond to one of the sampled queries\n" ); } @@ -782,7 +808,7 @@ parse_args(int argc, char *argv[]) { INIT_LIST(myregexes); INIT_LIST(plugins); while ((ch = getopt(argc, argv, - "a:bc:de:fgh:i:k:l:m:pr:s:t:u:w:x:" + "a:bc:de:fgh:i:k:l:m:pq:r:s:t:u:w:x:" #ifdef USE_SECCOMP "y" #endif @@ -889,6 +915,7 @@ parse_args(int argc, char *argv[]) { msg_wanted = u; break; case 's': + chooseSidesResponse = TRUE; u = 0; for (p = optarg; *p; p++) switch (*p) { @@ -898,6 +925,13 @@ parse_args(int argc, char *argv[]) { } dir_wanted = u; break; + case 'q': + sample = TRUE; + sampleAmount = atoi(optarg); + if(sampleAmount == 0) + usage("-q takes only unsigned integer values != 0"); + querycount = 0; + break; case 'h': u = 0; for (p = optarg; *p; p++) @@ -1099,6 +1133,11 @@ parse_args(int argc, char *argv[]) { usage("the -L and -l options are mutually exclusive"); if (background && (dumptrace || preso)) usage("the -b option is incompatible with -d and -g"); + if (sample && chooseSidesResponse) + { + if (!((dir_wanted & DIR_INITIATE) != 0) && ((dir_wanted & DIR_RESPONSE) != 0)) + usage("the -q option is incompatible with -s r"); + } if (dumptrace >= 1) { endpoint_ptr ep; const char *sep; @@ -2339,8 +2378,61 @@ network_pkt(const char *descr, my_bpftimeval ts, unsigned pf, abort(); } } - output(descr, from, to, proto, flags, sport, dport, ts, - pkt_copy, olen, dnspkt, dnslen); +/*Sample Module*/ + if (sample == TRUE) + { + ns_msg dnsmsgSample; + ns_initparse(dnspkt,dnslen,&dnsmsgSample); + samplePacket *currentQuery; + void *ipAddrBufferFrom 
= &from.u; + void *ipAddrBufferTo = &to.u; + unsigned *fromBuffer = (unsigned*)ipAddrBufferFrom; + unsigned *toBuffer = (unsigned*)ipAddrBufferTo; + + keylen = offsetof(samplePacket,to) //keylen is used to define which fields of the hash structure are added + + sizeof(*toBuffer) //as a compound key. Here, the key is composed of all fields between (and including) + - offsetof(samplePacket,from); //samplePacket->to and samplePacket->from (from, sport, dport, transaction_id, to) + + if(dns.qr == 0) + { + querycount++; + if(querycount % sampleAmount == 0) + { + currentQuery = malloc(sizeof(*currentQuery)); + memset(currentQuery, 0, sizeof(*currentQuery)); + currentQuery->from = *fromBuffer; + currentQuery->to = *toBuffer; + currentQuery->sport = sport; + currentQuery->dport = dport; + currentQuery->transaction_id = ns_msg_id(dnsmsgSample); + + HASH_ADD(hh,allSampleQueries,from,keylen,currentQuery); + output(descr,from,to,proto,flags,sport,dport,ts,pkt_copy,olen,dnspkt,dnslen); + } + } + else + { + sample_lookup_key *lookup_key = (sample_lookup_key*)malloc(sizeof(*lookup_key));; + memset(lookup_key, 0, sizeof(*lookup_key)); + lookup_key->from = *toBuffer; + lookup_key->to = *fromBuffer; + lookup_key->dport = sport; + lookup_key->sport = dport; + lookup_key->transaction_id = ns_msg_id(dnsmsgSample); + + HASH_FIND(hh,allSampleQueries,&lookup_key->from,keylen,currentQuery); + if(currentQuery) + { + HASH_DEL(allSampleQueries,currentQuery); + free(currentQuery); + output(descr,from,to,proto,flags,sport,dport,ts,pkt_copy,olen,dnspkt,dnslen); + } + free(lookup_key); + } + }else + { + output(descr,from,to,proto,flags,sport,dport,ts,pkt_copy,olen,dnspkt,dnslen); + } } /* From 0fbc9c22c6f9189417dbf9ee194c8e7c6ab31b4e Mon Sep 17 00:00:00 2001 From: Christian Egger Date: Wed, 14 Sep 2016 14:37:54 +0200 Subject: [PATCH 02/13] Updated dnscap.1.in Updated dnscap.1.in to match the Sampling Module addition --- src/dnscap.1.in | 6 ++++++ 1 file changed, 6 insertions(+) diff --git 
a/src/dnscap.1.in b/src/dnscap.1.in index 3fabd1f..73c9c89 100644 --- a/src/dnscap.1.in +++ b/src/dnscap.1.in @@ -53,6 +53,7 @@ .Op Fl E Ar datetime .Op Fl P Ar plugin.so .Op Fl U Ar str +.Op Fl q Ar unsigned int .Sh DESCRIPTION .Nm is a network capture utility designed specifically for DNS traffic. It @@ -353,6 +354,11 @@ Enable immediate mode on interfaces. Append "and .Ar str " to the pcap filter. +.It Fl q Ar unsigned int +Causes the output to be sampled after the application of all other filters. +Only every nth DNS initiation will be output and responses are only output +if they correspond to one of the sampled queries. This option cannot be +used with the -s r option. .El .Pp If started with no options, From 2a5449e51375f5b694bdafb71142cfe80cbe6338 Mon Sep 17 00:00:00 2001 From: Christian Egger Date: Wed, 14 Sep 2016 14:39:09 +0200 Subject: [PATCH 03/13] Added uthash.h uthash is used as the hash table library in the Sampling Module --- uthash/uthash.h | 1074 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1074 insertions(+) create mode 100644 uthash/uthash.h diff --git a/uthash/uthash.h b/uthash/uthash.h new file mode 100644 index 0000000..45d1f9f --- /dev/null +++ b/uthash/uthash.h @@ -0,0 +1,1074 @@ +/* +Copyright (c) 2003-2016, Troy D. Hanson http://troydhanson.github.com/uthash/ +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER +OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef UTHASH_H +#define UTHASH_H + +#define UTHASH_VERSION 2.0.1 + +#include /* memcmp,strlen */ +#include /* ptrdiff_t */ +#include /* exit() */ + +/* These macros use decltype or the earlier __typeof GNU extension. + As decltype is only available in newer compilers (VS2010 or gcc 4.3+ + when compiling c++ source) this code uses whatever method is needed + or, for VS2008 where neither is available, uses casting workarounds. */ +#if defined(_MSC_VER) /* MS compiler */ +#if _MSC_VER >= 1600 && defined(__cplusplus) /* VS2010 or newer in C++ mode */ +#define DECLTYPE(x) (decltype(x)) +#else /* VS2008 or older (or VS2010 in C mode) */ +#define NO_DECLTYPE +#define DECLTYPE(x) +#endif +#elif defined(__BORLANDC__) || defined(__LCC__) || defined(__WATCOMC__) +#define NO_DECLTYPE +#define DECLTYPE(x) +#else /* GNU, Sun and other compilers */ +#define DECLTYPE(x) (__typeof(x)) +#endif + +#ifdef NO_DECLTYPE +#define DECLTYPE_ASSIGN(dst,src) \ +do { \ + char **_da_dst = (char**)(&(dst)); \ + *_da_dst = (char*)(src); \ +} while (0) +#else +#define DECLTYPE_ASSIGN(dst,src) \ +do { \ + (dst) = DECLTYPE(dst)(src); \ +} while (0) +#endif + +/* a number of the hash function use uint32_t which isn't defined on Pre VS2010 */ +#if defined(_WIN32) +#if defined(_MSC_VER) && _MSC_VER >= 1600 +#include +#elif defined(__WATCOMC__) || defined(__MINGW32__) || defined(__CYGWIN__) +#include +#else +typedef unsigned int uint32_t; +typedef unsigned char uint8_t; +#endif 
+#elif defined(__GNUC__) && !defined(__VXWORKS__) +#include +#else +typedef unsigned int uint32_t; +typedef unsigned char uint8_t; +#endif + +#ifndef uthash_fatal +#define uthash_fatal(msg) exit(-1) /* fatal error (out of memory,etc) */ +#endif +#ifndef uthash_malloc +#define uthash_malloc(sz) malloc(sz) /* malloc fcn */ +#endif +#ifndef uthash_free +#define uthash_free(ptr,sz) free(ptr) /* free fcn */ +#endif +#ifndef uthash_strlen +#define uthash_strlen(s) strlen(s) +#endif +#ifndef uthash_memcmp +#define uthash_memcmp(a,b,n) memcmp(a,b,n) +#endif + +#ifndef uthash_noexpand_fyi +#define uthash_noexpand_fyi(tbl) /* can be defined to log noexpand */ +#endif +#ifndef uthash_expand_fyi +#define uthash_expand_fyi(tbl) /* can be defined to log expands */ +#endif + +/* initial number of buckets */ +#define HASH_INITIAL_NUM_BUCKETS 32U /* initial number of buckets */ +#define HASH_INITIAL_NUM_BUCKETS_LOG2 5U /* lg2 of initial number of buckets */ +#define HASH_BKT_CAPACITY_THRESH 10U /* expand when bucket count reaches */ + +/* calculate the element whose hash handle address is hhp */ +#define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)(hhp)) - ((tbl)->hho))) +/* calculate the hash handle from element address elp */ +#define HH_FROM_ELMT(tbl,elp) ((UT_hash_handle *)(((char*)(elp)) + ((tbl)->hho))) + +#define HASH_VALUE(keyptr,keylen,hashv) \ +do { \ + HASH_FCN(keyptr, keylen, hashv); \ +} while (0) + +#define HASH_FIND_BYHASHVALUE(hh,head,keyptr,keylen,hashval,out) \ +do { \ + (out) = NULL; \ + if (head) { \ + unsigned _hf_bkt; \ + HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _hf_bkt); \ + if (HASH_BLOOM_TEST((head)->hh.tbl, hashval) != 0) { \ + HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[ _hf_bkt ], keyptr, keylen, hashval, out); \ + } \ + } \ +} while (0) + +#define HASH_FIND(hh,head,keyptr,keylen,out) \ +do { \ + unsigned _hf_hashv; \ + HASH_VALUE(keyptr, keylen, _hf_hashv); \ + HASH_FIND_BYHASHVALUE(hh, head, keyptr, keylen, _hf_hashv, out); \ +} 
while (0) + +#ifdef HASH_BLOOM +#define HASH_BLOOM_BITLEN (1UL << HASH_BLOOM) +#define HASH_BLOOM_BYTELEN (HASH_BLOOM_BITLEN/8UL) + (((HASH_BLOOM_BITLEN%8UL)!=0UL) ? 1UL : 0UL) +#define HASH_BLOOM_MAKE(tbl) \ +do { \ + (tbl)->bloom_nbits = HASH_BLOOM; \ + (tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN); \ + if (!((tbl)->bloom_bv)) { uthash_fatal( "out of memory"); } \ + memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN); \ + (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \ +} while (0) + +#define HASH_BLOOM_FREE(tbl) \ +do { \ + uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \ +} while (0) + +#define HASH_BLOOM_BITSET(bv,idx) (bv[(idx)/8U] |= (1U << ((idx)%8U))) +#define HASH_BLOOM_BITTEST(bv,idx) (bv[(idx)/8U] & (1U << ((idx)%8U))) + +#define HASH_BLOOM_ADD(tbl,hashv) \ + HASH_BLOOM_BITSET((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1U))) + +#define HASH_BLOOM_TEST(tbl,hashv) \ + HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1U))) + +#else +#define HASH_BLOOM_MAKE(tbl) +#define HASH_BLOOM_FREE(tbl) +#define HASH_BLOOM_ADD(tbl,hashv) +#define HASH_BLOOM_TEST(tbl,hashv) (1) +#define HASH_BLOOM_BYTELEN 0U +#endif + +#define HASH_MAKE_TABLE(hh,head) \ +do { \ + (head)->hh.tbl = (UT_hash_table*)uthash_malloc( \ + sizeof(UT_hash_table)); \ + if (!((head)->hh.tbl)) { uthash_fatal( "out of memory"); } \ + memset((head)->hh.tbl, 0, sizeof(UT_hash_table)); \ + (head)->hh.tbl->tail = &((head)->hh); \ + (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS; \ + (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2; \ + (head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head); \ + (head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc( \ + HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ + if (! 
(head)->hh.tbl->buckets) { uthash_fatal( "out of memory"); } \ + memset((head)->hh.tbl->buckets, 0, \ + HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ + HASH_BLOOM_MAKE((head)->hh.tbl); \ + (head)->hh.tbl->signature = HASH_SIGNATURE; \ +} while (0) + +#define HASH_REPLACE_BYHASHVALUE_INORDER(hh,head,fieldname,keylen_in,hashval,add,replaced,cmpfcn) \ +do { \ + (replaced) = NULL; \ + HASH_FIND_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, replaced); \ + if (replaced) { \ + HASH_DELETE(hh, head, replaced); \ + } \ + HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, &((add)->fieldname), keylen_in, hashval, add, cmpfcn); \ +} while (0) + +#define HASH_REPLACE_BYHASHVALUE(hh,head,fieldname,keylen_in,hashval,add,replaced) \ +do { \ + (replaced) = NULL; \ + HASH_FIND_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, replaced); \ + if (replaced) { \ + HASH_DELETE(hh, head, replaced); \ + } \ + HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, add); \ +} while (0) + +#define HASH_REPLACE(hh,head,fieldname,keylen_in,add,replaced) \ +do { \ + unsigned _hr_hashv; \ + HASH_VALUE(&((add)->fieldname), keylen_in, _hr_hashv); \ + HASH_REPLACE_BYHASHVALUE(hh, head, fieldname, keylen_in, _hr_hashv, add, replaced); \ +} while (0) + +#define HASH_REPLACE_INORDER(hh,head,fieldname,keylen_in,add,replaced,cmpfcn) \ +do { \ + unsigned _hr_hashv; \ + HASH_VALUE(&((add)->fieldname), keylen_in, _hr_hashv); \ + HASH_REPLACE_BYHASHVALUE_INORDER(hh, head, fieldname, keylen_in, _hr_hashv, add, replaced, cmpfcn); \ +} while (0) + +#define HASH_APPEND_LIST(hh, head, add) \ +do { \ + (add)->hh.next = NULL; \ + (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail); \ + (head)->hh.tbl->tail->next = (add); \ + (head)->hh.tbl->tail = &((add)->hh); \ +} while (0) + +#define HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh,head,keyptr,keylen_in,hashval,add,cmpfcn) \ +do { \ + unsigned _ha_bkt; \ + (add)->hh.hashv = (hashval); \ + 
(add)->hh.key = (char*) (keyptr); \ + (add)->hh.keylen = (unsigned) (keylen_in); \ + if (!(head)) { \ + (add)->hh.next = NULL; \ + (add)->hh.prev = NULL; \ + (head) = (add); \ + HASH_MAKE_TABLE(hh, head); \ + } else { \ + struct UT_hash_handle *_hs_iter = &(head)->hh; \ + (add)->hh.tbl = (head)->hh.tbl; \ + do { \ + if (cmpfcn(DECLTYPE(head) ELMT_FROM_HH((head)->hh.tbl, _hs_iter), add) > 0) \ + break; \ + } while ((_hs_iter = _hs_iter->next)); \ + if (_hs_iter) { \ + (add)->hh.next = _hs_iter; \ + if (((add)->hh.prev = _hs_iter->prev)) { \ + HH_FROM_ELMT((head)->hh.tbl, _hs_iter->prev)->next = (add); \ + } else { \ + (head) = (add); \ + } \ + _hs_iter->prev = (add); \ + } else { \ + HASH_APPEND_LIST(hh, head, add); \ + } \ + } \ + (head)->hh.tbl->num_items++; \ + HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _ha_bkt); \ + HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt], &(add)->hh); \ + HASH_BLOOM_ADD((head)->hh.tbl, hashval); \ + HASH_EMIT_KEY(hh, head, keyptr, keylen_in); \ + HASH_FSCK(hh, head); \ +} while (0) + +#define HASH_ADD_KEYPTR_INORDER(hh,head,keyptr,keylen_in,add,cmpfcn) \ +do { \ + unsigned _hs_hashv; \ + HASH_VALUE(keyptr, keylen_in, _hs_hashv); \ + HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, keyptr, keylen_in, _hs_hashv, add, cmpfcn); \ +} while (0) + +#define HASH_ADD_BYHASHVALUE_INORDER(hh,head,fieldname,keylen_in,hashval,add,cmpfcn) \ + HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, &((add)->fieldname), keylen_in, hashval, add, cmpfcn) + +#define HASH_ADD_INORDER(hh,head,fieldname,keylen_in,add,cmpfcn) \ + HASH_ADD_KEYPTR_INORDER(hh, head, &((add)->fieldname), keylen_in, add, cmpfcn) + +#define HASH_ADD_KEYPTR_BYHASHVALUE(hh,head,keyptr,keylen_in,hashval,add) \ +do { \ + unsigned _ha_bkt; \ + (add)->hh.hashv = (hashval); \ + (add)->hh.key = (char*) (keyptr); \ + (add)->hh.keylen = (unsigned) (keylen_in); \ + if (!(head)) { \ + (add)->hh.next = NULL; \ + (add)->hh.prev = NULL; \ + (head) = (add); \ + HASH_MAKE_TABLE(hh, head); \ + } else { \ 
+ (add)->hh.tbl = (head)->hh.tbl; \ + HASH_APPEND_LIST(hh, head, add); \ + } \ + (head)->hh.tbl->num_items++; \ + HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _ha_bkt); \ + HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt], &(add)->hh); \ + HASH_BLOOM_ADD((head)->hh.tbl, hashval); \ + HASH_EMIT_KEY(hh, head, keyptr, keylen_in); \ + HASH_FSCK(hh, head); \ +} while (0) + +#define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add) \ +do { \ + unsigned _ha_hashv; \ + HASH_VALUE(keyptr, keylen_in, _ha_hashv); \ + HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, keyptr, keylen_in, _ha_hashv, add); \ +} while (0) + +#define HASH_ADD_BYHASHVALUE(hh,head,fieldname,keylen_in,hashval,add) \ + HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, add) + +#define HASH_ADD(hh,head,fieldname,keylen_in,add) \ + HASH_ADD_KEYPTR(hh, head, &((add)->fieldname), keylen_in, add) + +#define HASH_TO_BKT(hashv,num_bkts,bkt) \ +do { \ + bkt = ((hashv) & ((num_bkts) - 1U)); \ +} while (0) + +/* delete "delptr" from the hash table. + * "the usual" patch-up process for the app-order doubly-linked-list. + * The use of _hd_hh_del below deserves special explanation. + * These used to be expressed using (delptr) but that led to a bug + * if someone used the same symbol for the head and deletee, like + * HASH_DELETE(hh,users,users); + * We want that to work, but by changing the head (users) below + * we were forfeiting our ability to further refer to the deletee (users) + * in the patch-up process. Solution: use scratch space to + * copy the deletee pointer, then the latter references are via that + * scratch pointer rather than through the repointed (users) symbol. 
+ */ +#define HASH_DELETE(hh,head,delptr) \ +do { \ + struct UT_hash_handle *_hd_hh_del; \ + if ( ((delptr)->hh.prev == NULL) && ((delptr)->hh.next == NULL) ) { \ + uthash_free((head)->hh.tbl->buckets, \ + (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \ + HASH_BLOOM_FREE((head)->hh.tbl); \ + uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ + head = NULL; \ + } else { \ + unsigned _hd_bkt; \ + _hd_hh_del = &((delptr)->hh); \ + if ((delptr) == ELMT_FROM_HH((head)->hh.tbl,(head)->hh.tbl->tail)) { \ + (head)->hh.tbl->tail = \ + (UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) + \ + (head)->hh.tbl->hho); \ + } \ + if ((delptr)->hh.prev != NULL) { \ + ((UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) + \ + (head)->hh.tbl->hho))->next = (delptr)->hh.next; \ + } else { \ + DECLTYPE_ASSIGN(head,(delptr)->hh.next); \ + } \ + if (_hd_hh_del->next != NULL) { \ + ((UT_hash_handle*)((ptrdiff_t)_hd_hh_del->next + \ + (head)->hh.tbl->hho))->prev = \ + _hd_hh_del->prev; \ + } \ + HASH_TO_BKT( _hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \ + HASH_DEL_IN_BKT(hh,(head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del); \ + (head)->hh.tbl->num_items--; \ + } \ + HASH_FSCK(hh,head); \ +} while (0) + + +/* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL */ +#define HASH_FIND_STR(head,findstr,out) \ + HASH_FIND(hh,head,findstr,(unsigned)uthash_strlen(findstr),out) +#define HASH_ADD_STR(head,strfield,add) \ + HASH_ADD(hh,head,strfield[0],(unsigned)uthash_strlen(add->strfield),add) +#define HASH_REPLACE_STR(head,strfield,add,replaced) \ + HASH_REPLACE(hh,head,strfield[0],(unsigned)uthash_strlen(add->strfield),add,replaced) +#define HASH_FIND_INT(head,findint,out) \ + HASH_FIND(hh,head,findint,sizeof(int),out) +#define HASH_ADD_INT(head,intfield,add) \ + HASH_ADD(hh,head,intfield,sizeof(int),add) +#define HASH_REPLACE_INT(head,intfield,add,replaced) \ + HASH_REPLACE(hh,head,intfield,sizeof(int),add,replaced) +#define HASH_FIND_PTR(head,findptr,out) \ + 
HASH_FIND(hh,head,findptr,sizeof(void *),out) +#define HASH_ADD_PTR(head,ptrfield,add) \ + HASH_ADD(hh,head,ptrfield,sizeof(void *),add) +#define HASH_REPLACE_PTR(head,ptrfield,add,replaced) \ + HASH_REPLACE(hh,head,ptrfield,sizeof(void *),add,replaced) +#define HASH_DEL(head,delptr) \ + HASH_DELETE(hh,head,delptr) + +/* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined. + * This is for uthash developer only; it compiles away if HASH_DEBUG isn't defined. + */ +#ifdef HASH_DEBUG +#define HASH_OOPS(...) do { fprintf(stderr,__VA_ARGS__); exit(-1); } while (0) +#define HASH_FSCK(hh,head) \ +do { \ + struct UT_hash_handle *_thh; \ + if (head) { \ + unsigned _bkt_i; \ + unsigned _count; \ + char *_prev; \ + _count = 0; \ + for( _bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; _bkt_i++) { \ + unsigned _bkt_count = 0; \ + _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head; \ + _prev = NULL; \ + while (_thh) { \ + if (_prev != (char*)(_thh->hh_prev)) { \ + HASH_OOPS("invalid hh_prev %p, actual %p\n", \ + _thh->hh_prev, _prev ); \ + } \ + _bkt_count++; \ + _prev = (char*)(_thh); \ + _thh = _thh->hh_next; \ + } \ + _count += _bkt_count; \ + if ((head)->hh.tbl->buckets[_bkt_i].count != _bkt_count) { \ + HASH_OOPS("invalid bucket count %u, actual %u\n", \ + (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count); \ + } \ + } \ + if (_count != (head)->hh.tbl->num_items) { \ + HASH_OOPS("invalid hh item count %u, actual %u\n", \ + (head)->hh.tbl->num_items, _count ); \ + } \ + /* traverse hh in app order; check next/prev integrity, count */ \ + _count = 0; \ + _prev = NULL; \ + _thh = &(head)->hh; \ + while (_thh) { \ + _count++; \ + if (_prev !=(char*)(_thh->prev)) { \ + HASH_OOPS("invalid prev %p, actual %p\n", \ + _thh->prev, _prev ); \ + } \ + _prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh); \ + _thh = ( _thh->next ? 
(UT_hash_handle*)((char*)(_thh->next) + \ + (head)->hh.tbl->hho) : NULL ); \ + } \ + if (_count != (head)->hh.tbl->num_items) { \ + HASH_OOPS("invalid app item count %u, actual %u\n", \ + (head)->hh.tbl->num_items, _count ); \ + } \ + } \ +} while (0) +#else +#define HASH_FSCK(hh,head) +#endif + +/* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to + * the descriptor to which this macro is defined for tuning the hash function. + * The app can #include to get the prototype for write(2). */ +#ifdef HASH_EMIT_KEYS +#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) \ +do { \ + unsigned _klen = fieldlen; \ + write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \ + write(HASH_EMIT_KEYS, keyptr, (unsigned long)fieldlen); \ +} while (0) +#else +#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) +#endif + +/* default to Jenkin's hash unless overridden e.g. DHASH_FUNCTION=HASH_SAX */ +#ifdef HASH_FUNCTION +#define HASH_FCN HASH_FUNCTION +#else +#define HASH_FCN HASH_JEN +#endif + +/* The Bernstein hash function, used in Perl prior to v5.6. Note (x<<5+x)=x*33. 
*/ +#define HASH_BER(key,keylen,hashv) \ +do { \ + unsigned _hb_keylen=(unsigned)keylen; \ + const unsigned char *_hb_key=(const unsigned char*)(key); \ + (hashv) = 0; \ + while (_hb_keylen-- != 0U) { \ + (hashv) = (((hashv) << 5) + (hashv)) + *_hb_key++; \ + } \ +} while (0) + + +/* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at + * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */ +#define HASH_SAX(key,keylen,hashv) \ +do { \ + unsigned _sx_i; \ + const unsigned char *_hs_key=(const unsigned char*)(key); \ + hashv = 0; \ + for(_sx_i=0; _sx_i < keylen; _sx_i++) { \ + hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i]; \ + } \ +} while (0) +/* FNV-1a variation */ +#define HASH_FNV(key,keylen,hashv) \ +do { \ + unsigned _fn_i; \ + const unsigned char *_hf_key=(const unsigned char*)(key); \ + hashv = 2166136261U; \ + for(_fn_i=0; _fn_i < keylen; _fn_i++) { \ + hashv = hashv ^ _hf_key[_fn_i]; \ + hashv = hashv * 16777619U; \ + } \ +} while (0) + +#define HASH_OAT(key,keylen,hashv) \ +do { \ + unsigned _ho_i; \ + const unsigned char *_ho_key=(const unsigned char*)(key); \ + hashv = 0; \ + for(_ho_i=0; _ho_i < keylen; _ho_i++) { \ + hashv += _ho_key[_ho_i]; \ + hashv += (hashv << 10); \ + hashv ^= (hashv >> 6); \ + } \ + hashv += (hashv << 3); \ + hashv ^= (hashv >> 11); \ + hashv += (hashv << 15); \ +} while (0) + +#define HASH_JEN_MIX(a,b,c) \ +do { \ + a -= b; a -= c; a ^= ( c >> 13 ); \ + b -= c; b -= a; b ^= ( a << 8 ); \ + c -= a; c -= b; c ^= ( b >> 13 ); \ + a -= b; a -= c; a ^= ( c >> 12 ); \ + b -= c; b -= a; b ^= ( a << 16 ); \ + c -= a; c -= b; c ^= ( b >> 5 ); \ + a -= b; a -= c; a ^= ( c >> 3 ); \ + b -= c; b -= a; b ^= ( a << 10 ); \ + c -= a; c -= b; c ^= ( b >> 15 ); \ +} while (0) + +#define HASH_JEN(key,keylen,hashv) \ +do { \ + unsigned _hj_i,_hj_j,_hj_k; \ + unsigned const char *_hj_key=(unsigned const char*)(key); \ + hashv = 0xfeedbeefu; \ + _hj_i = _hj_j = 0x9e3779b9u; \ + _hj_k = (unsigned)(keylen); 
\ + while (_hj_k >= 12U) { \ + _hj_i += (_hj_key[0] + ( (unsigned)_hj_key[1] << 8 ) \ + + ( (unsigned)_hj_key[2] << 16 ) \ + + ( (unsigned)_hj_key[3] << 24 ) ); \ + _hj_j += (_hj_key[4] + ( (unsigned)_hj_key[5] << 8 ) \ + + ( (unsigned)_hj_key[6] << 16 ) \ + + ( (unsigned)_hj_key[7] << 24 ) ); \ + hashv += (_hj_key[8] + ( (unsigned)_hj_key[9] << 8 ) \ + + ( (unsigned)_hj_key[10] << 16 ) \ + + ( (unsigned)_hj_key[11] << 24 ) ); \ + \ + HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ + \ + _hj_key += 12; \ + _hj_k -= 12U; \ + } \ + hashv += (unsigned)(keylen); \ + switch ( _hj_k ) { \ + case 11: hashv += ( (unsigned)_hj_key[10] << 24 ); /* FALLTHROUGH */ \ + case 10: hashv += ( (unsigned)_hj_key[9] << 16 ); /* FALLTHROUGH */ \ + case 9: hashv += ( (unsigned)_hj_key[8] << 8 ); /* FALLTHROUGH */ \ + case 8: _hj_j += ( (unsigned)_hj_key[7] << 24 ); /* FALLTHROUGH */ \ + case 7: _hj_j += ( (unsigned)_hj_key[6] << 16 ); /* FALLTHROUGH */ \ + case 6: _hj_j += ( (unsigned)_hj_key[5] << 8 ); /* FALLTHROUGH */ \ + case 5: _hj_j += _hj_key[4]; /* FALLTHROUGH */ \ + case 4: _hj_i += ( (unsigned)_hj_key[3] << 24 ); /* FALLTHROUGH */ \ + case 3: _hj_i += ( (unsigned)_hj_key[2] << 16 ); /* FALLTHROUGH */ \ + case 2: _hj_i += ( (unsigned)_hj_key[1] << 8 ); /* FALLTHROUGH */ \ + case 1: _hj_i += _hj_key[0]; \ + } \ + HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ +} while (0) + +/* The Paul Hsieh hash function */ +#undef get16bits +#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \ + || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__) +#define get16bits(d) (*((const uint16_t *) (d))) +#endif + +#if !defined (get16bits) +#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8) \ + +(uint32_t)(((const uint8_t *)(d))[0]) ) +#endif +#define HASH_SFH(key,keylen,hashv) \ +do { \ + unsigned const char *_sfh_key=(unsigned const char*)(key); \ + uint32_t _sfh_tmp, _sfh_len = (uint32_t)keylen; \ + \ + unsigned _sfh_rem = _sfh_len & 3U; \ + _sfh_len >>= 2; \ + 
hashv = 0xcafebabeu; \ + \ + /* Main loop */ \ + for (;_sfh_len > 0U; _sfh_len--) { \ + hashv += get16bits (_sfh_key); \ + _sfh_tmp = ((uint32_t)(get16bits (_sfh_key+2)) << 11) ^ hashv; \ + hashv = (hashv << 16) ^ _sfh_tmp; \ + _sfh_key += 2U*sizeof (uint16_t); \ + hashv += hashv >> 11; \ + } \ + \ + /* Handle end cases */ \ + switch (_sfh_rem) { \ + case 3: hashv += get16bits (_sfh_key); \ + hashv ^= hashv << 16; \ + hashv ^= (uint32_t)(_sfh_key[sizeof (uint16_t)]) << 18; \ + hashv += hashv >> 11; \ + break; \ + case 2: hashv += get16bits (_sfh_key); \ + hashv ^= hashv << 11; \ + hashv += hashv >> 17; \ + break; \ + case 1: hashv += *_sfh_key; \ + hashv ^= hashv << 10; \ + hashv += hashv >> 1; \ + } \ + \ + /* Force "avalanching" of final 127 bits */ \ + hashv ^= hashv << 3; \ + hashv += hashv >> 5; \ + hashv ^= hashv << 4; \ + hashv += hashv >> 17; \ + hashv ^= hashv << 25; \ + hashv += hashv >> 6; \ +} while (0) + +#ifdef HASH_USING_NO_STRICT_ALIASING +/* The MurmurHash exploits some CPU's (x86,x86_64) tolerance for unaligned reads. + * For other types of CPU's (e.g. Sparc) an unaligned read causes a bus error. + * MurmurHash uses the faster approach only on CPU's where we know it's safe. 
+ * + * Note the preprocessor built-in defines can be emitted using: + * + * gcc -m64 -dM -E - < /dev/null (on gcc) + * cc -## a.c (where a.c is a simple test file) (Sun Studio) + */ +#if (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86)) +#define MUR_GETBLOCK(p,i) p[i] +#else /* non intel */ +#define MUR_PLUS0_ALIGNED(p) (((unsigned long)p & 3UL) == 0UL) +#define MUR_PLUS1_ALIGNED(p) (((unsigned long)p & 3UL) == 1UL) +#define MUR_PLUS2_ALIGNED(p) (((unsigned long)p & 3UL) == 2UL) +#define MUR_PLUS3_ALIGNED(p) (((unsigned long)p & 3UL) == 3UL) +#define WP(p) ((uint32_t*)((unsigned long)(p) & ~3UL)) +#if (defined(__BIG_ENDIAN__) || defined(SPARC) || defined(__ppc__) || defined(__ppc64__)) +#define MUR_THREE_ONE(p) ((((*WP(p))&0x00ffffff) << 8) | (((*(WP(p)+1))&0xff000000) >> 24)) +#define MUR_TWO_TWO(p) ((((*WP(p))&0x0000ffff) <<16) | (((*(WP(p)+1))&0xffff0000) >> 16)) +#define MUR_ONE_THREE(p) ((((*WP(p))&0x000000ff) <<24) | (((*(WP(p)+1))&0xffffff00) >> 8)) +#else /* assume little endian non-intel */ +#define MUR_THREE_ONE(p) ((((*WP(p))&0xffffff00) >> 8) | (((*(WP(p)+1))&0x000000ff) << 24)) +#define MUR_TWO_TWO(p) ((((*WP(p))&0xffff0000) >>16) | (((*(WP(p)+1))&0x0000ffff) << 16)) +#define MUR_ONE_THREE(p) ((((*WP(p))&0xff000000) >>24) | (((*(WP(p)+1))&0x00ffffff) << 8)) +#endif +#define MUR_GETBLOCK(p,i) (MUR_PLUS0_ALIGNED(p) ? ((p)[i]) : \ + (MUR_PLUS1_ALIGNED(p) ? MUR_THREE_ONE(p) : \ + (MUR_PLUS2_ALIGNED(p) ? 
MUR_TWO_TWO(p) : \ + MUR_ONE_THREE(p)))) +#endif +#define MUR_ROTL32(x,r) (((x) << (r)) | ((x) >> (32 - (r)))) +#define MUR_FMIX(_h) \ +do { \ + _h ^= _h >> 16; \ + _h *= 0x85ebca6bu; \ + _h ^= _h >> 13; \ + _h *= 0xc2b2ae35u; \ + _h ^= _h >> 16; \ +} while (0) + +#define HASH_MUR(key,keylen,hashv) \ +do { \ + const uint8_t *_mur_data = (const uint8_t*)(key); \ + const int _mur_nblocks = (int)(keylen) / 4; \ + uint32_t _mur_h1 = 0xf88D5353u; \ + uint32_t _mur_c1 = 0xcc9e2d51u; \ + uint32_t _mur_c2 = 0x1b873593u; \ + uint32_t _mur_k1 = 0; \ + const uint8_t *_mur_tail; \ + const uint32_t *_mur_blocks = (const uint32_t*)(_mur_data+(_mur_nblocks*4)); \ + int _mur_i; \ + for(_mur_i = -_mur_nblocks; _mur_i!=0; _mur_i++) { \ + _mur_k1 = MUR_GETBLOCK(_mur_blocks,_mur_i); \ + _mur_k1 *= _mur_c1; \ + _mur_k1 = MUR_ROTL32(_mur_k1,15); \ + _mur_k1 *= _mur_c2; \ + \ + _mur_h1 ^= _mur_k1; \ + _mur_h1 = MUR_ROTL32(_mur_h1,13); \ + _mur_h1 = (_mur_h1*5U) + 0xe6546b64u; \ + } \ + _mur_tail = (const uint8_t*)(_mur_data + (_mur_nblocks*4)); \ + _mur_k1=0; \ + switch((keylen) & 3U) { \ + case 3: _mur_k1 ^= (uint32_t)_mur_tail[2] << 16; /* FALLTHROUGH */ \ + case 2: _mur_k1 ^= (uint32_t)_mur_tail[1] << 8; /* FALLTHROUGH */ \ + case 1: _mur_k1 ^= (uint32_t)_mur_tail[0]; \ + _mur_k1 *= _mur_c1; \ + _mur_k1 = MUR_ROTL32(_mur_k1,15); \ + _mur_k1 *= _mur_c2; \ + _mur_h1 ^= _mur_k1; \ + } \ + _mur_h1 ^= (uint32_t)(keylen); \ + MUR_FMIX(_mur_h1); \ + hashv = _mur_h1; \ +} while (0) +#endif /* HASH_USING_NO_STRICT_ALIASING */ + +/* iterate over items in a known bucket to find desired item */ +#define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,hashval,out) \ +do { \ + if ((head).hh_head != NULL) { \ + DECLTYPE_ASSIGN(out, ELMT_FROM_HH(tbl, (head).hh_head)); \ + } else { \ + (out) = NULL; \ + } \ + while ((out) != NULL) { \ + if ((out)->hh.hashv == (hashval) && (out)->hh.keylen == (keylen_in)) { \ + if (uthash_memcmp((out)->hh.key, keyptr, keylen_in) == 0) { \ + break; \ + } \ + } \ + if 
((out)->hh.hh_next != NULL) { \ + DECLTYPE_ASSIGN(out, ELMT_FROM_HH(tbl, (out)->hh.hh_next)); \ + } else { \ + (out) = NULL; \ + } \ + } \ +} while (0) + +/* add an item to a bucket */ +#define HASH_ADD_TO_BKT(head,addhh) \ +do { \ + head.count++; \ + (addhh)->hh_next = head.hh_head; \ + (addhh)->hh_prev = NULL; \ + if (head.hh_head != NULL) { (head).hh_head->hh_prev = (addhh); } \ + (head).hh_head=addhh; \ + if ((head.count >= ((head.expand_mult+1U) * HASH_BKT_CAPACITY_THRESH)) \ + && ((addhh)->tbl->noexpand != 1U)) { \ + HASH_EXPAND_BUCKETS((addhh)->tbl); \ + } \ +} while (0) + +/* remove an item from a given bucket */ +#define HASH_DEL_IN_BKT(hh,head,hh_del) \ + (head).count--; \ + if ((head).hh_head == hh_del) { \ + (head).hh_head = hh_del->hh_next; \ + } \ + if (hh_del->hh_prev) { \ + hh_del->hh_prev->hh_next = hh_del->hh_next; \ + } \ + if (hh_del->hh_next) { \ + hh_del->hh_next->hh_prev = hh_del->hh_prev; \ + } + +/* Bucket expansion has the effect of doubling the number of buckets + * and redistributing the items into the new buckets. Ideally the + * items will distribute more or less evenly into the new buckets + * (the extent to which this is true is a measure of the quality of + * the hash function as it applies to the key domain). + * + * With the items distributed into more buckets, the chain length + * (item count) in each bucket is reduced. Thus by expanding buckets + * the hash keeps a bound on the chain length. This bounded chain + * length is the essence of how a hash provides constant time lookup. + * + * The calculation of tbl->ideal_chain_maxlen below deserves some + * explanation. First, keep in mind that we're calculating the ideal + * maximum chain length based on the *new* (doubled) bucket count. + * In fractions this is just n/b (n=number of items,b=new num buckets). + * Since the ideal chain length is an integer, we want to calculate + * ceil(n/b). 
We don't depend on floating point arithmetic in this + * hash, so to calculate ceil(n/b) with integers we could write + * + * ceil(n/b) = (n/b) + ((n%b)?1:0) + * + * and in fact a previous version of this hash did just that. + * But now we have improved things a bit by recognizing that b is + * always a power of two. We keep its base 2 log handy (call it lb), + * so now we can write this with a bit shift and logical AND: + * + * ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0) + * + */ +#define HASH_EXPAND_BUCKETS(tbl) \ +do { \ + unsigned _he_bkt; \ + unsigned _he_bkt_i; \ + struct UT_hash_handle *_he_thh, *_he_hh_nxt; \ + UT_hash_bucket *_he_new_buckets, *_he_newbkt; \ + _he_new_buckets = (UT_hash_bucket*)uthash_malloc( \ + 2UL * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ + if (!_he_new_buckets) { uthash_fatal( "out of memory"); } \ + memset(_he_new_buckets, 0, \ + 2UL * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ + tbl->ideal_chain_maxlen = \ + (tbl->num_items >> (tbl->log2_num_buckets+1U)) + \ + (((tbl->num_items & ((tbl->num_buckets*2U)-1U)) != 0U) ? 
1U : 0U); \ + tbl->nonideal_items = 0; \ + for(_he_bkt_i = 0; _he_bkt_i < tbl->num_buckets; _he_bkt_i++) \ + { \ + _he_thh = tbl->buckets[ _he_bkt_i ].hh_head; \ + while (_he_thh != NULL) { \ + _he_hh_nxt = _he_thh->hh_next; \ + HASH_TO_BKT( _he_thh->hashv, tbl->num_buckets*2U, _he_bkt); \ + _he_newbkt = &(_he_new_buckets[ _he_bkt ]); \ + if (++(_he_newbkt->count) > tbl->ideal_chain_maxlen) { \ + tbl->nonideal_items++; \ + _he_newbkt->expand_mult = _he_newbkt->count / \ + tbl->ideal_chain_maxlen; \ + } \ + _he_thh->hh_prev = NULL; \ + _he_thh->hh_next = _he_newbkt->hh_head; \ + if (_he_newbkt->hh_head != NULL) { _he_newbkt->hh_head->hh_prev = \ + _he_thh; } \ + _he_newbkt->hh_head = _he_thh; \ + _he_thh = _he_hh_nxt; \ + } \ + } \ + uthash_free( tbl->buckets, tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \ + tbl->num_buckets *= 2U; \ + tbl->log2_num_buckets++; \ + tbl->buckets = _he_new_buckets; \ + tbl->ineff_expands = (tbl->nonideal_items > (tbl->num_items >> 1)) ? \ + (tbl->ineff_expands+1U) : 0U; \ + if (tbl->ineff_expands > 1U) { \ + tbl->noexpand=1; \ + uthash_noexpand_fyi(tbl); \ + } \ + uthash_expand_fyi(tbl); \ +} while (0) + + +/* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */ +/* Note that HASH_SORT assumes the hash handle name to be hh. + * HASH_SRT was added to allow the hash handle name to be passed in. 
*/ +#define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn) +#define HASH_SRT(hh,head,cmpfcn) \ +do { \ + unsigned _hs_i; \ + unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize; \ + struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail; \ + if (head != NULL) { \ + _hs_insize = 1; \ + _hs_looping = 1; \ + _hs_list = &((head)->hh); \ + while (_hs_looping != 0U) { \ + _hs_p = _hs_list; \ + _hs_list = NULL; \ + _hs_tail = NULL; \ + _hs_nmerges = 0; \ + while (_hs_p != NULL) { \ + _hs_nmerges++; \ + _hs_q = _hs_p; \ + _hs_psize = 0; \ + for ( _hs_i = 0; _hs_i < _hs_insize; _hs_i++ ) { \ + _hs_psize++; \ + _hs_q = (UT_hash_handle*)((_hs_q->next != NULL) ? \ + ((void*)((char*)(_hs_q->next) + \ + (head)->hh.tbl->hho)) : NULL); \ + if (! (_hs_q) ) { break; } \ + } \ + _hs_qsize = _hs_insize; \ + while ((_hs_psize > 0U) || ((_hs_qsize > 0U) && (_hs_q != NULL))) {\ + if (_hs_psize == 0U) { \ + _hs_e = _hs_q; \ + _hs_q = (UT_hash_handle*)((_hs_q->next != NULL) ? \ + ((void*)((char*)(_hs_q->next) + \ + (head)->hh.tbl->hho)) : NULL); \ + _hs_qsize--; \ + } else if ( (_hs_qsize == 0U) || (_hs_q == NULL) ) { \ + _hs_e = _hs_p; \ + if (_hs_p != NULL){ \ + _hs_p = (UT_hash_handle*)((_hs_p->next != NULL) ? \ + ((void*)((char*)(_hs_p->next) + \ + (head)->hh.tbl->hho)) : NULL); \ + } \ + _hs_psize--; \ + } else if (( \ + cmpfcn(DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_p)), \ + DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_q))) \ + ) <= 0) { \ + _hs_e = _hs_p; \ + if (_hs_p != NULL){ \ + _hs_p = (UT_hash_handle*)((_hs_p->next != NULL) ? \ + ((void*)((char*)(_hs_p->next) + \ + (head)->hh.tbl->hho)) : NULL); \ + } \ + _hs_psize--; \ + } else { \ + _hs_e = _hs_q; \ + _hs_q = (UT_hash_handle*)((_hs_q->next != NULL) ? \ + ((void*)((char*)(_hs_q->next) + \ + (head)->hh.tbl->hho)) : NULL); \ + _hs_qsize--; \ + } \ + if ( _hs_tail != NULL ) { \ + _hs_tail->next = ((_hs_e != NULL) ? 
\ + ELMT_FROM_HH((head)->hh.tbl,_hs_e) : NULL); \ + } else { \ + _hs_list = _hs_e; \ + } \ + if (_hs_e != NULL) { \ + _hs_e->prev = ((_hs_tail != NULL) ? \ + ELMT_FROM_HH((head)->hh.tbl,_hs_tail) : NULL); \ + } \ + _hs_tail = _hs_e; \ + } \ + _hs_p = _hs_q; \ + } \ + if (_hs_tail != NULL){ \ + _hs_tail->next = NULL; \ + } \ + if ( _hs_nmerges <= 1U ) { \ + _hs_looping=0; \ + (head)->hh.tbl->tail = _hs_tail; \ + DECLTYPE_ASSIGN(head,ELMT_FROM_HH((head)->hh.tbl, _hs_list)); \ + } \ + _hs_insize *= 2U; \ + } \ + HASH_FSCK(hh,head); \ + } \ +} while (0) + +/* This function selects items from one hash into another hash. + * The end result is that the selected items have dual presence + * in both hashes. There is no copy of the items made; rather + * they are added into the new hash through a secondary hash + * hash handle that must be present in the structure. */ +#define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \ +do { \ + unsigned _src_bkt, _dst_bkt; \ + void *_last_elt=NULL, *_elt; \ + UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL; \ + ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst)); \ + if (src != NULL) { \ + for(_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) { \ + for(_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head; \ + _src_hh != NULL; \ + _src_hh = _src_hh->hh_next) { \ + _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh); \ + if (cond(_elt)) { \ + _dst_hh = (UT_hash_handle*)(((char*)_elt) + _dst_hho); \ + _dst_hh->key = _src_hh->key; \ + _dst_hh->keylen = _src_hh->keylen; \ + _dst_hh->hashv = _src_hh->hashv; \ + _dst_hh->prev = _last_elt; \ + _dst_hh->next = NULL; \ + if (_last_elt_hh != NULL) { _last_elt_hh->next = _elt; } \ + if (dst == NULL) { \ + DECLTYPE_ASSIGN(dst,_elt); \ + HASH_MAKE_TABLE(hh_dst,dst); \ + } else { \ + _dst_hh->tbl = (dst)->hh_dst.tbl; \ + } \ + HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt); \ + HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt],_dst_hh); \ + 
(dst)->hh_dst.tbl->num_items++; \ + _last_elt = _elt; \ + _last_elt_hh = _dst_hh; \ + } \ + } \ + } \ + } \ + HASH_FSCK(hh_dst,dst); \ +} while (0) + +#define HASH_CLEAR(hh,head) \ +do { \ + if (head != NULL) { \ + uthash_free((head)->hh.tbl->buckets, \ + (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket)); \ + HASH_BLOOM_FREE((head)->hh.tbl); \ + uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ + (head)=NULL; \ + } \ +} while (0) + +#define HASH_OVERHEAD(hh,head) \ + ((head != NULL) ? ( \ + (size_t)(((head)->hh.tbl->num_items * sizeof(UT_hash_handle)) + \ + ((head)->hh.tbl->num_buckets * sizeof(UT_hash_bucket)) + \ + sizeof(UT_hash_table) + \ + (HASH_BLOOM_BYTELEN))) : 0U) + +#ifdef NO_DECLTYPE +#define HASH_ITER(hh,head,el,tmp) \ +for(((el)=(head)), ((*(char**)(&(tmp)))=(char*)((head!=NULL)?(head)->hh.next:NULL)); \ + (el) != NULL; ((el)=(tmp)), ((*(char**)(&(tmp)))=(char*)((tmp!=NULL)?(tmp)->hh.next:NULL))) +#else +#define HASH_ITER(hh,head,el,tmp) \ +for(((el)=(head)), ((tmp)=DECLTYPE(el)((head!=NULL)?(head)->hh.next:NULL)); \ + (el) != NULL; ((el)=(tmp)), ((tmp)=DECLTYPE(el)((tmp!=NULL)?(tmp)->hh.next:NULL))) +#endif + +/* obtain a count of items in the hash */ +#define HASH_COUNT(head) HASH_CNT(hh,head) +#define HASH_CNT(hh,head) ((head != NULL)?((head)->hh.tbl->num_items):0U) + +typedef struct UT_hash_bucket { + struct UT_hash_handle *hh_head; + unsigned count; + + /* expand_mult is normally set to 0. In this situation, the max chain length + * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If + * the bucket's chain exceeds this length, bucket expansion is triggered). + * However, setting expand_mult to a non-zero value delays bucket expansion + * (that would be triggered by additions to this particular bucket) + * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH. + * (The multiplier is simply expand_mult+1). 
The whole idea of this + * multiplier is to reduce bucket expansions, since they are expensive, in + * situations where we know that a particular bucket tends to be overused. + * It is better to let its chain length grow to a longer yet-still-bounded + * value, than to do an O(n) bucket expansion too often. + */ + unsigned expand_mult; + +} UT_hash_bucket; + +/* random signature used only to find hash tables in external analysis */ +#define HASH_SIGNATURE 0xa0111fe1u +#define HASH_BLOOM_SIGNATURE 0xb12220f2u + +typedef struct UT_hash_table { + UT_hash_bucket *buckets; + unsigned num_buckets, log2_num_buckets; + unsigned num_items; + struct UT_hash_handle *tail; /* tail hh in app order, for fast append */ + ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element */ + + /* in an ideal situation (all buckets used equally), no bucket would have + * more than ceil(#items/#buckets) items. that's the ideal chain length. */ + unsigned ideal_chain_maxlen; + + /* nonideal_items is the number of items in the hash whose chain position + * exceeds the ideal chain maxlen. these items pay the penalty for an uneven + * hash distribution; reaching them in a chain traversal takes >ideal steps */ + unsigned nonideal_items; + + /* ineffective expands occur when a bucket doubling was performed, but + * afterward, more than half the items in the hash had nonideal chain + * positions. If this happens on two consecutive expansions we inhibit any + * further expansion, as it's not helping; this happens when the hash + * function isn't a good fit for the key domain. When expansion is inhibited + * the hash will still work, albeit no longer in constant time. 
*/ + unsigned ineff_expands, noexpand; + + uint32_t signature; /* used only to find hash tables in external analysis */ +#ifdef HASH_BLOOM + uint32_t bloom_sig; /* used only to test bloom exists in external analysis */ + uint8_t *bloom_bv; + uint8_t bloom_nbits; +#endif + +} UT_hash_table; + +typedef struct UT_hash_handle { + struct UT_hash_table *tbl; + void *prev; /* prev element in app order */ + void *next; /* next element in app order */ + struct UT_hash_handle *hh_prev; /* previous hh in bucket order */ + struct UT_hash_handle *hh_next; /* next hh in bucket order */ + void *key; /* ptr to enclosing struct's key */ + unsigned keylen; /* enclosing struct's key len */ + unsigned hashv; /* result of hash-fcn(key) */ +} UT_hash_handle; + +#endif /* UTHASH_H */ From 949db6eb7999d5b1457009da0590aaaa861df839 Mon Sep 17 00:00:00 2001 From: Christian Egger Date: Wed, 14 Sep 2016 14:43:55 +0200 Subject: [PATCH 04/13] Added a Sampling Module Added a module that allows the output to be sampled. With the new -q option, the user is now able to decide whether the output should be sampled as follows: Only every nth query is output and only responses that correspond to one of these sampled queries will be output. e.g.: dnscap -r ./file.pcap -g -q 10 will output only every 10th query and these queries' corresponding responses. 
--- src/dnscap.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/dnscap.c b/src/dnscap.c index 720f064..81a6e0b 100644 --- a/src/dnscap.c +++ b/src/dnscap.c @@ -288,17 +288,14 @@ struct plugin { }; LIST(struct plugin) plugins; -typedef struct -{ +typedef struct{ UT_hash_handle hh; //makes the structure hashable using "uthash/uthash.h" unsigned from; //these next 5 fields make up the compound key int sport,dport,transaction_id; unsigned to; - }samplePacket ; -typedef struct -{ +typedef struct{ unsigned from; int sport,dport,transaction_id; unsigned to; From 7705450c1778e37ace2ccb407b94c7246886b76a Mon Sep 17 00:00:00 2001 From: Christian Egger Date: Thu, 15 Sep 2016 14:00:15 +0200 Subject: [PATCH 05/13] corrections Changed parameter for the q option from "unsigned int" to "nth" --- src/dnscap.1.in | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/dnscap.1.in b/src/dnscap.1.in index 73c9c89..fc37f36 100644 --- a/src/dnscap.1.in +++ b/src/dnscap.1.in @@ -53,7 +53,7 @@ .Op Fl E Ar datetime .Op Fl P Ar plugin.so .Op Fl U Ar str -.Op Fl q Ar unsigned int +.Op Fl q Ar nth .Sh DESCRIPTION .Nm is a network capture utility designed specifically for DNS traffic. It @@ -358,7 +358,9 @@ Append "and Causes the output to be sampled after the application of all other filters. Only every nth dns initiation will be output and responses are only output if they correspond to one of the sampled queries. This option cannot be -used with option. +used with option +.Fl s +[r] due to obvious filter reasons. 
.El .Pp If started with no options, From db0467bddc571a261de0bc85b2826fe42fa4a3e5 Mon Sep 17 00:00:00 2001 From: Christian Egger Date: Thu, 15 Sep 2016 14:01:23 +0200 Subject: [PATCH 06/13] corrections changed the parameter of the q option from "unsigned int" to "nth" --- src/dnscap.1.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dnscap.1.in b/src/dnscap.1.in index fc37f36..6367c34 100644 --- a/src/dnscap.1.in +++ b/src/dnscap.1.in @@ -354,7 +354,7 @@ Enable immediate mode on interfaces. Append "and .Ar str " to the pcap filter. -.It Fl q Ar unsigned int +.It Fl q Ar nth Causes the output to be sampled after the application of all other filters. Only every nth dns initiation will be output and responses are only output if they correspond to one of the sampled queries. This option cannot be From a527e697ad6220c17451d6133bb0ad066e7cc855 Mon Sep 17 00:00:00 2001 From: Christian Egger Date: Thu, 15 Sep 2016 14:47:45 +0200 Subject: [PATCH 07/13] Applied corrections to dnscap.c Applied most of the suggested corrections suggested by jelu --- src/dnscap.c | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/src/dnscap.c b/src/dnscap.c index 81a6e0b..ff078a9 100644 --- a/src/dnscap.c +++ b/src/dnscap.c @@ -289,16 +289,16 @@ struct plugin { LIST(struct plugin) plugins; typedef struct{ - UT_hash_handle hh; //makes the structure hashable using "uthash/uthash.h" - unsigned from; //these next 5 fields make up the compound key - int sport,dport,transaction_id; - unsigned to; + UT_hash_handle hh; //makes the structure hashable using "uthash/uthash.h" + uint32_t from; //these next 5 fields make up the compound key + int sport,dport,transaction_id; + uint32_t to; }samplePacket ; typedef struct{ - unsigned from; + uint32_t from; int sport,dport,transaction_id; - unsigned to; + uint32_t to; }sample_lookup_key; /* Forward. 
*/ @@ -715,7 +715,7 @@ help_1(void) { "\t[-x ]+ [-X ]+\n" "\t[-B ] [-E ]\n" "\t[-P plugin.so] [-U ]\n" - "\t[-q ]\n", + "\t[-q ]\n", ProgramName); } @@ -776,8 +776,8 @@ help_2(void) { "\t-E end collecting at this date and time\n" "\t-M set monitor mode on interfaces\n" "\t-D set immediate mode on interfaces\n" - "\t-q output only every nth DNS query and only output responses\n \ - if they correspond to one of the sampled queries\n" + "\t-q output only every nth DNS query and only output responses\n \ + if they correspond to one of the sampled queries\n" ); } @@ -1132,8 +1132,8 @@ parse_args(int argc, char *argv[]) { usage("the -b option is incompatible with -d and -g"); if (sample && chooseSidesResponse) { - if (!((dir_wanted & DIR_INITIATE) != 0) && ((dir_wanted & DIR_RESPONSE) != 0)) - usage("the -q option is incompatible with -s r"); + if(((dir_wanted & DIR_RESPONSE) != 0) && ((dir_wanted & DIR_INITIATE) == 0)) + usage("the -q option is incompatible with -s r"); } if (dumptrace >= 1) { endpoint_ptr ep; @@ -2381,10 +2381,9 @@ network_pkt(const char *descr, my_bpftimeval ts, unsigned pf, ns_msg dnsmsgSample; ns_initparse(dnspkt,dnslen,&dnsmsgSample); samplePacket *currentQuery; - void *ipAddrBufferFrom = &from.u; - void *ipAddrBufferTo = &to.u; - unsigned *fromBuffer = (unsigned*)ipAddrBufferFrom; - unsigned *toBuffer = (unsigned*)ipAddrBufferTo; + + uint32_t *fromBuffer = (uint32_t*)&from.u; + uint32_t *toBuffer = (uint32_t*)&to.u; keylen = offsetof(samplePacket,to) //keylen is used to define which fields of the hash structure are added + sizeof(*toBuffer) //as a compound key. 
Here, the key is composed of all fields between (and including) @@ -2395,8 +2394,8 @@ network_pkt(const char *descr, my_bpftimeval ts, unsigned pf, querycount++; if(querycount % sampleAmount == 0) { - currentQuery = malloc(sizeof(*currentQuery)); - memset(currentQuery, 0, sizeof(*currentQuery)); + currentQuery = calloc(1,sizeof(*currentQuery)); + assert(currentQuery != NULL); currentQuery->from = *fromBuffer; currentQuery->to = *toBuffer; currentQuery->sport = sport; @@ -2409,8 +2408,8 @@ network_pkt(const char *descr, my_bpftimeval ts, unsigned pf, } else { - sample_lookup_key *lookup_key = (sample_lookup_key*)malloc(sizeof(*lookup_key));; - memset(lookup_key, 0, sizeof(*lookup_key)); + sample_lookup_key *lookup_key = (sample_lookup_key*)calloc(1,sizeof(*lookup_key)); + assert(lookup_key != NULL); lookup_key->from = *toBuffer; lookup_key->to = *fromBuffer; lookup_key->dport = sport; From 12d1fd384b58600206e6bb47cafbef5baee51f9c Mon Sep 17 00:00:00 2001 From: Christian Egger Date: Thu, 15 Sep 2016 15:19:15 +0200 Subject: [PATCH 08/13] disable sampling if compiled without libbind Disabled the sampling code if dnscap is not compiled with libbind. 
--- src/dnscap.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/dnscap.c b/src/dnscap.c index ff078a9..d4ba072 100644 --- a/src/dnscap.c +++ b/src/dnscap.c @@ -923,11 +923,19 @@ parse_args(int argc, char *argv[]) { dir_wanted = u; break; case 'q': - sample = TRUE; - sampleAmount = atoi(optarg); - if(sampleAmount == 0) - usage("-q takes only unsigned integer values != 0"); - querycount = 0; +#if HAVE_NS_INITPARSE && HAVE_NS_PARSERR && HAVE_NS_SPRINTRR + { + sample = TRUE; + sampleAmount = atoi(optarg); + if(sampleAmount == 0) + usage("-q takes only unsigned integer values != 0"); + querycount = 0; + } +#else + fprintf(stderr, "%s must be compiled with libbind to use the -q option.\n", + ProgramName); + exit(1); +#endif break; case 'h': u = 0; From 5ece8436465e007e015a2119c528bf3e58480cb5 Mon Sep 17 00:00:00 2001 From: Christian Egger Date: Thu, 15 Sep 2016 16:28:26 +0200 Subject: [PATCH 09/13] More corrections to dnscap.c Corrected chooseSidesResponse logic. --- src/dnscap.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/dnscap.c b/src/dnscap.c index d4ba072..76dac96 100644 --- a/src/dnscap.c +++ b/src/dnscap.c @@ -402,7 +402,6 @@ static int sample = FALSE; static unsigned sampleAmount; static unsigned querycount; static samplePacket *allSampleQueries = NULL; -static int chooseSidesResponse = FALSE; static unsigned keylen; /* Public. 
*/ @@ -912,7 +911,6 @@ parse_args(int argc, char *argv[]) { msg_wanted = u; break; case 's': - chooseSidesResponse = TRUE; u = 0; for (p = optarg; *p; p++) switch (*p) { @@ -1138,11 +1136,9 @@ parse_args(int argc, char *argv[]) { usage("the -L and -l options are mutually exclusive"); if (background && (dumptrace || preso)) usage("the -b option is incompatible with -d and -g"); - if (sample && chooseSidesResponse) - { - if(((dir_wanted & DIR_RESPONSE) != 0) && ((dir_wanted & DIR_INITIATE) == 0)) - usage("the -q option is incompatible with -s r"); - } + if(sample && ((dir_wanted & DIR_INITIATE) == 0)) + usage("the -q option is incompatible with -s r"); + if (dumptrace >= 1) { endpoint_ptr ep; const char *sep; From 53bb6b90e67040113473095b2f7300221b5d2483 Mon Sep 17 00:00:00 2001 From: Christian Egger Date: Mon, 19 Sep 2016 11:40:22 +0200 Subject: [PATCH 10/13] added ipv6 support for sampling Added a proper way to support ipv6 addresses for sampling. --- src/dnscap.c | 203 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 119 insertions(+), 84 deletions(-) diff --git a/src/dnscap.c b/src/dnscap.c index 76dac96..17d12e9 100644 --- a/src/dnscap.c +++ b/src/dnscap.c @@ -206,17 +206,18 @@ extern char *strptime(const char *, const char *, struct tm *); # define ETHERTYPE_IPV6 0x86DD #endif -#define THOUSAND 1000 -#define MILLION (THOUSAND*THOUSAND) -#define MAX_VLAN 4095 -#define DNS_PORT 53 -#define TO_MS 1 -#define SNAPLEN 65536 -#define TRUE 1 -#define FALSE 0 -#define REGEX_CFLAGS (REG_EXTENDED|REG_ICASE|REG_NOSUB|REG_NEWLINE) -#define MAX_TCP_WINDOW (0xFFFF << 14) -#define MEM_MAX 20000000000 // SETTING MAX MEMORY USAGE TO 2GB +#define THOUSAND 1000 +#define MILLION (THOUSAND*THOUSAND) +#define MAX_VLAN 4095 +#define DNS_PORT 53 +#define TO_MS 1 +#define SNAPLEN 65536 +#define TRUE 1 +#define FALSE 0 +#define REGEX_CFLAGS (REG_EXTENDED|REG_ICASE|REG_NOSUB|REG_NEWLINE) +#define MAX_TCP_WINDOW (0xFFFF << 14) +#define MEM_MAX 20000000000 // SETTING MAX 
MEMORY USAGE TO 2GB +#define AMOUNT_32BIT_IN_128BIT 4 /* Data structures. */ @@ -289,16 +290,16 @@ struct plugin { LIST(struct plugin) plugins; typedef struct{ - UT_hash_handle hh; //makes the structure hashable using "uthash/uthash.h" - uint32_t from; //these next 5 fields make up the compound key - int sport,dport,transaction_id; - uint32_t to; -}samplePacket ; + UT_hash_handle hh; //makes the structure hashable using "uthash/uthash.h" + uint32_t from[AMOUNT_32BIT_IN_128BIT]; //up to 4 32-bit fields will be used for IPv6 addresses. IPv4 will use 1 + int sport,dport,transaction_id; + uint32_t to[AMOUNT_32BIT_IN_128BIT]; +}samplePacket; typedef struct{ - uint32_t from; - int sport,dport,transaction_id; - uint32_t to; + uint32_t from[AMOUNT_32BIT_IN_128BIT]; + int sport,dport,transaction_id; + uint32_t to[AMOUNT_32BIT_IN_128BIT]; }sample_lookup_key; /* Forward. */ @@ -402,7 +403,6 @@ static int sample = FALSE; static unsigned sampleAmount; static unsigned querycount; static samplePacket *allSampleQueries = NULL; -static unsigned keylen; /* Public. 
*/ @@ -920,21 +920,21 @@ parse_args(int argc, char *argv[]) { } dir_wanted = u; break; - case 'q': + case 'q': #if HAVE_NS_INITPARSE && HAVE_NS_PARSERR && HAVE_NS_SPRINTRR - { - sample = TRUE; - sampleAmount = atoi(optarg); - if(sampleAmount == 0) - usage("-q takes only unsigned integer values != 0"); - querycount = 0; - } + { + sample = TRUE; + sampleAmount = atoi(optarg); + if(sampleAmount == 0) + usage("-q takes only unsigned integer values != 0"); + querycount = 0; + } #else - fprintf(stderr, "%s must be compiled with libbind to use the -q option.\n", - ProgramName); - exit(1); + fprintf(stderr, "%s must be compiled with libbind to use the -q option.\n", + ProgramName); + exit(1); #endif - break; + break; case 'h': u = 0; for (p = optarg; *p; p++) @@ -2380,59 +2380,94 @@ network_pkt(const char *descr, my_bpftimeval ts, unsigned pf, } } /*Sample Module*/ - if (sample == TRUE) - { - ns_msg dnsmsgSample; - ns_initparse(dnspkt,dnslen,&dnsmsgSample); - samplePacket *currentQuery; - - uint32_t *fromBuffer = (uint32_t*)&from.u; - uint32_t *toBuffer = (uint32_t*)&to.u; - - keylen = offsetof(samplePacket,to) //keylen is used to define which fields of the hash structure are added - + sizeof(*toBuffer) //as a compound key. 
Here, the key is composed of all fields between (and including) - - offsetof(samplePacket,from); //samplePacket->to and samplePacket->from (from, sport, dport, transaction_id, to) - - if(dns.qr == 0) - { - querycount++; - if(querycount % sampleAmount == 0) - { - currentQuery = calloc(1,sizeof(*currentQuery)); - assert(currentQuery != NULL); - currentQuery->from = *fromBuffer; - currentQuery->to = *toBuffer; - currentQuery->sport = sport; - currentQuery->dport = dport; - currentQuery->transaction_id = ns_msg_id(dnsmsgSample); - - HASH_ADD(hh,allSampleQueries,from,keylen,currentQuery); - output(descr,from,to,proto,flags,sport,dport,ts,pkt_copy,olen,dnspkt,dnslen); - } - } - else - { - sample_lookup_key *lookup_key = (sample_lookup_key*)calloc(1,sizeof(*lookup_key)); + if (sample == TRUE) + { + int i; + ns_msg dnsmsgSample; + ns_initparse(dnspkt,dnslen,&dnsmsgSample); + samplePacket *currentQuery; + uint32_t fromBuffer[AMOUNT_32BIT_IN_128BIT]; + uint32_t toBuffer[AMOUNT_32BIT_IN_128BIT]; + + unsigned keylen = offsetof(samplePacket,to[AMOUNT_32BIT_IN_128BIT - 1]) //keylen is used to define which fields of the hash structure are added + + sizeof(uint32_t) //as a compound key. Here, the key is composed of all fields between (and including) + - offsetof(samplePacket,from[0]); //samplePacket->to and samplePacket->from (from, sport, dport, transaction_id, to) + + //Parsing the IPv4 or IPv6 addresses into an array, so they can be hashed. 
+ if(from.af == AF_INET) + { + uint32_t *f = (uint32_t*) &from.u; + uint32_t *t = (uint32_t*) &to.u; + + for(i = 0; i < AMOUNT_32BIT_IN_128BIT; i++) + { + fromBuffer[i] = f[i]; + toBuffer[i] = t[i]; + } + } + else if(from.af == AF_INET6) + { + uint32_t *f = (uint32_t*) &from.u; + uint32_t *t = (uint32_t*) &to.u; + + for(i = 0; i < AMOUNT_32BIT_IN_128BIT; i++) + { + fromBuffer[i] = f[i]; + toBuffer[i] = t[i]; + } + } + + + if(dns.qr == 0) + { + querycount++; + + if(querycount % sampleAmount == 0) + { + currentQuery = calloc(1,sizeof(*currentQuery)); + assert(currentQuery != NULL); + for(i = 0; i < AMOUNT_32BIT_IN_128BIT; i++) + { + currentQuery->from[i] = fromBuffer[i]; + currentQuery->to[i] = toBuffer[i]; + } + + currentQuery->sport = sport; + currentQuery->dport = dport; + currentQuery->transaction_id = ns_msg_id(dnsmsgSample); + + HASH_ADD(hh,allSampleQueries,from[0],keylen,currentQuery); + output(descr,from,to,proto,flags,sport,dport,ts,pkt_copy,olen,dnspkt,dnslen); + } + } + else + { + sample_lookup_key *lookup_key = (sample_lookup_key*)calloc(1,sizeof(*lookup_key)); assert(lookup_key != NULL); - lookup_key->from = *toBuffer; - lookup_key->to = *fromBuffer; - lookup_key->dport = sport; - lookup_key->sport = dport; - lookup_key->transaction_id = ns_msg_id(dnsmsgSample); - - HASH_FIND(hh,allSampleQueries,&lookup_key->from,keylen,currentQuery); - if(currentQuery) - { - HASH_DEL(allSampleQueries,currentQuery); - free(currentQuery); - output(descr,from,to,proto,flags,sport,dport,ts,pkt_copy,olen,dnspkt,dnslen); - } - free(lookup_key); - } - }else - { - output(descr,from,to,proto,flags,sport,dport,ts,pkt_copy,olen,dnspkt,dnslen); - } + + for(i = 0; i < AMOUNT_32BIT_IN_128BIT; i++) + { + lookup_key->from[i] = toBuffer[i]; + lookup_key->to[i] = fromBuffer[i]; + } + + lookup_key->dport = sport; + lookup_key->sport = dport; + lookup_key->transaction_id = ns_msg_id(dnsmsgSample); + + HASH_FIND(hh,allSampleQueries,&lookup_key->from[0],keylen,currentQuery); + 
if(currentQuery) + { + HASH_DEL(allSampleQueries,currentQuery); + free(currentQuery); + output(descr,from,to,proto,flags,sport,dport,ts,pkt_copy,olen,dnspkt,dnslen); + } + free(lookup_key); + } + }else + { + output(descr,from,to,proto,flags,sport,dport,ts,pkt_copy,olen,dnspkt,dnslen); + } } /* From 423990b733bc42eeb3eb0ba18aec36d5d85b6150 Mon Sep 17 00:00:00 2001 From: Christian Egger Date: Tue, 20 Sep 2016 11:25:07 +0200 Subject: [PATCH 11/13] fixed the libbind dependency Fixed the libbind dependency logic for sampling --- src/dnscap.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/src/dnscap.c b/src/dnscap.c index 17d12e9..bb24798 100644 --- a/src/dnscap.c +++ b/src/dnscap.c @@ -921,19 +921,11 @@ parse_args(int argc, char *argv[]) { dir_wanted = u; break; case 'q': -#if HAVE_NS_INITPARSE && HAVE_NS_PARSERR && HAVE_NS_SPRINTRR - { - sample = TRUE; - sampleAmount = atoi(optarg); - if(sampleAmount == 0) - usage("-q takes only unsigned integer values != 0"); - querycount = 0; - } -#else - fprintf(stderr, "%s must be compiled with libbind to use the -q option.\n", - ProgramName); - exit(1); -#endif + sample = TRUE; + sampleAmount = atoi(optarg); + if(sampleAmount == 0) + usage("-q takes only unsigned integer values != 0"); + querycount = 0; break; case 'h': u = 0; @@ -2380,6 +2372,7 @@ network_pkt(const char *descr, my_bpftimeval ts, unsigned pf, } } /*Sample Module*/ +#if HAVE_NS_INITPARSE && HAVE_NS_PARSERR && HAVE_NS_SPRINTRR if (sample == TRUE) { int i; @@ -2468,6 +2461,11 @@ network_pkt(const char *descr, my_bpftimeval ts, unsigned pf, { output(descr,from,to,proto,flags,sport,dport,ts,pkt_copy,olen,dnspkt,dnslen); } +#else + fprintf(stderr, "%s must be compiled with libbind to use the -q option.\n", + ProgramName); + exit(1); +#endif /* HAVE_NS_INITPARSE && HAVE_NS_PARSERR && HAVE_NS_SPRINTRR */ } /* From 31a4215a3f0672dc105cb33b36eff887f9331f6c Mon Sep 17 00:00:00 2001 From: Christian Egger Date: Tue, 20 Sep 2016 
13:27:48 +0200 Subject: [PATCH 12/13] updated ipv4/v6 support for sampling Updated the support for ipv4/v6 in the sampling code. Also made some slight changes to the libbind dependency logic. --- src/dnscap.c | 97 +++++++++++++++++----------------------------------- 1 file changed, 32 insertions(+), 65 deletions(-) diff --git a/src/dnscap.c b/src/dnscap.c index bb24798..ee2033a 100644 --- a/src/dnscap.c +++ b/src/dnscap.c @@ -206,18 +206,17 @@ extern char *strptime(const char *, const char *, struct tm *); # define ETHERTYPE_IPV6 0x86DD #endif -#define THOUSAND 1000 -#define MILLION (THOUSAND*THOUSAND) -#define MAX_VLAN 4095 -#define DNS_PORT 53 -#define TO_MS 1 -#define SNAPLEN 65536 -#define TRUE 1 -#define FALSE 0 -#define REGEX_CFLAGS (REG_EXTENDED|REG_ICASE|REG_NOSUB|REG_NEWLINE) -#define MAX_TCP_WINDOW (0xFFFF << 14) -#define MEM_MAX 20000000000 // SETTING MAX MEMORY USAGE TO 2GB -#define AMOUNT_32BIT_IN_128BIT 4 +#define THOUSAND 1000 +#define MILLION (THOUSAND*THOUSAND) +#define MAX_VLAN 4095 +#define DNS_PORT 53 +#define TO_MS 1 +#define SNAPLEN 65536 +#define TRUE 1 +#define FALSE 0 +#define REGEX_CFLAGS (REG_EXTENDED|REG_ICASE|REG_NOSUB|REG_NEWLINE) +#define MAX_TCP_WINDOW (0xFFFF << 14) +#define MEM_MAX 20000000000 // SETTING MAX MEMORY USAGE TO 2GB /* Data structures. */ @@ -290,16 +289,16 @@ struct plugin { LIST(struct plugin) plugins; typedef struct{ - UT_hash_handle hh; //makes the structure hashable using "uthash/uthash.h" - uint32_t from[AMOUNT_32BIT_IN_128BIT]; //up to 4 32-bit fields will be used for IPv6 addresses. IPv4 will use 1 + UT_hash_handle hh; //makes the structure hashable using "uthash/uthash.h" + iaddr from; int sport,dport,transaction_id; - uint32_t to[AMOUNT_32BIT_IN_128BIT]; + iaddr to; }samplePacket; typedef struct{ - uint32_t from[AMOUNT_32BIT_IN_128BIT]; + iaddr from; int sport,dport,transaction_id; - uint32_t to[AMOUNT_32BIT_IN_128BIT]; + iaddr to; }sample_lookup_key; /* Forward. 
*/ @@ -921,11 +920,17 @@ parse_args(int argc, char *argv[]) { dir_wanted = u; break; case 'q': +#if HAVE_NS_INITPARSE && HAVE_NS_PARSERR && HAVE_NS_SPRINTRR sample = TRUE; sampleAmount = atoi(optarg); if(sampleAmount == 0) usage("-q takes only unsigned integer values != 0"); querycount = 0; +#else + fprintf(stderr, "%s must be compiled with libbind to use the -q option.\n", + ProgramName); + exit(1); +#endif break; case 'h': u = 0; @@ -2371,45 +2376,18 @@ network_pkt(const char *descr, my_bpftimeval ts, unsigned pf, abort(); } } + /*Sample Module*/ #if HAVE_NS_INITPARSE && HAVE_NS_PARSERR && HAVE_NS_SPRINTRR if (sample == TRUE) { - int i; ns_msg dnsmsgSample; ns_initparse(dnspkt,dnslen,&dnsmsgSample); samplePacket *currentQuery; - uint32_t fromBuffer[AMOUNT_32BIT_IN_128BIT]; - uint32_t toBuffer[AMOUNT_32BIT_IN_128BIT]; - - unsigned keylen = offsetof(samplePacket,to[AMOUNT_32BIT_IN_128BIT - 1]) //keylen is used to define which fields of the hash structure are added - + sizeof(uint32_t) //as a compound key. Here, the key is composed of all fields between (and including) - - offsetof(samplePacket,from[0]); //samplePacket->to and samplePacket->from (from, sport, dport, transaction_id, to) - - //Parsing the IPv4 or IPv6 addresses into an array, so they can be hashed. - if(from.af == AF_INET) - { - uint32_t *f = (uint32_t*) &from.u; - uint32_t *t = (uint32_t*) &to.u; - - for(i = 0; i < AMOUNT_32BIT_IN_128BIT; i++) - { - fromBuffer[i] = f[i]; - toBuffer[i] = t[i]; - } - } - else if(from.af == AF_INET6) - { - uint32_t *f = (uint32_t*) &from.u; - uint32_t *t = (uint32_t*) &to.u; - - for(i = 0; i < AMOUNT_32BIT_IN_128BIT; i++) - { - fromBuffer[i] = f[i]; - toBuffer[i] = t[i]; - } - } + unsigned keylen = offsetof(samplePacket,to) //keylen is used to define which fields of the hash structure are added + + sizeof(to) //as a compound key. 
Here, the key is composed of all fields between (and including) + - offsetof(samplePacket,from); //samplePacket->to and samplePacket->from (from, sport, dport, transaction_id, to) if(dns.qr == 0) { @@ -2419,17 +2397,14 @@ network_pkt(const char *descr, my_bpftimeval ts, unsigned pf, { currentQuery = calloc(1,sizeof(*currentQuery)); assert(currentQuery != NULL); - for(i = 0; i < AMOUNT_32BIT_IN_128BIT; i++) - { - currentQuery->from[i] = fromBuffer[i]; - currentQuery->to[i] = toBuffer[i]; - } + currentQuery->from = from; + currentQuery->to = to; currentQuery->sport = sport; currentQuery->dport = dport; currentQuery->transaction_id = ns_msg_id(dnsmsgSample); - HASH_ADD(hh,allSampleQueries,from[0],keylen,currentQuery); + HASH_ADD(hh,allSampleQueries,from,keylen,currentQuery); output(descr,from,to,proto,flags,sport,dport,ts,pkt_copy,olen,dnspkt,dnslen); } } @@ -2437,18 +2412,14 @@ network_pkt(const char *descr, my_bpftimeval ts, unsigned pf, { sample_lookup_key *lookup_key = (sample_lookup_key*)calloc(1,sizeof(*lookup_key)); assert(lookup_key != NULL); - - for(i = 0; i < AMOUNT_32BIT_IN_128BIT; i++) - { - lookup_key->from[i] = toBuffer[i]; - lookup_key->to[i] = fromBuffer[i]; - } + lookup_key->from = to; + lookup_key->to = from; lookup_key->dport = sport; lookup_key->sport = dport; lookup_key->transaction_id = ns_msg_id(dnsmsgSample); - HASH_FIND(hh,allSampleQueries,&lookup_key->from[0],keylen,currentQuery); + HASH_FIND(hh,allSampleQueries,&lookup_key->from,keylen,currentQuery); if(currentQuery) { HASH_DEL(allSampleQueries,currentQuery); @@ -2461,10 +2432,6 @@ network_pkt(const char *descr, my_bpftimeval ts, unsigned pf, { output(descr,from,to,proto,flags,sport,dport,ts,pkt_copy,olen,dnspkt,dnslen); } -#else - fprintf(stderr, "%s must be compiled with libbind to use the -q option.\n", - ProgramName); - exit(1); #endif /* HAVE_NS_INITPARSE && HAVE_NS_PARSERR && HAVE_NS_SPRINTRR */ } From 9581375b18f7ecb8bb2888ffb98aa6dca3312023 Mon Sep 17 00:00:00 2001 From: Christian 
Egger Date: Tue, 20 Sep 2016 13:56:56 +0200 Subject: [PATCH 13/13] rephrased a comment rephrased a comment from "Sampling Module" to "sampling" --- src/dnscap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dnscap.c b/src/dnscap.c index ee2033a..a240a32 100644 --- a/src/dnscap.c +++ b/src/dnscap.c @@ -2377,7 +2377,7 @@ network_pkt(const char *descr, my_bpftimeval ts, unsigned pf, } } -/*Sample Module*/ +/*sampling*/ #if HAVE_NS_INITPARSE && HAVE_NS_PARSERR && HAVE_NS_SPRINTRR if (sample == TRUE) {