Skip to content

Commit

Permalink
dict.c modified to be able to handle more than 150,000,000 keys
Browse files Browse the repository at this point in the history
  • Loading branch information
antirez committed Apr 30, 2009
1 parent 75398fb commit f2923be
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 32 deletions.
19 changes: 7 additions & 12 deletions TODO
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,22 @@ BEFORE REDIS 1.0.0-rc1

- TTL command that returns -1 if a key is not volatile otherwise the time to live of a volatile key.
- Remove max number of args limit
- What happens if the saving child gets killed instead to end normally? Handle this.
- What happens if the saving child gets killed or segfaults instead of ending normally? Handle this.
- Make SINTERSTORE / SUNIONSTORE / SDIFFSTORE return the cardinality of the resulting set.
- network layer stresser in test in demo, make sure to set/get random streams of data and check that what we read back is byte-by-byte the same.
- maxclients directive
- check 'server.dirty' everywhere
- replication automated tests
- an external tool able to perform the 'difference' between two Redis servers. It's like 'diff', but against Redis servers, and the output is the set of commands needed to turn the first server into the second, suitable to be sent via netcat.
$ ./redis-diff 192.168.1.1 192.168.1.2 > diff.txt
$ cat diff.txt | nc 192.168.1.1 6379
$ ./redis-diff 192.168.1.1 192.168.1.2
$ # No output now the servers are identical

This command should be smart and not use too much memory, that is, it should open two connections at the same time against the two servers and perform the comparison key by key. Probably the initial "KEYS *" is unavoidable.

- Shutdown must kill other background saves before starting to save. Otherwise the DB can get replaced by a child that calls rename(2) after the parent for some reason.
- Add missing commands in documentation
- Document replication
- Objects sharing configuration, add the directive "objectsharingpool <size>"
- Make sure to convert all the fstat() calls to 64bit versions.
- SINTERCOUNT, SUNIONCOUNT, SDIFFCOUNT

AFTER 1.0 stable release

- Use partial qsort for SORT + LIMIT
- Locking primitives

FUTURE HINTS

- if in-memory values compression will be implemented, make sure to implement this so that addReply() is able to handle compressed objects, just creating an uncompressed version on the fly and adding this to the output queue instead of the original one. When insetad we need to look at the object string value (SORT BY for example), call a function that will turn the object into an uncompresed one.
- In memory compression: if in-memory values compression will be implemented, make sure to implement this so that addReply() is able to handle compressed objects, just creating an uncompressed version on the fly and adding this to the output queue instead of the original one. When instead we need to look at the object string value (SORT BY for example), call a function that will turn the object into an uncompressed one.
32 changes: 16 additions & 16 deletions dict.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
#include <string.h>
#include <stdarg.h>
#include <assert.h>
#include <limits.h>

#include "dict.h"
#include "zmalloc.h"
Expand Down Expand Up @@ -74,7 +75,7 @@ static void _dictFree(void *ptr) {
/* -------------------------- private prototypes ---------------------------- */

static int _dictExpandIfNeeded(dict *ht);
static unsigned int _dictNextPower(unsigned int size);
static unsigned long _dictNextPower(unsigned long size);
static int _dictKeyIndex(dict *ht, const void *key);
static int _dictInit(dict *ht, dictType *type, void *privDataPtr);

Expand Down Expand Up @@ -152,10 +153,10 @@ int dictResize(dict *ht)
}

/* Expand or create the hashtable */
int dictExpand(dict *ht, unsigned int size)
int dictExpand(dict *ht, unsigned long size)
{
dict n; /* the new hashtable */
unsigned int realsize = _dictNextPower(size), i;
unsigned long realsize = _dictNextPower(size), i;

/* the size is invalid if it is smaller than the number of
* elements already inside the hashtable */
Expand Down Expand Up @@ -286,7 +287,7 @@ int dictDeleteNoFree(dict *ht, const void *key) {
/* Destroy an entire hash table */
int _dictClear(dict *ht)
{
unsigned int i;
unsigned long i;

/* Free all the elements */
for (i = 0; i < ht->size && ht->used > 0; i++) {
Expand Down Expand Up @@ -413,12 +414,11 @@ static int _dictExpandIfNeeded(dict *ht)
}

/* Our hash table capability is a power of two */
static unsigned int _dictNextPower(unsigned int size)
static unsigned long _dictNextPower(unsigned long size)
{
unsigned int i = DICT_HT_INITIAL_SIZE;
unsigned long i = DICT_HT_INITIAL_SIZE;

if (size >= 2147483648U)
return 2147483648U;
if (size >= LONG_MAX) return LONG_MAX;
while(1) {
if (i >= size)
return i;
Expand Down Expand Up @@ -455,9 +455,9 @@ void dictEmpty(dict *ht) {

#define DICT_STATS_VECTLEN 50
void dictPrintStats(dict *ht) {
unsigned int i, slots = 0, chainlen, maxchainlen = 0;
unsigned int totchainlen = 0;
unsigned int clvector[DICT_STATS_VECTLEN];
unsigned long i, slots = 0, chainlen, maxchainlen = 0;
unsigned long totchainlen = 0;
unsigned long clvector[DICT_STATS_VECTLEN];

if (ht->used == 0) {
printf("No stats available for empty dictionaries\n");
Expand Down Expand Up @@ -485,16 +485,16 @@ void dictPrintStats(dict *ht) {
totchainlen += chainlen;
}
printf("Hash table stats:\n");
printf(" table size: %d\n", ht->size);
printf(" number of elements: %d\n", ht->used);
printf(" different slots: %d\n", slots);
printf(" max chain length: %d\n", maxchainlen);
printf(" table size: %ld\n", ht->size);
printf(" number of elements: %ld\n", ht->used);
printf(" different slots: %ld\n", slots);
printf(" max chain length: %ld\n", maxchainlen);
printf(" avg chain length (counted): %.02f\n", (float)totchainlen/slots);
printf(" avg chain length (computed): %.02f\n", (float)ht->used/slots);
printf(" Chain length distribution:\n");
for (i = 0; i < DICT_STATS_VECTLEN-1; i++) {
if (clvector[i] == 0) continue;
printf(" %s%d: %d (%.02f%%)\n",(i == DICT_STATS_VECTLEN-1)?">= ":"", i, clvector[i], ((float)clvector[i]/ht->size)*100);
printf(" %s%ld: %ld (%.02f%%)\n",(i == DICT_STATS_VECTLEN-1)?">= ":"", i, clvector[i], ((float)clvector[i]/ht->size)*100);
}
}

Expand Down
8 changes: 4 additions & 4 deletions dict.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,9 @@ typedef struct dictType {
typedef struct dict {
dictEntry **table;
dictType *type;
unsigned int size;
unsigned int sizemask;
unsigned int used;
unsigned long size;
unsigned long sizemask;
unsigned long used;
void *privdata;
} dict;

Expand Down Expand Up @@ -112,7 +112,7 @@ typedef struct dictIterator {

/* API */
dict *dictCreate(dictType *type, void *privDataPtr);
int dictExpand(dict *ht, unsigned int size);
int dictExpand(dict *ht, unsigned long size);
int dictAdd(dict *ht, void *key, void *val);
int dictReplace(dict *ht, void *key, void *val);
int dictDelete(dict *ht, const void *key);
Expand Down

0 comments on commit f2923be

Please sign in to comment.