redis dict(hash表)剖析

发表于 2018-11-23 更新于 2022-06-25 阅读次数：

说明

本篇文章介绍redis的hash表。redis是一个以key-value形式提供服务的内存数据库，为了快速的进行key的查找和插入，只能采用O(1)为时间复杂度的hash表结构来存放数据。所以hash表应该是redis中最重要的一个结构。

实现

redis的hash表结构实现方式跟一般的hash表没有区别，hash冲突采用链表法来实现。因为redis是单进程单线程，所以为了避免rehash带来卡住线程时间过长的影响，redis采用了分步rehash。在这个hash表被使用过程中（Add，Remove，Get…），会触发rehash过程。为了实现分步操作，增加了大部分操作的实现复杂度，但对外的提供的接口还是跟普通hash表一样简单。

dictEntry

typedef struct dictEntry {
    void *key;              // 节点key
    union {
        void *val;
        uint64_t u64;
        int64_t s64;
        double d;
    } v;                    // 节点value，使用了union来节省内存同时方便访问
    struct dictEntry *next; // hash冲突的时候使用链表来串联起所有冲突的节点
} dictEntry;

hash表的一个节点，key使用void类型可以指向任意类型，value采用uion结构可以方便获取一些基本数据类型
next指针，采用链表法处理冲突节点

dictType

// 用来存放某些自定义函数的结构体，实现hash表多态功能
typedef struct dictType {
    unsigned int (*hashFunction)(const void *key);                          // hash计算函数
    void *(*keyDup)(void *privdata, const void *key);                       // key复制函数
    void *(*valDup)(void *privdata, const void *obj);                       // value复制函数
    int (*keyCompare)(void *privdata, const void *key1, const void *key2);  // key比较函数
    void (*keyDestructor)(void *privdata, void *key);                       // key销毁函数
    void (*valDestructor)(void *privdata, void *obj);                       // value销毁函数
} dictType;

每个hash表在创建的时候需要提供该结构体指针，用来实现hash表一些回调函数功能，hash表会在某些场合调用这些回调
这个结构体也能实现hash表类似c++类多态功能，不同的hash表可以拥有不同的某些操作函数，实际上c++类也是采用类似方法实现多态

dictht

typedef struct dictht {
    dictEntry **table;          // entry数组
    unsigned long size;         // 数组大小
    unsigned long sizemask;     // 数组掩码，为size-1，用于取模计算hash表桶的下标
    unsigned long used;         // 当前存放的entry数量
} dictht;

hash表结构，如果是普通的hash表，使用这个结构即可，为了实现分步rehash功能，redis在外面又套了一层
size永远保持为2幂次方，sizemask=size-1，这样计算桶下标的时候可以直接使用h & sizemask来快速计算

dict

typedef struct dict {
    dictType *type;
    void *privdata;     // 用来存放用户变量指针
    dictht ht[2];
    long rehashidx; /* rehashing not in progress if rehashidx == -1 */
    int iterators; /* number of iterators currently running */
} dict;

真正的redis hash表结构，套了两个dictht对象，在一般使用过程中，ht[1]一直保持为空，但一旦进入到rehash过程，ht[1]就会创建成新的size的hash表，然后分步将ht[0]原有的数据rehash到ht[1]中，直到ht[0]没有元素为止，最后将ht[1]的hash表赋值给ht[0]，ht[1]清空
rehash过程中，所有新加的元素都会加入到ht[1]中，所有删除，查找，遍历都需要同时操作ht[0]，ht[1]，加大了这一块的复杂度
rehashidx，没有rehash时值一直为-1，如果进入rehahs过程，则表示当前分步进行到ht[0]的哪个桶，下一次分步的过程能够对接上一次分步的过程
iterators，因为rehash过程不是一次性完成的，所以如果使用迭代器遍历过程中可能会触发rehash，会导致某些元素刚在ht[0]中遍历过，rehash到ht[1]中又遍历一次。所以redish提供一种safe iterator的安全迭代器，在有安全迭代器指向该hash表时，会阻止分步rehash，无论当前是否在rehash过程。改变量值为多少就表示有多少个safe iterator正在迭代。

dictIterator

typedef struct dictIterator {
    dict *d;
    long index;
    int table, safe;
    dictEntry *entry, *nextEntry;
    /* unsafe iterator fingerprint for misuse detection. */
    long long fingerprint;
} dictIterator;

hash表迭代器，d，index，table，entry，nextEntry用来记录遍历过程的位置
safe用来表示该迭代器是否为安全的迭代器
fingerprint，用在非安全迭代器的校验，如果使用非迭代器去遍历某个hash表过程中，就不应该操作该hash表，该变量会保存遍历之前该hash表的一个指纹，遍历结束之后在对比下两个指纹是否相等，debug模式下如果不相等会abort

主要函数分析

函数	描述	时间复杂度	是否触发rehash步骤
dictCreate	创建一个空的hash表	O(1)	N
dictExpand	将hash表扩容到能容纳size的最小2次幂大小（只是初始化rehash的一些变量，rehash过程分步在每次操作该hash表过程中）	O(1)	N
dictAdd	增加键值对到hash表中，如果key已经存在，返回错误	O(1)	Y
dictReplace	更新hash表的key对应的value值，如果key不存在则插入一个新的，操作成功返回1，失败返回0	O(1)	Y
dictDelete	删除hash表key所对应的元素，并调用对应的回调去释放key和value	O(1)	Y
dictDeleteNoFree	删除hash表key所对应的元素，但不调用对应释放key和value的回调	O(1)	Y
dictRelease	释放并清除整个hash表的所有内存	O(N)，N为hash表元素个数	N
dictFind	hash表中查找key对应的entry，如果找不到返回NULL	O(1)	Y
dictFetchValue	hash表中查找key对应的value，如果找不到返回NULL	O(1)	Y
dictResize	将hash表的大小减少到能容纳里面元素的最小值，最小不能小过DICT_HT_INITIAL_SIZE，如果当前禁止resize操作或者当前正在rehash，返回出错（只是初始化rehash的一些变量，rehash过程分步在每次操作该hash表过程中）	O(1)	N
dictGetIterator	获取遍历该hash表的迭代器，遍历过程中应该确保该hash表不能被改变	O(1)	N
dictGetSafeIterator	获取遍历该hash表的安全迭代器，遍历过程中能确保不会触发rehash操作，但遍历过程中新加的元素可能会不被遍历	O(1)	N
dictNext	获取迭代器下一个该遍历的元素	O(1)	N
dictReleaseIterator	释放迭代器内存	O(1)	N
dictEmpty	清空hash表，清空之前会调用callback，具体是一次还是两次取决于是否在rehash	O(N)，N为hash表元素个数	N

每个函数及宏的声明及作用，具体细节可以查看我注释的redis源码

// 一些简单的宏定义，包括释放key，value，设置key，value，比较key
#define dictFreeVal(d, entry) \
    if ((d)->type->valDestructor) \
        (d)->type->valDestructor((d)->privdata, (entry)->v.val)

#define dictSetVal(d, entry, _val_) do { \
    if ((d)->type->valDup) \
        entry->v.val = (d)->type->valDup((d)->privdata, _val_); \
    else \
        entry->v.val = (_val_); \
} while(0)

#define dictSetSignedIntegerVal(entry, _val_) \
    do { entry->v.s64 = _val_; } while(0)

#define dictSetUnsignedIntegerVal(entry, _val_) \
    do { entry->v.u64 = _val_; } while(0)

#define dictSetDoubleVal(entry, _val_) \
    do { entry->v.d = _val_; } while(0)

#define dictFreeKey(d, entry) \
    if ((d)->type->keyDestructor) \
        (d)->type->keyDestructor((d)->privdata, (entry)->key)

#define dictSetKey(d, entry, _key_) do { \
    if ((d)->type->keyDup) \
        entry->key = (d)->type->keyDup((d)->privdata, _key_); \
    else \
        entry->key = (_key_); \
} while(0)

#define dictCompareKeys(d, key1, key2) \
    (((d)->type->keyCompare) ? \
        (d)->type->keyCompare((d)->privdata, key1, key2) : \
        (key1) == (key2))

#define dictHashKey(d, key) (d)->type->hashFunction(key)    // 计算key的hash值
#define dictGetKey(he) ((he)->key)
#define dictGetVal(he) ((he)->v.val)
#define dictGetSignedIntegerVal(he) ((he)->v.s64)
#define dictGetUnsignedIntegerVal(he) ((he)->v.u64)
#define dictGetDoubleVal(he) ((he)->v.d)
#define dictSlots(d) ((d)->ht[0].size+(d)->ht[1].size)      // hash表桶数量
#define dictSize(d) ((d)->ht[0].used+(d)->ht[1].used)       // hash表元素数量
#define dictIsRehashing(d) ((d)->rehashidx != -1)           // 是否正在分步resh操作

/* API */
dict *dictCreate(dictType *type, void *privDataPtr);                            // 创建一个空的hash表
int dictExpand(dict *d, unsigned long size);                                    // 将hash表扩容到能容纳size的最小2次幂大小（只是初始化rehash的一些变量，rehash过程分步在每次操作该hash表过程中）
int dictAdd(dict *d, void *key, void *val);                                     // 增加键值对到hash表中，如果key已经存在，返回错误
dictEntry *dictAddRaw(dict *d, void *key);                                      // hash表增加元素的原始接口，只增加了一个对应key的entry到hash表中，但不对值进行设置，如果key已经存在返回NULL
int dictReplace(dict *d, void *key, void *val);                                 // 更新hash表的key对应的value值，如果key不存在则插入一个新的，操作成功返回1，失败返回0
dictEntry *dictReplaceRaw(dict *d, void *key);                                  // dictAddRaw的升级版，如果key存在，返回当前key对应的value，如果不存在，返回新插入的entry
int dictDelete(dict *d, const void *key);                                       // 删除hash表key所对应的元素，并调用对应的回调去释放key和value
int dictDeleteNoFree(dict *d, const void *key);                                 // 删除hash表key所对应的元素，但不调用对应释放key和value的回调
void dictRelease(dict *d);                                                      // 释放并清除整个hash表的所有内存
dictEntry * dictFind(dict *d, const void *key);                                 // hash表中查找key对应的entry，如果找不到返回NULL
void *dictFetchValue(dict *d, const void *key);                                 // hash表中查找key对应的value，如果找不到返回NULL
int dictResize(dict *d);                                                        // 将hash表的大小减少到能容纳里面元素的最小值，最小不能小过DICT_HT_INITIAL_SIZE，如果当前禁止resize操作或者当前正在rehash，返回出错
dictIterator *dictGetIterator(dict *d);                                         // 获取遍历该hash表的迭代器，遍历过程中应该确保该hash表不能被改变
dictIterator *dictGetSafeIterator(dict *d);                                     // 获取遍历该hash表的安全迭代器，遍历过程中能确保不会触发rehash操作，但遍历过程中新加的元素可能会不被遍历
dictEntry *dictNext(dictIterator *iter);                                        // 获取迭代器下一个该遍历的元素
void dictReleaseIterator(dictIterator *iter);                                   // 释放迭代器内存
dictEntry *dictGetRandomKey(dict *d);                                           // 从hash表中随机获取一个entry
unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count);     // 从hash表中随机选取count个元素，存放在des指向的数组中，返回实际获取的元素个数，可能会小于count
void dictGetStats(char *buf, size_t bufsize, dict *d);                          // for debug
unsigned int dictGenHashFunction(const void *key, int len);                     // hash函数
unsigned int dictGenCaseHashFunction(const unsigned char *buf, int len);        // 不区分大小写的hash函数
void dictEmpty(dict *d, void(callback)(void*));                                 // 清空hash表，清空之前会调用callback，具体是一次还是两次取决于是否在rehash
void dictEnableResize(void);                                                    // 开启hash表resize操作
void dictDisableResize(void);                                                   // 关闭hash表resize操作，注意即使关闭的情况下，当比率大于5:1的情况下还是会触发resize
int dictRehash(dict *d, int n);                                                 // 执行n步rehash操作，返回1表示还有元素需要再一次进行rehash，否则返回0
int dictRehashMilliseconds(dict *d, int ms);                                    // rehash一定时间
void dictSetHashFunctionSeed(unsigned int initval);                             // 设置hash函数种子
unsigned int dictGetHashFunctionSeed(void);                                     // 获取hash函数种子
unsigned long dictScan(dict *d, unsigned long v, dictScanFunction *fn, void *privdata);     // 触发一次遍历hash表操作，外层需要套用循环来遍历整个hash表