redis：字典-hash-CFANZ编程社区

redis 中使用hash表实现字典：首先看hash表的实现

typedef struct dictht {//dictht hash桶存在于dict结构中
    //每个具体table[i]中的节点数据类型是dictEntry 结构表示， 每个 dictEntry 结构都保存着一个键值对：
    // 哈希表节点指针数组（俗称桶，bucket）
    dictEntry **table;//table[idx]指向的是首个dictEntry节点，见dictAddRaw  创建初始化table桶见dictExpand
    unsigned long size;//表示该hash中实际桶的个数---哈希表大小
    // 哈希表大小掩码，用于计算索引值
    // 总是等于 size - 1 相当于指针数组的长度掩码，用于计算索引值；具体的桶是从0到size-1
    unsigned long sizemask; // 该哈希表已有节点的数量
    unsigned long used;
} dictht;

/*
 * 哈希表节点  该结构式字典dictht->table[] hash中的成员结构，任何key - value键值对都会添加到转换为dictEntry结构添加到字典hash table中
 */
typedef struct dictEntry { 
    /*key 属性保存着键值对中的键， 而 v 属性则保存着键值对中的值， 其中键值对的值可以是一个指针， 或者是一个 uint64_t 整数， 又或者是一个 int64_t 整数。*/
     /*首先命令行中的字符串先保存到进入xxxCommand(如setCommand)函数时，各个命令字符串单纯已经转换为redisObject保存到redisClient->argv[]中，
    然后在setKey等相关函数中把key-value转换为dictEntry节点的key和v(分别对应key和value)添加到dictht->table[]  hash中
    */
    void *key;/*
      key 属性保存着键值对中的键， 
      而 v 属性则保存着键值对中的值， 其中键值对的值可以是一个指针， 或者是一个 uint64_t 整数， 又或者是一个 int64_t 整数。 */
     /*首先命令行中的字符串先保存到进入xxxCommand(如setCommand)函数时，各个命令字符串单纯已经转换为redisObject保存到redisClient->argv[]中，
    然后在setKey等相关函数中把key-value转换为dictEntry节点的key和v(分别对应key和value)添加到dictht->table[]  hash中
    */
    union {
        void *val;
        uint64_t u64;
        int64_t s64;//一般记录的是过期键db->expires中每个键的过期时间  单位ms
    } v;//对应一个robj

    
    //next 属性是指向另一个哈希表节点的指针， 这个指针可以将多个哈希值相同的键值对连接在一次， 以此来解决键冲突（collision）的问题。
    // 链往后继节点
    // 指向下个哈希表节点，形成链表
    struct dictEntry *next;

} dictEntry;

来看看字典怎么实现的

/* 字典*/
typedef struct dict {//dictCreate创建和初始化

    //type 属性是一个指向 dictType 结构的指针， 每个 dictType 结构保存了一簇用于操作特定类型键值对的函数， Redis 会为用途不
//同的字典设置不同的类型特定函数。
    // 类型特定函数
    dictType *type;

    // 私有数据 // 类型处理函数的私有数据  privdata 属性则保存了需要传给那些类型type特定函数的可选参数。
    void *privdata;

    /*
    ht 属性是一个包含两个项的数组， 数组中的每个项都是一个 dictht 哈希表， 一般情况下， 字典只使用 ht[0] 哈希表， ht[1] 哈希表只
    会在对 ht[0] 哈希表进行 rehash 时使用。
    
    除了 ht[1] 之外， 另一个和 rehash 有关的属性就是 rehashidx ： 它记录了 rehash 目前的进度， 如果目前没有在进行 rehash ， 
    那么它的值为 -1 。
    */

    // 哈希表
    dictht ht[2];//dictht hash桶初始化创建见dictExpand     

    // rehash 索引
    // 当 rehash 不在进行时，值为 -1  // 记录 rehash 进度的标志，值为-1 表示 rehash 未进行
    
    //判断是否需要rehash dictIsRehashing  _dictInit初始化-1 //dictRehash中子曾，dictExpand中置0，如果是迁移完毕置-1
    int rehashidx; /* rehashing not in progress if rehashidx == -1 */
    // 目前正在运行的安全迭代器的数量
    int iterators; /* number of iterators currently running */
} dict; //dict空间创建初始化在dictExpand，第一次是在_dictExpandIfNeededif->dictExpand(d, DICT_HT_INITIAL_SIZE);

也就是字典是依赖于hash 实现！！

核心问题有：

hash算法：可以参考之前的一篇文章：hash算法

目前使用到的hash算法主要有

MurmurHash算法：高运算性能，低碰撞率
DJB Hash Time 33 hash哈希算法;Time33在效率和随机性两方面上俱佳；
CRC-32、MD5、SHA-1，SM3

Redis 计算哈希值和索引值的方法如下：

// 使用字典设置的哈希函数，计算键 key 的哈希值 一般使用 DJB 或者murhash-2 算法
hash = dict->type->hashFunction(key);

// 使用哈希表的 sizemask 属性和哈希值，计算出索引值
// 根据情况不同， ht[x] 可以是 ht[0] 或者 ht[1]
index = hash & dict->ht[x].sizemask;

键冲突解决

当有两个或以上数量的键被分配到了哈希表数组的同一个索引上面时，我们称这些键发生了冲突（collision）

　　Redis 的哈希表使用链地址法（separate chaining）来解决键冲突：每个哈希表节点都有一个 next 指针，多个哈希表节点可以用 next 指针构成一个单向链表，被分配到同一个索引上的多个节点可以用这个单向链表连接起来，这就解决了键冲突的问题

Rehash

随着操作的不断增加，哈希表保存的键值对会逐渐的增多或者减少，为了让哈希表的负载保存在一个合理的范围之内，当哈希表保存的键值对数量太多或者太少时，程序对hash表的大小进行动态的扩展或者收缩。

拓展和收缩哈希表的工作可以通过执行Rehash操作来完成，Redis的字典的Rehash过程如下：

为字典的ht[1]哈希表分配空间，这个哈希表空间大小取决于要执行的操作，以及ht[0]当前包含的键值对数量（也即是ht[0].used属性的值）
将保持在ht[0]中所有键值对rehash到ht[1]上面，rehash指的是重新计算hash值和索引值，然后将键值对放置到ht[1]哈希表指定位置上。
当ht[0]包含的所有键值对都迁移到ht[1]之后，释放ht[0]，当ht[1]设置为ht[0]，并在ht[1]新创建空白哈希表，为下一次hash做准备

那什么时候开始rehash呢？

# 负载因子 = 哈希表已保存节点数量 / 哈希表大小
load_factor = ht[0].used / ht[0].size

目前不考虑业务的情况下负载因子大约0.75 或者小于0.1 都会重新扩大/缩小

注意

目前redis 的哈希桶大小都是2的n次方！！目前笔者认为应该遵循如下设计规则：hash桶的大小都是素数（和2倍相近的一个素数）

设有一个哈希函数
H( c ) = c % N;
当N取一个合数时，最简单的例子是取2^n，比如说取2^3=8,这时候
H( 11100(二进制） ) = H( 28 ) = 4
H( 10100(二进制) ) = H( 20 ）= 4
这时候c的二进制第4位（从右向左数）就”失效”了，也就是说，无论第c的4位取什么值，都会导致H( c )的值一样．这时候c的第四位就根本不参与H( c )的运算，这样H( c )就无法完整地反映c的特性，增大了导致冲突的几率．

如何设计rehash 也就是如何遍历hash表？

　　如果哈希表里保存的键值对数量是四百万、四千万甚至四亿个键值对，那么要一次性将这些ht[0]键值对全部 rehash 到 ht[1] 的话，庞大的计算量可能会导致服务器在一段时间内停止服务。为了避免 rehash 对服务器性能造成影响， 服务器不是一次性将 ht[0] 里面的所有键值对全部 rehash 到 ht[1] ，而是分多次、渐进式地将 ht[0] 里面的键值对慢慢地 rehash 到 ht[1]

为 ht[1] 分配空间，让字典同时持有 ht[0] 和 ht[1] 两个哈希表。
在字典中维持一个索引计数器变量 rehashidx ，并将它的值设置为 0 ，表示 rehash 工作正式开始。
在 rehash 进行期间，每次对字典执行添加、删除、查找或者更新操作时，程序除了执行指定的操作以外，还会顺带将 ht[0] 哈希表在 rehashidx 索引上的所有键值对 rehash 到 ht[1] ，当 rehash 工作完成之后，程序将 rehashidx 属性的值增一。
随着字典操作的不断执行，最终在某个时间点上， ht[0] 的所有键值对都会被 rehash 至 ht[1] ，这时程序将 rehashidx 属性的值设为 -1 ，表示 rehash 操作已完成。

渐进式 rehash 的好处在于它采取分而治之的方式，将 rehash 键值对所需的计算工作均摊到对字典的每个添加、删除、查找和更新操作上，从而避免了集中式 rehash 而带来的庞大计算量。
PS:每步 rehash 都是以一个哈希表索引（桶）作为单位的，一个桶里可能会有多个节点，

/* Performs N steps of incremental rehashing. Returns 1 if there are still
 * keys to move from the old to the new hash table, otherwise 0 is returned.
 *
 * 执行 N 步渐进式 rehash 。
 *
 * 返回 1 表示仍有键需要从 0 号哈希表移动到 1 号哈希表，
 * 返回 0 则表示所有键都已经迁移完毕。
 *
 * Note that a rehashing step consists in moving a bucket (that may have more
 * than one key as we use chaining) from the old to the new hash table.
 *
 * T = O(N)
 */
int dictRehash(dict *d, int n) { 
//rehash分为主动rehash，dictRehashMilliseconds也就是定时去rehash，怕有的KV长时间不访问一直不迁移到ht[1],
//另一种是由客户端访问的时候被动进行rehash，见_dictRehashStep
    // 只可以在 rehash 进行中时执行
    if (!dictIsRehashing(d)) return 0;

    // 进行 N 步迁移
    // T = O(N)
    while(n--) {
        dictEntry *de, *nextde;

        /* Check if we already rehashed the whole table... */
        // 如果 0 号哈希表为空，那么表示 rehash 执行完毕
        // T = O(1)
        if (d->ht[0].used == 0) {
            // 释放 0 号哈希表
            zfree(d->ht[0].table);
            // 将原来的 1 号哈希表设置为新的 0 号哈希表
            d->ht[0] = d->ht[1];
            // 重置旧的 1 号哈希表
            _dictReset(&d->ht[1]);
            // 关闭 rehash 标识
            d->rehashidx = -1;
            // 返回 0 ，向调用者表示 rehash 已经完成
            return 0;
        }
        /* Note that rehashidx can't overflow as we are sure there are more
         * elements because ht[0].used != 0 */
        // 确保 rehashidx 没有越界
        assert(d->ht[0].size > (unsigned)d->rehashidx);
        // 略过数组中为空的索引，找到下一个非空索引
        while(d->ht[0].table[d->rehashidx] == NULL) d->rehashidx++;
        // 指向该索引的链表表头节点
        de = d->ht[0].table[d->rehashidx];
        /* Move all the keys in this bucket from the old to the new hash HT */
        // 将链表中的所有节点迁移到新哈希表
        // T = O(1)
        while(de) { /* 在hdel一次删除数十条的时候，如果刚好触发dictRehash,这里可能会阻塞，有可能该table链表上的数据量很大 */
            unsigned int h;
            // 保存下个节点的指针
            nextde = de->next;
            /* Get the index in the new hash table */
            // 计算新哈希表的哈希值，以及节点插入的索引位置
            h = dictHashKey(d, de->key) & d->ht[1].sizemask;
            // 插入节点到新哈希表
            de->next = d->ht[1].table[h];
            d->ht[1].table[h] = de;
            // 更新计数器
            d->ht[0].used--;
            d->ht[1].used++;

            // 继续处理下个节点
            de = nextde;
        }
        // 将刚迁移完的哈希表索引的指针设为空
        d->ht[0].table[d->rehashidx] = NULL;
        // 更新 rehash 索引
        d->rehashidx++;
    }
    return 1;
}

目前有一份处理hash 的代码：由于处理数据量比较小；所以没有使用渐进式 rehash

redis：字典-hash_redis redis：字典-hash_键值对_02

struct hash_node_st {
    int klen;                   /* key and len */

    unsigned int __hval;        /* hash value of this node, for rehashing */
    void *val;                  /* value */

    struct hash_node_st *next;
    char key[0];
};

/**
 * 哈希表结构
 */
struct msp_hash_st {
    struct hash_node_st **slots;
    unsigned int nslot;         /* number of bucket */
    unsigned int max_element;   /* max element allowed for next rehashing */
    unsigned int min_element;   /* min element allowed for next rehashing */


    unsigned int nelement;      /* hash entry count */

    int walk_count; /* 遍历计数，表示当前是否在执行walk函数 */
    hash_data_free_func_t hdel;
    hash_key_func_t hkey;
};

static const struct hash_spec_st {
    unsigned int nslot;
    unsigned int min_node;
    unsigned int max_node;
} hash_specs[] = {
    { (1<<3), 0, (1<<5) },
    { (1<<5), (1<<4), (1<<7) },
    { (1<<7), (1<<6), (1<<9) },
    { (1<<9), (1<<8), (1<<11) },
    { (1<<11), (1<<10), (1<<13) },
    { (1<<13), (1<<12), MAX_SIGNED_32INT }
};


unsigned int 
hash_default_key_time33(const void * key, unsigned int length){
   unsigned int hash = 5381;
   unsigned int i    = 0;
   const unsigned char *str = (const unsigned char *)key;
   
   for (i = 0; i < length; ++str, ++i)
   {
      hash = ((hash << 5) + hash) + (*str);
   }//((hash << 5) + hash) 就是位操作实现的hash * 32+hash，即 hash * 33。
   return hash;
}


map_hash_st *create_hash_obj(hash_data_free_func_t del, hash_key_func_t keyf,
        unsigned int slotnum)
{
    map_hash_st *h;

    if (!POWEROF2(slotnum))
        return NULL;

    if ((h = calloc(1, sizeof(map_hash_st))) == NULL)
        return NULL;

    if (slotnum == 0) { /* 自动增长 */
        h->nslot = hash_specs[0].nslot;
        h->max_element = hash_specs[0].max_node;
        h->min_element = hash_specs[0].min_node;
    } else { /* 固定大小 */
        h->nslot = slotnum;
        h->max_element = (MAX_UNSIGNED_32INT);
        h->min_element = 0;
    }

    h->slots = calloc(1, h->nslot * sizeof(struct hash_node_st *));
    if (h->slots == NULL) {
        free(h);
        return NULL;
    }

    h->hdel = del;

    if (keyf)
        h->hkey = keyf;
    else
        h->hkey = hash_default_key_time33;

    return h;
}

/* destroy every thing */
void map_hash_destroy(map_hash_st *ht)
{
    unsigned int i;
    struct hash_node_st *t;

    for(i = 0; i < ht->nslot; i++) {
        while(ht->slots[i]) {
            t = ht->slots[i];
            ht->slots[i] = ht->slots[i]->next;

            if ((void *)ht->hdel)
                ht->hdel(t->val);

            free(t);
        }
    }
    free(ht->slots);
    free(ht);
}


int hash_insert(map_hash_st *ht, const void *key, int len, void *val)
{
    unsigned int hval = ht->hkey(key, len);
    unsigned int idx = hval & (ht->nslot - 1);
    struct hash_node_st *tmp;
    struct hash_node_st *p = ht->slots[idx];

    while (p) {
        if (hval == p->__hval
                && p->klen == len && memcmp(p->key, key, len) == 0) {
            if ((void *)ht->hdel)
                ht->hdel(p->val);

            p->val = val;
            return 0;
        }
        p = p->next;
    }

    /* new node for insert */
    tmp = malloc(sizeof(struct hash_node_st) + len);
    if (!tmp)
        return -1;

    memset(tmp, 0, sizeof(struct hash_node_st) + len);
    tmp->klen = len;
    memcpy(tmp->key, key, len);

    tmp->val = val;
    tmp->__hval = hval;

    tmp->next = ht->slots[idx];
    ht->slots[idx] = tmp;
    ht->nelement++;

    if (ht->nelement > ht->max_element)         /* rehash after newly node inserted */
        hash_rehash(ht, 1);

    return 0;
}

int hash_delete(map_hash_st *ht, const void *key, int len)
{
    unsigned int hval = ht->hkey(key, len);
    unsigned int idx = hval & (ht->nslot - 1);
    struct hash_node_st *p = ht->slots[idx];
    struct hash_node_st *last = NULL;

    while (p) {
        if (hval == p->__hval
                && p->klen == len && memcmp(p->key, key, len) == 0) {
            if (last)
                last->next = p->next;
            else
                ht->slots[idx] = p->next;

            ht->nelement--;

            if ((void *)ht->hdel)
                ht->hdel(p->val);

            free(p);

            if (ht->nelement < ht->min_element)         /* rehash after node delete */
                hash_rehash(ht, -1);

            return 0;
        }
        last = p;
        p = p->next;
    }
    return -1;
}
/* extern/shrink the slots of hash and rearrange all nodes */
/* flag小于0表示收缩，flag大于0扩张，等于0什么都不干  使用 全局遍历更新 rehash*/
static void hash_rehash(map_hash_st *h, int flag)
{
    unsigned int new_nslot = 0, new_max = 0, new_min = 0;
    struct hash_node_st **new_slots;
    unsigned int i;
    /* 遍历的时候不扩张 */
    if (flag == 0 || h->walk_count != 0)
        return;

    if (flag > 0) {
        /* finding the next slot size and max/min element */
        for (i = 0;
                i < sizeof(hash_specs)/sizeof(struct hash_spec_st) - 1;
                ++i) {
            if (hash_specs[i].nslot == h->nslot) {
                new_nslot = hash_specs[i+1].nslot;
                new_max = hash_specs[i+1].max_node;
                new_min = hash_specs[i+1].min_node;
                break;
            }
        }
    } else {
        /* finding the prev slot size and max/min element */
        for (i = 1; i < sizeof(hash_specs)/sizeof(struct hash_spec_st); ++i) {
            if (hash_specs[i].nslot == h->nslot) {
                new_nslot = hash_specs[i-1].nslot;
                new_max = hash_specs[i-1].max_node;
                new_min = hash_specs[i-1].min_node;
                break;
            }
        }
    }

    if (!new_nslot) {
        fprintf(stderr, "[BUG] rehashing, can not get next_mask\n");
        return;
    }

    /* allocate new slots and move every node to new slots */
    new_slots = calloc(1, new_nslot * sizeof(struct hash_node_st *));
    if (new_slots == NULL) {
        fprintf(stderr, "[BUG] rehashing, Out of memory\n");
        return;
    }

    for (i = 0; i < h->nslot; ++i) {
        struct hash_node_st *p = h->slots[i];
        while (p) {
            struct hash_node_st *next = p->next;
            unsigned int newindex = p->__hval & (new_nslot - 1);

            p->next = new_slots[newindex];
            new_slots[newindex] = p;

            p = next;
        }
    }
    free(h->slots);
    h->slots = new_slots;
    h->nslot = new_nslot;
    h->max_element = new_max;
    h->min_element = new_min;
}


/* walk the hash with callback on every element */
int hash_walk(map_hash_st *ht, void *udata, hash_walk_func_t fn)
{
    struct hash_node_st *p;
    unsigned int i;
    int ret;

    if (!(void *)fn)
        return -1;

    ht->walk_count++;

    for (i = 0; i < ht->nslot; ++i) {
        p = ht->slots[i];
        while (p) {
            struct hash_node_st *next = p->next;

            ret = fn(p->key, p->klen, p->val, udata);
            if (ret) {
                ht->walk_count--;
                return ret;
            }

            p = next;
        }
    }
    ht->walk_count--;
    return 0;
}

/* search the key, return at void **val return 0 if key founded */
int hash_search(map_hash_st *ht, const void *key, int len, void **val)
{
    unsigned int hval = ht->hkey(key, len);
    unsigned int idx = hval & (ht->nslot - 1);
    struct hash_node_st *p = ht->slots[idx];

    while(p) {
        if (hval == p->__hval
                && p->klen == len && memcmp(p->key, key, len) == 0) {
            if (val)
                *val = p->val;

            return 0;
        }
        p = p->next;
    }
    return -1;
}

/支持每次遍历指定个数
int hash_walk_partial(map_hash_st *ht, hash_par_st *cookie,
        void *udata, hash_walk_func_t fn)
{
    struct hash_node_st *p;
    int towalk, ret;

    if (!(void *)fn)
        return -1;

    ht->walk_count++;

    towalk = ht->nslot - cookie->iter;
    if (towalk > cookie->limit_cnt) {
        towalk = cookie->limit_cnt;
    }

    while (towalk > 0) {
        p = ht->slots[cookie->iter];
        while (p) {
            struct hash_node_st *next = p->next;
            ret = fn(p->key, p->klen, p->val, udata);
            if (ret < 0)
            {
                return -1;
            }
            p = next;
        }

        cookie->iter++;
        towalk--;
    }
    
    if (cookie->iter >= ht->nslot) {
        return 0;
    }
    ht->walk_count--;
    return 1;
}

View Code

后续如果使用渐进式rehash的时候需要考虑

　部分遍历的场景 + 渐进rehash +数据插入+数据删除+数据更新怎样做到互斥处理

看下redis 中数据时怎样的添加到dict中

/* Add an element to the target hash table */
/*
 * 尝试将给定键值对添加到字典 只有给定键 key 不存在于字典时，添加操作才会成功
 *添加成功返回 DICT_OK ，失败返回 DICT_ERR 最坏 T = O(N) ，平滩 O(1) 
 */ // 将给定的键值对添加到字典里面。      
int dictAdd(dict *d, void *key, void *val) //把key和val对应的obj添加到一个entry中 
{
    // 尝试添加键到字典，并返回包含了这个键的新哈希节点
    // T = O(N)
    dictEntry *entry = dictAddRaw(d,key);
    // 键已存在，添加失败
    if (!entry)  {
          return DICT_ERR;
    }
    // 键不存在，设置节点的值
    // T = O(1)
    dictSetVal(d, entry, val);
    // 添加成功
    return DICT_OK;
}        

/*
 * 尝试将键插入到字典中，如果键已经在字典存在，那么返回 NULL
 * 如果键不存在，那么程序创建新的哈希节点， 将节点和键关联，并插入到字典，然后返回节点本身。 T = O(N)
 */ 
dictEntry *dictAddRaw(dict *d, void *key)
{
    int index;
    dictEntry *entry;
    dictht *ht;

    // 如果条件允许的话，进行单步 rehash
    // T = O(1)
    if (dictIsRehashing(d)) {
         _dictRehashStep(d);
    }

    /* Get the index of the new element, or -1 if
     * the element already exists. */
    // 计算键在哈希表中的索引值  如果值为 -1 ，那么表示键已经存在
    /*扩大调用关系调用过程:dictAddRaw->_dictKeyIndex->_dictExpandIfNeeded(这里决定是否需要扩展hash)->dictExpand 缩减hash过程:serverCron->tryResizeHashTables->dictResize(这里绝对缩减后的桶数)->dictExpand */
    if ((index = _dictKeyIndex(d, key)) == -1)
        return NULL;

    // T = O(1)
    /* Allocate the memory and store the new entry */
    // 如果字典正在 rehash ，那么将新键添加到 1 号哈希表
    // 否则，将新键添加到 0 号哈希表
    ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
    // 为新节点分配空间
    entry = zmalloc(sizeof(*entry));
    // 将新节点插入到链表表头
    entry->next = ht->table[index];
    ht->table[index] = entry;
    // 更新哈希表已使用节点数量
    ht->used++;
    /* Set the hash entry fields. */
    // 设置新节点的键
    // T = O(1)
    dictSetKey(d, entry, key);
    return entry;
}


/* Returns the index of a free slot that can be populated with
 * an hash entry for the given 'key'.
 * If the key already exists, -1 is returned. */
static int _dictKeyIndex(dict *ht, const void *key) {
    unsigned int h;
    dictEntry *he;

    /* Expand the hashtable if needed */
    if (_dictExpandIfNeeded(ht) == DICT_ERR)
        return -1;
    /* Compute the key hash value */
    h = dictHashKey(ht, key) & ht->sizemask;
    /* Search if this slot does not already contain the given key */
    he = ht->table[h];
    while(he) {
        if (dictCompareHashKeys(ht, key, he->key))
            return -1;
        he = he->next;
    }
    return h;
}

/* Expand the hash table if needed */
static int _dictExpandIfNeeded(dict *ht) {
    /* If the hash table is empty expand it to the intial size,
     * if the table is "full" dobule its size. */
    if (ht->size == 0)
        return dictExpand(ht, DICT_HT_INITIAL_SIZE);
    if (ht->used == ht->size)
        return dictExpand(ht, ht->size*2);
    return DICT_OK;
}


/* Expand or create the hashtable */
static int dictExpand(dict *ht, unsigned long size) {
    dict n; /* the new hashtable */
    unsigned long realsize = _dictNextPower(size), i;

    /* the size is invalid if it is smaller than the number of
     * elements already inside the hashtable */
    if (ht->used > size)
        return DICT_ERR;

    _dictInit(&n, ht->type, ht->privdata);
    n.size = realsize;
    n.sizemask = realsize-1;
    n.table = calloc(realsize,sizeof(dictEntry*));

    /* Copy all the elements from the old to the new table:
     * note that if the old hash table is empty ht->size is zero,
     * so dictExpand just creates an hash table. */
    n.used = ht->used;
    for (i = 0; i < ht->size && ht->used > 0; i++) {
        dictEntry *he, *nextHe;

        if (ht->table[i] == NULL) continue;

        /* For each hash entry on this slot... */
        he = ht->table[i];
        while(he) {
            unsigned int h;

            nextHe = he->next;
            /* Get the new element index */
            h = dictHashKey(ht, he->key) & n.sizemask;
            he->next = n.table[h];
            n.table[h] = he;
            ht->used--;
            /* Pass to the next element */
            he = nextHe;
        }
    }
    assert(ht->used == 0);
    free(ht->table);

    /* Remap the new hashtable in the old */
    *ht = n;
    return DICT_OK;
}

来看下redis获取键值对应的值函数是怎样实现？

核心考虑点就是：rehash时存在两张表的问题

/*
 * 获取包含给定键的节点的值，如果节点不为空，返回节点的值，
 * 否则返回 NULL，T = O(1)
 */
void *dictFetchValue(dict *d, const void *key) {
    dictEntry *he = NULL;
    // T = O(1)
    he = dictFind(d,key);
    return he ? dictGetVal(he) : NULL;
}


/* 返回字典中包含键 key 的节点,找到返回节点，找不到返回 NULL
T = O(1)
 */
dictEntry *dictFind(dict *d, const void *key)
{
    dictEntry *he;
    unsigned int h, idx, table;

    // 字典（的哈希表）为空
    if (d->ht[0].size == 0) return NULL; /* We don't have a table at all */

    // 如果条件允许的话，进行单步 rehash
    if (dictIsRehashing(d)) _dictRehashStep(d);

    // 计算键的哈希值
    h = dictHashKey(d, key);
    // 在字典的哈希表中查找这个键
    // T = O(1)
    for (table = 0; table <= 1; table++) {
        // 计算索引值
        idx = h & d->ht[table].sizemask;
        // 遍历给定索引上的链表的所有节点，查找 key
        he = d->ht[table].table[idx];
        // T = O(1)
        while(he) {
            if (dictCompareKeys(d, key, he->key))
                return he;

            he = he->next;
        }
        // 如果程序遍历完 0 号哈希表，仍然没找到指定的键的节点
        // 那么程序会检查字典是否在进行 rehash ，
        // 然后才决定是直接返回 NULL ，还是继续查找 1 号哈希表
        if (!dictIsRehashing(d)) return NULL;
    }

    // 进行到这里时，说明两个哈希表都没找到
    return NULL;
}