postgresql数据库数据结构——创建动态哈希表

网友投稿 283 2022-12-02

postgresql数据库数据结构——创建动态哈希表

hash_create

创建动态哈希表hash_create函数,形参tabname用于传入表名,nelem用于传入元素(elements)的最大数量,info传入额外表参数结构体指针,flags用于传入用于指示从info取哪些参数的掩码。

以InitShmemIndex函数为例(​​PostgreSQL数据库共享内存——小管家InitShmemIndex函数​​),向hash_create函数传入的HASHCTL结构体和flags如下所示。

对于共享哈希表,local哈希头(HTAB结构体)会被分配在TopMemoryContext,其他的都会被分配在共享内存中。对于非共享哈希表,所有结构体都被分配在专门为哈希表创建的内存上下文中,该内存上下文是调用者的内存上下文的子级,如果没有指定任何内存上下文,则使用TopMemoryContext。如果flags指定了HASH_SHARED_MEM标志,是共享哈希表,否则是非共享哈希表。

HTAB *hash_create(const char *tabname, long nelem, HASHCTL *info, int flags) { HTAB *hashp; HASHHDR *hctl; if (flags & HASH_SHARED_MEM) { /* Set up to allocate the hash header */ CurrentDynaHashCxt = TopMemoryContext; }else{ /* Create the hash table's private memory context */ if (flags & HASH_CONTEXT) CurrentDynaHashCxt = info->hcxt; else CurrentDynaHashCxt = TopMemoryContext; CurrentDynaHashCxt = AllocSetContextCreate(CurrentDynaHashCxt,"dynahash",ALLOCSET_DEFAULT_SIZES); }

初始化哈希头,并将table名拷贝到HTAB结构体中的tabname成员。

/* Initialize the hash header, plus a copy of the table name */ hashp = (HTAB *) DynaHashAlloc(sizeof(HTAB) + strlen(tabname) + 1); MemSet(hashp, 0, sizeof(HTAB)); hashp->tabname = (char *) (hashp + 1); strcpy(hashp->tabname, tabname); /* If we have a private context, label it with hashtable's name */ if (!(flags & HASH_SHARED_MEM)) MemoryContextSetIdentifier(CurrentDynaHashCxt, hashp->tabname);

选择合适的哈希函数,如果指定了HASH_FUNCTION标志,则使用info->hash,如果使用HASH_BLOBS标志,则使用uint32_hash或tag_hash,否则使用string_hash。同样处理比较函数和键拷贝函数以及内存分配函数。

/* Select the appropriate hash function (see comments at head of file). */ if (flags & HASH_FUNCTION) hashp->hash = info->hash; else if (flags & HASH_BLOBS){ /* We can optimize hashing for common key sizes */ Assert(flags & HASH_ELEM); if (info->keysize == sizeof(uint32)) hashp->hash = uint32_hash; else hashp->hash = tag_hash; }else hashp->hash = string_hash; /* default hash function */ if (flags & HASH_COMPARE) hashp->match = info->match; else if (hashp->hash == string_hash) hashp->match = (HashCompareFunc) string_compare; else hashp->match = memcmp; /* Similarly, the key-copying function defaults to strlcpy or memcpy. */ if (flags & HASH_KEYCOPY) hashp->keycopy = info->keycopy; else if (hashp->hash == string_hash) hashp->keycopy = (HashCopyFunc) strlcpy; else hashp->keycopy = memcpy; /* And select the entry allocation function, too. */ if (flags & HASH_ALLOC) hashp->alloc = info->alloc; else hashp->alloc = DynaHashAlloc;

对于HASH_SHARED_MEM来说,ctl结构体和目录是预先分配好的,并且需要设置isshared为true;否则需要设置hctl和dir为null,不预先分配,且isshared为false,后续使用alloc函数为hctl分配内存。

if (flags & HASH_SHARED_MEM){ hashp->hctl = info->hctl; hashp->dir = (HASHSEGMENT *) (((char *) info->hctl) + sizeof(HASHHDR)); hashp->hcxt = NULL; hashp->isshared = true; /* hash table already exists, we're just attaching to it */ if (flags & HASH_ATTACH){ /* make local copies of some heavily-used values */ hctl = hashp->hctl; hashp->keysize = hctl->keysize; hashp->ssize = hctl->ssize; hashp->sshift = hctl->sshift; return hashp; } }else{ /* setup hash table defaults */ hashp->hctl = NULL; hashp->dir = NULL; hashp->hcxt = CurrentDynaHashCxt; hashp->isshared = false; } if (!hashp->hctl){ hashp->hctl = (HASHHDR *) hashp->alloc(sizeof(HASHHDR)); if (!hashp->hctl) ereport(ERROR,(errcode(ERRCODE_OUT_OF_MEMORY),errmsg("out of memory"))); }

以下为针对HASH_PARTITION、HASH_SEGMENT、HASH_FFACTOR、HASH_ELEM的处理。

->frozen = false; hdefault(hashp); hctl = hashp->hctl; if (flags & HASH_PARTITION){ /* Doesn't make sense to partition a local hash table */ Assert(flags & HASH_SHARED_MEM); Assert(info->num_partitions == next_pow2_int(info->num_partitions)); hctl->num_partitions = info->num_partitions; } if (flags & HASH_SEGMENT){ hctl->ssize = info->ssize; hctl->sshift = my_log2(info->ssize); /* ssize had better be a power of 2 */ Assert(hctl->ssize == (1L << hctl->sshift)); } if (flags & HASH_FFACTOR) hctl->ffactor = info->ffactor; /* SHM hash tables have fixed directory size passed by the caller. */ if (flags & HASH_DIRSIZE){ hctl->max_dsize = info->max_dsize; hctl->dsize = info->dsize; } /* hash table now allocates space for key and data but you have to say how much space to allocate */ if (flags & HASH_ELEM){ Assert(info->entrysize >= info->keysize); hctl->keysize = info->keysize; hctl->entrysize = info->entrysize; } /* make local copies of heavily-used constant fields */ hashp->keysize = hctl->keysize; hashp->ssize = hctl->ssize; hashp->sshift = hctl->sshift;

调用init_htab函数构建hash目录结构。

/* Build the hash directory structure */ if (!init_htab(hashp, nelem)) elog(ERROR, "failed to initialize hash table \"%s\"", hashp->tabname);

对于共享哈希表，预先分配所需数量的元素会降低运行时出现out of shared memory的几率；对于非共享哈希表，当所需元素数量小于nelem_alloc时预先分配这些元素，可以减少空间浪费。

if ((flags & HASH_SHARED_MEM) || nelem < hctl->nelem_alloc){ int i,freelist_partitions,nelem_alloc,nelem_alloc_first; /* If hash table is partitioned, give each freelist an equal share of the initial allocation. Otherwise only freeList[0] is used. */ if (IS_PARTITIONED(hashp->hctl)) freelist_partitions = NUM_FREELISTS; else freelist_partitions = 1; nelem_alloc = nelem / freelist_partitions; if (nelem_alloc <= 0) nelem_alloc = 1; /* Make sure we'll allocate all the requested elements; freeList[0] gets the excess if the request isn't divisible by NUM_FREELISTS. */ if (nelem_alloc * freelist_partitions < nelem) nelem_alloc_first = nelem - nelem_alloc * (freelist_partitions - 1); else nelem_alloc_first = nelem_alloc; for (i = 0; i < freelist_partitions; i++){ int temp = (i == 0) ? nelem_alloc_first : nelem_alloc; if (!element_alloc(hashp, temp, i)) ereport(ERROR,(errcode(ERRCODE_OUT_OF_MEMORY),errmsg("out of memory"))); } } if (flags & HASH_FIXED_SIZE) hashp->isfixed = true; return hashp;}

init_htab

init_htab建立哈希表目录和段数组：先计算哈希桶的数量next_pow2_int((nelem - 1) / hctl->ffactor + 1)，再计算segment数量next_pow2_int((nbuckets - 1) / hctl->ssize + 1)。

static bool init_htab(HTAB *hashp, long nelem) { HASHHDR *hctl = hashp->hctl; HASHSEGMENT *segp; int nbuckets, nsegs, i; /* initialize mutexes if it's a partitioned table */ if (IS_PARTITIONED(hctl)) for (i = 0; i < NUM_FREELISTS; i++) SpinLockInit(&(hctl->freeList[i].mutex)); /* Divide number of elements by the fill factor to determine a desired * number of buckets. Allocate space for the next greater power of two * number of buckets */ nbuckets = next_pow2_int((nelem - 1) / hctl->ffactor + 1); /* In a partitioned table, nbuckets must be at least equal to * num_partitions; were it less, keys with apparently different partition * numbers would map to the same bucket, breaking partition independence. * (Normally nbuckets will be much bigger; this is just a safety check.) */ while (nbuckets < hctl->num_partitions) nbuckets <<= 1; hctl->max_bucket = hctl->low_mask = nbuckets - 1; hctl->high_mask = (nbuckets << 1) - 1; /* Figure number of directory segments needed, round up to a power of 2 */ nsegs = (nbuckets - 1) / hctl->ssize + 1; nsegs = next_pow2_int(nsegs); /* Make sure directory is big enough. If pre-allocated directory is too small, choke (caller screwed up). */ if (nsegs > hctl->dsize){ if (!(hashp->dir)) hctl->dsize = nsegs; else return false; } /* Allocate a directory */ if (!(hashp->dir)){ CurrentDynaHashCxt = hashp->hcxt; hashp->dir = (HASHSEGMENT *)hashp->alloc(hctl->dsize * sizeof(HASHSEGMENT)); if (!hashp->dir) return false; } /* Allocate initial segments */ for (segp = hashp->dir; hctl->nsegs < nsegs; hctl->nsegs++, segp++) { *segp = seg_alloc(hashp); if (*segp == NULL) return false; } /* Choose number of entries to allocate at a time */ hctl->nelem_alloc = choose_nelem_alloc(hctl->entrysize); return true;}

版权声明:本文内容由网络用户投稿,版权归原作者所有,本站不拥有其著作权,亦不承担相应法律责任。如果您发现本站中有涉嫌抄袭或描述失实的内容,请联系我们jiasou666@gmail.com 处理,核实后本网站将在24小时内删除侵权内容。

上一篇:聊聊Java的switch为什么不支持long
下一篇:PG服务进程(Postgres)——WaitEventSet
相关文章

 发表评论

暂时没有评论,来抢沙发吧~