ir/ir_strtab.c

228 lines
5.5 KiB
C
Raw Normal View History

2022-11-08 09:32:46 +01:00
/*
* IR - Lightweight JIT Compilation Framework
* (String table)
* Copyright (C) 2022 Zend by Perforce.
* Authors: Dmitry Stogov <dmitry@php.net>
*/
2022-04-05 23:19:23 +02:00
#include "ir.h"
2022-05-27 12:18:04 +02:00
#include "ir_private.h"
2022-04-05 23:19:23 +02:00
/*
 * One entry of the string table.
 *
 * Buckets are stored contiguously starting at strtab->data and are linked
 * into per-hash-slot chains through `next`.
 */
typedef struct _ir_strtab_bucket {
/* Hash of the key as returned by ir_str_hash(). */
uint32_t h;
/* Key length in bytes, not counting any terminating NUL. */
uint32_t len;
/*
 * Key bytes. ir_strtab_lookup() stores a pointer into strtab->buf when the
 * table has a buffer, otherwise the pointer passed by the caller.
 */
const char *str;
/*
 * Byte offset (relative to strtab->data) of the next bucket in the same
 * hash chain; the chain walkers stop at IR_INVALID_IDX.
 */
uint32_t next;
/* Value associated with the key. */
ir_ref val;
} ir_strtab_bucket;
/*
 * Hash the first `len` bytes of `str` with the "times 33" scheme
 * (h = h * 33 + byte, seeded with 5381).
 *
 * Bit 0x10000000 is forced on in the result, so the returned hash is never
 * zero.
 */
static uint32_t ir_str_hash(const char *str, size_t len)
{
	uint32_t h = 5381;
	size_t i;

	for (i = 0; i < len; i++) {
		/* Mix in every byte of the key, not just str[0] repeated. */
		h = ((h << 5) + h) + str[i];
	}
	return h | 0x10000000;
}
/*
 * Round `size` up to a power of two; a value that already is a power of two
 * is returned unchanged.
 *
 * The arithmetic is unsigned and wraps: 0 and values above 2^31 yield 0.
 */
static uint32_t ir_strtab_hash_size(uint32_t size)
{
	uint32_t v = size - 1;
	unsigned int shift;

	/* Smear the highest set bit into every lower position. */
	for (shift = 1; shift <= 16; shift <<= 1) {
		v |= v >> shift;
	}
	return v + 1;
}
/*
 * Double the bucket capacity of `strtab` and rebuild all hash chains.
 *
 * The table is a single allocation: an array of uint32_t chain heads
 * immediately followed by the bucket array. strtab->data points at the first
 * bucket, so chain heads are addressed with negative uint32_t indexes
 * obtained from (hash | mask).
 *
 * The rehash loop always runs at least once, so the table must already hold
 * an entry; the only caller in this file invokes it when count >= size.
 */
static void ir_strtab_resize(ir_strtab *strtab)
{
	char *prev_buckets = strtab->data;
	size_t prev_heads_bytes = (uint32_t)(-(int32_t)strtab->mask) * sizeof(uint32_t);
	uint32_t new_size = strtab->size * 2;
	uint32_t new_hash_size = ir_strtab_hash_size(new_size);
	size_t heads_bytes = new_hash_size * sizeof(uint32_t);
	char *block = ir_mem_malloc(heads_bytes + new_size * sizeof(ir_strtab_bucket));
	ir_strtab_bucket *bucket;
	uint32_t *heads;
	uint32_t offset = 0;
	uint32_t remaining;

	/* Fill every chain head with all-ones bytes (empty chain marker). */
	memset(block, -1, heads_bytes);
	strtab->data = block + heads_bytes;
	strtab->mask = (uint32_t)(-(int32_t)new_hash_size);
	strtab->size = new_size;

	/* Move the existing buckets over, then release the old allocation. */
	memcpy(strtab->data, prev_buckets, strtab->count * sizeof(ir_strtab_bucket));
	ir_mem_free(prev_buckets - prev_heads_bytes);

	/* Re-link each bucket at the front of the chain of its new slot. */
	heads = (uint32_t*)strtab->data;
	bucket = (ir_strtab_bucket*)strtab->data;
	remaining = strtab->count;
	do {
		int32_t slot = (int32_t)(bucket->h | strtab->mask);

		bucket->next = heads[slot];
		heads[slot] = offset;
		offset += sizeof(ir_strtab_bucket);
		bucket++;
	} while (--remaining);
}
/*
 * Enlarge strtab->buf until at least `len + 1` bytes are free past buf_top
 * (room for `len` characters plus a terminating NUL).
 *
 * The capacity is doubled at least once and then repeatedly until the
 * request fits. If the reallocation moves the buffer, the `str` pointer of
 * every bucket is rebased onto the new block.
 */
static void ir_strtab_grow_buf(ir_strtab *strtab, uint32_t len)
{
	/*
	 * Keep the old base address as an integer: after realloc() the old
	 * pointer may be dangling, so it is only compared and subtracted as a
	 * number.
	 */
	uintptr_t old_base = (uintptr_t)strtab->buf;

	do {
		strtab->buf_size *= 2;
	} while (UNEXPECTED(strtab->buf_size - strtab->buf_top < len + 1));

	strtab->buf = ir_mem_realloc(strtab->buf, strtab->buf_size);
	if ((uintptr_t)strtab->buf != old_base) {
		ir_strtab_bucket *p = (ir_strtab_bucket*)strtab->data;
		uint32_t i;

		for (i = strtab->count; i > 0; i--) {
			/*
			 * Rebase via the string's offset inside the buffer. Adding a
			 * wrapped (new - old) delta directly to the pointer would be
			 * out-of-range pointer arithmetic whenever the block moved to
			 * a lower address. This relies on every bucket string living
			 * in buf, which ir_strtab_lookup() ensures when buf is set.
			 */
			p->str = strtab->buf + ((uintptr_t)p->str - old_base);
			p++;
		}
	}
}
/*
 * Initialise an empty string table.
 *
 * strtab   - table to set up
 * size     - initial bucket capacity; must be greater than zero
 * buf_size - initial size in bytes of the string buffer; when zero no buffer
 *            is allocated and strtab->buf is left NULL
 *
 * The chain-head array (hash size = `size` rounded up to a power of two) and
 * the bucket array share one allocation; strtab->data points just past the
 * chain heads.
 */
void ir_strtab_init(ir_strtab *strtab, uint32_t size, uint32_t buf_size)
{
	uint32_t hash_size;
	size_t heads_bytes;
	char *block;

	IR_ASSERT(size > 0);

	hash_size = ir_strtab_hash_size(size);
	heads_bytes = hash_size * sizeof(uint32_t);
	block = ir_mem_malloc(heads_bytes + size * sizeof(ir_strtab_bucket));

	/* All-ones bytes mark every chain as empty. */
	memset(block, -1, heads_bytes);

	strtab->data = block + heads_bytes;
	strtab->mask = (uint32_t)(-(int32_t)hash_size);
	strtab->size = size;
	strtab->count = 0;
	strtab->pos = 0;

	strtab->buf = buf_size ? ir_mem_malloc(buf_size) : NULL;
	strtab->buf_size = buf_size;
	strtab->buf_top = 0;
}
2023-03-28 12:18:12 +02:00
ir_ref ir_strtab_find(const ir_strtab *strtab, const char *str, uint32_t len)
2022-04-05 23:19:23 +02:00
{
uint32_t h = ir_str_hash(str, len);
2023-03-28 12:18:12 +02:00
const char *data = (const char*)strtab->data;
2022-04-05 23:19:23 +02:00
uint32_t pos = ((uint32_t*)data)[(int32_t)(h | strtab->mask)];
ir_strtab_bucket *p;
while (pos != IR_INVALID_IDX) {
2022-04-05 23:19:23 +02:00
p = (ir_strtab_bucket*)(data + pos);
if (p->h == h
&& p->len == len
&& memcmp(p->str, str, len) == 0) {
return p->val;
}
pos = p->next;
}
return 0;
}
/*
 * Find the key (`str`, `len` bytes) or insert it with value `val`.
 *
 * Returns the value already stored for the key if it exists; otherwise the
 * key is added and `val` is returned. `val` must be non-zero when an
 * insertion happens.
 *
 * When the table has a string buffer (strtab->buf != NULL) the key is copied
 * into it and NUL-terminated; otherwise the bucket keeps the caller's `str`
 * pointer as is.
 */
ir_ref ir_strtab_lookup(ir_strtab *strtab, const char *str, uint32_t len, ir_ref val)
{
	uint32_t h = ir_str_hash(str, len);
	char *data = (char*)strtab->data;
	/* (h | mask) is a negative index into the chain heads stored before data. */
	uint32_t pos = ((uint32_t*)data)[(int32_t)(h | strtab->mask)];
	ir_strtab_bucket *p;

	while (pos != IR_INVALID_IDX) {
		p = (ir_strtab_bucket*)(data + pos);
		if (p->h == h
		 && p->len == len
		 && memcmp(p->str, str, len) == 0) {
			return p->val;
		}
		pos = p->next;
	}

	IR_ASSERT(val != 0);

	if (UNEXPECTED(strtab->count >= strtab->size)) {
		ir_strtab_resize(strtab);
		/* The bucket array was reallocated; refresh the cached base. */
		data = (char*)strtab->data;
	}

	if (strtab->buf) {
		if (UNEXPECTED(strtab->buf_size - strtab->buf_top < len + 1)) {
			/*
			 * ir_strtab_grow_buf() itself reserves `len + 1` bytes, so
			 * pass the bare length rather than counting the NUL twice.
			 */
			ir_strtab_grow_buf(strtab, len);
		}
		memcpy(strtab->buf + strtab->buf_top, str, len);
		strtab->buf[strtab->buf_top + len] = 0;
		str = (const char*)strtab->buf + strtab->buf_top;
		strtab->buf_top += len + 1;
	}

	/* Append a new bucket and push it on the front of its hash chain. */
	pos = strtab->pos;
	strtab->pos += sizeof(ir_strtab_bucket);
	strtab->count++;
	p = (ir_strtab_bucket*)(data + pos);
	p->h = h;
	p->len = len;
	p->str = str;
	h |= strtab->mask;
	p->next = ((uint32_t*)data)[(int32_t)h];
	((uint32_t*)data)[(int32_t)h] = pos;
	p->val = val;
	return val;
}
/*
 * Replace the value of an existing key (`str`, `len` bytes) with `val`.
 *
 * Returns `val` when the key was found and updated, or 0 when the key is not
 * present; nothing is inserted in that case.
 */
ir_ref ir_strtab_update(ir_strtab *strtab, const char *str, uint32_t len, ir_ref val)
{
	uint32_t h = ir_str_hash(str, len);
	char *data = (char*)strtab->data;
	/* (h | mask) is a negative index into the chain heads stored before data. */
	uint32_t pos = ((uint32_t*)data)[(int32_t)(h | strtab->mask)];
	ir_strtab_bucket *p;

	while (pos != IR_INVALID_IDX) {
		p = (ir_strtab_bucket*)(data + pos);
		if (p->h == h
		 && p->len == len
		 && memcmp(p->str, str, len) == 0) {
			return p->val = val;
		}
		pos = p->next;
	}
	return 0;
}
2023-03-28 12:18:12 +02:00
/*
 * Return the key string of the bucket at index `idx`.
 *
 * `idx` is the bucket's position in the bucket array and must satisfy
 * 0 <= idx < strtab->count.
 */
const char *ir_strtab_str(const ir_strtab *strtab, ir_ref idx)
{
	IR_ASSERT(idx >= 0 && (uint32_t)idx < strtab->count);
	return ((const ir_strtab_bucket*)strtab->data)[idx].str;
}
/*
 * Release the memory owned by `strtab`: the combined chain-head/bucket
 * allocation and, if present, the string buffer. Both pointers are reset to
 * NULL afterwards.
 */
void ir_strtab_free(ir_strtab *strtab)
{
	uint32_t hash_size = (uint32_t)(-(int32_t)strtab->mask);
	/* The allocation starts at the chain heads, which precede data. */
	char *data = (char*)strtab->data - (hash_size * sizeof(uint32_t));

	ir_mem_free(data);
	strtab->data = NULL;
	if (strtab->buf) {
		ir_mem_free(strtab->buf);
		strtab->buf = NULL;
	}
}
2023-03-28 12:18:12 +02:00
/*
 * Call `func(str, len, val)` once for every entry of the table, visiting the
 * buckets in array order from index 0 to count - 1.
 */
void ir_strtab_apply(const ir_strtab *strtab, ir_strtab_apply_t func)
{
	uint32_t i;

	for (i = 0; i < strtab->count; i++) {
		const ir_strtab_bucket *b = &((const ir_strtab_bucket*)strtab->data)[i];

		func(b->str, b->len, b->val);
	}
}