hatab.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596
  1. #include <string.h>
  2. #include "hatab.h"
  3. #include "mem.h"
  4. #include "dynar.h"
  5. #define HATAB_ROOM_DEFAULT 128
  6. #define HATAB_CELLAR_DEFAULT 16
  7. /* TODO: the current hash table structure isn't too good.
  8. Empty pails are kept in the hash chains. Better would be to use doubly
  9. linked pails, and drop unused pails from the doubly linked list.
  10. Also, the cellar can be replaced by a pail storage, that
  11. keeps all the pails. The hash table's lookup table then
  12. only stores *pointers* to pails in the pail storage. In theory,
  13. the pail storage could even be external if that was needed.
  14. */
  15. /* Allocate a new hash tab pair. */
  16. HatabPair * hatabpair_alloc() {
  17. return STRUCT_ALLOC(HatabPair);
  18. }
  19. /* Initialize a hash tab pair. */
  20. HatabPair * hatabpair_init(HatabPair * self, void * key, void * value) {
  21. if(!self) return NULL;
  22. self->key = key;
  23. self->value = value;
  24. return self;
  25. }
  26. /* Make a new hash tab pair. */
  27. HatabPair * hatabpair_new(HatabPair * self, void * key, void * value) {
  28. return hatabpair_init(hatabpair_alloc(), key, value);
  29. }
  30. /* Free the pair. Contrary to normal, does not clean up the contents of the pair!. */
  31. HatabPair * hatabpair_free(HatabPair * self) {
  32. return mem_free(self);
  33. }
  34. /* Apply destructor to pair. */
  35. HatabPair * hatabpair_destroy(HatabPair * self, HatabPairFree * destructor) {
  36. if (destructor) destructor(self);
  37. return hatabpair_free(self);
  38. }
  39. /* Get the key of a hash tab pair. */
  40. void * hatabpair_key(HatabPair * self) {
  41. if(!self) return NULL;
  42. return self->key;
  43. }
  44. /* Get the value of a hash tab pair. */
  45. void * hatabpair_value(HatabPair * self) {
  46. if(!self) return NULL;
  47. return self->value;
  48. }
  49. /* Returns nonzero if the pair is empty, that is, has a NULL key, zero if not . */
  50. int hatabpair_empty_p(HatabPair * self) {
  51. if(!self) return TRUE;
  52. if(!self->key) return TRUE;
  53. return FALSE;
  54. }
  55. /* Jenkins hash function. */
  56. uint32_t hatab_jenkins(char *key, size_t len) {
  57. uint32_t hash, i;
  58. for(hash = i = 0; i < len; ++i) {
  59. hash += key[i];
  60. hash += (hash << 10);
  61. hash ^= (hash >> 6);
  62. }
  63. hash += (hash << 3);
  64. hash ^= (hash >> 11);
  65. hash += (hash << 15);
  66. return hash;
  67. }
  68. /** Hash function that hashes a C string. */
  69. uint32_t hatab_hash_cstr(void * key) {
  70. char * cstr = (char *) key;
  71. return hatab_jenkins(cstr, strlen(cstr));
  72. }
  73. /** Hash function that hashes a uint32_t. */
  74. uint32_t hatab_hash_uint32(void * key) {
  75. return hatab_jenkins((char *)&key, sizeof(uint32_t));
  76. }
  77. /* A Pail is a hash bucket. This is internal to Hatab and should not be used
  78. outside of this file. */
  79. struct Pail_;
  80. typedef struct Pail_ Pail;
  81. /* Hash bucket, contains a pair, the hash value, and and a linked list to the next pail. */
  82. struct Pail_ {
  83. HatabPair pair;
  84. Pail * next;
  85. Pail * prev;
  86. uint32_t hash;
  87. };
  88. /** A hash table. The algorithm implemented is a coalesced hash table. */
  89. struct Hatab_ {
  90. Dynar * lookup ;
  91. Dynar * pails ;
  92. int lookup_used;
  93. int pails_used;
  94. /* Acts of the hash table, for customization of the hash function, etc. */
  95. HatabActs * acts ;
  96. Pail * freelist;
  97. };
  98. /* Initializes a hash table bucket, here abbreviated to "pail". */
  99. static Pail * pail_init(Pail * self,
  100. void * key, void * data, void * next,
  101. void * prev, uint32_t hash) {
  102. hatabpair_init(&self->pair, key, data);
  103. self->next = next;
  104. self->prev = prev;
  105. self->hash = hash;
  106. return self;
  107. }
  108. /* Initializes a hash table bucket, here abbreviated to "pail". */
  109. static Pail * pail_initpair(Pail * self, HatabPair pair,
  110. void * next, void * prev, uint32_t hash) {
  111. self->pair = pair;
  112. self->next = next;
  113. self->prev = prev;
  114. self->hash = hash;
  115. return self;
  116. }
  117. /* Returns the next pail following this one. */
  118. static Pail * pail_next(Pail * self) {
  119. if(!self) return NULL;
  120. return self->next;
  121. }
  122. /* Returns the previous pail preceding this one. */
  123. static Pail * pail_prev(Pail * self) {
  124. if(!self) return NULL;
  125. return self->next;
  126. }
  127. /* Sets the next pail following this one. Also updates next->prev if needed. */
  128. static Pail * pail_next_(Pail * self, Pail * next) {
  129. if(!self) return NULL;
  130. self->next = next;
  131. if(next) {
  132. next->prev = self;
  133. }
  134. return next;
  135. }
  136. /* Sets the next pail preceding this one */
  137. static Pail * pail_prev_(Pail * self, Pail * prev) {
  138. if(!self) return NULL;
  139. self->prev = prev;
  140. if(prev) {
  141. prev->next = self;
  142. }
  143. return prev;
  144. }
  145. /* Gets the key of the pail. */
  146. static void * pail_key(Pail * self) {
  147. return hatabpair_key(&self->pair);
  148. }
  149. /* Gets the value of the pail. */
  150. static void * pail_value(Pail * self) {
  151. return hatabpair_value(&self->pair);
  152. }
  153. /* Initializes a hash bucket to be empty. */
  154. static Pail * pail_initempty(Pail * self) {
  155. return pail_init(self, NULL, NULL, NULL, NULL, 0);
  156. }
  157. /* Returns zero if the hash of the bucket is equal to hash, nonzero if not. */
  158. static int pail_cmphash(Pail * self, uint32_t hash) {
  159. return self->hash != hash;
  160. }
  161. /* Returns nonzero if the pail is empty, zero if not . */
  162. static int pail_empty_p(Pail * self) {
  163. return hatabpair_empty_p(&self->pair);
  164. }
  165. /* Empties a pail, without breaking the link chain */
  166. static Pail * pail_emptynobreak(Pail * self) {
  167. return pail_init(self, NULL, NULL, self->next, self->prev, 0);
  168. }
  169. /* Unlinks the pail, that is, removes itself from the pail chain it is in.
  170. Returns the next pail in the chain, if any. Then empties the pail,
  171. so make suure to destroy the contents before calling thsi function. */
  172. static Pail * pail_unlink(Pail * self) {
  173. /* Link next and prev together. */
  174. pail_next_(self->prev, self->next);
  175. pail_prev_(self->next, self->prev);
  176. pail_initempty(self);
  177. return self->next;
  178. }
  179. /* If the pail is in use, free the memory held by the key
  180. by using the HatabPairFree destructor in the acts struct.
  181. If the destructor is NULL, nothing happens.
  182. Does not break the link chain. Does nothing if the pail is already empty. */
  183. static Pail * pail_done(Pail * self, HatabActs * acts) {
  184. if(pail_empty_p(self)) return self;
  185. if(acts) {
  186. if(acts->free_pair) acts->free_pair(&self->pair);
  187. }
  188. return pail_unlink(self);
  189. }
  190. /* Allocates a new pail. */
  191. static Pail * pail_alloc() {
  192. return STRUCT_ALLOC(Pail);
  193. }
  194. /* Allocates and initializes a new empty pail.*/
  195. static Pail * pail_newempty() {
  196. return pail_initempty(pail_alloc());
  197. }
  198. /* Frees a pail, also calls pail_done with acts */
  199. static Pail * pail_free(Pail * self, HatabActs * acts) {
  200. pail_done(self, acts);
  201. return mem_free(self);
  202. }
  203. /* Default operations for the hash table. */
  204. static HatabActs hatab_default_acts = {
  205. (HatabCompare *) strcmp,
  206. (HatabHash *) hatab_hash_cstr,
  207. NULL,
  208. };
  209. /* Puts a pail in the hatab's free list. */
  210. Pail * hatab_addfreepail(Hatab * self, Pail * pail) {
  211. Pail * oldpail;
  212. if ((!self) || (!pail)) return NULL;
  213. oldpail = self->freelist;
  214. if (oldpail) {
  215. pail_next_(pail, oldpail);
  216. pail_prev_(oldpail, pail);
  217. }
  218. self->freelist = pail;
  219. return pail;
  220. }
  221. /* Gets a pail from the free pail's list and remove it from there.
  222. * Returns NULL if no more pails are available.
  223. */
  224. Pail * hatab_takefreepail(Hatab * self) {
  225. Pail * pail, *nextpail;
  226. pail = self->freelist;
  227. if (!pail) return NULL;
  228. nextpail = pail_next(pail);
  229. self->freelist = nextpail;
  230. return pail;
  231. }
  232. /* Adds a single, new pail to the hatab's free pail list. */
  233. Pail * hatab_newfreepail(Hatab * self) {
  234. Pail * pail, *oldpail;
  235. pail = pail_newempty();
  236. return hatab_addfreepail(self, pail);
  237. }
  238. /** Returns nonzero if the pails is full, zero if it isn't. */
  239. int hatab_pailsfull_p(Hatab * self) {
  240. if(!self) return 0;
  241. return self->freelist != NULL;
  242. }
  243. /** Returns nonzero if the lookup array is full, zero if it isn't. */
  244. int hatab_lookupfull_p(Hatab * self) {
  245. if(!self) return 0;
  246. return self->lookup_used >= dynar_size(self->lookup);
  247. }
  248. /** Cleans up and empties a table. */
  249. Hatab * hatab_done(Hatab * self) {
  250. int index;
  251. Pail * pail, * nextpail;
  252. if(!self) return NULL;
  253. for(index = 0; index < dynar_size(self->lookup); index++) {
  254. pail = dynar_getptr(self->lookup, index);
  255. while (pail) {
  256. nextpail = pail_next(pail);
  257. pail_free(pail, self->acts);
  258. pail = nextpail;
  259. }
  260. }
  261. dynar_free(self->lookup);
  262. pail = hatab_takefreepail(self);
  263. while(pail) {
  264. pail_free(pail, NULL); // NULL because no destructor needed for EMPTY pails.
  265. }
  266. return self;
  267. }
  268. /** Frees a hash table */
  269. Hatab * hatab_free(Hatab * self) {
  270. hatab_done(self);
  271. return mem_free(self);
  272. }
  273. /* Gets a pointer to the lookup pail at the given index,
  274. which is not checked and should be valid. */
  275. static Pail * hatab_getlookuppail(Hatab * self, uint32_t index) {
  276. return (Pail *) dynar_getptr(self->lookup, index);
  277. }
  278. /* Gets a pointer to the pails pail at the given index,
  279. which is not checked and should be valid. */
  280. static Pail * hatab_getpailspail(Hatab * self, uint32_t index) {
  281. return (Pail *) dynar_getdata(self->pails, index);
  282. }
  283. /* Returns the next empty pail cell, or NULL if the pails storage is full. */
  284. static Pail * hatab_getnextemptypail(Hatab * self) {
  285. int index, stop;
  286. stop = dynar_size(self->pails);
  287. for (index = 0; index < stop; index ++) {
  288. Pail * pail = hatab_getpailspail(self, index);
  289. if (pail_empty_p(pail)) {
  290. self->pails_used++;
  291. return pail;
  292. }
  293. }
  294. return NULL;
  295. }
  296. /* Empties all entries in the table. */
  297. Hatab * hatab_clear(Hatab * self) {
  298. int index, size;
  299. size = dynar_size(self->lookup);
  300. /* Empty lookup table. */
  301. for(index = 0; index < size; index++) {
  302. dynar_putptr(self->lookup, index, NULL);
  303. }
  304. /* Clean up and free pails. */
  305. size = dynar_size(self->pails);
  306. for(index = 0; index < size; index++) {
  307. void * data = hatab_getpailspail(self, index);
  308. pail_done(data, self->acts);
  309. }
  310. return self;
  311. }
  312. /* Initializes the dynars and pails of the table. */
  313. Hatab * hatab_initpails(Hatab * self) {
  314. int index, size;
  315. size = dynar_size(self->lookup);
  316. /* Empty lookup table. */
  317. for(index = 0; index < size; index++) {
  318. dynar_putptr(self->lookup, index, NULL);
  319. }
  320. /* Clean up and free pails. */
  321. size = dynar_size(self->pails);
  322. for(index = 0; index < size; index++) {
  323. void * data = hatab_getpailspail(self, index);
  324. pail_initempty(data);
  325. }
  326. return self;
  327. }
  328. /** Initializes the table with the given room and pails space. */
  329. Hatab * hatab_initroom(Hatab * self, HatabActs * acts,
  330. int pails, int space)
  331. {
  332. if(!self) return NULL;
  333. self->acts = acts ? acts : (&hatab_default_acts);
  334. self->lookup = dynar_new(pails, sizeof(Pail *));
  335. self->pails = dynar_new(space, sizeof(Pail));
  336. if((!self->lookup) || (!self->pails)) return hatab_done(self);
  337. hatab_initpails(self);
  338. return self;
  339. }
  340. /** Allocates a Hatab. */
  341. Hatab * hatab_alloc() {
  342. return STRUCT_ALLOC(Hatab);
  343. }
  344. /** Initializes the hatab with default room and pails space */
  345. Hatab * hatab_init(Hatab * self, HatabActs * acts) {
  346. return hatab_initroom(self, acts, HATAB_ROOM_DEFAULT, HATAB_ROOM_DEFAULT * 2);
  347. }
  348. /** Makes a new hatab */
  349. Hatab * hatab_newroom(HatabActs * acts, int pails, int space) {
  350. return hatab_initroom(hatab_alloc(), acts, pails, space);
  351. }
  352. /** Makes a new hatab with default room and pails space */
  353. Hatab * hatab_new(HatabActs * acts) {
  354. return hatab_newroom(acts, HATAB_ROOM_DEFAULT, HATAB_ROOM_DEFAULT * 2);
  355. }
  356. /* Calculates the hash value of the given data pointer using self->acts->hash*/
  357. uint32_t hatab_hash(Hatab * self, void * ptr) {
  358. return self->acts->hash(ptr);
  359. }
  360. /* Compares the given data pointers using self->acts->compare */
  361. int hatab_compare(Hatab * self, void * pa, void * pb) {
  362. return self->acts->compare(pa, pb);
  363. }
  364. /* Checks if the given bucket contains the given key with the given hash. */
  365. static int hatab_pailok(Hatab * self, Pail * pail,
  366. void * key, uint32_t hash) {
  367. if(!pail) return FALSE;
  368. if(pail_empty_p(pail)) return FALSE;
  369. if(pail_cmphash(pail, hash)) return FALSE;
  370. if(hatab_compare(self, pail_key(pail), key)) return FALSE;
  371. return TRUE;
  372. }
  373. /* Gets the pail that matches this key, or NULL if not found. */
  374. static Pail * hatab_findpail(Hatab * self, void * key) {
  375. uint32_t hash, index;
  376. Pail * pail;
  377. if(!self) return NULL;
  378. hash = hatab_hash(self, key);
  379. index = hash % dynar_size(self->lookup);
  380. pail = hatab_getlookuppail(self, index);
  381. if (!pail) return NULL;
  382. while(pail) {
  383. if(hatab_pailok(self, pail, key, hash)) return pail;
  384. // return the pail if it's OK.
  385. pail = pail_next(pail); // Follow the linked chain of pails.
  386. }
  387. // If we get here, no more links, and not found. Return null.
  388. return NULL;
  389. }
  390. /** Gets a value that matches key from a hash table. */
  391. void * hatab_get(Hatab * self, void * key) {
  392. Pail * pail = hatab_findpail(self, key);
  393. if(!pail) return NULL;
  394. return pail_value(pail);
  395. }
  396. /** Removes a value that matches key from a hash table.
  397. * The value will also will be deleted if the methods are set correctly. */
  398. void * hatab_drop(Hatab * self, void * key) {
  399. uint32_t hash, index;
  400. void * data;
  401. Pail * next;
  402. Pail * pail = hatab_findpail(self, key);
  403. if(!pail) return NULL;
  404. /* if it's the first pail get the next one and set that (also when NULL) */
  405. if(!pail_prev(pail)) {
  406. next = pail_next(pail);
  407. hash = hatab_hash(self, pail_key((pail)));
  408. index = hash % dynar_size(self->lookup);
  409. dynar_putptr(self->lookup, index, next);
  410. }
  411. /* XXX: this can't work now... */
  412. pail_done(pail, self->acts);
  413. return self;
  414. }
  415. /** Grows the hash table when needed. */
  416. Hatab * hatab_grow(Hatab * self) {
  417. int oldsize, index;
  418. oldsize = dynar_size(self->pails);
  419. // double the size of the pails block, so collisions can be handled
  420. // NOTE: should also grow lookup table but rehash is slow...
  421. // Dynar cannot work here, since it may invalidate the pointers in
  422. // itself when growing. Should maintain a free list in stead.
  423. void * mok = dynar_grow(self->pails, oldsize*2);
  424. /*void * cok = siarray_grow(hatab_pails(self), hatab_pailsroom(self)*2);*/
  425. if(!mok) return NULL;
  426. for(index = oldsize; index < (oldsize * 2); index++) {
  427. Pail * pail = (Pail *) dynar_getraw(self->pails, index);
  428. pail_initempty(pail);
  429. }
  430. // if(!cok) return NULL; // XXX: rehash the table here!
  431. return self;
  432. }
  433. /** Stores a value in the hash table. */
  434. void * hatab_put(Hatab * self, void * key, void * value) {
  435. uint32_t hash, index;
  436. Pail * pail, * newpail = NULL;
  437. if(!self) return NULL;
  438. hash = hatab_hash(self, key);
  439. index = hash % dynar_size(self->lookup);
  440. pail = hatab_getlookuppail(self, index);
  441. /* get a new pail */
  442. newpail = hatab_getnextemptypail(self);
  443. if(!newpail) {
  444. if(hatab_pailsfull_p(self)) { // The pails is full.
  445. if(!hatab_grow(self)) return NULL;
  446. // Grow the hashtable, return null if that failed.
  447. }
  448. newpail = hatab_getnextemptypail(self);
  449. if (!newpail) return NULL;
  450. }
  451. /* No pail set yet, set it in the array. */
  452. if (!pail) {
  453. dynar_putptr(self->lookup, index, newpail);
  454. } else {
  455. /* Pail already there. Go to end of linked list and add pail there. */
  456. while (pail->next) {
  457. pail = pail->next;
  458. // Follow chain of linked, nonempty pails. If the pail is empty or
  459. // the chain is broken, we can store the data in the pail.
  460. }
  461. // XXX
  462. pail->next = newpail;
  463. newpail->prev = pail;
  464. }
  465. newpail->hash = hash;
  466. hatabpair_init(&newpail->pair, key, value);
  467. return newpail;
  468. }
  469. /* Prints a dot graph of the hash tab for debugging */
  470. void hatab_printgraph(Hatab * self) {
  471. int index, stop, oldindex;
  472. Pail * pail, * oldpail, * nextpail, *linkpail;
  473. pail = NULL; oldpail = NULL;
  474. oldindex = -1;
  475. printf("digraph { \n");
  476. stop = dynar_size(self->lookup);
  477. for(index = 0; index < stop; index ++) {
  478. pail = (Pail *) dynar_getptr(self->lookup, index);
  479. if (!pail) {
  480. continue;
  481. }
  482. if (oldpail) {
  483. printf("t%p -> t%p [color=red];\n", (void *)oldpail, (void *)pail);
  484. }
  485. linkpail = pail ; nextpail = pail_next(linkpail);
  486. while (nextpail) {
  487. printf("t%p -> t%p [color=green];\n", (void *)linkpail, (void *)nextpail);
  488. linkpail = nextpail;
  489. nextpail = pail_next(linkpail);
  490. }
  491. oldpail = pail;
  492. }
  493. printf("} \n");
  494. }