Browse Source

cmHashTbl.h/c : Initial commit.

master
kevin 10 years ago
parent
commit
9b3aa4c798
2 changed files with 531 additions and 0 deletions
  1. 464
    0
      cmHashTbl.c
  2. 67
    0
      cmHashTbl.h

+ 464
- 0
cmHashTbl.c View File

@@ -0,0 +1,464 @@
1
+#include "cmGlobal.h"
2
+#include "cmFloatTypes.h"
3
+#include "cmRpt.h"
4
+#include "cmErr.h"
5
+#include "cmCtx.h"
6
+#include "cmMem.h"
7
+#include "cmLinkedHeap.h"
8
+#include "cmMallocDebug.h"
9
+#include "cmMath.h"
10
+#include "cmHashTbl.h"
11
+#include "cmText.h"
12
+
13
// Bit flags kept in cmHtValue_t.flags.
enum
{
  // Set when the value blob was copied into the table's linked heap
  // (non-static store); cmHashTblRemove() releases such blobs.
  kFreeHtFl = 0x01
};
17
+
18
// A single stored value. Records are chained through 'link' into the
// 'list' (or 'avail') chain of the bucket they belong to.
typedef struct cmHtValue_str
{
  unsigned flags;               // kXXXHtFl bit flags (see kFreeHtFl)
  unsigned id;                  // id handed back to the caller for this value
  void* value;                  // pointer to the value bytes
  unsigned byteCnt;             // number of bytes addressed by 'value'
  struct cmHtValue_str* link;   // next record in the owning bucket's chain
} cmHtValue_t;
26
+
27
+typedef struct
28
+{
29
+  cmHtValue_t* list;    // value list
30
+  cmHtValue_t* avail;   // available value slots - formed from cmHashTblRemoved() values.
31
+  unsigned     nextIdx; // next unused index for this bucket
32
+} cmHtBucket_t;
33
+
34
+typedef struct
35
+{
36
+  cmErr_t       err;
37
+  cmLHeapH_t    lhH;            // memory for hash table buckets, values, value blobs.
38
+  unsigned      bucketCnt;      // hash table bucket cnt
39
+  unsigned      linkCnt;        // max length of collision list for each bucket
40
+  unsigned      mask;           // hash id bucket index mask (masks the MSB's of the hash-id)
41
+  unsigned      maskShift;      // shift required to move the lowest 'mask' bit to the LSB.
42
+  cmHtBucket_t* b;              // b[bucketCnt] bucket array
43
+} cmHt_t;
44
+
45
+cmHashTblH_t cmHashTblNullHandle = cmSTATIC_NULL_HANDLE;
46
+
47
+#define _cmHtBucketIndex( p, id ) (((id) & (p)->mask) >> (p)->maskShift)
48
+
49
+cmHt_t* _cmHtHandleToPtr( cmHashTblH_t h )
50
+{
51
+  cmHt_t* p = (cmHt_t*)h.h;
52
+  assert(p!=NULL);
53
+  return p;
54
+}
55
+
56
+// Return the bucket index portion of the hash id.
57
+unsigned _cmHtGenId( cmHt_t* p, const void* v, unsigned byteCnt )
58
+{
59
+  unsigned i,j;
60
+  const char* cv = v;
61
+  unsigned h = 0;
62
+
63
+  for(i=0,j=3; i<byteCnt; ++i,++j)
64
+    h += ((unsigned)cv[i]) << ((j&0x3)*8);
65
+
66
+  return h & p->mask;
67
+}
68
+
69
+
70
+// Given an id find the value.
71
+cmHtValue_t* _cmHtIdToValue( cmHt_t* p, unsigned id )
72
+{
73
+  if( id == cmInvalidId )
74
+    return NULL;
75
+
76
+  unsigned      bi = _cmHtBucketIndex(p,id);
77
+
78
+  assert(bi < p->bucketCnt);
79
+
80
+  cmHtValue_t* v = p->b[bi].list;
81
+  for(; v!=NULL; v=v->link)
82
+    if( v->id == id )
83
+      return v;
84
+
85
+  return NULL;
86
+}
87
+
88
+// Given a value find the id
89
+cmHtValue_t* _cmHtValueToId( cmHt_t* p, const void* value, unsigned byteCnt, unsigned id )
90
+{
91
+  if( id == cmInvalidId )
92
+    id = _cmHtGenId(p,value,byteCnt);
93
+
94
+  unsigned bi = _cmHtBucketIndex(p,id);
95
+
96
+  assert(bi < p->bucketCnt);
97
+
98
+  cmHtValue_t* v = p->b[bi].list;
99
+  for(; v!=NULL; v=v->link)
100
+    if( v->byteCnt==byteCnt && memcmp(value,v->value,byteCnt)==0 )
101
+      return v;
102
+
103
+  return NULL;
104
+}
105
+
106
+cmHtRC_t _cmHtDestroy( cmHt_t* p )
107
+{
108
+  cmHtRC_t rc = kOkHtRC;
109
+  cmLHeapDestroy(&p->lhH);
110
+  cmMemFree(p->b);
111
+  cmMemFree(p);
112
+  return rc;
113
+}
114
+
115
+cmHtRC_t cmHashTblCreate( cmCtx_t* ctx, cmHashTblH_t* hp, unsigned bucketCnt )
116
+{
117
+  cmHtRC_t rc;
118
+  if((rc = cmHashTblDestroy(hp)) != kOkHtRC )
119
+    return rc;
120
+
121
+  cmHt_t* p = cmMemAllocZ(cmHt_t,1);
122
+
123
+  cmErrSetup(&p->err,&ctx->rpt,"hash table");
124
+
125
+  if(cmLHeapIsValid(p->lhH = cmLHeapCreate(8192,ctx)) == false )
126
+  {
127
+    cmErrMsg(&p->err,kLHeapFailHtRC,"Internal linked heap mgr. create failed.");
128
+    goto errLabel;
129
+  }
130
+
131
+  // force the bucket count to be a power of two
132
+  p->bucketCnt = cmNextPowerOfTwo(bucketCnt);
133
+  p->mask      = p->bucketCnt - 1;
134
+
135
+  // calcluate the hash-id bucket mask
136
+  for(p->maskShift=0; (0x80000000 & p->mask) == 0; ++p->maskShift )
137
+    p->mask <<= 1;
138
+  
139
+  // calculate the maximum collisions per bucket mask
140
+  p->linkCnt = ~p->mask;
141
+
142
+  // allocate the bucket array
143
+  p->b = cmMemAllocZ(cmHtBucket_t,p->bucketCnt);
144
+  
145
+  hp->h = p;
146
+
147
+ errLabel:
148
+  if( rc != kOkHtRC )
149
+    _cmHtDestroy(p);
150
+  return rc;
151
+}
152
+
153
+cmHtRC_t cmHashTblDestroy( cmHashTblH_t* hp )
154
+{
155
+  cmHtRC_t rc = kOkHtRC;
156
+  if(hp==NULL || cmHashTblIsValid(*hp)==false )
157
+    return rc;
158
+
159
+  cmHt_t* p = _cmHtHandleToPtr(*hp);
160
+
161
+  if((rc = _cmHtDestroy(p)) != kOkHtRC )
162
+    return rc;
163
+
164
+  hp->h = NULL;
165
+
166
+  return rc;
167
+}
168
+
169
+bool cmHashTblIsValid( cmHashTblH_t h )
170
+{ return h.h!=NULL; }
171
+
172
+unsigned cmHashTblStoreBase(      cmHashTblH_t h, void* v, unsigned byteCnt, bool staticFl )
173
+{
174
+  cmHt_t*      p  = _cmHtHandleToPtr(h);
175
+  cmHtValue_t* vp = NULL;
176
+  unsigned     id = _cmHtGenId(p, v, byteCnt );
177
+
178
+  // if the value is already stored then there is nothing else to do
179
+  if((vp = _cmHtValueToId(p,v,byteCnt,id)) != NULL )
180
+    return vp->id;
181
+
182
+  unsigned bi = _cmHtBucketIndex(p,id);
183
+
184
+  assert(bi < p->bucketCnt );
185
+
186
+  cmHtBucket_t* b  = p->b + bi;
187
+
188
+  if( b->avail != NULL )
189
+  {
190
+    vp       = b->avail;
191
+    b->avail = b->avail->link;
192
+  }
193
+  else
194
+  {
195
+    if( b->nextIdx == p->linkCnt || (id + b->nextIdx) == cmInvalidId )
196
+    {
197
+      cmErrMsg(&p->err,kHashFaultHtRC,"The hash table bucket at index %i is exhaused.",bi);
198
+      return cmInvalidId;
199
+    }
200
+
201
+    vp = cmLhAllocZ(p->lhH,cmHtValue_t,1);
202
+    vp->id = id + b->nextIdx++;
203
+  }  
204
+  
205
+
206
+  assert( vp->id != cmInvalidId );
207
+  
208
+  vp->link = b->list;
209
+  b->list = vp;
210
+  vp->byteCnt = byteCnt;
211
+
212
+  if( staticFl )
213
+    vp->value = v;
214
+  else
215
+  {
216
+    vp->value = cmLhAlloc(p->lhH,char,byteCnt);
217
+    memcpy(vp->value,v,byteCnt);
218
+    vp->flags = cmSetFlag(vp->flags,kFreeHtFl);
219
+  }
220
+
221
+  return vp->id;
222
+}
223
+
224
+unsigned cmHashTblStore(          cmHashTblH_t h, void* v, unsigned byteCnt )
225
+{ return cmHashTblStoreBase(h,v,byteCnt,false); }
226
+
227
+unsigned cmHashTblStoreStatic(    cmHashTblH_t h, void* v, unsigned byteCnt )
228
+{ return cmHashTblStoreBase(h,v,byteCnt,true); }
229
+
230
+unsigned _cmHashTblStoreStr(       cmHashTblH_t h, const cmChar_t* s, bool staticFl )
231
+{ 
232
+  unsigned n = cmTextLength(s);
233
+  if( n == 0 )
234
+  {
235
+    s = "";
236
+    n = 1;
237
+  }
238
+
239
+return cmHashTblStoreBase(h,(void*)s,n+1,staticFl); 
240
+}
241
+
242
+unsigned cmHashTblStoreStr(       cmHashTblH_t h, const cmChar_t* s )
243
+{ return _cmHashTblStoreStr(h,s,false); }
244
+
245
+unsigned cmhashTblStoreStaticStr( cmHashTblH_t h, const cmChar_t* s )
246
+{ return _cmHashTblStoreStr(h,s,true); }
247
+
248
+unsigned cmHashTblStoreV( cmHashTblH_t h, const cmChar_t* fmt, va_list vl )
249
+{
250
+  cmChar_t* s = NULL;
251
+  s = cmTsVPrintfP(s,fmt,vl);
252
+  unsigned id = _cmHashTblStoreStr(h,s,false); 
253
+  cmMemFree(s);
254
+  return id;
255
+}
256
+
257
+unsigned cmHashTblStoreF( cmHashTblH_t h, const cmChar_t* fmt, ... )
258
+{
259
+  va_list vl;
260
+  va_start(vl,fmt);
261
+  unsigned id = cmHashTblStoreV(h,fmt,vl);
262
+  va_end(vl);
263
+  return id;
264
+}
265
+
266
+unsigned cmHashTblId( cmHashTblH_t h, const void* value, unsigned byteCnt )
267
+{
268
+  cmHt_t*  p  = _cmHtHandleToPtr(h);
269
+  cmHtValue_t* vp;
270
+
271
+  if((vp = _cmHtValueToId(p,value,byteCnt,cmInvalidId)) == NULL )
272
+    return cmInvalidId;
273
+
274
+  return vp->id;
275
+}
276
+
277
+unsigned cmHashTblStrToId( cmHashTblH_t h, const cmChar_t* str )
278
+{
279
+  if( str == NULL )
280
+    return cmInvalidId;
281
+
282
+  return cmHashTblId(h,str,cmTextLength(str)+1);
283
+}
284
+
285
+
286
+const void* cmHashTblValue( cmHashTblH_t h, unsigned id, unsigned* byteCntRef )
287
+{
288
+  cmHt_t*      p = _cmHtHandleToPtr(h);
289
+  cmHtValue_t* vp;
290
+
291
+  if((vp = _cmHtIdToValue(p, id)) != NULL )
292
+  {
293
+    if( byteCntRef != NULL )
294
+      *byteCntRef = vp->byteCnt;
295
+
296
+    return vp->value;
297
+  }
298
+
299
+  return NULL; 
300
+}
301
+
302
+
303
+const cmChar_t* cmHashTblStr( cmHashTblH_t h, unsigned id )
304
+{  return (const cmChar_t*)cmHashTblValue(h,id,NULL);  }
305
+
306
+
307
+cmHtRC_t cmHashTblRemove( cmHashTblH_t h, unsigned id )
308
+{
309
+  cmHt_t*       p  = _cmHtHandleToPtr(h);
310
+  unsigned      bi = _cmHtBucketIndex(p,id);
311
+
312
+
313
+
314
+  assert(bi < p->bucketCnt);
315
+
316
+  cmHtBucket_t* b  = p->b + bi;
317
+
318
+  cmHtValue_t*  vp = b->list;
319
+  cmHtValue_t*  pp = NULL;
320
+
321
+  for(; vp!=NULL; vp=vp->link)
322
+  {
323
+    if( vp->id == id )
324
+    {
325
+      if( pp == NULL )
326
+        b->list = vp->link;
327
+      else
328
+        pp->link = vp->link;
329
+
330
+      break;
331
+    }
332
+
333
+    pp = vp;
334
+  }
335
+
336
+  if( vp == NULL )
337
+    return cmErrMsg(&p->err,kInvalidIdHtRC,"A value could not be found for the hash id 0x%x.",id);
338
+  
339
+  if( cmIsFlag(vp->flags,kFreeHtFl ) )
340
+    cmLhFree(p->lhH,vp->value);
341
+  
342
+
343
+  vp->flags   = 0;
344
+  vp->value   = NULL;
345
+  vp->byteCnt = 0;
346
+
347
+  // Note: Do not set the id to zero since we want to consert id's 
348
+  // and this recd will be reused by the next call to cmHashTblStoreBase().
349
+
350
+  return kOkHtRC;
351
+  
352
+}
353
+
354
+
355
+cmHtRC_t cmHashTblLastRC( cmHashTblH_t h )
356
+{
357
+  cmHt_t* p = _cmHtHandleToPtr(h);
358
+  return cmErrLastRC(&p->err);
359
+}
360
+
361
+void _cmHashTblBucketReport( cmHtBucket_t* b, cmRpt_t* rpt )
362
+{
363
+  cmHtValue_t* vp = b->list;
364
+  unsigned i;
365
+  for(i=0; vp!=NULL && i<10; vp=vp->link,++i)
366
+    cmRptPrintf(rpt,"0x%x : %s\n",vp->id,((const cmChar_t*)vp->value));
367
+
368
+  cmRptPrintf(rpt,"\n");
369
+}
370
+
371
+void cmHashTblReport( cmHashTblH_t h, cmRpt_t* rpt )
372
+{
373
+  cmHt_t* p = _cmHtHandleToPtr(h);
374
+  unsigned i;
375
+  for(i=0; i<p->bucketCnt; ++i)
376
+  {
377
+    //if( p->b[i].nextIdx > 0 )
378
+    //  cmRptPrintf(rpt,"%i,%i\n",i,p->b[i].nextIdx);
379
+
380
+    if( p->b[i].nextIdx > 100 )
381
+      _cmHashTblBucketReport(p->b + i,rpt);
382
+  }
383
+}
384
+
385
+
386
+cmHtRC_t cmHashTblTest( cmCtx_t* ctx )
387
+{
388
+  cmHtRC_t     rc = kOkHtRC;
389
+  cmHashTblH_t h  = cmHashTblNullHandle;
390
+  cmErr_t err;
391
+  cmErrSetup(&err,&ctx->rpt,"hash table test");
392
+
393
+  if((rc = cmHashTblCreate(ctx,&h,8192)) != kOkHtRC )
394
+    return cmErrMsg(&err,rc,"Hash table create failed.");
395
+
396
+  const cmChar_t* arr[] = 
397
+  {
398
+    "1",
399
+    "12",
400
+    "123",
401
+    "1234",
402
+    "12345",
403
+    "123456",
404
+    "123456",
405
+    "123456",
406
+    NULL
407
+  };
408
+
409
+  unsigned n = sizeof(arr)/sizeof(arr[0]);
410
+  unsigned ids[ n ];
411
+  int i = 0;
412
+
413
+  // store the values from arr[]
414
+  for(; arr[i]!=NULL; ++i)
415
+    if((ids[i] = cmHashTblStoreStr(h,arr[i])) == cmInvalidId )
416
+    {
417
+      rc = cmErrMsg(&err,cmHashTblLastRC(h),"Hash store failed on: '%s.",cmStringNullGuard(arr[i]));
418
+      goto errLabel;
419
+    }
420
+
421
+  /*
422
+  // remove a value
423
+  unsigned rem_idx = 3;
424
+  if((rc = cmHashTblRemove(h, ids[rem_idx] )) != kOkHtRC )
425
+  {
426
+    rc = cmErrMsg(&err,rc,"Hash removed failed.");
427
+    goto errLabel;
428
+  }
429
+
430
+  // insert the same value - which should restore the removed value
431
+  if((ids[rem_idx] = cmHashTblStoreStr(h,arr[rem_idx])) == cmInvalidId )
432
+  {
433
+    rc = cmErrMsg(&err,cmHashTblLastRC(h),"Hash store failed on: '%s.",cmStringNullGuard(arr[rem_idx]));
434
+    goto errLabel;
435
+  }
436
+  */
437
+  
438
+  // lookup all the stored values by id
439
+  for(--i; i>=0; --i)
440
+  {
441
+    const cmChar_t* s;
442
+
443
+    if((s = cmHashTblStr(h,ids[i])) == NULL )
444
+      rc = cmErrMsg(&err,kInvalidIdHtRC,"The value associated with hash-id:0x%x could not be found.",ids[i]);
445
+    else
446
+      printf("%i : %s\n",i,cmStringNullGuard(s));
447
+  }
448
+
449
+
450
+  for(i=0; arr[i]!=NULL; ++i)
451
+  {
452
+    unsigned id = cmHashTblStrToId(h, arr[i]);
453
+    printf("%i : 0x%x : %s\n",i, id, cmStringNullGuard(cmHashTblStr(h, id)));
454
+  }
455
+
456
+
457
+  cmHashTblReport(h, &ctx->rpt );
458
+
459
+
460
+ errLabel:
461
+  cmHashTblDestroy(&h);
462
+  return rc;
463
+
464
+}

+ 67
- 0
cmHashTbl.h View File

@@ -0,0 +1,67 @@
1
+#ifndef cmHashTbl_h
2
+#define cmHashTbl_h
3
+
4
+#ifdef __cplusplus
5
+extern "C" {
6
+#endif
7
+
8
+  enum
9
+  {
10
+    kOkHtRC,
11
+    kLHeapFailHtRC,
12
+    kHashFaultHtRC,
13
+    kInvalidIdHtRC
14
+  };
15
+
16
+  typedef cmRC_t cmHtRC_t;
17
+  typedef cmHandle_t cmHashTblH_t;
18
+  extern cmHashTblH_t cmHashTblNullHandle;
19
+  
20
+  cmHtRC_t cmHashTblCreate( cmCtx_t* ctx, cmHashTblH_t* hp, unsigned bucketCnt );
21
+
22
+  cmHtRC_t cmHashTblDestroy( cmHashTblH_t* hp );
23
+
24
+  bool cmHashTblIsValid( cmHashTblH_t h );
25
+
26
+  // cmhashTblStoreBase() is the canonical store function.
27
+  // Set 'staticFl' to true if the value does not need to be reallocated
28
+  // and copied into the internal storage space.
29
+  // Returns a value which uniquely identifies the value.  If a unique
30
+  // identifier cannot be generated then the function returns cmInvalidId
31
+  // and sets the hash table error code to kHashFaultRC.
32
+  unsigned cmHashTblStoreBase(      cmHashTblH_t h, void* v, unsigned byteCnt, bool staticFl );
33
+  
34
+  unsigned cmHashTblStore(          cmHashTblH_t h, void* v, unsigned byteCnt );
35
+  unsigned cmHashTblStoreStatic(    cmHashTblH_t h, void* v, unsigned byteCnt );
36
+  unsigned cmHashTblStoreStr(       cmHashTblH_t h, const cmChar_t* s );
37
+  unsigned cmhashTblStoreStaticStr( cmHashTblH_t h, const cmChar_t* s );
38
+  unsigned cmHashTblStoreV(         cmHashTblH_t h, const cmChar_t* fmt, va_list vl );
39
+  unsigned cmHashTblStoreF(         cmHashTblH_t h, const cmChar_t* fmt, ... );
40
+  
41
+  // Given a value find an id.
42
+  unsigned cmHashTblId( cmHashTblH_t h, const void* value, unsigned byteCnt );
43
+  unsigned cmHashTblStrToId( cmHashTblH_t h, const cmChar_t* str );
44
+
45
+  // Returns NULL if no value is associated with 'id'.
46
+  // 'byteCntRef' is optional.
47
+  const void* cmHashTblValue( cmHashTblH_t h, unsigned id, unsigned* byteCntRef );
48
+
49
+  // Wrapper around cmHashTblValue() which assumes that the stored value is a 
50
+  // zero terminated string.
51
+  const cmChar_t* cmHashTblStr( cmHashTblH_t h, unsigned id );
52
+
53
+  // Remove a value.
54
+  cmHtRC_t cmHashTblRemove( cmHashTblH_t h, unsigned id );
55
+
56
+  // Return the last error id generated by the cmHashTbl object.
57
+  cmHtRC_t cmHashTblLastRC( cmHashTblH_t h );
58
+  
59
+  void cmHashTblReport( cmHashTblH_t h, cmRpt_t* rpt );
60
+
61
+  cmHtRC_t cmHashTblTest( cmCtx_t* ctx );
62
+
63
+#ifdef __cplusplus
64
+}
65
+#endif
66
+
67
+#endif

Loading…
Cancel
Save