diff --git a/cmHashTbl.c b/cmHashTbl.c new file mode 100644 index 0000000..c4f628a --- /dev/null +++ b/cmHashTbl.c @@ -0,0 +1,464 @@ +#include "cmGlobal.h" +#include "cmFloatTypes.h" +#include "cmRpt.h" +#include "cmErr.h" +#include "cmCtx.h" +#include "cmMem.h" +#include "cmLinkedHeap.h" +#include "cmMallocDebug.h" +#include "cmMath.h" +#include "cmHashTbl.h" +#include "cmText.h" + +enum +{ + kFreeHtFl = 0x01, +}; + +typedef struct cmHtValue_str +{ + unsigned flags; // See kXXXHtFl above. + unsigned id; // unique id associated with this value + void* value; // value blob + unsigned byteCnt; // size of value blob in bytes + struct cmHtValue_str* link; // cmHtBucket_t.list link +} cmHtValue_t; + +typedef struct +{ + cmHtValue_t* list; // value list + cmHtValue_t* avail; // available value slots - formed from cmHashTblRemoved() values. + unsigned nextIdx; // next unused index for this bucket +} cmHtBucket_t; + +typedef struct +{ + cmErr_t err; + cmLHeapH_t lhH; // memory for hash table buckets, values, value blobs. + unsigned bucketCnt; // hash table bucket cnt + unsigned linkCnt; // max length of collision list for each bucket + unsigned mask; // hash id bucket index mask (masks the MSB's of the hash-id) + unsigned maskShift; // shift required to move the lowest 'mask' bit to the LSB. + cmHtBucket_t* b; // b[bucketCnt] bucket array +} cmHt_t; + +cmHashTblH_t cmHashTblNullHandle = cmSTATIC_NULL_HANDLE; + +#define _cmHtBucketIndex( p, id ) (((id) & (p)->mask) >> (p)->maskShift) + +cmHt_t* _cmHtHandleToPtr( cmHashTblH_t h ) +{ + cmHt_t* p = (cmHt_t*)h.h; + assert(p!=NULL); + return p; +} + +// Return the bucket index portion of the hash id. +unsigned _cmHtGenId( cmHt_t* p, const void* v, unsigned byteCnt ) +{ + unsigned i,j; + const char* cv = v; + unsigned h = 0; + + for(i=0,j=3; imask; +} + + +// Given an id find the value. +cmHtValue_t* _cmHtIdToValue( cmHt_t* p, unsigned id ) +{ + if( id == cmInvalidId ) + return NULL; + + unsigned bi = _cmHtBucketIndex(p,id); + + assert(bi < p->bucketCnt); + + cmHtValue_t* v = p->b[bi].list; + for(; v!=NULL; v=v->link) + if( v->id == id ) + return v; + + return NULL; +} + +// Given a value find the id +cmHtValue_t* _cmHtValueToId( cmHt_t* p, const void* value, unsigned byteCnt, unsigned id ) +{ + if( id == cmInvalidId ) + id = _cmHtGenId(p,value,byteCnt); + + unsigned bi = _cmHtBucketIndex(p,id); + + assert(bi < p->bucketCnt); + + cmHtValue_t* v = p->b[bi].list; + for(; v!=NULL; v=v->link) + if( v->byteCnt==byteCnt && memcmp(value,v->value,byteCnt)==0 ) + return v; + + return NULL; +} + +cmHtRC_t _cmHtDestroy( cmHt_t* p ) +{ + cmHtRC_t rc = kOkHtRC; + cmLHeapDestroy(&p->lhH); + cmMemFree(p->b); + cmMemFree(p); + return rc; +} + +cmHtRC_t cmHashTblCreate( cmCtx_t* ctx, cmHashTblH_t* hp, unsigned bucketCnt ) +{ + cmHtRC_t rc; + if((rc = cmHashTblDestroy(hp)) != kOkHtRC ) + return rc; + + cmHt_t* p = cmMemAllocZ(cmHt_t,1); + + cmErrSetup(&p->err,&ctx->rpt,"hash table"); + + if(cmLHeapIsValid(p->lhH = cmLHeapCreate(8192,ctx)) == false ) + { + cmErrMsg(&p->err,kLHeapFailHtRC,"Internal linked heap mgr. create failed."); + goto errLabel; + } + + // force the bucket count to be a power of two + p->bucketCnt = cmNextPowerOfTwo(bucketCnt); + p->mask = p->bucketCnt - 1; + + // calcluate the hash-id bucket mask + for(p->maskShift=0; (0x80000000 & p->mask) == 0; ++p->maskShift ) + p->mask <<= 1; + + // calculate the maximum collisions per bucket mask + p->linkCnt = ~p->mask; + + // allocate the bucket array + p->b = cmMemAllocZ(cmHtBucket_t,p->bucketCnt); + + hp->h = p; + + errLabel: + if( rc != kOkHtRC ) + _cmHtDestroy(p); + return rc; +} + +cmHtRC_t cmHashTblDestroy( cmHashTblH_t* hp ) +{ + cmHtRC_t rc = kOkHtRC; + if(hp==NULL || cmHashTblIsValid(*hp)==false ) + return rc; + + cmHt_t* p = _cmHtHandleToPtr(*hp); + + if((rc = _cmHtDestroy(p)) != kOkHtRC ) + return rc; + + hp->h = NULL; + + return rc; +} + +bool cmHashTblIsValid( cmHashTblH_t h ) +{ return h.h!=NULL; } + +unsigned cmHashTblStoreBase( cmHashTblH_t h, void* v, unsigned byteCnt, bool staticFl ) +{ + cmHt_t* p = _cmHtHandleToPtr(h); + cmHtValue_t* vp = NULL; + unsigned id = _cmHtGenId(p, v, byteCnt ); + + // if the value is already stored then there is nothing else to do + if((vp = _cmHtValueToId(p,v,byteCnt,id)) != NULL ) + return vp->id; + + unsigned bi = _cmHtBucketIndex(p,id); + + assert(bi < p->bucketCnt ); + + cmHtBucket_t* b = p->b + bi; + + if( b->avail != NULL ) + { + vp = b->avail; + b->avail = b->avail->link; + } + else + { + if( b->nextIdx == p->linkCnt || (id + b->nextIdx) == cmInvalidId ) + { + cmErrMsg(&p->err,kHashFaultHtRC,"The hash table bucket at index %i is exhaused.",bi); + return cmInvalidId; + } + + vp = cmLhAllocZ(p->lhH,cmHtValue_t,1); + vp->id = id + b->nextIdx++; + } + + + assert( vp->id != cmInvalidId ); + + vp->link = b->list; + b->list = vp; + vp->byteCnt = byteCnt; + + if( staticFl ) + vp->value = v; + else + { + vp->value = cmLhAlloc(p->lhH,char,byteCnt); + memcpy(vp->value,v,byteCnt); + vp->flags = cmSetFlag(vp->flags,kFreeHtFl); + } + + return vp->id; +} + +unsigned cmHashTblStore( cmHashTblH_t h, void* v, unsigned byteCnt ) +{ return cmHashTblStoreBase(h,v,byteCnt,false); } + +unsigned cmHashTblStoreStatic( cmHashTblH_t h, void* v, unsigned byteCnt ) +{ return cmHashTblStoreBase(h,v,byteCnt,true); } + +unsigned _cmHashTblStoreStr( cmHashTblH_t h, const cmChar_t* s, bool staticFl ) +{ + unsigned n = cmTextLength(s); + if( n == 0 ) + { + s = ""; + n = 1; + } + +return cmHashTblStoreBase(h,(void*)s,n+1,staticFl); +} + +unsigned cmHashTblStoreStr( cmHashTblH_t h, const cmChar_t* s ) +{ return _cmHashTblStoreStr(h,s,false); } + +unsigned cmhashTblStoreStaticStr( cmHashTblH_t h, const cmChar_t* s ) +{ return _cmHashTblStoreStr(h,s,true); } + +unsigned cmHashTblStoreV( cmHashTblH_t h, const cmChar_t* fmt, va_list vl ) +{ + cmChar_t* s = NULL; + s = cmTsVPrintfP(s,fmt,vl); + unsigned id = _cmHashTblStoreStr(h,s,false); + cmMemFree(s); + return id; +} + +unsigned cmHashTblStoreF( cmHashTblH_t h, const cmChar_t* fmt, ... ) +{ + va_list vl; + va_start(vl,fmt); + unsigned id = cmHashTblStoreV(h,fmt,vl); + va_end(vl); + return id; +} + +unsigned cmHashTblId( cmHashTblH_t h, const void* value, unsigned byteCnt ) +{ + cmHt_t* p = _cmHtHandleToPtr(h); + cmHtValue_t* vp; + + if((vp = _cmHtValueToId(p,value,byteCnt,cmInvalidId)) == NULL ) + return cmInvalidId; + + return vp->id; +} + +unsigned cmHashTblStrToId( cmHashTblH_t h, const cmChar_t* str ) +{ + if( str == NULL ) + return cmInvalidId; + + return cmHashTblId(h,str,cmTextLength(str)+1); +} + + +const void* cmHashTblValue( cmHashTblH_t h, unsigned id, unsigned* byteCntRef ) +{ + cmHt_t* p = _cmHtHandleToPtr(h); + cmHtValue_t* vp; + + if((vp = _cmHtIdToValue(p, id)) != NULL ) + { + if( byteCntRef != NULL ) + *byteCntRef = vp->byteCnt; + + return vp->value; + } + + return NULL; +} + + +const cmChar_t* cmHashTblStr( cmHashTblH_t h, unsigned id ) +{ return (const cmChar_t*)cmHashTblValue(h,id,NULL); } + + +cmHtRC_t cmHashTblRemove( cmHashTblH_t h, unsigned id ) +{ + cmHt_t* p = _cmHtHandleToPtr(h); + unsigned bi = _cmHtBucketIndex(p,id); + + + + assert(bi < p->bucketCnt); + + cmHtBucket_t* b = p->b + bi; + + cmHtValue_t* vp = b->list; + cmHtValue_t* pp = NULL; + + for(; vp!=NULL; vp=vp->link) + { + if( vp->id == id ) + { + if( pp == NULL ) + b->list = vp->link; + else + pp->link = vp->link; + + break; + } + + pp = vp; + } + + if( vp == NULL ) + return cmErrMsg(&p->err,kInvalidIdHtRC,"A value could not be found for the hash id 0x%x.",id); + + if( cmIsFlag(vp->flags,kFreeHtFl ) ) + cmLhFree(p->lhH,vp->value); + + + vp->flags = 0; + vp->value = NULL; + vp->byteCnt = 0; + + // Note: Do not set the id to zero since we want to consert id's + // and this recd will be reused by the next call to cmHashTblStoreBase(). + + return kOkHtRC; + +} + + +cmHtRC_t cmHashTblLastRC( cmHashTblH_t h ) +{ + cmHt_t* p = _cmHtHandleToPtr(h); + return cmErrLastRC(&p->err); +} + +void _cmHashTblBucketReport( cmHtBucket_t* b, cmRpt_t* rpt ) +{ + cmHtValue_t* vp = b->list; + unsigned i; + for(i=0; vp!=NULL && i<10; vp=vp->link,++i) + cmRptPrintf(rpt,"0x%x : %s\n",vp->id,((const cmChar_t*)vp->value)); + + cmRptPrintf(rpt,"\n"); +} + +void cmHashTblReport( cmHashTblH_t h, cmRpt_t* rpt ) +{ + cmHt_t* p = _cmHtHandleToPtr(h); + unsigned i; + for(i=0; ibucketCnt; ++i) + { + //if( p->b[i].nextIdx > 0 ) + // cmRptPrintf(rpt,"%i,%i\n",i,p->b[i].nextIdx); + + if( p->b[i].nextIdx > 100 ) + _cmHashTblBucketReport(p->b + i,rpt); + } +} + + +cmHtRC_t cmHashTblTest( cmCtx_t* ctx ) +{ + cmHtRC_t rc = kOkHtRC; + cmHashTblH_t h = cmHashTblNullHandle; + cmErr_t err; + cmErrSetup(&err,&ctx->rpt,"hash table test"); + + if((rc = cmHashTblCreate(ctx,&h,8192)) != kOkHtRC ) + return cmErrMsg(&err,rc,"Hash table create failed."); + + const cmChar_t* arr[] = + { + "1", + "12", + "123", + "1234", + "12345", + "123456", + "123456", + "123456", + NULL + }; + + unsigned n = sizeof(arr)/sizeof(arr[0]); + unsigned ids[ n ]; + int i = 0; + + // store the values from arr[] + for(; arr[i]!=NULL; ++i) + if((ids[i] = cmHashTblStoreStr(h,arr[i])) == cmInvalidId ) + { + rc = cmErrMsg(&err,cmHashTblLastRC(h),"Hash store failed on: '%s.",cmStringNullGuard(arr[i])); + goto errLabel; + } + + /* + // remove a value + unsigned rem_idx = 3; + if((rc = cmHashTblRemove(h, ids[rem_idx] )) != kOkHtRC ) + { + rc = cmErrMsg(&err,rc,"Hash removed failed."); + goto errLabel; + } + + // insert the same value - which should restore the removed value + if((ids[rem_idx] = cmHashTblStoreStr(h,arr[rem_idx])) == cmInvalidId ) + { + rc = cmErrMsg(&err,cmHashTblLastRC(h),"Hash store failed on: '%s.",cmStringNullGuard(arr[rem_idx])); + goto errLabel; + } + */ + + // lookup all the stored values by id + for(--i; i>=0; --i) + { + const cmChar_t* s; + + if((s = cmHashTblStr(h,ids[i])) == NULL ) + rc = cmErrMsg(&err,kInvalidIdHtRC,"The value associated with hash-id:0x%x could not be found.",ids[i]); + else + printf("%i : %s\n",i,cmStringNullGuard(s)); + } + + + for(i=0; arr[i]!=NULL; ++i) + { + unsigned id = cmHashTblStrToId(h, arr[i]); + printf("%i : 0x%x : %s\n",i, id, cmStringNullGuard(cmHashTblStr(h, id))); + } + + + cmHashTblReport(h, &ctx->rpt ); + + + errLabel: + cmHashTblDestroy(&h); + return rc; + +} diff --git a/cmHashTbl.h b/cmHashTbl.h new file mode 100644 index 0000000..0b7cdff --- /dev/null +++ b/cmHashTbl.h @@ -0,0 +1,67 @@ +#ifndef cmHashTbl_h +#define cmHashTbl_h + +#ifdef __cplusplus +extern "C" { +#endif + + enum + { + kOkHtRC, + kLHeapFailHtRC, + kHashFaultHtRC, + kInvalidIdHtRC + }; + + typedef cmRC_t cmHtRC_t; + typedef cmHandle_t cmHashTblH_t; + extern cmHashTblH_t cmHashTblNullHandle; + + cmHtRC_t cmHashTblCreate( cmCtx_t* ctx, cmHashTblH_t* hp, unsigned bucketCnt ); + + cmHtRC_t cmHashTblDestroy( cmHashTblH_t* hp ); + + bool cmHashTblIsValid( cmHashTblH_t h ); + + // cmhashTblStoreBase() is the canonical store function. + // Set 'staticFl' to true if the value does not need to be reallocated + // and copied into the internal storage space. + // Returns a value which uniquely identifies the value. If a unique + // identifier cannot be generated then the function returns cmInvalidId + // and sets the hash table error code to kHashFaultRC. + unsigned cmHashTblStoreBase( cmHashTblH_t h, void* v, unsigned byteCnt, bool staticFl ); + + unsigned cmHashTblStore( cmHashTblH_t h, void* v, unsigned byteCnt ); + unsigned cmHashTblStoreStatic( cmHashTblH_t h, void* v, unsigned byteCnt ); + unsigned cmHashTblStoreStr( cmHashTblH_t h, const cmChar_t* s ); + unsigned cmhashTblStoreStaticStr( cmHashTblH_t h, const cmChar_t* s ); + unsigned cmHashTblStoreV( cmHashTblH_t h, const cmChar_t* fmt, va_list vl ); + unsigned cmHashTblStoreF( cmHashTblH_t h, const cmChar_t* fmt, ... ); + + // Given a value find an id. + unsigned cmHashTblId( cmHashTblH_t h, const void* value, unsigned byteCnt ); + unsigned cmHashTblStrToId( cmHashTblH_t h, const cmChar_t* str ); + + // Returns NULL if no value is associated with 'id'. + // 'byteCntRef' is optional. + const void* cmHashTblValue( cmHashTblH_t h, unsigned id, unsigned* byteCntRef ); + + // Wrapper around cmHashTblValue() which assumes that the stored value is a + // zero terminated string. + const cmChar_t* cmHashTblStr( cmHashTblH_t h, unsigned id ); + + // Remove a value. + cmHtRC_t cmHashTblRemove( cmHashTblH_t h, unsigned id ); + + // Return the last error id generated by the cmHashTbl object. + cmHtRC_t cmHashTblLastRC( cmHashTblH_t h ); + + void cmHashTblReport( cmHashTblH_t h, cmRpt_t* rpt ); + + cmHtRC_t cmHashTblTest( cmCtx_t* ctx ); + +#ifdef __cplusplus +} +#endif + +#endif