From 85a4a8d3f48c3d2c95efb6437b25f0804d966161 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Wed, 23 Sep 2009 13:19:18 +0200 Subject: [PATCH] DRY out multiple hash functions. Jenkins hash function now in one place. --- src/Makefile.am | 3 ++- src/jenkins_hash.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++ src/jenkins_hash.h | 35 +++++++++++++++++++++++++++++++++ src/marchash.c | 22 +++------------------ src/reclists.c | 20 ++----------------- src/termlists.c | 21 ++------------------ 6 files changed, 99 insertions(+), 57 deletions(-) create mode 100644 src/jenkins_hash.c create mode 100644 src/jenkins_hash.h diff --git a/src/Makefile.am b/src/Makefile.am index 62279a0..fbb7b16 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -26,7 +26,8 @@ libpazpar2_a_SOURCES = pazpar2_config.c pazpar2_config.h eventl.c eventl.h \ settings.h settings.c sel_thread.c sel_thread.h getaddrinfo.c \ charsets.c charsets.h \ client.c client.h connection.c connection.h host.h parameters.h \ - dirent.c direntz.h marcmap.c marcmap.h marchash.c marchash.h + dirent.c direntz.h marcmap.c marcmap.h marchash.c marchash.h \ + jenkins_hash.c jenkins_hash.h pazpar2_SOURCES = pazpar2.c pazpar2_LDADD = libpazpar2.a $(YAZLIB) diff --git a/src/jenkins_hash.c b/src/jenkins_hash.c new file mode 100644 index 0000000..1f0cd8b --- /dev/null +++ b/src/jenkins_hash.c @@ -0,0 +1,55 @@ +/* This file is part of Pazpar2. + Copyright (C) 2006-2009 Index Data + +Pazpar2 is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*/ + +/** \file + \brief Jenkins hash function +*/ + +#if HAVE_CONFIG_H +#include <config.h> +#endif + +#include "jenkins_hash.h" + +// Jenkins one-at-a-time hash (from wikipedia) +unsigned int jenkins_hash(const unsigned char *key) +{ + unsigned int hash = 0; + + while (*key) + { + hash += *(key++); + hash += (hash << 10); + hash ^= (hash >> 6); + } + hash += (hash << 3); + hash ^= (hash >> 11); + hash += (hash << 15); + return hash; +} + +/* + * Local variables: + * c-basic-offset: 4 + * c-file-style: "Stroustrup" + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ + diff --git a/src/jenkins_hash.h b/src/jenkins_hash.h new file mode 100644 index 0000000..f47e559 --- /dev/null +++ b/src/jenkins_hash.h @@ -0,0 +1,35 @@ +/* This file is part of Pazpar2. + Copyright (C) 2006-2009 Index Data + +Pazpar2 is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*/ + +#ifndef JENKINS_HASH_H +#define JENKINS_HASH_H + +unsigned int jenkins_hash(const unsigned char *key); + +#endif + +/* + * Local variables: + * c-basic-offset: 4 + * c-file-style: "Stroustrup" + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ + diff --git a/src/marchash.c b/src/marchash.c index 6de22de..9d00f89 100644 --- a/src/marchash.c +++ b/src/marchash.c @@ -30,25 +30,9 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include #include +#include "jenkins_hash.h" #include -// Jenkins one-at-a-time hash (from pp2 reclists.c, wikipedia) -static unsigned int hash(const unsigned char *key) -{ - unsigned int hash = 0; - - while (*key) - { - hash += *(key++); - hash += (hash << 10); - hash ^= (hash >> 6); - } - hash += (hash << 3); - hash ^= (hash >> 11); - hash += (hash << 15); - return hash; -} - inline void strtrimcat(char *dest, const char *src) { const char *in; @@ -144,7 +128,7 @@ struct marcfield *marchash_add_field(struct marchash *marchash, struct marcfield *new; struct marcfield *last; - slot = hash((const unsigned char *) key) & MARCHASH_MASK; + slot = jenkins_hash((const unsigned char *) key) & MARCHASH_MASK; new = marchash->table[slot]; last = NULL; @@ -211,7 +195,7 @@ struct marcfield *marchash_get_field (struct marchash *marchash, if (last) cur = last->next; else - cur = marchash->table[hash((const unsigned char *)key) & MARCHASH_MASK]; + cur = marchash->table[jenkins_hash((const unsigned char *)key) & MARCHASH_MASK]; while (cur) { if (!strcmp(cur->key, key)) diff --git a/src/reclists.c b/src/reclists.c index 48ce8e7..3e3100c 100644 --- a/src/reclists.c +++ b/src/reclists.c @@ -27,6 +27,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include "pazpar2.h" 
#include "reclists.h" +#include "jenkins_hash.h" static struct reclist_sortparms *qsort_sortparms = 0; /* thread pr */ @@ -231,23 +232,6 @@ void reclist_rewind(struct reclist *l) l->pointer = 0; } -// Jenkins one-at-a-time hash (from wikipedia) -static unsigned int hash(const unsigned char *key) -{ - unsigned int hash = 0; - - while (*key) - { - hash += *(key++); - hash += (hash << 10); - hash ^= (hash >> 6); - } - hash += (hash << 3); - hash ^= (hash >> 11); - hash += (hash << 15); - return hash; -} - struct reclist *reclist_create(NMEM nmem, int numrecs) { int hashsize = 1; @@ -287,7 +271,7 @@ struct record_cluster *reclist_insert( struct reclist *l, assert(merge_key); assert(total); - bucket = hash((unsigned char*) merge_key) & l->hashmask; + bucket = jenkins_hash((unsigned char*) merge_key) & l->hashmask; for (p = &l->hashtable[bucket]; *p; p = &(*p)->next) { diff --git a/src/termlists.c b/src/termlists.c index 619fe3c..892e6d9 100644 --- a/src/termlists.c +++ b/src/termlists.c @@ -26,6 +26,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include #include "termlists.h" +#include "jenkins_hash.h" // Discussion: // As terms are found in incoming records, they are added to (or updated in) a @@ -52,24 +53,6 @@ struct termlist NMEM nmem; }; - -// Jenkins one-at-a-time hash (from wikipedia) -static unsigned int hash(const unsigned char *key) -{ - unsigned int hash = 0; - - while (*key) - { - hash += *(key++); - hash += (hash << 10); - hash ^= (hash >> 6); - } - hash += (hash << 3); - hash ^= (hash >> 11); - hash += (hash << 15); - return hash; -} - struct termlist *termlist_create(NMEM nmem, int numterms, int highscore_size) { int hashsize = 1; @@ -148,7 +131,7 @@ void termlist_insert(struct termlist *tl, const char *term) for (cp = buf + strlen(buf); cp != buf && strchr(",. 
-", cp[-1]); cp--) cp[-1] = '\0'; - bucket = hash((unsigned char *)buf) & tl->hashmask; + bucket = jenkins_hash((unsigned char *)buf) & tl->hashmask; for (p = &tl->hashtable[bucket]; *p; p = &(*p)->next) { if (!strcmp(buf, (*p)->term.term)) -- 1.7.10.4