-/*
- * $Id: termlists.c,v 1.2 2007-01-08 12:43:41 adam Exp $
+/* $Id: termlists.c,v 1.8 2007-05-10 09:24:32 adam Exp $
+ Copyright (c) 2006-2007, Index Data.
+
+This file is part of Pazpar2.
+
+Pazpar2 is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Pazpar2; see the file LICENSE. If not, write to the
+Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA.
*/
#include <stdlib.h>
#include <string.h>
+#include <ctype.h>
#include <yaz/yaz-util.h>
#if HAVE_CONFIG_H
hashsize <<= 1;
res = nmem_malloc(nmem, sizeof(struct termlist));
res->hashtable = nmem_malloc(nmem, hashsize * sizeof(struct termlist_bucket*));
- bzero(res->hashtable, hashsize * sizeof(struct termlist_bucket*));
+ memset(res->hashtable, 0, hashsize * sizeof(struct termlist_bucket*));
res->hashtable_size = hashsize;
res->nmem = nmem;
res->hashmask = hashsize - 1; // Creates a bitmask
int smallest;
int me = -1;
- if (t->frequency < tl->highscore_min)
+ if (tl->highscore_num > tl->highscore_size && t->frequency < tl->highscore_min)
return;
smallest = 0;
}
if (tl->highscore_num)
tl->highscore_min = tl->highscore[smallest]->frequency;
+ if (t->frequency < tl->highscore_min)
+ tl->highscore_min = t->frequency;
if (me >= 0)
return;
if (tl->highscore_num < tl->highscore_size)
{
unsigned int bucket;
struct termlist_bucket **p;
+ char buf[256], *cp;
- bucket = hash((unsigned char *)term) & tl->hashmask;
+ if (strlen(term) > 255)
+ return;
+ strcpy(buf, term);
+ /* chop right: strip trailing commas, periods, spaces and hyphens */
+ for (cp = buf + strlen(buf); cp != buf && strchr(",. -", cp[-1]); cp--)
+ cp[-1] = '\0';
+
+ bucket = hash((unsigned char *)buf) & tl->hashmask;
for (p = &tl->hashtable[bucket]; *p; p = &(*p)->next)
{
- if (!strcmp(term, (*p)->term.term))
+ if (!strcmp(buf, (*p)->term.term))
{
(*p)->term.frequency++;
update_highscore(tl, &((*p)->term));
{
struct termlist_bucket *new = nmem_malloc(tl->nmem,
sizeof(struct termlist_bucket));
- new->term.term = nmem_strdup(tl->nmem, term);
+ new->term.term = nmem_strdup(tl->nmem, buf);
new->term.frequency = 1;
new->next = 0;
*p = new;