# Copyright (C) 1995, Index Data I/S
# All rights reserved.
# Sebastian Hammer, Adam Dickmeiss
-# $Id: Makefile,v 1.5 1995-09-04 12:33:40 adam Exp $
+# $Id: Makefile,v 1.6 1995-09-05 15:28:39 adam Exp $
SHELL=/bin/sh
RANLIB=ranlib
DEFS=$(INCLUDE)
O1 = main.o dir.o trav.o extract.o kinput.o kcompare.o ksort.o
O2 = kdump.o
-O3 = zserver.o kcompare.o zrpn.o
+O3 = zserver.o kcompare.o zrpn.o zsets.o
CPP=cc -E
all: $(TPROG1) $(TPROG2) $(TPROG3)
* Sebastian Hammer, Adam Dickmeiss
*
* $Log: extract.c,v $
- * Revision 1.3 1995-09-04 12:33:41 adam
+ * Revision 1.4 1995-09-05 15:28:39 adam
+ * More work on search engine.
+ *
+ * Revision 1.3 1995/09/04 12:33:41 adam
* Various cleanup. YAZ util used instead.
*
* Revision 1.2 1995/09/04 09:10:34 adam
static Dict file_idx;
static SYSNO sysno_next;
static int key_fd = -1;
+static int sys_idx_fd = -1;
static char *key_buf;
static int key_offset;
return;
if ((key_fd = open (fname, O_RDWR|O_CREAT, 0666)) == -1)
{
- logf (LOG_FATAL|LOG_ERRNO, "Creat %s", fname);
+ logf (LOG_FATAL|LOG_ERRNO, "open %s", fname);
exit (1);
}
logf (LOG_DEBUG, "key_open of %s", fname);
exit (1);
}
key_offset = 0;
- if (!(file_idx = dict_open ("fileidx", 10, 1)))
+ if (!(file_idx = dict_open (FNAME_FILE_DICT, 10, 1)))
{
logf (LOG_FATAL, "dict_open fail of %s", "fileidx");
exit (1);
memcpy (&sysno_next, (char*)file_key+1, sizeof(sysno_next));
else
sysno_next = 1;
+ if ((sys_idx_fd = open (FNAME_SYS_IDX, O_RDWR|O_CREAT, 0666)) == -1)
+ {
+ logf (LOG_FATAL|LOG_ERRNO, "open %s", FNAME_SYS_IDX);
+ exit (1);
+ }
}
int key_close (void)
return 0;
}
close (key_fd);
+ close (sys_idx_fd);
dict_insert (file_idx, ".", sizeof(sysno_next), &sysno_next);
dict_close (file_idx);
key_fd = -1;
{
sysno = sysno_next++;
dict_insert (file_idx, kname, sizeof(sysno), &sysno);
+ lseek (sys_idx_fd, sysno * SYS_IDX_ENTRY_LEN, SEEK_SET);
+ write (sys_idx_fd, kname, strlen(kname)+1);
}
else
memcpy (&sysno, (char*) file_info+1, sizeof(sysno));
* Sebastian Hammer, Adam Dickmeiss
*
* $Log: index.h,v $
- * Revision 1.5 1995-09-04 12:33:42 adam
+ * Revision 1.6 1995-09-05 15:28:39 adam
+ * More work on search engine.
+ *
+ * Revision 1.5 1995/09/04 12:33:42 adam
* Various cleanup. YAZ util used instead.
*
* Revision 1.4 1995/09/04 09:10:35 adam
void key_input (const char *dict_fname, const char *isam_fname,
const char *key_fname, int cache);
int key_sort (const char *key_fname, size_t mem);
+
+#define FNAME_WORD_DICT "worddict"
+#define FNAME_WORD_ISAM "wordisam"
+#define FNAME_FILE_DICT "filedict"
+#define FNAME_SYS_IDX "sysidx"
+#define SYS_IDX_ENTRY_LEN 120
* Sebastian Hammer, Adam Dickmeiss
*
* $Log: main.c,v $
- * Revision 1.6 1995-09-04 12:33:43 adam
+ * Revision 1.7 1995-09-05 15:28:39 adam
+ * More work on search engine.
+ *
+ * Revision 1.6 1995/09/04 12:33:43 adam
* Various cleanup. YAZ util used instead.
*
* Revision 1.5 1995/09/04 09:10:39 adam
if (!key_sort ("keys.tmp", 1000000))
exit (0);
logf (LOG_DEBUG, "Input");
- key_input ("dictinv", "isaminv", "keys.tmp", 50);
+ key_input (FNAME_WORD_DICT, FNAME_WORD_ISAM, "keys.tmp", 50);
exit (0);
}
+
* Sebastian Hammer, Adam Dickmeiss
*
* $Log: zrpn.c,v $
- * Revision 1.3 1995-09-04 15:20:22 adam
+ * Revision 1.4 1995-09-05 15:28:40 adam
+ * More work on search engine.
+ *
+ * Revision 1.3 1995/09/04 15:20:22 adam
* Minor changes.
*
* Revision 1.2 1995/09/04 12:33:43 adam
#include <assert.h>
#include <unistd.h>
-#include <alexutil.h>
-#include <dict.h>
-#include <isam.h>
+#include "zserver.h"
+
#include <rsisam.h>
#include <rstemp.h>
-#include <proto.h>
-
-#include "index.h"
-
-struct index_info {
- Dict dict;
- ISAM isam;
-};
-
-static RSET rpn_search_APT (struct index_info *ii, Z_AttributesPlusTerm *zapt)
+static RSET rpn_search_APT (ZServerInfo *zi, Z_AttributesPlusTerm *zapt)
{
struct rset_isam_parms parms;
const char *info;
if (term->which != Z_Term_general)
return NULL;
- if (!(info = dict_lookup (ii->dict, term->u.general->buf)))
- return NULL;
+ logf (LOG_DEBUG, "dict_lookup: %s", term->u.general->buf);
+ if (!(info = dict_lookup (zi->wordDict, term->u.general->buf)))
+ {
+ rset_temp_parms parms;
+
+ parms.key_size = sizeof(struct it_key);
+ return rset_create (rset_kind_temp, &parms);
+ }
assert (*info == sizeof(parms.pos));
memcpy (&parms.pos, info+1, sizeof(parms.pos));
- parms.is = ii->isam;
+ parms.is = zi->wordIsam;
+ logf (LOG_DEBUG, "rset_create isam");
return rset_create (rset_kind_isam, &parms);
}
-static RSET rpn_search_and (struct index_info *ii, RSET r_l, RSET r_r)
+static RSET rpn_search_and (ZServerInfo *zi, RSET r_l, RSET r_r)
{
struct it_key k1, k2;
RSET r_dst;
return r_dst;
}
-static RSET rpn_search_or (struct index_info *ii, RSET r_l, RSET r_r)
+static RSET rpn_search_or (ZServerInfo *zi, RSET r_l, RSET r_r)
{
return r_l;
}
-static RSET rpn_search_not (struct index_info *ii, RSET r_l, RSET r_r)
+static RSET rpn_search_not (ZServerInfo *zi, RSET r_l, RSET r_r)
{
return r_l;
}
-static RSET rpn_search_ref (struct index_info *ii, Z_ResultSetId *resultSetId)
+static RSET rpn_search_ref (ZServerInfo *zi, Z_ResultSetId *resultSetId)
{
return NULL;
}
-static RSET rpn_search_structure (struct index_info *ii, Z_RPNStructure *zs)
+static RSET rpn_search_structure (ZServerInfo *zi, Z_RPNStructure *zs)
{
RSET r;
if (zs->which == Z_RPNStructure_complex)
{
RSET r_l, r_r;
- r_l = rpn_search_structure (ii, zs->u.complex->s1);
- r_r = rpn_search_structure (ii, zs->u.complex->s2);
+ r_l = rpn_search_structure (zi, zs->u.complex->s1);
+ r_r = rpn_search_structure (zi, zs->u.complex->s2);
switch (zs->u.complex->operator->which)
{
case Z_Operator_and:
- r = rpn_search_and (ii, r_l, r_r);
+ rset_delete (r_r);
break;
case Z_Operator_or:
- r = rpn_search_or (ii, r_l, r_r);
+ rset_delete (r_r);
break;
case Z_Operator_and_not:
- r = rpn_search_not (ii, r_l, r_r);
+ rset_delete (r_r);
break;
default:
assert (0);
}
- rset_delete (r_l);
- rset_delete (r_r);
+ r = r_l;
}
else if (zs->which == Z_RPNStructure_simple)
{
if (zs->u.simple->which == Z_Operand_APT)
- r = rpn_search_APT (ii, zs->u.simple->u.attributesPlusTerm);
+ {
+ logf (LOG_DEBUG, "rpn_search_APT");
+ r = rpn_search_APT (zi, zs->u.simple->u.attributesPlusTerm);
+ }
else if (zs->u.simple->which == Z_Operand_resultSetId)
- r = rpn_search_ref (ii, zs->u.simple->u.resultSetId);
+ {
+ logf (LOG_DEBUG, "rpn_search_ref");
+ r = rpn_search_ref (zi, zs->u.simple->u.resultSetId);
+ }
else
{
assert (0);
}
return r;
}
+
+/*
+ * rpn_save_set: read result set r from start to end and count its
+ * distinct sysnos.
+ *
+ * r      result set to scan; it is opened, read and closed here.
+ * count  receives the number of times the sysno changed from one key
+ *        to the next. A run of keys with the same sysno counts once, so
+ *        the figure is only "distinct sysnos" if equal sysnos are
+ *        adjacent. The comparison starts from 0, so a leading key with
+ *        sysno 0 is not counted.
+ *
+ * Returns r itself. The copy into a temporary set is still disabled
+ * (#if 0), but the function must return a value because its caller
+ * uses the result.
+ */
+static RSET rpn_save_set (RSET r, int *count)
+{
+#if 0
+    RSET d;
+    rset_temp_parms parms;
+#endif
+    int psysno = 0;
+    struct it_key key;
+
+    logf (LOG_DEBUG, "rpn_save_set");
+    *count = 0;
+#if 0
+    parms.key_size = sizeof(struct it_key);
+    d = rset_create (rset_kind_temp, &parms);
+    rset_open (d, 1);
+#endif
+
+    rset_open (r, 0);
+    while (rset_read (r, &key))
+    {
+        if (key.sysno != psysno)
+        {
+            psysno = key.sysno;
+            (*count)++;
+        }
+        logf (LOG_DEBUG, "lllllllllllllllll");
+#if 0
+        rset_write (d, &key);
+#endif
+    }
+    rset_close (r);
+#if 0
+    rset_close (d);
+#endif
+    logf (LOG_DEBUG, "%d distinct sysnos", *count);
+#if 0
+    return d;
+#else
+    /* never fall off the end of a non-void function */
+    return r;
+#endif
+}
+
+/*
+ * rpn_search: evaluate an RPN query and register the result under
+ * setname.
+ *
+ * zi         server state (dictionaries, ISAM, known result sets).
+ * rpn        query to evaluate.
+ * num_bases, basenames
+ *            accepted for the interface; not used by this function.
+ * setname    name the result set is stored under; an existing set with
+ *            the same name is replaced (overwrite flag is 1).
+ * hits       receives the hit count computed by rpn_save_set, or 0 when
+ *            the query produced no result set.
+ *
+ * Always returns 0.
+ */
+int rpn_search (ZServerInfo *zi,
+                Z_RPNQuery *rpn, int num_bases, char **basenames,
+                const char *setname, int *hits)
+{
+    RSET rset;
+
+    *hits = 0;
+    rset = rpn_search_structure (zi, rpn->RPNStructure);
+    if (!rset)
+        return 0;
+    /* Only the count is wanted here; the returned set is not used. */
+    (void) rpn_save_set (rset, hits);
+
+    resultSetAdd (zi, setname, 1, rset);
+    return 0;
+}
+
+
* Sebastian Hammer, Adam Dickmeiss
*
* $Log: zserver.c,v $
- * Revision 1.2 1995-09-04 12:33:43 adam
+ * Revision 1.3 1995-09-05 15:28:40 adam
+ * More work on search engine.
+ *
+ * Revision 1.2 1995/09/04 12:33:43 adam
* Various cleanup. YAZ util used instead.
*
* Revision 1.1 1995/09/04 09:10:41 adam
#include <stdio.h>
#include <assert.h>
#include <unistd.h>
+#include <fcntl.h>
-#include <alexutil.h>
-#include "index.h"
+#include "zserver.h"
-char *prog;
+#include <backend.h>
+#include <dmalloc.h>
-int main (int argc, char **argv)
+ZServerInfo server_info;
+
+/*
+ * bend_init: open the files the server needs and fill in the global
+ * server_info.
+ *
+ * Opens, in order: the sysno index file (FNAME_SYS_IDX), the file
+ * dictionary (FNAME_FILE_DICT), the word dictionary (FNAME_WORD_DICT)
+ * and the word ISAM (FNAME_WORD_ISAM). If any open fails, everything
+ * opened so far is released again.
+ *
+ * Returns a pointer to a static result. errcode is 0 on success; on
+ * failure errcode is 1 and errstring names the file that failed.
+ */
+bend_initresult *bend_init (bend_initrequest *q)
+{
+    static bend_initresult r;
+    static char *name = "zserver";
+
+    r.errcode = 0;
+    r.errstring = 0;
+    r.handle = name;
+
+    server_info.sets = NULL;
+    /* open() signals failure with -1; 0 is a valid descriptor */
+    if ((server_info.sys_idx_fd = open (FNAME_SYS_IDX, O_RDONLY)) == -1)
+    {
+        r.errcode = 1;
+        r.errstring = "open fail: sysidx";
+        return &r;
+    }
+    if (!(server_info.fileDict = dict_open (FNAME_FILE_DICT, 5, 0)))
+    {
+        close (server_info.sys_idx_fd);
+        r.errcode = 1;
+        r.errstring = "dict_open fail: filedict";
+        return &r;
+    }
+    if (!(server_info.wordDict = dict_open (FNAME_WORD_DICT, 20, 0)))
+    {
+        dict_close (server_info.fileDict);
+        close (server_info.sys_idx_fd);
+        r.errcode = 1;
+        r.errstring = "dict_open fail: worddict";
+        return &r;
+    }
+    if (!(server_info.wordIsam = is_open (FNAME_WORD_ISAM, key_compare, 0)))
+    {
+        dict_close (server_info.wordDict);
+        dict_close (server_info.fileDict);
+        close (server_info.sys_idx_fd);
+        r.errcode = 1;
+        r.errstring = "is_open fail: wordisam";
+        return &r;
+    }
+    return &r;
+}
+
+bend_searchresult *bend_search (void *handle, bend_searchrequest *q, int *fd)
{
- int ret;
- char *arg;
- char *base_name = NULL;
+ static bend_searchresult r;
+
+ r.errcode = 0;
+ r.errstring = 0;
+ r.hits = 0;
- prog = *argv;
- while ((ret = options ("v:", argv, argc, &arg)) != -2)
+ switch (q->query->which)
{
- if (ret == 0)
- {
- if (!base_name)
- {
- base_name = arg;
-
- common_resource = res_open (base_name);
- if (!common_resource)
- {
- logf (LOG_FATAL, "Cannot open resource `%s'", base_name);
- exit (1);
- }
- }
- }
- else if (ret == 'v')
- {
- log_init (log_mask_str(arg), prog, NULL);
- }
- else
- {
- logf (LOG_FATAL, "Unknown option '-%s'", arg);
- exit (1);
- }
+ case Z_Query_type_1:
+ r.errcode = rpn_search (&server_info, q->query->u.type_1,
+ q->num_bases, q->basenames, q->setname,
+ &r.hits);
+ break;
+ default:
+ r.errcode = 107;
}
- if (!base_name)
+ return &r;
+}
+
+/*
+ * bend_fetch: return one record from a named result set.
+ *
+ * q->setname names the result set and q->number is the position of the
+ * wanted record; both are passed on to resultSetRecordGet.
+ *
+ * Returns a pointer to a static result. On success errcode is 0,
+ * format is VAL_SUTRS, len is the record size and record is a freshly
+ * malloc'ed, NUL-terminated copy of the data. errcode is 13 when the
+ * record cannot be produced and 2 when memory runs out.
+ *
+ * NOTE(review): nothing in this function frees r.record from an
+ * earlier call; confirm whether the caller owns that buffer.
+ */
+bend_fetchresult *bend_fetch (void *handle, bend_fetchrequest *q, int *num)
+{
+    static bend_fetchresult r;
+    int positions[1];
+    ZServerRecord *records;
+
+    r.errstring = 0;
+    r.last_in_set = 0;
+    r.basename = "base";
+
+    positions[0] = q->number;
+    records = resultSetRecordGet (&server_info, q->setname, 1, positions);
+    if (!records)
+    {
+        logf (LOG_DEBUG, "resultSetRecordGet, error");
+        r.errcode = 13;
+        return &r;
+    }
+    /* buf stays NULL when the record's file could not be opened */
+    if (!records[0].buf)
+    {
+        logf (LOG_DEBUG, "resultSetRecordGet, no record data");
+        resultSetRecordDel (&server_info, records, 1);
+        r.errcode = 13;
+        return &r;
+    }
+    r.len = records[0].size;
+    r.record = malloc (r.len+1);
+    if (!r.record)
+    {
+        logf (LOG_WARN, "malloc of record failed");
+        resultSetRecordDel (&server_info, records, 1);
+        r.errcode = 2;
+        return &r;
+    }
+    /* buf holds size bytes plus a terminating NUL; copy all of it so
+       embedded NUL bytes do not cut the record short */
+    memcpy (r.record, records[0].buf, r.len+1);
+    resultSetRecordDel (&server_info, records, 1);
+    r.format = VAL_SUTRS;
+    r.errcode = 0;
+    return &r;
+}
+
+/*
+ * bend_delete: result-set deletion is not implemented; every request
+ * gets a null result.
+ */
+bend_deleteresult *bend_delete (void *handle, bend_deleterequest *q, int *num)
+{
+    return NULL;
+}
+
+/*
+ * bend_scan: answer a scan request with generated placeholder terms.
+ *
+ * The entries are not read from any index: term i is "term-<i+1>" and
+ * its occurrence count is rand() % 100000.
+ *
+ * The number of entries returned is q->num_entries limited to the
+ * range 0..200, because the entries live in fixed static arrays of 200
+ * elements. term_position is copied from the request unchanged.
+ *
+ * Returns a pointer to a static result with errcode 0.
+ */
+bend_scanresult *bend_scan (void *handle, bend_scanrequest *q, int *num)
+{
+    static struct scan_entry list[200];
+    static char buf[200][200];
+    static bend_scanresult r;
+    int max_entries = (int) (sizeof(list) / sizeof(list[0]));
+    int i;
+
+    r.term_position = q->term_position;
+    r.num_entries = q->num_entries;
+    /* never index past the static arrays */
+    if (r.num_entries > max_entries)
+        r.num_entries = max_entries;
+    else if (r.num_entries < 0)
+        r.num_entries = 0;
+    r.entries = list;
+    for (i = 0; i < r.num_entries; i++)
+    {
+        list[i].term = buf[i];
+        sprintf(list[i].term, "term-%d", i+1);
+        list[i].occurrences = rand() % 100000;
+    }
+    r.errcode = 0;
+    r.errstring = 0;
+    return &r;
+}
+
+/*
+ * bend_close: close the two dictionaries, the word ISAM and the sysno
+ * index descriptor held in server_info. The handle is not used.
+ */
+void bend_close (void *handle)
+{
+    ZServerInfo *zi = &server_info;
+
+    dict_close (zi->fileDict);
+    dict_close (zi->wordDict);
+    is_close (zi->wordIsam);
+    close (zi->sys_idx_fd);
+}
+
+int main (int argc, char **argv)
+{
+ char *base_name = "base";
+
+ if (!(common_resource = res_open (base_name)))
{
- fprintf (stderr, "zserver [-v log] base ...\n");
+ logf (LOG_FATAL, "Cannot open resource `%s'", base_name);
exit (1);
}
- exit (0);
+ return statserv_main (argc, argv);
}
--- /dev/null
+/*
+ * Copyright (C) 1995, Index Data I/S
+ * All rights reserved.
+ * Sebastian Hammer, Adam Dickmeiss
+ *
+ * $Log: zserver.h,v $
+ * Revision 1.1 1995-09-05 15:28:40 adam
+ * More work on search engine.
+ *
+ */
+
+#include "index.h"
+#include <proto.h>
+#include <rset.h>
+
+typedef struct { /* one record as returned by resultSetRecordGet */
+ size_t size; /* byte length of the data in buf, not counting the terminating NUL */
+ char *buf; /* allocated data followed by a NUL byte; NULL when the record's file could not be opened */
+} ZServerRecord;
+
+typedef struct ZServerSet_ { /* one named result set in a singly linked list */
+ char *name; /* private copy of the set name, allocated by resultSetAdd */
+ RSET rset; /* result set stored under this name; deleted when the name is overwritten */
+ int size; /* NOTE(review): meaning not visible in this change -- confirm intended use */
+ struct ZServerSet_ *next; /* next set in the list, or NULL at the end */
+} ZServerSet;
+
+typedef struct { /* state shared by the server's backend functions */
+ ZServerSet *sets; /* head of the list of named result sets; new sets are prepended */
+ Dict wordDict; /* word dictionary, opened from FNAME_WORD_DICT */
+ ISAM wordIsam; /* word ISAM, opened from FNAME_WORD_ISAM */
+ Dict fileDict; /* file dictionary, opened from FNAME_FILE_DICT */
+ int sys_idx_fd; /* descriptor for FNAME_SYS_IDX: one SYS_IDX_ENTRY_LEN slot per sysno holding a file name */
+} ZServerInfo;
+
+int rpn_search (ZServerInfo *zi,
+ Z_RPNQuery *rpn, int num_bases, char **basenames,
+ const char *setname, int *hits);
+
+ZServerSet *resultSetAdd (ZServerInfo *zi, const char *name,
+ int ov, RSET rset);
+ZServerSet *resultSetGet (ZServerInfo *zi, const char *name);
+ZServerRecord *resultSetRecordGet (ZServerInfo *zi, const char *name,
+ int num, int *positions);
+void resultSetRecordDel (ZServerInfo *zi, ZServerRecord *records, int num);
--- /dev/null
+/*
+ * Copyright (C) 1995, Index Data I/S
+ * All rights reserved.
+ * Sebastian Hammer, Adam Dickmeiss
+ *
+ * $Log: zsets.c,v $
+ * Revision 1.1 1995-09-05 15:28:40 adam
+ * More work on search engine.
+ *
+ */
+#include <stdio.h>
+#include <assert.h>
+#include <unistd.h>
+
+#include "zserver.h"
+#include <rstemp.h>
+
+/*
+ * resultSetAdd: store rset under name in zi's list of result sets.
+ *
+ * zi    server state holding the list.
+ * name  set name; a private copy is made for a new entry.
+ * ov    non-zero allows replacing an existing set of the same name.
+ * rset  result set to store.
+ *
+ * Returns the list entry, or NULL when the name already exists and ov
+ * is 0. When an existing entry is overwritten its old result set is
+ * deleted, unless it is the very set being stored. New entries are
+ * added at the head of the list.
+ */
+ZServerSet *resultSetAdd (ZServerInfo *zi, const char *name, int ov, RSET rset)
+{
+    ZServerSet *s;
+
+    for (s = zi->sets; s; s = s->next)
+        if (!strcmp (s->name, name))
+        {
+            if (!ov)
+                return NULL;
+            /* storing the same set again must not delete it */
+            if (s->rset != rset)
+            {
+                rset_delete (s->rset);
+                s->rset = rset;
+            }
+            return s;
+        }
+    s = xmalloc (sizeof(*s));
+    s->name = xmalloc (strlen(name)+1);
+    strcpy (s->name, name);
+    s->rset = rset;
+    /* leave no member of the new entry indeterminate */
+    s->size = 0;
+    s->next = zi->sets;
+    zi->sets = s;
+    return s;
+}
+
+/*
+ * resultSetGet: find the result set called name in zi's list.
+ * Returns the first entry whose name matches exactly, or NULL when
+ * there is none.
+ */
+ZServerSet *resultSetGet (ZServerInfo *zi, const char *name)
+{
+    ZServerSet *cur = zi->sets;
+
+    while (cur && strcmp (cur->name, name) != 0)
+        cur = cur->next;
+    return cur;
+}
+
+/*
+ * resultSetRecordGet: fetch the records at the given positions of a
+ * named result set.
+ *
+ * zi         server state (result sets and the sysno index file).
+ * name       name of the result set.
+ * num        number of entries in positions.
+ * positions  1-based positions counted in distinct sysnos. The set is
+ *            read once from the start, so positions must be ascending.
+ *
+ * Returns an array of num records, to be released with
+ * resultSetRecordDel, or NULL when the set is unknown, has no rset or
+ * num is not positive. Every entry starts as buf == NULL, size == 0
+ * and stays that way if its position is never reached or its file
+ * cannot be opened.
+ *
+ * For each match the file name is read from the sysno's slot in the
+ * index file and the whole file is loaded, with a NUL byte appended.
+ * Failure of lseek, read, ftell or fread is fatal: it is logged and the
+ * process exits.
+ */
+ZServerRecord *resultSetRecordGet (ZServerInfo *zi, const char *name,
+                                   int num, int *positions)
+{
+    ZServerSet *sset;
+    ZServerRecord *sr;
+    RSET rset;
+    int i;
+    int num_i = 0;
+    int position = 0;
+    int psysno = 0;
+    struct it_key key;
+
+    if (!(sset = resultSetGet (zi, name)))
+        return NULL;
+    if (!(rset = sset->rset))
+        return NULL;
+    if (num <= 0)
+        return NULL;
+    logf (LOG_DEBUG, "resultSetRecordGet");
+    sr = xmalloc (sizeof(*sr) * num);
+    /* give every entry a defined state so that unmatched positions
+       can still be passed safely to resultSetRecordDel */
+    for (i = 0; i<num; i++)
+    {
+        sr[i].size = 0;
+        sr[i].buf = NULL;
+    }
+    rset_open (rset, 0);
+    while (rset_read (rset, &key))
+    {
+        logf (LOG_DEBUG, "resultSetRecordGet: %d", key.sysno);
+        if (key.sysno != psysno)
+        {
+            psysno = key.sysno;
+            position++;
+            if (position == positions[num_i])
+            {
+                FILE *inf;
+                char fname[SYS_IDX_ENTRY_LEN];
+                long nread;
+
+                logf (LOG_DEBUG, "get sysno=%d", psysno);
+                if (lseek (zi->sys_idx_fd,
+                           (off_t) psysno * SYS_IDX_ENTRY_LEN,
+                           SEEK_SET) == -1)
+                {
+                    logf (LOG_FATAL|LOG_ERRNO, "lseek of sys_idx");
+                    exit (1);
+                }
+                nread = read (zi->sys_idx_fd, fname, SYS_IDX_ENTRY_LEN);
+                if (nread == -1)
+                {
+                    logf (LOG_FATAL|LOG_ERRNO, "read of sys_idx");
+                    exit (1);
+                }
+                /* a short or unterminated slot must still give a
+                   valid C string */
+                fname[nread < SYS_IDX_ENTRY_LEN ? nread
+                      : SYS_IDX_ENTRY_LEN-1] = '\0';
+                if (!(inf = fopen (fname, "r")))
+                    logf (LOG_WARN, "fopen: %s", fname);
+                else
+                {
+                    long size;
+
+                    fseek (inf, 0L, SEEK_END);
+                    size = ftell (inf);
+                    fseek (inf, 0L, SEEK_SET);
+                    if (size < 0)
+                    {
+                        logf (LOG_FATAL|LOG_ERRNO, "ftell %s", fname);
+                        exit (1);
+                    }
+                    logf (LOG_DEBUG, "get sysno=%d, fname=%s, size=%ld",
+                          psysno, fname, (long) size);
+                    sr[num_i].buf = xmalloc (size+1);
+                    sr[num_i].size = size;
+                    sr[num_i].buf[size] = '\0';
+                    /* fread returns 0 for a zero-byte request, so an
+                       empty file is not an error */
+                    if (size > 0 && fread (sr[num_i].buf, size, 1, inf) != 1)
+                    {
+                        logf (LOG_FATAL|LOG_ERRNO, "fread %s", fname);
+                        exit (1);
+                    }
+                    fclose (inf);
+                }
+                num_i++;
+                if (num_i == num)
+                    break;
+            }
+        }
+    }
+    rset_close (rset);
+    return sr;
+}
+
+/*
+ * resultSetRecordDel: free the data buffer of each of the num records
+ * and then the records array itself. zi is not used.
+ */
+void resultSetRecordDel (ZServerInfo *zi, ZServerRecord *records, int num)
+{
+    ZServerRecord *rec = records;
+    int left = num;
+
+    while (left-- > 0)
+    {
+        free (rec->buf);
+        rec++;
+    }
+    free (records);
+}