From fe3383c0559a453df1e5076fc6faac6d1a11685c Mon Sep 17 00:00:00 2001 From: Sebastian Hammer Date: Wed, 20 Dec 2006 20:47:16 +0000 Subject: [PATCH] Reorganized source tree --- Makefile | 45 -- bibs.pz | 12 - command.c | 392 --------------- command.h | 6 - danish.pz | 21 - defport.pz | 5 - etc/bibs.pz | 12 + etc/danish.pz | 21 + etc/defport.pz | 5 + etc/many.pz | 299 +++++++++++ etc/slow.pz | 1 + etc/test.pz | 1 + eventl.c | 154 ------ eventl.h | 104 ---- http.c | 854 -------------------------------- http.h | 88 ---- http_command.c | 403 --------------- http_command.h | 8 - many.pz | 299 ----------- pazpar2.c | 1395 ---------------------------------------------------- pazpar2.h | 172 ------- reclists.c | 110 ----- reclists.h | 23 - relevance.c | 247 ---------- relevance.h | 27 - src/Makefile | 45 ++ src/command.c | 392 +++++++++++++++ src/command.h | 6 + src/eventl.c | 154 ++++++ src/eventl.h | 107 ++++ src/http.c | 854 ++++++++++++++++++++++++++++++++ src/http.h | 88 ++++ src/http_command.c | 403 +++++++++++++++ src/http_command.h | 8 + src/pazpar2.c | 1395 ++++++++++++++++++++++++++++++++++++++++++++++++++++ src/pazpar2.h | 172 +++++++ src/reclists.c | 110 +++++ src/reclists.h | 23 + src/relevance.c | 247 ++++++++++ src/relevance.h | 27 + src/termlists.c | 163 ++++++ src/termlists.h | 26 + src/util.c | 11 + src/util.h | 8 + termlists.c | 163 ------ termlists.h | 26 - test.pz | 1 - util.c | 11 - util.h | 8 - 49 files changed, 4578 insertions(+), 4574 deletions(-) delete mode 100644 Makefile delete mode 100644 bibs.pz delete mode 100644 command.c delete mode 100644 command.h delete mode 100644 danish.pz delete mode 100644 defport.pz create mode 100644 etc/bibs.pz create mode 100644 etc/danish.pz create mode 100644 etc/defport.pz create mode 100644 etc/many.pz create mode 100644 etc/slow.pz create mode 100644 etc/test.pz delete mode 100644 eventl.c delete mode 100644 eventl.h delete mode 100644 http.c delete mode 100644 http.h delete mode 100644 http_command.c delete mode 100644 http_command.h delete mode 100644 many.pz delete mode 100644 pazpar2.c delete mode 100644 pazpar2.h delete mode 100644 reclists.c delete mode 100644 reclists.h delete mode 100644 relevance.c delete mode 100644 relevance.h create mode 100644 src/Makefile create mode 100644 src/command.c create mode 100644 src/command.h create mode 100644 src/eventl.c create mode 100644 src/eventl.h create mode 100644 src/http.c create mode 100644 src/http.h create mode 100644 src/http_command.c create mode 100644 src/http_command.h create mode 100644 src/pazpar2.c create mode 100644 src/pazpar2.h create mode 100644 src/reclists.c create mode 100644 src/reclists.h create mode 100644 src/relevance.c create mode 100644 src/relevance.h create mode 100644 src/termlists.c create mode 100644 src/termlists.h create mode 100644 src/util.c create mode 100644 src/util.h delete mode 100644 termlists.c delete mode 100644 termlists.h delete mode 100644 test.pz delete mode 100644 util.c delete mode 100644 util.h diff --git a/Makefile b/Makefile deleted file mode 100644 index b5eefb2..0000000 --- a/Makefile +++ /dev/null @@ -1,45 +0,0 @@ -# ParaZ. Copyright (C) 2000-2004, Index Data ApS -# All rights reserved. -# $Id: Makefile,v 1.6 2006-12-19 04:49:34 quinn Exp $ - -SHELL=/bin/sh - -CC=gcc - -YAZCONF=yaz-config -YAZLIBS=`$(YAZCONF) --libs` -YAZCFLAGS=`$(YAZCONF) --cflags` - -PROG=pazpar2 -PROGO=pazpar2.o eventl.o util.o command.o http.o http_command.o termlists.o \ - reclists.o relevance.o - -all: $(PROG) - -$(PROG): $(PROGO) - $(CC) $(CFLAGS) $(YAZCFLAGS) -o $(PROG) $(PROGO) $(YAZLIBS) - -.c.o: - $(CC) -c $(CFLAGS) -I. $(YAZCFLAGS) $< - -clean: - rm -f *.[oa] test core mon.out gmon.out errlist $(PROG) - - -## Dependencies go below - -command.o: command.c command.h util.h eventl.h pazpar2.h termlists.h \ - relevance.h reclists.h -eventl.o: eventl.c eventl.h -http.o: http.c command.h util.h eventl.h pazpar2.h termlists.h \ - relevance.h reclists.h http.h http_command.h -http_command.o: http_command.c command.h util.h eventl.h pazpar2.h \ - termlists.h relevance.h reclists.h http.h http_command.h -pazpar2.o: pazpar2.c pazpar2.h termlists.h relevance.h reclists.h \ - eventl.h command.h http.h -reclists.o: reclists.c pazpar2.h termlists.h relevance.h reclists.h \ - eventl.h -relevance.o: relevance.c relevance.h pazpar2.h termlists.h eventl.h \ - reclists.h -termlists.o: termlists.c termlists.h -util.o: util.c diff --git a/bibs.pz b/bibs.pz deleted file mode 100644 index bafdf1c..0000000 --- a/bibs.pz +++ /dev/null @@ -1,12 +0,0 @@ -target amicus.nlc-bnc.ca/NL -target melvyl.cdlib.org/cdl90 -target z3950.loc.gov:7090/voyager -target library2.mcmaster.ca/MORRIS -target melvyl.cdlib.org/CDL90 -target info.library.mun.ca:2200/innopac -target 137.122.27.60:210/innopac -target z3950.rlg.org/BIB -target 141.117.13.11:210/innopac -target siris-libraries.si.edu -target tpldynix.torontopubliclibrary.ca:210/server -target sirsi.library.utoronto.ca:2200/UNICORN diff --git a/command.c b/command.c deleted file mode 100644 index 1b9fd6d..0000000 --- a/command.c +++ /dev/null @@ -1,392 +0,0 @@ -/* $Id: command.c,v 1.5 2006-12-03 06:43:24 quinn Exp $ */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "command.h" -#include "util.h" -#include "eventl.h" -#include "pazpar2.h" - -extern IOCHAN channel_list; - -struct command_session { - IOCHAN channel; - char *outbuf; - - int outbuflen; - int outbufwrit; - - struct session *psession; -}; - -void command_destroy(struct command_session *s); -void command_prompt(struct command_session *s); -void command_puts(struct command_session *s, const char *buf); - -static int cmd_quit(struct command_session *s, char **argv, int argc) -{ - IOCHAN i = s->channel; - close(iochan_getfd(i)); - iochan_destroy(i); - command_destroy(s); - return 0; -} - -#ifdef GAGA -static int cmd_load(struct command_session *s, char **argv, int argc) -{ - if (argc != 2) { - command_puts(s, "Usage: load filename\n"); - } - if (load_targets(s->psession, argv[1]) < 0) - command_puts(s, "Failed to open file\n"); - return 1; -} -#endif - -static int cmd_search(struct command_session *s, char **argv, int argc) -{ - if (argc != 2) - { - command_puts(s, "Usage: search word\n"); - return 1; - } - search(s->psession, argv[1]); - return 1; -} - -static int cmd_hitsbytarget(struct command_session *s, char **argv, int argc) -{ - int count; - int i; - - struct hitsbytarget *ht = hitsbytarget(s->psession, &count); - for (i = 0; i < count; i++) - { - char buf[1024]; - - sprintf(buf, "%s: %d (%d records, diag=%d, state=%s conn=%d)\n", ht[i].id, ht[i].hits, - ht[i].records, ht[i].diagnostic, ht[i].state, ht[i].connected); - command_puts(s, buf); - } - return 1; -} - -static int cmd_show(struct command_session *s, char **argv, int argc) -{ - struct record **recs; - int num = 10; - int merged, total; - int i; - - if (argc == 2) - num = atoi(argv[1]); - - recs = show(s->psession, 0, &num, &merged, &total); - - for (i = 0; i < num; i++) - { - int rc; - struct record *cnode; - struct record *r = recs[i]; - - command_puts(s, r->merge_key); - for (rc = 1, cnode = r->next_cluster; cnode; cnode = cnode->next_cluster, rc++) - ; - if (rc > 1) - { - char buf[256]; - sprintf(buf, " (%d records)", rc); - command_puts(s, buf); - } - command_puts(s, "\n"); - } - return 1; -} - -static int cmd_stat(struct command_session *s, char **argv, int argc) -{ - char buf[1024]; - struct statistics stat; - - statistics(s->psession, &stat); - sprintf(buf, "Number of connections: %d\n", stat.num_clients); - command_puts(s, buf); - if (stat.num_no_connection) - { - sprintf(buf, "#No_connection: %d\n", stat.num_no_connection); - command_puts(s, buf); - } - if (stat.num_connecting) - { - sprintf(buf, "#Connecting: %d\n", stat.num_connecting); - command_puts(s, buf); - } - if (stat.num_initializing) - { - sprintf(buf, "#Initializing: %d\n", stat.num_initializing); - command_puts(s, buf); - } - if (stat.num_searching) - { - sprintf(buf, "#Searching: %d\n", stat.num_searching); - command_puts(s, buf); - } - if (stat.num_presenting) - { - sprintf(buf, "#Ppresenting: %d\n", stat.num_presenting); - command_puts(s, buf); - } - if (stat.num_idle) - { - sprintf(buf, "#Idle: %d\n", stat.num_idle); - command_puts(s, buf); - } - if (stat.num_failed) - { - sprintf(buf, "#Failed: %d\n", stat.num_failed); - command_puts(s, buf); - } - if (stat.num_error) - { - sprintf(buf, "#Error: %d\n", stat.num_error); - command_puts(s, buf); - } - return 1; -} - -static struct { - char *cmd; - int (*fun)(struct command_session *s, char *argv[], int argc); -} cmd_array[] = { - {"quit", cmd_quit}, -#ifdef GAGA - {"load", cmd_load}, -#endif - {"find", cmd_search}, - {"ht", cmd_hitsbytarget}, - {"stat", cmd_stat}, - {"show", cmd_show}, - {0,0} -}; - -void command_command(struct command_session *s, char *command) -{ - char *p; - char *argv[20]; - int argc = 0; - int i; - int res = -1; - - p = command; - while (*p) - { - while (isspace(*p)) - p++; - if (!*p) - break; - argv[argc++] = p; - while (*p && !isspace(*p)) - p++; - if (!*p) - break; - *(p++) = '\0'; - } - if (argc) { - for (i = 0; cmd_array[i].cmd; i++) - { - if (!strcmp(cmd_array[i].cmd, argv[0])) { - res = (cmd_array[i].fun)(s, argv, argc); - - break; - } - } - if (res < 0) { - command_puts(s, "Unknown command.\n"); - command_prompt(s); - } - else if (res == 1) { - command_prompt(s); - } - } - else - command_prompt(s); - -} - - -static void command_io(IOCHAN i, int event) -{ - int res; - char buf[1024]; - struct command_session *s; - - s = iochan_getdata(i); - - - switch (event) - { - case EVENT_INPUT: - res = read(iochan_getfd(i), buf, 1024); - if (res <= 0) - { - yaz_log(YLOG_WARN|YLOG_ERRNO, "read command"); - close(iochan_getfd(i)); - iochan_destroy(i); - command_destroy(s); - return; - } - if (!index(buf, '\n')) { - yaz_log(YLOG_WARN|YLOG_ERRNO, "Did not receive complete command"); - close(iochan_getfd(i)); - iochan_destroy(i); - command_destroy(s); - return; - } - buf[res] = '\0'; - command_command(s, buf); - break; - case EVENT_OUTPUT: - if (!s->outbuflen || s->outbufwrit < 0) - { - yaz_log(YLOG_WARN, "Called with outevent but no data"); - iochan_clearflag(i, EVENT_OUTPUT); - } - else - { - res = write(iochan_getfd(i), s->outbuf + s->outbufwrit, s->outbuflen - - s->outbufwrit); - if (res < 0) { - yaz_log(YLOG_WARN|YLOG_ERRNO, "write command"); - close(iochan_getfd(i)); - iochan_destroy(i); - command_destroy(s); - } - else - { - s->outbufwrit += res; - if (s->outbufwrit >= s->outbuflen) - { - s->outbuflen = s->outbufwrit = 0; - iochan_clearflag(i, EVENT_OUTPUT); - } - } - } - break; - default: - yaz_log(YLOG_WARN, "Bad voodoo on socket"); - } -} - -void command_puts(struct command_session *s, const char *buf) -{ - int len = strlen(buf); - memcpy(s->outbuf + s->outbuflen, buf, len); - s->outbuflen += len; - iochan_setflag(s->channel, EVENT_OUTPUT); -} - -void command_prompt(struct command_session *s) -{ - command_puts(s, "Pazpar2> "); -} - - -/* Accept a new command connection */ -static void command_accept(IOCHAN i, int event) -{ - struct sockaddr_in addr; - int fd = iochan_getfd(i); - socklen_t len; - int s; - IOCHAN c; - struct command_session *ses; - int flags; - - len = sizeof addr; - if ((s = accept(fd, (struct sockaddr *) &addr, &len)) < 0) - { - yaz_log(YLOG_WARN|YLOG_ERRNO, "accept"); - return; - } - if ((flags = fcntl(s, F_GETFL, 0)) < 0) - yaz_log(YLOG_FATAL|YLOG_ERRNO, "fcntl"); - if (fcntl(s, F_SETFL, flags | O_NONBLOCK) < 0) - yaz_log(YLOG_FATAL|YLOG_ERRNO, "fcntl2"); - - yaz_log(YLOG_LOG, "New command connection"); - c = iochan_create(s, command_io, EVENT_INPUT | EVENT_EXCEPT); - - ses = xmalloc(sizeof(*ses)); - ses->outbuf = xmalloc(50000); - ses->outbuflen = 0; - ses->outbufwrit = 0; - ses->channel = c; - ses->psession = new_session(); - iochan_setdata(c, ses); - - command_puts(ses, "Welcome to pazpar2\n\n"); - command_prompt(ses); - - c->next = channel_list; - channel_list = c; -} - -void command_destroy(struct command_session *s) { - xfree(s->outbuf); - xfree(s); -} - -/* Create a command-channel listener */ -void command_init(int port) -{ - IOCHAN c; - int l; - struct protoent *p; - struct sockaddr_in myaddr; - int one = 1; - - yaz_log(YLOG_LOG, "Command port is %d", port); - if (!(p = getprotobyname("tcp"))) { - abort(); - } - if ((l = socket(PF_INET, SOCK_STREAM, p->p_proto)) < 0) - yaz_log(YLOG_FATAL|YLOG_ERRNO, "socket"); - if (setsockopt(l, SOL_SOCKET, SO_REUSEADDR, (char*) - &one, sizeof(one)) < 0) - abort(); - - bzero(&myaddr, sizeof myaddr); - myaddr.sin_family = AF_INET; - myaddr.sin_addr.s_addr = INADDR_ANY; - myaddr.sin_port = htons(port); - if (bind(l, (struct sockaddr *) &myaddr, sizeof myaddr) < 0) - yaz_log(YLOG_FATAL|YLOG_ERRNO, "bind"); - if (listen(l, SOMAXCONN) < 0) - yaz_log(YLOG_FATAL|YLOG_ERRNO, "listen"); - - c = iochan_create(l, command_accept, EVENT_INPUT | EVENT_EXCEPT); - //iochan_setdata(c, &l); - c->next = channel_list; - channel_list = c; -} - -/* - * Local variables: - * c-basic-offset: 4 - * indent-tabs-mode: nil - * End: - * vim: shiftwidth=4 tabstop=8 expandtab - */ diff --git a/command.h b/command.h deleted file mode 100644 index 25e5b99..0000000 --- a/command.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef COMMAND_H -#define COMMAND_H - -void command_init(int port); - -#endif diff --git a/danish.pz b/danish.pz deleted file mode 100644 index ba1cc52..0000000 --- a/danish.pz +++ /dev/null @@ -1,21 +0,0 @@ -target cosmos.dnlb.dk:9909/DNL01 -target odin.dtv.dk:210/DTV01 -target dvjb1.kvl.dk:9909/dvj01 -target hermes.lib.cbs.dk:2100/S -target a800.hha.dk:9909/hba01 -target z3950.kb.dk:2100/kgl01 -target hans.ruc.dk:2210/S -target a500.aub.auc.dk:9909/AUB01 -target z3950.dbc.dk:213/def1forsk -target z3950.dbc.dk:213/def1total -target z3950.dbc.dk:213/danbibv2 -target hcb.bibnet.dk:2100/S -target z3950.bibsys.no:2100/BIBSYS -target z3950.libris.kb.se:210/libr -target bagel.indexdata.dk/gils -target www.deff.dk:2100/Default -target www.deff.dk:2102/Default -connect -init -search @attr 1=4 danmarks -present diff --git a/defport.pz b/defport.pz deleted file mode 100644 index de0df23..0000000 --- a/defport.pz +++ /dev/null @@ -1,5 +0,0 @@ -target www.deff.dk:2102/Default -connect -init -search @attr 1=4 danmarks -present diff --git a/etc/bibs.pz b/etc/bibs.pz new file mode 100644 index 0000000..bafdf1c --- /dev/null +++ b/etc/bibs.pz @@ -0,0 +1,12 @@ +target amicus.nlc-bnc.ca/NL +target melvyl.cdlib.org/cdl90 +target z3950.loc.gov:7090/voyager +target library2.mcmaster.ca/MORRIS +target melvyl.cdlib.org/CDL90 +target info.library.mun.ca:2200/innopac +target 137.122.27.60:210/innopac +target z3950.rlg.org/BIB +target 141.117.13.11:210/innopac +target siris-libraries.si.edu +target tpldynix.torontopubliclibrary.ca:210/server +target sirsi.library.utoronto.ca:2200/UNICORN diff --git a/etc/danish.pz b/etc/danish.pz new file mode 100644 index 0000000..ba1cc52 --- /dev/null +++ b/etc/danish.pz @@ -0,0 +1,21 @@ +target cosmos.dnlb.dk:9909/DNL01 +target odin.dtv.dk:210/DTV01 +target dvjb1.kvl.dk:9909/dvj01 +target hermes.lib.cbs.dk:2100/S +target a800.hha.dk:9909/hba01 +target z3950.kb.dk:2100/kgl01 +target hans.ruc.dk:2210/S +target a500.aub.auc.dk:9909/AUB01 +target z3950.dbc.dk:213/def1forsk +target z3950.dbc.dk:213/def1total +target z3950.dbc.dk:213/danbibv2 +target hcb.bibnet.dk:2100/S +target z3950.bibsys.no:2100/BIBSYS +target z3950.libris.kb.se:210/libr +target bagel.indexdata.dk/gils +target www.deff.dk:2100/Default +target www.deff.dk:2102/Default +connect +init +search @attr 1=4 danmarks +present diff --git a/etc/defport.pz b/etc/defport.pz new file mode 100644 index 0000000..de0df23 --- /dev/null +++ b/etc/defport.pz @@ -0,0 +1,5 @@ +target www.deff.dk:2102/Default +connect +init +search @attr 1=4 danmarks +present diff --git a/etc/many.pz b/etc/many.pz new file mode 100644 index 0000000..db1bd9c --- /dev/null +++ b/etc/many.pz @@ -0,0 +1,299 @@ +target 12.4.242.230:210/Unicorn +target 128.119.168.1:210/INNOPAC +target 128.119.168.2:210/INNOPAC +target 128.197.130.200:210/INNOPAC +target 128.218.15.173:210/INNOPAC +target 128.83.82.249:210/INNOPAC +target 129.234.4.6:210/INNOPAC +target 129.65.20.27:210/Innopac +target 129.78.72.7:210/INNOPAC +target 130.102.42.45:210/Innopac +target 130.111.64.52:210/INNOPAC +target 130.206.134.239:210/INNOPAC +target 130.212.18.200:210/Innopac +target 130.91.144.191:210/INNOPAC +target 131.165.177.76:2100/S +target 131.165.97.2:210/S +target 131.232.13.6:210/INNOPAC +target 134.115.152.130:210/Innopac +target 134.154.30.10:210/Innopac +target 137.187.166.250:210/INNOPAC +target 137.45.212.100:210/INNOPAC +target 141.215.16.4:210/INNOPAC +target 144.37.1.4:210/Innopac +target 147.143.2.5:210/INNOPAC +target 147.175.67.227:1111/DEFAULT +target 147.175.67.227:2222/DEFAULT +target 147.29.116.113:2100/DAB01 +target 147.29.116.113:2100/KGB01 +target 147.29.116.113:2100/PVS +target 147.29.116.113:2100/vej01 +target 149.130.90.2:210/INNOPAC +target 149.175.20.20:210/INNOPAC +target 161.210.213.100:210/INNOPAC +target 169.139.225.46:210/horizon +target 192.188.131.54:2200/UNICORN +target 192.197.190.4:210/Advance +target 192.65.218.23:210/DREWDB +target 193.162.157.130:2100/S +target 193.43.102.194:21210/ADVANCE +target 193.52.199.5:21210/ADVANCE +target 193.59.172.100:210/INNOPAC +target 194.182.134.04:210/S +target 194.239.156.133:2100/S +target 195.215.203.98:2100/S +target 195.249.206.204:210/Default +target 195.249.206.204:210/default +target 195.41.8.218:2100/S +target 195.50.60.15:2100/CB +target 195.50.60.15:2101/FB +target 195.50.60.15:2102/MB +target 195.50.60.15:2103/PB +target 196.6.221.16:210/INNOPAC +target 198.62.73.72:210/INNOPAC +target 199.184.19.10:210/innopac +target 199.88.71.7:210/INNOPAC +target 202.14.152.4:210/INNOPAC +target 202.40.151.2:210/INNOPAC +target 202.40.216.17:210/INNOPAC +target 206.48.3.167:7090/Voyager +target 207.194.254.193:210/advance +target Exlibris.albertslundbib.dk:2100/S +target Exlibris.daimi.au.dk:2104/FYS01 +target Exlibris.daimi.au.dk:2104/GEO01 +target Exlibris.daimi.au.dk:2104/KEM01 +target Exlibris.daimi.au.dk:2104/MAT01 +target Exlibris.daimi.au.dk:2104/VID01 +target Z3950.maribo.integrabib.dk:210/Default +target Z3950cat.bl.uk:9909/BLAC +target acorn.library.vanderbilt.edu:2200/ACORN +target advance.biblio.polymtl.ca:210/ADVANCE +target advance.lib.rmit.edu.au:21210/Advance +target albert.rit.edu:210/INNOPAC +target aleph.mcgill.ca:210/MUSE +target aleph.unibas.ch:9909/DSV01 +target alpha.svkol.cz:9909/SVK01 +target alpha.svkol.cz:9909/SVK02 +target atrium.bib.umontreal.ca:210/ADVANCE +target bagel.indexdata.dk:210/gils +target bagel.indexdata.dk:210/marc +target bcr1.larioja.org:210/AbsysE +target bib.gladsaxe.dk:2100/S +target bib.gladsaxe.dk:2101/H +target bib.gladsaxe.dk:2101/S +target biblio.crusca.fi.it:9909/ADC01 +target biblio.hec.ca:210/hec +target biblios.dic.uchile.cl:210/BELLO +target biblioteca.uc3m.es:2200/unicorn +target bobcat.nyu.edu:210/ADVANCE +target books.luther.edu:210/innopac +target brocar.unavarra.es:9999/libros +target brocar.unavarra.es:9999/revistas +target cat.cisti.nrc.ca:210/INNOPAC +target cat.lib.grin.edu:210/innopac +target catalog.bedfordlibrary.org:210/Innopac +target catalog.crl.edu:210/INNOPAC +target catalog.lib.jhu.edu:210/horizon +target cisne.sim.ucm.es:210/INNOPAC +target clavis.ucalgary.ca:2200/UNICORN +target cornelia.whoi.edu:7090/VOYAGER +target corpsgeo1.usace.army.mil:2210/geo1 +target csulib.ctstateu.edu:210/INNOPAC +target echea.ru.ac.za:210/INNOPAC +target edcsns17.cr.usgs.gov:6675/AVHRR +target edcsns17.cr.usgs.gov:6675/DOQ +target edcsns17.cr.usgs.gov:6675/HAZARDS +target edcsns17.cr.usgs.gov:6675/LANDSAT_MSS +target edcsns17.cr.usgs.gov:6675/LANDSAT_TM +target edcsns17.cr.usgs.gov:6675/NAPP +target edcsns17.cr.usgs.gov:6675/NHAP +target eleanor.lib.gla.ac.uk:210/INNOPAC +target eusa.library.net:5666/eusa +target explore.up.ac.za:210/INNOPAC +target gaudi.ua.es:2200/unicorn +target gb.glostrupbib.dk:2100/S +target hcb.bibnet.dk:2100/S +target helios.nlib.ee:210/INNOPAC +target hermes.lib.cbs.dk:2100/S +target hkbulib.hkbu.edu.hk:210/INNOPAC +target ikast.ikast.bibnet.dk:2100/S +target indexgeo.com.au:6668/dataset +target indexgeo.net:5511/act +target info.library.mun.ca:2200/UNICORN +target innopac.lib.kth.se:210/innopac +target innopac.wits.ac.za:210/INNOPAC +target itec.mty.itesm.mx:210/innopac +target jasper.acadiau.ca:2200/UNICORN +target jenda.lib.nccu.edu.tw:210/INNOPAC +target kat.vkol.cz:9909/SVK01 +target kat.vkol.cz:9909/SVK02 +target kraka.birkerod.bibnet.dk:2100/S +target ksclib.keene.edu:210/INNOPAC +target l1.uwaterloo.ca:7090/VOYAGER +target lance.missouri.edu:210/INNOPAC +target lib.leeds.ac.uk:210/INNOPAC +target lib.soas.ac.uk:210/innopac +target libadfa.adfa.edu.au:7090/VOYAGER +target libcat.qut.edu.au:210/INNOPAC +target liber.acadlib.lv:211/codex.previous +target liber.acadlib.lv:211/liber1 +target liber.acadlib.lv:212/liber1 +target library.anu.edu.au:210/INNOPAC +target library.ballarat.edu.au:210/INNOPAC +target library.bangor.ac.uk:210/INNOPAC +target library.bilgi.edu.tr:210/INNOPAC +target library.brad.ac.uk:210/xxDefault +target library.brunel.ac.uk:2200/UNICORN +target library.daemen.edu:210/innopac +target library.deakin.edu.au:210/INNOPAC +target library.gu.edu.au:21210/ADVANCE +target library.hud.ac.uk:210/HORIZON +target library.hull.ac.uk:210/INNOPAC +target library.latrobe.edu.au:210/INNOPAC +target library.lbc.edu:7090/voyager +target library.mdx.ac.uk:210/mdx +target library.newcastle.edu.au:210/INNOPAC +target library.ox.ac.uk:210/ADVANCE +target library.tcd.ie:210/advance +target library.ucc.ie:210/INNOPAC +target library.uh.edu:210/INNOPAC +target library.vu.edu.au:210/INNOPAC +target library2.open.ac.uk:7090/voyager +target libsys.lib.hull.ac.uk:210/INNOPAC +target libuni01.ccc.govt.nz:2200/unicorn +target libuni01.ccc.govt.nz:2220/cinch +target libuni01.ccc.govt.nz:2220/papers +target libunix.ku.edu.tr:210/INNOPAC +target linc.nus.edu.sg:210/innopac +target lion.swem.wm.edu:2200/unicorn +target loke.dcbib.dk:2100/S +target lrpapp.cc.umanitoba.ca:2200/unicorn +target malad2.mala.bc.ca:2200/UNICORN +target malad2.mala.bc.ca:2200/unicorn +target marte.biblioteca.upm.es:2200/unicorn +target medupe.ub.bw:210/INNOPAC +target melvyl.cdlib.org:210/CDL90 +target mercury.concordia.ca:210/Innopac +target merihobu.utlib.ee:210/INNOPAC +target merlinweb.ville.montreal.qc.ca:2100/Z3950S +target nbinet.ncl.edu.tw:210/INNOPAC +target ncsulib.lib.ncsu.edu:210/MARION +target newlib.ci.lubbock.tx.us:2200/unicorn +target nobis.njl.dk:210/S +target nrhcat.library.nrhtx.com:210/INNOPAC +target oda.fynbib.dk:2100/S +target odin2.bib.sdu.dk:210/Horizon +target odin2.bib.sdu.dk:210/otb +target opac.nebis.ch:9909/NEBIS +target opac.sbn.it:2100/nopac +target opac.sbn.it:3950/nopac +target opac.shu.ac.uk:210/INNOPAC +target opac.unifi.it:210/OPAC +target opac.utmem.edu:210/INNOPAC +target orac.lib.uts.edu.au:210/INNOPAC +target pollux.dslak.co.nz:210/MARNSL +target prodorbis.library.yale.edu:7090/voyager +target quest.unb.ca:2200/unicorn +target rebiun.crue.org:210/absysREBIUN +target roble.unizar.es:210/INNOPAC +target rs6000.nshpl.library.ns.ca:210/AVR +target rs6000.nshpl.library.ns.ca:210/CBR +target rs6000.nshpl.library.ns.ca:210/CEH +target rs6000.nshpl.library.ns.ca:210/CUR +target rs6000.nshpl.library.ns.ca:210/ECR +target rs6000.nshpl.library.ns.ca:210/NSP +target rs6000.nshpl.library.ns.ca:210/PAR +target rs6000.nshpl.library.ns.ca:210/SSR +target rs6000.nshpl.library.ns.ca:210/WCR +target sabio.library.arizona.edu:210/innopac +target salty.slcpl.lib.ut.us:210/INNOPAC +target scotty.mhsl.uab.edu:7090/VOYAGER +target serapis.leedsmet.ac.uk:2200/unicorn +target serapis.lmu.ac.uk:2200/unicorn +target sflwww.er.usgs.gov:251/sflwwwmeta +target silkbib.bib.dk:2100/S +target sirsi.library.utoronto.ca:2200/UNICORN +target star.tsl.state.tx.us:2200/unicorn +target strife.library.uwa.edu.au:210/INNOPAC +target strife.library.uwa.edu.au:210/innopac +target sundog.usask.ca:210/INNOPAC +target tegument.nlm.nih.gov:7090/VOYAGER +target titus.folger.edu:7090/VOYAGER +target tora.htk.dk:2100/S +target troy.lib.sfu.ca:210/INNOPAC +target unicorn.lib.ic.ac.uk:2200/IC +target unicorn.qmced.ac.uk:2200/unicorn +target unicornio.umb.edu.co:2200/Unicorn +target ustlib.ust.hk:210/INNOPAC +target vax.lib.state.ca.us:210/marion +target vlsirsi.rdg.ac.uk:2200/UNICORN +target voyager.its.csiro.au:7090/VOYAGER +target voyager.tcs.tulane.edu:7090/VOYAGER +target voyager.wrlc.org:7090/VOYAGER +target www.agralin.nl:210/clcz3950 +target www.bibliotek.taarnby.dk:210/S +target www.congreso.es:2100/ABSYSBCD +target www.csc.noaa.gov:2210/CSC_Products +target www.eevl.ac.uk:2100/eevlacuk +target www.grimes.lib.ia.us:210/main +target www.knihovna.cz:8888/un_cat +target www.library.nps.gov:7090/VOYAGER +target www.sbn.it:2100/nopac +target www.scran.ac.uk:3950/default +target yulib001.mc.yu.edu:1111/DEFAULT +target z39.libis.lt:210/KNYGOS +target z3950.ahds.ac.uk:210/CMF +target z3950.ahds.ac.uk:210/DC +target z3950.bcl.jcyl.es:210/AbsysBCL +target z3950.bcl.jcyl.es:210/AbsysCCFL +target z3950.bergen.folkebibl.no:210/data +target z3950.biblos.pk.edu.pl:4210/books +target z3950.bibsys.no:2100/BIBSYS +target z3950.bibsys.no:2100/PERI +target z3950.copac.ac.uk:210/copac +target z3950.copac.ac.uk:2100/COPAC +target z3950.deich.folkebibl.no:210/data +target z3950.dragsholm.integrabib.dk:210/default +target z3950.fcla.edu:210/CF +target z3950.fcla.edu:210/FA +target z3950.fcla.edu:210/FI +target z3950.fcla.edu:210/FO +target z3950.fcla.edu:210/QB +target z3950.fcla.edu:210/RF +target z3950.fcla.edu:210/SR +target z3950.fcla.edu:210/ST +target z3950.gbv.de:20010/GVK +target z3950.gbv.de:20011/GVK +target z3950.gbv.de:20011/GVK +target z3950.gbv.de:210/GVK +target z3950.gbv.de:210/GVK +target z3950.haderslev.integrabib.dk:210/default +target z3950.haslev.integrabib.dk:210/Default +target z3950.hoerning.integrabib.dk:210/default +target z3950.kb.dk:2100/BGF01 +target z3950.kb.dk:2100/BHS01 +target z3950.kb.dk:2100/DRA01 +target z3950.kb.dk:2100/KBB01 +target z3950.kb.dk:2100/KBH01 +target z3950.kb.dk:2100/KGL01 +target z3950.libh.uoc.gr:210/default +target z3950.libr.uoc.gr:210/logios +target z3950.libr.uoc.gr:210/ptolemeos_ii +target z3950.library.wisc.edu:210/madison +target z3950.libris.kb.se:210/libr +target z3950.lillehammer.folkebibl.no:210/data +target z3950.loc.gov:7090/voyager +target z3950.nakskov.integrabib.dk:210/default +target z3950.nb.no:2100/Norbok +target z3950.nb.no:2100/Norper +target z3950.nb.no:2100/Sambok +target z3950.nls.uk:7290/voyager +target z3950.ovid.com:2213/eric +target z3950.ovid.com:2213/pmed +target z3950.rdn.ac.uk:210/xxdefault +target z3950.ringsted.integrabib.dk:210/Default +target z3950.skagen.integrabib.dk:210/default +target z3950.sydfalster.integrabib.dk:210/default +target z3950.trondheim.folkebibl.no:210/data +target z3950.vallensbaek.integrabib.dk:210/default +target z3950.vejen.integrabib.dk:210/default diff --git a/etc/slow.pz b/etc/slow.pz new file mode 100644 index 0000000..933ee67 --- /dev/null +++ b/etc/slow.pz @@ -0,0 +1 @@ +target localhost:9999/Slow diff --git a/etc/test.pz b/etc/test.pz new file mode 100644 index 0000000..0fb10c8 --- /dev/null +++ b/etc/test.pz @@ -0,0 +1 @@ +target localhost:9999/Default diff --git a/eventl.c b/eventl.c deleted file mode 100644 index 87bd1ba..0000000 --- a/eventl.c +++ /dev/null @@ -1,154 +0,0 @@ -/* - * ParaZ - a simple tool for harvesting performance data for parallel - * operations using Z39.50. - * Copyright (c) 2000-2004 Index Data ApS - * See LICENSE file for details. - */ - -/* - * $Id: eventl.c,v 1.2 2006-12-12 02:36:24 quinn Exp $ - * Based on revision YAZ' server/eventl.c 1.29. - */ - -#include -#include -#ifdef WIN32 -#include -#else -#include -#endif -#include -#include -#include - -#include -#include -#include -#include -#include "eventl.h" -#include - -IOCHAN iochan_create(int fd, IOC_CALLBACK cb, int flags) -{ - IOCHAN new_iochan; - - if (!(new_iochan = (IOCHAN)xmalloc(sizeof(*new_iochan)))) - return 0; - new_iochan->destroyed = 0; - new_iochan->fd = fd; - new_iochan->flags = flags; - new_iochan->fun = cb; - new_iochan->force_event = 0; - new_iochan->last_event = new_iochan->max_idle = 0; - new_iochan->next = NULL; - return new_iochan; -} - -int event_loop(IOCHAN *iochans) -{ - do /* loop as long as there are active associations to process */ - { - IOCHAN p, nextp; - fd_set in, out, except; - int res, max; - static struct timeval nullto = {0, 0}, to; - struct timeval *timeout; - - FD_ZERO(&in); - FD_ZERO(&out); - FD_ZERO(&except); - timeout = &to; /* hang on select */ - to.tv_sec = 30; - to.tv_usec = 0; - max = 0; - for (p = *iochans; p; p = p->next) - { - if (p->fd < 0) - continue; - if (p->force_event) - timeout = &nullto; /* polling select */ - if (p->flags & EVENT_INPUT) - FD_SET(p->fd, &in); - if (p->flags & EVENT_OUTPUT) - FD_SET(p->fd, &out); - if (p->flags & EVENT_EXCEPT) - FD_SET(p->fd, &except); - if (p->fd > max) - max = p->fd; - } - if ((res = select(max + 1, &in, &out, &except, timeout)) < 0) - { - if (errno == EINTR) - continue; - else - abort(); - } - for (p = *iochans; p; p = p->next) - { - int force_event = p->force_event; - time_t now = time(0); - - p->force_event = 0; - if (!p->destroyed && ((p->max_idle && now - p->last_event > - p->max_idle) || force_event == EVENT_TIMEOUT)) - { - p->last_event = now; - (*p->fun)(p, EVENT_TIMEOUT); - } - if (p->fd < 0) - continue; - if (!p->destroyed && (FD_ISSET(p->fd, &in) || - force_event == EVENT_INPUT)) - { - p->last_event = now; - (*p->fun)(p, EVENT_INPUT); - } - if (!p->destroyed && (FD_ISSET(p->fd, &out) || - force_event == EVENT_OUTPUT)) - { - p->last_event = now; - (*p->fun)(p, EVENT_OUTPUT); - } - if (!p->destroyed && (FD_ISSET(p->fd, &except) || - force_event == EVENT_EXCEPT)) - { - p->last_event = now; - (*p->fun)(p, EVENT_EXCEPT); - } - } - for (p = *iochans; p; p = nextp) - { - nextp = p->next; - - if (p->destroyed) - { - IOCHAN tmp = p, pr; - - /* Now reset the pointers */ - if (p == *iochans) - *iochans = p->next; - else - { - for (pr = *iochans; pr; pr = pr->next) - if (pr->next == p) - break; - assert(pr); /* grave error if it weren't there */ - pr->next = p->next; - } - if (nextp == p) - nextp = p->next; - xfree(tmp); - } - } - } - while (*iochans); - return 0; -} - -/* - * Local variables: - * c-basic-offset: 4 - * indent-tabs-mode: nil - * End: - * vim: shiftwidth=4 tabstop=8 expandtab - */ diff --git a/eventl.h b/eventl.h deleted file mode 100644 index 9e10817..0000000 --- a/eventl.h +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Copyright (c) 1995-1999, Index Data - * See the file LICENSE for details. - * Sebastian Hammer, Adam Dickmeiss - * - * $Log: eventl.h,v $ - * Revision 1.3 2006-12-12 02:36:24 quinn - * Implemented session timeout; ping command - * - * Revision 1.2 2006/11/18 05:00:38 quinn - * Added record retrieval, etc. - * - * Revision 1.1.1.1 2006/11/14 20:44:38 quinn - * PazPar2 - * - * Revision 1.1.1.1 2000/02/23 14:40:18 heikki - * Original import to cvs - * - * Revision 1.11 1999/04/20 09:56:48 adam - * Added 'name' paramter to encoder/decoder routines (typedef Odr_fun). - * Modified all encoders/decoders to reflect this change. - * - * Revision 1.10 1998/01/29 13:30:23 adam - * Better event handle system for NT/Unix. - * - * Revision 1.9 1997/09/01 09:31:48 adam - * Removed definition statserv_remove from statserv.h to eventl.h. - * - * Revision 1.8 1995/06/19 12:39:09 quinn - * Fixed bug in timeout code. Added BER dumper. - * - * Revision 1.7 1995/06/16 10:31:34 quinn - * Added session timeout. - * - * Revision 1.6 1995/05/16 08:51:02 quinn - * License, documentation, and memory fixes - * - * Revision 1.5 1995/05/15 11:56:37 quinn - * Asynchronous facilities. Restructuring of seshigh code. - * - * Revision 1.4 1995/03/27 08:34:23 quinn - * Added dynamic server functionality. - * Released bindings to session.c (is now redundant) - * - * Revision 1.3 1995/03/15 08:37:42 quinn - * Now we're pretty much set for nonblocking I/O. - * - * Revision 1.2 1995/03/14 10:28:00 quinn - * More work on demo server. - * - * Revision 1.1 1995/03/10 18:22:45 quinn - * The rudiments of an asynchronous server. - * - */ - -#ifndef EVENTL_H -#define EVENTL_H - -#include - -struct iochan; - -typedef void (*IOC_CALLBACK)(struct iochan *i, int event); - -typedef struct iochan -{ - int fd; - int flags; -#define EVENT_INPUT 0x01 -#define EVENT_OUTPUT 0x02 -#define EVENT_EXCEPT 0x04 -#define EVENT_TIMEOUT 0x08 -#define EVENT_WORK 0x10 - int force_event; - IOC_CALLBACK fun; - void *data; - int destroyed; - time_t last_event; - time_t max_idle; - - struct iochan *next; -} *IOCHAN; - -#define iochan_destroy(i) (void)((i)->destroyed = 1) -#define iochan_getfd(i) ((i)->fd) -#define iochan_setfd(i, f) ((i)->fd = (f)) -#define iochan_getdata(i) ((i)->data) -#define iochan_setdata(i, d) ((i)->data = d) -#define iochan_getflags(i) ((i)->flags) -#define iochan_setflags(i, d) ((i)->flags = d) -#define iochan_setflag(i, d) ((i)->flags |= d) -#define iochan_clearflag(i, d) ((i)->flags &= ~(d)) -#define iochan_getflag(i, d) ((i)->flags & d ? 1 : 0) -#define iochan_getfun(i) ((i)->fun) -#define iochan_setfun(i, d) ((i)->fun = d) -#define iochan_setevent(i, e) ((i)->force_event = (e)) -#define iochan_getnext(i) ((i)->next) -#define iochan_settimeout(i, t) ((i)->max_idle = (t), (i)->last_event = time(0)) -#define iochan_activity(i) ((i)->last_event = time(0)) - -IOCHAN iochan_create(int fd, IOC_CALLBACK cb, int flags); -int event_loop(IOCHAN *iochans); - -#endif diff --git a/http.c b/http.c deleted file mode 100644 index bf47090..0000000 --- a/http.c +++ /dev/null @@ -1,854 +0,0 @@ -/* - * $Id: http.c,v 1.6 2006-12-19 04:49:34 quinn Exp $ - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "command.h" -#include "util.h" -#include "eventl.h" -#include "pazpar2.h" -#include "http.h" -#include "http_command.h" - -static void proxy_io(IOCHAN i, int event); -static struct http_channel *http_create(void); -static void http_destroy(IOCHAN i); - -extern IOCHAN channel_list; - -static struct sockaddr_in *proxy_addr = 0; // If this is set, we proxy normal HTTP requests -static char proxy_url[256] = ""; -static struct http_buf *http_buf_freelist = 0; -static struct http_channel *http_channel_freelist = 0; - -static struct http_buf *http_buf_create() -{ - struct http_buf *r; - - if (http_buf_freelist) - { - r = http_buf_freelist; - http_buf_freelist = http_buf_freelist->next; - } - else - r = xmalloc(sizeof(struct http_buf)); - r->offset = 0; - r->len = 0; - r->next = 0; - return r; -} - -static void http_buf_destroy(struct http_buf *b) -{ - b->next = http_buf_freelist; - http_buf_freelist = b; -} - -static void http_buf_destroy_queue(struct http_buf *b) -{ - struct http_buf *p; - while (b) - { - p = b->next; - http_buf_destroy(b); - b = p; - } -} - -#ifdef GAGA -// Calculate length of chain -static int http_buf_len(struct http_buf *b) -{ - int sum = 0; - for (; b; b = b->next) - sum += b->len; - return sum; -} -#endif - -static struct http_buf *http_buf_bybuf(char *b, int len) -{ - struct http_buf *res = 0; - struct http_buf **p = &res; - - while (len) - { - *p = http_buf_create(); - int tocopy = len; - if (tocopy > HTTP_BUF_SIZE) - tocopy = HTTP_BUF_SIZE; - memcpy((*p)->buf, b, tocopy); - (*p)->len = tocopy; - len -= tocopy; - b += tocopy; - p = &(*p)->next; - } - return res; -} - -// Add a (chain of) buffers to the end of an existing queue. -static void http_buf_enqueue(struct http_buf **queue, struct http_buf *b) -{ - while (*queue) - queue = &(*queue)->next; - *queue = b; -} - -static struct http_buf *http_buf_bywrbuf(WRBUF wrbuf) -{ - // Heavens to Betsy (buf)! - return http_buf_bybuf(wrbuf_buf(wrbuf), wrbuf_len(wrbuf)); -} - -// Non-destructively collapse chain of buffers into a string (max *len) -// Return -static int http_buf_peek(struct http_buf *b, char *buf, int len) -{ - int rd = 0; - while (b && rd < len) - { - int toread = len - rd; - if (toread > b->len) - toread = b->len; - memcpy(buf + rd, b->buf + b->offset, toread); - rd += toread; - b = b->next; - } - buf[rd] = '\0'; - return rd; -} - -// Ddestructively munch up to len from head of queue. -static int http_buf_read(struct http_buf **b, char *buf, int len) -{ - int rd = 0; - while ((*b) && rd < len) - { - int toread = len - rd; - if (toread > (*b)->len) - toread = (*b)->len; - memcpy(buf + rd, (*b)->buf + (*b)->offset, toread); - rd += toread; - if (toread < (*b)->len) - { - (*b)->len -= toread; - (*b)->offset += toread; - break; - } - else - { - struct http_buf *n = (*b)->next; - http_buf_destroy(*b); - *b = n; - } - } - buf[rd] = '\0'; - return rd; -} - -void static urldecode(char *i, char *o) -{ - while (*i) - { - if (*i == '+') - { - *(o++) = ' '; - i++; - } - else if (*i == '%') - { - i++; - sscanf(i, "%2hhx", o); - i += 2; - o++; - } - else - *(o++) = *(i++); - } - *o = '\0'; -} - -void http_addheader(struct http_response *r, const char *name, const char *value) -{ - struct http_channel *c = r->channel; - struct http_header *h = nmem_malloc(c->nmem, sizeof *h); - h->name = nmem_strdup(c->nmem, name); - h->value = nmem_strdup(c->nmem, value); - h->next = r->headers; - r->headers = h; -} - -char *http_argbyname(struct http_request *r, char *name) -{ - struct http_argument *p; - if (!name) - return 0; - for (p = r->arguments; p; p = p->next) - if (!strcmp(p->name, name)) - return p->value; - return 0; -} - -char *http_headerbyname(struct http_request *r, char *name) -{ - struct http_header *p; - for (p = r->headers; p; p = p->next) - if (!strcmp(p->name, name)) - return p->value; - return 0; -} - -struct http_response *http_create_response(struct http_channel *c) -{ - struct http_response *r = nmem_malloc(c->nmem, sizeof(*r)); - strcpy(r->code, "200"); - r->msg = "OK"; - r->channel = c; - r->headers = 0; - r->payload = 0; - return r; -} - -// Check if we have a complete request. Return 0 or length (including trailing newline) -// FIXME: Does not deal gracefully with requests carrying payload -// but this is kind of OK since we will reject anything other than an empty GET -static int request_check(struct http_buf *queue) -{ - char tmp[4096]; - int len = 0; - char *buf = tmp; - - http_buf_peek(queue, tmp, 4096); - while (*buf) // Check if we have a sequence of lines terminated by an empty line - { - char *b = strstr(buf, "\r\n"); - - if (!b) - return 0; - - len += (b - buf) + 2; - if (b == buf) - return len; - buf = b + 2; - } - return 0; -} - -struct http_request *http_parse_request(struct http_channel *c, struct http_buf **queue, - int len) -{ - struct http_request *r = nmem_malloc(c->nmem, sizeof(*r)); - char *p, *p2; - char tmp[4096]; - char *buf = tmp; - - if (len > 4096) - return 0; - if (http_buf_read(queue, buf, len) < len) - return 0; - - r->channel = c; - r->arguments = 0; - r->headers = 0; - // Parse first line - for (p = buf, p2 = r->method; *p && *p != ' ' && p - buf < 19; p++) - *(p2++) = *p; - if (*p != ' ') - { - yaz_log(YLOG_WARN, "Unexpected HTTP method in request"); - return 0; - } - *p2 = '\0'; - - if (!(buf = strchr(buf, ' '))) - { - yaz_log(YLOG_WARN, "Syntax error in request (1)"); - return 0; - } - buf++; - if (!(p = strchr(buf, ' '))) - { - yaz_log(YLOG_WARN, "Syntax error in request (2)"); - return 0; - } - *(p++) = '\0'; - if ((p2 = strchr(buf, '?'))) // Do we have arguments? - *(p2++) = '\0'; - r->path = nmem_strdup(c->nmem, buf); - if (p2) - { - // Parse Arguments - while (*p2) - { - struct http_argument *a; - char *equal = strchr(p2, '='); - char *eoa = strchr(p2, '&'); - if (!equal) - { - yaz_log(YLOG_WARN, "Expected '=' in argument"); - return 0; - } - if (!eoa) - eoa = equal + strlen(equal); // last argument - else - *(eoa++) = '\0'; - a = nmem_malloc(c->nmem, sizeof(struct http_argument)); - *(equal++) = '\0'; - a->name = nmem_strdup(c->nmem, p2); - urldecode(equal, equal); - a->value = nmem_strdup(c->nmem, equal); - a->next = r->arguments; - r->arguments = a; - p2 = eoa; - } - } - buf = p; - - if (strncmp(buf, "HTTP/", 5)) - strcpy(r->http_version, "1.0"); - else - { - buf += 5; - if (!(p = strstr(buf, "\r\n"))) - return 0; - *(p++) = '\0'; - p++; - strcpy(r->http_version, buf); - buf = p; - } - strcpy(c->version, r->http_version); - - r->headers = 0; - while (*buf) - { - if (!(p = strstr(buf, "\r\n"))) - return 0; - if (p == buf) - break; - else - { - struct http_header *h = nmem_malloc(c->nmem, sizeof(*h)); - if (!(p2 = strchr(buf, ':'))) - return 0; - *(p2++) = '\0'; - h->name = nmem_strdup(c->nmem, buf); - while (isspace(*p2)) - p2++; - if (p2 >= p) // Empty header? - { - buf = p + 2; - continue; - } - *p = '\0'; - h->value = nmem_strdup(c->nmem, p2); - h->next = r->headers; - r->headers = h; - buf = p + 2; - } - } - - return r; -} - - -static struct http_buf *http_serialize_response(struct http_channel *c, - struct http_response *r) -{ - wrbuf_rewind(c->wrbuf); - struct http_header *h; - - wrbuf_printf(c->wrbuf, "HTTP/1.1 %s %s\r\n", r->code, r->msg); - for (h = r->headers; h; h = h->next) - wrbuf_printf(c->wrbuf, "%s: %s\r\n", h->name, h->value); - wrbuf_printf(c->wrbuf, "Content-length: %d\r\n", r->payload ? (int) strlen(r->payload) : 0); - wrbuf_printf(c->wrbuf, "Content-type: text/xml\r\n"); - wrbuf_puts(c->wrbuf, "\r\n"); - - if (r->payload) - wrbuf_puts(c->wrbuf, r->payload); - - return http_buf_bywrbuf(c->wrbuf); -} - -// Serialize a HTTP request -static struct http_buf *http_serialize_request(struct http_request *r) -{ - struct http_channel *c = r->channel; - wrbuf_rewind(c->wrbuf); - struct http_header *h; - struct http_argument *a; - - wrbuf_printf(c->wrbuf, "%s %s", r->method, r->path); - - if (r->arguments) - { - wrbuf_putc(c->wrbuf, '?'); - for (a = r->arguments; a; a = a->next) { - if (a != r->arguments) - wrbuf_putc(c->wrbuf, '&'); - wrbuf_printf(c->wrbuf, "%s=%s", a->name, a->value); - } - } - - wrbuf_printf(c->wrbuf, " HTTP/%s\r\n", r->http_version); - - for (h = r->headers; h; h = h->next) - wrbuf_printf(c->wrbuf, "%s: %s\r\n", h->name, h->value); - - wrbuf_puts(c->wrbuf, "\r\n"); - - return http_buf_bywrbuf(c->wrbuf); -} - - -static int http_weshouldproxy(struct http_request *rq) -{ - if (proxy_addr && !strstr(rq->path, "search.pz2")) - return 1; - return 0; -} - -static int http_proxy(struct http_request *rq) -{ - struct http_channel *c = rq->channel; - struct http_proxy *p = c->proxy; - struct http_header *hp; - struct http_buf *requestbuf; - - if (!p) // This is a new connection. Create a proxy channel - { - int sock; - struct protoent *pe; - int one = 1; - int flags; - - if (!(pe = getprotobyname("tcp"))) { - abort(); - } - if ((sock = socket(PF_INET, SOCK_STREAM, pe->p_proto)) < 0) - { - yaz_log(YLOG_WARN|YLOG_ERRNO, "socket"); - return -1; - } - if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (char*) - &one, sizeof(one)) < 0) - abort(); - if ((flags = fcntl(sock, F_GETFL, 0)) < 0) - yaz_log(YLOG_FATAL|YLOG_ERRNO, "fcntl"); - if (fcntl(sock, F_SETFL, flags | O_NONBLOCK) < 0) - yaz_log(YLOG_FATAL|YLOG_ERRNO, "fcntl2"); - if (connect(sock, (struct sockaddr *) proxy_addr, sizeof(*proxy_addr)) < 0) - if (errno != EINPROGRESS) - { - yaz_log(YLOG_WARN|YLOG_ERRNO, "Proxy connect"); - return -1; - } - - p = xmalloc(sizeof(struct http_proxy)); - p->oqueue = 0; - p->channel = c; - c->proxy = p; - // We will add EVENT_OUTPUT below - p->iochan = iochan_create(sock, proxy_io, EVENT_INPUT); - iochan_setdata(p->iochan, p); - p->iochan->next = channel_list; - channel_list = p->iochan; - } - - // Modify Host: header - for (hp = rq->headers; hp; hp = hp->next) - if (!strcmp(hp->name, "Host")) - break; - if (!hp) - { - yaz_log(YLOG_WARN, "Failed to find Host header in proxy"); - return -1; - } - hp->value = nmem_strdup(c->nmem, proxy_url); - requestbuf = http_serialize_request(rq); - http_buf_enqueue(&p->oqueue, requestbuf); - iochan_setflag(p->iochan, EVENT_OUTPUT); - return 0; -} - -void http_send_response(struct http_channel *ch) -{ - struct http_response *rs = ch->response; - assert(rs); - struct http_buf *hb = http_serialize_response(ch, rs); - if (!hb) - { - yaz_log(YLOG_WARN, "Failed to serialize HTTP response"); - http_destroy(ch->iochan); - } - else - { - http_buf_enqueue(&ch->oqueue, hb); - iochan_setflag(ch->iochan, EVENT_OUTPUT); - ch->state = Http_Idle; - } -} - -static void http_io(IOCHAN i, int event) -{ - struct http_channel *hc = iochan_getdata(i); - - switch (event) - { - int res, reqlen; - struct http_buf *htbuf; - - case EVENT_INPUT: - htbuf = http_buf_create(); - res = read(iochan_getfd(i), htbuf->buf, HTTP_BUF_SIZE -1); - if (res <= 0 && errno != EAGAIN) - { - http_buf_destroy(htbuf); - http_destroy(i); - return; - } - if (res > 0) - { - htbuf->buf[res] = '\0'; - htbuf->len = res; - http_buf_enqueue(&hc->iqueue, htbuf); - } - - if (hc->state == Http_Busy) - return; - - if ((reqlen = request_check(hc->iqueue)) <= 2) - return; - - nmem_reset(hc->nmem); - if (!(hc->request = http_parse_request(hc, &hc->iqueue, reqlen))) - { - yaz_log(YLOG_WARN, "Failed to parse request"); - http_destroy(i); - return; - } - hc->response = 0; - yaz_log(YLOG_LOG, "Request: %s %s v %s", hc->request->method, - hc->request->path, hc->request->http_version); - if (http_weshouldproxy(hc->request)) - http_proxy(hc->request); - else - { - // Execute our business logic! - hc->state = Http_Busy; - http_command(hc); - } - if (hc->iqueue) - { - yaz_log(YLOG_DEBUG, "We think we have more input to read. Forcing event"); - iochan_setevent(i, EVENT_INPUT); - } - - break; - - case EVENT_OUTPUT: - if (hc->oqueue) - { - struct http_buf *wb = hc->oqueue; - res = write(iochan_getfd(hc->iochan), wb->buf + wb->offset, wb->len); - if (res <= 0) - { - yaz_log(YLOG_WARN|YLOG_ERRNO, "write"); - http_destroy(i); - return; - } - if (res == wb->len) - { - hc->oqueue = hc->oqueue->next; - http_buf_destroy(wb); - } - else - { - wb->len -= res; - wb->offset += res; - } - if (!hc->oqueue) { - if (!strcmp(hc->version, "1.0")) - { - http_destroy(i); - return; - } - else - { - iochan_clearflag(i, EVENT_OUTPUT); - if (hc->iqueue) - iochan_setevent(hc->iochan, EVENT_INPUT); - } - } - } - - if (!hc->oqueue && hc->proxy && !hc->proxy->iochan) - http_destroy(i); // Server closed; we're done - break; - default: - yaz_log(YLOG_WARN, "Unexpected event on connection"); - http_destroy(i); - } -} - -// Handles I/O on a client connection to a backend web server (proxy mode) -static void proxy_io(IOCHAN pi, int event) -{ - struct http_proxy *pc = iochan_getdata(pi); - struct http_channel *hc = pc->channel; - - switch (event) - { - int res; - struct http_buf *htbuf; - - case EVENT_INPUT: - htbuf = http_buf_create(); - res = read(iochan_getfd(pi), htbuf->buf, HTTP_BUF_SIZE -1); - if (res == 0 || (res < 0 && errno != EINPROGRESS)) - { - if (hc->oqueue) - { - yaz_log(YLOG_WARN, "Proxy read came up short"); - // Close channel and alert client HTTP channel that we're gone - http_buf_destroy(htbuf); - close(iochan_getfd(pi)); - iochan_destroy(pi); - pc->iochan = 0; - } - else - { - http_destroy(hc->iochan); - return; - } - } - else - { - htbuf->buf[res] = '\0'; - htbuf->len = res; - http_buf_enqueue(&hc->oqueue, htbuf); - } - iochan_setflag(hc->iochan, EVENT_OUTPUT); - break; - case EVENT_OUTPUT: - if (!(htbuf = pc->oqueue)) - { - iochan_clearflag(pi, EVENT_OUTPUT); - return; - } - res = write(iochan_getfd(pi), htbuf->buf + htbuf->offset, htbuf->len); - if (res <= 0) - { - yaz_log(YLOG_WARN|YLOG_ERRNO, "write"); - http_destroy(hc->iochan); - return; - } - if (res == htbuf->len) - { - struct http_buf *np = htbuf->next; - http_buf_destroy(htbuf); - pc->oqueue = np; - } - else - { - htbuf->len -= res; - htbuf->offset += res; - } - - if (!pc->oqueue) { - iochan_setflags(pi, EVENT_INPUT); // Turns off output flag - } - break; - default: - yaz_log(YLOG_WARN, "Unexpected event on connection"); - http_destroy(hc->iochan); - } -} - -// Cleanup channel -static void http_destroy(IOCHAN i) -{ - struct http_channel *s = iochan_getdata(i); - - if (s->proxy) - { - if (s->proxy->iochan) - { - close(iochan_getfd(s->proxy->iochan)); - iochan_destroy(s->proxy->iochan); - } - http_buf_destroy_queue(s->proxy->oqueue); - xfree(s->proxy); - } - s->next = http_channel_freelist; - http_channel_freelist = s; - close(iochan_getfd(i)); - iochan_destroy(i); -} - -static struct http_channel *http_create(void) -{ - struct http_channel *r = http_channel_freelist; - - if (r) - { - http_channel_freelist = r->next; - nmem_reset(r->nmem); - wrbuf_rewind(r->wrbuf); - } - else - { - r = xmalloc(sizeof(struct http_channel)); - r->nmem = nmem_create(); - r->wrbuf = wrbuf_alloc(); - } - r->proxy = 0; - r->iochan = 0; - r->iqueue = r->oqueue = 0; - r->state = Http_Idle; - r->request = 0; - r->response = 0; - return r; -} - - -/* Accept a new command connection */ -static void http_accept(IOCHAN i, int event) -{ - struct sockaddr_in addr; - int fd = iochan_getfd(i); - socklen_t len; - int s; - IOCHAN c; - int flags; - struct http_channel *ch; - - len = sizeof addr; - if ((s = accept(fd, (struct sockaddr *) &addr, &len)) < 0) - { - yaz_log(YLOG_WARN|YLOG_ERRNO, "accept"); - return; - } - if ((flags = fcntl(s, F_GETFL, 0)) < 0) - yaz_log(YLOG_FATAL|YLOG_ERRNO, "fcntl"); - if (fcntl(s, F_SETFL, flags | O_NONBLOCK) < 0) - yaz_log(YLOG_FATAL|YLOG_ERRNO, "fcntl2"); - - yaz_log(YLOG_LOG, "New command connection"); - c = iochan_create(s, http_io, EVENT_INPUT | EVENT_EXCEPT); - - ch = http_create(); - ch->iochan = c; - iochan_setdata(c, ch); - - c->next = channel_list; - channel_list = c; -} - -/* Create a http-channel listener, syntax [host:]port */ -void http_init(const char *addr) -{ - IOCHAN c; - int l; - struct protoent *p; - struct sockaddr_in myaddr; - int one = 1; - const char *pp; - int port; - - yaz_log(YLOG_LOG, "HTTP listener is %s", addr); - - bzero(&myaddr, sizeof myaddr); - myaddr.sin_family = AF_INET; - pp = strchr(addr, ':'); - if (pp) - { - int len = pp - addr; - char hostname[128]; - struct hostent *he; - - strncpy(hostname, addr, len); - hostname[len] = '\0'; - if (!(he = gethostbyname(hostname))) - { - yaz_log(YLOG_FATAL, "Unable to resolve '%s'", hostname); - exit(1); - } - memcpy(&myaddr.sin_addr.s_addr, he->h_addr_list[0], he->h_length); - port = atoi(pp + 1); - } - else - { - port = atoi(addr); - myaddr.sin_addr.s_addr = INADDR_ANY; - } - myaddr.sin_port = htons(port); - - if (!(p = getprotobyname("tcp"))) { - abort(); - } - if ((l = socket(PF_INET, SOCK_STREAM, p->p_proto)) < 0) - yaz_log(YLOG_FATAL|YLOG_ERRNO, "socket"); - if (setsockopt(l, SOL_SOCKET, SO_REUSEADDR, (char*) - &one, sizeof(one)) < 0) - abort(); - - if (bind(l, (struct sockaddr *) &myaddr, sizeof myaddr) < 0) - yaz_log(YLOG_FATAL|YLOG_ERRNO, "bind"); - if (listen(l, SOMAXCONN) < 0) - yaz_log(YLOG_FATAL|YLOG_ERRNO, "listen"); - - c = iochan_create(l, http_accept, EVENT_INPUT | EVENT_EXCEPT); - c->next = channel_list; - channel_list = c; -} - -void http_set_proxyaddr(char *host) -{ - char *p; - int port; - struct hostent *he; - - strcpy(proxy_url, host); - p = strchr(host, ':'); - yaz_log(YLOG_DEBUG, "Proxying for %s", host); - if (p) { - port = atoi(p + 1); - *p = '\0'; - } - else - port = 80; - if (!(he = gethostbyname(host))) - { - fprintf(stderr, "Failed to lookup '%s'\n", host); - exit(1); - } - proxy_addr = xmalloc(sizeof(struct sockaddr_in)); - proxy_addr->sin_family = he->h_addrtype; - memcpy(&proxy_addr->sin_addr.s_addr, he->h_addr_list[0], he->h_length); - proxy_addr->sin_port = htons(port); -} - -/* - * Local variables: - * c-basic-offset: 4 - * indent-tabs-mode: nil - * End: - * vim: shiftwidth=4 tabstop=8 expandtab - */ diff --git a/http.h b/http.h deleted file mode 100644 index 7501424..0000000 --- a/http.h +++ /dev/null @@ -1,88 +0,0 @@ -#ifndef HTTP_H -#define HTTP_H - -// Generic I/O buffer -struct http_buf -{ -#define HTTP_BUF_SIZE 4096 - char buf[4096]; - int offset; - int len; - struct http_buf *next; -}; - -struct http_channel -{ - IOCHAN iochan; - struct http_buf *iqueue; - struct http_buf *oqueue; - char version[10]; - struct http_proxy *proxy; - enum - { - Http_Idle, - Http_Busy // Don't process new HTTP requests while we're busy - } state; - NMEM nmem; - WRBUF wrbuf; - struct http_request *request; - struct http_response *response; - struct http_channel *next; // for freelist -}; - -struct http_proxy // attached to iochan for proxy connection -{ - IOCHAN iochan; - struct http_channel *channel; - struct http_buf *oqueue; -}; - -struct http_header -{ - char *name; - char *value; - struct http_header *next; -}; - -struct http_argument -{ - char *name; - char *value; - struct http_argument *next; -}; - -struct http_request -{ - struct http_channel *channel; - char http_version[20]; - char method[20]; - char *path; - struct http_header *headers; - struct http_argument *arguments; -}; - -struct http_response -{ - char code[4]; - char *msg; - struct http_channel *channel; - struct http_header *headers; - char *payload; -}; - -void http_set_proxyaddr(char *url); -void http_init(const char *addr); -void http_addheader(struct http_response *r, const char *name, const char *value); -char *http_argbyname(struct http_request *r, char *name); -char *http_headerbyname(struct http_request *r, char *name); -struct http_response *http_create_response(struct http_channel *c); -void http_send_response(struct http_channel *c); - -/* - * Local variables: - * c-basic-offset: 4 - * indent-tabs-mode: nil - * End: - * vim: shiftwidth=4 tabstop=8 expandtab - */ -#endif diff --git a/http_command.c b/http_command.c deleted file mode 100644 index d3939dc..0000000 --- a/http_command.c +++ /dev/null @@ -1,403 +0,0 @@ -/* - * $Id: http_command.c,v 1.9 2006-12-17 13:42:47 quinn Exp $ - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "command.h" -#include "util.h" -#include "eventl.h" -#include "pazpar2.h" -#include "http.h" -#include "http_command.h" - -extern struct parameters global_parameters; -extern IOCHAN channel_list; - -struct http_session { - IOCHAN timeout_iochan; // NOTE: This is NOT associated with a socket - struct session *psession; - unsigned int session_id; - int timestamp; - struct http_session *next; -}; - -static struct http_session *session_list = 0; - -void http_session_destroy(struct http_session *s); - -static void session_timeout(IOCHAN i, int event) -{ - struct http_session *s = iochan_getdata(i); - http_session_destroy(s); -} - -struct http_session *http_session_create() -{ - struct http_session *r = xmalloc(sizeof(*r)); - r->psession = new_session(); - r->session_id = 0; - r->timestamp = 0; - r->next = session_list; - session_list = r; - r->timeout_iochan = iochan_create(-1, session_timeout, 0); - iochan_setdata(r->timeout_iochan, r); - iochan_settimeout(r->timeout_iochan, global_parameters.session_timeout); - r->timeout_iochan->next = channel_list; - channel_list = r->timeout_iochan; - return r; -} - -void http_session_destroy(struct http_session *s) -{ - struct http_session **p; - - for (p = &session_list; *p; p = &(*p)->next) - if (*p == s) - { - *p = (*p)->next; - break; - } - iochan_destroy(s->timeout_iochan); - destroy_session(s->psession); - xfree(s); -} - -static void error(struct http_response *rs, char *code, char *msg, char *txt) -{ - struct http_channel *c = rs->channel; - char tmp[1024]; - - if (!txt) - txt = msg; - rs->msg = nmem_strdup(c->nmem, msg); - strcpy(rs->code, code); - sprintf(tmp, "%s", txt); - rs->payload = nmem_strdup(c->nmem, tmp); - http_send_response(c); -} - -unsigned int make_sessionid() -{ - struct timeval t; - unsigned int res; - static int seq = 0; - - seq++; - if (gettimeofday(&t, 0) < 0) - abort(); - res = t.tv_sec; - res = ((res << 8) | (seq & 0xff)) & ((unsigned int) (1 << 31) - 1); - return res; -} - -static struct http_session *locate_session(struct http_request *rq, struct http_response *rs) -{ - struct http_session *p; - char *session = http_argbyname(rq, "session"); - unsigned int id; - - if (!session) - { - error(rs, "417", "Must supply session", 0); - return 0; - } - id = atoi(session); - for (p = session_list; p; p = p->next) - if (id == p->session_id) - { - iochan_activity(p->timeout_iochan); - return p; - } - error(rs, "417", "Session does not exist, or it has expired", 0); - return 0; -} - -static void cmd_exit(struct http_channel *c) -{ - yaz_log(YLOG_WARN, "exit"); - exit(0); -} - - -static void cmd_init(struct http_channel *c) -{ - unsigned int sesid; - char buf[1024]; - struct http_session *s = http_session_create(); - struct http_response *rs = c->response; - - yaz_log(YLOG_DEBUG, "HTTP Session init"); - sesid = make_sessionid(); - s->session_id = sesid; - sprintf(buf, "OK%u", sesid); - rs->payload = nmem_strdup(c->nmem, buf); - http_send_response(c); -} - -static void cmd_termlist(struct http_channel *c) -{ - struct http_response *rs = c->response; - struct http_request *rq = c->request; - struct http_session *s = locate_session(rq, rs); - struct termlist_score **p; - int len; - int i; - - if (!s) - return; - wrbuf_rewind(c->wrbuf); - - wrbuf_puts(c->wrbuf, ""); - p = termlist(s->psession, &len); - if (p) - for (i = 0; i < len; i++) - { - wrbuf_puts(c->wrbuf, "\n"); - wrbuf_printf(c->wrbuf, "%s", p[i]->term); - wrbuf_printf(c->wrbuf, "%d", p[i]->frequency); - wrbuf_puts(c->wrbuf, ""); - } - wrbuf_puts(c->wrbuf, ""); - rs->payload = nmem_strdup(rq->channel->nmem, wrbuf_buf(c->wrbuf)); - http_send_response(c); -} - - -static void cmd_bytarget(struct http_channel *c) -{ - struct http_response *rs = c->response; - struct http_request *rq = c->request; - struct http_session *s = locate_session(rq, rs); - struct hitsbytarget *ht; - int count, i; - - if (!s) - return; - if (!(ht = hitsbytarget(s->psession, &count))) - { - error(rs, "500", "Failed to retrieve hitcounts", 0); - return; - } - wrbuf_rewind(c->wrbuf); - wrbuf_puts(c->wrbuf, "OK"); - - for (i = 0; i < count; i++) - { - wrbuf_puts(c->wrbuf, "\n"); - wrbuf_printf(c->wrbuf, "%s\n", ht[i].id); - wrbuf_printf(c->wrbuf, "%d\n", ht[i].hits); - wrbuf_printf(c->wrbuf, "%d\n", ht[i].diagnostic); - wrbuf_printf(c->wrbuf, "%d\n", ht[i].records); - wrbuf_printf(c->wrbuf, "%s\n", ht[i].state); - wrbuf_puts(c->wrbuf, ""); - } - - wrbuf_puts(c->wrbuf, ""); - rs->payload = nmem_strdup(c->nmem, wrbuf_buf(c->wrbuf)); - http_send_response(c); -} - -static void show_records(struct http_channel *c) -{ - struct http_request *rq = c->request; - struct http_response *rs = c->response; - struct http_session *s = locate_session(rq, rs); - struct record **rl; - char *start = http_argbyname(rq, "start"); - char *num = http_argbyname(rq, "num"); - int startn = 0; - int numn = 20; - int total; - int total_hits; - int i; - - if (!s) - return; - - if (start) - startn = atoi(start); - if (num) - numn = atoi(num); - - rl = show(s->psession, startn, &numn, &total, &total_hits); - - wrbuf_rewind(c->wrbuf); - wrbuf_puts(c->wrbuf, "\nOK\n"); - wrbuf_printf(c->wrbuf, "%d\n", total); - wrbuf_printf(c->wrbuf, "%d\n", total_hits); - wrbuf_printf(c->wrbuf, "%d\n", startn); - wrbuf_printf(c->wrbuf, "%d\n", numn); - - for (i = 0; i < numn; i++) - { - int ccount; - struct record *p; - - wrbuf_puts(c->wrbuf, "\n"); - wrbuf_printf(c->wrbuf, "%s\n", rl[i]->title); - for (ccount = 1, p = rl[i]->next_cluster; p; p = p->next_cluster, ccount++) - ; - if (ccount > 1) - wrbuf_printf(c->wrbuf, "%d\n", ccount); - wrbuf_puts(c->wrbuf, "\n"); - } - - wrbuf_puts(c->wrbuf, "\n"); - rs->payload = nmem_strdup(c->nmem, wrbuf_buf(c->wrbuf)); - http_send_response(c); -} - -static void show_records_ready(void *data) -{ - struct http_channel *c = (struct http_channel *) data; - - show_records(c); -} - -static void cmd_show(struct http_channel *c) -{ - struct http_request *rq = c->request; - struct http_response *rs = c->response; - struct http_session *s = locate_session(rq, rs); - char *block = http_argbyname(rq, "block"); - - if (!s) - return; - - if (block) - { - if (!s->psession->reclist || !s->psession->reclist->num_records) - { - session_set_watch(s->psession, SESSION_WATCH_RECORDS, show_records_ready, c); - yaz_log(YLOG_DEBUG, "Blocking on cmd_show"); - return; - } - } - - show_records(c); -} - -static void cmd_ping(struct http_channel *c) -{ - struct http_request *rq = c->request; - struct http_response *rs = c->response; - struct http_session *s = locate_session(rq, rs); - if (!s) - return; - rs->payload = "OK"; - http_send_response(c); -} - -static void cmd_search(struct http_channel *c) -{ - struct http_request *rq = c->request; - struct http_response *rs = c->response; - struct http_session *s = locate_session(rq, rs); - char *query = http_argbyname(rq, "query"); - char *res; - - if (!s) - return; - if (!query) - { - error(rs, "417", "Must supply query", 0); - return; - } - res = search(s->psession, query); - if (res) - { - error(rs, "417", res, res); - return; - } - rs->payload = "OK"; - http_send_response(c); -} - - -static void cmd_stat(struct http_channel *c) -{ - struct http_request *rq = c->request; - struct http_response *rs = c->response; - struct http_session *s = locate_session(rq, rs); - struct statistics stat; - - if (!s) - return; - - statistics(s->psession, &stat); - - wrbuf_rewind(c->wrbuf); - wrbuf_puts(c->wrbuf, ""); - wrbuf_printf(c->wrbuf, "%d\n", stat.num_hits); - wrbuf_printf(c->wrbuf, "%d\n", stat.num_records); - wrbuf_printf(c->wrbuf, "%d\n", stat.num_clients); - wrbuf_printf(c->wrbuf, "%d\n", stat.num_no_connection); - wrbuf_printf(c->wrbuf, "%d\n", stat.num_connecting); - wrbuf_printf(c->wrbuf, "%d\n", stat.num_initializing); - wrbuf_printf(c->wrbuf, "%d\n", stat.num_searching); - wrbuf_printf(c->wrbuf, "%d\n", stat.num_presenting); - wrbuf_printf(c->wrbuf, "%d\n", stat.num_idle); - wrbuf_printf(c->wrbuf, "%d\n", stat.num_failed); - wrbuf_printf(c->wrbuf, "%d\n", stat.num_error); - wrbuf_puts(c->wrbuf, ""); - rs->payload = nmem_strdup(c->nmem, wrbuf_buf(c->wrbuf)); - http_send_response(c); -} - - -struct { - char *name; - void (*fun)(struct http_channel *c); -} commands[] = { - { "init", cmd_init }, - { "stat", cmd_stat }, - { "bytarget", cmd_bytarget }, - { "show", cmd_show }, - { "search", cmd_search }, - { "termlist", cmd_termlist }, - { "exit", cmd_exit }, - { "ping", cmd_ping }, - {0,0} -}; - -void http_command(struct http_channel *c) -{ - char *command = http_argbyname(c->request, "command"); - struct http_response *rs = http_create_response(c); - int i; - - c->response = rs; - if (!command) - { - error(rs, "417", "Must supply command", 0); - return; - } - for (i = 0; commands[i].name; i++) - if (!strcmp(commands[i].name, command)) - { - (*commands[i].fun)(c); - break; - } - if (!commands[i].name) - error(rs, "417", "Unknown command", 0); - - return; -} - -/* - * Local variables: - * c-basic-offset: 4 - * indent-tabs-mode: nil - * End: - * vim: shiftwidth=4 tabstop=8 expandtab - */ diff --git a/http_command.h b/http_command.h deleted file mode 100644 index e127925..0000000 --- a/http_command.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef HTTP_COMMAND_H -#define HTTP_COMMAND - -#include "http.h" - -void http_command(struct http_channel *c); - -#endif diff --git a/many.pz b/many.pz deleted file mode 100644 index db1bd9c..0000000 --- a/many.pz +++ /dev/null @@ -1,299 +0,0 @@ -target 12.4.242.230:210/Unicorn -target 128.119.168.1:210/INNOPAC -target 128.119.168.2:210/INNOPAC -target 128.197.130.200:210/INNOPAC -target 128.218.15.173:210/INNOPAC -target 128.83.82.249:210/INNOPAC -target 129.234.4.6:210/INNOPAC -target 129.65.20.27:210/Innopac -target 129.78.72.7:210/INNOPAC -target 130.102.42.45:210/Innopac -target 130.111.64.52:210/INNOPAC -target 130.206.134.239:210/INNOPAC -target 130.212.18.200:210/Innopac -target 130.91.144.191:210/INNOPAC -target 131.165.177.76:2100/S -target 131.165.97.2:210/S -target 131.232.13.6:210/INNOPAC -target 134.115.152.130:210/Innopac -target 134.154.30.10:210/Innopac -target 137.187.166.250:210/INNOPAC -target 137.45.212.100:210/INNOPAC -target 141.215.16.4:210/INNOPAC -target 144.37.1.4:210/Innopac -target 147.143.2.5:210/INNOPAC -target 147.175.67.227:1111/DEFAULT -target 147.175.67.227:2222/DEFAULT -target 147.29.116.113:2100/DAB01 -target 147.29.116.113:2100/KGB01 -target 147.29.116.113:2100/PVS -target 147.29.116.113:2100/vej01 -target 149.130.90.2:210/INNOPAC -target 149.175.20.20:210/INNOPAC -target 161.210.213.100:210/INNOPAC -target 169.139.225.46:210/horizon -target 192.188.131.54:2200/UNICORN -target 192.197.190.4:210/Advance -target 192.65.218.23:210/DREWDB -target 193.162.157.130:2100/S -target 193.43.102.194:21210/ADVANCE -target 193.52.199.5:21210/ADVANCE -target 193.59.172.100:210/INNOPAC -target 194.182.134.04:210/S -target 194.239.156.133:2100/S -target 195.215.203.98:2100/S -target 195.249.206.204:210/Default -target 195.249.206.204:210/default -target 195.41.8.218:2100/S -target 195.50.60.15:2100/CB -target 195.50.60.15:2101/FB -target 195.50.60.15:2102/MB -target 195.50.60.15:2103/PB -target 196.6.221.16:210/INNOPAC -target 198.62.73.72:210/INNOPAC -target 199.184.19.10:210/innopac -target 199.88.71.7:210/INNOPAC -target 202.14.152.4:210/INNOPAC -target 202.40.151.2:210/INNOPAC -target 202.40.216.17:210/INNOPAC -target 206.48.3.167:7090/Voyager -target 207.194.254.193:210/advance -target Exlibris.albertslundbib.dk:2100/S -target Exlibris.daimi.au.dk:2104/FYS01 -target Exlibris.daimi.au.dk:2104/GEO01 -target Exlibris.daimi.au.dk:2104/KEM01 -target Exlibris.daimi.au.dk:2104/MAT01 -target Exlibris.daimi.au.dk:2104/VID01 -target Z3950.maribo.integrabib.dk:210/Default -target Z3950cat.bl.uk:9909/BLAC -target acorn.library.vanderbilt.edu:2200/ACORN -target advance.biblio.polymtl.ca:210/ADVANCE -target advance.lib.rmit.edu.au:21210/Advance -target albert.rit.edu:210/INNOPAC -target aleph.mcgill.ca:210/MUSE -target aleph.unibas.ch:9909/DSV01 -target alpha.svkol.cz:9909/SVK01 -target alpha.svkol.cz:9909/SVK02 -target atrium.bib.umontreal.ca:210/ADVANCE -target bagel.indexdata.dk:210/gils -target bagel.indexdata.dk:210/marc -target bcr1.larioja.org:210/AbsysE -target bib.gladsaxe.dk:2100/S -target bib.gladsaxe.dk:2101/H -target bib.gladsaxe.dk:2101/S -target biblio.crusca.fi.it:9909/ADC01 -target biblio.hec.ca:210/hec -target biblios.dic.uchile.cl:210/BELLO -target biblioteca.uc3m.es:2200/unicorn -target bobcat.nyu.edu:210/ADVANCE -target books.luther.edu:210/innopac -target brocar.unavarra.es:9999/libros -target brocar.unavarra.es:9999/revistas -target cat.cisti.nrc.ca:210/INNOPAC -target cat.lib.grin.edu:210/innopac -target catalog.bedfordlibrary.org:210/Innopac -target catalog.crl.edu:210/INNOPAC -target catalog.lib.jhu.edu:210/horizon -target cisne.sim.ucm.es:210/INNOPAC -target clavis.ucalgary.ca:2200/UNICORN -target cornelia.whoi.edu:7090/VOYAGER -target corpsgeo1.usace.army.mil:2210/geo1 -target csulib.ctstateu.edu:210/INNOPAC -target echea.ru.ac.za:210/INNOPAC -target edcsns17.cr.usgs.gov:6675/AVHRR -target edcsns17.cr.usgs.gov:6675/DOQ -target edcsns17.cr.usgs.gov:6675/HAZARDS -target edcsns17.cr.usgs.gov:6675/LANDSAT_MSS -target edcsns17.cr.usgs.gov:6675/LANDSAT_TM -target edcsns17.cr.usgs.gov:6675/NAPP -target edcsns17.cr.usgs.gov:6675/NHAP -target eleanor.lib.gla.ac.uk:210/INNOPAC -target eusa.library.net:5666/eusa -target explore.up.ac.za:210/INNOPAC -target gaudi.ua.es:2200/unicorn -target gb.glostrupbib.dk:2100/S -target hcb.bibnet.dk:2100/S -target helios.nlib.ee:210/INNOPAC -target hermes.lib.cbs.dk:2100/S -target hkbulib.hkbu.edu.hk:210/INNOPAC -target ikast.ikast.bibnet.dk:2100/S -target indexgeo.com.au:6668/dataset -target indexgeo.net:5511/act -target info.library.mun.ca:2200/UNICORN -target innopac.lib.kth.se:210/innopac -target innopac.wits.ac.za:210/INNOPAC -target itec.mty.itesm.mx:210/innopac -target jasper.acadiau.ca:2200/UNICORN -target jenda.lib.nccu.edu.tw:210/INNOPAC -target kat.vkol.cz:9909/SVK01 -target kat.vkol.cz:9909/SVK02 -target kraka.birkerod.bibnet.dk:2100/S -target ksclib.keene.edu:210/INNOPAC -target l1.uwaterloo.ca:7090/VOYAGER -target lance.missouri.edu:210/INNOPAC -target lib.leeds.ac.uk:210/INNOPAC -target lib.soas.ac.uk:210/innopac -target libadfa.adfa.edu.au:7090/VOYAGER -target libcat.qut.edu.au:210/INNOPAC -target liber.acadlib.lv:211/codex.previous -target liber.acadlib.lv:211/liber1 -target liber.acadlib.lv:212/liber1 -target library.anu.edu.au:210/INNOPAC -target library.ballarat.edu.au:210/INNOPAC -target library.bangor.ac.uk:210/INNOPAC -target library.bilgi.edu.tr:210/INNOPAC -target library.brad.ac.uk:210/xxDefault -target library.brunel.ac.uk:2200/UNICORN -target library.daemen.edu:210/innopac -target library.deakin.edu.au:210/INNOPAC -target library.gu.edu.au:21210/ADVANCE -target library.hud.ac.uk:210/HORIZON -target library.hull.ac.uk:210/INNOPAC -target library.latrobe.edu.au:210/INNOPAC -target library.lbc.edu:7090/voyager -target library.mdx.ac.uk:210/mdx -target library.newcastle.edu.au:210/INNOPAC -target library.ox.ac.uk:210/ADVANCE -target library.tcd.ie:210/advance -target library.ucc.ie:210/INNOPAC -target library.uh.edu:210/INNOPAC -target library.vu.edu.au:210/INNOPAC -target library2.open.ac.uk:7090/voyager -target libsys.lib.hull.ac.uk:210/INNOPAC -target libuni01.ccc.govt.nz:2200/unicorn -target libuni01.ccc.govt.nz:2220/cinch -target libuni01.ccc.govt.nz:2220/papers -target libunix.ku.edu.tr:210/INNOPAC -target linc.nus.edu.sg:210/innopac -target lion.swem.wm.edu:2200/unicorn -target loke.dcbib.dk:2100/S -target lrpapp.cc.umanitoba.ca:2200/unicorn -target malad2.mala.bc.ca:2200/UNICORN -target malad2.mala.bc.ca:2200/unicorn -target marte.biblioteca.upm.es:2200/unicorn -target medupe.ub.bw:210/INNOPAC -target melvyl.cdlib.org:210/CDL90 -target mercury.concordia.ca:210/Innopac -target merihobu.utlib.ee:210/INNOPAC -target merlinweb.ville.montreal.qc.ca:2100/Z3950S -target nbinet.ncl.edu.tw:210/INNOPAC -target ncsulib.lib.ncsu.edu:210/MARION -target newlib.ci.lubbock.tx.us:2200/unicorn -target nobis.njl.dk:210/S -target nrhcat.library.nrhtx.com:210/INNOPAC -target oda.fynbib.dk:2100/S -target odin2.bib.sdu.dk:210/Horizon -target odin2.bib.sdu.dk:210/otb -target opac.nebis.ch:9909/NEBIS -target opac.sbn.it:2100/nopac -target opac.sbn.it:3950/nopac -target opac.shu.ac.uk:210/INNOPAC -target opac.unifi.it:210/OPAC -target opac.utmem.edu:210/INNOPAC -target orac.lib.uts.edu.au:210/INNOPAC -target pollux.dslak.co.nz:210/MARNSL -target prodorbis.library.yale.edu:7090/voyager -target quest.unb.ca:2200/unicorn -target rebiun.crue.org:210/absysREBIUN -target roble.unizar.es:210/INNOPAC -target rs6000.nshpl.library.ns.ca:210/AVR -target rs6000.nshpl.library.ns.ca:210/CBR -target rs6000.nshpl.library.ns.ca:210/CEH -target rs6000.nshpl.library.ns.ca:210/CUR -target rs6000.nshpl.library.ns.ca:210/ECR -target rs6000.nshpl.library.ns.ca:210/NSP -target rs6000.nshpl.library.ns.ca:210/PAR -target rs6000.nshpl.library.ns.ca:210/SSR -target rs6000.nshpl.library.ns.ca:210/WCR -target sabio.library.arizona.edu:210/innopac -target salty.slcpl.lib.ut.us:210/INNOPAC -target scotty.mhsl.uab.edu:7090/VOYAGER -target serapis.leedsmet.ac.uk:2200/unicorn -target serapis.lmu.ac.uk:2200/unicorn -target sflwww.er.usgs.gov:251/sflwwwmeta -target silkbib.bib.dk:2100/S -target sirsi.library.utoronto.ca:2200/UNICORN -target star.tsl.state.tx.us:2200/unicorn -target strife.library.uwa.edu.au:210/INNOPAC -target strife.library.uwa.edu.au:210/innopac -target sundog.usask.ca:210/INNOPAC -target tegument.nlm.nih.gov:7090/VOYAGER -target titus.folger.edu:7090/VOYAGER -target tora.htk.dk:2100/S -target troy.lib.sfu.ca:210/INNOPAC -target unicorn.lib.ic.ac.uk:2200/IC -target unicorn.qmced.ac.uk:2200/unicorn -target unicornio.umb.edu.co:2200/Unicorn -target ustlib.ust.hk:210/INNOPAC -target vax.lib.state.ca.us:210/marion -target vlsirsi.rdg.ac.uk:2200/UNICORN -target voyager.its.csiro.au:7090/VOYAGER -target voyager.tcs.tulane.edu:7090/VOYAGER -target voyager.wrlc.org:7090/VOYAGER -target www.agralin.nl:210/clcz3950 -target www.bibliotek.taarnby.dk:210/S -target www.congreso.es:2100/ABSYSBCD -target www.csc.noaa.gov:2210/CSC_Products -target www.eevl.ac.uk:2100/eevlacuk -target www.grimes.lib.ia.us:210/main -target www.knihovna.cz:8888/un_cat -target www.library.nps.gov:7090/VOYAGER -target www.sbn.it:2100/nopac -target www.scran.ac.uk:3950/default -target yulib001.mc.yu.edu:1111/DEFAULT -target z39.libis.lt:210/KNYGOS -target z3950.ahds.ac.uk:210/CMF -target z3950.ahds.ac.uk:210/DC -target z3950.bcl.jcyl.es:210/AbsysBCL -target z3950.bcl.jcyl.es:210/AbsysCCFL -target z3950.bergen.folkebibl.no:210/data -target z3950.biblos.pk.edu.pl:4210/books -target z3950.bibsys.no:2100/BIBSYS -target z3950.bibsys.no:2100/PERI -target z3950.copac.ac.uk:210/copac -target z3950.copac.ac.uk:2100/COPAC -target z3950.deich.folkebibl.no:210/data -target z3950.dragsholm.integrabib.dk:210/default -target z3950.fcla.edu:210/CF -target z3950.fcla.edu:210/FA -target z3950.fcla.edu:210/FI -target z3950.fcla.edu:210/FO -target z3950.fcla.edu:210/QB -target z3950.fcla.edu:210/RF -target z3950.fcla.edu:210/SR -target z3950.fcla.edu:210/ST -target z3950.gbv.de:20010/GVK -target z3950.gbv.de:20011/GVK -target z3950.gbv.de:20011/GVK -target z3950.gbv.de:210/GVK -target z3950.gbv.de:210/GVK -target z3950.haderslev.integrabib.dk:210/default -target z3950.haslev.integrabib.dk:210/Default -target z3950.hoerning.integrabib.dk:210/default -target z3950.kb.dk:2100/BGF01 -target z3950.kb.dk:2100/BHS01 -target z3950.kb.dk:2100/DRA01 -target z3950.kb.dk:2100/KBB01 -target z3950.kb.dk:2100/KBH01 -target z3950.kb.dk:2100/KGL01 -target z3950.libh.uoc.gr:210/default -target z3950.libr.uoc.gr:210/logios -target z3950.libr.uoc.gr:210/ptolemeos_ii -target z3950.library.wisc.edu:210/madison -target z3950.libris.kb.se:210/libr -target z3950.lillehammer.folkebibl.no:210/data -target z3950.loc.gov:7090/voyager -target z3950.nakskov.integrabib.dk:210/default -target z3950.nb.no:2100/Norbok -target z3950.nb.no:2100/Norper -target z3950.nb.no:2100/Sambok -target z3950.nls.uk:7290/voyager -target z3950.ovid.com:2213/eric -target z3950.ovid.com:2213/pmed -target z3950.rdn.ac.uk:210/xxdefault -target z3950.ringsted.integrabib.dk:210/Default -target z3950.skagen.integrabib.dk:210/default -target z3950.sydfalster.integrabib.dk:210/default -target z3950.trondheim.folkebibl.no:210/data -target z3950.vallensbaek.integrabib.dk:210/default -target z3950.vejen.integrabib.dk:210/default diff --git a/pazpar2.c b/pazpar2.c deleted file mode 100644 index 4105ba2..0000000 --- a/pazpar2.c +++ /dev/null @@ -1,1395 +0,0 @@ -/* $Id: pazpar2.c,v 1.17 2006-12-19 04:49:34 quinn Exp $ */; - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "pazpar2.h" -#include "eventl.h" -#include "command.h" -#include "http.h" -#include "termlists.h" -#include "reclists.h" -#include "relevance.h" - -#define PAZPAR2_VERSION "0.1" -#define MAX_CHUNK 15 - -static void client_fatal(struct client *cl); -static void connection_destroy(struct connection *co); -static int client_prep_connection(struct client *cl); -static void ingest_records(struct client *cl, Z_Records *r); -void session_alert_watch(struct session *s, int what); - -IOCHAN channel_list = 0; // Master list of connections we're handling events to - -static struct connection *connection_freelist = 0; -static struct client *client_freelist = 0; - -static struct host *hosts = 0; // The hosts we know about -static struct database *databases = 0; // The databases we know about - -static char *client_states[] = { - "Client_Connecting", - "Client_Connected", - "Client_Idle", - "Client_Initializing", - "Client_Searching", - "Client_Presenting", - "Client_Error", - "Client_Failed", - "Client_Disconnected", - "Client_Stopped" -}; - -struct parameters global_parameters = -{ - 30, - "81", - "Index Data PazPar2 (MasterKey)", - PAZPAR2_VERSION, - 600, // 10 minutes - 60, - 100, - MAX_CHUNK, - 0, - 0, - 0, - 0 -}; - - -static int send_apdu(struct client *c, Z_APDU *a) -{ - struct connection *co = c->connection; - char *buf; - int len, r; - - if (!z_APDU(global_parameters.odr_out, &a, 0, 0)) - { - odr_perror(global_parameters.odr_out, "Encoding APDU"); - abort(); - } - buf = odr_getbuf(global_parameters.odr_out, &len, 0); - r = cs_put(co->link, buf, len); - if (r < 0) - { - yaz_log(YLOG_WARN, "cs_put: %s", cs_errmsg(cs_errno(co->link))); - return -1; - } - else if (r == 1) - { - fprintf(stderr, "cs_put incomplete (ParaZ does not handle that)\n"); - exit(1); - } - odr_reset(global_parameters.odr_out); /* release the APDU structure */ - co->state = Conn_Waiting; - return 0; -} - - -static void send_init(IOCHAN i) -{ - struct connection *co = iochan_getdata(i); - struct client *cl = co->client; - Z_APDU *a = zget_APDU(global_parameters.odr_out, Z_APDU_initRequest); - - a->u.initRequest->implementationId = global_parameters.implementationId; - a->u.initRequest->implementationName = global_parameters.implementationName; - a->u.initRequest->implementationVersion = - global_parameters.implementationVersion; - ODR_MASK_SET(a->u.initRequest->options, Z_Options_search); - ODR_MASK_SET(a->u.initRequest->options, Z_Options_present); - ODR_MASK_SET(a->u.initRequest->options, Z_Options_namedResultSets); - - ODR_MASK_SET(a->u.initRequest->protocolVersion, Z_ProtocolVersion_1); - ODR_MASK_SET(a->u.initRequest->protocolVersion, Z_ProtocolVersion_2); - ODR_MASK_SET(a->u.initRequest->protocolVersion, Z_ProtocolVersion_3); - if (send_apdu(cl, a) >= 0) - { - iochan_setflags(i, EVENT_INPUT); - cl->state = Client_Initializing; - } - else - cl->state = Client_Error; - odr_reset(global_parameters.odr_out); -} - -static void send_search(IOCHAN i) -{ - struct connection *co = iochan_getdata(i); - struct client *cl = co->client; - struct session *se = cl->session; - struct database *db = cl->database; - Z_APDU *a = zget_APDU(global_parameters.odr_out, Z_APDU_searchRequest); - int ndb, cerror, cpos; - char **databaselist; - Z_Query *zquery; - struct ccl_rpn_node *cn; - int ssub = 0, lslb = 100000, mspn = 10; - - yaz_log(YLOG_DEBUG, "Sending search"); - - cn = ccl_find_str(global_parameters.ccl_filter, se->query, &cerror, &cpos); - if (!cn) - return; - a->u.searchRequest->query = zquery = odr_malloc(global_parameters.odr_out, - sizeof(Z_Query)); - zquery->which = Z_Query_type_1; - zquery->u.type_1 = ccl_rpn_query(global_parameters.odr_out, cn); - ccl_rpn_delete(cn); - - for (ndb = 0; *db->databases[ndb]; ndb++) - ; - databaselist = odr_malloc(global_parameters.odr_out, sizeof(char*) * ndb); - for (ndb = 0; *db->databases[ndb]; ndb++) - databaselist[ndb] = db->databases[ndb]; - - a->u.presentRequest->preferredRecordSyntax = - yaz_oidval_to_z3950oid(global_parameters.odr_out, - CLASS_RECSYN, VAL_USMARC); - a->u.searchRequest->smallSetUpperBound = &ssub; - a->u.searchRequest->largeSetLowerBound = &lslb; - a->u.searchRequest->mediumSetPresentNumber = &mspn; - a->u.searchRequest->resultSetName = "Default"; - a->u.searchRequest->databaseNames = databaselist; - a->u.searchRequest->num_databaseNames = ndb; - - if (send_apdu(cl, a) >= 0) - { - iochan_setflags(i, EVENT_INPUT); - cl->state = Client_Searching; - cl->requestid = se->requestid; - } - else - cl->state = Client_Error; - - odr_reset(global_parameters.odr_out); -} - -static void send_present(IOCHAN i) -{ - struct connection *co = iochan_getdata(i); - struct client *cl = co->client; - Z_APDU *a = zget_APDU(global_parameters.odr_out, Z_APDU_presentRequest); - int toget; - int start = cl->records + 1; - - toget = global_parameters.chunk; - if (toget > cl->hits - cl->records) - toget = cl->hits - cl->records; - - yaz_log(YLOG_DEBUG, "Trying to present %d records\n", toget); - - a->u.presentRequest->resultSetStartPoint = &start; - a->u.presentRequest->numberOfRecordsRequested = &toget; - - a->u.presentRequest->resultSetId = "Default"; - - a->u.presentRequest->preferredRecordSyntax = - yaz_oidval_to_z3950oid(global_parameters.odr_out, - CLASS_RECSYN, VAL_USMARC); - - if (send_apdu(cl, a) >= 0) - { - iochan_setflags(i, EVENT_INPUT); - cl->state = Client_Presenting; - } - else - cl->state = Client_Error; - odr_reset(global_parameters.odr_out); -} - -static void do_initResponse(IOCHAN i, Z_APDU *a) -{ - struct connection *co = iochan_getdata(i); - struct client *cl = co->client; - Z_InitResponse *r = a->u.initResponse; - - yaz_log(YLOG_DEBUG, "Received init response"); - - if (*r->result) - { - cl->state = Client_Idle; - } - else - cl->state = Client_Failed; // FIXME need to do something to the connection -} - -static void do_searchResponse(IOCHAN i, Z_APDU *a) -{ - struct connection *co = iochan_getdata(i); - struct client *cl = co->client; - struct session *se = cl->session; - Z_SearchResponse *r = a->u.searchResponse; - - yaz_log(YLOG_DEBUG, "Searchresponse (status=%d)", *r->searchStatus); - - if (*r->searchStatus) - { - cl->hits = *r->resultCount; - se->total_hits += cl->hits; - if (r->presentStatus && !*r->presentStatus && r->records) - { - yaz_log(YLOG_DEBUG, "Records in search response"); - cl->records += *r->numberOfRecordsReturned; - ingest_records(cl, r->records); - } - cl->state = Client_Idle; - } - else - { /*"FAILED"*/ - cl->hits = 0; - cl->state = Client_Error; - if (r->records) { - Z_Records *recs = r->records; - if (recs->which == Z_Records_NSD) - { - yaz_log(YLOG_WARN, "Non-surrogate diagnostic"); - cl->diagnostic = *recs->u.nonSurrogateDiagnostic->condition; - cl->state = Client_Error; - } - } - } -} - -const char *find_field(const char *rec, const char *field) -{ - char lbuf[5]; - char *line; - - lbuf[0] = '\n'; - strcpy(lbuf + 1, field); - - if ((line = strstr(rec, lbuf))) - return ++line; - else - return 0; -} - -const char *find_subfield(const char *field, char subfield) -{ - const char *p = field; - - while (*p && *p != '\n') - { - while (*p != '\n' && *p != '\t') - p++; - if (*p == '\t' && *(++p) == subfield) { - if (*(++p) == ' ') - { - while (isspace(*p)) - p++; - return p; - } - } - } - return 0; -} - -// Extract 245 $a $b 100 $a -char *extract_title(struct session *s, const char *rec) -{ - const char *field, *subfield; - char *e, *ef; - unsigned char *obuf, *p; - - wrbuf_rewind(s->wrbuf); - - if (!(field = find_field(rec, "245"))) - return 0; - if (!(subfield = find_subfield(field, 'a'))) - return 0; - ef = index(subfield, '\n'); - if ((e = index(subfield, '\t')) && e < ef) - ef = e; - if (ef) - { - wrbuf_write(s->wrbuf, subfield, ef - subfield); - if ((subfield = find_subfield(field, 'b'))) - { - ef = index(subfield, '\n'); - if ((e = index(subfield, '\t')) && e < ef) - ef = e; - if (ef) - { - wrbuf_putc(s->wrbuf, ' '); - wrbuf_write(s->wrbuf, subfield, ef - subfield); - } - } - } - if ((field = find_field(rec, "100"))) - { - if ((subfield = find_subfield(field, 'a'))) - { - ef = index(subfield, '\n'); - if ((e = index(subfield, '\t')) && e < ef) - ef = e; - if (ef) - { - wrbuf_puts(s->wrbuf, ", by "); - wrbuf_write(s->wrbuf, subfield, ef - subfield); - } - } - } - wrbuf_putc(s->wrbuf, '\0'); - obuf = (unsigned char*) nmem_strdup(s->nmem, wrbuf_buf(s->wrbuf)); - for (p = obuf; *p; p++) - if (*p == '&' || *p == '<' || *p > 122 || *p < ' ') - *p = ' '; - return (char*) obuf; -} - -// Extract 245 $a $b 100 $a -char *extract_mergekey(struct session *s, const char *rec) -{ - const char *field, *subfield; - char *e, *ef; - char *out, *p, *pout; - - wrbuf_rewind(s->wrbuf); - - if (!(field = find_field(rec, "245"))) - return 0; - if (!(subfield = find_subfield(field, 'a'))) - return 0; - ef = index(subfield, '\n'); - if ((e = index(subfield, '\t')) && e < ef) - ef = e; - if (ef) - { - wrbuf_write(s->wrbuf, subfield, ef - subfield); - if ((subfield = find_subfield(field, 'b'))) - { - ef = index(subfield, '\n'); - if ((e = index(subfield, '\t')) && e < ef) - ef = e; - if (ef) - { - wrbuf_puts(s->wrbuf, " field "); - wrbuf_write(s->wrbuf, subfield, ef - subfield); - } - } - } - if ((field = find_field(rec, "100"))) - { - if ((subfield = find_subfield(field, 'a'))) - { - ef = index(subfield, '\n'); - if ((e = index(subfield, '\t')) && e < ef) - ef = e; - if (ef) - { - wrbuf_puts(s->wrbuf, " field "); - wrbuf_write(s->wrbuf, subfield, ef - subfield); - } - } - } - wrbuf_putc(s->wrbuf, '\0'); - p = wrbuf_buf(s->wrbuf); - out = pout = nmem_malloc(s->nmem, strlen(p) + 1); - - while (*p) - { - while (isalnum(*p)) - *(pout++) = tolower(*(p++)); - while (*p && !isalnum(*p)) - p++; - *(pout++) = ' '; - } - if (out != pout) - *(--pout) = '\0'; - - return out; -} - -#ifdef RECHEAP -static void push_record(struct session *s, struct record *r) -{ - int p; - assert(s->recheap_max + 1 < s->recheap_size); - - s->recheap[p = ++s->recheap_max] = r; - while (p > 0) - { - int parent = (p - 1) >> 1; - if (strcmp(s->recheap[p]->merge_key, s->recheap[parent]->merge_key) < 0) - { - struct record *tmp; - tmp = s->recheap[parent]; - s->recheap[parent] = s->recheap[p]; - s->recheap[p] = tmp; - p = parent; - } - else - break; - } -} - -static struct record *top_record(struct session *s) -{ - return s-> recheap_max >= 0 ? s->recheap[0] : 0; -} - -static struct record *pop_record(struct session *s) -{ - struct record *res; - int p = 0; - int lastnonleaf = (s->recheap_max - 1) >> 1; - - if (s->recheap_max < 0) - return 0; - - res = s->recheap[0]; - - s->recheap[p] = s->recheap[s->recheap_max--]; - - while (p <= lastnonleaf) - { - int right = (p + 1) << 1; - int left = right - 1; - int min = left; - - if (right < s->recheap_max && - strcmp(s->recheap[right]->merge_key, s->recheap[left]->merge_key) < 0) - min = right; - if (strcmp(s->recheap[min]->merge_key, s->recheap[p]->merge_key) < 0) - { - struct record *tmp = s->recheap[min]; - s->recheap[min] = s->recheap[p]; - s->recheap[p] = tmp; - p = min; - } - else - break; - } - return res; -} - -// Like pop_record but collapses identical (merge_key) records -// The heap will contain multiple independent matching records and possibly -// one cluster, created the last time the list was scanned -static struct record *pop_mrecord(struct session *s) -{ - struct record *this; - struct record *next; - - if (!(this = pop_record(s))) - return 0; - - // Collapse identical records - while ((next = top_record(s))) - { - struct record *p, *tmpnext; - if (strcmp(this->merge_key, next->merge_key)) - break; - // Absorb record (and clustersiblings) into a supercluster - for (p = next; p; p = tmpnext) { - tmpnext = p->next_cluster; - p->next_cluster = this->next_cluster; - this->next_cluster = p; - } - - pop_record(s); - } - return this; -} - -// Reads records in sort order. Store records in top of heapspace until rewind is called. -static struct record *read_recheap(struct session *s) -{ - struct record *r = pop_mrecord(s); - - if (r) - { - if (s->recheap_scratch < 0) - s->recheap_scratch = s->recheap_size; - s->recheap[--s->recheap_scratch] = r; - } - - return r; -} - -// Return records to heap after read -static void rewind_recheap(struct session *s) -{ - while (s->recheap_scratch >= 0) { - push_record(s, s->recheap[s->recheap_scratch++]); - if (s->recheap_scratch >= s->recheap_size) - s->recheap_scratch = -1; - } -} - -#endif - -// FIXME needs to be generalized. Should flexibly generate X lists per search -static void extract_subject(struct session *s, const char *rec) -{ - const char *field, *subfield; - - while ((field = find_field(rec, "650"))) - { - rec = field; - if ((subfield = find_subfield(field, 'a'))) - { - char *e, *ef; - char buf[1024]; - int len; - - ef = index(subfield, '\n'); - if (!ef) - return; - if ((e = index(subfield, '\t')) && e < ef) - ef = e; - while (ef > subfield && !isalpha(*(ef - 1)) && *(ef - 1) != ')') - ef--; - len = ef - subfield; - assert(len < 1023); - memcpy(buf, subfield, len); - buf[len] = '\0'; - if (*buf) - termlist_insert(s->termlist, buf); - } - } -} - -static void pull_relevance_field(struct session *s, struct record *head, const char *rec, - char *field, int mult) -{ - const char *fb; - while ((fb = find_field(rec, field))) - { - char *ffield = strchr(fb, '\t'); - if (!ffield) - return; - char *eol = strchr(ffield, '\n'); - if (!eol) - return; - relevance_countwords(s->relevance, head, ffield, eol - ffield, mult); - rec = field + 1; // Crude way to cause a loop through repeating fields - } -} - -static void pull_relevance_keys(struct session *s, struct record *head, struct record *rec) -{ - relevance_newrec(s->relevance, head); - pull_relevance_field(s, head, rec->buf, "100", 2); - pull_relevance_field(s, head, rec->buf, "245", 4); - //pull_relevance_field(s, head, rec->buf, "530", 1); - pull_relevance_field(s, head, rec->buf, "630", 1); - pull_relevance_field(s, head, rec->buf, "650", 1); - pull_relevance_field(s, head, rec->buf, "700", 1); - relevance_donerecord(s->relevance, head); -} - -static struct record *ingest_record(struct client *cl, char *buf, int len) -{ - struct session *se = cl->session; - struct record *res; - struct record *head; - const char *recbuf; - - wrbuf_rewind(se->wrbuf); - yaz_marc_xml(global_parameters.yaz_marc, YAZ_MARC_LINE); - if (yaz_marc_decode_wrbuf(global_parameters.yaz_marc, buf, len, se->wrbuf) < 0) - { - yaz_log(YLOG_WARN, "Failed to decode MARC record"); - return 0; - } - wrbuf_putc(se->wrbuf, '\0'); - recbuf = wrbuf_buf(se->wrbuf); - - res = nmem_malloc(se->nmem, sizeof(struct record)); - res->buf = nmem_strdup(se->nmem, recbuf); - - extract_subject(se, res->buf); - - res->title = extract_title(se, res->buf); - res->merge_key = extract_mergekey(se, res->buf); - if (!res->merge_key) - return 0; - res->client = cl; - res->next_cluster = 0; - res->target_offset = -1; - res->term_frequency_vec = 0; - - head = reclist_insert(se->reclist, res); - - pull_relevance_keys(se, head, res); - - se->total_records++; - - return res; -} - -static void ingest_records(struct client *cl, Z_Records *r) -{ - struct record *rec; - struct session *s = cl->session; - Z_NamePlusRecordList *rlist; - int i; - - if (r->which != Z_Records_DBOSD) - return; - rlist = r->u.databaseOrSurDiagnostics; - for (i = 0; i < rlist->num_records; i++) - { - Z_NamePlusRecord *npr = rlist->records[i]; - Z_External *e; - char *buf; - int len; - - if (npr->which != Z_NamePlusRecord_databaseRecord) - { - yaz_log(YLOG_WARN, "Unexpected record type, probably diagnostic"); - continue; - } - e = npr->u.databaseRecord; - if (e->which != Z_External_octet) - { - yaz_log(YLOG_WARN, "Unexpected external branch, probably BER"); - continue; - } - buf = (char*) e->u.octet_aligned->buf; - len = e->u.octet_aligned->len; - - rec = ingest_record(cl, buf, len); - if (!rec) - continue; - } - if (s->watchlist[SESSION_WATCH_RECORDS].fun && rlist->num_records) - session_alert_watch(s, SESSION_WATCH_RECORDS); -} - -static void do_presentResponse(IOCHAN i, Z_APDU *a) -{ - struct connection *co = iochan_getdata(i); - struct client *cl = co->client; - Z_PresentResponse *r = a->u.presentResponse; - - if (r->records) { - Z_Records *recs = r->records; - if (recs->which == Z_Records_NSD) - { - yaz_log(YLOG_WARN, "Non-surrogate diagnostic"); - cl->diagnostic = *recs->u.nonSurrogateDiagnostic->condition; - cl->state = Client_Error; - } - } - - if (!*r->presentStatus && cl->state != Client_Error) - { - yaz_log(YLOG_DEBUG, "Good Present response"); - cl->records += *r->numberOfRecordsReturned; - ingest_records(cl, r->records); - cl->state = Client_Idle; - } - else if (*r->presentStatus) - { - yaz_log(YLOG_WARN, "Bad Present response"); - cl->state = Client_Error; - } -} - -static void handler(IOCHAN i, int event) -{ - struct connection *co = iochan_getdata(i); - struct client *cl = co->client; - struct session *se = 0; - - if (cl) - se = cl->session; - else - { - yaz_log(YLOG_WARN, "Destroying orphan connection (fix me?)"); - connection_destroy(co); - return; - } - - if (co->state == Conn_Connecting && event & EVENT_OUTPUT) - { - int errcode; - socklen_t errlen = sizeof(errcode); - - if (getsockopt(cs_fileno(co->link), SOL_SOCKET, SO_ERROR, &errcode, - &errlen) < 0 || errcode != 0) - { - client_fatal(cl); - return; - } - else - { - yaz_log(YLOG_DEBUG, "Connect OK"); - co->state = Conn_Open; - if (cl) - cl->state = Client_Connected; - } - } - - else if (event & EVENT_INPUT) - { - int len = cs_get(co->link, &co->ibuf, &co->ibufsize); - - if (len < 0) - { - yaz_log(YLOG_WARN|YLOG_ERRNO, "Error reading from Z server"); - connection_destroy(co); - return; - } - else if (len == 0) - { - yaz_log(YLOG_WARN, "EOF reading from Z server"); - connection_destroy(co); - return; - } - else if (len > 1) // We discard input if we have no connection - { - co->state = Conn_Open; - - if (cl && (cl->requestid == se->requestid || cl->state == Client_Initializing)) - { - Z_APDU *a; - - odr_reset(global_parameters.odr_in); - odr_setbuf(global_parameters.odr_in, co->ibuf, len, 0); - if (!z_APDU(global_parameters.odr_in, &a, 0, 0)) - { - client_fatal(cl); - return; - } - switch (a->which) - { - case Z_APDU_initResponse: - do_initResponse(i, a); - break; - case Z_APDU_searchResponse: - do_searchResponse(i, a); - break; - case Z_APDU_presentResponse: - do_presentResponse(i, a); - break; - default: - yaz_log(YLOG_WARN, "Unexpected result from server"); - client_fatal(cl); - return; - } - // We aren't expecting staggered output from target - // if (cs_more(t->link)) - // iochan_setevent(i, EVENT_INPUT); - } - else // we throw away response and go to idle mode - { - yaz_log(YLOG_DEBUG, "Ignoring result of expired operation"); - cl->state = Client_Idle; - } - } - /* if len==1 we do nothing but wait for more input */ - } - - if (cl->state == Client_Connected) { - send_init(i); - } - - if (cl->state == Client_Idle) - { - if (cl->requestid != se->requestid && *se->query) { - send_search(i); - } - else if (cl->hits > 0 && cl->records < global_parameters.toget && - cl->records < cl->hits) { - send_present(i); - } - } -} - -// Disassociate connection from client -static void connection_release(struct connection *co) -{ - struct client *cl = co->client; - - yaz_log(YLOG_DEBUG, "Connection release %s", co->host->hostport); - if (!cl) - return; - cl->connection = 0; - co->client = 0; -} - -// Close connection and recycle structure -static void connection_destroy(struct connection *co) -{ - struct host *h = co->host; - cs_close(co->link); - iochan_destroy(co->iochan); - - yaz_log(YLOG_DEBUG, "Connection destroy %s", co->host->hostport); - if (h->connections == co) - h->connections = co->next; - else - { - struct connection *pco; - for (pco = h->connections; pco && pco->next != co; pco = pco->next) - ; - if (pco) - pco->next = co->next; - else - abort(); - } - if (co->client) - { - if (co->client->state != Client_Idle) - co->client->state = Client_Disconnected; - co->client->connection = 0; - } - co->next = connection_freelist; - connection_freelist = co; -} - -// Creates a new connection for client, associated with the host of -// client's database -static struct connection *connection_create(struct client *cl) -{ - struct connection *new; - COMSTACK link; - int res; - void *addr; - - yaz_log(YLOG_DEBUG, "Connection create %s", cl->database->url); - if (!(link = cs_create(tcpip_type, 0, PROTO_Z3950))) - { - yaz_log(YLOG_FATAL|YLOG_ERRNO, "Failed to create comstack"); - exit(1); - } - - if (!(addr = cs_straddr(link, cl->database->host->ipport))) - { - yaz_log(YLOG_WARN|YLOG_ERRNO, "Lookup of IP address failed?"); - return 0; - } - - res = cs_connect(link, addr); - if (res < 0) - { - yaz_log(YLOG_WARN|YLOG_ERRNO, "cs_connect %s", cl->database->url); - return 0; - } - - if ((new = connection_freelist)) - connection_freelist = new->next; - else - { - new = xmalloc(sizeof (struct connection)); - new->ibuf = 0; - new->ibufsize = 0; - } - new->state = Conn_Connecting; - new->host = cl->database->host; - new->next = new->host->connections; - new->host->connections = new; - new->client = cl; - cl->connection = new; - new->link = link; - - new->iochan = iochan_create(cs_fileno(link), handler, 0); - iochan_setdata(new->iochan, new); - new->iochan->next = channel_list; - channel_list = new->iochan; - return new; -} - -// Close connection and set state to error -static void client_fatal(struct client *cl) -{ - yaz_log(YLOG_WARN, "Fatal error from %s", cl->database->url); - connection_destroy(cl->connection); - cl->state = Client_Error; -} - -// Ensure that client has a connection associated -static int client_prep_connection(struct client *cl) -{ - struct connection *co; - struct session *se = cl->session; - struct host *host = cl->database->host; - - co = cl->connection; - - yaz_log(YLOG_DEBUG, "Client prep %s", cl->database->url); - - if (!co) - { - // See if someone else has an idle connection - // We should look at timestamps here to select the longest-idle connection - for (co = host->connections; co; co = co->next) - if (co->state == Conn_Open && (!co->client || co->client->session != se)) - break; - if (co) - { - connection_release(co); - cl->connection = co; - co->client = cl; - } - else - co = connection_create(cl); - } - if (co) - { - if (co->state == Conn_Connecting) - cl->state = Client_Connecting; - else if (co->state == Conn_Open) - { - if (cl->state == Client_Error || cl->state == Client_Disconnected) - cl->state = Client_Idle; - } - iochan_setflag(co->iochan, EVENT_OUTPUT); - return 1; - } - else - return 0; -} - -void load_simpletargets(const char *fn) -{ - FILE *f = fopen(fn, "r"); - char line[256]; - - if (!f) - { - yaz_log(YLOG_WARN|YLOG_ERRNO, "open %s", fn); - exit(1); - } - - while (fgets(line, 255, f)) - { - char *url, *db; - struct host *host; - struct database *database; - - if (strncmp(line, "target ", 7)) - continue; - url = line + 7; - url[strlen(url) - 1] = '\0'; - yaz_log(YLOG_DEBUG, "Target: %s", url); - if ((db = strchr(url, '/'))) - *(db++) = '\0'; - else - db = "Default"; - - for (host = hosts; host; host = host->next) - if (!strcmp(url, host->hostport)) - break; - if (!host) - { - struct addrinfo *addrinfo, hints; - char *port; - char ipport[128]; - unsigned char addrbuf[4]; - int res; - - host = xmalloc(sizeof(struct host)); - host->hostport = xstrdup(url); - host->connections = 0; - - if ((port = strchr(url, ':'))) - *(port++) = '\0'; - else - port = "210"; - - hints.ai_flags = 0; - hints.ai_family = PF_INET; - hints.ai_socktype = SOCK_STREAM; - hints.ai_protocol = IPPROTO_TCP; - hints.ai_addrlen = 0; - hints.ai_addr = 0; - hints.ai_canonname = 0; - hints.ai_next = 0; - // This is not robust code. It assumes that getaddrinfo returns AF_INET - // address. - if ((res = getaddrinfo(url, port, &hints, &addrinfo))) - { - yaz_log(YLOG_WARN, "Failed to resolve %s: %s", url, gai_strerror(res)); - continue; - } - assert(addrinfo->ai_family == PF_INET); - memcpy(addrbuf, &((struct sockaddr_in*)addrinfo->ai_addr)->sin_addr.s_addr, 4); - sprintf(ipport, "%hhd.%hhd.%hhd.%hhd:%s", - addrbuf[0], addrbuf[1], addrbuf[2], addrbuf[3], port); - host->ipport = xstrdup(ipport); - freeaddrinfo(addrinfo); - host->next = hosts; - hosts = host; - } - database = xmalloc(sizeof(struct database)); - database->host = host; - database->url = xmalloc(strlen(url) + strlen(db) + 2); - strcpy(database->url, url); - strcat(database->url, "/"); - strcat(database->url, db); - strcpy(database->databases[0], db); - *database->databases[1] = '\0'; - database->errors = 0; - database->next = databases; - databases = database; - - } - fclose(f); -} - -static void pull_terms(NMEM nmem, struct ccl_rpn_node *n, char **termlist, int *num) -{ - switch (n->kind) - { - case CCL_RPN_AND: - case CCL_RPN_OR: - case CCL_RPN_NOT: - case CCL_RPN_PROX: - pull_terms(nmem, n->u.p[0], termlist, num); - pull_terms(nmem, n->u.p[1], termlist, num); - break; - case CCL_RPN_TERM: - termlist[(*num)++] = nmem_strdup(nmem, n->u.t.term); - break; - default: // NOOP - break; - } -} - -// Extract terms from query into null-terminated termlist -static int extract_terms(NMEM nmem, char *query, char **termlist) -{ - int error, pos; - struct ccl_rpn_node *n; - int num = 0; - - n = ccl_find_str(global_parameters.ccl_filter, query, &error, &pos); - if (!n) - return -1; - pull_terms(nmem, n, termlist, &num); - termlist[num] = 0; - ccl_rpn_delete(n); - return 0; -} - -static struct client *client_create(void) -{ - struct client *r; - if (client_freelist) - { - r = client_freelist; - client_freelist = client_freelist->next; - } - else - r = xmalloc(sizeof(struct client)); - r->database = 0; - r->connection = 0; - r->session = 0; - r->hits = 0; - r->records = 0; - r->setno = 0; - r->requestid = -1; - r->diagnostic = 0; - r->state = Client_Disconnected; - r->next = 0; - return r; -} - -void client_destroy(struct client *c) -{ - struct session *se = c->session; - if (c == se->clients) - se->clients = c->next; - else - { - struct client *cc; - for (cc = se->clients; cc && cc->next != c; cc = cc->next) - ; - if (cc) - cc->next = c->next; - } - if (c->connection) - connection_release(c->connection); - c->next = client_freelist; - client_freelist = c; -} - -void session_set_watch(struct session *s, int what, session_watchfun fun, void *data) -{ - s->watchlist[what].fun = fun; - s->watchlist[what].data = data; -} - -void session_alert_watch(struct session *s, int what) -{ - if (!s->watchlist[what].fun) - return; - (*s->watchlist[what].fun)(s->watchlist[what].data); - s->watchlist[what].fun = 0; - s->watchlist[what].data = 0; -} - -// This should be extended with parameters to control selection criteria -// Associates a set of clients with a session; -int select_targets(struct session *se) -{ - struct database *db; - int c = 0; - - while (se->clients) - client_destroy(se->clients); - for (db = databases; db; db = db->next) - { - struct client *cl = client_create(); - cl->database = db; - cl->session = se; - cl->next = se->clients; - se->clients = cl; - c++; - } - return c; -} - -char *search(struct session *se, char *query) -{ - int live_channels = 0; - struct client *cl; - - yaz_log(YLOG_DEBUG, "Search"); - - strcpy(se->query, query); - se->requestid++; - nmem_reset(se->nmem); - for (cl = se->clients; cl; cl = cl->next) - { - cl->hits = -1; - cl->records = 0; - cl->diagnostic = 0; - - if (client_prep_connection(cl)) - live_channels++; - } - if (live_channels) - { - char *p[512]; - int maxrecs = live_channels * global_parameters.toget; - se->termlist = termlist_create(se->nmem, maxrecs, 15); - se->reclist = reclist_create(se->nmem, maxrecs); - extract_terms(se->nmem, query, p); - se->relevance = relevance_create(se->nmem, (const char **) p, maxrecs); - se->total_records = se->total_hits = 0; - } - else - return "NOTARGETS"; - - return 0; -} - -void destroy_session(struct session *s) -{ - yaz_log(YLOG_LOG, "Destroying session"); - while (s->clients) - client_destroy(s->clients); - nmem_destroy(s->nmem); - wrbuf_free(s->wrbuf, 1); -} - -struct session *new_session() -{ - int i; - struct session *session = xmalloc(sizeof(*session)); - - yaz_log(YLOG_DEBUG, "New pazpar2 session"); - - session->total_hits = 0; - session->total_records = 0; - session->termlist = 0; - session->reclist = 0; - session->requestid = -1; - session->clients = 0; - session->query[0] = '\0'; - session->nmem = nmem_create(); - session->wrbuf = wrbuf_alloc(); - for (i = 0; i <= SESSION_WATCH_MAX; i++) - { - session->watchlist[i].data = 0; - session->watchlist[i].fun = 0; - } - - select_targets(session); - - return session; -} - -struct hitsbytarget *hitsbytarget(struct session *se, int *count) -{ - static struct hitsbytarget res[1000]; // FIXME MM - struct client *cl; - - *count = 0; - for (cl = se->clients; cl; cl = cl->next) - { - strcpy(res[*count].id, cl->database->host->hostport); - res[*count].hits = cl->hits; - res[*count].records = cl->records; - res[*count].diagnostic = cl->diagnostic; - res[*count].state = client_states[cl->state]; - res[*count].connected = cl->connection ? 1 : 0; - (*count)++; - } - - return res; -} - -struct termlist_score **termlist(struct session *s, int *num) -{ - return termlist_highscore(s->termlist, num); -} - -struct record **show(struct session *s, int start, int *num, int *total, int *sumhits) -{ - struct record **recs = nmem_malloc(s->nmem, *num * sizeof(struct record *)); - int i; - - relevance_prepare_read(s->relevance, s->reclist); - - *total = s->reclist->num_records; - *sumhits = s->total_hits; - - for (i = 0; i < start; i++) - if (!reclist_read_record(s->reclist)) - { - *num = 0; - return 0; - } - - for (i = 0; i < *num; i++) - { - struct record *r = reclist_read_record(s->reclist); - if (!r) - { - *num = i; - break; - } - recs[i] = r; - } - return recs; -} - -void statistics(struct session *se, struct statistics *stat) -{ - struct client *cl; - int count = 0; - - bzero(stat, sizeof(*stat)); - for (cl = se->clients; cl; cl = cl->next) - { - if (!cl->connection) - stat->num_no_connection++; - switch (cl->state) - { - case Client_Connecting: stat->num_connecting++; break; - case Client_Initializing: stat->num_initializing++; break; - case Client_Searching: stat->num_searching++; break; - case Client_Presenting: stat->num_presenting++; break; - case Client_Idle: stat->num_idle++; break; - case Client_Failed: stat->num_failed++; break; - case Client_Error: stat->num_error++; break; - default: break; - } - count++; - } - stat->num_hits = se->total_hits; - stat->num_records = se->total_records; - - stat->num_clients = count; -} - -static CCL_bibset load_cclfile(const char *fn) -{ - CCL_bibset res = ccl_qual_mk(); - if (ccl_qual_fname(res, fn) < 0) - { - yaz_log(YLOG_FATAL|YLOG_ERRNO, "%s", fn); - exit(1); - } - return res; -} - -int main(int argc, char **argv) -{ - int ret; - char *arg; - int setport = 0; - - if (signal(SIGPIPE, SIG_IGN) < 0) - yaz_log(YLOG_WARN|YLOG_ERRNO, "signal"); - - yaz_log_init(YLOG_DEFAULT_LEVEL, "pazpar2", 0); - - while ((ret = options("c:h:p:C:s:", argv, argc, &arg)) != -2) - { - switch (ret) { - case 'c': - command_init(atoi(arg)); - setport++; - break; - case 'h': - http_init(arg); - setport++; - break; - case 'C': - global_parameters.ccl_filter = load_cclfile(arg); - break; - case 'p': - http_set_proxyaddr(arg); - break; - case 's': - load_simpletargets(arg); - break; - default: - fprintf(stderr, "Usage: pazpar2\n" - " -h [host:]port (REST protocol listener)\n" - " -c cmdport (telnet-style)\n" - " -C cclconfig\n" - " -s simpletargetfile\n" - " -p hostname[:portno] (HTTP proxy)\n"); - exit(1); - } - } - - if (!setport) - { - fprintf(stderr, "Set command port with -h or -c\n"); - exit(1); - } - - global_parameters.ccl_filter = load_cclfile("default.bib"); - global_parameters.yaz_marc = yaz_marc_create(); - yaz_marc_subfield_str(global_parameters.yaz_marc, "\t"); - global_parameters.odr_in = odr_createmem(ODR_DECODE); - global_parameters.odr_out = odr_createmem(ODR_ENCODE); - - event_loop(&channel_list); - - return 0; -} - -/* - * Local variables: - * c-basic-offset: 4 - * indent-tabs-mode: nil - * End: - * vim: shiftwidth=4 tabstop=8 expandtab - */ diff --git a/pazpar2.h b/pazpar2.h deleted file mode 100644 index 8de4b91..0000000 --- a/pazpar2.h +++ /dev/null @@ -1,172 +0,0 @@ -#ifndef PAZPAR2_H -#define PAZPAR2_H - -struct record; - -#include - -#include -#include -#include -#include -#include "termlists.h" -#include "relevance.h" -#include "eventl.h" - -#define MAX_DATABASES 512 - -struct record { - struct client *client; - int target_offset; - char *buf; - char *merge_key; - char *title; - int relevance; - int *term_frequency_vec; - struct record *next_cluster; -}; - -struct connection; - -// Represents a host (irrespective of databases) -struct host { - char *hostport; - char *ipport; - struct connection *connections; // All connections to this - struct host *next; -}; - -// Represents a (virtual) database on a host -struct database { - struct host *host; - char *url; - char databases[MAX_DATABASES][128]; - int errors; - struct database *next; -}; - -struct client; - -// Represents a physical, reusable connection to a remote Z39.50 host -struct connection { - IOCHAN iochan; - COMSTACK link; - struct host *host; - struct client *client; - char *ibuf; - int ibufsize; - enum { - Conn_Connecting, - Conn_Open, - Conn_Waiting, - } state; - struct connection *next; -}; - -// Represents client state for a connection to one search target -struct client { - struct database *database; - struct connection *connection; - struct session *session; - int hits; - int records; - int setno; - int requestid; // ID of current outstanding request - int diagnostic; - enum client_state - { - Client_Connecting, - Client_Connected, - Client_Idle, - Client_Initializing, - Client_Searching, - Client_Presenting, - Client_Error, - Client_Failed, - Client_Disconnected, - Client_Stopped - } state; - struct client *next; -}; - -#define SESSION_WATCH_RECORDS 0 -#define SESSION_WATCH_MAX 0 - -typedef void (*session_watchfun)(void *data); - -// End-user session -struct session { - struct client *clients; - int requestid; - char query[1024]; - NMEM nmem; // Nmem for each operation (i.e. search) - WRBUF wrbuf; // Wrbuf for scratch(i.e. search) - struct termlist *termlist; - struct relevance *relevance; - struct reclist *reclist; - struct { - void *data; - session_watchfun fun; - } watchlist[SESSION_WATCH_MAX + 1]; - int total_hits; - int total_records; -}; - -struct statistics { - int num_clients; - int num_no_connection; - int num_connecting; - int num_initializing; - int num_searching; - int num_presenting; - int num_idle; - int num_failed; - int num_error; - int num_hits; - int num_records; -}; - -struct hitsbytarget { - char id[256]; - int hits; - int diagnostic; - int records; - char* state; - int connected; -}; - -struct parameters { - int timeout; /* operations timeout, in seconds */ - char implementationId[128]; - char implementationName[128]; - char implementationVersion[128]; - int target_timeout; // seconds - int session_timeout; - int toget; - int chunk; - CCL_bibset ccl_filter; - yaz_marc_t yaz_marc; - ODR odr_out; - ODR odr_in; -}; - -struct hitsbytarget *hitsbytarget(struct session *s, int *count); -int select_targets(struct session *se); -struct session *new_session(); -void destroy_session(struct session *s); -int load_targets(struct session *s, const char *fn); -void statistics(struct session *s, struct statistics *stat); -char *search(struct session *s, char *query); -struct record **show(struct session *s, int start, int *num, int *total, int *sumhits); -struct termlist_score **termlist(struct session *s, int *num); -void session_set_watch(struct session *s, int what, session_watchfun fun, void *data); - -#endif - -/* - * Local variables: - * c-basic-offset: 4 - * indent-tabs-mode: nil - * End: - * vim: shiftwidth=4 tabstop=8 expandtab - */ diff --git a/reclists.c b/reclists.c deleted file mode 100644 index 579e34b..0000000 --- a/reclists.c +++ /dev/null @@ -1,110 +0,0 @@ -/* - * $Id: reclists.c,v 1.4 2006-12-08 21:40:58 quinn Exp $ - */ - -#include - -#include - -#include "pazpar2.h" -#include "reclists.h" - -struct reclist_bucket -{ - struct record *record; - struct reclist_bucket *next; -}; - -struct record *reclist_read_record(struct reclist *l) -{ - if (l->pointer < l->num_records) - return l->flatlist[l->pointer++]; - else - return 0; -} - -void reclist_rewind(struct reclist *l) -{ - l->pointer = 0; -} - -// Jenkins one-at-a-time hash (from wikipedia) -static unsigned int hash(const unsigned char *key) -{ - unsigned int hash = 0; - - while (*key) - { - hash += *(key++); - hash += (hash << 10); - hash ^= (hash >> 6); - } - hash += (hash << 3); - hash ^= (hash >> 11); - hash += (hash << 15); - return hash; -} - -struct reclist *reclist_create(NMEM nmem, int numrecs) -{ - int hashsize = 1; - struct reclist *res; - - assert(numrecs); - while (hashsize < numrecs) - hashsize <<= 1; - res = nmem_malloc(nmem, sizeof(struct reclist)); - res->hashtable = nmem_malloc(nmem, hashsize * sizeof(struct reclist_bucket*)); - bzero(res->hashtable, hashsize * sizeof(struct reclist_bucket*)); - res->hashtable_size = hashsize; - res->nmem = nmem; - res->hashmask = hashsize - 1; // Creates a bitmask - - res->num_records = 0; - res->flatlist = nmem_malloc(nmem, numrecs * sizeof(struct record*)); - res->flatlist_size = numrecs; - - return res; -} - -struct record *reclist_insert(struct reclist *l, struct record *record) -{ - unsigned int bucket; - struct reclist_bucket **p; - struct record *head; - - bucket = hash((unsigned char*) record->merge_key) & l->hashmask; - for (p = &l->hashtable[bucket]; *p; p = &(*p)->next) - { - // We found a matching record. Merge them - if (!strcmp(record->merge_key, (*p)->record->merge_key)) - { - struct record *existing = (*p)->record; - record->next_cluster = existing->next_cluster; - existing->next_cluster = record; - head = existing; - break; - } - } - if (!*p) // We made it to the end of the bucket without finding match - { - struct reclist_bucket *new = nmem_malloc(l->nmem, - sizeof(struct reclist_bucket)); - new->record = record; - record->next_cluster = 0; - new->next = 0; - *p = new; - l->flatlist[l->num_records++] = record; - head = record; - } - return head; -} - - -/* - * Local variables: - * c-basic-offset: 4 - * indent-tabs-mode: nil - * End: - * vim: shiftwidth=4 tabstop=8 expandtab - */ diff --git a/reclists.h b/reclists.h deleted file mode 100644 index f9d38c3..0000000 --- a/reclists.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef RECLISTS_H -#define RECLISTS_H - -struct reclist -{ - struct reclist_bucket **hashtable; - int hashtable_size; - int hashmask; - - struct record **flatlist; - int flatlist_size; - int num_records; - int pointer; - - NMEM nmem; -}; - -struct reclist *reclist_create(NMEM, int numrecs); -struct record * reclist_insert(struct reclist *tl, struct record *record); -struct record *reclist_read_record(struct reclist *l); -void reclist_rewind(struct reclist *l); - -#endif diff --git a/relevance.c b/relevance.c deleted file mode 100644 index c627a98..0000000 --- a/relevance.c +++ /dev/null @@ -1,247 +0,0 @@ -/* - * $Id: relevance.c,v 1.3 2006-11-27 14:35:15 quinn Exp $ - */ - -#include -#include -#include - -#include "relevance.h" -#include "pazpar2.h" - -struct relevance -{ - int *doc_frequency_vec; - int vec_len; - struct word_trie *wt; - NMEM nmem; -}; - -// We use this data structure to recognize terms in input records, -// and map them to record term vectors for counting. -struct word_trie -{ - struct - { - struct word_trie *child; - int termno; - } list[26]; -}; - -static struct word_trie *create_word_trie_node(NMEM nmem) -{ - struct word_trie *res = nmem_malloc(nmem, sizeof(struct word_trie)); - int i; - for (i = 0; i < 26; i++) - { - res->list[i].child = 0; - res->list[i].termno = -1; - } - return res; -} - -static void word_trie_addterm(NMEM nmem, struct word_trie *n, const char *term, int num) -{ - while (*term) { - int c = tolower(*term); - if (c < 'a' || c > 'z') - term++; - else - { - c -= 'a'; - if (!*(++term)) - n->list[c].termno = num; - else - { - if (!n->list[c].child) - { - struct word_trie *new = create_word_trie_node(nmem); - n->list[c].child = new; - } - word_trie_addterm(nmem, n->list[c].child, term, num); - } - break; - } - } -} - -#define raw_char(c) (((c) >= 'a' && (c) <= 'z') ? (c) - 'a' : -1) - -static int word_trie_match(struct word_trie *t, const char *word, int len, int *skipped) -{ - int c = raw_char(tolower(*word)); - - if (!len) - return 0; - - word++; len--; - (*skipped)++; - if (!len || raw_char(*word) < 0) - { - if (t->list[c].termno > 0) - return t->list[c].termno; - else - return 0; - } - else - { - if (t->list[c].child) - { - return word_trie_match(t->list[c].child, word, len, skipped); - } - else - return 0; - } - -} - - -static struct word_trie *build_word_trie(NMEM nmem, const char **terms) -{ - struct word_trie *res = create_word_trie_node(nmem); - const char **p; - int i; - - for (i = 1, p = terms; *p; p++, i++) - word_trie_addterm(nmem, res, *p, i); - return res; -} - -struct relevance *relevance_create(NMEM nmem, const char **terms, int numrecs) -{ - struct relevance *res = nmem_malloc(nmem, sizeof(struct relevance)); - const char **p; - int i; - - for (p = terms, i = 0; *p; p++, i++) - ; - res->vec_len = ++i; - res->doc_frequency_vec = nmem_malloc(nmem, res->vec_len * sizeof(int)); - bzero(res->doc_frequency_vec, res->vec_len * sizeof(int)); - res->nmem = nmem; - res->wt = build_word_trie(nmem, terms); - return res; -} - -void relevance_newrec(struct relevance *r, struct record *rec) -{ - if (!rec->term_frequency_vec) - { - rec->term_frequency_vec = nmem_malloc(r->nmem, r->vec_len * sizeof(int)); - bzero(rec->term_frequency_vec, r->vec_len * sizeof(int)); - } -} - - -// FIXME. The definition of a word is crude here.. should support -// some form of localization mechanism? -void relevance_countwords(struct relevance *r, struct record *head, - const char *words, int len, int multiplier) -{ - while (len) - { - char c; - int res; - int skipped; - while (len && (c = raw_char(tolower(*words))) < 0) - { - words++; - len--; - } - if (!len) - return; - skipped = 0; - if ((res = word_trie_match(r->wt, words, len, &skipped))) - { - words += skipped; - len -= skipped; - head->term_frequency_vec[res] += multiplier; - } - else - { - while (len && (c = raw_char(tolower(*words))) >= 0) - { - words++; - len--; - } - } - head->term_frequency_vec[0]++; - } -} - -void relevance_donerecord(struct relevance *r, struct record *head) -{ - int i; - - for (i = 1; i < r->vec_len; i++) - if (head->term_frequency_vec[i] > 0) - r->doc_frequency_vec[i]++; - - r->doc_frequency_vec[0]++; -} - -#ifdef FLOAT_REL -static int comp(const void *p1, const void *p2) -{ - float res; - struct record **r1 = (struct record **) p1; - struct record **r2 = (struct record **) p2; - res = (*r2)->relevance - (*r1)->relevance; - if (res > 0) - return 1; - else if (res < 0) - return -1; - else - return 0; -} -#else -static int comp(const void *p1, const void *p2) -{ - struct record **r1 = (struct record **) p1; - struct record **r2 = (struct record **) p2; - return (*r2)->relevance - (*r1)->relevance; -} -#endif - -// Prepare for a relevance-sorted read of up to num entries -void relevance_prepare_read(struct relevance *rel, struct reclist *reclist) -{ - int i; - float *idfvec = xmalloc(rel->vec_len * sizeof(float)); - - // Calculate document frequency vector for each term. - for (i = 1; i < rel->vec_len; i++) - { - if (!rel->doc_frequency_vec[i]) - idfvec[i] = 0; - else - idfvec[i] = log((float) rel->doc_frequency_vec[0] / rel->doc_frequency_vec[i]); - } - // Calculate relevance for each document - for (i = 0; i < reclist->num_records; i++) - { - int t; - struct record *rec = reclist->flatlist[i]; - float relevance; - relevance = 0; - for (t = 1; t < rel->vec_len; t++) - { - float termfreq; - if (!rec->term_frequency_vec[0]) - break; - termfreq = (float) rec->term_frequency_vec[t] / rec->term_frequency_vec[0]; - relevance += termfreq * idfvec[t]; - } - rec->relevance = (int) (relevance * 100000); - } - qsort(reclist->flatlist, reclist->num_records, sizeof(struct record*), comp); - reclist->pointer = 0; -} - -/* - * Local variables: - * c-basic-offset: 4 - * indent-tabs-mode: nil - * End: - * vim: shiftwidth=4 tabstop=8 expandtab - */ diff --git a/relevance.h b/relevance.h deleted file mode 100644 index 38c3d9c..0000000 --- a/relevance.h +++ /dev/null @@ -1,27 +0,0 @@ -#ifndef RELEVANCE_H -#define RELEVANCE_H - -#include - -#include "pazpar2.h" -#include "reclists.h" - -struct relevance; - -struct relevance *relevance_create(NMEM nmem, const char **terms, int numrecs); -void relevance_newrec(struct relevance *r, struct record *rec); -void relevance_countwords(struct relevance *r, struct record *rec, - const char *words, int len, int multiplier); -void relevance_donerecord(struct relevance *r, struct record *rec); - -void relevance_prepare_read(struct relevance *rel, struct reclist *rec); - -#endif - -/* - * Local variables: - * c-basic-offset: 4 - * indent-tabs-mode: nil - * End: - * vim: shiftwidth=4 tabstop=8 expandtab - */ diff --git a/src/Makefile b/src/Makefile new file mode 100644 index 0000000..52e23c1 --- /dev/null +++ b/src/Makefile @@ -0,0 +1,45 @@ +# ParaZ. Copyright (C) 2000-2004, Index Data ApS +# All rights reserved. +# $Id: Makefile,v 1.1 2006-12-20 20:47:16 quinn Exp $ + +SHELL=/bin/sh + +CC=gcc + +YAZCONF=yaz-config +YAZLIBS=`$(YAZCONF) --libs` +YAZCFLAGS=`$(YAZCONF) --cflags` + +PROG=pazpar2 +PROGO=pazpar2.o eventl.o util.o command.o http.o http_command.o termlists.o \ + reclists.o relevance.o + +all: $(PROG) + +$(PROG): $(PROGO) + $(CC) $(CFLAGS) $(YAZCFLAGS) -o $(PROG) $(PROGO) $(YAZLIBS) + +.c.o: + $(CC) -c $(CFLAGS) -I. $(YAZCFLAGS) $< + +clean: + rm -f *.[oa] test core mon.out gmon.out errlist $(PROG) + + +## Dependencies go below + +command.o: command.c command.h util.h eventl.h pazpar2.h termlists.h \ + relevance.h reclists.h +eventl.o: eventl.c eventl.h +http.o: http.c command.h util.h eventl.h pazpar2.h termlists.h \ + relevance.h reclists.h http.h http_command.h +http_command.o: http_command.c command.h util.h eventl.h pazpar2.h \ + termlists.h relevance.h reclists.h http.h http_command.h +pazpar2.o: pazpar2.c pazpar2.h termlists.h relevance.h reclists.h \ + eventl.h command.h http.h +reclists.o: reclists.c pazpar2.h termlists.h relevance.h reclists.h \ + eventl.h +relevance.o: relevance.c relevance.h pazpar2.h termlists.h eventl.h \ + reclists.h +termlists.o: termlists.c termlists.h +util.o: util.c diff --git a/src/command.c b/src/command.c new file mode 100644 index 0000000..5411b39 --- /dev/null +++ b/src/command.c @@ -0,0 +1,392 @@ +/* $Id: command.c,v 1.1 2006-12-20 20:47:16 quinn Exp $ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "command.h" +#include "util.h" +#include "eventl.h" +#include "pazpar2.h" + +extern IOCHAN channel_list; + +struct command_session { + IOCHAN channel; + char *outbuf; + + int outbuflen; + int outbufwrit; + + struct session *psession; +}; + +void command_destroy(struct command_session *s); +void command_prompt(struct command_session *s); +void command_puts(struct command_session *s, const char *buf); + +static int cmd_quit(struct command_session *s, char **argv, int argc) +{ + IOCHAN i = s->channel; + close(iochan_getfd(i)); + iochan_destroy(i); + command_destroy(s); + return 0; +} + +#ifdef GAGA +static int cmd_load(struct command_session *s, char **argv, int argc) +{ + if (argc != 2) { + command_puts(s, "Usage: load filename\n"); + } + if (load_targets(s->psession, argv[1]) < 0) + command_puts(s, "Failed to open file\n"); + return 1; +} +#endif + +static int cmd_search(struct command_session *s, char **argv, int argc) +{ + if (argc != 2) + { + command_puts(s, "Usage: search word\n"); + return 1; + } + search(s->psession, argv[1]); + return 1; +} + +static int cmd_hitsbytarget(struct command_session *s, char **argv, int argc) +{ + int count; + int i; + + struct hitsbytarget *ht = hitsbytarget(s->psession, &count); + for (i = 0; i < count; i++) + { + char buf[1024]; + + sprintf(buf, "%s: %d (%d records, diag=%d, state=%s conn=%d)\n", ht[i].id, ht[i].hits, + ht[i].records, ht[i].diagnostic, ht[i].state, ht[i].connected); + command_puts(s, buf); + } + return 1; +} + +static int cmd_show(struct command_session *s, char **argv, int argc) +{ + struct record **recs; + int num = 10; + int merged, total; + int i; + + if (argc == 2) + num = atoi(argv[1]); + + recs = show(s->psession, 0, &num, &merged, &total); + + for (i = 0; i < num; i++) + { + int rc; + struct record *cnode; + struct record *r = recs[i]; + + command_puts(s, r->merge_key); + for (rc = 1, cnode = r->next_cluster; cnode; cnode = cnode->next_cluster, rc++) + ; + if (rc > 1) + { + char buf[256]; + sprintf(buf, " (%d records)", rc); + command_puts(s, buf); + } + command_puts(s, "\n"); + } + return 1; +} + +static int cmd_stat(struct command_session *s, char **argv, int argc) +{ + char buf[1024]; + struct statistics stat; + + statistics(s->psession, &stat); + sprintf(buf, "Number of connections: %d\n", stat.num_clients); + command_puts(s, buf); + if (stat.num_no_connection) + { + sprintf(buf, "#No_connection: %d\n", stat.num_no_connection); + command_puts(s, buf); + } + if (stat.num_connecting) + { + sprintf(buf, "#Connecting: %d\n", stat.num_connecting); + command_puts(s, buf); + } + if (stat.num_initializing) + { + sprintf(buf, "#Initializing: %d\n", stat.num_initializing); + command_puts(s, buf); + } + if (stat.num_searching) + { + sprintf(buf, "#Searching: %d\n", stat.num_searching); + command_puts(s, buf); + } + if (stat.num_presenting) + { + sprintf(buf, "#Ppresenting: %d\n", stat.num_presenting); + command_puts(s, buf); + } + if (stat.num_idle) + { + sprintf(buf, "#Idle: %d\n", stat.num_idle); + command_puts(s, buf); + } + if (stat.num_failed) + { + sprintf(buf, "#Failed: %d\n", stat.num_failed); + command_puts(s, buf); + } + if (stat.num_error) + { + sprintf(buf, "#Error: %d\n", stat.num_error); + command_puts(s, buf); + } + return 1; +} + +static struct { + char *cmd; + int (*fun)(struct command_session *s, char *argv[], int argc); +} cmd_array[] = { + {"quit", cmd_quit}, +#ifdef GAGA + {"load", cmd_load}, +#endif + {"find", cmd_search}, + {"ht", cmd_hitsbytarget}, + {"stat", cmd_stat}, + {"show", cmd_show}, + {0,0} +}; + +void command_command(struct command_session *s, char *command) +{ + char *p; + char *argv[20]; + int argc = 0; + int i; + int res = -1; + + p = command; + while (*p) + { + while (isspace(*p)) + p++; + if (!*p) + break; + argv[argc++] = p; + while (*p && !isspace(*p)) + p++; + if (!*p) + break; + *(p++) = '\0'; + } + if (argc) { + for (i = 0; cmd_array[i].cmd; i++) + { + if (!strcmp(cmd_array[i].cmd, argv[0])) { + res = (cmd_array[i].fun)(s, argv, argc); + + break; + } + } + if (res < 0) { + command_puts(s, "Unknown command.\n"); + command_prompt(s); + } + else if (res == 1) { + command_prompt(s); + } + } + else + command_prompt(s); + +} + + +static void command_io(IOCHAN i, int event) +{ + int res; + char buf[1024]; + struct command_session *s; + + s = iochan_getdata(i); + + + switch (event) + { + case EVENT_INPUT: + res = read(iochan_getfd(i), buf, 1024); + if (res <= 0) + { + yaz_log(YLOG_WARN|YLOG_ERRNO, "read command"); + close(iochan_getfd(i)); + iochan_destroy(i); + command_destroy(s); + return; + } + if (!index(buf, '\n')) { + yaz_log(YLOG_WARN|YLOG_ERRNO, "Did not receive complete command"); + close(iochan_getfd(i)); + iochan_destroy(i); + command_destroy(s); + return; + } + buf[res] = '\0'; + command_command(s, buf); + break; + case EVENT_OUTPUT: + if (!s->outbuflen || s->outbufwrit < 0) + { + yaz_log(YLOG_WARN, "Called with outevent but no data"); + iochan_clearflag(i, EVENT_OUTPUT); + } + else + { + res = write(iochan_getfd(i), s->outbuf + s->outbufwrit, s->outbuflen - + s->outbufwrit); + if (res < 0) { + yaz_log(YLOG_WARN|YLOG_ERRNO, "write command"); + close(iochan_getfd(i)); + iochan_destroy(i); + command_destroy(s); + } + else + { + s->outbufwrit += res; + if (s->outbufwrit >= s->outbuflen) + { + s->outbuflen = s->outbufwrit = 0; + iochan_clearflag(i, EVENT_OUTPUT); + } + } + } + break; + default: + yaz_log(YLOG_WARN, "Bad voodoo on socket"); + } +} + +void command_puts(struct command_session *s, const char *buf) +{ + int len = strlen(buf); + memcpy(s->outbuf + s->outbuflen, buf, len); + s->outbuflen += len; + iochan_setflag(s->channel, EVENT_OUTPUT); +} + +void command_prompt(struct command_session *s) +{ + command_puts(s, "Pazpar2> "); +} + + +/* Accept a new command connection */ +static void command_accept(IOCHAN i, int event) +{ + struct sockaddr_in addr; + int fd = iochan_getfd(i); + socklen_t len; + int s; + IOCHAN c; + struct command_session *ses; + int flags; + + len = sizeof addr; + if ((s = accept(fd, (struct sockaddr *) &addr, &len)) < 0) + { + yaz_log(YLOG_WARN|YLOG_ERRNO, "accept"); + return; + } + if ((flags = fcntl(s, F_GETFL, 0)) < 0) + yaz_log(YLOG_FATAL|YLOG_ERRNO, "fcntl"); + if (fcntl(s, F_SETFL, flags | O_NONBLOCK) < 0) + yaz_log(YLOG_FATAL|YLOG_ERRNO, "fcntl2"); + + yaz_log(YLOG_LOG, "New command connection"); + c = iochan_create(s, command_io, EVENT_INPUT | EVENT_EXCEPT); + + ses = xmalloc(sizeof(*ses)); + ses->outbuf = xmalloc(50000); + ses->outbuflen = 0; + ses->outbufwrit = 0; + ses->channel = c; + ses->psession = new_session(); + iochan_setdata(c, ses); + + command_puts(ses, "Welcome to pazpar2\n\n"); + command_prompt(ses); + + c->next = channel_list; + channel_list = c; +} + +void command_destroy(struct command_session *s) { + xfree(s->outbuf); + xfree(s); +} + +/* Create a command-channel listener */ +void command_init(int port) +{ + IOCHAN c; + int l; + struct protoent *p; + struct sockaddr_in myaddr; + int one = 1; + + yaz_log(YLOG_LOG, "Command port is %d", port); + if (!(p = getprotobyname("tcp"))) { + abort(); + } + if ((l = socket(PF_INET, SOCK_STREAM, p->p_proto)) < 0) + yaz_log(YLOG_FATAL|YLOG_ERRNO, "socket"); + if (setsockopt(l, SOL_SOCKET, SO_REUSEADDR, (char*) + &one, sizeof(one)) < 0) + abort(); + + bzero(&myaddr, sizeof myaddr); + myaddr.sin_family = AF_INET; + myaddr.sin_addr.s_addr = INADDR_ANY; + myaddr.sin_port = htons(port); + if (bind(l, (struct sockaddr *) &myaddr, sizeof myaddr) < 0) + yaz_log(YLOG_FATAL|YLOG_ERRNO, "bind"); + if (listen(l, SOMAXCONN) < 0) + yaz_log(YLOG_FATAL|YLOG_ERRNO, "listen"); + + c = iochan_create(l, command_accept, EVENT_INPUT | EVENT_EXCEPT); + //iochan_setdata(c, &l); + c->next = channel_list; + channel_list = c; +} + +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ diff --git a/src/command.h b/src/command.h new file mode 100644 index 0000000..25e5b99 --- /dev/null +++ b/src/command.h @@ -0,0 +1,6 @@ +#ifndef COMMAND_H +#define COMMAND_H + +void command_init(int port); + +#endif diff --git a/src/eventl.c b/src/eventl.c new file mode 100644 index 0000000..324dca1 --- /dev/null +++ b/src/eventl.c @@ -0,0 +1,154 @@ +/* + * ParaZ - a simple tool for harvesting performance data for parallel + * operations using Z39.50. + * Copyright (c) 2000-2004 Index Data ApS + * See LICENSE file for details. + */ + +/* + * $Id: eventl.c,v 1.1 2006-12-20 20:47:16 quinn Exp $ + * Based on revision YAZ' server/eventl.c 1.29. + */ + +#include +#include +#ifdef WIN32 +#include +#else +#include +#endif +#include +#include +#include + +#include +#include +#include +#include +#include "eventl.h" +#include + +IOCHAN iochan_create(int fd, IOC_CALLBACK cb, int flags) +{ + IOCHAN new_iochan; + + if (!(new_iochan = (IOCHAN)xmalloc(sizeof(*new_iochan)))) + return 0; + new_iochan->destroyed = 0; + new_iochan->fd = fd; + new_iochan->flags = flags; + new_iochan->fun = cb; + new_iochan->force_event = 0; + new_iochan->last_event = new_iochan->max_idle = 0; + new_iochan->next = NULL; + return new_iochan; +} + +int event_loop(IOCHAN *iochans) +{ + do /* loop as long as there are active associations to process */ + { + IOCHAN p, nextp; + fd_set in, out, except; + int res, max; + static struct timeval nullto = {0, 0}, to; + struct timeval *timeout; + + FD_ZERO(&in); + FD_ZERO(&out); + FD_ZERO(&except); + timeout = &to; /* hang on select */ + to.tv_sec = 30; + to.tv_usec = 0; + max = 0; + for (p = *iochans; p; p = p->next) + { + if (p->fd < 0) + continue; + if (p->force_event) + timeout = &nullto; /* polling select */ + if (p->flags & EVENT_INPUT) + FD_SET(p->fd, &in); + if (p->flags & EVENT_OUTPUT) + FD_SET(p->fd, &out); + if (p->flags & EVENT_EXCEPT) + FD_SET(p->fd, &except); + if (p->fd > max) + max = p->fd; + } + if ((res = select(max + 1, &in, &out, &except, timeout)) < 0) + { + if (errno == EINTR) + continue; + else + abort(); + } + for (p = *iochans; p; p = p->next) + { + int force_event = p->force_event; + time_t now = time(0); + + p->force_event = 0; + if (!p->destroyed && ((p->max_idle && now - p->last_event > + p->max_idle) || force_event == EVENT_TIMEOUT)) + { + p->last_event = now; + (*p->fun)(p, EVENT_TIMEOUT); + } + if (p->fd < 0) + continue; + if (!p->destroyed && (FD_ISSET(p->fd, &in) || + force_event == EVENT_INPUT)) + { + p->last_event = now; + (*p->fun)(p, EVENT_INPUT); + } + if (!p->destroyed && (FD_ISSET(p->fd, &out) || + force_event == EVENT_OUTPUT)) + { + p->last_event = now; + (*p->fun)(p, EVENT_OUTPUT); + } + if (!p->destroyed && (FD_ISSET(p->fd, &except) || + force_event == EVENT_EXCEPT)) + { + p->last_event = now; + (*p->fun)(p, EVENT_EXCEPT); + } + } + for (p = *iochans; p; p = nextp) + { + nextp = p->next; + + if (p->destroyed) + { + IOCHAN tmp = p, pr; + + /* Now reset the pointers */ + if (p == *iochans) + *iochans = p->next; + else + { + for (pr = *iochans; pr; pr = pr->next) + if (pr->next == p) + break; + assert(pr); /* grave error if it weren't there */ + pr->next = p->next; + } + if (nextp == p) + nextp = p->next; + xfree(tmp); + } + } + } + while (*iochans); + return 0; +} + +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ diff --git a/src/eventl.h b/src/eventl.h new file mode 100644 index 0000000..199d944 --- /dev/null +++ b/src/eventl.h @@ -0,0 +1,107 @@ +/* + * Copyright (c) 1995-1999, Index Data + * See the file LICENSE for details. + * Sebastian Hammer, Adam Dickmeiss + * + * $Log: eventl.h,v $ + * Revision 1.1 2006-12-20 20:47:16 quinn + * Reorganized source tree + * + * Revision 1.3 2006/12/12 02:36:24 quinn + * Implemented session timeout; ping command + * + * Revision 1.2 2006/11/18 05:00:38 quinn + * Added record retrieval, etc. + * + * Revision 1.1.1.1 2006/11/14 20:44:38 quinn + * PazPar2 + * + * Revision 1.1.1.1 2000/02/23 14:40:18 heikki + * Original import to cvs + * + * Revision 1.11 1999/04/20 09:56:48 adam + * Added 'name' paramter to encoder/decoder routines (typedef Odr_fun). + * Modified all encoders/decoders to reflect this change. + * + * Revision 1.10 1998/01/29 13:30:23 adam + * Better event handle system for NT/Unix. + * + * Revision 1.9 1997/09/01 09:31:48 adam + * Removed definition statserv_remove from statserv.h to eventl.h. + * + * Revision 1.8 1995/06/19 12:39:09 quinn + * Fixed bug in timeout code. Added BER dumper. + * + * Revision 1.7 1995/06/16 10:31:34 quinn + * Added session timeout. + * + * Revision 1.6 1995/05/16 08:51:02 quinn + * License, documentation, and memory fixes + * + * Revision 1.5 1995/05/15 11:56:37 quinn + * Asynchronous facilities. Restructuring of seshigh code. + * + * Revision 1.4 1995/03/27 08:34:23 quinn + * Added dynamic server functionality. + * Released bindings to session.c (is now redundant) + * + * Revision 1.3 1995/03/15 08:37:42 quinn + * Now we're pretty much set for nonblocking I/O. + * + * Revision 1.2 1995/03/14 10:28:00 quinn + * More work on demo server. + * + * Revision 1.1 1995/03/10 18:22:45 quinn + * The rudiments of an asynchronous server. + * + */ + +#ifndef EVENTL_H +#define EVENTL_H + +#include + +struct iochan; + +typedef void (*IOC_CALLBACK)(struct iochan *i, int event); + +typedef struct iochan +{ + int fd; + int flags; +#define EVENT_INPUT 0x01 +#define EVENT_OUTPUT 0x02 +#define EVENT_EXCEPT 0x04 +#define EVENT_TIMEOUT 0x08 +#define EVENT_WORK 0x10 + int force_event; + IOC_CALLBACK fun; + void *data; + int destroyed; + time_t last_event; + time_t max_idle; + + struct iochan *next; +} *IOCHAN; + +#define iochan_destroy(i) (void)((i)->destroyed = 1) +#define iochan_getfd(i) ((i)->fd) +#define iochan_setfd(i, f) ((i)->fd = (f)) +#define iochan_getdata(i) ((i)->data) +#define iochan_setdata(i, d) ((i)->data = d) +#define iochan_getflags(i) ((i)->flags) +#define iochan_setflags(i, d) ((i)->flags = d) +#define iochan_setflag(i, d) ((i)->flags |= d) +#define iochan_clearflag(i, d) ((i)->flags &= ~(d)) +#define iochan_getflag(i, d) ((i)->flags & d ? 1 : 0) +#define iochan_getfun(i) ((i)->fun) +#define iochan_setfun(i, d) ((i)->fun = d) +#define iochan_setevent(i, e) ((i)->force_event = (e)) +#define iochan_getnext(i) ((i)->next) +#define iochan_settimeout(i, t) ((i)->max_idle = (t), (i)->last_event = time(0)) +#define iochan_activity(i) ((i)->last_event = time(0)) + +IOCHAN iochan_create(int fd, IOC_CALLBACK cb, int flags); +int event_loop(IOCHAN *iochans); + +#endif diff --git a/src/http.c b/src/http.c new file mode 100644 index 0000000..401f5ae --- /dev/null +++ b/src/http.c @@ -0,0 +1,854 @@ +/* + * $Id: http.c,v 1.1 2006-12-20 20:47:16 quinn Exp $ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "command.h" +#include "util.h" +#include "eventl.h" +#include "pazpar2.h" +#include "http.h" +#include "http_command.h" + +static void proxy_io(IOCHAN i, int event); +static struct http_channel *http_create(void); +static void http_destroy(IOCHAN i); + +extern IOCHAN channel_list; + +static struct sockaddr_in *proxy_addr = 0; // If this is set, we proxy normal HTTP requests +static char proxy_url[256] = ""; +static struct http_buf *http_buf_freelist = 0; +static struct http_channel *http_channel_freelist = 0; + +static struct http_buf *http_buf_create() +{ + struct http_buf *r; + + if (http_buf_freelist) + { + r = http_buf_freelist; + http_buf_freelist = http_buf_freelist->next; + } + else + r = xmalloc(sizeof(struct http_buf)); + r->offset = 0; + r->len = 0; + r->next = 0; + return r; +} + +static void http_buf_destroy(struct http_buf *b) +{ + b->next = http_buf_freelist; + http_buf_freelist = b; +} + +static void http_buf_destroy_queue(struct http_buf *b) +{ + struct http_buf *p; + while (b) + { + p = b->next; + http_buf_destroy(b); + b = p; + } +} + +#ifdef GAGA +// Calculate length of chain +static int http_buf_len(struct http_buf *b) +{ + int sum = 0; + for (; b; b = b->next) + sum += b->len; + return sum; +} +#endif + +static struct http_buf *http_buf_bybuf(char *b, int len) +{ + struct http_buf *res = 0; + struct http_buf **p = &res; + + while (len) + { + *p = http_buf_create(); + int tocopy = len; + if (tocopy > HTTP_BUF_SIZE) + tocopy = HTTP_BUF_SIZE; + memcpy((*p)->buf, b, tocopy); + (*p)->len = tocopy; + len -= tocopy; + b += tocopy; + p = &(*p)->next; + } + return res; +} + +// Add a (chain of) buffers to the end of an existing queue. +static void http_buf_enqueue(struct http_buf **queue, struct http_buf *b) +{ + while (*queue) + queue = &(*queue)->next; + *queue = b; +} + +static struct http_buf *http_buf_bywrbuf(WRBUF wrbuf) +{ + // Heavens to Betsy (buf)! + return http_buf_bybuf(wrbuf_buf(wrbuf), wrbuf_len(wrbuf)); +} + +// Non-destructively collapse chain of buffers into a string (max *len) +// Return +static int http_buf_peek(struct http_buf *b, char *buf, int len) +{ + int rd = 0; + while (b && rd < len) + { + int toread = len - rd; + if (toread > b->len) + toread = b->len; + memcpy(buf + rd, b->buf + b->offset, toread); + rd += toread; + b = b->next; + } + buf[rd] = '\0'; + return rd; +} + +// Ddestructively munch up to len from head of queue. +static int http_buf_read(struct http_buf **b, char *buf, int len) +{ + int rd = 0; + while ((*b) && rd < len) + { + int toread = len - rd; + if (toread > (*b)->len) + toread = (*b)->len; + memcpy(buf + rd, (*b)->buf + (*b)->offset, toread); + rd += toread; + if (toread < (*b)->len) + { + (*b)->len -= toread; + (*b)->offset += toread; + break; + } + else + { + struct http_buf *n = (*b)->next; + http_buf_destroy(*b); + *b = n; + } + } + buf[rd] = '\0'; + return rd; +} + +void static urldecode(char *i, char *o) +{ + while (*i) + { + if (*i == '+') + { + *(o++) = ' '; + i++; + } + else if (*i == '%') + { + i++; + sscanf(i, "%2hhx", o); + i += 2; + o++; + } + else + *(o++) = *(i++); + } + *o = '\0'; +} + +void http_addheader(struct http_response *r, const char *name, const char *value) +{ + struct http_channel *c = r->channel; + struct http_header *h = nmem_malloc(c->nmem, sizeof *h); + h->name = nmem_strdup(c->nmem, name); + h->value = nmem_strdup(c->nmem, value); + h->next = r->headers; + r->headers = h; +} + +char *http_argbyname(struct http_request *r, char *name) +{ + struct http_argument *p; + if (!name) + return 0; + for (p = r->arguments; p; p = p->next) + if (!strcmp(p->name, name)) + return p->value; + return 0; +} + +char *http_headerbyname(struct http_request *r, char *name) +{ + struct http_header *p; + for (p = r->headers; p; p = p->next) + if (!strcmp(p->name, name)) + return p->value; + return 0; +} + +struct http_response *http_create_response(struct http_channel *c) +{ + struct http_response *r = nmem_malloc(c->nmem, sizeof(*r)); + strcpy(r->code, "200"); + r->msg = "OK"; + r->channel = c; + r->headers = 0; + r->payload = 0; + return r; +} + +// Check if we have a complete request. Return 0 or length (including trailing newline) +// FIXME: Does not deal gracefully with requests carrying payload +// but this is kind of OK since we will reject anything other than an empty GET +static int request_check(struct http_buf *queue) +{ + char tmp[4096]; + int len = 0; + char *buf = tmp; + + http_buf_peek(queue, tmp, 4096); + while (*buf) // Check if we have a sequence of lines terminated by an empty line + { + char *b = strstr(buf, "\r\n"); + + if (!b) + return 0; + + len += (b - buf) + 2; + if (b == buf) + return len; + buf = b + 2; + } + return 0; +} + +struct http_request *http_parse_request(struct http_channel *c, struct http_buf **queue, + int len) +{ + struct http_request *r = nmem_malloc(c->nmem, sizeof(*r)); + char *p, *p2; + char tmp[4096]; + char *buf = tmp; + + if (len > 4096) + return 0; + if (http_buf_read(queue, buf, len) < len) + return 0; + + r->channel = c; + r->arguments = 0; + r->headers = 0; + // Parse first line + for (p = buf, p2 = r->method; *p && *p != ' ' && p - buf < 19; p++) + *(p2++) = *p; + if (*p != ' ') + { + yaz_log(YLOG_WARN, "Unexpected HTTP method in request"); + return 0; + } + *p2 = '\0'; + + if (!(buf = strchr(buf, ' '))) + { + yaz_log(YLOG_WARN, "Syntax error in request (1)"); + return 0; + } + buf++; + if (!(p = strchr(buf, ' '))) + { + yaz_log(YLOG_WARN, "Syntax error in request (2)"); + return 0; + } + *(p++) = '\0'; + if ((p2 = strchr(buf, '?'))) // Do we have arguments? + *(p2++) = '\0'; + r->path = nmem_strdup(c->nmem, buf); + if (p2) + { + // Parse Arguments + while (*p2) + { + struct http_argument *a; + char *equal = strchr(p2, '='); + char *eoa = strchr(p2, '&'); + if (!equal) + { + yaz_log(YLOG_WARN, "Expected '=' in argument"); + return 0; + } + if (!eoa) + eoa = equal + strlen(equal); // last argument + else + *(eoa++) = '\0'; + a = nmem_malloc(c->nmem, sizeof(struct http_argument)); + *(equal++) = '\0'; + a->name = nmem_strdup(c->nmem, p2); + urldecode(equal, equal); + a->value = nmem_strdup(c->nmem, equal); + a->next = r->arguments; + r->arguments = a; + p2 = eoa; + } + } + buf = p; + + if (strncmp(buf, "HTTP/", 5)) + strcpy(r->http_version, "1.0"); + else + { + buf += 5; + if (!(p = strstr(buf, "\r\n"))) + return 0; + *(p++) = '\0'; + p++; + strcpy(r->http_version, buf); + buf = p; + } + strcpy(c->version, r->http_version); + + r->headers = 0; + while (*buf) + { + if (!(p = strstr(buf, "\r\n"))) + return 0; + if (p == buf) + break; + else + { + struct http_header *h = nmem_malloc(c->nmem, sizeof(*h)); + if (!(p2 = strchr(buf, ':'))) + return 0; + *(p2++) = '\0'; + h->name = nmem_strdup(c->nmem, buf); + while (isspace(*p2)) + p2++; + if (p2 >= p) // Empty header? + { + buf = p + 2; + continue; + } + *p = '\0'; + h->value = nmem_strdup(c->nmem, p2); + h->next = r->headers; + r->headers = h; + buf = p + 2; + } + } + + return r; +} + + +static struct http_buf *http_serialize_response(struct http_channel *c, + struct http_response *r) +{ + wrbuf_rewind(c->wrbuf); + struct http_header *h; + + wrbuf_printf(c->wrbuf, "HTTP/1.1 %s %s\r\n", r->code, r->msg); + for (h = r->headers; h; h = h->next) + wrbuf_printf(c->wrbuf, "%s: %s\r\n", h->name, h->value); + wrbuf_printf(c->wrbuf, "Content-length: %d\r\n", r->payload ? (int) strlen(r->payload) : 0); + wrbuf_printf(c->wrbuf, "Content-type: text/xml\r\n"); + wrbuf_puts(c->wrbuf, "\r\n"); + + if (r->payload) + wrbuf_puts(c->wrbuf, r->payload); + + return http_buf_bywrbuf(c->wrbuf); +} + +// Serialize a HTTP request +static struct http_buf *http_serialize_request(struct http_request *r) +{ + struct http_channel *c = r->channel; + wrbuf_rewind(c->wrbuf); + struct http_header *h; + struct http_argument *a; + + wrbuf_printf(c->wrbuf, "%s %s", r->method, r->path); + + if (r->arguments) + { + wrbuf_putc(c->wrbuf, '?'); + for (a = r->arguments; a; a = a->next) { + if (a != r->arguments) + wrbuf_putc(c->wrbuf, '&'); + wrbuf_printf(c->wrbuf, "%s=%s", a->name, a->value); + } + } + + wrbuf_printf(c->wrbuf, " HTTP/%s\r\n", r->http_version); + + for (h = r->headers; h; h = h->next) + wrbuf_printf(c->wrbuf, "%s: %s\r\n", h->name, h->value); + + wrbuf_puts(c->wrbuf, "\r\n"); + + return http_buf_bywrbuf(c->wrbuf); +} + + +static int http_weshouldproxy(struct http_request *rq) +{ + if (proxy_addr && !strstr(rq->path, "search.pz2")) + return 1; + return 0; +} + +static int http_proxy(struct http_request *rq) +{ + struct http_channel *c = rq->channel; + struct http_proxy *p = c->proxy; + struct http_header *hp; + struct http_buf *requestbuf; + + if (!p) // This is a new connection. Create a proxy channel + { + int sock; + struct protoent *pe; + int one = 1; + int flags; + + if (!(pe = getprotobyname("tcp"))) { + abort(); + } + if ((sock = socket(PF_INET, SOCK_STREAM, pe->p_proto)) < 0) + { + yaz_log(YLOG_WARN|YLOG_ERRNO, "socket"); + return -1; + } + if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (char*) + &one, sizeof(one)) < 0) + abort(); + if ((flags = fcntl(sock, F_GETFL, 0)) < 0) + yaz_log(YLOG_FATAL|YLOG_ERRNO, "fcntl"); + if (fcntl(sock, F_SETFL, flags | O_NONBLOCK) < 0) + yaz_log(YLOG_FATAL|YLOG_ERRNO, "fcntl2"); + if (connect(sock, (struct sockaddr *) proxy_addr, sizeof(*proxy_addr)) < 0) + if (errno != EINPROGRESS) + { + yaz_log(YLOG_WARN|YLOG_ERRNO, "Proxy connect"); + return -1; + } + + p = xmalloc(sizeof(struct http_proxy)); + p->oqueue = 0; + p->channel = c; + c->proxy = p; + // We will add EVENT_OUTPUT below + p->iochan = iochan_create(sock, proxy_io, EVENT_INPUT); + iochan_setdata(p->iochan, p); + p->iochan->next = channel_list; + channel_list = p->iochan; + } + + // Modify Host: header + for (hp = rq->headers; hp; hp = hp->next) + if (!strcmp(hp->name, "Host")) + break; + if (!hp) + { + yaz_log(YLOG_WARN, "Failed to find Host header in proxy"); + return -1; + } + hp->value = nmem_strdup(c->nmem, proxy_url); + requestbuf = http_serialize_request(rq); + http_buf_enqueue(&p->oqueue, requestbuf); + iochan_setflag(p->iochan, EVENT_OUTPUT); + return 0; +} + +void http_send_response(struct http_channel *ch) +{ + struct http_response *rs = ch->response; + assert(rs); + struct http_buf *hb = http_serialize_response(ch, rs); + if (!hb) + { + yaz_log(YLOG_WARN, "Failed to serialize HTTP response"); + http_destroy(ch->iochan); + } + else + { + http_buf_enqueue(&ch->oqueue, hb); + iochan_setflag(ch->iochan, EVENT_OUTPUT); + ch->state = Http_Idle; + } +} + +static void http_io(IOCHAN i, int event) +{ + struct http_channel *hc = iochan_getdata(i); + + switch (event) + { + int res, reqlen; + struct http_buf *htbuf; + + case EVENT_INPUT: + htbuf = http_buf_create(); + res = read(iochan_getfd(i), htbuf->buf, HTTP_BUF_SIZE -1); + if (res <= 0 && errno != EAGAIN) + { + http_buf_destroy(htbuf); + http_destroy(i); + return; + } + if (res > 0) + { + htbuf->buf[res] = '\0'; + htbuf->len = res; + http_buf_enqueue(&hc->iqueue, htbuf); + } + + if (hc->state == Http_Busy) + return; + + if ((reqlen = request_check(hc->iqueue)) <= 2) + return; + + nmem_reset(hc->nmem); + if (!(hc->request = http_parse_request(hc, &hc->iqueue, reqlen))) + { + yaz_log(YLOG_WARN, "Failed to parse request"); + http_destroy(i); + return; + } + hc->response = 0; + yaz_log(YLOG_LOG, "Request: %s %s v %s", hc->request->method, + hc->request->path, hc->request->http_version); + if (http_weshouldproxy(hc->request)) + http_proxy(hc->request); + else + { + // Execute our business logic! + hc->state = Http_Busy; + http_command(hc); + } + if (hc->iqueue) + { + yaz_log(YLOG_DEBUG, "We think we have more input to read. Forcing event"); + iochan_setevent(i, EVENT_INPUT); + } + + break; + + case EVENT_OUTPUT: + if (hc->oqueue) + { + struct http_buf *wb = hc->oqueue; + res = write(iochan_getfd(hc->iochan), wb->buf + wb->offset, wb->len); + if (res <= 0) + { + yaz_log(YLOG_WARN|YLOG_ERRNO, "write"); + http_destroy(i); + return; + } + if (res == wb->len) + { + hc->oqueue = hc->oqueue->next; + http_buf_destroy(wb); + } + else + { + wb->len -= res; + wb->offset += res; + } + if (!hc->oqueue) { + if (!strcmp(hc->version, "1.0")) + { + http_destroy(i); + return; + } + else + { + iochan_clearflag(i, EVENT_OUTPUT); + if (hc->iqueue) + iochan_setevent(hc->iochan, EVENT_INPUT); + } + } + } + + if (!hc->oqueue && hc->proxy && !hc->proxy->iochan) + http_destroy(i); // Server closed; we're done + break; + default: + yaz_log(YLOG_WARN, "Unexpected event on connection"); + http_destroy(i); + } +} + +// Handles I/O on a client connection to a backend web server (proxy mode) +static void proxy_io(IOCHAN pi, int event) +{ + struct http_proxy *pc = iochan_getdata(pi); + struct http_channel *hc = pc->channel; + + switch (event) + { + int res; + struct http_buf *htbuf; + + case EVENT_INPUT: + htbuf = http_buf_create(); + res = read(iochan_getfd(pi), htbuf->buf, HTTP_BUF_SIZE -1); + if (res == 0 || (res < 0 && errno != EINPROGRESS)) + { + if (hc->oqueue) + { + yaz_log(YLOG_WARN, "Proxy read came up short"); + // Close channel and alert client HTTP channel that we're gone + http_buf_destroy(htbuf); + close(iochan_getfd(pi)); + iochan_destroy(pi); + pc->iochan = 0; + } + else + { + http_destroy(hc->iochan); + return; + } + } + else + { + htbuf->buf[res] = '\0'; + htbuf->len = res; + http_buf_enqueue(&hc->oqueue, htbuf); + } + iochan_setflag(hc->iochan, EVENT_OUTPUT); + break; + case EVENT_OUTPUT: + if (!(htbuf = pc->oqueue)) + { + iochan_clearflag(pi, EVENT_OUTPUT); + return; + } + res = write(iochan_getfd(pi), htbuf->buf + htbuf->offset, htbuf->len); + if (res <= 0) + { + yaz_log(YLOG_WARN|YLOG_ERRNO, "write"); + http_destroy(hc->iochan); + return; + } + if (res == htbuf->len) + { + struct http_buf *np = htbuf->next; + http_buf_destroy(htbuf); + pc->oqueue = np; + } + else + { + htbuf->len -= res; + htbuf->offset += res; + } + + if (!pc->oqueue) { + iochan_setflags(pi, EVENT_INPUT); // Turns off output flag + } + break; + default: + yaz_log(YLOG_WARN, "Unexpected event on connection"); + http_destroy(hc->iochan); + } +} + +// Cleanup channel +static void http_destroy(IOCHAN i) +{ + struct http_channel *s = iochan_getdata(i); + + if (s->proxy) + { + if (s->proxy->iochan) + { + close(iochan_getfd(s->proxy->iochan)); + iochan_destroy(s->proxy->iochan); + } + http_buf_destroy_queue(s->proxy->oqueue); + xfree(s->proxy); + } + s->next = http_channel_freelist; + http_channel_freelist = s; + close(iochan_getfd(i)); + iochan_destroy(i); +} + +static struct http_channel *http_create(void) +{ + struct http_channel *r = http_channel_freelist; + + if (r) + { + http_channel_freelist = r->next; + nmem_reset(r->nmem); + wrbuf_rewind(r->wrbuf); + } + else + { + r = xmalloc(sizeof(struct http_channel)); + r->nmem = nmem_create(); + r->wrbuf = wrbuf_alloc(); + } + r->proxy = 0; + r->iochan = 0; + r->iqueue = r->oqueue = 0; + r->state = Http_Idle; + r->request = 0; + r->response = 0; + return r; +} + + +/* Accept a new command connection */ +static void http_accept(IOCHAN i, int event) +{ + struct sockaddr_in addr; + int fd = iochan_getfd(i); + socklen_t len; + int s; + IOCHAN c; + int flags; + struct http_channel *ch; + + len = sizeof addr; + if ((s = accept(fd, (struct sockaddr *) &addr, &len)) < 0) + { + yaz_log(YLOG_WARN|YLOG_ERRNO, "accept"); + return; + } + if ((flags = fcntl(s, F_GETFL, 0)) < 0) + yaz_log(YLOG_FATAL|YLOG_ERRNO, "fcntl"); + if (fcntl(s, F_SETFL, flags | O_NONBLOCK) < 0) + yaz_log(YLOG_FATAL|YLOG_ERRNO, "fcntl2"); + + yaz_log(YLOG_LOG, "New command connection"); + c = iochan_create(s, http_io, EVENT_INPUT | EVENT_EXCEPT); + + ch = http_create(); + ch->iochan = c; + iochan_setdata(c, ch); + + c->next = channel_list; + channel_list = c; +} + +/* Create a http-channel listener, syntax [host:]port */ +void http_init(const char *addr) +{ + IOCHAN c; + int l; + struct protoent *p; + struct sockaddr_in myaddr; + int one = 1; + const char *pp; + int port; + + yaz_log(YLOG_LOG, "HTTP listener is %s", addr); + + bzero(&myaddr, sizeof myaddr); + myaddr.sin_family = AF_INET; + pp = strchr(addr, ':'); + if (pp) + { + int len = pp - addr; + char hostname[128]; + struct hostent *he; + + strncpy(hostname, addr, len); + hostname[len] = '\0'; + if (!(he = gethostbyname(hostname))) + { + yaz_log(YLOG_FATAL, "Unable to resolve '%s'", hostname); + exit(1); + } + memcpy(&myaddr.sin_addr.s_addr, he->h_addr_list[0], he->h_length); + port = atoi(pp + 1); + } + else + { + port = atoi(addr); + myaddr.sin_addr.s_addr = INADDR_ANY; + } + myaddr.sin_port = htons(port); + + if (!(p = getprotobyname("tcp"))) { + abort(); + } + if ((l = socket(PF_INET, SOCK_STREAM, p->p_proto)) < 0) + yaz_log(YLOG_FATAL|YLOG_ERRNO, "socket"); + if (setsockopt(l, SOL_SOCKET, SO_REUSEADDR, (char*) + &one, sizeof(one)) < 0) + abort(); + + if (bind(l, (struct sockaddr *) &myaddr, sizeof myaddr) < 0) + yaz_log(YLOG_FATAL|YLOG_ERRNO, "bind"); + if (listen(l, SOMAXCONN) < 0) + yaz_log(YLOG_FATAL|YLOG_ERRNO, "listen"); + + c = iochan_create(l, http_accept, EVENT_INPUT | EVENT_EXCEPT); + c->next = channel_list; + channel_list = c; +} + +void http_set_proxyaddr(char *host) +{ + char *p; + int port; + struct hostent *he; + + strcpy(proxy_url, host); + p = strchr(host, ':'); + yaz_log(YLOG_DEBUG, "Proxying for %s", host); + if (p) { + port = atoi(p + 1); + *p = '\0'; + } + else + port = 80; + if (!(he = gethostbyname(host))) + { + fprintf(stderr, "Failed to lookup '%s'\n", host); + exit(1); + } + proxy_addr = xmalloc(sizeof(struct sockaddr_in)); + proxy_addr->sin_family = he->h_addrtype; + memcpy(&proxy_addr->sin_addr.s_addr, he->h_addr_list[0], he->h_length); + proxy_addr->sin_port = htons(port); +} + +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ diff --git a/src/http.h b/src/http.h new file mode 100644 index 0000000..7501424 --- /dev/null +++ b/src/http.h @@ -0,0 +1,88 @@ +#ifndef HTTP_H +#define HTTP_H + +// Generic I/O buffer +struct http_buf +{ +#define HTTP_BUF_SIZE 4096 + char buf[4096]; + int offset; + int len; + struct http_buf *next; +}; + +struct http_channel +{ + IOCHAN iochan; + struct http_buf *iqueue; + struct http_buf *oqueue; + char version[10]; + struct http_proxy *proxy; + enum + { + Http_Idle, + Http_Busy // Don't process new HTTP requests while we're busy + } state; + NMEM nmem; + WRBUF wrbuf; + struct http_request *request; + struct http_response *response; + struct http_channel *next; // for freelist +}; + +struct http_proxy // attached to iochan for proxy connection +{ + IOCHAN iochan; + struct http_channel *channel; + struct http_buf *oqueue; +}; + +struct http_header +{ + char *name; + char *value; + struct http_header *next; +}; + +struct http_argument +{ + char *name; + char *value; + struct http_argument *next; +}; + +struct http_request +{ + struct http_channel *channel; + char http_version[20]; + char method[20]; + char *path; + struct http_header *headers; + struct http_argument *arguments; +}; + +struct http_response +{ + char code[4]; + char *msg; + struct http_channel *channel; + struct http_header *headers; + char *payload; +}; + +void http_set_proxyaddr(char *url); +void http_init(const char *addr); +void http_addheader(struct http_response *r, const char *name, const char *value); +char *http_argbyname(struct http_request *r, char *name); +char *http_headerbyname(struct http_request *r, char *name); +struct http_response *http_create_response(struct http_channel *c); +void http_send_response(struct http_channel *c); + +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ +#endif diff --git a/src/http_command.c b/src/http_command.c new file mode 100644 index 0000000..6c99232 --- /dev/null +++ b/src/http_command.c @@ -0,0 +1,403 @@ +/* + * $Id: http_command.c,v 1.1 2006-12-20 20:47:16 quinn Exp $ + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "command.h" +#include "util.h" +#include "eventl.h" +#include "pazpar2.h" +#include "http.h" +#include "http_command.h" + +extern struct parameters global_parameters; +extern IOCHAN channel_list; + +struct http_session { + IOCHAN timeout_iochan; // NOTE: This is NOT associated with a socket + struct session *psession; + unsigned int session_id; + int timestamp; + struct http_session *next; +}; + +static struct http_session *session_list = 0; + +void http_session_destroy(struct http_session *s); + +static void session_timeout(IOCHAN i, int event) +{ + struct http_session *s = iochan_getdata(i); + http_session_destroy(s); +} + +struct http_session *http_session_create() +{ + struct http_session *r = xmalloc(sizeof(*r)); + r->psession = new_session(); + r->session_id = 0; + r->timestamp = 0; + r->next = session_list; + session_list = r; + r->timeout_iochan = iochan_create(-1, session_timeout, 0); + iochan_setdata(r->timeout_iochan, r); + iochan_settimeout(r->timeout_iochan, global_parameters.session_timeout); + r->timeout_iochan->next = channel_list; + channel_list = r->timeout_iochan; + return r; +} + +void http_session_destroy(struct http_session *s) +{ + struct http_session **p; + + for (p = &session_list; *p; p = &(*p)->next) + if (*p == s) + { + *p = (*p)->next; + break; + } + iochan_destroy(s->timeout_iochan); + destroy_session(s->psession); + xfree(s); +} + +static void error(struct http_response *rs, char *code, char *msg, char *txt) +{ + struct http_channel *c = rs->channel; + char tmp[1024]; + + if (!txt) + txt = msg; + rs->msg = nmem_strdup(c->nmem, msg); + strcpy(rs->code, code); + sprintf(tmp, "%s", txt); + rs->payload = nmem_strdup(c->nmem, tmp); + http_send_response(c); +} + +unsigned int make_sessionid() +{ + struct timeval t; + unsigned int res; + static int seq = 0; + + seq++; + if (gettimeofday(&t, 0) < 0) + abort(); + res = t.tv_sec; + res = ((res << 8) | (seq & 0xff)) & ((unsigned int) (1 << 31) - 1); + return res; +} + +static struct http_session *locate_session(struct http_request *rq, struct http_response *rs) +{ + struct http_session *p; + char *session = http_argbyname(rq, "session"); + unsigned int id; + + if (!session) + { + error(rs, "417", "Must supply session", 0); + return 0; + } + id = atoi(session); + for (p = session_list; p; p = p->next) + if (id == p->session_id) + { + iochan_activity(p->timeout_iochan); + return p; + } + error(rs, "417", "Session does not exist, or it has expired", 0); + return 0; +} + +static void cmd_exit(struct http_channel *c) +{ + yaz_log(YLOG_WARN, "exit"); + exit(0); +} + + +static void cmd_init(struct http_channel *c) +{ + unsigned int sesid; + char buf[1024]; + struct http_session *s = http_session_create(); + struct http_response *rs = c->response; + + yaz_log(YLOG_DEBUG, "HTTP Session init"); + sesid = make_sessionid(); + s->session_id = sesid; + sprintf(buf, "OK%u", sesid); + rs->payload = nmem_strdup(c->nmem, buf); + http_send_response(c); +} + +static void cmd_termlist(struct http_channel *c) +{ + struct http_response *rs = c->response; + struct http_request *rq = c->request; + struct http_session *s = locate_session(rq, rs); + struct termlist_score **p; + int len; + int i; + + if (!s) + return; + wrbuf_rewind(c->wrbuf); + + wrbuf_puts(c->wrbuf, ""); + p = termlist(s->psession, &len); + if (p) + for (i = 0; i < len; i++) + { + wrbuf_puts(c->wrbuf, "\n"); + wrbuf_printf(c->wrbuf, "%s", p[i]->term); + wrbuf_printf(c->wrbuf, "%d", p[i]->frequency); + wrbuf_puts(c->wrbuf, ""); + } + wrbuf_puts(c->wrbuf, ""); + rs->payload = nmem_strdup(rq->channel->nmem, wrbuf_buf(c->wrbuf)); + http_send_response(c); +} + + +static void cmd_bytarget(struct http_channel *c) +{ + struct http_response *rs = c->response; + struct http_request *rq = c->request; + struct http_session *s = locate_session(rq, rs); + struct hitsbytarget *ht; + int count, i; + + if (!s) + return; + if (!(ht = hitsbytarget(s->psession, &count))) + { + error(rs, "500", "Failed to retrieve hitcounts", 0); + return; + } + wrbuf_rewind(c->wrbuf); + wrbuf_puts(c->wrbuf, "OK"); + + for (i = 0; i < count; i++) + { + wrbuf_puts(c->wrbuf, "\n"); + wrbuf_printf(c->wrbuf, "%s\n", ht[i].id); + wrbuf_printf(c->wrbuf, "%d\n", ht[i].hits); + wrbuf_printf(c->wrbuf, "%d\n", ht[i].diagnostic); + wrbuf_printf(c->wrbuf, "%d\n", ht[i].records); + wrbuf_printf(c->wrbuf, "%s\n", ht[i].state); + wrbuf_puts(c->wrbuf, ""); + } + + wrbuf_puts(c->wrbuf, ""); + rs->payload = nmem_strdup(c->nmem, wrbuf_buf(c->wrbuf)); + http_send_response(c); +} + +static void show_records(struct http_channel *c) +{ + struct http_request *rq = c->request; + struct http_response *rs = c->response; + struct http_session *s = locate_session(rq, rs); + struct record **rl; + char *start = http_argbyname(rq, "start"); + char *num = http_argbyname(rq, "num"); + int startn = 0; + int numn = 20; + int total; + int total_hits; + int i; + + if (!s) + return; + + if (start) + startn = atoi(start); + if (num) + numn = atoi(num); + + rl = show(s->psession, startn, &numn, &total, &total_hits); + + wrbuf_rewind(c->wrbuf); + wrbuf_puts(c->wrbuf, "\nOK\n"); + wrbuf_printf(c->wrbuf, "%d\n", total); + wrbuf_printf(c->wrbuf, "%d\n", total_hits); + wrbuf_printf(c->wrbuf, "%d\n", startn); + wrbuf_printf(c->wrbuf, "%d\n", numn); + + for (i = 0; i < numn; i++) + { + int ccount; + struct record *p; + + wrbuf_puts(c->wrbuf, "\n"); + wrbuf_printf(c->wrbuf, "%s\n", rl[i]->title); + for (ccount = 1, p = rl[i]->next_cluster; p; p = p->next_cluster, ccount++) + ; + if (ccount > 1) + wrbuf_printf(c->wrbuf, "%d\n", ccount); + wrbuf_puts(c->wrbuf, "\n"); + } + + wrbuf_puts(c->wrbuf, "\n"); + rs->payload = nmem_strdup(c->nmem, wrbuf_buf(c->wrbuf)); + http_send_response(c); +} + +static void show_records_ready(void *data) +{ + struct http_channel *c = (struct http_channel *) data; + + show_records(c); +} + +static void cmd_show(struct http_channel *c) +{ + struct http_request *rq = c->request; + struct http_response *rs = c->response; + struct http_session *s = locate_session(rq, rs); + char *block = http_argbyname(rq, "block"); + + if (!s) + return; + + if (block) + { + if (!s->psession->reclist || !s->psession->reclist->num_records) + { + session_set_watch(s->psession, SESSION_WATCH_RECORDS, show_records_ready, c); + yaz_log(YLOG_DEBUG, "Blocking on cmd_show"); + return; + } + } + + show_records(c); +} + +static void cmd_ping(struct http_channel *c) +{ + struct http_request *rq = c->request; + struct http_response *rs = c->response; + struct http_session *s = locate_session(rq, rs); + if (!s) + return; + rs->payload = "OK"; + http_send_response(c); +} + +static void cmd_search(struct http_channel *c) +{ + struct http_request *rq = c->request; + struct http_response *rs = c->response; + struct http_session *s = locate_session(rq, rs); + char *query = http_argbyname(rq, "query"); + char *res; + + if (!s) + return; + if (!query) + { + error(rs, "417", "Must supply query", 0); + return; + } + res = search(s->psession, query); + if (res) + { + error(rs, "417", res, res); + return; + } + rs->payload = "OK"; + http_send_response(c); +} + + +static void cmd_stat(struct http_channel *c) +{ + struct http_request *rq = c->request; + struct http_response *rs = c->response; + struct http_session *s = locate_session(rq, rs); + struct statistics stat; + + if (!s) + return; + + statistics(s->psession, &stat); + + wrbuf_rewind(c->wrbuf); + wrbuf_puts(c->wrbuf, ""); + wrbuf_printf(c->wrbuf, "%d\n", stat.num_hits); + wrbuf_printf(c->wrbuf, "%d\n", stat.num_records); + wrbuf_printf(c->wrbuf, "%d\n", stat.num_clients); + wrbuf_printf(c->wrbuf, "%d\n", stat.num_no_connection); + wrbuf_printf(c->wrbuf, "%d\n", stat.num_connecting); + wrbuf_printf(c->wrbuf, "%d\n", stat.num_initializing); + wrbuf_printf(c->wrbuf, "%d\n", stat.num_searching); + wrbuf_printf(c->wrbuf, "%d\n", stat.num_presenting); + wrbuf_printf(c->wrbuf, "%d\n", stat.num_idle); + wrbuf_printf(c->wrbuf, "%d\n", stat.num_failed); + wrbuf_printf(c->wrbuf, "%d\n", stat.num_error); + wrbuf_puts(c->wrbuf, ""); + rs->payload = nmem_strdup(c->nmem, wrbuf_buf(c->wrbuf)); + http_send_response(c); +} + + +struct { + char *name; + void (*fun)(struct http_channel *c); +} commands[] = { + { "init", cmd_init }, + { "stat", cmd_stat }, + { "bytarget", cmd_bytarget }, + { "show", cmd_show }, + { "search", cmd_search }, + { "termlist", cmd_termlist }, + { "exit", cmd_exit }, + { "ping", cmd_ping }, + {0,0} +}; + +void http_command(struct http_channel *c) +{ + char *command = http_argbyname(c->request, "command"); + struct http_response *rs = http_create_response(c); + int i; + + c->response = rs; + if (!command) + { + error(rs, "417", "Must supply command", 0); + return; + } + for (i = 0; commands[i].name; i++) + if (!strcmp(commands[i].name, command)) + { + (*commands[i].fun)(c); + break; + } + if (!commands[i].name) + error(rs, "417", "Unknown command", 0); + + return; +} + +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ diff --git a/src/http_command.h b/src/http_command.h new file mode 100644 index 0000000..e127925 --- /dev/null +++ b/src/http_command.h @@ -0,0 +1,8 @@ +#ifndef HTTP_COMMAND_H +#define HTTP_COMMAND + +#include "http.h" + +void http_command(struct http_channel *c); + +#endif diff --git a/src/pazpar2.c b/src/pazpar2.c new file mode 100644 index 0000000..c95ce3a --- /dev/null +++ b/src/pazpar2.c @@ -0,0 +1,1395 @@ +/* $Id: pazpar2.c,v 1.1 2006-12-20 20:47:16 quinn Exp $ */; + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "pazpar2.h" +#include "eventl.h" +#include "command.h" +#include "http.h" +#include "termlists.h" +#include "reclists.h" +#include "relevance.h" + +#define PAZPAR2_VERSION "0.1" +#define MAX_CHUNK 15 + +static void client_fatal(struct client *cl); +static void connection_destroy(struct connection *co); +static int client_prep_connection(struct client *cl); +static void ingest_records(struct client *cl, Z_Records *r); +void session_alert_watch(struct session *s, int what); + +IOCHAN channel_list = 0; // Master list of connections we're handling events to + +static struct connection *connection_freelist = 0; +static struct client *client_freelist = 0; + +static struct host *hosts = 0; // The hosts we know about +static struct database *databases = 0; // The databases we know about + +static char *client_states[] = { + "Client_Connecting", + "Client_Connected", + "Client_Idle", + "Client_Initializing", + "Client_Searching", + "Client_Presenting", + "Client_Error", + "Client_Failed", + "Client_Disconnected", + "Client_Stopped" +}; + +struct parameters global_parameters = +{ + 30, + "81", + "Index Data PazPar2 (MasterKey)", + PAZPAR2_VERSION, + 600, // 10 minutes + 60, + 100, + MAX_CHUNK, + 0, + 0, + 0, + 0 +}; + + +static int send_apdu(struct client *c, Z_APDU *a) +{ + struct connection *co = c->connection; + char *buf; + int len, r; + + if (!z_APDU(global_parameters.odr_out, &a, 0, 0)) + { + odr_perror(global_parameters.odr_out, "Encoding APDU"); + abort(); + } + buf = odr_getbuf(global_parameters.odr_out, &len, 0); + r = cs_put(co->link, buf, len); + if (r < 0) + { + yaz_log(YLOG_WARN, "cs_put: %s", cs_errmsg(cs_errno(co->link))); + return -1; + } + else if (r == 1) + { + fprintf(stderr, "cs_put incomplete (ParaZ does not handle that)\n"); + exit(1); + } + odr_reset(global_parameters.odr_out); /* release the APDU structure */ + co->state = Conn_Waiting; + return 0; +} + + +static void send_init(IOCHAN i) +{ + struct connection *co = iochan_getdata(i); + struct client *cl = co->client; + Z_APDU *a = zget_APDU(global_parameters.odr_out, Z_APDU_initRequest); + + a->u.initRequest->implementationId = global_parameters.implementationId; + a->u.initRequest->implementationName = global_parameters.implementationName; + a->u.initRequest->implementationVersion = + global_parameters.implementationVersion; + ODR_MASK_SET(a->u.initRequest->options, Z_Options_search); + ODR_MASK_SET(a->u.initRequest->options, Z_Options_present); + ODR_MASK_SET(a->u.initRequest->options, Z_Options_namedResultSets); + + ODR_MASK_SET(a->u.initRequest->protocolVersion, Z_ProtocolVersion_1); + ODR_MASK_SET(a->u.initRequest->protocolVersion, Z_ProtocolVersion_2); + ODR_MASK_SET(a->u.initRequest->protocolVersion, Z_ProtocolVersion_3); + if (send_apdu(cl, a) >= 0) + { + iochan_setflags(i, EVENT_INPUT); + cl->state = Client_Initializing; + } + else + cl->state = Client_Error; + odr_reset(global_parameters.odr_out); +} + +static void send_search(IOCHAN i) +{ + struct connection *co = iochan_getdata(i); + struct client *cl = co->client; + struct session *se = cl->session; + struct database *db = cl->database; + Z_APDU *a = zget_APDU(global_parameters.odr_out, Z_APDU_searchRequest); + int ndb, cerror, cpos; + char **databaselist; + Z_Query *zquery; + struct ccl_rpn_node *cn; + int ssub = 0, lslb = 100000, mspn = 10; + + yaz_log(YLOG_DEBUG, "Sending search"); + + cn = ccl_find_str(global_parameters.ccl_filter, se->query, &cerror, &cpos); + if (!cn) + return; + a->u.searchRequest->query = zquery = odr_malloc(global_parameters.odr_out, + sizeof(Z_Query)); + zquery->which = Z_Query_type_1; + zquery->u.type_1 = ccl_rpn_query(global_parameters.odr_out, cn); + ccl_rpn_delete(cn); + + for (ndb = 0; *db->databases[ndb]; ndb++) + ; + databaselist = odr_malloc(global_parameters.odr_out, sizeof(char*) * ndb); + for (ndb = 0; *db->databases[ndb]; ndb++) + databaselist[ndb] = db->databases[ndb]; + + a->u.presentRequest->preferredRecordSyntax = + yaz_oidval_to_z3950oid(global_parameters.odr_out, + CLASS_RECSYN, VAL_USMARC); + a->u.searchRequest->smallSetUpperBound = &ssub; + a->u.searchRequest->largeSetLowerBound = &lslb; + a->u.searchRequest->mediumSetPresentNumber = &mspn; + a->u.searchRequest->resultSetName = "Default"; + a->u.searchRequest->databaseNames = databaselist; + a->u.searchRequest->num_databaseNames = ndb; + + if (send_apdu(cl, a) >= 0) + { + iochan_setflags(i, EVENT_INPUT); + cl->state = Client_Searching; + cl->requestid = se->requestid; + } + else + cl->state = Client_Error; + + odr_reset(global_parameters.odr_out); +} + +static void send_present(IOCHAN i) +{ + struct connection *co = iochan_getdata(i); + struct client *cl = co->client; + Z_APDU *a = zget_APDU(global_parameters.odr_out, Z_APDU_presentRequest); + int toget; + int start = cl->records + 1; + + toget = global_parameters.chunk; + if (toget > cl->hits - cl->records) + toget = cl->hits - cl->records; + + yaz_log(YLOG_DEBUG, "Trying to present %d records\n", toget); + + a->u.presentRequest->resultSetStartPoint = &start; + a->u.presentRequest->numberOfRecordsRequested = &toget; + + a->u.presentRequest->resultSetId = "Default"; + + a->u.presentRequest->preferredRecordSyntax = + yaz_oidval_to_z3950oid(global_parameters.odr_out, + CLASS_RECSYN, VAL_USMARC); + + if (send_apdu(cl, a) >= 0) + { + iochan_setflags(i, EVENT_INPUT); + cl->state = Client_Presenting; + } + else + cl->state = Client_Error; + odr_reset(global_parameters.odr_out); +} + +static void do_initResponse(IOCHAN i, Z_APDU *a) +{ + struct connection *co = iochan_getdata(i); + struct client *cl = co->client; + Z_InitResponse *r = a->u.initResponse; + + yaz_log(YLOG_DEBUG, "Received init response"); + + if (*r->result) + { + cl->state = Client_Idle; + } + else + cl->state = Client_Failed; // FIXME need to do something to the connection +} + +static void do_searchResponse(IOCHAN i, Z_APDU *a) +{ + struct connection *co = iochan_getdata(i); + struct client *cl = co->client; + struct session *se = cl->session; + Z_SearchResponse *r = a->u.searchResponse; + + yaz_log(YLOG_DEBUG, "Searchresponse (status=%d)", *r->searchStatus); + + if (*r->searchStatus) + { + cl->hits = *r->resultCount; + se->total_hits += cl->hits; + if (r->presentStatus && !*r->presentStatus && r->records) + { + yaz_log(YLOG_DEBUG, "Records in search response"); + cl->records += *r->numberOfRecordsReturned; + ingest_records(cl, r->records); + } + cl->state = Client_Idle; + } + else + { /*"FAILED"*/ + cl->hits = 0; + cl->state = Client_Error; + if (r->records) { + Z_Records *recs = r->records; + if (recs->which == Z_Records_NSD) + { + yaz_log(YLOG_WARN, "Non-surrogate diagnostic"); + cl->diagnostic = *recs->u.nonSurrogateDiagnostic->condition; + cl->state = Client_Error; + } + } + } +} + +const char *find_field(const char *rec, const char *field) +{ + char lbuf[5]; + char *line; + + lbuf[0] = '\n'; + strcpy(lbuf + 1, field); + + if ((line = strstr(rec, lbuf))) + return ++line; + else + return 0; +} + +const char *find_subfield(const char *field, char subfield) +{ + const char *p = field; + + while (*p && *p != '\n') + { + while (*p != '\n' && *p != '\t') + p++; + if (*p == '\t' && *(++p) == subfield) { + if (*(++p) == ' ') + { + while (isspace(*p)) + p++; + return p; + } + } + } + return 0; +} + +// Extract 245 $a $b 100 $a +char *extract_title(struct session *s, const char *rec) +{ + const char *field, *subfield; + char *e, *ef; + unsigned char *obuf, *p; + + wrbuf_rewind(s->wrbuf); + + if (!(field = find_field(rec, "245"))) + return 0; + if (!(subfield = find_subfield(field, 'a'))) + return 0; + ef = index(subfield, '\n'); + if ((e = index(subfield, '\t')) && e < ef) + ef = e; + if (ef) + { + wrbuf_write(s->wrbuf, subfield, ef - subfield); + if ((subfield = find_subfield(field, 'b'))) + { + ef = index(subfield, '\n'); + if ((e = index(subfield, '\t')) && e < ef) + ef = e; + if (ef) + { + wrbuf_putc(s->wrbuf, ' '); + wrbuf_write(s->wrbuf, subfield, ef - subfield); + } + } + } + if ((field = find_field(rec, "100"))) + { + if ((subfield = find_subfield(field, 'a'))) + { + ef = index(subfield, '\n'); + if ((e = index(subfield, '\t')) && e < ef) + ef = e; + if (ef) + { + wrbuf_puts(s->wrbuf, ", by "); + wrbuf_write(s->wrbuf, subfield, ef - subfield); + } + } + } + wrbuf_putc(s->wrbuf, '\0'); + obuf = (unsigned char*) nmem_strdup(s->nmem, wrbuf_buf(s->wrbuf)); + for (p = obuf; *p; p++) + if (*p == '&' || *p == '<' || *p > 122 || *p < ' ') + *p = ' '; + return (char*) obuf; +} + +// Extract 245 $a $b 100 $a +char *extract_mergekey(struct session *s, const char *rec) +{ + const char *field, *subfield; + char *e, *ef; + char *out, *p, *pout; + + wrbuf_rewind(s->wrbuf); + + if (!(field = find_field(rec, "245"))) + return 0; + if (!(subfield = find_subfield(field, 'a'))) + return 0; + ef = index(subfield, '\n'); + if ((e = index(subfield, '\t')) && e < ef) + ef = e; + if (ef) + { + wrbuf_write(s->wrbuf, subfield, ef - subfield); + if ((subfield = find_subfield(field, 'b'))) + { + ef = index(subfield, '\n'); + if ((e = index(subfield, '\t')) && e < ef) + ef = e; + if (ef) + { + wrbuf_puts(s->wrbuf, " field "); + wrbuf_write(s->wrbuf, subfield, ef - subfield); + } + } + } + if ((field = find_field(rec, "100"))) + { + if ((subfield = find_subfield(field, 'a'))) + { + ef = index(subfield, '\n'); + if ((e = index(subfield, '\t')) && e < ef) + ef = e; + if (ef) + { + wrbuf_puts(s->wrbuf, " field "); + wrbuf_write(s->wrbuf, subfield, ef - subfield); + } + } + } + wrbuf_putc(s->wrbuf, '\0'); + p = wrbuf_buf(s->wrbuf); + out = pout = nmem_malloc(s->nmem, strlen(p) + 1); + + while (*p) + { + while (isalnum(*p)) + *(pout++) = tolower(*(p++)); + while (*p && !isalnum(*p)) + p++; + *(pout++) = ' '; + } + if (out != pout) + *(--pout) = '\0'; + + return out; +} + +#ifdef RECHEAP +static void push_record(struct session *s, struct record *r) +{ + int p; + assert(s->recheap_max + 1 < s->recheap_size); + + s->recheap[p = ++s->recheap_max] = r; + while (p > 0) + { + int parent = (p - 1) >> 1; + if (strcmp(s->recheap[p]->merge_key, s->recheap[parent]->merge_key) < 0) + { + struct record *tmp; + tmp = s->recheap[parent]; + s->recheap[parent] = s->recheap[p]; + s->recheap[p] = tmp; + p = parent; + } + else + break; + } +} + +static struct record *top_record(struct session *s) +{ + return s-> recheap_max >= 0 ? s->recheap[0] : 0; +} + +static struct record *pop_record(struct session *s) +{ + struct record *res; + int p = 0; + int lastnonleaf = (s->recheap_max - 1) >> 1; + + if (s->recheap_max < 0) + return 0; + + res = s->recheap[0]; + + s->recheap[p] = s->recheap[s->recheap_max--]; + + while (p <= lastnonleaf) + { + int right = (p + 1) << 1; + int left = right - 1; + int min = left; + + if (right < s->recheap_max && + strcmp(s->recheap[right]->merge_key, s->recheap[left]->merge_key) < 0) + min = right; + if (strcmp(s->recheap[min]->merge_key, s->recheap[p]->merge_key) < 0) + { + struct record *tmp = s->recheap[min]; + s->recheap[min] = s->recheap[p]; + s->recheap[p] = tmp; + p = min; + } + else + break; + } + return res; +} + +// Like pop_record but collapses identical (merge_key) records +// The heap will contain multiple independent matching records and possibly +// one cluster, created the last time the list was scanned +static struct record *pop_mrecord(struct session *s) +{ + struct record *this; + struct record *next; + + if (!(this = pop_record(s))) + return 0; + + // Collapse identical records + while ((next = top_record(s))) + { + struct record *p, *tmpnext; + if (strcmp(this->merge_key, next->merge_key)) + break; + // Absorb record (and clustersiblings) into a supercluster + for (p = next; p; p = tmpnext) { + tmpnext = p->next_cluster; + p->next_cluster = this->next_cluster; + this->next_cluster = p; + } + + pop_record(s); + } + return this; +} + +// Reads records in sort order. Store records in top of heapspace until rewind is called. +static struct record *read_recheap(struct session *s) +{ + struct record *r = pop_mrecord(s); + + if (r) + { + if (s->recheap_scratch < 0) + s->recheap_scratch = s->recheap_size; + s->recheap[--s->recheap_scratch] = r; + } + + return r; +} + +// Return records to heap after read +static void rewind_recheap(struct session *s) +{ + while (s->recheap_scratch >= 0) { + push_record(s, s->recheap[s->recheap_scratch++]); + if (s->recheap_scratch >= s->recheap_size) + s->recheap_scratch = -1; + } +} + +#endif + +// FIXME needs to be generalized. Should flexibly generate X lists per search +static void extract_subject(struct session *s, const char *rec) +{ + const char *field, *subfield; + + while ((field = find_field(rec, "650"))) + { + rec = field; + if ((subfield = find_subfield(field, 'a'))) + { + char *e, *ef; + char buf[1024]; + int len; + + ef = index(subfield, '\n'); + if (!ef) + return; + if ((e = index(subfield, '\t')) && e < ef) + ef = e; + while (ef > subfield && !isalpha(*(ef - 1)) && *(ef - 1) != ')') + ef--; + len = ef - subfield; + assert(len < 1023); + memcpy(buf, subfield, len); + buf[len] = '\0'; + if (*buf) + termlist_insert(s->termlist, buf); + } + } +} + +static void pull_relevance_field(struct session *s, struct record *head, const char *rec, + char *field, int mult) +{ + const char *fb; + while ((fb = find_field(rec, field))) + { + char *ffield = strchr(fb, '\t'); + if (!ffield) + return; + char *eol = strchr(ffield, '\n'); + if (!eol) + return; + relevance_countwords(s->relevance, head, ffield, eol - ffield, mult); + rec = field + 1; // Crude way to cause a loop through repeating fields + } +} + +static void pull_relevance_keys(struct session *s, struct record *head, struct record *rec) +{ + relevance_newrec(s->relevance, head); + pull_relevance_field(s, head, rec->buf, "100", 2); + pull_relevance_field(s, head, rec->buf, "245", 4); + //pull_relevance_field(s, head, rec->buf, "530", 1); + pull_relevance_field(s, head, rec->buf, "630", 1); + pull_relevance_field(s, head, rec->buf, "650", 1); + pull_relevance_field(s, head, rec->buf, "700", 1); + relevance_donerecord(s->relevance, head); +} + +static struct record *ingest_record(struct client *cl, char *buf, int len) +{ + struct session *se = cl->session; + struct record *res; + struct record *head; + const char *recbuf; + + wrbuf_rewind(se->wrbuf); + yaz_marc_xml(global_parameters.yaz_marc, YAZ_MARC_LINE); + if (yaz_marc_decode_wrbuf(global_parameters.yaz_marc, buf, len, se->wrbuf) < 0) + { + yaz_log(YLOG_WARN, "Failed to decode MARC record"); + return 0; + } + wrbuf_putc(se->wrbuf, '\0'); + recbuf = wrbuf_buf(se->wrbuf); + + res = nmem_malloc(se->nmem, sizeof(struct record)); + res->buf = nmem_strdup(se->nmem, recbuf); + + extract_subject(se, res->buf); + + res->title = extract_title(se, res->buf); + res->merge_key = extract_mergekey(se, res->buf); + if (!res->merge_key) + return 0; + res->client = cl; + res->next_cluster = 0; + res->target_offset = -1; + res->term_frequency_vec = 0; + + head = reclist_insert(se->reclist, res); + + pull_relevance_keys(se, head, res); + + se->total_records++; + + return res; +} + +static void ingest_records(struct client *cl, Z_Records *r) +{ + struct record *rec; + struct session *s = cl->session; + Z_NamePlusRecordList *rlist; + int i; + + if (r->which != Z_Records_DBOSD) + return; + rlist = r->u.databaseOrSurDiagnostics; + for (i = 0; i < rlist->num_records; i++) + { + Z_NamePlusRecord *npr = rlist->records[i]; + Z_External *e; + char *buf; + int len; + + if (npr->which != Z_NamePlusRecord_databaseRecord) + { + yaz_log(YLOG_WARN, "Unexpected record type, probably diagnostic"); + continue; + } + e = npr->u.databaseRecord; + if (e->which != Z_External_octet) + { + yaz_log(YLOG_WARN, "Unexpected external branch, probably BER"); + continue; + } + buf = (char*) e->u.octet_aligned->buf; + len = e->u.octet_aligned->len; + + rec = ingest_record(cl, buf, len); + if (!rec) + continue; + } + if (s->watchlist[SESSION_WATCH_RECORDS].fun && rlist->num_records) + session_alert_watch(s, SESSION_WATCH_RECORDS); +} + +static void do_presentResponse(IOCHAN i, Z_APDU *a) +{ + struct connection *co = iochan_getdata(i); + struct client *cl = co->client; + Z_PresentResponse *r = a->u.presentResponse; + + if (r->records) { + Z_Records *recs = r->records; + if (recs->which == Z_Records_NSD) + { + yaz_log(YLOG_WARN, "Non-surrogate diagnostic"); + cl->diagnostic = *recs->u.nonSurrogateDiagnostic->condition; + cl->state = Client_Error; + } + } + + if (!*r->presentStatus && cl->state != Client_Error) + { + yaz_log(YLOG_DEBUG, "Good Present response"); + cl->records += *r->numberOfRecordsReturned; + ingest_records(cl, r->records); + cl->state = Client_Idle; + } + else if (*r->presentStatus) + { + yaz_log(YLOG_WARN, "Bad Present response"); + cl->state = Client_Error; + } +} + +static void handler(IOCHAN i, int event) +{ + struct connection *co = iochan_getdata(i); + struct client *cl = co->client; + struct session *se = 0; + + if (cl) + se = cl->session; + else + { + yaz_log(YLOG_WARN, "Destroying orphan connection (fix me?)"); + connection_destroy(co); + return; + } + + if (co->state == Conn_Connecting && event & EVENT_OUTPUT) + { + int errcode; + socklen_t errlen = sizeof(errcode); + + if (getsockopt(cs_fileno(co->link), SOL_SOCKET, SO_ERROR, &errcode, + &errlen) < 0 || errcode != 0) + { + client_fatal(cl); + return; + } + else + { + yaz_log(YLOG_DEBUG, "Connect OK"); + co->state = Conn_Open; + if (cl) + cl->state = Client_Connected; + } + } + + else if (event & EVENT_INPUT) + { + int len = cs_get(co->link, &co->ibuf, &co->ibufsize); + + if (len < 0) + { + yaz_log(YLOG_WARN|YLOG_ERRNO, "Error reading from Z server"); + connection_destroy(co); + return; + } + else if (len == 0) + { + yaz_log(YLOG_WARN, "EOF reading from Z server"); + connection_destroy(co); + return; + } + else if (len > 1) // We discard input if we have no connection + { + co->state = Conn_Open; + + if (cl && (cl->requestid == se->requestid || cl->state == Client_Initializing)) + { + Z_APDU *a; + + odr_reset(global_parameters.odr_in); + odr_setbuf(global_parameters.odr_in, co->ibuf, len, 0); + if (!z_APDU(global_parameters.odr_in, &a, 0, 0)) + { + client_fatal(cl); + return; + } + switch (a->which) + { + case Z_APDU_initResponse: + do_initResponse(i, a); + break; + case Z_APDU_searchResponse: + do_searchResponse(i, a); + break; + case Z_APDU_presentResponse: + do_presentResponse(i, a); + break; + default: + yaz_log(YLOG_WARN, "Unexpected result from server"); + client_fatal(cl); + return; + } + // We aren't expecting staggered output from target + // if (cs_more(t->link)) + // iochan_setevent(i, EVENT_INPUT); + } + else // we throw away response and go to idle mode + { + yaz_log(YLOG_DEBUG, "Ignoring result of expired operation"); + cl->state = Client_Idle; + } + } + /* if len==1 we do nothing but wait for more input */ + } + + if (cl->state == Client_Connected) { + send_init(i); + } + + if (cl->state == Client_Idle) + { + if (cl->requestid != se->requestid && *se->query) { + send_search(i); + } + else if (cl->hits > 0 && cl->records < global_parameters.toget && + cl->records < cl->hits) { + send_present(i); + } + } +} + +// Disassociate connection from client +static void connection_release(struct connection *co) +{ + struct client *cl = co->client; + + yaz_log(YLOG_DEBUG, "Connection release %s", co->host->hostport); + if (!cl) + return; + cl->connection = 0; + co->client = 0; +} + +// Close connection and recycle structure +static void connection_destroy(struct connection *co) +{ + struct host *h = co->host; + cs_close(co->link); + iochan_destroy(co->iochan); + + yaz_log(YLOG_DEBUG, "Connection destroy %s", co->host->hostport); + if (h->connections == co) + h->connections = co->next; + else + { + struct connection *pco; + for (pco = h->connections; pco && pco->next != co; pco = pco->next) + ; + if (pco) + pco->next = co->next; + else + abort(); + } + if (co->client) + { + if (co->client->state != Client_Idle) + co->client->state = Client_Disconnected; + co->client->connection = 0; + } + co->next = connection_freelist; + connection_freelist = co; +} + +// Creates a new connection for client, associated with the host of +// client's database +static struct connection *connection_create(struct client *cl) +{ + struct connection *new; + COMSTACK link; + int res; + void *addr; + + yaz_log(YLOG_DEBUG, "Connection create %s", cl->database->url); + if (!(link = cs_create(tcpip_type, 0, PROTO_Z3950))) + { + yaz_log(YLOG_FATAL|YLOG_ERRNO, "Failed to create comstack"); + exit(1); + } + + if (!(addr = cs_straddr(link, cl->database->host->ipport))) + { + yaz_log(YLOG_WARN|YLOG_ERRNO, "Lookup of IP address failed?"); + return 0; + } + + res = cs_connect(link, addr); + if (res < 0) + { + yaz_log(YLOG_WARN|YLOG_ERRNO, "cs_connect %s", cl->database->url); + return 0; + } + + if ((new = connection_freelist)) + connection_freelist = new->next; + else + { + new = xmalloc(sizeof (struct connection)); + new->ibuf = 0; + new->ibufsize = 0; + } + new->state = Conn_Connecting; + new->host = cl->database->host; + new->next = new->host->connections; + new->host->connections = new; + new->client = cl; + cl->connection = new; + new->link = link; + + new->iochan = iochan_create(cs_fileno(link), handler, 0); + iochan_setdata(new->iochan, new); + new->iochan->next = channel_list; + channel_list = new->iochan; + return new; +} + +// Close connection and set state to error +static void client_fatal(struct client *cl) +{ + yaz_log(YLOG_WARN, "Fatal error from %s", cl->database->url); + connection_destroy(cl->connection); + cl->state = Client_Error; +} + +// Ensure that client has a connection associated +static int client_prep_connection(struct client *cl) +{ + struct connection *co; + struct session *se = cl->session; + struct host *host = cl->database->host; + + co = cl->connection; + + yaz_log(YLOG_DEBUG, "Client prep %s", cl->database->url); + + if (!co) + { + // See if someone else has an idle connection + // We should look at timestamps here to select the longest-idle connection + for (co = host->connections; co; co = co->next) + if (co->state == Conn_Open && (!co->client || co->client->session != se)) + break; + if (co) + { + connection_release(co); + cl->connection = co; + co->client = cl; + } + else + co = connection_create(cl); + } + if (co) + { + if (co->state == Conn_Connecting) + cl->state = Client_Connecting; + else if (co->state == Conn_Open) + { + if (cl->state == Client_Error || cl->state == Client_Disconnected) + cl->state = Client_Idle; + } + iochan_setflag(co->iochan, EVENT_OUTPUT); + return 1; + } + else + return 0; +} + +void load_simpletargets(const char *fn) +{ + FILE *f = fopen(fn, "r"); + char line[256]; + + if (!f) + { + yaz_log(YLOG_WARN|YLOG_ERRNO, "open %s", fn); + exit(1); + } + + while (fgets(line, 255, f)) + { + char *url, *db; + struct host *host; + struct database *database; + + if (strncmp(line, "target ", 7)) + continue; + url = line + 7; + url[strlen(url) - 1] = '\0'; + yaz_log(YLOG_DEBUG, "Target: %s", url); + if ((db = strchr(url, '/'))) + *(db++) = '\0'; + else + db = "Default"; + + for (host = hosts; host; host = host->next) + if (!strcmp(url, host->hostport)) + break; + if (!host) + { + struct addrinfo *addrinfo, hints; + char *port; + char ipport[128]; + unsigned char addrbuf[4]; + int res; + + host = xmalloc(sizeof(struct host)); + host->hostport = xstrdup(url); + host->connections = 0; + + if ((port = strchr(url, ':'))) + *(port++) = '\0'; + else + port = "210"; + + hints.ai_flags = 0; + hints.ai_family = PF_INET; + hints.ai_socktype = SOCK_STREAM; + hints.ai_protocol = IPPROTO_TCP; + hints.ai_addrlen = 0; + hints.ai_addr = 0; + hints.ai_canonname = 0; + hints.ai_next = 0; + // This is not robust code. It assumes that getaddrinfo returns AF_INET + // address. + if ((res = getaddrinfo(url, port, &hints, &addrinfo))) + { + yaz_log(YLOG_WARN, "Failed to resolve %s: %s", url, gai_strerror(res)); + continue; + } + assert(addrinfo->ai_family == PF_INET); + memcpy(addrbuf, &((struct sockaddr_in*)addrinfo->ai_addr)->sin_addr.s_addr, 4); + sprintf(ipport, "%hhd.%hhd.%hhd.%hhd:%s", + addrbuf[0], addrbuf[1], addrbuf[2], addrbuf[3], port); + host->ipport = xstrdup(ipport); + freeaddrinfo(addrinfo); + host->next = hosts; + hosts = host; + } + database = xmalloc(sizeof(struct database)); + database->host = host; + database->url = xmalloc(strlen(url) + strlen(db) + 2); + strcpy(database->url, url); + strcat(database->url, "/"); + strcat(database->url, db); + strcpy(database->databases[0], db); + *database->databases[1] = '\0'; + database->errors = 0; + database->next = databases; + databases = database; + + } + fclose(f); +} + +static void pull_terms(NMEM nmem, struct ccl_rpn_node *n, char **termlist, int *num) +{ + switch (n->kind) + { + case CCL_RPN_AND: + case CCL_RPN_OR: + case CCL_RPN_NOT: + case CCL_RPN_PROX: + pull_terms(nmem, n->u.p[0], termlist, num); + pull_terms(nmem, n->u.p[1], termlist, num); + break; + case CCL_RPN_TERM: + termlist[(*num)++] = nmem_strdup(nmem, n->u.t.term); + break; + default: // NOOP + break; + } +} + +// Extract terms from query into null-terminated termlist +static int extract_terms(NMEM nmem, char *query, char **termlist) +{ + int error, pos; + struct ccl_rpn_node *n; + int num = 0; + + n = ccl_find_str(global_parameters.ccl_filter, query, &error, &pos); + if (!n) + return -1; + pull_terms(nmem, n, termlist, &num); + termlist[num] = 0; + ccl_rpn_delete(n); + return 0; +} + +static struct client *client_create(void) +{ + struct client *r; + if (client_freelist) + { + r = client_freelist; + client_freelist = client_freelist->next; + } + else + r = xmalloc(sizeof(struct client)); + r->database = 0; + r->connection = 0; + r->session = 0; + r->hits = 0; + r->records = 0; + r->setno = 0; + r->requestid = -1; + r->diagnostic = 0; + r->state = Client_Disconnected; + r->next = 0; + return r; +} + +void client_destroy(struct client *c) +{ + struct session *se = c->session; + if (c == se->clients) + se->clients = c->next; + else + { + struct client *cc; + for (cc = se->clients; cc && cc->next != c; cc = cc->next) + ; + if (cc) + cc->next = c->next; + } + if (c->connection) + connection_release(c->connection); + c->next = client_freelist; + client_freelist = c; +} + +void session_set_watch(struct session *s, int what, session_watchfun fun, void *data) +{ + s->watchlist[what].fun = fun; + s->watchlist[what].data = data; +} + +void session_alert_watch(struct session *s, int what) +{ + if (!s->watchlist[what].fun) + return; + (*s->watchlist[what].fun)(s->watchlist[what].data); + s->watchlist[what].fun = 0; + s->watchlist[what].data = 0; +} + +// This should be extended with parameters to control selection criteria +// Associates a set of clients with a session; +int select_targets(struct session *se) +{ + struct database *db; + int c = 0; + + while (se->clients) + client_destroy(se->clients); + for (db = databases; db; db = db->next) + { + struct client *cl = client_create(); + cl->database = db; + cl->session = se; + cl->next = se->clients; + se->clients = cl; + c++; + } + return c; +} + +char *search(struct session *se, char *query) +{ + int live_channels = 0; + struct client *cl; + + yaz_log(YLOG_DEBUG, "Search"); + + strcpy(se->query, query); + se->requestid++; + nmem_reset(se->nmem); + for (cl = se->clients; cl; cl = cl->next) + { + cl->hits = -1; + cl->records = 0; + cl->diagnostic = 0; + + if (client_prep_connection(cl)) + live_channels++; + } + if (live_channels) + { + char *p[512]; + int maxrecs = live_channels * global_parameters.toget; + se->termlist = termlist_create(se->nmem, maxrecs, 15); + se->reclist = reclist_create(se->nmem, maxrecs); + extract_terms(se->nmem, query, p); + se->relevance = relevance_create(se->nmem, (const char **) p, maxrecs); + se->total_records = se->total_hits = 0; + } + else + return "NOTARGETS"; + + return 0; +} + +void destroy_session(struct session *s) +{ + yaz_log(YLOG_LOG, "Destroying session"); + while (s->clients) + client_destroy(s->clients); + nmem_destroy(s->nmem); + wrbuf_free(s->wrbuf, 1); +} + +struct session *new_session() +{ + int i; + struct session *session = xmalloc(sizeof(*session)); + + yaz_log(YLOG_DEBUG, "New pazpar2 session"); + + session->total_hits = 0; + session->total_records = 0; + session->termlist = 0; + session->reclist = 0; + session->requestid = -1; + session->clients = 0; + session->query[0] = '\0'; + session->nmem = nmem_create(); + session->wrbuf = wrbuf_alloc(); + for (i = 0; i <= SESSION_WATCH_MAX; i++) + { + session->watchlist[i].data = 0; + session->watchlist[i].fun = 0; + } + + select_targets(session); + + return session; +} + +struct hitsbytarget *hitsbytarget(struct session *se, int *count) +{ + static struct hitsbytarget res[1000]; // FIXME MM + struct client *cl; + + *count = 0; + for (cl = se->clients; cl; cl = cl->next) + { + strcpy(res[*count].id, cl->database->host->hostport); + res[*count].hits = cl->hits; + res[*count].records = cl->records; + res[*count].diagnostic = cl->diagnostic; + res[*count].state = client_states[cl->state]; + res[*count].connected = cl->connection ? 1 : 0; + (*count)++; + } + + return res; +} + +struct termlist_score **termlist(struct session *s, int *num) +{ + return termlist_highscore(s->termlist, num); +} + +struct record **show(struct session *s, int start, int *num, int *total, int *sumhits) +{ + struct record **recs = nmem_malloc(s->nmem, *num * sizeof(struct record *)); + int i; + + relevance_prepare_read(s->relevance, s->reclist); + + *total = s->reclist->num_records; + *sumhits = s->total_hits; + + for (i = 0; i < start; i++) + if (!reclist_read_record(s->reclist)) + { + *num = 0; + return 0; + } + + for (i = 0; i < *num; i++) + { + struct record *r = reclist_read_record(s->reclist); + if (!r) + { + *num = i; + break; + } + recs[i] = r; + } + return recs; +} + +void statistics(struct session *se, struct statistics *stat) +{ + struct client *cl; + int count = 0; + + bzero(stat, sizeof(*stat)); + for (cl = se->clients; cl; cl = cl->next) + { + if (!cl->connection) + stat->num_no_connection++; + switch (cl->state) + { + case Client_Connecting: stat->num_connecting++; break; + case Client_Initializing: stat->num_initializing++; break; + case Client_Searching: stat->num_searching++; break; + case Client_Presenting: stat->num_presenting++; break; + case Client_Idle: stat->num_idle++; break; + case Client_Failed: stat->num_failed++; break; + case Client_Error: stat->num_error++; break; + default: break; + } + count++; + } + stat->num_hits = se->total_hits; + stat->num_records = se->total_records; + + stat->num_clients = count; +} + +static CCL_bibset load_cclfile(const char *fn) +{ + CCL_bibset res = ccl_qual_mk(); + if (ccl_qual_fname(res, fn) < 0) + { + yaz_log(YLOG_FATAL|YLOG_ERRNO, "%s", fn); + exit(1); + } + return res; +} + +int main(int argc, char **argv) +{ + int ret; + char *arg; + int setport = 0; + + if (signal(SIGPIPE, SIG_IGN) < 0) + yaz_log(YLOG_WARN|YLOG_ERRNO, "signal"); + + yaz_log_init(YLOG_DEFAULT_LEVEL, "pazpar2", 0); + + while ((ret = options("c:h:p:C:s:", argv, argc, &arg)) != -2) + { + switch (ret) { + case 'c': + command_init(atoi(arg)); + setport++; + break; + case 'h': + http_init(arg); + setport++; + break; + case 'C': + global_parameters.ccl_filter = load_cclfile(arg); + break; + case 'p': + http_set_proxyaddr(arg); + break; + case 's': + load_simpletargets(arg); + break; + default: + fprintf(stderr, "Usage: pazpar2\n" + " -h [host:]port (REST protocol listener)\n" + " -c cmdport (telnet-style)\n" + " -C cclconfig\n" + " -s simpletargetfile\n" + " -p hostname[:portno] (HTTP proxy)\n"); + exit(1); + } + } + + if (!setport) + { + fprintf(stderr, "Set command port with -h or -c\n"); + exit(1); + } + + global_parameters.ccl_filter = load_cclfile("default.bib"); + global_parameters.yaz_marc = yaz_marc_create(); + yaz_marc_subfield_str(global_parameters.yaz_marc, "\t"); + global_parameters.odr_in = odr_createmem(ODR_DECODE); + global_parameters.odr_out = odr_createmem(ODR_ENCODE); + + event_loop(&channel_list); + + return 0; +} + +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ diff --git a/src/pazpar2.h b/src/pazpar2.h new file mode 100644 index 0000000..8de4b91 --- /dev/null +++ b/src/pazpar2.h @@ -0,0 +1,172 @@ +#ifndef PAZPAR2_H +#define PAZPAR2_H + +struct record; + +#include + +#include +#include +#include +#include +#include "termlists.h" +#include "relevance.h" +#include "eventl.h" + +#define MAX_DATABASES 512 + +struct record { + struct client *client; + int target_offset; + char *buf; + char *merge_key; + char *title; + int relevance; + int *term_frequency_vec; + struct record *next_cluster; +}; + +struct connection; + +// Represents a host (irrespective of databases) +struct host { + char *hostport; + char *ipport; + struct connection *connections; // All connections to this + struct host *next; +}; + +// Represents a (virtual) database on a host +struct database { + struct host *host; + char *url; + char databases[MAX_DATABASES][128]; + int errors; + struct database *next; +}; + +struct client; + +// Represents a physical, reusable connection to a remote Z39.50 host +struct connection { + IOCHAN iochan; + COMSTACK link; + struct host *host; + struct client *client; + char *ibuf; + int ibufsize; + enum { + Conn_Connecting, + Conn_Open, + Conn_Waiting, + } state; + struct connection *next; +}; + +// Represents client state for a connection to one search target +struct client { + struct database *database; + struct connection *connection; + struct session *session; + int hits; + int records; + int setno; + int requestid; // ID of current outstanding request + int diagnostic; + enum client_state + { + Client_Connecting, + Client_Connected, + Client_Idle, + Client_Initializing, + Client_Searching, + Client_Presenting, + Client_Error, + Client_Failed, + Client_Disconnected, + Client_Stopped + } state; + struct client *next; +}; + +#define SESSION_WATCH_RECORDS 0 +#define SESSION_WATCH_MAX 0 + +typedef void (*session_watchfun)(void *data); + +// End-user session +struct session { + struct client *clients; + int requestid; + char query[1024]; + NMEM nmem; // Nmem for each operation (i.e. search) + WRBUF wrbuf; // Wrbuf for scratch(i.e. search) + struct termlist *termlist; + struct relevance *relevance; + struct reclist *reclist; + struct { + void *data; + session_watchfun fun; + } watchlist[SESSION_WATCH_MAX + 1]; + int total_hits; + int total_records; +}; + +struct statistics { + int num_clients; + int num_no_connection; + int num_connecting; + int num_initializing; + int num_searching; + int num_presenting; + int num_idle; + int num_failed; + int num_error; + int num_hits; + int num_records; +}; + +struct hitsbytarget { + char id[256]; + int hits; + int diagnostic; + int records; + char* state; + int connected; +}; + +struct parameters { + int timeout; /* operations timeout, in seconds */ + char implementationId[128]; + char implementationName[128]; + char implementationVersion[128]; + int target_timeout; // seconds + int session_timeout; + int toget; + int chunk; + CCL_bibset ccl_filter; + yaz_marc_t yaz_marc; + ODR odr_out; + ODR odr_in; +}; + +struct hitsbytarget *hitsbytarget(struct session *s, int *count); +int select_targets(struct session *se); +struct session *new_session(); +void destroy_session(struct session *s); +int load_targets(struct session *s, const char *fn); +void statistics(struct session *s, struct statistics *stat); +char *search(struct session *s, char *query); +struct record **show(struct session *s, int start, int *num, int *total, int *sumhits); +struct termlist_score **termlist(struct session *s, int *num); +void session_set_watch(struct session *s, int what, session_watchfun fun, void *data); + +#endif + +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ diff --git a/src/reclists.c b/src/reclists.c new file mode 100644 index 0000000..40f1d76 --- /dev/null +++ b/src/reclists.c @@ -0,0 +1,110 @@ +/* + * $Id: reclists.c,v 1.1 2006-12-20 20:47:16 quinn Exp $ + */ + +#include + +#include + +#include "pazpar2.h" +#include "reclists.h" + +struct reclist_bucket +{ + struct record *record; + struct reclist_bucket *next; +}; + +struct record *reclist_read_record(struct reclist *l) +{ + if (l->pointer < l->num_records) + return l->flatlist[l->pointer++]; + else + return 0; +} + +void reclist_rewind(struct reclist *l) +{ + l->pointer = 0; +} + +// Jenkins one-at-a-time hash (from wikipedia) +static unsigned int hash(const unsigned char *key) +{ + unsigned int hash = 0; + + while (*key) + { + hash += *(key++); + hash += (hash << 10); + hash ^= (hash >> 6); + } + hash += (hash << 3); + hash ^= (hash >> 11); + hash += (hash << 15); + return hash; +} + +struct reclist *reclist_create(NMEM nmem, int numrecs) +{ + int hashsize = 1; + struct reclist *res; + + assert(numrecs); + while (hashsize < numrecs) + hashsize <<= 1; + res = nmem_malloc(nmem, sizeof(struct reclist)); + res->hashtable = nmem_malloc(nmem, hashsize * sizeof(struct reclist_bucket*)); + bzero(res->hashtable, hashsize * sizeof(struct reclist_bucket*)); + res->hashtable_size = hashsize; + res->nmem = nmem; + res->hashmask = hashsize - 1; // Creates a bitmask + + res->num_records = 0; + res->flatlist = nmem_malloc(nmem, numrecs * sizeof(struct record*)); + res->flatlist_size = numrecs; + + return res; +} + +struct record *reclist_insert(struct reclist *l, struct record *record) +{ + unsigned int bucket; + struct reclist_bucket **p; + struct record *head; + + bucket = hash((unsigned char*) record->merge_key) & l->hashmask; + for (p = &l->hashtable[bucket]; *p; p = &(*p)->next) + { + // We found a matching record. Merge them + if (!strcmp(record->merge_key, (*p)->record->merge_key)) + { + struct record *existing = (*p)->record; + record->next_cluster = existing->next_cluster; + existing->next_cluster = record; + head = existing; + break; + } + } + if (!*p) // We made it to the end of the bucket without finding match + { + struct reclist_bucket *new = nmem_malloc(l->nmem, + sizeof(struct reclist_bucket)); + new->record = record; + record->next_cluster = 0; + new->next = 0; + *p = new; + l->flatlist[l->num_records++] = record; + head = record; + } + return head; +} + + +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ diff --git a/src/reclists.h b/src/reclists.h new file mode 100644 index 0000000..f9d38c3 --- /dev/null +++ b/src/reclists.h @@ -0,0 +1,23 @@ +#ifndef RECLISTS_H +#define RECLISTS_H + +struct reclist +{ + struct reclist_bucket **hashtable; + int hashtable_size; + int hashmask; + + struct record **flatlist; + int flatlist_size; + int num_records; + int pointer; + + NMEM nmem; +}; + +struct reclist *reclist_create(NMEM, int numrecs); +struct record * reclist_insert(struct reclist *tl, struct record *record); +struct record *reclist_read_record(struct reclist *l); +void reclist_rewind(struct reclist *l); + +#endif diff --git a/src/relevance.c b/src/relevance.c new file mode 100644 index 0000000..2823eff --- /dev/null +++ b/src/relevance.c @@ -0,0 +1,247 @@ +/* + * $Id: relevance.c,v 1.1 2006-12-20 20:47:16 quinn Exp $ + */ + +#include +#include +#include + +#include "relevance.h" +#include "pazpar2.h" + +struct relevance +{ + int *doc_frequency_vec; + int vec_len; + struct word_trie *wt; + NMEM nmem; +}; + +// We use this data structure to recognize terms in input records, +// and map them to record term vectors for counting. +struct word_trie +{ + struct + { + struct word_trie *child; + int termno; + } list[26]; +}; + +static struct word_trie *create_word_trie_node(NMEM nmem) +{ + struct word_trie *res = nmem_malloc(nmem, sizeof(struct word_trie)); + int i; + for (i = 0; i < 26; i++) + { + res->list[i].child = 0; + res->list[i].termno = -1; + } + return res; +} + +static void word_trie_addterm(NMEM nmem, struct word_trie *n, const char *term, int num) +{ + while (*term) { + int c = tolower(*term); + if (c < 'a' || c > 'z') + term++; + else + { + c -= 'a'; + if (!*(++term)) + n->list[c].termno = num; + else + { + if (!n->list[c].child) + { + struct word_trie *new = create_word_trie_node(nmem); + n->list[c].child = new; + } + word_trie_addterm(nmem, n->list[c].child, term, num); + } + break; + } + } +} + +#define raw_char(c) (((c) >= 'a' && (c) <= 'z') ? (c) - 'a' : -1) + +static int word_trie_match(struct word_trie *t, const char *word, int len, int *skipped) +{ + int c = raw_char(tolower(*word)); + + if (!len) + return 0; + + word++; len--; + (*skipped)++; + if (!len || raw_char(*word) < 0) + { + if (t->list[c].termno > 0) + return t->list[c].termno; + else + return 0; + } + else + { + if (t->list[c].child) + { + return word_trie_match(t->list[c].child, word, len, skipped); + } + else + return 0; + } + +} + + +static struct word_trie *build_word_trie(NMEM nmem, const char **terms) +{ + struct word_trie *res = create_word_trie_node(nmem); + const char **p; + int i; + + for (i = 1, p = terms; *p; p++, i++) + word_trie_addterm(nmem, res, *p, i); + return res; +} + +struct relevance *relevance_create(NMEM nmem, const char **terms, int numrecs) +{ + struct relevance *res = nmem_malloc(nmem, sizeof(struct relevance)); + const char **p; + int i; + + for (p = terms, i = 0; *p; p++, i++) + ; + res->vec_len = ++i; + res->doc_frequency_vec = nmem_malloc(nmem, res->vec_len * sizeof(int)); + bzero(res->doc_frequency_vec, res->vec_len * sizeof(int)); + res->nmem = nmem; + res->wt = build_word_trie(nmem, terms); + return res; +} + +void relevance_newrec(struct relevance *r, struct record *rec) +{ + if (!rec->term_frequency_vec) + { + rec->term_frequency_vec = nmem_malloc(r->nmem, r->vec_len * sizeof(int)); + bzero(rec->term_frequency_vec, r->vec_len * sizeof(int)); + } +} + + +// FIXME. The definition of a word is crude here.. should support +// some form of localization mechanism? +void relevance_countwords(struct relevance *r, struct record *head, + const char *words, int len, int multiplier) +{ + while (len) + { + char c; + int res; + int skipped; + while (len && (c = raw_char(tolower(*words))) < 0) + { + words++; + len--; + } + if (!len) + return; + skipped = 0; + if ((res = word_trie_match(r->wt, words, len, &skipped))) + { + words += skipped; + len -= skipped; + head->term_frequency_vec[res] += multiplier; + } + else + { + while (len && (c = raw_char(tolower(*words))) >= 0) + { + words++; + len--; + } + } + head->term_frequency_vec[0]++; + } +} + +void relevance_donerecord(struct relevance *r, struct record *head) +{ + int i; + + for (i = 1; i < r->vec_len; i++) + if (head->term_frequency_vec[i] > 0) + r->doc_frequency_vec[i]++; + + r->doc_frequency_vec[0]++; +} + +#ifdef FLOAT_REL +static int comp(const void *p1, const void *p2) +{ + float res; + struct record **r1 = (struct record **) p1; + struct record **r2 = (struct record **) p2; + res = (*r2)->relevance - (*r1)->relevance; + if (res > 0) + return 1; + else if (res < 0) + return -1; + else + return 0; +} +#else +static int comp(const void *p1, const void *p2) +{ + struct record **r1 = (struct record **) p1; + struct record **r2 = (struct record **) p2; + return (*r2)->relevance - (*r1)->relevance; +} +#endif + +// Prepare for a relevance-sorted read of up to num entries +void relevance_prepare_read(struct relevance *rel, struct reclist *reclist) +{ + int i; + float *idfvec = xmalloc(rel->vec_len * sizeof(float)); + + // Calculate document frequency vector for each term. + for (i = 1; i < rel->vec_len; i++) + { + if (!rel->doc_frequency_vec[i]) + idfvec[i] = 0; + else + idfvec[i] = log((float) rel->doc_frequency_vec[0] / rel->doc_frequency_vec[i]); + } + // Calculate relevance for each document + for (i = 0; i < reclist->num_records; i++) + { + int t; + struct record *rec = reclist->flatlist[i]; + float relevance; + relevance = 0; + for (t = 1; t < rel->vec_len; t++) + { + float termfreq; + if (!rec->term_frequency_vec[0]) + break; + termfreq = (float) rec->term_frequency_vec[t] / rec->term_frequency_vec[0]; + relevance += termfreq * idfvec[t]; + } + rec->relevance = (int) (relevance * 100000); + } + qsort(reclist->flatlist, reclist->num_records, sizeof(struct record*), comp); + reclist->pointer = 0; +} + +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ diff --git a/src/relevance.h b/src/relevance.h new file mode 100644 index 0000000..38c3d9c --- /dev/null +++ b/src/relevance.h @@ -0,0 +1,27 @@ +#ifndef RELEVANCE_H +#define RELEVANCE_H + +#include + +#include "pazpar2.h" +#include "reclists.h" + +struct relevance; + +struct relevance *relevance_create(NMEM nmem, const char **terms, int numrecs); +void relevance_newrec(struct relevance *r, struct record *rec); +void relevance_countwords(struct relevance *r, struct record *rec, + const char *words, int len, int multiplier); +void relevance_donerecord(struct relevance *r, struct record *rec); + +void relevance_prepare_read(struct relevance *rel, struct reclist *rec); + +#endif + +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ diff --git a/src/termlists.c b/src/termlists.c new file mode 100644 index 0000000..7f32e3d --- /dev/null +++ b/src/termlists.c @@ -0,0 +1,163 @@ +/* + * $Id: termlists.c,v 1.1 2006-12-20 20:47:16 quinn Exp $ + */ + +#include +#include +#include + +#include "termlists.h" + +// Discussion: +// As terms are found in incoming records, they are added to (or updated in) a +// Hash table. When term records are updated, a frequency value is updated. At +// the same time, a highscore is maintained for the most frequent terms. + +struct termlist_bucket +{ + struct termlist_score term; + struct termlist_bucket *next; +}; + +struct termlist +{ + struct termlist_bucket **hashtable; + int hashtable_size; + int hashmask; + + struct termlist_score **highscore; + int highscore_size; + int highscore_num; + int highscore_min; + + NMEM nmem; +}; + + +// Jenkins one-at-a-time hash (from wikipedia) +static unsigned int hash(const unsigned char *key) +{ + unsigned int hash = 0; + + while (*key) + { + hash += *(key++); + hash += (hash << 10); + hash ^= (hash >> 6); + } + hash += (hash << 3); + hash ^= (hash >> 11); + hash += (hash << 15); + return hash; +} + +struct termlist *termlist_create(NMEM nmem, int numterms, int highscore_size) +{ + int hashsize = 1; + int halfnumterms; + struct termlist *res; + + // Calculate a hash size smallest power of 2 larger than 50% of expected numterms + halfnumterms = numterms >> 1; + if (halfnumterms < 0) + halfnumterms = 1; + while (hashsize < halfnumterms) + hashsize <<= 1; + res = nmem_malloc(nmem, sizeof(struct termlist)); + res->hashtable = nmem_malloc(nmem, hashsize * sizeof(struct termlist_bucket*)); + bzero(res->hashtable, hashsize * sizeof(struct termlist_bucket*)); + res->hashtable_size = hashsize; + res->nmem = nmem; + res->hashmask = hashsize - 1; // Creates a bitmask + + res->highscore = nmem_malloc(nmem, highscore_size * sizeof(struct termlist_score *)); + res->highscore_size = highscore_size; + res->highscore_num = 0; + res->highscore_min = 0; + + return res; +} + +static void update_highscore(struct termlist *tl, struct termlist_score *t) +{ + int i; + int smallest; + int me = -1; + + if (t->frequency < tl->highscore_min) + return; + + smallest = 0; + for (i = 0; i < tl->highscore_num; i++) + { + if (tl->highscore[i]->frequency < tl->highscore[smallest]->frequency) + smallest = i; + if (tl->highscore[i] == t) + me = i; + } + if (tl->highscore_num) + tl->highscore_min = tl->highscore[smallest]->frequency; + if (me >= 0) + return; + if (tl->highscore_num < tl->highscore_size) + { + tl->highscore[tl->highscore_num++] = t; + if (t->frequency < tl->highscore_min) + tl->highscore_min = t->frequency; + } + else + { + if (t->frequency > tl->highscore[smallest]->frequency) + { + tl->highscore[smallest] = t; + } + } +} + +void termlist_insert(struct termlist *tl, const char *term) +{ + unsigned int bucket; + struct termlist_bucket **p; + + bucket = hash((unsigned char *)term) & tl->hashmask; + for (p = &tl->hashtable[bucket]; *p; p = &(*p)->next) + { + if (!strcmp(term, (*p)->term.term)) + { + (*p)->term.frequency++; + update_highscore(tl, &((*p)->term)); + break; + } + } + if (!*p) // We made it to the end of the bucket without finding match + { + struct termlist_bucket *new = nmem_malloc(tl->nmem, + sizeof(struct termlist_bucket)); + new->term.term = nmem_strdup(tl->nmem, term); + new->term.frequency = 1; + new->next = 0; + *p = new; + update_highscore(tl, &new->term); + } +} + +static int compare(const void *s1, const void *s2) +{ + struct termlist_score **p1 = (struct termlist_score**) s1, **p2 = (struct termlist_score **) s2; + return (*p2)->frequency - (*p1)->frequency; +} + +struct termlist_score **termlist_highscore(struct termlist *tl, int *len) +{ + qsort(tl->highscore, tl->highscore_num, sizeof(struct termlist_score*), compare); + *len = tl->highscore_num; + return tl->highscore; +} + +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ diff --git a/src/termlists.h b/src/termlists.h new file mode 100644 index 0000000..2dbee2d --- /dev/null +++ b/src/termlists.h @@ -0,0 +1,26 @@ +#ifndef TERMLISTS_H +#define TERMLISTS_H + +#include + +struct termlist_score +{ + char *term; + int frequency; +}; + +struct termlist; + +struct termlist *termlist_create(NMEM nmem, int numterms, int highscore_size); +void termlist_insert(struct termlist *tl, const char *term); +struct termlist_score **termlist_highscore(struct termlist *tl, int *len); + +#endif + +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ diff --git a/src/util.c b/src/util.c new file mode 100644 index 0000000..a8547fa --- /dev/null +++ b/src/util.c @@ -0,0 +1,11 @@ +/* $Id: util.c,v 1.1 2006-12-20 20:47:16 quinn Exp $ */ + +#include +#include + +void die(char *string, char *add) +{ + yaz_log(YLOG_FATAL, "Fatal error: %s (%s)", string, add ? add : ""); + abort(); +} + diff --git a/src/util.h b/src/util.h new file mode 100644 index 0000000..0bae58e --- /dev/null +++ b/src/util.h @@ -0,0 +1,8 @@ +/* $Id: util.h,v 1.1 2006-12-20 20:47:16 quinn Exp $ */ + +#ifndef UTIL_H +#define UTIL_H + +void die(char *string, char *add); + +#endif diff --git a/termlists.c b/termlists.c deleted file mode 100644 index c9a9442..0000000 --- a/termlists.c +++ /dev/null @@ -1,163 +0,0 @@ -/* - * $Id: termlists.c,v 1.3 2006-12-08 21:40:58 quinn Exp $ - */ - -#include -#include -#include - -#include "termlists.h" - -// Discussion: -// As terms are found in incoming records, they are added to (or updated in) a -// Hash table. When term records are updated, a frequency value is updated. At -// the same time, a highscore is maintained for the most frequent terms. - -struct termlist_bucket -{ - struct termlist_score term; - struct termlist_bucket *next; -}; - -struct termlist -{ - struct termlist_bucket **hashtable; - int hashtable_size; - int hashmask; - - struct termlist_score **highscore; - int highscore_size; - int highscore_num; - int highscore_min; - - NMEM nmem; -}; - - -// Jenkins one-at-a-time hash (from wikipedia) -static unsigned int hash(const unsigned char *key) -{ - unsigned int hash = 0; - - while (*key) - { - hash += *(key++); - hash += (hash << 10); - hash ^= (hash >> 6); - } - hash += (hash << 3); - hash ^= (hash >> 11); - hash += (hash << 15); - return hash; -} - -struct termlist *termlist_create(NMEM nmem, int numterms, int highscore_size) -{ - int hashsize = 1; - int halfnumterms; - struct termlist *res; - - // Calculate a hash size smallest power of 2 larger than 50% of expected numterms - halfnumterms = numterms >> 1; - if (halfnumterms < 0) - halfnumterms = 1; - while (hashsize < halfnumterms) - hashsize <<= 1; - res = nmem_malloc(nmem, sizeof(struct termlist)); - res->hashtable = nmem_malloc(nmem, hashsize * sizeof(struct termlist_bucket*)); - bzero(res->hashtable, hashsize * sizeof(struct termlist_bucket*)); - res->hashtable_size = hashsize; - res->nmem = nmem; - res->hashmask = hashsize - 1; // Creates a bitmask - - res->highscore = nmem_malloc(nmem, highscore_size * sizeof(struct termlist_score *)); - res->highscore_size = highscore_size; - res->highscore_num = 0; - res->highscore_min = 0; - - return res; -} - -static void update_highscore(struct termlist *tl, struct termlist_score *t) -{ - int i; - int smallest; - int me = -1; - - if (t->frequency < tl->highscore_min) - return; - - smallest = 0; - for (i = 0; i < tl->highscore_num; i++) - { - if (tl->highscore[i]->frequency < tl->highscore[smallest]->frequency) - smallest = i; - if (tl->highscore[i] == t) - me = i; - } - if (tl->highscore_num) - tl->highscore_min = tl->highscore[smallest]->frequency; - if (me >= 0) - return; - if (tl->highscore_num < tl->highscore_size) - { - tl->highscore[tl->highscore_num++] = t; - if (t->frequency < tl->highscore_min) - tl->highscore_min = t->frequency; - } - else - { - if (t->frequency > tl->highscore[smallest]->frequency) - { - tl->highscore[smallest] = t; - } - } -} - -void termlist_insert(struct termlist *tl, const char *term) -{ - unsigned int bucket; - struct termlist_bucket **p; - - bucket = hash((unsigned char *)term) & tl->hashmask; - for (p = &tl->hashtable[bucket]; *p; p = &(*p)->next) - { - if (!strcmp(term, (*p)->term.term)) - { - (*p)->term.frequency++; - update_highscore(tl, &((*p)->term)); - break; - } - } - if (!*p) // We made it to the end of the bucket without finding match - { - struct termlist_bucket *new = nmem_malloc(tl->nmem, - sizeof(struct termlist_bucket)); - new->term.term = nmem_strdup(tl->nmem, term); - new->term.frequency = 1; - new->next = 0; - *p = new; - update_highscore(tl, &new->term); - } -} - -static int compare(const void *s1, const void *s2) -{ - struct termlist_score **p1 = (struct termlist_score**) s1, **p2 = (struct termlist_score **) s2; - return (*p2)->frequency - (*p1)->frequency; -} - -struct termlist_score **termlist_highscore(struct termlist *tl, int *len) -{ - qsort(tl->highscore, tl->highscore_num, sizeof(struct termlist_score*), compare); - *len = tl->highscore_num; - return tl->highscore; -} - -/* - * Local variables: - * c-basic-offset: 4 - * indent-tabs-mode: nil - * End: - * vim: shiftwidth=4 tabstop=8 expandtab - */ diff --git a/termlists.h b/termlists.h deleted file mode 100644 index 2dbee2d..0000000 --- a/termlists.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef TERMLISTS_H -#define TERMLISTS_H - -#include - -struct termlist_score -{ - char *term; - int frequency; -}; - -struct termlist; - -struct termlist *termlist_create(NMEM nmem, int numterms, int highscore_size); -void termlist_insert(struct termlist *tl, const char *term); -struct termlist_score **termlist_highscore(struct termlist *tl, int *len); - -#endif - -/* - * Local variables: - * c-basic-offset: 4 - * indent-tabs-mode: nil - * End: - * vim: shiftwidth=4 tabstop=8 expandtab - */ diff --git a/test.pz b/test.pz deleted file mode 100644 index 0fb10c8..0000000 --- a/test.pz +++ /dev/null @@ -1 +0,0 @@ -target localhost:9999/Default diff --git a/util.c b/util.c deleted file mode 100644 index fc67cfa..0000000 --- a/util.c +++ /dev/null @@ -1,11 +0,0 @@ -/* $Id: util.c,v 1.2 2006-11-18 05:00:38 quinn Exp $ */ - -#include -#include - -void die(char *string, char *add) -{ - yaz_log(YLOG_FATAL, "Fatal error: %s (%s)", string, add ? add : ""); - abort(); -} - diff --git a/util.h b/util.h deleted file mode 100644 index 10f7981..0000000 --- a/util.h +++ /dev/null @@ -1,8 +0,0 @@ -/* $Id: util.h,v 1.1 2006-11-14 20:44:38 quinn Exp $ */ - -#ifndef UTIL_H -#define UTIL_H - -void die(char *string, char *add); - -#endif -- 1.7.10.4