From 5ef6f852ac65edc487f8a02539fa3b2bd0290ada Mon Sep 17 00:00:00 2001 From: Heikki Levanto Date: Mon, 25 Nov 2013 16:02:57 +0100 Subject: [PATCH] Testing against DBC --- heikki/dbc-os/bibliotek.dk.fields.txt | 189 +++++++ heikki/dbc-os/bibliotek.dk.xml | 25 + heikki/dbc-os/dbc-opensearch-gw.cfg | 67 +++ heikki/dbc-os/dbc-opensearch-gw.pl | 920 +++++++++++++++++++++++++++++++++ heikki/dbc-os/test2.cfg | 128 +++++ heikki/dbc-os/test2.sh | 120 +++++ heikki/test1.sh | 2 +- 7 files changed, 1450 insertions(+), 1 deletion(-) create mode 100644 heikki/dbc-os/bibliotek.dk.fields.txt create mode 100644 heikki/dbc-os/bibliotek.dk.xml create mode 100644 heikki/dbc-os/dbc-opensearch-gw.cfg create mode 100755 heikki/dbc-os/dbc-opensearch-gw.pl create mode 100644 heikki/dbc-os/test2.cfg create mode 100755 heikki/dbc-os/test2.sh diff --git a/heikki/dbc-os/bibliotek.dk.fields.txt b/heikki/dbc-os/bibliotek.dk.fields.txt new file mode 100644 index 0000000..1472234 --- /dev/null +++ b/heikki/dbc-os/bibliotek.dk.fields.txt @@ -0,0 +1,189 @@ +# Kopieret fra drift-danbib.zpunkt til brønd_3 mapning til bibliotek.dk 18/9-2013 BDM +# Subset of bib-1 attributes map to CCL qualifiers +# $Id: ccl2rpn.danbib.search.zpunkt,v 1.11 2011-08-02 10:01:42 bdm Exp $ + +dkcclterm.ti ti BIB1,u=4 r=3 p=3 s=pw t=l,r c=1 +dkcclphrase.lti lti BIB1,u=4 r=3 p=1 s=1 t=r c=3 +dkcclterm.se se BIB1,u=5 r=3 p=3 s=pw t=l,r c=1 +dkcclphrase.lse lse BIB1,u=5 r=3 p=1 s=1 t=r c=3 +*dkcclterm.ut ut BIB1,u=6 r=3 p=3 s=pw t=l,r c=1 +*dkcclphrase.lut lut BIB1,u=6 r=3 p=1 s=1 t=r c=3 +*dkcclterm.ib ib BIB1,u=7 r=3 p=3 s=1 t=l,r c=2 +*dkcclterm.in in BIB1,u=8 r=3 p=3 s=1 t=l,r c=2 +dkcclterm.nl nl BIB1,u=9 r=3 p=3 s=103 t=l,r c=2 +dkcclterm.id id BIB1,u=12 r=3 p=3 s=103 t=l,r c=2 #OBS - søgekode lid i bibliotek.dk classic +*dkcclphrase.ddc ddc BIB1,u=13 r=3 p=1 s=1 t=r c=3 +*dkcclphrase.udk udk BIB1,u=14 r=3 p=1 s=1 t=r c=3 +*dkcclphrase.lcc lcc BIB1,u=16 r=3 p=1 s=1 t=r c=3 +*dkcclphrase.nlm nlm BIB1,u=17 r=3 p=1 s=1 t=r 
c=3 +*dkcclphrase.nal nal BIB1,u=18 r=3 p=1 s=1 t=r c=3 +*dkcclterm.kl kl BIB1,u=20 r=3 p=3 s=pw t=l,r c=1 +*dkcclphrase.lkl lkl BIB1,u=20 r=3 p=1 s=1 t=r c=3 +dkcclterm.em em BIB1,u=21 r=3 p=3 s=pw t=l,r c=1 #søger ikke umiddelbart som frase med proximity i broenden +dkcclterm.em emne BIB1,u=21 r=3 p=3 s=2 t=l,r c=1 #søgning fra tekstbokse med 'and'-søgning +dkcclphrase.lem lem BIB1,u=21 r=3 p=1 s=1 t=r c=3 +*dkcclterm.ms ms BIB1,u=25 r=3 p=3 s=pw t=l,r c=1 +*dkcclphrase.lms lms BIB1,u=25 r=3 p=1 s=1 t=r c=3 +dkcclterm.år år BIB1,u=31 r=o p=3 s=4 t=l,r c=2 #ved ikke hvordan man udnytter relation r=1, r=3 eller r=5 i brønden +Specialstruktur for dfa år_før BIB1,u=31 r=1 p=3 s=4 t=l,r c=1 #fra tekstbokse +Specialstruktur for dfa år_lig BIB1,u=31 r=3 p=3 s=4 t=l,r c=1 #fra tekstbokse +Specialstruktur for dfa år_efter BIB1,u=31 r=5 p=3 s=4 t=l,r c=1 #fra tekstbokse +*dkcclterm.nt nt BIB1,u=33 r=3 p=3 s=pw t=l,r c=1 +*dkcclphrase.lnt lnt BIB1,u=33 r=3 p=1 s=1 t=r c=3 +*dkcclterm.pa pa BIB1,u=35 r=3 p=3 s=pw t=l,r c=1 +*dkcclphrase.lpa lpa BIB1,u=35 r=3 p=1 s=1 t=r c=3 +*dkcclterm.tt tt BIB1,u=42 r=3 p=3 s=pw t=l,r c=1 +*dkcclphrase.ltt ltt BIB1,u=42 r=3 p=1 s=1 t=r c=3 +Specialindex til limiter spr BIB1,u=54 r=3 p=3 s=1 t=l,r c=2 #=E9t sprog pr. 
post - struktur 1 ikke defineret i DanZig - lige nu term.primaryLanguage, som sikkert ændrer indhold til flere hovedsprog senere +dkcclterm.sp sp BIB1,u=54 r=3 p=3 s=2 t=l,r c=2 #alle sprog - svarer til struktur 2 i DanZig +dkcclterm.ul ul BIB1,u=55 r=3 p=3 s=2 t=l,r c=2 +Kun postejer fra 001b owner_id BIB1,u=56 r=3 p=3 s=2 t=l,r c=1 #use-attribut 56 refererer til ln (DBC1 1=6) i danZIG +Kun postejer fra 001b lok BIB1,u=56 r=3 p=3 s=2 t=l,r c=1 #lok er defineret som langord til DAN1 1=11 i praksisregler +dkcclterm.pu pu BIB1,u=59 r=3 p=3 s=pw t=l,r c=1 +*dkcclterm.co co BIB1,u=60 r=3 p=3 s=2 t=l,r c=2 +dkcclterm.no no BIB1,u=63 r=3 p=3 s=pw t=l,r c=1 +Specialindex til limiter mp_id BIB1,u=1001 r=3 p=3 s=2 t=l,r c=1 #BRUG ma i stedet da 2-bogstavskoder i stedet for 4 +Specialindex til limiter kat BIB1,u=1001 r=3 p=3 s=2 t=l,r c=1 +dkcclterm.fo fo BIB1,u=1003 r=3 p=3 s=101 t=l,r c=1 #Afviger i dfa classic fra Praksisregler, da uden *c +dkcclterm.fo forfatter BIB1,u=1003 r=3 p=3 s=102 t=l,r c=1 #Til tekstboksene. I target er s=102 defineret som uordnet, ok i brønden? 
+dkcclphrase.lfo lfo BIB1,u=1003 r=3 p=1 s=101 t=r c=2 #Afviger i dfa classic fra Praksisregler, da uden *c +*dkcclterm.pe pe BIB1,u=1004 r=3 p=3 s=101 t=l,r c=1 +*dkcclphrase.lpe lpe BIB1,u=1004 r=3 p=1 s=101 t=r c=2 +*dkcclterm.ko ko BIB1,u=1005 r=3 p=3 s=101 t=l,r c=1 +*dkcclphrase.lko lko BIB1,u=1005 r=3 p=1 s=101 t=r c=2 +dkcclterm.is is BIB1,u=1007 r=3 p=3 s=1 t=l,r c=2 +dkcclterm.ep ep BIB1,u=1009 r=3 p=3 s=pw t=l,r c=1 +dkcclphrase.lep lep BIB1,u=1009 r=3 p=1 s=1 t=r c=3 +dkcclterm.op op BIB1,u=1011 r=3 p=3 s=2 t=l,r c=1 +dkcclterm.aj aj BIB1,u=1012 r=3 p=3 s=2 t=l,r c=1 +term.default default_id BIB1,u=1016 r=3 p=3 s=pw t=l,r c=1 #dannet af DKABM-data (forskel dfa classic og danbib: fb i stedet for id) +term.default term BIB1,u=1016 r=3 p=3 s=pw t=l,r c=1 #alle 3 ender med samme udfald da umiddelbart ingen proximity +term.default fritekst BIB1,u=1016 r=3 p=3 s=2 t=l,r c=1 #søgning fra tekstbokse med 'and'-søgning +dkcclterm.fl fl BIB1,u=1018 r=3 p=3 s=pw t=l,r c=1 +dkcclterm.km km BIB1,u=1024 r=3 p=3 s=2 t=l,r c=2 +*dkcclterm.rt rt BIB1,u=1026 r=3 p=3 s=pw t=l,r c=1 +*dkcclphrase.lrt lrt BIB1,u=1026 r=3 p=1 s=1 t=r c=2 +dkcclterm.ma mk BIB1,u=1031 r=3 p=3 s=pw t=l,r c=1 +dkcclterm.ma ma BIB1,u=1031 r=3 p=3 s=pw t=l,r c=1 +dkcclterm.ma mat BIB1,u=1031 r=3 p=3 s=pw t=l,r c=1 +dkcclterm.ww ww BIB1,u=1032 r=3 p=3 s=1 t=l,r c=2 +dkcclterm.vp vp BIB1,u=1033 r=3 p=3 s=pw t=l,r c=1 +dkcclterm.vp vært BIB1,u=1033 r=3 p=3 s=2 t=l,r c=1 #søgning fra tekstbokse med 'and'-søgning +dkcclphrase.lvp lvp BIB1,u=1033 r=3 p=1 s=1 t=r c=3 +dkcclterm.cl cl BIB1,u=1040 r=3 p=3 s=pw t=l,r c=1 +*dkcclphrase.lcl lcl BIB1,u=1040 r=3 p=1 s=1 t=r c=3 +dkcclterm.ek ek BIB1,u=1074 r=3 p=3 s=pw t=l,r c=1 +dkcclphrase.lek lek BIB1,u=1074 r=3 p=1 s=1 t=r c=3 +*dkcclterm.uk uk BIB1,u=1080 r=3 p=3 s=pw t=l,r c=1 +*dkcclphrase.luk luk BIB1,u=1080 r=3 p=1 s=1 t=r c=3 +*dkcclterm.ac ac BIB1,u=1085 r=3 p=3 s=pw t=l,r c=1 +*dkcclphrase.lac lac BIB1,u=1085 r=3 p=1 s=1 t=r c=3 +*dkcclterm.cp cp 
BIB1,u=1086 r=3 p=3 s=pw t=l,r c=1 +*dkcclphrase.lcp lcp BIB1,u=1086 r=3 p=1 s=1 t=r c=3 +*dkcclterm.ed ed BIB1,u=1087 r=3 p=3 s=pw t=l,r c=1 +*dkcclphrase.led led BIB1,u=1087 r=3 p=1 s=1 t=r c=3 +*dkcclterm.ag ag BIB1,u=1088 r=3 p=3 s=pw t=l,r c=1 +*dkcclphrase.lag lag BIB1,u=1088 r=3 p=1 s=1 t=r c=3 +*dkcclphrase.bcm bcm BIB1,u=1089 r=3 p=1 s=1 t=r c=3 +*dkcclphrase.dbk dbk BIB1,u=1090 r=3 p=1 s=1 t=r c=3 +*dkcclterm.ic ic BIB1,u=1091 r=3 p=3 s=1 t=l,r c=2 +*dkcclterm.im im BIB1,u=1092 r=3 p=3 s=1 t=l,r c=2 +*dkcclterm.ir ir BIB1,u=1093 r=3 p=3 s=1 t=l,r c=2 +dkcclterm.ou ou BIB1,u=1095 r=3 p=3 s=pw t=l,r c=2 +*dkcclterm.st st BIB1,u=1096 r=3 p=3 s=pw t=l,r c=1 +*dkcclphrase.lst lst BIB1,u=1096 r=3 p=1 s=1 t=r c=3 +dkcclterm.br br BIB1,u=1157 r=3 p=3 s=pw t=l,r c=1 +dkcclphrase.lbr lbr BIB1,u=1157 r=3 p=1 s=1 t=r c=3 +dkcclterm.oc oc BIB1,u=1211 r=3 p=3 s=103 t=l,r c=2 +dkcclterm.bc bc BIB1,u=1214 r=3 p=3 s=1 t=l,r c=2 #producentens stregkode (023a)- nu med i danZIG +*dkcclterm.au au DAN1,u=1 r=3 p=3 s=pw t=l,r c=1 +*dkcclphrase.lau lau DAN1,u=1 r=3 p=1 s=1 t=r c=3 +dkcclterm.ke ke DAN1,u=2 r=3 p=3 s=pw t=l,r c=1 +dkcclphrase.lke lke DAN1,u=2 r=3 p=1 s=1 t=r c=3 +dkcclterm.db db DAN1,u=3 r=3 p=3 s=pw t=l,r c=1 +dkcclphrase.ldb ldb DAN1,u=3 r=3 p=1 s=1 t=r c=3 +dkcclterm.df df DAN1,u=4 r=3 p=3 s=pw t=l,r c=1 +dkcclphrase.ldf ldf DAN1,u=4 r=3 p=1 s=1 t=r c=3 +dkcclterm.ds ds DAN1,u=5 r=3 p=3 s=pw t=l,r c=1 +dkcclphrase.lds lds DAN1,u=5 r=3 p=1 s=1 t=r c=3 +dkcclterm.me me DAN1,u=6 r=3 p=3 s=pw t=l,r c=1 +dkcclphrase.lme lme DAN1,u=6 r=3 p=1 s=1 t=r c=3 +dkcclterm.fm fm DAN1,u=7 r=3 p=3 s=pw t=l,r c=1 +dkcclphrase.lfm lfm DAN1,u=7 r=3 p=1 s=1 t=r c=3 +dkcclterm.nb nb DAN1,u=8 r=3 p=3 s=pw t=l,r c=1 +dkcclphrase.lnb lnb DAN1,u=8 r=3 p=1 s=1 t=r c=3 +dkcclterm.po po DAN1,u=9 r=3 p=3 s=pw t=l,r c=1 +dkcclterm.po person DAN1,u=9 r=3 p=3 s=102 t=l,r c=1 #Til tekstboksene. 
I target er s=102 defineret som uordnet +dkcclphrase.lpo lpo DAN1,u=9 r=3 p=1 s=101 t=r c=3 #OBS struktur 1 i dfa classic +dkcclterm.dk dk DAN1,u=10 r=3 p=3 s=pw t=l,r c=1 +*dkcclphrase.ldk ldk DAN1,u=10 r=3 p=1 s=1 t=r c=3 +dkcclterm.ok ok DAN1,u=11 r=3 p=3 s=pw t=l,r c=1 +*dkcclphrase.lok lok DAN1,u=11 r=3 p=1 s=1 t=r c=3 #NYT index i broenden - ikke medtaget i libv3, da brugt til BIB1 1:56 +*dkcclterm.gd gd DAN1,u=12 r=3 p=3 s=pw t=l,r c=1 +*dkcclphrase.lgd lgd DAN1,u=12 r=3 p=1 s=1 t=r c=3 +*dkcclterm.fg fg DAN1,u=13 r=3 p=3 s=pw t=l,r c=2 +dkcclterm.kk kk DAN1,u=14 r=3 p=3 s=pw t=l,r c=2 +dkcclterm.ix ix DAN1,u=15 r=3 p=3 s=pw t=l,r c=2 +dkcclterm.nm nm DAN1,u=16 r=3 p=3 s=pw t=l,r c=2 +*dkcclterm.nr nr DAN1,u=17 r=3 p=3 s=pw t=l,r c=2 +*dkcclterm.en en DAN1,u=18 r=3 p=3 s=pw t=l,r c=2 +*dkcclterm.tf tf DAN1,u=19 r=3 p=3 s=pw t=l,r c=2 +dkcclphrase.lvx lvx DAN1,u=20 r=3 p=1 s=1 t=r c=3 +dkcclterm.bs bs DAN1,u=21 r=3 p=3 s=pw t=l,r c=2 +*dkcclterm.ef ef DAN1,u=22 r=3 p=3 s=pw t=l,r c=1 +*dkcclphrase.lef lef DAN1,u=22 r=3 p=1 s=1 t=r c=3 +*dkcclterm.es es DAN1,u=23 r=3 p=3 s=pw t=l,r c=1 +*dkcclphrase.les les DAN1,u=23 r=3 p=1 s=1 t=r c=3 +dkcclterm.mo mo DAN1,u=24 r=3 p=3 s=pw t=l,r c=1 +*dkcclterm.fv fv DAN1,u=25 r=3 p=3 s=pw t=l,r c=2 +dkcclterm.fb fb DAN1,u=26 r=3 p=3 s=pw t=l,r c=2 +term.creator personer DAN1,u=26 r=3 p=3 s=888 t=l,r c=1 #DKABM svarende til join for fo+fb+no til filmsøg i tekstboks m 'and'-søgning +dkcclterm.ts ts DAN1,u=27 r=3 p=3 s=pw t=l,r c=1 +dkcclphrase.lts lts DAN1,u=27 r=3 p=1 s=1 t=r c=3 +*dkcclterm.ar ar DAN1,u=36 r=3 p=3 s=2 t=l,r c=2 #NYT index i broenden - ikke medtaget i libv3, da DBC1 1:211-1:214 udfoldet i stedet - betyder, at de enkelte målgruppesystemer ikke kan søges enkeltvis i brønden fx. 
kun pegi +*dkcclterm.ej ej DAN1,u=37 r=3 p=3 s=2 t=l,r c=1 +dkcclterm.hm hm DAN1,u=38 r=3 p=3 s=pw t=l,r c=2 # hovedmateriale fra 009 *g +*dkcclterm.ht ht DAN1,u=39 r=3 p=3 s=pw t=l,r c=1 +*dkcclphrase.lht lht DAN1,u=39 r=3 p=1 s=1 t=r c=3 +*dkcclterm.ip ip DAN1,u=40 r=3 p=3 s=pw t=l,r c=1 +dkcclterm.ka ka DAN1,u=41 r=3 p=3 s=pw t=l,r c=1 +dkcclterm.kg kg DAN1,u=42 r=3 p=3 s=pw t=l,r c=1 # primært til Promat til konsulentgruppering +dkcclterm.kr kr DAN1,u=43 r=3 p=3 s=pw t=l,r c=1 +dkcclterm.kx kx DAN1,u=44 r=3 p=3 s=pw t=l,r c=1 +dkcclterm.ld ld DAN1,u=45 r=3 p=3 s=103 t=l,r c=2 #brugt i stedet for DBC1 1=62. Completeness c=1 i dfa classic +dkcclterm.ns ns DAN1,u=46 r=3 p=3 s=pw t=l,r c=1 +dkcclterm.sf sf DAN1,u=47 r=3 p=3 s=pw t=l,r c=1 +dkcclterm.so so DAN1,u=48 r=3 p=3 s=pw t=l,r c=1 +dkcclphrase.lso lso DAN1,u=48 r=3 p=1 s=1 t=r c=3 +dkcclterm.kn kn DAN1,u=49 r=3 p=3 s=pw t=l,r c=1 +dkcclphrase.lkn lkn DAN1,u=49 r=3 p=1 s=1 t=r c=3 +dkcclterm.uu uu DAN1,u=50 r=3 p=3 s=pw t=l,r c=1 +dkcclphrase.lff lff DAN1,u=51 r=3 p=1 s=101 t=r c=2 +dkcclterm.ll ll DAN1,u=52 r=3 p=3 s=pw t=l,r c=1 +dkcclterm.nv nv DAN1,u=53 r=3 p=3 s=pw t=l,r c=1 +dkcclterm.tg tg DAN1,u=54 r=3 p=3 s=pw t=l,r c=1 #c=2 i DanZIG - har bedt LEA om at rette 2/8-2011 +dkcclterm.ln ln DBC1,u=6 r=3 p=3 s=2 t=l,r c=1 #officiel use-attribut BIB1 1=56, men hos os kun til 001 *b +term.reviewer rc DBC1,u=8 r=3 p=3 s=pw t=l,r c=1 #fra DKABM +phrase.reviewer lrc DBC1,u=8 r=3 p=1 s=1 t=r c=2 #fra DKABM +term.reviewedCreator rf DBC1,u=9 r=3 p=3 s=pw t=l,r c=1 #fra DKABM +phrase.reviewedCreator lrf DBC1,u=9 r=3 p=1 s=1 t=r c=2 #fra DKABM +term.reviewedTitle ri DBC1,u=10 r=3 p=3 s=pw t=l,r c=1 #fra DKABM +phrase.reviewedTitle lri DBC1,u=10 r=3 p=1 s=1 t=r c=3 #fra DKABM +Intet index pi DBC1,u=15 r=3 p=3 s=pw t=l,r c=1 #overhovedet noget indhold? 
+Intet index iv DBC1,u=17 r=3 p=3 s=1 t=l,r c=2 +#u=63-64 kun brugt i dfa-basen: +Intet index tj DBC1,u=64 r=3 p=3 s=pw t=l,r c=1 #titelregister uden felt 526 - bliver måske muligt med term.title +Specialstruktur på target titel DBC1,u=64 r=3 p=3 s=2 t=l,r c=1 #søgning på titel fra tekstbokse med 'and'-søgning +Intet index ltj DBC1,u=64 r=3 p=1 s=1 t=r c=3 +Intet index n50 DBC1,u=140 r=3 p=3 s=2 t=l,r c=1 +#u=141-142 kun brugt i dfa-basen: +Intet index bo DBC1,u=141 r=3 p=3 s=2 t=l,r c=1 +Intet index mm DBC1,u=142 r=3 p=3 s=pw t=l,r c=1 +Intet index lmm DBC1,u=142 r=3 p=1 s=1 t=r c=3 +Intet cclindex ra DBC1,u=211 r=3 p=3 s=pw t=l,r c=1 #evt. term.audience (dkdcplus:age) men kun som afgrænsning +Intet cclindex rb DBC1,u=212 r=3 p=3 s=pw t=l,r c=1 #evt. term.audience (dkdcplus:medieraad) men kun som afgrænsning +Intet index rd DBC1,u=213 r=3 p=3 s=pw t=l,r c=1 +Intet cclindex rp DBC1,u=214 r=3 p=3 s=pw t=l,r c=1 #evt. term.audience (dkdcplus:pegi) men kun som afgrænsning +Intet index n51 DBC1,u=215 r=3 p=3 s=2 t=l,r c=1 +term.primaryLanguage hs DBC1,u=217 r=3 p=3 s=pw t=l,r c=2 #hovedsprog (ikke endnu i danZig) - evt. term.primaryLanguage, der sikkert udvides til at rumme 041 *a og *p +Intet index wcx DBC1,u=224 r=3 p=3 s=pw t=l,r c=1 #kun i dfa classic til oclc-numre diff --git a/heikki/dbc-os/bibliotek.dk.xml b/heikki/dbc-os/bibliotek.dk.xml new file mode 100644 index 0000000..5320b6f --- /dev/null +++ b/heikki/dbc-os/bibliotek.dk.xml @@ -0,0 +1,25 @@ + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/heikki/dbc-os/dbc-opensearch-gw.cfg b/heikki/dbc-os/dbc-opensearch-gw.cfg new file mode 100644 index 0000000..47bb1ba --- /dev/null +++ b/heikki/dbc-os/dbc-opensearch-gw.cfg @@ -0,0 +1,67 @@ +# Config file for dbc-opensearch +# +# The program first tries to load /etc/dbc-opensearch-gw.cfg +# If that fails, tries to load dbc-opensearch-gw.cfg in current dir +# If that fails, outputs a warning and runs with defaults. 
+ +# Defining the back end + +# These three are concatenated to get the URL to the opensearch server. +# Later more stuff is appended, like the query, start and number of records, +# etc. Any URL parameter can be overridden in the database name, see below + +database: Default +baseurl: http://openbibdk.addi.dk/0.8/ +objectformat: dkabm +#constantparams: action=search&facets.numberOfTerms=10&facets.facetName=facet.creator&facets.facetName=facet.type&facets.facetName=facet.subject&agency=100200&profile=test&collectionType=work +constantparams: action=search&agency=100200&profile=test&collectionType=work&rank=rank_general +fields: bibliotek.dk.fields.txt + +database: bibliotek.work +baseurl: http://openbibdk.addi.dk/0.8/ +objectformat: marcxchange +constantparams: action=search&facets.numberOfTerms=10&facets.facetName=facet.creator&facets.facetName=facet.type&facets.facetName=facet.subject&agency=100200&profile=test&collectionType=work +fields: bibliotek.dk.fields.txt + + +database: bibliotek.manifestation +baseurl: http://openbibdk.addi.dk/0.8/ +objectformat: marcxchange +constantparams: action=search&facets.numberOfTerms=10&facets.facetName=facet.creator&facets.facetName=facet.type&facets.facetName=facet.subject&agency=100200&profile=test&collectionType=manifestation +fields: bibliotek.dk.fields.txt + +# Looks like the collectionType can be one of +# 'manifestation' for single records +# 'work' for some kind of clustered records +# 'work-1' looks like manifestation to me, but can be something else + +# agency=400151 gets (only) music stuff, and that has no marc stuff +# agency=100200 returns books and articles, that can handle marc stuff +# +# includeMarcXchange=true asks for MarcXchange record (in some versions). +# OR objectFormat=marcxchange (other versions) + +#chunksize: 10; # initial number of records to get, grows to max present req. 
+ +# Set to 1 to get much more debug log +debug: 0 +#debug: 1 + +# Instead of talking to opensearch, it is possible to load test data from +# a file. The start position and number of records is appended to the file +# name given below, so that you can test with different files. +#test_data: /tmp/test_data_file # f.ex. /tmp/test_data_file_1_10.xml + +# Formatting of the results +# 0 - do not format +# 1 - do format +# (passed directly to LibXML's toString) +#prettyxml: 0 +prettyxml: 1 + +# Operators +#op_and: AND +#op_or: OR +#op_not: NOT + + diff --git a/heikki/dbc-os/dbc-opensearch-gw.pl b/heikki/dbc-os/dbc-opensearch-gw.pl new file mode 100755 index 0000000..c7f5101 --- /dev/null +++ b/heikki/dbc-os/dbc-opensearch-gw.pl @@ -0,0 +1,920 @@ +#!/usr/bin/perl -w + +# DBC-OPENSEARCH gateway +# Z39.50 server (and SRU and...) and a client to DBC's opensearch +# +# Based on DBC's Primo gateway +# +# Supports sortby with one argument with or without /ascending/descending +# These are translated to parameters to the opensearch server. The rest of +# the query is passed on verbatim. +# +# See DBC-45 in our Jira +# +# Programmed by +# Heikki Levanto, Index Data +# +# (C) Copyright 2012-2013 Index Data + +# Example opensearch url (split for readability) + +#http://opensearch.addi.dk/next_2.2/? 
+# action=search& +# query=hammer& +# agency=100200& +# profile=test& +# start=1& +# stepValue=3& +# facets.numberOfTerms=5& +# facets.facetName=facet.creator& +# facets.facetName=facet.type + +#http://opensearch.addi.dk/next_2.2/?action=search&query=hammer&agency=100200&profile=test&start=1&stepValue=3&facets.numberOfTerms=5&facets.facetName=facet.creator&facets.facetName=facet.type + +# A simple way to test the script: +# ./dbc-opensearch-gw.pl -1 & +# zoomsh "open @:9999/default,agency=100200&profile=test" \ +# "find cql:hamlet sortby creator" "show 0" "quit") + +use strict; +use warnings; +use utf8; +use Encode; +use URI::Escape; +use Net::Z3950::SimpleServer; +use Net::Z3950::OID; +use Data::Dumper; +use LWP::UserAgent; +use HTTP::Cookies; +use XML::LibXML; +use File::Basename; + +my $gwversion = "1.5"; + +############# Configuration +my $configfilename = "dbc-opensearch-gw.cfg"; + + +# The following can be overwritten in the config file +# It consists of namevalue pairs, separated by a colon +# The names are like the variable names below, without the $ +# as in +# chunksize: 10 +# White space and #comments are ignored + +my %baseurl; +my %constantparams; +my $fields = {}; +my $objectformat = {}; +$baseurl{'Default'} = "http://opensearch.addi.dk/2.2/"; +$constantparams{'Default'} = "action=search&collectionType=manifestation"; +my $chunksize = 10; # initial, grows to max present req. 
+my $prettyxml = 0; # 1 for formatting XML a bit, 0 for not +my $debug = 0; +my $test_data = ""; +my $op_and = "AND"; +my $op_or = "OR"; +my $op_not = "NOT"; + +# Magic value to tell that a term is not to be included in the query +# when it contained sort stuff that has been extracted in the session +my $magic_sort_indicator = " MAGIC SORT INDICATOR "; + +############ Config file + +sub readconfig { + my $options = shift; + my $cfile = $options->{CONFIG}; + # Override config filename with command line value + if ($cfile ne "default-config") { + $configfilename = $cfile; + } + if (! -r $configfilename) { + # die if we explicit gave a config file and it isn't present + die "Error opening configuration file given by -c $configfilename: $!\n" if ($cfile ne "default-config"); + yazlog("WARN: Could not open config file $configfilename. Running with defaults."); + return; + } + yazlog("Reading configuration file $configfilename"); + open(my $F,$configfilename) + or die "Error opening config file $configfilename: $!\n"; + my $database = "Default"; + my $line = 1; + while ( <$F> ) { + chomp(); + s/\#.*$//; # remove comments + s/\s*$//; # and trailing spaces + if ( /^ *([^ :]+) *: *(.*)$/ ) { + yazlog("Config setting $1 : $2") if $debug; + if ($1 eq "baseurl") { + $baseurl{$database} = $2; + } elsif ($1 eq "urlpath") { + die "$configfilename:$line: urlpath not supported anymore. Use baseurl"; + } elsif ($1 eq "constantparams") { + $constantparams{$database} = $2; + } elsif ($1 eq "chunksize") { + $chunksize = $2; + } elsif ($1 eq "prettyxml") { + $prettyxml = $2; + } elsif ($1 eq "debug") { + $debug = $2; + } elsif ($1 eq "test_data") { + $test_data = $2; + } elsif ($1 eq "op_and") { + $op_and =$2; + } elsif ($1 eq "op_or") { + $op_or =$2; + } elsif ($1 eq "op_not") { + $op_not =$2; + } elsif ($1 eq "objectformat") { + $objectformat->{$database} = $2; + } elsif ($1 eq "fields") { + my $fname = $2; + if ($fname !~ /^\// ) { + $fname = dirname($configfilename) . "/" . 
$fname; + } + $fields->{$database} = readfields($fname); + } elsif ($1 eq "database") { + $constantparams{$2} = $constantparams{$database}; + $baseurl{$2} = $baseurl{$database}; + $database =$2; + } else { + die "$configfilename:$line: Bad directive: $1\n"; + } + } elsif (/^$/) { + ; + } else { + die "$configfilename:$line: Bad syntax\n"; + } + $line++; + } + # Only log if debugging, as these are displayed before + # yaz processes the command line, and opens log file from -l + yazlog("Opensearch gateway $gwversion starting") if ($debug); + yazlog("Loaded config from $cfile") if ($debug); +} + +####### fields +sub readfields { + my $fname = shift; + yazlog("Reading fields file $fname"); + open(my $F, $fname) or die "Error open fields file $fname\n"; + my $fr = {}; + while ( <$F> ) { + chomp(); + s/\#.*$//; # remove comments + s/\s*$//; # and trailing spaces + my @list = split(/\s+/,$_); + my $cqlfield = $list[0]; + if (defined($cqlfield) && $cqlfield =~ /\./) { + if (defined($fr->{$cqlfield})) { + print "$cqlfield already defined\n" if $debug; + } else { + foreach (@list) { + if ( /^([^,]+),([^=]+)=(.*)$/ ) { + $fr->{$cqlfield}->{$2} = $3; + my $s = $1; + if ($s =~ /^\d/ ) { + $fr->{$cqlfield}->{set} = $s; + } elsif ($s =~ /^bib-?1$/i ) { + $fr->{$cqlfield}->{set} = '1.2.840.10003.3.1'; + } elsif ($s =~ /^dan-?1$/i ) { + $fr->{$cqlfield}->{set} = '1.2.840.10003.3.15'; + } elsif ($s =~ /^dbc-?1$/i ) { + $fr->{$cqlfield}->{set} = '1.2.840.10003.3.1000.105.1'; + } else { + die "Unknown attribute set $s\n"; + } + } elsif ( /^([^=]+)=(.*)$/ ) { + $fr->{$cqlfield}->{$1} = $2; + } + } + } + } + } + + if ($debug) { + print Dumper($fr); + foreach my $f (keys %{$fr}) { + print "f=$f\n"; + foreach my $s (keys %{$fr->{$f}}) { + my $x = $fr->{$f}->{$s}; + print " $s=$x\n"; + } + } + } + return $fr; +} + +sub read_test_data { + my $filename = shift; + my $start = shift; + my $chunksize = shift; + $filename = $filename . "_" . $start . "_" . $chunksize . 
".xml"; + yazlog("WARN: fetching test data only: $filename"); + open(F,$filename) + or die "Error opening test data file $filename: $!\n"; + my $content; + while ( ) { + $content .= $_; + } + yazlog("Loaded test data $filename"); + return $content; +} + +############## Helpers + +# Simple logger +sub yazlog { + my $msg = shift; + if ($msg) { + Net::Z3950::SimpleServer::yazlog($msg); + } +} + +# Set the error items in the handle, and return an empty string +# to signal error +sub err { + my $href = shift; + my $errno = shift; + my $errtxt = shift; + my $logmsg = shift; # optional + if ( $href ) { # defensive coding + $href->{ERR_CODE}=$errno; + $href->{ERR_STR}=$errtxt; + } + yazlog("ERROR $errno: $errtxt"); + yazlog($logmsg); + return ""; +} + +# Dump a handle, without the full record store +sub dumphandle { + return unless $debug; + my $href = shift; + my $msg = shift; + yazlog("Dumphandle: " . $msg); + my $session = $href->{HANDLE}; + my $recs = $session->{records}; + $session->{records} = "<<< records omitted>>>"; + yazlog(Dumper($href)); + $session->{records} = $recs; +} + +############## http client + +# Fetch a page from the given URL +sub fetchpage { + my $href = shift; + my $url = shift; + my $session = $href->{HANDLE}; + my $ua = new LWP::UserAgent; + if ( ! $session->{cookies} ) { + $session->{cookies} = HTTP::Cookies->new( ); + yazlog("Initialized a new cookie jar") if ($debug); + } + $ua->cookie_jar( $session->{cookies} ); + my $req = new HTTP::Request GET => $url; + my $res = $ua->request($req); + if ( ! $res->is_success ) { + return err($href, 2, #temporary system error + "HTTP error from opensearch: ".$res->code. ": " .$res->message, + "fetching " . $url ); + } + my $content = $res->content; + yazlog( "Received " . length($content). 
" bytes from $url"); + if ( !utf8::valid($content) ) { + yazlog("The data is NOT VALID utf-8!!"); + # Could return an error here, but probably better to limp along + } + # Force Perl to think the content as being utf-8 + # If we get bad utf-8 data, things may fail in strange ways + # But without this, Perl assumes byte data, and helpfully + # converts it into utf-8, resulting in double-encoding. + # See bug 4669. + Encode::_utf8_on($content); + # TODO - Check the http content-type header to see if we really got utf-8! + + return $content; +} + +# Get number of records from opensearch. +# Detects some simple error codes +# Returns a XPathContext that has the actual document, and some namespaces +# defined. It can be used for finding nodes. +# Or an empty string to indicate errors +sub opensearchclient { + my $href = shift; + my $startrec = shift; + + my $session = $href->{HANDLE}; + my $query = $session->{query}; + my $numrecs = $session->{chunksize}; + my $dbname = $session->{dbbase}; + my $extraargs = $session->{dbargs}; + my $sort = $session->{sort}; + + my $urlparams = "?" . $constantparams{$dbname} . #all after '?' + "&start=$startrec". "&stepValue=$numrecs"; + if (defined($session->{comp})) { + $urlparams .= "&objectFormat=" . $session->{comp}; + } + if ( $sort ) { + $urlparams .= "&sort=$sort"; + } + my $burl = $baseurl{$dbname}; + yazlog("initial url parts: $burl $urlparams $query") + if $debug; + while ( $extraargs =~ /([^ &=]+)=([^ &]+)&?/g ) { + my $k = uri_unescape($1); + my $v = uri_unescape($2); + yazlog("Looking at extra parameter '$k' = '$v'") if $debug; + if ( $k eq "host" ) { + $burl = "http://" . $v. 
"/"; + yazlog("Replaced host, got baseurl '$burl' ") if $debug; + } elsif ( $k eq "gwdebug" ) { + yazlog("Setting debug to $v because of a databasename parameter") + if ($debug || $v); + $debug = $v; + } elsif ( $urlparams =~ s/([?&])($k)=([^ &]+)/$1$k=$v/ ) { + yazlog("Replaced '$k': was '$3' is now '$v' ") if $debug; + } else { + $urlparams .= "&$k=$v"; + yazlog("Appended '$k' = '$v'") if $debug; + } + } + yazlog("dbname: $dbname"); + yazlog("final url parts: $burl $urlparams $query") + if $debug; + my $url = $burl . $urlparams . $query; + yazlog("final url: $url") + if $debug; + + my $page; + if (!$test_data) { + $page = fetchpage($href, $url); + } + else { + $page = read_test_data($test_data, $startrec, $numrecs); + } + + if (!$page) { + return; + } + my $xmldom; + eval { $xmldom = XML::LibXML->load_xml(string => $page); }; + if ( $@ ) { + return err( $href,100, #unspecified error + "Received bad XML from Opensearch: $@ ", + substr( $page,0,200 )."..."); + } + my $xml = XML::LibXML::XPathContext->new($xmldom); + $xml->registerNs('os', 'http://oss.dbc.dk/ns/opensearch'); + + # check error + my $err = $xml->findvalue('//os:searchResponse/os:error'); + if ($err) { + return err( $href, 2, #temporary system error + "Error from Opensearch: " . 
$err, + substr( $page,0,400 )."..."); + } + return $xml; +} + +# Extract the hits into the cache in the session +sub get_results { + my $href = shift; + my $xml = shift; + my $session = $href->{HANDLE}; + my $i = 0; + my $first = 0; + my $last = 0; + foreach my $rec ( $xml->findnodes('//os:searchResult') ) { + my $recno = $xml->findvalue('os:collection/os:resultPosition',$rec) ; + if ( $recno <= 0 ) { + return err( $href, 2, #temporary system error + "Got a bad record from opensearch (no resultPosition)" ); + } + $first = $recno unless ($first); + $last = $recno; + # Clone the node, so we get namespace definitions too + my $clone = $rec->cloneNode(1); + my $comp = $session->{comp}; + $session->{records}->{$comp}->[$recno] = $clone->toString($prettyxml); + yazlog("Doc $recno: " . + length($session->{records}->{$comp}->[$recno]) . " bytes" ) + if $debug; + }; + yazlog("Extracted records $first - $last") if $debug; +} + + +# extract facets from the xml into the session, in a form that can +# be returned directly in the searchresponse. +sub facets { + my $href = shift; + my $xml = shift; + my $session = $href->{HANDLE}; + my $zfacetlist = []; + bless $zfacetlist, 'Net::Z3950::FacetList'; + + my $i = 0; + + foreach my $facetnode ( $xml->findnodes('//os:facetResult/os:facet') ) { + #yazlog("Got facet " . $facetnode ); + my $facetname = $xml->findvalue('os:facetName', $facetnode); + my $zfacetfield = {}; + bless $zfacetfield, 'Net::Z3950::FacetField'; + $zfacetlist->[$i++] = $zfacetfield; + my $zattributes = []; + bless $zattributes, 'Net::Z3950::RPN::Attributes'; + $zfacetfield->{'attributes'} = $zattributes; + my $zattribute = {}; + bless $zattribute, 'Net::Z3950::RPN::Attribute'; + $zattribute->{'attributeType'} = 1; + $zattribute->{'attributeValue'} = $facetname; + $zattributes->[0]=$zattribute; + my $zfacetterms = []; + bless $zfacetterms, 'Net::Z3950::FacetTerms'; + $zfacetfield->{'terms'} = $zfacetterms; + my $debugfacets = $facetname . 
" :"; + my $j = 0; + foreach my $facetterm ( $xml->findnodes('os:facetTerm',$facetnode) ) { + # They seem to misspell frequency. Check both, for the case they + # get around to fixing it. + my $freq = $xml->findvalue('os:frequence', $facetterm) || + $xml->findvalue('os:frequency', $facetterm); + my $term = $xml->findvalue('os:term', $facetterm); + $debugfacets .= " '" . $term . "'=" . $freq; + my $zfacetterm = {}; + bless $zfacetterm, 'Net::Z3950::FacetTerm'; + $zfacetterm->{'term'} = $term; + $zfacetterm->{'count'} = $freq; + $zfacetterms->[$j++] = $zfacetterm; + } + yazlog($debugfacets) if ($debug); + } # facet loop + if ( $i ) { + $session->{facets} = $zfacetlist; + } + return; +} + +# Check that we have the needed records in the cache, fetch if need be +sub getrecords { + my $href = shift; + my $start = shift; + my $num = shift; + my $session = $href->{HANDLE}; + + if (defined($href->{COMP})) { + $session->{comp} = $href->{COMP}; + } else { + $session->{comp} = $session->{def_comp}; + } + yazlog("Checking start=$start, num=$num") if ($debug); + if ( $num > $session->{chunksize} ) { + $session->{chunksize} = $num; + } + # Skip the records we already have + my $comp = $session->{comp}; + while ( $num && $session->{records}->{$comp}->[$start] ) { + $start++; + $num--; + } + if ( $num == 0 && $session->{hits} ) { # we have a hit count and have them all + yazlog("no need to get more records") if ($debug); + return; # no need to fetch anything + } + my ($xml,$page) = opensearchclient($href, $start); + if (!$xml) { + return; # error has been set already + } + if ( ! 
$session->{hits} ){ + my $hits = $xml->findvalue('//os:searchResponse/os:result/os:hitCount'); + if ( length($hits) == 0 ) { # can't just say !$hits, triggers on "0" + return err($href, 100, "No hitcount in response"); + } + $session->{hits} = $hits; + # Do not attempt to extract facets on zero hits + if ($hits > 0) { + facets($href,$xml); + } + } + get_results($href,$xml); +} + +# Remove the sortby clause from the CQL query, translate to +# opensearch sort parameter, and put it in the session. +# Handles only one sort key +sub fixsortquery { + my $href = shift; + my $qry = shift; + my $session = $href->{HANDLE}; + my $sortclause = ""; + if ( $qry =~ /^(.*?) +sortby *(\w+)(\/(\w+))?(.*) *$/ ) { + yazlog("Separated query '$1' from sort clause '$2' '$3' leaving '$5' ") if $debug; + $qry = $1; + $sortclause= $2; + my $direction = $4 || "ascending"; + if ( $5 ) { + return err($href, 211, "Only one sort key supported" ); + } + if ( $sortclause ne "random" ) { + $sortclause .= "_" . $direction; + } + } + return ( $qry, $sortclause ); +} + +################# Query translation +sub map_use_attr { + my $href = shift; + my $t = shift; + my $session = $href->{HANDLE}; + my $fr = shift; + + my $dbbase = $session->{dbbase}; + if (!defined($fields->{$dbbase})) { + return err($href, 3, "No mapping defined for numeric attribtues"); + } + $fr = $fields->{$dbbase}; + my $a_set = '1.2.840.10003.3.1'; + my $i = 0; + my $a_u = 1016; # use, type 1 + my $a_r = 3; # relation, type 2 + my $a_p = -1; # position, type 3 + my $a_s = -1; # structure, type 4 + my $a_t = 100; # truncation, type 5 + my $a_c = -1; # completeness, type 6 + while (my $attr = $t->{attributes}->[$i++]) { + my $t = $attr->{attributeType}; + my $v = $attr->{attributeValue}; + if ($t == 1) { + $a_u = $v; + if (defined($attr->{attributeSet})) { + $a_set = $attr->{attributeSet}; + } + } elsif ($t == 2) { + $a_r = $v; + } elsif ($t == 3) { + $a_p = $v; + } elsif ($t == 4) { + $a_s = $v; + } elsif ($t == 5) { + $a_t = $v; + 
} elsif ($t == 6) {
+            $a_c = $v;
+        } else {
+            return err($href, 113, $t);
+        }
+    }
+    # Try every configured field.  A field stays acceptable until one of
+    # its constraints rejects the requested attributes.  The *_ok flags
+    # record whether any field could satisfy that attribute kind; they only
+    # serve to pick the diagnostic when nothing matches.
+    my $best = undef;
+    my $use_ok = 0;
+    my $relation_ok = 0;
+    my $position_ok = 0;
+    my $structure_ok = 0;
+    my $completeness_ok = 0;
+    foreach my $f (keys %{$fr}) {
+        my $accept = $f;
+        foreach my $s (keys %{$fr->{$f}}) {
+            my $v = $fr->{$f}->{$s};
+            if ($s eq "u") {
+                if ($a_u != $v) {
+                    $accept = undef;
+                } elsif (defined($fr->{$f}->{set}) && $a_set ne $fr->{$f}->{set}) {
+                    $accept = undef;
+                } else {
+                    $use_ok = 1;
+                }
+            }
+            if ($s eq "r") {
+                if ($v =~ /^\d+?$/) {
+                    if ($v != $a_r) {
+                        $accept = undef;
+                    } else {
+                        $relation_ok = 1;
+                    }
+                } else {
+                    # a non-numeric relation in the table accepts anything
+                    $relation_ok = 1;
+                }
+            }
+            if ($s eq "p") {
+                if ($a_p != -1 && $v != $a_p) {
+                    $accept = undef;
+                } else {
+                    $position_ok = 1;
+                }
+            }
+            if ($s eq "s") {
+                if ($a_s == -1) {
+                    $structure_ok = 1;
+                } elsif ($v =~ /^\d+?$/) {
+                    if ($v != $a_s) {
+                        $accept = undef;
+                    } else {
+                        $structure_ok = 1;
+                    }
+                } elsif ($v eq "pw") {
+                    # "pw" accepts structure 1 and 2 only
+                    if ($a_s != 1 && $a_s != 2) {
+                        $accept = undef;
+                    } else {
+                        $structure_ok = 1;
+                    }
+                }
+            }
+            if ($s eq "t") {
+                if ($v =~ /^\d+?$/) {
+                    $accept = undef unless $v == $a_t;
+                } else {
+                    # Symbolic table value such as "l", "r" or "l,r".
+                    # Request values follow q_term: 1 = right truncation,
+                    # 2 = left, 3 = both, 100 = none.
+                    if ($a_t == 1) {
+                        $accept = undef unless $v =~ /r/;
+                    } elsif ($a_t == 2) {
+                        $accept = undef unless $v =~ /l/;
+                    } elsif ($a_t == 3) {
+                        $accept = undef unless ($v =~ /l/ && $v =~ /r/);
+                    } else {
+                        $accept = undef unless $a_t == 100;
+                    }
+                }
+            }
+            if ($s eq "c" ) {
+                if ($a_c != -1 && $v =~ /^\d+?$/ && $v != $a_c) {
+                    $accept = undef;
+                } else {
+                    $completeness_ok = 1;
+                }
+            }
+        }
+        # The last accepted field wins, so with several matches the result
+        # depends on the iteration order of keys().
+        $best = $accept if $accept;
+    }
+    if (!defined($best)) {
+        # Report the first attribute kind that no field could satisfy
+        return err($href, 114, $a_u) unless ($use_ok);
+        return err($href, 117, $a_r) unless ($relation_ok);
+        return err($href, 119, $a_p) unless ($position_ok);
+        return err($href, 118, $a_s) unless ($structure_ok);
+        return err($href, 122, $a_c) unless ($completeness_ok);
+        return err($href, 123, "");
+    }
+    return $best;
+}
+
+# Translate one RPN term node into a query clause.
+#
+# Walks the attributes of the term to pick the field, the relation
+# operator, phrase quoting and truncation, and returns
+# "FIELD OPERATOR TERM", or just the term when no field was given.
+# A sort attribute (type 7) stores the sort key in $session->{sort} and
+# returns $magic_sort_indicator instead.  Unsupported attributes are
+# reported through err().
+sub q_term {
+    my $href = shift;
+    my $t = shift;
+    my $session = $href->{HANDLE};
+    my $field = "";
+    my $operator = "=";
+    my $sort = "";
+    my
$quote = "";
+    my $rtrunc = "";
+    my $ltrunc = "";
+    my $term = $t->{term};
+    if ($term eq "") {
+        # ### Can not test, simpleServer gets such a bad handle
+        return err($href, 108, # malformed query
+            "Empty term not supported" );
+    }
+    # Walk all attributes of the term; each type sets one piece of the
+    # clause, or ends the translation with a diagnostic.
+    my $i = 0;
+    while (my $attr = $t->{attributes}->[$i++])
+    {
+        #print "Attr: " . Dumper($attr) ;
+        my $aval = $attr->{attributeValue};
+        my $type = $attr->{attributeType};
+        if ($type == 1) {
+            if ($aval =~ /^\d+?$/) { # numeric use: look it up in the field table
+                $field = map_use_attr($href, $t);
+                return if ($href->{ERR_CODE});
+            } else { # string use: taken as the field name itself
+                $field = $aval;
+            }
+        } elsif ($type == 2) { # Relation
+            if ($aval == 1) {
+                $operator = "<";
+            } elsif ($aval == 2) {
+                $operator = "<=";
+            } elsif ($aval == 3) {
+                $operator = "=";
+            } elsif ($aval == 4) {
+                $operator = ">=";
+            } elsif ($aval == 5) {
+                $operator = ">";
+            } else {
+                return err($href, 117, # unsupp relation
+                    $aval, "Unsupported relation $aval");
+            }
+        } elsif ($type == 3) { # position: 1..3 accepted, otherwise ignored
+            if ($aval < 1 || $aval > 3) {
+                return err ($href, 119, # unsupp position
+                    $aval, "Unsupported position $aval");
+            }
+        } elsif ($type == 4) { # structure
+            if ($aval == 1) { # phrase: the term gets double-quoted
+                # Not working, DBC-112
+                # $operator = "adj";
+                $quote = '"';
+            } elsif ($aval == 2 || $aval == 4) { # word / year
+                # nothing special to do
+            } else {
+                return err($href, 118, # unsupp structure
+                    $aval, "Unsupported structure $aval");
+            }
+        } elsif ($type == 5) { # truncation
+            if ($aval == 1) { # right trunc
+                $rtrunc = '*';
+            } elsif ($aval == 2) { # left trunc
+                $ltrunc = '*';
+            } elsif ($aval == 3) { # left and right trunc
+                $ltrunc = '*';
+                $rtrunc = '*';
+            } elsif ($aval == 100) { # none
+                ;
+            } else {
+                return err($href, 120, # unsupp truncation
+                    $aval, "Unsupported truncation $aval");
+            }
+        } elsif ($type == 6) { # completeness: accepted and ignored
+            ;
+        } elsif ($type == 7) { # sort: 1 and 2 are the only legal values
+            if ($aval != 1 && $aval != 2) {
+                return err($href, 237, # illegal sort
+                    $aval, "Illegal sort (attr 7): $aval");
+            }
+            $sort = $aval;
+        } else {
+            return err($href, 113, # unsupported attribute type
+
$type,
+                "Unsupported attribute type= " . $type.
+                " val='" . $aval ."'");
+        }
+    } # attr loop
+    if ($sort) {
+        # A term with a sort attribute is not a search term: keep the sort
+        # key in the session and tell the caller to drop this node.
+        if ($session->{sort}) {
+            return err($href, 237, # illegal sort
+                "Only one sort supported");
+        }
+        my $direction = "_ascending";
+        if ($sort == 2) { $direction = "_descending"; }
+        if ($field eq "random" ) { $direction = ""; }
+        $session->{sort} = $field.$direction;
+        return $magic_sort_indicator;
+    }
+    if (($rtrunc || $ltrunc) && $quote) { # We can not do truncation on phrases
+        return err($href, 120, # unsupp trunc
+            "", "Can not do truncation on phrases");
+    }
+    # Escape characters that would be taken as wildcards
+    $term =~ s/([*?^"])/\\$1/g;
+    $term = $quote.$ltrunc.$term.$rtrunc.$quote;
+    my $clause = $term;
+    if ($field) {
+        $clause = $field . " " . $operator . " " . $term;
+    }
+    yazlog("q_term: $clause" ) if ($debug);
+    return $clause;
+}
+
+# Translate an RPN tree node into a query string, recursively.
+#
+#   $href - request hash, passed on to q_term() and err()
+#   $n    - RPN node: a term, or an And / Or / AndNot operator node whose
+#           operands are $n->[0] and $n->[1]
+#
+# Returns the clause for a term, "( LEFT OP RIGHT )" for an operator, or
+# only the other operand when one side is the sort pseudo-term
+# ($magic_sort_indicator).  Returns "" when an operand failed, and the
+# result of err() for an operator that has no entry in %ops.
+sub q_node {
+    my $href = shift;
+    my $n = shift;
+    my $class = ref($n);
+    if ( $class eq "Net::Z3950::RPN::Term" ) {
+        return q_term($href, $n);
+    }
+    my %ops = ( "Net::Z3950::RPN::And" => $op_and,
+                "Net::Z3950::RPN::Or" => $op_or,
+                "Net::Z3950::RPN::AndNot" => $op_not );
+    my $op = $ops{$class} ;
+    if ( $op ) {
+        # Failure is detected through ERR_CODE or an empty result, not by
+        # plain truthiness: a bare term "0" is a valid clause but is false
+        # in Perl, and used to abort the translation without a diagnostic.
+        my $left = q_node($href,$n->[0]);
+        return "" if ( $href->{ERR_CODE} || !defined($left) || $left eq "" );
+        my $right = q_node($href,$n->[1]);
+        return "" if ( $href->{ERR_CODE} || !defined($right) || $right eq "" );
+        return $left if ( $right eq $magic_sort_indicator );
+        return $right if ( $left eq $magic_sort_indicator );
+        my $clause = "( $left $op $right )";
+        yazlog("q_node: $clause") if ($debug);
+        return $clause;
+    }
+    my $opname = $class;
+    $opname =~ s/^.*:+//; # Remove the Net::... for error msg
+    return err($href,110, # operator not supported
+        $opname,
+        "Operator '$class' not supported.
Only 'And'");
+}
+
+
+# Translate the RPN query of a search request.
+# Expects $href->{RPN} to be a Net::Z3950::APDU::Query and returns what
+# q_node() makes of its {query} tree; any other class is reported as
+# error 100.
+sub q_query {
+    my $href = shift;
+    my $qry = $href->{RPN};
+    my $class = ref($qry);
+    yazlog("Translating query") if ($debug);
+    if ( $class ne "Net::Z3950::APDU::Query" ) {
+        return err($href,100, # unspecified error
+            "Programming error, no query found",
+            "Class of query is '$class', not Net::Z3950::APDU::Query" );
+    }
+    # TODO - check attributeSet
+    my $query = q_node($href,$qry->{query});
+    yazlog("Translated query: $query" ) if ($debug);
+    return $query;
+}
+
+
+################# Request callbacks
+
+# INIT callback: create an empty session and hang it on $href->{HANDLE}.
+sub init_handler {
+    my $href = shift;
+    my $session = {};
+    $session->{chunksize} = $chunksize; # to start with
+    $session->{records} = {};
+    $href->{HANDLE} = $session;
+    dumphandle( $href, "Init:");
+}
+
+# SEARCH callback.
+# Resets the per-search state of the session, splits the database name
+# into base name and extra arguments (everything after the last comma),
+# takes the query from $href->{CQL} or translates $href->{RPN}, and stores
+# it URI-escaped in $session->{query}.  Returns early, with the diagnostic
+# already set, on an unknown database or an untranslatable query.
+sub search_handler {
+    my $href = shift;
+    my $session = $href->{HANDLE};
+    dumphandle( $href, "Search:");
+    $session->{hits} = 0;
+    $session->{facets} = [];
+    $session->{records} = {};
+    my $db = $href->{DATABASES}[0];
+    $session->{dbbase} = $db;
+    $session->{dbbase} =~ s/,.*$//; # without extraargs
+    if (! exists $constantparams{$session->{dbbase}}) {
+        return err( $href, 235, #Database does not exist
+            $session->{dbbase});
+    }
+    if (defined($objectformat->{$session->{dbbase}})) {
+        $session->{def_comp} = $objectformat->{$session->{dbbase}};
+    } else {
+        $session->{def_comp} = "dkabm";
+    }
+    if ($db =~ /.*,(.*)$/ ) {
+        $session->{dbargs} = $1;
+    } else {
+        $session->{dbargs} = "";
+    }
+    $session->{sort} = '';
+    my $qry = $href->{CQL};
+    if ( $qry ) {
+        my $sortby = "";
+        ( $qry, $sortby ) = fixsortquery($href,$qry) ; # Remove CQL sortby clause
+        if ( !$qry ) {
+            return; # error already set
+        }
+        # Was placed after the return above and therefore never executed
+        $session->{sort} = $sortby;
+    } else {
+        $qry = q_query($href);
+    }
+    if ( !$qry ) {
+        return; # err is already set
+    }
+    $session->{query} = "&query=" .
uri_escape($qry);
+    my $number = $href->{PRESENT_NUMBER};
+    #my $number = $session->{chunk_size};
+    getrecords($href, 1, $number);
+    $href->{HITS} = $session->{hits};
+    if ( $session->{facets} ) {
+        $href->{OUTPUTFACETS} = $session->{facets};
+    }
+}
+
+# PRESENT callback: empty, it does nothing.
+sub present_handler {
+}
+
+# FETCH callback: return the record at $href->{OFFSET}.
+# Asks getrecords() for that single record, then reads it from
+# $session->{records}->{$comp}->[$offset] and hands it back as XML.
+# $session->{comp} is not set here; presumably getrecords() sets it.
+# A missing record is reported as error 13.
+sub fetch_handler {
+    my $href = shift;
+    dumphandle( $href, "Fetch:");
+    my $offset = $href->{OFFSET};
+    my $session = $href->{HANDLE};
+    getrecords($href,$offset,1);
+    # Keep the diagnostic getrecords() may have set, instead of replacing
+    # it with the "out of range" error below.
+    return if ( $href->{ERR_CODE} );
+    my $comp = $session->{comp};
+    my $record = $session->{records}->{$comp}->[$offset];
+    if ( !$record ) {
+        return err( $href, 13, # present out of range,
+            "".$offset );
+    }
+    $href->{REP_FORM} = Net::Z3950::OID::xml;
+    $href->{RECORD} = $record;
+    $href->{LEN} = length($record);
+    $href->{NUMBER} = $offset;
+    $href->{BASENAME} = $session->{dbbase};
+}
+
+# CLOSE callback: only dumps the handle.
+sub close_handler {
+    my $href = shift;
+    dumphandle( $href, "Close:");
+}
+
+
+########### Main program
+
+# Register the callbacks and start serving; @ARGV is passed to the server.
+my $handler = new Net::Z3950::SimpleServer(START => \&readconfig,
+                                           INIT => \&init_handler,
+                                           CLOSE => \&close_handler,
+                                           SEARCH => \&search_handler,
+                                           FETCH => \&fetch_handler,
+                                           PRESENT => \&present_handler);
+
+$handler->launch_server("opensearch-gw.pl", @ARGV);
diff --git a/heikki/dbc-os/test2.cfg b/heikki/dbc-os/test2.cfg
new file mode 100644
index 0000000..da81bfc
--- /dev/null
+++ b/heikki/dbc-os/test2.cfg
@@ -0,0 +1,128 @@
+ + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
diff --git a/heikki/dbc-os/test2.sh b/heikki/dbc-os/test2.sh
new file mode 100755
index 0000000..9dfe54a
--- /dev/null
+++ b/heikki/dbc-os/test2.sh
@@ -0,0 +1,120 @@
+#!/bin/bash
+#
+# Simple script (and config) to get pz2 to run against DBC's OpenSearch, and
+# calculate rankings.
See how they differ for different queries
+#
+
+PIDFILE=pz2.pid  # set before the "clean" branch, which has to remove it
+
+if [ "$1" == "clean" ]
+then
+    echo "Cleaning up"
+    rm -f "$PIDFILE" *.out *.log *.data *~  # YAZPIDFILE is never set in this script
+    exit
+fi
+killall pazpar2 dbc-opensearch-gw
+
+rm -f *.out *.log
+
+URL="http://localhost:9017/"
+CFG="test2.cfg"
+
+PZ="../../src/pazpar2"
+if [ ! -x "$PZ" ]
+then
+    echo "$PZ not executable. Panic"
+    exit 1
+fi
+
+# Start the gateway.
+    ./dbc-opensearch-gw.pl -1 \
+        -c dbc-opensearch-gw.cfg \
+        -l dbc-opensearch-gw.log \
+        @:9994 &
+
+
+"$PZ" -f "$CFG" -l pz2.log -p "$PIDFILE" &
+sleep 0.2 # make sure it has time to start
+echo "Init"
+curl -s "$URL?command=init" > init.out
+SESSION=$(xml_grep --text_only "//session" init.out)
+# cat init.out; echo
+echo "Got session $SESSION"
+SES="&session=$SESSION"
+
+
+if [ -z "$1" ]
+then
+    Q="computer"
+else
+    Q=$1
+fi
+QRY=$(echo "$Q" | sed 's/ /+/g')
+
+#SEARCH="command=search$SES&$QRY&rank=1&sort=relevance"
+#SEARCH="command=search$SES&$QRY"
+SEARCH="command=search$SES&query=$QRY&sort=relevance"
+echo "$SEARCH"
+curl -s "$URL?$SEARCH" > search.out
+grep search search.out
+echo
+sleep 0.5 # let the search start working
+
+STAT="command=stat$SES"  # SES already starts with "&"
+echo "" > stat.out
+LOOPING=1
+while [ "$LOOPING" = 1 ]
+do
+    sleep 0.5
+    curl -s "$URL?$STAT" > stat.out
+    ACT=$(xml_grep --text_only "//activeclients" stat.out)
+    HIT=$(xml_grep --text_only "//hits" stat.out)
+    REC=$(xml_grep --text_only "//records" stat.out)
+    echo "$ACT $HIT $REC"
+    if [ "$ACT" = "0" ]  # was: grep -q "0" stat.out, true for any zero digit
+    then
+        LOOPING=0
+    fi
+    echo >> stats.out
+    cat stat.out >> stats.out
+done
+
+
+SHOW="command=show$SES&sort=relevance_h&start=0&num=100"
+echo "$SHOW"
+curl -s "http://localhost:9017/?$SHOW" > show.out
+#grep "relevance" show.out | grep += | grep -v "(0)"
+#grep "round-robin" show.out
+grep '^ ' show.out | head -11  # NOTE(review): pattern looks truncated - confirm
+grep 'Received' dbc-opensearch-gw.log | head -1 >> titles.out
+grep '^ ' show.out >> titles.out
+
+# Plot it
+DF=$(echo "$QRY" | sed 's/@//g' | sed 's/[+"]/_/g' | sed s"/'//g ")
+grep "round-robin"
show.out |
+    cut -d' ' -f 6,7 |  # keep space-separated fields 6 and 7
+    sed 's/[^0-9 ]//g' |  # drop everything but digits and blanks
+    awk '{print FNR,$0}'> $DF.data  # prefix each line with its line number
+
+# Build the gnuplot command file: a fixed header, then one plot clause for
+# every *.data file in this directory, so data left by earlier runs is
+# plotted as well.
+echo '\
+ set term png
+ set out "plot.png"
+ set yrange [0:300000]
+ plot \' > plot.cmd
+for F in *.data
+do
+    BF=`basename $F .data`
+    echo -n " \"$F\" using 1:2 with points title \"$BF\", " >> plot.cmd
+done
+echo "0 notitle" >> plot.cmd
+# The constant "0 notitle" closes the list after the last trailing comma.
+gnuplot < plot.cmd
+
+echo
+
+echo "All done"
+kill `cat $PIDFILE`
+rm -f $PIDFILE
+
diff --git a/heikki/test1.sh b/heikki/test1.sh index 5aedbaa..081cb4f 100755 --- a/heikki/test1.sh +++ b/heikki/test1.sh @@ -18,8 +18,8 @@ CFG="test1.cfg" PZ="../src/pazpar2" PIDFILE=pz2.pid -YAZPIDFILE=yaz-ztest.pid +YAZPIDFILE=yaz-ztest.pid yaz-ztest -p $YAZPIDFILE -l yaz-ztest.log & rm -f *.out *.log -- 1.7.10.4