From: Heikki Levanto Date: Wed, 27 Nov 2013 14:57:42 +0000 (+0100) Subject: solr and dbc tests X-Git-Url: http://lists.indexdata.dk/?a=commitdiff_plain;h=533ff0b399b62625ddd68e1a59cb34ef70795bf9;p=pazpar2-moved-to-github.git solr and dbc tests --- diff --git a/heikki/README-HEIKKI b/heikki/README-HEIKKI index 4e1eebf..0571886 100644 --- a/heikki/README-HEIKKI +++ b/heikki/README-HEIKKI @@ -47,3 +47,18 @@ I should also add stuff directly to the client, and to the record, as I need. Next: Plot the tf/idf scores against round-robin sorted order. Will be messy, but later when we get a target that returns sorted records, it will make sense. + + +Wed 27-Nov +Setting up multiple SOLR targets in the same pazpar2 + - Add #999 to the z-urls, so pazpar2 won't merge them. Different number for each + +This URL shows the databases, with their numbers +http://lui.indexdata.com/solr/select?q=database:*&facet=true&facet.method=fc&facet.field=author_exact&facet.field=subject_exact&facet.field=date&facet.field=medium_exact&facet.field=database&rows=0&facet.mincount=1 + +Add this to the target defs + + +After this, it should be possible to get records from different databases, some +with many records, some with a few. This is a good testing ground for merging +rankings! Test first with a round-robin, and plot the scores. 
diff --git a/heikki/dbc-os/bibliotek.dk.xml b/heikki/dbc-os/bibliotek.dk.xml index 5320b6f..91d297e 100644 --- a/heikki/dbc-os/bibliotek.dk.xml +++ b/heikki/dbc-os/bibliotek.dk.xml @@ -3,7 +3,8 @@ - + + diff --git a/heikki/dbc-os/dbc-opensearch-gw.cfg b/heikki/dbc-os/dbc-opensearch-gw.cfg index 47bb1ba..2b09c34 100644 --- a/heikki/dbc-os/dbc-opensearch-gw.cfg +++ b/heikki/dbc-os/dbc-opensearch-gw.cfg @@ -14,7 +14,8 @@ database: Default baseurl: http://openbibdk.addi.dk/0.8/ objectformat: dkabm #constantparams: action=search&facets.numberOfTerms=10&facets.facetName=facet.creator&facets.facetName=facet.type&facets.facetName=facet.subject&agency=100200&profile=test&collectionType=work -constantparams: action=search&agency=100200&profile=test&collectionType=work&rank=rank_general +#constantparams: action=search&agency=100200&profile=test&collectionType=work&rank=rank_general +constantparams: action=search&agency=100200&profile=test&collectionType=work fields: bibliotek.dk.fields.txt database: bibliotek.work diff --git a/heikki/dbc-os/test2.cfg b/heikki/dbc-os/test2.cfg index da81bfc..86540f5 100644 --- a/heikki/dbc-os/test2.cfg +++ b/heikki/dbc-os/test2.cfg @@ -32,7 +32,9 @@ - + + + @@ -76,10 +78,10 @@ - + @@ -113,7 +115,7 @@ - + diff --git a/heikki/dbc-os/test2.sh b/heikki/dbc-os/test2.sh index 9dfe54a..f187620 100755 --- a/heikki/dbc-os/test2.sh +++ b/heikki/dbc-os/test2.sh @@ -51,9 +51,11 @@ else fi QRY=`echo $Q | sed 's/ /+/g' ` +SORT="sort=score" #SEARCH="command=search$SES&$QRY&rank=1&sort=relevance" #SEARCH="command=search$SES&$QRY" -SEARCH="command=search$SES&query=$QRY&sort=relevance" +#SEARCH="command=search$SES&query=$QRY&sort=relevance" +SEARCH="command=search$SES&query=$QRY&$SORT" echo $SEARCH curl -s "$URL?$SEARCH" > search.out cat search.out | grep search @@ -80,7 +82,7 @@ do done -SHOW="command=show$SES&sort=relevance_h&start=0&num=100" +SHOW="command=show$SES&start=0&num=100&$SORT" echo $SHOW curl -s "http://localhost:9017/?$SHOW" > show.out #grep 
"relevance" show.out | grep += | grep -v "(0)" @@ -101,11 +103,12 @@ grep "round-robin" show.out | echo '\ set term png set out "plot.png" - set yrange [0:300000] + #set yrange [0:300000] + set logscale y plot \' > plot.cmd for F in *.data do - BF=`basename $F .data` + BF=`basename $F .data | sed 's/_/ /g' ` echo -n " \"$F\" using 1:2 with points title \"$BF\", " >> plot.cmd done echo "0 notitle" >> plot.cmd diff --git a/heikki/solr/opencontent.xml b/heikki/solr/opencontent.xml new file mode 100644 index 0000000..804dd2c --- /dev/null +++ b/heikki/solr/opencontent.xml @@ -0,0 +1,33 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/heikki/solr/plot1.cmd b/heikki/solr/plot1.cmd new file mode 100644 index 0000000..bcf1627 --- /dev/null +++ b/heikki/solr/plot1.cmd @@ -0,0 +1,9 @@ +\ + set term png + set out "plot.png" + #set yrange [0:300000] + plot \ + "hp.data" using 0:1 with points title "harry potter", \ + "vw.data" using 0:1 with points title "vietnam war", \ + "wa.data" using 0:1 with points title "water or fire or ice" + diff --git a/heikki/solr/solr-pz2.xsl b/heikki/solr/solr-pz2.xsl new file mode 100644 index 0000000..4fe7bc1 --- /dev/null +++ b/heikki/solr/solr-pz2.xsl @@ -0,0 +1,74 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/heikki/solr/solr.lui.xml b/heikki/solr/solr.lui.xml new file mode 100644 index 0000000..4e5905d --- /dev/null +++ b/heikki/solr/solr.lui.xml @@ -0,0 +1,40 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/heikki/solr/test3.cfg b/heikki/solr/test3.cfg new file mode 100644 index 0000000..7816b85 --- /dev/null +++ b/heikki/solr/test3.cfg @@ -0,0 +1,132 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
#!/bin/bash
#
# heikki/solr/test3.sh
#
# Start a local pazpar2 with test3.cfg, run one search through it, poll until
# no target is active any more, fetch the first 100 records, and plot the
# per-record numbers found in the show response with gnuplot.
#
# NOTE(review): the original header said this runs pz2 "against DBC's
# OpenSearch"; that wording looks inherited from dbc-os/test2.sh. Confirm
# which targets test3.cfg actually defines.
#
# Usage:
#   test3.sh            search for "computer"
#   test3.sh "QUERY"    search for QUERY (spaces are sent as '+')
#   test3.sh clean      remove generated files and exit
#
# Needs: curl, xml_grep, gnuplot, killall and an executable ../../src/pazpar2.
# Writes into the current directory: init.out search.out stat.out stats.out
# show.out titles.out <query>.data plot.cmd plot.png pz2.log pz2.pid

set -u

URL="http://localhost:9017/"
CFG="test3.cfg"
PZ="../../src/pazpar2"
# Defined before the "clean" branch so that "clean" really removes the pid file.
PIDFILE="pz2.pid"

if [ "${1:-}" = "clean" ]
then
    echo "Cleaning up"
    # YAZPIDFILE is never set by this script; it is only honoured when the
    # caller's environment provides it.
    rm -f -- "$PIDFILE" ${YAZPIDFILE:+"$YAZPIDFILE"} \
        *.out *.log *.data *~ plot.cmd
    exit
fi

# Get rid of any pazpar2 left over from an earlier run. Having nothing to
# kill is the normal case, so the "no process found" message is silenced.
killall pazpar2 2>/dev/null

rm -f -- *.out *.log

if [ ! -x "$PZ" ]
then
    echo "$PZ not executable. Panic" >&2
    exit 1
fi

"$PZ" -f "$CFG" -l pz2.log -p "$PIDFILE" &
PZ_PID=$!

# Stop the pazpar2 started above and remove its pid file. Installed as an EXIT
# trap so the server does not stay behind when the script bails out early.
stop_pazpar2() {
    if [ -s "$PIDFILE" ]
    then
        kill "$(cat "$PIDFILE")" 2>/dev/null
    else
        kill "$PZ_PID" 2>/dev/null
    fi
    rm -f -- "$PIDFILE"
}
trap stop_pazpar2 EXIT

# Ask for a session, retrying for about two seconds instead of trusting a
# single fixed sleep to be long enough for the server to start listening.
echo "Init"
SESSION=""
for _ in 1 2 3 4 5 6 7 8 9 10
do
    sleep 0.2
    if curl -s "$URL?command=init" > init.out
    then
        SESSION=$(xml_grep --text_only "//session" init.out 2>/dev/null)
        if [ -n "$SESSION" ]
        then
            break
        fi
    fi
done
if [ -z "$SESSION" ]
then
    echo "Could not get a session from $URL, see pz2.log" >&2
    exit 1
fi
echo "Got session $SESSION"
SES="&session=$SESSION"

Q=${1:-computer}
QRY=$(printf '%s\n' "$Q" | sed 's/ /+/g')

SORT="sort=score"
SEARCH="command=search$SES&query=$QRY&$SORT"
echo "$SEARCH"
curl -s "$URL?$SEARCH" > search.out
grep search search.out
echo
sleep 0.5 # let the search start working

# Poll "stat" until pazpar2 reports zero active clients. Every response is
# also appended to stats.out.
STAT="command=stat$SES"
echo "" > stat.out
while :
do
    sleep 0.5
    if ! curl -s "$URL?$STAT" > stat.out
    then
        echo "stat request failed; is pazpar2 still running? See pz2.log" >&2
        exit 1
    fi
    ACT=$(xml_grep --text_only "//activeclients" stat.out)
    HIT=$(xml_grep --text_only "//hits" stat.out)
    REC=$(xml_grep --text_only "//records" stat.out)
    echo "$ACT $HIT $REC"
    echo >> stats.out
    cat stat.out >> stats.out
    # Compare the extracted value: grepping the whole response for "0" would
    # also match a 0 digit inside the hit or record counts.
    if [ "$ACT" = "0" ]
    then
        break
    fi
done

SHOW="command=show$SES&start=0&num=100&$SORT"
echo "$SHOW"
curl -s "$URL?$SHOW" > show.out

# Print the first lines of show.out that start with a space, and keep all of
# them in titles.out.
grep '^ ' show.out | head -11
# *.log was deleted above and nothing in this script recreates this file, so
# only read it when something else has produced it in the meantime.
if [ -f dbc-opensearch-gw.log ]
then
    grep 'Received' dbc-opensearch-gw.log | head -1 >> titles.out
fi
grep '^ ' show.out >> titles.out

# Plot it
# Data file name is derived from the query: drop @ and ', turn + " / into _.
DF=$(printf '%s\n' "$QRY" | sed -e 's/@//g' -e 's|[+"/]|_|g' -e "s/'//g")
grep "round-robin" show.out |
    cut -d' ' -f 6,7 |
    sed 's/[^0-9 ]//g' |
    awk '{print FNR,$0}' > "$DF.data"

# Build the gnuplot script: a fixed preamble, one plot entry for every .data
# file in the directory (so earlier queries are plotted too), and a closing
# dummy entry that absorbs the trailing comma.
{
    cat <<'EOF'
\
 set term png
 set out "plot.png"
 #set yrange [0:300000]
 set logscale y
 plot \
EOF
    for F in *.data
    do
        BF=$(basename "$F" .data | sed 's/_/ /g')
        printf ' "%s" using 1:2 with points title "%s", ' "$F" "$BF"
    done
    echo "0 notitle"
} > plot.cmd

gnuplot < plot.cmd

echo

echo "All done"
# pazpar2 is stopped and the pid file removed by the EXIT trap.