From a425a433260c0f07b69ce6ca9d1fde9b496e8999 Mon Sep 17 00:00:00 2001 From: Heikki Levanto Date: Wed, 4 Dec 2013 15:15:51 +0100 Subject: [PATCH] Merge scores, also for DBC output --- heikki/dbc-os/test2.cfg | 1 + heikki/dbc-os/test2.sh | 30 +++++++++++++++++++++++++++++- heikki/solr/test3.sh | 28 +++++++++++++++++++++++++++- src/relevance.c | 19 +++++++++++-------- 4 files changed, 68 insertions(+), 10 deletions(-) diff --git a/heikki/dbc-os/test2.cfg b/heikki/dbc-os/test2.cfg index 86540f5..6523dd5 100644 --- a/heikki/dbc-os/test2.cfg +++ b/heikki/dbc-os/test2.cfg @@ -115,6 +115,7 @@ + diff --git a/heikki/dbc-os/test2.sh b/heikki/dbc-os/test2.sh index bb96855..ec9f381 100755 --- a/heikki/dbc-os/test2.sh +++ b/heikki/dbc-os/test2.sh @@ -27,7 +27,7 @@ fi PIDFILE=pz2.pid # Start the gateway. - ./dbc-opensearch-gw.pl -1 \ + ../../../dbc-opensearch-gw/dbc-opensearch-gw.pl -1 \ -c dbc-opensearch-gw.cfg \ -l dbc-opensearch-gw.log \ @:9994 & @@ -98,6 +98,22 @@ grep "round-robin" show.out | sed 's/[^0-9 ]//g' | awk '{print FNR,$0}'> $DF.data +grep mergeplot show.out > merge.tmp +LINENUMBER="1" +LAST="" +echo "0 0 0" > merge.data +for lno in `cat merge.tmp | cut -d ' ' -f2` +do + if [ "$lno" != "$LAST" ] + then + echo "Found line $lno at $LINENUMBER" + grep "mergeplot $lno " merge.tmp | sed "s/mergeplot/$LINENUMBER/" >> merge.data + LAST=$lno + LINENUMBER=$(($LINENUMBER + 1)) + fi +done +echo "$LINENUMBER 0 0 0" >> merge.data + echo '\ @@ -115,6 +131,18 @@ echo "0 notitle" >> plot.cmd gnuplot < plot.cmd + +echo " + set term png + set out \"cluster.png\" + set title \"$HEADLINE\" + plot \"merge.data\" using 1:3 with points title \"records\", \ + \"merge.data\" using 1:4 with points title \"merged score\", \ + \"merge.data\" using 1:5 with points title \"sum score\", \ + \"merge.data\" using 1:6 with points title \"avg score\" +" > plot.cmd +cat plot.cmd | gnuplot + echo echo "All done" diff --git a/heikki/solr/test3.sh b/heikki/solr/test3.sh index 649c79c..f117635 100755 --- a/heikki/solr/test3.sh +++ b/heikki/solr/test3.sh @@ -97,7 +97,23 @@ echo "Client numbers" cat scores.data | cut -d' ' -f2 | sort -u head -10 scores.data -exit 1 +grep mergeplot show.out > merge.tmp +LINENUMBER="1" +LAST="" +echo "0 0 0" > merge.data +for lno in `cat merge.tmp | cut -d ' ' -f2` +do + if [ "$lno" != "$LAST" ] + then + echo "Found line $lno at $LINENUMBER" + grep "mergeplot $lno " merge.tmp | sed "s/mergeplot/$LINENUMBER/" >> merge.data + LAST=$lno + LINENUMBER=$(($LINENUMBER + 1)) + fi +done +echo "$LINENUMBER 0 0 0" >> merge.data + +#exit 1 T1=`grep ": 1 " scores.data | head -1 | cut -d'#' -f2 | cut -d' ' -f2` T2=`grep ": 2 " scores.data | head -1 | cut -d'#' -f2 | cut -d' ' -f2` @@ -119,6 +135,16 @@ echo " " > plot.cmd cat plot.cmd | gnuplot +echo " + set term png + set out \"cluster.png\" + set title \"$HEADLINE\" + plot \"merge.data\" using 1:3 with points title \"records\", \ + \"merge.data\" using 1:4 with points title \"merged score\", \ + \"merge.data\" using 1:5 with points title \"sum score\", \ + \"merge.data\" using 1:6 with points title \"avg score\" +" > plot.cmd +cat plot.cmd | gnuplot echo "All done" diff --git a/src/relevance.c b/src/relevance.c index 5450cae..b403048 100644 --- a/src/relevance.c +++ b/src/relevance.c @@ -510,7 +510,7 @@ void relevance_prepare_read(struct relevance *rel, struct reclist *reclist, int tfrel = relevance; // keep the old tf/idf score int robinscore = 0; int solrscore = 0; - int normscore; + int normscore = 0; const char *score; const char *id; const char *title; @@ -542,20 +542,21 @@ void relevance_prepare_read(struct relevance *rel, struct reclist *reclist, i++; } idbuf[i] = '\0'; - if ( norm->count ) + if ( norm->count && *score ) { //float avg = norm->sum / norm->count; normscore = 10000.0 * ( atof(score) / norm->max ); wrbuf_printf(w, "normscore: score(%s) / max(%f) *10000 = %d\n", score, norm->max, normscore); } else - yaz_log(YLOG_LOG, "normscore: no count, can not normalize %s ", score ); + yaz_log(YLOG_LOG, "normscore: no count, can not normalize score '%s' ", score ); // If we have a score in the best record, we probably have in them all // and we can try to merge scores if ( *score ) { float scores[nclust]; float s = 0.0; + float sum = 0.0; int i=0; if ( rec->records && rec->records->next ) { // have more than one record @@ -563,7 +564,7 @@ void relevance_prepare_read(struct relevance *rel, struct reclist *reclist, { scores[i] = atof( getfield(record,"score") ); yaz_log(YLOG_LOG,"mergescore %d: %f", i, scores[i] ); - wrbuf_printf(w,"mergeplot %d: %f x\n", clusternumber, 10000*scores[i] ); + wrbuf_printf(w,"mergeplot %d %f x\n", clusternumber, 10000*scores[i] ); } qsort(scores, nclust, sizeof(float), sort_float ); for (i = 0; iposition, but something from rec that - // corresponds to the hit number, for plotting. } // merge score id = getfield(bestrecord, "id"); // clear the id, we only want the first numerical part -- 1.7.10.4