From b6b190610799a920163200fd5920406adcc3f6c0 Mon Sep 17 00:00:00 2001 From: Heikki Levanto Date: Fri, 22 Nov 2013 14:15:58 +0100 Subject: [PATCH] relevance-h now calculates some kind of round-robin score --- heikki/README-HEIKKI | 20 ++++++++++++++++++++ heikki/service.xml | 1 + heikki/test1.sh | 30 ++++++++++++++++++++++++++---- src/relevance.c | 37 +++++++++++++++++++++++++++++++++++-- 4 files changed, 82 insertions(+), 6 deletions(-) diff --git a/heikki/README-HEIKKI b/heikki/README-HEIKKI index 77977e8..a6ca02d 100644 --- a/heikki/README-HEIKKI +++ b/heikki/README-HEIKKI @@ -18,3 +18,23 @@ Wed 20-Nov-2013 Make a script that tests ranking against yaz-zserver (as that is the default config). Mostly to have a script to build on later. Thu 21-Nov-2013. Start my own complete config + +Fri 22-Nov-2013. Adam defined a new sort type, relevance_h, and put it in place +in the code. Now I have a place to implement my stuff. Relevant places: + pazpar2_config.c:1020 - minor + session.c:1318 - call relevance_prepare_read also for my type + reclists.c:104 - parse params + reclists.c:166 - compare function (for quicksort) + relevance.c:417 - calculate score + (same function as for relevance, but with extra arg for type) + +The compare function compares positions, when sorting by Metadata_sortkey_position. +It loops through the records (in the cluster) and finds the smallest rec->pos +and then compares those. + +Next: See if I can implement a round robin. 
+ - client.h declares int clients_count(void) + - rec->client is a pointer to the client, but we don't have an ordinal from that + - keep an array of structs with the pointer, and locate the client number that way + - robin-score = pos * n_clients + client_num + diff --git a/heikki/service.xml b/heikki/service.xml index 5651f30..088ba98 100644 --- a/heikki/service.xml +++ b/heikki/service.xml @@ -3,6 +3,7 @@ + diff --git a/heikki/test1.sh b/heikki/test1.sh index 0814bf0..a742ce0 100755 --- a/heikki/test1.sh +++ b/heikki/test1.sh @@ -12,6 +12,7 @@ then exit fi +URL="http://localhost:9017/" CFG="test1.cfg" PZ="../src/pazpar2" @@ -26,7 +27,7 @@ rm -f *.out *.log $PZ -f $CFG -l pz2.log -p $PIDFILE & sleep 0.2 # make sure it has time to start echo "Init" -curl -s "http://localhost:9017/?command=init" > init.out +curl -s "$URL?command=init" > init.out SESSION=`xml_grep --text_only "//session" init.out ` # cat init.out; echo echo "Got session $SESSION" @@ -38,14 +39,35 @@ QRY="query=computer" #SEARCH="command=search$SES&$QRY" SEARCH="command=search$SES&$QRY&sort=relevance" echo $SEARCH -curl -s "http://localhost:9017/?$SEARCH" > search.out +curl -s "$URL?$SEARCH" > search.out cat search.out | grep search echo -SHOW="command=show$SES&sort=relevance" +STAT="command=stat&$SES" +echo "" > stat.out +LOOPING=1 +while [ $LOOPING = 1 ] +do + sleep 0.5 + curl -s "$URL?$STAT" > stat.out + ACT=`xml_grep --text_only "//activeclients" stat.out` + HIT=`xml_grep --text_only "//hits" stat.out` + REC=`xml_grep --text_only "//records" stat.out` + echo "$ACT $HIT $REC" + if grep -q "0" stat.out + then + LOOPING=0 + fi + echo >> stats.out + cat stat.out >> stats.out +done + + +SHOW="command=show$SES&sort=relevance_h&start=0&num=1000" echo $SHOW curl -s "http://localhost:9017/?$SHOW" > show.out -grep "relevance" show.out | grep += | grep -v "(0)" +#grep "relevance" show.out | grep += | grep -v "(0)" +grep "round-robin" show.out echo echo "All done" diff --git a/src/relevance.c 
b/src/relevance.c index 2fd7414..a17c4a1 100644 --- a/src/relevance.c +++ b/src/relevance.c @@ -28,6 +28,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include "pazpar2_config.h" #include "relevance.h" #include "session.h" +#include "client.h" #ifdef WIN32 #define log2(x) (log(x)/log(2)) @@ -358,6 +359,12 @@ void relevance_prepare_read(struct relevance *rel, struct reclist *reclist, { int i; float *idfvec = xmalloc(rel->vec_len * sizeof(float)); + int n_clients = clients_count(); + struct client * clients[n_clients]; + yaz_log(YLOG_LOG,"round-robin: have %d clients", n_clients); + for (i = 0; i < n_clients; i++) + clients[i] = 0; + reclist_enter(reclist); // Calculate document frequency vector for each term. @@ -414,8 +421,34 @@ void relevance_prepare_read(struct relevance *rel, struct reclist *reclist, { wrbuf_printf(w, "score = relevance(%d);\n", relevance); } - if (0 && type == Metadata_sortkey_relevance_h) - relevance *= 2; + // Experimental round-robin + // Overwrites the score calculated above, but I keep it there to + // get the log entries + if (type == Metadata_sortkey_relevance_h) { + struct record *record; + int thisclient = 0; + struct record *bestrecord = 0; + int nclust = 0; + for (record = rec->records; record; record = record->next) { + if ( bestrecord == 0 || bestrecord->position < record->position ) + bestrecord = record; + nclust++; + } + while ( clients[thisclient] != 0 + && clients[thisclient] != bestrecord->client ) + thisclient++; + if ( clients[thisclient] == 0 ) + { + yaz_log(YLOG_LOG,"round-robin: found new client at %d: p=%p\n", thisclient, bestrecord->client); + clients[thisclient] = bestrecord->client; + } + + relevance = -(bestrecord->position * n_clients + thisclient) ; + wrbuf_printf(w,"round-robin score: pos=%d client=%d ncl=%d score=%d\n", + bestrecord->position, thisclient, nclust, relevance ); + yaz_log(YLOG_LOG,"round-robin score: pos=%d client=%d ncl=%d score=%d", + bestrecord->position, 
thisclient, nclust, relevance ); + } rec->relevance_score = relevance; } reclist_leave(reclist); -- 1.7.10.4