Added a lot more metadata for MARC21. This changes the ranking result
author Adam Dickmeiss <adam@indexdata.dk>
Mon, 16 Jul 2007 09:39:55 +0000 (09:39 +0000)
committer Adam Dickmeiss <adam@indexdata.dk>
Mon, 16 Jul 2007 09:39:55 +0000 (09:39 +0000)
a bit. Added test for 'record' command with and without offset.

debian/cfg.patch
etc/marc21.xsl
etc/pazpar2.cfg.dist
test/test_http.cfg
test/test_http_5.res
test/test_http_6.res [new file with mode: 0644]
test/test_http_7.res [new file with mode: 0644]
test/test_http_urls
www/jsdemo/example_client.js

index 5f3b019..585e810 100644 (file)
@@ -1,7 +1,7 @@
-5,7c5,6
-<   <listen port="9004"/>
-<   <proxy host="localhost:80"/>
-<   <settings src="../etc/settings"/>
+6,8c6,7
+<     <listen port="9004"/>
+<     <proxy host="localhost:80"/>
+<     <settings src="../etc/settings"/>
 ---
->   <listen port="8004"/>
->   <settings src="/etc/pazpar2/settings/edu.xml"/>
+>     <listen port="8004"/>
+>     <settings src="/etc/pazpar2/settings/edu.xml"/>
index a6b6445..a500ed9 100644 (file)
@@ -1,10 +1,17 @@
 <?xml version="1.0" encoding="UTF-8"?>
+<!-- $Id: marc21.xsl,v 1.14 2007-07-16 09:39:55 adam Exp $ -->
 <xsl:stylesheet
     version="1.0"
     xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
     xmlns:pz="http://www.indexdata.com/pazpar2/1.0"
     xmlns:marc="http://www.loc.gov/MARC21/slim">
+  
+  <xsl:output indent="yes" method="xml" version="1.0" encoding="UTF-8"/>
 
+<!-- Extract metadata from MARC21/USMARC 
+      http://www.loc.gov/marc/bibliographic/ecbdhome.html
+-->  
+  
   <xsl:template match="/marc:record">
     <pz:record>
 
        </pz:metadata>
       </xsl:for-each>
 
-      <xsl:for-each select="marc:datafield[@tag='245']">
-        <pz:metadata type="title">
-          <xsl:value-of select="marc:subfield[@code='a']"/>
-          <xsl:text> </xsl:text>
-          <xsl:value-of select="marc:subfield[@code='b']"/>
-        </pz:metadata>
-      </xsl:for-each>
-
       <xsl:for-each select="marc:datafield[@tag='020']">
         <pz:metadata type="isbn">
          <xsl:value-of select="marc:subfield[@code='a']"/>
        </pz:metadata>
       </xsl:for-each>
 
-      <xsl:for-each select="marc:datafield[@tag='260']">
-        <pz:metadata type="date">
-         <xsl:value-of select="marc:subfield[@code='c']"/>
+      <xsl:for-each select="marc:datafield[@tag='022']">
+        <pz:metadata type="issn">
+         <xsl:value-of select="marc:subfield[@code='a']"/>
        </pz:metadata>
       </xsl:for-each>
 
-      <xsl:for-each select="marc:datafield[@tag='650' or @tag='653']">
-       <pz:metadata type="subject">
+      <xsl:for-each select="marc:datafield[@tag='027']">
+        <pz:metadata type="tech-rep-nr">
          <xsl:value-of select="marc:subfield[@code='a']"/>
        </pz:metadata>
       </xsl:for-each>
        <pz:metadata type="author">
          <xsl:value-of select="marc:subfield[@code='a']"/>
        </pz:metadata>
+       <pz:metadata type="author-title">
+         <xsl:value-of select="marc:subfield[@code='c']"/>
+       </pz:metadata>
+       <pz:metadata type="author-date">
+         <xsl:value-of select="marc:subfield[@code='d']"/>
+       </pz:metadata>
+      </xsl:for-each>
+
+      <xsl:for-each select="marc:datafield[@tag='110']">
+       <pz:metadata type="corporate-name">
+           <xsl:value-of select="marc:subfield[@code='a']"/>
+       </pz:metadata>
+       <pz:metadata type="corporate-location">
+           <xsl:value-of select="marc:subfield[@code='c']"/>
+       </pz:metadata>
+       <pz:metadata type="corporate-date">
+           <xsl:value-of select="marc:subfield[@code='d']"/>
+       </pz:metadata>
+      </xsl:for-each>
+
+      <xsl:for-each select="marc:datafield[@tag='111']">
+       <pz:metadata type="meeting-name">
+           <xsl:value-of select="marc:subfield[@code='a']"/>
+       </pz:metadata>
+       <pz:metadata type="meeting-location">
+           <xsl:value-of select="marc:subfield[@code='c']"/>
+       </pz:metadata>
+       <pz:metadata type="meeting-date">
+           <xsl:value-of select="marc:subfield[@code='d']"/>
+       </pz:metadata>
+      </xsl:for-each>
+
+      <xsl:for-each select="marc:datafield[@tag='260']">
+       <pz:metadata type="date">
+           <xsl:value-of select="marc:subfield[@code='c']"/>
+       </pz:metadata>
+      </xsl:for-each>
+
+      <xsl:for-each select="marc:datafield[@tag='245']">
+        <pz:metadata type="title">
+          <xsl:value-of select="marc:subfield[@code='a']"/>
+        </pz:metadata>
+        <pz:metadata type="title-remainder">
+          <xsl:value-of select="marc:subfield[@code='b']"/>
+        </pz:metadata>
+        <pz:metadata type="title-responsibility">
+          <xsl:value-of select="marc:subfield[@code='c']"/>
+        </pz:metadata>
+        <pz:metadata type="title-dates">
+          <xsl:value-of select="marc:subfield[@code='f']"/>
+        </pz:metadata>
+        <pz:metadata type="title-medium">
+          <xsl:value-of select="marc:subfield[@code='h']"/>
+        </pz:metadata>
+      </xsl:for-each>
+
+      <xsl:for-each select="marc:datafield[@tag='250']">
+       <pz:metadata type="edition">
+           <xsl:value-of select="marc:subfield[@code='a']"/>
+       </pz:metadata>
+      </xsl:for-each>
+
+      <xsl:for-each select="marc:datafield[@tag='260']">
+        <pz:metadata type="publication-place">
+         <xsl:value-of select="marc:subfield[@code='a']"/>
+       </pz:metadata>
+        <pz:metadata type="publication-name">
+         <xsl:value-of select="marc:subfield[@code='b']"/>
+       </pz:metadata>
+        <pz:metadata type="publication-date">
+         <xsl:value-of select="marc:subfield[@code='c']"/>
+       </pz:metadata>
       </xsl:for-each>
 
-      <xsl:for-each select="marc:datafield[@tag='520']">
-        <pz:metadata type="description">
+      <xsl:for-each select="marc:datafield[@tag='300']">
+       <pz:metadata type="physical-extent">
          <xsl:value-of select="marc:subfield[@code='a']"/>
        </pz:metadata>
+       <pz:metadata type="physical-format">
+         <xsl:value-of select="marc:subfield[@code='b']"/>
+       </pz:metadata>
+       <pz:metadata type="physical-dimensions">
+         <xsl:value-of select="marc:subfield[@code='c']"/>
+       </pz:metadata>
+       <pz:metadata type="physical-accomp">
+         <xsl:value-of select="marc:subfield[@code='e']"/>
+       </pz:metadata>
+       <pz:metadata type="physical-unittype">
+         <xsl:value-of select="marc:subfield[@code='f']"/>
+       </pz:metadata>
+       <pz:metadata type="physical-unitsize">
+         <xsl:value-of select="marc:subfield[@code='g']"/>
+       </pz:metadata>
+       <pz:metadata type="physical-specified">
+         <xsl:value-of select="marc:subfield[@code='3']"/>
+       </pz:metadata>
       </xsl:for-each>
 
-      <xsl:for-each select="marc:datafield[@tag='700']">
-       <pz:metadata type="author">
+      <xsl:for-each select="marc:datafield[@tag='440']">
+       <pz:metadata type="series-title">
          <xsl:value-of select="marc:subfield[@code='a']"/>
        </pz:metadata>
       </xsl:for-each>
 
-      <xsl:for-each select="marc:datafield[@tag='720']">
-       <pz:metadata type="author">
+      <xsl:for-each select="marc:datafield[@tag &gt;= 500 and @tag &lt;= 599]
+                           [@tag != '506' and @tag != '530' and
+                           @tag != '540' and @tag != '546'
+                            and @tag != '522']">
+       <pz:metadata type="description">
+            <xsl:value-of select="*/text()"/>
+        </pz:metadata>
+      </xsl:for-each>
+      
+      <xsl:for-each select="marc:datafield[@tag='650' or @tag='653']">
+       <pz:metadata type="subject">
          <xsl:value-of select="marc:subfield[@code='a']"/>
        </pz:metadata>
       </xsl:for-each>
index d60f8f3..8d5c48e 100644 (file)
@@ -1,24 +1,61 @@
 <?xml version="1.0" encoding="UTF-8"?>
+<!-- $Id: pazpar2.cfg.dist,v 1.12 2007-07-16 09:39:55 adam Exp $ -->
 <pazpar2 xmlns="http://www.indexdata.com/pazpar2/1.0">
+  
+  <server>
+    <listen port="9004"/>
+    <proxy host="localhost:80"/>
+    <settings src="../etc/settings"/>
+    
+    <service>
+      <!-- we try to keep same order as in marc21.xsl -->
+      <metadata name="id"/>
+      <metadata name="lccn" merge="unique"/>
+      <metadata name="isbn"/>
+      <metadata name="issn"/>
+      <metadata name="tech-rep-nr"/>
+      <metadata name="author" brief="yes" termlist="yes" merge="longest" rank="2"/>
+      <metadata name="author-title"/>
+      <metadata name="author-date"/>
+      <metadata name="corporate-name"/>
+      <metadata name="corporate-location"/>
+      <metadata name="corporate-date"/>
+      <metadata name="meeting-name"/>
+      <metadata name="meeting-location"/>
+      <metadata name="meeting-date"/>
+      <metadata name="date" brief="yes" sortkey="numeric" type="year"
+               merge="range" termlist="yes"/>
+      <metadata name="title" brief="yes" sortkey="skiparticle"
+               merge="longest" rank="6"/>
+      <metadata name="title-remainder" brief="yes" merge="longest" rank="5"/>
+      <metadata name="title-responsibility" brief="yes" merge="longest"/>
+      <metadata name="title-dates" brief="yes" merge="longest"/>
+      <metadata name="title-medium" brief="yes" merge="longest"/>
+      <metadata name="edition"/>
+      <metadata name="publication-place"/>
+      <metadata name="publication-name"/>
+      <metadata name="publication-date"/>
+      <metadata name="physical-extent"/>       
+      <metadata name="physical-format"/>       
+      <metadata name="physical-dimensions"/>   
+      <metadata name="physical-accomp"/>       
+      <metadata name="physical-unittype"/>     
+      <metadata name="physical-unitsize"/>     
+      <metadata name="physical-specified"/>    
 
-<server>
-  <listen port="9004"/>
-  <proxy host="localhost:80"/>
-  <settings src="../etc/settings"/>
-
-  <service>
-    <metadata name="url" merge="unique"/>
-    <metadata name="title" brief="yes" sortkey="skiparticle" merge="longest" rank="6"/>
-    <metadata name="isbn"/>
-    <metadata name="date" brief="yes" sortkey="numeric" type="year" merge="range"
-           termlist="yes"/>
-    <metadata name="citation"/>
-    <metadata name="author" brief="yes" termlist="yes" merge="longest" rank="2"/>
-    <metadata name="subject" merge="unique" termlist="yes" rank="3"/>
-    <metadata name="id"/>
-    <metadata name="lccn" merge="unique"/>
-    <metadata name="description" merge="longest" rank="3"/>
-  </service>
-</server>
+      <metadata name="series-title"/>  
 
+      <metadata name="description" merge="longest" rank="3"/>
+      <metadata name="subject" merge="unique" termlist="yes" rank="3"/>
+      <metadata name="url" merge="unique"/>
+      <metadata name="citation"/>
+    </service>
+  </server>
+  
 </pazpar2>
+<!-- Keep this comment at the end of the file
+     Local variables:
+     mode: nxml
+     End:
+-->
+
index 483eb70..6785316 100644 (file)
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <pazpar2 xmlns="http://www.indexdata.com/pazpar2/1.0">
-<!-- $Id: test_http.cfg,v 1.1 2007-05-15 15:50:48 adam Exp $ -->
+<!-- $Id: test_http.cfg,v 1.2 2007-07-16 09:39:56 adam Exp $ -->
 <!-- Used by test_http.sh -->
 <server>
   <listen port="9763"/>
@@ -10,6 +10,7 @@
   <service>
     <metadata name="url" merge="unique"/>
     <metadata name="title" brief="yes" sortkey="skiparticle" merge="longest" rank="6"/>
+    <metadata name="title-remainder" brief="yes" merge="longest" rank="5"/>
     <metadata name="isbn"/>
     <metadata name="date" brief="yes" sortkey="numeric" type="year" merge="range"
            termlist="yes"/>
@@ -17,7 +18,7 @@
     <metadata name="subject" merge="unique" termlist="yes" rank="3"/>
     <metadata name="id"/>
     <metadata name="lccn" merge="unique"/>
-    <metadata name="description" merge="longest" rank="3"/>
+    <metadata name="description" brief="yes" merge="longest" rank="3"/>
   </service>
 </server>
 
index a6a1052..b887fa9 100644 (file)
 </hit>
 <hit>
 
-<md-title>The Computer Bible</md-title>
-<md-date>1973-1980</md-date>
-<md-author>Freedman, David Noel</md-author><location id="z3950.indexdata.com/marc" name="Local Test"></location>
-<recid>2</recid>
+<md-title>The use of passwords for controlled access to computer resources</md-title>
+<md-date>1977</md-date>
+<md-author>Wood, Helen M</md-author><location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<recid>6</recid>
 </hit>
 <hit>
 
 <md-title>A plan for community college computer development</md-title>
-<md-date>1971</md-date><location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<md-date>1971</md-date>
+<md-description>Cover title</md-description><location id="z3950.indexdata.com/marc" name="Local Test"></location>
 <recid>8</recid>
 </hit>
 <hit>
 
-<md-title>Washington metropolitan area rail computer feasibility study; final report</md-title>
+<md-title>Washington metropolitan area rail computer feasibility study;</md-title>
+<md-title-remainder>final report</md-title-remainder>
 <md-date>1971</md-date>
-<md-author>Englund, Carl R</md-author><location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<md-author>Englund, Carl R</md-author>
+<md-description>&quot;Contract DOT-UT-10003.&quot;</md-description><location id="z3950.indexdata.com/marc" name="Local Test"></location>
 <recid>7</recid>
 </hit>
 <hit>
 
-<md-title>The use of passwords for controlled access to computer resources</md-title>
-<md-date>1977</md-date>
-<md-author>Wood, Helen M</md-author><location id="z3950.indexdata.com/marc" name="Local Test"></location>
-<recid>6</recid>
+<md-title>The Computer Bible</md-title>
+<md-date>1973-1980</md-date>
+<md-description>Vols. 2, 8: Missoula, Mont. : Published by Scholars Press for Biblical Research Associates</md-description><location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<recid>2</recid>
 </hit>
 <hit>
 
-<md-title>The Puget Sound Region : a portfolio of thematic computer maps</md-title>
-<md-date>1974</md-date>
-<md-author>Hoerauf, Eugene A</md-author><location id="z3950.indexdata.com/marc" name="Local Test"></location>
-<recid>3</recid>
+<md-title>Computer science &amp; technology</md-title>
+<md-title-remainder>proceedings of a workshop held at the National Bureau of Standards, Gaithersburg, MD, June 3-4, 1976</md-title-remainder>
+<md-date>1977</md-date><location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<recid>5</recid>
 </hit>
 <hit>
 
-<md-title>Computer science &amp; technology : proceedings of a workshop held at the National Bureau of Standards, Gaithersburg, MD, June 3-4, 1976</md-title>
-<md-date>1977</md-date>
-<md-author>Evans, John Martin</md-author><location id="z3950.indexdata.com/marc" name="Local Test"></location>
-<recid>5</recid>
+<md-title>The Puget Sound Region</md-title>
+<md-title-remainder>a portfolio of thematic computer maps</md-title-remainder>
+<md-date>1974</md-date>
+<md-author>Mairs, John W</md-author>
+<md-description>Scale of maps ca. 1:1,000,000</md-description><location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<recid>3</recid>
 </hit>
 <hit>
 
-<md-title>Computer processing of dynamic images from an Anger scintillation camera : the proceedings of a workshop</md-title>
+<md-title>Computer processing of dynamic images from an Anger scintillation camera</md-title>
+<md-title-remainder>the proceedings of a workshop</md-title-remainder>
 <md-date>1974</md-date>
-<md-author>Larson, Kenneth B</md-author><location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<md-description>Includes bibliographical references and index</md-description><location id="z3950.indexdata.com/marc" name="Local Test"></location>
 <recid>1</recid>
 </hit>
 <hit>
 
-<md-title>Reconstruction tomography in diagnostic radiology and nuclear medicine : proceedings of the workshop</md-title>
+<md-title>Reconstruction tomography in diagnostic radiology and nuclear medicine</md-title>
+<md-title-remainder>proceedings of the workshop</md-title-remainder>
 <md-date>1977</md-date>
-<md-author>Ter-Pogossian, Michel M</md-author><location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<md-description>Includes bibliographical references and index</md-description><location id="z3950.indexdata.com/marc" name="Local Test"></location>
 <recid>4</recid>
 </hit>
 </show>
diff --git a/test/test_http_6.res b/test/test_http_6.res
new file mode 100644 (file)
index 0000000..4fc8f51
--- /dev/null
@@ -0,0 +1,17 @@
+<record>
+<recid>0</recid>
+
+<md-title>How to program a computer</md-title>
+<md-author>Jack Collins</md-author>
+<md-lccn>11224467</md-lccn>
+<md-lccn>11224466</md-lccn><location id="z3950.indexdata.com/marc" name="Local Test">
+<md-title>How to program a computer</md-title>
+<md-author>Jack Collins</md-author>
+<md-id>11224467</md-id>
+<md-lccn>11224467</md-lccn></location>
+<location id="z3950.indexdata.com/marc" name="Local Test">
+<md-title>How to program a computer</md-title>
+<md-author>Jack Collins</md-author>
+<md-id>11224466</md-id>
+<md-lccn>11224466</md-lccn></location>
+</record>
diff --git a/test/test_http_7.res b/test/test_http_7.res
new file mode 100644 (file)
index 0000000..781b985
--- /dev/null
@@ -0,0 +1,2 @@
+<?xml version="1.0"?>
+<record xmlns="http://www.loc.gov/MARC21/slim"><leader>00362nam  22001698a 4504</leader><controlfield tag="001">   11224467 </controlfield><controlfield tag="003">DLC</controlfield><controlfield tag="005">00000000000000.0</controlfield><controlfield tag="008">910710c19910701nju           00010 eng  </controlfield><datafield tag="010" ind1=" " ind2=" "><subfield code="a">11224467</subfield></datafield><datafield tag="040" ind1=" " ind2=" "><subfield code="a">DLC</subfield><subfield code="c">DLC</subfield></datafield><datafield tag="050" ind1="0" ind2="0"><subfield code="a">123-xyz</subfield></datafield><datafield tag="100" ind1="1" ind2="0"><subfield code="a">Jack Collins</subfield></datafield><datafield tag="245" ind1="1" ind2="0"><subfield code="a">How to program a computer</subfield></datafield><datafield tag="260" ind1="1" ind2=" "><subfield code="a">Penguin</subfield></datafield><datafield tag="263" ind1=" " ind2=" "><subfield code="a">8710</subfield></datafield><datafield tag="300" ind1=" " ind2=" "><subfield code="a">p. cm.</subfield></datafield></record>
index eab57f6..fe1f8f3 100644 (file)
@@ -4,3 +4,5 @@ http://localhost:9763/search.pz2?session=1&command=ping
 http://localhost:9763/search.pz2?session=1&command=search&query=computer
 2
 http://localhost:9763/search.pz2?session=1&command=show&start=0&number=1&block=1
+http://localhost:9763/search.pz2?session=1&command=record&id=0
+http://localhost:9763/search.pz2?session=1&command=record&id=0&offset=0
index 0863997..9d3984b 100644 (file)
@@ -1,5 +1,5 @@
 /* A very simple client that shows a basic usage of the pz2.js
-** $Id: example_client.js,v 1.2 2007-06-22 10:54:46 adam Exp $
+** $Id: example_client.js,v 1.3 2007-07-16 09:39:56 adam Exp $
 */
 
 // create a parameters array and pass it to the pz2's constructor
@@ -62,11 +62,18 @@ function my_onshow(data) {
     
     for (var i = 0; i < data.hits.length; i++) {
         var hit = data.hits[i];
-        body.innerHTML += '<div class="record" id="rec_' + hit.recid + '" onclick="showDetails(this.id)">'
-                        +'<span>' + (i + 1 + recPerPage * ( curPage - 1)) + '. </span>'
-                        +'<span class="jslink"><b>' + hit["md-title"] +
-                        ' </b></span> by <span><i>' + hit["md-author"] + '</i></span></div>';
-
+       var html = '<div class="record" id="rec_' + hit.recid + '" onclick="showDetails(this.id)">'
+                    +'<span>' + (i + 1 + recPerPage * ( curPage - 1)) + '. </span>'
+                    +'<span class="jslink"><b>' + hit["md-title"] +
+                    ' </b></span>'; 
+       if (hit["md-title-remainder"] !== undefined) {
+           html += '<span>' + hit["md-title-remainder"] + '</span>';
+       }
+       if (hit["md-title-responsibility"] !== undefined) {
+           html += '<span><i>' + hit["md-title-responsibility"] + '</i></span>';
+       }
+       html += '</div>';
+       body.innerHTML += html;
         if ( hit.recid == curDetRecId ) {
             drawCurDetails();
         }
@@ -163,6 +170,7 @@ function drawCurDetails ()
                             '"><table><tr><td><b>Ttle</b></td><td><b>:</b> '+data["md-title"] +
                             "</td></tr><tr><td><b>Date</b></td><td><b>:</b> " + data["md-date"] +
                             "</td></tr><tr><td><b>Author</b></td><td><b>:</b> " + data["md-author"] +
+                            '</td></tr><tr><td><b>URL</b></td><td><b>:</b> <a href="' + data["md-url"] + '">' + data["md-url"] + '</a>' +
                             "</td></tr><tr><td><b>Subject</b></td><td><b>:</b> " + data["md-subject"] + 
                             "</td></tr><tr><td><b>Location</b></td><td><b>:</b> " + data["location"][0].name + 
                             "</td></tr></table></div>";