3 # $Id: ezeerex2pqfproperties.pl,v 1.5 2006-06-19 16:45:18 mike Exp $
6 # ./ezeerex2pqfproperties.pl zeerex.xml
# Main script flow: read a ZeeRex record from the files named on the command
# line (or from standard input), parse it, and hand an XPath context to the
# print_* routines that write the property lines.
# NOTE(review): this listing has gaps; statements that originally sat between
# the lines below are not visible here.
11 use XML::LibXML::XPathContext;
# Slurp every input line from <> into a single string.
13 my $text = join('', <>);
# NOTE(review): indirect-object call; XML::LibXML->new() is the usual spelling.
14 my $parser = new XML::LibXML();
15 my $doc = $parser->parse_string($text);
16 my $root = $doc->getDocumentElement();
# XPath context rooted at the document element. The prefix "z" is bound to the
# namespace URI below so that the z:... paths used by the routines resolve.
17 my $xc = XML::LibXML::XPathContext->new($root);
18 $xc->registerNs(z => 'http://explain.z3950.org/dtd/2.0/');
# Each routine is passed the shared XPath context.
21 print_default_set($xc);
24 print_relation_modifiers($xc);
26 print_structures($xc);
27 print_truncations($xc);
29 # We could limit the sets output to those that are actually used by an
30 # SRU index: that way we could avoid defining
31 # set.bib1 = 1.2.840.10003.3.1
32 # which is a Z39.50 attribute set that we don't need for CQL. But
33 # doing that would be a marginal gain.
# Body of the set-listing routine: writes one "set.NAME = IDENTIFIER" line for
# each z:indexInfo/z:set element found via $xc.
# NOTE(review): the enclosing `sub` line, the unpacking of $xc and the closing
# braces are not visible in this listing — confirm against the full file.
38 my(@nodes) = $xc->findnodes('z:indexInfo/z:set');
39 foreach my $node (@nodes) {
# Both values are read from attributes of the matched element.
40 my $name = $node->findvalue('@name');
41 my $identifier = $node->findvalue('@identifier');
42 print "set.$name = $identifier\n";
# Writes a "set = VALUE" line for every z:default element of type
# "contextSet" under z:configInfo, using the element's text content as VALUE.
# NOTE(review): the line that unpacks $xc from @_ and the closing braces are
# not visible in this listing.
46 sub print_default_set {
49 my (@nodes) = $xc->findnodes('z:configInfo/' .
50 'z:default[@type="contextSet"]');
51 foreach my $node (@nodes) {
52 ### Look this up and render as a URI
# The value is printed exactly as it appears in the record (see the note above).
53 print "set = ", $node->findvalue('.'), "\n";
# Body of the index-mapping routine: for every z:index with search="true",
# writes "index.SET.NAME = TYPE=VALUE" once per z:map child of that index.
# NOTE(review): the enclosing `sub` line and all closing braces are not
# visible in this listing.
60 foreach my $node ($xc->findnodes('z:indexInfo/' .
61 'z:index[@search="true"]')) {
# Collect every z:attr found under any z:map of this index.
62 my @pqf = $xc->findnodes("z:map/z:attr", $node);
# NOTE(review): the condition guarding this die() is on a line missing from
# the listing — presumably it fires only when @pqf is empty; confirm.
63 die("no PQF mapping for index '" .
64 $xc->findvalue("z:title", $node) . "'")
# Only the first z:attr supplies the attribute type and value; the same pair
# is reused for every z:map printed below.
66 my $ptype = $xc->findvalue('@type', $pqf[0]);
67 my $pval = $xc->findvalue(".", $pqf[0]);
69 foreach my $map ($xc->findnodes("z:map", $node)) {
# The set name comes from the "set" attribute of z:name, the index name from
# its text content.
70 my $setname = $xc->findvalue('z:name/@set', $map);
71 my $indexname = $xc->findvalue('z:name', $map);
72 ### We need a way for the ZeeRex record to specify other
73 # attributes to be specified along with the access-point,
74 # e.g. @attr 4=3 for whole-field indexes.
75 print "index.$setname.$indexname = $ptype=$pval\n"
81 # I don't think these are affected by the ZeeRex record
97 # I don't think these are affected by the ZeeRex record
# Fixed relationModifier.* property lines; nothing here is read from the
# ZeeRex record.
# NOTE(review): the statement that outputs these lines (presumably a print
# with a here-document) and the closing brace are not visible in this listing.
# If the lines are here-document text, nothing may be inserted between them.
98 sub print_relation_modifiers {
102 relationModifier.relevant = 2=102
103 relationModifier.fuzzy = 5=103
104 relationModifier.stem = 2=101
105 relationModifier.phonetic = 2=100
106 relationModifier.regexp = 5=102
110 # I don't think these are affected by the ZeeRex record
# Fixed position.* property lines; each value pairs two attribute settings
# (types 3 and 6).
# NOTE(review): the statement that outputs these lines (presumably a print
# with a here-document) and the closing brace are not visible in this listing,
# and no call to this routine appears among the visible top-level statements.
111 sub print_positions {
115 position.first = 3=1 6=1
116 position.any = 3=3 6=1
117 position.last = 3=4 6=1
118 position.firstAndLast = 3=3 6=3
122 # I don't think these are affected by the ZeeRex record
# Fixed structure.* property lines; only the "exact" entry is visible here.
# NOTE(review): the output statement, any further structure.* lines and the
# closing brace are not visible in this listing.
123 sub print_structures {
127 structure.exact = 4=108
134 # I don't think these are affected by the ZeeRex record
# Fixed truncation.* property lines, all expressed as settings of attribute
# type 5.
# NOTE(review): the statement that outputs these lines (presumably a print
# with a here-document) and the closing brace are not visible in this listing.
135 sub print_truncations {
139 truncation.right = 5=1
140 truncation.left = 5=2
141 truncation.both = 5=3
142 truncation.none = 5=100
143 truncation.regexp = 5=102
144 truncation.z3958 = 5=104