1 # $Id: Utils.pm,v 1.26 2007-03-19 18:51:03 mike Exp $
3 package ZOOM::IRSpy::Utils;
10 our @EXPORT_OK = qw(isodate
20 use XML::LibXML::XPathContext;
22 our $IRSPY_NS = 'http://indexdata.com/irspy/1.0';
25 # Utility functions follow, exported for use of web UI
29 my($sec, $min, $hour, $mday, $mon, $year) = localtime($time);
30 return sprintf("%04d-%02d-%02dT%02d:%02d:%02d",
31 $year+1900, $mon+1, $mday, $hour, $min, $sec);
35 # I can't -- just can't, can't, can't -- believe that this function
36 # isn't provided by one of the core XML modules. But the evidence all
37 # says that it's not: among other things, XML::Generator and
38 # Template::Plugin both roll their own. So I will do likewise. D'oh!
41 my($text, $fallback, $opts) = @_;
42 if (!defined $opts && ref $fallback) {
43 # The second and third arguments are both optional
47 $opts = {} if !defined $opts;
49 $text = $fallback if !defined $text;
51 confess "xml_encode(): text and fallback both undefined"
57 # Internet Explorer can't display ' (!) so don't create it
58 #$text =~ s/['']/'/g;
59 $text =~ s/[""]/"/g;
60 $text =~ s/ / /g if $opts->{nbsp};
66 # Quotes a term for use in a CQL query
70 $term =~ s/([""\\])/\\$1/g;
71 $term = qq["$term"] if $term =~ /[\s""\/]/;
76 # Makes a CQL query that finds a specified target. Arguments may be
77 # either an ID alone, or a (host, port, db) triple.
79 my($host, $port, $db) = @_;
81 $host .= ":$port/$db" if defined $port;
82 return "rec.id=" . cql_quote($host);
86 # PRIVATE to irspy_namespace() and irspy_xpath_context()
88 e => 'http://explain.z3950.org/dtd/2.0/',
97 confess "irspy_namespace(undef)" if !defined $prefix;
98 my $uri = $_namespaces{$prefix};
99 die "irspy_namespace(): no URI for namespace prefix '$prefix'"
# Prepares an XML::LibXML::XPathContext on which every prefix listed in
# %_namespaces has been registered with its namespace URI.
#
# When $record is a ZOOM::Record it is first converted with render().
# Record text is parsed with XML::LibXML and the context is rooted at the
# parsed document's top-level element.
#
# NOTE(review): presumably the finished context is what gets returned, and
# $record may also arrive in forms other than a ZOOM::Record -- TODO confirm.
106 sub irspy_xpath_context {
109 if (ref $record && $record->isa("ZOOM::Record")) {
110 $record = $record->render();
117 my $parser = new XML::LibXML();
118 my $doc = $parser->parse_string($record);
119 $root = $doc->getDocumentElement();
122 my $xc = XML::LibXML::XPathContext->new($root);
123 foreach my $prefix (keys %_namespaces) {
124 $xc->registerNs($prefix, $_namespaces{$prefix});
# Applies the new field values in %$data to the XML document behind the
# XPath context $xc.
#
#	$xc - XPath context used to find the node for each field
#	$fieldsByKey - hash mapping each key to an array reference laid out
#		as (name, nlines, caption, xpath, addAfter...)
#	$data - hash mapping keys to their new values
#
# Dies with "no field '<key>'" when a key of %$data has no entry in
# %$fieldsByKey.  Warns when a field's XPath matches more than one node;
# only the first match is used.  A matched attribute is given the new value
# with setValue() when it differs from the current one.  A matched element
# is left alone when its only child is an XML::LibXML::Text already holding
# the new value; otherwise all its children are removed and replaced by a
# single text node.  Any other node type produces a warning.
#
# NOTE(review): the dom_add_node() path is skipped for every false value,
# which includes the string "0" as well as "" and undef -- presumably this
# path handles fields whose XPath matched nothing; TODO confirm.
130 sub modify_xml_document {
131 my($xc, $fieldsByKey, $data) = @_;
134 foreach my $key (keys %$data) {
135 my $value = $data->{$key};
136 my $ref = $fieldsByKey->{$key} or die "no field '$key'";
137 my($name, $nlines, $caption, $xpath, @addAfter) = @$ref;
138 #print "Considering $key='$value' ($xpath)<br/>\n";
139 my @nodes = $xc->findnodes($xpath);
141 warn scalar(@nodes), " nodes match '$xpath'" if @nodes > 1;
142 my $node = $nodes[0];
144 if ($node->isa("XML::LibXML::Attr")) {
145 if ($value ne $node->getValue()) {
146 $node->setValue($value);
148 #print "Attr $key: '", $node->getValue(), "' -> '$value' ($xpath)<br/>\n";
150 } elsif ($node->isa("XML::LibXML::Element")) {
151 # The contents could be any mixture of text and
152 # comments and maybe even other crud such as processing
153 # instructions. The simplest thing is just to throw it all
154 # away and start again, making a single Text node the
155 # canonical representation. But before we do that,
156 # we'll check whether the element is already
157 # canonical, to determine whether our change is a
160 my @children = $node->childNodes();
161 if (@children == 1) {
162 my $child = $node->firstChild();
163 if (ref $child && ref $child eq "XML::LibXML::Text") {
164 $old = $child->getData();
165 next if $value eq $old;
169 $node->removeChildNodes();
170 my $child = new XML::LibXML::Text($value);
171 $node->appendChild($child);
173 #print "Elem $key: '$old' -> '$value' ($xpath)<br/>\n";
175 warn "unexpected node type $node";
179 next if !$value; # No need to create a new empty node
180 my($ppath, $selector) = $xpath =~ /(.*)\/(.*)/;
181 dom_add_node($xc, $ppath, $selector, $value, @addAfter);
182 #print "New $key ($xpath) = '$value'<br/>\n";
192 my($xc, $ppath, $selector, $value, @addAfter) = @_;
194 #print "Adding $selector='$value' at '$ppath' after (", join(", ", map { "'$_'" } @addAfter), ")<br/>\n";
195 my $node = find_or_make_node($xc, $ppath, 0);
196 die "couldn't find or make node '$node'" if !defined $node;
198 my $is_attr = ($selector =~ s/^@//);
199 my(undef, $prefix, $simpleSel) = $selector =~ /((.*?):)?(.*)/;
200 #warn "selector='$selector', prefix='$prefix', simpleSel='$simpleSel'";
202 if (defined $prefix) {
203 ### This seems to no-op (thanks, DOM!) but I have no
204 # idea, and it's not needed for IRSpy, so I am not going
206 $node->setAttributeNS(irspy_namespace($prefix),
209 $node->setAttribute($simpleSel, $value);
214 my $new = new XML::LibXML::Element($simpleSel);
215 $new->setNamespace(irspy_namespace($prefix), $prefix)
218 $new->appendText($value);
219 foreach my $predecessor (reverse @addAfter) {
220 my($child) = $xc->findnodes($predecessor, $node);
221 if (defined $child) {
222 $node->insertAfter($new, $child);
223 #warn "Added after '$predecessor'";
228 # Didn't find any of the nodes that are supposed to precede the
229 # new one, so we need to insert the new node as the first of the
230 # parent's children. However *sigh* there is no prependChild()
231 # analogous to appendChild(), so we have to go the long way round.
232 my @children = $node->childNodes();
234 $node->insertBefore($new, $children[0]);
235 #warn "Added new first child";
237 $node->appendChild($new);
238 #warn "Added new only child";
242 my $text = xml_encode(inheritance_tree($xc));
243 $text =~ s/\n/<br\/>$&/sg;
244 print "<pre>$text</pre>\n";
# Returns the node addressed by $path within the XPath context $xc,
# creating it -- and, recursively, any missing ancestors -- when nothing
# matches.
#
#	$xc - XPath context to search; only its findnodes() method is used
#	$path - slash-separated location path; "" is treated as "."
#	$recursion_level - nesting depth of this call: outside callers
#		pass 0 (an undefined value is treated as 0)
#
# A missing node is built from the last step of $path.  An optional
# "prefix:" on that step is resolved through irspy_namespace(), and the
# new element is appended to the node found or made for the rest of the
# path.
#
# Dies with "deep recursion in find_or_make_node(...)" once the nesting
# reaches ten levels.  When several nodes match, warns and returns the
# first of them.
sub find_or_make_node {
    my($xc, $path, $recursion_level) = @_;
    $recursion_level = 0 if !defined $recursion_level;

    # Every nested call is made with level+1, so this guard really does
    # trip.  Without it, a context in which even "." matches nothing
    # would recurse for ever.
    die "deep recursion in find_or_make_node($path)"
        if $recursion_level >= 10;
    $path = "." if $path eq "";

    my @nodes = $xc->findnodes($path);
    if (@nodes > 0) {
        warn scalar(@nodes), " nodes match parent '$path'" if @nodes > 1;
        return $nodes[0];
    }

    # The node doesn't exist, so make it.  Split the path into the
    # parent's path (everything before the last slash) and the final
    # step, then make sure the parent exists first.
    my(undef, $ppath, $element) = $path =~ /((.*)\/)?(.*)/;
    $ppath = "" if !defined $ppath;
    my $parent = find_or_make_node($xc, $ppath, $recursion_level + 1);

    # Separate an optional namespace prefix from the element name.
    my(undef, $prefix, $nsElem) = $element =~ /((.*?):)?(.*)/;
    my $new = XML::LibXML::Element->new($nsElem);
    $new->setNamespace(irspy_namespace($prefix), $prefix)
        if defined $prefix;

    $parent->appendChild($new);
    return $new;
}
# Returns a multi-line string picturing the ancestry of a class: one
# line per class, each superclass on the lines below its subclass and
# indented by one more tab.  The top line is prefixed with "--> ".
#
#	$type - a class name, or a reference whose ref() type is used
#	$level - current depth; omitted by outside callers (defaults to 0)
#
# Superclasses are read from each package's @ISA, in @ISA order.  If the
# depth exceeds 20 the string "Woah! Too deep, man!\n" is returned for
# that branch instead of descending further.
sub inheritance_tree {
    my($type, $level) = @_;
    $level = 0 if !defined $level;
    return "Woah! Too deep, man!\n" if $level > 20;

    $type = ref $type if ref $type;
    my $text = $level == 0 ? "--> " : "";
    $text .= ("\t" x $level) . "$type\n";

    # Look @ISA up through a symbolic reference rather than a string
    # eval, so that $type is only ever treated as a name and can never
    # be executed as code.
    my @superclasses = do {
        no strict 'refs';
        @{"${type}::ISA"};
    };
    foreach my $superclass (@superclasses) {
        $text .= inheritance_tree($superclass, $level+1);
    }

    return $text;
}
299 # This function is made available in xslt using the register_function call
301 my ($arg1, $arg2) = @_;
302 return ($arg1->to_literal()) cmp ($arg2->to_literal());
306 ### It feels like this should be in YAZ, exported via ZOOM-Perl.
307 my %_bib1_access_point = (
308 1 => "Personal name",
309 2 => "Corporate name",
310 3 => "Conference name",
313 6 => "Title uniform",
316 9 => "LC card number",
317 10 => "BNB card no.",
319 12 => "Local number",
320 13 => "Dewey classification",
321 14 => "UDC classification",
322 15 => "Bliss classification",
323 16 => "LC call number",
324 17 => "NLM call number",
325 18 => "NAL call number",
326 19 => "MOS call number",
327 20 => "Local classification",
328 21 => "Subject heading",
329 22 => "Subject Rameau",
330 23 => "BDI index subject",
331 24 => "INSPEC subject",
332 25 => "MESH subject",
334 27 => "LC subject heading",
335 28 => "RVM subject heading",
336 29 => "Local subject index",
338 31 => "Date of publication",
339 32 => "Date of acquisition",
341 34 => "Title collective",
342 35 => "Title parallel",
344 37 => "Title added title page",
345 38 => "Title caption",
346 39 => "Title running",
348 41 => "Title other variant",
349 42 => "Title former",
350 43 => "Title abbreviated",
351 44 => "Title expanded",
352 45 => "Subject precis",
353 46 => "Subject rswk",
354 47 => "Subject subdivision",
355 48 => "No. nat'l biblio.",
356 49 => "No. legal deposit",
357 50 => "No. govt pub.",
358 51 => "No. music publisher",
360 53 => "Number local call",
361 54 => "Code--language",
362 55 => "Code--geographic area",
363 56 => "Code--institution",
364 57 => "Name and title *",
365 58 => "Name geographic",
366 59 => "Place publication",
368 61 => "Microform generation",
371 1000 => "Author-title",
372 1001 => "Record type",
375 1004 => "Author-name personal",
376 1005 => "Author-name corporate",
377 1006 => "Author-name conference",
378 1007 => "Identifier--standard",
379 1008 => "Subject--LC children's",
380 1009 => "Subject name -- personal",
381 1010 => "Body of text",
382 1011 => "Date/time added to db",
383 1012 => "Date/time last modified",
384 1013 => "Authority/format id",
385 1014 => "Concept-text",
386 1015 => "Concept-reference",
388 1017 => "Server-choice",
390 1019 => "Record-source",
393 1022 => "Geographic-class",
394 1023 => "Indexed-by",
397 1026 => "Related-periodical",
398 1027 => "Report-number",
399 1028 => "Stock-number",
400 1030 => "Thematic-number",
401 1031 => "Material-type",
404 1034 => "Content-type",
406 1036 => "Author-Title-Subject",
407 1032 => "Doc-id (semantic definition change)",
409 1038 => "Abstract-language",
410 1039 => "Application-kind",
411 1040 => "Classification",
412 1041 => "Classification-basic",
413 1042 => "Classification-local-record",
415 1044 => "Possessing-institution",
416 1045 => "Record-linking",
417 1046 => "Record-status",
419 1048 => "Control-number-GKD",
420 1049 => "Control-number-linking",
421 1050 => "Control-number-PND",
422 1051 => "Control-number-SWD",
423 1052 => "Control-number-ZDB",
424 1053 => "Country-publication (country of Publication)",
425 1054 => "Date-conference (meeting date)",
426 1055 => "Date-record-status",
427 1056 => "Dissertation-information",
428 1057 => "Meeting-organizer",
429 1058 => "Note-availability",
430 1059 => "Number-CAS-registry (CAS registry number)",
431 1060 => "Number-document (document number)",
432 1061 => "Number-local-accounting",
433 1062 => "Number-local-acquisition",
434 1063 => "Number-local-call-copy-specific",
435 1064 => "Number-of-reference (reference count)",
436 1065 => "Number-norm",
437 1066 => "Number-volume",
438 1067 => "Place-conference (meeting location)",
439 1068 => "Reference (references and footnotes)",
440 1069 => "Referenced-journal (reference work)",
441 1070 => "Section-code",
442 1071 => "Section-heading",
443 1072 => "Subject-GOO",
444 1073 => "Subject-name-conference",
445 1074 => "Subject-name-corporate",
446 1075 => "Subject-genre/form",
447 1076 => "Subject-name-geographical",
448 1077 => "Subject--chronological",
449 1078 => "Subject--title",
450 1079 => "Subject--topical",
451 1080 => "Subject-uncontrolled",
452 1081 => "Terminology-chemical (chemical name)",
453 1082 => "Title-translated",
454 1083 => "Year-of-beginning",
455 1084 => "Year-of-ending",
456 1085 => "Subject-AGROVOC",
457 1086 => "Subject-COMPASS",
458 1087 => "Subject-EPT",
459 1088 => "Subject-NAL",
460 1089 => "Classification-BCM",
461 1090 => "Classification-DB",
462 1091 => "Identifier-ISRC",
463 1092 => "Identifier-ISMN",
464 1093 => "Identifier-ISRN",
465 1094 => "Identifier-DOI",
466 1095 => "Code-language-original",
467 1096 => "Title-later",
469 1098 => "DC-Creator",
470 1099 => "DC-Subject",
471 1100 => "DC-Description",
472 1101 => "DC-Publisher",
474 1103 => "DC-ResourceType",
475 1104 => "DC-ResourceIdentifier",
476 1105 => "DC-Language",
477 1106 => "DC-OtherContributor",
480 1109 => "DC-Relation",
481 1110 => "DC-Coverage",
482 1111 => "DC-RightsManagement",
483 1112 => "Controlled Subject Index",
484 1113 => "Subject Thesaurus",
485 1114 => "Index Terms -- Controlled",
486 1115 => "Controlled Term",
487 1116 => "Spatial Domain",
488 1117 => "Bounding Coordinates",
489 1118 => "West Bounding Coordinate",
490 1119 => "East Bounding Coordinate",
491 1120 => "North Bounding Coordinate",
492 1121 => "South Bounding Coordinate",
494 1123 => "Place Keyword Thesaurus",
495 1124 => "Place Keyword",
496 1125 => "Time Period",
497 1126 => "Time Period Textual",
498 1127 => "Time Period Structured",
499 1128 => "Beginning Date",
500 1129 => "Ending Date",
501 1130 => "Availability",
502 1131 => "Distributor",
503 1132 => "Distributor Name",
504 1133 => "Distributor Organization",
505 1134 => "Distributor Street Address",
506 1135 => "Distributor City",
507 1136 => "Distributor State or Province",
508 1137 => "Distributor Zip or Postal Code",
509 1138 => "Distributor Country",
510 1139 => "Distributor Network Address",
511 1140 => "Distributor Hours of Service",
512 1141 => "Distributor Telephone",
513 1142 => "Distributor Fax",
514 1143 => "Resource Description",
515 1144 => "Order Process",
516 1145 => "Order Information",
518 1147 => "Cost Information",
519 1148 => "Technical Prerequisites",
520 1149 => "Available Time Period",
521 1150 => "Available Time Textual",
522 1151 => "Available Time Structured",
523 1152 => "Available Linkage",
524 1153 => "Linkage Type",
526 1155 => "Sources of Data",
527 1156 => "Methodology",
528 1157 => "Access Constraints",
529 1158 => "General Access Constraints",
530 1159 => "Originator Dissemination Control",
531 1160 => "Security Classification Control",
532 1161 => "Use Constraints",
533 1162 => "Point of Contact",
534 1163 => "Contact Name",
535 1164 => "Contact Organization",
536 1165 => "Contact Street Address",
537 1166 => "Contact City",
538 1167 => "Contact State or Province",
539 1168 => "Contact Zip or Postal Code",
540 1169 => "Contact Country",
541 1170 => "Contact Network Address",
542 1171 => "Contact Hours of Service",
543 1172 => "Contact Telephone",
544 1173 => "Contact Fax",
545 1174 => "Supplemental Information",
547 1176 => "Agency Program",
548 1177 => "Cross Reference",
549 1178 => "Cross Reference Title",
550 1179 => "Cross Reference Relationship",
551 1180 => "Cross Reference Linkage",
552 1181 => "Schedule Number",
553 1182 => "Original Control Identifier",
554 1183 => "Language of Record",
555 1184 => "Record Review Date",
557 1186 => "Performer-Individual",
558 1187 => "Performer-Group",
559 1188 => "Instrumentation",
560 1189 => "Instrumentation-Original",
561 1190 => "Instrumentation-Current",
562 1191 => "Arrangement",
563 1192 => "Arrangement-Original",
564 1193 => "Arrangement-Current",
565 1194 => "Musical Key-Original",
566 1195 => "Musical Key-Current",
567 1196 => "Date-Composition",
568 1197 => "Date-Recording",
569 1198 => "Place-Recording",
570 1199 => "Country-Recording",
571 1200 => "Number-ISWC",
572 1201 => "Number-Matrix",
573 1202 => "Number-Plate",
574 1203 => "Classification-McColvin",
576 1205 => "Number-Copies",
577 1206 => "Musical Theme",
578 1207 => "Instruments - total number",
579 1208 => "Instruments - distinct number",
580 1209 => "Identifier - URN",
581 1210 => "Sears Subject Heading",
582 1211 => "OCLC Number",
583 1212 => "Composition",
584 1213 => "Intellectual level",
588 1217 => "Nationality",
590 1219 => "Compression",
592 1221 => "Subject - occupation",
593 1222 => "Subject - function",
# Translates a BIB-1 access-point number into its descriptive name by
# looking it up in %_bib1_access_point.
#
#	$ap - the access-point number to look up
#
# Returns the name from the table, or the string
# "unknown BIB-1 attribute '<ap>'" when the table holds no true value
# for that number.
sub bib1_access_point {
    my($ap) = @_;

    my $name = $_bib1_access_point{$ap};
    return $name if $name;
    return "unknown BIB-1 attribute '$ap'";
}
606 my($rs, $which, $elementSetName) = @_;
608 # There is a slight race condition here on the element-set name,
609 # but it shouldn't be a problem as this is (currently) only called
610 # from parts of the program that run single-threaded.
611 my $old = $rs->option(elementSetName => $elementSetName);
612 my $rec = $rs->record($which);
613 $rs->option(elementSetName => $old);
615 return $rec->render();