--- /dev/null
+001 $ id
+010 a lccn
+020 a isbn
+022 a issn
+027 a tech-rep-nr
+035 a system-control-nr
+100 a author
+100 c author-title
+100 d author-date
+110 a corporate-name
+110 c corporate-location
+110 d corporate-date
+111 a meeting-name
+111 c meeting-location
+111 d meeting-date
+260 c date
+245 a title
+245 b title-remainder
+245 c title-responsibility
+245 f title-dates
+245 h title-medium
+245 n title-number-section
+250 a edition
+260 a publication-place
+260 b publication-name
+260 c publication-date
+300 a physical-extent
+300 b physical-format
+300 c physical-dimensions
+300 e physical-accomp
+300 f physical-unittype
+300 g physical-unitsize
+300 3 physical-specified
+440 a series-title
+500 $ description
+505 $ description
+518 $ description
+520 $ description
+522 $ description
+600 a subject
+600 a subject
+610 a subject
+610 a subject
+611 a subject
+611 a subject
+630 a subject
+630 a subject
+648 a subject
+648 a subject
+650 a subject
+650 * subject-long
+651 a subject
+651 * subject-long
+653 a subject
+653 * subject-long
+654 a subject
+654 * subject-long
+655 a subject
+655 * subject-long
+656 a subject
+656 * subject-long
+657 a subject
+657 * subject-long
+658 a subject
+658 * subject-long
+662 a subject
+662 * subject-long
+69X a subject
+69X * subject-long
+773 * citation
+856 u electronic-url
+856 y electronic-text
+856 3 electronic-text
+856 z electronic-note
+852 y publicnote
+852 h callnumber
+900 a fulltext
+900 b fulltext
+901 a iii-id
+907 a iii-id
+926 * holding
+948 * holding
+991 * holding
settings.h settings.c sel_thread.c sel_thread.h getaddrinfo.c \
charsets.c charsets.h \
client.c client.h connection.c connection.h host.h parameters.h \
- dirent.c direntz.h
+ dirent.c direntz.h marcmap.c marcmap.h marchash.c marchash.h
pazpar2_SOURCES = pazpar2.c
pazpar2_LDADD = libpazpar2.a $(YAZLIB)
#include "client.h"
#include "settings.h"
#include "normalize7bit.h"
+#include "marcmap.h"
#define TERMLIST_HIGH_SCORE 25
insert_settings_parameters(sdb, se, parms);
- new = xsltApplyStylesheet(m->stylesheet, rdoc, (const char **) parms);
- root= xmlDocGetRootElement(new);
+ if (m->stylesheet)
+ {
+ new = xsltApplyStylesheet(m->stylesheet, rdoc, (const char **) parms);
+ }
+ else if (m->marcmap)
+ {
+ new = marcmap_apply(m->marcmap, rdoc);
+ }
+
+ root = xmlDocGetRootElement(new);
+
if (!new || !root || !(root->children))
{
yaz_log(YLOG_WARN, "XSLT transformation failed from %s",
{
(*m) = nmem_malloc(se->session_nmem, sizeof(**m));
(*m)->next = 0;
- if (!((*m)->stylesheet = conf_load_stylesheet(stylesheets[i])))
+
+ // XSLT
+ if (!strcmp(&stylesheets[i][strlen(stylesheets[i])-4], ".xsl"))
+ {
+ (*m)->marcmap = NULL;
+ if (!((*m)->stylesheet = conf_load_stylesheet(stylesheets[i])))
+ {
+ yaz_log(YLOG_FATAL|YLOG_ERRNO, "Unable to load stylesheet: %s",
+ stylesheets[i]);
+ return -1;
+ }
+ }
+ // marcmap
+ else if (!strcmp(&stylesheets[i][strlen(stylesheets[i])-5], ".mmap"))
{
- yaz_log(YLOG_FATAL|YLOG_ERRNO, "Unable to load stylesheet: %s",
- stylesheets[i]);
- return -1;
+ (*m)->stylesheet = NULL;
+ if (!((*m)->marcmap = marcmap_load(stylesheets[i], se->session_nmem)))
+ {
+ yaz_log(YLOG_FATAL|YLOG_ERRNO, "Unable to load marcmap: %s",
+ stylesheets[i]);
+ return -1;
+ }
}
+
m = &(*m)->next;
}
}
--- /dev/null
+#include <ctype.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <libxml/tree.h>
+#include <libxml/parser.h>
+#include <yaz/nmem.h>
+
+#include <marchash.h>
+
+// Jenkins one-at-a-time hash (from pp2 reclists.c, wikipedia)
+// Folds every byte of the NUL-terminated string `key` into an unsigned
+// accumulator and returns the mixed result.
+static unsigned int hash(const unsigned char *key)
+{
+    unsigned int acc = 0;
+    const unsigned char *p;
+
+    for (p = key; *p != '\0'; p++)
+    {
+        acc += *p;
+        acc += acc << 10;
+        acc ^= acc >> 6;
+    }
+    // final mixing steps once all bytes are consumed
+    acc += acc << 3;
+    acc ^= acc >> 11;
+    acc += acc << 15;
+    return acc;
+}
+
+// Appends src to the NUL-terminated string already in dest, dropping the
+// leading and trailing whitespace of src (inner whitespace is kept).
+// dest must have room for its current contents plus strlen(src) + 1 bytes.
+// Returns dest.
+static inline char *strtrimcat (char *dest, char *src)
+{
+    char *in = src;
+    char *out = dest;
+    char *end;
+
+    // move to end of dest
+    while (*out)
+        out++;
+    // one past the last non-space character written; starts on the old
+    // terminator so that a blank src leaves dest exactly as it was
+    end = out;
+    // skip leading whitespace (<ctype.h> functions need unsigned char values)
+    while (isspace((unsigned char) *in))
+        in++;
+    while (*in)
+    {
+        *out = *in;
+        out++;
+        if (!isspace((unsigned char) *in))
+            end = out;
+        in++;
+    }
+    // terminate right behind the last non-space character; this never
+    // touches a byte beyond what dest already held or what was just copied
+    *end = '\0';
+    return dest;
+}
+
+// Copies src into dest with leading and trailing whitespace removed.
+// dest must hold at least strlen(src) + 1 bytes. Returns dest.
+static inline char *strtrimcpy (char *dest, char *src)
+{
+    *dest = '\0';
+    return strtrimcat(dest, src);
+}
+
+// Allocates a marchash from nmem, clears the whole structure (so every
+// table slot starts out empty) and stores nmem in it.
+struct marchash *marchash_create (NMEM nmem)
+{
+    struct marchash *mh = nmem_malloc(nmem, sizeof *mh);
+
+    memset(mh, 0, sizeof *mh);
+    mh->nmem = nmem;
+    return mh;
+}
+
+// Walks the element children of rec_node and stores every <controlfield>
+// and <datafield> in marchash, keyed by its "tag" attribute and carrying the
+// element's text content as value. Each <subfield> child of a stored field
+// is added under the first character of its "code" attribute.
+//
+// Elements lacking the needed attribute are skipped. The strings handed out
+// by libxml2 are released here; marchash keeps its own copies.
+//
+// Returns 0 on success, -1 if rec_node is NULL.
+int marchash_ingest_marcxml (struct marchash *marchash, xmlNodePtr rec_node)
+{
+    xmlNodePtr field_node;
+
+    if (!rec_node)
+        return -1;
+
+    for (field_node = rec_node->children; field_node; field_node = field_node->next)
+    {
+        struct marcfield *field = NULL;
+        xmlNodePtr sub_node;
+        xmlChar *tag;
+        xmlChar *content;
+
+        if (field_node->type != XML_ELEMENT_NODE)
+            continue;
+        if (strcmp((const char *) field_node->name, "controlfield")
+            && strcmp((const char *) field_node->name, "datafield"))
+            continue;
+
+        // xmlGetProp/xmlNodeGetContent return freshly allocated strings
+        tag = xmlGetProp(field_node, BAD_CAST "tag");
+        content = xmlNodeGetContent(field_node);
+        if (tag && content)
+            field = marchash_add_field(marchash, (char *) tag, (char *) content);
+        if (tag)
+            xmlFree(tag);
+        if (content)
+            xmlFree(content);
+
+        if (!field)
+            continue;
+
+        for (sub_node = field_node->children; sub_node; sub_node = sub_node->next)
+        {
+            xmlChar *code;
+            xmlChar *sub_content;
+
+            if (sub_node->type != XML_ELEMENT_NODE
+                || strcmp((const char *) sub_node->name, "subfield"))
+                continue;
+
+            code = xmlGetProp(sub_node, BAD_CAST "code");
+            sub_content = xmlNodeGetContent(sub_node);
+            if (code && sub_content)
+                marchash_add_subfield(marchash, field, (char) code[0], (char *) sub_content);
+            if (code)
+                xmlFree(code);
+            if (sub_content)
+                xmlFree(sub_content);
+        }
+    }
+    return 0;
+}
+
+// Appends a new field with tag `key` and a whitespace-trimmed copy of `val`
+// to the end of the hash chain selected by `key`. Both strings are copied
+// into memory taken from marchash->nmem, so the caller keeps ownership of
+// its arguments.
+//
+// Returns the new field, or NULL if key or val is NULL or key is longer
+// than the three characters a MARC tag may have. Nothing is added to the
+// table in that case.
+struct marcfield *marchash_add_field (struct marchash *marchash, char *key, char *val)
+{
+    struct marcfield **tail;
+    struct marcfield *new;
+
+    // only 3 char in a marc field name; reject before touching the table so
+    // a bad tag never leaves a half-initialised entry in the chain
+    if (!key || !val || strlen(key) > 3)
+        return NULL;
+
+    // find the link that terminates the chain for this slot
+    tail = &marchash->table[hash((const unsigned char *) key) & MARCHASH_MASK];
+    while (*tail)
+        tail = &(*tail)->next;
+
+    new = nmem_malloc(marchash->nmem, sizeof *new);
+    new->next = NULL;
+    new->subfields = NULL;
+    // key has at most 3 characters, so this copies it and zero-fills the rest
+    strncpy(new->key, key, sizeof new->key);
+    new->val = nmem_malloc(marchash->nmem, strlen(val) + 1);
+    strtrimcpy(new->val, val);
+
+    // publish the entry only once it is fully initialised
+    *tail = new;
+    return new;
+}
+
+// Appends a subfield with code `key` and an untrimmed copy of `val` to the
+// end of field's subfield list. Memory comes from marchash->nmem.
+// Returns the new subfield.
+struct marcsubfield *marchash_add_subfield (struct marchash *marchash, struct marcfield *field, char key, char *val)
+{
+    struct marcsubfield **link = &field->subfields;
+    struct marcsubfield *sub;
+
+    // advance to the link that ends the list
+    while (*link)
+        link = &(*link)->next;
+
+    sub = nmem_malloc(marchash->nmem, sizeof (struct marcsubfield));
+    *link = sub;
+
+    sub->next = NULL;
+    sub->key = key;
+    sub->val = nmem_malloc(marchash->nmem, strlen(val) + 1);
+    strcpy(sub->val, val);
+    return sub;
+}
+
+// Finds a field whose tag equals `key`. With last == NULL the search starts
+// at the head of the chain that `key` hashes to; otherwise it resumes right
+// after `last`, which lets a caller iterate over repeated fields.
+// Returns the matching field or NULL when the chain is exhausted.
+struct marcfield *marchash_get_field (struct marchash *marchash, char *key, struct marcfield *last)
+{
+    struct marcfield *cur;
+
+    cur = last ? last->next
+               : marchash->table[hash(key) & MARCHASH_MASK];
+
+    for (; cur; cur = cur->next)
+        if (strcmp(cur->key, key) == 0)
+            return cur;
+    return NULL;
+}
+
+// Finds a subfield of `field` whose code equals `key`. With last == NULL the
+// search starts at the first subfield; otherwise it resumes right after
+// `last`. Returns the matching subfield or NULL if there is none.
+struct marcsubfield *marchash_get_subfield (char key, struct marcfield *field, struct marcsubfield *last)
+{
+    struct marcsubfield *cur;
+
+    for (cur = last ? last->next : field->subfields; cur; cur = cur->next)
+        if (cur->key == key)
+            return cur;
+    return NULL;
+}
+
+// Joins the whitespace-trimmed values of all subfields of `field`, in list
+// order, with `delim` between consecutive values. The result is allocated
+// from nmem. Returns NULL when the computed size is not positive (e.g. a
+// field without subfields and a non-empty delimiter).
+char *marchash_catenate_subfields (struct marcfield *field, char *delim, NMEM nmem)
+{
+    struct marcsubfield *sub;
+    char *joined;
+    int delim_len = strlen(delim);
+    // one byte for the terminator, and one delimiter fewer than subfields
+    int needed = 1 - delim_len;
+
+    // maybe it would make sense to have an nmem strcpy/strcat?
+    for (sub = field->subfields; sub; sub = sub->next)
+        needed += strlen(sub->val) + delim_len;
+
+    if (needed <= 0)
+        return NULL;
+
+    joined = nmem_malloc(nmem, needed);
+    *joined = '\0';
+    for (sub = field->subfields; sub; sub = sub->next)
+    {
+        strtrimcat(joined, sub->val);
+        if (sub->next)
+            strcat(joined, delim);
+    }
+    return joined;
+}
--- /dev/null
+#ifndef MARCHASH_H
+#define MARCHASH_H
+
+// Mask applied to a hash value to select a table slot; the table holds
+// MARCHASH_MASK + 1 entries.
+#define MARCHASH_MASK 127
+
+// Hash table of MARC fields. Every slot heads a singly linked chain of the
+// fields whose tag hashed to that slot.
+struct marchash
+{
+    struct marcfield *table[MARCHASH_MASK + 1];
+    // allocator used for the fields and subfields added to this table
+    NMEM nmem;
+};
+
+// One MARC field stored in a marchash chain.
+struct marcfield
+{
+    // field tag: up to three characters plus the terminating NUL
+    char key[4];
+    // whitespace-trimmed copy of the value passed to marchash_add_field
+    char *val;
+    // subfields in the order they were added, NULL if there are none
+    struct marcsubfield *subfields;
+    // next field in the same hash chain (its tag may differ)
+    struct marcfield *next;
+};
+
+// One subfield of a marcfield.
+struct marcsubfield
+{
+    // single-character subfield code
+    char key;
+    // copy of the value passed to marchash_add_subfield (not trimmed)
+    char *val;
+    // next subfield of the same field, NULL at the end of the list
+    struct marcsubfield *next;
+};
+
+// Creates an empty hash table whose memory comes from nmem.
+struct marchash *marchash_create (NMEM nmem);
+// Stores the controlfield/datafield children of rec_node (and their
+// subfield children) in marchash.
+int marchash_ingest_marcxml (struct marchash *marchash, xmlNodePtr rec_node);
+// Appends a field with tag `key` and a trimmed copy of `value`.
+struct marcfield *marchash_add_field (struct marchash *marchash, char *key, char *value);
+// Appends a subfield with code `key` and a copy of `value` to `field`.
+struct marcsubfield *marchash_add_subfield (struct marchash *marchash, struct marcfield *field, char key, char *value);
+// Returns the first field tagged `key` (last == NULL) or the next one
+// after `last`; NULL when there is none.
+struct marcfield *marchash_get_field (struct marchash *marchash, char *key, struct marcfield *last);
+// Returns the first subfield of `field` with code `key` (last == NULL) or
+// the next one after `last`; NULL when there is none.
+struct marcsubfield *marchash_get_subfield (char key, struct marcfield *field, struct marcsubfield *last);
+// Joins the trimmed values of all subfields of `field` with `delim`; the
+// result is allocated from nmem. Declared here because marcmap.c calls it:
+// without a prototype the call is implicitly typed as returning int.
+char *marchash_catenate_subfields (struct marcfield *field, char *delim, NMEM nmem);
+#endif
--- /dev/null
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <libxml/parser.h>
+#include <libxml/tree.h>
+
+#include <yaz/nmem.h>
+
+#include <marcmap.h>
+#include <marchash.h>
+
+// Loads a MARC mapping file. Every usable line holds three
+// whitespace-separated tokens: the MARC field tag, the subfield selector
+// (only the last character of that token is kept) and the pazpar2 metadata
+// name. Anything after the third token is ignored. Lines with fewer than
+// three tokens (blank lines included) and lines that do not fit the read
+// buffer are skipped. Tokens are expected to be shorter than 256 characters.
+//
+// All records and strings are allocated from nmem. Returns the head of the
+// list in file order, or NULL if the file cannot be opened or contains no
+// usable line.
+struct marcmap *marcmap_load(char *filename, NMEM nmem) {
+    struct marcmap *head = NULL;
+    struct marcmap **tail = &head;
+    char line[1024];
+    FILE *fp;
+
+    fp = fopen(filename, "r");
+    if (!fp)
+        return NULL;
+
+    while (fgets(line, sizeof line, fp))
+    {
+        char field[256];
+        char subfield[256];
+        char pz[256];
+        struct marcmap *mm;
+
+        // No newline in the buffer: either this is the last line of the
+        // file, the line ends exactly at the buffer boundary, or the line
+        // is too long. Only the last case is dropped, together with the
+        // rest of that line.
+        if (!strchr(line, '\n'))
+        {
+            int ch = getc(fp);
+            if (ch != EOF && ch != '\n')
+            {
+                while ((ch = getc(fp)) != EOF && ch != '\n')
+                    ;
+                continue;
+            }
+        }
+
+        // allow blank and incomplete lines
+        if (sscanf(line, "%255s %255s %255s", field, subfield, pz) != 3)
+            continue;
+
+        mm = nmem_malloc(nmem, sizeof *mm);
+        // first token, marc field tag
+        mm->field = nmem_malloc(nmem, strlen(field) + 1);
+        strcpy(mm->field, field);
+        // second, marc subfield, just a char
+        mm->subfield = subfield[strlen(subfield) - 1];
+        // third, pz fieldname
+        mm->pz = nmem_malloc(nmem, strlen(pz) + 1);
+        strcpy(mm->pz, pz);
+        mm->next = NULL;
+
+        *tail = mm;
+        tail = &mm->next;
+    }
+    fclose(fp);
+    return head;
+}
+
+// Adds <pz:metadata type="TYPE">VALUE</pz:metadata> below root.
+static void marcmap_add_metadata(xmlNodePtr root, xmlNsPtr ns, const char *type, const char *value)
+{
+    // xmlNewTextChild treats value as plain text; xmlNewChild would parse it
+    // as raw XML content, so a '&' in the MARC data would be read as the
+    // start of an entity reference
+    xmlNodePtr node = xmlNewTextChild(root, ns, BAD_CAST "metadata", BAD_CAST value);
+
+    if (node)
+        xmlSetProp(node, BAD_CAST "type", BAD_CAST type);
+}
+
+// Returns the first subfield `code` of the first field tagged `tag`, or NULL.
+static struct marcsubfield *marcmap_first_subfield(struct marchash *marchash, char *tag, char code)
+{
+    struct marcfield *field = marchash_get_field(marchash, tag, NULL);
+
+    return field ? marchash_get_subfield(code, field, NULL) : NULL;
+}
+
+// Converts the MARCXML record in xml_in into a new document whose root is a
+// <record> element with one pz:metadata child per value selected by the
+// mapping list `marcmap`:
+//   '$'  the value of the field itself
+//   '*'  all subfields of the field joined with a blank
+//   else every subfield carrying that code
+// In addition a "medium" metadata element and a "mergekey" attribute on the
+// root are derived from hard coded rules.
+//
+// Returns the new document (to be released by the caller), or NULL if
+// xml_in has no root element.
+xmlDoc *marcmap_apply(struct marcmap *marcmap, xmlDoc *xml_in)
+{
+    char mergekey[1024];
+    char medium[32];
+    char *s;
+    NMEM nmem;
+    xmlNsPtr ns_pz;
+    xmlDocPtr xml_out;
+    xmlNodePtr xml_out_root;
+    xmlNodePtr rec_node;
+    struct marchash *marchash;
+    struct marcfield *field;
+    struct marcsubfield *subfield;
+    struct marcsubfield *title;
+    struct marcsubfield *author;
+    struct marcmap *mmcur;
+
+    rec_node = xmlDocGetRootElement(xml_in);
+    if (!rec_node)
+        return NULL;
+
+    xml_out = xmlNewDoc(BAD_CAST "1.0");
+    xml_out_root = xmlNewNode(NULL, BAD_CAST "record");
+    xmlDocSetRootElement(xml_out, xml_out_root);
+    ns_pz = xmlNewNs(xml_out_root, BAD_CAST "http://www.indexdata.com/pazpar2/1.0", BAD_CAST "pz");
+
+    // scratch memory for the hash and the joined strings; libxml2 copies
+    // whatever is handed to it, so it can be released before returning
+    nmem = nmem_create();
+    marchash = marchash_create(nmem);
+    marchash_ingest_marcxml(marchash, rec_node);
+
+    for (mmcur = marcmap; mmcur != NULL; mmcur = mmcur->next)
+    {
+        for (field = marchash_get_field(marchash, mmcur->field, NULL);
+             field != NULL;
+             field = marchash_get_field(marchash, mmcur->field, field))
+        {
+            s = NULL;
+            if (mmcur->subfield == '$')
+                // field value
+                s = field->val;
+            else if (mmcur->subfield == '*')
+                // catenate all subfields
+                s = marchash_catenate_subfields(field, " ", nmem);
+
+            if (s)
+            {
+                marcmap_add_metadata(xml_out_root, ns_pz, mmcur->pz, s);
+            }
+            else if (mmcur->subfield)
+            {
+                // subfield value, once per occurrence of the code
+                for (subfield = marchash_get_subfield(mmcur->subfield, field, NULL);
+                     subfield != NULL;
+                     subfield = marchash_get_subfield(mmcur->subfield, field, subfield))
+                {
+                    if (subfield->val)
+                        marcmap_add_metadata(xml_out_root, ns_pz, mmcur->pz, subfield->val);
+                }
+            }
+        }
+    }
+
+    // hard coded mappings
+
+    // medium
+    if ((subfield = marcmap_first_subfield(marchash, "245", 'h')))
+        // snprintf always terminates, unlike strncpy on an overlong value
+        snprintf(medium, sizeof medium, "%s", subfield->val);
+    else if (marcmap_first_subfield(marchash, "900", 'a')
+             || marcmap_first_subfield(marchash, "900", 'b'))
+        strcpy(medium, "electronic resource");
+    else if (marcmap_first_subfield(marchash, "773", 't'))
+        strcpy(medium, "article");
+    else
+        strcpy(medium, "book");
+
+    marcmap_add_metadata(xml_out_root, ns_pz, "medium", medium);
+
+    // merge key: title from 245$a, author from 100$a (the tag the mapping
+    // file uses for "author"); over-long keys are truncated
+    title = marcmap_first_subfield(marchash, "245", 'a');
+    author = marcmap_first_subfield(marchash, "100", 'a');
+    snprintf(mergekey, sizeof mergekey, "title %s author %s medium %s",
+             title ? title->val : "",
+             author ? author->val : "",
+             medium);
+
+    xmlSetProp(xml_out_root, BAD_CAST "mergekey", BAD_CAST mergekey);
+
+    nmem_destroy(nmem);
+    return xml_out;
+}
--- /dev/null
+#ifndef MARCMAP_H
+#define MARCMAP_H
+
+// One entry of a MARC mapping list: which MARC value becomes which
+// pazpar2 metadata element.
+struct marcmap
+{
+    // MARC field tag to look up
+    char *field;
+    // selector: '$' takes the field's own value, '*' joins all of its
+    // subfields, any other character selects subfields with that code
+    char subfield;
+    // value written to the "type" attribute of the generated metadata element
+    char *pz;
+    // next mapping entry, NULL at the end of the list
+    struct marcmap *next;
+};
+
+// Reads the mapping file `filename` and returns the head of the resulting
+// list; the list is allocated from nmem.
+struct marcmap *marcmap_load(char *filename, NMEM nmem);
+// Builds a new document with a <record> root from the MARCXML record in
+// xml_in according to the mapping list and returns it.
+xmlDoc *marcmap_apply(struct marcmap *marcmap, xmlDoc *xml_in);
+
+#endif
// Simple sequence of stylesheets run in series.
struct database_retrievalmap {
xsltStylesheet *stylesheet;
+ // MARC mapping list applied when stylesheet is NULL
+ struct marcmap *marcmap;
struct database_retrievalmap *next;
};