2 * Copyright (C) 1994-2002, Index Data
5 * $Id: recgrs.c,v 1.53 2002-07-03 10:05:19 adam Exp $
10 #include <sys/types.h>
21 #define GRS_MAX_WORD 512
27 struct grs_handler *next;
31 struct grs_handler *handlers;
/* Hand a record read to the handler in h whose type name equals the part of */
/* type before the first dot; the reader result is stored through *root. */
34 static int read_grs_type (struct grs_handlers *h,
35 struct grs_read_info *p, const char *type,
38 struct grs_handler *gh = h->handlers;
/* cp marks where the handler name ends: the first dot in type, if any. */
39 const char *cp = strchr (type, '.');
/* No dot, or a dot in first position: point cp at the terminating NUL. */
41 if (cp == NULL || cp == type)
43 cp = strlen(type) + type;
/* The text following the dot is copied into p->type for the handler. */
47 strcpy (p->type, cp+1);
48 for (gh = h->handlers; gh; gh = gh->next)
/* Only the first cp-type bytes of type are compared with the handler name. */
/* NOTE(review): a handler name that merely begins with those bytes also */
/* satisfies this test - confirm that prefix matching is intended. */
50 if (!memcmp (type, gh->type->type, cp-type))
/* The value returned by the init hook becomes the handler clientData. */
55 gh->clientData = (*gh->type->init)();
/* Lend the handler state to p, run the reader, then store the state back. */
57 p->clientData = gh->clientData;
58 *root = (gh->type->read)(p);
59 gh->clientData = p->clientData;
/* Allocate a handler list entry with xmalloc; its next link is set to the */
/* current head of h->handlers. */
/* NOTE(review): presumably the entry records t and becomes the new head - confirm. */
66 static void grs_add_handler (struct grs_handlers *h, RecTypeGrs t)
68 struct grs_handler *gh = (struct grs_handler *) xmalloc (sizeof(*gh));
69 gh->next = h->handlers;
/* Allocate the handlers container and register the sgml, regx, tcl, marc */
/* and xml record readers with it through grs_add_handler. */
/* NOTE(review): presumably h is returned as the module clientData - confirm. */
76 static void *grs_init(RecType recType)
78 struct grs_handlers *h = (struct grs_handlers *) xmalloc (sizeof(*h));
81 grs_add_handler (h, recTypeGrs_sgml);
82 grs_add_handler (h, recTypeGrs_regx);
84 grs_add_handler (h, recTypeGrs_tcl);
86 grs_add_handler (h, recTypeGrs_marc);
88 grs_add_handler (h, recTypeGrs_xml);
/* clientData is treated as a struct grs_handlers; the destroy hook of a */
/* handler type is invoked with the clientData stored for that handler. */
93 static void grs_destroy(void *clientData)
95 struct grs_handlers *h = (struct grs_handlers *) clientData;
/* gh_next is declared alongside the cursor, starting at the list head. */
96 struct grs_handler *gh = h->handlers, *gh_next;
101 (*gh->type->destroy)(gh->clientData);
/* Fill wrd with XPath index words for node n under attribute set */
/* VAL_IDXPATH. One form takes the text of a data node; the other takes a */
/* path assembled from the tag names of n and its ancestors. */
/* level only sets the indentation of the flagShowRecords output. */
/* NOTE(review): use is presumably stored as the attribute use value - confirm. */
108 static void index_xpath (data1_node *n, struct recExtractCtrl *p,
109 int level, RecWord *wrd, int use)
/* Scratch buffer for the assembled tag path. */
112 char tag_path_full[1024];
/* Data form: the word is the text of the data node itself. */
120 wrd->string = n->u.data.data;
121 wrd->length = n->u.data.len;
/* NOTE(review): this statement ends with a comma where the matching */
/* assignment further down ends with a semicolon; the comma operator chains */
/* it to the expression that follows - confirm this is intended. */
122 wrd->attrSet = VAL_IDXPATH,
124 if (p->flagShowRecords)
126 printf("%*s data=", (level + 1) * 4, "");
/* Show at most the first 8 bytes of the word. */
127 for (i = 0; i<wrd->length && i < 8; i++)
128 fputc (wrd->string[i], stdout);
/* Walk from n towards the root, so tag names are appended leaf first. */
137 for (nn = n; nn; nn = nn->parent)
139 if (nn->which == DATA1N_tag)
141 size_t tlen = strlen(nn->u.tag.tag);
/* Bounds test: the name must fit with two bytes of the buffer to spare. */
142 if (tlen + flen > (sizeof(tag_path_full)-2))
144 memcpy (tag_path_full + flen, nn->u.tag.tag, tlen);
/* A slash is written after the tag name. */
146 tag_path_full[flen++] = '/';
148 else if (nn->which == DATA1N_root)
/* Path form: the word is the assembled tag path. */
152 wrd->string = tag_path_full;
154 wrd->attrSet = VAL_IDXPATH;
156 if (p->flagShowRecords)
158 printf("%*s tag=", (level + 1) * 4, "");
/* Show at most the first 40 bytes of the path. */
159 for (i = 0; i<wrd->length && i < 40; i++)
160 fputc (wrd->string[i], stdout);
/* Index node n according to the termlists of the element attached to tag */
/* par, or to the nearest ancestor tag that has an element. */
/* For each termlist entry the word is taken from the source the entry names */
/* and wrd receives the register type, attribute set and attribute use. */
173 static void index_termlist (data1_node *par, data1_node *n,
174 struct recExtractCtrl *p, int level, RecWord *wrd)
176 data1_termlist *tlist = 0;
/* Default data type; replaced below when the element has a tag definition. */
177 data1_datatype dtype = DATA1K_string;
179 * cycle up towards the root until we find a tag with an att..
180 * this has the effect of indexing locally defined tags with
181 * the attribute of their ancestor in the record.
/* Climb through parent tags while the current tag has no element. */
184 while (!par->u.tag.element)
185 if (!par->parent || !(par=get_parent_tag(p->dh, par->parent)))
/* Tests for a missing tag or an element without termlists; tlist is set here. */
187 if (!par || !(tlist = par->u.tag.element->termlists))
189 if (par->u.tag.element->tag)
190 dtype = par->u.tag.element->tag->kind;
192 for (; tlist; tlist = tlist->next)
195 /* consider source */
/* Source data: the word is the text of a data node. */
198 if (!strcmp (tlist->source, "data") && n->which == DATA1N_data)
200 wrd->string = n->u.data.data;
201 wrd->length = n->u.data.len;
/* Source tag: the word is the tag name itself. */
203 else if (!strcmp (tlist->source, "tag") && n->which == DATA1N_tag)
205 wrd->string = n->u.tag.tag;
206 wrd->length = strlen(n->u.tag.tag);
/* Source attr(NAME): NAME is parsed into xattr, at most 511 characters. */
/* NOTE(review): assumes xattr holds at least 512 bytes - confirm at its declaration. */
208 else if (sscanf (tlist->source, "attr(%511[^)])", xattr) == 1 &&
209 n->which == DATA1N_tag)
/* NOTE(review): this local p shadows the recExtractCtrl parameter p. */
211 data1_xattr *p = n->u.tag.attributes;
/* Advance until an attribute named xattr is found or the list ends. */
212 while (p && strcmp (p->name, xattr))
216 wrd->string = p->value;
217 wrd->length = strlen(p->value);
/* Optional trace of what is being indexed. */
222 if (p->flagShowRecords)
225 printf("%*sIdx: [%s]", (level + 1) * 4, "",
227 printf("%s:%s [%d] %s",
228 tlist->att->parent->name,
229 tlist->att->name, tlist->att->value,
/* Show at most the first 8 bytes of the word. */
232 for (i = 0; i<wrd->length && i < 8; i++)
233 fputc (wrd->string[i], stdout);
237 fputc ('\n', stdout);
/* Register type is the first character of the structure string. */
241 wrd->reg_type = *tlist->structure;
242 wrd->attrSet = (int) (tlist->att->parent->reference);
243 wrd->attrUse = tlist->att->locals->local;
/* Walk n and its following siblings, recursing into children with level + 1. */
/* When p->flagShowRecords is set a description of each node is printed; */
/* tag and data nodes are passed to index_termlist and, when the root has no */
/* abstract syntax, to index_xpath as well. */
250 static int dumpkeys(data1_node *n, struct recExtractCtrl *p, int level,
253 for (; n; n = n->next)
255 if (p->flagShowRecords) /* display element description to user */
257 if (n->which == DATA1N_root)
259 printf("%*s", level * 4, "");
260 printf("Record type: '%s'\n", n->u.root.type);
262 else if (n->which == DATA1N_tag)
266 printf("%*s", level * 4, "");
/* A tag without an element definition is reported as a local tag. */
267 if (!(e = n->u.tag.element))
268 printf("Local tag: '%s'\n", n->u.tag.tag);
271 printf("Elm: '%s' ", e->name);
274 data1_tag *t = e->tag;
276 printf("TagNam: '%s' ", t->names->name);
279 printf("%s[%d],", t->tagset->name, t->tagset->type);
/* The tag value is printed as a number or as a string depending on t->which. */
282 if (t->which == DATA1T_numeric)
283 printf("%d)", t->value.numeric);
285 printf("'%s')", t->value.string);
292 if (n->which == DATA1N_tag)
/* For a tag node the node serves as its own parent tag. */
294 index_termlist (n, n, p, level, wrd);
295 /* index start tag */
296 if (!n->root->u.root.absyn)
297 index_xpath (n, p, level, wrd, 1);
/* Recurse into the children; a negative result is tested for here. */
301 if (dumpkeys(n->child, p, level + 1, wrd) < 0)
305 if (n->which == DATA1N_data)
/* Data nodes are indexed under their enclosing tag. */
307 data1_node *par = get_parent_tag(p->dh, n);
309 if (p->flagShowRecords)
311 printf("%*s", level * 4, "");
/* Data longer than 32 bytes is shown as its first 24 and last 6 bytes. */
313 if (n->u.data.len > 32)
314 printf("'%.24s ... %.6s'\n", n->u.data.data,
315 n->u.data.data + n->u.data.len-6);
316 else if (n->u.data.len > 0)
317 printf("'%.*s'\n", n->u.data.len, n->u.data.data);
323 index_termlist (par, n, p, level, wrd);
324 if (!n->root->u.root.absyn)
/* Data text is passed to index_xpath with use value 1016. */
325 index_xpath (n, p, level, wrd, 1016);
329 if (n->which == DATA1N_tag)
332 if (!n->root->u.root.absyn)
/* Second index_xpath call for the tag, with use value 2, placed after */
/* the recursive call above. */
333 index_xpath (n, p, level, wrd, 2);
337 if (p->flagShowRecords && n->which == DATA1N_root)
339 printf("%*s-------------\n\n", level * 4, "");
/* Index an already parsed tree n: the schema reference of the root abstract */
/* syntax is converted to an OID and passed to p->schemaAdd, then the tree is */
/* handed to dumpkeys, whose result is returned. */
345 int grs_extract_tree(struct recExtractCtrl *p, data1_node *n)
/* Receives the OID produced by oid_ent_to_oid. */
348 int oidtmp[OID_SIZE];
351 oe.proto = PROTO_Z3950;
352 oe.oclass = CLASS_SCHEMA;
355 oe.value = n->u.root.absyn->reference;
/* schemaAdd is only called when the conversion to an OID succeeds. */
357 if ((oid_ent_to_oid (&oe, oidtmp)))
358 (*p->schemaAdd)(p, oidtmp);
361 return dumpkeys(n, p, 0, &wrd);
/* Read a record through the handler selected by p->subType, register its */
/* schema OID, index the tree with dumpkeys and free the tree. */
/* The result is one of the RECCTRL_EXTRACT_ codes returned below. */
364 static int grs_extract_sub(struct grs_handlers *h, struct recExtractCtrl *p,
368 struct grs_read_info gri;
370 int oidtmp[OID_SIZE];
/* The reader gets the I/O callbacks and offset of the extract control. */
373 gri.readf = p->readf;
374 gri.seekf = p->seekf;
375 gri.tellf = p->tellf;
378 gri.offset = p->offset;
/* A non-zero result from read_grs_type is reported as an error. */
382 if (read_grs_type (h, &gri, p->subType, &n))
383 return RECCTRL_EXTRACT_ERROR;
385 return RECCTRL_EXTRACT_EOF;
386 oe.proto = PROTO_Z3950;
387 oe.oclass = CLASS_SCHEMA;
389 if (!n->u.root.absyn)
390 return RECCTRL_EXTRACT_ERROR;
394 oe.value = n->u.root.absyn->reference;
/* schemaAdd is only called when the conversion to an OID succeeds. */
395 if ((oid_ent_to_oid (&oe, oidtmp)))
396 (*p->schemaAdd)(p, oidtmp);
399 data1_pr_tree (p->dh, n, stdout);
/* The tree is freed on the failure path and on the success path alike. */
402 if (dumpkeys(n, p, 0, &wrd) < 0)
404 data1_free_tree(p->dh, n);
405 return RECCTRL_EXTRACT_ERROR;
407 data1_free_tree(p->dh, n);
408 return RECCTRL_EXTRACT_OK;
/* Extract entry point: clientData is treated as the handlers container and */
/* the work is delegated to grs_extract_sub with a newly created NMEM. */
411 static int grs_extract(void *clientData, struct recExtractCtrl *p)
414 NMEM mem = nmem_create ();
415 struct grs_handlers *h = (struct grs_handlers *) clientData;
417 ret = grs_extract_sub(h, p, mem);
423 * Return: -1: Nothing done. 0: Ok. >0: Bib-1 diagnostic.
/* Resolve the element selection asked for by record composition c against */
/* the abstract syntax of tree n. */
/* Simple form: only a generic element set name is accepted (else 26) and it */
/* is looked up with data1_getesetbyname (25 when unknown). */
/* Complex form: the generic elementSpec is either an element set name, */
/* handled the same way, or an external spec that must be Espec-1 (else 25). */
/* With an Espec-1 in hand the result of data1_doespec1 is returned. */
425 static int process_comp(data1_handle dh, data1_node *n, Z_RecordComposition *c)
427 data1_esetname *eset;
433 case Z_RecordComp_simple:
434 if (c->u.simple->which != Z_ElementSetNames_generic)
435 return 26; /* only generic form supported. Fix this later */
436 if (!(eset = data1_getesetbyname(dh, n->u.root.absyn,
437 c->u.simple->u.generic)))
439 logf(LOG_LOG, "Unknown esetname '%s'", c->u.simple->u.generic);
440 return 25; /* invalid esetname */
442 logf(LOG_DEBUG, "Esetname '%s' in simple compspec",
443 c->u.simple->u.generic);
446 case Z_RecordComp_complex:
447 if (c->u.complex->generic)
449 /* insert check for schema */
/* p is the element spec carried by the generic part, when present. */
450 if ((p = c->u.complex->generic->elementSpec))
454 case Z_ElementSpec_elementSetName:
456 data1_getesetbyname(dh, n->u.root.absyn,
457 p->u.elementSetName)))
459 logf(LOG_LOG, "Unknown esetname '%s'",
460 p->u.elementSetName);
461 return 25; /* invalid esetname */
463 logf(LOG_DEBUG, "Esetname '%s' in complex compspec",
464 p->u.elementSetName);
467 case Z_ElementSpec_externalSpec:
468 if (p->u.externalSpec->which == Z_External_espec1)
470 logf(LOG_DEBUG, "Got Espec-1");
471 espec = p->u.externalSpec-> u.espec1;
475 logf(LOG_LOG, "Unknown external espec.");
476 return 25; /* bad. what is proper diagnostic? */
487 logf (LOG_DEBUG, "Element: Espec-1 match");
488 return data1_doespec1(dh, n, espec);
492 logf (LOG_DEBUG, "Element: all match");
/* Decorate top with the idzebra namespace attribute and add child tags */
/* idzebra:size, idzebra:score, idzebra:localnumber and idzebra:filename */
/* built from fields of the retrieve control p. */
497 static void add_idzebra_info (struct recRetrieveCtrl *p, data1_node *top,
/* Attribute list handed to data1_tag_add_attr: name first, then value. */
500 const char *idzebra_ns[7];
502 idzebra_ns[0] = "xmlns:idzebra";
503 idzebra_ns[1] = "http://www.indexdata.dk/zebra/";
506 data1_tag_add_attr (p->dh, mem, top, idzebra_ns);
508 data1_mk_tag_data_int (p->dh, top, "idzebra:size", p->recordSize,
511 data1_mk_tag_data_int (p->dh, top, "idzebra:score",
514 data1_mk_tag_data_int (p->dh, top, "idzebra:localnumber", p->localno,
517 data1_mk_tag_data_text(p->dh, top, "idzebra:filename",
/* Retrieve entry point: the stored record is read through the handler */
/* selected by p->subType, size, rank and system number tags are added, */
/* schema and syntax mappings and the element spec are applied, and the */
/* tree is serialised in the format chosen from p->input_format. */
521 static int grs_retrieve(void *clientData, struct recRetrieveCtrl *p)
523 data1_node *node = 0, *onode = 0, *top;
526 int res, selected = 0;
528 struct grs_read_info gri;
530 struct grs_handlers *h = (struct grs_handlers *) clientData;
/* VAL_NONE means that no schema was asked for. */
531 int requested_schema = VAL_NONE;
532 data1_marctab *marctab;
/* The reader gets the I/O callbacks of the retrieve control. */
536 gri.readf = p->readf;
537 gri.seekf = p->seekf;
538 gri.tellf = p->tellf;
545 logf (LOG_DEBUG, "grs_retrieve");
/* Parse the stored record via the handler selected by p->subType. */
546 if (read_grs_type (h, &gri, p->subType, &node))
559 data1_pr_tree (p->dh, node, stdout);
/* top is the root tag under which the extra tags below are created. */
561 top = data1_get_root_tag (p->dh, node);
563 logf (LOG_DEBUG, "grs_retrieve: size");
/* Add a size tag whose text is p->recordSize formatted as decimal. */
564 if ((dnew = data1_mk_tag_data_wd(p->dh, top, "size", mem)))
566 dnew->u.data.what = DATA1I_text;
/* The text is written into the node local buffer lbuf. */
567 dnew->u.data.data = dnew->lbuf;
568 sprintf(dnew->u.data.data, "%d", p->recordSize);
569 dnew->u.data.len = strlen(dnew->u.data.data);
/* The score tag name comes from resource tagrank, default rank; the tag is */
/* skipped when that name is 0 or the score is negative. */
572 tagname = res_get_def(p->res, "tagrank", "rank");
573 if (strcmp(tagname, "0") && p->score >= 0 &&
574 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
576 logf (LOG_DEBUG, "grs_retrieve: %s", tagname);
577 dnew->u.data.what = DATA1I_num;
578 dnew->u.data.data = dnew->lbuf;
579 sprintf(dnew->u.data.data, "%d", p->score);
580 dnew->u.data.len = strlen(dnew->u.data.data);
/* Same pattern for the local record number: resource tagsysno, default */
/* localControlNumber, skipped when the name is 0 or localno is not positive. */
583 tagname = res_get_def(p->res, "tagsysno", "localControlNumber");
584 if (strcmp(tagname, "0") && p->localno > 0 &&
585 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
587 logf (LOG_DEBUG, "grs_retrieve: %s", tagname);
588 dnew->u.data.what = DATA1I_text;
589 dnew->u.data.data = dnew->lbuf;
591 sprintf(dnew->u.data.data, "%d", p->localno);
592 dnew->u.data.len = strlen(dnew->u.data.data);
595 data1_pr_tree (p->dh, node, stdout);
/* A complex composition may carry an explicit schema OID; its value */
/* becomes requested_schema. */
597 if (p->comp && p->comp->which == Z_RecordComp_complex &&
598 p->comp->u.complex->generic &&
599 p->comp->u.complex->generic->schema)
601 oident *oe = oid_getentbyoid (p->comp->u.complex->generic->schema);
603 requested_schema = oe->value;
606 /* If schema has been specified, map if possible, then check that
607 * we got the right one
609 if (requested_schema != VAL_NONE)
611 logf (LOG_DEBUG, "grs_retrieve: schema mapping");
612 for (map = node->u.root.absyn->maptabs; map; map = map->next)
614 if (map->target_absyn_ref == requested_schema)
617 if (!(node = data1_map_record(p->dh, onode, map, mem)))
626 if (node->u.root.absyn &&
627 requested_schema != node->u.root.absyn->reference)
635 * Does the requested format match a known syntax-mapping? (this reflects
636 * the overlap of schema and formatting which is inherent in the MARC
639 yaz_log (LOG_DEBUG, "grs_retrieve: syntax mapping");
640 if (node->u.root.absyn)
641 for (map = node->u.root.absyn->maptabs; map; map = map->next)
643 if (map->target_absyn_ref == p->input_format)
646 if (!(node = data1_map_record(p->dh, onode, map, mem)))
655 yaz_log (LOG_DEBUG, "grs_retrieve: schemaIdentifier");
656 if (node->u.root.absyn &&
657 node->u.root.absyn->reference != VAL_NONE &&
658 p->input_format == VAL_GRS1)
662 int oidtmp[OID_SIZE];
664 oe.proto = PROTO_Z3950;
665 oe.oclass = CLASS_SCHEMA;
666 oe.value = node->u.root.absyn->reference;
668 if ((oid = oid_ent_to_oid (&oe, oidtmp)))
671 data1_handle dh = p->dh;
675 for (ii = oid; *ii >= 0; ii++)
679 sprintf(p, "%d", *ii);
684 if ((dnew = data1_mk_tag_data_wd(dh, node,
685 "schemaIdentifier", mem)))
687 dnew->u.data.what = DATA1I_oid;
688 dnew->u.data.data = (char *) nmem_malloc(mem, p - tmp);
689 memcpy(dnew->u.data.data, tmp, p - tmp);
690 dnew->u.data.len = p - tmp;
695 logf (LOG_DEBUG, "grs_retrieve: element spec");
696 if (p->comp && (res = process_comp(p->dh, node, p->comp)) > 0)
700 data1_free_tree(p->dh, onode);
701 data1_free_tree(p->dh, node);
705 else if (p->comp && !res)
709 data1_pr_tree (p->dh, node, stdout);
711 logf (LOG_DEBUG, "grs_retrieve: transfer syntax mapping");
712 switch (p->output_format = (p->input_format != VAL_NONE ?
713 p->input_format : VAL_SUTRS))
717 add_idzebra_info (p, top, mem);
719 if (!(p->rec_buf = data1_nodetoidsgml(p->dh, node, selected,
724 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
725 memcpy (new_buf, p->rec_buf, p->rec_len);
726 p->rec_buf = new_buf;
731 if (!(p->rec_buf = data1_nodetogr(p->dh, node, selected,
733 p->diagnostic = 238; /* not available in requested syntax */
735 p->rec_len = (size_t) (-1);
738 if (!(p->rec_buf = data1_nodetoexplain(p->dh, node, selected,
742 p->rec_len = (size_t) (-1);
745 if (!(p->rec_buf = data1_nodetosummary(p->dh, node, selected,
749 p->rec_len = (size_t) (-1);
752 if (!(p->rec_buf = data1_nodetobuf(p->dh, node, selected,
/* The buffer returned by the converter is copied into ODR memory. */
757 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
758 memcpy (new_buf, p->rec_buf, p->rec_len);
759 p->rec_buf = new_buf;
763 if (!(p->rec_buf = data1_nodetosoif(p->dh, node, selected,
768 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
769 memcpy (new_buf, p->rec_buf, p->rec_len);
770 p->rec_buf = new_buf;
/* The MARC table search below reads the abstract syntax of the root. */
774 if (!node->u.root.absyn)
/* Look for a MARC table whose reference equals the requested format. */
779 for (marctab = node->u.root.absyn->marc; marctab;
780 marctab = marctab->next)
781 if (marctab->reference == p->input_format)
788 if (!(p->rec_buf = data1_nodetomarc(p->dh, marctab, node,
789 selected, &p->rec_len)))
/* The MARC buffer is copied into ODR memory as well. */
793 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
794 memcpy (new_buf, p->rec_buf, p->rec_len);
795 p->rec_buf = new_buf;
/* Free node and, in a separate call, onode. */
799 data1_free_tree(p->dh, node);
801 data1_free_tree(p->dh, onode);
/* File-local record type descriptor for this module. */
806 static struct recType grs_type =
/* Exported handle: recTypeGrs holds the address of grs_type. */
815 RecType recTypeGrs = &grs_type;