1 /* $Id: recgrs.c,v 1.11 2006-11-30 11:03:57 adam Exp $
2 Copyright (C) 1995-2006
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25 #include <sys/types.h>
32 #include <idzebra/recgrs.h>
34 #define GRS_MAX_WORD 512
/* State of the termlist source-expression parser.  The functions in this
 * file access its members src, lookahead, tok, len and nmem. */
36 struct source_parser {
/* Lexer step for source expressions.  Loops over ' ' characters at
 * sp->src, scans forward until one of the delimiter characters in
 * "<>();,-: " or the terminating NUL is reached, and can assign the
 * character at sp->src to sp->lookahead.
 * NOTE(review): the return value and the complete token classification
 * should be confirmed against the full function body. */
44 static int sp_lex(struct source_parser *sp)
46 while (*sp->src == ' ')
50 while (*sp->src && !strchr("<>();,-: ", *sp->src))
59 sp->lookahead = *sp->src;
/* Forward declaration: sp_range() and sp_first() call sp_expr() before
 * its definition appears. */
66 static int sp_expr(struct source_parser *sp, data1_node *n, RecWord *wrd);
/* Parses the argument list of a range expression: ( expr , expr [, expr] ).
 * The first argument is evaluated into wrd.  The second is evaluated into
 * a temporary word and converted with atoi_n() to start; an optional
 * third argument is converted the same way to len.  When wrd holds a
 * non-empty term, term_buf is advanced by start, term_len is reduced by
 * start, and term_len is then compared against len. */
68 static int sp_range(struct source_parser *sp, data1_node *n, RecWord *wrd)
75 if (sp->lookahead != '(')
77 sp_lex(sp); /* skip ( */
80 if (!sp_expr(sp, n, wrd))
83 if (sp->lookahead != ',')
85 sp_lex(sp); /* skip , */
88 if (!sp_expr(sp, n, &tmp_w))
90 start = atoi_n(tmp_w.term_buf, tmp_w.term_len);
92 if (sp->lookahead == ',')
94 sp_lex(sp); /* skip , */
97 if (!sp_expr(sp, n, &tmp_w))
99 len = atoi_n(tmp_w.term_buf, tmp_w.term_len);
105 if (sp->lookahead != ')')
109 if (wrd->term_buf && wrd->term_len)
/* NOTE(review): no clamp of start against term_len (or check for a
 * negative start) is visible before the pointer is advanced -- confirm
 * that an out-of-range start cannot yield a negative term_len. */
111 wrd->term_buf += start;
112 wrd->term_len -= start;
113 if (wrd->term_len > len)
/* Parses the argument list of a first expression: ( expr , expr ... ).
 * The first argument is evaluated into wrd.  Each further comma-separated
 * argument is evaluated into search_w and searched for inside wrd by a
 * byte-wise comparison at every offset i; min_pos keeps the smallest
 * matching offset.  min_pos falls back to 0 when nothing was found.  The
 * position is formatted as a decimal string, duplicated into sp->nmem
 * with nmem_strdup(), and stored as the new wrd term. */
119 static int sp_first(struct source_parser *sp, data1_node *n, RecWord *wrd)
124 if (sp->lookahead != '(')
126 sp_lex(sp); /* skip ( */
127 if (!sp_expr(sp, n, wrd))
129 while (sp->lookahead == ',')
133 sp_lex(sp); /* skip , */
135 if (!sp_expr(sp, n, &search_w))
137 for (i = 0; i<wrd->term_len; i++)
140 for (j = 0; j<search_w.term_len && i+j < wrd->term_len; j++)
141 if (wrd->term_buf[i+j] != search_w.term_buf[j])
143 if (j == search_w.term_len) /* match ? */
145 if (min_pos == -1 || i < min_pos)
151 if (sp->lookahead != ')')
155 min_pos = 0; /* the default if not found */
156 sprintf(num_str, "%d", min_pos);
157 wrd->term_buf = nmem_strdup(sp->nmem, num_str);
158 wrd->term_len = strlen(wrd->term_buf);
/* Evaluates one source expression for node n into wrd.  The current
 * token (sp->tok, sp->len) is dispatched as follows:
 *   data   - for a DATA1N_data node, the term is the node data
 *   tag    - for a DATA1N_tag node, the term is the tag name
 *   attr   - parenthesised argument names an attribute of a tag node;
 *            the term becomes that attribute value
 *   first  - delegated to sp_first()
 *   range  - delegated to sp_range()
 *   token starting with a digit - copied into sp->nmem memory
 *   token in single quotes      - copied without the quotes
 * Copies made here are not NUL-terminated; term_len carries the length. */
162 static int sp_expr(struct source_parser *sp, data1_node *n, RecWord *wrd)
164 if (sp->lookahead != 't')
166 if (sp->len == 4 && !memcmp(sp->tok, "data", sp->len))
168 if (n->which == DATA1N_data)
170 wrd->term_buf = n->u.data.data;
171 wrd->term_len = n->u.data.len;
175 else if (sp->len == 3 && !memcmp(sp->tok, "tag", sp->len))
177 if (n->which == DATA1N_tag)
179 wrd->term_buf = n->u.tag.tag;
180 wrd->term_len = strlen(n->u.tag.tag);
184 else if (sp->len == 4 && !memcmp(sp->tok, "attr", sp->len))
188 if (sp->lookahead != '(')
192 if (!sp_expr(sp, n, &tmp_w))
197 if (n->which == DATA1N_tag)
199 data1_xattr *p = n->u.tag.attributes;
/* NOTE(review): the loop condition continues only while the lengths
 * differ AND memcmp() is non-zero, so (assuming the body advances p) it
 * stops at the first attribute whose name merely has the same length or
 * shares the prefix.  A logical OR was probably intended -- confirm. */
200 while (p && strlen(p->name) != tmp_w.term_len &&
201 memcmp (p->name, tmp_w.term_buf, tmp_w.term_len))
205 wrd->term_buf = p->value;
206 wrd->term_len = strlen(p->value);
209 if (sp->lookahead != ')')
213 else if (sp->len == 5 && !memcmp(sp->tok, "first", sp->len))
215 return sp_first(sp, n, wrd);
217 else if (sp->len == 5 && !memcmp(sp->tok, "range", sp->len))
219 return sp_range(sp, n, wrd);
221 else if (sp->len > 0 && isdigit(*(unsigned char *)sp->tok))
224 wrd->term_len = sp->len;
225 b = nmem_malloc(sp->nmem, sp->len);
226 memcpy(b, sp->tok, sp->len);
230 else if (sp->len > 2 && sp->tok[0] == '\'' && sp->tok[sp->len-1] == '\'')
233 wrd->term_len = sp->len - 2;
234 b = nmem_malloc(sp->nmem, wrd->term_len);
235 memcpy(b, sp->tok+1, wrd->term_len);
/* Allocates a source_parser with xmalloc() and gives it its own NMEM
 * handle from nmem_create().  Release with source_parser_destroy(). */
248 static struct source_parser *source_parser_create(void)
250 struct source_parser *sp = xmalloc(sizeof(*sp));
252 sp->nmem = nmem_create();
/* Counterpart of source_parser_create(): destroys the parser NMEM handle.
 * NOTE(review): releasing sp itself is presumably done here as well --
 * confirm against the full body. */
256 static void source_parser_destroy(struct source_parser *sp)
260 nmem_destroy(sp->nmem);
/* Entry point of the source-expression parser: resets the parser NMEM
 * (so terms allocated by an earlier parse are discarded) and evaluates
 * the expression src for node n into wrd via sp_expr().  Returns the
 * result of sp_expr(). */
264 static int sp_parse(struct source_parser *sp,
265 data1_node *n, RecWord *wrd, const char *src)
271 nmem_reset(sp->nmem);
274 return sp_expr(sp, n, wrd);
/* Checks an XPath predicate p against node n.
 * Relation predicates: only attribute names (leading '@') and the '='
 * operator are handled; anything else is logged with YLOG_WARN as
 * ignored.  The attributes of n are searched for the named attribute
 * and, when an operator is given, the attribute value is compared with
 * the predicate value using strcmp().
 * Boolean predicates: "and" / "or" combine the recursive results for the
 * left and right operands; other operators are logged as ignored. */
277 int d1_check_xpath_predicate(data1_node *n, struct xpath_predicate *p)
286 if (p->which == XPATH_PREDICATE_RELATION) {
287 if (p->u.relation.name[0]) {
288 if (*p->u.relation.name != '@') {
290 " Only attributes (@) are supported in xelm xpath predicates");
291 yaz_log(YLOG_WARN, "predicate %s ignored", p->u.relation.name);
294 attname = p->u.relation.name + 1;
296 /* looking for the attribute with a specified name */
297 for (attr = n->u.tag.attributes; attr; attr = attr->next) {
298 if (!strcmp(attr->name, attname)) {
299 if (p->u.relation.op[0]) {
300 if (*p->u.relation.op != '=') {
302 "Only '=' relation is supported (%s)",p->u.relation.op);
303 yaz_log(YLOG_WARN, "predicate %s ignored", p->u.relation.name);
306 if (!strcmp(attr->value, p->u.relation.value)) {
311 /* attribute exists, no value specified */
321 else if (p->which == XPATH_PREDICATE_BOOLEAN) {
322 if (!strcmp(p->u.boolean.op,"and")) {
323 return d1_check_xpath_predicate(n, p->u.boolean.left)
324 && d1_check_xpath_predicate(n, p->u.boolean.right);
326 else if (!strcmp(p->u.boolean.op,"or")) {
327 return (d1_check_xpath_predicate(n, p->u.boolean.left)
328 || d1_check_xpath_predicate(n, p->u.boolean.right));
330 yaz_log(YLOG_WARN, "Unknown boolean relation %s, ignored",p->u.boolean.op);
/* Runs the DFA in dfaar over text, starting from state dfaar[0].  For
 * each input character the transitions of the current state are scanned
 * for one whose character range [ch[0], ch[1]] contains it.
 * NOTE(review): the exact return convention is not evident from the
 * statements shown; the caller asserts the result is 0 or 1. */
339 static int dfa_match_first(struct DFA_state **dfaar, const char *text)
341 struct DFA_state *s = dfaar[0]; /* start state */
344 const char *p = text;
347 for (c = *p++, t = s->trans, i = s->tran_no; --i >= 0; t++)
349 if (c >= t->ch[0] && c <= t->ch[1])
353 /* move to next state and return if we get a match */
361 for (t = s->trans, i = s->tran_no; --i >= 0; t++)
362 if (c >= t->ch[0] && c <= t->ch[1])
372 New function, looking for xpath "element" definitions in abs, by
373 tagpath, using a kind of ugly regexp search. The DFA was built while
374 parsing abs, so here we just go through them and try to match
375 against the given tagpath. The first matching entry is returned.
379 Added support for enhanced xelm. Now [] predicates are considered
380 as well, when selecting indexing rules... (why the hell it's called
/* Looks up the termlist of the first xpath element definition in the
 * abstract syntax of n that matches tagpath.
 * The tag path is wrapped as "/<tagpath>\n" in a freshly allocated
 * buffer and matched against each element DFA with dfa_match_first().
 * match_state caches the result (-1 means not evaluated yet) and is
 * propagated along the match_next chain, i.e. to elements sharing the
 * same regexp.  For a DFA match, the predicates of the xpath location
 * steps are checked with d1_check_xpath_predicate() while walking from
 * the nearest tag node upwards.  Returns xpe->termlists on success.
 * NOTE(review): confirm pexpr is released on every return path. */
387 data1_termlist *xpath_termlist_by_tagpath(char *tagpath, data1_node *n)
389 data1_absyn *abs = n->root->u.root.absyn;
391 data1_xpelement *xpe = 0;
394 struct xpath_location_step *xp;
396 char *pexpr = xmalloc(strlen(tagpath)+5);
398 sprintf (pexpr, "/%s\n", tagpath);
400 for (xpe = abs->xp_elements; xpe; xpe = xpe->next)
401 xpe->match_state = -1; /* don't know if it matches yet */
403 for (xpe = abs->xp_elements; xpe; xpe = xpe->next)
406 int ok = xpe->match_state;
408 { /* don't know whether there is a match yet */
409 data1_xpelement *xpe1;
412 ok = dfa_match_first(xpe->dfa->states, pexpr);
415 /* mark this and following ones with same regexp */
416 for (xpe1 = xpe; xpe1; xpe1 = xpe1->match_next)
417 xpe1->match_state = ok;
420 assert (ok == 0 || ok == 1);
423 /* we have to check the predicates up to the root node */
426 /* find the first tag up in the node structure */
427 for (nn = n; nn && nn->which != DATA1N_tag; nn = nn->parent)
430 /* go from inside out in the node structure, while going
431 backwards through xpath location steps ... */
432 for (i = xpe->xpath_len - 1; i>0; i--)
434 if (!d1_check_xpath_predicate(nn, xp[i].predicate))
440 if (nn->which == DATA1N_tag)
452 yaz_log(YLOG_DEBUG, "Got it");
453 return xpe->termlists;
460 1 start element (tag)
462 3 start attr (and attr-exact)
470 Now, if there is a matching xelm described in abs, for the
471 indexed element or the attribute, then the data is handled according
472 to those definitions...
474 modified by pop, 2002-12-13
477 /* Add xpath index entries for one attribute.  wrd is filled in three
 * times in sequence: the tag path under ZEBRA_XPATH_ELM_BEGIN with index
 * type '0', the attribute value under ZEBRA_XPATH_ATTR_CDATA with index
 * type 'w', and the tag path again under ZEBRA_XPATH_ELM_END with index
 * type '0'.
 * NOTE(review): presumably each setup is followed by a p->tokenAdd call;
 * the use of the name and structure parameters should be confirmed. */
478 static void index_xpath_attr (char *tag_path, char *name, char *value,
479 char *structure, struct recExtractCtrl *p,
482 wrd->index_name = ZEBRA_XPATH_ELM_BEGIN;
483 wrd->index_type = '0';
484 wrd->term_buf = tag_path;
485 wrd->term_len = strlen(tag_path);
489 wrd->index_name = ZEBRA_XPATH_ATTR_CDATA;
490 wrd->index_type = 'w';
491 wrd->term_buf = value;
492 wrd->term_len = strlen(value);
495 wrd->index_name = ZEBRA_XPATH_ELM_END;
496 wrd->index_type = '0';
497 wrd->term_buf = tag_path;
498 wrd->term_len = strlen(tag_path);
/* Builds the tag path of node n into tag_path_full (capacity max bytes).
 * The node chain is walked from n towards the root via ->parent; for each
 * DATA1N_tag node the tag name and a '/' are appended, so the path is
 * written innermost tag first.  A tag is checked against the remaining
 * capacity (tlen + flen > max - 2) before it is copied, the DATA1N_root
 * node is tested for inside the loop, and the result is NUL-terminated. */
503 static void mk_tag_path_full(char *tag_path_full, size_t max, data1_node *n)
508 /* we have to fetch the whole path to the data tag */
509 for (nn = n; nn; nn = nn->parent)
511 if (nn->which == DATA1N_tag)
513 size_t tlen = strlen(nn->u.tag.tag);
514 if (tlen + flen > (max - 2))
516 memcpy (tag_path_full + flen, nn->u.tag.tag, tlen);
518 tag_path_full[flen++] = '/';
521 if (nn->which == DATA1N_root)
524 tag_path_full[flen] = 0;
/* Emits XPath-related index entries for node n through p->tokenAdd.
 * Data part: wrd is pointed at the node data and the full tag path is
 * built with mk_tag_path_full().  If xpath_termlist_by_tagpath() yields
 * a termlist, each entry is indexed from a copy of *wrd (wrd_tl) after
 * sp_parse() has evaluated the entry source expression; max_seqno
 * tracks the largest seqno seen and is written back to wrd->seqno.
 * Tag part: the tag path itself is indexed with index type '0'.  For a
 * start tag (xpath_is_start == 1) every attribute is indexed by name,
 * by a combined name/value term, and through index_xpath_attr() or the
 * matching termlist entries.
 * When p->flagShowRecords is set, index details are printed to stdout. */
528 static void index_xpath(struct source_parser *sp, data1_node *n,
529 struct recExtractCtrl *p,
530 int level, RecWord *wrd,
536 char tag_path_full[1024];
537 int termlist_only = 1;
540 if (!n->root->u.root.absyn
542 n->root->u.root.absyn->xpath_indexing == DATA1_XPATH_INDEXING_ENABLE)
550 wrd->term_buf = n->u.data.data;
551 wrd->term_len = n->u.data.len;
554 mk_tag_path_full(tag_path_full, sizeof(tag_path_full), n);
556 /* If we have a matching termlist... */
557 if (n->root->u.root.absyn &&
558 (tl = xpath_termlist_by_tagpath(tag_path_full, n)))
561 for (; tl; tl = tl->next)
563 /* need to copy recword because it may be changed */
565 wrd->index_type = *tl->structure;
566 memcpy (&wrd_tl, wrd, sizeof(*wrd));
568 sp_parse(sp, n, &wrd_tl, tl->source);
570 /* this is just the old fashioned attribute based index */
571 wrd_tl.index_name = tl->index_name;
572 if (p->flagShowRecords)
575 printf("%*sIdx: [%s]", (level + 1) * 4, "",
577 printf("%s %s", tl->index_name, tl->source);
578 printf (" XData:\"");
579 for (i = 0; i<wrd_tl.term_len && i < 40; i++)
580 fputc (wrd_tl.term_buf[i], stdout);
582 if (wrd_tl.term_len > 40)
584 fputc ('\n', stdout);
587 (*p->tokenAdd)(&wrd_tl);
588 if (wrd_tl.seqno > max_seqno)
589 max_seqno = wrd_tl.seqno;
592 wrd->seqno = max_seqno;
595 /* xpath indexing is done, if there was no termlist given,
596 or no ! in the termlist, and default indexing is enabled... */
597 if (!p->flagShowRecords && !xpdone && !termlist_only)
599 wrd->index_name = xpath_index;
600 wrd->index_type = 'w';
607 mk_tag_path_full(tag_path_full, sizeof(tag_path_full), n);
609 wrd->index_type = '0';
610 wrd->term_buf = tag_path_full;
611 wrd->term_len = strlen(tag_path_full);
612 wrd->index_name = xpath_index;
613 if (p->flagShowRecords)
615 printf("%*s tag=", (level + 1) * 4, "");
616 for (i = 0; i<wrd->term_len && i < 40; i++)
617 fputc (wrd->term_buf[i], stdout);
626 (*p->tokenAdd)(wrd); /* index the element tag path */
628 if (xpath_is_start == 1) /* only for the starting tag... */
630 #define MAX_ATTR_COUNT 50
631 data1_termlist *tll[MAX_ATTR_COUNT];
634 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
636 char attr_tag_path_full[1024];
638 /* this could be cached as well */
/* NOTE(review): tag_path_full may itself hold close to 1024 bytes, so
 * this unbounded sprintf of name plus path into a 1024-byte buffer can
 * overflow -- consider a bounded formatting call. */
639 sprintf (attr_tag_path_full, "@%s/%s",
640 xp->name, tag_path_full);
/* NOTE(review): confirm i is kept below MAX_ATTR_COUNT for elements
 * with many attributes. */
642 tll[i] = xpath_termlist_by_tagpath(attr_tag_path_full,n);
644 /* attribute (no value) */
645 wrd->index_type = '0';
646 wrd->index_name = ZEBRA_XPATH_ATTR_NAME;
647 wrd->term_buf = xp->name;
648 wrd->term_len = strlen(xp->name);
655 strlen(xp->name) + strlen(xp->value) < sizeof(comb)-2)
657 /* attribute value exact */
658 strcpy (comb, xp->name);
660 strcat (comb, xp->value);
662 wrd->index_name = ZEBRA_XPATH_ATTR_NAME;
663 wrd->index_type = '0';
664 wrd->term_buf = comb;
665 wrd->term_len = strlen(comb);
674 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
676 char attr_tag_path_full[1024];
/* NOTE(review): same unbounded sprintf as in the first attribute loop. */
679 sprintf (attr_tag_path_full, "@%s/%s",
680 xp->name, tag_path_full);
684 /* If there is a termlist given (=xelm directive) */
685 for (; tl; tl = tl->next)
689 /* add xpath index for the attribute */
690 index_xpath_attr (attr_tag_path_full, xp->name,
691 xp->value, tl->structure,
695 /* index attribute value (only path/@attr) */
698 wrd->index_name = tl->index_name;
699 wrd->index_type = *tl->structure;
700 wrd->term_buf = xp->value;
701 wrd->term_len = strlen(xp->value);
707 /* if there was no termlist for the given path,
708 or the termlist didn't have a ! element, index
709 the attribute as "w" */
710 if ((!xpdone) && (!termlist_only))
712 index_xpath_attr (attr_tag_path_full, xp->name,
713 xp->value, "w", p, wrd);
/* Indexes node n according to the termlists of the element attached to
 * tag node par.  If par has no element, its ancestors are tried via
 * get_parent_tag() until one with an element is found.  For every
 * termlist entry the source expression is evaluated with sp_parse();
 * when that leaves a non-empty term in wrd, index_type and index_name
 * are taken from the entry.  With p->flagShowRecords set, the index name,
 * source and up to 40 bytes of the term are printed to stdout.
 * dtype starts as DATA1K_string and is replaced by the kind of the
 * element tag when there is one. */
722 static void index_termlist (struct source_parser *sp, data1_node *par,
724 struct recExtractCtrl *p, int level, RecWord *wrd)
726 data1_termlist *tlist = 0;
727 data1_datatype dtype = DATA1K_string;
730 * cycle up towards the root until we find a tag with an att..
731 * this has the effect of indexing locally defined tags with
732 * the attribute of their ancestor in the record.
735 while (!par->u.tag.element)
736 if (!par->parent || !(par=get_parent_tag(p->dh, par->parent)))
738 if (!par || !(tlist = par->u.tag.element->termlists))
740 if (par->u.tag.element->tag)
741 dtype = par->u.tag.element->tag->kind;
743 for (; tlist; tlist = tlist->next)
745 /* consider source */
747 assert(tlist->source);
748 sp_parse(sp, n, wrd, tlist->source);
750 if (wrd->term_buf && wrd->term_len)
752 if (p->flagShowRecords)
755 printf("%*sIdx: [%s]", (level + 1) * 4, "",
757 printf("%s %s", tlist->index_name, tlist->source);
758 printf (" XData:\"");
759 for (i = 0; i<wrd->term_len && i < 40; i++)
760 fputc (wrd->term_buf[i], stdout);
762 if (wrd->term_len > 40)
764 fputc ('\n', stdout);
768 wrd->index_type = *tlist->structure;
769 wrd->index_name = tlist->index_name;
/* Recursive worker of dumpkeys(): walks n and its following siblings,
 * descending into children with level + 1.
 * Tag nodes are indexed with index_termlist() and, when the root has an
 * abstract syntax, with index_xpath() for ZEBRA_XPATH_ELM_BEGIN; after
 * the children have been handled index_xpath() is called again for
 * ZEBRA_XPATH_ELM_END.  Data nodes are indexed through the termlists of
 * their parent tag and with index_xpath() for ZEBRA_XPATH_CDATA.
 * With p->flagShowRecords set, a description of each root, tag and data
 * node is printed to stdout, indented by level.
 * A negative result from the recursive call is tested for; the caller
 * grs_extract_sub() treats a negative return from dumpkeys() as failure. */
776 static int dumpkeys_r(struct source_parser *sp,
777 data1_node *n, struct recExtractCtrl *p, int level,
780 for (; n; n = n->next)
782 if (p->flagShowRecords) /* display element description to user */
784 if (n->which == DATA1N_root)
786 printf("%*s", level * 4, "");
787 printf("Record type: '%s'\n", n->u.root.type);
789 else if (n->which == DATA1N_tag)
793 printf("%*s", level * 4, "");
794 if (!(e = n->u.tag.element))
795 printf("Local tag: '%s'\n", n->u.tag.tag);
798 printf("Elm: '%s' ", e->name);
801 data1_tag *t = e->tag;
803 printf("TagNam: '%s' ", t->names->name);
806 printf("%s[%d],", t->tagset->name, t->tagset->type);
809 if (t->which == DATA1T_numeric)
810 printf("%d)", t->value.numeric);
812 printf("'%s')", t->value.string);
819 if (n->which == DATA1N_tag)
821 index_termlist(sp, n, n, p, level, wrd);
822 /* index start tag */
823 if (n->root->u.root.absyn)
824 index_xpath(sp, n, p, level, wrd, ZEBRA_XPATH_ELM_BEGIN,
829 if (dumpkeys_r(sp, n->child, p, level + 1, wrd) < 0)
833 if (n->which == DATA1N_data)
835 data1_node *par = get_parent_tag(p->dh, n);
837 if (p->flagShowRecords)
839 printf("%*s", level * 4, "");
/* long data is abbreviated: first 170 and last 70 bytes */
841 if (n->u.data.len > 256)
842 printf("'%.170s ... %.70s'\n", n->u.data.data,
843 n->u.data.data + n->u.data.len-70);
844 else if (n->u.data.len > 0)
845 printf("'%.*s'\n", n->u.data.len, n->u.data.data);
851 index_termlist(sp, par, n, p, level, wrd);
853 index_xpath(sp, n, p, level, wrd, ZEBRA_XPATH_CDATA,
857 if (n->which == DATA1N_tag)
860 index_xpath(sp, n, p, level, wrd, ZEBRA_XPATH_ELM_END,
864 if (p->flagShowRecords && n->which == DATA1N_root)
866 printf("%*s-------------\n\n", level * 4, "");
/* Indexes the tree rooted at n: creates a temporary source parser, runs
 * dumpkeys_r() at level 0 and destroys the parser again.  r holds the
 * result of dumpkeys_r(). */
872 static int dumpkeys(data1_node *n, struct recExtractCtrl *p, RecWord *wrd)
874 struct source_parser *sp = source_parser_create();
875 int r = dumpkeys_r(sp, n, p, 0, wrd);
876 source_parser_destroy(sp);
/* Extracts index keys from an already built data1 tree n.  The schema
 * reference of the root abstract syntax is converted to an OID
 * (PROTO_Z3950 / CLASS_SCHEMA) and, when the conversion succeeds,
 * reported through p->schemaAdd.  Returns the result of dumpkeys().
 * NOTE(review): confirm n->u.root.absyn is checked for NULL before it
 * is dereferenced. */
880 int grs_extract_tree(struct recExtractCtrl *p, data1_node *n)
883 int oidtmp[OID_SIZE];
886 oe.proto = PROTO_Z3950;
887 oe.oclass = CLASS_SCHEMA;
890 oe.value = n->u.root.absyn->reference;
892 if ((oid_ent_to_oid (&oe, oidtmp)))
893 (*p->schemaAdd)(p, oidtmp);
897 /* data1_pr_tree(p->dh, n, stdout); */
899 return dumpkeys(n, p, &wrd);
/* Reads one record with the supplied grs_read callback and indexes it.
 * Return codes visible here: RECCTRL_EXTRACT_EOF, RECCTRL_EXTRACT_ERROR
 * when the root has no abstract syntax, RECCTRL_EXTRACT_ERROR_GENERIC
 * when dumpkeys() returns a negative value, and RECCTRL_EXTRACT_OK
 * otherwise.
 * Before indexing, the schema OID is reported through p->schemaAdd, text
 * nodes are concatenated, the tree is converted to UTF-8 and the idzebra
 * subtree is removed. */
902 static int grs_extract_sub(void *clientData, struct recExtractCtrl *p,
904 data1_node *(*grs_read)(struct grs_read_info *))
907 struct grs_read_info gri;
909 int oidtmp[OID_SIZE];
912 gri.stream = p->stream;
915 gri.clientData = clientData;
917 n = (*grs_read)(&gri);
919 return RECCTRL_EXTRACT_EOF;
920 oe.proto = PROTO_Z3950;
921 oe.oclass = CLASS_SCHEMA;
923 if (!n->u.root.absyn)
924 return RECCTRL_EXTRACT_ERROR;
928 oe.value = n->u.root.absyn->reference;
929 if ((oid_ent_to_oid (&oe, oidtmp)))
930 (*p->schemaAdd)(p, oidtmp);
932 data1_concat_text(p->dh, mem, n);
934 /* ensure our data1 tree is UTF-8 */
935 data1_iconv (p->dh, mem, n, "UTF-8", data1_get_encoding(p->dh, n));
938 data1_remove_idzebra_subtree (p->dh, n);
941 data1_pr_tree (p->dh, n, stdout);
945 if (dumpkeys(n, p, &wrd) < 0)
947 return RECCTRL_EXTRACT_ERROR_GENERIC;
949 return RECCTRL_EXTRACT_OK;
/* Public extract entry point: creates an NMEM handle for the duration of
 * the call and delegates to grs_extract_sub().
 * NOTE(review): confirm mem is destroyed before returning. */
952 int zebra_grs_extract(void *clientData, struct recExtractCtrl *p,
953 data1_node *(*grs_read)(struct grs_read_info *))
956 NMEM mem = nmem_create ();
957 ret = grs_extract_sub(clientData, p, mem, grs_read);
963 * Return: -1: Nothing done. 0: Ok. >0: Bib-1 diagnostic.
/* Applies a Z39.50 record composition c to the tree n.
 * Simple form: only generic element set names are accepted (otherwise
 * 26); an unknown name is copied to *addinfo and yields 25.
 * Complex form: the generic element spec is examined; an element set
 * name is looked up the same way, an external spec must be Espec-1
 * (otherwise 25).
 * When an Espec-1 was selected, the result of data1_doespec1() is
 * returned. */
965 static int process_comp(data1_handle dh, data1_node *n, Z_RecordComposition *c,
966 char **addinfo, ODR o)
968 data1_esetname *eset;
974 case Z_RecordComp_simple:
975 if (c->u.simple->which != Z_ElementSetNames_generic)
976 return 26; /* only generic form supported. Fix this later */
977 if (!(eset = data1_getesetbyname(dh, n->u.root.absyn,
978 c->u.simple->u.generic)))
980 yaz_log(YLOG_LOG, "Unknown esetname '%s'", c->u.simple->u.generic);
981 *addinfo = odr_strdup(o, c->u.simple->u.generic);
982 return 25; /* invalid esetname */
984 yaz_log(YLOG_DEBUG, "Esetname '%s' in simple compspec",
985 c->u.simple->u.generic);
988 case Z_RecordComp_complex:
989 if (c->u.complex->generic)
991 /* insert check for schema */
992 if ((p = c->u.complex->generic->elementSpec))
996 case Z_ElementSpec_elementSetName:
998 data1_getesetbyname(dh, n->u.root.absyn,
999 p->u.elementSetName)))
1001 yaz_log(YLOG_DEBUG, "Unknown esetname '%s'",
1002 p->u.elementSetName);
1003 *addinfo = odr_strdup(o, p->u.elementSetName);
1004 return 25; /* invalid esetname */
1006 yaz_log(YLOG_DEBUG, "Esetname '%s' in complex compspec",
1007 p->u.elementSetName);
1010 case Z_ElementSpec_externalSpec:
1011 if (p->u.externalSpec->which == Z_External_espec1)
1013 yaz_log(YLOG_DEBUG, "Got Espec-1");
1014 espec = p->u.externalSpec-> u.espec1;
1018 yaz_log(YLOG_LOG, "Unknown external espec.");
1019 return 25; /* bad. what is proper diagnostic? */
1026 return 26; /* fix */
1030 yaz_log(YLOG_DEBUG, "Element: Espec-1 match");
1031 return data1_doespec1(dh, n, espec);
1035 yaz_log(YLOG_DEBUG, "Element: all match");
1040 /* Add Zebra info in separate namespace ...
1043 <metadata xmlns="http://www.indexdata.dk/zebra/">
1045 <localnumber>447</localnumber>
1046 <filename>records/genera.xml</filename>
/* Appends Zebra metadata to an XML record: creates an idzebra tag under
 * top, carrying an xmlns attribute for the Zebra namespace, and adds
 * child tags size, score, localnumber and filename taken from p.  The
 * i2 / i4 text nodes are whitespace inserted between the tags.
 * NOTE(review): some of the child tags are presumably emitted only
 * under conditions -- confirm against the full body. */
1051 static void zebra_xml_metadata (struct recRetrieveCtrl *p, data1_node *top,
1054 const char *idzebra_ns[3];
1055 const char *i2 = "\n ";
1056 const char *i4 = "\n ";
1059 idzebra_ns[0] = "xmlns";
1060 idzebra_ns[1] = "http://www.indexdata.dk/zebra/";
1063 data1_mk_text (p->dh, mem, i2, top);
1065 n = data1_mk_tag (p->dh, mem, "idzebra", idzebra_ns, top);
1067 data1_mk_text (p->dh, mem, "\n", top);
1069 data1_mk_text (p->dh, mem, i4, n);
1071 data1_mk_tag_data_int (p->dh, n, "size", p->recordSize, mem);
1075 data1_mk_text (p->dh, mem, i4, n);
1076 data1_mk_tag_data_int (p->dh, n, "score", p->score, mem);
1078 data1_mk_text (p->dh, mem, i4, n);
1079 data1_mk_tag_data_zint (p->dh, n, "localnumber", p->localno, mem);
1082 data1_mk_text (p->dh, mem, i4, n);
1083 data1_mk_tag_data_text(p->dh, n, "filename", p->fname, mem);
1085 data1_mk_text (p->dh, mem, i2, n);
/* Public retrieve entry point: reads a record with grs_read and renders
 * it in the syntax requested in p.  The stages, in order (each is also
 * announced with a YLOG_DEBUG message):
 *   1. read the record, concatenate text nodes, drop the idzebra subtree;
 *   2. add system tags looked up with data1_systag_lookup(): size, rank
 *      (only when p->score >= 0) and sysno (only when p->localno > 0);
 *   3. for VAL_TEXT_XML input, add the metadata of zebra_xml_metadata();
 *   4. schema mapping: when the complex composition names a schema OID,
 *      map the record with a matching maptab entry; a schema mismatch
 *      sets p->diagnostic to 238;
 *   5. syntax mapping via a maptab entry matching p->input_format;
 *   6. for VAL_GRS1, add a schemaIdentifier tag holding the schema OID
 *      in dotted decimal form;
 *   7. apply the element specification with process_comp(); a positive
 *      result becomes p->diagnostic;
 *   8. convert the tree to the output encoding with data1_iconv() and
 *      serialise it with the data1_nodeto...() function of the output
 *      format.  A failed serialisation sets p->diagnostic to 238; in
 *      several branches the buffer is copied into memory from p->odr.
 * p->output_format is p->input_format, or VAL_SUTRS when that is
 * VAL_NONE. */
1088 int zebra_grs_retrieve(void *clientData, struct recRetrieveCtrl *p,
1089 data1_node *(*grs_read)(struct grs_read_info *))
1091 data1_node *node = 0, *onode = 0, *top;
1094 int res, selected = 0;
1096 struct grs_read_info gri;
1097 const char *tagname;
1099 int requested_schema = VAL_NONE;
1100 data1_marctab *marctab;
1103 mem = nmem_create();
1104 gri.stream = p->stream;
1107 gri.clientData = clientData;
1109 yaz_log(YLOG_DEBUG, "grs_retrieve");
1110 node = (*grs_read)(&gri);
1117 data1_concat_text(p->dh, mem, node);
1119 data1_remove_idzebra_subtree (p->dh, node);
1122 data1_pr_tree (p->dh, node, stdout);
1124 top = data1_get_root_tag (p->dh, node);
1126 yaz_log(YLOG_DEBUG, "grs_retrieve: size");
1127 tagname = data1_systag_lookup(node->u.root.absyn, "size", "size");
1129 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1131 dnew->u.data.what = DATA1I_text;
1132 dnew->u.data.data = dnew->lbuf;
1133 sprintf(dnew->u.data.data, "%d", p->recordSize);
1134 dnew->u.data.len = strlen(dnew->u.data.data);
1137 tagname = data1_systag_lookup(node->u.root.absyn, "rank", "rank");
1138 if (tagname && p->score >= 0 &&
1139 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1141 yaz_log(YLOG_DEBUG, "grs_retrieve: %s", tagname);
1142 dnew->u.data.what = DATA1I_num;
1143 dnew->u.data.data = dnew->lbuf;
1144 sprintf(dnew->u.data.data, "%d", p->score);
1145 dnew->u.data.len = strlen(dnew->u.data.data);
1148 tagname = data1_systag_lookup(node->u.root.absyn, "sysno",
1149 "localControlNumber");
1150 if (tagname && p->localno > 0 &&
1151 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1153 yaz_log(YLOG_DEBUG, "grs_retrieve: %s", tagname);
1154 dnew->u.data.what = DATA1I_text;
1155 dnew->u.data.data = dnew->lbuf;
1157 sprintf(dnew->u.data.data, ZINT_FORMAT, p->localno);
1158 dnew->u.data.len = strlen(dnew->u.data.data);
1161 if (p->input_format == VAL_TEXT_XML)
1162 zebra_xml_metadata (p, top, mem);
1165 data1_pr_tree (p->dh, node, stdout);
1167 if (p->comp && p->comp->which == Z_RecordComp_complex &&
1168 p->comp->u.complex->generic &&
1169 p->comp->u.complex->generic->which == Z_Schema_oid &&
1170 p->comp->u.complex->generic->schema.oid)
1172 oident *oe = oid_getentbyoid (p->comp->u.complex->generic->schema.oid);
1174 requested_schema = oe->value;
1176 /* If schema has been specified, map if possible, then check that
1177 * we got the right one
1179 if (requested_schema != VAL_NONE)
1181 yaz_log(YLOG_DEBUG, "grs_retrieve: schema mapping");
1182 for (map = node->u.root.absyn->maptabs; map; map = map->next)
1184 if (map->target_absyn_ref == requested_schema)
1187 if (!(node = data1_map_record(p->dh, onode, map, mem)))
1196 if (node->u.root.absyn &&
1197 requested_schema != node->u.root.absyn->reference)
1199 p->diagnostic = 238;
1205 * Does the requested format match a known syntax-mapping? (this reflects
1206 * the overlap of schema and formatting which is inherent in the MARC
1209 yaz_log(YLOG_DEBUG, "grs_retrieve: syntax mapping");
1210 if (node->u.root.absyn)
1211 for (map = node->u.root.absyn->maptabs; map; map = map->next)
1213 if (map->target_absyn_ref == p->input_format)
1216 if (!(node = data1_map_record(p->dh, onode, map, mem)))
1225 yaz_log(YLOG_DEBUG, "grs_retrieve: schemaIdentifier");
1226 if (node->u.root.absyn &&
1227 node->u.root.absyn->reference != VAL_NONE &&
1228 p->input_format == VAL_GRS1)
1232 int oidtmp[OID_SIZE];
1234 oe.proto = PROTO_Z3950;
1235 oe.oclass = CLASS_SCHEMA;
1236 oe.value = node->u.root.absyn->reference;
1238 if ((oid = oid_ent_to_oid (&oe, oidtmp)))
1241 data1_handle dh = p->dh;
1245 for (ii = oid; *ii >= 0; ii++)
1249 sprintf(p, "%d", *ii);
1252 if ((dnew = data1_mk_tag_data_wd(dh, top,
1253 "schemaIdentifier", mem)))
1255 dnew->u.data.what = DATA1I_oid;
1256 dnew->u.data.data = (char *) nmem_malloc(mem, p - tmp);
1257 memcpy(dnew->u.data.data, tmp, p - tmp);
1258 dnew->u.data.len = p - tmp;
1263 yaz_log(YLOG_DEBUG, "grs_retrieve: element spec");
1264 if (p->comp && (res = process_comp(p->dh, node, p->comp, &p->addinfo,
1267 p->diagnostic = res;
1271 else if (p->comp && !res)
1275 data1_pr_tree (p->dh, node, stdout);
1277 yaz_log(YLOG_DEBUG, "grs_retrieve: transfer syntax mapping");
1278 switch (p->output_format = (p->input_format != VAL_NONE ?
1279 p->input_format : VAL_SUTRS))
1283 data1_pr_tree (p->dh, node, stdout);
1285 /* default output encoding for XML is UTF-8 */
1286 data1_iconv (p->dh, mem, node,
1287 p->encoding ? p->encoding : "UTF-8",
1288 data1_get_encoding(p->dh, node));
1290 if (!(p->rec_buf = data1_nodetoidsgml(p->dh, node, selected,
1292 p->diagnostic = 238;
1295 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1296 memcpy (new_buf, p->rec_buf, p->rec_len);
1297 p->rec_buf = new_buf;
1301 data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1303 if (!(p->rec_buf = data1_nodetogr(p->dh, node, selected,
1305 p->diagnostic = 238; /* not available in requested syntax */
1310 /* ensure our data1 tree is UTF-8 */
1311 data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1313 if (!(p->rec_buf = data1_nodetoexplain(p->dh, node, selected,
1315 p->diagnostic = 238;
1320 /* ensure our data1 tree is UTF-8 */
1321 data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1322 if (!(p->rec_buf = data1_nodetosummary(p->dh, node, selected,
1324 p->diagnostic = 238;
1330 data1_iconv (p->dh, mem, node, p->encoding,
1331 data1_get_encoding(p->dh, node));
1332 if (!(p->rec_buf = data1_nodetobuf(p->dh, node, selected,
1334 p->diagnostic = 238;
1337 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1338 memcpy (new_buf, p->rec_buf, p->rec_len);
1339 p->rec_buf = new_buf;
1344 data1_iconv (p->dh, mem, node, p->encoding,
1345 data1_get_encoding(p->dh, node));
1346 if (!(p->rec_buf = data1_nodetosoif(p->dh, node, selected,
1348 p->diagnostic = 238;
1351 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1352 memcpy (new_buf, p->rec_buf, p->rec_len);
1353 p->rec_buf = new_buf;
1357 if (!node->u.root.absyn)
1359 p->diagnostic = 238;
1362 for (marctab = node->u.root.absyn->marc; marctab;
1363 marctab = marctab->next)
1364 if (marctab->reference == p->input_format)
1368 p->diagnostic = 238;
1372 data1_iconv (p->dh, mem, node, p->encoding,
1373 data1_get_encoding(p->dh, node));
1374 if (!(p->rec_buf = data1_nodetomarc(p->dh, marctab, node,
1375 selected, &p->rec_len)))
1376 p->diagnostic = 238;
1379 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1380 memcpy (new_buf, p->rec_buf, p->rec_len);
1381 p->rec_buf = new_buf;
1391 * indent-tabs-mode: nil
1393 * vim: shiftwidth=4 tabstop=8 expandtab