1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2008 Index Data
3 * See the file LICENSE for details.
8 * \brief Implements CQL transform (CQL to RPN conversion).
10 * Evaluation order of rules:
25 #include <yaz/rpn2cql.h>
26 #include <yaz/xmalloc.h>
27 #include <yaz/diagsrw.h>
28 #include <yaz/tokenizer.h>
29 #include <yaz/wrbuf.h>
30 #include <yaz/z-core.h>
31 #include <yaz/oid_db.h>
/* One configuration entry, chained into a singly linked list via `next'.
 * attr_list holds the Z39.50 attribute list for the entry; it is filled
 * in by cql_transform_parse_tok_line().
 * NOTE(review): other code in this file also uses the members `pattern'
 * and `value'; their declarations are not visible in this excerpt. */
34 struct cql_prop_entry {
37 Z_AttributeList attr_list;
38 struct cql_prop_entry *next;
/* State behind a cql_transform_t handle: the start of the property entry
 * chain and the tokenizer configuration used to parse property values.
 * NOTE(review): functions in this file also use ct->w, ct->nmem,
 * ct->error and ct->addinfo; those member declarations are not visible
 * in this excerpt. */
41 struct cql_transform_t_ {
42 struct cql_prop_entry *entry;
43 yaz_tok_cfg_t tok_cfg;
/* Allocates a new transform handle with xmalloc() and creates the
 * resources it owns: a tokenizer configuration, a WRBUF scratch buffer
 * (ct->w) and an NMEM pool (ct->nmem). cql_transform_close() destroys
 * these three again.
 * NOTE(review): initialisation of the remaining members and the return
 * statement are not visible in this excerpt. */
51 cql_transform_t cql_transform_create(void)
53 cql_transform_t ct = (cql_transform_t) xmalloc(sizeof(*ct));
54 ct->tok_cfg = yaz_tok_cfg_create();
55 ct->w = wrbuf_alloc();
59 ct->nmem = nmem_create();
/* Parses the tokens of one property value and, when parsing succeeded
 * (ret == 0), stores a new cql_prop_entry for `pattern' in ct's entry
 * chain.
 *
 * The value is a sequence of attribute specifications, each of the form
 * `attset type=value' or `type=value'. For every specification a
 * Z_AttributeElement is allocated from ct->nmem and collected in ae[];
 * the loop stops after 20 elements (the size of ae[]). The textual form
 * of the specifications is accumulated in ct->w and copied into the
 * entry's `value'.
 *
 * `ret' follows the convention 0 = OK, non-zero = failure.
 * NOTE(review): the remaining parameters, several declarations and the
 * return statement are not visible in this excerpt. */
63 static int cql_transform_parse_tok_line(cql_transform_t ct,
68 Z_AttributeElement *ae[20];
69 int ret = 0; /* 0=OK, != 0 FAIL */
73 while (t == YAZ_TOK_STRING && ae_num < 20)
75 WRBUF type_str = wrbuf_alloc();
77 Z_AttributeElement *elem = 0;
78 const char *value_str = 0;
79 /* attset type=value OR type=value */
81 elem = (Z_AttributeElement *) nmem_malloc(ct->nmem, sizeof(*elem));
82 elem->attributeSet = 0;
84 wrbuf_puts(ct->w, yaz_tok_parse_string(tp));
85 wrbuf_puts(type_str, yaz_tok_parse_string(tp));
89 wrbuf_destroy(type_str);
91 wrbuf_destroy(set_str);
/* A second string token means the first one named an attribute set. */
94 if (t == YAZ_TOK_STRING)
96 wrbuf_puts(ct->w, " ");
97 wrbuf_puts(ct->w, yaz_tok_parse_string(tp));
101 yaz_string_to_oid_nmem(yaz_oid_std(), CLASS_ATTSET,
102 wrbuf_cstr(set_str), ct->nmem);
104 type_str = wrbuf_alloc();
105 wrbuf_puts(type_str, yaz_tok_parse_string(tp));
106 t = yaz_tok_move(tp);
/* The attribute type must parse as a decimal integer. */
108 elem->attributeType = nmem_intdup(ct->nmem, 0);
109 if (sscanf(wrbuf_cstr(type_str), "%d", elem->attributeType)
112 wrbuf_destroy(type_str);
114 wrbuf_destroy(set_str);
115 yaz_log(YLOG_WARN, "Expected numeric attribute type");
120 wrbuf_destroy(type_str);
122 wrbuf_destroy(set_str);
/* NOTE(review): the log message below repeats the word "after". */
126 yaz_log(YLOG_WARN, "Expected = after after attribute type");
130 t = yaz_tok_move(tp);
131 if (t != YAZ_TOK_STRING) /* value */
133 yaz_log(YLOG_WARN, "Missing attribute value");
137 value_str = yaz_tok_parse_string(tp);
/* A value starting with a digit becomes a numeric attribute value;
 * the value is converted with atoi(), so trailing non-digits are ignored.
 * NOTE(review): isdigit() expects an unsigned char value or EOF; a plain
 * char with the high bit set is out of range here. */
138 if (isdigit(*value_str))
140 elem->which = Z_AttributeValue_numeric;
141 elem->value.numeric =
142 nmem_intdup(ct->nmem, atoi(value_str));
/* A complex attribute value carrying a single string, with no
 * semantic actions. */
146 Z_ComplexAttribute *ca = (Z_ComplexAttribute *)
147 nmem_malloc(ct->nmem, sizeof(*ca));
148 elem->which = Z_AttributeValue_complex;
149 elem->value.complex = ca;
151 ca->list = (Z_StringOrNumeric **)
152 nmem_malloc(ct->nmem, sizeof(Z_StringOrNumeric *));
153 ca->list[0] = (Z_StringOrNumeric *)
154 nmem_malloc(ct->nmem, sizeof(Z_StringOrNumeric));
155 ca->list[0]->which = Z_StringOrNumeric_string;
156 ca->list[0]->u.string = nmem_strdup(ct->nmem, value_str);
157 ca->num_semanticAction = 0;
158 ca->semanticAction = 0;
160 wrbuf_puts(ct->w, "=");
161 wrbuf_puts(ct->w, yaz_tok_parse_string(tp));
162 t = yaz_tok_move(tp);
163 wrbuf_puts(ct->w, " ");
166 if (ret == 0) /* OK? */
168 struct cql_prop_entry **pp = &ct->entry;
/* The entry itself and its strings come from xmalloc/xstrdup; the
 * attribute pointer array comes from ct->nmem. */
171 *pp = (struct cql_prop_entry *) xmalloc(sizeof(**pp));
172 (*pp)->pattern = xstrdup(pattern);
173 (*pp)->value = xstrdup(wrbuf_cstr(ct->w));
175 (*pp)->attr_list.num_attributes = ae_num;
177 (*pp)->attr_list.attributes = 0;
180 (*pp)->attr_list.attributes = (Z_AttributeElement **)
181 nmem_malloc(ct->nmem,
182 ae_num * sizeof(Z_AttributeElement *));
183 memcpy((*pp)->attr_list.attributes, ae,
184 ae_num * sizeof(Z_AttributeElement *));
/* Prints the attribute list through an ODR print stream directed at the
 * YAZ log file.
 * NOTE(review): whether this dump is conditional, and where `pr' is
 * destroyed, is not visible in this excerpt. */
190 ODR pr = odr_createmem(ODR_PRINT);
191 Z_AttributeList *alp = &(*pp)->attr_list;
192 odr_setprint(pr, yaz_log_file());
193 z_AttributeList(pr, &alp, 0, 0);
/* Defines one property: tokenizes `value' with ct's tokenizer
 * configuration and passes the tokens, together with `pattern', to
 * cql_transform_parse_tok_line(). The parser is destroyed afterwards.
 * NOTE(review): the parser is created before "=" is registered as a
 * single-character token; confirm that yaz_tok_cfg_single_tokens()
 * still takes effect for a parser that already exists. */
201 int cql_transform_define_pattern(cql_transform_t ct, const char *pattern,
205 yaz_tok_parse_t tp = yaz_tok_parse_buf(ct->tok_cfg, value);
206 yaz_tok_cfg_single_tokens(ct->tok_cfg, "=");
207 r = cql_transform_parse_tok_line(ct, pattern, tp);
208 yaz_tok_parse_destroy(tp);
/* Creates a transform handle and loads property definitions from the
 * stream `f', one line at a time. Each line is tokenized with "=" as a
 * single-character token. When the first token is a string it is taken
 * as the pattern, and the rest of the line is parsed by
 * cql_transform_parse_tok_line(). On each visible error path the parser
 * is destroyed and the handle is closed.
 * NOTE(review): fgets() already reserves room for the terminating NUL,
 * so passing sizeof(line)-1 leaves one byte of `line' unused.
 * NOTE(review): `pattern' is an xstrdup() copy; where it is released is
 * not visible in this excerpt. */
212 cql_transform_t cql_transform_open_FILE(FILE *f)
214 cql_transform_t ct = cql_transform_create();
217 yaz_tok_cfg_single_tokens(ct->tok_cfg, "=");
219 while (fgets(line, sizeof(line)-1, f))
221 yaz_tok_parse_t tp = yaz_tok_parse_buf(ct->tok_cfg, line);
224 t = yaz_tok_move(tp);
225 if (t == YAZ_TOK_STRING)
227 char * pattern = xstrdup(yaz_tok_parse_string(tp));
228 t = yaz_tok_move(tp);
231 yaz_tok_parse_destroy(tp);
232 cql_transform_close(ct);
235 if (cql_transform_parse_tok_line(ct, pattern, tp))
237 yaz_tok_parse_destroy(tp);
238 cql_transform_close(ct);
/* A line whose first token is neither a string nor end-of-input is
 * handled on this branch. */
243 else if (t != YAZ_TOK_EOF)
245 yaz_tok_parse_destroy(tp);
246 cql_transform_close(ct);
249 yaz_tok_parse_destroy(tp);
/* Releases a transform handle. The next pointer of each property entry
 * is saved in pe_next while the chain is walked; afterwards the
 * tokenizer configuration, the WRBUF and the NMEM pool are destroyed.
 * NOTE(review): the statements that free each entry and the handle
 * itself are not visible in this excerpt. */
254 void cql_transform_close(cql_transform_t ct)
256 struct cql_prop_entry *pe;
262 struct cql_prop_entry *pe_next = pe->next;
269 yaz_tok_cfg_destroy(ct->tok_cfg);
270 wrbuf_destroy(ct->w);
271 nmem_destroy(ct->nmem);
/* Opens the file `fname' in text read mode and builds a transform handle
 * from its contents with cql_transform_open_FILE().
 * NOTE(review): the check for fopen() failure and the matching fclose()
 * are not visible in this excerpt. */
275 cql_transform_t cql_transform_open_fname(const char *fname)
278 FILE *f = fopen(fname, "r");
281 ct = cql_transform_open_FILE(f);
/* NOTE(review): this looks like a reference copy of the
 * Z_AttributeElement layout (optional attribute set, numeric or complex
 * value selected by `which'). Whether it is compiled or disabled cannot
 * be told from this excerpt -- confirm before relying on it. */
287 struct Z_AttributeElement {
288 Z_AttributeSetId *attributeSet; /* OPT */
293 Z_ComplexAttribute *complex;
294 #define Z_AttributeValue_numeric 1
295 #define Z_AttributeValue_complex 2
/* Compares two attribute elements by value. Each element is encoded into
 * its own ODR memory stream, and the two encoded buffers are then
 * compared with yaz_memcmp(); its result is stored in `ret'.
 * NOTE(review): destruction of the two ODR streams and the return
 * statement are not visible in this excerpt. */
300 static int compare_attr(Z_AttributeElement *a, Z_AttributeElement *b)
302 ODR odr_a = odr_createmem(ODR_ENCODE);
303 ODR odr_b = odr_createmem(ODR_ENCODE);
308 z_AttributeElement(odr_a, &a, 0, 0);
309 z_AttributeElement(odr_b, &b, 0, 0);
311 buf_a = odr_getbuf(odr_a, &len_a, 0);
312 buf_b = odr_getbuf(odr_b, &len_b, 0);
314 ret = yaz_memcmp(buf_a, buf_b, len_a, len_b);
/* Reverse lookup from an attribute list to a configured property.
 * Scans ct's entries for one whose pattern starts with `category' (a
 * prefix match over strlen(category) characters) and whose attributes
 * each have an equal counterpart in `attributes', as judged by
 * compare_attr().
 * NOTE(review): what is returned for a match and for no match is not
 * visible in this excerpt. */
321 const char *cql_lookup_reverse(cql_transform_t ct,
322 const char *category,
323 Z_AttributeList *attributes)
325 struct cql_prop_entry *e;
326 size_t clen = strlen(category);
327 for (e = ct->entry; e; e = e->next)
329 if (!strncmp(e->pattern, category, clen))
331 /* category matches.. See if attributes in pattern value
332 are all listed in actual attributes */
334 for (i = 0; i < e->attr_list.num_attributes; i++)
336 /* entry attribute */
337 Z_AttributeElement *e_ae = e->attr_list.attributes[i];
339 for (j = 0; j < attributes->num_attributes; j++)
341 /* actual attribute */
342 Z_AttributeElement *a_ae = attributes->attributes[j];
343 int r = compare_attr(e_ae, a_ae);
347 if (j == attributes->num_attributes)
348 break; /* i was not found at all.. try next pattern */
/* i reaching the end means no entry attribute went unmatched. */
351 if (i == e->attr_list.num_attributes)
/* Looks up a property by a dotted key. The key is built in `pattern'
 * from the non-null parts among pat1, pat2 and pat3, joined with "." and
 * each truncated to at most 39 characters. ct's entries are then
 * searched for a pattern equal to that key under cql_strcmp().
 * NOTE(review): the longest key is 39+1+39+1+39 characters plus the
 * terminating NUL; the declared size of `pattern' is not visible in this
 * excerpt -- confirm it is at least 120 bytes.
 * NOTE(review): the return statements are not visible here. */
358 static const char *cql_lookup_property(cql_transform_t ct,
359 const char *pat1, const char *pat2,
363 struct cql_prop_entry *e;
365 if (pat1 && pat2 && pat3)
366 sprintf(pattern, "%.39s.%.39s.%.39s", pat1, pat2, pat3);
367 else if (pat1 && pat2)
368 sprintf(pattern, "%.39s.%.39s", pat1, pat2);
369 else if (pat1 && pat3)
370 sprintf(pattern, "%.39s.%.39s", pat1, pat3);
372 sprintf(pattern, "%.39s", pat1);
376 for (e = ct->entry; e; e = e->next)
378 if (!cql_strcmp(e->pattern, pattern))
/* Emits, through the callback `pr', the "@attr ..." specifications
 * configured for `category' and a value.
 *
 * The value looked up is `val', or `default_val' when `val' is null.
 * A context-set prefix is found by scanning the "set." entries for one
 * whose value equals `uri'. The property is then looked up with
 * cql_lookup_property(). For category "relation" with no prefix, the
 * aliases "==" -> "exact", "=" -> "eq", "<=" -> "le" and ">=" -> "ge"
 * are tried, and "*" is also looked up as a wildcard key.
 *
 * The property text is scanned for "=" signs, each piece is copied into
 * `buf', and "@attr " plus the piece is emitted. When `errcode' is
 * non-zero and ct has no error yet, error information is recorded with a
 * copy of `val' as addinfo.
 * NOTE(review): the last parameters, several declarations, the exact
 * branch structure and the return values are not visible in this
 * excerpt. */
384 int cql_pr_attr_uri(cql_transform_t ct, const char *category,
385 const char *uri, const char *val, const char *default_val,
386 void (*pr)(const char *buf, void *client_data),
391 const char *eval = val ? val : default_val;
392 const char *prefix = 0;
396 struct cql_prop_entry *e;
/* NOTE(review): memcmp() always reads 4 bytes, so a pattern shorter than
 * three characters is read past its terminator; cql_transform() uses
 * cql_strncmp() for the same "set." test. */
398 for (e = ct->entry; e; e = e->next)
399 if (!memcmp(e->pattern, "set.", 4) && e->value &&
400 !strcmp(e->value, uri))
402 prefix = e->pattern+4;
405 /* must have a prefix now - if not it's an error */
411 res = cql_lookup_property(ct, category, prefix, eval);
412 /* we have some aliases for some relations unfortunately.. */
413 if (!res && !prefix && !strcmp(category, "relation"))
/* NOTE(review): `val' is passed to strcmp() here, yet `eval' above
 * allows `val' to be null; confirm callers never reach this branch with
 * a null relation. */
415 if (!strcmp(val, "=="))
416 res = cql_lookup_property(ct, category, prefix, "exact");
417 if (!strcmp(val, "="))
418 res = cql_lookup_property(ct, category, prefix, "eq");
419 if (!strcmp(val, "<="))
420 res = cql_lookup_property(ct, category, prefix, "le");
421 if (!strcmp(val, ">="))
422 res = cql_lookup_property(ct, category, prefix, "ge");
425 res = cql_lookup_property(ct, category, prefix, "*");
431 const char *cp0 = res, *cp1;
432 while ((cp1 = strchr(cp0, '=')))
/* Advance cp1 to the end of this specification (next space or NUL). */
435 while (*cp1 && *cp1 != ' ')
437 if (cp1 - cp0 >= sizeof(buf))
439 memcpy(buf, cp0, cp1 - cp0);
441 (*pr)("@attr ", client_data);
443 for (i = 0; buf[i]; i++)
446 (*pr)(eval, client_data);
452 (*pr)(tmp, client_data);
455 (*pr)(" ", client_data);
/* Only the first error is recorded; an existing ct->error is kept. */
463 if (errcode && !ct->error)
467 ct->addinfo = xstrdup(val);
/* Convenience wrapper around cql_pr_attr_uri() that passes 0 for the
 * URI and forwards every other argument unchanged. */
474 int cql_pr_attr(cql_transform_t ct, const char *category,
475 const char *val, const char *default_val,
476 void (*pr)(const char *buf, void *client_data),
480 return cql_pr_attr_uri(ct, category, 0 /* uri */,
481 val, default_val, pr, client_data, errcode);
/* Formats `val' as a decimal integer and emits it through `pr',
 * followed by a single space. */
485 static void cql_pr_int(int val,
486 void (*pr)(const char *buf, void *client_data),
489 char buf[21]; /* enough characters to 2^64 */
490 sprintf(buf, "%d", val);
491 (*pr)(buf, client_data);
492 (*pr)(" ", client_data);
/* Emits the parameters of a proximity operator through `pr', in this
 * order: exclusion, distance, ordered, relation, the literal "k", and
 * unit -- each followed by a space.
 *
 * The modifier chain starting at `mods' is walked via u.st.modifiers.
 * Recognised modifier names are "distance" (value taken from the term,
 * relation one of = > < >= <= <>), "ordered", "unordered" and "unit"
 * (word, sentence, paragraph or element). An unsupported relation, unit
 * or modifier name sets ct->error and stores a copy of the offending
 * text in ct->addinfo.
 *
 * Defaults: relation 2 (less than or equal), unit 2 (word). When no
 * distance was given it defaults to 1 for unit 2 and to 0 otherwise.
 * NOTE(review): the values assigned in the individual branches and the
 * return statements are not visible in this excerpt; the caller treats
 * a zero result as failure. */
496 static int cql_pr_prox(cql_transform_t ct, struct cql_node *mods,
497 void (*pr)(const char *buf, void *client_data),
501 int distance; /* to be filled in later depending on unit */
502 int distance_defined = 0;
504 int proxrel = 2; /* less than or equal */
505 int unit = 2; /* word */
509 const char *name = mods->u.st.index;
510 const char *term = mods->u.st.term;
511 const char *relation = mods->u.st.relation;
/* NOTE(review): strtol() is called with base 0, so a leading "0" or
 * "0x" selects octal or hexadecimal; the long result is narrowed to
 * int and conversion errors are not checked. */
513 if (!strcmp(name, "distance")) {
514 distance = strtol(term, (char**) 0, 0);
515 distance_defined = 1;
516 if (!strcmp(relation, "="))
518 else if (!strcmp(relation, ">"))
520 else if (!strcmp(relation, "<"))
522 else if (!strcmp(relation, ">="))
524 else if (!strcmp(relation, "<="))
526 else if (!strcmp(relation, "<>"))
530 ct->error = YAZ_SRW_UNSUPP_PROX_RELATION;
531 ct->addinfo = xstrdup(relation);
535 else if (!strcmp(name, "ordered"))
537 else if (!strcmp(name, "unordered"))
539 else if (!strcmp(name, "unit"))
541 if (!strcmp(term, "word"))
543 else if (!strcmp(term, "sentence"))
545 else if (!strcmp(term, "paragraph"))
547 else if (!strcmp(term, "element"))
551 ct->error = YAZ_SRW_UNSUPP_PROX_UNIT;
552 ct->addinfo = xstrdup(term);
558 ct->error = YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER;
559 ct->addinfo = xstrdup(name);
562 mods = mods->u.st.modifiers;
565 if (!distance_defined)
566 distance = (unit == 2) ? 1 : 0;
568 cql_pr_int(exclusion, pr, client_data);
569 cql_pr_int(distance, pr, client_data);
570 cql_pr_int(ordered, pr, client_data);
571 cql_pr_int(proxrel, pr, client_data);
572 (*pr)("k ", client_data);
573 cql_pr_int(unit, pr, client_data);
/* Review notes for wcchar():
 * - When `start' is non-zero the character before `term' is not
 *   examined; otherwise term[-1] is read, and a wildcard that follows a
 *   backslash is not reported.
 * - NOTE(review): strchr("*?", c) also returns non-null for c == '\0',
 *   so a NUL byte inside the scanned range would be treated as a
 *   wildcard.
 * - NOTE(review): the loop and the return statements are not visible in
 *   this excerpt. */
578 /* Returns location of first wildcard character in the `length'
579 * characters starting at `term', or a null pointer if there are
580 * none -- like memchr().
582 static const char *wcchar(int start, const char *term, int length)
586 if (start || term[-1] != '\\')
587 if (strchr("*?", *term))
/* Searches the modifier chain of `cn' (linked through u.st.modifiers)
 * for a modifier whose index equals `name' under strcmp().
 * NOTE(review): the return statements are not visible in this excerpt;
 * the caller uses the result as a boolean. */
597 /* ### checks for CQL relation-name rather than Type-1 attribute */
598 static int has_modifier(struct cql_node *cn, const char *name) {
599 struct cql_node *mod;
600 for (mod = cn->u.st.modifiers; mod != 0; mod = mod->u.st.modifiers) {
601 if (!strcmp(mod->u.st.index, name))
/* Emits one search term through `pr', preceded by its attributes:
 *  1. a "position" attribute derived from '^' anchors at the start
 *     and/or end of the term (firstAndLast, first, last or any);
 *  2. a "truncation" attribute derived from the '*' and '?' wildcards
 *     located with wcchar() (both, left, right, z3958 or none);
 *  3. the "index" attribute for cn's index, with "serverChoice" as the
 *     default value;
 *  4. one "relationModifier" attribute per modifier of cn;
 *  5. the term itself inside double quotes.
 * Steps 1 and 2 are skipped when cn carries a "regexp" modifier or when
 * `length' is 0. `cn' must be a search-term node (asserted).
 * NOTE(review): the `cn' and `client_data' parameters, the local
 * declarations, the statements that strip anchors or rewrite wildcard
 * characters, and the release of z3958_mem are not visible in this
 * excerpt. */
609 void emit_term(cql_transform_t ct,
611 const char *term, int length,
612 void (*pr)(const char *buf, void *client_data),
616 const char *ns = cn->u.st.index_uri;
617 int process_term = !has_modifier(cn, "regexp");
620 assert(cn->which == CQL_NODE_ST);
622 if (process_term && length > 0)
624 if (length > 1 && term[0] == '^' && term[length-1] == '^')
626 cql_pr_attr(ct, "position", "firstAndLast", 0,
627 pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
631 else if (term[0] == '^')
633 cql_pr_attr(ct, "position", "first", 0,
634 pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
638 else if (term[length-1] == '^')
640 cql_pr_attr(ct, "position", "last", 0,
641 pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
646 cql_pr_attr(ct, "position", "any", 0,
647 pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
651 if (process_term && length > 0)
653 const char *first_wc = wcchar(1, term, length);
654 const char *second_wc = first_wc ?
655 wcchar(0, first_wc+1, length-(first_wc-term)-1) : 0;
657 /* Check for well-known globbing patterns that represent
658 * simple truncation attributes as expected by, for example,
659 * Bath-compliant server. If we find such a pattern but
660 * there's no mapping for it, that's fine: we just use a
661 * general pattern-matching attribute.
663 if (first_wc == term && second_wc == term + length-1
664 && *first_wc == '*' && *second_wc == '*'
665 && cql_pr_attr(ct, "truncation", "both", 0, pr, client_data, 0))
670 else if (first_wc == term && second_wc == 0 && *first_wc == '*'
671 && cql_pr_attr(ct, "truncation", "left", 0,
677 else if (first_wc == term + length-1 && second_wc == 0
679 && cql_pr_attr(ct, "truncation", "right", 0,
686 /* We have one or more wildcard characters, but not in a
687 * way that can be dealt with using only the standard
688 * left-, right- and both-truncation attributes. We need
689 * to translate the pattern into a Z39.58-type pattern,
690 * which has been supported in BIB-1 since 1996. If
691 * there's no configuration element for "truncation.z3958"
692 * we indicate this as error 28 "Masking character not
696 cql_pr_attr(ct, "truncation", "z3958", 0,
697 pr, client_data, YAZ_SRW_MASKING_CHAR_UNSUPP);
698 z3958_mem = (char *) xmalloc(length+1);
699 for (i = 0; i < length; i++)
701 if (i > 0 && term[i-1] == '\\')
702 z3958_mem[i] = term[i];
703 else if (term[i] == '*')
705 else if (term[i] == '?')
708 z3958_mem[i] = term[i];
710 z3958_mem[length] = '\0';
714 /* No masking characters. Use "truncation.none" if given. */
715 cql_pr_attr(ct, "truncation", "none", 0,
720 cql_pr_attr_uri(ct, "index", ns,
721 cn->u.st.index, "serverChoice",
722 pr, client_data, YAZ_SRW_UNSUPP_INDEX);
724 if (cn->u.st.modifiers)
726 struct cql_node *mod = cn->u.st.modifiers;
727 for (; mod; mod = mod->u.st.modifiers)
729 cql_pr_attr(ct, "relationModifier", mod->u.st.index, 0,
730 pr, client_data, YAZ_SRW_UNSUPP_RELATION_MODIFIER);
734 (*pr)("\"", client_data);
735 for (i = 0; i<length; i++)
737 /* pr(int) each character */
738 /* we do not need to deal with \-sequences because the
739 CQL and PQF terms have same \-format, bug #1988 */
744 (*pr)(buf, client_data);
746 (*pr)("\" ", client_data);
/* Emits the term of `cn' followed by every term on its extra_terms
 * chain, combined with the operator `op'. An "@<op> " prefix is emitted
 * ahead of the first term, and again ahead of each extra term that has a
 * further extra term after it.
 * NOTE(review): the condition guarding the first "@<op> " is not visible
 * in this excerpt.
 * NOTE(review): extra terms are emitted with `cn' (not `ne') as the node
 * argument, so index and modifiers come from the first node -- confirm
 * this is intended. */
750 void emit_terms(cql_transform_t ct,
752 void (*pr)(const char *buf, void *client_data),
756 struct cql_node *ne = cn->u.st.extra_terms;
759 (*pr)("@", client_data);
760 (*pr)(op, client_data);
761 (*pr)(" ", client_data);
763 emit_term(ct, cn, cn->u.st.term, strlen(cn->u.st.term),
765 for (; ne; ne = ne->u.st.extra_terms)
767 if (ne->u.st.extra_terms)
769 (*pr)("@", client_data);
770 (*pr)(op, client_data);
771 (*pr)(" ", client_data);
773 emit_term(ct, cn, ne->u.st.term, strlen(ne->u.st.term),
/* Splits the term of `cn' at space characters and emits each word with
 * emit_term(), combining the words with the operator `op'. The most
 * recent word is held back in last_term/last_length: it is emitted
 * after an "@<op> " prefix once a later word is found, and the final
 * word is emitted without a prefix.
 * NOTE(review): the loop header and the conditions around these
 * statements are not visible in this excerpt. */
778 void emit_wordlist(cql_transform_t ct,
780 void (*pr)(const char *buf, void *client_data),
784 const char *cp0 = cn->u.st.term;
786 const char *last_term = 0;
792 cp1 = strchr(cp0, ' ');
795 (*pr)("@", client_data);
796 (*pr)(op, client_data);
797 (*pr)(" ", client_data);
798 emit_term(ct, cn, last_term, last_length, pr, client_data);
802 last_length = cp1 - cp0;
804 last_length = strlen(cp0);
808 emit_term(ct, cn, last_term, last_length, pr, client_data);
/* Recursive worker that emits the query for node `cn' through `pr'.
 *
 * Search-term node: when the index URI equals cql_uri() and the index is
 * "resultSet", the term is emitted as @set "<term>". Otherwise the
 * "always", "relation" and "structure" attributes are emitted, followed
 * by the term(s): relation "all" emits the words joined by "and",
 * relation "any" joins them by "or", and any other relation goes through
 * emit_terms() with "and".
 *
 * Boolean node: emits "@<operator> ", handles the modifiers (proximity
 * parameters for "prox"; otherwise an unsupported-boolean-modifier
 * error), then recurses into the left and right operands.
 *
 * Any other node type is reported on stderr as impossible.
 * NOTE(review): the switch/case structure, the null checks around `ns'
 * and `mods', and the early returns are not visible in this excerpt. */
811 void cql_transform_r(cql_transform_t ct,
813 void (*pr)(const char *buf, void *client_data),
817 struct cql_node *mods;
824 ns = cn->u.st.index_uri;
827 if (!strcmp(ns, cql_uri())
828 && cn->u.st.index && !cql_strcmp(cn->u.st.index, "resultSet"))
830 (*pr)("@set \"", client_data);
831 (*pr)(cn->u.st.term, client_data);
832 (*pr)("\" ", client_data);
840 ct->error = YAZ_SRW_UNSUPP_CONTEXT_SET;
844 cql_pr_attr(ct, "always", 0, 0, pr, client_data, 0);
845 cql_pr_attr(ct, "relation", cn->u.st.relation, 0, pr, client_data,
846 YAZ_SRW_UNSUPP_RELATION);
847 cql_pr_attr(ct, "structure", cn->u.st.relation, 0,
848 pr, client_data, YAZ_SRW_UNSUPP_COMBI_OF_RELATION_AND_TERM);
849 if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "all"))
850 emit_wordlist(ct, cn, pr, client_data, "and");
851 else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "any"))
852 emit_wordlist(ct, cn, pr, client_data, "or");
854 emit_terms(ct, cn, pr, client_data, "and");
857 (*pr)("@", client_data);
858 (*pr)(cn->u.boolean.value, client_data);
859 (*pr)(" ", client_data);
860 mods = cn->u.boolean.modifiers;
861 if (!strcmp(cn->u.boolean.value, "prox"))
863 if (!cql_pr_prox(ct, mods, pr, client_data))
868 /* Boolean modifiers other than on proximity not supported */
869 ct->error = YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER;
870 ct->addinfo = xstrdup(mods->u.st.index);
874 cql_transform_r(ct, cn->u.boolean.left, pr, client_data);
875 cql_transform_r(ct, cn->u.boolean.right, pr, client_data);
879 fprintf(stderr, "Fatal: impossible CQL node-type %d\n", cn->which);
/* Transforms the CQL tree `cn', emitting the result through `pr'.
 * First every configured context set is applied to the tree with
 * cql_apply_prefix(): entries named "set.<prefix>" bind that prefix to
 * the entry's value, and an entry named "set" is applied with a null
 * prefix. Then cql_transform_r() generates the output. A temporary NMEM
 * pool is created for cql_apply_prefix().
 * NOTE(review): the last parameter, the reset of the error state, the
 * destruction of `nmem' and the return value are not visible in this
 * excerpt. */
884 int cql_transform(cql_transform_t ct, struct cql_node *cn,
885 void (*pr)(const char *buf, void *client_data),
888 struct cql_prop_entry *e;
889 NMEM nmem = nmem_create();
895 for (e = ct->entry; e ; e = e->next)
897 if (!cql_strncmp(e->pattern, "set.", 4))
898 cql_apply_prefix(nmem, cn, e->pattern+4, e->value);
899 else if (!cql_strcmp(e->pattern, "set"))
900 cql_apply_prefix(nmem, cn, 0, e->value);
902 cql_transform_r(ct, cn, pr, client_data);
/* Transforms `cn' and writes the result to the stream `f', using
 * cql_fputs as the output callback. Returns whatever cql_transform()
 * returns. */
908 int cql_transform_FILE(cql_transform_t ct, struct cql_node *cn, FILE *f)
910 return cql_transform(ct, cn, cql_fputs, f);
/* Transforms `cn' into a caller-supplied buffer, using
 * cql_buf_write_handler with a cql_buf_write_info as the output sink.
 * On overflow the error YAZ_SRW_TOO_MANY_CHARS_IN_QUERY is set with the
 * buffer limit (info.max, printed as a decimal number) as addinfo;
 * otherwise the output is NUL-terminated at info.off.
 * NOTE(review): the initialisation of `info', the overflow test itself
 * and the return value are not visible in this excerpt. */
913 int cql_transform_buf(cql_transform_t ct, struct cql_node *cn, char *out, int max)
915 struct cql_buf_write_info info;
921 r = cql_transform(ct, cn, cql_buf_write_handler, &info);
923 /* Attempt to write past end of buffer. For some reason, this
924 SRW diagnostic is deprecated, but it's so perfect for our
925 purposes that it would be stupid not to use it. */
927 ct->error = YAZ_SRW_TOO_MANY_CHARS_IN_QUERY;
928 sprintf(numbuf, "%ld", (long) info.max);
929 ct->addinfo = xstrdup(numbuf);
933 info.buf[info.off] = '\0';
/* Reports the error state of `ct'. The stored additional-information
 * pointer is written to *addinfo; it is the pointer held by ct, not a
 * copy.
 * NOTE(review): the return statement is not visible in this excerpt. */
937 int cql_transform_error(cql_transform_t ct, const char **addinfo)
939 *addinfo = ct->addinfo;
/* Sets the error state of `ct'. A non-null `addinfo' is stored as an
 * xstrdup() copy; a null `addinfo' stores 0.
 * NOTE(review): the assignment of the error code and any release of a
 * previous ct->addinfo are not visible in this excerpt. */
943 void cql_transform_set_error(cql_transform_t ct, int error, const char *addinfo)
946 ct->addinfo = addinfo ? xstrdup(addinfo) : 0;
953 * indent-tabs-mode: nil
955 * vim: shiftwidth=4 tabstop=8 expandtab