1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) Index Data
3 * See the file LICENSE for details.
7 * \brief Implements CQL transform (CQL to RPN conversion).
9 * Evaluation order of rules:
26 #include <yaz/rpn2cql.h>
27 #include <yaz/xmalloc.h>
28 #include <yaz/diagsrw.h>
29 #include <yaz/tokenizer.h>
30 #include <yaz/wrbuf.h>
31 #include <yaz/z-core.h>
32 #include <yaz/matchstr.h>
33 #include <yaz/oid_db.h>
/* One configured property: a node in a singly linked list (via `next`)
 * that carries a parsed Z39.50 attribute list. Other code in this file
 * also accesses `pattern` and `value` members of this struct; their
 * declarations are not part of this excerpt. */
36 struct cql_prop_entry {
39 Z_AttributeList attr_list;
40 struct cql_prop_entry *next;
/* State behind a cql_transform_t handle: the head of the property list
 * and the tokenizer configuration used to parse property lines. Code
 * elsewhere in this file also uses `nmem`, `error` and `addinfo`
 * members; their declarations are not part of this excerpt. */
43 struct cql_transform_t_ {
44 struct cql_prop_entry *entry;
45 yaz_tok_cfg_t tok_cfg;
/* Allocates a transform handle with xmalloc and gives it a fresh
 * tokenizer configuration and a fresh NMEM pool. Initialisation of the
 * remaining members and the return statement are outside this excerpt. */
52 cql_transform_t cql_transform_create(void)
54 cql_transform_t ct = (cql_transform_t) xmalloc(sizeof(*ct));
55 ct->tok_cfg = yaz_tok_cfg_create();
59 ct->nmem = nmem_create();
/* Parses the value part of one property definition from the token
 * stream `tp` and, when parsing succeeded (ret == 0), stores a new
 * cql_prop_entry in ct holding a copy of `pattern`, the accumulated
 * value text and the parsed attribute list.
 *
 * Each attribute is written as "attset type=value" or "type=value".
 * Attribute elements are allocated from ct->nmem and collected in a
 * fixed array of 20 slots; the loop stops once 20 have been read.
 * Returns `ret`: 0 on success, non-zero on failure. */
63 static int cql_transform_parse_tok_line(cql_transform_t ct,
68 Z_AttributeElement *ae[20];
69 int ret = 0; /* 0=OK, != 0 FAIL */
71 WRBUF w = wrbuf_alloc();
/* One iteration per attribute specification. */
75 while (t == YAZ_TOK_STRING && ae_num < 20)
77 WRBUF type_str = wrbuf_alloc();
79 Z_AttributeElement *elem = 0;
80 const char *value_str = 0;
81 /* attset type=value OR type=value */
83 elem = (Z_AttributeElement *) nmem_malloc(ct->nmem, sizeof(*elem));
84 elem->attributeSet = 0;
86 wrbuf_puts(w, yaz_tok_parse_string(tp));
87 wrbuf_puts(type_str, yaz_tok_parse_string(tp));
91 wrbuf_destroy(type_str);
93 wrbuf_destroy(set_str);
96 if (t == YAZ_TOK_STRING)
99 wrbuf_puts(w, yaz_tok_parse_string(tp));
103 yaz_string_to_oid_nmem(yaz_oid_std(), CLASS_ATTSET,
104 wrbuf_cstr(set_str), ct->nmem);
106 type_str = wrbuf_alloc();
107 wrbuf_puts(type_str, yaz_tok_parse_string(tp));
108 t = yaz_tok_move(tp);
/* The attribute type must scan as an integer; a warning is logged when
 * it does not. */
110 elem->attributeType = nmem_intdup(ct->nmem, 0);
111 if (sscanf(wrbuf_cstr(type_str), ODR_INT_PRINTF, elem->attributeType)
114 wrbuf_destroy(type_str);
116 wrbuf_destroy(set_str);
117 yaz_log(YLOG_WARN, "Expected numeric attribute type");
122 wrbuf_destroy(type_str);
124 wrbuf_destroy(set_str);
/* NOTE(review): the warning text below repeats the word "after". */
128 yaz_log(YLOG_WARN, "Expected = after after attribute type");
132 t = yaz_tok_move(tp);
133 if (t != YAZ_TOK_STRING) /* value */
135 yaz_log(YLOG_WARN, "Missing attribute value");
/* A value whose first character is a digit is stored as a numeric
 * attribute value.
 * NOTE(review): atoi() reports no conversion errors, so a value such as
 * "12abc" would presumably be accepted as 12 - confirm this is intended. */
139 value_str = yaz_tok_parse_string(tp);
140 if (yaz_isdigit(*value_str))
142 elem->which = Z_AttributeValue_numeric;
143 elem->value.numeric =
144 nmem_intdup(ct->nmem, atoi(value_str));
/* Complex value: a one-slot list whose element is a string copy of the
 * value, with no semantic actions. Presumably this is the non-digit
 * branch; the branching line is outside this excerpt. */
148 Z_ComplexAttribute *ca = (Z_ComplexAttribute *)
149 nmem_malloc(ct->nmem, sizeof(*ca));
150 elem->which = Z_AttributeValue_complex;
151 elem->value.complex = ca;
153 ca->list = (Z_StringOrNumeric **)
154 nmem_malloc(ct->nmem, sizeof(Z_StringOrNumeric *));
155 ca->list[0] = (Z_StringOrNumeric *)
156 nmem_malloc(ct->nmem, sizeof(Z_StringOrNumeric));
157 ca->list[0]->which = Z_StringOrNumeric_string;
158 ca->list[0]->u.string = nmem_strdup(ct->nmem, value_str);
159 ca->num_semanticAction = 0;
160 ca->semanticAction = 0;
163 wrbuf_puts(w, yaz_tok_parse_string(tp));
164 t = yaz_tok_move(tp);
/* Success: create the entry. The pattern and value strings are xstrdup
 * copies; the attribute pointer array is copied into ct->nmem. */
168 if (ret == 0) /* OK? */
170 struct cql_prop_entry **pp = &ct->entry;
173 *pp = (struct cql_prop_entry *) xmalloc(sizeof(**pp));
174 (*pp)->pattern = xstrdup(pattern);
175 (*pp)->value = xstrdup(wrbuf_cstr(w));
177 (*pp)->attr_list.num_attributes = ae_num;
179 (*pp)->attr_list.attributes = 0;
182 (*pp)->attr_list.attributes = (Z_AttributeElement **)
183 nmem_malloc(ct->nmem,
184 ae_num * sizeof(Z_AttributeElement *));
185 memcpy((*pp)->attr_list.attributes, ae,
186 ae_num * sizeof(Z_AttributeElement *));
/* Prints the attribute list to the YAZ log file through an ODR print
 * stream. Presumably debug-only; the guarding condition is outside this
 * excerpt. */
192 ODR pr = odr_createmem(ODR_PRINT);
193 Z_AttributeList *alp = &(*pp)->attr_list;
194 odr_setprint(pr, yaz_log_file());
195 z_AttributeList(pr, &alp, 0, 0);
/* Defines a single property from in-memory strings: tokenises `value`,
 * hands the tokens and `pattern` to cql_transform_parse_tok_line() and
 * then destroys the token parser.
 * NOTE(review): the parser is created before "=" is registered as a
 * single-character token on the configuration - confirm that the parser
 * picks up configuration changes made after its creation. */
204 int cql_transform_define_pattern(cql_transform_t ct, const char *pattern,
208 yaz_tok_parse_t tp = yaz_tok_parse_buf(ct->tok_cfg, value);
209 yaz_tok_cfg_single_tokens(ct->tok_cfg, "=");
210 r = cql_transform_parse_tok_line(ct, pattern, tp);
211 yaz_tok_parse_destroy(tp);
/* Builds a transform handle from the property lines read from `f`.
 * Each line is read with fgets and tokenised with "=" as a
 * single-character token. A line whose first token is a string is
 * treated as "pattern ..." and passed to cql_transform_parse_tok_line().
 * On each visible failure path the token parser is destroyed and the
 * handle is closed. */
215 cql_transform_t cql_transform_open_FILE(FILE *f)
217 cql_transform_t ct = cql_transform_create();
220 yaz_tok_cfg_single_tokens(ct->tok_cfg, "=");
222 while (fgets(line, sizeof(line)-1, f))
224 yaz_tok_parse_t tp = yaz_tok_parse_buf(ct->tok_cfg, line);
226 t = yaz_tok_move(tp);
227 if (t == YAZ_TOK_STRING)
/* NOTE(review): `pattern` is an xstrdup copy and no release is visible
 * in this excerpt - confirm it is freed on every path, including the
 * error exits below. */
229 char * pattern = xstrdup(yaz_tok_parse_string(tp));
230 t = yaz_tok_move(tp);
233 yaz_tok_parse_destroy(tp);
234 cql_transform_close(ct);
237 if (cql_transform_parse_tok_line(ct, pattern, tp))
239 yaz_tok_parse_destroy(tp);
240 cql_transform_close(ct);
/* A first token that is neither a string nor end-of-input is rejected. */
245 else if (t != YAZ_TOK_EOF)
247 yaz_tok_parse_destroy(tp);
248 cql_transform_close(ct);
251 yaz_tok_parse_destroy(tp);
/* Releases a transform handle: walks the property list (saving each
 * entry's successor before moving on), then destroys the tokenizer
 * configuration and the NMEM pool. The per-entry release calls and the
 * release of the handle itself are outside this excerpt. */
256 void cql_transform_close(cql_transform_t ct)
258 struct cql_prop_entry *pe;
264 struct cql_prop_entry *pe_next = pe->next;
271 yaz_tok_cfg_destroy(ct->tok_cfg);
272 nmem_destroy(ct->nmem);
/* Opens `fname` for reading and builds a transform handle from it via
 * cql_transform_open_FILE(). Handling of a failed fopen and closing of
 * the stream are outside this excerpt. */
276 cql_transform_t cql_transform_open_fname(const char *fname)
279 FILE *f = fopen(fname, "r");
282 ct = cql_transform_open_FILE(f);
/* NOTE(review): Z_AttributeElement is already used earlier in this file,
 * so this is presumably a disabled reference copy of the library
 * declaration (for example inside a preprocessor guard that is not
 * visible here) - confirm before relying on it. */
288 struct Z_AttributeElement {
289 Z_AttributeSetId *attributeSet; /* OPT */
294 Z_ComplexAttribute *complex;
295 #define Z_AttributeValue_numeric 1
296 #define Z_AttributeValue_complex 2
/* Compares two attribute elements by encoding each one with its own ODR
 * encoder and comparing the resulting byte buffers with yaz_memcmp().
 * Release of the two ODR streams and the return statement are outside
 * this excerpt. */
301 static int compare_attr(Z_AttributeElement *a, Z_AttributeElement *b)
303 ODR odr_a = odr_createmem(ODR_ENCODE);
304 ODR odr_b = odr_createmem(ODR_ENCODE);
309 z_AttributeElement(odr_a, &a, 0, 0);
310 z_AttributeElement(odr_b, &b, 0, 0);
312 buf_a = odr_getbuf(odr_a, &len_a, 0);
313 buf_b = odr_getbuf(odr_b, &len_b, 0);
315 ret = yaz_memcmp(buf_a, buf_b, len_a, len_b);
/* Reverse lookup from an attribute list to a property name.
 * Scans the property list for the first entry whose pattern begins with
 * `category` and whose attributes are all found among `attributes`
 * (compared with compare_attr()). Returns the part of that pattern that
 * follows the category prefix. The result when nothing matches is
 * outside this excerpt. */
322 const char *cql_lookup_reverse(cql_transform_t ct,
323 const char *category,
324 Z_AttributeList *attributes)
326 struct cql_prop_entry *e;
327 size_t clen = strlen(category);
328 for (e = ct->entry; e; e = e->next)
330 if (!strncmp(e->pattern, category, clen))
332 /* category matches.. See if attributes in pattern value
333 are all listed in actual attributes */
335 for (i = 0; i < e->attr_list.num_attributes; i++)
337 /* entry attribute */
338 Z_AttributeElement *e_ae = e->attr_list.attributes[i];
340 for (j = 0; j < attributes->num_attributes; j++)
342 /* actual attribute */
343 Z_AttributeElement *a_ae = attributes->attributes[j];
344 int r = compare_attr(e_ae, a_ae);
348 if (j == attributes->num_attributes)
349 break; /* i was not found at all.. try next pattern */
/* The outer loop ran to completion, so every entry attribute was found. */
352 if (i == e->attr_list.num_attributes)
353 return e->pattern + clen;
/* Looks up a property by a dotted name built from up to three parts.
 * The name is "pat1.pat2.pat3", "pat1.pat2", "pat1.pat3" or just "pat1",
 * depending on which parts are non-null; each part contributes at most
 * 39 characters. The property list is then searched with cql_strcmp().
 * What is returned for a match or a miss is outside this excerpt. */
359 static const char *cql_lookup_property(cql_transform_t ct,
360 const char *pat1, const char *pat2,
364 struct cql_prop_entry *e;
366 if (pat1 && pat2 && pat3)
367 sprintf(pattern, "%.39s.%.39s.%.39s", pat1, pat2, pat3);
368 else if (pat1 && pat2)
369 sprintf(pattern, "%.39s.%.39s", pat1, pat2);
370 else if (pat1 && pat3)
371 sprintf(pattern, "%.39s.%.39s", pat1, pat3);
373 sprintf(pattern, "%.39s", pat1);
377 for (e = ct->entry; e; e = e->next)
379 if (!cql_strcmp(e->pattern, pattern))
/* Looks up the property for `category` and a value, and writes its
 * attribute specification through the `pr` callback as "@attr " items.
 *
 * The value looked up is `val`, or `default_val` when `val` is null.
 * A context-set prefix is found by scanning the "set." entries for one
 * whose value equals `uri`; presumably only when `uri` is given (that
 * condition is outside this excerpt). If the lookup fails for the
 * "relation" category with no prefix, a few symbolic relations are
 * retried under alias names, and "*" is tried as a further fallback.
 * When an error code is supplied and no error is recorded yet, `val` is
 * copied into ct->addinfo. */
385 int cql_pr_attr_uri(cql_transform_t ct, const char *category,
386 const char *uri, const char *val, const char *default_val,
387 void (*pr)(const char *buf, void *client_data),
392 const char *eval = val ? val : default_val;
393 const char *prefix = 0;
397 struct cql_prop_entry *e;
399 for (e = ct->entry; e; e = e->next)
400 if (!memcmp(e->pattern, "set.", 4) && e->value &&
401 !strcmp(e->value, uri))
403 prefix = e->pattern+4;
406 /* must have a prefix now - if not it's an error */
412 res = cql_lookup_property(ct, category, prefix, eval);
413 /* we have some aliases for some relations unfortunately.. */
414 if (!res && !prefix && !strcmp(category, "relation"))
/* NOTE(review): the comparisons below use `val` directly, not `eval`.
 * Confirm `val` cannot be null here; the visible caller passes
 * cn->u.st.relation, which is null-checked elsewhere in this file. */
416 if (!strcmp(val, "=="))
417 res = cql_lookup_property(ct, category, prefix, "exact");
418 if (!strcmp(val, "="))
419 res = cql_lookup_property(ct, category, prefix, "eq");
420 if (!strcmp(val, "<="))
421 res = cql_lookup_property(ct, category, prefix, "le");
422 if (!strcmp(val, ">="))
423 res = cql_lookup_property(ct, category, prefix, "ge");
426 res = cql_lookup_property(ct, category, prefix, "*");
/* The property value is split at each '=' into space-delimited
 * specifications, which are copied into `buf` and written out. */
432 const char *cp0 = res, *cp1;
433 while ((cp1 = strchr(cp0, '=')))
436 while (*cp1 && *cp1 != ' ')
438 if (cp1 - cp0 >= (ptrdiff_t) sizeof(buf))
440 memcpy(buf, cp0, cp1 - cp0);
442 (*pr)("@attr ", client_data);
444 for (i = 0; buf[i]; i++)
447 (*pr)(eval, client_data);
453 (*pr)(tmp, client_data);
456 (*pr)(" ", client_data);
/* Only the first error is kept: nothing is recorded if ct->error is set. */
464 if (errcode && !ct->error)
468 ct->addinfo = xstrdup(val);
/* Convenience wrapper around cql_pr_attr_uri() that passes no URI
 * (a null pointer); all other arguments are forwarded unchanged. */
475 int cql_pr_attr(cql_transform_t ct, const char *category,
476 const char *val, const char *default_val,
477 void (*pr)(const char *buf, void *client_data),
481 return cql_pr_attr_uri(ct, category, 0 /* uri */,
482 val, default_val, pr, client_data, errcode);
/* Writes `val` in decimal, followed by a single space, through the `pr`
 * callback. */
486 static void cql_pr_int(int val,
487 void (*pr)(const char *buf, void *client_data),
490 char buf[21]; /* enough characters to 2^64 */
491 sprintf(buf, "%d", val);
492 (*pr)(buf, client_data);
493 (*pr)(" ", client_data);
/* Translates the modifiers of a proximity operator and writes the
 * resulting parameters through `pr`.
 *
 * Follows the modifier chain (u.st.modifiers) and recognises the names
 * "distance", "ordered", "unordered" and "unit"; any other name, an
 * unknown distance relation or an unknown unit records a diagnostic in
 * ct->error together with a copy of the offending text in ct->addinfo.
 * Output order: exclusion, distance, ordered, relation, the literal
 * "k ", unit. The return values are outside this excerpt. */
497 static int cql_pr_prox(cql_transform_t ct, struct cql_node *mods,
498 void (*pr)(const char *buf, void *client_data),
504 int proxrel = 2; /* less than or equal */
505 int unit = 2; /* word */
509 const char *name = mods->u.st.index;
510 const char *term = mods->u.st.term;
511 const char *relation = mods->u.st.relation;
513 if (!strcmp(name, "distance")) {
/* Base 0 lets strtol accept decimal, octal and hexadecimal notation. */
514 distance = strtol(term, (char**) 0, 0);
515 if (!strcmp(relation, "="))
517 else if (!strcmp(relation, ">"))
519 else if (!strcmp(relation, "<"))
521 else if (!strcmp(relation, ">="))
523 else if (!strcmp(relation, "<="))
525 else if (!strcmp(relation, "<>"))
529 ct->error = YAZ_SRW_UNSUPP_PROX_RELATION;
530 ct->addinfo = xstrdup(relation);
534 else if (!strcmp(name, "ordered"))
536 else if (!strcmp(name, "unordered"))
538 else if (!strcmp(name, "unit"))
540 if (!strcmp(term, "word"))
542 else if (!strcmp(term, "sentence"))
544 else if (!strcmp(term, "paragraph"))
546 else if (!strcmp(term, "element"))
550 ct->error = YAZ_SRW_UNSUPP_PROX_UNIT;
551 ct->addinfo = xstrdup(term);
557 ct->error = YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER;
558 ct->addinfo = xstrdup(name);
561 mods = mods->u.st.modifiers;
/* Fallback distance: 1 for the word unit, 0 for any other unit.
 * Presumably applied only when no distance modifier was given; the
 * guarding condition is outside this excerpt. */
565 distance = (unit == 2) ? 1 : 0;
567 cql_pr_int(exclusion, pr, client_data);
568 cql_pr_int(distance, pr, client_data);
569 cql_pr_int(ordered, pr, client_data);
570 cql_pr_int(proxrel, pr, client_data);
571 (*pr)("k ", client_data);
572 cql_pr_int(unit, pr, client_data);
/* Walks the modifier chain of `cn` looking for a modifier whose index
 * equals `name` (exact strcmp match). The values returned for a hit and
 * a miss are outside this excerpt. */
577 /* ### checks for CQL relation-name rather than Type-1 attribute */
578 static int has_modifier(struct cql_node *cn, const char *name) {
579 struct cql_node *mod;
580 for (mod = cn->u.st.modifiers; mod != 0; mod = mod->u.st.modifiers) {
581 if (!strcmp(mod->u.st.index, name))
/* Writes one search term, preceded by its attributes, through `pr`.
 *
 * Visible steps, in order:
 *  - the "regexp" and "unmasked" modifiers and the presence of a
 *    "truncation.cql" property are checked to choose how the term is
 *    treated;
 *  - the term is scanned, and position attributes (firstAndLast, first,
 *    last, any) and truncation attributes (both, left, right, none,
 *    z3958) are written;
 *  - the index attribute is written, with "serverChoice" as the default
 *    index name;
 *  - one "relationModifier" attribute is written per modifier;
 *  - the term itself is written inside double quotes with escaping.
 * Many of the branch conditions and the assignments they guard are
 * outside this excerpt. */
588 static void emit_term(cql_transform_t ct,
590 const char *term, int length,
591 void (*pr)(const char *buf, void *client_data),
595 const char *ns = cn->u.st.index_uri;
597 int process_term = 1;
599 if (has_modifier(cn, "regexp"))
601 else if (has_modifier(cn, "unmasked"))
603 else if (cql_lookup_property(ct, "truncation", 0, "cql"))
606 cql_pr_attr(ct, "truncation", "cql", 0,
607 pr, client_data, YAZ_SRW_MASKING_CHAR_UNSUPP);
609 assert(cn->which == CQL_NODE_ST);
612 { /* convert term via truncation.things */
615 for (i = 0; i < length; i++)
/* A backslash that is not the last character is handled together with
 * the character after it. */
617 if (term[i] == '\\' && i < length - 1)
626 else if (i == length - 1)
632 else if (i == length - 1)
645 cql_pr_attr(ct, "position", "firstAndLast", 0,
646 pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
650 else if (anchor == 1)
652 cql_pr_attr(ct, "position", "first", 0,
653 pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
657 else if (anchor == 2)
659 cql_pr_attr(ct, "position", "last", 0,
660 pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
665 cql_pr_attr(ct, "position", "any", 0,
666 pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
/* These truncation lookups pass error code 0, so a missing property
 * records no diagnostic here. */
670 if (trunc == 3 && cql_pr_attr(ct, "truncation",
671 "both", 0, pr, client_data, 0))
676 else if (trunc == 1 && cql_pr_attr(ct, "truncation",
677 "left", 0, pr, client_data, 0))
682 else if (trunc == 2 && cql_pr_attr(ct, "truncation", "right", 0,
690 cql_pr_attr(ct, "truncation", "none", 0,
694 cql_pr_attr(ct, "truncation", "z3958", 0,
695 pr, client_data, YAZ_SRW_MASKING_CHAR_UNSUPP);
698 cql_pr_attr_uri(ct, "index", ns,
699 cn->u.st.index, "serverChoice",
700 pr, client_data, YAZ_SRW_UNSUPP_INDEX);
702 if (cn->u.st.modifiers)
704 struct cql_node *mod = cn->u.st.modifiers;
705 for (; mod; mod = mod->u.st.modifiers)
707 cql_pr_attr(ct, "relationModifier", mod->u.st.index, 0,
708 pr, client_data, YAZ_SRW_UNSUPP_RELATION_MODIFIER);
/* The term is written between double quotes. */
711 (*pr)("\"", client_data);
713 for (i = 0; i < length; i++)
715 char x[2]; /* temp buffer */
716 if (term[i] == '\\' && i < length - 1)
719 if (strchr("\"\\", term[i]))
720 pr("\\", client_data);
721 if (z3958_mode && strchr("#?", term[i]))
722 pr("\\\\", client_data); /* double \\ to survive PQF parse */
/* In z3958 mode the CQL wildcard '*' is written as '?' and the CQL
 * wildcard '?' is written as '#'. */
727 else if (z3958_mode && term[i] == '*')
729 pr("?", client_data);
730 if (i < length - 1 && yaz_isdigit(term[i+1]))
731 pr("\\\\", client_data); /* dbl \\ to survive PQF parse */
733 else if (z3958_mode && term[i] == '?')
735 pr("#", client_data);
740 pr("\\", client_data);
741 if (z3958_mode && strchr("#?", term[i]))
742 pr("\\\\", client_data); /* dbl \\ to survive PQF parse */
750 for (i = 0; i < length; i++)
758 (*pr)("\" ", client_data);
/* Writes the main term of `cn` followed by each node on its extra_terms
 * chain, combining them with the operator `op`. An "@op " prefix is
 * written before terms that have further extra terms after them. The
 * condition guarding the first prefix is outside this excerpt.
 * Every term is emitted with `cn` as the node argument, so the
 * attributes come from `cn`, not from the extra-term node. */
761 static void emit_terms(cql_transform_t ct,
763 void (*pr)(const char *buf, void *client_data),
767 struct cql_node *ne = cn->u.st.extra_terms;
770 (*pr)("@", client_data);
771 (*pr)(op, client_data);
772 (*pr)(" ", client_data);
774 emit_term(ct, cn, cn->u.st.term, strlen(cn->u.st.term),
776 for (; ne; ne = ne->u.st.extra_terms)
778 if (ne->u.st.extra_terms)
780 (*pr)("@", client_data);
781 (*pr)(op, client_data);
782 (*pr)(" ", client_data);
784 emit_term(ct, cn, ne->u.st.term, strlen(ne->u.st.term),
/* Splits the term of `cn` at spaces and emits each word as its own term,
 * combined with the operator `op`. Emission runs one word behind: the
 * previously found word is written, preceded by "@op ", when a further
 * word follows, and the final word is written after the loop without a
 * prefix. The loop construct and its conditions are outside this
 * excerpt. */
789 static void emit_wordlist(cql_transform_t ct,
791 void (*pr)(const char *buf, void *client_data),
795 const char *cp0 = cn->u.st.term;
797 const char *last_term = 0;
803 cp1 = strchr(cp0, ' ');
806 (*pr)("@", client_data);
807 (*pr)(op, client_data);
808 (*pr)(" ", client_data);
809 emit_term(ct, cn, last_term, last_length, pr, client_data);
/* Word length: up to the next space, or the rest of the string. */
813 last_length = cp1 - cp0;
815 last_length = strlen(cp0);
819 emit_term(ct, cn, last_term, last_length, pr, client_data);
/* Recursively writes the query tree rooted at `cn` through `pr`.
 * The branch selection is outside this excerpt; the visible handling is:
 *  - search-term nodes: an index named "resultSet" in the CQL context
 *    set is written as an "@set" reference; otherwise the "always",
 *    "relation" and "structure" attributes are written, followed by the
 *    term(s). The relations "all" and "any" split the term into words
 *    joined by "and" and "or" respectively.
 *  - boolean nodes: "@<operator> " is written, proximity modifiers are
 *    handled by cql_pr_prox(), modifiers on other operators are reported
 *    as unsupported, and both operands are transformed recursively.
 *  - sort nodes: only the search part is transformed.
 *  - anything else: a fatal message is printed to stderr. */
822 void cql_transform_r(cql_transform_t ct,
824 void (*pr)(const char *buf, void *client_data),
828 struct cql_node *mods;
835 ns = cn->u.st.index_uri;
838 if (!strcmp(ns, cql_uri())
839 && cn->u.st.index && !cql_strcmp(cn->u.st.index, "resultSet"))
841 (*pr)("@set \"", client_data);
842 (*pr)(cn->u.st.term, client_data);
843 (*pr)("\" ", client_data);
851 ct->error = YAZ_SRW_UNSUPP_CONTEXT_SET;
855 cql_pr_attr(ct, "always", 0, 0, pr, client_data, 0);
856 cql_pr_attr(ct, "relation", cn->u.st.relation, 0, pr, client_data,
857 YAZ_SRW_UNSUPP_RELATION);
/* The relation name is also used as the key for the "structure"
 * attribute lookup. */
858 cql_pr_attr(ct, "structure", cn->u.st.relation, 0,
859 pr, client_data, YAZ_SRW_UNSUPP_COMBI_OF_RELATION_AND_TERM);
860 if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "all"))
861 emit_wordlist(ct, cn, pr, client_data, "and");
862 else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "any"))
863 emit_wordlist(ct, cn, pr, client_data, "or");
865 emit_terms(ct, cn, pr, client_data, "and");
868 (*pr)("@", client_data);
869 (*pr)(cn->u.boolean.value, client_data);
870 (*pr)(" ", client_data);
871 mods = cn->u.boolean.modifiers;
872 if (!strcmp(cn->u.boolean.value, "prox"))
874 if (!cql_pr_prox(ct, mods, pr, client_data))
879 /* Boolean modifiers other than on proximity not supported */
880 ct->error = YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER;
881 ct->addinfo = xstrdup(mods->u.st.index);
885 cql_transform_r(ct, cn->u.boolean.left, pr, client_data);
886 cql_transform_r(ct, cn->u.boolean.right, pr, client_data);
889 cql_transform_r(ct, cn->u.sort.search, pr, client_data);
892 fprintf(stderr, "Fatal: impossible CQL node-type %d\n", cn->which);
/* Top-level entry point: applies the configured context-set prefixes to
 * the query tree and then writes the transformed query through `pr`.
 * Entries named "set.<prefix>" bind <prefix> to the entry's value;
 * an entry named exactly "set" is applied with a null prefix. A
 * temporary NMEM pool is created for the prefix step. Its release and
 * the return value are outside this excerpt. */
897 int cql_transform(cql_transform_t ct, struct cql_node *cn,
898 void (*pr)(const char *buf, void *client_data),
901 struct cql_prop_entry *e;
902 NMEM nmem = nmem_create();
908 for (e = ct->entry; e ; e = e->next)
910 if (!cql_strncmp(e->pattern, "set.", 4))
911 cql_apply_prefix(nmem, cn, e->pattern+4, e->value);
912 else if (!cql_strcmp(e->pattern, "set"))
913 cql_apply_prefix(nmem, cn, 0, e->value);
915 cql_transform_r(ct, cn, pr, client_data);
/* Transforms the query tree `cn` and writes the result to the stream
 * `f`, using cql_fputs as the output callback. Returns whatever
 * cql_transform() returns. */
921 int cql_transform_FILE(cql_transform_t ct, struct cql_node *cn, FILE *f)
923 return cql_transform(ct, cn, cql_fputs, f);
/* Transforms the query tree `cn` into a caller-supplied buffer, using
 * cql_buf_write_handler with a cql_buf_write_info as callback state.
 * On the overflow path the error is set to
 * YAZ_SRW_TOO_MANY_CHARS_IN_QUERY, with the buffer limit (info.max) in
 * decimal as additional information; otherwise the output is terminated
 * with a NUL byte at info.off. The overflow test itself is outside this
 * excerpt. */
926 int cql_transform_buf(cql_transform_t ct, struct cql_node *cn,
929 struct cql_buf_write_info info;
935 r = cql_transform(ct, cn, cql_buf_write_handler, &info);
937 /* Attempt to write past end of buffer. For some reason, this
938 SRW diagnostic is deprecated, but it's so perfect for our
939 purposes that it would be stupid not to use it. */
941 ct->error = YAZ_SRW_TOO_MANY_CHARS_IN_QUERY;
942 sprintf(numbuf, "%ld", (long) info.max);
943 ct->addinfo = xstrdup(numbuf);
947 info.buf[info.off] = '\0';
/* Reports the error state of the handle: stores the handle's addinfo
 * pointer in *addinfo (the string is not copied). The return statement
 * is outside this excerpt. */
951 int cql_transform_error(cql_transform_t ct, const char **addinfo)
953 *addinfo = ct->addinfo;
/* Sets the error state of the handle. ct->addinfo becomes an xstrdup
 * copy of `addinfo`, or null when `addinfo` is null. Release of any
 * previous addinfo and the store of `error` are outside this excerpt. */
957 void cql_transform_set_error(cql_transform_t ct, int error, const char *addinfo)
960 ct->addinfo = addinfo ? xstrdup(addinfo) : 0;
967 * c-file-style: "Stroustrup"
968 * indent-tabs-mode: nil
970 * vim: shiftwidth=4 tabstop=8 expandtab