1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2008 Index Data
3 * See the file LICENSE for details.
8 * \brief Implements CQL transform (CQL to RPN conversion).
10 * Evaluation order of rules:
25 #include <yaz/xmalloc.h>
26 #include <yaz/diagsrw.h>
/* Node of the singly linked property list. Code elsewhere in this file
 * stores two heap strings on each node (`pattern` and `value`) and follows
 * `next` until it is null. */
28 struct cql_prop_entry {
/* Following entry, or null at the end of the list. */
31 struct cql_prop_entry *next;
/* State reached through a cql_transform_t handle (presumably a pointer to
 * this struct -- the typedef is not in this file section). Besides `entry`,
 * code in this file also reads and writes `error` and `addinfo` members
 * through the handle. */
34 struct cql_transform_t_ {
/* Head of the property-entry list; traversals stop at a null `next`. */
35 struct cql_prop_entry *entry;
/* Builds a transform handle from the property lines read from `f`.
 *
 * From what is visible here, every accepted line is split into a pattern
 * (text up to the first blank, tab, CR, LF, '=' or '#') and a value (text
 * up to a '#' or the end of the line). Each pair is copied to the heap and
 * stored as a new cql_prop_entry through `pp`.
 *
 * NOTE(review): several statements of this function are not visible in
 * this listing (loop bodies, the handling of '=', the return statement);
 * the comments below describe the visible lines only. */
40 cql_transform_t cql_transform_open_FILE(FILE *f)
43 cql_transform_t ct = (cql_transform_t) xmalloc(sizeof(*ct));
/* pp addresses the link that receives the next entry; it starts at the
 * list head. */
44 struct cql_prop_entry **pp = &ct->entry;
/* fgets() stores at most size-1 characters, so passing sizeof(line)-1
 * leaves the final byte of `line` unused. */
48 while (fgets(line, sizeof(line)-1, f))
50 const char *cp_value_start;
51 const char *cp_value_end;
52 const char *cp_pattern_start;
53 const char *cp_pattern_end;
54 const char *cp = line;
/* Leading blanks and tabs before the pattern (loop body not visible;
 * presumably it advances cp). */
56 while (*cp && strchr(" \t", *cp))
58 cp_pattern_start = cp;
/* The pattern runs until whitespace, '=' or '#'. */
60 while (*cp && !strchr(" \t\r\n=#", *cp))
/* Empty pattern, e.g. a blank or comment-only line (the action taken is
 * not visible here). */
63 if (cp == cp_pattern_start)
65 while (*cp && strchr(" \t", *cp))
/* The handle is closed on some path of this loop -- presumably a syntax
 * error such as a missing '='; TODO confirm against the full source. */
70 cql_transform_close(ct);
/* Whitespace in front of the value. */
74 while (*cp && strchr(" \t\r\n", *cp))
/* The value ends at a '#' ... */
77 cp_value_end = strchr(cp, '#');
/* ... or at the end of the line (presumably only when no '#' was found;
 * the guarding condition is not visible). */
79 cp_value_end = strlen(line) + line;
/* NOTE(review): this is an `if`, so at most one trailing whitespace
 * character can be accounted for. A value followed by CR LF, or by several
 * blanks before a '#', would keep the remaining whitespace -- confirm
 * whether a loop is intended. */
81 if (cp_value_end != cp_value_start &&
82 strchr(" \t\r\n", cp_value_end[-1]))
/* New entry: the pattern is copied and NUL-terminated ... */
84 *pp = (struct cql_prop_entry *) xmalloc(sizeof(**pp));
85 (*pp)->pattern = (char *) xmalloc(cp_pattern_end-cp_pattern_start + 1);
86 memcpy((*pp)->pattern, cp_pattern_start,
87 cp_pattern_end-cp_pattern_start);
88 (*pp)->pattern[cp_pattern_end-cp_pattern_start] = '\0';
/* ... and so is the value; memcpy is skipped for an empty value, which is
 * then stored as "". */
90 (*pp)->value = (char *) xmalloc(cp_value_end-cp_value_start + 1);
91 if (cp_value_start != cp_value_end)
92 memcpy((*pp)->value, cp_value_start, cp_value_end-cp_value_start);
93 (*pp)->value[cp_value_end - cp_value_start] = '\0';
/* Releases a transform handle.
 *
 * The visible code walks the entry list and saves each node's `next` in
 * pe_next before moving on, which is what allows the node itself to be
 * released inside the loop (the release calls are not visible in this
 * listing -- TODO confirm). */
100 void cql_transform_close(cql_transform_t ct)
102 struct cql_prop_entry *pe;
108 struct cql_prop_entry *pe_next = pe->next;
/* Opens the file named `fname` for reading ("r", text mode) and builds a
 * transform handle from its contents with cql_transform_open_FILE().
 *
 * NOTE(review): neither the check for a failed fopen() nor the matching
 * fclose() is visible in this listing -- confirm both exist. */
118 cql_transform_t cql_transform_open_fname(const char *fname)
121 FILE *f = fopen(fname, "r");
124 ct = cql_transform_open_FILE(f);
/* Finds the property entry whose pattern matches a dot-joined key built
 * from pat1, pat2 and pat3.
 *
 * Null parts are left out of the key and every part is cut to at most 39
 * characters by the "%.39s" conversions. Entries are compared with
 * cql_strcmp(). What is returned on a match is not visible here; callers
 * treat the result as a string, so presumably it is the entry's value, with
 * null for "not found" -- TODO confirm. */
129 static const char *cql_lookup_property(cql_transform_t ct,
130 const char *pat1, const char *pat2,
134 struct cql_prop_entry *e;
/* Longest possible key: 3 * 39 characters + 2 dots + NUL = 120 bytes.
 * NOTE(review): the declaration of `pattern` is not visible; confirm it
 * holds at least 120 bytes. */
136 if (pat1 && pat2 && pat3)
137 sprintf(pattern, "%.39s.%.39s.%.39s", pat1, pat2, pat3);
138 else if (pat1 && pat2)
139 sprintf(pattern, "%.39s.%.39s", pat1, pat2);
140 else if (pat1 && pat3)
141 sprintf(pattern, "%.39s.%.39s", pat1, pat3);
/* Key made of pat1 alone (the condition guarding this line is not
 * visible). */
143 sprintf(pattern, "%.39s", pat1);
/* Linear scan of the entry list. */
147 for (e = ct->entry; e; e = e->next)
149 if (!cql_strcmp(e->pattern, pattern))
/* Looks up the attribute specification configured for `category` and a
 * value, and emits it through `pr`.
 *
 * ct          - transform handle holding the property entries
 * category    - first part of the property key, e.g. "relation"
 * uri         - context-set URI; when given it is translated to a prefix
 *               through the "set.<prefix>" entries. cql_pr_attr() passes 0.
 * val         - value to look up; default_val is used when val is null
 * default_val - fallback value, may itself be null
 * pr          - output callback, called with `client_data` on every piece
 * errcode     - stored in ct->error when non-zero and no error has been
 *               recorded yet
 *
 * The return statements are not visible in this listing; callers use the
 * result as a truth value, presumably non-zero when something was emitted. */
155 int cql_pr_attr_uri(cql_transform_t ct, const char *category,
156 const char *uri, const char *val, const char *default_val,
157 void (*pr)(const char *buf, void *client_data),
162 const char *eval = val ? val : default_val;
163 const char *prefix = 0;
167 struct cql_prop_entry *e;
/* Translate uri to a prefix: take the "set.<prefix>" entry whose value is
 * exactly uri.
 * NOTE(review): memcmp() reads 4 bytes of e->pattern whatever its length,
 * while cql_transform_open_FILE() allocates only strlen+1 bytes per
 * pattern; for patterns shorter than 3 characters this reads past the
 * allocation. strncmp() would stop at the terminator. */
169 for (e = ct->entry; e; e = e->next)
170 if (!memcmp(e->pattern, "set.", 4) && e->value &&
171 !strcmp(e->value, uri))
173 prefix = e->pattern+4;
176 /* must have a prefix now - if not it's an error */
/* First choice: the key built from category, prefix and the value. */
182 res = cql_lookup_property(ct, category, prefix, eval);
183 /* we have some aliases for some relations unfortunately.. */
/* NOTE(review): the comparisons below use `val`, not `eval`. `val` may be
 * null (hence the fallback to default_val above), in which case strcmp()
 * dereferences a null pointer -- confirm that callers never pass a null
 * val together with category "relation". */
184 if (!res && !prefix && !strcmp(category, "relation"))
186 if (!strcmp(val, "=="))
187 res = cql_lookup_property(ct, category, prefix, "exact");
188 if (!strcmp(val, "="))
189 res = cql_lookup_property(ct, category, prefix, "eq");
190 if (!strcmp(val, "<="))
191 res = cql_lookup_property(ct, category, prefix, "le");
192 if (!strcmp(val, ">="))
193 res = cql_lookup_property(ct, category, prefix, "ge");
/* Last resort: the catch-all "*" entry of the category (the guarding
 * condition is not visible). */
196 res = cql_lookup_property(ct, category, prefix, "*");
/* res is processed piece by piece: each piece runs from cp0 to the first
 * blank (or the end of the string) that follows an '='. */
202 const char *cp0 = res, *cp1;
203 while ((cp1 = strchr(cp0, '=')))
206 while (*cp1 && *cp1 != ' ')
/* Piece does not fit in buf (the action taken is not visible here). */
208 if (cp1 - cp0 >= sizeof(buf))
210 memcpy(buf, cp0, cp1 - cp0);
212 (*pr)("@attr ", client_data);
/* The piece is emitted one character at a time; at some characters the
 * effective value `eval` is emitted instead (presumably in place of a '*'
 * -- the test is not visible). */
214 for (i = 0; buf[i]; i++)
217 (*pr)(eval, client_data);
223 (*pr)(tmp, client_data);
226 (*pr)(" ", client_data);
/* Only the first error is recorded: ct->error is left alone when it is
 * already set. */
234 if (errcode && !ct->error)
238 ct->addinfo = xstrdup(val);
/* Convenience form of cql_pr_attr_uri() for lookups that are not tied to a
 * context-set URI: passes 0 as uri and forwards every other argument
 * unchanged, returning whatever cql_pr_attr_uri() returns. */
245 int cql_pr_attr(cql_transform_t ct, const char *category,
246 const char *val, const char *default_val,
247 void (*pr)(const char *buf, void *client_data),
251 return cql_pr_attr_uri(ct, category, 0 /* uri */,
252 val, default_val, pr, client_data, errcode);
/* Emits `val` in decimal followed by a single blank, as two separate calls
 * of the `pr` callback. */
256 static void cql_pr_int(int val,
257 void (*pr)(const char *buf, void *client_data),
260 char buf[21]; /* enough characters to 2^64 */
261 sprintf(buf, "%d", val);
262 (*pr)(buf, client_data);
263 (*pr)(" ", client_data);
/* Emits the arguments of a proximity operator.
 *
 * Walks the modifier chain `mods` (linked through u.st.modifiers) and
 * interprets the modifiers "distance", "ordered", "unordered" and "unit";
 * any other modifier name, relation or unit records an error in ct. The
 * output, in order, is: exclusion, distance, ordered, relation code, the
 * literal "k" and the unit code, each followed by a blank.
 *
 * The return statements are not visible in this listing; the caller tests
 * the result with `!`, so presumably 0 signals failure -- TODO confirm. */
267 static int cql_pr_prox(cql_transform_t ct, struct cql_node *mods,
268 void (*pr)(const char *buf, void *client_data),
272 int distance; /* to be filled in later depending on unit */
273 int distance_defined = 0;
275 int proxrel = 2; /* less than or equal */
276 int unit = 2; /* word */
280 const char *name = mods->u.st.index;
281 const char *term = mods->u.st.term;
282 const char *relation = mods->u.st.relation;
284 if (!strcmp(name, "distance")) {
/* Base 0: strtol() accepts decimal, octal ("0...") and hexadecimal
 * ("0x...") notation. The long result is narrowed to int and no
 * conversion-error check is visible. */
285 distance = strtol(term, (char**) 0, 0);
286 distance_defined = 1;
/* Relation of the distance modifier (the values assigned in each branch
 * are not visible in this listing). */
287 if (!strcmp(relation, "="))
289 else if (!strcmp(relation, ">"))
291 else if (!strcmp(relation, "<"))
293 else if (!strcmp(relation, ">="))
295 else if (!strcmp(relation, "<="))
297 else if (!strcmp(relation, "<>"))
/* Any other relation is reported as unsupported. */
301 ct->error = YAZ_SRW_UNSUPP_PROX_RELATION;
302 ct->addinfo = xstrdup(relation);
306 else if (!strcmp(name, "ordered"))
308 else if (!strcmp(name, "unordered"))
310 else if (!strcmp(name, "unit"))
312 if (!strcmp(term, "word"))
314 else if (!strcmp(term, "sentence"))
316 else if (!strcmp(term, "paragraph"))
318 else if (!strcmp(term, "element"))
/* Unknown unit name. */
322 ct->error = YAZ_SRW_UNSUPP_PROX_UNIT;
323 ct->addinfo = xstrdup(term);
/* Unknown modifier name. */
329 ct->error = YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER;
330 ct->addinfo = xstrdup(name);
333 mods = mods->u.st.modifiers;
/* Without an explicit distance: 1 when the unit code is 2 (word),
 * otherwise 0. */
336 if (!distance_defined)
337 distance = (unit == 2) ? 1 : 0;
339 cql_pr_int(exclusion, pr, client_data);
340 cql_pr_int(distance, pr, client_data);
341 cql_pr_int(ordered, pr, client_data);
342 cql_pr_int(proxrel, pr, client_data);
343 (*pr)("k ", client_data);
344 cql_pr_int(unit, pr, client_data);
349 /* Returns location of first wildcard character in the `length'
350 * characters starting at `term', or a null pointer if there are
351 * none -- like memchr().
/* `start` non-zero tells wcchar() that term[-1] must not be inspected;
 * emit_term() passes 1 when `term` is the beginning of the whole term. */
353 static const char *wcchar(int start, const char *term, int length)
/* A character preceded by a backslash is escaped and does not count as a
 * wildcard; `start` short-circuits the look-behind. */
357 if (start || term[-1] != '\\')
/* NOTE(review): strchr() also matches the terminating NUL of "*?", so a
 * '\0' byte inside the scanned range would be reported as a wildcard --
 * confirm that `length` never covers the terminator. */
358 if (strchr("*?", *term))
/* Walks the modifier chain of search-term node `cn` and compares each
 * modifier's index with `name` using case-sensitive strcmp(). The return
 * statements are not visible in this listing; emit_term() uses the result
 * as a truth value, presumably non-zero when the modifier is present. */
368 /* ### checks for CQL relation-name rather than Type-1 attribute */
369 static int has_modifier(struct cql_node *cn, const char *name) {
370 struct cql_node *mod;
371 for (mod = cn->u.st.modifiers; mod != 0; mod = mod->u.st.modifiers) {
372 if (!strcmp(mod->u.st.index, name))
/* Emits one term of search-term node `cn`, preceded by its attributes.
 *
 * term/length - the characters to emit (not necessarily NUL-terminated at
 *               `length`, see emit_wordlist())
 * pr          - output callback, called with `client_data`
 *
 * Visible steps: position attribute derived from '^' anchors, truncation
 * attribute derived from '*' and '?' wildcards, index attribute,
 * one relationModifier attribute per modifier, then the term itself in
 * double quotes. Anchor and wildcard handling is skipped when the node
 * carries a "regexp" modifier or the term is empty.
 *
 * NOTE(review): a number of statements are not visible in this listing
 * (for example any adjustment of term/length after an anchor is found). */
380 void emit_term(cql_transform_t ct,
382 const char *term, int length,
383 void (*pr)(const char *buf, void *client_data),
387 const char *ns = cn->u.st.index_uri;
388 int process_term = !has_modifier(cn, "regexp");
/* NOTE(review): cn->u.st has already been read by the two initialisers
 * above when this assertion runs. */
391 assert(cn->which == CQL_NODE_ST);
/* Anchors: '^' at both ends, at the start only, at the end only, or
 * nowhere. If no mapping is configured, cql_pr_attr() records the
 * anchoring diagnostic in ct. */
393 if (process_term && length > 0)
395 if (length > 1 && term[0] == '^' && term[length-1] == '^')
397 cql_pr_attr(ct, "position", "firstAndLast", 0,
398 pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
402 else if (term[0] == '^')
404 cql_pr_attr(ct, "position", "first", 0,
405 pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
409 else if (term[length-1] == '^')
411 cql_pr_attr(ct, "position", "last", 0,
412 pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
417 cql_pr_attr(ct, "position", "any", 0,
418 pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
/* Wildcards: first_wc is the first unescaped '*' or '?' of the term,
 * second_wc the next one after it (null when there is none). */
422 if (process_term && length > 0)
424 const char *first_wc = wcchar(1, term, length);
425 const char *second_wc = first_wc ?
426 wcchar(0, first_wc+1, length-(first_wc-term)-1) : 0;
428 /* Check for well-known globbing patterns that represent
429 * simple truncation attributes as expected by, for example,
430 * Bath-compliant server. If we find such a pattern but
431 * there's no mapping for it, that's fine: we just use a
432 * general pattern-matching attribute.
434 if (first_wc == term && second_wc == term + length-1
435 && *first_wc == '*' && *second_wc == '*'
436 && cql_pr_attr(ct, "truncation", "both", 0, pr, client_data, 0))
441 else if (first_wc == term && second_wc == 0 && *first_wc == '*'
442 && cql_pr_attr(ct, "truncation", "left", 0,
448 else if (first_wc == term + length-1 && second_wc == 0
450 && cql_pr_attr(ct, "truncation", "right", 0,
457 /* We have one or more wildcard characters, but not in a
458 * way that can be dealt with using only the standard
459 * left-, right- and both-truncation attributes. We need
460 * to translate the pattern into a Z39.58-type pattern,
461 * which has been supported in BIB-1 since 1996. If
462 * there's no configuration element for "truncation.z3958"
463 * we indicate this as error 28 "Masking character not
467 cql_pr_attr(ct, "truncation", "z3958", 0,
468 pr, client_data, YAZ_SRW_MASKING_CHAR_UNSUPP);
469 z3958_mem = (char *) xmalloc(length+1);
470 for (i = 0; i < length; i++)
472 if (i > 0 && term[i-1] == '\\')
473 z3958_mem[i] = term[i];
474 else if (term[i] == '*')
476 else if (term[i] == '?')
479 z3958_mem[i] = term[i];
481 z3958_mem[length] = '\0';
485 /* No masking characters. Use "truncation.none" if given. */
486 cql_pr_attr(ct, "truncation", "none", 0,
/* Index attribute: looked up under the node's context-set URI `ns`, with
 * "serverChoice" as the value when the node has no index. */
491 cql_pr_attr_uri(ct, "index", ns,
492 cn->u.st.index, "serverChoice",
493 pr, client_data, YAZ_SRW_UNSUPP_INDEX);
/* One relationModifier attribute per modifier in the node's chain. */
495 if (cn->u.st.modifiers)
497 struct cql_node *mod = cn->u.st.modifiers;
498 for (; mod; mod = mod->u.st.modifiers)
500 cql_pr_attr(ct, "relationModifier", mod->u.st.index, 0,
501 pr, client_data, YAZ_SRW_UNSUPP_RELATION_MODIFIER);
/* Finally the term itself, enclosed in double quotes and followed by a
 * blank. */
505 (*pr)("\"", client_data);
506 for (i = 0; i<length; i++)
508 /* pr(int) each character */
509 /* we do not need to deal with \-sequences because the
510 CQL and PQF terms have same \-format, bug #1988 */
515 (*pr)(buf, client_data);
517 (*pr)("\" ", client_data);
/* Emits the term of `cn` and every term on its extra_terms chain, combined
 * with the prefix operator "@<op> ".
 *
 * Every term, including the extra ones, is emitted with `cn` as its node,
 * so attributes and modifiers are taken from `cn`.
 *
 * NOTE(review): the condition guarding the first operator is not visible
 * in this listing; presumably it is emitted only when extra terms exist. */
521 void emit_terms(cql_transform_t ct,
523 void (*pr)(const char *buf, void *client_data),
527 struct cql_node *ne = cn->u.st.extra_terms;
530 (*pr)("@", client_data);
531 (*pr)(op, client_data);
532 (*pr)(" ", client_data);
534 emit_term(ct, cn, cn->u.st.term, strlen(cn->u.st.term),
536 for (; ne; ne = ne->u.st.extra_terms)
/* A further operator is needed only while more extra terms follow. */
538 if (ne->u.st.extra_terms)
540 (*pr)("@", client_data);
541 (*pr)(op, client_data);
542 (*pr)(" ", client_data);
544 emit_term(ct, cn, ne->u.st.term, strlen(ne->u.st.term),
/* Splits the term of `cn` at blanks and emits the words combined with the
 * prefix operator "@<op> ".
 *
 * Emission of each word is deferred through last_term/last_length: the
 * operator and the previous word are written once a further word is found,
 * and the final word is written after the loop without an operator.
 *
 * NOTE(review): the loop header and the tests around these statements are
 * not visible in this listing. */
549 void emit_wordlist(cql_transform_t ct,
551 void (*pr)(const char *buf, void *client_data),
555 const char *cp0 = cn->u.st.term;
557 const char *last_term = 0;
/* End of the current word, or null when no blank is left. */
563 cp1 = strchr(cp0, ' ');
566 (*pr)("@", client_data);
567 (*pr)(op, client_data);
568 (*pr)(" ", client_data);
569 emit_term(ct, cn, last_term, last_length, pr, client_data);
/* Word length: up to the blank, or the rest of the string. */
573 last_length = cp1 - cp0;
575 last_length = strlen(cp0);
579 emit_term(ct, cn, last_term, last_length, pr, client_data);
/* Recursive worker of the transformation: emits the output for node `cn`
 * and, for boolean nodes, for both of its subtrees.
 *
 * Search-term nodes: a "resultSet" index in the CQL context set becomes
 *   @set "<term>"; otherwise the "always", "relation" and "structure"
 *   attributes are emitted, followed by the term(s). Relations "all" and
 *   "any" split the term into words joined by "and" / "or".
 * Boolean nodes: "@<operator> " is emitted, proximity arguments are added
 *   for "prox", modifiers on any other operator are reported as
 *   unsupported, then the left and right subtrees follow in that order.
 * Any other node type prints a "Fatal" message on stderr.
 *
 * NOTE(review): the dispatch on cn->which and several guards are not
 * visible in this listing. */
582 void cql_transform_r(cql_transform_t ct,
584 void (*pr)(const char *buf, void *client_data),
588 struct cql_node *mods;
595 ns = cn->u.st.index_uri;
/* NOTE(review): strcmp() requires a non-null ns; the guard is not visible
 * here -- confirm. */
598 if (!strcmp(ns, cql_uri())
599 && cn->u.st.index && !cql_strcmp(cn->u.st.index, "resultSet"))
601 (*pr)("@set \"", client_data);
602 (*pr)(cn->u.st.term, client_data);
603 (*pr)("\" ", client_data);
611 ct->error = YAZ_SRW_UNSUPP_CONTEXT_SET;
/* "always" is looked up without a value and without an error code, so a
 * missing entry is not an error. */
615 cql_pr_attr(ct, "always", 0, 0, pr, client_data, 0);
616 cql_pr_attr(ct, "relation", cn->u.st.relation, 0, pr, client_data,
617 YAZ_SRW_UNSUPP_RELATION);
/* The structure attribute is looked up under the relation name as well. */
618 cql_pr_attr(ct, "structure", cn->u.st.relation, 0,
619 pr, client_data, YAZ_SRW_UNSUPP_COMBI_OF_RELATION_AND_TERM);
620 if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "all"))
621 emit_wordlist(ct, cn, pr, client_data, "and");
622 else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "any"))
623 emit_wordlist(ct, cn, pr, client_data, "or");
625 emit_terms(ct, cn, pr, client_data, "and");
/* Boolean node: operator first (prefix notation), operands afterwards. */
628 (*pr)("@", client_data);
629 (*pr)(cn->u.boolean.value, client_data);
630 (*pr)(" ", client_data);
631 mods = cn->u.boolean.modifiers;
632 if (!strcmp(cn->u.boolean.value, "prox"))
634 if (!cql_pr_prox(ct, mods, pr, client_data))
639 /* Boolean modifiers other than on proximity not supported */
640 ct->error = YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER;
641 ct->addinfo = xstrdup(mods->u.st.index);
645 cql_transform_r(ct, cn->u.boolean.left, pr, client_data);
646 cql_transform_r(ct, cn->u.boolean.right, pr, client_data);
650 fprintf(stderr, "Fatal: impossible CQL node-type %d\n", cn->which);
/* Transforms the CQL tree `cn` using the properties of `ct`, writing the
 * result through the `pr` callback.
 *
 * Before the tree is emitted, every "set.<prefix>" entry and the plain
 * "set" entry (default context set, passed with a null prefix) is applied
 * to the tree with cql_apply_prefix(), using a private NMEM handle.
 *
 * NOTE(review): the return statement and the release of `nmem` are not
 * visible in this listing -- confirm nmem is destroyed on every path. */
655 int cql_transform(cql_transform_t ct, struct cql_node *cn,
656 void (*pr)(const char *buf, void *client_data),
659 struct cql_prop_entry *e;
660 NMEM nmem = nmem_create();
666 for (e = ct->entry; e ; e = e->next)
668 if (!cql_strncmp(e->pattern, "set.", 4))
669 cql_apply_prefix(nmem, cn, e->pattern+4, e->value);
670 else if (!cql_strcmp(e->pattern, "set"))
671 cql_apply_prefix(nmem, cn, 0, e->value);
673 cql_transform_r(ct, cn, pr, client_data);
/* Transforms `cn` and writes the result to the stream `f`: calls
 * cql_transform() with cql_fputs as the output callback and `f` as its
 * client data, and returns cql_transform()'s result. */
679 int cql_transform_FILE(cql_transform_t ct, struct cql_node *cn, FILE *f)
681 return cql_transform(ct, cn, cql_fputs, f);
/* Transforms `cn` into the caller-supplied buffer `out` of `max` bytes,
 * collecting the output through cql_buf_write_handler and `info`.
 *
 * From the visible lines: when the output did not fit, the deprecated
 * diagnostic YAZ_SRW_TOO_MANY_CHARS_IN_QUERY is recorded with info.max (in
 * decimal) as additional information; otherwise the buffer is
 * NUL-terminated at info.off.
 *
 * NOTE(review): the set-up of `info`, the overflow test and the return
 * statement are not visible in this listing. */
684 int cql_transform_buf(cql_transform_t ct, struct cql_node *cn, char *out, int max)
686 struct cql_buf_write_info info;
692 r = cql_transform(ct, cn, cql_buf_write_handler, &info);
694 /* Attempt to write past end of buffer. For some reason, this
695 SRW diagnostic is deprecated, but it's so perfect for our
696 purposes that it would be stupid not to use it. */
698 ct->error = YAZ_SRW_TOO_MANY_CHARS_IN_QUERY;
699 sprintf(numbuf, "%ld", (long) info.max);
700 ct->addinfo = xstrdup(numbuf);
704 info.buf[info.off] = '\0';
/* Reports the error state of `ct`: stores the handle's additional-info
 * string in *addinfo. The pointer refers to memory owned by the handle (no
 * copy is made here). The return statement is not visible in this listing;
 * presumably it returns ct->error -- TODO confirm. */
708 int cql_transform_error(cql_transform_t ct, const char **addinfo)
710 *addinfo = ct->addinfo;
716 * indent-tabs-mode: nil
718 * vim: shiftwidth=4 tabstop=8 expandtab