1 /* $Id: cqltransform.c,v 1.29 2007-10-31 21:58:07 adam Exp $
2 Copyright (C) 1995-2007, Index Data ApS
5 This file is part of the YAZ toolkit.
11 * \file cqltransform.c
12 * \brief Implements CQL transform (CQL to RPN conversion).
14 * Evaluation order of rules:
29 #include <yaz/xmalloc.h>
30 #include <yaz/diagsrw.h>
/* Node of the singly linked property list; `next' links to the entry
 * that follows and is null at the end of the list. */
32 struct cql_prop_entry {
35 struct cql_prop_entry *next;
/* Transform state; presumably the structure behind the cql_transform_t
 * handle (the typedef is not part of this file section). */
38 struct cql_transform_t_ {
/* first entry of the property list */
39 struct cql_prop_entry *entry;
/* Create a transform handle from the open stream `f'.
 * Every line read is split into a pattern and a value; both are copied
 * into a newly allocated cql_prop_entry that is linked in through `pp'.
 * On the error path the partly built handle is released with
 * cql_transform_close(). */
44 cql_transform_t cql_transform_open_FILE(FILE *f)
47 cql_transform_t ct = (cql_transform_t) xmalloc (sizeof(*ct));
/* pp always addresses the link that will receive the next entry */
48 struct cql_prop_entry **pp = &ct->entry;
52 while (fgets(line, sizeof(line)-1, f))
54 const char *cp_value_start;
55 const char *cp_value_end;
56 const char *cp_pattern_start;
57 const char *cp_pattern_end;
58 const char *cp = line;
/* The `*cp &&' guard is needed because strchr() also matches the
 * terminating NUL; without it the scan would run past the line. */
60 while (*cp && strchr(" \t", *cp))
62 cp_pattern_start = cp;
/* the pattern stops at whitespace, '=' or '#' */
64 while (*cp && !strchr(" \t\r\n=#", *cp))
/* true when the line holds no pattern text at all */
67 if (cp == cp_pattern_start)
69 while (*cp && strchr(" \t", *cp))
/* give up: free everything built so far */
74 cql_transform_close(ct);
78 while (*cp && strchr(" \t\r\n", *cp))
/* the value ends at a '#' if the rest of the line has one ... */
81 cp_value_end = strchr(cp, '#');
/* ... and at the end of the line otherwise (presumably only when the
 * strchr() above found nothing) */
83 cp_value_end = strlen(line) + line;
/* test for trailing whitespace; the first comparison keeps
 * cp_value_end[-1] from reaching in front of the value */
85 if (cp_value_end != cp_value_start &&
86 strchr(" \t\r\n", cp_value_end[-1]))
/* allocate the entry and store NUL-terminated copies of both parts */
88 *pp = (struct cql_prop_entry *) xmalloc (sizeof(**pp));
89 (*pp)->pattern = (char *) xmalloc(cp_pattern_end-cp_pattern_start + 1);
90 memcpy ((*pp)->pattern, cp_pattern_start,
91 cp_pattern_end-cp_pattern_start);
92 (*pp)->pattern[cp_pattern_end-cp_pattern_start] = '\0';
94 (*pp)->value = (char *) xmalloc (cp_value_end-cp_value_start + 1);
/* an empty value needs no copy, only the terminator */
95 if (cp_value_start != cp_value_end)
96 memcpy ((*pp)->value, cp_value_start, cp_value_end-cp_value_start);
97 (*pp)->value[cp_value_end - cp_value_start] = '\0';
/* Dispose of the transform handle `ct'. */
104 void cql_transform_close(cql_transform_t ct)
106 struct cql_prop_entry *pe;
/* remember the successor first, presumably because the current entry
 * is released before the walk moves on */
112 struct cql_prop_entry *pe_next = pe->next;
/* Open the file named `fname' for reading and build the transform
 * handle from it with cql_transform_open_FILE(). */
123 cql_transform_t cql_transform_open_fname(const char *fname)
126 FILE *f = fopen(fname, "r");
129 ct = cql_transform_open_FILE(f);
/* Search the property list of `ct' for a dotted key assembled from
 * pat1, pat2 and pat3.  A null pat2 or pat3 is left out of the key.
 * Each component is cut to at most 39 characters by the %.39s
 * conversions.  Entries are compared in list order with cql_strcmp();
 * presumably the value of the first match is returned. */
134 static const char *cql_lookup_property(cql_transform_t ct,
135 const char *pat1, const char *pat2,
139 struct cql_prop_entry *e;
/* key forms: pat1.pat2.pat3, pat1.pat2, pat1.pat3, pat1 */
141 if (pat1 && pat2 && pat3)
142 sprintf (pattern, "%.39s.%.39s.%.39s", pat1, pat2, pat3);
143 else if (pat1 && pat2)
144 sprintf (pattern, "%.39s.%.39s", pat1, pat2);
145 else if (pat1 && pat3)
146 sprintf (pattern, "%.39s.%.39s", pat1, pat3);
148 sprintf (pattern, "%.39s", pat1);
152 for (e = ct->entry; e; e = e->next)
154 if (!cql_strcmp(e->pattern, pattern))
/* Look up the property for `category' and a value, and print the
 * attribute specification found there through `pr'.
 *
 * The value used is `val', or `default_val' when `val' is null.  When
 * a namespace `uri' is involved, the "set.<prefix>" entry whose value
 * equals the uri supplies the prefix used in the lookup.  The lookup
 * is tried with the value itself and with "*".
 *
 * When `errcode' is nonzero and no error is recorded in `ct' yet, the
 * failure path stores diagnostic information in `ct', including a copy
 * of `val' as additional info. */
160 int cql_pr_attr_uri(cql_transform_t ct, const char *category,
161 const char *uri, const char *val, const char *default_val,
162 void (*pr)(const char *buf, void *client_data),
167 const char *eval = val ? val : default_val;
168 const char *prefix = 0;
172 struct cql_prop_entry *e;
174 for (e = ct->entry; e; e = e->next)
/* NOTE(review): memcmp() inspects 4 bytes even when the pattern is
 * shorter than that; patterns are allocated at exactly their own
 * length, and cql_transform() uses cql_strncmp() for the same test --
 * confirm this cannot read past the allocation. */
175 if (!memcmp(e->pattern, "set.", 4) && e->value &&
176 !strcmp(e->value, uri))
/* the prefix is the part of the pattern after "set." */
178 prefix = e->pattern+4;
181 /* must have a prefix now - if not it's an error */
/* exact value first ... */
187 res = cql_lookup_property(ct, category, prefix, eval);
/* ... then the wildcard entry */
189 res = cql_lookup_property(ct, category, prefix, "*");
/* `res' is scanned for pieces that contain '=' and end at a space or
 * at the end of the string; each piece is copied into `buf' and
 * printed after "@attr " */
195 const char *cp0 = res, *cp1;
196 while ((cp1 = strchr(cp0, '=')))
199 while (*cp1 && *cp1 != ' ')
/* the piece must fit in buf together with its terminator */
201 if (cp1 - cp0 >= sizeof(buf))
203 memcpy (buf, cp0, cp1 - cp0);
205 (*pr)("@attr ", client_data);
207 for (i = 0; buf[i]; i++)
210 (*pr)(eval, client_data);
216 (*pr)(tmp, client_data);
219 (*pr)(" ", client_data);
/* keep the first error: nothing is overwritten once ct->error is set */
227 if (errcode && !ct->error)
231 ct->addinfo = xstrdup(val);
/* Same as cql_pr_attr_uri() with a null namespace uri; all other
 * arguments are passed through unchanged and the result is returned
 * as is. */
238 int cql_pr_attr(cql_transform_t ct, const char *category,
239 const char *val, const char *default_val,
240 void (*pr)(const char *buf, void *client_data),
244 return cql_pr_attr_uri(ct, category, 0 /* uri */,
245 val, default_val, pr, client_data, errcode);
/* Print `val' in decimal, followed by a single space, through the
 * `pr' callback. */
249 static void cql_pr_int (int val,
250 void (*pr)(const char *buf, void *client_data),
253 char buf[21]; /* enough characters to 2^64 */
254 sprintf(buf, "%d", val);
255 (*pr)(buf, client_data);
256 (*pr)(" ", client_data);
/* Print the arguments of a proximity operator through `pr'.
 *
 * The modifier chain starting at `mods' is walked; the modifier names
 * "distance", "ordered", "unordered" and "unit" are recognised.  The
 * output order is: exclusion, distance, ordered, relation, the literal
 * "k " and the unit.
 *
 * On an unsupported relation, unit or modifier name, ct->error is set
 * to 40, 42 or 46 respectively and ct->addinfo to a copy of the
 * offending string.  The caller tests the result with `!', so zero
 * presumably signals failure. */
260 static int cql_pr_prox(cql_transform_t ct, struct cql_node *mods,
261 void (*pr)(const char *buf, void *client_data),
265 int distance; /* to be filled in later depending on unit */
266 int distance_defined = 0;
268 int proxrel = 2; /* less than or equal */
269 int unit = 2; /* word */
272 char *name = mods->u.st.index;
273 char *term = mods->u.st.term;
274 char *relation = mods->u.st.relation;
276 if (!strcmp(name, "distance")) {
/* base 0: decimal, octal (leading 0) and hexadecimal (leading 0x) are
 * all accepted; with a null end pointer a non-numeric term goes
 * unnoticed and yields 0 */
277 distance = strtol(term, (char**) 0, 0);
278 distance_defined = 1;
279 if (!strcmp(relation, "=")) {
281 } else if (!strcmp(relation, ">")) {
283 } else if (!strcmp(relation, "<")) {
285 } else if (!strcmp(relation, ">=")) {
287 } else if (!strcmp(relation, "<=")) {
289 } else if (!strcmp(relation, "<>")) {
292 ct->error = 40; /* Unsupported proximity relation */
293 ct->addinfo = xstrdup(relation);
296 } else if (!strcmp(name, "ordered")) {
298 } else if (!strcmp(name, "unordered")) {
300 } else if (!strcmp(name, "unit")) {
301 if (!strcmp(term, "word")) {
303 } else if (!strcmp(term, "sentence")) {
305 } else if (!strcmp(term, "paragraph")) {
307 } else if (!strcmp(term, "element")) {
310 ct->error = 42; /* Unsupported proximity unit */
311 ct->addinfo = xstrdup(term);
315 ct->error = 46; /* Unsupported boolean modifier */
316 ct->addinfo = xstrdup(name);
/* advance to the next modifier in the chain */
320 mods = mods->u.st.modifiers;
/* no explicit distance: 1 when the unit is word (2), 0 for any other unit */
323 if (!distance_defined)
324 distance = (unit == 2) ? 1 : 0;
326 cql_pr_int(exclusion, pr, client_data);
327 cql_pr_int(distance, pr, client_data);
328 cql_pr_int(ordered, pr, client_data);
329 cql_pr_int(proxrel, pr, client_data);
330 (*pr)("k ", client_data);
331 cql_pr_int(unit, pr, client_data);
336 /* Returns location of first wildcard character in the `length'
337 * characters starting at `term', or a null pointer if there are
338 * none -- like memchr().
340 static const char *wcchar(int start, const char *term, int length)
/* A character preceded by a backslash does not count as a wildcard.
 * When `start' is nonzero the test short-circuits and term[-1] is not
 * read; emit_term() passes 1 when `term' is the first character of the
 * term and 0 when scanning onwards from a position inside it. */
344 if (start || term[-1] != '\\')
/* the wildcard characters are '*' and '?' */
345 if (strchr("*?", *term))
355 /* ### checks for CQL relation-name rather than Type-1 attribute */
/* Walk the modifier chain of `cn' looking for a modifier whose index
 * equals `name'; the comparison is an exact, case-sensitive strcmp().
 * The result is used as a truth value by emit_term(). */
356 static int has_modifier(struct cql_node *cn, const char *name) {
357 struct cql_node *mod;
358 for (mod = cn->u.st.modifiers; mod != 0; mod = mod->u.st.modifiers) {
359 if (!strcmp(mod->u.st.index, name))
/* Print the attributes and the quoted term for the search-term node
 * `cn' through `pr'.  `term' and `length' delimit the text to emit; it
 * need not be the node's whole term (emit_wordlist() passes single
 * words).
 *
 * In order: a "position" attribute chosen from '^' anchors, a
 * "truncation" attribute chosen from wildcard characters, the "index"
 * attribute, one "relationModifier" attribute per modifier, and the
 * term itself between double quotes followed by a space. */
367 void emit_term(cql_transform_t ct,
369 const char *term, int length,
370 void (*pr)(const char *buf, void *client_data),
374 const char *ns = cn->u.st.index_uri;
/* a "regexp" modifier switches off the anchor and wildcard handling */
375 int process_term = !has_modifier(cn, "regexp");
378 assert(cn->which == CQL_NODE_ST);
/* '^' at the start and/or end of the term selects the position attribute */
380 if (process_term && length > 0)
382 if (length > 1 && term[0] == '^' && term[length-1] == '^')
384 cql_pr_attr(ct, "position", "firstAndLast", 0,
385 pr, client_data, 32);
389 else if (term[0] == '^')
391 cql_pr_attr(ct, "position", "first", 0,
392 pr, client_data, 32);
396 else if (term[length-1] == '^')
398 cql_pr_attr(ct, "position", "last", 0,
399 pr, client_data, 32);
404 cql_pr_attr(ct, "position", "any", 0,
405 pr, client_data, 32);
409 if (process_term && length > 0)
/* first_wc: first wildcard not preceded by a backslash; second_wc: the
 * next such wildcard after it, searched in the remainder of the term */
411 const char *first_wc = wcchar(1, term, length);
412 const char *second_wc = first_wc ?
413 wcchar(0, first_wc+1, length-(first_wc-term)-1) : 0;
415 /* Check for well-known globbing patterns that represent
416 * simple truncation attributes as expected by, for example,
417 * a Bath-compliant server. If we find such a pattern but
418 * there's no mapping for it, that's fine: we just use a
419 * general pattern-matching attribute.
421 if (first_wc == term && second_wc == term + length-1
422 && *first_wc == '*' && *second_wc == '*'
423 && cql_pr_attr(ct, "truncation", "both", 0, pr, client_data, 0))
428 else if (first_wc == term && second_wc == 0 && *first_wc == '*'
429 && cql_pr_attr(ct, "truncation", "left", 0,
435 else if (first_wc == term + length-1 && second_wc == 0
437 && cql_pr_attr(ct, "truncation", "right", 0,
444 /* We have one or more wildcard characters, but not in a
445 * way that can be dealt with using only the standard
446 * left-, right- and both-truncation attributes. We need
447 * to translate the pattern into a Z39.58-type pattern,
448 * which has been supported in BIB-1 since 1996. If
449 * there's no configuration element for "truncation.z3958"
450 * we indicate this as error 28 "Masking character not
454 cql_pr_attr(ct, "truncation", "z3958", 0,
455 pr, client_data, 28);
456 z3958_mem = (char *) xmalloc(length+1);
457 for (i = 0; i < length; i++)
459 if (i > 0 && term[i-1] == '\\')
460 z3958_mem[i] = term[i];
461 else if (term[i] == '*')
463 else if (term[i] == '?')
466 z3958_mem[i] = term[i];
468 z3958_mem[length] = '\0';
472 /* No masking characters. Use "truncation.none" if given. */
473 cql_pr_attr(ct, "truncation", "none", 0,
/* index attribute: looked up under the node's namespace uri, with
 * "serverChoice" used when the node has no index; diagnostic 16 on
 * failure */
478 cql_pr_attr_uri(ct, "index", ns,
479 cn->u.st.index, "serverChoice",
480 pr, client_data, 16);
/* every modifier on the node becomes a relationModifier attribute;
 * diagnostic 20 on failure */
482 if (cn->u.st.modifiers)
484 struct cql_node *mod = cn->u.st.modifiers;
485 for (; mod; mod = mod->u.st.modifiers)
487 cql_pr_attr(ct, "relationModifier", mod->u.st.index, 0,
488 pr, client_data, 20);
/* finally the term itself, wrapped in double quotes */
492 (*pr)("\"", client_data);
493 for (i = 0; i<length; i++)
495 /* pr(int) each character */
501 /* do we have to escape this char? */
509 (*pr)(cp, client_data);
511 (*pr)("\" ", client_data);
/* Emit the term of `cn' word by word, the words being separated by
 * single spaces, combined with the operator `op' (cql_transform_r()
 * passes "and" and "or").  For a word that is followed by another one,
 * "@<op> " is printed in front of it; the final word is emitted by the
 * emit_term() call after the loop. */
515 void emit_wordlist(cql_transform_t ct,
517 void (*pr)(const char *buf, void *client_data),
521 const char *cp0 = cn->u.st.term;
523 const char *last_term = 0;
/* look for the end of the current word */
529 cp1 = strchr(cp0, ' ');
532 (*pr)("@", client_data);
533 (*pr)(op, client_data);
534 (*pr)(" ", client_data);
535 emit_term(ct, cn, last_term, last_length, pr, client_data);
/* length of the current word: up to the space, or to the end of the
 * string for the last word */
539 last_length = cp1 - cp0;
541 last_length = strlen(cp0);
545 emit_term(ct, cn, last_term, last_length, pr, client_data);
/* Recursive worker of the conversion: prints the tree rooted at `cn'
 * through `pr'.
 *
 * Search-term nodes: an index of "resultSet" in the CQL namespace
 * yields `@set "<term>" '; otherwise the "always", "relation" and
 * "structure" attributes are printed, followed by the term (split into
 * words for the relations "all" and "any").
 *
 * Boolean nodes: "@<operator> " is printed, then the proximity
 * arguments for "prox", then the left and right subtrees.  Modifiers
 * on any other operator are reported as diagnostic 46.
 *
 * An unknown node type is reported on stderr. */
548 void cql_transform_r(cql_transform_t ct,
550 void (*pr)(const char *buf, void *client_data),
554 struct cql_node *mods;
561 ns = cn->u.st.index_uri;
564 if (!strcmp(ns, cql_uri())
565 && cn->u.st.index && !cql_strcmp(cn->u.st.index, "resultSet"))
567 (*pr)("@set \"", client_data);
568 (*pr)(cn->u.st.term, client_data);
569 (*pr)("\" ", client_data);
/* errcode 0: a missing "always" property is not an error */
581 cql_pr_attr(ct, "always", 0, 0, pr, client_data, 0);
/* "=", "<=" and ">=" are looked up under the names eq, le and ge; any
 * other relation is looked up under its own name */
582 if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "="))
583 cql_pr_attr(ct, "relation", "eq", "scr",
584 pr, client_data, 19);
585 else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "<="))
586 cql_pr_attr(ct, "relation", "le", "scr",
587 pr, client_data, 19);
588 else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, ">="))
589 cql_pr_attr(ct, "relation", "ge", "scr",
590 pr, client_data, 19);
592 cql_pr_attr(ct, "relation", cn->u.st.relation, "eq",
593 pr, client_data, 19);
/* the structure attribute is keyed by the relation as well */
594 cql_pr_attr(ct, "structure", cn->u.st.relation, 0,
595 pr, client_data, 24);
596 if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "all"))
598 emit_wordlist(ct, cn, pr, client_data, "and");
600 else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "any"))
602 emit_wordlist(ct, cn, pr, client_data, "or");
606 emit_term(ct, cn, cn->u.st.term, strlen(cn->u.st.term),
/* boolean node: operator first, operands afterwards (prefix notation) */
611 (*pr)("@", client_data);
612 (*pr)(cn->u.boolean.value, client_data);
613 (*pr)(" ", client_data);
614 mods = cn->u.boolean.modifiers;
615 if (!strcmp(cn->u.boolean.value, "prox")) {
616 if (!cql_pr_prox(ct, mods, pr, client_data))
619 /* Boolean modifiers other than on proximity not supported */
620 ct->error = 46; /* SRW diag: "Unsupported boolean modifier" */
621 ct->addinfo = xstrdup(mods->u.st.index);
625 cql_transform_r(ct, cn->u.boolean.left, pr, client_data);
626 cql_transform_r(ct, cn->u.boolean.right, pr, client_data);
630 fprintf(stderr, "Fatal: impossible CQL node-type %d\n", cn->which);
/* Convert the CQL tree `cn', sending the output to `pr'.
 * Before the conversion, every "set.<prefix>" property and the plain
 * "set" property are applied to the tree with cql_apply_prefix(),
 * using memory from a freshly created NMEM.  The conversion itself is
 * done by cql_transform_r(). */
635 int cql_transform(cql_transform_t ct,
637 void (*pr)(const char *buf, void *client_data),
640 struct cql_prop_entry *e;
641 NMEM nmem = nmem_create();
648 for (e = ct->entry; e ; e = e->next)
/* "set.<prefix>" binds a named prefix, plain "set" passes a null prefix */
650 if (!cql_strncmp(e->pattern, "set.", 4))
651 cql_apply_prefix(nmem, cn, e->pattern+4, e->value);
652 else if (!cql_strcmp(e->pattern, "set"))
653 cql_apply_prefix(nmem, cn, 0, e->value);
655 cql_transform_r (ct, cn, pr, client_data);
/* Convert `cn' and write the output to the stream `f': calls
 * cql_transform() with cql_fputs as the print callback and `f' as its
 * client data, and returns that call's result. */
661 int cql_transform_FILE(cql_transform_t ct, struct cql_node *cn, FILE *f)
663 return cql_transform(ct, cn, cql_fputs, f);
/* Convert `cn' into a caller-supplied buffer.  Output is collected by
 * cql_buf_write_handler() in a cql_buf_write_info.  If the output does
 * not fit, ct->error is set to YAZ_SRW_TOO_MANY_CHARS_IN_QUERY with
 * the value of info.max as additional info; the buffer is
 * NUL-terminated at info.off (presumably on the success path only). */
666 int cql_transform_buf(cql_transform_t ct, struct cql_node *cn,
669 struct cql_buf_write_info info;
675 r = cql_transform(ct, cn, cql_buf_write_handler, &info);
677 /* Attempt to write past end of buffer. For some reason, this
678 SRW diagnostic is deprecated, but it's so perfect for our
679 purposes that it would be stupid not to use it. */
681 ct->error = YAZ_SRW_TOO_MANY_CHARS_IN_QUERY;
682 sprintf(numbuf, "%ld", (long) info.max);
683 ct->addinfo = xstrdup(numbuf);
687 info.buf[info.off] = '\0';
/* Report the error state of `ct': the additional-info string held in
 * the handle is stored in *addinfo.  The pointer refers to the
 * handle's own string, not to a copy. */
691 int cql_transform_error(cql_transform_t ct, const char **addinfo)
693 *addinfo = ct->addinfo;
699 * indent-tabs-mode: nil
701 * vim: shiftwidth=4 tabstop=8 expandtab