2 package org.z3950.zing.cql;
4 import java.io.BufferedReader;
5 import java.io.IOException;
6 import java.util.Properties;
7 import java.io.InputStream;
8 import java.io.FileInputStream;
9 import java.io.InputStreamReader;
10 import java.util.ArrayList;
11 import java.util.HashSet;
12 import java.util.List;
17 * Compiles CQL strings into parse trees of CQLNode subtypes.
19 * @see <A href="http://zing.z3950.org/cql/index.html"
20 * >http://zing.z3950.org/cql/index.html</A>
22 public class CQLParser {
// Tokenizer for the query currently being parsed; assigned in parse().
23 private CQLTokenizer lexer;
24 private final int compat; // CQL dialect: one of V1POINT1, V1POINT2 or V1POINT1SORT
// Extra relation names accepted by isRelation() in addition to the built-in ones.
25 private final Set<String> customRelations = new HashSet<String>();
// Dialect selectors accepted by the constructors.
27 public static final int V1POINT1 = 12368;
28 public static final int V1POINT2 = 12369;
29 public static final int V1POINT1SORT = 12370;
// When true, matchSymbol() accepts the reserved keywords as unquoted symbols.
30 public final boolean allowKeywordTerms;
// Debug switches: DEBUG is turned on by main()'s -d option, LEXDEBUG is handed to the lexer.
32 static private boolean DEBUG = false;
33 static private boolean LEXDEBUG = false;
36 * The new parser implements a dialect of CQL specified by the
37 * <tt>compat</tt> argument:
39 * <li>V1POINT1 - CQL version 1.1
41 * <li>V1POINT2 - CQL version 1.2
43 * <li>V1POINT1SORT - CQL version 1.1 but including
44 * <tt>sortby</tt> as specified for CQL 1.2.
// Creates a parser for the requested dialect; keyword terms are always allowed.
48 public CQLParser(int compat) {
50 this.allowKeywordTerms = true;
54 * Official CQL grammar allows registered keywords like 'and/or/not/sortby/prox'
55 * to be used unquoted in terms. This constructor creates a parser
56 * that prohibits this behavior, at the cost of compatibility.
57 * @param compat CQL version compatibility
58 * @param allowKeywordTerms when false registered keywords are disallowed in unquoted terms
// Creates a parser for the requested dialect with caller-chosen handling of
// unquoted keywords (see allowKeywordTerms).
60 public CQLParser(int compat, boolean allowKeywordTerms) {
62 this.allowKeywordTerms = allowKeywordTerms;
66 * The new parser implements CQL 1.2
69 this.compat = V1POINT2;
70 this.allowKeywordTerms = true;
// Writes a parser trace message to standard error, prefixed with "PARSEDEBUG: ".
// NOTE(review): presumably only active when DEBUG is set - confirm the guard.
73 private static void debug(String str) {
75 System.err.println("PARSEDEBUG: " + str);
79 * Registers custom relation in this parser. Note that when a custom relation
80 * is registered the parser is no longer strictly compliant with the chosen spec.
82 * @return true if custom relation has not been registered already
84 public boolean registerCustomRelation(String relation) {
85 return customRelations.add(relation);
89 * Unregisters previously registered custom relation in this instance of the parser.
91 * @return true is relation has been previously registered
93 public boolean unregisterCustomRelation(String relation) {
94 return customRelations.remove(relation);
98 * Compiles a CQL query.
100 * The resulting parse tree may be further processed by hand (see
101 * the individual node-types' documentation for details on the
102 * data structure) or, more often, simply rendered out in the
103 * desired form using one of the back-ends. <TT>toCQL()</TT>
104 * returns a decompiled CQL query equivalent to the one that was
105 * compiled in the first place; <TT>toXCQL()</TT> returns an
106 * XML snippet representing the query; and <TT>toPQF()</TT>
107 * returns the query rendered in Index Data's Prefix Query
110 * @param cql The query
111 * @return A CQLNode object which is the root of a parse
112 * tree representing the query. */
113 public CQLNode parse(String cql)
114 throws CQLParseException, IOException {
// The lexer lives in an instance field and is replaced on every call.
115 lexer = new CQLLexer(cql, LEXDEBUG);
118 debug("about to parseQuery()");
// Default context for bare terms: index "cql.serverChoice", with relation "="
// under V1POINT2 and "scr" for the other dialects.
119 CQLNode root = parseTopLevelPrefixes("cql.serverChoice",
120 new CQLRelation(compat == V1POINT2 ? "=" : "scr"));
// Any token left over after a complete query is reported as a syntax error.
121 if (lexer.what() != CQLTokenizer.TT_EOF)
122 throw new CQLParseException("junk after end: " + lexer.render(),
// Parses a query at the outermost level: either a '>' prefix mapping, or a
// query that may be followed by a sortby clause in the dialects that allow one.
128 private CQLNode parseTopLevelPrefixes(String index, CQLRelation relation)
129 throws CQLParseException, IOException {
130 debug("top-level prefix mapping");
// A leading '>' introduces a prefix mapping; parsePrefix is told it is at top level.
132 if (lexer.what() == '>') {
133 return parsePrefix(index, relation, true);
136 CQLNode node = parseQuery(index, relation);
// sortby is only recognised for V1POINT2 and V1POINT1SORT.
137 if ((compat == V1POINT2 || compat == V1POINT1SORT) &&
138 lexer.what() == CQLTokenizer.TT_SORTBY) {
142 CQLSortNode sortnode = new CQLSortNode(node);
// Each sort key is a symbol followed by its optional '/' modifiers.
143 while (lexer.what() != CQLTokenizer.TT_EOF) {
144 String sortindex = matchSymbol("sort index");
145 ModifierSet ms = gatherModifiers(sortindex);
146 sortnode.addSortIndex(ms);
// A sortby clause without any key is rejected.
149 if (sortnode.keys.size() == 0) {
150 throw new CQLParseException("no sort keys", lexer.pos());
// Parses a sequence of terms joined by the boolean operators and/or/not/prox.
// Each new operator node takes the tree built so far as its left operand.
159 private CQLNode parseQuery(String index, CQLRelation relation)
160 throws CQLParseException, IOException {
161 debug("in parseQuery()");
163 CQLNode term = parseTerm(index, relation);
// Continue until end of input, a closing parenthesis, or the start of a sortby clause.
164 while (lexer.what() != CQLTokenizer.TT_EOF &&
165 lexer.what() != ')' &&
166 lexer.what() != CQLTokenizer.TT_SORTBY) {
167 if (lexer.what() == CQLTokenizer.TT_AND ||
168 lexer.what() == CQLTokenizer.TT_OR ||
169 lexer.what() == CQLTokenizer.TT_NOT ||
170 lexer.what() == CQLTokenizer.TT_PROX) {
// Remember the operator's token type and text before looking at its modifiers.
171 int type = lexer.what();
172 String val = lexer.value();
174 ModifierSet ms = gatherModifiers(val);
175 CQLNode term2 = parseTerm(index, relation);
// Pick the node class matching the operator; prox is the fall-through case.
176 term = ((type == CQLTokenizer.TT_AND) ? new CQLAndNode(term, term2, ms) :
177 (type == CQLTokenizer.TT_OR) ? new CQLOrNode (term, term2, ms) :
178 (type == CQLTokenizer.TT_NOT) ? new CQLNotNode(term, term2, ms) :
179 new CQLProxNode(term, term2, ms));
// Any other token in operator position is a syntax error.
181 throw new CQLParseException("expected boolean, got " +
182 lexer.render(), lexer.pos());
186 debug("no more ops");
// Collects the '/'-introduced modifiers that follow a relation, boolean
// operator or sort index into a ModifierSet whose base is the given string.
190 private ModifierSet gatherModifiers(String base)
191 throws CQLParseException, IOException {
192 debug("in gatherModifiers()");
194 ModifierSet ms = new ModifierSet(base);
// Every modifier is introduced by a '/'.
195 while (lexer.what() == '/') {
// The modifier type must be a bare word.
197 if (lexer.what() != CQLTokenizer.TT_WORD)
198 throw new CQLParseException("expected modifier, "
199 + "got " + lexer.render(),
// Modifier types are normalised to lower case.
201 String type = lexer.value().toLowerCase();
203 if (!isSymbolicRelation()) {
204 // It's a simple modifier consisting of type only
205 ms.addModifier(type);
207 // It's a complex modifier of the form type=value
208 String comparision = lexer.render(lexer.what(), false);
210 String value = matchSymbol("modifier value");
211 ms.addModifier(type, comparision, value);
// Parses one term: a parenthesised sub-query, a '>' prefix mapping, or a
// search clause made of an optional index and relation followed by the term words.
218 private CQLNode parseTerm(String index, CQLRelation relation)
219 throws CQLParseException, IOException {
220 debug("in parseTerm()");
225 if (lexer.what() == '(') {
226 debug("parenthesised term");
228 CQLNode expr = parseQuery(index, relation);
231 } else if (lexer.what() == '>') {
// Nested prefix mapping: parsePrefix is told it is not at top level.
232 return parsePrefix(index, relation, false);
235 debug("non-parenthesised term");
236 first = matchSymbol("index or term");
237 all = new StringBuilder(first);
238 // A relation is only recognised in second position, directly after the first symbol
// Further words and strings are appended to the term, separated by single spaces.
239 while (isWordOrString() && (all.length() > first.length() || !isRelation())) {
240 all.append(" ").append(lexer.value());
245 break; //we're done if no relation
247 //we have relation, but it only makes sense if preceded by a single term
248 if (all.length() > first.length()) {
249 throw new CQLParseException("unexpected relation '"+lexer.value()+"'"
// Word relations are taken verbatim; symbolic ones are rendered from the token type.
253 String relstr = (lexer.what() == CQLTokenizer.TT_WORD ?
254 lexer.value() : lexer.render(lexer.what(), false));
255 relation = new CQLRelation(relstr);
257 ModifierSet ms = gatherModifiers(relstr);
// NOTE(review): the message below opens a quote before the index but never closes it.
259 debug("index='" + index + ", " +
260 "relation='" + relation.toCQL() + "'");
262 CQLTermNode node = new CQLTermNode(index, relation, all.toString());
263 debug("made term node " + node.toCQL());
// Parses a prefix mapping and the query it applies to, and wraps both in a
// CQLPrefixNode.
267 private CQLNode parsePrefix(String index, CQLRelation relation,
269 throws CQLParseException, IOException {
270 debug("prefix mapping");
// The first symbol is provisionally treated as the identifier...
274 String identifier = matchSymbol("prefix-name");
// ...but if an '=' follows, a second symbol is read and becomes the identifier.
275 if (lexer.what() == '=') {
// NOTE(review): "prefix-identifer" is misspelled in this user-visible message.
278 identifier = matchSymbol("prefix-identifer");
// The mapped query is parsed at the same level the mapping itself was found on.
280 CQLNode node = topLevel ?
281 parseTopLevelPrefixes(index, relation) :
282 parseQuery(index, relation);
284 return new CQLPrefixNode(name, identifier, node);
287 private boolean isWordOrString() {
288 return CQLTokenizer.TT_WORD == lexer.what()
289 || CQLTokenizer.TT_STRING == lexer.what();
292 private boolean isRelation() {
293 debug("isRelation: checking what()=" + lexer.what() +
294 " (" + lexer.render() + ")");
295 if (lexer.what() == CQLTokenizer.TT_WORD &&
296 (lexer.value().indexOf('.') >= 0 ||
297 lexer.value().equals("any") ||
298 lexer.value().equals("all") ||
299 lexer.value().equals("within") ||
300 lexer.value().equals("encloses") ||
301 (lexer.value().equals("exact") && compat != V1POINT2) ||
302 (lexer.value().equals("scr") && compat != V1POINT2) ||
303 (lexer.value().equals("adj") && compat == V1POINT2) ||
304 customRelations.contains(lexer.value())))
307 return isSymbolicRelation();
310 private boolean isSymbolicRelation() {
311 debug("isSymbolicRelation: checking what()=" + lexer.what() +
312 " (" + lexer.render() + ")");
313 return (lexer.what() == '<' ||
314 lexer.what() == '>' ||
315 lexer.what() == '=' ||
316 lexer.what() == CQLTokenizer.TT_LE ||
317 lexer.what() == CQLTokenizer.TT_GE ||
318 lexer.what() == CQLTokenizer.TT_NE ||
319 lexer.what() == CQLTokenizer.TT_EQEQ);
// Checks that the current token has the expected type and throws a
// CQLParseException naming the expected and the actual token otherwise.
322 private void match(int token)
323 throws CQLParseException, IOException {
324 debug("in match(" + lexer.render(token, true) + ")");
325 if (lexer.what() != token)
326 throw new CQLParseException("expected " +
327 lexer.render(token, true) +
328 ", " + "got " + lexer.render(),
// Traces the lexer's current token type and value.
331 debug("match() got token=" + lexer.what() + ", value()='" + lexer.value() + "'");
334 private String matchSymbol(String expected)
335 throws CQLParseException, IOException {
337 debug("in matchSymbol()");
338 if (lexer.what() == CQLTokenizer.TT_WORD ||
339 lexer.what() == CQLTokenizer.TT_STRING ||
340 // The following is a complete list of keywords. Because
341 // they're listed here, they can be used unquoted as
342 // indexes, terms, prefix names and prefix identifiers.
343 (allowKeywordTerms &&
344 lexer.what() == CQLTokenizer.TT_AND ||
345 lexer.what() == CQLTokenizer.TT_OR ||
346 lexer.what() == CQLTokenizer.TT_NOT ||
347 lexer.what() == CQLTokenizer.TT_PROX ||
348 lexer.what() == CQLTokenizer.TT_SORTBY)) {
349 String symbol = lexer.value();
354 throw new CQLParseException("expected " + expected + ", " +
355 "got " + lexer.render(), lexer.pos());
360 * Simple test-harness for the CQLParser class.
362 * Reads a CQL query either from its command-line argument, if
363 * there is one, or standard input otherwise. So these two
364 * invocations are equivalent:
366 * CQLParser 'au=(Kerninghan or Ritchie) and ti=Unix'
367 * echo au=(Kerninghan or Ritchie) and ti=Unix | CQLParser
369 * The test-harness parses the supplied query and renders it as
370 * XCQL, so that both of the invocations above produce the
375 * <value>and</value>
379 * <value>or</value>
381 * <searchClause>
382 * <index>au</index>
384 * <value>=</value>
386 * <term>Kerninghan</term>
387 * </searchClause>
388 * <searchClause>
389 * <index>au</index>
391 * <value>=</value>
393 * <term>Ritchie</term>
394 * </searchClause>
396 * <searchClause>
397 * <index>ti</index>
399 * <value>=</value>
401 * <term>Unix</term>
402 * </searchClause>
407 * CQL version 1.1 (default version 1.2)
409 * Debug mode: extra output written to stderr.
411 * Causes the output to be written in CQL rather than XCQL - that
412 * is, a query equivalent to that which was input, is output. In
413 * effect, the test harness acts as a query canonicaliser.
415 * The input query, either as XCQL [default] or CQL [if the
416 * <TT>-c</TT> option is supplied].
// Command-line test harness: reads a CQL query from the single remaining
// argument or from standard input, parses it, and prints it as XCQL, CQL or PQF.
418 public static void main (String[] args) {
419 char mode = 'x'; // x=XCQL, c=CQL, p=PQF
// The arguments are copied into a list so that options can be consumed from the front.
422 List<String> argv = new ArrayList<String>();
423 for (int i = 0; i < args.length; i++) {
// Options are only recognised in the fixed order -1, -d, then -c or -p.
427 int compat = V1POINT2;
428 if (argv.size() > 0 && argv.get(0).equals("-1")) {
433 if (argv.size() > 0 && argv.get(0).equals("-d")) {
438 if (argv.size() > 0 && argv.get(0).equals("-c")) {
// -p is only honoured when at least one more argument (the properties file) follows it.
441 } else if (argv.size() > 1 && argv.get(0).equals("-p")) {
// NOTE(review): the (String) cast is redundant, argv is a List<String>.
444 pfile = (String) argv.get(0);
// At most one non-option argument (the query) may remain.
448 if (argv.size() > 1) {
// NOTE(review): the usage text never closes the '[' opened before -p.
449 System.err.println("Usage: CQLParser [-1] [-d] [-c] " +
450 "[-p <pqf-properties> [<CQL-query>]");
451 System.err.println("If unspecified, query is read from stdin");
456 if (argv.size() == 1) {
457 cql = (String) argv.get(0);
459 BufferedReader buff = new BufferedReader(new InputStreamReader(System.in));
461 // read a single line of input
462 cql = buff.readLine();
464 System.err.println("Can't read query from stdin");
468 } catch (IOException ex) {
469 System.err.println("Can't read query: " + ex.getMessage());
475 CQLParser parser = new CQLParser(compat);
478 root = parser.parse(cql);
479 } catch (CQLParseException ex) {
480 System.err.println("Syntax error: " + ex.getMessage());
// Builds a run of spaces as long as the error position, to sit under the echoed query.
481 StringBuilder space = new StringBuilder(cql.length());
// NOTE(review): the query is echoed to stdout while the message and the
// position marker go to stderr, so they separate when a stream is redirected.
482 System.out.println(cql);
483 for (int i=0; i<ex.getPosition(); i++) space.append(" ");
485 System.err.println(space.toString());
488 } catch (IOException ex) {
489 System.err.println("Can't compile query: " + ex.getMessage());
496 System.out.println(root.toCQL());
497 } else if (mode == 'p') {
// NOTE(review): no close() of this stream is visible - confirm it is released.
499 InputStream f = new FileInputStream(pfile);
500 Properties config = new Properties();
503 System.out.println(root.toPQF(config));
504 } catch (IOException ex) {
505 System.err.println("Can't load PQF properties:" +
// XCQL is the default output format.
510 System.out.print(root.toXCQL());
// Each PQF translation failure is reported on stderr with its own message.
512 } catch (UnknownIndexException ex) {
513 System.err.println("Unknown index: " + ex.getMessage());
515 } catch (UnknownRelationException ex) {
516 System.err.println("Unknown relation: " + ex.getMessage());
518 } catch (UnknownRelationModifierException ex) {
519 System.err.println("Unknown relation modifier: " +
522 } catch (UnknownPositionException ex) {
523 System.err.println("Unknown position: " + ex.getMessage());
525 } catch (PQFTranslationException ex) {
526 System.err.println("Cannot translate to PQF: " + ex.getMessage());