2 package org.z3950.zing.cql;
3 import java.io.BufferedReader;
4 import java.io.IOException;
5 import java.util.Properties;
6 import java.io.InputStream;
7 import java.io.FileInputStream;
8 import java.io.FileNotFoundException;
9 import java.io.InputStreamReader;
10 import java.io.Reader;
11 import java.io.StringReader;
12 import java.util.ArrayList;
13 import java.util.HashSet;
14 import java.util.List;
19 * Compiles CQL strings into parse trees of CQLNode subtypes.
21 * @see <A href="http://zing.z3950.org/cql/index.html"
22 * >http://zing.z3950.org/cql/index.html</A>
24 public class CQLParser {
// Tokenizer for the query being parsed; parse() assigns a new one on each call.
25 private CQLTokenizer lexer;
26 private final int compat; // CQL dialect: one of V1POINT1, V1POINT2, V1POINT1SORT
// Extra words that isRelation() accepts as relations, managed through
// registerCustomRelation() / unregisterCustomRelation().
27 private final Set<String> customRelations = new HashSet<String>();
// Dialect selectors accepted by the constructors' compat argument.
29 public static final int V1POINT1 = 12368;
30 public static final int V1POINT2 = 12369;
31 public static final int V1POINT1SORT = 12370;
// Consulted by matchSymbol() when deciding whether a keyword token may be used as a symbol.
32 public final boolean allowKeywordTerms;
// NOTE(review): presumably gates debug() output; the guard is not visible in this extract.
34 static private boolean DEBUG = false;
// Passed to the CQLLexer constructor in parse().
35 static private boolean LEXDEBUG = false;
38 * The new parser implements a dialect of CQL specified by the
39 * <tt>compat</tt> argument:
41 * <li>V1POINT1 - CQL version 1.1
43 * <li>V1POINT2 - CQL version 1.2
45 * <li>V1POINT1SORT - CQL version 1.1 but including
46 * <tt>sortby</tt> as specified for CQL 1.2.
// Creates a parser for the dialect selected by compat, with unquoted keyword terms allowed.
// NOTE(review): the assignment of the final field compat is not visible in this extract.
50 public CQLParser(int compat) {
52 this.allowKeywordTerms = true;
56 * Official CQL grammar allows registered keywords like 'and/or/not/sortby/prox'
57 * to be used unquoted in terms. This constructor creates an instance
58 * of a parser that prohibits this behavior while sacrificing compatibility.
59 * @param compat CQL version compatibility
60 * @param allowKeywordTerms when false registered keywords are disallowed in unquoted terms
// Creates a parser for the dialect selected by compat and records whether
// keywords may appear as unquoted symbols (see matchSymbol()).
// NOTE(review): the assignment of the final field compat is not visible in this extract.
62 public CQLParser(int compat, boolean allowKeywordTerms) {
64 this.allowKeywordTerms = allowKeywordTerms;
68 * The new parser implements CQL 1.2
// Default configuration: CQL 1.2 dialect with unquoted keyword terms allowed.
// NOTE(review): the constructor header for these statements is not visible in this extract.
71 this.compat = V1POINT2;
72 this.allowKeywordTerms = true;
// Writes a parser trace message to standard error, prefixed with "PARSEDEBUG: ".
// NOTE(review): presumably guarded by the DEBUG flag; the guard line is not visible in this extract.
75 private static void debug(String str) {
77 System.err.println("PARSEDEBUG: " + str);
81 * Registers custom relation in this parser. Note that when a custom relation
82 * is registered the parser is no longer strictly compliant with the chosen spec.
84 * @return true if custom relation has not been registered already
// Adds the relation to this parser's customRelations set, which isRelation() consults.
86 public boolean registerCustomRelation(String relation) {
// Set.add reports true only when the element was not already present.
87 return customRelations.add(relation);
91 * Unregisters previously registered custom relation in this instance of the parser.
93 * @return true if the relation had been previously registered
// Removes the relation from this parser's customRelations set.
95 public boolean unregisterCustomRelation(String relation) {
// Set.remove reports true only when the element was present.
96 return customRelations.remove(relation);
100 * Compiles a CQL query.
102 * The resulting parse tree may be further processed by hand (see
103 * the individual node-types' documentation for details on the
104 * data structure) or, more often, simply rendered out in the
105 * desired form using one of the back-ends. <TT>toCQL()</TT>
106 * returns a decompiled CQL query equivalent to the one that was
107 * compiled in the first place; <TT>toXCQL()</TT> returns an
108 * XML snippet representing the query; and <TT>toPQF()</TT>
109 * returns the query rendered in Index Data's Prefix Query
112 * @param cql The query
113 * @return A CQLNode object which is the root of a parse
114 * tree representing the query. */
115 public CQLNode parse(String cql)
116 throws CQLParseException, IOException {
// A fresh lexer is created for every query, replacing any previous one held by this parser.
117 lexer = new CQLLexer(cql, LEXDEBUG);
120 debug("about to parseQuery()");
// Parsing starts with the default index "cql.serverChoice"; the default
// relation is "=" for the 1.2 dialect and "scr" for the others.
121 CQLNode root = parseTopLevelPrefixes("cql.serverChoice",
122 new CQLRelation(compat == V1POINT2 ? "=" : "scr"));
// Anything left unconsumed after the top-level query is a syntax error.
123 if (lexer.what() != CQLTokenizer.TT_EOF)
124 throw new CQLParseException("junk after end: " + lexer.render(),
// Parses the outermost level of a query: either a prefix mapping introduced
// by '>' or a plain query, optionally followed by a sortby clause.
//
// index/relation are the defaults handed down to parsePrefix() and parseQuery().
130 private CQLNode parseTopLevelPrefixes(String index, CQLRelation relation)
131 throws CQLParseException, IOException {
132 debug("top-level prefix mapping");
// A leading '>' starts a prefix mapping; topLevel=true is passed to parsePrefix().
134 if (lexer.what() == '>') {
135 return parsePrefix(index, relation, true);
138 CQLNode node = parseQuery(index, relation);
// sortby is only recognised in the V1POINT2 and V1POINT1SORT dialects.
139 if ((compat == V1POINT2 || compat == V1POINT1SORT) &&
140 lexer.what() == CQLTokenizer.TT_SORTBY) {
144 CQLSortNode sortnode = new CQLSortNode(node);
// Every remaining symbol up to end of input is a sort index with optional '/' modifiers.
145 while (lexer.what() != CQLTokenizer.TT_EOF) {
146 String sortindex = matchSymbol("sort index");
147 ModifierSet ms = gatherModifiers(sortindex);
148 sortnode.addSortIndex(ms);
// A sort clause that collected no sort index is rejected.
151 if (sortnode.keys.size() == 0) {
152 throw new CQLParseException("no sort keys", lexer.pos());
// Parses a sequence of terms joined by boolean operators (and, or, not, prox),
// folding each new term into the node built so far.
//
// index/relation are the defaults handed down to parseTerm().
161 private CQLNode parseQuery(String index, CQLRelation relation)
162 throws CQLParseException, IOException {
163 debug("in parseQuery()");
165 CQLNode term = parseTerm(index, relation);
// Continue until end of input, a closing parenthesis, or the start of a sort clause.
166 while (lexer.what() != CQLTokenizer.TT_EOF &&
167 lexer.what() != ')' &&
168 lexer.what() != CQLTokenizer.TT_SORTBY) {
169 if (lexer.what() == CQLTokenizer.TT_AND ||
170 lexer.what() == CQLTokenizer.TT_OR ||
171 lexer.what() == CQLTokenizer.TT_NOT ||
172 lexer.what() == CQLTokenizer.TT_PROX) {
// Remember the operator's type and text before reading its modifiers and right-hand term.
173 int type = lexer.what();
174 String val = lexer.value();
176 ModifierSet ms = gatherModifiers(val);
177 CQLNode term2 = parseTerm(index, relation);
// The node built so far becomes the left operand of the new boolean node.
178 term = ((type == CQLTokenizer.TT_AND) ? new CQLAndNode(term, term2, ms) :
179 (type == CQLTokenizer.TT_OR) ? new CQLOrNode (term, term2, ms) :
180 (type == CQLTokenizer.TT_NOT) ? new CQLNotNode(term, term2, ms) :
181 new CQLProxNode(term, term2, ms));
// Any other token between two terms is a syntax error.
183 throw new CQLParseException("expected boolean, got " +
184 lexer.render(), lexer.pos());
188 debug("no more ops");
// Collects the '/'-introduced modifiers that follow a relation, boolean
// operator or sort index into a ModifierSet created from base.
//
// @param base text the ModifierSet is constructed with
192 private ModifierSet gatherModifiers(String base)
193 throws CQLParseException, IOException {
194 debug("in gatherModifiers()");
196 ModifierSet ms = new ModifierSet(base);
// Each modifier is introduced by '/'.
197 while (lexer.what() == '/') {
// The modifier type must be a plain word.
199 if (lexer.what() != CQLTokenizer.TT_WORD)
200 throw new CQLParseException("expected modifier, "
201 + "got " + lexer.render(),
// Modifier types are normalised to lower case.
203 String type = lexer.value().toLowerCase();
205 if (!isSymbolicRelation()) {
206 // It's a simple modifier consisting of type only
207 ms.addModifier(type);
209 // It's a complex modifier of the form type=value
// The comparison operator is stored in its rendered textual form.
210 String comparision = lexer.render(lexer.what(), false);
212 String value = matchSymbol("modifier value");
213 ms.addModifier(type, comparision, value);
// Parses one term: a parenthesised sub-query, a prefix mapping introduced
// by '>', or a search clause that ends in a CQLTermNode.
//
// index/relation are the defaults used when building the term node.
220 private CQLNode parseTerm(String index, CQLRelation relation)
221 throws CQLParseException, IOException {
222 debug("in parseTerm()");
226 if (lexer.what() == '(') {
227 debug("parenthesised term");
// The sub-query inherits the current index and relation.
229 CQLNode expr = parseQuery(index, relation);
232 } else if (lexer.what() == '>') {
// Nested prefix mapping; topLevel=false is passed to parsePrefix().
233 return parsePrefix(index, relation, false);
236 debug("non-parenthesised term");
237 word = matchSymbol("index or term");
// Consecutive plain words that are not relations are joined, separated by
// single spaces, into one multi-word string.
238 while (lexer.what() == CQLTokenizer.TT_WORD && !isRelation()) {
239 word = word + " " + lexer.value();
240 match(CQLTokenizer.TT_WORD);
// A word relation is taken by its text; a symbolic one by its rendered token.
247 String relstr = (lexer.what() == CQLTokenizer.TT_WORD ?
248 lexer.value() : lexer.render(lexer.what(), false));
249 relation = new CQLRelation(relstr);
// Modifiers following the relation are gathered under the relation's text.
251 ModifierSet ms = gatherModifiers(relstr);
253 debug("index='" + index + ", " +
254 "relation='" + relation.toCQL() + "'");
257 CQLTermNode node = new CQLTermNode(index, relation, word);
258 debug("made term node " + node.toCQL());
// Parses a prefix mapping and the query it applies to, producing a CQLPrefixNode.
//
// index/relation are the defaults handed down to the nested query.
// topLevel selects which parser handles the nested query (see below).
262 private CQLNode parsePrefix(String index, CQLRelation relation,
264 throws CQLParseException, IOException {
265 debug("prefix mapping");
// The first symbol is read as the identifier; when '=' follows, a second
// symbol is read as the identifier instead.
// NOTE(review): the handling of `name` is not visible in this extract.
269 String identifier = matchSymbol("prefix-name");
270 if (lexer.what() == '=') {
273 identifier = matchSymbol("prefix-identifer");
// At top level the nested query may itself carry prefixes and a sort clause;
// otherwise it is parsed as an ordinary query.
275 CQLNode node = topLevel ?
276 parseTopLevelPrefixes(index, relation) :
277 parseQuery(index, relation);
279 return new CQLPrefixNode(name, identifier, node);
// Tests whether the current token is a relation: a recognised word relation
// or, failing that, a symbolic relation.
282 private boolean isRelation() {
283 debug("isRelation: checking what()=" + lexer.what() +
284 " (" + lexer.render() + ")");
// Word relations: anything containing '.', the fixed words any/all/within/encloses,
// the dialect-dependent words below, and any registered custom relation.
285 if (lexer.what() == CQLTokenizer.TT_WORD &&
286 (lexer.value().indexOf('.') >= 0 ||
287 lexer.value().equals("any") ||
288 lexer.value().equals("all") ||
289 lexer.value().equals("within") ||
290 lexer.value().equals("encloses") ||
// "exact" and "scr" count only outside the 1.2 dialect; "adj" only within it.
291 (lexer.value().equals("exact") && compat != V1POINT2) ||
292 (lexer.value().equals("scr") && compat != V1POINT2) ||
293 (lexer.value().equals("adj") && compat == V1POINT2) ||
294 customRelations.contains(lexer.value())))
// Otherwise the answer is whether the token is a symbolic relation.
297 return isSymbolicRelation();
// Tests whether the current token is one of the symbolic relations:
// '<', '>', '=' or the TT_LE, TT_GE, TT_NE, TT_EQEQ tokens.
300 private boolean isSymbolicRelation() {
301 debug("isSymbolicRelation: checking what()=" + lexer.what() +
302 " (" + lexer.render() + ")");
303 return (lexer.what() == '<' ||
304 lexer.what() == '>' ||
305 lexer.what() == '=' ||
306 lexer.what() == CQLTokenizer.TT_LE ||
307 lexer.what() == CQLTokenizer.TT_GE ||
308 lexer.what() == CQLTokenizer.TT_NE ||
309 lexer.what() == CQLTokenizer.TT_EQEQ);
// Requires the current token to be of the given type, failing with a
// CQLParseException that names both the expected and the actual token.
//
// @param token the token type that must be current
312 private void match(int token)
313 throws CQLParseException, IOException {
314 debug("in match(" + lexer.render(token, true) + ")");
315 if (lexer.what() != token)
316 throw new CQLParseException("expected " +
317 lexer.render(token, true) +
318 ", " + "got " + lexer.render(),
// NOTE(review): the line before this trace presumably advances the lexer; it is
// not visible in this extract.
321 debug("match() got token=" + lexer.what() + ", value()='" + lexer.value() + "'");
// Checks that the current token can serve as a symbol (index, term, prefix
// name or prefix identifier) and captures its text; otherwise throws a
// CQLParseException saying what was expected.
//
// A symbol is a plain word, a quoted string, or - only when allowKeywordTerms
// is true - one of the keywords and/or/not/prox/sortby.
//
// @param expected description of the awaited symbol, used in the error message
// @throws CQLParseException when the current token is not an acceptable symbol
324 private String matchSymbol(String expected)
325 throws CQLParseException, IOException {
327 debug("in matchSymbol()");
328 if (lexer.what() == CQLTokenizer.TT_WORD ||
329 lexer.what() == '"' ||
330 // The following is a complete list of keywords. Because
331 // they're listed here, they can be used unquoted as
332 // indexes, terms, prefix names and prefix identifiers.
333 // ### Instead, we should ask the lexer whether what we
334 // have is a keyword, and let the knowledge reside there.
// The keyword alternatives are grouped so that allowKeywordTerms gates all of
// them; without the inner parentheses && bound only to the TT_AND test and
// the remaining keywords were accepted regardless of the flag.
335 (allowKeywordTerms &&
336 (lexer.what() == CQLTokenizer.TT_AND ||
337 lexer.what() == CQLTokenizer.TT_OR ||
338 lexer.what() == CQLTokenizer.TT_NOT ||
339 lexer.what() == CQLTokenizer.TT_PROX ||
340 lexer.what() == CQLTokenizer.TT_SORTBY))) {
341 String symbol = lexer.value();
// Reached when the token was not acceptable as a symbol.
346 throw new CQLParseException("expected " + expected + ", " +
347 "got " + lexer.render(), lexer.pos());
352 * Simple test-harness for the CQLParser class.
354 * Reads a CQL query either from its command-line argument, if
355 * there is one, or standard input otherwise. So these two
356 * invocations are equivalent:
358 * CQLParser 'au=(Kerninghan or Ritchie) and ti=Unix'
359 * echo au=(Kerninghan or Ritchie) and ti=Unix | CQLParser
361 * The test-harness parses the supplied query and renders it as
362 * XCQL, so that both of the invocations above produce the
367 * <value>and</value>
371 * <value>or</value>
373 * <searchClause>
374 * <index>au</index>
376 * <value>=</value>
378 * <term>Kerninghan</term>
379 * </searchClause>
380 * <searchClause>
381 * <index>au</index>
383 * <value>=</value>
385 * <term>Ritchie</term>
386 * </searchClause>
388 * <searchClause>
389 * <index>ti</index>
391 * <value>=</value>
393 * <term>Unix</term>
394 * </searchClause>
399 * CQL version 1.1 (default version 1.2)
401 * Debug mode: extra output written to stderr.
403 * Causes the output to be written in CQL rather than XCQL - that
404 * is, a query equivalent to that which was input, is output. In
405 * effect, the test harness acts as a query canonicaliser.
407 * The input query, either as XCQL [default] or CQL [if the
408 * <TT>-c</TT> option is supplied].
// Command-line test harness: takes a query from the arguments or from standard
// input, parses it with a CQLParser and prints it as XCQL, CQL or PQF.
// Problems are reported on standard error.
//
// @param args optional flags followed by an optional CQL query
410 public static void main (String[] args) {
411 char mode = 'x'; // x=XCQL, c=CQL, p=PQF
414 List<String> argv = new ArrayList<String>();
415 for (int i = 0; i < args.length; i++) {
419 int compat = V1POINT2;
// Each option test looks only at the first remaining argument, so options are
// recognised in the fixed order -1, -d, then -c or -p.
420 if (argv.size() > 0 && argv.get(0).equals("-1")) {
425 if (argv.size() > 0 && argv.get(0).equals("-d")) {
430 if (argv.size() > 0 && argv.get(0).equals("-c")) {
// -p is honoured only when at least one more argument follows it.
433 } else if (argv.size() > 1 && argv.get(0).equals("-p")) {
436 pfile = (String) argv.get(0);
// More than one argument left after option handling is a usage error.
440 if (argv.size() > 1) {
// The closing bracket after <pqf-properties> was missing from the original
// message, making -p look like it wrapped the query argument.
441 System.err.println("Usage: CQLParser [-1] [-d] [-c] " +
442 "[-p <pqf-properties>] [<CQL-query>]");
443 System.err.println("If unspecified, query is read from stdin");
// The query is the single remaining argument, or else one line from standard input.
448 if (argv.size() == 1) {
449 cql = (String) argv.get(0);
451 BufferedReader buff = new BufferedReader(new InputStreamReader(System.in));
453 // read a single line of input
454 cql = buff.readLine();
456 System.err.println("Can't read query from stdin");
460 } catch (IOException ex) {
461 System.err.println("Can't read query: " + ex.getMessage());
467 CQLParser parser = new CQLParser(compat);
470 root = parser.parse(cql);
471 } catch (CQLParseException ex) {
472 System.err.println("Syntax error: " + ex.getMessage());
475 } catch (IOException ex) {
476 System.err.println("Can't compile query: " + ex.getMessage());
// Render the parse tree in the selected output form.
483 System.out.println(root.toCQL());
484 } else if (mode == 'p') {
// PQF rendering takes a Properties object; pfile is opened as its source.
486 InputStream f = new FileInputStream(pfile);
487 Properties config = new Properties();
490 System.out.println(root.toPQF(config));
491 } catch (IOException ex) {
492 System.err.println("Can't load PQF properties:" +
497 System.out.print(root.toXCQL());
// Each rendering failure gets its own message on standard error.
499 } catch (UnknownIndexException ex) {
500 System.err.println("Unknown index: " + ex.getMessage());
502 } catch (UnknownRelationException ex) {
503 System.err.println("Unknown relation: " + ex.getMessage());
505 } catch (UnknownRelationModifierException ex) {
506 System.err.println("Unknown relation modifier: " +
509 } catch (UnknownPositionException ex) {
510 System.err.println("Unknown position: " + ex.getMessage());
512 } catch (PQFTranslationException ex) {
513 System.err.println("Cannot translate to PQF: " + ex.getMessage());