1 // $Id: CQLParser.java,v 1.9 2002-10-29 10:15:58 mike Exp $
3 package org.z3950.zing.cql;
4 import java.util.Properties;
5 import java.io.InputStream;
6 import java.io.IOException;
7 import java.io.StringReader;
8 import java.io.StreamTokenizer;
12 * Compiles a CQL string into a parse tree ...
15 * @version $Id: CQLParser.java,v 1.9 2002-10-29 10:15:58 mike Exp $
16 * @see <A href="http://zing.z3950.org/cql/index.html"
17 * >http://zing.z3950.org/cql/index.html</A>
19 public class CQLParser {
// Tokenizer for the query currently being parsed; parse() assigns a new
// CQLLexer to this field on every call.
20 private CQLLexer lexer;
// Debug switch for the parser.
// NOTE(review): no use of this flag is visible in this listing; presumably
// it gates the output of debug() -- confirm against the full file.
21 static private boolean PARSEDEBUG = false;
// Handed to the CQLLexer constructor by parse().
// NOTE(review): initialised to true, which presumably leaves lexer debug
// output switched on by default -- confirm that this is intended.
22 static private boolean LEXDEBUG = true;
// Checked exception used by the parser to report syntax errors; the
// message is passed unchanged to Exception.
// NOTE(review): this is a private, non-static inner class, yet it appears in
// the throws clause of the public parse() method and is caught in the static
// main(). Consider a static nested or top-level class so callers can name it.
24 private class CQLParseException extends Exception {
25 CQLParseException(String s) { super(s); }
// Writes str to System.err with the prefix "PARSEDEBUG: ".
// NOTE(review): the line between the signature and the println is missing
// from this listing; presumably it tests PARSEDEBUG -- confirm.
28 static void debug(String str) {
30 System.err.println("PARSEDEBUG: " + str);
// Entry point: compiles the CQL query string cql into a tree of CQLNodes.
// A new CQLLexer is created over cql, and the query is parsed with the
// default qualifier "srw.serverChoice" and the default relation "=".
// Throws CQLParseException if the lexer is not at end of input once the
// query has been parsed; IOException is declared for the underlying lexer.
// NOTE(review): some lines of this method (after the lexer is created and
// after the end-of-input check) are missing from this listing.
33 public CQLNode parse(String cql)
34 throws CQLParseException, IOException {
35 lexer = new CQLLexer(cql, LEXDEBUG);
38 debug("about to parse_query()");
39 CQLNode root = parse_query("srw.serverChoice", "=");
// Anything other than end-of-input here is trailing junk.
40 if (lexer.ttype != lexer.TT_EOF)
41 throw new CQLParseException("junk after end: " + lexer.render());
// Parses one term followed by any number of <boolean> <term> pairs.
// qualifier and relation are passed on unchanged to every parse_term() call.
// The tree is built left to right: each boolean node takes the tree built so
// far as its first operand and the newly parsed term as its second.
// NOTE(review): several lines of this method are missing from this listing
// (one inside each boolean branch, the body of the "prox" branch, and
// everything after the start of the final throw).
46 private CQLNode parse_query(String qualifier, String relation)
47 throws CQLParseException, IOException {
48 debug("in parse_query()");
50 CQLNode term = parse_term(qualifier, relation);
// Keep going for as long as the current token is a word.
51 while (lexer.ttype == lexer.TT_WORD) {
52 String op = lexer.sval.toLowerCase();
53 debug("checking op '" + op + "'");
// NOTE(review): the comparisons below test lexer.sval, not the lower-cased
// op computed above, so only lower-case "and"/"or"/"not"/"prox" match.
// Using op here looks like the intent -- confirm and fix.
54 if (lexer.sval.equals("and")) {
56 CQLNode term2 = parse_term(qualifier, relation);
57 term = new CQLAndNode(term, term2);
58 } else if (lexer.sval.equals("or")) {
60 CQLNode term2 = parse_term(qualifier, relation);
61 term = new CQLOrNode(term, term2);
62 } else if (lexer.sval.equals("not")) {
64 CQLNode term2 = parse_term(qualifier, relation);
65 term = new CQLNotNode(term, term2);
66 } else if (lexer.sval.equals("prox")) {
// Any other word in boolean position is a syntax error.
69 throw new CQLParseException("unrecognised boolean: '" +
// Parses a single term: either a parenthesised sub-query, or a word/quoted
// string that ends up in a CQLTermNode(qualifier, relation, word).
// qualifier and relation are the values in force from the caller; relation
// is overwritten below from the lexer when a relation token is read.
// Throws CQLParseException when the current token is not '(' , a word or a
// double-quoted string.
// NOTE(review): many lines of this method are missing from this listing,
// including the declaration of `word`, the condition under which `relation`
// is reassigned, and the return statements; the comments here cover only
// what is visible.
78 private CQLNode parse_term(String qualifier, String relation)
79 throws CQLParseException, IOException {
80 debug("in parse_term()");
84 if (lexer.ttype == '(') {
85 debug("parenthesised term");
// A parenthesised term is a complete sub-query with the same defaults.
87 CQLNode expr = parse_query(qualifier, relation);
90 } else if (lexer.ttype != lexer.TT_WORD && lexer.ttype != '"') {
91 throw new CQLParseException("expected qualifier or term, " +
92 "got " + lexer.render());
95 debug("non-parenthesised term");
// render(false) gives the token's text without surrounding quote marks.
102 relation = lexer.render(false);
103 debug("got relation '" + relation + "'");
// NOTE(review): this debug string has no closing quote after qualifier.
105 debug("qualifier='" + qualifier + ", relation='" + relation + "'");
108 CQLTermNode node = new CQLTermNode(qualifier, relation, word);
109 debug("made term node " + node);
// Returns true when the lexer's current token is one of the relation
// operators: '<', '>', '=', or the multi-character tokens TT_LE ("<="),
// TT_GE (">=") and TT_NE ("<>"). Only lexer.ttype is examined.
113 boolean isRelation() {
114 // ### Handle any, all and exact
115 return (lexer.ttype == '<' ||
116 lexer.ttype == '>' ||
117 lexer.ttype == '=' ||
118 lexer.ttype == lexer.TT_LE ||
119 lexer.ttype == lexer.TT_GE ||
120 lexer.ttype == lexer.TT_NE);
// Checks that the lexer's current token type equals `token`; if it does not,
// throws a CQLParseException naming both the expected and the actual token.
// NOTE(review): the rest of this method is missing from this listing;
// presumably it then advances the lexer to the next token -- confirm.
123 private void match(int token)
124 throws CQLParseException, IOException {
125 debug("in match(" + lexer.render(token, null, true) + ")");
126 if (lexer.ttype != token)
127 throw new CQLParseException("expected " +
128 lexer.render(token, null, true) +
129 ", " + "got " + lexer.render());
136 // e.g. echo '(au=Kernighan or au=Ritchie) and ti=Unix' |
137 // java org.z3950.zing.cql.CQLParser
140 // <boolean>and</boolean>
142 // <boolean>or</boolean>
145 // <relation>=</relation>
146 // <term>Kernighan</term>
150 // <relation>=</relation>
151 // <term>Ritchie</term>
156 // <relation>=</relation>
// Simple command-line test harness: reads a CQL query from standard input,
// parses it and prints the result of root.toXCQL(0) on standard output.
// Read errors, syntax errors and compile errors are reported on System.err.
// NOTE(review): several lines of this method are missing from this listing
// (for example the `try` keywords, the declaration of `root`, and whatever
// follows each error message).
161 public static void main (String[] args) {
// No command-line arguments are accepted.
// NOTE(review): args[0] is the first argument, not the program name, so
// this usage message just echoes the unexpected argument.
162 if (args.length != 0) {
163 System.err.println("Usage: " + args[0]);
// Fixed-size input buffer: at most 10000 bytes of query can be held.
167 byte[] bytes = new byte[10000];
169 // Read in the whole of standard input in one go
// NOTE(review): a single InputStream.read() call is not guaranteed to
// return all of the input -- verify for piped or interactive use.
170 int nbytes = System.in.read(bytes);
171 } catch (java.io.IOException ex) {
172 System.err.println("Can't read query: " + ex.getMessage());
// NOTE(review): this converts the whole 10000-byte buffer rather than only
// the nbytes actually read, so the query string carries trailing NUL
// characters; it also uses the platform default charset.
175 String cql = new String(bytes);
176 CQLParser parser = new CQLParser();
179 root = parser.parse(cql);
180 debug("root='" + root + "'");
181 System.out.println(root.toXCQL(0));
182 } catch (CQLParseException ex) {
183 System.err.println("Syntax error: " + ex.getMessage());
185 } catch (java.io.IOException ex) {
186 System.err.println("Can't compile query: " + ex.getMessage());
193 // This is a trivial subclass of java.io.StreamTokenizer that knows
194 // about the multi-character tokens "<=", ">=" and "<>", and includes
195 // a render() method. Used only by CQLParser.
197 class CQLLexer extends StreamTokenizer {
// Debug switch for the lexer, assigned by the constructor.
// NOTE(review): the field is static, so the value passed to the most
// recently constructed CQLLexer applies to every instance.
198 private static boolean LEXDEBUG;
// Token-type codes for the two-character operators; nextToken() stores
// these in ttype.
// NOTE(review): these constants are not declared final.
199 static int TT_LE = 1000; // The token "<="
200 static int TT_GE = 1001; // The token ">="
201 static int TT_NE = 1002; // The token "<>"
// Writes str to System.err with the prefix "LEXDEBUG: ".
// NOTE(review): the line between the signature and the println is missing
// from this listing; presumably it tests LEXDEBUG -- confirm.
203 static void debug(String str) {
205 System.err.println("LEXDEBUG: " + str);
// Builds a lexer over the query string cql and adjusts the StreamTokenizer
// syntax table for CQL; lexdebug is stored as the lexer's debug switch.
208 CQLLexer(String cql, boolean lexdebug) {
209 super(new StringReader(cql));
// Make the relation characters, '/', and the parentheses ordinary
// characters, so each one is returned as a token of its own.
210 this.ordinaryChar('=');
211 this.ordinaryChar('<');
212 this.ordinaryChar('>');
213 this.ordinaryChar('/');
214 this.ordinaryChar('(');
215 this.ordinaryChar(')');
216 this.wordChars('\'', '\''); // prevent this from introducing strings
// NOTE(review): LEXDEBUG is a static field written through `this`; the
// assignment affects all CQLLexer instances.
217 this.LEXDEBUG = lexdebug;
// Reads the next token from the underlying StreamTokenizer, looking one
// token ahead after '<' or '>' so that "<=", "<>" and ">=" are recognised
// as the single tokens TT_LE, TT_NE and TT_GE. When one of these is
// recognised, both this.ttype and the local `token` are set to its code.
// NOTE(review): several lines are missing from this listing, among them the
// tests on `token` and `t2` that open some branches, the push-back of an
// unused look-ahead token, and the return statement.
220 public int nextToken() throws java.io.IOException {
221 int token = super.nextToken();
224 debug("token starts with '<' ...");
// Look ahead one token to see whether this is "<=" or "<>".
225 int t2 = super.nextToken();
227 debug("token continues with '=' - it's '<='");
228 this.ttype = token = TT_LE;
229 } else if (t2 == '>') {
230 debug("token continues with '>' - it's '<>'");
231 this.ttype = token = TT_NE;
// NOTE(review): the message says "next token" but prints `token` (the first
// token read), not the look-ahead `t2` -- confirm which was intended.
233 debug("next token is " + token + " (pushed back)");
236 } else if (token == '>') {
237 debug("token starts with '>' ...");
// Look ahead one token to see whether this is ">=".
238 int t2 = super.nextToken();
240 debug("token continues with '=' - it's '>='");
241 this.ttype = token = TT_GE;
// NOTE(review): as above, this prints `token` rather than `t2`.
243 debug("next token is " + token + " (pushed back)");
248 debug("token=" + token + ", " +
249 "nval=" + this.nval + ", " + "sval=" + this.sval);
// Convenience forms of render(int, String, boolean) for the current token.
// NOTE(review): the signature line of the first overload is missing from
// this listing; callers invoke it as render() with no arguments.
// It renders the current token (this.ttype) with quoteChars set to true.
255 return this.render(this.ttype, null, true);
// Renders the current token (this.ttype); quoteChars is passed through
// unchanged to the three-argument form.
258 String render(boolean quoteChars) {
259 return this.render(this.ttype, null, quoteChars);
// Produces a printable description of the token type `token`, for use in
// debug and error messages. When quoteChars is true, a single-character
// token is wrapped in single quotes.
// NOTE(review): the return statements of several branches (EOF, EOL, "<=",
// ">=", "<>") and the end of the method are missing from this listing.
// NOTE(review): `str` is not used in any visible line. The number, word and
// string branches read this.nval / this.sval, i.e. the values of the
// lexer's current token, even when `token` is some other token type passed
// in by the caller (as match() does for the expected token).
262 String render(int token, String str, boolean quoteChars) {
265 if (token == this.TT_EOF) {
267 } else if (token == this.TT_EOL) {
269 } else if (token == this.TT_NUMBER) {
270 return "number: " + this.nval;
271 } else if (token == this.TT_WORD) {
272 return "word: " + this.sval;
273 } else if (token == '"') {
274 return "string: \"" + this.sval + "\"";
275 } else if (token == TT_LE) {
277 } else if (token == TT_GE) {
279 } else if (token == TT_NE) {
// Any other token type is rendered as the character with that code.
283 String res = String.valueOf((char) token);
284 if (quoteChars) res = "'" + res + "'";