+++ /dev/null
-$Id: Grammar,v 1.1 2002-10-30 09:19:26 mike Exp $
-
-This is the CQL grammar, more or less as on the official Maintenance
-Agency page (http://lcweb.loc.gov/z3950/agency/zing/srwu/cql.html) but
-with a few tweaks described in my message of Tue, 29 Oct 2002 14:11:48
-which I hope will be integrated into the official grammar.
-
---
-
-cql-query ::= cql-query boolean search-clause
- | search-clause
-boolean ::= "and" | "or" | "not" | prox
-search-clause ::= "(" cql-query ")"
- | [ qualifier relation ] term
-
-relation ::= base-relation { "/" relation-modifier }
-base-relation ::= numeric-relation | "exact" | "all" | "any"
-relation-modifier ::= "relevant" | "fuzzy" | "stem"
-numeric-relation ::= "<" | ">" | "<=" | ">=" | "<>" | "="
-
-prox ::= "prox" [ "/" prox-parameters ]
-prox-parameters ::= [ numeric-relation ] "/" [ distance ] "/" [ unit ] "/" ordering
- | [ numeric-relation ] "/" [ distance ] "/" unit
- | [ numeric-relation ] "/" distance
- | numeric-relation
-unit ::= "word" | "sentence" | "paragraph" | "element"
-ordering ::= "ordered" | "unordered"
-distance ::= non-negative-integer
-
-qualifier ::= [ qualifier-prefix "." ] qualifier-name
-qualifier-prefix ::= identifier
-qualifier-name ::= identifier
-identifer ::= string
-term ::= string | ""string""
-string ::= a character string
-$Id: README,v 1.6 2002-10-30 11:13:18 mike Exp $
+$Id: README,v 1.7 2002-10-31 22:22:01 mike Exp $
cql-java -- a free CQL compiler for Java
DESCRIPTION
-----------
-Se the automatically generated class documentation in the "doc"
-subdirectory. (### It's not there yet, of course)
+See the automatically generated class documentation in the "doc"
+subdirectory. (It's not all there yet, but it's coming.)
AUTHOR
TO DO
-----
-* Add proximity support to parser
+* Add proximity support to parser -- just the back-ends left to do.
-* Some niceties for the CQL-decompiling back-end:
- * Don't emit redundant parentheses.
- * Don't put spaces around relations that don't need them.
+* Relation modifiers could be limited to known modifiers only.
-* Write PQN-generating back-end (will need to be driven from a
+* Fix CQLParser and CQLLexer shell-script front-ends to elegantly
+ handle their classes' test harnesses' ability to read the query from
+ the command-line arguments, if any, falling back to stdin if there
+ are none.
+
+* Add CQLGenerate shell-script. Allow CQLGenerate test-harness to
+ take some arguments on command-line as well as or instead of a
+ file.
+
+* Trivial CQLCanonicalise application, which renders out its source
+ tree in a canonical form, enabling queries to be diffed for
+ semantically significant differences only. Tests can be run by
+ generating random trees, canonicalising them, then canonicalising
+ them _again_ and checking that the before-and-after results are the
+ same.
+
+* Some niceties for the CQL-decompiling back-end:
+ * Don't emit redundant parentheses.
+ * Don't put spaces around relations that don't need them.
+
+* Write PQN-generating back-end (will need to be driven from a
configuation file specifying how to represent the qualifiers,
- relations, relation modifiers and wildcard characters as Z39.50
+ relations, relation modifiers and wildcard characters as Z39.50
attributes.)
* Consider the utility of yet another back-end that translates a
- CQLNode tree into a Type-1 query tree using the JZKit data
+ CQLNode tree into a Type-1 query tree using the JZKit data
structures. That would be nice so that CQL could become a JZKit
query-type, but you could achieve the same effect by generating PQN,
and running that through JZKit's existing PQN-to-Type-1 compiler.
* Write generic test suite.
-* Fix CQLParser test harness to read query from command-line
- arguments, if any, falling back to stdin if there are none.
-
--- /dev/null
+#!/bin/sh
+
+# $Id: CQLLexer,v 1.1 2002-10-31 22:22:01 mike Exp $
+# Trivial script to invoke the CQLLexer test-harness.
+#
+# java inherits this script's standard input directly, so a query can
+# still be piped in; and when the query is given as an argument there
+# is no stray "cat" left waiting on the terminal.
+
+exec java org.z3950.zing.cql.CQLLexer ${@+"$@"}
--- /dev/null
+#!/bin/sh
+
+# $Id: CQLParser,v 1.1 2002-10-31 22:22:01 mike Exp $
+# Trivial script to invoke the CQLParser test-harness.
+#
+# java inherits this script's standard input directly, so a query can
+# still be piped in; and when the query is given as an argument there
+# is no stray "cat" left waiting on the terminal.
+
+exec java org.z3950.zing.cql.CQLParser ${@+"$@"}
--- /dev/null
+$Id: Grammar,v 1.1 2002-10-31 22:22:01 mike Exp $
+
+This is the CQL grammar, more or less as on the official Maintenance
+Agency page (http://lcweb.loc.gov/z3950/agency/zing/srwu/cql.html) but
+with a few tweaks described in my message of Tue, 29 Oct 2002 14:11:48
+which I hope will be integrated into the official grammar.
+
+--
+
+cql-query ::= cql-query boolean search-clause
+ | search-clause
+boolean ::= "and" | "or" | "not" | prox
+search-clause ::= "(" cql-query ")"
+ | [ qualifier relation ] term
+
+relation ::= base-relation { "/" relation-modifier }
+base-relation ::= numeric-relation | "exact" | "all" | "any"
+relation-modifier ::= "relevant" | "fuzzy" | "stem"
+numeric-relation ::= "<" | ">" | "<=" | ">=" | "<>" | "="
+
+prox ::= "prox" [ "/" prox-parameters ]
+prox-parameters ::= [ numeric-relation ] "/" [ distance ] "/" [ unit ] "/" ordering
+ | [ numeric-relation ] "/" [ distance ] "/" unit
+ | [ numeric-relation ] "/" distance
+ | numeric-relation
+unit ::= "word" | "sentence" | "paragraph" | "element"
+ordering ::= "ordered" | "unordered"
+distance ::= non-negative-integer
+
+qualifier ::= [ qualifier-prefix "." ] qualifier-name
+qualifier-prefix ::= identifier
+qualifier-name ::= identifier
+identifier ::= string
+term ::= string | ""string""
+string ::= a character string
--- /dev/null
+From mike@seatbooker.net Tue Oct 29 15:12:09 2002
+Envelope-to: mike@miketaylor.org.uk
+Date: Tue, 29 Oct 2002 14:11:48 GMT
+From: Mike Taylor <mike@seatbooker.net>
+To: ZNG@loc.gov
+Cc: mike@miketaylor.org.uk
+Subject: Again: Grammar Tweaks
+
+Dear Everyone,
+
+I sent this message last Friday, and didn't get a delivery failure
+message or anything similar; but there has been absolutely zero
+response on-list, which makes me wonder whether it mysteriously didn't
+get through.
+
+... or surely it didn't get caught by people's "this message is too
+complicated to pay attention to" filters? :-~
+
+ _/|_ _______________________________________________________________
+/o ) \/ Mike Taylor <mike@miketaylor.org.uk> www.miketaylor.org.uk
+)_v__/\ "Conclusion: is left to the reader (see Table 2).
+ Acknowledgements: I wrote this paper for money" --
+ A. A. Chastel, _A critical analysis of the explanation of
+ red-shifts by a new field_, A&A 53, 67 (1976)
+
+
+------------------------------- cut here -------------------------------
+Well, it looks like the CQL grammar has settled down more or less to
+everyone's satisfaction. So it must be time to throw it all up the
+air again! :-)
+
+No, I'm joking -- mostly. I'd like to point out one actual mistake (I
+think), suggest one substantive change, and request a few cosmetic
+changes.
+
+For anyone who's not got it to hand, the URL for the grammar is
+http://lcweb.loc.gov/z3950/agency/zing/srwu/cql.html
+
+1. I think it's a mistake that the grammar says:
+ prox-qualifiers ::= "/" [ unit ] "/" [ relation ] "/" [ distance ] "/" ordering
+ (and the similar productions that follow) because that allows
+ prox/word/exact/3 <--- "exact" is meaningless here
+ and -- even worse --
+ prox/word/=/stem <--- a relation-modifier!
+ (This is not only silly, but ambiguous too)
+
+ So I think all the occurrences of "relation" in the productions
+ for prox need to be changed to "order-or-equal-relation".
+
+2. The only thing that I'm suggesting we actually _change_ is the
+ order of the proximity parameters. Quick! Close your eyes and
+ tell me the correct order of relation, ordering, distance and
+ unit? See -- you can't do it: no-one can :-)
+
+ So, based somewhat on Adam's rather more difficult suggestion of
+ a couple of days ago, I propose that we change the order to:
+ relation/distance/unit/ordering
+ Rationale: you can read it out loud. If you want to find two
+ clauses with the conditions "*more* than *5* *sentences* apart",
+ you would write ``foo prox/>/5/sentence bar''.
+
+3. Cosmetic changes.
+
+ 3a. The "/" at the beginning of each of the prox-qualfiers
+ productions can be moved up into the definition of prox, like
+ this:
+ prox::= "prox" [ "/" prox-qualifiers ]
+ which yields a slightly simpler, neater (but equivalent)
+ grammar.
+
+ 3b. The things that the grammar called "index-name", we have been
+ calling "qualifiers" (and talking about the "qualifier-sets"
+ that contain them.) I think that's a much nicer name than
+ "index-name", in part because it doesn't carry such a loading
+ of implementation detail. Also, remember that the way we've
+ designed things, a qualifier will typically be implemented by
+ multiple indexes (a word index and a string index) so I don't
+ want to give misleading impressions.
+
+ 3b1. :-)
+ That would mean that, in the name of simplicity, we'd
+ need to rename "prox-qualifiers" to something like
+ "prox-modifiers" or "prox-parameters" (which is what
+ we've actually been calling them, 4WIW) and rename
+ "qualifier" to something more suggestive such as
+ "relation-modifier" (which, again, is what we've been
+ using in prose.)
+
+ 3c. (Nearly done, honest.) I think that
+ "order-or-equal-relation" is a horrible name and would much
+ prefer to call it something like "numeric-relation", which
+ better explains its role in, for example, proximity
+ parameters.
+
+So, putting it all together, here's how I think the grammar should
+look:
+
+------------------------------- cut here -------------------------------
+cql-query ::= cql-query boolean search-clause
+ | search-clause
+boolean ::= "and" | "or" | "not" | prox
+search-clause ::= "(" cql-query ")"
+ | [ qualifier relation ] term
+
+relation ::= base-relation { "/" relation-modifier }
+base-relation ::= numeric-relation | "exact" | "all" | "any"
+relation-modifier ::= "relevant" | "fuzzy" | "stem"
+numeric-relation ::= "<" | ">" | "<=" | ">=" | "<>" | "="
+
+prox ::= "prox" [ "/" prox-parameters ]
+prox-parameters ::= [ numeric-relation ] "/" [ distance ] "/" [ unit ] "/" ordering
+ | [ numeric-relation ] "/" [ distance ] "/" unit
+ | [ numeric-relation ] "/" distance
+ | numeric-relation
+unit ::= "word" | "sentence" | "paragraph" | "element"
+ordering ::= "ordered" | "unordered"
+distance ::= non-negative-integer
+
+qualifier ::= [ qualifier-prefix "." ] qualifier-name
+qualifier-prefix ::= identifier
+qualifier-name ::= identifier
+identifer ::= string
+term ::= string | ""string""
+string ::= a character string
+------------------------------- cut here -------------------------------
+
+Hope this helps, and that it's none of it's controversial. I guess it
+ought not to be, except maybe the change in the order of proximity
+parameters.
+
+ _/|_ _______________________________________________________________
+/o ) \/ Mike Taylor <mike@miketaylor.org.uk> www.miketaylor.org.uk
+)_v__/\ The IBM 360 had no stack, and that was stupid, short-sighted
+ design. The Cray 2 has no stack either, but that's elegant
+ minimalism.
+
+
+
-// $Id: CQLAndNode.java,v 1.3 2002-10-30 09:19:26 mike Exp $
+// $Id: CQLAndNode.java,v 1.4 2002-10-31 22:22:01 mike Exp $
package org.z3950.zing.cql;
/**
* Represents an AND node in a CQL parse-tree.
- * ###
+ * ##
*
- * @version $Id: CQLAndNode.java,v 1.3 2002-10-30 09:19:26 mike Exp $
+ * @version $Id: CQLAndNode.java,v 1.4 2002-10-31 22:22:01 mike Exp $
*/
public class CQLAndNode extends CQLBooleanNode {
public CQLAndNode(CQLNode left, CQLNode right) {
-// $Id: CQLBooleanNode.java,v 1.5 2002-10-30 09:19:26 mike Exp $
+// $Id: CQLBooleanNode.java,v 1.6 2002-10-31 22:22:01 mike Exp $
package org.z3950.zing.cql;
/**
* Represents a boolean node in a CQL parse-tree.
- * ###
+ * ##
*
- * @version $Id: CQLBooleanNode.java,v 1.5 2002-10-30 09:19:26 mike Exp $
+ * @version $Id: CQLBooleanNode.java,v 1.6 2002-10-31 22:22:01 mike Exp $
*/
public abstract class CQLBooleanNode extends CQLNode {
protected CQLNode left;
String toXCQL(int level) {
return (indent(level) + "<triple>\n" +
- indent(level+1) + "<boolean>" + op() + "</boolean>\n" +
+ booleanXQL(level+1) +
left.toXCQL(level+1) +
right.toXCQL(level+1) +
indent(level) + "</triple>\n");
}
+    /**
+     * Renders this node's operator as an XCQL &lt;boolean&gt; element
+     * wrapping a single &lt;value&gt; child, indented to the given level.
+     * Subclasses may override this to emit a richer element.
+     */
+    String booleanXQL(int level) {
+        StringBuffer buf = new StringBuffer();
+        buf.append(indent(level)).append("<boolean>\n");
+        buf.append(indent(level+1)).append("<value>");
+        buf.append(op()).append("</value>\n");
+        buf.append(indent(level)).append("</boolean>\n");
+        return buf.toString();
+    }
+
String toCQL() {
// ### We don't always need parens around the operands
return "(" + left.toCQL() + ") " + op() + " (" + right.toCQL() + ")";
-// $Id: CQLLexer.java,v 1.1 2002-10-30 09:19:26 mike Exp $
+// $Id: CQLLexer.java,v 1.2 2002-10-31 22:22:01 mike Exp $
package org.z3950.zing.cql;
import java.io.StreamTokenizer;
import java.io.StringReader;
+import java.util.Hashtable;
-// This is a trivial subclass for java.io.StreamTokenizer which knows
-// about the multi-character tokens "<=", ">=" and "<>", and includes
-// a render() method. Used only by CQLParser.
+// This is a semi-trivial subclass for java.io.StreamTokenizer that:
+// * Has a halfDecentPushBack() method that actually works
+// * Includes a render() method
+// * Knows about the multi-character tokens "<=", ">=" and "<>"
+// * Recognises a set of keywords as tokens in their own right
+// * Includes some primitive debugging-output facilities
+// It's used only by CQLParser.
//
class CQLLexer extends StreamTokenizer {
- private static boolean DEBUG;
- static int TT_LE = 1000; // The "<=" relation
- static int TT_GE = 1001; // The ">=" relation
- static int TT_NE = 1002; // The "<>" relation
- static int TT_AND = 1003; // The "and" boolean
- static int TT_OR = 1004; // The "or" boolean
- static int TT_NOT = 1005; // The "not" boolean
- static int TT_PROX = 1006; // The "prox" boolean
- static int TT_ANY = 1007; // The "any" relation
- static int TT_ALL = 1008; // The "all" relation
- static int TT_EXACT = 1009; // The "exact" relation
+ // New publicly visible token-types
+ static int TT_LE = 1000; // The "<=" relation
+ static int TT_GE = 1001; // The ">=" relation
+ static int TT_NE = 1002; // The "<>" relation
+ static int TT_AND = 1003; // The "and" boolean
+ static int TT_OR = 1004; // The "or" boolean
+ static int TT_NOT = 1005; // The "not" boolean
+ static int TT_PROX = 1006; // The "prox" boolean
+ static int TT_ANY = 1007; // The "any" relation
+ static int TT_ALL = 1008; // The "all" relation
+ static int TT_EXACT = 1009; // The "exact" relation
+ static int TT_pWORD = 1010; // The "word" proximity unit
+ static int TT_SENTENCE = 1011; // The "sentence" proximity unit
+ static int TT_PARAGRAPH = 1012; // The "paragraph" proximity unit
+ static int TT_ELEMENT = 1013; // The "element" proximity unit
+ static int TT_ORDERED = 1014; // The "ordered" proximity ordering
+ static int TT_UNORDERED = 1015; // The "unordered" proximity ordering
+
+    // Support for keywords.  The table is very short, so each lookup
+    // simply scans it linearly instead of going through a Hashtable.
+    //
+    // Keyword is a static nested class: it needs nothing from the
+    // enclosing CQLLexer, and being static is what allows the table
+    // below to be a single static array shared by every lexer instead
+    // of being rebuilt for each instance.
+    private static class Keyword {
+        final int token;        // one of the TT_* token types above
+        final String keyword;   // the word that maps to that token
+        Keyword(int token, String keyword) {
+            this.token = token;
+            this.keyword = keyword;
+        }
+    }
+    // Declared after the TT_* constants so that they are already
+    // initialised when this static initialiser runs.
+    private static final Keyword[] keywords = {
+        new Keyword(TT_AND, "and"),
+        new Keyword(TT_OR, "or"),
+        new Keyword(TT_NOT, "not"),
+        new Keyword(TT_PROX, "prox"),
+        new Keyword(TT_ANY, "any"),
+        new Keyword(TT_ALL, "all"),
+        new Keyword(TT_EXACT, "exact"),
+        new Keyword(TT_pWORD, "word"),
+        new Keyword(TT_SENTENCE, "sentence"),
+        new Keyword(TT_PARAGRAPH, "paragraph"),
+        new Keyword(TT_ELEMENT, "element"),
+        new Keyword(TT_ORDERED, "ordered"),
+        new Keyword(TT_UNORDERED, "unordered"),
+    };
// For halfDecentPushBack() and the code at the top of nextToken()
private static int TT_UNDEFINED = -1000;
- int saved_ttype = TT_UNDEFINED;
- double saved_nval;
- String saved_sval;
+ private int saved_ttype = TT_UNDEFINED;
+ private double saved_nval;
+ private String saved_sval;
+
+ // Controls debugging output
+ private static boolean DEBUG;
CQLLexer(String cql, boolean lexdebug) {
super(new StringReader(cql));
ordinaryChar('(');
ordinaryChar(')');
wordChars('\'', '\''); // prevent this from introducing strings
+ parseNumbers();
DEBUG = lexdebug;
}
//
public int underlyingNextToken() throws java.io.IOException {
super.nextToken();
- if (ttype == TT_WORD) {
- if (sval.equalsIgnoreCase("and")) {
- ttype = TT_AND;
- } else if (sval.equalsIgnoreCase("or")) {
- ttype = TT_OR;
- } else if (sval.equalsIgnoreCase("not")) {
- ttype = TT_NOT;
- } else if (sval.equalsIgnoreCase("prox")) {
- ttype = TT_PROX;
- } else if (sval.equalsIgnoreCase("any")) {
- ttype = TT_ANY;
- } else if (sval.equalsIgnoreCase("all")) {
- ttype = TT_ALL;
- } else if (sval.equalsIgnoreCase("exact")) {
- ttype = TT_EXACT;
- }
- }
+ if (ttype == TT_WORD)
+ for (int i = 0; i < keywords.length; i++)
+ if (sval.equalsIgnoreCase(keywords[i].keyword))
+ ttype = keywords[i].token;
+
return ttype;
}
if (token == TT_EOF) {
return "EOF";
} else if (token == TT_NUMBER) {
- return "number: " + nval;
+ return new Integer((int) nval).toString();
} else if (token == TT_WORD) {
return "word: " + sval;
} else if (token == '"') {
return ">=";
} else if (token == TT_NE) {
return "<>";
- } else if (token == TT_AND) {
- return "and";
- } else if (token == TT_OR) {
- return "or";
- } else if (token == TT_NOT) {
- return "not";
- } else if (token == TT_PROX) {
- return "prox";
- } else if (token == TT_ANY) {
- return "any";
- } else if (token == TT_ALL) {
- return "all";
- } else if (token == TT_EXACT) {
- return "exact";
}
+ // Check whether its associated with one of the keywords
+ for (int i = 0; i < keywords.length; i++)
+ if (token == keywords[i].token)
+ return keywords[i].keyword;
+
+ // Otherwise it must be a single character, such as '(' or '/'.
String res = String.valueOf((char) token);
if (quoteChars) res = "'" + res + "'";
return res;
}
public static void main(String[] args) throws Exception {
- CQLLexer lexer = new CQLLexer(args[0], true);
- int token;
+ if (args.length > 1) {
+ System.err.println("Usage: CQLLexer [<CQL-query>]");
+ System.err.println("If unspecified, query is read from stdin");
+ System.exit(1);
+ }
+ String cql;
+ if (args.length == 1) {
+ cql = args[0];
+ } else {
+ byte[] bytes = new byte[10000];
+ try {
+ // Read in the whole of standard input in one go
+ int nbytes = System.in.read(bytes);
+ } catch (java.io.IOException ex) {
+ System.err.println("Can't read query: " + ex.getMessage());
+ System.exit(2);
+ }
+ cql = new String(bytes);
+ }
+
+ CQLLexer lexer = new CQLLexer(cql, true);
+ int token;
while ((token = lexer.nextToken()) != TT_EOF) {
// Nothing to do: debug() statements render tokens for us
}
-// $Id: CQLNode.java,v 1.8 2002-10-30 11:13:18 mike Exp $
+// $Id: CQLNode.java,v 1.9 2002-10-31 22:22:01 mike Exp $
package org.z3950.zing.cql;
/**
* Represents a node in a CQL parse-tree.
- * ###
+ * ##
*
- * @version $Id: CQLNode.java,v 1.8 2002-10-30 11:13:18 mike Exp $
+ * @version $Id: CQLNode.java,v 1.9 2002-10-31 22:22:01 mike Exp $
*/
public abstract class CQLNode {
abstract String toXCQL(int level);
abstract String toCQL();
- protected String indent(int level) {
- String x = "";
- while (level-- > 0) {
- x += " ";
- }
- return x;
- }
-
- // XML Quote --
- // s/&/&/g;
- // s/</</g;
- // s/>/>/g;
- // This is hideously inefficient, but I just don't see a better
- // way using the standard JAVA library.
- //
- protected String xq(String str) {
- str = replace(str, "&", "&");
- str = replace(str, "<", "<");
- str = replace(str, ">", ">");
- return str;
- }
-
- // I can't _believe_ I have to write this by hand in 2002 ...
- protected static String replace(String str, String from, String to) {
- StringBuffer sb = new StringBuffer();
- int ix; // index of next `from'
- int offset = 0; // index of previous `from' + length(from)
-
- while ((ix = str.indexOf(from, offset)) != -1) {
- sb.append(str.substring(offset, ix));
- sb.append(to);
- offset = ix + from.length();
- }
-
- // End of string: append last bit and we're done
- sb.append(str.substring(offset));
- return sb.toString();
- }
+    // Short-hand wrappers so that subclasses can call the shared Utils
+    // helpers without qualifying them.
+    protected static String indent(int level) {
+        return Utils.indent(level);
+    }
+    protected static String xq(String str) {
+        return Utils.xq(str);
+    }
// Test harness
public static void main (String[] args) {
-// $Id: CQLNotNode.java,v 1.3 2002-10-30 09:19:26 mike Exp $
+// $Id: CQLNotNode.java,v 1.4 2002-10-31 22:22:01 mike Exp $
package org.z3950.zing.cql;
/**
* Represents a NOT node in a CQL parse-tree.
- * ###
+ * ##
*
- * @version $Id: CQLNotNode.java,v 1.3 2002-10-30 09:19:26 mike Exp $
+ * @version $Id: CQLNotNode.java,v 1.4 2002-10-31 22:22:01 mike Exp $
*/
public class CQLNotNode extends CQLBooleanNode {
public CQLNotNode(CQLNode left, CQLNode right) {
-// $Id: CQLOrNode.java,v 1.3 2002-10-30 09:19:26 mike Exp $
+// $Id: CQLOrNode.java,v 1.4 2002-10-31 22:22:01 mike Exp $
package org.z3950.zing.cql;
/**
* Represents an OR node in a CQL parse-tree.
- * ###
+ * ##
*
- * @version $Id: CQLOrNode.java,v 1.3 2002-10-30 09:19:26 mike Exp $
+ * @version $Id: CQLOrNode.java,v 1.4 2002-10-31 22:22:01 mike Exp $
*/
public class CQLOrNode extends CQLBooleanNode {
public CQLOrNode(CQLNode left, CQLNode right) {
-// $Id: CQLParser.java,v 1.10 2002-10-30 09:19:26 mike Exp $
+// $Id: CQLParser.java,v 1.11 2002-10-31 22:22:01 mike Exp $
package org.z3950.zing.cql;
import java.io.IOException;
/**
* Compiles a CQL string into a parse tree.
- * ###
+ * ##
*
- * @version $Id: CQLParser.java,v 1.10 2002-10-30 09:19:26 mike Exp $
+ * @version $Id: CQLParser.java,v 1.11 2002-10-31 22:22:01 mike Exp $
* @see <A href="http://zing.z3950.org/cql/index.html"
* >http://zing.z3950.org/cql/index.html</A>
*/
CQLNode term2 = parse_term(qualifier, relation);
term = new CQLNotNode(term, term2);
} else if (lexer.ttype == lexer.TT_PROX) {
- // ### Handle "prox"
+ match(lexer.TT_PROX);
+ CQLProxNode proxnode = new CQLProxNode(term);
+ gatherProxParameters(proxnode);
+ CQLNode term2 = parse_term(qualifier, relation);
+ proxnode.addSecondSubterm(term2);
+ term = (CQLNode) proxnode;
} else {
throw new CQLParseException("expected boolean, got " +
lexer.render());
return node;
}
+    /**
+     * Consumes the "/"-separated proximity parameters that may follow
+     * the "prox" keyword, recording each one found on the node.
+     * Positions are taken to be relation/distance/unit/ordering; an
+     * empty position (two adjacent slashes) leaves that parameter unset.
+     * Stops at the first token that is not a slash, or after four
+     * positions.
+     */
+    private void gatherProxParameters(CQLProxNode node)
+        throws CQLParseException, IOException {
+        int position = 0;
+        while (position < 4 && lexer.ttype == '/') {
+            match('/');
+            if (lexer.ttype != '/') {
+                // A value is present at this position
+                if (position == 0) {
+                    gatherProxRelation(node);
+                } else if (position == 1) {
+                    gatherProxDistance(node);
+                } else if (position == 2) {
+                    gatherProxUnit(node);
+                } else {
+                    gatherProxOrdering(node);
+                }
+            }
+            position++;
+        }
+    }
+
+    /**
+     * Records the current token as the proximity relation and moves
+     * past it.
+     *
+     * @throws CQLParseException if the current token is not one of the
+     *         relations accepted by isProxRelation()
+     */
+    private void gatherProxRelation(CQLProxNode node)
+        throws CQLParseException, IOException {
+        if (!isProxRelation())
+            throw new CQLParseException("expected proximity relation, got " +
+                                        lexer.render());
+        // Render before match() moves on to the next token, so that the
+        // debug line reports the token that was actually consumed.
+        String relation = lexer.render(lexer.ttype, false);
+        node.addModifier("relation", relation);
+        match(lexer.ttype);
+        debug("gPR matched " + relation);
+    }
+
+    /**
+     * Records the current token as the proximity distance and moves
+     * past it.  The grammar defines distance as a non-negative integer,
+     * so negative or fractional numbers are rejected rather than being
+     * silently truncated when rendered.
+     *
+     * @throws CQLParseException if the current token is not a
+     *         non-negative whole number
+     */
+    private void gatherProxDistance(CQLProxNode node)
+        throws CQLParseException, IOException {
+        if (lexer.ttype != lexer.TT_NUMBER)
+            throw new CQLParseException("expected proximity distance, got " +
+                                        lexer.render());
+        if (lexer.nval < 0 || lexer.nval != Math.floor(lexer.nval))
+            throw new CQLParseException("expected non-negative integer " +
+                                        "proximity distance, got " +
+                                        lexer.nval);
+        // Render before match() moves on to the next token, so that the
+        // debug line reports the token that was actually consumed.
+        String distance = lexer.render(lexer.ttype, false);
+        node.addModifier("distance", distance);
+        match(lexer.ttype);
+        debug("gPD matched " + distance);
+    }
+
+    /**
+     * Records the current token as the proximity unit (word, sentence,
+     * paragraph or element) and moves past it.
+     *
+     * @throws CQLParseException if the current token is not a unit keyword
+     */
+    private void gatherProxUnit(CQLProxNode node)
+        throws CQLParseException, IOException {
+        int tt = lexer.ttype;
+        boolean isUnit = (tt == lexer.TT_pWORD ||
+                          tt == lexer.TT_SENTENCE ||
+                          tt == lexer.TT_PARAGRAPH ||
+                          tt == lexer.TT_ELEMENT);
+        if (!isUnit) {
+            throw new CQLParseException("expected proximity unit, got " +
+                                        lexer.render());
+        }
+
+        String unit = lexer.render();
+        node.addModifier("unit", unit);
+        match(lexer.ttype);
+    }
+
+    /**
+     * Records the current token as the proximity ordering (ordered or
+     * unordered) and moves past it.
+     *
+     * @throws CQLParseException if the current token is not an
+     *         ordering keyword
+     */
+    private void gatherProxOrdering(CQLProxNode node)
+        throws CQLParseException, IOException {
+        int tt = lexer.ttype;
+        boolean isOrdering = (tt == lexer.TT_ORDERED ||
+                              tt == lexer.TT_UNORDERED);
+        if (!isOrdering) {
+            throw new CQLParseException("expected proximity ordering, got " +
+                                        lexer.render());
+        }
+
+        String ordering = lexer.render();
+        node.addModifier("ordering", ordering);
+        match(lexer.ttype);
+    }
+
boolean isBaseRelation() {
debug("isBaseRelation: checking ttype=" + lexer.ttype +
" (" + lexer.render() + ")");
+ return (isProxRelation() ||
+ lexer.ttype == lexer.TT_ANY ||
+ lexer.ttype == lexer.TT_ALL ||
+ lexer.ttype == lexer.TT_EXACT);
+ }
+
+ boolean isProxRelation() {
+ debug("isProxRelation: checking ttype=" + lexer.ttype +
+ " (" + lexer.render() + ")");
return (lexer.ttype == '<' ||
lexer.ttype == '>' ||
lexer.ttype == '=' ||
lexer.ttype == lexer.TT_LE ||
lexer.ttype == lexer.TT_GE ||
- lexer.ttype == lexer.TT_NE ||
- lexer.ttype == lexer.TT_ANY ||
- lexer.ttype == lexer.TT_ALL ||
- lexer.ttype == lexer.TT_EXACT);
+ lexer.ttype == lexer.TT_NE);
}
private void match(int token)
// </triple>
//
public static void main (String[] args) {
- if (args.length != 0) {
- System.err.println("Usage: " + args[0]);
+ if (args.length > 1) {
+ System.err.println("Usage: CQLParser [<CQL-query>]");
+ System.err.println("If unspecified, query is read from stdin");
System.exit(1);
}
- byte[] bytes = new byte[10000];
- try {
- // Read in the whole of standard input in one go
- int nbytes = System.in.read(bytes);
- } catch (java.io.IOException ex) {
- System.err.println("Can't read query: " + ex.getMessage());
- System.exit(2);
+ String cql;
+ if (args.length == 1) {
+ cql = args[0];
+ } else {
+ byte[] bytes = new byte[10000];
+ try {
+ // Read in the whole of standard input in one go
+ int nbytes = System.in.read(bytes);
+ } catch (java.io.IOException ex) {
+ System.err.println("Can't read query: " + ex.getMessage());
+ System.exit(2);
+ }
+ cql = new String(bytes);
}
- String cql = new String(bytes);
+
CQLParser parser = new CQLParser();
CQLNode root;
try {
root = parser.parse(cql);
debug("root='" + root + "'");
- System.out.println(root.toXCQL(0));
+ System.out.println(root.toCQL());
} catch (CQLParseException ex) {
System.err.println("Syntax error: " + ex.getMessage());
System.exit(3);
--- /dev/null
+// $Id: CQLProxNode.java,v 1.1 2002-10-31 22:22:01 mike Exp $
+
+package org.z3950.zing.cql;
+
+
+/**
+ * Represents a proximity node in a CQL parse-tree.
+ * <P>
+ * The node is built in two steps: the constructor supplies the left
+ * operand, and addSecondSubterm() supplies the right operand later.
+ * Proximity parameters are held in a ModifierSet whose base is "prox".
+ *
+ * @version $Id: CQLProxNode.java,v 1.1 2002-10-31 22:22:01 mike Exp $
+ */
+public class CQLProxNode extends CQLBooleanNode {
+    ModifierSet ms;
+
+    /**
+     * Creates a proximity node with only its left operand set; the
+     * right operand stays unset until addSecondSubterm() is called.
+     */
+    public CQLProxNode(CQLNode left) {
+        this.left = left;
+        this.ms = new ModifierSet("prox");
+    }
+
+    /** Completes construction by supplying the right operand. */
+    public void addSecondSubterm(CQLNode right) {
+        this.right = right;
+    }
+
+    /**
+     * Returns the operator as CQL text, as rendered by the modifier set.
+     * NOTE(review): only parameters that were actually supplied are
+     * stored, so no empty slots are emitted for omitted ones -- confirm
+     * that the result re-parses as intended when, say, only a distance
+     * was given.
+     */
+    String op() {
+        return this.ms.toCQL();
+    }
+
+    /** Records one proximity parameter of the given type. */
+    public void addModifier(String type, String value) {
+        this.ms.addModifier(type, value);
+    }
+
+    /**
+     * Renders the operator and its parameters as an XCQL
+     * &lt;boolean&gt; element, delegating to the modifier set.
+     */
+    String booleanXQL(int level) {
+        return this.ms.toXCQL(level, "boolean");
+    }
+}
-// $Id: CQLRelation.java,v 1.1 2002-10-30 09:19:26 mike Exp $
+// $Id: CQLRelation.java,v 1.2 2002-10-31 22:22:01 mike Exp $
package org.z3950.zing.cql;
import java.util.Vector;
/**
* Represents a relation between a CQL qualifier and term.
- * ###
+ * ##
*
- * @version $Id: CQLRelation.java,v 1.1 2002-10-30 09:19:26 mike Exp $
+ * @version $Id: CQLRelation.java,v 1.2 2002-10-31 22:22:01 mike Exp $
*/
public class CQLRelation extends CQLNode {
- String base;
- Vector modifiers;
+ ModifierSet ms;
public CQLRelation(String base) {
- this.base = base;
- modifiers = new Vector();
+ ms = new ModifierSet(base);
}
public void addModifier(String modifier) {
- modifiers.add(modifier);
- }
-
- public String[] getModifiers() {
- int n = modifiers.size();
- String[] res = new String[n];
- for (int i = 0; i < n; i++) {
- res[i] = (String) modifiers.get(i);
- }
-
- return res;
+ ms.addModifier(null, modifier);
}
public String toXCQL(int level) {
- StringBuffer buf = new StringBuffer();
- buf.append (indent(level) + "<relation>\n" +
- indent(level+1) + "<value>" + xq(base) + "</value>\n");
- String[] mods = getModifiers();
- if (mods.length > 0) {
- buf.append(indent(level+1) + "<modifiers>\n");
- for (int i = 0; i < mods.length; i++)
- buf.append(indent(level+2)).
- append("<modifier><value>"). append(mods[i]).
- append("</value></modifier>\n");
- buf.append(indent(level+1) + "</modifiers>\n");
- }
- buf.append(indent(level) + "</relation>\n");
- return buf.toString();
+ return ms.toXCQL(level, "relation");
}
public String toCQL() {
- StringBuffer buf = new StringBuffer(base);
- String[] mods = getModifiers();
- for (int i = 0; i < mods.length; i++) {
- buf.append("/").append(mods[i]);
- }
-
- return buf.toString();
- }
-
- public static void main(String[] args) {
- if (args.length < 1) {
- System.err.println("Usage: CQLRelation <base> <modifier>...");
- System.exit(1);
- }
-
- CQLRelation res = new CQLRelation(args[0]);
- for (int i = 1; i < args.length; i++) {
- res.addModifier(args[i]);
- }
-
- System.out.println(res.toCQL());
+ return ms.toCQL();
}
}
-// $Id: CQLTermNode.java,v 1.5 2002-10-30 09:19:26 mike Exp $
+// $Id: CQLTermNode.java,v 1.6 2002-10-31 22:22:01 mike Exp $
package org.z3950.zing.cql;
/**
* Represents a terminal node in a CQL parse-tree.
- * ###
+ * ##
*
- * @version $Id: CQLTermNode.java,v 1.5 2002-10-30 09:19:26 mike Exp $
+ * @version $Id: CQLTermNode.java,v 1.6 2002-10-31 22:22:01 mike Exp $
*/
public class CQLTermNode extends CQLNode {
private String qualifier;
String toCQL() {
String quotedQualifier = maybeQuote(qualifier);
String quotedTerm = maybeQuote(term);
+ String res = quotedTerm;
- // ### We don't always need spaces around `relation'.
- return quotedQualifier + " " + relation.toCQL() + " " + quotedTerm;
+ if (!qualifier.equalsIgnoreCase("srw.serverChoice")) {
+ // ### We don't always need spaces around `relation'.
+ res = quotedQualifier + " " + relation.toCQL() + " " + quotedTerm;
+ }
+
+ return res;
}
static String maybeQuote(String str) {
str.indexOf('/') != -1 ||
str.indexOf('(') != -1 ||
str.indexOf(')') != -1) {
- str = '"' + replace(str, "\"", "\\\"") + '"';
+ str = '"' + Utils.replaceString(str, "\"", "\\\"") + '"';
}
return str;
-# $Id: Makefile,v 1.2 2002-10-30 09:19:26 mike Exp $
+# $Id: Makefile,v 1.3 2002-10-31 22:22:01 mike Exp $
-all: CQLNode.class CQLTermNode.class CQLBooleanNode.class \
+all: Utils.class \
+ CQLNode.class CQLTermNode.class CQLBooleanNode.class \
CQLAndNode.class CQLOrNode.class CQLNotNode.class \
- CQLRelation.class \
+ CQLRelation.class CQLProxNode.class ModifierSet.class \
CQLParser.class CQLLexer.class CQLParseException.class \
CQLGenerator.class ParameterMissingException.class
--- /dev/null
+// $Id: ModifierSet.java,v 1.1 2002-10-31 22:22:01 mike Exp $
+
+package org.z3950.zing.cql;
+import java.util.Vector;
+import java.lang.StringBuffer;
+
+/**
+ * Represents a base String and a set of modifier Strings.
+ * <P>
+ * This class is used as a workhorse delegate by both CQLRelation and
+ * CQLProxNode - two functionally very separate classes that happen to
+ * require the same data structures and functionality.
+ *
+ * @version $Id: ModifierSet.java,v 1.1 2002-10-31 22:22:01 mike Exp $
+ */
+public class ModifierSet {
+    String base;
+    // Each element is a two-element Vector: [type (may be null), value]
+    Vector modifiers;
+
+    /** Creates a set with the given base string and no modifiers. */
+    public ModifierSet(String base) {
+        this.base = base;
+        modifiers = new Vector();
+    }
+
+    /**
+     * Appends a modifier to the set.
+     *
+     * @param type  the modifier's type, or null if it has none
+     * @param value the modifier's value
+     */
+    public void addModifier(String type, String value) {
+        Vector modifier = new Vector();
+        modifier.add(type);
+        modifier.add(value);
+        modifiers.add(modifier);
+    }
+
+    /**
+     * Returns the modifiers, in the order they were added, as an array
+     * of two-element Vectors holding type and value.
+     */
+    public Vector[] getModifiers() {
+        int n = modifiers.size();
+        Vector[] res = new Vector[n];
+        for (int i = 0; i < n; i++) {
+            res[i] = (Vector) modifiers.get(i);
+        }
+
+        return res;
+    }
+
+    /**
+     * Renders the base and modifiers as an XCQL fragment.
+     *
+     * @param level           indentation level of the outermost element
+     * @param topLevelElement name of the outermost element
+     * @return the XML text; a &lt;modifiers&gt; element is included only
+     *         when at least one modifier is present, and a &lt;type&gt;
+     *         element only for modifiers whose type is not null
+     */
+    public String toXCQL(int level, String topLevelElement) {
+        StringBuffer buf = new StringBuffer();
+        buf.append (Utils.indent(level) + "<" + topLevelElement + ">\n" +
+                    Utils.indent(level+1) + "<value>" + Utils.xq(base) +
+                    "</value>\n");
+        Vector[] mods = getModifiers();
+        if (mods.length > 0) {
+            buf.append(Utils.indent(level+1) + "<modifiers>\n");
+            for (int i = 0; i < mods.length; i++) {
+                Vector modifier = mods[i];
+                buf.append(Utils.indent(level+2)).
+                    append("<modifier>\n");
+                if (modifier.get(0) != null)
+                    buf.append(Utils.indent(level+3)).
+                        append("<type>").
+                        append(Utils.xq((String) modifier.get(0))).
+                        append("</type>\n");
+                buf.append(Utils.indent(level+3));
+                buf.append("<value>").
+                    append(Utils.xq((String) modifier.get(1))).
+                    append("</value>\n");
+                buf.append(Utils.indent(level+2)).
+                    append("</modifier>\n");
+            }
+            buf.append(Utils.indent(level+1) + "</modifiers>\n");
+        }
+        buf.append(Utils.indent(level) + "</" + topLevelElement + ">\n");
+        return buf.toString();
+    }
+
+    /**
+     * Renders the set as CQL: the base followed by "/" and the value of
+     * each modifier in turn.  Modifier types are not emitted.
+     */
+    public String toCQL() {
+        StringBuffer buf = new StringBuffer(base);
+        Vector[] mods = getModifiers();
+        for (int i = 0; i < mods.length; i++) {
+            buf.append("/").append(mods[i].get(1));
+        }
+
+        return buf.toString();
+    }
+
+    /**
+     * Test harness: builds a set from the command line and prints its
+     * CQL rendering.
+     */
+    public static void main(String[] args) {
+        // After the base, arguments must come in <type> <name> pairs, so
+        // the total count must be odd; otherwise args[i+1] below would
+        // run off the end of the array.
+        if (args.length < 1 || args.length % 2 == 0) {
+            System.err.println("Usage: ModifierSet <base> [<type> <name>]...");
+            System.exit(1);
+        }
+
+        ModifierSet res = new ModifierSet(args[0]);
+        for (int i = 1; i < args.length; i += 2) {
+            res.addModifier(args[i], args[i+1]);
+        }
+
+        System.out.println(res.toCQL());
+    }
+}