-$Id: README,v 1.3 2002-10-25 07:38:16 mike Exp $
+$Id: README,v 1.4 2002-10-29 10:15:58 mike Exp $
cql-java -- a free CQL compiler for Java
Adam Dickmeiss's CQL compiler, written in C.
Rob Sanderson's CQL compiler, written in Python.
All the other free CQL compilers everyone's going to write.
+
+
+TO DO
+-----
+
+### Finish the parser:
+### * multi-character relations DONE but ### single "<" fails!
+### * word relations
+### * relation modifiers
+### * proximity,
+
+### Finish the CXQL-rendering back end (mostly a matter of quoting
+ characters to be emitted as part of an XML document).
+ DONE
+
+### Finish CQL-decompiling back end (mostly a matter of quoting)
+
+### Write PQN-generating back end (will need to be driven from a
+ configuration file specifying how to represent the qualifiers,
+ relations, relation modifiers and wildcard characters as Z39.50
+ attributes.)
+
+### Write stochastic query generator, driven off MA grammar.
+
+### Write "javadoc" comments.
+
-// $Id: CQLNode.java,v 1.5 2002-10-27 00:46:25 mike Exp $
+// $Id: CQLNode.java,v 1.6 2002-10-29 10:15:58 mike Exp $
package org.z3950.zing.cql;
* Represents a node in a CQL parse-tree ...
* ###
*
- * @version $Id: CQLNode.java,v 1.5 2002-10-27 00:46:25 mike Exp $
+ * @version $Id: CQLNode.java,v 1.6 2002-10-29 10:15:58 mike Exp $
*/
public abstract class CQLNode {
abstract String toXCQL(int level);
return x;
}
+    // XML Quote -- escape the three characters that are markup in XML
+    // so that `str' can be emitted as character data:
+    //	s/&/&amp;/g;
+    //	s/</&lt;/g;
+    //	s/>/&gt;/g;
+    // The ampersand must be handled first: the other two replacements
+    // introduce ampersands of their own, which must not be re-escaped.
+    //
+    // This is hideously inefficient, but I just don't see a better
+    // way using the standard JAVA library.
+    //
protected String xq(String str) {
-	// XML Quote
-	// ###	s/&/&amp;/g;
-	//	s/</&lt;/g;
-	//	s/>/&gt;/g;
+	str = replace(str, "&", "&amp;");
+	str = replace(str, "<", "&lt;");
+	str = replace(str, ">", "&gt;");
return str;
}
+    // Returns a copy of `str' in which every occurrence of `from',
+    // found by scanning left to right without overlap, is replaced
+    // by `to'.  An empty `from' leaves the string unchanged.
+    //
+    String replace(String str, String from, String to) {
+	// An empty pattern matches at every offset without ever
+	// advancing, so the loop below would never terminate.
+	if (from.length() == 0) {
+	    return str;
+	}
+
+	StringBuffer sb = new StringBuffer();
+	int ix;			// index of next `from'
+	int offset = 0;		// index of previous `from' + length(from)
+
+	while ((ix = str.indexOf(from, offset)) != -1) {
+	    sb.append(str.substring(offset, ix));
+	    sb.append(to);
+	    offset = ix + from.length();
+	}
+
+	// End of string: append last bit and we're done
+	sb.append(str.substring(offset));
+	return sb.toString();
+    }
+
// Test harness
public static void main (String[] args) {
CQLNode n1 = new CQLTermNode("dc.author", "=", "kernighan");
-// $Id: CQLParser.java,v 1.8 2002-10-27 00:46:25 mike Exp $
+// $Id: CQLParser.java,v 1.9 2002-10-29 10:15:58 mike Exp $
package org.z3950.zing.cql;
import java.util.Properties;
* Compiles a CQL string into a parse tree ...
* ###
*
- * @version $Id: CQLParser.java,v 1.8 2002-10-27 00:46:25 mike Exp $
+ * @version $Id: CQLParser.java,v 1.9 2002-10-29 10:15:58 mike Exp $
* @see <A href="http://zing.z3950.org/cql/index.html"
* >http://zing.z3950.org/cql/index.html</A>
*/
public class CQLParser {
private CQLLexer lexer;
static private boolean PARSEDEBUG = false;
- static private boolean LEXDEBUG = false;
+ static private boolean LEXDEBUG = true;
private class CQLParseException extends Exception {
CQLParseException(String s) { super(s); }
qualifier = word;
relation = lexer.render(false);
+ debug("got relation '" + relation + "'");
match(lexer.ttype);
debug("qualifier='" + qualifier + ", relation='" + relation + "'");
}
}
boolean isRelation() {
-	// ### Also need to handle <=, >=, <>
-	return (lexer.ttype == '<' ||
-		lexer.ttype == '>' ||
-		lexer.ttype == '=');
+	// ### Handle any, all and exact
+	final int type = lexer.ttype;
+
+	// Single-character relations
+	if (type == '<' || type == '>' || type == '=') {
+	    return true;
+	}
+
+	// Two-character relations, reported by the lexer as its own token types
+	return type == CQLLexer.TT_LE
+	    || type == CQLLexer.TT_GE
+	    || type == CQLLexer.TT_NE;
}
private void match(int token)
// a render() method. Used only by CQLParser.
//
class CQLLexer extends StreamTokenizer {
- private static boolean lexdebug;
+    private static boolean LEXDEBUG;
+
+    // Token types for the two-character relations, returned by
+    // nextToken() in place of the individual characters.  Declared
+    // final so that no caller can reassign them.
+    // NOTE(review): the values are presumably chosen to stay clear of
+    // the character codes and negative TT_* values that
+    // StreamTokenizer itself reports -- confirm.
+    static final int TT_LE = 1000;	// The token "<="
+    static final int TT_GE = 1001;	// The token ">="
+    static final int TT_NE = 1002;	// The token "<>"
+
+    // Writes `str' to standard error with a "LEXDEBUG: " prefix, but
+    // only while the LEXDEBUG flag is set; otherwise does nothing.
+    static void debug(String str) {
+	if (!LEXDEBUG) {
+	    return;
+	}
+	System.err.println("LEXDEBUG: " + str);
+    }
+    // Creates a lexer that reads tokens from the string `cql'.
+    // Parentheses are made ordinary (single-character) tokens, and
+    // the apostrophe is made a word character so that it does not
+    // start a quoted string.  `lexdebug' switches debug() output on or
+    // off; the flag is static, so it applies to every lexer.
+    //
CQLLexer(String cql, boolean lexdebug) {
super(new StringReader(cql));
this.ordinaryChar('(');
this.ordinaryChar(')');
this.wordChars('\'', '\''); // prevent this from introducing strings
- this.lexdebug = lexdebug;
+	// Assign through the class name, not `this': the field is static.
+	CQLLexer.LEXDEBUG = lexdebug;
}
+    // One-token look-ahead buffer used by nextToken().  When a '<' or
+    // '>' turns out NOT to start a two-character relation, the token
+    // read after it is parked here and delivered by the next call.
+    private boolean havePending = false;
+    private int pendingType;
+    private double pendingNval;
+    private String pendingSval;
+
+    // Returns the next token, folding "<=", ">=" and "<>" into the
+    // single token types TT_LE, TT_GE and TT_NE.  On return, ttype,
+    // nval and sval always describe the token that was returned, even
+    // when a look-ahead token had to be read to decide.
+    //
+    // NOTE(review): the look-ahead buffer is not coordinated with
+    // StreamTokenizer.pushBack(); confirm that no caller invokes
+    // pushBack() straight after a lone '<' or '>'.
+    //
public int nextToken() throws java.io.IOException {
- int token = super.nextToken();
- if (lexdebug)
- System.out.println("LEXDEBUG: " +
- "token=" + token + ", " +
- "nval=" + this.nval + ", " +
- "sval=" + this.sval);
+	int token;
+	if (havePending) {
+	    // Deliver the token that the previous call read ahead.
+	    havePending = false;
+	    this.ttype = token = pendingType;
+	    this.nval = pendingNval;
+	    this.sval = pendingSval;
+	} else {
+	    token = super.nextToken();
+	}
+
+	if (token == '<' || token == '>') {
+	    char first = (char) token;
+	    // The look-ahead read below may overwrite nval.
+	    double firstNval = this.nval;
+	    debug("token starts with '" + first + "' ...");
+	    int t2 = super.nextToken();
+	    if (t2 == '=') {
+		debug("token continues with '=' - it's '" + first + "='");
+		token = (first == '<') ? TT_LE : TT_GE;
+	    } else if (first == '<' && t2 == '>') {
+		debug("token continues with '>' - it's '<>'");
+		token = TT_NE;
+	    } else {
+		debug("next token is " + t2 + " (pushed back)");
+		// Park the look-ahead token for the next call ...
+		havePending = true;
+		pendingType = t2;
+		pendingNval = this.nval;
+		pendingSval = this.sval;
+		// ... and make nval/sval describe the lone '<' or '>'
+		// again (a single-character token has no sval).
+		this.nval = firstNval;
+		this.sval = null;
+	    }
+	    // super.nextToken() left ttype describing the look-ahead
+	    // token; point it back at the token being returned.
+	    this.ttype = token;
+	}
+
+	debug("token=" + token + ", " +
+	      "nval=" + this.nval + ", " + "sval=" + this.sval);
return token;
}
} else if (token == this.TT_NUMBER) {
return "number: " + this.nval;
} else if (token == this.TT_WORD) {
- return "word: \"" + this.sval + "\"";
+ return "word: " + this.sval;
} else if (token == '"') {
return "string: \"" + this.sval + "\"";
+ } else if (token == TT_LE) {
+ return "<=";
+ } else if (token == TT_GE) {
+ return ">=";
+ } else if (token == TT_NE) {
+ return "<>";
}
String res = String.valueOf((char) token);