From ac8fb2ba7ecad42eb920c65b7056e3a3e677275c Mon Sep 17 00:00:00 2001 From: mike Date: Tue, 29 Oct 2002 10:15:58 +0000 Subject: [PATCH] Handle relations, improve XML rendering (wow, was that hard. The Java class library can be really sucky!) --- README | 28 +++++++++++++- src/org/z3950/zing/cql/CQLNode.java | 34 ++++++++++++++--- src/org/z3950/zing/cql/CQLParser.java | 67 ++++++++++++++++++++++++++------- 3 files changed, 109 insertions(+), 20 deletions(-) diff --git a/README b/README index 6d123c7..ad98b9c 100644 --- a/README +++ b/README @@ -1,4 +1,4 @@ -$Id: README,v 1.3 2002-10-25 07:38:16 mike Exp $ +$Id: README,v 1.4 2002-10-29 10:15:58 mike Exp $ cql-java -- a free CQL compiler for Java @@ -76,3 +76,29 @@ SEE ALSO Adam Dickmeiss's CQL compiler, written in C. Rob Sanderson's CQL compiler, written in Python. All the other free CQL compilers everyone's going to write. + + +TO DO +----- + +### Finish the parser: +### * multi-character relations DONE but ### single "<" fails! +### * word relations +### * relation modifiers +### * proximity, + +### Finish the XCQL-rendering back end (mostly a matter of quoting + characters to be emitted as part of an XML document). + DONE + +### Finish CQL-decompiling back end (mostly a matter of quoting) + +### Write PQN-generating back end (will need to be driven from a + configuration file specifying how to represent the qualifiers, + relations, relation modifiers and wildcard characters as Z39.50 + attributes.) + +### Write stochastic query generator, driven off MA grammar. + +### Write "javadoc" comments. 
+ diff --git a/src/org/z3950/zing/cql/CQLNode.java b/src/org/z3950/zing/cql/CQLNode.java index baa6cad..3d16a37 100644 --- a/src/org/z3950/zing/cql/CQLNode.java +++ b/src/org/z3950/zing/cql/CQLNode.java @@ -1,4 +1,4 @@ -// $Id: CQLNode.java,v 1.5 2002-10-27 00:46:25 mike Exp $ +// $Id: CQLNode.java,v 1.6 2002-10-29 10:15:58 mike Exp $ package org.z3950.zing.cql; @@ -7,7 +7,7 @@ package org.z3950.zing.cql; * Represents a node in a CQL parse-tree ... * ### * - * @version $Id: CQLNode.java,v 1.5 2002-10-27 00:46:25 mike Exp $ + * @version $Id: CQLNode.java,v 1.6 2002-10-29 10:15:58 mike Exp $ */ public abstract class CQLNode { abstract String toXCQL(int level); @@ -21,14 +21,36 @@ public abstract class CQLNode { return x; } + // XML Quote -- + // s/&/&amp;/g; + // s/</&lt;/g; + // s/>/&gt;/g; + // This is hideously inefficient, but I just don't see a better + // way using the standard JAVA library. + // protected String xq(String str) { - // XML Quote - // ### s/&/&amp;/g; - // s/</&lt;/g; - // s/>/&gt;/g; + str = replace(str, "&", "&amp;"); + str = replace(str, "<", "&lt;"); + str = replace(str, ">", "&gt;"); return str; } + String replace(String str, String from, String to) { + StringBuffer sb = new StringBuffer(); + int ix; // index of next `from' + int offset = 0; // index of previous `from' + length(from) + + while ((ix = str.indexOf(from, offset)) != -1) { + sb.append(str.substring(offset, ix)); + sb.append(to); + offset = ix + from.length(); + } + + // End of string: append last bit and we're done + sb.append(str.substring(offset)); + return sb.toString(); + } + // Test harness public static void main (String[] args) { CQLNode n1 = new CQLTermNode("dc.author", "=", "kernighan"); diff --git a/src/org/z3950/zing/cql/CQLParser.java b/src/org/z3950/zing/cql/CQLParser.java index 16d9bd6..8c5d871 100644 --- a/src/org/z3950/zing/cql/CQLParser.java +++ b/src/org/z3950/zing/cql/CQLParser.java @@ -1,4 +1,4 @@ -// $Id: CQLParser.java,v 1.8 2002-10-27 00:46:25 mike Exp $ +// $Id: CQLParser.java,v 1.9 2002-10-29 10:15:58 mike Exp $ 
package org.z3950.zing.cql; import java.util.Properties; @@ -12,14 +12,14 @@ import java.io.StreamTokenizer; * Compiles a CQL string into a parse tree ... * ### * - * @version $Id: CQLParser.java,v 1.8 2002-10-27 00:46:25 mike Exp $ + * @version $Id: CQLParser.java,v 1.9 2002-10-29 10:15:58 mike Exp $ * @see http://zing.z3950.org/cql/index.html */ public class CQLParser { private CQLLexer lexer; static private boolean PARSEDEBUG = false; - static private boolean LEXDEBUG = false; + static private boolean LEXDEBUG = true; private class CQLParseException extends Exception { CQLParseException(String s) { super(s); } @@ -100,6 +100,7 @@ public class CQLParser { qualifier = word; relation = lexer.render(false); + debug("got relation '" + relation + "'"); match(lexer.ttype); debug("qualifier='" + qualifier + ", relation='" + relation + "'"); } @@ -110,10 +111,13 @@ public class CQLParser { } boolean isRelation() { - // ### Also need to handle <=, >=, <> + // ### Handle any, all and exact return (lexer.ttype == '<' || lexer.ttype == '>' || - lexer.ttype == '='); + lexer.ttype == '=' || + lexer.ttype == lexer.TT_LE || + lexer.ttype == lexer.TT_GE || + lexer.ttype == lexer.TT_NE); } private void match(int token) @@ -191,7 +195,15 @@ public class CQLParser { // a render() method. Used only by CQLParser. 
// class CQLLexer extends StreamTokenizer { - private static boolean lexdebug; + private static boolean LEXDEBUG; + static int TT_LE = 1000; // The token "<=" + static int TT_GE = 1001; // The token ">=" + static int TT_NE = 1002; // The token "<>" + + static void debug(String str) { + if (LEXDEBUG) + System.err.println("LEXDEBUG: " + str); + } CQLLexer(String cql, boolean lexdebug) { super(new StringReader(cql)); @@ -202,16 +214,39 @@ class CQLLexer extends StreamTokenizer { this.ordinaryChar('('); this.ordinaryChar(')'); this.wordChars('\'', '\''); // prevent this from introducing strings - this.lexdebug = lexdebug; + this.LEXDEBUG = lexdebug; } public int nextToken() throws java.io.IOException { int token = super.nextToken(); - if (lexdebug) - System.out.println("LEXDEBUG: " + - "token=" + token + ", " + - "nval=" + this.nval + ", " + - "sval=" + this.sval); + + if (token == '<') { + debug("token starts with '<' ..."); + int t2 = super.nextToken(); + if (t2 == '=') { + debug("token continues with '=' - it's '<='"); + this.ttype = token = TT_LE; + } else if (t2 == '>') { + debug("token continues with '>' - it's '<>'"); + this.ttype = token = TT_NE; + } else { + debug("next token is " + token + " (pushed back)"); + //this.pushBack(); + } + } else if (token == '>') { + debug("token starts with '>' ..."); + int t2 = super.nextToken(); + if (t2 == '=') { + debug("token continues with '=' - it's '>='"); + this.ttype = token = TT_GE; + } else { + debug("next token is " + token + " (pushed back)"); + //this.pushBack(); + } + } + + debug("token=" + token + ", " + + "nval=" + this.nval + ", " + "sval=" + this.sval); return token; } @@ -234,9 +269,15 @@ class CQLLexer extends StreamTokenizer { } else if (token == this.TT_NUMBER) { return "number: " + this.nval; } else if (token == this.TT_WORD) { - return "word: \"" + this.sval + "\""; + return "word: " + this.sval; } else if (token == '"') { return "string: \"" + this.sval + "\""; + } else if (token == TT_LE) { + return 
"<="; + } else if (token == TT_GE) { + return ">="; + } else if (token == TT_NE) { + return "<>"; } String res = String.valueOf((char) token); -- 1.7.10.4