From: Marc Cromme Date: Fri, 20 Jan 2006 22:38:12 +0000 (+0000) Subject: added regex core functionality for filter_query_rewrite X-Git-Tag: YP2.0.0.3~16 X-Git-Url: http://lists.indexdata.dk/?a=commitdiff_plain;h=743341841a21a695d08c99b27a30329119030db5;p=metaproxy-moved-to-github.git added regex core functionality for filter_query_rewrite added check in configure.ac for libboost_regex still missing configuration of filter_query_rewrite, a list of regular expressions, and in fact packing the altered query back in the GDU, so only half-ways --- diff --git a/configure.ac b/configure.ac index d151883..83c30ec 100644 --- a/configure.ac +++ b/configure.ac @@ -49,6 +49,11 @@ AC_CHECK_LIB(boost_program_options, main, [],[ AC_MSG_ERROR([Install libboost-program-options-dev (or similar)]) ]) +AC_CHECK_LIB(boost_regex, main, [],[ + AC_MSG_NOTICE([boost regex library not found.]) + AC_MSG_ERROR([Install libboost-regex-dev (or similar)]) +]) + AC_CHECK_HEADER(boost/test/auto_unit_test.hpp,,[ AC_MSG_NOTICE([boost test unit header not found.]) AC_MSG_ERROR([Install libboost-test-dev (or similar)]) @@ -120,6 +125,7 @@ fi AC_CHECK_HEADERS(sys/select.h sys/socket.h iostream list vector stdexcept) AC_CHECK_HEADERS(boost/thread/mutex.hpp \ boost/date_time/posix_time/posix_time.hpp \ + boost/regex.hpp \ boost/test/auto_unit_test.hpp) # Checks for typedefs, structures, and compiler characteristics. diff --git a/src/filter_query_rewrite.cpp b/src/filter_query_rewrite.cpp index 8ae8400..cffa7e3 100644 --- a/src/filter_query_rewrite.cpp +++ b/src/filter_query_rewrite.cpp @@ -1,4 +1,4 @@ -/* $Id: filter_query_rewrite.cpp,v 1.1 2006-01-19 12:18:09 marc Exp $ +/* $Id: filter_query_rewrite.cpp,v 1.2 2006-01-20 22:38:12 marc Exp $ Copyright (c) 2005, Index Data. %LICENSE% @@ -10,6 +10,7 @@ #include "package.hpp" //#include +#include #include "util.hpp" #include "filter_query_rewrite.hpp" @@ -21,15 +22,17 @@ namespace yf = yp2::filter; namespace yp2 { namespace filter { class QueryRewrite::Rep { - friend class QueryRewrite; - int dummy; + //friend class QueryRewrite; + public: + void process(yp2::Package &package) const; + private: + void rewriteRegex(Z_Query *query) const; }; } } yf::QueryRewrite::QueryRewrite() : m_p(new Rep) { - m_p->dummy = 1; } yf::QueryRewrite::~QueryRewrite() @@ -38,7 +41,11 @@ yf::QueryRewrite::~QueryRewrite() void yf::QueryRewrite::process(yp2::Package &package) const { + m_p->process(package); +} +void yf::QueryRewrite::Rep::process(yp2::Package &package) const +{ if (package.session().is_closed()) { std::cout << "Got Close.\n"; @@ -57,8 +64,17 @@ void yf::QueryRewrite::process(yp2::Package &package) const Z_APDU_searchRequest) { std::cout << "Got Z3950 Search PDU\n"; - //Z_SearchRequest *req = gdu->u.z3950->u.searchRequest; - //package.request() = gdu; + Z_SearchRequest *req = gdu->u.z3950->u.searchRequest; + + // applying regex query rewriting + rewriteRegex(req->query); + + // fold new query structure into gdu package .. + // yp2::util::pqf(odr, gdu->u.z3950, query_out); + // question: which odr structure to use in this call ?? + // memory alignment has to be correct, this is a little tricky ... + // I'd rather like to alter the gdu and pack it back using: + package.request() = gdu; } else if (gdu && gdu->which == Z_GDU_Z3950 && gdu->u.z3950->which == Z_APDU_scanRequest) @@ -70,6 +86,65 @@ void yf::QueryRewrite::process(yp2::Package &package) const package.move(); } + +void yf::QueryRewrite::Rep::rewriteRegex(Z_Query *query) const +{ + std::string query_in = yp2::util::zQueryToString(query); + std::cout << "QUERY IN '" << query_in << "'\n"; + + std::string query_out; + + boost::regex rgx; + try{ + // make regular expression replacement here + std::string expression("@attr 1=4"); + std::string format("@attr 1=4 @attr 4=3"); + //std::string expression("the"); + //std::string format("else"); + //std::string expression("(<)|(>)|\\r"); + //std::string format("(?1<)(?2>)"); + + std::cout << "EXPRESSION '" << expression << "'\n"; + std::cout << "FORMAT '" << format << "'\n"; + + rgx.assign(expression.c_str()); + + bool match(false); + bool search(false); + + // other flags + // see http://www.boost.org/libs/regex/doc/match_flag_type.html + //boost::match_flag_type flags = boost::match_default; + // boost::format_default + // boost::format_perl + // boost::format_literal + // boost::format_all + // boost::format_no_copy + // boost::format_first_only + + boost::match_flag_type flags + = boost::match_default | boost::format_all; + + match = regex_match(query_in, rgx, flags); + search = regex_search(query_in, rgx, flags); + query_out = boost::regex_replace(query_in, rgx, format, flags); + std::cout << "MATCH '" << match << "'\n"; + std::cout << "SEARCH '" << search << "'\n"; + std::cout << "QUERY OUT '" << query_out << "'\n"; + + } + catch(boost::regex_error &e) + { + std::cout << "REGEX Error code=" << e.code() + << " position=" << e.position() << "\n"; + } + + //std::cout << "QUERY OUT '" << query_out << "'\n"; + // still need to fold this new rpn query string into Z_Query structure... +} + + + static yp2::filter::Base* filter_creator() { return new yp2::filter::QueryRewrite; diff --git a/src/test_filter_query_rewrite.cpp b/src/test_filter_query_rewrite.cpp index 8208890..0fe19a2 100644 --- a/src/test_filter_query_rewrite.cpp +++ b/src/test_filter_query_rewrite.cpp @@ -1,4 +1,4 @@ -/* $Id: test_filter_query_rewrite.cpp,v 1.1 2006-01-19 12:18:09 marc Exp $ +/* $Id: test_filter_query_rewrite.cpp,v 1.2 2006-01-20 22:38:12 marc Exp $ Copyright (c) 2005, Index Data. %LICENSE% @@ -30,8 +30,8 @@ public: Z_GDU *gdu = package.request().get(); - if (gdu && gdu->which == Z_GDU_Z3950 && gdu->u.z3950->which == - Z_APDU_initRequest) + if (gdu && gdu->which == Z_GDU_Z3950 + && gdu->u.z3950->which == Z_APDU_initRequest) { std::cout << "Got Z3950 Init PDU\n"; //Z_InitRequest *req = gdu->u.z3950->u.initRequest; @@ -58,7 +58,7 @@ public: void check_query_rewrite_init(yp2::RouterChain &router) { - std::cout << "QUERY REWRITE INIT\n"; + //std::cout << "QUERY REWRITE INIT\n"; // Create package with Z39.50 init request in it yp2::Package pack; @@ -78,19 +78,20 @@ void check_query_rewrite_init(yp2::RouterChain &router) Z_GDU *z_gdu = gdu->get(); - std::cout << "Z_GDU " << z_gdu << "\n"; + //std::cout << "Z_GDU " << z_gdu << "\n"; BOOST_CHECK(z_gdu); if (z_gdu) { BOOST_CHECK_EQUAL(z_gdu->which, Z_GDU_Z3950); BOOST_CHECK_EQUAL(z_gdu->u.z3950->which, Z_APDU_initRequest); } } + void check_query_rewrite_search(yp2::RouterChain &router, std::string query_in, - std::string query_out) + std::string query_expect) { - std::cout << "QUERY REWRITE SEARCH " - << query_in << " " << query_out << "\n"; + //std::cout << "QUERY REWRITE SEARCH " + // << query_in << " " << query_expect << "\n"; // Create package with Z39.50 search request in it yp2::Package pack; @@ -101,18 +102,17 @@ void check_query_rewrite_search(yp2::RouterChain &router, // create package PQF query here yp2::util::pqf(odr, apdu, query_in); - - //apdu->u.searchRequest->num_databaseNames = 1; - //apdu->u.searchRequest->databaseNames = (char**) - //odr_malloc(odr, sizeof(char *)); - //apdu->u.searchRequest->databaseNames[0] = odr_strdup(odr, "Default"); - + // create package PDF database info (needed!) + apdu->u.searchRequest->num_databaseNames = 1; + apdu->u.searchRequest->databaseNames + = (char**)odr_malloc(odr, sizeof(char *)); + apdu->u.searchRequest->databaseNames[0] = odr_strdup(odr, "Default"); + // Done creating request package pack.request() = apdu; - // Done creating query. // Put it in router - //pack.router(router).move(); + pack.router(router).move(); // Inspect bounced back request //yazpp_1::GDU *gdu = &pack.response(); @@ -121,12 +121,24 @@ void check_query_rewrite_search(yp2::RouterChain &router, Z_GDU *z_gdu = gdu->get(); //std::cout << "Z_GDU " << z_gdu << "\n"; - //BOOST_CHECK(z_gdu); + BOOST_CHECK(z_gdu); if (z_gdu) { BOOST_CHECK_EQUAL(z_gdu->which, Z_GDU_Z3950); BOOST_CHECK_EQUAL(z_gdu->u.z3950->which, Z_APDU_searchRequest); - // take query out of package again - BOOST_CHECK_EQUAL(query_in, query_out); + + // take query out of package again and check rewrite + //std::string query_changed = yp2::util::apduToPqf(apdu); + std::string query_changed + = yp2::util::zQueryToString(z_gdu->u.z3950->u.searchRequest->query); + + //BOOST_CHECK_EQUAL(query_in, query_expect); + + //std::cout << "'" << query_expect << "'\n"; + //std::cout << "'" << query_changed << "'\n"; + + // need ugly whitespace here .. + //BOOST_CHECK_EQUAL(query_expect, std::string(" ") + query_changed); + BOOST_CHECK_EQUAL(query_expect, query_changed); } } @@ -155,7 +167,9 @@ BOOST_AUTO_UNIT_TEST( test_filter_query_rewrite2 ) //router.append(f_bounce_z3950); check_query_rewrite_init(router); - check_query_rewrite_search(router, "@attr 1=4 the", "@attr 1=4 the"); + check_query_rewrite_search(router, + "@attrset Bib-1 @attr 1=4 the", + "@attrset Bib-1 @attr 1=4 the"); } catch ( ... ) { diff --git a/src/util.cpp b/src/util.cpp index c8cf573..4570c31 100644 --- a/src/util.cpp +++ b/src/util.cpp @@ -1,4 +1,4 @@ -/* $Id: util.cpp,v 1.12 2006-01-19 21:43:51 adam Exp $ +/* $Id: util.cpp,v 1.13 2006-01-20 22:38:12 marc Exp $ Copyright (c) 2005, Index Data. %LICENSE% @@ -9,8 +9,11 @@ #include #include #include +#include // for yaz_query_to_wrbuf() #include "util.hpp" +//#include + void yp2::util::piggyback(int smallSetUpperBound, int largeSetLowerBound, int mediumSetPresentNumber, @@ -38,6 +41,7 @@ void yp2::util::piggyback(int smallSetUpperBound, } } + bool yp2::util::pqf(ODR odr, Z_APDU *apdu, const std::string &q) { YAZ_PQF_Parser pqf_parser = yaz_pqf_create(); @@ -56,6 +60,49 @@ bool yp2::util::pqf(ODR odr, Z_APDU *apdu, const std::string &q) { return true; } + +std::string yp2::util::zQueryToString(Z_Query *query) +{ + std::string query_str = ""; + + if (query && query->which == Z_Query_type_1){ + Z_RPNQuery *rpn = query->u.type_1; + + if (rpn){ + + // allocate wrbuf (strings in YAZ!) + WRBUF w = wrbuf_alloc(); + + // put query in w + yaz_rpnquery_to_wrbuf(w, rpn); + + // from w to std::string + query_str = std::string(wrbuf_buf(w), wrbuf_len(w)); + + // destroy wrbuf + wrbuf_free(w, 1); + } + } + +#if 0 + if (query && query->which == Z_Query_type_1){ + + // allocate wrbuf (strings in YAZ!) + WRBUF w = wrbuf_alloc(); + + // put query in w + yaz_query_to_wrbuf(w, query); + + // from w to std::string + query_str = std::string(wrbuf_buf(w), wrbuf_len(w)); + + // destroy wrbuf + wrbuf_free(w, 1); + } +#endif + return query_str; +} + void yp2::util::get_default_diag(Z_DefaultDiagFormat *r, int &error_code, std::string &addinfo) { diff --git a/src/util.hpp b/src/util.hpp index f610ce5..45add31 100644 --- a/src/util.hpp +++ b/src/util.hpp @@ -1,4 +1,4 @@ -/* $Id: util.hpp,v 1.10 2006-01-19 21:43:51 adam Exp $ +/* $Id: util.hpp,v 1.11 2006-01-20 22:38:12 marc Exp $ Copyright (c) 2005, Index Data. %LICENSE% @@ -18,21 +18,30 @@ namespace yp2 { namespace util { bool pqf(ODR odr, Z_APDU *apdu, const std::string &q); + + std::string zQueryToString(Z_Query *query); + Z_ReferenceId **get_referenceId(Z_APDU *apdu); + Z_APDU *create_APDU(ODR odr, int type, Z_APDU *in_apdu); + bool set_databases_from_zurl(ODR odr, std::string zurl, int *db_num, char ***db_strings); + void split_zurl(std::string zurl, std::string &host, std::list &db); int get_vhost_otherinfo(Z_OtherInformation **otherInformation, bool remove_flag, std::list &vhosts); + void set_vhost_otherinfo(Z_OtherInformation **otherInformation, ODR odr, const std::list &vhosts); + void get_init_diagnostics(Z_InitResponse *res, int &error_code, std::string &addinfo); + void get_default_diag(Z_DefaultDiagFormat *r, int &error_code, std::string &addinfo);