From e5329397729e8959a821c6956f75012372fc0a77 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Thu, 30 Jun 2011 19:34:43 +0200 Subject: [PATCH] URL recipe in place --- etc/config-zoom.xml | 1 + include/metaproxy/xmlutil.hpp | 2 +- src/Makefile.am | 9 ++-- src/filter_zoom.cpp | 27 +++++++++- src/test_xmlutil.cpp | 116 +++++++++++++++++++++++++++++++++++++++++ src/url_recipe.cpp | 76 +++++++++++++++++++++------ src/xmlutil.cpp | 3 +- xml/schema/filter_zoom.rnc | 1 + xml/schema/filter_zoom.rng | 5 ++ xml/schema/filter_zoom.xsd | 2 + 10 files changed, 218 insertions(+), 24 deletions(-) create mode 100644 src/test_xmlutil.cpp diff --git a/etc/config-zoom.xml b/etc/config-zoom.xml index 6985f85..20246c1 100644 --- a/etc/config-zoom.xml +++ b/etc/config-zoom.xml @@ -22,6 +22,7 @@ 1=4 s=pw t=l,r usmarc tmarc.xsl + http://sever.com?title=${md-title[\s+/+/g]} localhost:9999/db01 diff --git a/include/metaproxy/xmlutil.hpp b/include/metaproxy/xmlutil.hpp index ff265ef..ce17190 100644 --- a/include/metaproxy/xmlutil.hpp +++ b/include/metaproxy/xmlutil.hpp @@ -51,7 +51,7 @@ namespace metaproxy_1 { void check_empty(const xmlNode *node); - void url_recipe_handle(xmlDoc *doc, std::string recipe); + std::string url_recipe_handle(xmlDoc *doc, std::string recipe); } class XMLError : public std::runtime_error { public: diff --git a/src/Makefile.am b/src/Makefile.am index 903c637..7972352 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -58,11 +58,11 @@ libmetaproxy_la_SOURCES = \ xmlutil.cpp -libmetaproxy_la_LIBADD = $(YAZPPLALIB) $(BOOST_LIB) $(BOOST_THREAD_LIB) +libmetaproxy_la_LIBADD = $(YAZPPLALIB) $(BOOST_LIB) $(BOOST_THREAD_LIB) $(BOOST_REGEX_LIB) # Rules for lib -LDADD = libmetaproxy.la $(YAZPPLALIB) $(BOOST_LIB) $(BOOST_THREAD_LIB) +LDADD = libmetaproxy.la $(YAZPPLALIB) $(BOOST_LIB) $(BOOST_THREAD_LIB) $(BOOST_REGEX_LIB) bin_PROGRAMS = metaproxy noinst_PROGRAMS = ex_filter_frontend_net ex_router_flexml tstdl @@ -101,7 +101,8 @@ check_PROGRAMS = \ test_filter_sru_to_z3950 \ test_filter_virt_db \ test_ses_map \ - test_router_flexml + test_router_flexml \ + test_xmlutil TESTS=$(check_PROGRAMS) @@ -127,6 +128,7 @@ test_filter_sru_to_z3950_SOURCES = test_filter_sru_to_z3950.cpp test_filter_virt_db_SOURCES = test_filter_virt_db.cpp test_ses_map_SOURCES = test_ses_map.cpp test_router_flexml_SOURCES = test_router_flexml.cpp +test_xmlutil_SOURCES = test_xmlutil.cpp TESTLDADD = $(LDADD) $(BOOST_TEST_LIB) @@ -152,6 +154,7 @@ test_filter_virt_db_LDADD = $(TESTLDADD) test_router_flexml_LDADD = $(TESTLDADD) test_ses_map_LDADD = $(TESTLDADD) test_thread_pool_observer_LDADD = $(TESTLDADD) +test_xmlutil_LDADD = $(TESTLDADD) # doxygen target dox: diff --git a/src/filter_zoom.cpp b/src/filter_zoom.cpp index 39b5624..eb9f64b 100644 --- a/src/filter_zoom.cpp +++ b/src/filter_zoom.cpp @@ -855,6 +855,7 @@ Z_Records *yf::Zoom::Frontend::get_records(Odr_int start, } int rec_len; + xmlChar *xmlrec_buf = 0; const char *rec_buf = ZOOM_record_get(recs[i], rec_type_str, &rec_len); if (rec_buf && b->xsp && enable_pz2_transform) @@ -866,15 +867,35 @@ Z_Records *yf::Zoom::Frontend::get_records(Odr_int start, rec_res = xsltApplyStylesheet(b->xsp, rec_doc, 0); if (rec_res) - xsltSaveResultToString((xmlChar **) &rec_buf, &rec_len, + xsltSaveResultToString(&xmlrec_buf, &rec_len, rec_res, b->xsp); + rec_buf = (const char *) xmlrec_buf; + xmlFreeDoc(rec_doc); + xmlFreeDoc(rec_res); } } if (rec_buf) { xmlDoc *doc = xmlParseMemory(rec_buf, rec_len); - mp::xml::url_recipe_handle(doc, b->sptr->urlRecipe); + std::string res = + mp::xml::url_recipe_handle(doc, b->sptr->urlRecipe); + if (res.length()) + { + xmlNode *ptr = xmlDocGetRootElement(doc); + while (ptr && ptr->type != XML_ELEMENT_NODE) + ptr = ptr->next; + xmlNode *c = + xmlNewChild(ptr, 0, BAD_CAST "generated-url", 0); + xmlNode * t = xmlNewText(BAD_CAST res.c_str()); + xmlAddChild(c, t); + + if (xmlrec_buf) + xmlFree(xmlrec_buf); + + xmlDocDumpMemory(doc, &xmlrec_buf, &rec_len); + rec_buf = (const char *) xmlrec_buf; + } xmlFreeDoc(doc); } if (rec_buf) @@ -892,6 +913,8 @@ Z_Records *yf::Zoom::Frontend::get_records(Odr_int start, YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS, rec_type_str); } + if (xmlrec_buf) + xmlFree(xmlrec_buf); } else { diff --git a/src/test_xmlutil.cpp b/src/test_xmlutil.cpp new file mode 100644 index 0000000..13065a1 --- /dev/null +++ b/src/test_xmlutil.cpp @@ -0,0 +1,116 @@ +/* This file is part of Metaproxy. + Copyright (C) 2005-2011 Index Data + +Metaproxy is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Metaproxy is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#include "config.hpp" +#include +#include + +#include + +#define BOOST_AUTO_TEST_MAIN +#define BOOST_TEST_DYN_LINK +#include + +#include +#include +#include + +using namespace boost::unit_test; +namespace mp = metaproxy_1; +namespace mp_xml = metaproxy_1::xml; + +BOOST_AUTO_TEST_CASE( url_recipe ) +{ + try + { + const char *xml_text = + "\n" + "\n" + " 11224466 \n" + "\n" + " 11224466 \n" + "Jack Collins\n" + "\n" + "\n" + "\n" + "How to program a computer\n" + "Penguin\n" + "no\n" + "\n"; + xmlDoc *doc = xmlParseMemory(xml_text, strlen(xml_text)); + BOOST_CHECK(doc); + if (doc) + { + std::string res; + + res = mp_xml::url_recipe_handle(doc, "abc"); + BOOST_CHECK(!res.compare("abc")); + + res = mp_xml::url_recipe_handle(doc, "${has-fulltext[no/yes]}"); + std::cout << "res=" << res << std::endl; + BOOST_CHECK(!res.compare("yes")); + + res = mp_xml::url_recipe_handle(doc, "${has-fulltext[no]}"); + std::cout << "res=" << res << std::endl; + BOOST_CHECK(!res.compare("")); + + res = mp_xml::url_recipe_handle(doc, "${has-fulltext[no/]}"); + std::cout << "res=" << res << std::endl; + BOOST_CHECK(!res.compare("")); + + res = mp_xml::url_recipe_handle(doc, "${has-fulltext[n/]}"); + std::cout << "res=" << res << std::endl; + BOOST_CHECK(!res.compare("o")); + + res = mp_xml::url_recipe_handle(doc, "${has-fulltext}"); + std::cout << "res=" << res << std::endl; + BOOST_CHECK(!res.compare("no")); + + res = mp_xml::url_recipe_handle( + doc, "http://sever.com?title=${md-title[\\s+/+/g]}"); + std::cout << "res=" << res << std::endl; + BOOST_CHECK(!res.compare("http://sever.com?title=How+to+program+a+computer")); + + res = mp_xml::url_recipe_handle(doc, "${md-id[2/1]}"); + std::cout << "res=" << res << std::endl; + BOOST_CHECK(!res.compare(" 11124466 ")); + + res = mp_xml::url_recipe_handle(doc, "${md-id[2/1/g]}"); + std::cout << "res=" << res << std::endl; + BOOST_CHECK(!res.compare(" 11114466 ")); + + + xmlFreeDoc(doc); + } + } + catch ( ... ) { + BOOST_CHECK (false); + } +} + + +/* + * Local variables: + * c-basic-offset: 4 + * c-file-style: "Stroustrup" + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ + diff --git a/src/url_recipe.cpp b/src/url_recipe.cpp index bba6a41..8076928 100644 --- a/src/url_recipe.cpp +++ b/src/url_recipe.cpp @@ -16,77 +16,119 @@ along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ +#include "config.hpp" + +#include #include #include -namespace mp = metaproxy_1; -// Doxygen doesn't like mp::xml, so we use this instead namespace mp_xml = metaproxy_1::xml; -void mp_xml::url_recipe_handle(xmlDoc *doc, std::string recipe) +std::string mp_xml::url_recipe_handle(xmlDoc *doc, std::string recipe) { - if (recipe.length() == 0) - return; std::string result; + if (recipe.length() == 0) + return result; + + const xmlNode *ptr1 = xmlDocGetRootElement(doc); + while (ptr1 && ptr1->type != XML_ELEMENT_NODE) + ptr1 = ptr1->next; + if (ptr1) + ptr1 = ptr1->children; - size_t p0 = 0, p1 = 0; + size_t p0 = 0; for (;;) { - p1 = recipe.find_first_of("${", p0); + size_t p1 = recipe.find_first_of("${", p0); if (p1 == std::string::npos) { result += recipe.substr(p0); break; } result += recipe.substr(p0, p1 - p0); + p0 = p1+2; int step = 0; // 0=variable, 1=pattern, 2=replacement, 3=mode std::string variable; std::string pattern; std::string replacement; std::string mode; - p0 = p1+2; + int c_prev = 0; while (p0 < recipe.length() && step < 5) { char c = recipe[p0]; - if (c == '}') + int c_check = c; + if (c_prev == '\\') + c_check = 0; + + if (c_check == '}') step = 5; else if (step == 0) { - if (c == '[') + if (c_check == '[') step = 1; else variable += c; } + else if (c_check == ']') + step = 4; else if (step == 1) { - if (c == '/') + if (c_check == '/') step = 2; else pattern += c; } else if (step == 2) { - if (c == '/') + if (c_check == '/') step = 3; else replacement += c; } else if (step == 3) { - if (c == ']') - step = 4; - else - mode += c; + mode += c; } + c_prev = c; p0++; } if (variable.length()) { - ; + std::string text; + size_t offset = 0; + size_t md_pos = variable.find_first_of("md-"); + if (md_pos == 0) + offset = 3; + const xmlNode *ptr = ptr1; + for (; ptr; ptr = ptr->next) + if (ptr->type == XML_ELEMENT_NODE + && !strcmp((const char *) ptr->name, "metadata")) + { + const _xmlAttr *attr = ptr->properties; + for (; attr; attr = attr->next) + if (!strcmp((const char *) attr->name, "type") + && attr->children + && !strcmp((const char *) attr->children->content, + variable.c_str() + offset)) + { + text = mp_xml::get_text(ptr); + break; + } + } + boost::regex::flag_type b_mode = boost::regex::perl; + if (mode.find_first_of('i') != std::string::npos) + b_mode |= boost::regex::icase; + boost::regex e(pattern, b_mode); + + boost::match_flag_type match_mode = boost::format_first_only; + if (mode.find_first_of('g') != std::string::npos) + match_mode = boost::format_all; + result += regex_replace(text, e, replacement, match_mode); } } + return result; } diff --git a/src/xmlutil.cpp b/src/xmlutil.cpp index 288096f..ef86b2d 100644 --- a/src/xmlutil.cpp +++ b/src/xmlutil.cpp @@ -16,11 +16,12 @@ along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ +#include "config.hpp" + #include #include - namespace mp = metaproxy_1; // Doxygen doesn't like mp::xml, so we use this instead namespace mp_xml = metaproxy_1::xml; diff --git a/xml/schema/filter_zoom.rnc b/xml/schema/filter_zoom.rnc index 62ee291..75494bb 100644 --- a/xml/schema/filter_zoom.rnc +++ b/xml/schema/filter_zoom.rnc @@ -28,6 +28,7 @@ filter_zoom = element mp:requestSyntax { xsd:string }?, element mp:sru { xsd:string }?, element mp:transform { xsd:string }?, + element mp:urlRecipe { xsd:string }?, element mp:zurl { xsd:string }, element mp:cfAuth { xsd:string }?, element mp:cfProxy { xsd:string }?, diff --git a/xml/schema/filter_zoom.rng b/xml/schema/filter_zoom.rng index 85fd5a8..16f04d1 100644 --- a/xml/schema/filter_zoom.rng +++ b/xml/schema/filter_zoom.rng @@ -112,6 +112,11 @@ + + + + + diff --git a/xml/schema/filter_zoom.xsd b/xml/schema/filter_zoom.xsd index fb7fbbd..fe9c5a6 100644 --- a/xml/schema/filter_zoom.xsd +++ b/xml/schema/filter_zoom.xsd @@ -46,6 +46,7 @@ + @@ -68,6 +69,7 @@ + -- 1.7.10.4