From: Adam Dickmeiss Date: Thu, 27 Jun 2013 12:23:02 +0000 (+0200) Subject: Event.text for whitespace in tags X-Git-Tag: v1.3.59~36 X-Git-Url: http://lists.indexdata.dk/?a=commitdiff_plain;h=897639233e3a6232d039666ba38b393bf7ac0ef0;p=metaproxy-moved-to-github.git Event.text for whitespace in tags --- diff --git a/src/html_parser.cpp b/src/html_parser.cpp index 47b2e14..1436553 100644 --- a/src/html_parser.cpp +++ b/src/html_parser.cpp @@ -42,6 +42,9 @@ namespace metaproxy_1 { int tagAttrs(HTMLParserEvent &event, const char *name, int len, const char *cp); + int skipAttribute(HTMLParserEvent &event, + const char *cp, int *attr_len, + const char **value, int *val_len); Rep(); ~Rep(); int m_verbose; @@ -94,8 +97,9 @@ static int skipName(const char *cp) return i; } -static int skipAttribute(const char *cp, int *attr_len, - const char **value, int *val_len) +int mp::HTMLParser::Rep::skipAttribute(HTMLParserEvent &event, + const char *cp, int *attr_len, + const char **value, int *val_len) { int i = skipName(cp); *attr_len = i; @@ -143,7 +147,7 @@ int mp::HTMLParser::Rep::tagAttrs(HTMLParserEvent &event, int attr_len; const char *value; int val_len; - int nor = skipAttribute(cp+i, &attr_len, &value, &val_len); + int nor = skipAttribute(event, cp+i, &attr_len, &value, &val_len); i += nor; if (nor) { @@ -209,10 +213,13 @@ int mp::HTMLParser::Rep::tagEnd(HTMLParserEvent &event, { int i = 0; int close_it = 0; - while (cp[i] && cp[i] != '>') + for (; cp[i] && cp[i] != '/' && cp[i] != '>'; i++) + ; + if (i > 0) + event.text(cp, i); + if (cp[i] == '/') { - if (cp[i] == '/') - close_it = 1; + close_it = 1; i++; } if (cp[i] == '>') diff --git a/src/test_html_parser.cpp b/src/test_html_parser.cpp index 5659152..cdb266c 100644 --- a/src/test_html_parser.cpp +++ b/src/test_html_parser.cpp @@ -67,18 +67,17 @@ public: } }; - BOOST_AUTO_TEST_CASE( test_html_parser_1 ) { try { mp::HTMLParser hp; - const char* html = + const char* html = "some text" "
"; - const char* expected = + const char* expected = "some text" - "
"; + "
"; MyEvent e; hp.set_verbose(1); hp.parse(e, html);