From: Adam Dickmeiss Date: Mon, 23 Nov 2015 09:51:29 +0000 (+0100) Subject: Fix documentation of of chr's equivalent directive ZEB-672 X-Git-Url: http://lists.indexdata.dk/?a=commitdiff_plain;h=0e8272b3d5a18a0695f8c7a58617caa0ad938059;p=idzebra-moved-to-github.git Fix documentation of of chr's equivalent directive ZEB-672 And add a few tests for the behavior. --- diff --git a/doc/field-structure.xml b/doc/field-structure.xml index 1b24aae..a19838e 100644 --- a/doc/field-structure.xml +++ b/doc/field-structure.xml @@ -436,21 +436,22 @@ equivalent value-set - This directive introduces equivalence classes of characters - and/or strings for sorting purposes only. It resembles the map - directive, but does not affect search and retrieval indexing, - but only sorting order under present requests. + This directive introduces equivalence classes of strings for + searching purposes only. It's a one-to-many + conversion that takes place only during search before the map + directive kicks in. - For example, scan.chr contains the following - equivalent sorting instructions, which can be uncommented: + For example given: + + a search for äsel will match any of + æsel, äsel and + aesel. + diff --git a/test/api/Makefile.am b/test/api/Makefile.am index 9fc3ec5..0d5aad4 100644 --- a/test/api/Makefile.am +++ b/test/api/Makefile.am @@ -18,7 +18,7 @@ EXTRA_DIST=zebra.cfg test_trunc.cfg test_private_attset.cfg \ test_icu_indexing.cfg test_icu_indexing.idx \ test_sort1.cfg test_sort1.idx test_sort1.chr sort1.abs \ test_sort2.cfg test_sort2.idx test_sort2.chr sort2.abs \ - test_safari.cfg test_sort3.cfg + test_safari.cfg test_sort3.cfg string.chr noinst_LIBRARIES = libtestlib.a diff --git a/test/api/string.chr b/test/api/string.chr new file mode 100644 index 0000000..637ff98 --- /dev/null +++ b/test/api/string.chr @@ -0,0 +1,41 @@ +# Generic character map. +# + +# Define the basic value-set. *Beware* of changing this without re-indexing +# your databases. 
+ +lowercase {0-9}{a-y}üzæäøöå +uppercase {0-9}{A-Y}ÜZÆÄØÖÅ + +# Breaking characters + +space {\001-\040}!"#$%&'\()*+,-./:;<=>?@\[\\]^_`\{|}~ + +# Characters to be considered equivalent for searching purposes. + +# equivalent æä(ae) +# equivalent øö(oe) +equivalent å(aa) +# equivalent uü + +# Supplemental mappings + +#map (ä) ä +#map (æ) æ +#map (ø) ø +#map (å) å +#map (ö) ö +#map (Ä) Ä +#map (&Aelig;) Æ +#map (Ø) Ø +#map (Å) Å +#map (Ö) Ö + +#map éÉ e +#map á a +#map ó o +#map í i + +#map (Aa) (AA) + +#map (aa) a diff --git a/test/api/test_search.c b/test/api/test_search.c index a560e15..17d540e 100644 --- a/test/api/test_search.c +++ b/test/api/test_search.c @@ -76,6 +76,12 @@ const char *myrec[] = { "" "" , + "\n" + "" + "\xe5" " og vandl" "\xf8" "b" + "" + "" + , 0} ; static void tst(int argc, char **argv) @@ -144,7 +150,7 @@ static void tst(int argc, char **argv) YAZ_CHECK(tl_query(zh, "@attr 1=4 @attr 5=103 titl", 3)); YAZ_CHECK(tl_query(zh, "@attr 1=4 @attr 5=103 titlx", 3)); YAZ_CHECK(tl_query(zh, "@attr 1=4 @attr 5=103 titlxx", 0)); - YAZ_CHECK(tl_query(zh, "@attr 1=4 @attr 5=103 x", 2)); + YAZ_CHECK(tl_query(zh, "@attr 1=4 @attr 5=103 x", 3)); YAZ_CHECK(tl_query(zh, "@attr 1=4 @attr 5=103 le", 0)); YAZ_CHECK(tl_query(zh, "@attr 1=4 @attr 5=103 .*le", 3)); @@ -219,18 +225,18 @@ static void tst(int argc, char **argv) /* string relations, >= */ - YAZ_CHECK(tl_query(zh, "@attr 1=4 @attr 2=4 x", 2)); - YAZ_CHECK(tl_query(zh, "@attr 1=4 @attr 2=4 tu", 2)); - YAZ_CHECK(tl_query(zh, "@attr 1=4 @attr 2=4 title", 3)); + YAZ_CHECK(tl_query(zh, "@attr 1=4 @attr 2=4 x", 3)); + YAZ_CHECK(tl_query(zh, "@attr 1=4 @attr 2=4 tu", 3)); + YAZ_CHECK(tl_query(zh, "@attr 1=4 @attr 2=4 title", 4)); /* string relations, > */ - YAZ_CHECK(tl_query(zh, "@attr 1=4 @attr 2=5 x", 0)); - YAZ_CHECK(tl_query(zh, "@attr 1=4 @attr 2=5 tu", 2)); - YAZ_CHECK(tl_query(zh, "@attr 1=4 @attr 2=5 title", 2)); + YAZ_CHECK(tl_query(zh, "@attr 1=4 @attr 2=5 x", 1)); + YAZ_CHECK(tl_query(zh, "@attr 
1=4 @attr 2=5 tu", 3)); + YAZ_CHECK(tl_query(zh, "@attr 1=4 @attr 2=5 title", 3)); /* always-matches relation */ - YAZ_CHECK(tl_query(zh, "@attr 1=_ALLRECORDS @attr 2=103 {ym}", 6)); - YAZ_CHECK(tl_query(zh, "@attr 1=4 @attr 2=103 {x my}", 5)); + YAZ_CHECK(tl_query(zh, "@attr 1=_ALLRECORDS @attr 2=103 {ym}", 7)); + YAZ_CHECK(tl_query(zh, "@attr 1=4 @attr 2=103 {x my}", 6)); YAZ_CHECK(tl_query_x(zh, "@attr 1=1 @attr 2=103 {x my}", 0, 114)); /* and searches */ @@ -434,6 +440,14 @@ static void tst(int argc, char **argv) YAZ_CHECK(tl_query(zh, "@attr 1=30 @attr 4=5 @attr 2=5 {2107-09-19 00:00:00}", 0)); + YAZ_CHECK(tl_query(zh, "@attr 1=4 {\xe5" " og vandl" "\xf8" "b}", 1)); + + YAZ_CHECK(tl_query(zh, "@attr 1=4 {\xc5" " OG VANDL" "\xd8" "B}", 1)); + + /* try equivalent directive */ + YAZ_CHECK(tl_query(zh, "@attr 1=4 {aa" " OG VANDL" "\xd8" "B}", 1)); + YAZ_CHECK(tl_query(zh, "@attr 1=4 {AA" " OG VANDL" "\xd8" "B}", 0)); + YAZ_CHECK(tl_close_down(zh, zs)); }