* Copyright (C) 1995-2005, Index Data ApS
* See the file LICENSE for details.
*
- * $Id: tsticonv.c,v 1.10 2005-06-25 15:46:07 adam Exp $
+ * $Id: tsticonv.c,v 1.17 2006-04-19 23:15:40 adam Exp $
*/
#if HAVE_CONFIG_H
#include <ctype.h>
#include <yaz/yaz-util.h>
+#include <yaz/test.h>
static int compare_buffers(char *msg, int no,
- int expect_len, const unsigned char *expect_buf,
- int got_len, const unsigned char *got_buf)
+ int expect_len, const char *expect_buf,
+ int got_len, const char *got_buf)
{
- int i;
if (expect_len == got_len
&& !memcmp(expect_buf, got_buf, expect_len))
return 1;
- printf("tsticonv test=%s i=%d failed\n", msg, no);
- printf("off got exp\n");
- for (i = 0; i<got_len || i<expect_len; i++)
+
+ if (0) /* use 1 see how the buffers differ (for debug purposes) */
{
- char got_char[10];
- char expect_char[10];
-
- if (i < got_len)
- sprintf(got_char, "%02X", got_buf[i]);
- else
- sprintf(got_char, "? ");
-
- if (i < expect_len)
- sprintf(expect_char, "%02X", expect_buf[i]);
- else
- sprintf(expect_char, "? ");
-
- printf("%02d %s %s %c\n",
- i, got_char, expect_char, got_buf[i] == expect_buf[i] ?
- ' ' : '*');
-
+ int i;
+ printf("tsticonv test=%s i=%d failed\n", msg, no);
+ printf("off got exp\n");
+ for (i = 0; i<got_len || i<expect_len; i++)
+ {
+ char got_char[10];
+ char expect_char[10];
+
+ if (i < got_len)
+ sprintf(got_char, "%02X", got_buf[i]);
+ else
+ sprintf(got_char, "? ");
+
+ if (i < expect_len)
+ sprintf(expect_char, "%02X", expect_buf[i]);
+ else
+ sprintf(expect_char, "? ");
+
+ printf("%02d %s %s %c\n",
+ i, got_char, expect_char, got_buf[i] == expect_buf[i] ?
+ ' ' : '*');
+
+ }
}
- exit(1);
+ return 0;
}
/* some test strings in ISO-8859-1 format */
{
int i;
yaz_iconv_t cd;
+ int ret;
cd = yaz_iconv_open("ISO-8859-1", "MARC8");
+ YAZ_CHECK(cd);
if (!cd)
- {
- printf("tsticonv 10 yaz_iconv_open failed\n");
- exit(10);
- }
+ return;
for (i = 0; iso_8859_1_a[i]; i++)
{
size_t r;
size_t outbytesleft = sizeof(outbuf0);
r = yaz_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
+ YAZ_CHECK(r != (size_t)(-1));
if (r == (size_t) (-1))
- {
- int e = yaz_iconv_error(cd);
+ break;
- printf ("tsticonv 11 i=%d e=%d\n", i, e);
- exit(11);
- }
- compare_buffers("tsticonv 11", i,
- strlen(iso_8859_1_a[i]), iso_8859_1_a[i],
- outbuf - outbuf0, outbuf0);
+ ret = compare_buffers("tsticonv 11", i,
+ strlen(iso_8859_1_a[i]), iso_8859_1_a[i],
+ outbuf - outbuf0, outbuf0);
+ YAZ_CHECK(ret);
}
yaz_iconv_close(cd);
}
static void tst_marc8_to_ucs4b()
{
static struct {
- const unsigned char *marc8_b;
+ const char *marc8_b;
int len;
- const unsigned char *ucs4_b;
+ const char *ucs4_b;
} ar[] = {
{
"\033$1" "\x21\x2B\x3B" /* FF1F */ "\033(B" "o",
"\xe5\xe8\x41",
12, "\x00\x00\x00\x41" "\x00\x00\x03\x04" "\x00\x00\x03\x08"
},
+ { /* bug #416 */
+ "\xEB\x74\xEC\x73",
+ 12, "\x00\x00\x00\x74" "\x00\x00\x03\x61" "\x00\x00\x00\x73"
+ },
+ { /* bug #416 */
+ "\xFA\x74\xFB\x73",
+ 12, "\x00\x00\x00\x74" "\x00\x00\x03\x60" "\x00\x00\x00\x73"
+ },
{
0, 0, 0
}
};
int i;
+ int ret;
yaz_iconv_t cd;
cd = yaz_iconv_open("UCS4", "MARC8");
+ YAZ_CHECK(cd);
if (!cd)
- {
- printf ("tsticonv 20 yaz_iconv_open failed\n");
- exit(20);
- }
+ return;
for (i = 0; ar[i].len; i++)
{
size_t r;
if (r == (size_t) (-1))
{
int e = yaz_iconv_error(cd);
+ YAZ_CHECK(e == YAZ_ICONV_E2BIG);
if (e != YAZ_ICONV_E2BIG)
- {
- printf ("tsticonv 21 i=%d e=%d\n", i, e);
- exit(21);
- }
+ return;
}
else
break;
}
- compare_buffers("tsticonv 22", i,
- expect_len, ar[i].ucs4_b,
- outbuf - outbuf0, outbuf0);
+ ret = compare_buffers("tsticonv 22", i,
+ expect_len, ar[i].ucs4_b,
+ outbuf - outbuf0, outbuf0);
+ YAZ_CHECK(ret);
}
yaz_iconv_close(cd);
}
};
int i;
+ int ret;
yaz_iconv_t cd;
cd = yaz_iconv_open("UTF8", "UCS4");
+ YAZ_CHECK(cd);
if (!cd)
- {
- printf ("tsticonv 30 yaz_iconv_open failed\n");
- exit(30);
- }
+ return;
for (i = 0; ucs4_c[i]; i++)
{
size_t r;
size_t outbytesleft = sizeof(outbuf0);
r = yaz_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
+ YAZ_CHECK(r != (size_t) (-1));
if (r == (size_t) (-1))
- {
- int e = yaz_iconv_error(cd);
-
- printf ("tsticonv 31 i=%d e=%d\n", i, e);
- exit(31);
- }
- compare_buffers("tsticonv 32", i,
- strlen(utf8_c[i]), utf8_c[i],
- outbuf - outbuf0, outbuf0);
+ return;
+ ret = compare_buffers("tsticonv 32", i,
+ strlen(utf8_c[i]), utf8_c[i],
+ outbuf - outbuf0, outbuf0);
+ YAZ_CHECK(ret);
}
yaz_iconv_close(cd);
}
static void dconvert(int mandatory, const char *tmpcode)
{
int i;
+ int ret;
yaz_iconv_t cd;
for (i = 0; iso_8859_1_a[i]; i++)
{
size_t outbytesleft = sizeof(outbuf0);
cd = yaz_iconv_open(tmpcode, "ISO-8859-1");
+ YAZ_CHECK(cd || !mandatory);
if (!cd)
- {
- if (!mandatory)
- return;
- printf ("tsticonv code=%s i=%d 1\n", tmpcode, i);
- exit(1);
- }
+ return;
r = yaz_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
- if (r == (size_t)(-1))
- {
- int e = yaz_iconv_error(cd);
-
- printf ("tsticonv code=%s i=%d 2 e=%d\n", tmpcode, i, e);
- exit(2);
- }
+ YAZ_CHECK(r != (size_t) (-1));
yaz_iconv_close(cd);
+ if (r == (size_t) (-1))
+ return;
cd = yaz_iconv_open("ISO-8859-1", tmpcode);
+ YAZ_CHECK(cd || !mandatory);
if (!cd)
- {
- if (!mandatory)
- return;
- printf ("tsticonv code=%s i=%d 3\n", tmpcode, i);
- exit(3);
- }
+ return;
inbuf = outbuf0;
inbytesleft = sizeof(outbuf0) - outbytesleft;
outbuf = outbuf1;
outbytesleft = sizeof(outbuf1);
r = yaz_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
- if (r == (size_t)(-1)) {
- int e = yaz_iconv_error(cd);
-
- printf ("tsticonv code=%s i=%d 4 e=%d\n", tmpcode, i, e);
- exit(4);
+ YAZ_CHECK(r != (size_t) (-1));
+ if (r != (size_t)(-1))
+ {
+ ret = compare_buffers("dconvert", i,
+ strlen(iso_8859_1_a[i]), iso_8859_1_a[i],
+ sizeof(outbuf1) - outbytesleft, outbuf1);
+ YAZ_CHECK(ret);
}
- compare_buffers("dconvert", i,
- strlen(iso_8859_1_a[i]), iso_8859_1_a[i],
- sizeof(outbuf1) - outbytesleft, outbuf1);
yaz_iconv_close(cd);
}
}
+
+/* Round-trip one code point: UCS4LE -> UTF-8 -> UCS4LE.
+ *
+ * c is the value to encode.
+ * Returns 1 when the four bytes that come back equal the four bytes
+ * sent in, 0 when a converter cannot be opened, a conversion fails or
+ * the result differs.  When unsigned is narrower than 4 bytes nothing
+ * is tested and 1 is returned.
+ */
+int utf8_check(unsigned c)
+{
+    if (sizeof(c) >= 4)
+    {
+        size_t r;
+        char src[4];
+        char dst[4];
+        char utf8buf[6];
+        char *inbuf = src;
+        size_t inbytesleft = 4;
+        char *outbuf = utf8buf;
+        size_t outbytesleft = sizeof(utf8buf);
+        int i;
+        yaz_iconv_t cd = yaz_iconv_open("UTF-8", "UCS4LE");
+        if (!cd)
+            return 0;
+        /* least significant byte first, to match the "UCS4LE" source */
+        for (i = 0; i<4; i++)
+            src[i] = c >> (i*8);
+
+        r = yaz_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
+        yaz_iconv_close(cd);
+
+        if (r == (size_t)(-1))
+            return 0;
+
+        cd = yaz_iconv_open("UCS4LE", "UTF-8");
+        if (!cd)
+            return 0;
+        /* feed back exactly the UTF-8 bytes produced above */
+        inbytesleft = sizeof(utf8buf) - outbytesleft;
+        inbuf = utf8buf;
+
+        outbuf = dst;
+        outbytesleft = 4;
+
+        r = yaz_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
+        /* close before inspecting r so the failure path does not leak cd */
+        yaz_iconv_close(cd);
+
+        if (r == (size_t)(-1))
+            return 0;
+
+        /* dst is only fully initialised when all four bytes were written */
+        if (outbytesleft != 0)
+            return 0;
+
+        if (memcmp(src, dst, 4))
+            return 0;
+    }
+    return 1;
+}
+/* Convert the NUL-terminated string buf with cd, at most sizeof(outbuf)
+ * output bytes per yaz_iconv call, and compare everything produced with
+ * the NUL-terminated string cmpbuf.
+ *
+ * Returns 1 on an exact match (same length, same bytes).  Otherwise the
+ * bytes actually produced are logged and 0 is returned.
+ */
+static int tst_convert(yaz_iconv_t cd, const char *buf, const char *cmpbuf)
+{
+    int ret = 0;
+    WRBUF b = wrbuf_alloc();
+    char outbuf[12];
+    size_t inbytesleft = strlen(buf);
+    const char *inp = buf;
+    while (inbytesleft)
+    {
+        size_t before = inbytesleft;
+        size_t outbytesleft = sizeof(outbuf);
+        char *outp = outbuf;
+        size_t r = yaz_iconv(cd, (char**) &inp, &inbytesleft,
+                             &outp, &outbytesleft);
+        if (r == (size_t) (-1))
+        {
+            int e = yaz_iconv_error(cd);
+            /* E2BIG just means "output chunk full": keep going.
+               Anything else ends the conversion. */
+            if (e != YAZ_ICONV_E2BIG)
+                break;
+            /* E2BIG with an untouched output chunk and no input consumed
+               cannot improve on retry; stop instead of looping forever */
+            if (outp == outbuf && inbytesleft == before)
+                break;
+        }
+        wrbuf_write(b, outbuf, outp - outbuf);
+    }
+    if ((size_t) wrbuf_len(b) == strlen(cmpbuf)
+        && !memcmp(cmpbuf, wrbuf_buf(b), wrbuf_len(b)))
+        ret = 1;
+    else
+        /* "%.*s" takes its precision as int */
+        yaz_log(YLOG_LOG, "GOT (%.*s)", (int) wrbuf_len(b), wrbuf_buf(b));
+    wrbuf_free(b, 1);
+    return ret;
+}
+
+/* MARC8 -> ISO-8859-1 checks, run through tst_convert.
+ *
+ * The input byte 0xE2 placed before 'e' is expected to come back as the
+ * single Latin-1 byte 0xE9 (e-acute).  Non-ASCII bytes are written as
+ * hex escapes so the test data does not depend on the encoding this
+ * source file happens to be saved in.  An escape is closed with its own
+ * string literal wherever a following letter could be read as a hex digit.
+ *
+ * The "12345678..." cases give expected outputs of 9, 10, 11 and 12
+ * bytes, approaching and reaching the 12-byte output chunk that
+ * tst_convert uses.
+ */
+static void tst_conversion_marc8_to_latin1()
+{
+    yaz_iconv_t cd = yaz_iconv_open("ISO-8859-1", "MARC8");
+
+    YAZ_CHECK(cd);
+    if (!cd)
+        return;
+
+    YAZ_CHECK(tst_convert(cd, "Cours de math",
+                          "Cours de math"));
+    YAZ_CHECK(tst_convert(cd, "Cours de math\xE2" "e",
+                          "Cours de math\xE9"));
+    YAZ_CHECK(tst_convert(cd, "12345678\xE2" "e",
+                          "12345678\xE9"));
+    YAZ_CHECK(tst_convert(cd, "123456789\xE2" "e",
+                          "123456789\xE9"));
+    YAZ_CHECK(tst_convert(cd, "1234567890\xE2" "e",
+                          "1234567890\xE9"));
+    YAZ_CHECK(tst_convert(cd, "12345678901\xE2" "e",
+                          "12345678901\xE9"));
+    YAZ_CHECK(tst_convert(cd, "Cours de math\xE2" "em",
+                          "Cours de math\xE9" "m"));
+    YAZ_CHECK(tst_convert(cd, "Cours de math\xE2" "ematiques",
+                          "Cours de math\xE9" "matiques"));
+
+    yaz_iconv_close(cd);
+}
+
+static void tst_conversion_utf8_to_marc8()
+{
+ yaz_iconv_t cd = yaz_iconv_open("MARC8", "UTF-8");
+
+ YAZ_CHECK(cd);
+ if (!cd)
+ return;
+ /* each check passes tst_convert an input string and the exact bytes expected back from cd */
+ YAZ_CHECK(tst_convert(cd, "Cours ", "Cours "));
+
+ /** Pure ASCII. 12 characters (sizeof(outbuf) in tst_convert) */
+ YAZ_CHECK(tst_convert(cd, "Cours de mat", "Cours de mat"));
+
+ /** Pure ASCII. 13 characters (sizeof(outbuf)+1) */
+ YAZ_CHECK(tst_convert(cd, "Cours de math", "Cours de math"));
+
+ /** Pure ASCII. 14 characters (sizeof(outbuf)+2) */
+ YAZ_CHECK(tst_convert(cd, "Cours de math.", "Cours de math."));
+
+ /** UPPERCASE SCANDINAVIAN O (U+00D8) */
+ YAZ_CHECK(tst_convert(cd, "S\xc3\x98", "S\xa2"));
+
+ /** ARING: A followed by combining ring above (U+030A); the expected output has 0xEA before the A */
+ YAZ_CHECK(tst_convert(cd, "A" "\xCC\x8A", "\xEA" "A"));
+
+ /** A + MACRON (U+0304) + UMLAUT, DIAERESIS (U+0308) */
+ YAZ_CHECK(tst_convert(cd, "A" "\xCC\x84" "\xCC\x88",
+ "\xE5\xE8\x41"));
+
+ /* Ligature tie (U+0361) spanning two characters */
+ YAZ_CHECK(tst_convert(cd,
+ "\x74" "\xCD\xA1" "\x73", /* UTF-8 */
+ "\xEB\x74\xEC\x73")); /* MARC-8 */
+
+ /* Double tilde (U+0360) spanning two characters */
+ YAZ_CHECK(tst_convert(cd,
+ "\x74" "\xCD\xA0" "\x73", /* UTF-8 */
+ "\xFA\x74\xFB\x73")); /* MARC-8 */
+
+ /** Fullwidth question mark (Unicode FF1F), followed by ASCII 'o' */
+ YAZ_CHECK(tst_convert(cd,
+ "\xEF\xBC\x9F" "o", /* UTF-8 */
+ "\033(1" "\x21\x2B\x3B" "\033(B" "o" ));
+
+ yaz_iconv_close(cd);
+}
+
+
+/* ISO-8859-1 -> MARC8 checks, run through tst_convert.
+ *
+ * The ASCII cases have lengths at and just past the 12-byte output chunk
+ * that tst_convert uses.  The one non-ASCII input byte is written as a
+ * hex escape so the test data does not depend on the encoding this
+ * source file happens to be saved in.
+ */
+static void tst_conversion_latin1_to_marc8()
+{
+    yaz_iconv_t cd = yaz_iconv_open("MARC8", "ISO-8859-1");
+
+    YAZ_CHECK(cd);
+    if (!cd)
+        return;
+
+    YAZ_CHECK(tst_convert(cd, "Cours ", "Cours "));
+
+    /** Pure ASCII. 12 characters (sizeof(outbuf) in tst_convert) */
+    YAZ_CHECK(tst_convert(cd, "Cours de mat", "Cours de mat"));
+
+    /** Pure ASCII. 13 characters (sizeof(outbuf)+1) */
+    YAZ_CHECK(tst_convert(cd, "Cours de math", "Cours de math"));
+
+    /** Pure ASCII. 14 characters (sizeof(outbuf)+2) */
+    YAZ_CHECK(tst_convert(cd, "Cours de math.", "Cours de math."));
+
+    /** UPPERCASE SCANDINAVIAN O: Latin-1 byte 0xD8 in, 0xA2 expected out */
+    YAZ_CHECK(tst_convert(cd, "S\xD8", "S\xa2"));
+
+    yaz_iconv_close(cd);
+}
+
int main (int argc, char **argv)
{
+ YAZ_CHECK_INIT(argc, argv);
+ /* string conversions, each checked through tst_convert */
+ tst_conversion_marc8_to_latin1();
+
+ tst_conversion_utf8_to_marc8();
+
+ tst_conversion_latin1_to_marc8();
+ /* UCS4LE/UTF-8 round trips, from a one-byte UTF-8 value (3) up to 100000000, which is beyond U+10FFFF */
+ YAZ_CHECK(utf8_check(3));
+ YAZ_CHECK(utf8_check(127));
+ YAZ_CHECK(utf8_check(128));
+ YAZ_CHECK(utf8_check(255));
+ YAZ_CHECK(utf8_check(256));
+ YAZ_CHECK(utf8_check(900));
+ YAZ_CHECK(utf8_check(1000));
+ YAZ_CHECK(utf8_check(10000));
+ YAZ_CHECK(utf8_check(100000));
+ YAZ_CHECK(utf8_check(1000000));
+ YAZ_CHECK(utf8_check(10000000));
+ YAZ_CHECK(utf8_check(100000000));
+ /* dconvert: ISO-8859-1 -> named encoding -> ISO-8859-1 round trips; then the MARC8 and UCS4 tests */
dconvert(1, "UTF-8");
dconvert(1, "ISO-8859-1");
dconvert(1, "UCS4");
tst_marc8_to_iso_8859_1();
tst_marc8_to_ucs4b();
tst_ucs4b_to_utf8();
- exit(0);
+ /* NOTE(review): YAZ_CHECK_TERM replaces exit(0), so presumably it ends the process itself; confirm in yaz/test.h */
+ YAZ_CHECK_TERM;
}
/*
* Local variables:
* End:
* vim: shiftwidth=4 tabstop=8 expandtab
*/
-