From e462d48003aafc70536f9982b44edef5d32a5f6a Mon Sep 17 00:00:00 2001
From: Adam Dickmeiss
Date: Thu, 7 Nov 2013 16:02:44 +0100
Subject: [PATCH] New encoding: danmarc YAZ-692

---
Review notes (this section is not part of the commit message):
 * write_danmarc: code points above 0xFFFF are now rejected with
   YAZ_ICONV_EILSEQ instead of overrunning the @XXXX output, and the
   sequence is written without sprintf() so no stray NUL byte is stored
   and only the 5 bytes actually produced are required.
 * The <...> names on the #include lines and the indentation of the
   context lines were restored from a whitespace-collapsed copy of this
   patch; confirm them against the tree before applying.

 src/Makefile.am            |    2 +-
 src/iconv-p.h              |    2 ++
 src/iconv_encode_danmarc.c |  118 ++++++++++++++++++++++++++++++++++++++++++++
 src/siconv.c               |    2 ++
 test/test_iconv.c          |   18 ++++++++++
 win/makefile               |    1 +
 6 files changed, 142 insertions(+), 1 deletion(-)
 create mode 100644 src/iconv_encode_danmarc.c

diff --git a/src/Makefile.am b/src/Makefile.am
index 65bf742..08389c8 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -104,7 +104,7 @@ libyaz_la_SOURCES=base64.c version.c options.c log.c \
   xmlquery.c xmlerror.c http.c \
   mime.c mime.h oid_util.c tokenizer.c \
   record_conv.c retrieval.c elementset.c snprintf.c query-charset.c \
-  copy_types.c match_glob.c poll.c daemon.c \
+  copy_types.c match_glob.c poll.c daemon.c iconv_encode_danmarc.c \
   iconv_encode_marc8.c iconv_encode_iso_8859_1.c iconv_encode_wchar.c \
   iconv_decode_marc8.c iconv_decode_iso5426.c iconv_decode_danmarc.c sc.c \
   json.c xml_include.c file_glob.c dirent.c mutex-p.h mutex.c condvar.c \
diff --git a/src/iconv-p.h b/src/iconv-p.h
index a236403..df62e30 100644
--- a/src/iconv-p.h
+++ b/src/iconv-p.h
@@ -63,6 +63,8 @@ yaz_iconv_encoder_t yaz_iso_5428_encoder(const char *name,
                                          yaz_iconv_encoder_t e);
 yaz_iconv_encoder_t yaz_advancegreek_encoder(const char *name,
                                              yaz_iconv_encoder_t e);
+yaz_iconv_encoder_t yaz_danmarc_encoder(const char *name,
+                                             yaz_iconv_encoder_t e);
 yaz_iconv_encoder_t yaz_wchar_encoder(const char *name,
                                       yaz_iconv_encoder_t e);
 typedef unsigned long yaz_conv_func_t(unsigned char *inp, size_t inbytesleft,
diff --git a/src/iconv_encode_danmarc.c b/src/iconv_encode_danmarc.c
new file mode 100644
index 0000000..1afcf7c
--- /dev/null
+++ b/src/iconv_encode_danmarc.c
@@ -0,0 +1,118 @@
+/* This file is part of the YAZ toolkit.
+ * Copyright (C) 1995-2013 Index Data
+ * See the file LICENSE for details.
+ */
+/**
+ * \file
+ * \brief Danmarc2 character set encoding
+ */
+
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <assert.h>
+#include <errno.h>
+#include <string.h>
+#include <stdio.h>
+
+#include <yaz/xmalloc.h>
+#include "iconv-p.h"
+
+/**
+ * \brief Writes one Unicode code point in Danmarc encoding.
+ * \param cd conversion handle; receives the error code on failure
+ * \param en encoder handle (not used by this encoder)
+ * \param x code point to encode
+ * \param outbuf output position; advanced past the bytes written
+ * \param outbytesleft room left in output; reduced by the bytes written
+ * \returns 0 on success; (size_t)(-1) on failure with error set on cd
+ *
+ * '@' is doubled to "@@", code points up to 255 are written as a single
+ * byte and code points up to 0xFFFF as '@' followed by four upper-case
+ * hex digits. Larger code points do not fit the four-digit form and are
+ * rejected with YAZ_ICONV_EILSEQ. If the output does not have room for
+ * the whole sequence, nothing is written and YAZ_ICONV_E2BIG is set.
+ */
+static size_t write_danmarc(yaz_iconv_t cd, yaz_iconv_encoder_t en,
+                             unsigned long x,
+                             char **outbuf, size_t *outbytesleft)
+{
+    unsigned char *outp = (unsigned char *) *outbuf;
+
+    if (x == '@')
+    {
+        if (*outbytesleft < 2)
+        {
+            yaz_iconv_set_errno(cd, YAZ_ICONV_E2BIG);
+            return (size_t)(-1);
+        }
+        *outp++ = x;
+        (*outbytesleft)--;
+        *outp++ = x;
+        (*outbytesleft)--;
+    }
+    else if (x <= 255)
+    {   /* latin-1 range */
+        if (*outbytesleft < 1)
+        {
+            yaz_iconv_set_errno(cd, YAZ_ICONV_E2BIG);
+            return (size_t)(-1);
+        }
+        *outp++ = x;
+        (*outbytesleft)--;
+    }
+    else if (x <= 0xffff)
+    {   /* rest of the BMP, emit @XXXX */
+        static const char hex[] = "0123456789ABCDEF";
+        int shift;
+
+        /* exactly 5 bytes are produced; no NUL terminator is written */
+        if (*outbytesleft < 5)
+        {
+            yaz_iconv_set_errno(cd, YAZ_ICONV_E2BIG);
+            return (size_t)(-1);
+        }
+        *outp++ = '@';
+        for (shift = 12; shift >= 0; shift -= 4)
+            *outp++ = hex[(x >> shift) & 15];
+        (*outbytesleft) -= 5;
+    }
+    else
+    {   /* more than four hex digits: not representable as @XXXX */
+        yaz_iconv_set_errno(cd, YAZ_ICONV_EILSEQ);
+        return (size_t)(-1);
+    }
+    *outbuf = (char *) outp;
+    return 0;
+}
+
+/**
+ * \brief Selects the Danmarc encoder when tocode names it.
+ * \param tocode name of the target encoding
+ * \param e encoder to set up
+ * \returns e with write_handle set to write_danmarc when
+ *          yaz_matchstr(tocode, "danmarc") returns 0; 0 otherwise
+ */
+yaz_iconv_encoder_t yaz_danmarc_encoder(const char *tocode,
+                                        yaz_iconv_encoder_t e)
+
+{
+    if (!yaz_matchstr(tocode, "danmarc"))
+    {
+        e->write_handle = write_danmarc;
+        return e;
+    }
+    return 0;
+}
+
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * c-file-style: "Stroustrup"
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+
diff --git a/src/siconv.c b/src/siconv.c
index be6eae5..f1430fe 100644
--- a/src/siconv.c
+++ b/src/siconv.c
@@ -64,6 +64,8 @@ static int prepare_encoders(yaz_iconv_t cd, const char *tocode)
         return 1;
     if (yaz_wchar_encoder(tocode, &cd->encoder))
         return 1;
+    if (yaz_danmarc_encoder(tocode, &cd->encoder))
+        return 1;
     return 0;
 }
 
diff --git a/test/test_iconv.c b/test/test_iconv.c
index 4651203..e341972 100644
--- a/test/test_iconv.c
+++ b/test/test_iconv.c
@@ -701,6 +701,23 @@ static void tst_danmarc_to_latin1(void)
     yaz_iconv_close(cd);
 }
 
+static void tst_utf8_to_danmarc(void)
+{
+    yaz_iconv_t cd = yaz_iconv_open("danmarc", "utf-8");
+
+    YAZ_CHECK(cd);
+    if (!cd)
+        return;
+
+    YAZ_CHECK(tst_convert(cd, "ax", "ax"));
+    YAZ_CHECK(tst_convert(cd, "@", "@@"));
+    YAZ_CHECK(tst_convert(cd, "a\xc3\xa5" "b", "a\xe5" "b")); /* aring */
+    YAZ_CHECK(tst_convert(cd, "a\xce\xbb" "b", "a@03BBb")); /* lambda */
+
+    yaz_iconv_close(cd);
+}
+
+
 int main (int argc, char **argv)
 {
 
@@ -722,6 +739,7 @@ int main (int argc, char **argv)
     tst_utf8_to_marc8("marc8lossless");
 
     tst_danmarc_to_latin1();
+    tst_utf8_to_danmarc();
 
     tst_latin1_to_marc8();
 
diff --git a/win/makefile b/win/makefile
index 3b9cf54..74039cc 100644
--- a/win/makefile
+++ b/win/makefile
@@ -535,6 +535,7 @@ MISC_OBJS= \
    $(OBJDIR)\match_glob.obj \
    $(OBJDIR)\poll.obj \
    $(OBJDIR)\daemon.obj \
+   $(OBJDIR)\iconv_encode_danmarc.obj \
    $(OBJDIR)\iconv_encode_iso_8859_1.obj \
    $(OBJDIR)\iconv_encode_marc8.obj \
    $(OBJDIR)\iconv_decode_marc8.obj \
-- 
1.7.10.4