2 * Copyright (c) 1997-2004, Index Data
3 * See the file LICENSE for details.
5 * $Id: siconv.c,v 1.3 2004-03-15 21:39:06 adam Exp $
8 /* mini iconv and wrapper for system iconv library (if present) */
25 #include <yaz/yaz-util.h>
/* Prototypes of the two MARC-8 converters that yaz_read_marc8 dispatches to.
   Both are called there with the input pointer, the number of bytes left and
   the address of a size_t that receives a byte count. */
27 unsigned long yaz_marc8_conv (unsigned char *inp, size_t inbytesleft,
30 unsigned long yaz_marc8_cjk_conv (unsigned char *inp, size_t inbytesleft,
/* Conversion descriptor.  The function pointers below are the hooks of the
   built-in converter: yaz_iconv_open installs them according to the
   requested character sets and yaz_iconv calls them. */
33 struct yaz_iconv_struct {
/* start-of-input hook; yaz_iconv_open only sets it for UTF-8 sources.
   Reports the number of bytes it consumed through no_read. */
36 size_t (*init_handle)(yaz_iconv_t cd, unsigned char *inbuf,
37 size_t inbytesleft, size_t *no_read);
/* decodes one character from inbuf and returns its value; no_read receives
   the number of input bytes used */
38 unsigned long (*read_handle)(yaz_iconv_t cd, unsigned char *inbuf,
39 size_t inbytesleft, size_t *no_read);
/* encodes the character value x at *outbuf within *outbytesleft bytes */
40 size_t (*write_handle)(yaz_iconv_t cd, unsigned long x,
41 char **outbuf, size_t *outbytesleft);
/* Reader for ISO-8859-1 input: the character value is simply the first
   input byte. */
48 static unsigned long yaz_read_ISO8859_1 (yaz_iconv_t cd, unsigned char *inp,
49 size_t inbytesleft, size_t *no_read)
51 unsigned long x = inp[0];
/* Start-of-input hook for UTF-8 sources (installed as init_handle by
   yaz_iconv_open).  It inspects the leading bytes of the input for the
   UTF-8 byte order mark EF BB BF. */
56 static size_t yaz_init_UTF8 (yaz_iconv_t cd, unsigned char *inp,
57 size_t inbytesleft, size_t *no_read)
/* NOTE(review): presumably raised when too few bytes are available to
   decide -- confirm against the guarding condition */
66 cd->my_errno = YAZ_ICONV_EINVAL;
/* second and third byte of the byte order mark must follow */
69 if (inp[1] != 0xbb || inp[2] != 0xbf)
71 cd->my_errno = YAZ_ICONV_EILSEQ;
/* Reader for UTF-8 input.  The lead byte inp[0] selects the sequence length
   (2 to 6 bytes in the branches below); a branch is only taken when
   inbytesleft covers the whole sequence.  The payload bits of the lead byte
   and the low six bits of every following byte are concatenated into x.
   Errors are reported through cd->my_errno (YAZ_ICONV_EILSEQ or
   YAZ_ICONV_EINVAL). */
78 static unsigned long yaz_read_UTF8 (yaz_iconv_t cd, unsigned char *inp,
79 size_t inbytesleft, size_t *no_read)
/* 0x80..0xbf are continuation bytes, 0xfe/0xff never start a sequence */
88 else if (inp[0] <= 0xbf || inp[0] >= 0xfe)
91 cd->my_errno = YAZ_ICONV_EILSEQ;
/* two-byte sequence: 5 + 6 payload bits */
93 else if (inp[0] <= 0xdf && inbytesleft >= 2)
95 x = ((inp[0] & 0x1f) << 6) | (inp[1] & 0x3f);
101 cd->my_errno = YAZ_ICONV_EILSEQ;
/* three-byte sequence: 4 + 6 + 6 payload bits */
104 else if (inp[0] <= 0xef && inbytesleft >= 3)
106 x = ((inp[0] & 0x0f) << 12) | ((inp[1] & 0x3f) << 6) |
113 cd->my_errno = YAZ_ICONV_EILSEQ;
/* four-byte sequence: 3 + 6 + 6 + 6 payload bits */
116 else if (inp[0] <= 0xf7 && inbytesleft >= 4)
118 x = ((inp[0] & 0x07) << 18) | ((inp[1] & 0x3f) << 12) |
119 ((inp[2] & 0x3f) << 6) | (inp[3] & 0x3f);
125 cd->my_errno = YAZ_ICONV_EILSEQ;
/* five-byte sequence: 2 + 4 * 6 payload bits */
128 else if (inp[0] <= 0xfb && inbytesleft >= 5)
130 x = ((inp[0] & 0x03) << 24) | ((inp[1] & 0x3f) << 18) |
131 ((inp[2] & 0x3f) << 12) | ((inp[3] & 0x3f) << 6) |
138 cd->my_errno = YAZ_ICONV_EILSEQ;
/* six-byte sequence: 1 + 5 * 6 payload bits */
141 else if (inp[0] <= 0xfd && inbytesleft >= 6)
143 x = ((inp[0] & 0x01) << 30) | ((inp[1] & 0x3f) << 24) |
144 ((inp[2] & 0x3f) << 18) | ((inp[3] & 0x3f) << 12) |
145 ((inp[4] & 0x3f) << 6) | (inp[5] & 0x3f);
151 cd->my_errno = YAZ_ICONV_EILSEQ;
/* reached when no branch above applied, i.e. the lead byte is acceptable
   but inbytesleft is smaller than the sequence it announces */
157 cd->my_errno = YAZ_ICONV_EINVAL;
/* Reader for big-endian UCS-4 input: the four bytes inp[0]..inp[3] are
   combined, most significant byte first, into one character value.
   cd->my_errno is set to YAZ_ICONV_EINVAL when the input is incomplete. */
162 static unsigned long yaz_read_UCS4 (yaz_iconv_t cd, unsigned char *inp,
163 size_t inbytesleft, size_t *no_read)
169 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
/* inp[0] is widened to unsigned long before the shift: as a plain unsigned
   char it is promoted to int, and a byte >= 0x80 shifted left by 24 does not
   fit a 32-bit int (undefined behaviour; in practice a negative value that
   sign-extends when converted to a 64-bit unsigned long). */
174 x = ((unsigned long) inp[0]<<24) | (inp[1]<<16) | (inp[2]<<8) | inp[3];
/* Reader for little-endian UCS-4 input: the four bytes inp[0]..inp[3] are
   combined, least significant byte first, into one character value.
   cd->my_errno is set to YAZ_ICONV_EINVAL when the input is incomplete. */
180 static unsigned long yaz_read_UCS4LE (yaz_iconv_t cd, unsigned char *inp,
181 size_t inbytesleft, size_t *no_read)
187 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
/* inp[3] is widened to unsigned long before the shift: as a plain unsigned
   char it is promoted to int, and a byte >= 0x80 shifted left by 24 does not
   fit a 32-bit int (undefined behaviour; in practice a negative value that
   sign-extends when converted to a 64-bit unsigned long). */
192 x = ((unsigned long) inp[3]<<24) | (inp[2]<<16) | (inp[1]<<8) | inp[0];
/* Reader for input that consists of raw wchar_t objects in host
   representation.  One character occupies sizeof(wchar_t) bytes. */
199 static unsigned long yaz_read_wchar_t (yaz_iconv_t cd, unsigned char *inp,
200 size_t inbytesleft, size_t *no_read)
204 if (inbytesleft < sizeof(wchar_t))
206 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
/* copied byte-wise; memcpy places no alignment requirement on inp */
212 memcpy (&wch, inp, sizeof(wch));
214 *no_read = sizeof(wch);
/* Reader for MARC-8 input.  Leading escape sequences (introduced by byte 27,
   ESC) are consumed first and update cd->marc8_esc_mode; the character
   itself is then decoded by yaz_marc8_conv or yaz_marc8_cjk_conv, chosen by
   that mode.  *no_read accumulates the bytes used by the escape sequences
   plus the bytes reported by the converter. */
220 static unsigned long yaz_read_marc8 (yaz_iconv_t cd, unsigned char *inp,
221 size_t inbytesleft, size_t *no_read)
224 while(inbytesleft >= 1 && inp[0] == 27)
/* remembered so the length of this escape sequence can be derived below */
226 size_t inbytesleft0 = inbytesleft;
229 if (inbytesleft <= 1)
232 cd->my_errno = YAZ_ICONV_EINVAL;
235 if (*inp == '(' || *inp == ',') /* G0, one byte */
240 else if (*inp == '$') /* G0, multi byte */
250 if (inbytesleft <= 0)
253 cd->my_errno = YAZ_ICONV_EINVAL;
258 if (inbytesleft <= 1)
261 cd->my_errno = YAZ_ICONV_EINVAL;
/* the byte at this position names the character set now in effect */
267 cd->marc8_esc_mode = *inp++;
269 (*no_read) += inbytesleft0 - inbytesleft;
271 if (inbytesleft <= 0)
/* byte count of the converter call, added on top of the escape bytes */
276 size_t no_read_sub = 0;
278 switch(cd->marc8_esc_mode)
282 x = yaz_marc8_conv(inp, inbytesleft, &no_read_sub);
283 *no_read += no_read_sub;
286 x = yaz_marc8_cjk_conv(inp, inbytesleft, &no_read_sub);
287 *no_read += no_read_sub;
/* NOTE(review): presumably the switch default, i.e. an escape mode this
   reader does not support -- confirm */
291 cd->my_errno = YAZ_ICONV_EILSEQ;
/* Writer for UTF-8 output.  The value x is emitted as a sequence of one to
   six bytes; each branch requires both that x fits the sequence length and
   that *outbytesleft has room for it.  When no branch applies,
   cd->my_errno is set to YAZ_ICONV_E2BIG.  *outbuf is updated to the
   position behind the bytes written. */
297 static size_t yaz_write_UTF8 (yaz_iconv_t cd, unsigned long x,
298 char **outbuf, size_t *outbytesleft)
300 unsigned char *outp = (unsigned char *) *outbuf;
/* one byte: values up to 0x7f are written unchanged */
301 if (x <= 0x7f && *outbytesleft >= 1)
303 *outp++ = (unsigned char) x;
/* two bytes: lead byte 110xxxxx, then 10xxxxxx */
306 else if (x <= 0x7ff && *outbytesleft >= 2)
308 *outp++ = (unsigned char) ((x >> 6) | 0xc0);
309 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
310 (*outbytesleft) -= 2;
/* three bytes: lead byte 1110xxxx */
312 else if (x <= 0xffff && *outbytesleft >= 3)
314 *outp++ = (unsigned char) ((x >> 12) | 0xe0);
315 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
316 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
317 (*outbytesleft) -= 3;
/* four bytes: lead byte 11110xxx */
319 else if (x <= 0x1fffff && *outbytesleft >= 4)
321 *outp++ = (unsigned char) ((x >> 18) | 0xf0);
322 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
323 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
324 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
325 (*outbytesleft) -= 4;
/* five bytes: lead byte 111110xx */
327 else if (x <= 0x3ffffff && *outbytesleft >= 5)
329 *outp++ = (unsigned char) ((x >> 24) | 0xf8);
330 *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
331 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
332 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
333 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
334 (*outbytesleft) -= 5;
/* six bytes: lead byte 1111110x; taken for every larger value of x, there
   is no upper bound check on this branch */
336 else if (*outbytesleft >= 6)
338 *outp++ = (unsigned char) ((x >> 30) | 0xfc);
339 *outp++ = (unsigned char) (((x >> 24) & 0x3f) | 0x80);
340 *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
341 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
342 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
343 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
344 (*outbytesleft) -= 6;
348 cd->my_errno = YAZ_ICONV_E2BIG; /* no room for output */
351 *outbuf = (char *) outp;
/* Writer for ISO-8859-1 output: x is stored as a single byte.  Values
   outside 1..255 set cd->my_errno to YAZ_ICONV_EILSEQ; a full output buffer
   sets it to YAZ_ICONV_E2BIG.  *outbuf is updated to the new write
   position. */
355 static size_t yaz_write_ISO8859_1 (yaz_iconv_t cd, unsigned long x,
356 char **outbuf, size_t *outbytesleft)
358 unsigned char *outp = (unsigned char *) *outbuf;
/* note that the value 0 is rejected as well, not only values above 255 */
359 if (x > 255 || x < 1)
361 cd->my_errno = YAZ_ICONV_EILSEQ;
364 else if (*outbytesleft >= 1)
366 *outp++ = (unsigned char) x;
371 cd->my_errno = YAZ_ICONV_E2BIG;
374 *outbuf = (char *) outp;
/* Writer for big-endian UCS-4 output: the low 32 bits of x are stored as
   four bytes, most significant byte first.  With fewer than four bytes of
   room cd->my_errno is set to YAZ_ICONV_E2BIG.  *outbuf is updated to the
   new write position. */
379 static size_t yaz_write_UCS4 (yaz_iconv_t cd, unsigned long x,
380 char **outbuf, size_t *outbytesleft)
382 unsigned char *outp = (unsigned char *) *outbuf;
383 if (*outbytesleft >= 4)
385 *outp++ = (unsigned char) (x>>24);
386 *outp++ = (unsigned char) (x>>16);
387 *outp++ = (unsigned char) (x>>8);
388 *outp++ = (unsigned char) x;
389 (*outbytesleft) -= 4;
393 cd->my_errno = YAZ_ICONV_E2BIG;
396 *outbuf = (char *) outp;
/* Writer for little-endian UCS-4 output: the low 32 bits of x are stored as
   four bytes, least significant byte first.  With fewer than four bytes of
   room cd->my_errno is set to YAZ_ICONV_E2BIG.  *outbuf is updated to the
   new write position. */
400 static size_t yaz_write_UCS4LE (yaz_iconv_t cd, unsigned long x,
401 char **outbuf, size_t *outbytesleft)
403 unsigned char *outp = (unsigned char *) *outbuf;
404 if (*outbytesleft >= 4)
406 *outp++ = (unsigned char) x;
407 *outp++ = (unsigned char) (x>>8);
408 *outp++ = (unsigned char) (x>>16);
409 *outp++ = (unsigned char) (x>>24);
410 (*outbytesleft) -= 4;
414 cd->my_errno = YAZ_ICONV_E2BIG;
417 *outbuf = (char *) outp;
/* Writer for output that consists of raw wchar_t objects in host
   representation.  One character takes sizeof(wchar_t) bytes; when that much
   room is not available cd->my_errno is set to YAZ_ICONV_E2BIG.  *outbuf is
   updated to the new write position. */
422 static size_t yaz_write_wchar_t (yaz_iconv_t cd, unsigned long x,
423 char **outbuf, size_t *outbytesleft)
425 unsigned char *outp = (unsigned char *) *outbuf;
427 if (*outbytesleft >= sizeof(wchar_t))
/* copied byte-wise; memcpy places no alignment requirement on outp */
430 memcpy(outp, &wch, sizeof(wch));
432 (*outbytesleft) -= sizeof(wch);
436 cd->my_errno = YAZ_ICONV_E2BIG;
439 *outbuf = (char *) outp;
/* Returns non-zero when both a read handler and a write handler are set on
   cd, i.e. the conversion is carried out by the converters in this file. */
444 int yaz_iconv_isbuiltin(yaz_iconv_t cd)
446 return cd->read_handle && cd->write_handle;
/* Creates a conversion descriptor for converting from character set
   fromcode to character set tocode.  A read handler is selected from
   fromcode and a write handler from tocode; when either one is missing the
   system iconv library is asked for the conversion instead. */
449 yaz_iconv_t yaz_iconv_open (const char *tocode, const char *fromcode)
451 yaz_iconv_t cd = (yaz_iconv_t) xmalloc (sizeof(*cd));
453 cd->write_handle = 0;
456 cd->my_errno = YAZ_ICONV_UNKNOWN;
/* initial MARC-8 escape mode, used until an escape sequence changes it */
457 cd->marc8_esc_mode = 'B';
459 /* a useful hack: if fromcode has a leading @,
460 the library does not use YAZ's own conversions .. */
461 if (fromcode[0] == '@')
/* a zero result from yaz_matchstr selects the handler for that name */
465 if (!yaz_matchstr(fromcode, "UTF8"))
467 cd->read_handle = yaz_read_UTF8;
/* UTF-8 is the only source that also gets a start-of-input hook */
468 cd->init_handle = yaz_init_UTF8;
470 else if (!yaz_matchstr(fromcode, "ISO88591"))
471 cd->read_handle = yaz_read_ISO8859_1;
472 else if (!yaz_matchstr(fromcode, "UCS4"))
473 cd->read_handle = yaz_read_UCS4;
474 else if (!yaz_matchstr(fromcode, "UCS4LE"))
475 cd->read_handle = yaz_read_UCS4LE;
476 else if (!yaz_matchstr(fromcode, "MARC8"))
477 cd->read_handle = yaz_read_marc8;
479 else if (!yaz_matchstr(fromcode, "WCHAR_T"))
480 cd->read_handle = yaz_read_wchar_t;
/* no MARC8 entry on the output side: there is no MARC-8 writer here */
483 if (!yaz_matchstr(tocode, "UTF8"))
484 cd->write_handle = yaz_write_UTF8;
485 else if (!yaz_matchstr(tocode, "ISO88591"))
486 cd->write_handle = yaz_write_ISO8859_1;
487 else if (!yaz_matchstr (tocode, "UCS4"))
488 cd->write_handle = yaz_write_UCS4;
489 else if (!yaz_matchstr(tocode, "UCS4LE"))
490 cd->write_handle = yaz_write_UCS4LE;
492 else if (!yaz_matchstr(tocode, "WCHAR_T"))
493 cd->write_handle = yaz_write_wchar_t;
/* built-in conversion incomplete: fall back to the system iconv */
498 if (!cd->read_handle || !cd->write_handle)
500 cd->iconv_cd = iconv_open (tocode, fromcode);
/* (iconv_t) -1 is iconv_open's failure indication */
501 if (cd->iconv_cd == (iconv_t) (-1))
/* NOTE(review): same test as above; presumably the variant compiled when no
   system iconv is available -- confirm against the preprocessor guards */
508 if (!cd->read_handle || !cd->write_handle)
/* Converts characters from *inbuf to *outbuf, with an argument list shaped
   like that of the system iconv function.  Either the call is passed on to
   the system iconv, or the built-in handlers of cd are used: init_handle
   once on the input, then read_handle / write_handle for the characters.
   The outcome of a failed conversion is recorded in cd->my_errno. */
518 size_t yaz_iconv (yaz_iconv_t cd, char **inbuf, size_t *inbytesleft,
519 char **outbuf, size_t *outbytesleft)
527 iconv(cd->iconv_cd, inbuf, inbytesleft, outbuf, outbytesleft);
/* system iconv failed: its error is translated into one of the
   YAZ_ICONV_* codes below (NOTE(review): presumably by switching on errno
   -- confirm) */
528 if (r == (size_t)(-1))
533 cd->my_errno = YAZ_ICONV_E2BIG;
536 cd->my_errno = YAZ_ICONV_EINVAL;
539 cd->my_errno = YAZ_ICONV_EILSEQ;
542 cd->my_errno = YAZ_ICONV_UNKNOWN;
/* call without input data */
548 if (inbuf == 0 || *inbuf == 0)
551 cd->my_errno = YAZ_ICONV_UNKNOWN;
/* let the source-specific hook look at the start of the input */
561 size_t r = (cd->init_handle)(cd, (unsigned char *) *inbuf,
562 *inbytesleft, &no_read);
565 if (cd->my_errno == YAZ_ICONV_EINVAL)
/* account for the bytes the hook consumed */
570 *inbytesleft -= no_read;
580 if (*inbytesleft == 0)
/* decode one character from the input ... */
586 x = (cd->read_handle)(cd, (unsigned char *) *inbuf, *inbytesleft,
/* ... and encode it into the output */
595 r = (cd->write_handle)(cd, x, outbuf, outbytesleft);
599 *inbytesleft -= no_read;
/* Error accessor for a conversion descriptor.
   NOTE(review): presumably reports cd->my_errno, the field the converters
   in this file set on failure -- confirm against the function body. */
605 int yaz_iconv_error (yaz_iconv_t cd)
/* Closes a conversion descriptor obtained from yaz_iconv_open. */
610 int yaz_iconv_close (yaz_iconv_t cd)
/* release the system iconv descriptor held in cd */
614 iconv_close (cd->iconv_cd);