1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2009 Index Data
3 * See the file LICENSE for details.
7 * \brief ISO 5426 decoding
10 * http://www.loc.gov/marc/specifications/specchariso8.html
12 * ISO 5426 reference (in German)
13 * Zeichenkonkordanz MAB2-Zeichensatz - ISO/IEC 10646 / Unicode
14 * http://www.d-nb.de/standardisierung/pdf/mab_unic.pdf
26 #include <yaz/xmalloc.h>
/* Queue of characters buffered by read_iso5426: comb_x[i] holds the value
   returned by yaz_read_iso5426_comb for queue slot i (at most 8 slots). */
35 unsigned long comb_x[8];
/* comb_no_read[i] is the number of input bytes that were consumed to
   produce comb_x[i]. */
36 size_t comb_no_read[8];
/* Per-character-set conversion functions, defined outside this file section.
   The two hex digits in each name are the ASCII code of the character-set
   designator that yaz_read_iso5426_comb switches on
   (0x42 'B', 0x45 'E', 0x67 'g', 0x62 'b', 0x70 'p', 0x32 '2',
    0x4E 'N', 0x51 'Q', 0x33 '3', 0x34 '4', 0x53 'S', 0x31 '1'). */
39 yaz_conv_func_t yaz_iso5426_42_conv;
40 yaz_conv_func_t yaz_iso5426_45_conv;
41 yaz_conv_func_t yaz_iso5426_67_conv;
42 yaz_conv_func_t yaz_iso5426_62_conv;
43 yaz_conv_func_t yaz_iso5426_70_conv;
44 yaz_conv_func_t yaz_iso5426_32_conv;
45 yaz_conv_func_t yaz_iso5426_4E_conv;
46 yaz_conv_func_t yaz_iso5426_51_conv;
47 yaz_conv_func_t yaz_iso5426_33_conv;
48 yaz_conv_func_t yaz_iso5426_34_conv;
49 yaz_conv_func_t yaz_iso5426_53_conv;
50 yaz_conv_func_t yaz_iso5426_31_conv;
/* Forward declaration: read_iso5426 calls yaz_read_iso5426_comb before its
   definition, which appears further down in this file. */
53 static unsigned long yaz_read_iso5426_comb(yaz_iconv_t cd,
54 struct decoder_data *data,
56 size_t inbytesleft, size_t *no_read,
/* Read handler for ISO 5426 input (installed as d->read_handle by
   yaz_iso5426_decoder).

   While entries remain in the queue (data->comb_offset < data->comb_size)
   the next queued value is taken from data->comb_x and *no_read is set
   from data->comb_no_read for that slot.

   Otherwise the queue is rebuilt from the input: comb_offset is reset to 0
   and up to 8 slots are filled with the values and byte counts produced by
   successive calls to yaz_read_iso5426_comb, shrinking inbytesleft by the
   bytes each call consumed. If the input runs out after at least one slot
   has been filled, YAZ_ICONV_EINVAL is set on cd.

   NOTE(review): the conditions that end the fill loop early and the final
   return value are not visible in this section - confirm against the
   full function. */
59 static unsigned long read_iso5426(yaz_iconv_t cd, yaz_iconv_decoder_t d,
61 size_t inbytesleft, size_t *no_read)
63 struct decoder_data *data = (struct decoder_data *) d->data;
65 if (data->comb_offset < data->comb_size)
67 *no_read = data->comb_no_read[data->comb_offset];
68 x = data->comb_x[data->comb_offset];
70 /* special case for double-diacritic combining characters,
71 INVERTED BREVE and DOUBLE TILDE.
72 We'll increment the no_read counter by 1, since we want to skip over
73 the processing of the closing ligature character
75 /* this code is no longer necessary.. our handlers code in
76 yaz_iso5426_?_conv (generated by charconv.tcl) now returns
77 0 and no_read=1 when a sequence does not match the input.
78 The SECOND HALFs in codetables.xml produces a non-existant
79 entry in the conversion trie.. Hence when met, the input byte is
80 skipped as it should (in yaz_iconv)
83 if (x == 0x0361 || x == 0x0360)
90 data->comb_offset = 0;
91 for (data->comb_size = 0; data->comb_size < 8; data->comb_size++)
95 if (inbytesleft == 0 && data->comb_size)
97 yaz_iconv_set_errno(cd, YAZ_ICONV_EINVAL);
102 x = yaz_read_iso5426_comb(cd, data, inp, inbytesleft, no_read, &comb);
105 data->comb_x[data->comb_size] = x;
106 data->comb_no_read[data->comb_size] = *no_read;
108 inbytesleft = inbytesleft - *no_read;
/* Variant of read_iso5426 that post-processes the character it returns.

   It first reads a character with read_iso5426. If that character is
   non-zero and exactly one entry is buffered in the queue
   (data->comb_size == 1), the pair (x, data->comb_x[0]) is passed to
   yaz_iso_8859_1_lookup_x12, which may overwrite x. On a successful lookup
   the byte count of the buffered entry is added to *no_read so that the
   caller skips both input sequences at once.

   NOTE(review): presumably the lookup composes a base character and a
   combining character into one precomposed ISO 8859-1 character - confirm
   against yaz_iso_8859_1_lookup_x12. */
115 static unsigned long read_iso5426s(yaz_iconv_t cd, yaz_iconv_decoder_t d,
117 size_t inbytesleft, size_t *no_read)
119 struct decoder_data *data = (struct decoder_data *) d->data;
120 unsigned long x = read_iso5426(cd, d, inp, inbytesleft, no_read);
121 if (x && data->comb_size == 1)
123 if (yaz_iso_8859_1_lookup_x12(x, data->comb_x[0], &x))
125 *no_read += data->comb_no_read[0];
/* Read one character from the input, handling character-set switching.

   Step 1: while the next input byte is 27 (ESC), parse an escape sequence
   and store its final byte as the new mode of either G0 (data->g0_mode)
   or G1 (data->g1_mode). The bytes consumed by each escape sequence are
   added to *no_read.

   Step 2: pick the active mode from the next byte (bytes below 128 use
   g0_mode, all others use g1_mode) and call the conversion function that
   matches the mode. The bytes consumed by the conversion are added to
   *no_read as well.

   Errors are reported through yaz_iconv_set_errno on cd:
   YAZ_ICONV_EILSEQ and YAZ_ICONV_EINVAL are both used below.
   NOTE(review): the exact branches that raise them are not fully visible
   in this section; EILSEQ follows the mode switch (presumably its default
   case) and EINVAL presumably covers input that ends inside an escape
   sequence - confirm against the full function. */
133 static unsigned long yaz_read_iso5426_comb(yaz_iconv_t cd,
134 struct decoder_data *data,
136 size_t inbytesleft, size_t *no_read,
/* 27 is the ASCII ESC character, which starts an escape sequence. */
140 while (inbytesleft > 0 && *inp == 27)
/* G0 is the target unless a G1 designator is seen further down. */
142 int *modep = &data->g0_mode;
/* Remember the starting count so the sequence length can be computed. */
143 size_t inbytesleft0 = inbytesleft;
147 if (inbytesleft == 0)
149 if (*inp == '$') /* set with multiple bytes */
154 if (inbytesleft == 0)
156 if (*inp == '(' || *inp == ',') /* G0 */
161 else if (*inp == ')' || *inp == '-') /* G1 */
165 modep = &data->g1_mode;
167 if (inbytesleft == 0)
169 if (*inp == '!') /* ANSEL is a special case */
174 if (inbytesleft == 0)
176 *modep = *inp++; /* Final character */
/* Account for every byte of the escape sequence just parsed. */
179 (*no_read) += inbytesleft0 - inbytesleft;
181 if (inbytesleft == 0)
183 else if (*inp == ' ')
191 size_t no_read_sub = 0;
/* 7-bit bytes are interpreted in G0, 8-bit bytes in G1. */
192 int mode = *inp < 128 ? data->g0_mode : data->g1_mode;
197 case 'B': /* Basic ASCII */
198 case 's': /* ASCII */
199 x = yaz_iso5426_42_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
201 case 'E': /* ANSEL */
/* ANSEL is the only set called with a non-zero last argument (128). */
202 x = yaz_iso5426_45_conv(inp, inbytesleft, &no_read_sub, comb, 127, 128);
206 case 'g': /* Greek */
207 x = yaz_iso5426_67_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
209 case 'b': /* Subscripts */
210 x = yaz_iso5426_62_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
212 case 'p': /* Superscripts */
213 x = yaz_iso5426_70_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
215 case '2': /* Basic Hebrew */
216 x = yaz_iso5426_32_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
218 case 'N': /* Basic Cyrillic */
219 x = yaz_iso5426_4E_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
221 case 'Q': /* Extended Cyrillic */
222 x = yaz_iso5426_51_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
224 case '3': /* Basic Arabic */
225 x = yaz_iso5426_33_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
227 case '4': /* Extended Arabic */
228 x = yaz_iso5426_34_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
230 case 'S': /* Greek */
231 x = yaz_iso5426_53_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
233 case '1': /* Chinese, Japanese, Korean (EACC) */
234 x = yaz_iso5426_31_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
239 yaz_iconv_set_errno(cd, YAZ_ICONV_EILSEQ);
/* Add the bytes consumed by the conversion to any escape-sequence bytes. */
242 *no_read += no_read_sub;
247 yaz_iconv_set_errno(cd, YAZ_ICONV_EINVAL);
/* Init handler (installed as d->init_handle by yaz_iso5426_decoder).
   Empties the queue of buffered characters by setting both comb_offset
   and comb_size to 0.
   NOTE(review): other initialisation (such as the G0/G1 modes) and the
   return value are not visible in this section - confirm against the
   full function. */
252 static size_t init_iso5426(yaz_iconv_t cd, yaz_iconv_decoder_t d,
254 size_t inbytesleft, size_t *no_read)
256 struct decoder_data *data = (struct decoder_data *) d->data;
259 data->comb_offset = data->comb_size = 0;
/* Destroy handler (installed as d->destroy_handle by yaz_iso5426_decoder).
   Operates on the decoder state stored in d->data.
   NOTE(review): presumably releases the block that yaz_iso5426_decoder
   obtained with xmalloc; the release call is not visible in this section. */
263 void destroy_iso5426(yaz_iconv_decoder_t d)
265 struct decoder_data *data = (struct decoder_data *) d->data;
/* Configure decoder d for the character set named by fromcode.

   When yaz_matchstr(fromcode, "ISO5426") returns 0, read_iso5426 becomes
   the read handler. A struct decoder_data is then allocated with xmalloc,
   and init_iso5426 / destroy_iso5426 are installed as the init and destroy
   handlers.

   NOTE(review): the behaviour for a non-matching fromcode, the assignment
   of the allocated state to d, and the return value are not visible in
   this section - confirm against the full function. */
269 yaz_iconv_decoder_t yaz_iso5426_decoder(const char *fromcode,
270 yaz_iconv_decoder_t d)
272 if (!yaz_matchstr(fromcode, "ISO5426"))
273 d->read_handle = read_iso5426;
277 struct decoder_data *data = (struct decoder_data *)
278 xmalloc(sizeof(*data));
280 d->init_handle = init_iso5426;
281 d->destroy_handle = destroy_iso5426;
290 * c-file-style: "Stroustrup"
291 * indent-tabs-mode: nil
293 * vim: shiftwidth=4 tabstop=8 expandtab