1 // $Id: UnicodeToIso6937.java,v 1.3 2008/10/17 06:47:06 haschart Exp $
\r
3 * Copyright (C) 2002 Bas Peters (mail@bpeters.com)
\r
5 * This file is part of MARC4J
\r
7 * MARC4J is free software; you can redistribute it and/or
\r
8 * modify it under the terms of the GNU Lesser General Public
\r
9 * License as published by the Free Software Foundation; either
\r
10 * version 2.1 of the License, or (at your option) any later version.
\r
12 * MARC4J is distributed in the hope that it will be useful,
\r
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
\r
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
\r
15 * Lesser General Public License for more details.
\r
17 * You should have received a copy of the GNU Lesser General Public
\r
18 * License along with MARC4J; if not, write to the Free Software
\r
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
\r
21 package org.marc4j.converter.impl;
\r
23 import org.marc4j.converter.CharConverter;
\r
27 * A utility to convert UCS/Unicode data to ISO 6937.
\r
30 * @author Bas Peters
\r
31 * @author Yves Pratter
\r
32 * @version $Revision: 1.3 $
\r
34 public class UnicodeToIso6937 extends CharConverter {
\r
38 * Converts UCS/Unicode data to ISO 6937.
\r
42 * A question mark (0x3F) is returned if there is no match.
\r
45 * @param data - the UCS/Unicode data in an array of char
\r
46 * @return {@link String} - the ISO 6937 data
\r
48 public String convert(char data[]) {
\r
49 StringBuffer sb = new StringBuffer();
\r
50 for (int i = 0; i < data.length; i++) {
\r
57 sb.append((char) d);
\r
59 sb.append((char) (d / 256));
\r
60 sb.append((char) (d % 256));
\r
64 return sb.toString();
\r
67 private int convert(int i) {
\r
70 return 0xA0; // 10/00 NO-BREAK SPACE
\r
72 return 0xA1; // 10/01 INVERTED EXCLAMATION MARK
\r
74 return 0xA2; // 10/02 CENT SIGN
\r
76 return 0xA3; // 10/03 POUND SIGN
\r
78 return 0xA8; // 10/08 CURRENCY SIGN
\r
80 return 0xA5; // 10/05 YEN SIGN
\r
82 return 0xD7; // 13/07 BROKEN BAR
\r
84 return 0xA7; // 10/07 SECTION SIGN
\r
86 return 0xC820; // DIAERESIS
\r
88 return 0xD3; // 13/03 COPYRIGHT SIGN
\r
90 return 0xE3; // 14/03 FEMININE ORDINAL INDICATOR
\r
92 return 0xAB; // 10/11 LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
\r
94 return 0xD6; // 13/06 NOT SIGN
\r
96 return 0xFF; // 15/15 SOFT HYPHEN
\r
98 return 0xD4; // 13/04 TRADE MARK SIGN
\r
100 return 0xB0; // 11/00 DEGREE SIGN
\r
102 return 0xB1; // 11/01 PLUS-MINUS SIGN
\r
104 return 0xB2; // 11/02 SUPERSCRIPT TWO
\r
106 return 0xB3; // 11/03 SUPERSCRIPT THREE
\r
108 return 0xC220; // ACUTE ACCENT
\r
110 return 0xB5; // 11/05 MICRO SIGN
\r
112 return 0xB6; // 11/06 PILCROW SIGN
\r
114 return 0xB7; // 11/07 MIDDLE DOT
\r
116 return 0xCB20; // CEDILLA
\r
118 return 0xD1; // 13/01 SUPERSCRIPT ONE
\r
120 return 0xEB; // 14/11 MASCULINE ORDINAL INDICATOR
\r
122 return 0xBB; // 11/11 RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
\r
124 return 0xBC; // 11/12 VULGAR FRACTION ONE QUARTER
\r
126 return 0xBD; // 11/13 VULGAR FRACTION ONE HALF
\r
128 return 0xBE; // 11/14 VULGAR FRACTION THREE QUARTERS
\r
130 return 0xBF; // 11/15 INVERTED QUESTION MARK
\r
132 return 0xC141; // LATIN CAPITAL LETTER A WITH GRAVE
\r
134 return 0xC241; // LATIN CAPITAL LETTER A WITH ACUTE
\r
136 return 0xC341; // LATIN CAPITAL LETTER A WITH CIRCUMFLEX
\r
138 return 0xC441; // LATIN CAPITAL LETTER A WITH TILDE
\r
140 return 0xC841; // LATIN CAPITAL LETTER A WITH DIAERESIS
\r
142 return 0xCA41; // LATIN CAPITAL LETTER A WITH RING ABOVE
\r
144 return 0xE1; // 14/01 LATIN CAPITAL LETTER AE
\r
146 return 0xCB43; // LATIN CAPITAL LETTER C WITH CEDILLA
\r
148 return 0xC145; // LATIN CAPITAL LETTER E WITH GRAVE
\r
150 return 0xC245; // LATIN CAPITAL LETTER E WITH ACUTE
\r
152 return 0xC345; // LATIN CAPITAL LETTER E WITH CIRCUMFLEX
\r
154 return 0xC845; // LATIN CAPITAL LETTER E WITH DIAERESIS
\r
156 return 0xC149; // LATIN CAPITAL LETTER I WITH GRAVE
\r
158 return 0xC249; // LATIN CAPITAL LETTER I WITH ACUTE
\r
160 return 0xC349; // LATIN CAPITAL LETTER I WITH CIRCUMFLEX
\r
162 return 0xC849; // LATIN CAPITAL LETTER I WITH DIAERESIS
\r
164 return 0xC44E; // LATIN CAPITAL LETTER N WITH TILDE
\r
166 return 0xC14F; // LATIN CAPITAL LETTER O WITH GRAVE
\r
168 return 0xC24F; // LATIN CAPITAL LETTER O WITH ACUTE
\r
170 return 0xC34F; // LATIN CAPITAL LETTER O WITH CIRCUMFLEX
\r
172 return 0xC44F; // LATIN CAPITAL LETTER O WITH TILDE
\r
174 return 0xC84F; // LATIN CAPITAL LETTER O WITH DIAERESIS
\r
176 return 0xB4; // 11/04 MULTIPLICATION SIGN
\r
178 return 0xE9; // 14/09 LATIN CAPITAL LETTER O WITH STROKE
\r
180 return 0xC155; // LATIN CAPITAL LETTER U WITH GRAVE
\r
182 return 0xC255; // LATIN CAPITAL LETTER U WITH ACUTE
\r
184 return 0xC355; // LATIN CAPITAL LETTER U WITH CIRCUMFLEX
\r
186 return 0xC855; // LATIN CAPITAL LETTER U WITH DIAERESIS
\r
188 return 0xC259; // LATIN CAPITAL LETTER Y WITH ACUTE
\r
190 return 0xEC; // 14/12 LATIN CAPITAL LETTER THORN
\r
192 return 0xFB; // 15/11 LATIN SMALL LETTER SHARP S
\r
194 return 0xC161; // LATIN SMALL LETTER A WITH GRAVE
\r
196 return 0xC261; // LATIN SMALL LETTER A WITH ACUTE
\r
198 return 0xC361; // LATIN SMALL LETTER A WITH CIRCUMFLEX
\r
200 return 0xC461; // LATIN SMALL LETTER A WITH TILDE
\r
202 return 0xC861; // LATIN SMALL LETTER A WITH DIAERESIS
\r
204 return 0xCA61; // LATIN SMALL LETTER A WITH RING ABOVE
\r
206 return 0xF1; // 15/01 LATIN SMALL LETTER AE
\r
208 return 0xCB63; // LATIN SMALL LETTER C WITH CEDILLA
\r
210 return 0xC165; // LATIN SMALL LETTER E WITH GRAVE
\r
212 return 0xC265; // LATIN SMALL LETTER E WITH ACUTE
\r
214 return 0xC365; // LATIN SMALL LETTER E WITH CIRCUMFLEX
\r
216 return 0xC865; // LATIN SMALL LETTER E WITH DIAERESIS
\r
218 return 0xC169; // LATIN SMALL LETTER I WITH GRAVE
\r
220 return 0xC269; // LATIN SMALL LETTER I WITH ACUTE
\r
222 return 0xC369; // LATIN SMALL LETTER I WITH CIRCUMFLEX
\r
224 return 0xC869; // LATIN SMALL LETTER I WITH DIAERESIS
\r
226 return 0xF3; // 15/03 LATIN SMALL LETTER ETH
\r
228 return 0xC46E; // LATIN SMALL LETTER N WITH TILDE
\r
230 return 0xC16F; // LATIN SMALL LETTER O WITH GRAVE
\r
232 return 0xC26F; // LATIN SMALL LETTER O WITH ACUTE
\r
234 return 0xC36F; // LATIN SMALL LETTER O WITH CIRCUMFLEX
\r
236 return 0xC46F; // LATIN SMALL LETTER O WITH TILDE
\r
238 return 0xC86F; // LATIN SMALL LETTER O WITH DIAERESIS
\r
240 return 0xB8; // 11/08 DIVISION SIGN
\r
242 return 0xF9; // 15/09 LATIN SMALL LETTER O WITH STROKE
\r
244 return 0xC175; // LATIN SMALL LETTER U WITH GRAVE
\r
246 return 0xC275; // LATIN SMALL LETTER U WITH ACUTE
\r
248 return 0xC375; // LATIN SMALL LETTER U WITH CIRCUMFLEX
\r
250 return 0xC875; // LATIN SMALL LETTER U WITH DIAERESIS
\r
252 return 0xC279; // LATIN SMALL LETTER Y WITH ACUTE
\r
254 return 0xFC; // 15/12 LATIN SMALL LETTER THORN
\r
256 return 0xC879; // LATIN SMALL LETTER Y WITH DIAERESIS
\r
258 return 0xC541; // LATIN CAPITAL LETTER A WITH MACRON
\r
260 return 0xC561; // LATIN SMALL LETTER A WITH MACRON
\r
262 return 0xC641; // LATIN CAPITAL LETTER A WITH BREVE
\r
264 return 0xC661; // LATIN SMALL LETTER A WITH BREVE
\r
266 return 0xCE41; // LATIN CAPITAL LETTER A WITH OGONEK
\r
268 return 0xCE61; // LATIN SMALL LETTER A WITH OGONEK
\r
270 return 0xC243; // LATIN CAPITAL LETTER C WITH ACUTE
\r
272 return 0xC263; // LATIN SMALL LETTER C WITH ACUTE
\r
274 return 0xC343; // LATIN CAPITAL LETTER C WITH CIRCUMFLEX
\r
276 return 0xC363; // LATIN SMALL LETTER C WITH CIRCUMFLEX
\r
278 return 0xC743; // LATIN CAPITAL LETTER C WITH DOT ABOVE
\r
280 return 0xC763; // LATIN SMALL LETTER C WITH DOT ABOVE
\r
282 return 0xCF43; // LATIN CAPITAL LETTER C WITH CARON
\r
284 return 0xCF63; // LATIN SMALL LETTER C WITH CARON
\r
286 return 0xCF44; // LATIN CAPITAL LETTER D WITH CARON
\r
288 return 0xCF64; // LATIN SMALL LETTER D WITH CARON
\r
290 return 0xE2; // 14/02 LATIN CAPITAL LETTER D WITH STROKE
\r
292 return 0xF2; // 15/02 LATIN SMALL LETTER D WITH STROKE
\r
294 return 0xC545; // LATIN CAPITAL LETTER E WITH MACRON
\r
296 return 0xC565; // LATIN SMALL LETTER E WITH MACRON
\r
298 return 0xC745; // LATIN CAPITAL LETTER E WITH DOT ABOVE
\r
300 return 0xC765; // LATIN SMALL LETTER E WITH DOT ABOVE
\r
302 return 0xCE45; // LATIN CAPITAL LETTER E WITH OGONEK
\r
304 return 0xCE65; // LATIN SMALL LETTER E WITH OGONEK
\r
306 return 0xCF45; // LATIN CAPITAL LETTER E WITH CARON
\r
308 return 0xCF65; // LATIN SMALL LETTER E WITH CARON
\r
310 return 0xC347; // LATIN CAPITAL LETTER G WITH CIRCUMFLEX
\r
312 return 0xC367; // LATIN SMALL LETTER G WITH CIRCUMFLEX
\r
314 return 0xC647; // LATIN CAPITAL LETTER G WITH BREVE
\r
316 return 0xC667; // LATIN SMALL LETTER G WITH BREVE
\r
318 return 0xC747; // LATIN CAPITAL LETTER G WITH DOT ABOVE
\r
320 return 0xC767; // LATIN SMALL LETTER G WITH DOT ABOVE
\r
322 return 0xCB47; // LATIN CAPITAL LETTER G WITH CEDILLA
\r
323 // case 0x0123: return 0xCB67; // small g with cedilla
\r
325 return 0xC348; // LATIN CAPITAL LETTER H WITH CIRCUMFLEX
\r
327 return 0xC368; // LATIN SMALL LETTER H WITH CIRCUMFLEX
\r
329 return 0xE4; // 14/04 LATIN CAPITAL LETTER H WITH STROKE
\r
331 return 0xF4; // 15/04 LATIN SMALL LETTER H WITH STROKE
\r
333 return 0xC449; // LATIN CAPITAL LETTER I WITH TILDE
\r
335 return 0xC469; // LATIN SMALL LETTER I WITH TILDE
\r
337 return 0xC549; // LATIN CAPITAL LETTER I WITH MACRON
\r
339 return 0xC569; // LATIN SMALL LETTER I WITH MACRON
\r
341 return 0xCE49; // LATIN CAPITAL LETTER I WITH OGONEK
\r
343 return 0xCE69; // LATIN SMALL LETTER I WITH OGONEK
\r
345 return 0xC749; // LATIN CAPITAL LETTER I WITH DOT ABOVE
\r
347 return 0xF5; // 15/05 LATIN SMALL LETTER DOTLESS I
\r
349 return 0xE6; // 14/06 LATIN CAPITAL LIGATURE IJ
\r
351 return 0xF6; // 15/06 LATIN SMALL LIGATURE IJ
\r
353 return 0xC34A; // LATIN CAPITAL LETTER J WITH CIRCUMFLEX
\r
355 return 0xC36A; // LATIN SMALL LETTER J WITH CIRCUMFLEX
\r
357 return 0xCB4B; // LATIN CAPITAL LETTER K WITH CEDILLA
\r
359 return 0xCB6B; // LATIN SMALL LETTER K WITH CEDILLA
\r
361 return 0xF0; // 15/00 LATIN SMALL LETTER KRA
\r
363 return 0xC24C; // LATIN CAPITAL LETTER L WITH ACUTE
\r
365 return 0xC26C; // LATIN SMALL LETTER L WITH ACUTE
\r
367 return 0xCB4C; // LATIN CAPITAL LETTER L WITH CEDILLA
\r
369 return 0xCB6C; // LATIN SMALL LETTER L WITH CEDILLA
\r
371 return 0xCF4C; // LATIN CAPITAL LETTER L WITH CARON
\r
373 return 0xCF6C; // LATIN SMALL LETTER L WITH CARON
\r
375 return 0xE7; // 14/07 LATIN CAPITAL LETTER L WITH MIDDLE DOT
\r
377 return 0xF7; // 15/07 LATIN SMALL LETTER L WITH MIDDLE DOT
\r
379 return 0xE8; // 14/08 LATIN CAPITAL LETTER L WITH STROKE
\r
381 return 0xF8; // 15/08 LATIN SMALL LETTER L WITH STROKE
\r
383 return 0xC24E; // LATIN CAPITAL LETTER N WITH ACUTE
\r
385 return 0xC26E; // LATIN SMALL LETTER N WITH ACUTE
\r
387 return 0xCB4E; // LATIN CAPITAL LETTER N WITH CEDILLA
\r
389 return 0xCB6E; // LATIN SMALL LETTER N WITH CEDILLA
\r
391 return 0xCF4E; // LATIN CAPITAL LETTER N WITH CARON
\r
393 return 0xCF6E; // LATIN SMALL LETTER N WITH CARON
\r
395 return 0xEF; // 14/15 LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
\r
397 return 0xEE; // 14/14 LATIN CAPITAL LETTER ENG
\r
399 return 0xFE; // 15/14 LATIN SMALL LETTER ENG
\r
401 return 0xC54F; // LATIN CAPITAL LETTER O WITH MACRON
\r
403 return 0xC56F; // LATIN SMALL LETTER O WITH MACRON
\r
405 return 0xCD4F; // LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
\r
407 return 0xCD6F; // LATIN SMALL LETTER O WITH DOUBLE ACUTE
\r
409 return 0xEA; // 14/10 LATIN CAPITAL LIGATURE OE
\r
411 return 0xFA; // 15/10 LATIN SMALL LIGATURE OE
\r
413 return 0xC252; // LATIN CAPITAL LETTER R WITH ACUTE
\r
415 return 0xC272; // LATIN SMALL LETTER R WITH ACUTE
\r
417 return 0xCB52; // LATIN CAPITAL LETTER R WITH CEDILLA
\r
419 return 0xCB72; // LATIN SMALL LETTER R WITH CEDILLA
\r
421 return 0xCF52; // LATIN CAPITAL LETTER R WITH CARON
\r
423 return 0xCF72; // LATIN SMALL LETTER R WITH CARON
\r
425 return 0xC253; // LATIN CAPITAL LETTER S WITH ACUTE
\r
427 return 0xC273; // LATIN SMALL LETTER S WITH ACUTE
\r
429 return 0xC353; // LATIN CAPITAL LETTER S WITH CIRCUMFLEX
\r
431 return 0xC373; // LATIN SMALL LETTER S WITH CIRCUMFLEX
\r
433 return 0xCB53; // LATIN CAPITAL LETTER S WITH CEDILLA
\r
435 return 0xCB73; // LATIN SMALL LETTER S WITH CEDILLA
\r
437 return 0xCF53; // LATIN CAPITAL LETTER S WITH CARON
\r
439 return 0xCF73; // LATIN SMALL LETTER S WITH CARON
\r
441 return 0xCB54; // LATIN CAPITAL LETTER T WITH CEDILLA
\r
443 return 0xCB74; // LATIN SMALL LETTER T WITH CEDILLA
\r
445 return 0xCF54; // LATIN CAPITAL LETTER T WITH CARON
\r
447 return 0xCF74; // LATIN SMALL LETTER T WITH CARON
\r
449 return 0xED; // 14/13 LATIN CAPITAL LETTER T WITH STROKE
\r
451 return 0xFD; // 15/13 LATIN SMALL LETTER T WITH STROKE
\r
453 return 0xC455; // LATIN CAPITAL LETTER U WITH TILDE
\r
455 return 0xC475; // LATIN SMALL LETTER U WITH TILDE
\r
457 return 0xC555; // LATIN CAPITAL LETTER U WITH MACRON
\r
459 return 0xC575; // LATIN SMALL LETTER U WITH MACRON
\r
461 return 0xC655; // LATIN CAPITAL LETTER U WITH BREVE
\r
463 return 0xC675; // LATIN SMALL LETTER U WITH BREVE
\r
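465 return 0xCAAD; // LATIN CAPITAL LETTER U WITH RING ABOVE -- NOTE(review): low byte 0xAD is not 'U' (0x55) as used by the other capital-U entries; probably should be 0xCA55 -- confirm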
\r
467 return 0xCA75; // LATIN SMALL LETTER U WITH RING ABOVE
\r
469 return 0xCD55; // LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
\r
471 return 0xCD75; // LATIN SMALL LETTER U WITH DOUBLE ACUTE
\r
473 return 0xCE55; // LATIN CAPITAL LETTER U WITH OGONEK
\r
475 return 0xCE75; // LATIN SMALL LETTER U WITH OGONEK
\r
477 return 0xC357; // LATIN CAPITAL LETTER W WITH CIRCUMFLEX
\r
479 return 0xC377; // LATIN SMALL LETTER W WITH CIRCUMFLEX
\r
481 return 0xC359; // LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
\r
483 return 0xC379; // LATIN SMALL LETTER Y WITH CIRCUMFLEX
\r
485 return 0xC859; // LATIN CAPITAL LETTER Y WITH DIAERESIS
\r
487 return 0xC25A; // LATIN CAPITAL LETTER Z WITH ACUTE
\r
489 return 0xC27A; // LATIN SMALL LETTER Z WITH ACUTE
\r
491 return 0xC75A; // LATIN CAPITAL LETTER Z WITH DOT ABOVE
\r
493 return 0xC77A; // LATIN SMALL LETTER Z WITH DOT ABOVE
\r
495 return 0xCF5A; // LATIN CAPITAL LETTER Z WITH CARON
\r
497 return 0xCF7A; // LATIN SMALL LETTER Z WITH CARON
\r
499 return 0xC267; // LATIN SMALL LETTER G WITH CEDILLA(4)
\r
501 return 0xCF20; // CARON
\r
503 return 0xC620; // BREVE
\r
505 return 0xCA20; // RING ABOVE
\r
507 return 0xCE20; // OGONEK
\r
509 return 0xD0; // 13/00 HORIZONTAL BAR
\r
511 return 0xA9; // 10/09 LEFT SINGLE QUOTATION MARK
\r
513 return 0xB9; // 11/09 RIGHT SINGLE QUOTATION MARK
\r
515 return 0xAA; // 10/10 LEFT DOUBLE QUOTATION MARK
\r
517 return 0xBA; // 11/10 RIGHT DOUBLE QUOTATION MARK
\r
519 return 0xD2; // 13/02 REGISTERED SIGN
\r
521 return 0xE0; // 14/00 OHM SIGN
\r
523 return 0xDC; // 13/12 VULGAR FRACTION ONE EIGHTH
\r
525 return 0xDF; // 13/15 VULGAR FRACTION SEVEN EIGHTHS
\r
527 return 0xAC; // 10/12 LEFTWARDS ARROW
\r
529 return 0xAD; // 10/13 UPWARDS ARROW
\r
531 return 0xAE; // 10/14 RIGHTWARDS ARROW
\r
533 return 0xAF; // 10/15 DOWNWARDS ARROW
\r
535 return 0xD5; // 13/05 EIGHTH NOTE
\r
538 return 0x3F; // if no match, return question mark
\r