1 // $Id: UnicodeToAnsel.java,v 1.4 2008/10/17 06:47:06 haschart Exp $
\r
3 * Copyright (C) 2002 Bas Peters (mail@bpeters.com)
\r
5 * This file is part of MARC4J
\r
7 * MARC4J is free software; you can redistribute it and/or
\r
8 * modify it under the terms of the GNU Lesser General Public
\r
9 * License as published by the Free Software Foundation; either
\r
10 * version 2.1 of the License, or (at your option) any later version.
\r
12 * MARC4J is distributed in the hope that it will be useful,
\r
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
\r
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
\r
15 * Lesser General Public License for more details.
\r
17 * You should have received a copy of the GNU Lesser General Public
\r
18 * License along with MARC4J; if not, write to the Free Software
\r
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
\r
21 package org.marc4j.converter.impl;
\r
23 import java.io.InputStream;
\r
24 import java.util.Hashtable;
\r
26 import org.marc4j.converter.CharConverter;
\r
30 * A utility to convert UCS/Unicode data to MARC-8.
\r
33 * The MARC-8 to Unicode mapping used is the version with the March 2005
\r
37 * @author Bas Peters
\r
38 * @author Corey Keith
\r
39 * @version $Revision: 1.4 $
\r
41 public class UnicodeToAnsel extends CharConverter {
\r
// Reverse code table; the constructors assign it and convert() queries it via
// codeTableHash(...) and isCombining(...).
42 protected ReverseCodeTable rct;
\r
// 0x1b is the ASCII escape control character.
44 static final char ESC = 0x1b;
\r
// 0x28 is '(' -- presumably the MARC-8 intermediate character that designates a
// G0 set; confirm against the LC MARC-8 specification.
46 static final char G0 = 0x28;
\r
// 0x24 is '$'; convert() appends it when the MARC-8 value for a character is
// three chars long.
48 static final char G0multibyte = 0x24;
\r
// 0x29 is ')' -- presumably the MARC-8 intermediate character that designates a
// G1 set; confirm against the LC MARC-8 specification.
50 static final char G1 = 0x29;
\r
// 0x42 is 'B'; convert() compares the tracked G0 table id against this value and
// appends it (cast to char) at the end of the output when they differ.
52 static final int ASCII = 0x42;
\r
55 * Creates a new instance and loads the MARC4J supplied Ansel/Unicode
\r
56 * conversion tables based on the official LC tables.
\r
// Default constructor: looks up "resources/codetables.xml" relative to this class
// and passes the resulting stream to another constructor of this class.
// NOTE(review): Class.getResourceAsStream returns null when the resource is
// missing; no null check is visible here -- confirm how ReverseCodeTable handles it.
// NOTE(review): the closing brace of this constructor is not visible in this extract.
58 public UnicodeToAnsel() {
\r
59 this(UnicodeToAnsel.class
\r
60 .getResourceAsStream("resources/codetables.xml"));
\r
64 * Constructs an instance with the specified pathname.
\r
66 * Use this constructor to create an instance with a customized code table
\r
67 * mapping. The mapping file should follow the structure of LC's XML MARC-8
\r
68 * to Unicode mapping (see:
\r
69 * http://www.loc.gov/marc/specifications/codetables.xml).
\r
// Initializes rct with a ReverseCodeTable built from the given pathname.
// NOTE(review): the closing brace of this constructor is not visible in this extract.
72 public UnicodeToAnsel(String pathname) {
\r
73 rct = new ReverseCodeTable(pathname);
\r
77 * Constructs an instance with the specified input stream.
\r
79 * Use this constructor to create an instance with a customized code table
\r
80 * mapping. The mapping file should follow the structure of LC's XML MARC-8
\r
81 * to Unicode mapping (see:
\r
82 * http://www.loc.gov/marc/specifications/codetables.xml).
\r
// Initializes rct with a ReverseCodeTable built from the given input stream.
// NOTE(review): nothing visible here closes the stream -- confirm who owns it.
// NOTE(review): the closing brace of this constructor is not visible in this extract.
85 public UnicodeToAnsel(InputStream in) {
\r
86 rct = new ReverseCodeTable(in);
\r
90 * Converts UCS/Unicode data to MARC-8.
\r
93 * A question mark (0x3F) is returned if there is no match.
\r
96 * @param data - the UCS/Unicode data in an array of char
\r
97 * @return String - the MARC-8 data
\r
// Converts the given chars to an output string one input char at a time, using the
// reverse code table (rct) and a CodeTableTracker to remember the table ids last
// recorded for G0 and G1.
// NOTE(review): the embedded original line numbers skip values (for example 107,
// 120, 125, 129, 132-135, 137-139, 148-151, 153-154), so some statements -- the
// declaration of `table`, the else branches, closing braces -- are not visible in
// this extract. The comments below describe only the visible lines.
99 public String convert(char data[]) {
\r
// Accumulates the output for the whole input.
100 StringBuffer sb = new StringBuffer();
\r
101 CodeTableTracker ctt = new CodeTableTracker();
\r
// NOTE(review): no read of technique1 is visible in this extract.
103 boolean technique1 = false;
\r
105 for (int i = 0; i < data.length; i++) {
\r
106 Character c = new Character(data[i]);
\r
// Output produced for this single input character.
108 StringBuffer marc = new StringBuffer();
\r
// The casts below treat h as a map from Integer table id to the char[] value
// for c in that table.
109 Hashtable h = rct.codeTableHash(c);
\r
// Case 1: c has a value in the table currently tracked for G0 -- append that
// value directly.
111 if (h.keySet().contains(ctt.getPrevious(CodeTableTracker.G0))) {
\r
112 ctt.makePreviousCurrent();
\r
113 marc.append((char[]) h
\r
114 .get(ctt.getPrevious(CodeTableTracker.G0)));
\r
// Case 2: same check against the table currently tracked for G1.
115 } else if (h.keySet()
\r
116 .contains(ctt.getPrevious(CodeTableTracker.G1))) {
\r
117 ctt.makePreviousCurrent();
\r
118 marc.append((char[]) h
\r
119 .get(ctt.getPrevious(CodeTableTracker.G1)));
\r
// Takes whichever table id the key-set iterator yields first.
// NOTE(review): the branch header that presumably precedes this line is not
// visible, and no emptiness check on h is visible before next() -- confirm.
121 table = (Integer) h.keySet().iterator().next();
\r
122 char[] marc8 = (char[]) h.get(table);
\r
// Three-char value: append G0multibyte and record the table for G0.
124 if (marc8.length == 3) {
\r
126 marc.append(G0multibyte);
\r
127 ctt.setPrevious(CodeTableTracker.G0, table);
\r
// First char below 0x80: the table is recorded for G0.
128 } else if (marc8[0] < 0x80) {
\r
// Tables 0x62 and 0x70 are tested specially; the body of this test is not
// visible in this extract.
130 if ((table.intValue() == 0x62)
\r
131 || (table.intValue() == 0x70)) {
\r
136 ctt.setPrevious(CodeTableTracker.G0, table);
\r
// Records the table for G1; the header of this branch is not visible
// (presumably the remaining else case -- confirm).
140 ctt.setPrevious(CodeTableTracker.G1, table);
\r
// Appends the table id as a single char, then the value for c.
142 marc.append((char) table.intValue());
\r
143 marc.append(marc8);
\r
// When rct reports c as combining, its output is inserted before the last char
// already in sb rather than appended after it.
// NOTE(review): sb.length() - 1 is -1 when sb is empty, an offset that
// StringBuffer.insert rejects -- confirm a combining char can never come first.
146 if (rct.isCombining(c))
\r
147 sb.insert(sb.length() - 1, marc);
\r
// After the loop: if the table id tracked for G0 is not ASCII, the ASCII id is
// appended as a char (any appends that precede it are not visible here).
152 if (ctt.getPrevious(CodeTableTracker.G0).intValue() != ASCII) {
\r
155 sb.append((char) ASCII);
\r
158 return sb.toString();
\r