//$Id: XmlMarcDriver.java,v 1.4 2006/12/28 08:09:10 bpeters Exp $
/**
 * Copyright (C) 2004 Bas Peters
 *
 * This file is part of MARC4J
 *
 * MARC4J is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * MARC4J is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with MARC4J; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
\r
package org.marc4j.util;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import javax.xml.transform.Source;
import javax.xml.transform.stream.StreamSource;

import org.marc4j.Constants;
import org.marc4j.MarcStreamWriter;
import org.marc4j.MarcXmlReader;
import org.marc4j.converter.CharConverter;
import org.marc4j.converter.impl.UnicodeToAnsel;
import org.marc4j.converter.impl.UnicodeToIso5426;
import org.marc4j.converter.impl.UnicodeToIso6937;
import org.marc4j.marc.Record;
\r
43 * Provides a driver to convert MARCXML records to MARC format.
\r
46 * The following example reads input.xml and writes output to the console:
\r
50 * java org.marc4j.util.XmlMarcDriver input.xml
\r
54 * The following example reads input.xml, converts UTF-8 and writes output in
\r
55 * MARC-8 to output.mrc:
\r
59 * java org.marc4j.util.XmlMarcDriver -convert MARC8 -out output.mrc input.xml
\r
63 * It is possible to pre-process the input file using an XSLT stylesheet. The
\r
64 * transformation should produce valid MARCXML. The following example transforms
\r
65 * a MODS file to MARCXML and outputs MARC records.
\r
69 * java org.marc4j.util.XmlMarcDriver -convert MARC8 -out output.mrc -xsl http://www.loc.gov/standards/marcxml/xslt/MODS2MARC21slim.xsl modsfile.xml
\r
73 * For usage, run from the command-line with the following command:
\r
77 * java org.marc4j.util.XmlMarcDriver -usage
\r
81 * Check the home page for <a href="http://www.loc.gov/standards/marcxml/">
\r
82 * MARCXML </a> for more information about the MARCXML format.
\r
85 * @author Bas Peters
\r
86 * @version $Revision: 1.4 $
\r
89 public class XmlMarcDriver {
\r
92 * Provides a static entry point.
\r
98 * <li>-xsl <stylesheet URL> - pre-process using XSLT-stylesheet</li>
\r
99 * <li>-out <output file> - write to output file</li>
\r
100 * <li>-convert <encoding> - convert UTF-8 to <encoding>
\r
101 * (Supported encodings: MARC8, ISO5426, ISO6937)</li>
\r
102 * <li>-encoding <encoding> - Output using specified Java character
\r
104 * <li>-usage - show usage</li>
\r
105 * <li><input file> - input file with MARCXML records or a
\r
106 * transformation source
\r
109 public static void main(String args[]) {
\r
110 long start = System.currentTimeMillis();
\r
112 String input = null;
\r
113 String output = null;
\r
114 String stylesheet = null;
\r
115 String convert = null;
\r
116 String encoding = null;
\r
118 for (int i = 0; i < args.length; i++) {
\r
119 if (args[i].equals("-xsl")) {
\r
120 if (i == args.length - 1) {
\r
123 stylesheet = args[++i].trim();
\r
124 } else if (args[i].equals("-out")) {
\r
125 if (i == args.length - 1) {
\r
128 output = args[++i].trim();
\r
129 } else if (args[i].equals("-convert")) {
\r
130 if (i == args.length - 1) {
\r
133 convert = args[++i].trim();
\r
134 } else if (args[i].equals("-encoding")) {
\r
135 if (i == args.length - 1) {
\r
138 encoding = args[++i].trim();
\r
139 } else if (args[i].equals("-usage")) {
\r
141 } else if (args[i].equals("-help")) {
\r
144 input = args[i].trim();
\r
146 // Must be last arg
\r
147 if (i != args.length - 1) {
\r
152 if (input == null) {
\r
156 InputStream in = null;
\r
158 in = new FileInputStream(new File(input));
\r
159 } catch (FileNotFoundException e) {
\r
160 e.printStackTrace();
\r
162 MarcXmlReader reader = null;
\r
163 if (stylesheet == null)
\r
164 reader = new MarcXmlReader(in);
\r
166 Source source = new StreamSource(stylesheet);
\r
167 reader = new MarcXmlReader(in, source);
\r
170 OutputStream out = null;
\r
171 if (output != null)
\r
173 out = new FileOutputStream(output);
\r
174 } catch (FileNotFoundException e) {
\r
175 e.printStackTrace();
\r
180 MarcStreamWriter writer = null;
\r
181 if (encoding != null)
\r
182 writer = new MarcStreamWriter(out, encoding);
\r
184 writer = new MarcStreamWriter(out);
\r
186 if (convert != null) {
\r
187 CharConverter charconv = null;
\r
188 if (Constants.MARC_8_ENCODING.equals(convert))
\r
189 charconv = new UnicodeToAnsel();
\r
190 else if (Constants.ISO5426_ENCODING.equals(convert))
\r
191 charconv = new UnicodeToIso5426();
\r
192 else if (Constants.ISO6937_ENCODING.equals(convert))
\r
193 charconv = new UnicodeToIso6937();
\r
195 System.err.println("Unknown character set");
\r
198 writer.setConverter(charconv);
\r
201 while (reader.hasNext()) {
\r
202 Record record = reader.next();
\r
203 if (Constants.MARC_8_ENCODING.equals(convert))
\r
204 record.getLeader().setCharCodingScheme(' ');
\r
205 writer.write(record);
\r
209 System.err.println("Total time: "
\r
210 + (System.currentTimeMillis() - start) + " miliseconds");
\r
213 private static void usage() {
\r
214 System.err.println("MARC4J, Copyright (C) 2002-2006 Bas Peters");
\r
216 .println("Usage: org.marc4j.util.XmlMarcDriver [-options] <file.mrc>");
\r
218 .println(" -convert <encoding> = Converts UTF-8 to <encoding>");
\r
220 .println(" Valid encodings are: MARC8, ISO5426, ISO6937");
\r
222 .println(" -encoding <encoding> = Output using specified Java character encoding");
\r
224 .println(" -xsl <file> = Pre-process MARCXML using XSLT stylesheet <file>");
\r
225 System.err.println(" -out <file> = Output using <file>");
\r
226 System.err.println(" -usage or -help = this message");
\r
228 .println("The program outputs MARC records in ISO 2709 format");
\r
230 .println("See http://marc4j.tigris.org for more information.");
\r