1 /* $Id: xmlread.c,v 1.3 2002-08-28 12:47:10 adam Exp $
2 Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
37 #include <yaz/xmalloc.h>
39 #include <yaz/data1.h>
/* Number of bytes requested from XML_GetBuffer and read per iteration
   when input is fed to the parser (used by both read loops in this file). */
43 #define XML_CHUNK 1024
/* Node stack indexed by level. zebra_read_xml stores the root node in
   slot 0; the callbacks store the node they create at [level] and pass
   [level-1] to the data1_mk_* constructor. Capacity is fixed at 256.
   NOTE(review): no bounds check on level is visible in the callbacks -
   confirm how deeply nested input is handled. */
46 data1_node *d1_stack[256];
/* Element-start callback (installed with XML_SetElementHandler in
   zebra_read_xml).
   user: the struct user_info registered as parser user data.
   el:   element name.
   attr: attribute list, handed on unchanged to data1_mk_tag.
   Creates a tag node for the element, records it on the node stack and
   logs the element name. */
53 static void cb_start (void *user, const char *el, const char **attr)
55 struct user_info *ui = (struct user_info*) user;
/* NOTE(review): any condition guarding this call is not visible in this
   view - presumably it applies only to the outermost element; confirm. */
57 data1_set_root (ui->dh, ui->d1_stack[0], ui->nmem, el);
/* The new node is stored at the current level; the entry one level below
   is passed as the final argument (presumably the parent - confirm). */
58 ui->d1_stack[ui->level] = data1_mk_tag (ui->dh, ui->nmem, el, attr,
59 ui->d1_stack[ui->level-1]);
61 yaz_log (ui->loglevel, "cb_start %s", el);
/* Element-end callback (installed with XML_SetElementHandler in
   zebra_read_xml). Logs the name of the element being closed.
   NOTE(review): the statement that adjusts ui->level is not visible in
   this view - presumably the level is decremented here; confirm. */
64 static void cb_end (void *user, const char *el)
66 struct user_info *ui = (struct user_info*) user;
69 yaz_log (ui->loglevel, "cb_end %s", el);
/* Character-data callback (installed with XML_SetCharacterDataHandler in
   zebra_read_xml).
   s/len: text and its length; the length is passed explicitly to both
   the log format (%.*s) and data1_mk_text_n, so s is not relied upon to
   be NUL-terminated.
   Creates a text node and stores it at the current stack level. */
72 static void cb_chardata (void *user, const char *s, int len)
74 struct user_info *ui = (struct user_info*) user;
76 yaz_log (ui->loglevel, "cb_chardata %.*s", len, s);
/* The entry one level below is passed as the final argument
   (presumably the parent node - confirm). */
78 ui->d1_stack[ui->level] = data1_mk_text_n (ui->dh, ui->nmem, s, len,
79 ui->d1_stack[ui->level -1]);
/* XML declaration callback (installed with XML_SetXmlDeclHandler in
   zebra_read_xml).
   Builds a name/value attribute list (version, encoding, standalone) and
   creates an "xml" preprocess node from it, then logs version and
   encoding.
   NOTE(review): the tail of the parameter list (which declares
   standalone) is not visible in this view. */
82 static void cb_decl (void *user, const char *version, const char*encoding,
85 struct user_info *ui = (struct user_info*) user;
/* Three name/value pairs occupy slots 0..5. NOTE(review): slot 6 is
   presumably a terminating null set on a line not visible here - confirm. */
86 const char *attr_list[7];
88 attr_list[0] = "version";
/* NOTE(review): version is stored as-is although the log call below
   allows for it being null - confirm data1_mk_preprocess accepts that. */
89 attr_list[1] = version;
91 attr_list[2] = "encoding";
/* The declared encoding is not stored: the attribute is always written
   as "UTF-8", and the encoding argument is only used in the log below. */
92 attr_list[3] = "UTF-8"; /* encoding */
94 attr_list[4] = "standalone";
95 attr_list[5] = standalone ? "yes" : "no";
99 data1_mk_preprocess (ui->dh, ui->nmem, "xml", attr_list,
100 ui->d1_stack[ui->level-1]);
101 yaz_log (ui->loglevel, "decl version=%s encoding=%s",
102 version ? version : "null",
103 encoding ? encoding : "null");
/* Processing-instruction callback (installed with
   XML_SetProcessingInstructionHandler in zebra_read_xml).
   Creates a preprocess node named after target, attaches the
   instruction's data to it as text, and logs both values.
   NOTE(review): the rest of the parameter list (declaring data) and the
   declaration of res are not visible in this view. */
106 static void cb_processing (void *user, const char *target,
109 struct user_info *ui = (struct user_info*) user;
/* Preprocess node with no attribute list (0); presumably its result is
   what res refers to below - confirm. */
111 data1_mk_preprocess (ui->dh, ui->nmem, target, 0,
112 ui->d1_stack[ui->level-1]);
/* NOTE(review): strlen(data) is called unconditionally, while the log
   call below guards against data being null - confirm data cannot be
   null here. */
113 data1_mk_text_nf (ui->dh, ui->nmem, data, strlen(data), res);
115 yaz_log (ui->loglevel, "decl processing target=%s data=%s",
116 target ? target : "null",
117 data ? data : "null");
/* Comment callback (installed with XML_SetCommentHandler in
   zebra_read_xml). Logs the comment text and creates a comment node for
   it, passing the stack entry one level below the current level.
   NOTE(review): the log guards against data being null but
   data1_mk_comment receives data unchecked - confirm. */
122 static void cb_comment (void *user, const char *data)
124 struct user_info *ui = (struct user_info*) user;
125 yaz_log (ui->loglevel, "decl comment data=%s", data ? data : "null");
126 data1_mk_comment (ui->dh, ui->nmem, data, ui->d1_stack[ui->level-1]);
/* DOCTYPE-start callback (installed with XML_SetDoctypeDeclHandler in
   zebra_read_xml). Only logs the doctype name and identifiers; no node
   is created in the visible code and has_internal_subset is not used
   there.
   NOTE(review): sysid and pubid are passed to %s without a null guard,
   unlike the other logging callbacks in this file - confirm whether the
   parser can pass null for them. */
129 static void cb_doctype_start (void *userData, const char *doctypeName,
130 const char *sysid, const char *pubid,
131 int has_internal_subset)
133 struct user_info *ui = (struct user_info*) userData;
134 yaz_log (ui->loglevel, "doctype start doctype=%s sysid=%s pubid=%s",
135 doctypeName, sysid, pubid);
/* DOCTYPE-end callback (installed with XML_SetDoctypeDeclHandler in
   zebra_read_xml). Only writes a log line. */
138 static void cb_doctype_end (void *userData)
140 struct user_info *ui = (struct user_info*) userData;
141 yaz_log (ui->loglevel, "doctype end");
/* Entity-declaration callback (installed with XML_SetEntityDeclHandler
   in zebra_read_xml). Only logs the declaration; value is printed with an
   explicit length (%.*s) taken from value_length.
   NOTE(review): the string arguments are passed to %s without null
   guards - confirm which of them the parser may leave null. */
145 static void cb_entity_decl (void *userData, const char *entityName,
146 int is_parameter_entity,
147 const char *value, int value_length,
148 const char *base, const char *systemId,
149 const char *publicId, const char *notationName)
151 struct user_info *ui = (struct user_info*) userData;
152 yaz_log (ui->loglevel,
153 "entity decl %s is_para_entry=%d value=%.*s base=%s systemId=%s"
154 " publicId=%s notationName=%s",
155 entityName, is_parameter_entity, value_length, value,
156 base, systemId, publicId, notationName);
/* External-entity callback (installed with
   XML_SetExternalEntityRefHandler in zebra_read_xml).
   Opens the file named by systemId, creates a sub-parser from pparser and
   feeds the file to it in XML_CHUNK-sized reads.
   NOTE(review): part of the parameter list (context, base), the local
   declarations, the loop construct, the return statements and any
   fclose of inf are not visible in this view. */
160 static int cb_external_entity (XML_Parser pparser,
163 const char *systemId,
164 const char *publicId)
/* The user data is fetched from the parent parser. */
166 struct user_info *ui = (struct user_info*) XML_GetUserData(pparser);
171 yaz_log (ui->loglevel,
172 "external entity context=%s base=%s systemid=%s publicid=%s",
173 context, base, systemId, publicId);
/* systemId is used directly as a file name, opened in binary mode. */
177 if (!(inf = fopen (systemId, "rb")))
179 yaz_log (LOG_WARN|LOG_ERRNO, "fopen %s", systemId);
/* NOTE(review): an empty string is passed as the context argument rather
   than the context parameter logged above - confirm this is intended. */
183 parser = XML_ExternalEntityParserCreate (pparser, "", 0);
/* Read step: obtain a parser-owned buffer, fill it from the file, then
   hand the byte count to the parser. */
187 void *buf = XML_GetBuffer (parser, XML_CHUNK);
190 yaz_log (LOG_WARN, "XML_GetBuffer fail");
193 r = fread (buf, 1, XML_CHUNK, inf);
198 yaz_log (LOG_WARN|LOG_ERRNO, "fread %s", systemId);
203 if (!XML_ParseBuffer (parser, r, done))
205 yaz_log (LOG_WARN, "XML_ParseBuffer failed %s",
206 XML_ErrorString(XML_GetErrorCode(parser)));
210 XML_ParserFree (parser);
/* Conversion callback assigned to info->convert by cb_encoding_handler.
   data: the iconv descriptor stored by cb_encoding_handler.
   s:    input bytes to convert.
   Runs the input through iconv into a two-byte buffer and copies the
   result into code.
   NOTE(review): the declarations of code, inleft, outleft and ret and the
   return statements are not visible in this view. */
216 static int cb_encoding_convert (void *data, const char *s)
218 iconv_t t = (iconv_t) data;
221 char outbuf_[2], *outbuf = outbuf_;
/* Cast drops const because iconv takes a non-const input pointer. */
223 char *inbuf = (char *) s;
226 ret = iconv (t, &inbuf, &inleft, &outbuf, &outleft);
/* Any failure other than E2BIG leads to the iconv reset below. */
227 if (ret == (size_t) (-1) && errno != E2BIG)
229 iconv (t, 0, 0, 0, 0);
/* NOTE(review): copies sizeof(short) bytes from the two-byte buffer, so
   this assumes sizeof(short) == 2 - confirm. */
234 memcpy (&code, outbuf_, sizeof(short));
/* Release callback assigned to info->release by cb_encoding_handler.
   data is the iconv descriptor stored in info->data.
   NOTE(review): the statement that disposes of the descriptor is not
   visible in this view - presumably it closes t; confirm. */
238 static void cb_encoding_release (void *data)
240 iconv_t t = (iconv_t) data;
/* Unknown-encoding callback (installed with
   XML_SetUnknownEncodingHandler in zebra_read_xml).
   Opens an iconv conversion from the encoding called name to "UNICODE"
   and loops over i = 0..255 to fill info->map[i]; entries that cannot be
   mapped are set to -1. When at least one multi-byte sequence is found,
   the iconv descriptor is kept in info->data and the convert/release
   callbacks are installed.
   NOTE(review): many lines of this function (the info parameter, local
   declarations, braces, several assignments and the return statements)
   are not visible in this view; the comments below describe only what is
   shown. */
244 static int cb_encoding_handler (void *userData, const char *name,
249 struct user_info *ui = (struct user_info*) userData;
251 iconv_t t = iconv_open ("UNICODE", name);
/* iconv_open reports failure with (iconv_t) -1. */
252 if (t == (iconv_t) (-1))
255 info->data = 0; /* signal that multibyte is not in use */
256 yaz_log (ui->loglevel, "Encoding handler of %s", name);
/* One pass per index of info->map. */
257 for (i = 0; i<256; i++)
262 char *inbuf = inbuf_;
263 char *outbuf = outbuf_;
268 iconv (t, 0, 0, 0, 0); /* reset iconv */
270 ret = iconv(t, &inbuf, &inleft, &outbuf, &outleft);
271 if (ret == (size_t) (-1))
275 yaz_log (ui->loglevel, "Encoding %d: invalid sequence", i);
276 info->map[i] = -1; /* invalid sequence */
279 { /* multi byte input */
/* NOTE(review): the enclosing loop runs i up to 255 inclusive, but this
   assertion only allows i < 255 - confirm whether i == 255 can reach
   this point. */
296 assert (i >= 0 && i<255);
/* Appends the decimal value of each input byte to sbuf.
   NOTE(review): the size of sbuf is not visible here - confirm it can
   hold len entries. */
299 for (k = 0; k<len; k++)
301 sprintf (sbuf+strlen(sbuf), "%d ", inbuf_[k]&255);
303 ret = iconv (t, &inbuf, &inleft, &outbuf, &outleft);
304 if (ret == (size_t) (-1))
306 if (errno == EILSEQ || errno == E2BIG)
312 else if (errno == EINVAL)
318 else if (outleft == 0)
321 info->data = t; /* signal that multibyte is in use */
/* Values below -1 in the map are reported as multi-byte input. */
329 if (info->map[i] < -1)
330 yaz_log (ui->loglevel, "Encoding %d: multibyte input %d",
333 yaz_log (ui->loglevel, "Encoding %d: multibyte input failed",
338 info->map[i] = -1; /* no room for output */
339 yaz_log (LOG_WARN, "Encoding %d: no room for output",
/* Output buffer completely filled: the converted value is copied into
   code. */
343 else if (outleft == 0)
346 memcpy (&code, outbuf_, sizeof(short));
351 { /* should never happen */
353 yaz_log (LOG_DEBUG, "Encoding %d: bad state", i);
357 { /* at least one multi byte */
358 info->convert = cb_encoding_convert;
359 info->release = cb_encoding_release;
363 /* no multi byte - we no longer need iconv handler */
/* Reads an XML document through the caller-supplied read function and
   builds a data1 node tree from it using the callbacks in this file.
   dh: data1 handle passed to the node constructors.
   rf: read function, called as rf(fh, buf, XML_CHUNK).
   fh: opaque handle passed to rf.
   Returns the root node stored in d1_stack[0].
   NOTE(review): the final parameter (used below as m), the local
   declarations, the loop construct and the early-return statements are
   not visible in this view. */
376 data1_node *zebra_read_xml (data1_handle dh,
377 int (*rf)(void *, char *, size_t), void *fh,
381 struct user_info uinfo;
384 uinfo.loglevel = LOG_LOG;
/* Slot 0 holds the root node for the whole document. */
388 uinfo.d1_stack[0] = data1_mk_node2 (dh, m, DATA1N_root, 0);
389 uinfo.d1_stack[1] = 0; /* indicate no children (see end of routine) */
391 parser = XML_ParserCreate (0 /* encoding */);
/* Register every callback; uinfo is what each callback receives as its
   user pointer. */
393 XML_SetElementHandler (parser, cb_start, cb_end);
394 XML_SetCharacterDataHandler (parser, cb_chardata);
395 XML_SetXmlDeclHandler (parser, cb_decl);
396 XML_SetProcessingInstructionHandler (parser, cb_processing);
397 XML_SetUserData (parser, &uinfo);
398 XML_SetCommentHandler (parser, cb_comment);
399 XML_SetDoctypeDeclHandler (parser, cb_doctype_start, cb_doctype_end);
400 XML_SetEntityDeclHandler (parser, cb_entity_decl);
401 XML_SetExternalEntityRefHandler (parser, cb_external_entity);
403 XML_SetUnknownEncodingHandler (parser, cb_encoding_handler, &uinfo);
/* Read step: obtain a parser-owned buffer, fill it through rf, then hand
   the byte count to the parser. */
408 void *buf = XML_GetBuffer (parser, XML_CHUNK);
412 yaz_log (LOG_WARN, "XML_GetBuffer fail");
415 r = (*rf)(fh, buf, XML_CHUNK);
419 yaz_log (LOG_WARN, "XML read fail");
424 if (!XML_ParseBuffer (parser, r, done))
426 yaz_log (LOG_WARN, "XML_ParseBuffer (1) failed %s",
427 XML_ErrorString(XML_GetErrorCode(parser)));
430 XML_ParserFree (parser);
/* Slot 1 still being 0 means no child node was created; !done means the
   input was not read to completion. NOTE(review): the statement taken in
   that case is not visible here - presumably a null return; confirm. */
431 if (!uinfo.d1_stack[1] || !done)
433 return uinfo.d1_stack[0];
/* Allocates the per-handler state for the XML record type with xmalloc.
   NOTE(review): initialisation of the allocated struct and the return
   statement are not visible in this view. */
440 static void *grs_init_xml(void)
442 struct xml_info *p = (struct xml_info *) xmalloc (sizeof(*p));
/* Read entry point for the XML record type: forwards the data1 handle,
   read function, file handle and memory handle from p to zebra_read_xml
   and returns its result. */
446 static data1_node *grs_read_xml (struct grs_read_info *p)
448 return zebra_read_xml (p->dh, p->readf, p->fh, p->mem);
/* Destroy hook for the XML record type; clientData is the handler state.
   NOTE(review): the pointer is cast to struct sgml_getc_info, whereas
   grs_init_xml allocates a struct xml_info - confirm which type is
   intended. The statement that releases p is not visible in this view. */
451 static void grs_destroy_xml(void *clientData)
453 struct sgml_getc_info *p = (struct sgml_getc_info *) clientData;
/* Descriptor for the XML record type. NOTE(review): the initialiser
   fields are not visible in this view - presumably the name plus the
   grs_init_xml, grs_destroy_xml and grs_read_xml hooks; confirm. */
458 static struct recTypeGrs xml_type = {
/* Externally visible handle that points at the file-local descriptor. */
465 RecTypeGrs recTypeGrs_xml = &xml_type;