* LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
* OF THIS SOFTWARE.
*
- * $Id: yaz-iconv.h,v 1.9 2006-04-19 23:15:39 adam Exp $
+ * $Id: yaz-iconv.h,v 1.10 2006-08-04 14:35:39 adam Exp $
*/
/**
* \file yaz-iconv.h
YAZ_EXPORT int yaz_strcmp_del(const char *a, const char *b, const char *b_del);
+YAZ_EXPORT unsigned long yaz_read_UTF8_char(unsigned char *inp, /**< bytes to decode; the decoder branches on inp[0] */
+ size_t inbytesleft, /**< bytes available at inp; multi-byte forms are only tried when enough remain */
+ size_t *no_read, /**< out: set to 0 on the failure paths; callers advance by it (presumably bytes consumed - confirm) */
+ int *error); /**< out: receives YAZ_ICONV_EILSEQ or YAZ_ICONV_EINVAL on failure */
+
YAZ_END_CDECL
#endif
/* Copyright (C) 2006, Index Data ApS
* See the file LICENSE for details.
*
- * $Id: nfaxml.c,v 1.9 2006-07-14 13:06:38 heikki Exp $
+ * $Id: nfaxml.c,v 1.10 2006-08-04 14:35:40 adam Exp $
*/
/**
#include <yaz/nmem.h>
#include <yaz/yconfig.h>
#include <yaz/nfa.h>
+#include <yaz/yaz-iconv.h>
#include <yaz/nfaxml.h>
#include <yaz/libxml2_error.h>
{
int bufidx=0;
xmlChar *content = xmlNodeGetContent(node);
- xmlChar *cp=content;
- int conlen=strlen((char *)content);
- int len;
- int res;
- while (*cp && (bufidx<maxlen) ) {
- len=conlen;
- res=xmlGetUTF8Char(cp,&len);
- if (res==-1) {
+ xmlChar *cp = content;
+ size_t conlen = strlen((char *)content);
+ while (*cp && bufidx<maxlen )
+ {
+ int error;
+ size_t no_read;
+ int res = yaz_read_UTF8_char(cp, conlen, &no_read, &error);
+ if (res == 0) {
/* should be caught earlier */
yaz_log(YLOG_FATAL,"Illegal utf-8 sequence "
"%d bytes into '%s' in %s, rule %d ",
xmlFree(content);
return -1;
}
- buf[bufidx++]=res;
- cp +=len;
- conlen -=len;
+ buf[bufidx++] = res;
+ cp += no_read;
+ conlen -= no_read;
}
buf[bufidx]=0;
xmlFree(content);
{
xmlChar *content = xmlNodeGetContent(node);
xmlChar *cp=content;
- int conlen=strlen((char *)content);
- int len;
- int res;
- len=conlen;
- res=xmlGetUTF8Char(cp,&len);
- if ( res != -1 ) {
+ size_t conlen = strlen((char *)content);
+ size_t no_read;
+ int error;
+ int res = yaz_read_UTF8_char(cp, conlen, &no_read, &error);
+ if ( res != 0 ) {
*range_start=res;
- cp +=len;
- conlen -=len;
- len=conlen;
- res=xmlGetUTF8Char(cp,&len);
+ cp += no_read;
+ conlen -= no_read;
+ res = yaz_read_UTF8_char(cp, conlen, &no_read, &error);
if (res != '-' )
- res = -1;
+ res = 0;
}
- if ( res != -1 ) {
- cp +=len;
- conlen -=len;
- len=conlen;
- res=xmlGetUTF8Char(cp,&len);
+ if ( res != 0 ) {
+ cp += no_read;
+ conlen -= no_read;
+ res = yaz_read_UTF8_char(cp, conlen, &no_read, &error);
}
- if ( res != -1 ) {
- *range_end=res;
+ if ( res != 0) {
+ *range_end = res;
}
xmlFree(content);
- if (res==-1) {
+ if (res == 0) {
yaz_log(YLOG_FATAL,"Illegal range. '%s'. Must be like 'a-z' "
"'in %s, rule %d ",
content, filename, rulenumber);
* Copyright (C) 1995-2006, Index Data ApS
* See the file LICENSE for details.
*
- * $Id: siconv.c,v 1.23 2006-05-09 21:37:02 adam Exp $
+ * $Id: siconv.c,v 1.24 2006-08-04 14:35:40 adam Exp $
*/
/**
* \file siconv.c
return 0;
}
-static unsigned long yaz_read_UTF8 (yaz_iconv_t cd, unsigned char *inp,
- size_t inbytesleft, size_t *no_read)
+unsigned long yaz_read_UTF8_char(unsigned char *inp,
+ size_t inbytesleft, size_t *no_read,
+ int *error)
{
unsigned long x = 0;
else if (inp[0] <= 0xbf || inp[0] >= 0xfe)
{
*no_read = 0;
- cd->my_errno = YAZ_ICONV_EILSEQ;
+ *error = YAZ_ICONV_EILSEQ;
}
else if (inp[0] <= 0xdf && inbytesleft >= 2)
{
else
{
*no_read = 0;
- cd->my_errno = YAZ_ICONV_EILSEQ;
+ *error = YAZ_ICONV_EILSEQ;
}
}
else if (inp[0] <= 0xef && inbytesleft >= 3)
else
{
*no_read = 0;
- cd->my_errno = YAZ_ICONV_EILSEQ;
+ *error = YAZ_ICONV_EILSEQ;
}
}
else if (inp[0] <= 0xf7 && inbytesleft >= 4)
else
{
*no_read = 0;
- cd->my_errno = YAZ_ICONV_EILSEQ;
+ *error = YAZ_ICONV_EILSEQ;
}
}
else if (inp[0] <= 0xfb && inbytesleft >= 5)
else
{
*no_read = 0;
- cd->my_errno = YAZ_ICONV_EILSEQ;
+ *error = YAZ_ICONV_EILSEQ;
}
}
else if (inp[0] <= 0xfd && inbytesleft >= 6)
else
{
*no_read = 0;
- cd->my_errno = YAZ_ICONV_EILSEQ;
+ *error = YAZ_ICONV_EILSEQ;
}
}
else
{
*no_read = 0;
- cd->my_errno = YAZ_ICONV_EINVAL;
+ *error = YAZ_ICONV_EINVAL;
}
return x;
}
+static unsigned long yaz_read_UTF8 (yaz_iconv_t cd, unsigned char *inp,
+ size_t inbytesleft, size_t *no_read)
+{ /* converter-bound wrapper: forwards to yaz_read_UTF8_char and routes its error code into cd->my_errno */
+ return yaz_read_UTF8_char(inp, inbytesleft, no_read, &cd->my_errno);
+}
+
static unsigned long yaz_read_UCS4 (yaz_iconv_t cd, unsigned char *inp,
size_t inbytesleft, size_t *no_read)
{