From b41a475db81d89458d4267b27e98f302c4519316 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Tue, 17 May 2011 12:25:28 +0200 Subject: [PATCH] Add yaz_utf8_check A function that checks whether a string appears to be UTF-8 encoded. --- include/yaz/yaz-iconv.h | 7 +++++++ src/utf8.c | 20 +++++++++++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/include/yaz/yaz-iconv.h b/include/yaz/yaz-iconv.h index a1a2dc4..96f326a 100644 --- a/include/yaz/yaz-iconv.h +++ b/include/yaz/yaz-iconv.h @@ -91,6 +91,13 @@ YAZ_EXPORT size_t yaz_write_UTF8_char(unsigned long x, #define yaz_tolower(x) ((x) + ('a' - 'A')) #define yaz_islower(x) ((x) >= 'a' && (x) <= 'z') +/** \brief check whether string appears to be UTF-8 encoded + \param cstr string to check + \retval 1 OK (appears to be UTF-8) + \retval 0 definitely not UTF-8 +*/ +YAZ_EXPORT int yaz_utf8_check(const char *cstr); + YAZ_END_CDECL #endif diff --git a/src/utf8.c b/src/utf8.c index 8911683..7e294d4 100644 --- a/src/utf8.c +++ b/src/utf8.c @@ -239,7 +239,25 @@ yaz_iconv_decoder_t yaz_utf8_decoder(const char *fromcode, } return 0; } - + +int yaz_utf8_check(const char *str) +{ + /* cast OK: yaz_read_UTF8_char is read-only */ + unsigned char *inp = (unsigned char *) str; + size_t inbytesleft = strlen(str); + + while (inbytesleft) + { + int error = 0; + size_t no_read; + yaz_read_UTF8_char(inp, inbytesleft, &no_read, &error); + if (error) + return 0; + inp += no_read; + inbytesleft -= no_read; + } + return 1; +} /* * Local variables: -- 1.7.10.4