* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/* $Id: tokenizer.h,v 1.1 2007-04-26 21:45:16 adam Exp $ */
+/* $Id: tokenizer.h,v 1.2 2007-04-27 10:09:44 adam Exp $ */
/** \file tokenizer.h
\brief Header with public definitions about YAZ' tokenizer
YAZ_BEGIN_CDECL
-#define YAZ_TOKENIZER_EOF 0
-#define YAZ_TOKENIZER_ERROR (-1)
-#define YAZ_TOKENIZER_STRING (-2)
-#define YAZ_TOKENIZER_QSTRING (-3)
+/* Token codes returned by yaz_tok_move(). A character configured as a
+ * single token is returned as its own character value instead. */
+/** \brief end of input reached */
+#define YAZ_TOK_EOF 0
+/** \brief input ended inside a quoted string */
+#define YAZ_TOK_ERROR (-1)
+/** \brief unquoted string; text available via yaz_tok_parse_string() */
+#define YAZ_TOK_STRING (-2)
+/** \brief quoted string; text available via yaz_tok_parse_string() */
+#define YAZ_TOK_QSTRING (-3)
-typedef struct yaz_tokenizer *yaz_tokenizer_t;
+/** \brief tokenizer configuration handle (reference counted) */
+typedef struct yaz_tok_cfg *yaz_tok_cfg_t;
+/** \brief handle for one tokenizing pass over a byte source */
+typedef struct yaz_tok_parse *yaz_tok_parse_t;
+
+/** \brief byte source callback
+ *
+ * Called with the address of the data pointer given at parse creation;
+ * returns the next byte. A return value of 0 is treated as end of input.
+ */
+typedef int (*yaz_tok_get_byte_t)(void **vp);
+
+/** \brief creates a configuration with default character classes
+ *
+ * The caller owns one reference; release it with yaz_tok_cfg_destroy().
+ */
+YAZ_EXPORT
+yaz_tok_cfg_t yaz_tok_cfg_create(void);
+/** \brief drops one reference; frees the configuration with the last one */
YAZ_EXPORT
-yaz_tokenizer_t yaz_tokenizer_create(void);
+void yaz_tok_cfg_destroy(yaz_tok_cfg_t t);
+/** \brief sets the characters that each form a token of their own
+ *
+ * The string is copied; any previous setting is replaced.
+ */
YAZ_EXPORT
-void yaz_tokenizer_destroy(yaz_tokenizer_t t);
+void yaz_tok_cfg_single_tokens(yaz_tok_cfg_t t, const char *simple);
+/** \brief creates a parse that reads its bytes from the string buf */
YAZ_EXPORT
-void yaz_tokenizer_read_buf(yaz_tokenizer_t t, const char *buf);
+yaz_tok_parse_t yaz_tok_parse_buf(yaz_tok_cfg_t t, const char *buf);
+/** \brief creates a parse that reads its bytes through callback h
+ *
+ * vp is the initial data pointer handed (by address) to h. The parse
+ * takes its own reference on t.
+ */
YAZ_EXPORT
-int yaz_tokenizer_move(yaz_tokenizer_t t);
+yaz_tok_parse_t yaz_tok_parse_create(yaz_tok_cfg_t t, yaz_tok_get_byte_t h,
+ void *vp);
+/** \brief destroys a parse and releases its configuration reference */
YAZ_EXPORT
-const char *yaz_tokenizer_string(yaz_tokenizer_t t);
+void yaz_tok_parse_destroy(yaz_tok_parse_t tp);
+/** \brief advances to the next token and returns its code */
YAZ_EXPORT
-void yaz_tokenizer_single_tokens(yaz_tokenizer_t t, const char *simple);
+int yaz_tok_move(yaz_tok_parse_t tp);
+
+/** \brief returns the text collected for the most recent string token
+ *
+ * The text lives in a buffer owned by the parse and is rewritten by the
+ * next string token; presumably it must be copied to outlive that.
+ */
+YAZ_EXPORT
+const char *yaz_tok_parse_string(yaz_tok_parse_t tp);
YAZ_END_CDECL
#endif
-/* CQL_H_INCLUDED */
+
/*
* Local variables:
* c-basic-offset: 4
/* CCL qualifiers
* Europagate, 1995
*
- * $Id: cclqfile.c,v 1.9 2007-04-26 21:45:17 adam Exp $
+ * $Id: cclqfile.c,v 1.10 2007-04-27 10:09:45 adam Exp $
*
* Old Europagate Log:
*
int ccl_qual_field2(CCL_bibset bibset, const char *cp, const char *qual_name,
const char **addinfo)
{
- yaz_tokenizer_t yt = yaz_tokenizer_create();
+ yaz_tok_cfg_t yt = yaz_tok_cfg_create();
int type_ar[MAX_QUAL];
int value_ar[MAX_QUAL];
int pair_no = 0;
char *type_str = 0;
int t;
+ yaz_tok_parse_t tp;
- yaz_tokenizer_single_tokens(yt, ",=");
- yaz_tokenizer_read_buf(yt, cp);
+ yaz_tok_cfg_single_tokens(yt, ",=");
+
+ tp = yaz_tok_parse_buf(yt, cp);
+
+ yaz_tok_cfg_destroy(yt);
*addinfo = 0;
- t = yaz_tokenizer_move(yt);
- while (t == YAZ_TOKENIZER_STRING)
+ t = yaz_tok_move(tp);
+ while (t == YAZ_TOK_STRING)
{
/* we don't know what lead is yet */
- char *lead_str = xstrdup(yaz_tokenizer_string(yt));
+ char *lead_str = xstrdup(yaz_tok_parse_string(tp));
const char *value_str = 0;
int type = 0, value = 0; /* indicates attribute value UNSET */
- t = yaz_tokenizer_move(yt);
+ t = yaz_tok_move(tp);
if (t == ',')
{
/* full attribute spec: set, type = value */
/* lead is attribute set */
attsets[pair_no] = lead_str;
- t = yaz_tokenizer_move(yt);
- if (t != YAZ_TOKENIZER_STRING)
+ t = yaz_tok_move(tp);
+ if (t != YAZ_TOK_STRING)
{
*addinfo = "token expected";
goto out;
}
xfree(type_str);
- type_str = xstrdup(yaz_tokenizer_string(yt));
- if (yaz_tokenizer_move(yt) != '=')
+ type_str = xstrdup(yaz_tok_parse_string(tp));
+ if (yaz_tok_move(tp) != '=')
{
*addinfo = "= expected";
goto out;
/* lead is first of a list of qualifier aliaeses */
/* qualifier alias: q1 q2 ... */
xfree(lead_str);
- yaz_tokenizer_destroy(yt);
+ yaz_tok_parse_destroy(tp);
ccl_qual_add_combi (bibset, qual_name, cp);
return 0;
}
while (1) /* comma separated attribute value list */
{
- t = yaz_tokenizer_move(yt);
+ t = yaz_tok_move(tp);
/* must have a value now */
- if (t != YAZ_TOKENIZER_STRING)
+ if (t != YAZ_TOK_STRING)
{
*addinfo = "value token expected";
goto out;
}
- value_str = yaz_tokenizer_string(yt);
+ value_str = yaz_tok_parse_string(tp);
if (sscanf(type_str, "%d", &type) == 1)
;
*addinfo = "too many attribute values";
goto out;
}
- t = yaz_tokenizer_move(yt);
+ t = yaz_tok_move(tp);
if (t != ',')
break;
attsets[pair_no] = attsets[pair_no-1];
xfree(type_str);
type_str = 0;
- yaz_tokenizer_destroy(yt);
+ yaz_tok_parse_destroy(tp);
if (*addinfo)
{
* Copyright (C) 1995-2007, Index Data ApS
* See the file LICENSE for details.
*
- * $Id: tokenizer.c,v 1.1 2007-04-26 21:45:17 adam Exp $
+ * $Id: tokenizer.c,v 1.2 2007-04-27 10:09:45 adam Exp $
*/
/**
#include <yaz/wrbuf.h>
#include <yaz/tokenizer.h>
-struct yaz_tokenizer {
- int (*get_byte_func)(const void **vp);
- const void *get_byte_data;
-
+/* State of one tokenizing pass: input source, one byte of push-back and
+ * the text of the current string token. */
+struct yaz_tok_parse {
+ /* pushed-back byte; 0 means nothing is pushed back */
int unget_byte;
+ WRBUF wr_string; /* text of the most recent string/quoted string token */
+ int look; /* code returned by the most recent yaz_tok_move() */
+
+ yaz_tok_cfg_t cfg; /* configuration; this parse holds one reference */
+ yaz_tok_get_byte_t get_byte_func; /* byte source callback */
+ void *get_byte_data; /* passed by address to get_byte_func */
+};
+
+/* Character classes shared by parses; freed when ref_count drops to 0. */
+struct yaz_tok_cfg {
+ int ref_count; /* number of owners (creator plus each parse) */
char *white_space;
char *single_tokens;
char *quote_tokens_begin;
char *quote_tokens_end;
- WRBUF wr_string;
- int look;
};
-void yaz_tokenizer_single_tokens(yaz_tokenizer_t t, const char *simple)
+/* Replaces the set of single-character tokens of configuration t with a
+ * private copy of the string simple; the previous set is freed. */
+void yaz_tok_cfg_single_tokens(yaz_tok_cfg_t t, const char *simple)
{
xfree(t->single_tokens);
t->single_tokens = xstrdup(simple);
}
-yaz_tokenizer_t yaz_tokenizer_create(void)
+/* Allocates a configuration with defaults: white space is blank, tab, CR
+ * and LF; no single-character tokens; double quote opens and closes a
+ * quoted string. The returned object carries one reference, owned by the
+ * caller and released with yaz_tok_cfg_destroy(). */
+yaz_tok_cfg_t yaz_tok_cfg_create(void)
{
- yaz_tokenizer_t t = xmalloc(sizeof(*t));
+ yaz_tok_cfg_t t = xmalloc(sizeof(*t));
t->white_space = xstrdup(" \t\r\n");
t->single_tokens = xstrdup("");
t->quote_tokens_begin = xstrdup("\"");
t->quote_tokens_end = xstrdup("\"");
- t->get_byte_func = 0;
- t->get_byte_data = 0;
- t->wr_string = wrbuf_alloc();
- t->look = YAZ_TOKENIZER_ERROR;
- t->unget_byte = 0;
+ t->ref_count = 1;
return t;
}
-void yaz_tokenizer_destroy(yaz_tokenizer_t t)
+/* Drops one reference to configuration t. The strings and the object
+ * itself are freed only when the last reference goes away, so a
+ * configuration may be destroyed while parses created from it are still
+ * in use. t must not be NULL. */
+void yaz_tok_cfg_destroy(yaz_tok_cfg_t t)
{
- xfree(t->white_space);
- xfree(t->single_tokens);
- xfree(t->quote_tokens_begin);
- xfree(t->quote_tokens_end);
- wrbuf_destroy(t->wr_string);
- xfree(t);
+ t->ref_count--;
+ if (t->ref_count == 0)
+ {
+ xfree(t->white_space);
+ xfree(t->single_tokens);
+ xfree(t->quote_tokens_begin);
+ xfree(t->quote_tokens_end);
+ xfree(t);
+ }
}
-static int read_buf(const void **vp)
+static int read_buf(void **vp)
{
const char *cp = *(const char **) vp;
int ch = *cp;
return ch;
}
-static int get_byte(yaz_tokenizer_t t)
+/* Creates a parse over configuration t that takes its bytes from buf via
+ * read_buf. const is cast away only to fit the generic data pointer of
+ * the callback interface. */
+yaz_tok_parse_t yaz_tok_parse_buf(yaz_tok_cfg_t t, const char *buf)
{
- int ch = t->unget_byte;
- assert(t->get_byte_func);
+ return yaz_tok_parse_create(t, read_buf, (void *) buf);
+}
+
+/* Returns the next input byte: the pushed-back byte if there is one,
+ * otherwise a fresh byte from the source callback. A pushed-back 0 is
+ * indistinguishable from "nothing pushed back", so the callback is simply
+ * asked again in that case. */
+static int get_byte(yaz_tok_parse_t tp)
+{
+ int ch = tp->unget_byte;
+ assert(tp->get_byte_func);
if (ch)
- t->unget_byte = 0;
+ tp->unget_byte = 0;
else
- ch = t->get_byte_func(&t->get_byte_data);
+ ch = tp->get_byte_func(&tp->get_byte_data);
return ch;
}
-static void unget_byte(yaz_tokenizer_t t, int ch)
+/* Pushes ch back so that the next get_byte() returns it. Only one byte
+ * of push-back is kept; a later call overwrites an earlier one. */
+static void unget_byte(yaz_tok_parse_t tp, int ch)
+{
+ tp->unget_byte = ch;
+}
+
+/* Creates a parse that tokenizes the bytes delivered by callback h
+ * according to configuration t. vp is the initial data pointer; its
+ * address is passed to h on every call. The parse takes its own reference
+ * on t and starts with no look-ahead token and no pushed-back byte.
+ * Release with yaz_tok_parse_destroy(). */
+yaz_tok_parse_t yaz_tok_parse_create(yaz_tok_cfg_t t,
+ yaz_tok_get_byte_t h,
+ void *vp)
{
- t->unget_byte = ch;
+ yaz_tok_parse_t tp = xmalloc(sizeof(*tp));
+
+ tp->cfg = t;
+ tp->cfg->ref_count++;
+ tp->get_byte_func = h;
+ tp->get_byte_data = vp;
+
+ tp->look = YAZ_TOK_ERROR;
+ tp->unget_byte = 0;
+
+ tp->wr_string = wrbuf_alloc();
+ return tp;
}
+
-void yaz_tokenizer_read_buf(yaz_tokenizer_t t, const char *buf)
+/* Destroys parse tp: releases its reference on the configuration, frees
+ * the token text buffer and then the parse itself. tp must not be NULL. */
+void yaz_tok_parse_destroy(yaz_tok_parse_t tp)
{
- assert(t);
- t->get_byte_func = read_buf;
- t->get_byte_data = buf;
+ yaz_tok_cfg_destroy(tp->cfg);
+ wrbuf_destroy(tp->wr_string);
+ xfree(tp);
}
-int yaz_tokenizer_move(yaz_tokenizer_t t)
+/* Advances parse tp to the next token and returns its code.
+ *
+ * Leading white space is skipped first. The result is one of:
+ * YAZ_TOK_EOF - the byte source returned 0 (end of input);
+ * a character - the byte is one of the configured single tokens;
+ * YAZ_TOK_QSTRING - a quoted string; the text between the quote
+ * characters is available via yaz_tok_parse_string();
+ * YAZ_TOK_ERROR - input ended before the closing quote was seen;
+ * YAZ_TOK_STRING - an unquoted run of bytes up to white space, a single
+ * token or end of input; the terminating byte is pushed
+ * back so the next call sees it.
+ * The returned code is also remembered in tp->look. */
+int yaz_tok_move(yaz_tok_parse_t tp)
{
+ yaz_tok_cfg_t t = tp->cfg;
const char *cp;
- int ch = get_byte(t);
+ int ch = get_byte(tp);
/* skip white space */
while (ch && strchr(t->white_space, ch))
- ch = get_byte(t);
+ ch = get_byte(tp);
if (!ch)
{
- ch = YAZ_TOKENIZER_EOF;
+ ch = YAZ_TOK_EOF;
}
else if ((cp = strchr(t->single_tokens, ch)))
ch = *cp; /* single token match */
else if ((cp = strchr(t->quote_tokens_begin, ch)))
{ /* quoted string */
int end_ch = t->quote_tokens_end[cp - t->quote_tokens_begin];
- ch = get_byte(t);
- wrbuf_rewind(t->wr_string);
+ ch = get_byte(tp);
+ wrbuf_rewind(tp->wr_string);
while (ch && ch != end_ch)
- wrbuf_putc(t->wr_string, ch);
+ {
+ wrbuf_putc(tp->wr_string, ch);
+ /* fetch the following byte; without this the loop never ends */
+ ch = get_byte(tp);
+ }
if (!ch)
- ch = YAZ_TOKENIZER_ERROR;
+ ch = YAZ_TOK_ERROR;
else
- ch = YAZ_TOKENIZER_QSTRING;
+ ch = YAZ_TOK_QSTRING;
}
else
{ /* unquoted string */
- wrbuf_rewind(t->wr_string);
+ wrbuf_rewind(tp->wr_string);
while (ch && !strchr(t->white_space, ch)
&& !strchr(t->single_tokens, ch))
{
- wrbuf_putc(t->wr_string, ch);
- ch = get_byte(t);
+ wrbuf_putc(tp->wr_string, ch);
+ ch = get_byte(tp);
}
- unget_byte(t, ch);
- ch = YAZ_TOKENIZER_STRING;
+ unget_byte(tp, ch);
+ ch = YAZ_TOK_STRING;
}
- t->look = ch;
- yaz_log(YLOG_LOG, "tokenizer returns %d (%s)", ch,
- wrbuf_cstr(t->wr_string));
-
+ tp->look = ch;
return ch;
}
-const char *yaz_tokenizer_string(yaz_tokenizer_t t)
+/* Returns the text held in the token buffer of parse tp, i.e. the text
+ * of the most recent string or quoted string token. The buffer belongs
+ * to the parse and is rewound by the next string token, so callers that
+ * need the text later should copy it. */
+const char *yaz_tok_parse_string(yaz_tok_parse_t tp)
{
- return wrbuf_cstr(t->wr_string);
+ return wrbuf_cstr(tp->wr_string);
}
/*