2 * Copyright (C) 1995-2007, Index Data ApS
3 * See the file LICENSE for details.
5 * $Id: tokenizer.c,v 1.1 2007-04-26 21:45:17 adam Exp $
10 * \brief Implements a simple tokenizer (white space, single-character, quoted and unquoted string tokens)
18 #include <yaz/wrbuf.h>
19 #include <yaz/tokenizer.h>
/*
 * Tokenizer state.
 * NOTE(review): only some members and no closing brace are visible in this
 * excerpt. Other code in this file also uses t->unget_byte, t->white_space,
 * t->single_tokens, t->wr_string and t->look, so those members are
 * presumably declared on lines not shown here - confirm against the full file.
 */
21 struct yaz_tokenizer {
/* Input callback; get_byte() invokes it with the address of get_byte_data. */
22 int (*get_byte_func)(const void **vp);
/* Cursor handed to get_byte_func; yaz_tokenizer_read_buf() points it at the
   caller's buffer. */
23 const void *get_byte_data;
/* Characters that open a quoted string; yaz_tokenizer_move() looks the
   current byte up in this string. */
28 char *quote_tokens_begin;
/* Characters that close a quoted string; the entry at index i is the
   terminator for quote_tokens_begin[i]. */
29 char *quote_tokens_end;
/**
 * \brief Sets the characters that yaz_tokenizer_move() matches as
 *        single-character tokens.
 * \param t tokenizer to update
 * \param simple string holding the single-token characters
 *
 * The currently stored string is released with xfree() and replaced by the
 * result of xstrdup(simple), so the tokenizer does not keep the caller's
 * pointer.
 */
34 void yaz_tokenizer_single_tokens(yaz_tokenizer_t t, const char *simple)
36 xfree(t->single_tokens);
37 t->single_tokens = xstrdup(simple);
/**
 * \brief Allocates a tokenizer and fills in its default settings.
 *
 * Defaults visible here: white space is space, tab, CR and LF; the set of
 * single-character tokens is empty; the double quote both opens and closes
 * a quoted string; wr_string is a fresh buffer from wrbuf_alloc(); look is
 * YAZ_TOKENIZER_ERROR.
 *
 * NOTE(review): the return statement and any initialisation of
 * get_byte_func, get_byte_data and unget_byte are not visible in this
 * excerpt - confirm against the full file.
 */
40 yaz_tokenizer_t yaz_tokenizer_create(void)
42 yaz_tokenizer_t t = xmalloc(sizeof(*t));
43 t->white_space = xstrdup(" \t\r\n");
44 t->single_tokens = xstrdup("");
/* Begin and end quote sets are parallel strings: same index, same pair. */
45 t->quote_tokens_begin = xstrdup("\"");
46 t->quote_tokens_end = xstrdup("\"");
49 t->wr_string = wrbuf_alloc();
50 t->look = YAZ_TOKENIZER_ERROR;
/**
 * \brief Releases the resources held by a tokenizer.
 * \param t tokenizer to tear down
 *
 * Frees the four strings set up by yaz_tokenizer_create() with xfree() and
 * destroys the wr_string buffer.
 *
 * NOTE(review): releasing t itself is not visible in this excerpt -
 * confirm it happens on a line not shown here.
 */
55 void yaz_tokenizer_destroy(yaz_tokenizer_t t)
57 xfree(t->white_space);
58 xfree(t->single_tokens);
59 xfree(t->quote_tokens_begin);
60 xfree(t->quote_tokens_end);
61 wrbuf_destroy(t->wr_string);
/*
 * Byte source for in-memory input; yaz_tokenizer_read_buf() installs it as
 * get_byte_func.
 *
 * vp: address of the cursor. *vp is interpreted as a const char * position,
 * loaded into cp and stored back through vp before the function ends.
 *
 * NOTE(review): the lines that read the byte and move cp are not visible in
 * this excerpt; presumably it returns the byte at cp and advances past it -
 * TODO confirm, including what happens at the terminating NUL.
 */
65 static int read_buf(const void **vp)
67 const char *cp = *(const char **) vp;
72 *(const char **)vp = cp;
/*
 * Fetches the next input byte for the tokenizer.
 *
 * ch starts out as the stored t->unget_byte value. An input source must
 * already be installed (asserted). The callback is called with the address
 * of t->get_byte_data so that it can update the cursor.
 *
 * NOTE(review): the condition that decides between the pushed-back byte and
 * the callback, and the return statement, are not visible in this excerpt.
 */
77 static int get_byte(yaz_tokenizer_t t)
79 int ch = t->unget_byte;
80 assert(t->get_byte_func);
84 ch = t->get_byte_func(&t->get_byte_data);
/*
 * NOTE(review): only the signature is visible in this excerpt. Since
 * get_byte() begins by reading t->unget_byte, this presumably stores ch
 * there so that it is seen again on the next read - TODO confirm.
 */
88 static void unget_byte(yaz_tokenizer_t t, int ch)
/**
 * \brief Selects a character buffer as the tokenizer's input.
 * \param t tokenizer to configure
 * \param buf input text
 *
 * Installs read_buf() as the byte source and points the cursor at buf.
 * Only the pointer is stored (no copy is made here), so buf presumably has
 * to stay valid for as long as tokens are read from it.
 */
93 void yaz_tokenizer_read_buf(yaz_tokenizer_t t, const char *buf)
96 t->get_byte_func = read_buf;
97 t->get_byte_data = buf;
/**
 * \brief Reads the next token from the input and classifies it.
 * \param t tokenizer whose input is consumed
 *
 * Classification visible in this excerpt, in order:
 *  - leading bytes found in white_space are skipped;
 *  - YAZ_TOKENIZER_EOF is produced (the guarding condition is not visible;
 *    presumably a zero byte / end of input);
 *  - a byte found in single_tokens is itself the token code;
 *  - a byte found in quote_tokens_begin starts a quoted string: bytes are
 *    appended to wr_string until the paired closing character or a zero
 *    byte, giving YAZ_TOKENIZER_QSTRING or YAZ_TOKENIZER_ERROR (the test
 *    choosing between the two is not visible);
 *  - anything else starts an unquoted string that is appended to wr_string
 *    until a zero byte, a white_space byte or a single_tokens byte, giving
 *    YAZ_TOKENIZER_STRING.
 * The token code and the current wr_string text are then logged.
 *
 * NOTE(review): braces, the get_byte()/unget_byte() calls inside the loops,
 * the declaration of cp and the return statement are not visible here.
 */
100 int yaz_tokenizer_move(yaz_tokenizer_t t)
103 int ch = get_byte(t);
105 /* skip white space */
106 while (ch && strchr(t->white_space, ch))
110 ch = YAZ_TOKENIZER_EOF;
112 else if ((cp = strchr(t->single_tokens, ch)))
113 ch = *cp; /* single token match */
114 else if ((cp = strchr(t->quote_tokens_begin, ch)))
115 { /* quoted string */
/* The closing character sits at the same index as the opening one.
   NOTE(review): assumes quote_tokens_end is at least as long as
   quote_tokens_begin - confirm. */
116 int end_ch = t->quote_tokens_end[cp - t->quote_tokens_begin];
118 wrbuf_rewind(t->wr_string);
/* Stops at the closing quote or at a zero byte. */
119 while (ch && ch != end_ch)
120 wrbuf_putc(t->wr_string, ch);
122 ch = YAZ_TOKENIZER_ERROR;
124 ch = YAZ_TOKENIZER_QSTRING;
127 { /* unquoted string */
128 wrbuf_rewind(t->wr_string);
/* An unquoted string ends at a zero byte, white space or a single token. */
129 while (ch && !strchr(t->white_space, ch)
130 && !strchr(t->single_tokens, ch))
132 wrbuf_putc(t->wr_string, ch);
136 ch = YAZ_TOKENIZER_STRING;
139 yaz_log(YLOG_LOG, "tokenizer returns %d (%s)", ch,
140 wrbuf_cstr(t->wr_string));
/**
 * \brief Returns the text collected for the current token.
 * \param t tokenizer to query
 * \return the contents of wr_string as given by wrbuf_cstr()
 *
 * wr_string is filled by yaz_tokenizer_move() for quoted and unquoted
 * string tokens.
 */
145 const char *yaz_tokenizer_string(yaz_tokenizer_t t)
147 return wrbuf_cstr(t->wr_string);
153 * indent-tabs-mode: nil
155 * vim: shiftwidth=4 tabstop=8 expandtab