2 * Copyright (C) 1994, Index Data I/S
4 * Sebastian Hammer, Adam Dickmeiss
7 * Revision 1.1 1994-10-03 17:23:04 adam
8 * First version of dictionary lookup with regular expressions and errors.
20 typedef unsigned MatchWord;
24 int n; /* no of MatchWord needed */
25 int range; /* max no. of errors */
26 MatchWord *Sc; /* Mask Sc */
31 static INLINE void set_bit (MatchContext *mc, MatchWord *m, int ch, int state)
33 int off = state & (WORD_BITS-1);
34 int wno = state / WORD_BITS;
36 m[mc->n * ch + wno] |= 1<<off;
39 static INLINE void reset_bit (MatchContext *mc, MatchWord *m, int ch,
42 int off = state & (WORD_BITS-1);
43 int wno = state / WORD_BITS;
45 m[mc->n * ch + wno] &= ~(1<<off);
48 static INLINE MatchWord get_bit (MatchContext *mc, MatchWord *m, int ch,
51 int off = state & (WORD_BITS-1);
52 int wno = state / WORD_BITS;
54 return m[mc->n * ch + wno] & (1<<off);
57 static MatchContext *mk_MatchContext (DFA_states *dfas, int range)
59 MatchContext *mc = xmalloc (sizeof(*mc));
62 mc->n = (dfas->no+WORD_BITS) / WORD_BITS;
64 mc->Sc = xcalloc (sizeof(*mc->Sc) * 256 * mc->n, 1);
66 for (i=0; i<dfas->no; i++)
69 DFA_state *state = dfas->sortarray[i];
71 for (j=0; j<state->tran_no; j++)
74 int ch0 = state->trans[j].ch[0];
75 int ch1 = state->trans[j].ch[1];
76 assert (ch0 >= 0 && ch1 >= 0);
78 for (ch = ch0; ch <= ch1; ch++)
79 set_bit (mc, mc->Sc, ch, i);
86 static void mask_shift (MatchContext *mc, MatchWord *Rdst, MatchWord *Rsrc,
87 DFA_states *dfas, int ch)
90 MatchWord *Rsrc_p = Rsrc, mask;
93 for (j = 0; j<mc->n; j++)
97 for (j = 1; j<mc->n; j++)
103 for (j = 0; j<WORD_BITS/4; j++)
109 DFA_state *state = dfas->sortarray[s];
110 int i = state->tran_no;
112 if (ch >= state->trans[i].ch[0] &&
113 ch <= state->trans[i].ch[1])
114 set_bit (mc, Rdst, 0, state->trans[i].to);
118 DFA_state *state = dfas->sortarray[s+1];
119 int i = state->tran_no;
121 if (ch >= state->trans[i].ch[0] &&
122 ch <= state->trans[i].ch[1])
123 set_bit (mc, Rdst, 0, state->trans[i].to);
127 DFA_state *state = dfas->sortarray[s+2];
128 int i = state->tran_no;
130 if (ch >= state->trans[i].ch[0] &&
131 ch <= state->trans[i].ch[1])
132 set_bit (mc, Rdst, 0, state->trans[i].to);
136 DFA_state *state = dfas->sortarray[s+3];
137 int i = state->tran_no;
139 if (ch >= state->trans[i].ch[0] &&
140 ch <= state->trans[i].ch[1])
141 set_bit (mc, Rdst, 0, state->trans[i].to);
152 static void shift (MatchContext *mc, MatchWord *Rdst, MatchWord *Rsrc,
156 MatchWord *Rsrc_p = Rsrc, mask;
157 for (j = 0; j<mc->n; j++)
162 for (j = 0; j<WORD_BITS/4; j++)
168 DFA_state *state = dfas->sortarray[s];
169 int i = state->tran_no;
171 set_bit (mc, Rdst, 0, state->trans[i].to);
175 DFA_state *state = dfas->sortarray[s+1];
176 int i = state->tran_no;
178 set_bit (mc, Rdst, 0, state->trans[i].to);
182 DFA_state *state = dfas->sortarray[s+2];
183 int i = state->tran_no;
185 set_bit (mc, Rdst, 0, state->trans[i].to);
189 DFA_state *state = dfas->sortarray[s+3];
190 int i = state->tran_no;
192 set_bit (mc, Rdst, 0, state->trans[i].to);
203 static void or (MatchContext *mc, MatchWord *Rdst,
204 MatchWord *Rsrc1, MatchWord *Rsrc2)
207 for (i = 0; i<mc->n; i++)
208 Rdst[i] = Rsrc1[i] | Rsrc2[i];
211 static int move (MatchContext *mc, MatchWord *Rj1, MatchWord *Rj,
212 Dict_char ch, DFA_states *dfas, MatchWord *Rtmp)
215 MatchWord *Rj_a = Rtmp;
216 MatchWord *Rj_b = Rtmp + mc->n;
217 MatchWord *Rj_c = Rtmp + 2*mc->n;
219 mask_shift (mc, Rj1, Rj, dfas, ch);
220 for (d = 1; d <= mc->range; d++)
222 mask_shift (mc, Rj_b, Rj+d*mc->n, dfas, ch); /* 1 */
224 or (mc, Rj_a, Rj+(d-1)*mc->n, Rj1+(d-1)*mc->n); /* 2,3 */
226 shift (mc, Rj_c, Rj_a, dfas);
228 or (mc, Rj_a, Rj_b, Rj_c); /* 1,2,3*/
230 or (mc, Rj1+d*mc->n, Rj_a, Rj+(d-1)*mc->n); /* 1,2,3,4 */
237 static int dict_grep (Dict dict, Dict_ptr ptr, MatchContext *mc,
238 MatchWord *Rj, int pos,
239 int (*userfunc)(Dict_char *name, char *info),
240 Dict_char *prefix, DFA_states *dfas)
247 dict_bf_readp (dict->dbf, ptr, &p);
249 hi = DICT_nodir(p)-1;
250 indxp = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short));
255 /* string (Dict_char *) DICT_EOS terminated */
256 /* unsigned char length of information */
257 /* char * information */
260 info = (char*)p + indxp[-lo];
265 MatchWord *Rj0 = Rj + j *(1+mc->range)*mc->n;
266 MatchWord *Rj1 = Rj + (j+1)*(1+mc->range)*mc->n;
267 MatchWord *Rj_tmp = Rj + (j+2)*(1+mc->range)*mc->n;
269 memcpy (&ch, info+j*sizeof(Dict_char), sizeof(Dict_char));
274 (*userfunc)(prefix, info+(j+1)*sizeof(Dict_char));
279 move (mc, Rj1, Rj0, ch, dfas, Rj_tmp);
280 for (d = mc->n; --d >= 0; )
281 if (Rj1[mc->range*mc->n + d])
285 for (s = 0; s<dfas->no; s++)
287 if (dfas->sortarray[s]->rule_no)
288 if (get_bit (mc, Rj1+mc->range*mc->n, 0, s))
291 for (d = 0; d <= mc->range; d++)
292 reset_bit (mc, Rj1+d*mc->n, 0, dfas->no);
297 MatchWord *Rj1 = Rj+ (1+mc->range)*mc->n;
298 MatchWord *Rj_tmp = Rj+2*(1+mc->range)*mc->n;
302 /* Dict_ptr subptr */
303 /* Dict_char sub char */
304 /* unsigned char length of information */
305 /* char * information */
306 info = (char*)p - indxp[-lo];
307 memcpy (&ch, info+sizeof(Dict_ptr), sizeof(Dict_char));
310 move (mc, Rj1, Rj, ch, dfas, Rj_tmp);
311 for (d = mc->n; --d >= 0; )
312 if (Rj1[mc->range*mc->n + d])
317 if (info[sizeof(Dict_ptr)+sizeof(Dict_char)])
320 for (s = 0; s<dfas->no; s++)
321 if (dfas->sortarray[s]->rule_no)
322 if (get_bit (mc, Rj1+mc->range*mc->n, 0, s))
324 prefix[pos+1] = DICT_EOS;
325 (*userfunc)(prefix, info+sizeof(Dict_ptr)+
330 memcpy (&subptr, info, sizeof(Dict_ptr));
333 dict_grep (dict, subptr, mc, Rj1, pos+1,
334 userfunc, prefix, dfas);
335 dict_bf_readp (dict->dbf, ptr, &p);
336 indxp = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short));
345 int dict_lookup_grep (Dict dict, Dict_char *pattern, int range,
346 int (*userfunc)(Dict_char *name, char *info))
349 Dict_char prefix[2048];
350 char *this_pattern = pattern;
353 DFA *dfa = init_dfa();
356 i = parse_dfa (dfa, &this_pattern, dfa_thompson_chars);
357 if (i || *this_pattern)
362 dfa->root = dfa->top;
363 dfas = mk_dfas (dfa, 200);
366 mc = mk_MatchContext (dfas, range);
368 Rj = xcalloc ((1000+dict_strlen(pattern)+range+2)*(range+1)*mc->n,
371 set_bit (mc, Rj, 0, 0);
372 for (d = 1; d<=mc->range; d++)
375 memcpy (Rj + mc->n * d, Rj + mc->n * (d-1), mc->n * sizeof(*Rj));
376 for (s = 0; s<dfas->no; s++)
378 if (get_bit (mc, Rj, d-1, s))
380 DFA_state *state = dfas->sortarray[s];
381 int i = state->tran_no;
383 set_bit (mc, Rj, d, state->trans[i].to);
387 i = dict_grep (dict, 1, mc, Rj, 0, userfunc, prefix, dfas);