2 * Copyright (C) 1994, Index Data I/S
4 * Sebastian Hammer, Adam Dickmeiss
7 * Revision 1.7 1994-09-12 08:06:42 adam
8 * Futher development of insert.c
10 * Revision 1.6 1994/09/06 13:05:15 adam
11 * Further development of insertion. Some special cases are
12 * not properly handled yet! assert(0) are put here. The
13 * binary search in each page definitely reduce usr CPU.
15 * Revision 1.5 1994/09/01 17:49:39 adam
16 * Removed stupid line. Work on insertion in dictionary. Not finished yet.
18 * Revision 1.4 1994/09/01 17:44:09 adam
19 * depend include change.
21 * Revision 1.3 1994/08/18 12:40:56 adam
22 * Some development of dictionary. Not finished at all!
24 * Revision 1.2 1994/08/17 13:32:19 adam
25 * Use cache in dict - not in bfile.
27 * Revision 1.1 1994/08/16 16:26:48 adam
41 static int dict_ins (Dict dict, const Dict_char *str,
42 Dict_ptr back_ptr, int userlen, void *userinfo);
45 static Dict_ptr new_page (Dict dict, Dict_ptr back_ptr, void **pp)
48 Dict_ptr ptr = dict->head.free_list;
49 if (dict->head.free_list == dict->head.last)
51 dict->head.free_list++;
52 dict->head.last = dict->head.free_list;
53 dict_bf_newp (dict->dbf, ptr, &p);
57 dict_bf_readp (dict->dbf, dict->head.free_list, &p);
58 dict->head.free_list = DICT_nextptr(p);
59 if (dict->head.free_list == 0)
60 dict->head.free_list = dict->head.last;
64 DICT_backptr(p) = back_ptr;
67 DICT_size(p) = DICT_infoffset;
73 static int split_page (Dict dict, Dict_ptr ptr, void *p)
79 short *indxp, *best_indxp;
80 Dict_char best_char = 0;
81 Dict_char prev_char = 0;
82 int best_no = -1, no_current = 1;
84 indxp = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short));
85 for (i = DICT_nodir (p); --i >= 0; --indxp)
87 if (*indxp > 0) /* tail string here! */
91 memcpy (&dc, (char*) p + *indxp, sizeof(dc));
93 { /* first entry met */
94 best_char = prev_char = dc;
97 else if (prev_char == dc)
98 { /* same char prefix. update */
99 if (++no_current > best_no)
100 { /* best entry so far */
101 best_no = no_current;
107 { /* new char prefix. restore */
113 if (best_no < 0) /* we didn't find any tail string entry at all! */
116 subptr = new_page (dict, ptr, &subp);
117 /* scan entries to see if there is a string with */
118 /* length 1. info_here indicates if such entry exist */
120 for (indxp=best_indxp, i=0; i<best_no; i++, indxp++)
127 info = (char*) p + *indxp; /* entry start */
128 assert (*info == best_char);
129 slen = dict_strlen(info);
135 info_here = info+(slen+1)*sizeof(Dict_char);
138 /* calculate the amount of bytes needed for this entry when */
139 /* transformed to a sub entry */
140 need = sizeof(Dict_char)+sizeof(Dict_ptr)+1;
145 /* now loop on all entries with string length > 1 i.e. all */
146 /* those entries which contribute to a sub page */
148 for (i=0; i<best_no; i++, indxp++)
155 info = (char*) p + *indxp; /* entry start */
156 assert (*info == best_char);
157 slen = dict_strlen(info);
161 info1 = info+(1+slen)*sizeof(Dict_char); /* info start */
163 if (need <= (1+slen)*sizeof(Dict_char) + 1 + *info1)
164 best_indxp = indxp; /* space for entry */
165 dict_ins (dict, info+sizeof(Dict_char), subptr, *info1, info1+1);
166 dict_bf_readp (dict->dbf, ptr, &p);
170 { /* there was a hole big enough for a sub entry */
171 char *info = (char*) p + *best_indxp;
174 *--indxp = - *best_indxp;
176 DICT_nodir (p) -= (best_no-1);
177 indxp1 = (short*)((char*)p+DICT_PAGESIZE-DICT_nodir(p)*sizeof(short));
178 while (indxp != indxp1)
181 *indxp = indxp[1-best_no];
183 memcpy (info, &subptr, sizeof(Dict_ptr)); /* store subptr */
184 info += sizeof(Dict_ptr);
185 memcpy (info, &best_char, sizeof(Dict_char)); /* store sub char */
186 info += sizeof(Dict_char);
188 memcpy (info, info_here, *info_here+1); /* with information */
190 *info = 0; /* without info */
194 indxp = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short));
195 for (i = DICT_nodir (p); --i >= 0; --indxp)
197 if (*indxp > 0) /* tail string here! */
201 memcpy (&dc, (char*) p + *indxp, sizeof(dc));
202 assert (dc != best_char);
203 assert (dc >= prev_char);
209 memcpy (&dc, (char*)p - *indxp+sizeof(Dict_ptr),
211 assert (dc > prev_char);
214 assert (best_indxp == NULL);
225 short *indxp1, *indxp2;
228 DICT_nodir(p) -= best_no;
230 indxp1 = (short*)((char*) p+DICT_PAGESIZE-DICT_nodir(p)*sizeof(short));
234 indxp2[0] = indxp2[-best_no];
235 } while (indxp2 != indxp1);
240 static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out)
242 char *np = xmalloc (dict->head.page_size);
244 short *indxp1, *indxp2;
247 indxp1 = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short));
248 indxp2 = (short*) ((char*) np+DICT_PAGESIZE);
249 info2 = (char*) np + DICT_infoffset;
250 for (i = DICT_nodir (p); --i >= 0; --indxp1)
252 if (*indxp1 > 0) /* tail string here! */
254 /* string (Dict_char *) DICT_EOS terminated */
255 /* unsigned char length of information */
256 /* char * information */
258 info1 = (char*) p + *indxp1;
259 if (out && memcmp (out, info1, sizeof(Dict_char)) == 0)
261 *--indxp2 = info2 - np;
262 slen = (dict_strlen(info1)+1)*sizeof(Dict_char);
263 memcpy (info2, info1, slen);
269 /* Dict_ptr subptr */
270 /* Dict_char sub char */
271 /* unsigned char length of information */
272 /* char * information */
274 assert (*indxp1 < 0);
275 *--indxp2 = -(info2 - np);
276 info1 = (char*) p - *indxp1;
277 memcpy (info2, info1, sizeof(Dict_ptr)+sizeof(Dict_char));
278 info2 += sizeof(Dict_ptr)+sizeof(Dict_char);
279 info1 += sizeof(Dict_ptr)+sizeof(Dict_char);
282 memcpy (info2, info1, slen);
286 memcpy ((char*)p+DICT_infoffset, (char*)np+DICT_infoffset,
287 DICT_PAGESIZE-DICT_infoffset);
288 DICT_size(p) = info2 - np;
296 /* return 0 if new */
297 /* return 1 if before but change of info */
298 /* return 2 if same as before */
300 static int dict_ins (Dict dict, const Dict_char *str,
301 Dict_ptr back_ptr, int userlen, void *userinfo)
303 int hi, lo, mid, i, slen, cmp = 1;
304 Dict_ptr ptr = back_ptr;
310 ptr = new_page (dict, back_ptr, &p);
312 dict_bf_readp (dict->dbf, ptr, &p);
318 hi = DICT_nodir(p)-1;
319 indxp = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short));
325 /* string (Dict_char *) DICT_EOS terminated */
326 /* unsigned char length of information */
327 /* char * information */
328 info = (char*)p + indxp[-mid];
329 cmp = dict_strcmp((Dict_char*) info, str);
332 info += (dict_strlen(info)+1)*sizeof(Dict_char);
333 /* consider change of userinfo length... */
334 if (*info == userlen)
336 if (memcmp (info+1, userinfo, userlen))
338 dict_bf_touch (dict->dbf, ptr);
339 memcpy (info+1, userinfo, userlen);
344 else if (*info > userlen)
348 dict_bf_touch (dict->dbf, ptr);
349 memcpy (info+1, userinfo, userlen);
360 /* Dict_ptr subptr */
361 /* Dict_char sub char */
362 /* unsigned char length of information */
363 /* char * information */
364 info = (char*)p - indxp[-mid];
365 memcpy (&dc, info+sizeof(Dict_ptr), sizeof(Dict_char));
369 memcpy (&subptr, info, sizeof(Dict_ptr));
370 if (*++str == DICT_EOS)
374 xlen = info[sizeof(Dict_ptr)+sizeof(Dict_char)];
377 if (memcmp (info+sizeof(Dict_ptr)+sizeof(Dict_char)+1,
380 dict_bf_touch (dict->dbf, ptr);
381 memcpy (info+sizeof(Dict_ptr)+sizeof(Dict_char)+1,
387 else if (xlen > userlen)
390 info[sizeof(Dict_ptr)+sizeof(Dict_char)] = userlen;
391 memcpy (info+sizeof(Dict_ptr)+sizeof(Dict_char)+1,
393 dict_bf_touch (dict->dbf, ptr);
396 if (DICT_size(p)+sizeof(Dict_char)+sizeof(Dict_ptr)+
398 DICT_PAGESIZE - (1+DICT_nodir(p))*sizeof(short))
400 if (DICT_type(p) == 1)
402 clean_page (dict, ptr, p, NULL);
403 dict_bf_touch (dict->dbf, ptr);
404 return dict_ins (dict, str-1, ptr,
407 if (split_page (dict, ptr, p))
409 log (LOG_FATAL, "Unable to split page %d\n", ptr);
412 return dict_ins (dict, str-1, ptr, userlen, userinfo);
416 info = (char*)p + DICT_size(p);
417 memcpy (info, &subptr, sizeof(subptr));
418 memcpy (info+sizeof(Dict_ptr), &dc, sizeof(Dict_char));
419 info[sizeof(Dict_char)+sizeof(Dict_ptr)] = userlen;
420 memcpy (info+sizeof(Dict_char)+sizeof(Dict_ptr)+1,
422 indxp[-mid] = -DICT_size(p);
423 DICT_size(p) += sizeof(Dict_char)+sizeof(Dict_ptr)
426 dict_bf_touch (dict->dbf, ptr);
436 subptr = new_page (dict, ptr, NULL);
437 memcpy (info, &subptr, sizeof(subptr));
438 dict_bf_touch (dict->dbf, ptr);
440 return dict_ins (dict, str, subptr, userlen, userinfo);
450 if (lo>hi && cmp < 0)
452 slen = (dict_strlen(str)+1)*sizeof(Dict_char);
453 if (DICT_size(p)+slen+userlen >=
454 DICT_PAGESIZE - (1+DICT_nodir(p))*sizeof(short)) /* overflow? */
456 if (DICT_type(p) == 1)
458 clean_page (dict, ptr, p, NULL);
459 dict_bf_touch (dict->dbf, ptr);
460 return dict_ins (dict, str, ptr, userlen, userinfo);
466 if (split_page (dict, ptr, p))
468 log (LOG_FATAL, "Unable to split page %d\n", ptr);
471 if (DICT_size(p)+slen+userlen <
472 DICT_PAGESIZE - (1+DICT_nodir(p))*sizeof(short))
475 clean_page (dict, ptr, p, NULL);
476 } while (DICT_size(p)+slen+userlen > DICT_PAGESIZE -
477 (1+DICT_nodir(p))*sizeof(short));
478 return dict_ins (dict, str, ptr, userlen, userinfo);
484 indxp1 = (short*)((char*) p + DICT_PAGESIZE
485 - DICT_nodir(p)*sizeof(short));
486 for (; indxp1 != indxp; indxp1++)
487 indxp1[0] = indxp1[1];
489 indxp1 = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short));
490 for (i = DICT_nodir (p); --i >= 0; --indxp1)
494 info = (char*)p - *indxp1;
495 assert (info[sizeof(Dict_ptr)] > ' ');
502 info = (char*)p + DICT_size(p);
503 memcpy (info, str, slen);
506 memcpy (info, userinfo, userlen);
509 *indxp = DICT_size(p);
510 DICT_size(p) = info- (char*) p;
511 dict_bf_touch (dict->dbf, ptr);
517 int dict_insert (Dict dict, const Dict_char *str, int userlen, void *userinfo)
519 assert (dict->head.last > 0);
520 if (dict->head.last == 1)
521 return dict_ins (dict, str, 0, userlen, userinfo);
523 return dict_ins (dict, str, 1, userlen, userinfo);