* Sebastian Hammer, Adam Dickmeiss
*
* $Log: dicttest.c,v $
- * Revision 1.2 1994-08-18 12:40:54 adam
+ * Revision 1.3 1994-09-01 17:44:06 adam
+ * depend include change.
+ * CVS ----------------------------------------------------------------------
+ *
+ * Revision 1.2 1994/08/18 12:40:54 adam
* Some development of dictionary. Not finished at all!
*
* Revision 1.1 1994/08/16 16:26:47 adam
{
const char *name = NULL;
const char *inputfile = NULL;
+ const char *base = NULL;
int rw = 0;
+ int infosize = 2;
int cache = 10;
int ret;
char *arg;
prog = argv[0];
- log_init (LOG_DEFAULT_LEVEL, prog, NULL);
if (argc < 2)
{
fprintf (stderr, "usage:\n"
- " %s [-v n] [-i f] [-w] [-c n] file\n", prog);
+ " %s [-s n] [-v n] [-i f] [-w] [-c n] base file\n",
+ prog);
exit (1);
}
- while ((ret = options ("v:i:wc:", argv, argc, &arg)) != -2)
+ while ((ret = options ("s:v:i:wc:", argv, argc, &arg)) != -2)
{
if (ret == 0)
{
- if (name)
+ if (!base)
+ base = arg;
+ else if (!name)
+ name = arg;
+ else
{
log (LOG_FATAL, "too many files specified\n");
exit (1);
}
- name = arg;
}
else if (ret == 'c')
{
inputfile = arg;
rw = 1;
}
+ else if (ret == 's')
+ {
+ infosize = atoi(arg);
+ }
else if (ret == 'v')
{
log_init (atoi(arg), prog, NULL);
exit (1);
}
}
- if (!name)
+ if (!base || !name)
+ {
+ log (LOG_FATAL, "no base and/or dictionary specified");
+ exit (1);
+ }
+ common_resource = res_open (base);
+ if (!common_resource)
{
- log (LOG_FATAL, "no dictionary file given");
+ log (LOG_FATAL, "cannot open resource `%s'", base);
exit (1);
}
dict = dict_open (name, cache, rw);
if (!dict)
{
- log (LOG_FATAL, "dict_open fail");
+ log (LOG_FATAL, "dict_open fail of `%s'", name);
exit (1);
}
if (inputfile)
FILE *ipf;
char ipf_buf[256];
char word[256];
- int i, line = 1;
+ int line = 1;
+ char infobytes[120];
+ memset (infobytes, 0, 120);
if (!(ipf = fopen(inputfile, "r")))
{
while (fgets (ipf_buf, 255, ipf))
{
- for (i=0; i<255; i++)
- if (ipf_buf[i] > ' ')
- word[i] = ipf_buf[i];
- else
- break;
- word[i] = 0;
- if (i)
- dict_insert (dict, word, &line);
+ if (sscanf (ipf_buf, "%s", word) == 1)
+ {
+ sprintf (infobytes, "%d", line);
+ dict_insert (dict, word, infosize, infobytes);
+ }
++line;
}
fclose (ipf);
}
dict_close (dict);
+ res_close (common_resource);
return 0;
}
* Sebastian Hammer, Adam Dickmeiss
*
* $Log: insert.c,v $
- * Revision 1.3 1994-08-18 12:40:56 adam
+ * Revision 1.4 1994-09-01 17:44:09 adam
+ * depend include change.
+ * CVS ----------------------------------------------------------------------
+ *
+ * Revision 1.3 1994/08/18 12:40:56 adam
* Some development of dictionary. Not finished at all!
*
* Revision 1.2 1994/08/17 13:32:19 adam
*
*/
-#include <stdlib.h>
#include <string.h>
+#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <dict.h>
+static int dict_ins (Dict dict, const Dict_char *str,
+ Dict_ptr back_ptr, int userlen, void *userinfo);
+
+
static Dict_ptr new_page (Dict dict, Dict_ptr back_ptr, void **pp)
{
void *p;
dict->head.free_list = dict->head.last;
}
assert (p);
- DICT_type(p) = 1;
+ DICT_type(p) = 0;
DICT_backptr(p) = back_ptr;
DICT_nextptr(p) = 0;
DICT_nodir(p) = 0;
return ptr;
}
-static int dict_ins (Dict dict, const Dict_char *str, Dict_ptr back_ptr,
- void *p, void *userinfo)
+static int split_page (Dict dict, Dict_ptr ptr, void *p)
+{
+ void *subp;
+ char *info_here;
+ Dict_ptr subptr;
+ int i, need;
+ short *indxp, *best_indxp;
+ Dict_char best_char;
+ Dict_char prev_char;
+ int best_no = -1, no_current;
+
+ indxp = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short));
+ for (i = DICT_nodir (p); --i >= 0; --indxp)
+ {
+ if (*indxp > 0) /* tail string here! */
+ {
+ Dict_char dc;
+
+ memcpy (&dc, (char*) p + *indxp, sizeof(dc));
+ if (best_no < 0)
+ { /* first entry met */
+ best_char = prev_char = dc;
+ no_current = best_no = 1;
+ }
+ else if (prev_char == dc)
+ { /* same char prefix. update */
+ if (++no_current > best_no)
+ { /* best entry so far */
+ best_no = no_current;
+ best_char = dc;
+ best_indxp = indxp;
+ }
+ }
+ else
+ { /* new char prefix. restore */
+ prev_char = dc;
+ no_current = 1;
+ }
+ }
+ }
+ if (best_no < 0) /* we didn't find any tail string entry at all! */
+ return -1;
+
+ subptr = new_page (dict, ptr, &subp);
+ /* scan entries to see if there is a string with */
+ /* length 1. info_here indicates if such entry exist */
+ info_here = NULL;
+ for (indxp=best_indxp, i=0; i<best_no; i++, indxp++)
+ {
+ char *info;
+ int slen;
+
+ assert (*indxp > 0);
+
+ info = (char*) p + *indxp; /* entry start */
+ slen = dict_strlen(info);
+
+ assert (slen > 0);
+ if (slen == 1)
+ {
+ assert (!info_here);
+ info_here = info+(slen+1)*sizeof(Dict_char);
+ }
+ }
+ /* calculate the amount of bytes needed for this entry when */
+ /* transformed to a sub entry */
+ need = sizeof(Dict_char)+sizeof(Dict_ptr)+1;
+ if (info_here)
+ need += *info_here;
+
+ indxp = best_indxp;
+ /* now loop on all entries with string length > 1 i.e. all */
+ /* those entries which contribute to a sub page */
+ best_indxp = NULL;
+ for (i=0; i<best_no; i++, indxp++)
+ {
+ char *info, *info1;
+ int slen;
+
+ assert (*indxp > 0);
+
+ info = (char*) p + *indxp; /* entry start */
+ slen = dict_strlen(info);
+
+ if (slen > 1)
+ {
+ info1 = info+(1+slen)*sizeof(Dict_char); /* info start */
+
+ if (need <= (1+slen)*sizeof(Dict_char) + 1 + *info1)
+ best_indxp = indxp; /* space for entry */
+ dict_ins (dict, info+sizeof(Dict_char), subptr, *info1, info1+1);
+ }
+ }
+ if (best_indxp)
+ { /* there was a hole big enough for a sub entry */
+ char *info = (char*) p + *best_indxp;
+ short *indxp1;
+
+ *--indxp = - *best_indxp;
+ DICT_type(p) = 1;
+ DICT_nodir (p) -= (best_no-1);
+ indxp1 = (short*)((char*)p+DICT_PAGESIZE-DICT_nodir(p)*sizeof(short));
+ while (indxp != indxp1)
+ {
+ --indxp;
+ *indxp = indxp[1-best_no];
+ }
+ memcpy (info, &subptr, sizeof(Dict_ptr)); /* store subptr */
+ info += sizeof(Dict_ptr);
+ memcpy (info, &best_char, sizeof(Dict_char)); /* store sub char */
+ info += sizeof(Dict_char);
+ if (info_here)
+ memcpy (info, info_here, *info_here+1); /* with information */
+ else
+ *info = 0; /* without info */
+ }
+ else
+ {
+ short *indxp1, *indxp2;
+ assert (0);
+ DICT_type(p) = 1;
+ DICT_nodir(p) -= best_no;
+ indxp2 = indxp;
+ indxp1 = (short*)((char*) p+DICT_PAGESIZE-DICT_nodir(p)*sizeof(short));
+ do
+ {
+ --indxp2;
+ indxp2[0] = indxp2[-best_no];
+ } while (indxp2 != indxp1);
+ }
+ return 0;
+}
+
+static void clean_page (Dict dict, void *p)
+{
+ char *np = xmalloc (dict->head.page_size);
+ int i, slen;
+ short *indxp1, *indxp2;
+ char *info1, *info2;
+
+ indxp1 = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short));
+ indxp2 = (short*) ((char*) np+DICT_PAGESIZE);
+ info2 = (char*) np + DICT_infoffset;
+ for (i = DICT_nodir (p); --i >= 0; --indxp1)
+ {
+ if (*indxp1 > 0) /* tail string here! */
+ {
+ /* string (Dict_char *) DICT_EOS terminated */
+ /* unsigned char length of information */
+ /* char * information */
+
+ *--indxp2 = info2 - np;
+ info1 = (char*) p + *indxp1;
+ slen = (dict_strlen(info1)+1)*sizeof(Dict_char);
+ memcpy (info2, info1, slen);
+ info2 += slen;
+ info1 += slen;
+ slen = *info1+1;
+ memcpy (info2, info1, slen);
+ info2 += slen;
+ info1 += slen;
+ }
+ else
+ {
+ /* Dict_ptr subptr */
+ /* Dict_char sub char */
+ /* unsigned char length of information */
+ /* char * information */
+
+ *--indxp2 = -(info2 - np);
+ info1 = (char*) p - *indxp1;
+ memcpy (info2, info1, sizeof(Dict_ptr)+sizeof(Dict_char));
+ info2 += sizeof(Dict_ptr)+sizeof(Dict_char);
+ info1 += sizeof(Dict_ptr)+sizeof(Dict_char);
+ slen = *info1+1;
+ memcpy (info2, info1, slen);
+ info2 += slen;
+ info1 += slen;
+ }
+ }
+ memcpy ((char*) p + DICT_infoffset, (char*) np + DICT_infoffset,
+ DICT_PAGESIZE-DICT_infoffset);
+ DICT_size(p) = info2 - np;
+ DICT_type(p) = 0;
+ xfree (np);
+}
+
+static int dict_ins (Dict dict, const Dict_char *str,
+ Dict_ptr back_ptr, int userlen, void *userinfo)
{
- int i;
- Dict_ptr ptr = back_ptr, subptr;
- short *indxp, *indxp1;
- short newsize;
+ int i, slen, cmp = 1;
+ Dict_ptr ptr = back_ptr;
+ short *indxp;
+ char *info;
+ void *p;
if (ptr == 0)
ptr = new_page (dict, back_ptr, &p);
+ else
+ dict_bf_readp (dict->dbf, ptr, &p);
+
assert (p);
assert (ptr);
- indxp = (short*) ((char*) p+DICT_PAGESIZE);
- for (i = DICT_nodir (p); --i >= 0; )
+ indxp = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short));
+ for (i = DICT_nodir (p); --i >= 0; --indxp)
{
- char *info;
- int cmp;
- if (*--indxp > 0) /* tail string here! */
+ if (*indxp > 0) /* tail string here! */
{
- info = p + *indxp;
- cmp = dict_strcmp ((Dict_char*)
- (info+sizeof(Dict_info)+sizeof(Dict_ptr)),
- str);
+ info = (char*) p + *indxp;
+ /* string (Dict_char *) DICT_EOS terminated */
+ /* unsigned char length of information */
+ /* char * information */
+ cmp = dict_strcmp ((Dict_char*) info, str);
if (!cmp)
{
- if (memcmp (info+sizeof(Dict_ptr), userinfo, sizeof(userinfo)))
+ info += (dict_strlen(info)+1)*sizeof(Dict_char);
+ /* consider change of userinfo length... */
+ if (*info == userlen)
+ {
+ if (memcmp (info+1, userinfo, userlen))
+ {
+ dict_bf_touch (dict->dbf, ptr);
+ memcpy (info+1, userinfo, userlen);
+ }
+ }
+ else if (*info > userlen)
{
- memcpy (info+sizeof(Dict_ptr), userinfo, sizeof(userinfo));
+ DICT_type(p) = 1;
+ *info = userlen;
dict_bf_touch (dict->dbf, ptr);
+ memcpy (info+1, userinfo, userlen);
+ }
+ else
+ {
+ DICT_type(p) = 1;
+ break;
}
return 0;
}
- else if(cmp < 0)
+ else if(cmp > 0)
break;
}
else /* tail of string in sub page */
{
+ Dict_char dc;
assert (*indxp < 0);
- info = p - *indxp;
- cmp = memcmp (info+sizeof(Dict_info)+sizeof(Dict_ptr), str,
- sizeof(Dict_char));
+ info = (char*) p - *indxp;
+ /* Dict_ptr subptr */
+ /* Dict_char sub char */
+ /* unsigned char length of information */
+ /* char * information */
+ memcpy (&dc, info+sizeof(Dict_ptr), sizeof(Dict_char));
+ cmp = dc- *str;
if (!cmp)
{
Dict_ptr subptr;
void *pp;
- memcpy (&subptr, info, sizeof(subptr));
- if (subptr == 0)
+ if (*++str == DICT_EOS)
+ { /* missing: consider change of userinfo length ... */
+ if (memcmp (info+sizeof(Dict_char)+sizeof(Dict_ptr)+1,
+ userinfo, userlen))
+ {
+ memcpy (dict+sizeof(Dict_char)+sizeof(Dict_ptr)+1,
+ userinfo, userlen);
+ dict_bf_touch (dict->dbf, ptr);
+ }
+ return 0;
+ }
+ else
{
- subptr = new_page (dict, ptr, &pp);
- memcpy (info, &subptr, sizeof(subptr));
- dict_bf_touch (dict->dbf, ptr);
+ memcpy (&subptr, info, sizeof(subptr));
+ if (subptr == 0)
+ {
+ subptr = new_page (dict, ptr, &pp);
+ memcpy (info, &subptr, sizeof(subptr));
+ dict_bf_touch (dict->dbf, ptr);
+ }
+ return dict_ins (dict, str, ptr, userlen, userinfo);
}
- return dict_ins (dict, str+1, ptr, pp, userinfo);
}
- else if(cmp < 0)
+ else if(cmp > 0)
break;
}
}
- newsize = DICT_size(p);
- subptr = 0;
- memcpy (p+newsize, &subptr, sizeof(subptr));
- memcpy (p+newsize + sizeof(Dict_ptr), userinfo, sizeof(Dict_info));
- memcpy (p+newsize + sizeof(Dict_ptr)+sizeof(Dict_info), str,
- dict_strlen (str)+1);
- (DICT_nodir(p))++;
- indxp1 = (short*)((char*) p + DICT_PAGESIZE - DICT_nodir(p)*sizeof(short));
- for (; indxp1 != indxp; indxp1++)
- indxp1[0] = indxp1[1];
- *indxp = -newsize;
-
- DICT_size(p) = newsize + sizeof(Dict_info)+sizeof(Dict_ptr)
- +dict_strlen (str)+1;
+ slen = (dict_strlen(str)+1)*sizeof(Dict_char);
+ if (DICT_size(p)+slen+userlen >=
+ DICT_PAGESIZE - (1+DICT_nodir(p))*sizeof(short)) /* overflow? */
+ {
+ if (DICT_type(p) == 1)
+ {
+ clean_page (dict, p);
+ dict_ins (dict, str, ptr, userlen, userinfo);
+ return 0;
+ }
+ i = 0;
+ do
+ {
+ if (i > 0)
+ assert (0);
+ if (split_page (dict, ptr, p))
+ {
+ log (LOG_FATAL, "Unable to split page %d\n", ptr);
+ abort ();
+ }
+ if (DICT_size(p)+slen+userlen <
+ DICT_PAGESIZE - (1+DICT_nodir(p))*sizeof(short))
+ break;
+ i++;
+ clean_page (dict, p);
+ } while (DICT_size(p)+slen+userlen > DICT_PAGESIZE -
+ (1+DICT_nodir(p))*sizeof(short));
+ dict_ins (dict, str, ptr, userlen, userinfo);
+ return 0;
+ }
+ if (cmp)
+ {
+ short *indxp1;
+ (DICT_nodir(p))++;
+ indxp1 = (short*)((char*) p + DICT_PAGESIZE
+ - DICT_nodir(p)*sizeof(short));
+ for (; indxp1 != indxp; indxp1++)
+ indxp1[0] = indxp1[1];
+ }
+ info = (char*)p + DICT_size(p);
+ memcpy (info, str, slen);
+ info += slen;
+ *info++ = userlen;
+ memcpy (info, userinfo, userlen);
+ info += userlen;
+
+ *indxp = DICT_size(p);
+#if 0
+ printf ("indxp[%d]\n", (char*) indxp - (char*) p);
+#endif
+
+ DICT_size(p) = info- (char*) p;
+ dict_bf_touch (dict->dbf, ptr);
return 0;
}
-int dict_insert (Dict dict, const Dict_char *str, void *userinfo)
+int dict_insert (Dict dict, const Dict_char *str, int userlen, void *userinfo)
{
- void *p;
+ assert (dict->head.last > 0);
if (dict->head.last == 1)
- dict_ins (dict, str, 0, NULL, userinfo);
+ dict_ins (dict, str, 0, userlen, userinfo);
else
- {
- dict_bf_readp (dict->dbf, 1, &p);
- dict_ins (dict, str, 1, p, userinfo);
- }
+ dict_ins (dict, str, 1, userlen, userinfo);
return 0;
}