# Copyright (C) 1994, Index Data I/S
# All rights reserved.
# Sebastian Hammer, Adam Dickmeiss
-# $Id: Makefile,v 1.6 1994-09-12 08:06:41 adam Exp $
+# $Id: Makefile,v 1.7 1994-09-16 15:39:10 adam Exp $
SHELL=/bin/sh
INCLUDE=-I../include
-TPROG=dicttest
+TPROG1=dicttest
+TPROG2=dictext
CFLAGS=-O -Wall -g -pedantic
DEFS=$(INCLUDE)
LIB=../lib/dict.a
all: $(LIB)
-$(TPROG): $(TPROG).o $(LIB)
- $(CC) $(CFLAGS) -o $(TPROG) $(TPROG).o $(LIB) ../lib/bfile.a ../lib/util.a
+$(TPROG1): $(TPROG1).o $(LIB)
+ $(CC) $(CFLAGS) -o $(TPROG1) $(TPROG1).o $(LIB) ../lib/bfile.a ../lib/util.a
+
+$(TPROG2): $(TPROG2).o $(LIB)
+ $(CC) $(CFLAGS) -o $(TPROG2) $(TPROG2).o ../lib/util.a
+
$(LIB): $(PO)
rm -f $(LIB)
$(CC) -c $(DEFS) $(CFLAGS) $<
clean:
- rm -f *.[oa] $(TPROG) core mon.out gmon.out errlist
+ rm -f *.[oa] $(TPROG1) $(TPROG2) core mon.out gmon.out errlist
dep depend:
$(CPP) $(INCLUDE) -M *.c >.depend
--- /dev/null
+/*
+ * Copyright (C) 1994, Index Data I/S
+ * All rights reserved.
+ * Sebastian Hammer, Adam Dickmeiss
+ *
+ * $Log: dictext.c,v $
+ * Revision 1.1 1994-09-16 15:39:11 adam
+ * Initial code of lookup - not tested yet.
+ *
+ */
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <ctype.h>
+
+#include <util.h>
+
+char *prog;
+
+int main (int argc, char **argv)
+{
+ char *arg;
+ int ret;
+ char *inputfile = NULL;
+ FILE *ipf = stdin;
+ char ipf_buf[1024];
+
+ prog = *argv;
+ while ((ret = options ("vh", argv, argc, &arg)) != -2)
+ {
+ if (ret == 0)
+ {
+ if (!inputfile)
+ inputfile = arg;
+ else
+ {
+ log (LOG_FATAL, "too many files specified\n");
+ exit (1);
+ }
+ }
+ else if (ret == 'v')
+ {
+ log_init (atoi(arg), prog, NULL);
+ }
+ else if (ret == 'h')
+ {
+ fprintf (stderr, "usage:\n"
+ " %s [-h] [-v n] [file]\n", prog);
+ exit (1);
+ }
+ else
+ {
+ log (LOG_FATAL, "unknown option");
+ exit (1);
+ }
+ }
+ if (inputfile)
+ {
+ ipf = fopen (inputfile, "r");
+ if (!ipf)
+ {
+ log (LOG_FATAL|LOG_ERRNO, "cannot open '%s'", inputfile);
+ exit (1);
+ }
+ }
+ while (fgets (ipf_buf, 1023, ipf))
+ {
+ char *ipf_ptr = ipf_buf;
+ for (;*ipf_ptr && *ipf_ptr != '\n';ipf_ptr++)
+ {
+ if (isalpha(*ipf_ptr) || *ipf_ptr == '_')
+ {
+ int i = 1;
+ while (ipf_ptr[i] && (isalnum(ipf_ptr[i]) ||
+ ipf_ptr[i] == '_'))
+ i++;
+ if (ipf_ptr[i])
+ ipf_ptr[i++] = '\0';
+ printf ("%s\n", ipf_ptr);
+ ipf_ptr += (i-1);
+ }
+ }
+ }
+ return 0;
+}
+
+
+
* Sebastian Hammer, Adam Dickmeiss
*
* $Log: dicttest.c,v $
- * Revision 1.5 1994-09-06 13:05:14 adam
+ * Revision 1.6 1994-09-16 15:39:12 adam
+ * Initial code of lookup - not tested yet.
+ *
+ * Revision 1.5 1994/09/06 13:05:14 adam
* Further development of insertion. Some special cases are
* not properly handled yet! assert(0) are put here. The
* binary search in each page definitely reduce usr CPU.
int ret;
int no_of_insertions = 0;
int no_of_new = 0, no_of_same = 0, no_of_change = 0;
+ int unique = 0;
char *arg;
prog = argv[0];
if (argc < 2)
{
fprintf (stderr, "usage:\n"
- " %s [-s n] [-v n] [-i f] [-w] [-c n] base file\n",
+ " %s [-u] [-s n] [-v n] [-i f] [-w] [-c n] base file\n",
prog);
exit (1);
}
- while ((ret = options ("s:v:i:wc:", argv, argc, &arg)) != -2)
+ while ((ret = options ("us:v:i:wc:", argv, argc, &arg)) != -2)
{
if (ret == 0)
{
exit (1);
}
}
+ else if (ret == 'u')
+ {
+ unique = 1;
+ }
else if (ret == 'c')
{
cache = atoi(arg);
if (inputfile)
{
FILE *ipf;
- char ipf_buf[256];
+ char ipf_buf[1024];
int line = 1;
char infobytes[120];
memset (infobytes, 0, 120);
exit (1);
}
- while (fgets (ipf_buf, 255, ipf))
+ while (fgets (ipf_buf, 1023, ipf))
{
char *ipf_ptr = ipf_buf;
sprintf (infobytes, "%d", line);
i++;
if (ipf_ptr[i])
ipf_ptr[i++] = '\0';
-#if 1
switch(dict_insert (dict, ipf_ptr, infosize, infobytes))
{
case 0:
break;
case 1:
no_of_change++;
+ if (unique)
+ log (LOG_LOG, "%s change\n", ipf_ptr);
break;
case 2:
+ if (unique)
+ log (LOG_LOG, "%s duplicate\n", ipf_ptr);
no_of_same++;
break;
}
-#else
- printf ("%s\n", ipf_ptr);
-#endif
++no_of_insertions;
ipf_ptr += (i-1);
}
* Sebastian Hammer, Adam Dickmeiss
*
* $Log: insert.c,v $
- * Revision 1.8 1994-09-16 12:35:01 adam
+ * Revision 1.9 1994-09-16 15:39:13 adam
+ * Initial code of lookup - not tested yet.
+ *
+ * Revision 1.8 1994/09/16 12:35:01 adam
* New version of split_page which use clean_page for splitting.
*
* Revision 1.7 1994/09/12 08:06:42 adam
void *subp;
char *info_here;
Dict_ptr subptr;
- int i, need;
- short *indxp, *best_indxp;
+ int i;
+ short *indxp, *best_indxp = NULL;
Dict_char best_char = 0;
Dict_char prev_char = 0;
int best_no = -1, no_current = 1;
+ /* determine splitting char... */
indxp = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short));
for (i = DICT_nodir (p); --i >= 0; --indxp)
{
info_here = NULL;
for (indxp=best_indxp, i=0; i<best_no; i++, indxp++)
{
- char *info;
+ char *info, *info1;
int slen;
assert (*indxp > 0);
assert (!info_here);
info_here = info+(slen+1)*sizeof(Dict_char);
}
- }
- /* calculate the amount of bytes needed for this entry when */
- /* transformed to a sub entry */
- need = sizeof(Dict_char)+sizeof(Dict_ptr)+1;
- if (info_here)
- need += *info_here;
-
- indxp = best_indxp;
- /* now loop on all entries with string length > 1 i.e. all */
- /* those entries which contribute to a sub page */
- best_indxp = NULL;
- for (i=0; i<best_no; i++, indxp++)
- {
- char *info, *info1;
- int slen;
-
- assert (*indxp > 0);
-
- info = (char*) p + *indxp; /* entry start */
- assert (*info == best_char);
- slen = dict_strlen(info);
-
- if (slen > 1)
+ else
{
info1 = info+(1+slen)*sizeof(Dict_char); /* info start */
-
- if (need <= (1+slen)*sizeof(Dict_char) + 1 + *info1)
- best_indxp = indxp; /* space for entry */
dict_ins (dict, info+sizeof(Dict_char), subptr, *info1, info1+1);
dict_bf_readp (dict->dbf, ptr, &p);
}
}
-#if 1
/* now clean the page ... */
clean_page (dict, ptr, p, &best_char, subptr, info_here);
return 0;
-#endif
- if (best_indxp)
- { /* there was a hole big enough for a sub entry */
- char *info = (char*) p + *best_indxp;
- short *indxp1;
-
- *--indxp = - *best_indxp;
- DICT_type(p) = 1;
- DICT_nodir (p) -= (best_no-1);
- indxp1 = (short*)((char*)p+DICT_PAGESIZE-DICT_nodir(p)*sizeof(short));
- while (indxp != indxp1)
- {
- --indxp;
- *indxp = indxp[1-best_no];
- }
- memcpy (info, &subptr, sizeof(Dict_ptr)); /* store subptr */
- info += sizeof(Dict_ptr);
- memcpy (info, &best_char, sizeof(Dict_char)); /* store sub char */
- info += sizeof(Dict_char);
- if (info_here)
- memcpy (info, info_here, *info_here+1); /* with information */
- else
- *info = 0; /* without info */
-#if CHECK
- best_indxp = NULL;
- prev_char = 0;
- indxp = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short));
- for (i = DICT_nodir (p); --i >= 0; --indxp)
- {
- if (*indxp > 0) /* tail string here! */
- {
- Dict_char dc;
-
- memcpy (&dc, (char*) p + *indxp, sizeof(dc));
- assert (dc != best_char);
- assert (dc >= prev_char);
- prev_char = dc;
- }
- else
- {
- Dict_char dc;
- memcpy (&dc, (char*)p - *indxp+sizeof(Dict_ptr),
- sizeof(dc));
- assert (dc > prev_char);
- if (dc == best_char)
- {
- assert (best_indxp == NULL);
- best_indxp = indxp;
- }
- prev_char = dc;
- }
- }
- assert (best_indxp);
-#endif
- }
- else
- {
- short *indxp1, *indxp2;
- assert (0);
- DICT_type(p) = 1;
- DICT_nodir(p) -= best_no;
- indxp2 = indxp;
- indxp1 = (short*)((char*) p+DICT_PAGESIZE-DICT_nodir(p)*sizeof(short));
- do
- {
- --indxp2;
- indxp2[0] = indxp2[-best_no];
- } while (indxp2 != indxp1);
- }
- return 0;
}
static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out,
DICT_type(p) = 0;
DICT_nodir(p) = no;
xfree (np);
+ dict_bf_touch (dict->dbf, ptr);
}
static int dict_ins (Dict dict, const Dict_char *str,
Dict_ptr back_ptr, int userlen, void *userinfo)
{
- int hi, lo, mid, i, slen, cmp = 1;
+ int hi, lo, mid, slen, cmp = 1;
Dict_ptr ptr = back_ptr;
short *indxp;
char *info;
if (DICT_type(p) == 1)
{
clean_page (dict, ptr, p, NULL, 0, NULL);
- dict_bf_touch (dict->dbf, ptr);
return dict_ins (dict, str-1, ptr,
userlen, userinfo);
}
if (DICT_size(p)+slen+userlen >=
DICT_PAGESIZE - (1+DICT_nodir(p))*sizeof(short)) /* overflow? */
{
- if (DICT_type(p) == 1)
- {
- clean_page (dict, ptr, p, NULL, 0, NULL);
- dict_bf_touch (dict->dbf, ptr);
- return dict_ins (dict, str, ptr, userlen, userinfo);
- }
- i = 0;
- do
- {
- assert (i <= 1);
- if (split_page (dict, ptr, p))
- {
- log (LOG_FATAL, "Unable to split page %d\n", ptr);
- abort ();
- }
- if (DICT_size(p)+slen+userlen <
- DICT_PAGESIZE - (1+DICT_nodir(p))*sizeof(short))
- break;
- i++;
- clean_page (dict, ptr, p, NULL, 0, NULL);
- } while (DICT_size(p)+slen+userlen > DICT_PAGESIZE -
- (1+DICT_nodir(p))*sizeof(short));
+ split_page (dict, ptr, p);
return dict_ins (dict, str, ptr, userlen, userinfo);
}
if (cmp)
* Sebastian Hammer, Adam Dickmeiss
*
* $Log: lookup.c,v $
- * Revision 1.1 1994-08-16 16:26:48 adam
+ * Revision 1.2 1994-09-16 15:39:14 adam
+ * Initial code of lookup - not tested yet.
+ *
+ * Revision 1.1 1994/08/16 16:26:48 adam
* Added dict.
*
*/
#include <dict.h>
-int dict_lookup (Dict dict, Dict_char *p)
+static char *dict_look (Dict dict, Dict_char *str)
+{
+ Dict_ptr ptr = 1;
+ int mid, lo, hi;
+ int cmp;
+ void *p;
+ short *indxp;
+ char *info;
+
+ dict_bf_readp (dict->dbf, ptr, &p);
+ mid = lo = 0;
+ hi = DICT_nodir(p)-1;
+ indxp = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short));
+ while (lo <= hi)
+ {
+ mid = (lo+hi)/2;
+ if (indxp[-mid] > 0)
+ {
+ /* string (Dict_char *) DICT_EOS terminated */
+ /* unsigned char length of information */
+ /* char * information */
+ info = (char*)p + indxp[-mid];
+ cmp = dict_strcmp((Dict_char*) info, str);
+ if (!cmp)
+ return info+(dict_strlen (info)+1)*sizeof(Dict_char);
+ }
+ else
+ {
+ Dict_char dc;
+ Dict_ptr subptr;
+
+ /* Dict_ptr subptr */
+ /* Dict_char sub char */
+ /* unsigned char length of information */
+ /* char * information */
+ info = (char*)p - indxp[-mid];
+ memcpy (&dc, info+sizeof(Dict_ptr), sizeof(Dict_char));
+ cmp = dc- *str;
+ if (!cmp)
+ {
+ memcpy (&subptr, info, sizeof(Dict_ptr));
+ if (*++str == DICT_EOS)
+ return info+sizeof(Dict_ptr)+sizeof(Dict_char);
+ else
+ {
+ if (subptr == 0)
+ return NULL;
+ ptr = subptr;
+ dict_bf_readp (dict->dbf, ptr, &p);
+ mid = lo = 0;
+ hi = DICT_nodir(p)-1;
+ indxp = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short));
+ continue;
+ }
+ }
+ }
+ if (cmp < 0)
+ lo = mid+1;
+ else
+ hi = mid-1;
+ }
+ return NULL;
+}
+
+char *dict_lookup (Dict dict, Dict_char *p)
{
- return 0;
+ if (dict->head.last == 1)
+ return NULL;
+ return dict_look (dict, p);
}
* Sebastian Hammer, Adam Dickmeiss
*
* $Log: dict.h,v $
- * Revision 1.5 1994-09-06 13:05:29 adam
+ * Revision 1.6 1994-09-16 15:39:21 adam
+ * Initial code of lookup - not tested yet.
+ *
+ * Revision 1.5 1994/09/06 13:05:29 adam
* Further development of insertion. Some special cases are
* not properly handled yet! assert(0) are put here. The
* binary search in each page definitely reduce usr CPU.
Dict dict_open (const char *name, int cache, int rw);
int dict_close (Dict dict);
int dict_insert (Dict dict, const Dict_char *p, int userlen, void *userinfo);
-int dict_lookup (Dict dict, Dict_char *p);
+char *dict_lookup (Dict dict, Dict_char *p);
int dict_strcmp (const Dict_char *s1, const Dict_char *s2);
int dict_strlen (const Dict_char *s);