* Sebastian Hammer, Adam Dickmeiss
*
* $Log: zrpn.c,v $
- * Revision 1.57 1996-11-11 13:38:02 adam
+ * Revision 1.61 1997-02-10 10:21:14 adam
+ * Bug fix: the (^) character in search terms wasn't observed.
+ *
+ * Revision 1.60 1997/01/31 11:10:34 adam
+ * Bug fix: Leading and trailing white space weren't removed in scan tokens.
+ *
+ * Revision 1.59 1997/01/17 11:31:46 adam
+ * Bug fix: complete phrase search didn't work.
+ *
+ * Revision 1.58 1996/12/23 15:30:45 adam
+ * Work on truncation.
+ * Bug fix: result sets weren't deleted after server shut down.
+ *
+ * Revision 1.57 1996/11/11 13:38:02 adam
* Added proximity support in search.
*
* Revision 1.56 1996/11/08 11:10:32 adam
return *s0;
}
-static int term_100 (char **src, char *dst)
+static int term_100 (char **src, char *dst, int space_split)
{
char *s0, *s1, **map;
int i = 0;
{
s1 = s0;
map = map_chrs_input (&s0, strlen(s0));
- if (**map == *CHR_SPACE)
+ if (space_split && **map == *CHR_SPACE)
break;
while (s1 < s0)
{
return i;
}
-static int term_101 (char **src, char *dst)
+static int term_101 (char **src, char *dst, int space_split)
{
char *s0, *s1, **map;
int i = 0;
{
s1 = s0;
map = map_chrs_input (&s0, strlen(s0));
- if (**map == *CHR_SPACE)
+ if (space_split && **map == *CHR_SPACE)
break;
while (s1 < s0)
{
}
-static int term_103 (char **src, char *dst, int *errors)
+static int term_103 (char **src, char *dst, int *errors, int space_split)
{
int i = 0;
char *s0, *s1, **map;
- if (!term_pre (src, "\\()[].*+?|", "("))
+ if (!term_pre (src, "^\\()[].*+?|", "("))
return 0;
s0 = *src;
if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
}
while (*s0)
{
- if (strchr ("\\()[].*+?|-", *s0))
+ if (strchr ("^\\()[].*+?|-", *s0))
dst[i++] = *s0++;
else
{
return i;
}
-static int term_102 (char **src, char *dst)
+static int term_102 (char **src, char *dst, int space_split)
{
- return term_103 (src, dst, NULL);
+ return term_103 (src, dst, NULL, space_split);
}
/* gen_regular_rel - generate regular expression from relation
switch (relation_value)
{
case 1:
- if (!term_100 (term_sub, term_dict))
+ if (!term_100 (term_sub, term_dict, 1))
return 0;
term_value = atoi (term_dict);
if (term_value <= 0)
gen_regular_rel (term_dict + strlen(term_dict), term_value-1, 1);
break;
case 2:
- if (!term_100 (term_sub, term_dict))
+ if (!term_100 (term_sub, term_dict, 1))
return 0;
term_value = atoi (term_dict);
if (term_value < 0)
gen_regular_rel (term_dict + strlen(term_dict), term_value, 1);
break;
case 4:
- if (!term_100 (term_sub, term_dict))
+ if (!term_100 (term_sub, term_dict, 1))
return 0;
term_value = atoi (term_dict);
if (term_value < 0)
gen_regular_rel (term_dict + strlen(term_dict), term_value, 0);
break;
case 5:
- if (!term_100 (term_sub, term_dict))
+ if (!term_100 (term_sub, term_dict, 1))
return 0;
term_value = atoi (term_dict);
if (term_value < 0)
static int field_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt,
char **term_sub, int regType,
oid_value attributeSet, struct grep_info *grep_info,
- int num_bases, char **basenames)
+ int num_bases, char **basenames, int space_split)
{
char term_dict[2*IT_MAX_WORD+2];
int j, r, base_no;
attr_init (&use, zapt, 1);
use_value = attr_find (&use, &curAttributeSet);
- logf (LOG_DEBUG, "use value %d", use_value);
+ logf (LOG_DEBUG, "field_term, use value %d", use_value);
attr_init (&truncation, zapt, 5);
truncation_value = attr_find (&truncation, NULL);
logf (LOG_DEBUG, "truncation value %d", truncation_value);
{
case -1: /* not specified */
case 100: /* do not truncate */
- term_dict[j++] = '(';
- if (!term_100 (&termp, term_dict + j))
+ term_dict[j++] = '(';
+ if (!term_100 (&termp, term_dict + j, space_split))
return 0;
strcat (term_dict, ")");
r = dict_lookup_grep (zi->dict, term_dict, 0, grep_info,
break;
case 1: /* right truncation */
term_dict[j++] = '(';
- if (!term_100 (&termp, term_dict + j))
+ if (!term_100 (&termp, term_dict + j, space_split))
return 0;
strcat (term_dict, ".*)");
dict_lookup_grep (zi->dict, term_dict, 0, grep_info,
return -1;
case 101: /* process # in term */
term_dict[j++] = '(';
- if (!term_101 (&termp, term_dict + j))
+ if (!term_101 (&termp, term_dict + j, space_split))
return 0;
strcat (term_dict, ")");
r = dict_lookup_grep (zi->dict, term_dict, 0, grep_info,
break;
case 102: /* Regexp-1 */
term_dict[j++] = '(';
- if (!term_102 (&termp, term_dict + j))
+ if (!term_102 (&termp, term_dict + j, space_split))
return 0;
strcat (term_dict, ")");
logf (LOG_DEBUG, "Regexp-1 tolerance=%d", r);
case 103: /* Regexp-2 */
r = 1;
term_dict[j++] = '(';
- if (!term_103 (&termp, term_dict + j, &r))
+ if (!term_103 (&termp, term_dict + j, &r, space_split))
return 0;
strcat (term_dict, ")");
logf (LOG_DEBUG, "Regexp-2 tolerance=%d", r);
const char *cp_end = cp + term->u.general->len;
const char *src;
int i = 0;
- int prev_space = 0;
+ const char *space_map = NULL;
int len;
while ((len = (cp_end - cp)) > 0)
{
map = map_chrs_input (&cp, len);
if (**map == *CHR_SPACE)
- {
- if (prev_space)
- continue;
- prev_space = 1;
- }
+ space_map = *map;
else
- prev_space = 0;
- for (src = *map; *src; src++)
- termz[i++] = *src;
+ {
+ if (i && space_map)
+ for (src = space_map; *src; src++)
+ termz[i++] = *src;
+ space_map = NULL;
+ for (src = *map; *src; src++)
+ termz[i++] = *src;
+ }
}
termz[i] = '\0';
}
int r;
parms.key_size = sizeof(struct it_key);
- parms.max_rec = 100;
- parms.cmp = key_compare;
+ parms.max_rec = 1000;
+ parms.cmp = key_compare_it;
parms.is = zi->isam;
parms.isc = zi->isamc;
parms.no_terms = 0;
while (1)
{
r = field_term (zi, zapt, &termp, 'w', attributeSet, &grep_info,
- num_bases, basenames);
+ num_bases, basenames, 1);
if (r <= 0)
break;
#ifdef TERM_COUNT
grep_info.isam_p_buf = NULL;
r = field_term (zi, zapt, &termp, 'p', attributeSet, &grep_info,
- num_bases, basenames);
+ num_bases, basenames, 0);
result = rset_trunc (zi, grep_info.isam_p_buf, grep_info.isam_p_indx);
#ifdef TERM_COUNT
xfree(grep_info.term_no);
exclusion, ordered, relation, distance);
while (more1 && more2)
{
- int cmp = key_compare (&buf1, &buf2);
+ int cmp = key_compare_it (&buf1, &buf2);
if (cmp < -1)
more1 = rset_read (rset1, rsfd1, &buf1);
else if (cmp > 1)
int excl = exclusion;
if (!ordered && diff < 0)
diff = -diff;
- logf (LOG_DEBUG, "l = %d r = %d", seqno[i], buf2.seqno);
switch (relation)
{
case 1: /* < */
break;
}
if (excl)
- {
- logf (LOG_DEBUG, " match");
rset_write (result, rsfd_result, &buf2);
- }
}
} while ((more2 = rset_read (rset2, rsfd2, &buf2)) &&
sysno == buf2.sysno);
*more = 0;
break;
}
- cmp = key_compare (buf[i], buf[i-1]);
+ cmp = key_compare_it (buf[i], buf[i-1]);
if (cmp > 1)
{
more[i-1] = rset_read (rset[i-1], rsfd[i-1], buf[i-1]);
{
grep_info.isam_p_indx = 0;
r = field_term (zi, zapt, &termp, 'w', attributeSet, &grep_info,
- num_bases, basenames);
+ num_bases, basenames, 1);
if (r < 1)
break;
rset[rset_no] = rset_trunc (zi, grep_info.isam_p_buf,
if (rset_is_ranked(bool_parms.rset_r))
soft = 1;
bool_parms.key_size = sizeof(struct it_key);
- bool_parms.cmp = key_compare;
+ bool_parms.cmp = key_compare_it;
switch (zop->which)
{
RSFD rfd, wfd;
RSET w;
rset_temp_parms parms;
-
+ int maxResultSetSize = atoi (res_get_def (common_resource,
+ "maxResultSetSize", "400"));
logf (LOG_DEBUG, "count_set_save");
*count = 0;
parms.key_size = sizeof(struct it_key);
rfd = rset_open (*r, RSETF_READ|RSETF_SORT_SYSNO);
while (rset_read (*r, rfd, &key))
{
- logf (LOG_DEBUG, "sysno=%-7d seqno=%d", key.sysno, key.seqno);
if (key.sysno != psysno)
{
- rset_write (w, wfd, &key);
- psysno = key.sysno;
+ if (*count < maxResultSetSize)
+ rset_write (w, wfd, &key);
(*count)++;
+ psysno = key.sysno;
}
kno++;
}
idx = scan_info->after - pos + scan_info->before;
else
idx = - pos - 1;
- logf (LOG_DEBUG, "%-3d %s", idx, name+len_prefix);
scan_info->list[idx].term = odr_malloc (scan_info->odr,
strlen(name + len_prefix)+1);
strcpy (scan_info->list[idx].term, name + len_prefix);
rset_trunc (zi, &scan_info_array[j].list[ptr[j]].isam_p, 1);
bool_parms.key_size = sizeof(struct it_key);
- bool_parms.cmp = key_compare;
+ bool_parms.cmp = key_compare_it;
bool_parms.rset_l = rset;
bool_parms.rset_r = rset2;
&scan_info_array[j].list[before-1-ptr[j]].isam_p, 1);
bool_parms.key_size = sizeof(struct it_key);
- bool_parms.cmp = key_compare;
+ bool_parms.cmp = key_compare_it;
bool_parms.rset_l = rset;
bool_parms.rset_r = rset2;