2 * Copyright (C) 1995-2000, Index Data
6 * Revision 1.29 2000-03-20 19:08:36 adam
7 * Added remote record import using Z39.50 extended services and Segment
10 * Revision 1.28 2000/03/15 15:00:30 adam
11 * First work on threaded version.
13 * Revision 1.27 2000/02/24 12:31:17 adam
14 * Added zebra_string_norm.
16 * Revision 1.26 1999/11/30 13:48:03 adam
17 * Improved installation. Updated for inclusion of YAZ header files.
19 * Revision 1.25 1999/11/04 15:00:45 adam
20 * Implemented delete result set(s).
22 * Revision 1.24 1999/10/14 14:33:50 adam
23 * Added truncation 5=106.
25 * Revision 1.23 1999/09/07 11:36:32 adam
28 * Revision 1.22 1999/08/02 10:13:47 adam
29 * Fixed bug regarding zebra_hits.
31 * Revision 1.21 1999/07/14 10:59:26 adam
32 * Changed functions isc_getmethod, isams_getmethod.
33 * Improved fatal error handling (such as missing EXPLAIN schema).
35 * Revision 1.20 1999/07/06 12:28:04 adam
36 * Updated record index structure. Format includes version ID. Compression
37 * algorithm ID is stored for each record block.
39 * Revision 1.19 1999/05/26 07:49:13 adam
42 * Revision 1.18 1999/05/15 14:36:38 adam
43 * Updated dictionary. Implemented "compression" of dictionary.
45 * Revision 1.17 1999/05/12 13:08:06 adam
46 * First version of ISAMS.
48 * Revision 1.16 1999/02/19 10:38:30 adam
49 * Implemented chdir-setting.
51 * Revision 1.15 1999/02/17 12:18:12 adam
52 * Fixed zebra_close so that a NULL pointer is ignored.
54 * Revision 1.14 1999/02/02 14:51:11 adam
55 * Updated WIN32 code specific sections. Changed header.
57 * Revision 1.13 1998/12/16 12:23:30 adam
58 * Added facility for database name mapping using resource mapdb.
60 * Revision 1.12 1998/11/16 10:18:10 adam
61 * Better error reporting for result sets.
63 * Revision 1.11 1998/10/16 08:14:34 adam
64 * Updated record control system.
66 * Revision 1.10 1998/09/22 10:03:42 adam
67 * Changed result sets to be persistent in the sense that they can
68 * be re-searched if needed.
69 * Fixed memory leak in rsm_or.
71 * Revision 1.9 1998/09/02 13:53:17 adam
72 * Extra parameter decode added to search routines to implement
75 * Revision 1.8 1998/08/24 17:29:23 adam
78 * Revision 1.7 1998/06/24 12:16:13 adam
79 * Support for relations on text operands. Open range support in
80 * DFA module (i.e. [-j], [g-]).
82 * Revision 1.6 1998/06/22 11:36:47 adam
83 * Added authentication check facility to zebra.
85 * Revision 1.5 1998/06/13 00:14:08 adam
88 * Revision 1.4 1998/06/12 12:22:12 adam
91 * Revision 1.3 1998/05/27 16:57:44 adam
92 * Zebra returns surrogate diagnostic for single records when
95 * Revision 1.2 1998/05/20 10:12:19 adam
96 * Implemented automatic EXPLAIN database maintenance.
97 * Modified Zebra to work with ASN.1 compiled version of YAZ.
99 * Revision 1.1 1998/03/05 08:45:13 adam
100 * New result set model and modular ranking system. Moved towards
101 * decent server API. System information stored as "SGML" records.
115 #include <yaz/diagbib1.h>
/* Reads the "chdir" setting from the service resources and logs the
 * directory at debug level.
 * NOTE(review): presumably the process then changes into that directory;
 * confirm against the complete function body. */
119 static void zebra_chdir (ZebraService zh)
121 const char *dir = res_get (zh->res, "chdir");
124 logf (LOG_DEBUG, "chdir %s", dir);
/* Forward declarations of file-local helpers that are used before their
 * definitions appear further down in this file. */
132 static int extract_rec_in_mem (ZebraHandle zh, const char *recordType,
133 const char *buf, size_t buf_size,
134 const char *databaseName, int delete_flag,
135 int test_mode, int *sysno,
136 int store_keys, int store_data,
137 const char *match_criteria);
139 static int explain_extract (void *handle, Record rec, data1_node *n);
140 static void extract_index (ZebraHandle zh);
142 static void zebra_register_unlock (ZebraHandle zh);
/* Acquires access to the register for a session. The service's "active"
 * flag is checked first. Callers in this file treat a non-zero return as
 * "register not available" and bail out. */
144 static int zebra_register_lock (ZebraHandle zh)
146 if (!zh->service->active)
/* Counterpart of zebra_register_lock; called by the API entry points once
 * they are done with the register. */
154 static void zebra_register_unlock (ZebraHandle zh)
/* Creates a new session handle for service zs. The handle is allocated
 * with xmalloc and, while holding the service's session_lock, chained in
 * front of the existing zs->sessions list. */
158 ZebraHandle zebra_open (ZebraService zs)
169 zh = (ZebraHandle) xmalloc (sizeof(*zh));
179 zebra_mutex_cond_lock (&zs->session_lock);
181 zh->next = zs->sessions;
184 zebra_mutex_cond_unlock (&zs->session_lock);
/* Forward declarations: register activation / deactivation are defined
 * below but are needed by zebra_start and the admin entry points. */
188 static int zebra_register_activate (ZebraService zh);
189 static int zebra_register_deactivate (ZebraService zh);
/* Creates a Zebra service object for the configuration file configName.
 * The service keeps its own copy of the name (xstrdup), gets its session
 * lock initialised and then has its register activated.
 * NOTE(review): the result of zebra_register_activate is not inspected on
 * the line shown here. */
191 ZebraService zebra_start (const char *configName)
193 ZebraService zh = xmalloc (sizeof(*zh));
195 yaz_log (LOG_LOG, "zebra_start %s", configName);
197 zh->configName = xstrdup(configName);
201 zebra_mutex_cond_init (&zh->session_lock);
202 zebra_register_activate (zh);
/* Opens all components of the register for a service: resources, data1
 * handle, block file system, record types, character maps, ranking,
 * optional password database, record store, dictionary, sort index, the
 * ISAM variant selected by the "isam" resource and finally the EXPLAIN
 * information. Failures of the individual opens are logged with LOG_WARN.
 * NOTE(review): the values returned on success and failure are not shown
 * by the lines here; confirm before relying on them. */
206 static int zebra_register_activate (ZebraService zh)
210 yaz_log (LOG_LOG, "zebra_register_activate");
/* Load the configuration named when the service was started. */
211 if (!(zh->res = res_open (zh->configName)))
213 logf (LOG_WARN, "Failed to read resources `%s'", zh->configName);
217 zh->dh = data1_create ();
/* Block file system rooted at the "register" resource. */
220 zh->bfs = bfs_create (res_get (zh->res, "register"));
223 data1_destroy(zh->dh);
226 bf_lockDir (zh->bfs, res_get (zh->res, "lockDir"));
227 data1_set_tabpath (zh->dh, res_get(zh->res, "profilePath"));
228 zh->registerState = -1; /* trigger open of registers! */
229 zh->registerChange = 0;
230 zh->recTypes = recTypes_init (zh->dh);
231 recTypes_default_handlers (zh->recTypes);
234 zh->zebra_maps = zebra_maps_open (zh->res);
235 zh->rank_classes = NULL;
246 zebraRankInstall (zh, rank1_class);
/* The password database is only opened when a "passwd" resource exists. */
248 if (!res_get (zh->res, "passwd"))
249 zh->passwd_db = NULL;
252 zh->passwd_db = passwd_db_open ();
254 logf (LOG_WARN|LOG_ERRNO, "passwd_db_open failed");
256 passwd_db_file (zh->passwd_db, res_get (zh->res, "passwd"));
259 if (!(zh->records = rec_open (zh->bfs, 1, 0)))
261 logf (LOG_WARN, "rec_open");
264 if (!(zh->dict = dict_open (zh->bfs, FNAME_DICT, 40, 1, 0)))
266 logf (LOG_WARN, "dict_open");
269 if (!(zh->sortIdx = sortIdx_open (zh->bfs, 0)))
271 logf (LOG_WARN, "sortIdx_open");
/* Exactly one ISAM implementation is opened, chosen by the "isam"
 * resource value: "s", "i", "c" or "d". */
274 if (res_get_match (zh->res, "isam", "s", ISAM_DEFAULT))
276 struct ISAMS_M_s isams_m;
277 if (!(zh->isams = isams_open (zh->bfs, FNAME_ISAMS, 1,
278 key_isams_m(zh->res, &isams_m))))
280 logf (LOG_WARN, "isams_open");
285 else if (res_get_match (zh->res, "isam", "i", ISAM_DEFAULT))
287 if (!(zh->isam = is_open (zh->bfs, FNAME_ISAM, key_compare, 1,
288 sizeof (struct it_key), zh->res)))
290 logf (LOG_WARN, "is_open");
294 else if (res_get_match (zh->res, "isam", "c", ISAM_DEFAULT))
296 struct ISAMC_M_s isamc_m;
297 if (!(zh->isamc = isc_open (zh->bfs, FNAME_ISAMC,
298 1, key_isamc_m(zh->res, &isamc_m))))
300 logf (LOG_WARN, "isc_open");
304 else if (res_get_match (zh->res, "isam", "d", ISAM_DEFAULT))
306 struct ISAMD_M_s isamd_m;
308 if (!(zh->isamd = isamd_open (zh->bfs, FNAME_ISAMD,
309 1, key_isamd_m(zh->res, &isamd_m))))
311 logf (LOG_WARN, "isamd_open");
/* EXPLAIN information is layered on top of the record store. */
316 zh->zei = zebraExplain_open (zh->records, zh->dh,
317 zh->res, 1, 0 /* rGroup */,
321 logf (LOG_WARN, "Cannot obtain EXPLAIN information");
325 yaz_log (LOG_LOG, "zebra_register_activate ok");
/* Administrative shutdown requested through session zh. EXPLAIN data is
 * flushed, then (under the session lock) the service's stop_flag is set.
 * The register is deactivated right away only when the service has no
 * sessions; zebra_close performs the deactivation when the last session
 * goes away with stop_flag set. */
329 void zebra_admin_shutdown (ZebraHandle zh)
331 zebraExplain_flush (zh->service->zei, 1, zh);
334 zebra_mutex_cond_lock (&zh->service->session_lock);
335 zh->service->stop_flag = 1;
336 if (!zh->service->sessions)
337 zebra_register_deactivate(zh->service);
338 zebra_mutex_cond_unlock (&zh->service->session_lock);
/* Administrative start: re-activates the register of the service that
 * session zh belongs to, holding the session lock while doing so. */
341 void zebra_admin_start (ZebraHandle zh)
343 ZebraService zs = zh->service;
345 zebra_mutex_cond_lock (&zs->session_lock);
347 zebra_register_activate(zs);
348 zebra_mutex_cond_unlock (&zs->session_lock);
/* Releases what zebra_register_activate opened: EXPLAIN info, dictionary,
 * sort index, ISAM files, record store, record types, character maps,
 * ranking, block file system, data1 handle and password database.
 * NOTE(review): any guards around the individual close calls (e.g. which
 * ISAM variant is open) are not shown by the lines here. */
351 static int zebra_register_deactivate (ZebraService zh)
356 yaz_log(LOG_LOG, "zebra_register_deactivate");
360 zebraExplain_close (zh->zei, 1);
361 dict_close (zh->dict);
362 sortIdx_close (zh->sortIdx);
364 isams_close (zh->isams);
369 isc_close (zh->isamc);
371 isamd_close (zh->isamd);
373 rec_close (&zh->records);
375 recTypes_destroy (zh->recTypes);
376 zebra_maps_close (zh->zebra_maps);
377 zebraRankDestroy (zh);
378 bfs_destroy (zh->bfs);
379 data1_destroy (zh->dh);
382 passwd_db_close (zh->passwd_db);
/* Tears down a service created by zebra_start. All sessions must already
 * be closed (enforced by the assert). The session lock is destroyed, the
 * register is deactivated and the stored configuration name is freed. */
388 void zebra_stop(ZebraService zh)
392 yaz_log (LOG_LOG, "zebra_stop");
394 assert (!zh->sessions);
396 zebra_mutex_cond_destroy (&zh->session_lock);
398 zebra_register_deactivate(zh);
399 xfree (zh->configName);
/* Closes a session. All of the session's result sets are destroyed
 * (num == -1 is also what the "delete all" request uses). Under the
 * session lock, if no sessions remain and a shutdown was requested
 * (stop_flag), the register is deactivated.
 * NOTE(review): the unlinking of zh from zs->sessions via sp is not shown
 * by the lines here. */
403 void zebra_close (ZebraHandle zh)
405 ZebraService zs = zh->service;
406 struct zebra_session **sp;
409 resultSetDestroy (zh, -1, 0, 0);
416 zebra_mutex_cond_lock (&zs->session_lock);
428 if (!zs->sessions && zs->stop_flag)
429 zebra_register_deactivate(zs);
430 zebra_mutex_cond_unlock (&zs->session_lock);
/* Working state shared between map_basenames and its res_trav callback
 * map_basenames_func while database names are being mapped. */
434 struct map_baseinfo {
/* Output list of database names, filled in by the mapping code. */
440 char **new_basenames;
/* res_trav callback used by map_basenames. vp is the struct map_baseinfo
 * being filled in; value is one resource value, parsed as a source
 * database name followed by up to eight target names (each at most 127
 * characters). The input names are compared against the source name, and
 * target names are appended (nmem_strdup copies) to new_basenames.
 * NOTE(review): how "no" is computed and what happens to a matched input
 * name is not shown by the lines here. */
444 void map_basenames_func (void *vp, const char *name, const char *value)
446 struct map_baseinfo *p = (struct map_baseinfo *) vp;
448 char fromdb[128], todb[8][128];
/* Nine %127s conversions: fromdb plus todb[0..7]. */
451 sscanf (value, "%127s %127s %127s %127s %127s %127s %127s %127s %127s",
452 fromdb, todb[0], todb[1], todb[2], todb[3], todb[4],
453 todb[5], todb[6], todb[7]);
457 for (i = 0; i<p->num_bases; i++)
458 if (p->basenames[i] && !strcmp (p->basenames[i], fromdb))
461 for (i = 0; i < no; i++)
/* Capacity check before appending another mapped name. */
463 if (p->new_num_bases == p->new_num_max)
465 p->new_basenames[(p->new_num_bases)++] =
466 nmem_strdup (p->mem, todb[i]);
/* Rewrites the database name list *basenames / *num_bases in place.
 * A new list with room for 128 entries is allocated from the ODR stream,
 * the "mapdb" resource entries are traversed with map_basenames_func, and
 * afterwards every input name that is still non-NULL is copied over while
 * capacity remains. The resulting names are logged and returned through
 * the two pointer arguments. */
472 void map_basenames (ZebraHandle zh, ODR stream,
473 int *num_bases, char ***basenames)
475 struct map_baseinfo info;
476 struct map_baseinfo *p = &info;
480 info.num_bases = *num_bases;
481 info.basenames = *basenames;
482 info.new_num_max = 128;
483 info.new_num_bases = 0;
484 info.new_basenames = (char **)
485 odr_malloc (stream, sizeof(*info.new_basenames) * info.new_num_max);
486 info.mem = stream->mem;
488 res_trav (zh->service->res, "mapdb", &info, map_basenames_func);
/* Append the input names that are still present (non-NULL). */
490 for (i = 0; i<p->num_bases; i++)
491 if (p->basenames[i] && p->new_num_bases < p->new_num_max)
493 p->new_basenames[(p->new_num_bases)++] =
494 nmem_strdup (p->mem, p->basenames[i]);
496 *num_bases = info.new_num_bases;
497 *basenames = info.new_basenames;
498 for (i = 0; i<*num_bases; i++)
499 logf (LOG_LOG, "base %s", (*basenames)[i]);
/* Runs an RPN query and stores the result under setname. The register is
 * locked for the duration, database names are first translated with
 * map_basenames, and resultSetAddRPN does the actual search. */
502 void zebra_search_rpn (ZebraHandle zh, ODR stream, ODR decode,
503 Z_RPNQuery *query, int num_bases, char **basenames,
507 if (zebra_register_lock (zh))
509 map_basenames (zh, stream, &num_bases, &basenames);
510 resultSetAddRPN (zh, stream, decode, query, num_bases, basenames, setname);
512 zebra_register_unlock (zh);
/* Fetches num_recs records from result set setname into recs[]. The
 * requested positions (recs[i].position) are collected into a temporary
 * array, resolved to system numbers and scores by zebraPosSetCreate, and
 * each record is then fetched with zebra_record_fetch. On failure
 * zh->errString is set to a stream-allocated copy of either the set name
 * or the offending position. */
515 void zebra_records_retrieve (ZebraHandle zh, ODR stream,
516 const char *setname, Z_RecordComposition *comp,
517 oid_value input_format, int num_recs,
518 ZebraRetrievalRecord *recs)
523 if (zebra_register_lock (zh))
526 pos_array = (int *) xmalloc (num_recs * sizeof(*pos_array));
527 for (i = 0; i<num_recs; i++)
528 pos_array[i] = recs[i].position;
529 poset = zebraPosSetCreate (zh, setname, num_recs, pos_array);
/* Position set could not be created: report the set name. */
532 logf (LOG_DEBUG, "zebraPosSetCreate error");
534 zh->errString = nmem_strdup (stream->mem, setname);
538 for (i = 0; i<num_recs; i++)
/* Error detail for a single position is its decimal representation. */
544 sprintf (num_str, "%d", pos_array[i]);
546 zh->errString = nmem_strdup (stream->mem, num_str);
552 zebra_record_fetch (zh, poset[i].sysno, poset[i].score,
553 stream, input_format, comp,
554 &recs[i].format, &recs[i].buf,
557 recs[i].errString = NULL;
560 zebraPosSetDestroy (zh, poset, num_recs);
562 zebra_register_unlock (zh);
/* Scans the index for terms around zapt. The register is locked, database
 * names are translated with map_basenames and the work is delegated to
 * rpn_scan, which fills in position, num_entries, entries and
 * is_partial. */
566 void zebra_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
567 oid_value attributeset,
568 int num_bases, char **basenames,
569 int *position, int *num_entries, ZebraScanEntry **entries,
572 if (zebra_register_lock (zh))
578 map_basenames (zh, stream, &num_bases, &basenames);
579 rpn_scan (zh, stream, zapt, attributeset,
580 num_bases, basenames, position,
581 num_entries, entries, is_partial);
582 zebra_register_unlock (zh);
/* Sorts the named input result sets into output_setname according to
 * sort_sequence by delegating to resultSetSort while the register is
 * locked; sort_status receives the outcome from resultSetSort. */
585 void zebra_sort (ZebraHandle zh, ODR stream,
586 int num_input_setnames, const char **input_setnames,
587 const char *output_setname, Z_SortKeySpecList *sort_sequence,
590 if (zebra_register_lock (zh))
592 resultSetSort (zh, stream->mem, num_input_setnames, input_setnames,
593 output_setname, sort_sequence, sort_status);
594 zebra_register_unlock (zh);
/* Deletes result sets. function selects between deleting the listed sets
 * (Z_DeleteRequest_list) and deleting all sets (Z_DeleteRequest_all).
 * Returns Z_DeleteStatus_systemProblemAtTarget when the register cannot
 * be locked. Otherwise the overall status starts as
 * Z_DeleteStatus_success and is replaced by
 * Z_DeleteStatus_resultSetDidNotExist if any per-set status reports it. */
597 int zebra_deleleResultSet(ZebraHandle zh, int function,
598 int num_setnames, char **setnames,
602 if (zebra_register_lock (zh))
603 return Z_DeleteStatus_systemProblemAtTarget;
606 case Z_DeleteRequest_list:
607 resultSetDestroy (zh, num_setnames, setnames, statuses);
609 case Z_DeleteRequest_all:
/* -1 as the count asks resultSetDestroy to drop every set. */
610 resultSetDestroy (zh, -1, 0, statuses);
613 zebra_register_unlock (zh);
614 status = Z_DeleteStatus_success;
615 for (i = 0; i<num_setnames; i++)
616 if (statuses[i] == Z_DeleteStatus_resultSetDidNotExist)
617 status = statuses[i];
/* Accessor for the session's error code.
 * NOTE(review): presumably returns zh->errCode; confirm in the body. */
621 int zebra_errCode (ZebraHandle zh)
/* Returns the Bib-1 diagnostic message text (diagbib1_str) for the
 * session's current error code. */
626 const char *zebra_errString (ZebraHandle zh)
628 return diagbib1_str (zh->errCode);
/* Returns the additional error information string stored in the session
 * (zh->errString). The pointer is returned as stored, not copied. */
631 char *zebra_errAdd (ZebraHandle zh)
633 return zh->errString;
/* Accessor for the session's hit count.
 * NOTE(review): presumably the hit count of the latest search; confirm in
 * the body. */
636 int zebra_hits (ZebraHandle zh)
/* Checks user / pass against the service's password database. The
 * condition below holds when no password database is configured or when
 * passwd_db_auth returns zero for the credentials.
 * NOTE(review): the values returned for each outcome are not shown by the
 * lines here. */
641 int zebra_auth (ZebraService zh, const char *user, const char *pass)
643 if (!zh->passwd_db || !passwd_db_auth (zh->passwd_db, user, pass))
/* Start of a remote record import into database. The lines shown only
 * take and release the register lock. */
648 void zebra_admin_import_begin (ZebraHandle zh, const char *database)
650 if (zebra_register_lock (zh))
652 zebra_register_unlock(zh);
/* Imports the records carried by one Z39.50 Segment. An empty segment
 * (no records) triggers a flush of the EXPLAIN information. Otherwise
 * every record that is an intermediate fragment with a
 * not-externally-tagged octet string is indexed through
 * extract_rec_in_mem as record type "grs.sgml" in database "Default".
 * A separator and at most the first 100 bytes of each such record are
 * written to stdout. */
655 void zebra_admin_import_segment (ZebraHandle zh, Z_Segment *segment)
657 if (zebra_register_lock (zh))
/* An empty segment is handled by flushing EXPLAIN data. */
659 if (segment->num_segmentRecords == 0)
661 zebraExplain_flush (zh->service->zei, 1, zh);
668 for (i = 0; i<segment->num_segmentRecords; i++)
670 Z_NamePlusRecord *npr = segment->segmentRecords[i];
671 printf ("--------------%d--------------------\n", i);
672 if (npr->which == Z_NamePlusRecord_intermediateFragment)
674 Z_FragmentSyntax *fragment = npr->u.intermediateFragment;
675 if (fragment->which == Z_FragmentSyntax_notExternallyTagged)
677 Odr_oct *oct = fragment->u.notExternallyTagged;
/* Precision caps the printed preview at 100 bytes. */
678 printf ("%.*s", (oct->len > 100 ? 100 : oct->len) ,
682 extract_rec_in_mem (zh, "grs.sgml",
684 "Default", 0 /* delete_flag */,
689 0 /* match criteria */);
694 zebra_register_unlock(zh);
/* Creates (announces) a new database in the EXPLAIN information. When
 * zebraExplain_newDatabase returns non-zero, the session's errString is
 * set to a static message saying the database already exists. */
697 void zebra_admin_create (ZebraHandle zh, const char *database)
699 ZebraService zs = zh->service;
700 if (zebra_register_lock(zh))
705 /* announce database */
706 if (zebraExplain_newDatabase (zs->zei, database, 0 /* explainDatabase */))
709 zh->errString = "Database already exist";
711 zebra_register_unlock(zh);
/* Normalises input_str (input_len bytes) for register type reg_id using
 * zebra_replace on the service's character maps, and writes the result,
 * NUL-terminated, to output_str. On this path the return value is the
 * length of the normalised string.
 * NOTE(review): the values returned when no character maps are loaded or
 * when the result does not fit in output_len are not shown by the lines
 * here. */
714 int zebra_string_norm (ZebraHandle zh, unsigned reg_id,
715 const char *input_str, int input_len,
716 char *output_str, int output_len)
719 if (!zh->service->zebra_maps)
721 wrbuf = zebra_replace(zh->service->zebra_maps, reg_id, "",
722 input_str, input_len);
/* ">=" leaves room for the terminating NUL written below. */
725 if (wrbuf_len(wrbuf) >= output_len)
727 if (wrbuf_len(wrbuf))
728 memcpy (output_str, wrbuf_buf(wrbuf), wrbuf_len(wrbuf));
729 output_str[wrbuf_len(wrbuf)] = '\0';
730 return wrbuf_len(wrbuf);
/* Record-extraction "init" callback: prepares word w for use with control
 * block p by handing over the character maps and the sequence-number
 * array, and defaulting the attribute set to Bib-1 (VAL_BIB1). */
733 static void extract_init (struct recExtractCtrl *p, RecWord *w)
735 w->zebra_maps = p->zebra_maps;
736 w->seqnos = p->seqno;
737 w->attrSet = VAL_BIB1;
/* Appends one index key for word p to the session's record-key buffer
 * (zh->keys). The buffer is grown by 128000 bytes whenever fewer than
 * 1024 bytes of headroom remain. Each entry records attribute set,
 * attribute use, register type, the string and the sequence number; the
 * comparisons with the prev* fields detect values unchanged since the
 * previous entry.
 * NOTE(review): presumably unchanged attribute values and small
 * sequence-number deltas (1..15) are encoded compactly in a lead byte;
 * the lines that build that byte are not shown here. */
743 static void extract_add_index_string (RecWord *p, const char *string,
747 unsigned char attrSet;
748 unsigned short attrUse;
751 int *pseqno = &p->seqnos[p->reg_type];
752 ZebraHandle zh = p->extractCtrl->handle;
753 struct recKeys *keys = &zh->keys;
/* Make sure there is room for this entry; grow and copy if not. */
755 if (keys->buf_used+1024 > keys->buf_max)
759 b = (char *) xmalloc (keys->buf_max += 128000);
760 if (keys->buf_used > 0)
761 memcpy (b, keys->buf, keys->buf_used);
765 dst = keys->buf + keys->buf_used;
767 attrSet = p->attrSet;
768 if (keys->buf_used > 0 && keys->prevAttrSet == attrSet)
771 keys->prevAttrSet = attrSet;
772 attrUse = p->attrUse;
773 if (keys->buf_used > 0 && keys->prevAttrUse == attrUse)
776 keys->prevAttrUse = attrUse;
/* Distance from the previous sequence number, biased by one. */
778 diff = 1 + *pseqno - keys->prevSeqNo;
779 if (diff >= 1 && diff <= 15)
784 keys->prevSeqNo = *pseqno;
790 memcpy (dst, &attrSet, sizeof(attrSet));
791 dst += sizeof(attrSet);
795 memcpy (dst, &attrUse, sizeof(attrUse));
796 dst += sizeof(attrUse);
798 *dst++ = p->reg_type;
799 memcpy (dst, string, length);
805 memcpy (dst, pseqno, sizeof(*pseqno));
806 dst += sizeof(*pseqno);
808 keys->buf_used = dst - keys->buf;
/* Records a sort key for word p in the session's sort-key list. The
 * existing list is searched for an entry with the same attribute set and
 * use; a new entry holding a private copy of the string (length bytes,
 * not NUL-terminated by the lines shown) is allocated and tagged with the
 * word's attribute set and use.
 * NOTE(review): presumably a key that is already present is left alone;
 * confirm in the body. */
813 static void extract_add_sort_string (RecWord *p, const char *string,
817 ZebraHandle zh = p->extractCtrl->handle;
818 struct sortKey *sortKeys = zh->sortKeys;
820 for (sk = sortKeys; sk; sk = sk->next)
821 if (sk->attrSet == p->attrSet && sk->attrUse == p->attrUse)
824 sk = (struct sortKey *) xmalloc (sizeof(*sk));
828 sk->string = (char *) xmalloc (length);
830 memcpy (sk->string, string, length);
832 sk->attrSet = p->attrSet;
833 sk->attrUse = p->attrUse;
/* Routes a normalised string either to the sort-key list or to the index
 * key buffer, depending on whether the word's register type is a sort
 * register according to the character maps. */
836 static void extract_add_string (RecWord *p, const char *string, int length)
839 if (zebra_maps_is_sort (p->zebra_maps, p->reg_type))
840 extract_add_sort_string (p, string, length);
842 extract_add_index_string (p, string, length);
/* Word-level tokenisation of p->string. Input is run through
 * zebra_maps_input; runs mapped to CHR_SPACE are skipped, and each run of
 * non-space mappings is gathered into buf (at most IT_MAX_WORD bytes) and
 * added with extract_add_string. Finally the register's sequence number
 * is bumped once more so this field is separated from the next one. */
845 static void extract_add_incomplete_field (RecWord *p)
847 const char *b = p->string;
848 int remain = p->length;
849 const char **map = 0;
852 map = zebra_maps_input(p->zebra_maps, p->reg_type, &b, remain);
856 char buf[IT_MAX_WORD+1];
/* Skip separators between words. */
860 while (map && *map && **map == *CHR_SPACE)
862 remain = p->length - (b - p->string);
864 map = zebra_maps_input(p->zebra_maps, p->reg_type, &b, remain);
/* Collect the mapped characters of one word. */
871 while (map && *map && **map != *CHR_SPACE)
873 const char *cp = *map;
875 while (i < IT_MAX_WORD && *cp)
877 remain = p->length - (b - p->string);
879 map = zebra_maps_input(p->zebra_maps, p->reg_type, &b, remain);
885 extract_add_string (p, buf, i);
887 (p->seqnos[p->reg_type])++; /* to separate this from next one */
/* Whole-field normalisation of p->string. The input is mapped with
 * zebra_maps_input; runs of CHR_SPACE are skipped, a single CHR_SPACE is
 * inserted between words once something has been collected, and the
 * mapped characters are gathered into buf up to IT_MAX_WORD bytes. The
 * collected buffer is added with a single extract_add_string call. */
890 static void extract_add_complete_field (RecWord *p)
892 const char *b = p->string;
893 char buf[IT_MAX_WORD+1];
894 const char **map = 0;
895 int i = 0, remain = p->length;
898 map = zebra_maps_input (p->zebra_maps, p->reg_type, &b, remain);
900 while (remain > 0 && i < IT_MAX_WORD)
/* Skip separators. */
902 while (map && *map && **map == *CHR_SPACE)
904 remain = p->length - (b - p->string);
906 map = zebra_maps_input(p->zebra_maps, p->reg_type, &b, remain);
/* One separator between words, never at the very start. */
913 if (i && i < IT_MAX_WORD)
914 buf[i++] = *CHR_SPACE;
915 while (map && *map && **map != *CHR_SPACE)
917 const char *cp = *map;
919 if (i >= IT_MAX_WORD)
921 while (i < IT_MAX_WORD && *cp)
923 remain = p->length - (b - p->string);
925 map = zebra_maps_input (p->zebra_maps, p->reg_type, &b,
933 extract_add_string (p, buf, i);
/* Record-extraction "tokenAdd" callback. If zebra_replace yields a
 * replacement buffer for the word, the word's string and length are
 * redirected to it. The word is then indexed either as a complete field
 * or word by word, depending on zebra_maps_is_complete for its register
 * type. */
936 static void extract_token_add (RecWord *p)
939 if ((wrbuf = zebra_replace(p->zebra_maps, p->reg_type, 0,
940 p->string, p->length)))
942 p->string = wrbuf_buf(wrbuf);
943 p->length = wrbuf_len(wrbuf);
945 if (zebra_maps_is_complete (p->zebra_maps, p->reg_type))
946 extract_add_complete_field (p);
948 extract_add_incomplete_field(p);
/* Record-extraction "schemaAdd" callback: registers schema oid in the
 * service's EXPLAIN information. p->handle carries the ZebraHandle. */
951 static void extract_schema_add (struct recExtractCtrl *p, Odr_oid *oid)
953 ZebraHandle zh = (ZebraHandle) (p->handle);
954 zebraExplain_addSchema (zh->service->zei, oid);
/* Writes the sort keys in list *skp to the service's sort index for
 * record sysno: the index is positioned on the record, and for each key
 * its attribute use selects the sort type before the string is added.
 * NOTE(review): how cmd is used, and whether the list entries are freed
 * and *skp reset, is not shown by the lines here. */
957 static void extract_flushSortKeys (ZebraHandle zh, SYSNO sysno,
958 int cmd, struct sortKey **skp)
960 struct sortKey *sk = *skp;
961 SortIdx sortIdx = zh->service->sortIdx;
963 sortIdx_sysno (sortIdx, sysno);
/* Successor is read up front, before the current entry is processed. */
966 struct sortKey *sk_next = sk->next;
967 sortIdx_type (sortIdx, sk->attrUse);
968 sortIdx_add (sortIdx, sk->string, sk->length);
/* Resets encoder state i; called before the first key of each new term
 * is written (see extract_flushWriteKeys). */
983 void encode_key_init (struct encode_info *i)
/* Writes integer d to bp in a variable-length, most-significant-byte
 * first form. The leading byte carries a length marker in its high bits:
 * 128 is added for values up to 4194303, 192 for larger ones.
 * NOTE(review): presumably shorter forms exist for smaller values and the
 * advanced bp is returned; those lines are not shown here. */
990 char *encode_key_int (int d, char *bp)
999 else if (d <= 4194303)
1001 *bp++ = 128 + (d>>16);
1002 *bp++ = (d>>8) & 255;
1007 *bp++ = 192 + (d>>24);
1008 *bp++ = (d>>16) & 255;
1009 *bp++ = (d>>8) & 255;
/* Encodes one key entry k and writes it to outf. k starts with a
 * NUL-terminated string, which is copied to the output buffer; the byte
 * after the terminator (*k) is the command and is followed by a struct
 * it_key. sysno is stored as a delta against the previous entry, doubled
 * and combined with the command byte; seqno is stored as a delta as well.
 * The encoder state in i is updated to the values just written. A failed
 * fwrite is logged as fatal. */
1015 void encode_key_write (char *k, struct encode_info *i, FILE *outf)
/* Copy the term string including its terminating NUL. */
1020 while ((*bp++ = *k++))
1022 memcpy (&key, k+1, sizeof(struct it_key));
1023 bp = encode_key_int ( (key.sysno - i->sysno) * 2 + *k, bp);
1024 if (i->sysno != key.sysno)
1026 i->sysno = key.sysno;
1029 else if (!i->seqno && !key.seqno && i->cmd == *k)
1031 bp = encode_key_int (key.seqno - i->seqno, bp);
1032 i->seqno = key.seqno;
1034 if (fwrite (i->buf, bp - i->buf, 1, outf) != 1)
1036 logf (LOG_FATAL|LOG_ERRNO, "fwrite");
/* Writes the keys collected in zh->key_buf to a new temporary section
 * file. Nothing is done when there is no buffer or no pending keys.
 * Otherwise the section counter is incremented, the key pointers are
 * sorted, and the keys are written through encode_key_write; the encoder
 * is re-initialised whenever the term string differs from the previous
 * one, and only the part after the term is written when it repeats.
 * NOTE(review): the statements that use key_buf / ptr_top / key_file_no
 * without the zh-> prefix look like an alternative implementation,
 * presumably disabled by a preprocessor conditional - confirm. */
1041 static void extract_flushWriteKeys (ZebraHandle zh)
1044 char out_fname[200];
1046 struct encode_info encode_info;
1047 int ptr_i = zh->ptr_i;
1051 if (!zh->key_buf || ptr_i <= 0)
1054 (zh->key_file_no)++;
1055 logf (LOG_LOG, "sorting section %d", (zh->key_file_no));
/* Key pointers occupy the top end of key_buf. */
1057 qsort (zh->key_buf + zh->ptr_top - ptr_i, ptr_i, sizeof(char*),
1059 extract_get_fname_tmp (zh, out_fname, zh->key_file_no);
1061 if (!(outf = fopen (out_fname, "wb")))
1063 logf (LOG_FATAL|LOG_ERRNO, "fopen %s", out_fname);
1066 logf (LOG_LOG, "writing section %d", zh->key_file_no);
1067 prevcp = cp = (zh->key_buf)[zh->ptr_top - ptr_i];
1069 encode_key_init (&encode_info);
1070 encode_key_write (cp, &encode_info, outf);
1074 cp = (zh->key_buf)[zh->ptr_top - ptr_i];
/* New term: restart delta encoding and write the full entry. */
1075 if (strcmp (cp, prevcp))
1077 encode_key_init (&encode_info);
1078 encode_key_write (cp, &encode_info, outf);
/* Same term: skip the term string. */
1082 encode_key_write (cp + strlen(cp), &encode_info, outf);
1085 qsort (key_buf + ptr_top-ptr_i, ptr_i, sizeof(char*), key_x_compare);
1086 extract_get_fname_tmp (out_fname, key_file_no);
1088 if (!(outf = fopen (out_fname, "wb")))
1090 logf (LOG_FATAL|LOG_ERRNO, "fopen %s", out_fname);
1093 logf (LOG_LOG, "writing section %d", key_file_no);
1095 prevcp = key_buf[ptr_top-i];
1097 if (!--i || strcmp (prevcp, key_buf[ptr_top-i]))
1099 key_y_len = strlen(prevcp)+1;
1101 logf (LOG_LOG, "key_y_len: %2d %02x %02x %s",
1102 key_y_len, prevcp[0], prevcp[1], 2+prevcp);
1104 qsort (key_buf + ptr_top-ptr_i, ptr_i - i,
1105 sizeof(char*), key_y_compare);
1106 cp = key_buf[ptr_top-ptr_i];
1108 encode_key_init (&encode_info);
1109 encode_key_write (cp, &encode_info, outf);
1112 cp = key_buf[ptr_top-ptr_i];
1113 encode_key_write (cp+key_y_len, &encode_info, outf);
1117 prevcp = key_buf[ptr_top-ptr_i];
1122 logf (LOG_FATAL|LOG_ERRNO, "fclose %s", out_fname);
1125 logf (LOG_LOG, "finished section %d", zh->key_file_no);
/* Key storage is empty again after the flush. */
1127 zh->key_buf_used = 0;
/* Expands the compact per-record key buffer reckeys into sortable key
 * entries in zh->key_buf for record sysno. cmd distinguishes insert
 * (non-zero) from delete (zero) and also drives the EXPLAIN record count
 * up or down. key_buf is one 8 MB area: key bytes grow from the bottom
 * (key_buf_used) and the pointers to them are stored from the top
 * (ptr_top - ptr_i); when fewer than 1024 bytes remain between the two,
 * the buffer is flushed to a section file first. Every entry consists of
 * the code for the (attrSet, attrUse) pair, the term bytes, a NUL, the
 * cmd byte and a struct it_key.
 * NOTE(review): the lead-byte decoding that decides which of attrSet,
 * attrUse and seqno are present is not shown by the lines here. */
1130 static void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno,
1131 int cmd, struct recKeys *reckeys)
1133 unsigned char attrSet = (unsigned char) -1;
1134 unsigned short attrUse = (unsigned short) -1;
1137 ZebraExplainInfo zei = zh->service->zei;
1141 int mem = 8*1024*1024;
1142 zh->key_buf = (char**) xmalloc (mem);
1143 zh->ptr_top = mem/sizeof(char*);
1145 zh->key_buf_used = 0;
1146 zh->key_file_no = 0;
1148 zebraExplain_recordCountIncrement (zei, cmd ? 1 : -1);
1149 while (off < reckeys->buf_used)
1151 const char *src = reckeys->buf + off;
1159 memcpy (&attrSet, src, sizeof(attrSet));
1160 src += sizeof(attrSet);
1164 memcpy (&attrUse, src, sizeof(attrUse));
1165 src += sizeof(attrUse);
/* Flush when key bytes and pointer area are about to collide. */
1167 if (zh->key_buf_used + 1024 > (zh->ptr_top-zh->ptr_i)*sizeof(char*))
1168 extract_flushWriteKeys (zh);
1170 (zh->key_buf)[zh->ptr_top - zh->ptr_i] =
1171 (char*)zh->key_buf + zh->key_buf_used;
/* Map (attrSet, attrUse) to its code, registering it if unknown. */
1173 ch = zebraExplain_lookupSU (zei, attrSet, attrUse);
1175 ch = zebraExplain_addSU (zei, attrSet, attrUse);
1178 key_SU_code (ch,((char*)zh->key_buf) + zh->key_buf_used);
1181 ((char*)zh->key_buf) [(zh->key_buf_used)++] = *src++;
1183 ((char*)(zh->key_buf))[(zh->key_buf_used)++] = '\0';
1184 ((char*)(zh->key_buf))[(zh->key_buf_used)++] = cmd;
/* Sequence number: small delta taken from the lead byte, or ... */
1187 seqno += ((lead>>2) & 15)-1;
/* ... an explicit value stored in the buffer. */
1190 memcpy (&seqno, src, sizeof(seqno));
1191 src += sizeof(seqno);
1195 memcpy ((char*)zh->key_buf + zh->key_buf_used, &key, sizeof(key));
1196 (zh->key_buf_used) += sizeof(key);
1197 off = src - reckeys->buf;
/* The buffer must have been consumed exactly. */
1199 assert (off == reckeys->buf_used);
/* Completes an indexing run: writes any keys still buffered to a section
 * file and then merges the section files via zebra_index_merge. */
1202 static void extract_index (ZebraHandle zh)
1204 extract_flushWriteKeys (zh);
1205 zebra_index_merge (zh);
/* Indexes an EXPLAIN record whose parsed tree is n. handle is the
 * ZebraHandle. The record's database is selected in the EXPLAIN info and,
 * if selecting fails, created. The session key buffer is reset, an
 * extraction control block is filled in (all sequence numbers start at 0)
 * and grs_extract_tree collects the keys. Keys stored with the record by
 * an earlier run (recInfo_delKeys) are flushed as deletions first; the
 * new keys and sort keys are then flushed as insertions. The key buffer
 * is handed over to the record as its new delete keys and detached from
 * the session (buf = NULL, buf_max = 0) so it is not reused. */
1208 static int explain_extract (void *handle, Record rec, data1_node *n)
1210 ZebraHandle zh = (ZebraHandle) handle;
1211 struct recExtractCtrl extractCtrl;
1214 if (zebraExplain_curDatabase (zh->service->zei,
1215 rec->info[recInfo_databaseName]))
1218 if (zebraExplain_newDatabase (zh->service->zei,
1219 rec->info[recInfo_databaseName], 0))
/* Start with an empty key buffer and no "previous" values. */
1223 zh->keys.buf_used = 0;
1224 zh->keys.prevAttrUse = -1;
1225 zh->keys.prevAttrSet = -1;
1226 zh->keys.prevSeqNo = 0;
1229 extractCtrl.init = extract_init;
1230 extractCtrl.tokenAdd = extract_token_add;
1231 extractCtrl.schemaAdd = extract_schema_add;
1232 extractCtrl.dh = zh->service->dh;
1233 for (i = 0; i<256; i++)
1234 extractCtrl.seqno[i] = 0;
1235 extractCtrl.zebra_maps = zh->service->zebra_maps;
1236 extractCtrl.flagShowRecords = 0;
1237 extractCtrl.handle = handle;
1239 grs_extract_tree(&extractCtrl, n);
1241 logf (LOG_LOG, "flush explain record, sysno=%d", rec->sysno);
/* Remove the keys written for the previous version of this record. */
1243 if (rec->size[recInfo_delKeys])
1245 struct recKeys delkeys;
1246 struct sortKey *sortKeys = 0;
1248 delkeys.buf_used = rec->size[recInfo_delKeys];
1249 delkeys.buf = rec->info[recInfo_delKeys];
1250 extract_flushSortKeys (zh, rec->sysno, 0, &sortKeys);
1251 extract_flushRecordKeys (zh, rec->sysno, 0, &delkeys);
1253 extract_flushRecordKeys (zh, rec->sysno, 1, &zh->keys);
1254 extract_flushSortKeys (zh, rec->sysno, 1, &zh->sortKeys);
/* The record takes ownership of the freshly built key buffer. */
1256 xfree (rec->info[recInfo_delKeys]);
1257 rec->size[recInfo_delKeys] = zh->keys.buf_used;
1258 rec->info[recInfo_delKeys] = zh->keys.buf;
1259 zh->keys.buf = NULL;
1260 zh->keys.buf_max = 0;
1264 static int extract_rec_in_mem (ZebraHandle zh, const char *recordType,
1265 const char *buf, size_t buf_size,
1266 const char *databaseName, int delete_flag,
1267 int test_mode, int *sysno,
1268 int store_keys, int store_data,
1269 const char *match_criteria)
1271 RecordAttr *recordAttr;
1272 struct recExtractCtrl extractCtrl;
1277 const char *fname = "<no file>";
1279 long recordOffset = 0;
1280 struct zebra_fetch_control fc;
1283 fc.record_int_buf = buf;
1284 fc.record_int_len = buf_size;
1285 fc.record_int_pos = 0;
1287 fc.record_offset = 0;
1289 extractCtrl.offset = 0;
1290 extractCtrl.readf = zebra_record_int_read;
1291 extractCtrl.seekf = zebra_record_int_seek;
1292 extractCtrl.tellf = zebra_record_int_tell;
1293 extractCtrl.endf = zebra_record_int_end;
1294 extractCtrl.fh = &fc;
1296 /* announce database */
1297 if (zebraExplain_curDatabase (zh->service->zei, databaseName))
1299 if (zebraExplain_newDatabase (zh->service->zei, databaseName, 0))
1303 recType_byName (zh->service->recTypes, recordType, subType,
1306 logf (LOG_WARN, "No such record type: %s", recordType);
1310 zh->keys.buf_used = 0;
1311 zh->keys.prevAttrUse = -1;
1312 zh->keys.prevAttrSet = -1;
1313 zh->keys.prevSeqNo = 0;
1317 extractCtrl.subType = subType;
1318 extractCtrl.init = extract_init;
1319 extractCtrl.tokenAdd = extract_token_add;
1320 extractCtrl.schemaAdd = extract_schema_add;
1321 extractCtrl.dh = zh->service->dh;
1322 extractCtrl.handle = zh;
1323 extractCtrl.zebra_maps = zh->service->zebra_maps;
1324 extractCtrl.flagShowRecords = 0;
1325 for (i = 0; i<256; i++)
1327 if (zebra_maps_is_positioned(zh->service->zebra_maps, i))
1328 extractCtrl.seqno[i] = 1;
1330 extractCtrl.seqno[i] = 0;
1333 r = (*recType->extract)(clientData, &extractCtrl);
1335 if (r == RECCTRL_EXTRACT_EOF)
1337 else if (r == RECCTRL_EXTRACT_ERROR)
1339 /* error occured during extraction ... */
1341 yaz_log (LOG_WARN, "extract error");
1343 if (rGroup->flagRw &&
1344 records_processed < rGroup->fileVerboseLimit)
1346 logf (LOG_WARN, "fail %s %s %ld", rGroup->recordType,
1347 fname, (long) recordOffset);
1352 if (zh->keys.buf_used == 0)
1354 /* the extraction process returned no information - the record
1355 is probably empty - unless flagShowRecords is in use */
1358 logf (LOG_WARN, "No keys generated for record");
1359 logf (LOG_WARN, " The file is probably empty");
1362 /* match criteria */
1369 logf (LOG_LOG, "delete %s %s %ld", recordType,
1370 fname, (long) recordOffset);
1371 logf (LOG_WARN, "cannot delete record above (seems new)");
1374 logf (LOG_LOG, "add %s %s %ld", recordType, fname,
1375 (long) recordOffset);
1376 rec = rec_new (zh->service->records);
1378 *sysno = rec->sysno;
1380 recordAttr = rec_init_attr (zh->service->zei, rec);
1385 dict_insert (matchDict, matchStr, sizeof(*sysno), sysno);
1388 extract_flushRecordKeys (zh, *sysno, 1, &zh->keys);
1389 extract_flushSortKeys (zh, *sysno, 1, &zh->sortKeys);
1393 /* record already exists */
1394 struct recKeys delkeys;
1396 rec = rec_get (zh->service->records, *sysno);
1399 recordAttr = rec_init_attr (zh->service->zei, rec);
1401 if (recordAttr->runNumber ==
1402 zebraExplain_runNumberIncrement (zh->service->zei, 0))
1404 logf (LOG_LOG, "skipped %s %s %ld", recordType,
1405 fname, (long) recordOffset);
1409 delkeys.buf_used = rec->size[recInfo_delKeys];
1410 delkeys.buf = rec->info[recInfo_delKeys];
1411 extract_flushSortKeys (zh, *sysno, 0, &zh->sortKeys);
1412 extract_flushRecordKeys (zh, *sysno, 0, &delkeys);
1415 /* record going to be deleted */
1416 if (!delkeys.buf_used)
1418 logf (LOG_LOG, "delete %s %s %ld", recordType,
1419 fname, (long) recordOffset);
1420 logf (LOG_WARN, "cannot delete file above, storeKeys false");
1424 logf (LOG_LOG, "delete %s %s %ld", recordType,
1425 fname, (long) recordOffset);
1428 dict_delete (matchDict, matchStr);
1430 rec_del (zh->service->records, &rec);
1437 /* record going to be updated */
1438 if (!delkeys.buf_used)
1440 logf (LOG_LOG, "update %s %s %ld", recordType,
1441 fname, (long) recordOffset);
1442 logf (LOG_WARN, "cannot update file above, storeKeys false");
1446 logf (LOG_LOG, "update %s %s %ld", recordType,
1447 fname, (long) recordOffset);
1448 extract_flushRecordKeys (zh, *sysno, 1, &zh->keys);
1452 /* update file type */
1453 xfree (rec->info[recInfo_fileType]);
1454 rec->info[recInfo_fileType] =
1455 rec_strdup (recordType, &rec->size[recInfo_fileType]);
1457 /* update filename */
1458 xfree (rec->info[recInfo_filename]);
1459 rec->info[recInfo_filename] =
1460 rec_strdup (fname, &rec->size[recInfo_filename]);
1462 /* update delete keys */
1463 xfree (rec->info[recInfo_delKeys]);
1464 if (zh->keys.buf_used > 0 && store_keys == 1)
1466 rec->size[recInfo_delKeys] = zh->keys.buf_used;
1467 rec->info[recInfo_delKeys] = zh->keys.buf;
1468 zh->keys.buf = NULL;
1469 zh->keys.buf_max = 0;
1473 rec->info[recInfo_delKeys] = NULL;
1474 rec->size[recInfo_delKeys] = 0;
1477 /* save file size of original record */
1478 zebraExplain_recordBytesIncrement (zh->service->zei,
1479 - recordAttr->recordSize);
1481 recordAttr->recordSize = fi->file_moffset - recordOffset;
1482 if (!recordAttr->recordSize)
1483 recordAttr->recordSize = fi->file_max - recordOffset;
1485 recordAttr->recordSize = buf_size;
1487 zebraExplain_recordBytesIncrement (zh->service->zei,
1488 recordAttr->recordSize);
1490 /* set run-number for this record */
1491 recordAttr->runNumber =
1492 zebraExplain_runNumberIncrement (zh->service->zei, 0);
1494 /* update store data */
1495 xfree (rec->info[recInfo_storeData]);
1496 if (store_data == 1)
1498 rec->size[recInfo_storeData] = recordAttr->recordSize;
1499 rec->info[recInfo_storeData] = (char *)
1500 xmalloc (recordAttr->recordSize);
1502 memcpy (rec->info[recInfo_storeData], buf, recordAttr->recordSize);
1504 if (lseek (fi->fd, recordOffset, SEEK_SET) < 0)
1506 logf (LOG_ERRNO|LOG_FATAL, "seek to %ld in %s",
1507 (long) recordOffset, fname);
1510 if (read (fi->fd, rec->info[recInfo_storeData], recordAttr->recordSize)
1511 < recordAttr->recordSize)
1513 logf (LOG_ERRNO|LOG_FATAL, "read %d bytes of %s",
1514 recordAttr->recordSize, fname);
1521 rec->info[recInfo_storeData] = NULL;
1522 rec->size[recInfo_storeData] = 0;
1524 /* update database name */
1525 xfree (rec->info[recInfo_databaseName]);
1526 rec->info[recInfo_databaseName] =
1527 rec_strdup (databaseName, &rec->size[recInfo_databaseName]);
1530 recordAttr->recordOffset = recordOffset;
1532 /* commit this record */
1533 rec_put (zh->service->records, &rec);