1 /* $Id: retrieve.c,v 1.59 2006-11-24 12:21:31 marc Exp $
2 Copyright (C) 1995-2006
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
36 #include <yaz/diagbib1.h>
40 #define ZEBRA_XML_HEADER_STR "<record xmlns=\"http://www.indexdata.com/zebra/\""
/* Create a readable stream over the stored form of a record.
 *
 * If the record carries inline data (size[recInfo_storeData] > 0) a
 * memory stream is created over info[recInfo_storeData].  The visible
 * code also builds a file name in full_rep from info[recInfo_filename]
 * (prefixed with zh->path_reg and "/" when path_reg is set and the name
 * is not absolute), opens it read-only and creates a descriptor-based
 * stream positioned with recordAttr->recordOffset.  A failed open() is
 * logged as a warning together with errno.
 *
 * NOTE(review): parts of this definition (the `rec` parameter, the local
 * declarations, the branch structure and the return statements) are not
 * present in this copy of the file, so the exact return values cannot be
 * confirmed from here.
 */
42 static int zebra_create_record_stream(ZebraHandle zh,
44 struct ZebraRecStream *stream){
46 RecordAttr *recordAttr = rec_init_attr(zh->reg->zei, *rec);
48 if ((*rec)->size[recInfo_storeData] > 0)
49 zebra_create_stream_mem(stream, (*rec)->info[recInfo_storeData],
50 (*rec)->size[recInfo_storeData]);
/* NOTE(review): strcpy/strcat below do not bound the write and the
   declaration of full_rep is not visible here -- confirm it can hold
   path_reg + "/" + filename. */
56 if (zh->path_reg && !yaz_is_abspath((*rec)->info[recInfo_filename])){
57 strcpy(full_rep, zh->path_reg);
58 strcat(full_rep, "/");
59 strcat(full_rep, (*rec)->info[recInfo_filename]);
62 strcpy(full_rep, (*rec)->info[recInfo_filename]);
64 if ((fd = open (full_rep, O_BINARY|O_RDONLY)) == -1){
65 yaz_log (YLOG_WARN|YLOG_ERRNO, "Retrieve fail; missing file: %s",
70 zebra_create_stream_fd(stream, fd, recordAttr->recordOffset);
/* Split the tail of a zebra element set name into an index part and a
 * type part.
 *
 * The visible checks require the text to start with "::" followed by at
 * least one more character.  The first ':' found by strchr() then
 * separates the two parts:
 *   - no colon:            *index_len = strlen(elem), no type
 *   - colon, nothing after: handled as a separate case (body not visible)
 *   - colon and more text:  *index_len = cp - elem,
 *                           *type_len = strlen(cp+1)
 *
 * The caller in this file treats a zero return as "element set name not
 * valid".
 *
 * NOTE(review): the assignments to *index and *type, the step that skips
 * the leading "::", and the return statements are not present in this
 * copy of the file; presumably the outputs point into elem rather than
 * being copies -- confirm.
 */
77 static int parse_zebra_elem(const char *elem,
78 const char **index, size_t *index_len,
79 const char **type, size_t *type_len)
/* NOTE(review): the !(elem +1) and !(elem +2) terms below test the
   pointer value, not the character it points at; the character
   comparisons next to them are what actually validate the input. */
90 /* verify that '::' is in the beginning of *elem
91 and something more follows */
93 || !(elem +1) || ':' != *(elem +1)
94 || !(elem +2) || '\0' == *(elem +2))
97 /* pick out info from string after '::' */
99 cp = strchr(elem, ':');
101 if (!cp) /* index, no colon, no type */
104 *index_len = strlen(elem);
106 else if (cp[1] == '\0') /* colon, but no following type */
110 else /* index, colon and type */
113 *index_len = cp - elem;
115 *type_len = strlen(cp+1);
/* Render the index keys stored with a record as XML or SUTRS text.
 *
 * Steps visible in this function:
 *  - Only VAL_TEXT_XML and VAL_SUTRS are accepted as input_format; any
 *    other value is logged, *output_format is set to VAL_NONE and
 *    YAZ_BIB1_NO_SYNTAXES_AVAILABLE_FOR_THIS_REQUEST is returned.
 *  - elemsetname is split by parse_zebra_elem() into an optional index
 *    name and an optional type; a parse failure, or a type that is not
 *    exactly zero or one character long, returns
 *    YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_.
 *  - A non-empty index name is copied into a local 256-byte buffer and
 *    checked with zebraExplain_lookup_attr_str(); a result of -1 returns
 *    the same "element set name not valid" diagnostic.
 *  - The keys in rec->info[recInfo_delKeys] are iterated.  For each key
 *    the ordinal in key_in.mem[0] is looked up, the key is filtered
 *    against the requested index name and type, the term is converted
 *    with zebra_term_untrans() and, when the result is non-empty, written
 *    either as an <index> element (XML) or as one text line (SUTRS).
 *  - The accumulated text is copied into memory obtained from odr:
 *    *rec_lenp receives the byte count and *rec_bufp the buffer.  The
 *    copy is exactly *rec_lenp bytes; no terminator is appended here.
 *
 * NOTE(review): the `rec` parameter, several local declarations and the
 * final return statement are not present in this copy of the file.
 */
122 int zebra_special_index_fetch(ZebraHandle zh, zint sysno, ODR odr,
124 const char *elemsetname,
125 oid_value input_format,
126 oid_value *output_format,
127 char **rec_bufp, int *rec_lenp)
129 const char *retrieval_index;
130 size_t retrieval_index_len;
131 const char *retrieval_type;
132 size_t retrieval_type_len;
134 zebra_rec_keys_t keys;
136 /* set output variables before processing possible error states */
139 /* only accept XML and SUTRS requests */
140 if (input_format != VAL_TEXT_XML
141 && input_format != VAL_SUTRS){
142 yaz_log(YLOG_WARN, "unsupported format for element set zebra::%s",
144 *output_format = VAL_NONE;
145 return YAZ_BIB1_NO_SYNTAXES_AVAILABLE_FOR_THIS_REQUEST;
148 if (!parse_zebra_elem(elemsetname,
149 &retrieval_index, &retrieval_index_len,
150 &retrieval_type, &retrieval_type_len))
151 return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
/* the type, when present, must be a single character */
153 if (retrieval_type_len != 0 && retrieval_type_len != 1)
155 return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
158 if (retrieval_index_len)
160 char retrieval_index_cstr[256];
/* NOTE(review): index names of 255 bytes or more fail this size test
   and therefore skip the existence check below instead of being
   rejected -- confirm that is intended. */
162 if (retrieval_index_len < sizeof(retrieval_index_cstr) -1)
164 memcpy(retrieval_index_cstr, retrieval_index, retrieval_index_len);
165 retrieval_index_cstr[retrieval_index_len] = '\0';
167 if (zebraExplain_lookup_attr_str(zh->reg->zei,
168 zinfo_index_category_index,
169 (retrieval_type_len == 0 ? -1 :
171 retrieval_index_cstr) == -1)
172 return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
/* load the keys that were stored with the record */
176 keys = zebra_rec_keys_open();
177 zebra_rec_keys_set_buf(keys, rec->info[recInfo_delKeys],
178 rec->size[recInfo_delKeys], 0);
180 wrbuf = wrbuf_alloc();
181 if (zebra_rec_keys_rewind(keys)){
184 struct it_key key_in;
186 if (input_format == VAL_TEXT_XML)
188 *output_format = VAL_TEXT_XML;
189 wrbuf_printf(wrbuf, ZEBRA_XML_HEADER_STR
190 " sysno=\"" ZINT_FORMAT "\""
191 " set=\"zebra::index%s/\">\n",
194 else if (input_format == VAL_SUTRS)
195 *output_format = VAL_SUTRS;
197 while(zebra_rec_keys_read(keys, &str, &slen, &key_in)){
199 int ord = CAST_ZINT_TO_INT(key_in.mem[0]);
202 const char *string_index = 0;
203 size_t string_index_len;
204 char dst_buf[IT_MAX_WORD];
/* NOTE(review): string_index starts as 0 and strlen() is applied to it
   right after the lookup; this relies on the lookup always setting
   it -- confirm the lookup cannot fail for a stored ordinal. */
206 zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type, &db,
208 string_index_len = strlen(string_index);
210 /* process only if index is not defined,
211 or if defined and matching */
212 if (retrieval_index == 0
213 || (string_index_len == retrieval_index_len
214 && !memcmp(string_index, retrieval_index,
217 /* process only if type is not defined, or is matching */
218 if (retrieval_type == 0
219 || (retrieval_type_len == 1
220 && retrieval_type[0] == index_type)){
223 zebra_term_untrans(zh, index_type, dst_buf, str);
/* terms that convert to an empty string are not emitted */
224 if (strlen(dst_buf)){
226 if (input_format == VAL_TEXT_XML){
227 wrbuf_printf(wrbuf, " <index name=\"%s\"",
230 wrbuf_printf(wrbuf, " type=\"%c\"", index_type);
232 wrbuf_printf(wrbuf, " seq=\"" ZINT_FORMAT "\">",
233 key_in.mem[key_in.len -1]);
235 wrbuf_xmlputs(wrbuf, dst_buf);
236 wrbuf_printf(wrbuf, "</index>\n");
238 else if (input_format == VAL_SUTRS){
239 wrbuf_printf(wrbuf, "%s ", string_index);
241 wrbuf_printf(wrbuf, "%c", index_type);
243 for (i = 1; i < key_in.len; i++)
244 wrbuf_printf(wrbuf, " " ZINT_FORMAT,
247 /* zebra_term_untrans(zh, index_type, dst_buf, str); */
248 wrbuf_printf(wrbuf, " %s", dst_buf);
250 wrbuf_printf(wrbuf, "\n");
257 if (input_format == VAL_TEXT_XML)
258 wrbuf_printf(wrbuf, "</record>\n");
/* hand the result to the caller in memory obtained from odr, then
   release the temporary buffer and the key reader */
260 *rec_lenp = wrbuf_len(wrbuf);
261 *rec_bufp = odr_malloc(odr, *rec_lenp);
262 memcpy(*rec_bufp, wrbuf_buf(wrbuf), *rec_lenp);
263 wrbuf_free(wrbuf, 1);
264 zebra_rec_keys_close(keys);
/* Append one attribute, written as a space, name, equals sign and the
 * value in double quotes, to wrbuf.  The value is written through
 * wrbuf_xmlputs() (presumably XML-escaping it -- confirm against the
 * YAZ WRBUF documentation).
 * NOTE(review): the `value` parameter declaration and any guard around
 * these calls are not present in this copy of the file.
 */
269 static void retrieve_puts_attr(WRBUF wrbuf, const char *name,
274 wrbuf_printf(wrbuf, " %s=\"", name);
275 wrbuf_xmlputs(wrbuf, value);
276 wrbuf_printf(wrbuf, "\"");
/* Append one attribute with an integer value, written as a space, name,
 * equals sign and the value (formatted with %i) in double quotes, to
 * wrbuf.
 * NOTE(review): the `value` parameter declaration is not present in this
 * copy of the file; %i requires it to be an int.
 */
280 static void retrieve_puts_attr_int(WRBUF wrbuf, const char *name,
283 wrbuf_printf(wrbuf, " %s=\"%i\"", name, value);
/* Append one "name value" text line, terminated by a newline, to wrbuf.
 * NOTE(review): the `value` parameter declaration and any guard around
 * this call are not present in this copy of the file.
 */
286 static void retrieve_puts_str(WRBUF wrbuf, const char *name,
290 wrbuf_printf(wrbuf, "%s %s\n", name, value);
/* Append one "name value" text line with an integer value (formatted
 * with %i), terminated by a newline, to wrbuf.
 * NOTE(review): the `value` parameter declaration is not present in this
 * copy of the file; %i requires it to be an int.
 */
293 static void retrieve_puts_int(WRBUF wrbuf, const char *name,
296 wrbuf_printf(wrbuf, "%s %i\n", name, value);
/* Serve the special element sets, dispatching on elemsetname.
 *
 * Branches visible in this function, in order:
 *  - "meta::sysno": answered without reading the record.  SUTRS yields
 *    the bare sysno, XML yields an empty <record> element carrying the
 *    sysno; ret is set to
 *    YAZ_BIB1_NO_SYNTAXES_AVAILABLE_FOR_THIS_REQUEST on a path whose
 *    condition is not visible here.
 *  - For every other name the record is read with rec_get(); a failure
 *    is logged and YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS returned.
 *  - "data": the stored record is read through a record stream into a
 *    buffer of recordAttr->recordSize bytes obtained from odr;
 *    *output_format is set to input_format.
 *  - From here on only VAL_TEXT_XML and VAL_SUTRS are accepted; other
 *    formats are logged and rejected with
 *    YAZ_BIB1_NO_SYNTAXES_AVAILABLE_FOR_THIS_REQUEST.
 *  - "meta": sysno, database name, file name, file type, score, static
 *    rank and record size are written as XML attributes or as SUTRS
 *    lines.
 *  - names beginning with "index": delegated to
 *    zebra_special_index_fetch().
 *  - anything else: returns
 *    YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_.
 *
 * NOTE(review): the names compared here carry no "zebra::" prefix, so
 * the caller presumably strips it -- confirm.  Local declarations, the
 * release of `rec` and several return statements are not present in
 * this copy of the file.
 */
299 int zebra_special_fetch(ZebraHandle zh, zint sysno, int score, ODR odr,
300 const char *elemsetname,
301 oid_value input_format,
302 oid_value *output_format,
303 char **rec_bufp, int *rec_lenp)
307 /* set output variables before processing possible error states */
312 /* processing zebra::meta::sysno elemset without fetching binary data */
313 if (elemsetname && 0 == strcmp(elemsetname, "meta::sysno"))
316 WRBUF wrbuf = wrbuf_alloc();
317 if (input_format == VAL_SUTRS)
319 wrbuf_printf(wrbuf, ZINT_FORMAT, sysno);
320 *output_format = VAL_SUTRS;
322 else if (input_format == VAL_TEXT_XML)
324 wrbuf_printf(wrbuf, ZEBRA_XML_HEADER_STR
325 " sysno=\"" ZINT_FORMAT "\""
326 " set=\"zebra::%s\"/>\n",
328 *output_format = VAL_TEXT_XML;
/* NOTE(review): odr_strdup() needs a NUL-terminated string; confirm
   that wrbuf_buf() guarantees termination in the YAZ version used. */
330 *rec_lenp = wrbuf_len(wrbuf);
332 *rec_bufp = odr_strdup(odr, wrbuf_buf(wrbuf));
334 ret = YAZ_BIB1_NO_SYNTAXES_AVAILABLE_FOR_THIS_REQUEST;
335 wrbuf_free(wrbuf, 1);
339 /* fetching binary record up for all other display elementsets */
340 rec = rec_get(zh->reg->records, sysno);
343 yaz_log(YLOG_WARN, "rec_get fail on sysno=" ZINT_FORMAT, sysno);
344 return YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
347 /* processing special elementsetnames zebra::data */
348 if (elemsetname && 0 == strcmp(elemsetname, "data"))
350 struct ZebraRecStream stream;
351 RecordAttr *recordAttr = rec_init_attr(zh->reg->zei, rec);
/* NOTE(review): the result of zebra_create_record_stream() is not
   checked here although the stream is used immediately afterwards --
   confirm what happens when the record file cannot be opened. */
352 zebra_create_record_stream(zh, &rec, &stream);
353 *output_format = input_format;
354 *rec_lenp = recordAttr->recordSize;
355 *rec_bufp = (char *) odr_malloc(odr, *rec_lenp);
356 stream.readf(&stream, *rec_bufp, *rec_lenp);
357 stream.destroy(&stream);
362 /* only accept XML and SUTRS requests from now */
363 if (input_format != VAL_TEXT_XML && input_format != VAL_SUTRS)
365 yaz_log(YLOG_WARN, "unsupported format for element set zebra::%s",
367 return YAZ_BIB1_NO_SYNTAXES_AVAILABLE_FOR_THIS_REQUEST;
371 /* processing special elementsetnames zebra::meta:: */
372 if (elemsetname && 0 == strcmp(elemsetname, "meta"))
375 WRBUF wrbuf = wrbuf_alloc();
376 RecordAttr *recordAttr = rec_init_attr(zh->reg->zei, rec);
378 if (input_format == VAL_TEXT_XML)
380 *output_format = VAL_TEXT_XML;
382 wrbuf_printf(wrbuf, ZEBRA_XML_HEADER_STR
383 " sysno=\"" ZINT_FORMAT "\"", sysno);
384 retrieve_puts_attr(wrbuf, "base", rec->info[recInfo_databaseName]);
385 retrieve_puts_attr(wrbuf, "file", rec->info[recInfo_filename]);
386 retrieve_puts_attr(wrbuf, "type", rec->info[recInfo_fileType]);
388 retrieve_puts_attr_int(wrbuf, "score", score);
391 " rank=\"" ZINT_FORMAT "\""
393 " set=\"zebra::%s\"/>\n",
394 recordAttr->staticrank,
395 recordAttr->recordSize,
398 else if (input_format == VAL_SUTRS)
400 *output_format = VAL_SUTRS;
401 wrbuf_printf(wrbuf, "sysno " ZINT_FORMAT "\n", sysno);
402 retrieve_puts_str(wrbuf, "base", rec->info[recInfo_databaseName]);
403 retrieve_puts_str(wrbuf, "file", rec->info[recInfo_filename]);
404 retrieve_puts_str(wrbuf, "type", rec->info[recInfo_fileType]);
406 retrieve_puts_int(wrbuf, "score", score);
409 "rank " ZINT_FORMAT "\n"
412 recordAttr->staticrank,
413 recordAttr->recordSize,
416 *rec_lenp = wrbuf_len(wrbuf);
418 *rec_bufp = odr_strdup(odr, wrbuf_buf(wrbuf));
420 ret = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
422 wrbuf_free(wrbuf, 1);
427 /* processing special elementsetnames zebra::index:: */
/* the test below is a prefix match on the first five characters only */
428 if (elemsetname && 0 == strncmp(elemsetname, "index", 5)){
430 int ret = zebra_special_index_fetch(zh, sysno, odr, rec,
432 input_format, output_format,
441 return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
/* Fetch one record for presentation.
 *
 * Element set names that begin with "zebra::" are handed to
 * zebra_special_fetch().  For every other name the visible code:
 *  - reads the record with rec_get(); a failure is logged and
 *    YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS returned;
 *  - stores a copy of the database name, allocated from odr, in
 *    *basenamep;
 *  - opens a record stream and fills a recRetrieveCtrl structure
 *    (stream, file name, sysno, static rank, score, record size, odr,
 *    formats, composition, encoding, data handle, resources, snippets);
 *  - builds the document snippets from rec->info[recInfo_delKeys] and
 *    computes a snippet window against hit_snippet;
 *  - looks up the record type by the stored file type.  If none is
 *    found return_code becomes
 *    YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS; otherwise the type's
 *    retrieve handler is called and return_code, *output_format,
 *    *rec_bufp, *rec_lenp and *addinfo are taken from the control
 *    structure;
 *  - destroys the snippets and the stream.
 *
 * NOTE(review): the trailing parameter(s) (including `addinfo`), some
 * local declarations, the release of `rec` and the final return are not
 * present in this copy of the file.
 */
445 int zebra_record_fetch(ZebraHandle zh, zint sysno, int score,
446 zebra_snippets *hit_snippet, ODR odr,
447 oid_value input_format, Z_RecordComposition *comp,
448 oid_value *output_format,
449 char **rec_bufp, int *rec_lenp, char **basenamep,
453 char *fname, *file_type, *basename;
454 const char *elemsetname;
455 struct ZebraRecStream stream;
456 RecordAttr *recordAttr;
462 elemsetname = yaz_get_esn(comp);
464 /* processing zebra special elementset names of form 'zebra:: */
465 if (elemsetname && 0 == strncmp(elemsetname, "zebra::", 7))
466 return zebra_special_fetch(zh, sysno, score, odr,
468 input_format, output_format,
472 /* processing all other element set names */
473 rec = rec_get(zh->reg->records, sysno);
476 yaz_log(YLOG_WARN, "rec_get fail on sysno=" ZINT_FORMAT, sysno);
478 return YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
482 recordAttr = rec_init_attr(zh->reg->zei, rec);
484 file_type = rec->info[recInfo_fileType];
485 fname = rec->info[recInfo_filename];
486 basename = rec->info[recInfo_databaseName];
/* the caller gets its own copy of the database name, allocated from odr */
487 *basenamep = (char *) odr_malloc (odr, strlen(basename)+1);
488 strcpy (*basenamep, basename);
490 yaz_log(YLOG_DEBUG, "retrieve localno=" ZINT_FORMAT " score=%d",
/* NOTE(review): the result of zebra_create_record_stream() is not
   checked here although the stream is handed to the record type
   handler and destroyed later -- confirm the failure path. */
493 zebra_create_record_stream(zh, &rec, &stream);
497 zebra_snippets *snippet;
498 zebra_rec_keys_t reckeys = zebra_rec_keys_open();
500 struct recRetrieveCtrl retrieveCtrl;
502 retrieveCtrl.stream = &stream;
503 retrieveCtrl.fname = fname;
504 retrieveCtrl.localno = sysno;
505 retrieveCtrl.staticrank = recordAttr->staticrank;
506 retrieveCtrl.score = score;
507 retrieveCtrl.recordSize = recordAttr->recordSize;
508 retrieveCtrl.odr = odr;
509 retrieveCtrl.input_format = retrieveCtrl.output_format = input_format;
510 retrieveCtrl.comp = comp;
511 retrieveCtrl.encoding = zh->record_encoding;
512 retrieveCtrl.diagnostic = 0;
513 retrieveCtrl.addinfo = 0;
514 retrieveCtrl.dh = zh->reg->dh;
515 retrieveCtrl.res = zh->res;
516 retrieveCtrl.rec_buf = 0;
517 retrieveCtrl.rec_len = -1;
518 retrieveCtrl.hit_snippet = hit_snippet;
519 retrieveCtrl.doc_snippet = zebra_snippets_create();
521 zebra_rec_keys_set_buf(reckeys,
522 rec->info[recInfo_delKeys],
523 rec->size[recInfo_delKeys],
525 zebra_rec_keys_to_snippets(zh, reckeys, retrieveCtrl.doc_snippet);
526 zebra_rec_keys_close(reckeys);
/* NOTE(review): the snippet dumps below log at YLOG_LOG level; whether
   they are compiled in (e.g. guarded by a preprocessor conditional)
   cannot be determined from this copy of the file. */
529 /* for debugging purposes */
530 yaz_log(YLOG_LOG, "DOC SNIPPET:");
531 zebra_snippets_log(retrieveCtrl.doc_snippet, YLOG_LOG);
532 yaz_log(YLOG_LOG, "HIT SNIPPET:");
533 zebra_snippets_log(retrieveCtrl.hit_snippet, YLOG_LOG);
535 snippet = zebra_snippets_window(retrieveCtrl.doc_snippet,
536 retrieveCtrl.hit_snippet,
539 /* for debugging purposes */
540 yaz_log(YLOG_LOG, "WINDOW SNIPPET:");
541 zebra_snippets_log(snippet, YLOG_LOG);
544 if (!(rt = recType_byName(zh->reg->recTypes, zh->res,
545 file_type, &clientData)))
547 return_code = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
551 (*rt->retrieve)(clientData, &retrieveCtrl);
552 return_code = retrieveCtrl.diagnostic;
554 *output_format = retrieveCtrl.output_format;
555 *rec_bufp = (char *) retrieveCtrl.rec_buf;
556 *rec_lenp = retrieveCtrl.rec_len;
557 *addinfo = retrieveCtrl.addinfo;
560 zebra_snippets_destroy(snippet);
561 zebra_snippets_destroy(retrieveCtrl.doc_snippet);
564 stream.destroy(&stream);
573 * indent-tabs-mode: nil
575 * vim: shiftwidth=4 tabstop=8 expandtab