1 /* $Id: api.h,v 1.52 2007-08-21 13:27:04 adam Exp $
2 Copyright (C) 1995-2007
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
27 Most functions have return type ZEBRA_RES, where ZEBRA_FAIL indicates
28 failure; ZEBRA_OK indicates success.
35 #include <yaz/proto.h>
36 #include <idzebra/res.h>
37 #include <idzebra/version.h>
38 #include <idzebra/recctrl.h>
49 } ZebraTransactionStatus;
51 /** Retrieval Record Descriptor */
53 int errCode; /* non-zero if error when fetching this */
54 char *errString; /* error string */
55 int position; /* position of record in result set (1,2,..) */
56 char *buf; /* record buffer (void pointer really) */
58 const Odr_oid *format; /* record syntax */
62 } ZebraRetrievalRecord;
64 /** Scan Term Descriptor */
66 zint occurrences; /* scan term occurrences */
67 char *term; /* scan term string */
68 char *display_term; /* display scan term entry */
72 \brief a Zebra Handle - (session)
74 typedef struct zebra_session *ZebraHandle;
77 \brief a Zebra Service handle
79 typedef struct zebra_service *ZebraService;
81 /** \brief Creates a Zebra Service.
82 \param configName name of configuration file
84 This function is a simplified version of zebra_start_res.
87 ZebraService zebra_start(const char *configName
88 ) ZEBRA_GCC_ATTR((warn_unused_result));
90 /** \brief Creates a Zebra service with resources.
91 \param configName name of configuration file
92 \param def_res default resources
93 \param over_res overriding resources
95 This function is typically called once in a program. A Zebra Service
96 acts as a factory for Zebra session handles.
99 ZebraService zebra_start_res(const char *configName,
100 Res def_res, Res over_res
101 ) ZEBRA_GCC_ATTR((warn_unused_result));
103 /** \brief stops a Zebra service.
104 \param zs service handle
106 Frees resources used by the service.
109 ZEBRA_RES zebra_stop(ZebraService zs);
111 /** \brief Lists enabled Zebra filters
112 \param zs service handle
113 \param cd callback parameter (opaque)
114 \param cb callback function
117 void zebra_filter_info(ZebraService zs, void *cd,
118 void (*cb)(void *cd, const char *name));
121 /** \brief Creates a Zebra session handle within service.
122 \param zs service handle.
123 \param res resources to be used for the service (NULL for none)
125 There should be one handle for each thread doing something
126 with zebra, be that searching or indexing. In simple apps
127 one handle is sufficient
130 ZebraHandle zebra_open(ZebraService zs, Res res
131 ) ZEBRA_GCC_ATTR((warn_unused_result));
133 /** \brief Destroys Zebra session handle.
134 \param zh zebra session handle.
137 ZEBRA_RES zebra_close(ZebraHandle zh);
139 /** \brief Returns error code for last error
140 \param zh zebra session handle.
143 int zebra_errCode(ZebraHandle zh);
145 /** \brief Returns error string for last error
146 \param zh zebra session handle.
149 const char *zebra_errString(ZebraHandle zh);
151 /** \brief Returns additional info for last error
152 \param zh zebra session handle.
155 char *zebra_errAdd(ZebraHandle zh);
157 /** \brief Returns error code and additional info for last error
158 \param zh zebra session handle.
159 \param code pointer to returned error code
160 \param addinfo pointer to returned additional info
163 void zebra_result(ZebraHandle zh, int *code, char **addinfo);
166 /** \brief Returns character set encoding for session
167 \param zh zebra session handle.
168 \returns encoding name (e.g. "iso-8859-1")
171 const char *zebra_get_encoding(ZebraHandle zh);
173 /** \brief Set limit before Zebra does approx hit count
174 \param zh session handle
175 \param approx_limit the limit
177 Results will be approximate if hit count is greater than the
178 limit specified. By default there is a high-limit (no limit).
180 ZEBRA_RES zebra_set_approx_limit(ZebraHandle zh, zint approx_limit);
182 /** \brief Search using PQF Query String
183 \param zh session handle
184 \param pqf_query query
185 \param setname name of resultset
186 \param hits number of hits is returned
189 ZEBRA_RES zebra_search_PQF(ZebraHandle zh, const char *pqf_query,
190 const char *setname, zint *hits);
192 /** \brief Search using RPN Query structure (from ASN.1)
193 \param zh session handle
195 \param query RPN query using YAZ structure
196 \param setname name of resultset
197 \param hits number of hits is returned
198 \param estimated_hit_count whether hit count is an estimate
199 \param partial_resultset whether result is only partially evaluated
202 ZEBRA_RES zebra_search_RPN_x(ZebraHandle zh, ODR o, Z_RPNQuery *query,
203 const char *setname, zint *hits,
204 int *estimated_hit_count,
205 int *partial_resultset);
208 /** \brief Search using RPN Query structure (from ASN.1)
209 \param zh session handle
211 \param query RPN query using YAZ structure
212 \param setname name of resultset
213 \param hits number of hits is returned
216 ZEBRA_RES zebra_search_RPN(ZebraHandle zh, ODR o, Z_RPNQuery *query,
217 const char *setname, zint *hits);
219 /** \brief Retrieve records from result set (after search)
220 \param zh session handle
221 \param stream allocate records returned using this ODR
222 \param setname name of result set to retrieve records from
223 \param comp Z39.50 record composition
224 \param input_format transfer syntax (OID)
225 \param num_recs number of records to retrieve
226 \param recs store records in this structure (size is num_recs)
229 ZEBRA_RES zebra_records_retrieve(ZebraHandle zh, ODR stream,
231 Z_RecordComposition *comp,
232 const Odr_oid *input_format,
234 ZebraRetrievalRecord *recs);
235 /** \brief Deletes one or more resultsets
236 \param zh session handle
237 \param function Z_DeleteResultSetRequest_{list,all}
238 \param num_setnames number of result sets
239 \param setnames result set names
240 \param statuses status result
243 int zebra_deleteResultSet(ZebraHandle zh, int function,
244 int num_setnames, char **setnames,
248 /** \brief returns number of term info terms associated with result set
249 \param zh session handle
250 \param setname result set name
251 \param num_terms number of terms returned in this integer
253 This function is used in conjunction with zebra_result_set_term_info.
254 If operation was successful, ZEBRA_OK is returned; otherwise
255 ZEBRA_FAIL is returned (typically non-existing setname)
258 ZEBRA_RES zebra_result_set_term_no(ZebraHandle zh, const char *setname,
261 /** \brief returns information about a term associated with a result set
262 \param zh session handle
263 \param setname result set name
264 \param no the term we want to know about (0=first, 1=second,..)
265 \param count the number of occurrences of this term, aka hits (output)
266 \param approx about hits: 0=exact,1=approx (output)
267 \param termbuf buffer for term string (input, output)
268 \param termlen size of termbuf (input=max, output=actual length)
269 \param term_ref_id if non-NULL *term_ref_id holds term reference
271 Returns information about one search term associated with result set.
272 Use zebra_result_set_term_no to read total number of terms associated
273 with result set. If this function can not return information,
274 due to parameter no being out of range or a bad result set name, ZEBRA_FAIL is
276 The passed termbuf must be able to hold at least *termlen characters.
277 Upon completion, *termlen holds actual length of search term.
280 ZEBRA_RES zebra_result_set_term_info(ZebraHandle zh, const char *setname,
281 int no, zint *count, int *approx,
282 char *termbuf, size_t *termlen,
283 const char **term_ref_id);
286 /** \brief performs Scan (Z39.50 style)
287 \param zh session handle
288 \param stream ODR handle for result
289 \param zapt Attribute plus Term (start term)
290 \param attributeset Attributeset for Attribute plus Term
291 \param position input/output position
292 \param num_entries number of terms requested / returned
293 \param entries list of resulting terms (ODR allocated)
294 \param is_partial upon return 1=partial, 0=complete
295 \param setname limit scan by this set (NULL means no limit)
297 YAZ_EXPORT ZEBRA_RES zebra_scan(ZebraHandle zh, ODR stream,
298 Z_AttributesPlusTerm *zapt,
299 const Odr_oid *attributeset,
300 int *position, int *num_entries,
301 ZebraScanEntry **entries,
303 const char *setname);
305 /** \brief performs Scan (taking PQF string)
306 \param zh session handle
307 \param stream ODR handle for result
308 \param query PQF scan query
309 \param position input/output position
310 \param num_entries number of terms requested / returned
311 \param entries list of resulting terms (ODR allocated)
312 \param is_partial upon return 1=partial, 0=complete
313 \param setname limit scan by this set (NULL means no limit)
316 ZEBRA_RES zebra_scan_PQF(ZebraHandle zh, ODR stream, const char *query,
317 int *position, int *num_entries,
318 ZebraScanEntry **entries,
319 int *is_partial, const char *setname);
321 /** \brief authenticate user. Returns 0 if OK, != 0 on failure
322 \param zh session handle
323 \param user user name
327 ZEBRA_RES zebra_auth(ZebraHandle zh, const char *user, const char *pass);
329 /** \brief Normalize zebra term for register (subject to change!)
330 \param zh session handle
331 \param reg_id register ID, 'w', 'p',..
332 \param input_str input string buffer
333 \param input_len input string length
334 \param output_str output string buffer
335 \param output_len output string length
338 int zebra_string_norm(ZebraHandle zh, unsigned reg_id, const char *input_str,
339 int input_len, char *output_str, int output_len);
341 /** \brief Creates a database
342 \param zh session handle
343 \param db database to be created
346 ZEBRA_RES zebra_create_database(ZebraHandle zh, const char *db);
348 /** \brief Deletes a database (drop)
349 \param zh session handle
350 \param db database to be deleted
353 ZEBRA_RES zebra_drop_database(ZebraHandle zh, const char *db);
356 ZEBRA_RES zebra_admin_shutdown(ZebraHandle zh);
359 ZEBRA_RES zebra_admin_start(ZebraHandle zh);
362 ZEBRA_RES zebra_shutdown(ZebraService zs);
365 ZEBRA_RES zebra_admin_import_begin(ZebraHandle zh, const char *database,
366 const char *record_type);
369 ZEBRA_RES zebra_admin_import_segment(ZebraHandle zh,
373 ZEBRA_RES zebra_admin_import_end(ZebraHandle zh);
376 ZEBRA_RES zebra_begin_trans(ZebraHandle zh, int rw
377 ) ZEBRA_GCC_ATTR((warn_unused_result));
380 ZEBRA_RES zebra_end_trans(ZebraHandle zh
381 ) ZEBRA_GCC_ATTR((warn_unused_result));
384 ZEBRA_RES zebra_end_transaction(ZebraHandle zh,
385 ZebraTransactionStatus *stat);
388 ZEBRA_RES zebra_commit(ZebraHandle zh);
391 ZEBRA_RES zebra_clean(ZebraHandle zh);
394 ZEBRA_RES zebra_init(ZebraHandle zh);
397 ZEBRA_RES zebra_compact(ZebraHandle zh);
400 ZEBRA_RES zebra_repository_update(ZebraHandle zh, const char *path);
403 ZEBRA_RES zebra_repository_delete(ZebraHandle zh, const char *path);
406 ZEBRA_RES zebra_repository_show(ZebraHandle zh, const char *path);
408 /** \brief Simple update record
409 \param zh session handle
410 \param buf record buffer
411 \param buf_size record buffer size
413 This function is a simple wrapper for zebra_update_record with
414 action=action_update (insert or replace).
417 ZEBRA_RES zebra_add_record(ZebraHandle zh, const char *buf, int buf_size);
419 /** \brief Updates record
420 \param zh session handle
421 \param action insert, replace, delete or update (replace/insert)
422 \param recordType filter type (0 indicates default)
423 \param sysno system id (0 may be passed for no known id)
424 \param match match criteria (0 may be passed for no known criteria)
425 \param fname filename to be printed for logging (0 may be passed)
426 \param buf record buffer
427 \param buf_size record buffer size
430 ZEBRA_RES zebra_update_record(ZebraHandle zh,
431 enum zebra_recctrl_action_t action,
432 const char *recordType,
433 zint *sysno, const char *match,
435 const char *buf, int buf_size);
438 ZEBRA_RES zebra_sort(ZebraHandle zh, ODR stream,
439 int num_input_setnames,
440 const char **input_setnames,
441 const char *output_setname,
442 Z_SortKeySpecList *sort_sequence,
444 ) ZEBRA_GCC_ATTR((warn_unused_result));
447 ZEBRA_RES zebra_select_databases(ZebraHandle zh, int num_bases,
448 const char **basenames
449 ) ZEBRA_GCC_ATTR((warn_unused_result));
452 ZEBRA_RES zebra_select_database(ZebraHandle zh, const char *basename
453 ) ZEBRA_GCC_ATTR((warn_unused_result));
456 void zebra_shadow_enable(ZebraHandle zh, int value);
459 int zebra_register_statistics(ZebraHandle zh, int dumpdict);
462 ZEBRA_RES zebra_record_encoding(ZebraHandle zh, const char *encoding);
465 ZEBRA_RES zebra_octet_term_encoding(ZebraHandle zh, const char *encoding);
469 void zebra_set_resource(ZebraHandle zh, const char *name, const char *value);
471 const char *zebra_get_resource(ZebraHandle zh,
472 const char *name, const char *defaultvalue);
476 void zebra_pidfname(ZebraService zs, char *path);
486 ZebraMetaRecord *zebra_meta_records_create(ZebraHandle zh,
488 int num, zint *positions);
492 ZebraMetaRecord *zebra_meta_records_create_range(ZebraHandle zh,
494 zint start, int num);
497 void zebra_meta_records_destroy(ZebraHandle zh, ZebraMetaRecord *records,
501 struct BFiles_struct *zebra_get_bfs(ZebraHandle zh);
504 ZEBRA_RES zebra_set_limit(ZebraHandle zh, int complement_flag, zint *ids);
507 ZEBRA_RES zebra_set_break_handler(ZebraHandle zh,
508 int (*f)(void *client_data),
515 * \section intro_sec Introduction
517 * Zebra is a search engine for structured data, such as XML, MARC
520 * API users should read the api.h for all the public definitions.
522 * The remaining sections briefly describe each of
523 * Zebra's major modules/components.
525 * \section util Base Utilities
527 * The Zebra utilities (util.h) define fundamental types and a few
528 * utilities for Zebra.
530 * \section res Resources
532 * The resources system (res.h) is a manager of configuration
533 * resources. The resources can be viewed as a simple database.
534 * Resources can be read from a configuration file; they can be
535 * read or written by an application. Resources can also be written,
536 * but that facility is not currently in use.
538 * \section bfile Bfiles
540 * The Bfiles (bfile.h) provides a portable interface to the
541 * local file system. It also provides a facility for safe updates
542 * (shadow updates). All file system access is handled by this module
543 * (except for trivial reads of configuration files).
545 * \section dict Dictionary
547 * The Zebra dictionary (dict.h) maps a search term (key) to a value. The
548 * value is a reference to the list of record identifiers in which
549 * the term occurs. Zebra uses an ISAM data structure for the list
550 * of term occurrences. The Dictionary uses \ref bfile.
554 * Zebra maintains an ISAM for each term where each ISAM is a list
555 * of record identifiers corresponding to the records in which the
556 * term occurs. Unlike traditional ISAM systems, the Zebra ISAM
557 * is compressed. The ISAM system uses \ref bfile.
559 * Zebra has more than one ISAM system. The old and stable ISAM system
560 * is named isamc (see isamc.h). Another version isams is a write-once
561 * isam system that is quite compact - suitable for CD-ROMs (isams.h).
562 * The newest ISAM system, isamb, is implemented as a B-Tree (see isamb.h).
564 * \section data1 Data-1
566 * The data1 (data1.h) module deals with structured documents. The module can
567 * read, modify and write documents. The document structure was
568 * originally based on GRS-1 - a Z39.50 v3 structure that predates
569 * DOM. These days the data1 structure may describe XML/SGML as well.
570 * The data1, like DOM, is a tree structure. Each node in the tree
571 * can be of type element, text (cdata), preprocessing instruction,
572 * comment. Element nodes can point to attribute nodes.
574 * \section recctrl Record Control
576 * The record control module (recctrl.h) is responsible for
577 * managing the various record types ("classes" or filters).
579 * \section rset Result-Set
581 * The Result-Set module (rset.h) defines an interface that all
582 * Zebra Search Results must implement. Each operation (AND, OR, ..)
583 * corresponds to an implementation of that interface.
587 * DFA (dfa.h) Deterministic Finite Automaton is a regular expression engine.
588 * The module compiles a regular expression to a DFA. The DFA can then
589 * be used in various applications to perform fast matching against the
590 * original expression. The \ref dict uses DFA to perform lookup
591 * using regular expressions.
598 * indent-tabs-mode: nil
600 * vim: shiftwidth=4 tabstop=8 expandtab