2 # ============================================================================
3 # Zebra perl API header
4 # =============================================================================
6 # ============================================================================
7 package IDZebra::Session;
9 use IDZebra::Logger qw(:flags :calls);
10 #use IDZebra::Repository;
11 use IDZebra::Resultset;
15 our @ISA = qw(IDZebra::Logger);
18 # -----------------------------------------------------------------------------
19 # Class constructors, destructor
20 # -----------------------------------------------------------------------------
22 my ($proto, %args) = @_;
23 my $class = ref($proto) || $proto;
25 $self->{args} = \%args;
27 bless ($self, $class);
28 $self->{cql_ct} = undef;
33 my ($self, %args) = @_;
36 unless (defined($self->{zs})) {
37 if (defined($args{'configFile'})) {
38 $self->{zs} = IDZebra::start($args{'configFile'});
40 $self->{zs} = IDZebra::start("zebra.cfg");
47 if (defined($self->{zs})) {
48 IDZebra::stop($self->{zs}) if ($self->{zs});
55 my ($proto,%args) = @_;
58 if (ref($proto)) { $self = $proto; } else {
59 $self = $proto->new(%args);
63 %args = %{$self->{args}};
66 $self->start_service(%args);
68 unless (defined($self->{zs})) {
69 croak ("Falied to open zebra service");
72 unless (defined($self->{zh})) {
73 $self->{zh}=IDZebra::open($self->{zs}) #if ($self->{zs});
77 # This is needed in order to somehow initialize the service
78 $self->select_databases("Default");
80 # Load the default configuration
83 $self->{odr_input} = IDZebra::odr_createmem($IDZebra::ODR_DECODE);
84 $self->{odr_output} = IDZebra::odr_createmem($IDZebra::ODR_ENCODE);
93 while (IDZebra::trans_no($self->{zh}) > 0) {
94 logf (LOG_WARN,"Explicitly closing transaction with session");
98 IDZebra::close($self->{zh});
102 if ($self->{odr_input}) {
103 IDZebra::odr_reset($self->{odr_input});
104 IDZebra::odr_destroy($self->{odr_input});
105 $self->{odr_input} = undef;
108 if ($self->{odr_output}) {
109 IDZebra::odr_reset($self->{odr_output});
110 IDZebra::odr_destroy($self->{odr_output});
111 $self->{odr_output} = undef;
119 logf (LOG_LOG,"DESTROY $self");
122 if (defined ($self->{cql_ct})) {
123 IDZebra::cql_transform_close($self->{cql_ct});
126 # -----------------------------------------------------------------------------
127 # Record group selection
128 # -----------------------------------------------------------------------------
130 my ($self,%args) = @_;
131 # print STDERR "A\n";
133 $self->{rg} = $self->_makeRecordGroup(%args);
134 $self->_selectRecordGroup($self->{rg});
136 # print STDERR "B\n";
140 sub selectRecordGroup {
141 my ($self, $groupName) = @_;
142 $self->{rg} = $self->_getRecordGroup($groupName);
143 $self->_selectRecordGroup($self->{rg});
146 sub _displayRecordGroup {
147 my ($self, $rg) = @_;
148 print STDERR "-----\n";
149 foreach my $key qw (groupName
160 print STDERR "$key:",$rg->{$key},"\n";
164 sub _cloneRecordGroup {
165 my ($self, $orig) = @_;
166 my $rg = IDZebra::recordGroup->new();
167 my $r = IDZebra::init_recordGroup($rg);
168 foreach my $key qw (groupName
180 $rg->{$key} = $orig->{$key} if ($orig->{$key});
185 sub _getRecordGroup {
186 my ($self, $groupName, $ext) = @_;
187 my $rg = IDZebra::recordGroup->new();
188 my $r = IDZebra::init_recordGroup($rg);
189 $rg->{groupName} = $groupName if ($groupName ne "");
190 $ext = "" unless ($ext);
191 my $r = IDZebra::res_get_recordGroup($self->{zh}, $rg, $ext);
195 sub _makeRecordGroup {
196 my ($self, %args) = @_;
199 my @keys = keys(%args);
200 unless ($#keys >= 0) {
201 return ($self->{rg});
204 if ($args{groupName}) {
205 $rg = $self->_getRecordGroup($args{groupName});
207 $rg = $self->_cloneRecordGroup($self->{rg});
209 $self->_setRecordGroupOptions($rg, %args);
213 sub _setRecordGroupOptions {
214 my ($self, $rg, %args) = @_;
216 foreach my $key qw (databaseName
227 if (defined ($args{$key})) {
228 $rg->{$key} = $args{$key};
232 sub _selectRecordGroup {
233 my ($self, $rg) = @_;
234 my $r = IDZebra::set_group($self->{zh}, $rg);
236 unless ($dbName = $rg->{databaseName}) {
239 if (IDZebra::select_database($self->{zh}, $dbName)) {
241 "Could not select database %s errCode=%d",
244 croak("Fatal error selecting record group");
246 logf(LOG_LOG,"Database %s selected",$dbName);
250 # -----------------------------------------------------------------------------
252 # -----------------------------------------------------------------------------
255 return(IDZebra::errCode($self->{zh}));
260 return(IDZebra::errString($self->{zh}));
265 return(IDZebra::errAdd($self->{zh}));
268 # -----------------------------------------------------------------------------
270 # -----------------------------------------------------------------------------
273 IDZebra::begin_trans($self->{zh});
281 my $stat = IDZebra::ZebraTransactionStatus->new();
282 IDZebra::end_trans($self->{zh}, $stat);
288 return(IDZebra::begin_read($self->{zh}));
293 IDZebra::end_read($self->{zh});
297 my ($self, $value) = @_;
298 if ($#_ > 0) { IDZebra::set_shadow_enable($self->{zh},$value); }
299 return (IDZebra::get_shadow_enable($self->{zh}));
304 if ($self->shadow_enable) {
305 return(IDZebra::commit($self->{zh}));
309 # -----------------------------------------------------------------------------
310 # We don't really need that...
311 # -----------------------------------------------------------------------------
313 my ($self, $name) = @_;
314 if ($name !~/^(input|output)$/) {
315 croak("Undefined ODR '$name'");
317 IDZebra::odr_reset($self->{"odr_$name"});
320 # -----------------------------------------------------------------------------
322 # -----------------------------------------------------------------------------
325 return(IDZebra::init($self->{zh}));
330 return(IDZebra::compact($self->{zh}));
334 my ($self, %args) = @_;
335 my $rg = $self->update_args(%args);
337 IDZebra::repository_update($self->{zh});
338 $self->_selectRecordGroup($self->{rg});
343 my ($self, %args) = @_;
344 my $rg = $self->update_args(%args);
346 IDZebra::repository_delete($self->{zh});
347 $self->_selectRecordGroup($self->{rg});
352 my ($self, %args) = @_;
353 my $rg = $self->update_args(%args);
355 IDZebra::repository_show($self->{zh});
356 $self->_selectRecordGroup($self->{rg});
361 my ($self, %args) = @_;
362 my $rg = $self->_makeRecordGroup(%args);
363 $self->_selectRecordGroup($rg);
367 # -----------------------------------------------------------------------------
369 # -----------------------------------------------------------------------------
372 my ($self, %args) = @_;
373 return(IDZebra::update_record($self->{zh},
374 $self->record_update_args(%args)));
378 my ($self, %args) = @_;
379 return(IDZebra::delete_record($self->{zh},
380 $self->record_update_args(%args)));
382 sub record_update_args {
383 my ($self, %args) = @_;
385 my $sysno = $args{sysno} ? $args{sysno} : 0;
386 my $match = $args{match} ? $args{match} : "";
387 my $rectype = $args{recordType} ? $args{recordType} : "";
388 my $fname = $args{file} ? $args{file} : "<no file>";
395 elsif ($args{file}) {
396 open (F, $args{file}) || warn ("Cannot open $args{file}");
397 $buff = join('',(<F>));
400 my $len = length($buff);
402 delete ($args{sysno});
403 delete ($args{match});
404 delete ($args{recordType});
405 delete ($args{file});
406 delete ($args{data});
408 my $rg = $self->_makeRecordGroup(%args);
410 # If no record type is given, then try to find it out from the
413 if (my ($ext) = $fname =~ /\.(\w+)$/) {
414 my $rg2 = $self->_getRecordGroup($rg->{groupName},$ext);
415 $rectype = $rg2->{recordType};
419 $rg->{databaseName} = "Default" unless ($rg->{databaseName});
421 # print STDERR "$rectype,$sysno,$match,$fname,$len\n";
425 return ($rg, $rectype, $sysno, $match, $fname, $buff, $len);
428 # -----------------------------------------------------------------------------
430 # -----------------------------------------------------------------------------
431 sub select_databases {
432 my ($self, @databases) = @_;
433 return (IDZebra::select_databases($self->{zh},
439 my ($self, $query, $setname) = @_;
440 my $hits = IDZebra::search_PQF($self->{zh},
446 my $rs = IDZebra::Resultset->new($self,
448 recordCount => $hits,
449 errCode => $self->errCode,
450 errString => $self->errString);
455 my ($self,$mapfile) = @_;
457 unless (-f $mapfile) {
458 croak("Cannot find $mapfile");
460 if (defined ($self->{cql_ct})) {
461 IDZebra::cql_transform_close($self->{cql_ct});
463 $self->{cql_ct} = IDZebra::cql_transform_open_fname($mapfile);
464 $self->{cql_mapfile} = $mapfile;
466 return ($self->{cql_mapfile});
470 my ($self, $cqlquery) = @_;
471 unless (defined($self->{cql_ct})) {
472 croak("CQL map file is not specified yet.");
474 my $res = "\0" x 2048;
475 my $r = IDZebra::cql2pqf($self->{cql_ct}, $cqlquery, $res, 2048);
481 my ($self, $query, $transfile) = @_;
486 my ($self, $query, $transfile) = @_;
489 # -----------------------------------------------------------------------------
492 # Sorting of multiple result sets is not supported by zebra...
493 # -----------------------------------------------------------------------------
496 my ($self, $sortspec, $setname, @sets) = @_;
500 foreach my $rs (@sets) {
501 push (@setnames, $rs->{name});
502 $count += $rs->{recordCount}; # is this really sure ??? It doesn't
506 my $status = IDZebra::sort($self->{zh},
512 my $errCode = $self->errCode;
513 my $errString = $self->errString;
515 if ($status || $errCode) {$count = 0;}
517 my $rs = IDZebra::Resultset->new($self,
519 recordCount => $count,
521 errString => $errString);
531 IDZebra::Session - A Zebra database server session for update and retrieval
535 $sess = IDZebra::Session->new(configFile => 'demo/zebra.cfg');
538 $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg');
544 Zebra is a high-performance, general-purpose structured text indexing and retrieval engine. It reads structured records in a variety of input formats (eg. email, XML, MARC) and allows access to them through exact boolean search expressions and relevance-ranked free-text queries.
546 Zebra supports large databases (more than ten gigabytes of data, tens of millions of records). It supports incremental, safe database updates on live systems. You can access data stored in Zebra using a variety of Index Data tools (eg. YAZ and PHP/YAZ) as well as commercial and freeware Z39.50 clients and toolkits.
548 =head1 OPENING AND CLOSING A ZEBRA SESSION
550 For the time being, only local database services are supported, the same way as when calling zebraidx or zebrasrv from the command shell. In order to open a local Zebra database with a specific configuration file, use
552 $sess = IDZebra::Session->new(configFile => 'demo/zebra.cfg');
557 $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg');
559 where $sess is going to be the object representing a Zebra Session. Whenever this variable goes out of scope, the session is closed, together with all active transactions, etc. If you'd like to close the session explicitly, just say:
564 - close all transactions
565 - destroy all result sets
568 In the future different database access methods are going to be available,
571 $sess = IDZebra::Session->open(server => 'ostrich.technomat.hu:9999');
573 You can also use the B<record group> arguments described below directly when calling the constructor, or the open method:
575 $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg',
576 groupName => 'demo');
581 If you manage different sets of records that share common characteristics, you can organize the configuration settings for each type into "groups". See the Zebra manual on the configuration file (zebra.cfg).
583 For each open session a default record group is assigned. You can configure it in the constructor, or by the B<group> method:
585 $sess->group(groupName => ..., ...)
587 The following options are available:
593 This will select the named record group, and load the corresponding settings from the configuration file. All subsequent values will overwrite those...
595 =item B<databaseName>
597 The name of the (logical) database the updated records will belong to.
601 This path is used for directory updates (B<update>, B<delete> methods);
605 This option determines how to identify your records. See I<Zebra manual: Locating Records>
609 The record type used for indexing.
611 =item B<flagStoreData>
613 Specifies whether the records should be stored internally in the Zebra system files. If you want to maintain the raw records yourself, this option should be false (0). If you want Zebra to take care of the records for you, it should be true (1).
615 =item B<flagStoreKeys>
617 Specifies whether key information should be saved for a given group of records. If you plan to update/delete this type of record later, this should be specified as 1; otherwise it should be 0 (default), to save register space.
623 =item B<fileVerboseLimit>
625 When doing a directory update, skip log messages once the specified number of files has been processed.
627 =item B<databaseNamePath>
631 =item B<explainDatabase>
633 The name of the explain database to be used
637 Follow links when doing directory update.
641 You can use the same parameters calling all update methods.
643 =head1 TRANSACTIONS (WRITE LOCKS)
645 A transaction is a block of record update (insert / modify / delete) procedures. So, every call to such a function will implicitly start a transaction, unless one has already been started by
649 For multiple per-record updates it's more efficient to start transactions explicitly: otherwise registers (system files, vocabularies, etc.) are updated one by one. After finishing all requested updates, use
651 $stat = $sess->end_trans;
653 The return value is a ZebraTransactionStatus object, containing the following members as a hash reference:
655 $stat->{processed} # Number of records processed
656 $stat->{updated} # Number of records updated
657 $stat->{deleted} # Number of records deleted
658 $stat->{inserted} # Number of records inserted
659 $stat->{stime} # System time used
660 $stat->{utime} # User time used
664 There are two ways to update data in a Zebra database using the perl API. You can update an entire directory structure just the way it's done by zebraidx:
666 $sess->update(path => 'lib');
668 This will update the database with the files in directory "lib", according to the current record group settings.
672 This will update the database with the files, specified by the default record group setting. I<path> has to be specified there...
674 $sess->update(groupName => 'demo1',
677 Update the database with files in "lib" according to the settings of group "demo1"
679 $sess->delete(groupName => 'demo1',
682 Delete the records derived from the files in directory "lib", according to the "demo1" group settings. Sounds complex? Read zebra documentation about identifying records.
684 You can also update records one by one, even directly from memory:
686 $sysno = $sess->update_record(data => $rec1,
687 recordType => 'grs.perl.pod',
688 groupName => "demo1");
690 This will update the database with the given record buffer. Note that in this case recordType is explicitly specified, as there is no filename given, and for the demo1 group, no default record type is specified. The return value is the system-assigned id of the record.
692 You can also index a single file:
694 $sysno = $sess->update_record(file => "lib/IDZebra/Data1.pm");
696 Or, provide a buffer and a filename (where the filename will only be used to identify the record, if configured that way, and possibly to find out its record type):
698 $sysno = $sess->update_record(data => $rec1,
699 file => "lib/IDZebra/Data1.pm");
701 And some crazy stuff:
703 $sysno = $sess->delete_record(sysno => $sysno);
705 where sysno in itself is sufficient to identify the record
707 $sysno = $sess->delete_record(data => $rec1,
708 recordType => 'grs.perl.pod',
709 groupName => "demo1");
711 In this case the record is extracted and, if it already exists, located in the database and then deleted...
713 $sysno = $sess->delete_record(data => $rec1,
715 recordType => 'grs.perl.pod',
716 groupName => "demo1");
718 Don't try this at home! In this case, the record identifier string (which is normally generated according to the rules set in the recordId directive of zebra.cfg) is provided directly...
721 B<Important:> Note that one record can be updated only once within a transaction - all subsequent updates are skipped.
730 Peter Popovics, pop@technomat.hu
734 IDZebra, IDZebra::Data1, Zebra documentation