1 # $Id: Session.pm,v 1.12 2003-03-05 00:28:16 pop Exp $
3 # Zebra perl API header
4 # =============================================================================
5 package IDZebra::Session;
14 use IDZebra::Logger qw(:flags :calls);
15 use IDZebra::Resultset;
16 use IDZebra::ScanList;
17 use IDZebra::RetrievalRecord;
18 our $VERSION = do { my @r = (q$Revision: 1.12 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
19 # our @ISA = qw(IDZebra::Logger);
23 # -----------------------------------------------------------------------------
24 # Class constructors, destructor
25 # -----------------------------------------------------------------------------
27 my ($proto, %args) = @_;
28 my $class = ref($proto) || $proto;
30 $self->{args} = \%args;
32 bless ($self, $class);
33 $self->{cql_ct} = undef;
34 $self->{cql_mapfile} = "";
37 $self->{databases} = {};
41 my ($self, %args) = @_;
44 unless (defined($self->{zs})) {
45 if (defined($args{'configFile'})) {
46 $self->{zs} = IDZebra::start($args{'configFile'});
48 $self->{zs} = IDZebra::start("zebra.cfg");
55 if (defined($self->{zs})) {
56 IDZebra::stop($self->{zs}) if ($self->{zs});
63 my ($proto,%args) = @_;
66 if (ref($proto)) { $self = $proto; } else {
67 $self = $proto->new(%args);
71 %args = %{$self->{args}};
74 $self->start_service(%args);
76 unless (defined($self->{zs})) {
77 croak ("Falied to open zebra service");
80 unless (defined($self->{zh})) {
81 $self->{zh}=IDZebra::open($self->{zs});
84 # Reset result set counter
87 # This is needed in order to somehow initialize the service
88 $self->databases("Default");
90 # Load the default configuration
93 $self->{odr_input} = IDZebra::odr_createmem($IDZebra::ODR_DECODE);
94 $self->{odr_output} = IDZebra::odr_createmem($IDZebra::ODR_ENCODE);
101 unless (defined($self->{zh})) {
102 croak ("Zebra session is not opened");
112 # Delete all result sets
113 my $r = IDZebra::deleteResultSet($self->{zh},
114 1, #Z_DeleteRequest_all,
118 while (IDZebra::trans_no($self->{zh}) > 0) {
119 logf (LOG_WARN,"Explicitly closing transaction with session");
123 IDZebra::close($self->{zh});
127 if ($self->{odr_input}) {
128 IDZebra::odr_reset($self->{odr_input});
129 IDZebra::odr_destroy($self->{odr_input});
130 $self->{odr_input} = undef;
133 if ($self->{odr_output}) {
134 IDZebra::odr_reset($self->{odr_output});
135 IDZebra::odr_destroy($self->{odr_output});
136 $self->{odr_output} = undef;
144 logf (LOG_LOG,"DESTROY $self");
147 if (defined ($self->{cql_ct})) {
148 IDZebra::cql_transform_close($self->{cql_ct});
152 # -----------------------------------------------------------------------------
153 # Record group selection. This is a bit nasty... but used in many places
154 # -----------------------------------------------------------------------------
156 my ($self,%args) = @_;
159 $self->{rg} = $self->_makeRecordGroup(%args);
160 $self->_selectRecordGroup($self->{rg});
# Make the named record group the current group of this session.
#   $groupName - name of the record group to load
# The group is fetched with _getRecordGroup, remembered in $self->{rg} and
# then activated with _selectRecordGroup.
165 sub selectRecordGroup {
166 my ($self, $groupName) = @_;
168 $self->{rg} = $self->_getRecordGroup($groupName);
169 $self->_selectRecordGroup($self->{rg});
# Debugging aid: dump a record group to STDERR.
#   $rg - record group object whose fields are printed
# Prints a ----- separator line, then one key:value line for every key named
# in the qw list of the loop (only its first entry, groupName, is visible in
# this excerpt).
# NOTE(review): qw(...) used directly as the foreach list, without enclosing
# parentheses, is rejected by perl 5.18 and later; it has to be written as
# (qw(...)).
172 sub _displayRecordGroup {
173 my ($self, $rg) = @_;
174 print STDERR "-----\n";
175 foreach my $key qw (groupName
186 print STDERR "$key:",$rg->{$key},"\n";
# Create a copy of a record group.
#   $orig - record group to copy from
# A new IDZebra::recordGroup is created and passed to init_recordGroup, then
# each key named in the qw list is copied over from $orig. The copy is guarded
# by a truth test, so fields of $orig holding 0, the empty string or undef are
# not copied and keep the value of the freshly initialised group.
# NOTE(review): only the first key (groupName) is visible in this excerpt and
# the return statement is not shown; presumably the new group is returned -
# confirm.
# NOTE(review): qw(...) used directly as the foreach list, without enclosing
# parentheses, is rejected by perl 5.18 and later.
190 sub _cloneRecordGroup {
191 my ($self, $orig) = @_;
192 my $rg = IDZebra::recordGroup->new();
193 my $r = IDZebra::init_recordGroup($rg);
194 foreach my $key qw (groupName
206 $rg->{$key} = $orig->{$key} if ($orig->{$key});
# Build a record group object for a group name and optional file extension.
#   $groupName - group name; stored in the object only when it is not the
#                empty string
#   $ext       - file extension passed on to the lookup; any false value is
#                replaced by the empty string
# The object is initialised with init_recordGroup and then handed, together
# with the session handle $self->{zh}, to IDZebra::res_get_recordGroup.
# NOTE(review): presumably res_get_recordGroup fills the object from the
# configuration and the object is returned; neither is visible here - confirm.
211 sub _getRecordGroup {
212 my ($self, $groupName, $ext) = @_;
213 my $rg = IDZebra::recordGroup->new();
214 my $r = IDZebra::init_recordGroup($rg);
# NOTE(review): an undef $groupName compares equal to the empty string here
# (with a warning when warnings are enabled), so no name is set in that case.
215 $rg->{groupName} = $groupName if ($groupName ne "");
216 $ext = "" unless ($ext);
217 $r = IDZebra::res_get_recordGroup($self->{zh}, $rg, $ext);
# Produce the record group to use for a call, based on optional arguments.
#   %args - record group options; may be empty
# Without any arguments the current group of the session ($self->{rg}) is
# returned unchanged. Otherwise a group object is obtained and the given
# options are applied to it with _setRecordGroupOptions.
# NOTE(review): the final return statement is not visible in this excerpt.
221 sub _makeRecordGroup {
222 my ($self, %args) = @_;
225 my @keys = keys(%args);
# No options at all: reuse the current group of the session.
226 unless ($#keys >= 0) {
227 return ($self->{rg});
# A true groupName loads that group from scratch.
230 if ($args{groupName}) {
231 $rg = $self->_getRecordGroup($args{groupName});
# NOTE(review): presumably the else branch - start from a copy of the current
# group so that the options below do not modify $self->{rg} itself.
233 $rg = $self->_cloneRecordGroup($self->{rg});
235 $self->_setRecordGroupOptions($rg, %args);
# Copy option values from %args into a record group object.
#   $rg   - record group object to modify in place
#   %args - options; only the keys named in the qw list are considered
# A value is copied whenever it is defined, so 0 and the empty string are
# copied as well.
# NOTE(review): only the first key (databaseName) is visible in this excerpt.
# NOTE(review): qw(...) used directly as the foreach list, without enclosing
# parentheses, is rejected by perl 5.18 and later.
239 sub _setRecordGroupOptions {
240 my ($self, $rg, %args) = @_;
242 foreach my $key qw (databaseName
253 if (defined ($args{$key})) {
254 $rg->{$key} = $args{$key};
# Activate a record group on the session handle and select its database.
#   $rg - record group object to activate
# The group is passed to IDZebra::set_group, then the database named in the
# group is selected through the databases method. Croaks when that selection
# returns a false value.
258 sub _selectRecordGroup {
259 my ($self, $rg) = @_;
260 my $r = IDZebra::set_group($self->{zh}, $rg);
# NOTE(review): the body of this unless (the fallback used when the group has
# no databaseName) is not visible in this excerpt.
262 unless ($dbName = $rg->{databaseName}) {
265 unless ($self->databases($dbName)) {
266 croak("Fatal error selecting database $dbName");
269 # -----------------------------------------------------------------------------
270 # Selecting databases for search (and also for updating - internally)
271 # -----------------------------------------------------------------------------
273 my ($self, @databases) = @_;
278 return (keys(%{$self->{databases}}));
284 foreach my $db (@databases) {
285 next if ($self->{databases}{$db});
290 foreach my $db (keys (%{$self->{databases}})) {
291 $changed++ unless ($tmp{$db});
296 delete ($self->{databases});
297 foreach my $db (@databases) {
298 $self->{databases}{$db}++;
301 if (IDZebra::select_databases($self->{zh},
305 "Could not select database(s) %s errCode=%d",
306 join(",",@databases),
310 logf(LOG_LOG,"Database(s) selected: %s",join(",",@databases));
313 return (keys(%{$self->{databases}}));
316 # -----------------------------------------------------------------------------
318 # -----------------------------------------------------------------------------
321 return(IDZebra::errCode($self->{zh}));
326 return(IDZebra::errString($self->{zh}));
331 return(IDZebra::errAdd($self->{zh}));
334 # -----------------------------------------------------------------------------
336 # -----------------------------------------------------------------------------
340 if (my $err = IDZebra::begin_trans($self->{zh},1)) {
341 if ($self->errCode == 2) {
342 croak ("TRANS_RW not allowed within TRANS_RO");
344 croak("Error starting transaction; code:".
345 $self->errCode . " message: " . $self->errString);
353 my $stat = IDZebra::ZebraTransactionStatus->new();
354 IDZebra::end_trans($self->{zh}, $stat);
361 return(IDZebra::begin_read($self->{zh}));
367 IDZebra::end_read($self->{zh});
371 my ($self, $value) = @_;
373 if ($#_ > 0) { IDZebra::set_shadow_enable($self->{zh},$value); }
374 return (IDZebra::get_shadow_enable($self->{zh}));
380 if ($self->shadow_enable) {
381 return(IDZebra::commit($self->{zh}));
385 # -----------------------------------------------------------------------------
386 # We don't really need that...
387 # -----------------------------------------------------------------------------
389 my ($self, $name) = @_;
390 if ($name !~/^(input|output)$/) {
391 croak("Undefined ODR '$name'");
393 IDZebra::odr_reset($self->{"odr_$name"});
396 # -----------------------------------------------------------------------------
398 # -----------------------------------------------------------------------------
402 return(IDZebra::init($self->{zh}));
408 return(IDZebra::compact($self->{zh}));
412 my ($self, %args) = @_;
414 my $rg = $self->_update_args(%args);
415 $self->_selectRecordGroup($rg);
417 IDZebra::repository_update($self->{zh});
418 $self->_selectRecordGroup($self->{rg});
423 my ($self, %args) = @_;
425 my $rg = $self->_update_args(%args);
426 $self->_selectRecordGroup($rg);
428 IDZebra::repository_delete($self->{zh});
429 $self->_selectRecordGroup($self->{rg});
434 my ($self, %args) = @_;
436 my $rg = $self->_update_args(%args);
437 $self->_selectRecordGroup($rg);
439 IDZebra::repository_show($self->{zh});
440 $self->_selectRecordGroup($self->{rg});
445 my ($self, %args) = @_;
446 my $rg = $self->_makeRecordGroup(%args);
447 $self->_selectRecordGroup($rg);
451 # -----------------------------------------------------------------------------
453 # -----------------------------------------------------------------------------
456 my ($self, %args) = @_;
458 return(IDZebra::update_record($self->{zh},
459 $self->_record_update_args(%args)));
463 my ($self, %args) = @_;
465 return(IDZebra::delete_record($self->{zh},
466 $self->_record_update_args(%args)));
# Build the argument list shared by the record-level update calls; this
# module passes the result straight on to IDZebra::update_record and
# IDZebra::delete_record.
#   %args - recognised keys are sysno, match, recordType, file and data; they
#           are removed from %args and whatever remains is handed to
#           _makeRecordGroup as record group options
# Returns the list ($rg, $rectype, $sysno, $match, $fname, $buff, $len).
468 sub _record_update_args {
469 my ($self, %args) = @_;
# Missing or false values fall back to 0, the empty string or <no file>.
471 my $sysno = $args{sysno} ? $args{sysno} : 0;
472 my $match = $args{match} ? $args{match} : "";
473 my $rectype = $args{recordType} ? $args{recordType} : "";
474 my $fname = $args{file} ? $args{file} : "<no file>";
# NOTE(review): the branch preceding this elsif is not visible in this
# excerpt; presumably it takes the record from the data argument - confirm.
481 elsif ($args{file}) {
# NOTE(review): two-argument open on a bareword handle; a failure only warns,
# so the read below then runs on a handle that was never opened.
482 CORE::open (F, $args{file}) || warn ("Cannot open $args{file}");
483 $buff = join('',(<F>));
486 my $len = length($buff);
# Strip the record-level keys so that only record group options remain.
488 delete ($args{sysno});
489 delete ($args{match});
490 delete ($args{recordType});
491 delete ($args{file});
492 delete ($args{data});
494 my $rg = $self->_makeRecordGroup(%args);
496 # If no record type is given, then try to find it out from the
# The extension is the run of word characters after the last dot of the file
# name; it is used to look up the record type of the group for that extension.
499 if (my ($ext) = $fname =~ /\.(\w+)$/) {
500 my $rg2 = $self->_getRecordGroup($rg->{groupName},$ext);
501 $rectype = $rg2->{recordType};
# Fall back to the database named Default when the group has none.
505 $rg->{databaseName} = "Default" unless ($rg->{databaseName});
510 return ($rg, $rectype, $sysno, $match, $fname, $buff, $len);
513 # -----------------------------------------------------------------------------
516 my ($self,$mapfile) = @_;
518 if ($self->{cql_mapfile} ne $mapfile) {
519 unless (-f $mapfile) {
520 croak("Cannot find $mapfile");
522 if (defined ($self->{cql_ct})) {
523 IDZebra::cql_transform_close($self->{cql_ct});
525 $self->{cql_ct} = IDZebra::cql_transform_open_fname($mapfile);
526 $self->{cql_mapfile} = $mapfile;
529 return ($self->{cql_mapfile});
533 my ($self, $cqlquery) = @_;
534 unless (defined($self->{cql_ct})) {
535 croak("CQL map file is not specified yet.");
537 my $res = "\0" x 2048;
538 my $r = IDZebra::cql2pqf($self->{cql_ct}, $cqlquery, $res, 2048);
540 carp ("Error transforming CQL query: '$cqlquery', status:$r");
547 # -----------------------------------------------------------------------------
549 # -----------------------------------------------------------------------------
551 my ($self, %args) = @_;
555 if ($args{cqlmap}) { $self->cqlmap($args{cqlmap}); }
563 ($query, $cqlstat) = $self->cql2pqf($args{cql});
565 croak ("Failed to transform query: '$args{cql}', ".
566 "status: ($cqlstat)");
570 croak ("No query given to search");
575 if ($args{databases}) {
576 @origdbs = $self->databases;
577 $self->databases(@{$args{databases}});
580 my $rsname = $args{rsname} ? $args{rsname} : $self->_new_setname;
582 my $rs = $self->_search_pqf($query, $rsname);
584 if ($args{databases}) {
585 $self->databases(@origdbs);
590 carp("Sort skipped due to search error: ".
593 $rs->sort($args{sort});
602 return ("set_".$self->{rscount}++);
606 my ($self, $query, $setname) = @_;
608 my $hits = IDZebra::search_PQF($self->{zh},
614 my $rs = IDZebra::Resultset->new($self,
616 recordCount => $hits,
617 errCode => $self->errCode,
618 errString => $self->errString);
622 # -----------------------------------------------------------------------------
625 # Sorting of multiple result sets is not supported by zebra...
626 # -----------------------------------------------------------------------------
629 my ($self, $sortspec, $setname, @sets) = @_;
634 croak ("Sorting/merging of multiple resultsets is not supported now");
639 foreach my $rs (@sets) {
640 push (@setnames, $rs->{name});
641 $count += $rs->{recordCount}; # is this really sure ??? It doesn't
645 my $status = IDZebra::sort($self->{zh},
651 my $errCode = $self->errCode;
652 my $errString = $self->errString;
654 logf (LOG_LOG, "Sort status $setname: %d, errCode: %d, errString: %s",
655 $status, $errCode, $errString);
657 if ($status || $errCode) {$count = 0;}
659 my $rs = IDZebra::Resultset->new($self,
661 recordCount => $count,
663 errString => $errString);
667 # -----------------------------------------------------------------------------
669 # -----------------------------------------------------------------------------
671 my ($self, %args) = @_;
675 unless ($args{expression}) {
676 croak ("No scan expression given");
679 my $sl = IDZebra::ScanList->new($self,%args);
684 # ============================================================================
691 IDZebra::Session - A Zebra database server session for update and retrieval
695 $sess = IDZebra::Session->new(configFile => 'demo/zebra.cfg');
698 $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg',
699 groupName => 'demo1');
701 $sess->group(groupName => 'demo2');
707 $sess->update(path => 'lib');
709 my $s1=$sess->update_record(data => $rec1,
710 recordType => 'grs.perl.pod',
711 groupName => "demo1",
714 my $stat = $sess->end_trans;
716 $sess->databases('demo1','demo2');
718 my $rs1 = $sess->search(cqlmap => 'demo/cql.map',
719 cql => 'dc.title=IDZebra',
720 databases => [qw(demo1 demo2)]);
725 Zebra is a high-performance, general-purpose structured text indexing and retrieval engine. It reads structured records in a variety of input formats (eg. email, XML, MARC) and allows access to them through exact boolean search expressions and relevance-ranked free-text queries.
727 Zebra supports large databases (more than ten gigabytes of data, tens of millions of records). It supports incremental, safe database updates on live systems. You can access data stored in Zebra using a variety of Index Data tools (eg. YAZ and PHP/YAZ) as well as commercial and freeware Z39.50 clients and toolkits.
729 =head1 OPENING AND CLOSING ZEBRA SESSIONS
731 For the time being, only local database services are supported, in the same way as when calling zebraidx or zebrasrv from the command shell. In order to open a local Zebra database, with a specific configuration file, use
733 $sess = IDZebra::Session->new(configFile => 'demo/zebra.cfg');
738 $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg');
740 where $sess is going to be the object representing a Zebra Session. Whenever this variable goes out of scope, the session is closed, together with all active transactions, etc... If you'd like to close the session explicitly, just say:
745 - close all transactions
746 - destroy all result sets
749 In the future different database access methods are going to be available,
752 $sess = IDZebra::Session->open(server => 'ostrich.technomat.hu:9999');
754 You can also use the B<record group> arguments described below directly when calling the constructor, or the open method:
756 $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg',
757 groupName => 'demo');
762 If you manage different sets of records that share common characteristics, you can organize the configuration settings for each type into "groups". See the Zebra manual on the configuration file (zebra.cfg).
764 For each open session a default record group is assigned. You can configure it in the constructor, or by the B<group> method:
766 $sess->group(groupName => ..., ...)
768 The following options are available:
774 This will select the named record group, and load the corresponding settings from the configuration file. All subsequent values will overwrite those...
776 =item B<databaseName>
778 The name of the (logical) database the updated records will belong to.
782 This path is used for directory updates (B<update>, B<delete> methods);
786 This option determines how to identify your records. See I<Zebra manual: Locating Records>
790 The record type used for indexing.
792 =item B<flagStoreData>
794 Specifies whether the records should be stored internally in the Zebra system files. If you want to maintain the raw records yourself, this option should be false (0). If you want Zebra to take care of the records for you, it should be true (1).
796 =item B<flagStoreKeys>
798 Specifies whether key information should be saved for a given group of records. If you plan to update/delete this type of records later this should be specified as 1; otherwise it should be 0 (default), to save register space.
804 =item B<fileVerboseLimit>
806 When doing a directory update, skip log messages once the specified number of files have been processed...
808 =item B<databaseNamePath>
812 =item B<explainDatabase>
814 The name of the explain database to be used
818 Follow links when doing directory update.
822 You can use the same parameters calling all update methods.
824 =head1 TRANSACTIONS (WRITE LOCKS)
826 A transaction is a block of record update (insert / modify / delete) procedures. So, every call to such a function will implicitly start a transaction, unless one has already been started by
830 For multiple per record updates it's efficient to start transactions explicitly: otherwise registers (system files, vocabularies, etc..) are updated one by one. After finishing all requested updates, use
832 $stat = $sess->end_trans;
834 The return value is a ZebraTransactionStatus object, containing the following members as a hash reference:
836 $stat->{processed} # Number of records processed
837 $stat->{updated} # Number of records updated
838 $stat->{deleted} # Number of records deleted
839 $stat->{inserted} # Number of records inserted
840 $stat->{stime} # System time used
841 $stat->{utime} # User time used
845 There are two ways to update data in a Zebra database using the perl API. You can update an entire directory structure just the way it's done by zebraidx:
847 $sess->update(path => 'lib');
849 This will update the database with the files in directory "lib", according to the current record group settings.
853 This will update the database with the files, specified by the default record group setting. I<path> has to be specified there...
855 $sess->update(groupName => 'demo1',
858 Update the database with files in "lib" according to the settings of group "demo1"
860 $sess->delete(groupName => 'demo1',
863 Delete the records derived from the files in directory "lib", according to the "demo1" group settings. Sounds complex? Read the Zebra documentation about identifying records.
865 You can also update records one by one, even directly from memory:
867 $sysno = $sess->update_record(data => $rec1,
868 recordType => 'grs.perl.pod',
869 groupName => "demo1");
871 This will update the database with the given record buffer. Note, that in this case recordType is explicitly specified, as there is no filename given, and for the demo1 group, no default record type is specified. The return value is the system assigned id of the record.
873 You can also index a single file:
875 $sysno = $sess->update_record(file => "lib/IDZebra/Data1.pm");
877 Or, provide a buffer, and a filename (where filename will only be used to identify the record, if configured that way, and possibly to find out its record type):
879 $sysno = $sess->update_record(data => $rec1,
880 file => "lib/IDZebra/Data1.pm");
882 And some crazy stuff:
884 $sysno = $sess->delete_record(sysno => $sysno);
886 where sysno in itself is sufficient to identify the record
888 $sysno = $sess->delete_record(data => $rec1,
889 recordType => 'grs.perl.pod',
890 groupName => "demo1");
892 In this case the record is extracted and, if it already exists, located in the database, then deleted...
894 $sysno = $sess->delete_record(data => $rec1,
896 recordType => 'grs.perl.pod',
897 groupName => "demo1");
899 Don't try this at home! In this case, the record identifier string (which is normally generated according to the rules set in the recordId directive of zebra.cfg) is provided directly....
902 B<Important:> Note that one record can be updated only once within a transaction - all subsequent updates are skipped.
904 =head1 DATABASE SELECTION
906 Within a zebra repository you can define logical databases. You can either do this by record groups, or by providing the databaseName argument for update methods. For each record the database name it belongs to is stored.
908 For searching, you can select databases by calling:
910 $sess->databases('db1','db2');
912 This will not do anything if the given and only the given databases are already selected. You can get the list of the currently selected databases by calling:
914 @dblist = $sess->databases();
918 It's nice to be able to store data in your repository... But it's useful to reach it as well. So this is how to do searching:
920 $rs = $sess->search(databases => [qw(demo1 demo2)], # optional
921 pqf => '@attr 1=4 computer');
923 This is going to execute a search in databases demo1 and demo2, for title 'computer'. This is a PQF (Prefix Query Format) search, see YAZ documentation for details. The database selection is optional: if it's provided, the given list of databases is selected for this particular search, then the original selection is restored.
927 Not all users enjoy typing in prefix query structures and numerical attribute values, even in a minimalistic test client. In the library world, the more intuitive Common Command Language (or ISO 8777) has enjoyed some popularity - especially before the widespread availability of graphical interfaces. It is still useful in applications where you for some reason or other need to provide a symbolic language for expressing boolean query structures.
929 The CCL searching is not currently supported by this API.
933 CQL - Common Query Language - was defined for the SRW protocol. In many ways CQL has a similar syntax to CCL. The objective of CQL is different. Where CCL aims to be an end-user language, CQL is the protocol query language for SRW.
935 In order to map CQL queries to Zebra internal search structures, you have to define a mapping, the way it is described in YAZ documentation: I<Specification of CQL to RPN mapping>. The mapping is interpreted by the method:
937 $sess->cqlmap($mapfile);
939 Or, you can directly provide the I<mapfile> parameter for the search:
941 my $rs1 = $sess->search(cqlmap => 'demo/cql.map',
942 cql => 'dc.title=IDZebra');
944 As you see, CQL searching is that simple: just give the query in the I<cql> parameter.
948 As you have seen, the result of the search request is a I<Resultset> object.
949 It contains the number of hits and the search status, and can be used to sort and retrieve the resulting records.
953 printf ("RS Status is %d (%s)\n", $rs->errCode, $rs->errString);
955 I<$rs-E<gt>errCode> is 0, if there were no errors during search. Read the I<IDZebra::Resultset> manpage for more details.
957 =head1 MISC FUNCTIONS
965 Peter Popovics, pop@technomat.hu
969 IDZebra, IDZebra::Data1, Zebra documentation