1 # $Id: Session.pm,v 1.10 2003-03-03 18:27:25 pop Exp $
3 # Zebra perl API header
4 # =============================================================================
5 package IDZebra::Session;
14 use IDZebra::Logger qw(:flags :calls);
15 use IDZebra::Resultset;
16 use IDZebra::RetrievalRecord;
17 our $VERSION = do { my @r = (q$Revision: 1.10 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
18 # our @ISA = qw(IDZebra::Logger);
22 # -----------------------------------------------------------------------------
23 # Class constructors, destructor
24 # -----------------------------------------------------------------------------
26 my ($proto, %args) = @_;
27 my $class = ref($proto) || $proto;
29 $self->{args} = \%args;
31 bless ($self, $class);
32 $self->{cql_ct} = undef;
33 $self->{cql_mapfile} = "";
36 $self->{databases} = {};
40 my ($self, %args) = @_;
43 unless (defined($self->{zs})) {
44 if (defined($args{'configFile'})) {
45 $self->{zs} = IDZebra::start($args{'configFile'});
47 $self->{zs} = IDZebra::start("zebra.cfg");
54 if (defined($self->{zs})) {
55 IDZebra::stop($self->{zs}) if ($self->{zs});
62 my ($proto,%args) = @_;
65 if (ref($proto)) { $self = $proto; } else {
66 $self = $proto->new(%args);
70 %args = %{$self->{args}};
73 $self->start_service(%args);
75 unless (defined($self->{zs})) {
76 croak ("Falied to open zebra service");
79 unless (defined($self->{zh})) {
80 $self->{zh}=IDZebra::open($self->{zs});
83 # Reset result set counter
86 # This is needed in order to somehow initialize the service
87 $self->databases("Default");
89 # Load the default configuration
92 $self->{odr_input} = IDZebra::odr_createmem($IDZebra::ODR_DECODE);
93 $self->{odr_output} = IDZebra::odr_createmem($IDZebra::ODR_ENCODE);
100 unless (defined($self->{zh})) {
101 croak ("Zebra session is not opened");
111 # Delete all result sets
112 my $r = IDZebra::deleteResultSet($self->{zh},
113 1, #Z_DeleteRequest_all,
117 while (IDZebra::trans_no($self->{zh}) > 0) {
118 logf (LOG_WARN,"Explicitly closing transaction with session");
122 IDZebra::close($self->{zh});
126 if ($self->{odr_input}) {
127 IDZebra::odr_reset($self->{odr_input});
128 IDZebra::odr_destroy($self->{odr_input});
129 $self->{odr_input} = undef;
132 if ($self->{odr_output}) {
133 IDZebra::odr_reset($self->{odr_output});
134 IDZebra::odr_destroy($self->{odr_output});
135 $self->{odr_output} = undef;
143 logf (LOG_LOG,"DESTROY $self");
146 if (defined ($self->{cql_ct})) {
147 IDZebra::cql_transform_close($self->{cql_ct});
151 # -----------------------------------------------------------------------------
152 # Record group selection. This is a bit nasty... but it is used in many places
153 # -----------------------------------------------------------------------------
155 my ($self,%args) = @_;
158 $self->{rg} = $self->_makeRecordGroup(%args);
159 $self->_selectRecordGroup($self->{rg});
# Select a record group by name and make it the session's current group.
#   $groupName - name of the record group to look up.
# The group is fetched with _getRecordGroup($groupName), remembered in
# $self->{rg}, and then activated through _selectRecordGroup.
164 sub selectRecordGroup {
165 my ($self, $groupName) = @_;
167 $self->{rg} = $self->_getRecordGroup($groupName);
168 $self->_selectRecordGroup($self->{rg});
# Debugging aid: dump the fields of record group $rg to STDERR.
# A "-----" separator line is printed first, then one "key:value" line
# for every field name in the qw list.
# NOTE(review): `foreach my $key qw (...)` without parentheses around the
# qw list is a syntax error on Perl 5.18 and later; wrap the list in
# parentheses if this module must run on a modern perl.
171 sub _displayRecordGroup {
172 my ($self, $rg) = @_;
173 print STDERR "-----\n";
174 foreach my $key qw (groupName
185 print STDERR "$key:",$rg->{$key},"\n";
# Build a copy of the record group $orig.
# A fresh IDZebra::recordGroup is created and passed to
# IDZebra::init_recordGroup, then every field named in the qw list is
# copied over from $orig.
# Only true values are copied: a field that is 0, "" or undef in $orig
# is left as it is in the newly initialised group.
# NOTE(review): `foreach my $key qw (...)` without parentheses around the
# qw list is a syntax error on Perl 5.18 and later.
189 sub _cloneRecordGroup {
190 my ($self, $orig) = @_;
191 my $rg = IDZebra::recordGroup->new();
192 my $r = IDZebra::init_recordGroup($rg);
193 foreach my $key qw (groupName
205 $rg->{$key} = $orig->{$key} if ($orig->{$key});
# Load the settings of a record group through the open session handle.
#   $groupName - name of the group; only stored in the new group when it
#                is not the empty string.
#   $ext       - optional file extension handed to
#                IDZebra::res_get_recordGroup; any false value is
#                replaced by "".
# A fresh IDZebra::recordGroup is created and initialised, then filled in
# by IDZebra::res_get_recordGroup using $self->{zh}.
# NOTE(review): `$groupName ne ""` warns under `use warnings` when
# $groupName is undef -- confirm callers always pass a defined value.
210 sub _getRecordGroup {
211 my ($self, $groupName, $ext) = @_;
212 my $rg = IDZebra::recordGroup->new();
213 my $r = IDZebra::init_recordGroup($rg);
214 $rg->{groupName} = $groupName if ($groupName ne "");
215 $ext = "" unless ($ext);
216 $r = IDZebra::res_get_recordGroup($self->{zh}, $rg, $ext);
# Produce the record group to use for an operation, based on %args.
# With no arguments at all, the session's current group ($self->{rg}) is
# returned unchanged.
# Otherwise a group is obtained either from _getRecordGroup (when a true
# groupName argument is supplied) or by cloning $self->{rg}, and the
# options in %args are applied to it with _setRecordGroupOptions.
220 sub _makeRecordGroup {
221 my ($self, %args) = @_;
# Index of the last key is -1 when %args is empty.
224 my @keys = keys(%args);
225 unless ($#keys >= 0) {
226 return ($self->{rg});
229 if ($args{groupName}) {
230 $rg = $self->_getRecordGroup($args{groupName});
232 $rg = $self->_cloneRecordGroup($self->{rg});
234 $self->_setRecordGroupOptions($rg, %args);
# Copy record group options from %args into the record group $rg.
# For every option name in the qw list, the value from %args is stored in
# $rg when it is defined; false-but-defined values such as 0 or "" are
# therefore copied as well.
# NOTE(review): `foreach my $key qw (...)` without parentheses around the
# qw list is a syntax error on Perl 5.18 and later.
238 sub _setRecordGroupOptions {
239 my ($self, $rg, %args) = @_;
241 foreach my $key qw (databaseName
252 if (defined ($args{$key})) {
253 $rg->{$key} = $args{$key};
# Activate the record group $rg on the open session handle.
# The group is passed to IDZebra::set_group together with $self->{zh},
# and the database name is taken from $rg->{databaseName}.
# Croaks with "Fatal error selecting database ..." when
# $self->databases($dbName) returns a false value.
257 sub _selectRecordGroup {
258 my ($self, $rg) = @_;
259 my $r = IDZebra::set_group($self->{zh}, $rg);
# Assignment inside the condition: true only when the group has no
# (or a false) databaseName.
261 unless ($dbName = $rg->{databaseName}) {
264 unless ($self->databases($dbName)) {
265 croak("Fatal error selecting database $dbName");
268 # -----------------------------------------------------------------------------
269 # Selecting databases for search (and also for updating - internally)
270 # -----------------------------------------------------------------------------
272 my ($self, @databases) = @_;
277 return (keys(%{$self->{databases}}));
283 foreach my $db (@databases) {
284 next if ($self->{databases}{$db});
289 foreach my $db (keys (%{$self->{databases}})) {
290 $changed++ unless ($tmp{$db});
295 delete ($self->{databases});
296 foreach my $db (@databases) {
297 $self->{databases}{$db}++;
300 if (IDZebra::select_databases($self->{zh},
304 "Could not select database(s) %s errCode=%d",
305 join(",",@databases),
309 logf(LOG_LOG,"Database(s) selected: %s",join(",",@databases));
312 return (keys(%{$self->{databases}}));
315 # -----------------------------------------------------------------------------
317 # -----------------------------------------------------------------------------
320 return(IDZebra::errCode($self->{zh}));
325 return(IDZebra::errString($self->{zh}));
330 return(IDZebra::errAdd($self->{zh}));
333 # -----------------------------------------------------------------------------
335 # -----------------------------------------------------------------------------
339 IDZebra::begin_trans($self->{zh});
345 my $stat = IDZebra::ZebraTransactionStatus->new();
346 IDZebra::end_trans($self->{zh}, $stat);
353 return(IDZebra::begin_read($self->{zh}));
359 IDZebra::end_read($self->{zh});
363 my ($self, $value) = @_;
365 if ($#_ > 0) { IDZebra::set_shadow_enable($self->{zh},$value); }
366 return (IDZebra::get_shadow_enable($self->{zh}));
372 if ($self->shadow_enable) {
373 return(IDZebra::commit($self->{zh}));
377 # -----------------------------------------------------------------------------
378 # We don't really need that...
379 # -----------------------------------------------------------------------------
381 my ($self, $name) = @_;
382 if ($name !~/^(input|output)$/) {
383 croak("Undefined ODR '$name'");
385 IDZebra::odr_reset($self->{"odr_$name"});
388 # -----------------------------------------------------------------------------
390 # -----------------------------------------------------------------------------
394 return(IDZebra::init($self->{zh}));
400 return(IDZebra::compact($self->{zh}));
404 my ($self, %args) = @_;
406 my $rg = $self->_update_args(%args);
407 $self->_selectRecordGroup($rg);
409 IDZebra::repository_update($self->{zh});
410 $self->_selectRecordGroup($self->{rg});
415 my ($self, %args) = @_;
417 my $rg = $self->_update_args(%args);
418 $self->_selectRecordGroup($rg);
420 IDZebra::repository_delete($self->{zh});
421 $self->_selectRecordGroup($self->{rg});
426 my ($self, %args) = @_;
428 my $rg = $self->_update_args(%args);
429 $self->_selectRecordGroup($rg);
431 IDZebra::repository_show($self->{zh});
432 $self->_selectRecordGroup($self->{rg});
437 my ($self, %args) = @_;
438 my $rg = $self->_makeRecordGroup(%args);
439 $self->_selectRecordGroup($rg);
443 # -----------------------------------------------------------------------------
445 # -----------------------------------------------------------------------------
448 my ($self, %args) = @_;
450 return(IDZebra::update_record($self->{zh},
451 $self->_record_update_args(%args)));
455 my ($self, %args) = @_;
457 return(IDZebra::delete_record($self->{zh},
458 $self->_record_update_args(%args)));
# Turn the named arguments of a per-record update/delete call into a
# positional argument list.
# Arguments read from %args:
#   sysno      - record id, 0 when missing or false
#   match      - match string, "" when missing or false
#   recordType - record type, "" when missing or false
#   file       - file name, "<no file>" when missing or false
#   data       - removed from %args here along with the keys above
# Whatever remains in %args is handed to _makeRecordGroup to build the
# record group for the call.
# Returns the list ($rg, $rectype, $sysno, $match, $fname, $buff, $len),
# where $len is length($buff).
460 sub _record_update_args {
461 my ($self, %args) = @_;
463 my $sysno = $args{sysno} ? $args{sysno} : 0;
464 my $match = $args{match} ? $args{match} : "";
465 my $rectype = $args{recordType} ? $args{recordType} : "";
466 my $fname = $args{file} ? $args{file} : "<no file>";
# Read the whole file into the record buffer.
# NOTE(review): two-argument open on a bareword handle; a failed open
# only warns, and F is still read on the next line.
473 elsif ($args{file}) {
474 CORE::open (F, $args{file}) || warn ("Cannot open $args{file}");
475 $buff = join('',(<F>));
478 my $len = length($buff);
# Drop the per-record keys so that only record group options are left.
480 delete ($args{sysno});
481 delete ($args{match});
482 delete ($args{recordType});
483 delete ($args{file});
484 delete ($args{data});
486 my $rg = $self->_makeRecordGroup(%args);
488 # If no record type is given, then try to find it out from the
# Use the file name extension to look up the group's record type.
491 if (my ($ext) = $fname =~ /\.(\w+)$/) {
492 my $rg2 = $self->_getRecordGroup($rg->{groupName},$ext);
493 $rectype = $rg2->{recordType};
# Fall back to the "Default" database when the group names none.
497 $rg->{databaseName} = "Default" unless ($rg->{databaseName});
502 return ($rg, $rectype, $sysno, $match, $fname, $buff, $len);
505 # -----------------------------------------------------------------------------
508 my ($self,$mapfile) = @_;
510 if ($self->{cql_mapfile} ne $mapfile) {
511 unless (-f $mapfile) {
512 croak("Cannot find $mapfile");
514 if (defined ($self->{cql_ct})) {
515 IDZebra::cql_transform_close($self->{cql_ct});
517 $self->{cql_ct} = IDZebra::cql_transform_open_fname($mapfile);
518 $self->{cql_mapfile} = $mapfile;
521 return ($self->{cql_mapfile});
525 my ($self, $cqlquery) = @_;
526 unless (defined($self->{cql_ct})) {
527 croak("CQL map file is not specified yet.");
529 my $res = "\0" x 2048;
530 my $r = IDZebra::cql2pqf($self->{cql_ct}, $cqlquery, $res, 2048);
532 carp ("Error transforming CQL query: '$cqlquery', status:$r");
539 # -----------------------------------------------------------------------------
541 # -----------------------------------------------------------------------------
543 my ($self, %args) = @_;
547 if ($args{cqlmap}) { $self->cqlmap($args{cqlmap}); }
555 ($query, $cqlstat) = $self->cql2pqf($args{cql});
557 croak ("Failed to transform query: '$args{cql}', ".
558 "status: ($cqlstat)");
562 croak ("No query given to search");
567 if ($args{databases}) {
568 @origdbs = $self->databases;
569 $self->databases(@{$args{databases}});
572 my $rsname = $args{rsname} ? $args{rsname} : $self->_new_setname;
574 my $rs = $self->_search_pqf($query, $rsname);
576 if ($args{databases}) {
577 $self->databases(@origdbs);
582 carp("Sort skipped due to search error: ".
585 $rs->sort($args{sort});
594 return ("set_".$self->{rscount}++);
598 my ($self, $query, $setname) = @_;
600 my $hits = IDZebra::search_PQF($self->{zh},
606 my $rs = IDZebra::Resultset->new($self,
608 recordCount => $hits,
609 errCode => $self->errCode,
610 errString => $self->errString);
614 # -----------------------------------------------------------------------------
617 # Sorting of multiple result sets is not supported by zebra...
618 # -----------------------------------------------------------------------------
621 my ($self, $sortspec, $setname, @sets) = @_;
626 croak ("Sorting/merging of multiple resultsets is not supported now");
631 foreach my $rs (@sets) {
632 push (@setnames, $rs->{name});
633 $count += $rs->{recordCount}; # is this really sure ??? It doesn't
637 my $status = IDZebra::sort($self->{zh},
643 my $errCode = $self->errCode;
644 my $errString = $self->errString;
646 logf (LOG_LOG, "Sort status $setname: %d, errCode: %d, errString: %s",
647 $status, $errCode, $errString);
649 if ($status || $errCode) {$count = 0;}
651 my $rs = IDZebra::Resultset->new($self,
653 recordCount => $count,
655 errString => $errString);
660 # ============================================================================
667 IDZebra::Session - A Zebra database server session for update and retrieval
671 $sess = IDZebra::Session->new(configFile => 'demo/zebra.cfg');
674 $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg',
675 groupName => 'demo1');
677 $sess->group(groupName => 'demo2');
683 $sess->update(path => 'lib');
685 my $s1=$sess->update_record(data => $rec1,
686 recordType => 'grs.perl.pod',
687 groupName => "demo1",
690 my $stat = $sess->end_trans;
692 $sess->databases('demo1','demo2');
694 my $rs1 = $sess->search(cqlmap => 'demo/cql.map',
695 cql => 'dc.title=IDZebra',
696 databases => [qw(demo1 demo2)]);
701 Zebra is a high-performance, general-purpose structured text indexing and retrieval engine. It reads structured records in a variety of input formats (eg. email, XML, MARC) and allows access to them through exact boolean search expressions and relevance-ranked free-text queries.
703 Zebra supports large databases (more than ten gigabytes of data, tens of millions of records). It supports incremental, safe database updates on live systems. You can access data stored in Zebra using a variety of Index Data tools (eg. YAZ and PHP/YAZ) as well as commercial and freeware Z39.50 clients and toolkits.
705 =head1 OPENING AND CLOSING A ZEBRA SESSION
707 For the time being only local database services are supported, in the same way as when calling zebraidx or zebrasrv from the command shell. In order to open a local Zebra database with a specific configuration file, use
709 $sess = IDZebra::Session->new(configFile => 'demo/zebra.cfg');
714 $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg');
716 where $sess is going to be the object representing a Zebra Session. Whenever this variable goes out of scope, the session is closed, together with all active transactions, etc. If you'd like to close the session explicitly, just say:
721 - close all transactions
722 - destroy all result sets
725 In the future different database access methods are going to be available,
728 $sess = IDZebra::Session->open(server => 'ostrich.technomat.hu:9999');
730 You can also use the B<record group> arguments described below directly when calling the constructor, or the open method:
732 $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg',
733 groupName => 'demo');
738 If you manage different sets of records that share common characteristics, you can organize the configuration settings for each type into "groups". See the Zebra manual on the configuration file (zebra.cfg).
740 For each open session a default record group is assigned. You can configure it in the constructor, or by the B<group> method:
742 $sess->group(groupName => ..., ...)
744 The following options are available:
750 This will select the named record group, and load the corresponding settings from the configuration file. All subsequent values will overwrite those...
752 =item B<databaseName>
754 The name of the (logical) database the updated records will belong to.
758 This path is used for directory updates (B<update>, B<delete> methods);
762 This option determines how to identify your records. See I<Zebra manual: Locating Records>
766 The record type used for indexing.
768 =item B<flagStoreData>
770 Specifies whether the records should be stored internally in the Zebra system files. If you want to maintain the raw records yourself, this option should be false (0). If you want Zebra to take care of the records for you, it should be true(1).
772 =item B<flagStoreKeys>
774 Specifies whether key information should be saved for a given group of records. If you plan to update/delete this type of records later this should be specified as 1; otherwise it should be 0 (default), to save register space.
780 =item B<fileVerboseLimit>
782 Skip log messages, when doing a directory update, and the specified number of files are processed...
784 =item B<databaseNamePath>
788 =item B<explainDatabase>
790 The name of the explain database to be used
794 Follow links when doing directory update.
798 You can use the same parameters calling all update methods.
800 =head1 TRANSACTIONS (WRITE LOCKS)
802 A transaction is a block of record update (insert / modify / delete) procedures. Every call to such a function will implicitly start a transaction, unless one has already been started by
806 For multiple per-record updates it's more efficient to start transactions explicitly; otherwise registers (system files, vocabularies, etc.) are updated one by one. After finishing all requested updates, use
808 $stat = $sess->end_trans;
810 The return value is a ZebraTransactionStatus object, containing the following members as a hash reference:
812 $stat->{processed} # Number of records processed
813 $stat->{updated} # Number of records updated
814 $stat->{deleted} # Number of records deleted
815 $stat->{inserted} # Number of records inserted
816 $stat->{stime} # System time used
817 $stat->{utime} # User time used
821 There are two ways to update data in a Zebra database using the perl API. You can update an entire directory structure just the way it's done by zebraidx:
823 $sess->update(path => 'lib');
825 This will update the database with the files in directory "lib", according to the current record group settings.
829 This will update the database with the files, specified by the default record group setting. I<path> has to be specified there...
831 $sess->update(groupName => 'demo1',
834 Update the database with files in "lib" according to the settings of group "demo1"
836 $sess->delete(groupName => 'demo1',
839 Delete the records derived from the files in directory "lib", according to the "demo1" group settings. Sounds complex? Read zebra documentation about identifying records.
841 You can also update records one by one, even directly from the memory:
843 $sysno = $sess->update_record(data => $rec1,
844 recordType => 'grs.perl.pod',
845 groupName => "demo1");
847 This will update the database with the given record buffer. Note, that in this case recordType is explicitly specified, as there is no filename given, and for the demo1 group, no default record type is specified. The return value is the system assigned id of the record.
849 You can also index a single file:
851 $sysno = $sess->update_record(file => "lib/IDZebra/Data1.pm");
853 Or, provide a buffer, and a filename (where filename will only be used to identify the record, if configured that way, and possibly to find out its record type):
855 $sysno = $sess->update_record(data => $rec1,
856 file => "lib/IDZebra/Data1.pm");
858 And some crazy stuff:
860 $sysno = $sess->delete_record(sysno => $sysno);
862 where sysno in itself is sufficient to identify the record
864 $sysno = $sess->delete_record(data => $rec1,
865 recordType => 'grs.perl.pod',
866 groupName => "demo1");
868 In this case the record is extracted and, if it already exists, located in the database and then deleted...
870 $sysno = $sess->delete_record(data => $rec1,
872 recordType => 'grs.perl.pod',
873 groupName => "demo1");
875 Don't try this at home! In this case, the record identifier string (which is normally generated according to the rules set in the recordId directive of zebra.cfg) is provided directly...
878 B<Important:> Note, that one record can be updated only once within a transaction - all subsequent updates are skipped.
880 =head1 DATABASE SELECTION
882 Within a zebra repository you can define logical databases. You can either do this by record groups, or by providing the databaseName argument for update methods. For each record the database name it belongs to is stored.
884 For searching, you can select databases by calling:
886 $sess->databases('db1','db2');
888 This will not do anything if exactly the given databases are already selected. You can get the list of the currently selected databases by calling:
890 @dblist = $sess->databases();
894 It's nice to be able to store data in your repository... But it's useful to reach it as well. So this is how to do searching:
896 $rs = $sess->search(databases => [qw(demo1 demo2)], # optional
897 pqf => '@attr 1=4 computer');
899 This is going to execute a search in databases demo1 and demo2, for title 'computer'. This is a PQF (Prefix Query Format) search, see YAZ documentation for details. The database selection is optional: if it's provided, the given list of databases is selected for this particular search, then the original selection is restored.
903 Not all users enjoy typing in prefix query structures and numerical attribute values, even in a minimalistic test client. In the library world, the more intuitive Common Command Language (or ISO 8777) has enjoyed some popularity - especially before the widespread availability of graphical interfaces. It is still useful in applications where you for some reason or other need to provide a symbolic language for expressing boolean query structures.
905 The CCL searching is not currently supported by this API.
909 CQL - Common Query Language - was defined for the SRW protocol. In many ways CQL has a similar syntax to CCL. The objective of CQL is different. Where CCL aims to be an end-user language, CQL is the protocol query language for SRW.
911 In order to map CQL queries to Zebra internal search structures, you have to define a mapping, the way it is described in YAZ documentation: I<Specification of CQL to RPN mapping>. The mapping is interpreted by the method:
913 $sess->cqlmap($mapfile);
915 Or, you can directly provide the I<mapfile> parameter for the search:
917 my $rs1 = $sess->search(cqlmap => 'demo/cql.map',
918 cql => 'dc.title=IDZebra');
920 As you see, CQL searching is so simple: just give the query in the I<cql> parameter.
924 As you have seen, the result of the search request is a I<Resultset> object.
925 It contains number of hits, and search status, and can be used to sort and retrieve the resulting records.
929 printf ("RS Status is %d (%s)\n", $rs->errCode, $rs->errString);
931 I<$rs-E<gt>errCode> is 0, if there were no errors during search. Read the I<IDZebra::Resultset> manpage for more details.
933 =head1 MISC FUNCTIONS
941 Peter Popovics, pop@technomat.hu
945 IDZebra, IDZebra::Data1, Zebra documentation