perl/lib/IDZebra/Session.pm

   1 # $Id: Session.pm,v 1.19 2003-07-26 16:27:46 pop Exp $
   2 #
   3 # Zebra perl API header
   4 # =============================================================================
   5 package IDZebra::Session;
   6
   7 use strict;
   8 use warnings;
   9 use Carp;
  10
  11 BEGIN {
  12     use IDZebra;
  13     use Scalar::Util;
  14     use IDZebra::Logger qw(:flags :calls);
  15     use IDZebra::Resultset;
  16     use IDZebra::ScanList;
  17     use IDZebra::RetrievalRecord;
  18     require Exporter;
  19     our $VERSION = do { my @r = (q$Revision: 1.19 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
  20     our @ISA = qw(IDZebra::Logger Exporter);
  21     our @EXPORT = qw (TRANS_RW TRANS_RO);
  22 }
  23
  24 use constant TRANS_RW => 1;
  25 use constant TRANS_RO => 0;
  26
  27 1;
  28 # -----------------------------------------------------------------------------
  29 # Class constructors, destructor
  30 # -----------------------------------------------------------------------------
  # Constructor.
  #
  # May be called on the class or on an existing object (the new object is
  # then blessed into that object's class).  The named arguments are stored
  # untouched in $self->{args}; open() falls back on them when it is called
  # without arguments of its own.
  #
  # Returns the new, not yet opened, session object.
  sub new {
      my ($proto, %args) = @_;
      my $class = ref($proto) || $proto;

      # All per-session slots are initialised before returning.  The
      # original returned first, so the 'databases' cache was never set up
      # and only came into existence through autovivification.
      my $self = bless({
          args        => \%args,
          cql_ct      => undef,
          cql_mapfile => "",
          databases   => {},
      }, $class);

      return ($self);
  }
  44
  # Start the Zebra service for this session unless a service handle is
  # already stored in $self->{zs}.
  #
  # Named arguments:
  #   configFile - configuration file handed to IDZebra::start;
  #                "zebra.cfg" is used when it is absent or undefined.
  sub start_service {
      my ($self, %args) = @_;

      unless (defined($self->{zs})) {
          my $config = defined($args{'configFile'}) ? $args{'configFile'}
                                                    : "zebra.cfg";
          $self->{zs} = IDZebra::start($config);
      }
  }
  57
  # Stop the service whose handle is kept in $self->{zs} and forget the
  # handle.  A defined but false handle is only forgotten; IDZebra::stop is
  # not called for it.  Does nothing when no handle is stored.
  sub stop_service {
      my $session = shift;

      if (defined(my $service = $session->{zs})) {
          IDZebra::stop($service) if ($service);
          $session->{zs} = undef;
      }
  }
  65
  66
  # Open a Zebra session.
  #
  # Called on the class it first builds a new object from %args; called on
  # an object it (re)opens that object.  When no arguments are given, the
  # ones stored by the constructor in $self->{args} are used instead.
  #
  # Named arguments read here: 'shadow' (defaults to 0).  All arguments are
  # also forwarded to start_service() and group().
  #
  # Croaks if the service could not be started.  Returns the session object.
  sub open {
      my ($proto, %args) = @_;

      # Object call: reuse the object.  Class call: construct a new one.
      my $self = ref($proto) ? $proto : $proto->new(%args);

      # Fall back on the constructor arguments.
      %args = %{$self->{args}} unless (%args);

      $self->start_service(%args);

      unless (defined($self->{zs})) {
          croak ("Failed to open zebra service");
      }

      unless (defined($self->{zh})) {
          $self->{zh} = IDZebra::open($self->{zs});
      }

      # Reset result set counter
      $self->{rscount} = 0;

      # This is needed in order to somehow initialize the service
      $self->databases("Default");

      # Load the default configuration
      $self->group(%args);

      # Set shadow usage
      my $shadow = defined($args{shadow}) ? $args{shadow} : 0;
      $self->shadow($shadow);

      # ODR memory handles; released again in close().
      $self->{odr_input}  = IDZebra::odr_createmem($IDZebra::ODR_DECODE);
      $self->{odr_output} = IDZebra::odr_createmem($IDZebra::ODR_ENCODE);

      return ($self);
  }
 108
 # Guard called at the top of the session methods: croaks with
 # "Zebra session is not opened" unless $self->{zh} is defined.
 sub checkzh {
     my $self = shift;
     croak ("Zebra session is not opened") unless (defined($self->{zh}));
 }
 115
 # Close the session: delete the result sets, end every transaction that is
 # still open, close the Zebra handle, release both ODR handles and finally
 # stop the service.  Each step is skipped when its handle is not set, so
 # the method can be called more than once.
 sub close {
     my $self = shift;

     if ($self->{zh}) {

         # Delete all resulsets
         my $stats = 0;
         my $deleted = IDZebra::deleteResultSet($self->{zh},
                                                1, #Z_DeleteRequest_all,
                                                0, [],
                                                $stats);

         # Unwind transactions the caller left open.
         while (IDZebra::trans_no($self->{zh}) > 0) {
             logf (LOG_WARN,"Explicitly closing transaction with session");
             $self->end_trans;
         }

         IDZebra::close($self->{zh});
         $self->{zh} = undef;
     }

     # Input handle first, then output handle.
     foreach my $slot ('odr_input', 'odr_output') {
         next unless ($self->{$slot});
         IDZebra::odr_reset($self->{$slot});
         IDZebra::odr_destroy($self->{$slot});
         $self->{$slot} = undef;
     }

     $self->stop_service;
 }
 151
 # Destructor: logs the destruction, closes the session and releases the
 # CQL transformation handle if one was opened by cqlmap().
 sub DESTROY {
     my $self = shift;

     logf (LOG_LOG,"DESTROY $self");
     $self->close;

     IDZebra::cql_transform_close($self->{cql_ct})
         if (defined ($self->{cql_ct}));
 }
 162 # -----------------------------------------------------------------------------
 163 # Record group selection  This is a bit nasty... but used at many places
 164 # -----------------------------------------------------------------------------
 # Get or set the session's default record group.
 #
 # With arguments, a record group is built from them (_makeRecordGroup),
 # stored in $self->{rg} and selected.  Without arguments nothing changes.
 # Either way the current default record group is returned.
 sub group {
     my ($self, %args) = @_;
     $self->checkzh;

     # More than just the invocant was passed.
     if (scalar(@_) > 1) {
         my $rg = $self->_makeRecordGroup(%args);
         $self->{rg} = $rg;
         $self->_selectRecordGroup($rg);
     }
     return ($self->{rg});
 }
 174
 # Load the record group called $groupName (_getRecordGroup), make it the
 # session default ($self->{rg}) and select it.
 sub selectRecordGroup {
     my ($self, $groupName) = @_;
     $self->checkzh;

     my $rg = $self->_getRecordGroup($groupName);
     $self->{rg} = $rg;
     $self->_selectRecordGroup($rg);
 }
 181
 # Debugging aid: print the fields of record group $rg to STDERR, one
 # "name:value" line per field, after a "-----" separator line.
 # Undefined fields are printed with an empty value.
 sub _displayRecordGroup {
     my ($self, $rg) = @_;
     print STDERR "-----\n";
     # The word list must be parenthesised: a bare qw() as the foreach list
     # is a syntax error from Perl 5.18 on.
     foreach my $key (qw(groupName
                         databaseName
                         path recordId
                         recordType
                         flagStoreData
                         flagStoreKeys
                         flagRw
                         fileVerboseLimit
                         databaseNamePath
                         explainDatabase
                         followLinks)) {
         my $value = defined($rg->{$key}) ? $rg->{$key} : "";
         print STDERR "$key:",$value,"\n";
     }
 }
 199
 # Build a freshly initialised record group and copy into it every known
 # field of $orig that holds a true value.  False or missing fields keep
 # whatever IDZebra::init_recordGroup put there.
 #
 # Returns the new record group.
 sub _cloneRecordGroup {
     my ($self, $orig) = @_;
     my $rg = IDZebra::recordGroup->new();
     IDZebra::init_recordGroup($rg);
     # The word list must be parenthesised: a bare qw() as the foreach list
     # is a syntax error from Perl 5.18 on.
     foreach my $key (qw(groupName
                         databaseName
                         path
                         recordId
                         recordType
                         flagStoreData
                         flagStoreKeys
                         flagRw
                         fileVerboseLimit
                         databaseNamePath
                         explainDatabase
                         followLinks)) {
         $rg->{$key} = $orig->{$key} if ($orig->{$key});
     }
     return ($rg);
 }
 220
 # Build a record group and let IDZebra::res_get_recordGroup fill it in
 # for the given group name and file extension.
 #
 #   $groupName - group name; ignored when undefined or empty
 #   $ext       - file extension; any false value is passed on as ""
 #
 # Returns the record group.
 sub _getRecordGroup {
     my ($self, $groupName, $ext) = @_;
     my $rg = IDZebra::recordGroup->new();
     IDZebra::init_recordGroup($rg);
     # _record_update_args may pass an undefined name; test definedness
     # first so that the comparison does not warn.
     $rg->{groupName} = $groupName
         if (defined($groupName) && $groupName ne "");
     $ext = "" unless ($ext);
     IDZebra::res_get_recordGroup($self->{zh}, $rg, $ext);
     return ($rg);
 }
 230
 # Produce the record group described by %args.
 #
 # Without arguments the session's current record group is returned as is.
 # Otherwise the starting point is the named group (when 'groupName' is
 # true) or a clone of the current group, and the remaining options are
 # applied on top of it with _setRecordGroupOptions.
 sub _makeRecordGroup {
     my ($self, %args) = @_;

     return ($self->{rg}) unless (%args);

     my $rg = $args{groupName}
         ? $self->_getRecordGroup($args{groupName})
         : $self->_cloneRecordGroup($self->{rg});

     $self->_setRecordGroupOptions($rg, %args);
     return ($rg);
 }
 248
 # Copy every defined, recognised option from %args into record group $rg.
 # 'groupName' is deliberately not in the list, and unknown keys are
 # ignored.  A defined but false value (0, "") is copied too.
 sub _setRecordGroupOptions {
     my ($self, $rg, %args) = @_;

     # The word list must be parenthesised: a bare qw() as the foreach list
     # is a syntax error from Perl 5.18 on.
     foreach my $key (qw(databaseName
                         path
                         recordId
                         recordType
                         flagStoreData
                         flagStoreKeys
                         flagRw
                         fileVerboseLimit
                         databaseNamePath
                         explainDatabase
                         followLinks)) {
         if (defined ($args{$key})) {
             $rg->{$key} = $args{$key};
         }
     }
 }
 # Make $rg the active record group of the Zebra handle and select its
 # database ('Default' when the group names none).
 #
 # Croaks if the database cannot be selected.
 sub _selectRecordGroup {
     my ($self, $rg) = @_;

     # The status of set_group is not inspected.
     my $status = IDZebra::set_group($self->{zh}, $rg);

     my $dbName = $rg->{databaseName} || 'Default';
     croak("Fatal error selecting database $dbName")
         unless ($self->databases($dbName));
 }
 280 # -----------------------------------------------------------------------------
 281 # Selecting databases for search (and also for updating - internally)
 282 # -----------------------------------------------------------------------------
 # Get or set the databases selected for this session.
 #
 # Without arguments: returns the names currently recorded as selected.
 # With arguments: selects exactly these databases.  IDZebra is only called
 # when the requested set differs from the recorded one.
 #
 # Returns the list of selected database names, or 0 when the selection
 # failed (the failure is logged together with the session error code).
 sub databases {
     my ($self, @databases) = @_;

     $self->checkzh;

     unless (@databases) {
         return (keys(%{$self->{databases}}));
     }

     # Compare the requested set with the recorded one in both directions.
     my %wanted = map { $_ => 1 } @databases;
     my $changed = 0;
     foreach my $db (@databases) {
         $changed++ unless ($self->{databases}{$db});
     }
     foreach my $db (keys (%{$self->{databases}})) {
         $changed++ unless ($wanted{$db});
     }

     if ($changed) {
         if (IDZebra::select_databases($self->{zh},
                                       scalar(@databases),
                                       \@databases)) {
             # Forget the recorded selection: what is selected after a
             # failure is not known here, and keeping the requested names
             # would make the next identical request look already done.
             $self->{databases} = {};
             logf(LOG_FATAL,
                  "Could not select database(s) %s errCode=%d",
                  join(",",@databases),
                  $self->errCode());
             return (0);
         }

         # Record the selection only once it has succeeded.
         $self->{databases} = \%wanted;
         logf(LOG_LOG,"Database(s) selected: %s",join(",",@databases));
     }
     return (keys(%{$self->{databases}}));
 }
 325
 326 # -----------------------------------------------------------------------------
 327 # Error handling
 328 # -----------------------------------------------------------------------------
 # Returns what IDZebra::errCode reports for this session's Zebra handle.
 sub errCode {
     my $self = shift;
     return (IDZebra::errCode($self->{zh}));
 }
 333
 # Returns what IDZebra::errString reports for this session's Zebra handle.
 sub errString {
     my $self = shift;
     return (IDZebra::errString($self->{zh}));
 }
 338
 # Returns what IDZebra::errAdd reports for this session's Zebra handle.
 sub errAdd {
     my $self = shift;
     return (IDZebra::errAdd($self->{zh}));
 }
 343
 344 # -----------------------------------------------------------------------------
 345 # Transaction stuff
 346 # -----------------------------------------------------------------------------
 # Begin a transaction on the session.
 #
 #   $m - transaction mode, TRANS_RW or TRANS_RO; TRANS_RW when omitted
 #
 # Croaks when the session is not open, or when IDZebra::begin_trans
 # reports a failure.  Error code 2 gets its own message; every other
 # failure is reported with the session error code and error string.
 sub begin_trans {
     my ($self, $m) = @_;
     # Same guard as end_trans/shadow/commit: fail with a clear message
     # instead of handing an undefined handle to the C API.
     $self->checkzh;
     $m = TRANS_RW unless (defined ($m));
     if (IDZebra::begin_trans($self->{zh},$m)) {
         if ($self->errCode == 2) {
             croak ("TRANS_RW not allowed within TRANS_RO");
         } else {
             croak("Error starting transaction; code:".
                   $self->errCode . " message: " . $self->errString);
         }
     }
 }
 359
 # End the current transaction.
 #
 # Returns the IDZebra::ZebraTransactionStatus object that was handed to
 # IDZebra::end_trans to be filled in.
 sub end_trans {
     my $self = shift;
     $self->checkzh;

     my $status = IDZebra::ZebraTransactionStatus->new();
     IDZebra::end_trans($self->{zh}, $status);
     return ($status);
 }
 367
 # Get or set shadow register usage.
 #
 # When an argument is passed (even undef, which counts as 0) it is handed
 # to IDZebra::set_shadow_enable first.  The value then reported by
 # IDZebra::get_shadow_enable is returned in either case.
 sub shadow {
     my ($self, $value) = @_;
     $self->checkzh;

     if (scalar(@_) > 1) {
         # The status of the setter is not inspected.
         my $status = IDZebra::set_shadow_enable($self->{zh},
                                                 defined($value) ? $value : 0);
     }
     return (IDZebra::get_shadow_enable($self->{zh}));
 }
 377
 # Commit pending changes.  IDZebra::commit is only called, and its result
 # returned, when shadow() reports that shadow registers are in use.
 sub commit {
     my $self = shift;
     $self->checkzh;

     my $shadowed = $self->shadow;
     if ($shadowed) {
         return (IDZebra::commit($self->{zh}));
     }
 }
 385
 386 # -----------------------------------------------------------------------------
 387 # We don't really need that...
 388 # -----------------------------------------------------------------------------
 # Reset one of the session's ODR handles.
 #
 #   $name - 'input' or 'output'
 #
 # Croaks for any other name, including an undefined one.
 sub odr_reset {
     my ($self, $name) = @_;
     # \A..\z instead of ^..$: "$" would also accept a trailing newline and
     # then look up a hash key that does not exist.
     unless (defined($name) && $name =~ /\A(?:input|output)\z/) {
         croak("Undefined ODR '" . (defined($name) ? $name : "") . "'");
     }
     IDZebra::odr_reset($self->{"odr_$name"});
 }
 396
 397 # -----------------------------------------------------------------------------
 398 # Init/compact
 399 # -----------------------------------------------------------------------------
 # Calls IDZebra::init on the session's Zebra handle and returns its result.
 # Croaks when the session is not open.
 sub init {
     my $self = shift;
     $self->checkzh;
     return (IDZebra::init($self->{zh}));
 }
 405
 # Calls IDZebra::compact on the session's Zebra handle and returns its
 # result.  Croaks when the session is not open.
 sub compact {
     my $self = shift;
     $self->checkzh;
     return (IDZebra::compact($self->{zh}));
 }
 411
 # Run IDZebra::repository_update with the record group described by
 # %args (see _makeRecordGroup), inside a transaction of its own.
 #
 # The session's default record group is selected again before the
 # transaction is ended.  Returns what end_trans returns.
 sub update {
     my ($self, %args) = @_;
     $self->checkzh;

     # Temporary record group for this call only.
     my $temporary = $self->_update_args(%args);
     $self->_selectRecordGroup($temporary);

     $self->begin_trans;
     IDZebra::repository_update($self->{zh});

     # Back to the session default.
     $self->_selectRecordGroup($self->{rg});
     $self->end_trans;
 }
 422
 # Run IDZebra::repository_delete with the record group described by
 # %args (see _makeRecordGroup), inside a transaction of its own.
 #
 # The session's default record group is selected again before the
 # transaction is ended.  Returns what end_trans returns.
 sub delete {
     my ($self, %args) = @_;
     $self->checkzh;

     # Temporary record group for this call only.
     my $temporary = $self->_update_args(%args);
     $self->_selectRecordGroup($temporary);

     $self->begin_trans;
     IDZebra::repository_delete($self->{zh});

     # Back to the session default.
     $self->_selectRecordGroup($self->{rg});
     $self->end_trans;
 }
 433
 # Run IDZebra::repository_show with the record group described by %args
 # (see _makeRecordGroup), inside a transaction of its own.
 #
 # The session's default record group is selected again before the
 # transaction is ended.  Returns what end_trans returns.
 sub show {
     my ($self, %args) = @_;
     $self->checkzh;

     # Temporary record group for this call only.
     my $temporary = $self->_update_args(%args);
     $self->_selectRecordGroup($temporary);

     $self->begin_trans;
     IDZebra::repository_show($self->{zh});

     # Back to the session default.
     $self->_selectRecordGroup($self->{rg});
     $self->end_trans;
 }
 444
 # Shared first step of update/delete/show: build the record group
 # described by %args, select it and return it.
 sub _update_args {
     my ($self, %args) = @_;

     my $group = $self->_makeRecordGroup(%args);
     $self->_selectRecordGroup($group);
     return ($group);
 }
 451
 452 # -----------------------------------------------------------------------------
 453 # Per record update
 454 # -----------------------------------------------------------------------------
 # Insert a single record.  Arguments are those of _record_update_args.
 #
 # Returns the record's sysno (written by IDZebra::insert_record through
 # the reference built in _record_update_args) when the status is 0.
 # Otherwise the status is returned, with a positive status negated.
 sub insert_record {
     my ($self, %args) = @_;
     $self->checkzh;
     my @args = $self->_record_update_args(%args);
     my $stat = IDZebra::insert_record($self->{zh}, @args);

     # Third element of the argument list is the reference to the sysno.
     my $sysno = $args[2];
     $stat = -1 * $stat if ($stat > 0);
     return $stat ? $stat : $$sysno;
 }
 464
 # Update a single record.  Arguments are those of _record_update_args.
 #
 # Returns the record's sysno (written by IDZebra::update_record through
 # the reference built in _record_update_args) when the status is 0.
 # Otherwise the status is returned, with a positive status negated.
 sub update_record {
     my ($self, %args) = @_;
     $self->checkzh;
     my @args = $self->_record_update_args(%args);
     my $stat = IDZebra::update_record($self->{zh}, @args);

     # Third element of the argument list is the reference to the sysno.
     my $sysno = $args[2];
     $stat = -1 * $stat if ($stat > 0);
     return $stat ? $stat : $$sysno;
 }
 474
 # Delete a single record.  Arguments are those of _record_update_args.
 #
 # Returns the record's sysno (taken from the reference built in
 # _record_update_args) when the status is 0.  Otherwise the status is
 # returned, with a positive status negated.
 sub delete_record {
     my ($self, %args) = @_;
     $self->checkzh;

     my @call_args = $self->_record_update_args(%args);
     my $status    = IDZebra::delete_record($self->{zh}, @call_args);

     # Third element of the argument list is the reference to the sysno.
     my $sysno_ref = $call_args[2];
     $status = -1 * $status if ($status > 0);
     return $status ? $status : $$sysno_ref;
 }
 483
 # Turn the named arguments of insert_record/update_record/delete_record
 # into the positional argument list expected by the IDZebra calls.
 #
 # Named arguments consumed here:
 #   sysno      - record number (default 0)
 #   match      - match string (default "")
 #   recordType - record type; when false it is looked up from the file
 #                name's extension via _getRecordGroup
 #   file       - file name (default "<no file>"); its content is used as
 #                the record when 'data' is not given
 #   data       - the record itself
 #   force      - force flag (default 0)
 # All remaining arguments are passed to _makeRecordGroup.
 #
 # Returns ($rg, $rectype, \$sysno, $match, $fname, $buff, $len, $force).
 sub _record_update_args {
     my ($self, %args) = @_;

     my $sysno   = $args{sysno}      ? $args{sysno}      : 0;
     my $match   = $args{match}      ? $args{match}      : "";
     my $rectype = $args{recordType} ? $args{recordType} : "";
     my $fname   = $args{file}       ? $args{file}       : "<no file>";
     my $force   = $args{force}      ? $args{force}      : 0;

     my $buff;

     if ($args{data}) {
         $buff = $args{data};
     }
     elsif ($args{file}) {
         # Three-argument open with a lexical handle: the file name is
         # never interpreted as a mode or a command.  CORE:: is spelled
         # out because this package defines its own open/close methods.
         if (CORE::open (my $fh, '<', $args{file})) {
             local $/;                    # slurp the whole file
             $buff = <$fh>;
             CORE::close ($fh);
             $buff = "" unless (defined($buff));
         } else {
             # Still only a warning, but do not read from an unopened handle.
             warn ("Cannot open $args{file}: $!");
             $buff = "";
         }
     }
     my $len = defined($buff) ? length($buff) : 0;

     # What is left describes the record group.
     delete (@args{qw(sysno match recordType file data force)});

     my $rg = $self->_makeRecordGroup(%args);

     # If no record type is given, then try to find it out from the
     # file extension;
     unless ($rectype) {
         if (my ($ext) = $fname =~ /\.(\w+)$/) {
             my $rg2 = $self->_getRecordGroup($rg->{groupName},$ext);
             $rectype = $rg2->{recordType};
         }
     }

     $rg->{databaseName} = "Default" unless ($rg->{databaseName});

     $rectype = "" unless ($rectype);

     return ($rg, $rectype, \$sysno, $match, $fname, $buff, $len, $force);
 }
 530
 531 # -----------------------------------------------------------------------------
 532 # CQL stuff
 # Get or set the CQL mapping file.
 #
 # When a file name is passed and it differs from the current one, the
 # previous transformation handle (if any) is closed and a new one is
 # opened from the file.  Croaks if the file does not exist.
 #
 # Returns the name of the mapping file now in use.
 sub cqlmap {
     my ($self, $mapfile) = @_;

     if (scalar(@_) > 1 && $self->{cql_mapfile} ne $mapfile) {
         croak("Cannot find $mapfile") unless (-f $mapfile);

         IDZebra::cql_transform_close($self->{cql_ct})
             if (defined ($self->{cql_ct}));

         $self->{cql_ct}      = IDZebra::cql_transform_open_fname($mapfile);
         $self->{cql_mapfile} = $mapfile;
     }
     return ($self->{cql_mapfile});
 }
 549
 # Transform a CQL query into PQF using the mapping opened by cqlmap().
 #
 # Returns ($pqf, $status), where $status is whatever IDZebra::cql2pqf
 # returned.  A non-zero status is deliberately not reported here; search()
 # croaks when the resulting query is empty.
 #
 # Croaks when no CQL map file has been set yet.
 sub cql2pqf {
     my ($self, $cqlquery) = @_;
     unless (defined($self->{cql_ct})) {
         croak("CQL map file is not specified yet.");
     }

     # Pre-sized, NUL-filled buffer that the C call writes the result into.
     my $bufsize = 2048;
     my $res = "\0" x $bufsize;
     my $r = IDZebra::cql2pqf($self->{cql_ct}, $cqlquery, $res, $bufsize);

     # Cut at the first NUL.  ".*" with /s and \z also removes a single
     # trailing NUL and anything after an embedded newline, which the
     # former "\0.+$" left in place.
     $res =~ s/\0.*\z//s;
     return ($res,$r);
 }
 563
 564
 565 # -----------------------------------------------------------------------------
 566 # Search
 567 # -----------------------------------------------------------------------------
 # Run a search and return the result set object.
 #
 # Named arguments:
 #   cqlmap    - CQL mapping file, passed to cqlmap() first
 #   pqf       - query in PQF; takes precedence over 'cql'
 #   cql       - query in CQL, transformed with cql2pqf()
 #   databases - array reference of databases to search; the previously
 #               selected databases are selected again afterwards
 #   rsname    - result set name; a new one is generated when omitted
 #   sort      - sort specification applied to the result set, unless the
 #               search reported an error (a warning is issued instead)
 #
 # Croaks when no query is given or the CQL query cannot be transformed.
 sub search {
     my ($self, %args) = @_;

     $self->checkzh;

     $self->cqlmap($args{cqlmap}) if ($args{cqlmap});

     my $query;
     if ($args{pqf}) {
         $query = $args{pqf};
     }
     elsif ($args{cql}) {
         my $cqlstat;
         ($query, $cqlstat) =  $self->cql2pqf($args{cql});
         croak ("Failed to transform query: '$args{cql}', ".
                "status: ($cqlstat)") unless ($query);
     }
     croak ("No query given to search") unless ($query);

     # Temporarily switch databases when asked to.
     my $search_dbs = $args{databases};
     my @origdbs;
     if ($search_dbs) {
         @origdbs = $self->databases;
         $self->databases(@{$search_dbs});
     }

     my $rsname = $args{rsname} || $self->_new_setname;
     my $rs     = $self->_search_pqf($query, $rsname);

     $self->databases(@origdbs) if ($search_dbs);

     if ($args{sort}) {
         if ($rs->errCode) {
             carp("Sort skipped due to search error: ".
                  $rs->errCode);
         } else {
             $rs->sort($args{sort});
         }
     }

     return ($rs);
 }
 618
 # Generate the next result set name, "set_<n>", from the session's
 # result set counter, and advance the counter.
 sub _new_setname {
     my $self = shift;
     my $serial = $self->{rscount}++;
     return ("set_" . $serial);
 }
 623
 # Run a PQF query and wrap the outcome in an IDZebra::Resultset.
 #
 #   $query   - query in PQF
 #   $setname - name of the result set to create
 #
 # The result set records the hit count written by IDZebra::search_PQF and
 # the session's error code and string as they are right after the search.
 sub _search_pqf {
     my ($self, $query, $setname) = @_;

     my $hits = 0;

     # The status of the call is not inspected; errors are read from the
     # session below.
     my $status = IDZebra::search_PQF($self->{zh},
                                      $query,
                                      $setname,
                                      \$hits);

     my @details = (name        => $setname,
                    query       => $query,
                    recordCount => $hits,
                    errCode     => $self->errCode,
                    errString   => $self->errString);

     my $rs = IDZebra::Resultset->new($self, @details);
     return($rs);
 }
 643
 644 # -----------------------------------------------------------------------------
 645 # Sort
 646 #
 647 # Sorting of multiple result sets is not supported by zebra...
 648 # -----------------------------------------------------------------------------
 649
 # Sort a result set into a new result set.
 #
 #   $sortspec - sort specification passed to IDZebra::sort
 #   $setname  - name of the result set to create
 #   @sets     - source result sets; more than one is refused with croak
 #
 # Returns an IDZebra::Resultset named $setname.  Its record count is the
 # sum of the source record counts, or 0 when the sort reported a status
 # or the session reports an error code.
 sub sortResultsets {
     my ($self, $sortspec, $setname, @sets) = @_;

     $self->checkzh;

     croak ("Sorting/merging of multiple resultsets is not supported now")
         if (scalar(@sets) > 1);

     my @setnames = map { $_->{name} } @sets;

     # is this really sure ??? It doesn't matter now...
     my $count = 0;
     foreach my $set (@sets) {
         $count += $set->{recordCount};
     }

     my $status = IDZebra::sort($self->{zh},
                                $self->{odr_output},
                                $sortspec,
                                $setname,
                                \@setnames);

     my $errCode = $self->errCode;
     my $errString = $self->errString;

     logf (LOG_LOG, "Sort status $setname: %d, errCode: %d, errString: %s",
           $status, $errCode, $errString);

     $count = 0 if ($status || $errCode);

     my $rs  = IDZebra::Resultset->new($self,
                                       name        => $setname,
                                       recordCount => $count,
                                       errCode     => $errCode,
                                       errString   => $errString);

     return ($rs);
 }
 689 # -----------------------------------------------------------------------------
 690 # Scan
 691 # -----------------------------------------------------------------------------
 # Create a scan list.  All named arguments are passed on to
 # IDZebra::ScanList->new; 'expression' is required.
 #
 # Croaks when the session is not open or no expression is given.
 # Returns the IDZebra::ScanList object.
 sub scan {
     my ($self, %args) = @_;

     $self->checkzh;

     croak ("No scan expression given") unless ($args{expression});

     my $list = IDZebra::ScanList->new($self,%args);
     return ($list);
 }
 705
 706 # ============================================================================
 707
 708 __END__
 709
 710 =head1 NAME
 711
 712 IDZebra::Session - A Zebra database server session for update and retrieval
 713
 714 =head1 SYNOPSIS
 715
 716   $sess = IDZebra::Session->new(configFile => 'demo/zebra.cfg');
 717   $sess->open();
 718
 719   $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg',
 720                                  groupName  => 'demo1');
 721
 722   $sess->group(groupName => 'demo2');
 723
 724   $sess->init();
 725
 726   $sess->begin_trans;
 727
 728   $sess->update(path      =>  'lib');
 729
 730   my $s1=$sess->update_record(data       => $rec1,
 731                               recordType => 'grs.perl.pod',
 732                               groupName  => "demo1",
 733                               );
 734
 735   my $stat = $sess->end_trans;
 736
 737   $sess->databases('demo1','demo2');
 738
 739   my $rs1 = $sess->search(cqlmap    => 'demo/cql.map',
 740                           cql       => 'dc.title=IDZebra',
 741                           databases => [qw(demo1 demo2)]);
 742   $sess->close;
 743
 744 =head1 DESCRIPTION
 745
 746 Zebra is a high-performance, general-purpose structured text indexing and retrieval engine. It reads structured records in a variety of input formats (eg. email, XML, MARC) and allows access to them through exact boolean search expressions and relevance-ranked free-text queries.
 747
 748 Zebra supports large databases (more than ten gigabytes of data, tens of millions of records). It supports incremental, safe database updates on live systems. You can access data stored in Zebra using a variety of Index Data tools (eg. YAZ and PHP/YAZ) as well as commercial and freeware Z39.50 clients and toolkits.
 749
 750 =head1 OPENING AND CLOSING ZEBRA SESSIONS
 751
 752 For the time being, only local database services are supported, in the same way as when calling zebraidx or zebrasrv from the command shell. To open a local Zebra database with a specific configuration file, use
 753
 754   $sess = IDZebra::Session->new(configFile => 'demo/zebra.cfg');
 755   $sess->open();
 756
 757 or
 758
 759   $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg');
 760
 761 where $sess is the object representing a Zebra session. Whenever this variable goes out of scope, the session is closed, together with all active transactions, etc. If you would like to close the session explicitly, just say:
 762
 763   $sess->close();
 764
 765 This will
 766   - close all transactions
 767   - destroy all result sets and scan lists
 768   - close the session
 769
 770 Note, that if I<shadow registers> are enabled, the changes will not be committed automatically.
 771
 772 In the future different database access methods are going to be available,
 773 like:
 774
 775   $sess = IDZebra::Session->open(server => 'ostrich.technomat.hu:9999');
 776
 777 You can also use the B<record group> arguments described below directly when calling the constructor, or the open method:
 778
 779   $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg',
 780                                  groupName  => 'demo');
 781
 782
 783 =head1 RECORD GROUPS
 784
 785 If you manage different sets of records that share common characteristics, you can organize the configuration settings for each type into "groups". See the Zebra manual on the configuration file (zebra.cfg).
 786
 787 For each open session a default record group is assigned. You can configure it in the constructor, or by the B<group> method:
 788
 789   $sess->group(groupName => ..., ...)
 790
 791 The following options are available:
 792
 793 =over 4
 794
 795 =item B<groupName>
 796
 797 This will select the named record group, and load the corresponding settings from the configuration file. All subsequent values will overwrite those...
 798
 799 =item B<databaseName>
 800
 801 The name of the (logical) database the updated records will belong to.
 802
 803 =item B<path>
 804
 805 This path is used for directory updates (B<update>, B<delete> methods);
 806
 807 =item B<recordId>
 808
 809 This option determines how to identify your records. See I<Zebra manual: Locating Records>
 810
 811 =item B<recordType>
 812
 813 The record type used for indexing.
 814
 815 =item B<flagStoreData>
 816
 817 Specifies whether the records should be stored internally in the Zebra system files. If you want to maintain the raw records yourself, this option should be false (0). If you want Zebra to take care of the records for you, it should be true(1).
 818
 819 =item B<flagStoreKeys>
 820
 821 Specifies whether key information should be saved for a given group of records. If you plan to update/delete this type of records later this should be specified as 1; otherwise it should be 0 (default), to save register space.
 822
 823 =item B<flagRw>
 824
 825 ?
 826
 827 =item B<fileVerboseLimit>
 828
 829 Skip log messages, when doing a directory update, and the specified number of files are processed...
 830
 831 =item B<databaseNamePath>
 832
 833 ?
 834
 835 =item B<explainDatabase>
 836
 837 The name of the explain database to be used
 838
 839 =item B<followLinks>
 840
 841 Follow links when doing directory update.
 842
 843 =back
 844
 845 You can use the same parameters calling all update methods.
 846
 847 =head1 TRANSACTIONS (READ / WRITE LOCKS)
 848
 849 A transaction is a block of record update (insert / modify / delete) or retrieval procedures. Every call to such a function implicitly starts a transaction, unless one has already been started by
 850
 851   $sess->begin_trans;
 852
 853 or
 854
 855   $sess->begin_trans(TRANS_RW)
 856
 857 (these two are equivalent). The effect of this call is a kind of lock: a write lock is put on the registers, so other processes trying to update the database will be blocked. If there is already an RW (Read-Write) transaction opened by another process, the I<begin_trans> call will be blocked.
 858
 859 You can also use
 860
 861   $sess->begin_trans(TRANS_RO),
 862
 863 if you would like to put on a "read lock". This one is B<deprecated>, as while you have explicitly opened a transaction for read, you can't open another one for update. For example:
 864
 865   $sess->begin_trans(TRANS_RO);
 866   $sess->begin_trans(TRANS_RW); # invalid, die here
 867   $sess->end_trans;
 868   $sess->end_trans;
 869
 870 is invalid, but
 871
 872   $sess->begin_trans(TRANS_RW);
 873   $sess->begin_trans(TRANS_RO);
 874   $sess->end_trans;
 875   $sess->end_trans;
 876
 877 is valid, but probably useless. Note again, that for each retrieval call, an RO transaction is opened. I<TRANS_RW> and I<TRANS_RO> are exported by default by IDZebra::Session.pm.
 878
 879 For multiple per-record I<updates> it's efficient to start transactions explicitly: otherwise registers (system files, vocabularies, etc..) are updated one by one. After finishing all requested updates, use
 880
 881   $stat = $sess->end_trans;
 882
 883 The return value is a ZebraTransactionStatus object, containing the following members as a hash reference:
 884
 885   $stat->{processed} # Number of records processed
 886   $stat->{updated}   # Number of records updated
 887   $stat->{deleted}   # Number of records deleted
 888   $stat->{inserted}  # Number of records inserted
 889   $stat->{stime}     # System time used
 890   $stat->{utime}     # User time used
 891
 892 Normally, if the perl code dies due to some runtime error, or the session is closed, then the API attempts to close all pending transactions.
 893
 894 =head1 THE SHADOW REGISTERS
 895
 896 The Zebra server supports updating of the index structures. That is, you can add, modify, or remove records from databases managed by Zebra without rebuilding the entire index. Since this process involves modifying structured files with various references between blocks of data in the files, the update process is inherently sensitive to system crashes, or to process interruptions: Anything but a successfully completed update process will leave the register files in an unknown state, and you will essentially have no recourse but to re-index everything, or to restore the register files from a backup medium. Further, while the update process is active, users cannot be allowed to access the system, as the contents of the register files may change unpredictably.
 897
 898 You can solve these problems by enabling the shadow register system in Zebra. During the updating procedure, zebraidx will temporarily write changes to the involved files in a set of "shadow files", without modifying the files that are accessed by the active server processes. If the update procedure is interrupted by a system crash or a signal, you simply repeat the procedure - the register files have not been changed or damaged, and the partially written shadow files are automatically deleted before the new updating procedure commences.
 899
 900 At the end of the updating procedure (or in a separate operation, if you so desire), the system enters a "commit mode". First, any active server processes are forced to access those blocks that have been changed from the shadow files rather than from the main register files; the unmodified blocks are still accessed at their normal location (the shadow files are not a complete copy of the register files - they only contain those parts that have actually been modified). If the commit process is interrupted at any point during the commit process, the server processes will continue to access the shadow files until you can repeat the commit procedure and complete the writing of data to the main register files. You can perform multiple update operations to the registers before you commit the changes to the system files, or you can execute the commit operation at the end of each update operation. When the commit phase has completed successfully, any running server processes are instructed to switch their operations to the new, operational register, and the temporary shadow files are deleted.
 901
 902 By default, (in the API !) the use of shadow registers is disabled. If zebra is configured that way (there is a "shadow" entry in zebra.cfg), then the shadow system can be enabled by calling:
 903
 904  $sess->shadow(1);
 905
 906 or disabled by
 907
 908  $sess->shadow(0);
 909
 910 If the shadow system is enabled, you have to commit the changes you made by calling:
 911
 912  $sess->commit;
 913
 914 Note, that you can also determine shadow usage in the session constructor:
 915
 916  $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg',
 917                                 shadow    => 1);
 918
 919 Changes to I<shadow> have no effect within a I<transaction> (i.e. a transaction is started either with shadow enabled or disabled). For more details, read Zebra documentation: I<Safe Updating - Using Shadow Registers>.
 920
 921 =head1 UPDATING DATA
 922
 923 There are two ways to update data in a Zebra database using the perl API. You can update an entire directory structure just the way it's done by zebraidx:
 924
 925   $sess->update(path      =>  'lib');
 926
 927 This will update the database with the files in directory "lib", according to the current record group settings.
 928
 929   $sess->update();
 930
 931 This will update the database with the files, specified by the default record group setting. I<path> has to be specified there...
 932
 933   $sess->update(groupName => 'demo1',
 934                 path      =>  'lib');
 935
 936 Update the database with files in "lib" according to the settings of group "demo1"
 937
 938   $sess->delete(groupName => 'demo1',
 939                 path      =>  'lib');
 940
 941 Delete the records derived from the files in directory "lib", according to the "demo1" group settings. Sounds complex? Read zebra documentation about identifying records.
 942
 943 You can also update records one by one, even directly from the memory:
 944
 945   $sysno = $sess->update_record(data       => $rec1,
 946                                 recordType => 'grs.perl.pod',
 947                                 groupName  => "demo1");
 948
 949 This will update the database with the given record buffer. Note, that in this case recordType is explicitly specified, as there is no filename given, and for the demo1 group, no default record type is specified. The return value is the system assigned id of the record.
 950
 951 You can also index a single file:
 952
 953   $sysno = $sess->update_record(file => "lib/IDZebra/Data1.pm");
 954
 955 Or, provide a buffer, and a filename (where filename will only be used to identify the record, if configured that way, and possibly to find out its record type):
 956
 957   $sysno = $sess->update_record(data => $rec1,
 958                                 file => "lib/IDZebra/Data1.pm");
 959
 960 And some crazy stuff:
 961
 962   $sysno = $sess->delete_record(sysno => $sysno);
 963
 964 where sysno in itself is sufficient to identify the record
 965
 966   $sysno = $sess->delete_record(data => $rec1,
 967                                 recordType => 'grs.perl.pod',
 968                                 groupName  => "demo1");
 969
 970 In this case the record is extracted and, if it already exists, located in the database, then deleted...
 971
 972   $sysno = $sess->update_record(data       => $rec1,
 973                                 match      => $myid,
 974                                 recordType => 'grs.perl.pod',
 975                                 groupName  => "demo1");
 976
 977 Don't try this at home! In this case, the record identifier string (which is normally generated according to the rules set in I<recordId> member of the record group, or in the I<recordId> parameter) is provided directly.... Looks much better this way:
 978
 979   $sysno = $sess->update_record(data          => $rec1,
 980                                 databaseName  => 'books',
 981                                 recordId      => '(bib1,ISBN)',
 982                                 recordType    => 'grs.perl.pod',
 983                                 flagStoreData => 1,
 984                                 flagStoreKeys => 1);
 985
 986 You can notice, that it's not necessary to define a record group in zebra.cfg: you can do it "on the fly" in your code.
 987
 988 B<Important:> Note, that one record can be updated only once within a transaction - all subsequent updates are skipped. If you'd like to override this feature, use the I<force=E<gt>1> flag:
 989
 990   $sysno = $sess->update_record(data       => $rec1,
 991                                 recordType => 'grs.perl.pod',
 992                                 groupName  => "demo1",
 993                                 force      => 1);
 994
 995 If you don't want to update the record when it already exists, use the I<insert_record> method:
 996
 997   $sysno = $sess->insert_record(data       => $rec1,
 998                                 recordType => 'grs.perl.pod',
 999                                 groupName  => "demo1");
1000
1001 In this case, sysno will be -1, if the record could not be added, because there was already one in the database, with the same record identifier (generated according to the I<recordId> setting).
1002
1003 =head1 DATABASE SELECTION
1004
1005 Within a zebra repository you can define logical databases. You can either do this by record groups, or by providing the databaseName argument for update methods. For each record the database name it belongs to is stored.
1006
1007 For searching, you can select databases by calling:
1008
1009   $sess->databases('db1','db2');
1010
1011 This will not do anything if the given and only the given databases are already selected. You can get the list of the actually selected databases, by calling:
1012
1013   @dblist = $sess->databases();
1014
1015 =head1 SEARCHING
1016
1017 It's nice to be able to store data in your repository... But it's useful to reach it as well. So this is how to do searching:
1018
1019   $rs = $sess->search(databases => [qw(demo1 demo2)], # optional
1020                       pqf       => '@attr 1=4 computer');
1021
1022 This is going to execute a search in databases demo1 and demo2, for title 'computer'. This is a PQF (Prefix Query Format) search, see YAZ documentation for details. The database selection is optional: if it's provided, the given list of databases is selected for this particular search, then the original selection is restored.
1023
1024 =head2 CCL searching
1025
1026 Not all users enjoy typing in prefix query structures and numerical attribute values, even in a minimalistic test client. In the library world, the more intuitive Common Command Language (or ISO 8777) has enjoyed some popularity - especially before the widespread availability of graphical interfaces. It is still useful in applications where you for some reason or other need to provide a symbolic language for expressing boolean query structures.
1027
1028 The CCL searching is not currently supported by this API.
1029
1030 =head2 CQL searching
1031
1032 CQL - Common Query Language - was defined for the SRW protocol. In many ways CQL has a similar syntax to CCL. The objective of CQL is different. Where CCL aims to be an end-user language, CQL is the protocol query language for SRW.
1033
1034 In order to map CQL queries to Zebra internal search structures, you have to define a mapping, the way it is described in YAZ documentation: I<Specification of CQL to RPN mapping>. The mapping is interpreted by the method:
1035
1036   $sess->cqlmap($mapfile);
1037
1038 Or, you can directly provide the map file in the I<cqlmap> parameter of the search:
1039
1040   $rs = $sess->search(cqlmap    => 'demo/cql.map',
1041                       cql       => 'dc.title=IDZebra');
1042
1043 As you see, CQL searching is so simple: just give the query in the I<cql> parameter.
1044
1045 =head2 Sorting
1046
1047 If you'd like the search results to be sorted, use the I<sort> parameter:
1048
1049   $rs = $sess->search(cql       => 'IDZebra',
1050                       sort      => '1=4 ia');
1051
1052 Note, that B<currently> this is (almost) equivalent to
1053
1054   $rs = $sess->search(cql       => 'IDZebra');
1055   $rs->sort('1=4 ia');
1056
1057 but in future versions of Zebra and this API a single phase search and sort will take place, optimizing performance. For more details on sorting, see I<IDZebra::Resultset> manpage.
1058
1059 =head1 RESULTSETS
1060
1061 As you have seen, the result of the search request is a I<Resultset> object.
1062 It contains number of hits, and search status, and can be used to sort and retrieve the resulting records.
1063
1064   $count = $rs->count;
1065
1066   printf ("RS Status is %d (%s)\n", $rs->errCode, $rs->errString);
1067
1068 I<$rs-E<gt>errCode> is 0, if there were no errors during search. Read the I<IDZebra::Resultset> manpage for more details.
1069
1070 =head1 SCANNING
1071
1072 Zebra supports scanning index values. The result of the
1073
1074   $sl = $sess->scan(expression => "a");
1075
1076 call is an I<IDZebra::ScanList> object, which you can use to list the values. The scan expression has to be provided in a PQF like format. Examples:
1077
1078 B< a> (scan through words of "default", "Any" indexes)
1079
1080
1081 B< @attr 1=1016 a> (same effect)
1082
1083
1084 B< @attr 1=4 @attr 6=2 a>  (scan through titles as phrases)
1085
1086 An illegal scan expression will cause your code to die. If you'd like to select databases just for the scan call, you can optionally use the I<databases> parameter:
1087
1088   $sl = $sess->scan(expression => "a",
1089                     databases  => [qw(demo1 demo2)]);
1090
1091 You can use the I<IDZebra::ScanList> object returned by the I<scan> method, to reach the result. Check I<IDZebra::ScanList> manpage for more details.
1092
1093 =head1 SESSION STATUS AND ERRORS
1094
1095 Most of the API calls die if an error occurs. You can avoid this by using eval {} blocks. The following methods are available to get the status of Zebra service:
1096
1097 =over 4
1098
1099 =item B<errCode>
1100
1101 The Zebra provided error code... (for the result of the last call);
1102
1103 =item B<errString>
1104
1105 Error string corresponding to the message
1106
1107 =item B<errAdd>
1108
1109 Additional information for the status
1110
1111 =back
1112
1113 This functionality may change, see TODO.
1114
1115 =head1 LOGGING AND MISC. FUNCTIONS
1116
1117 Zebra provides a logging facility for the internal events, and also for application developers through the API. See manpage I<IDZebra::Logger> for details.
1118
1119 =over 4
1120
1121 =item B<IDZebra::LogFile($filename)>
1122
1123 Will set the output file for logging. By default it's STDERR;
1124
1125 =item B<IDZebra::LogLevel(15)>
1126
1127 Set log level. 0 for no logs. See IDZebra::Logger for usable flags.
1128
1129 =back
1130
1131 Some other functions
1132
1133 =over 4
1134
1135 =item B<$sess-E<gt>init>
1136
1137 Initialize, and clean registers. This will remove all data!
1138
1139 =item B<$sess-E<gt>compact>
1140
1141 Compact the registers (? does this work)
1142
1143 =item B<$sess-E<gt>show>
1144
1145 Doesn't have too much meaning. Don't try :)
1146
1147 =back
1148
1149 =head1 TODO
1150
1151 =over 4
1152
1153 =item B<Clean up error handling>
1154
1155 By default all zebra errors should cause die (such situations can be handled by using eval {} and then checking errCode, errString...). An optional flag or package variable should be introduced to override this, and skip zebra errors, to let the user decide what to do.
1156
1157 =item B<Make the package self-distributable>
1158
1159 Build and link with installed header and library files
1160
1161 =item B<Testing>
1162
1163 Test shadow system, unicode...
1164
1165 =item B<C API>
1166
1167 Cleanup, arrange, remove redundancy
1168
1169 =back
1170
1171 =head1 COPYRIGHT
1172
1173 Fill in
1174
1175 =head1 AUTHOR
1176
1177 Peter Popovics, pop@technomat.hu
1178
1179 =head1 SEE ALSO
1180
1181 Zebra documentation, IDZebra::Resultset, IDZebra::ScanList, IDZebra::Logger manpages
1182
1183 =cut