started explaining each dom filter pipeline

[idzebra-moved-to-github.git] / doc / recordmodel-grs.xml
diff --git a/doc/recordmodel-grs.xml b/doc/recordmodel-grs.xml

index 9810b0b..7ba26d3 100644 (file)
--- a/doc/recordmodel-grs.xml
+++ b/doc/recordmodel-grs.xml
@@ -1,7 +1,14 @@
- <chapter id="record-model-grs">
-  <!-- $Id: recordmodel-grs.xml,v 1.1 2006-02-15 11:07:47 marc Exp $ -->
-  <title>GRS Record Model and Filter Modules</title>
-  
+ <chapter id="grs">
+  <!-- $Id: recordmodel-grs.xml,v 1.8 2007-02-20 14:28:31 marc Exp $ -->
+  <title>&grs1; Record Model and Filter Modules</title>
+
+     <note>
+      <para>
+        The functionality of this record model has been improved and
+        replaced by the DOM &xml; record model. See 
+        <xref linkend="record-model-domxml"/>.
+      </para>
+     </note>
  
    <para>
     The record model described in this chapter applies to the fundamental,
  
    <para>
     The record model described in this chapter applies to the fundamental,
@@ -11,8 +18,8 @@
    </para>
  
  
    </para>
  
  
-  <sect1 id="grs-record-filters">
-   <title>GRS Record Filters</title>
+  <section id="grs-filters">
+   <title>&grs1; Record Filters</title>
     <para>
      Many basic subtypes of the <emphasis>grs</emphasis> type are
      currently available:
     <para>
      Many basic subtypes of the <emphasis>grs</emphasis> type are
      currently available:
@@ -21,128 +28,124 @@
     <para>
      <variablelist>
       <varlistentry>
     <para>
      <variablelist>
       <varlistentry>
-      <term>grs.sgml</term>
+      <term><literal>grs.sgml</literal></term>
        <listitem>
         <para>
          This is the canonical input format
         described in <xref linkend="grs-canonical-format"/>. It is using
        <listitem>
         <para>
          This is the canonical input format
         described in <xref linkend="grs-canonical-format"/>. It is using
-        simple SGML-like syntax. 
-       </para>
-       <!--
-       <para>
-         <literal>libidzebra1.4-mod-grs-sgml not packaged yet ??</literal>
+        simple &sgml;-like syntax. 
         </para>
         </para>
-       -->
        </listitem>
       </varlistentry>
       <varlistentry>
        </listitem>
       </varlistentry>
       <varlistentry>
-      <term>grs.marc<!--.<emphasis>abstract syntax</emphasis>--></term>
+      <term><literal>grs.marc.</literal><replaceable>type</replaceable></term>
        <listitem>
         <para>
        <listitem>
         <para>
-        This allows Zebra to read
-        records in the ISO2709 (MARC) encoding standard. 
-        <!-- In this case, the
-        last parameter <emphasis>abstract syntax</emphasis> names the
+        This allows &zebra; to read
+        records in the ISO2709 (&marc;) encoding standard. 
+        Last parameter <replaceable>type</replaceable> names the
          <literal>.abs</literal> file (see below)
          <literal>.abs</literal> file (see below)
-        which describes the specific MARC structure of the input record as
-        well as the indexing rules. -->
+        which describes the specific &marc; structure of the input record as
+        well as the indexing rules.
+       </para>
+       <para>The <literal>grs.marc</literal> uses an internal representation
+       which is not &xml; conformant. In particular &marc; tags are
+       presented as elements with the same name. And &xml; elements
+       may not start with digits. Therefore this filter is only
+       suitable for systems returning &grs1; and &marc; records. For &xml;
+       use <literal>grs.marcxml</literal> filter instead (see below).
         </para>
         <para>
           The loadable <literal>grs.marc</literal> filter module
           is packaged in the GNU/Debian package
         </para>
         <para>
           The loadable <literal>grs.marc</literal> filter module
           is packaged in the GNU/Debian package
-        <literal>libidzebra1.4-mod-grs-marc</literal>
-        </para>
+        <literal>libidzebra2.0-mod-grs-marc</literal>
+       </para>
        </listitem>
       </varlistentry>
       <varlistentry>
        </listitem>
       </varlistentry>
       <varlistentry>
-      <term>grs.marcxml<!--.<emphasis>abstract syntax</emphasis>--></term>
+      <term><literal>grs.marcxml.</literal><replaceable>type</replaceable></term>
        <listitem>
         <para>
        <listitem>
         <para>
-        This allows Zebra to read
-        records in the ISO2709??? (MARCXML) encoding standard.
+        This allows &zebra; to read ISO2709 encoded records.
+        Last parameter <replaceable>type</replaceable> names the
+        <literal>.abs</literal> file (see below)
+        which describes the specific &marc; structure of the input record as
+        well as the indexing rules.
         </para>
         <para>
         </para>
         <para>
-         The loadable <literal>grs.marcxml</literal> filter module
-         is also contained in the GNU/Debian package
-        <literal>libidzebra1.4-mod-grs-marc</literal>
-        </para>
-      </listitem>
-     </varlistentry>
-     <varlistentry>
-      <term>grs.danbib</term>
-      <listitem>
-       <para>
-        The <literal>grs.danbib</literal> filter parses DanBib
-        records, a danish MARC record variant called DANMARC.
-        DanBib is the Danish Union Catalogue hosted by the
-        Danish Bibliographic Centre (DBC).
+       The internal representation for <literal>grs.marcxml</literal>
+       is the same as for <ulink url="&url.marcxml;">&marcxml;</ulink>.
+       It is slightly more complicated to work with than 
+       <literal>grs.marc</literal> but &xml; conformant.
         </para>
         </para>
-       <para>The loadable  <literal>grs.danbib</literal> filter module
-         is packages in the GNU/Debian package 
-         <literal>libidzebra1.4-mod-grs-danbib</literal>.
+       <para>
+       The loadable <literal>grs.marcxml</literal> filter module
+       is also contained in the GNU/Debian package
+        <literal>libidzebra2.0-mod-grs-marc</literal>
         </para>
        </listitem>
       </varlistentry>
       <varlistentry>
         </para>
        </listitem>
       </varlistentry>
       <varlistentry>
-      <term>grs.xml</term>
+      <term><literal>grs.xml</literal></term>
        <listitem>
         <para>
        <listitem>
         <para>
-        This filter reads XML records and uses <ulink url="http://expat.sourceforge.net/">Expat</ulink> to
-        parse them and convert them into IDZebra's internal 
+        This filter reads &xml; records and uses
+       <ulink url="http://expat.sourceforge.net/">Expat</ulink> to
+        parse them and convert them into ID&zebra;'s internal 
          <literal>grs</literal> record model.
          <literal>grs</literal> record model.
-        Only one record per file
-        is supported. The filter is only available if Zebra/YAZ
-        is compiled with EXPAT support.
+        Only one record per file is supported, due to the fact that &xml; does
+       not allow two documents to "follow" each other (there is no way
+       to know when a document is finished).
+       This filter is only available if &zebra; is compiled with EXPAT support.
         </para>
         <para>
         </para>
         <para>
-         The loadable <literal>grs.xml</literal> filter module
-         is packagged in the GNU/Debian package
-        <literal>libidzebra1.4-mod-grs-xml</literal>
+       The loadable <literal>grs.xml</literal> filter module
+       is packaged in the GNU/Debian package
+        <literal>libidzebra2.0-mod-grs-xml</literal>
          </para>
        </listitem>
       </varlistentry>
       <varlistentry>
          </para>
        </listitem>
       </varlistentry>
       <varlistentry>
-      <term>grs.regx<!--.<emphasis>filter</emphasis>--></term>
+      <term><literal>grs.regx.</literal><replaceable>filter</replaceable></term>
        <listitem>
         <para>
          This enables a user-supplied Regular Expressions input
        <listitem>
         <para>
          This enables a user-supplied Regular Expressions input
-        filter described in
-        <xref linkend="grs-regx-tcl"/>.
+        filter described in <xref linkend="grs-regx-tcl"/>.
         </para>
         <para>
         </para>
         <para>
-         The loadable  <literal>grs.regx</literal> filter module
-         is packaged in the GNU/Debian package
-        <literal>libidzebra1.4-mod-grs-regx</literal>
-        </para>
+       The loadable <literal>grs.regx</literal> filter module
+       is packaged in the GNU/Debian package
+        <literal>libidzebra2.0-mod-grs-regx</literal>
+       </para>
        </listitem>
       </varlistentry>
       <varlistentry>
        </listitem>
       </varlistentry>
       <varlistentry>
-      <term>grs.tcl<!--.<emphasis>filter</emphasis>--></term>
+      <term><literal>grs.tcl.</literal><replaceable>filter</replaceable></term>
        <listitem>
         <para>
          Similar to grs.regx but using Tcl for rules, described in 
          <xref linkend="grs-regx-tcl"/>.
         </para>
         <para>
        <listitem>
         <para>
          Similar to grs.regx but using Tcl for rules, described in 
          <xref linkend="grs-regx-tcl"/>.
         </para>
         <para>
-         The loadable <literal>grs.tcl</literal> filter module
-         is also packaged in the GNU/Debian package
-        <literal>libidzebra1.4-mod-grs-regx</literal>
-        </para>
+       The loadable <literal>grs.tcl</literal> filter module
+       is also packaged in the GNU/Debian package
+        <literal>libidzebra2.0-mod-grs-regx</literal>
+       </para>
        </listitem>
       </varlistentry>
  
      </variablelist>
     </para>
  
        </listitem>
       </varlistentry>
  
      </variablelist>
     </para>
  
-   <sect2 id="grs-canonical-format">
-    <title>GRS Canonical Input Format</title>
+   <section id="grs-canonical-format">
+    <title>&grs1; Canonical Input Format</title>
  
      <para>
       Although input data can take any form, it is sometimes useful to
       describe the record processing capabilities of the system in terms of
       a single, canonical input format that gives access to the full
  
      <para>
       Although input data can take any form, it is sometimes useful to
       describe the record processing capabilities of the system in terms of
       a single, canonical input format that gives access to the full
-     spectrum of structure and flexibility in the system. In Zebra, this
-     canonical format is an "SGML-like" syntax.
+     spectrum of structure and flexibility in the system. In &zebra;, this
+     canonical format is an "&sgml;-like" syntax.
      </para>
  
      <para>
      </para>
  
      <para>
@@ -180,7 +183,7 @@
      <!-- There is no indentation in the example above!  -H
      -note-
       -para-
      <!-- There is no indentation in the example above!  -H
      -note-
       -para-
-      The indentation used above is used to illustrate how Zebra
+      The indentation used above is used to illustrate how &zebra;
        interprets the mark-up. The indentation, in itself, has no
        significance to the parser for the canonical input format, which
        discards superfluous whitespace.
        interprets the mark-up. The indentation, in itself, has no
        significance to the parser for the canonical input format, which
        discards superfluous whitespace.
@@ -207,7 +210,7 @@
      structured data element such as a <emphasis>Supplier</emphasis> element.
      </para>
  
      structured data element such as a <emphasis>Supplier</emphasis> element.
      </para>
  
-    <sect3>
+    <section id="grs-record-root">
       <title>Record Root</title>
  
       <para>
       <title>Record Root</title>
  
       <para>
@@ -219,8 +222,8 @@
        The following is a GILS record that
        contains only a single element (strictly speaking, that makes it an
        illegal GILS record, since the GILS profile includes several mandatory
        The following is a GILS record that
        contains only a single element (strictly speaking, that makes it an
        illegal GILS record, since the GILS profile includes several mandatory
-      elements - Zebra does not validate the contents of a record against
-      the Z39.50 profile, however - it merely attempts to match up elements
+      elements - &zebra; does not validate the contents of a record against
+      the &z3950; profile, however - it merely attempts to match up elements
        of a local representation with the given schema):
       </para>
  
        of a local representation with the given schema):
       </para>
  
@@ -234,18 +237,18 @@
  
       </para>
  
  
       </para>
  
-    </sect3>
+    </section>
  
  
-    <sect3><!-- ### we shouldn't make such a big deal about this -->
+    <section id="grs-variants">
       <title>Variants</title>
  
       <para>
       <title>Variants</title>
  
       <para>
-      Zebra allows you to provide individual data elements in a number of
+      &zebra; allows you to provide individual data elements in a number of
        <emphasis>variant forms</emphasis>. Examples of variant forms are
        textual data elements which might appear in different languages, and
        images which may appear in different formats or layouts.
        <emphasis>variant forms</emphasis>. Examples of variant forms are
        textual data elements which might appear in different languages, and
        images which may appear in different formats or layouts.
-      The variant system in Zebra is essentially a representation of
-      the variant mechanism of Z39.50-1995.
+      The variant system in &zebra; is essentially a representation of
+      the variant mechanism of &z3950;-1995.
       </para>
  
       <para>
       </para>
  
       <para>
@@ -272,7 +275,7 @@
        The available values for the <emphasis>class</emphasis> and
        <emphasis>type</emphasis> fields are given by the variant set
        that is associated with the current schema
        The available values for the <emphasis>class</emphasis> and
        <emphasis>type</emphasis> fields are given by the variant set
        that is associated with the current schema
-      (see <xref linkend="variant-set"/>).
+      (see <xref linkend="grs-variants"/>).
       </para>
  
       <para>
       </para>
  
       <para>
@@ -325,21 +328,21 @@
       <para>
        The title element above comes in two variants. Both have the IANA body
        type "text/plain", but one is in English, and the other in
       <para>
        The title element above comes in two variants. Both have the IANA body
        type "text/plain", but one is in English, and the other in
-      Danish. The client, using the element selection mechanism of Z39.50,
+      Danish. The client, using the element selection mechanism of &z3950;,
        can retrieve information about the available variant forms of data
        elements, or it can select specific variants based on the requirements
        of the end-user.
       </para>
  
        can retrieve information about the available variant forms of data
        elements, or it can select specific variants based on the requirements
        of the end-user.
       </para>
  
-    </sect3>
+    </section>
  
  
-   </sect2>
+   </section>
  
  
-   <sect2 id="grs-regx-tcl">
-    <title>GRS REGX And TCL Input Filters</title>
+   <section id="grs-regx-tcl">
+    <title>&grs1; REGX And TCL Input Filters</title>
  
      <para>
  
      <para>
-     In order to handle general input formats, Zebra allows the
+     In order to handle general input formats, &zebra; allows the
       operator to define filters which read individual records in their
       native format and produce an internal representation that the system
       can work with.
       operator to define filters which read individual records in their
       native format and produce an internal representation that the system
       can work with.
@@ -374,7 +377,7 @@
       <variablelist>
  
        <varlistentry>
       <variablelist>
  
        <varlistentry>
-       <term>INIT</term>
+       <term><literal>INIT</literal></term>
         <listitem>
          <para>
           The action associated with this expression is evaluated
         <listitem>
          <para>
           The action associated with this expression is evaluated
@@ -386,7 +389,7 @@
         </listitem>
        </varlistentry>
        <varlistentry>
         </listitem>
        </varlistentry>
        <varlistentry>
-       <term>BEGIN</term>
+       <term><literal>BEGIN</literal></term>
         <listitem>
          <para>
           Matches the beginning of the record. It can be used to
         <listitem>
          <para>
           Matches the beginning of the record. It can be used to
@@ -397,7 +400,7 @@
         </listitem>
        </varlistentry>
        <varlistentry>
         </listitem>
        </varlistentry>
        <varlistentry>
-       <term>END</term>
+       <term><literal>END</literal></term>
         <listitem>
          <para>
           Matches the end of the record - when all of the contents
         <listitem>
          <para>
           Matches the end of the record - when all of the contents
@@ -406,15 +409,20 @@
         </listitem>
        </varlistentry>
        <varlistentry>
         </listitem>
        </varlistentry>
        <varlistentry>
-       <term>/pattern/</term>
+       <term>
+       <literal>/</literal><replaceable>reg</replaceable><literal>/</literal>
+       </term>
         <listitem>
          <para>
         <listitem>
          <para>
-         Matches a string of characters from the input record.
+        Matches regular expression pattern <replaceable>reg</replaceable>
+        from the input record. The operators supported are the same
+        as for regular expression queries. Refer to 
+        <xref linkend="querymodel-regular"/>.
          </para>
         </listitem>
        </varlistentry>
        <varlistentry>
          </para>
         </listitem>
        </varlistentry>
        <varlistentry>
-       <term>BODY</term>
+       <term><literal>BODY</literal></term>
         <listitem>
          <para>
           This keyword may only be used between two patterns.
         <listitem>
          <para>
           This keyword may only be used between two patterns.
@@ -423,7 +431,7 @@
         </listitem>
        </varlistentry>
        <varlistentry>
         </listitem>
        </varlistentry>
        <varlistentry>
-       <term>FINISH</term>
+       <term><literal>FINISH</literal></term>
         <listitem>
          <para>
           The expression associated with this pattern is evaluated
         <listitem>
          <para>
           The expression associated with this pattern is evaluated
@@ -571,19 +579,19 @@
      </para>
  
      <para>
      </para>
  
      <para>
-     If Zebra is compiled with support for Tcl enabled, the statements
+     If &zebra; is compiled with support for Tcl enabled, the statements
       described above are supplemented with a complete
       scripting environment, including control structures (conditional
       expressions and loop constructs), and powerful string manipulation
       mechanisms for modifying the elements of a record.
      </para>
  
       described above are supplemented with a complete
       scripting environment, including control structures (conditional
       expressions and loop constructs), and powerful string manipulation
       mechanisms for modifying the elements of a record.
      </para>
  
-   </sect2>
+   </section>
  
  
-  </sect1>
+  </section>
  
  
-  <sect1 id="grs-internal-representation">
-   <title>GRS Internal Record Representation</title>
+  <section id="grs-internal-representation">
+   <title>&grs1; Internal Record Representation</title>
  
     <para>
      When records are manipulated by the system, they're represented in a
  
     <para>
      When records are manipulated by the system, they're represented in a
@@ -633,7 +641,7 @@
      different tag path.
     </para>
  
      different tag path.
     </para>
  
-   <sect2>
+   <section id="grs-tagged-elements">
      <title>Tagged Elements</title>
  
      <para>
      <title>Tagged Elements</title>
  
      <para>
@@ -650,9 +658,9 @@
       reached from the root of the record).
      </para>
  
       reached from the root of the record).
      </para>
  
-   </sect2>
+   </section>
  
  
-   <sect2>
+   <section id="grs-variant-details">
      <title>Variants</title>
  
      <para>
      <title>Variants</title>
  
      <para>
@@ -683,12 +691,12 @@
      
      <para>
       In practice, each variant node is associated with a triple of class,
      
      <para>
       In practice, each variant node is associated with a triple of class,
-     type, value, corresponding to the variant mechanism of Z39.50.
+     type, value, corresponding to the variant mechanism of &z3950;.
      </para>
      
      </para>
      
-   </sect2>
+   </section>
     
     
-   <sect2>
+   <section id="grs-data-elements">
      <title>Data Elements</title>
      
      <para>
      <title>Data Elements</title>
      
      <para>
@@ -702,12 +710,12 @@
     </para>
      -->
      
     </para>
      -->
      
-   </sect2>
+   </section>
     
     
-  </sect1>
+  </section>
    
    
-  <sect1 id="record-model-grs-conf">
-   <title>GRS Record Model Configuration</title>
+  <section id="grs-conf">
+   <title>&grs1; Record Model Configuration</title>
     
     <para>
      The following sections describe the configuration files that govern
     
     <para>
      The following sections describe the configuration files that govern
@@ -717,7 +725,7 @@
      setting in the <literal>zebra.cfg</literal> file.
     </para>
  
      setting in the <literal>zebra.cfg</literal> file.
     </para>
  
-   <sect2>
+   <section id="grs-abstract-syntax">
      <title>The Abstract Syntax</title>
  
      <para>
      <title>The Abstract Syntax</title>
  
      <para>
@@ -737,7 +745,7 @@
        <listitem>
  
         <para>
        <listitem>
  
         <para>
-        The object identifier of the Z39.50 schema associated
+        The object identifier of the &z3950; schema associated
          with the ARS, so that it can be referred to by the client.
         </para>
        </listitem>
          with the ARS, so that it can be referred to by the client.
         </para>
        </listitem>
@@ -774,7 +782,7 @@
          ask for a subset of the data elements contained in a record. Element
          set names, in the retrieval module, are mapped to <emphasis>element
           specifications</emphasis>, which contain information equivalent to the
          ask for a subset of the data elements contained in a record. Element
          set names, in the retrieval module, are mapped to <emphasis>element
           specifications</emphasis>, which contain information equivalent to the
-        <emphasis>Espec-1</emphasis> syntax of Z39.50.
+        <emphasis>Espec-1</emphasis> syntax of &z3950;.
         </para>
        </listitem>
  
         </para>
        </listitem>
  
@@ -788,7 +796,7 @@
        <listitem>
         <para>
          Possibly, a set of rules describing the mapping of elements to a
        <listitem>
         <para>
          Possibly, a set of rules describing the mapping of elements to a
-        MARC representation.
+        &marc; representation.
  
         </para>
        </listitem>
  
         </para>
        </listitem>
@@ -796,7 +804,7 @@
        <listitem>      
         <para>
          A list of element descriptions (this is the actual ARS of the
        <listitem>      
         <para>
          A list of element descriptions (this is the actual ARS of the
-        schema, in Z39.50 terms), which lists the ways in which the various
+        schema, in &z3950; terms), which lists the ways in which the various
          tags can be used and organized hierarchically.
         </para>
        </listitem>
          tags can be used and organized hierarchically.
         </para>
        </listitem>
@@ -810,9 +818,9 @@
       describe the given objects.
      </para>
  
       describe the given objects.
      </para>
  
-   </sect2>
+   </section>
  
  
-   <sect2>
+   <section id="grs-configuration-files">
      <title>The Configuration Files</title>
  
      <para>
      <title>The Configuration Files</title>
  
      <para>
@@ -822,7 +830,7 @@
  
      <para>
       The number of different file types may appear daunting at first, but
  
      <para>
       The number of different file types may appear daunting at first, but
-     each type corresponds fairly clearly to a single aspect of the Z39.50
+     each type corresponds fairly clearly to a single aspect of the &z3950;
       retrieval facilities. Further, the average database administrator,
       who is simply reusing an existing profile for which tables already
       exist, shouldn't have to worry too much about the contents of these tables.
       retrieval facilities. Further, the average database administrator,
       who is simply reusing an existing profile for which tables already
       exist, shouldn't have to worry too much about the contents of these tables.
@@ -841,20 +849,20 @@
       mandatory (m).
      </para>
      
       mandatory (m).
      </para>
      
-   </sect2>
+   </section>
     
     
-   <sect2 id="abs-file">
+   <section id="abs-file">
      <title>The Abstract Syntax (.abs) Files</title>
      
      <para>
      <title>The Abstract Syntax (.abs) Files</title>
      
      <para>
-     The name of this file type is slightly misleading in Z39.50 terms,
+     The name of this file type is slightly misleading in &z3950; terms,
       since, apart from the actual abstract syntax of the profile, it also
       includes most of the other definitions that go into a database
       profile.
      </para>
      
      <para>
       since, apart from the actual abstract syntax of the profile, it also
       includes most of the other definitions that go into a database
       profile.
      </para>
      
      <para>
-     When a record in the canonical, SGML-like format is read from a file
+     When a record in the canonical, &sgml;-like format is read from a file
       or from the database, the first tag of the file should reference the
       profile that governs the layout of the record. If the first tag of the
       record is, say, <literal>&lt;gils&gt;</literal>, the system will look
       or from the database, the first tag of the file should reference the
       profile that governs the layout of the record. If the first tag of the
       record is, say, <literal>&lt;gils&gt;</literal>, the system will look
@@ -892,7 +900,7 @@
          <para>
           (m) The reference name of the OID for the profile.
           The reference names can be found in the <emphasis>util</emphasis>
          <para>
           (m) The reference name of the OID for the profile.
           The reference names can be found in the <emphasis>util</emphasis>
-         module of YAZ.
+         module of &yaz;.
          </para>
         </listitem>
        </varlistentry>
          </para>
         </listitem>
        </varlistentry>
@@ -938,7 +946,7 @@
          <para>
           (o) Points to a file containing parameters
           for representing the record contents in the ISO2709 syntax.
          <para>
           (o) Points to a file containing parameters
           for representing the record contents in the ISO2709 syntax.
-         Read the description of the MARC representation facility below.
+         Read the description of the &marc; representation facility below.
          </para>
         </listitem>
        </varlistentry>
          </para>
         </listitem>
        </varlistentry>
@@ -954,7 +962,7 @@
         </listitem>
        </varlistentry>
        <varlistentry>
         </listitem>
        </varlistentry>
        <varlistentry>
-       <term>any <replaceable>tags</replaceable></term>
+       <term>all <replaceable>tags</replaceable></term>
         <listitem>
          <para>
           (o) This directive specifies a list of attributes
         <listitem>
          <para>
           (o) This directive specifies a list of attributes
@@ -974,23 +982,23 @@
          <para>
           (o,r) Adds an element to the abstract record syntax of the schema.
           The <replaceable>path</replaceable> follows the
          <para>
           (o,r) Adds an element to the abstract record syntax of the schema.
           The <replaceable>path</replaceable> follows the
-         syntax which is suggested by the Z39.50 document - that is, a sequence
+         syntax which is suggested by the &z3950; document - that is, a sequence
           of tags separated by slashes (&#x2f;). Each tag is given as a
          comma-separated pair of tag type and -value surrounded by parentheses.
           The <replaceable>name</replaceable> is the name of the element, and
           the <replaceable>attributes</replaceable>
           specifies which attributes to use when indexing the element in a
           comma-separated list.
           of tags separated by slashes (&#x2f;). Each tag is given as a
          comma-separated pair of tag type and -value surrounded by parentheses.
           The <replaceable>name</replaceable> is the name of the element, and
           the <replaceable>attributes</replaceable>
           specifies which attributes to use when indexing the element in a
           comma-separated list.
-         A ! in place of the attribute name is equivalent to
-         specifying an attribute name identical to the element name.
-         A - in place of the attribute name
+         A <literal>!</literal> in place of the attribute name is equivalent
+        to specifying an attribute name identical to the element name.
+         A <literal>-</literal> in place of the attribute name
           specifies that no indexing is to take place for the given element.
           The attributes can be qualified with <replaceable>field
            types</replaceable> to specify which
           character set should govern the indexing procedure for that field.
           The same data element may be indexed into several different
           fields, using different character set definitions.
           specifies that no indexing is to take place for the given element.
           The attributes can be qualified with <replaceable>field
            types</replaceable> to specify which
           character set should govern the indexing procedure for that field.
           The same data element may be indexed into several different
           fields, using different character set definitions.
-         See the <xref linkend="field-structure-and-character-sets"/>.
+         See the <xref linkend="fields-and-charsets"/>.
           The default field type is <literal>w</literal> for
           <emphasis>word</emphasis>.
          </para>
           The default field type is <literal>w</literal> for
           <emphasis>word</emphasis>.
          </para>
@@ -1021,8 +1029,8 @@
         <term>melm <replaceable>field$subfield attributes</replaceable></term>
         <listitem>
          <para>
         <term>melm <replaceable>field$subfield attributes</replaceable></term>
         <listitem>
          <para>
-        This directive is specifically for MARC-formatted records,
-        ingested either in the form of MARCXML documents, or in the
+        This directive is specifically for &marc;-formatted records,
+        ingested either in the form of &marcxml; documents, or in the
          ISO2709/Z39.2 format using the grs.marcxml input filter. You can
          specify indexing rules for any subfield, or you can leave off the
          <replaceable>$subfield</replaceable> part and specify default rules
          ISO2709/Z39.2 format using the grs.marcxml input filter. You can
          specify indexing rules for any subfield, or you can leave off the
          <replaceable>$subfield</replaceable> part and specify default rules
@@ -1038,7 +1046,7 @@
         <listitem>
          <para>
           This directive specifies character encoding for external records.
         <listitem>
          <para>
           This directive specifies character encoding for external records.
-         For records such as XML that specifies encoding within the
+         For records such as &xml; that specify encoding within the
           file via a header this directive is ignored.
           If neither this directive is given, nor an encoding is set
           within external records, ISO-8859-1 encoding is assumed.
           file via a header this directive is ignored.
           If neither this directive is given, nor an encoding is set
           within external records, ISO-8859-1 encoding is assumed.
@@ -1092,7 +1100,7 @@
             <term>sysno</term>
             <listitem> 
              <para>
             <term>sysno</term>
             <listitem> 
              <para>
-             Zebra's system number (record ID) for the
+             &zebra;'s system number (record ID) for the
               record. By default this is mapped to element
               <literal>localControlNumber</literal>.
              </para>
               record. By default this is mapped to element
               <literal>localControlNumber</literal>.
              </para>
@@ -1116,7 +1124,7 @@
         </term>
         <listitem>
         <para>
         </term>
         <listitem>
         <para>
-        Specifies what information, if any, Zebra should
+        Specifies what information, if any, &zebra; should
          automatically include in retrieval records for the 
          ``system fields'' that it supports.
          <replaceable>systemTag</replaceable> may
          automatically include in retrieval records for the 
          ``system fields'' that it supports.
          <replaceable>systemTag</replaceable> may
@@ -1135,7 +1143,7 @@
             An automatically generated identifier for the record,
             unique within this database.  It is represented by the
             <literal>&lt;localControlNumber&gt;</literal> element in
             An automatically generated identifier for the record,
             unique within this database.  It is represented by the
             <literal>&lt;localControlNumber&gt;</literal> element in
-           XML and the <literal>(1,14)</literal> tag in GRS-1.
+           &xml; and the <literal>(1,14)</literal> tag in &grs1;.
            </para></listitem>
           </varlistentry>
           <varlistentry>
            </para></listitem>
           </varlistentry>
           <varlistentry>
@@ -1208,9 +1216,9 @@
  
      </para>
  
  
      </para>
  
-   </sect2>
+   </section>
  
  
-   <sect2 id="attset-files">
+   <section id="attset-files">
      <title>The Attribute Set (.att) Files</title>
  
      <para>
      <title>The Attribute Set (.att) Files</title>
  
      <para>
@@ -1237,7 +1245,7 @@
           (m) The reference name of the OID for
           the attribute set.
           The reference names can be found in the <replaceable>util</replaceable>
           (m) The reference name of the OID for
           the attribute set.
           The reference names can be found in the <replaceable>util</replaceable>
-         module of <replaceable>YAZ</replaceable>.
+         module of <replaceable>&yaz;</replaceable>.
          </para>
         </listitem></varlistentry>
        <varlistentry>
          </para>
         </listitem></varlistentry>
        <varlistentry>
@@ -1250,7 +1258,7 @@
           set. For instance, many new attribute sets are defined as extensions
           to the <replaceable>bib-1</replaceable> set.
           This is an important feature of the retrieval
           set. For instance, many new attribute sets are defined as extensions
           to the <replaceable>bib-1</replaceable> set.
           This is an important feature of the retrieval
-         system of Z39.50, as it ensures the highest possible level of
+         system of &z3950;, as it ensures the highest possible level of
           interoperability, as those access points of your database which are
           derived from the external set (say, bib-1) can be used even by clients
           who are unaware of the new set.
           interoperability, as those access points of your database which are
           derived from the external set (say, bib-1) can be used even by clients
           who are unaware of the new set.
@@ -1294,15 +1302,15 @@
  
      </para>
  
  
      </para>
  
-   </sect2>
+   </section>
  
  
-   <sect2>
+   <section id="grs-tag-files">
      <title>The Tag Set (.tag) Files</title>
  
      <para>
       This file type defines the tagset of the profile, possibly by
       referencing other tag sets (most tag sets, for instance, will include
      <title>The Tag Set (.tag) Files</title>
  
      <para>
       This file type defines the tagset of the profile, possibly by
       referencing other tag sets (most tag sets, for instance, will include
-     tagsetG and tagsetM from the Z39.50 specification. The file may
+     tagsetG and tagsetM from the &z3950; specification). The file may
       contain the following directives.
      </para>
  
       contain the following directives.
      </para>
  
@@ -1323,7 +1331,7 @@
          <para>
           (o) The reference name of the OID for the tag set.
           The reference names can be found in the <emphasis>util</emphasis>
          <para>
           (o) The reference name of the OID for the tag set.
           The reference names can be found in the <emphasis>util</emphasis>
-         module of <emphasis>YAZ</emphasis>.
+         module of <emphasis>&yaz;</emphasis>.
           The directive is optional, since not all tag sets
           are registered outside of their schema.
          </para>
           The directive is optional, since not all tag sets
           are registered outside of their schema.
          </para>
@@ -1452,9 +1460,9 @@
       </screen>
      </para>
  
       </screen>
      </para>
  
-   </sect2>
+   </section>
  
  
-   <sect2 id="variant-set">
+   <section id="grs-var-files">
      <title>The Variant Set (.var) Files</title>
  
      <para>
      <title>The Variant Set (.var) Files</title>
  
      <para>
@@ -1484,7 +1492,7 @@
          <para>
           (o) The reference name of the OID for
           the variant set, if one is required. The reference names can be found
          <para>
           (o) The reference name of the OID for
           the variant set, if one is required. The reference names can be found
-         in the <emphasis>util</emphasis> module of <emphasis>YAZ</emphasis>.
+         in the <emphasis>util</emphasis> module of <emphasis>&yaz;</emphasis>.
          </para>
         </listitem></varlistentry>
        <varlistentry>
          </para>
         </listitem></varlistentry>
        <varlistentry>
@@ -1533,16 +1541,16 @@
  
      </para>
  
  
      </para>
  
-   </sect2>
+   </section>
  
  
-   <sect2>
+   <section id="grs-est-files">
      <title>The Element Set (.est) Files</title>
  
      <para>
       The element set specification files describe a selection of a subset
       of the elements of a database record. The element selection mechanism
       is equivalent to the one supplied by the <emphasis>Espec-1</emphasis>
      <title>The Element Set (.est) Files</title>
  
      <para>
       The element set specification files describe a selection of a subset
       of the elements of a database record. The element selection mechanism
       is equivalent to the one supplied by the <emphasis>Espec-1</emphasis>
-     syntax of the Z39.50 specification.
+     syntax of the &z3950; specification.
       In fact, the internal representation of an element set
       specification is identical to the <emphasis>Espec-1</emphasis> structure,
       and we'll refer you to the description of that structure for most of
       In fact, the internal representation of an element set
       specification is identical to the <emphasis>Espec-1</emphasis> structure,
       and we'll refer you to the description of that structure for most of
@@ -1673,9 +1681,9 @@
  
      </para>
  
  
      </para>
  
-   </sect2>
+   </section>
  
  
-   <sect2 id="schema-mapping">
+   <section id="schema-mapping">
      <title>The Schema Mapping (.map) Files</title>
  
      <para>
      <title>The Schema Mapping (.map) Files</title>
  
      <para>
@@ -1683,14 +1691,14 @@
       a schema that differs from the native schema of the record. For
       instance, a client might only know how to process WAIS records, while
       the database record is represented in a more specific schema, such as
       a schema that differs from the native schema of the record. For
       instance, a client might only know how to process WAIS records, while
       the database record is represented in a more specific schema, such as
-     GILS. In this module, a mapping of data to one of the MARC formats is
+     GILS. In this module, a mapping of data to one of the &marc; formats is
       also thought of as a schema mapping (mapping the elements of the
       also thought of as a schema mapping (mapping the elements of the
-     record into fields consistent with the given MARC specification, prior
+     record into fields consistent with the given &marc; specification, prior
       to actually converting the data to the ISO2709). This use of the
       to actually converting the data to the ISO2709). This use of the
-     object identifier for USMARC as a schema identifier represents an
+     object identifier for &usmarc; as a schema identifier represents an
       overloading of the OID which might not be entirely proper. However,
       it represents the dual role of schema and record syntax which
       overloading of the OID which might not be entirely proper. However,
       it represents the dual role of schema and record syntax which
-     is assumed by the MARC family in Z39.50.
+     is assumed by the &marc; family in &z3950;.
      </para>
  
      <!--
      </para>
  
      <!--
@@ -1723,7 +1731,7 @@
           This is used, for instance, by a server receiving a request to present
           a record in a different schema from the native one.
           The name, again, is found in the <emphasis>oid</emphasis>
           This is used, for instance, by a server receiving a request to present
           a record in a different schema from the native one.
           The name, again, is found in the <emphasis>oid</emphasis>
-         module of <emphasis>YAZ</emphasis>.
+         module of <emphasis>&yaz;</emphasis>.
          </para>
         </listitem></varlistentry>
        <varlistentry>
          </para>
         </listitem></varlistentry>
        <varlistentry>
@@ -1737,10 +1745,10 @@
       </variablelist>
      </para>
  
       </variablelist>
      </para>
  
-   </sect2>
+   </section>
  
  
-   <sect2>
-    <title>The MARC (ISO2709) Representation (.mar) Files</title>
+   <section id="grs-mar-files">
+    <title>The &marc; (ISO2709) Representation (.mar) Files</title>
  
      <para>
       This file provides rules for representing a record in the ISO2709
  
      <para>
       This file provides rules for representing a record in the ISO2709
@@ -1750,258 +1758,15 @@
  
      <!--
       NOTE: FIXME! This will be described better. We're in the process of
  
      <!--
       NOTE: FIXME! This will be described better. We're in the process of
-      re-evaluating and most likely changing the way that MARC records are
+      re-evaluating and most likely changing the way that &marc; records are
        handled by the system.</emphasis>
      -->
  
        handled by the system.</emphasis>
      -->
  
-   </sect2>
-
-   <sect2 id="field-structure-and-character-sets">
-    <title>Field Structure and Character Sets
-    </title>
-
-    <para>
-     In order to provide a flexible approach to national character set
-     handling, Zebra allows the administrator to configure the set up the
-     system to handle any 8-bit character set &mdash; including sets that
-     require multi-octet diacritics or other multi-octet characters. The
-     definition of a character set includes a specification of the
-     permissible values, their sort order (this affects the display in the
-     SCAN function), and relationships between upper- and lowercase
-     characters. Finally, the definition includes the specification of
-     space characters for the set.
-    </para>
-
-    <para>
-     The operator can define different character sets for different fields,
-     typical examples being standard text fields, numerical fields, and
-     special-purpose fields such as WWW-style linkages (URx).
-    </para>
-
-    <sect3 id="default-idx-file">
-     <title>The default.idx file</title>
-     <para>
-      The field types, and hence character sets, are associated with data
-      elements by the .abs files (see above).
-      The file <literal>default.idx</literal>
-      provides the association between field type codes (as used in the .abs
-      files) and the character map files (with the .chr suffix). The format
-      of the .idx file is as follows
-     </para>
-
-     <para>
-      <variablelist>
-
-       <varlistentry>
-       <term>index <emphasis>field type code</emphasis></term>
-       <listitem>
-        <para>
-         This directive introduces a new search index code.
-         The argument is a one-character code to be used in the
-         .abs files to select this particular index type. An index, roughly,
-         corresponds to a particular structure attribute during search. Refer
-         to <xref linkend="search"/>.
-        </para>
-       </listitem></varlistentry>
-       <varlistentry>
-       <term>sort <emphasis>field code type</emphasis></term>
-       <listitem>
-        <para>
-         This directive introduces a 
-         sort index. The argument is a one-character code to be used in the
-         .abs fie to select this particular index type. The corresponding
-         use attribute must be used in the sort request to refer to this
-         particular sort index. The corresponding character map (see below)
-         is used in the sort process.
-        </para>
-       </listitem></varlistentry>
-       <varlistentry>
-       <term>completeness <emphasis>boolean</emphasis></term>
-       <listitem>
-        <para>
-         This directive enables or disables complete field indexing.
-         The value of the <emphasis>boolean</emphasis> should be 0
-         (disable) or 1. If completeness is enabled, the index entry will
-         contain the complete contents of the field (up to a limit), with words
-         (non-space characters) separated by single space characters
-         (normalized to " " on display). When completeness is
-         disabled, each word is indexed as a separate entry. Complete subfield
-         indexing is most useful for fields which are typically browsed (eg.
-         titles, authors, or subjects), or instances where a match on a
-         complete subfield is essential (eg. exact title searching). For fields
-         where completeness is disabled, the search engine will interpret a
-         search containing space characters as a word proximity search.
-        </para>
-       </listitem></varlistentry>
-       <varlistentry>
-       <term>charmap <emphasis>filename</emphasis></term>
-       <listitem>
-        <para>
-         This is the filename of the character
-         map to be used for this index for field type.
-        </para>
-       </listitem></varlistentry>
-      </variablelist>
-     </para>
-    </sect3>
-
-    <sect3 id="character-map-files">
-     <title>The character map file format</title>
-     <para>
-      The contents of the character map files are structured as follows:
-     </para>
+   </section>
+  </section>
  
  
-     <para>
-      <variablelist>
-
-       <varlistentry>
-       <term>lowercase <emphasis>value-set</emphasis></term>
-       <listitem>
-        <para>
-         This directive introduces the basic value set of the field type.
-         The format is an ordered list (without spaces) of the
-         characters which may occur in "words" of the given type.
-         The order of the entries in the list determines the
-         sort order of the index. In addition to single characters, the
-         following combinations are legal:
-        </para>
-
-        <para>
-
-         <itemizedlist>
-          <listitem>
-           <para>
-            Backslashes may be used to introduce three-digit octal, or
-            two-digit hex representations of single characters
-            (preceded by <literal>x</literal>).
-            In addition, the combinations
-            \\, \\r, \\n, \\t, \\s (space &mdash; remember that real
-            space-characters may not occur in the value definition), and
-            \\ are recognized, with their usual interpretation.
-           </para>
-          </listitem>
-
-          <listitem>
-           <para>
-            Curly braces {} may be used to enclose ranges of single
-            characters (possibly using the escape convention described in the
-            preceding point), eg. {a-z} to introduce the
-            standard range of ASCII characters.
-            Note that the interpretation of such a range depends on
-            the concrete representation in your local, physical character set.
-           </para>
-          </listitem>
-
-          <listitem>
-           <para>
-            paranthesises () may be used to enclose multi-byte characters -
-            eg. diacritics or special national combinations (eg. Spanish
-            "ll"). When found in the input stream (or a search term),
-            these characters are viewed and sorted as a single character, with a
-            sorting value depending on the position of the group in the value
-            statement.
-           </para>
-          </listitem>
-
-         </itemizedlist>
-
-        </para>
-       </listitem></varlistentry>
-       <varlistentry>
-       <term>uppercase <emphasis>value-set</emphasis></term>
-       <listitem>
-        <para>
-         This directive introduces the
-         upper-case equivalencis to the value set (if any). The number and
-         order of the entries in the list should be the same as in the
-         <literal>lowercase</literal> directive.
-        </para>
-       </listitem></varlistentry>
-       <varlistentry>
-       <term>space <emphasis>value-set</emphasis></term>
-       <listitem>
-        <para>
-         This directive introduces the character
-         which separate words in the input stream. Depending on the
-         completeness mode of the field in question, these characters either
-         terminate an index entry, or delimit individual "words" in
-         the input stream. The order of the elements is not significant &mdash;
-         otherwise the representation is the same as for the
-         <literal>uppercase</literal> and <literal>lowercase</literal>
-         directives.
-        </para>
-       </listitem></varlistentry>
-       <varlistentry>
-       <term>map <emphasis>value-set</emphasis>
-        <emphasis>target</emphasis></term>
-       <listitem>
-        <para>
-         This directive introduces a mapping between each of the
-         members of the value-set on the left to the character on the
-         right. The character on the right must occur in the value
-         set (the <literal>lowercase</literal> directive) of the
-         character set, but it may be a paranthesis-enclosed
-         multi-octet character. This directive may be used to map
-         diacritics to their base characters, or to map HTML-style
-         character-representations to their natural form, etc. The
-         map directive can also be used to ignore leading articles in
-         searching and/or sorting, and to perform other special
-         transformations. See section <xref
-         linkend="leading-articles"/>.
-        </para>
-       </listitem></varlistentry>
-      </variablelist>
-     </para>
-    </sect3>
-    <sect3 id="leading-articles">
-     <title>Ignoring leading articles</title>
-     <para>
-      In addition to specifying sort orders, space (blank) handling,
-      and upper/lowercase folding, you can also use the character map
-      files to make Zebra ignore leading articles in sorting records,
-      or when doing complete field searching.
-     </para>
-     <para>
-      This is done using the <literal>map</literal> directive in the
-      character map file. In a nutshell, what you do is map certain
-      sequences of characters, when they occur <emphasis> in the
-      beginning of a field</emphasis>, to a space. Assuming that the
-      character "@" is defined as a space character in your file, you
-      can do:
-      <screen>
-       map (^The\s) @
-       map (^the\s) @
-      </screen>
-      The effect of these directives is to map either 'the' or 'The',
-      followed by a space character, to a space. The hat ^ character
-      denotes beginning-of-field only when complete-subfield indexing
-      or sort indexing is taking place; otherwise, it is treated just
-      as any other character.
-     </para>
-     <para>
-      Because the <literal>default.idx</literal> file can be used to
-      associate different character maps with different indexing types
-      -- and you can create additional indexing types, should the need
-      arise -- it is possible to specify that leading articles should
-      be ignored either in sorting, in complete-field searching, or
-      both.
-     </para>
-     <para>
-      If you ignore certain prefixes in sorting, then these will be
-      eliminated from the index, and sorting will take place as if
-      they weren't there. However, if you set the system up to ignore
-      certain prefixes in <emphasis>searching</emphasis>, then these
-      are deleted both from the indexes and from query terms, when the
-      client specifies complete-field searching. This has the effect
-      that a search for 'the science journal' and 'science journal'
-      would both produce the same results.
-     </para>
-    </sect3>
-   </sect2>
-  </sect1>
-
-  <sect1 id="grs-exchange-formats">
-   <title>GRS Exchange Formats</title>
+  <section id="grs-exchange-formats">
+   <title>&grs1; Exchange Formats</title>
  
     <para>
      Converting records from the internal structure to an exchange format
  
     <para>
      Converting records from the internal structure to an exchange format
@@ -2013,7 +1778,7 @@
      <itemizedlist>
       <listitem>
        <para>
      <itemizedlist>
       <listitem>
        <para>
-       GRS-1. The internal representation is based on GRS-1/XML, so the
+       &grs1;. The internal representation is based on &grs1;/&xml;, so the
         conversion here is straightforward. The system will create
         applied variant and supported variant lists as required, if a record
         contains variant information.
         conversion here is straightforward. The system will create
         applied variant and supported variant lists as required, if a record
         contains variant information.
@@ -2022,34 +1787,34 @@
  
       <listitem>
        <para>
  
       <listitem>
        <para>
-       XML. The internal representation is based on GRS-1/XML so
-       the mapping is trivial. Note that XML schemas, preprocessing
+       &xml;. The internal representation is based on &grs1;/&xml; so
+       the mapping is trivial. Note that &xml; schemas, preprocessing
         instructions and comments are not part of the internal representation
         instructions and comments are not part of the internal representation
-       and therefore will never be part of a generated XML record.
-       Future versions of the Zebra will support that.
+       and therefore will never be part of a generated &xml; record.
+       Future versions of &zebra; will support that.
        </para>
       </listitem>
  
       <listitem>
        <para>
        </para>
       </listitem>
  
       <listitem>
        <para>
-       SUTRS. Again, the mapping is fairly straightforward. Indentation
+       &sutrs;. Again, the mapping is fairly straightforward. Indentation
         is used to show the hierarchical structure of the record. All
         is used to show the hierarchical structure of the record. All
-       "GRS" type records support both the GRS-1 and SUTRS
+       "&grs1;" type records support both the &grs1; and &sutrs;
         representations.
         representations.
-       <!-- FIXME - What is SUTRS - should be expanded here -->
+       <!-- FIXME - What is &sutrs; - should be expanded here -->
        </para>
       </listitem>
  
       <listitem>
        <para>
        </para>
       </listitem>
  
       <listitem>
        <para>
-       ISO2709-based formats (USMARC, etc.). Only records with a
+       ISO2709-based formats (&usmarc;, etc.). Only records with a
         two-level structure (corresponding to fields and subfields) can be
         directly mapped to ISO2709. For records with a different structuring
         two-level structure (corresponding to fields and subfields) can be
         directly mapped to ISO2709. For records with a different structuring
-       (eg., GILS), the representation in a structure like USMARC involves a
+       (eg., GILS), the representation in a structure like &usmarc; involves a
         schema-mapping (see <xref linkend="schema-mapping"/>), to an
         schema-mapping (see <xref linkend="schema-mapping"/>), to an
-       "implied" USMARC schema (implied,
+       "implied" &usmarc; schema (implied,
         because there is no formal schema which specifies the use of the
         because there is no formal schema which specifies the use of the
-       USMARC fields outside of ISO2709). The resultant, two-level record is
+       &usmarc; fields outside of ISO2709). The resultant, two-level record is
         then mapped directly from the internal representation to ISO2709. See
         the GILS schema definition files for a detailed example of this
         approach.
         then mapped directly from the internal representation to ISO2709. See
         the GILS schema definition files for a detailed example of this
         approach.
@@ -2085,8 +1850,326 @@
     
      </itemizedlist>
     </para>
     
      </itemizedlist>
     </para>
-  </sect1>
+  </section>
+  
+  <section id="grs-extended-marc-indexing">
+   <title>Extended indexing of &marc; records</title>
+   
+   <para>Extended indexing of &marc; records will help you if you need to index a
+    combination of subfields, or index only a part of the whole field,
+    or use embedded fields of a &marc; record during the indexing process.
+   </para>
+   
+   <para>Extended indexing of &marc; records additionally allows:
+    <itemizedlist>
+     
+     <listitem>
+      <para>to index data in LEADER of &marc; record</para>
+     </listitem>
+     
+     <listitem>
+      <para>to index data in control fields (with fixed length)</para>
+     </listitem>
+     
+     <listitem>
+      <para>to use the values of indicators during indexing</para>
+     </listitem>
+     
+     <listitem>
+      <para>to index linked fields for UNI&marc; based formats</para>
+     </listitem>
+     
+    </itemizedlist>
+   </para>
+   
+   <note><para>Compared with the simple indexing process, extended indexing
+     may increase the indexing time for &marc; records (by about 2-3
+     times).</para></note>
+   
+   <section id="formula">
+    <title>The index-formula</title>
+    
+    <para>At the beginning, we have to define the term
+     <emphasis>index-formula</emphasis> for &marc; records. This term helps
+     to understand the notation of extended indexing of &marc; records by &zebra;.
+     Our definition is based on the document
+     <ulink url="http://www.rba.ru/rusmarc/soft/Z39-50.htm">"The table
+      of conformity for &z3950; use attributes and R&usmarc; fields"</ulink>.
+     The document is available only in Russian.</para>
+    
+    <para>
+     The <emphasis>index-formula</emphasis> is the combination of
+     subfields presented in the following way:
+    </para>
+    
+    <screen>
+     71-00$a, $g, $h ($c){.$b ($c)} , (1)
+    </screen>
+    
+    <para>
+     &zebra; supports the &bib1; right truncation attribute.
+     In this case, the <emphasis>index-formula</emphasis> (1) consists of
+     the following forms, defined in the same way as (1):</para>
+    
+    <screen>
+     71-00$a, $g, $h
+     71-00$a, $g
+     71-00$a
+    </screen>
+    
+    <note>
+     <para>The original &marc; record may lack some of the elements included in the <emphasis>index-formula</emphasis>.
+     </para>
+    </note>
+    
+    <para>This notation includes such operands as:
+     <variablelist>
+      
+      <varlistentry>
+       <term>#</term>
+       <listitem><para>It means whitespace character.</para></listitem>
+      </varlistentry>
+      
+      <varlistentry>
+       <term>-</term>
+       <listitem><para>The position may contain any value, defined by
+        &marc; format.
+        For example, <emphasis>index-formula</emphasis></para>
+       
+       <screen>
+        70-#1$a, $g , (2)
+       </screen>
+       
+       <para>includes</para> 
+       
+       <screen>
+        700#1$a, $g
+        701#1$a, $g
+        702#1$a, $g
+       </screen>
+       
+       </listitem>
+      </varlistentry>
+      
+      <varlistentry>
+       <term>{...}</term>
+       <listitem>
+       <para>The repeatable elements are defined in curly braces {}.
+        For example,
+        <emphasis>index-formula</emphasis></para>
+       
+       <screen>
+        71-00$a, $g, $h ($c){.$b ($c)} , (3)
+       </screen>
+       
+       <para>includes</para>
+       
+       <screen>
+        71-00$a, $g, $h ($c). $b ($c)
+        71-00$a, $g, $h ($c). $b ($c). $b ($c)
+        71-00$a, $g, $h ($c). $b ($c). $b ($c). $b ($c)
+       </screen>
+       
+       </listitem>
+      </varlistentry>
+     </variablelist>
+     
+     <note>
+      <para>
+       All other operands are the same as those accepted in the &marc; world.
+      </para>
+     </note>
+    </para>
+   </section>
+   
+   <section id="notation">
+    <title>Notation of <emphasis>index-formula</emphasis> for &zebra;</title>
+    
+    
+    <para>Extended indexing overloads <literal>path</literal> of
+     <literal>elm</literal> definition in abstract syntax file of &zebra;
+     (<literal>.abs</literal> file). It means that names beginning with
+     <literal>"mc-"</literal> are interpreted by &zebra; as
+     <emphasis>index-formula</emphasis>. The database index is created and
+     linked with <emphasis>access point</emphasis> (&bib1; use attribute)
+     according to this formula.</para>
+    
+    <para>For example, <emphasis>index-formula</emphasis></para>
+    
+    <screen>
+     71-00$a, $g, $h ($c){.$b ($c)} , (4)
+    </screen>
+    
+    <para>in <literal>.abs</literal> file looks like:</para>
+    
+    <screen>
+     mc-71.00_$a,_$g,_$h_(_$c_){.$b_(_$c_)}
+    </screen>
+    
+    
+    <para>The notation of <emphasis>index-formula</emphasis> uses the operands:
+     <variablelist>
+      
+      <varlistentry>
+       <term>_</term>
+       <listitem><para>It means whitespace character.</para></listitem>
+      </varlistentry>
+      
+      <varlistentry>
+       <term>.</term>
+       <listitem><para>The position may contain any value, defined by
+        &marc; format. For example,
+        <emphasis>index-formula</emphasis></para>
+       
+       <screen>
+        70-#1$a, $g , (5)
+       </screen>
+       
+       <para>matches <literal>mc-70._1_$a,_$g_</literal> and includes</para>
+       
+       <screen>
+        700_1_$a,_$g_
+        701_1_$a,_$g_
+        702_1_$a,_$g_
+       </screen>
+       </listitem>
+      </varlistentry>
+      
+      <varlistentry>
+       <term>{...}</term>
+       <listitem><para>Repeatable elements are enclosed in
+        curly braces {}. For example,
+        <emphasis>index-formula</emphasis></para>
+       
+       <screen>
+        71-00$a, $g, $h ($c) {.$b ($c)} , (6)
+       </screen>
+       
+       <para>matches 
+        <literal>mc-71.00_$a,_$g,_$h_(_$c_){.$b_(_$c_)}</literal> and
+        includes</para>
+       
+       <screen>
+        71.00_$a,_$g,_$h_(_$c_).$b_(_$c_)
+        71.00_$a,_$g,_$h_(_$c_).$b_(_$c_).$b_(_$c_)
+        71.00_$a,_$g,_$h_(_$c_).$b_(_$c_).$b_(_$c_).$b_(_$c_)
+       </screen>
+       </listitem>
+      </varlistentry>
+      
+      <varlistentry>
+       <term>&#60;...&#62;</term>
+       <listitem><para>Embedded <emphasis>index-formula</emphasis> (for
+        linked fields) is between &#60;&#62;. For example,
+        <emphasis>index-formula</emphasis>
+       </para>
+       
+       <screen>
+        4--#-$170-#1$a, $g ($c) , (7)
+       </screen>
+       
+       <para>matches
+        <literal>mc-4.._._$1&#60;70._1_$a,_$g_(_$c_)&#62;_</literal> and
+        includes</para>
+       
+       <screen>
+        463_._$1&#60;70._1_$a,_$g_(_$c_)&#62;_
+       </screen>
+       
+       </listitem>
+      </varlistentry>
+     </variablelist>
+    </para>
+    
+    <note>
+     <para>All other operands are the same as those accepted in the &marc; world.</para>
+    </note>
+    
+    <section id="grs-examples">
+     <title>Examples</title>
+     
+     <para>
+      <orderedlist>
+       
+       <listitem>
+       
+       <para>indexing LEADER</para>
+       
+       <para>You need to use the keyword "ldr" to index the leader. For example,
+        indexing data from the 6th and 7th positions of the LEADER:</para>
+       
+       <screen>
+        elm mc-ldr[6] Record-type !
+        elm mc-ldr[7] Bib-level   !
+       </screen>
+       
+       </listitem>
+       
+       <listitem>
+       
+       <para>indexing data from control fields</para>
+       
+       <para>indexing date (the time added to database)</para>
+       
+       <screen>
+        elm mc-008[0-5] Date/time-added-to-db !        
+       </screen>
+       
+       <para>or for R&usmarc; (this data is included in the 100th field)</para>
+       
+       <screen>
+        elm mc-100___$a[0-7]_ Date/time-added-to-db !
+       </screen>
+       
+       </listitem>
+       
+       <listitem>
+       
+       <para>using indicators while indexing</para>
+
+       <para>For R&usmarc; <emphasis>index-formula</emphasis>
+        <literal>70-#1$a, $g</literal> matches</para>
+       
+       <screen>
+        elm mc-70._1_$a,_$g_ Author !:w,!:p
+       </screen>
+       
+       <para>When &zebra; finds a field matching the
+        <literal>"70."</literal> pattern, it checks the indicators. In this
+        case the value of the first indicator doesn't matter, but the value of
+        the second one must be whitespace; otherwise the field is not
+        indexed.</para>
+       </listitem>
+       
+       <listitem>
+       
+       <para>indexing embedded (linked) fields for UNI&marc; based
+        formats</para>
+       
+       <para>For R&usmarc; <emphasis>index-formula</emphasis> 
+        <literal>4--#-$170-#1$a, $g ($c)</literal> matches</para>
+       
+       <screen><![CDATA[
+        elm mc-4.._._$1<70._1_$a,_$g_(_$c_)>_ Author !:w,!:p
+        ]]></screen>
+       
+       <para>Data are extracted from a record if the field matches the
+        <literal>"4.._."</literal> pattern and the data in the linked field
+        match the embedded
+        <emphasis>index-formula</emphasis>
+        <literal>70._1_$a,_$g_(_$c_)</literal>.</para>
+       
+       </listitem>
+       
+      </orderedlist>
+     </para>
+     
+     
+    </section>
+   </section>
  
  
+  </section>
+  
   </chapter>
   <!-- Keep this comment at the end of the file
   Local variables:
   </chapter>
   <!-- Keep this comment at the end of the file
   Local variables: