Merge branch 'master' into fetch_more

[pazpar2-moved-to-github.git] / doc / pazpar2_conf.xml
diff --git a/doc/pazpar2_conf.xml b/doc/pazpar2_conf.xml

index dcc1fdf..fbd39fb 100644 (file)
--- a/doc/pazpar2_conf.xml
+++ b/doc/pazpar2_conf.xml
@@ -262,13 +262,35 @@
              Specifies that this element is to be used to
              help rank 
              records against the user's query (when ranking is
-            requested). The value is an integer, used as a
-            multiplier against the basic TF*IDF score. A value of
-            1 is the base, higher values give additional
-            weight to 
+            requested). 
+            The value is of the form
+            <literallayout>
+             M [F N]
+            </literallayout>
+            where M is an integer, used as a
+            weight against the basic TF*IDF score. A value of
+            1 is the base, higher values give additional weight to
              elements of this type. The default is '0', which
              excludes this element from the rank calculation.
             </para>
+           <para>
+            F is a CCL field and N is the multiplier for terms
+            that match that part of the CCL field in search.
+            The F+N combo allows the system to use a different
+            multiplier for a certain field. For example, a rank value of
+            "<literal>1 au 3</literal>" gives a multiplier of 3 for
+            all terms that are part of the au(thor) field and 1 for everything else.
+           </para>
+           <para>
+            For Pazpar2 1.6.13 and later, the rank may also be defined
+            "per-document", by the normalization stylesheet. 
+           </para>
+           <para>
+            The per field rank was introduced in Pazpar2 1.6.15. Earlier
+            releases only allowed a rank value M (simple integer).
+           </para>
+           See <xref linkend="relevance_ranking"/> for more
+           about ranking.
            </listitem>
           </varlistentry>
           
@@ -300,6 +322,11 @@
              all elements), or 'no' (don't merge; this is the
              default);
             </para>
+           <para>
+            Pazpar2 1.6.24 also offers a new value for merge, 'first', which
+            is like 'all' but only takes all from the first database that returns
+            the particular metadata field.
+           </para>
            </listitem>
           </varlistentry>
           
@@ -339,6 +366,44 @@
           </varlistentry>
           
           <varlistentry>
+          <term id="limitcluster">limitcluster</term>
+          <listitem>
+           <para>
+            Allow a limit on merged metadata. The value of this attribute
+            is the name of actual metadata content to be used for matching
+            (most often same name as metadata name).
+           </para>
+           <note>
+            <para>
+             Requires Pazpar2 1.6.23 or later.
+            </para>
+           </note>
+          </listitem>
+         </varlistentry>
+         
+         <varlistentry>
+          <term id="metadata_limitmap">limitmap</term>
+          <listitem>
+           <para>
+            Specifies a default limitmap for this field. This is to avoid mass
+            configuring of targets. However, it is important to review/do this on a
+            per-target basis since it is usually target-specific. See limitmap for format.
+           </para>
+          </listitem>
+         </varlistentry>
+         
+         <varlistentry>
+          <term id="metadata_facetmap">facetmap</term>
+          <listitem>
+           <para>
+            Specifies a default facetmap for this field. This is to avoid mass
+            configuring of targets. However, it is important to review/do this on a
+            per-target basis since it is usually target-specific. See facetmap for format.
+           </para>
+          </listitem>
+         </varlistentry>
+         
+         <varlistentry>
            <term>setting</term>
            <listitem>
             <para>
@@ -521,7 +586,138 @@
          </para>
         </listitem>
         </varlistentry>
+
+       <varlistentry>
+       <term>ccldirective</term>
+       <listitem>
+        <para>
+         Customizes the CCL parsing (interpretation of query parameter
+         in search).
+         The name and value of the CCL directive are given by attributes
+         'name' and 'value' respectively. Refer to the list of possible names
+         in the
+         <ulink 
+             url="http://www.indexdata.com/yaz/doc/tools.html#ccl.directives.table">
+          YAZ manual
+          </ulink>.
+        </para>
+       </listitem>
+       </varlistentry>
+
+       <varlistentry>
+       <term>rank</term>
+       <listitem>
+        <para>
+         Customizes the ranking (relevance) algorithm. Also known as
+         rank tweaks. The rank element
+         accepts the following attributes - all being optional:
+        </para>
+        <variablelist>
+         <varlistentry>
+          <term>cluster</term>
+          <listitem>
+           <para>
+            Attribute 'cluster' is a boolean
+            that controls whether Pazpar2 should boost ranking for merged
+            records. Is 'yes' by default. A value of 'no' will make
+            Pazpar2 average ranking of each record in a cluster.
+           </para>
+          </listitem>
+         </varlistentry>
+         <varlistentry>
+          <term>debug</term>
+          <listitem>
+           <para>
+            Attribute 'debug' is a boolean
+            that controls whether Pazpar2 should include details
+            about ranking for each document in the show command's
+            response. Enable by using value "yes", disable by using
+            value "no" (default).
+           </para>
+          </listitem>
+         </varlistentry>
+         <varlistentry>
+          <term>follow</term>
+          <listitem>
+           <para>
+            Attribute 'follow' is a floating point number greater than
+            or equal to 0. A positive number will boost weight for terms
+            that occur close to each other (proximity, distance).
+            A value of 1 will double the weight if two terms are in
+            proximity distance of 1 (next to each other). The default
+            value of 'follow' is 0 (order will not affect weight).
+           </para>
+          </listitem>
+         </varlistentry>
+         <varlistentry>
+          <term>lead</term>
+          <listitem>
+           <para>
+            Attribute 'lead' is a floating point number.
+            It controls if term weight should be reduced by position
+            from start in a metadata field. A positive value of 'lead'
+            will reduce weight as it appears further away from the lead
+            of the field. Default value is 0 (no reduction of weight by
+            position).
+           </para>
+          </listitem>
+         </varlistentry>
+         <varlistentry>
+          <term>length</term>
+          <listitem>
+           <para>
+            Attribute 'length' determines how/if term weight should be
+            divided by length of metadata field. A value of "linear" will
+            divide by length. A value of "log" will divide by log2(length).
+            A value of "none" will leave term weight as is (no division).
+            Default value is "linear".
+           </para>
+          </listitem>
+         </varlistentry>
+        </variablelist>
+        <para>
+         Refer to <xref linkend="relevance_ranking"/> to see how
+         these tweaks are used in computation of score.
+        </para>
+        <para>
+         Customization of ranking algorithm was introduced with
+         Pazpar2 1.6.18. The semantics of some of the fields changed
+         in versions up to 1.6.22.
+        </para>
+       </listitem>
+       </varlistentry>
         
+       <varlistentry id="sort-default">
+       <term>sort-default</term>
+       <listitem>
+        <para>
+         Specifies the default sort criteria (default 'relevance'),
+         which previously was hard-coded as default criteria in search.
+         This is a fix/work-around to avoid re-searching when using
+         target-based sorting. In order for this to work efficiently,
+         the search must also have the sort criteria parameter; otherwise
+         pazpar2 will do re-searching on search criteria changes, if
+         changed between search and show command.
+        </para>
+        <para>
+         This configuration was added in pazpar2 1.6.20.
+        </para>
+       </listitem>
+       </varlistentry>
+
+<!--       
+       <varlistentry>
+       <term>set</term>
+       <listitem>
+        <para>
+         Specifies a variable that will be inherited by all targets defined in settings
+         <screen>
+          &lt;set name="test" value="en"..&lt;set>
+         </screen>
+        </para>
+       </listitem>
+       </varlistentry>
+-->   
         <varlistentry>
         <term>settings</term>
         <listitem>
@@ -583,7 +779,7 @@
               type="year" merge="range" termlist="yes"/>
     <metadata name="author" brief="yes" termlist="yes"
               merge="longest" rank="2"/>
-   <metadata name="subject" merge="unique" termlist="yes" rank="3"/>
+   <metadata name="subject" merge="unique" termlist="yes" rank="3" limitmap="local:"/>
     <metadata name="url" merge="unique"/>
     <icu_chain id="relevance" locale="el">
      <transform rule="[:Control:] Any-Remove"/>
@@ -1078,6 +1274,15 @@
       </listitem>
      </varlistentry>
      <varlistentry>
+     <term>pz:presentchunk</term>
+     <listitem>
+      <para>
+       Controls the chunk size in present requests. Pazpar2 will 
+       make (maxrecs / chunk) request(s). The default is 20.
+      </para>
+     </listitem>
+    </varlistentry>
+    <varlistentry>
       <term>pz:id</term>
       <listitem>
        <para>
@@ -1114,7 +1319,7 @@
       <listitem>
        <para>
         This setting enables
-       <ulink url="&url.sru;">SRU</ulink>/<ulink url="&url.solr;">SOLR</ulink>
+       <ulink url="&url.sru;">SRU</ulink>/<ulink url="&url.solr;">Solr</ulink>
         support.
         It has four possible settings.
         'get', enables SRU access through GET requests. 'post' enables SRU/POST
@@ -1123,7 +1328,7 @@
         the protocol.
        </para>
        <para>
-       A value of 'solr' anables SOLR client support. This is supported
+       A value of 'solr' enables Solr client support. This is supported
         for Pazpar version 1.5.0 and later.
        </para>
       </listitem>
@@ -1135,7 +1340,7 @@
        <para>
         This allows SRU version to be specified. If unset Pazpar2
         will the default of YAZ (currently 1.2). Should be set
-       to 1.1 or 1.2. For SOLR, the current supported/tested version is 1.4
+       to 1.1 or 1.2. For Solr, the currently supported/tested versions are 1.4 and 3.x.
        </para>
       </listitem>
      </varlistentry>
@@ -1145,7 +1350,7 @@
       <listitem>
        <para>
         Allows you to specify an arbitrary PQF query language substring.
-       The provided string is prefixed the user's query after it has been
+       The provided string is prefixed to the user's query after it has been
         normalized to PQF internally in pazpar2.
         This allows you to attach complex 'filters' to queries for a given
         target, sometimes necessary to select sub-catalogs
@@ -1168,6 +1373,17 @@
         <literal>@and @attr 1=30 @attr 2=3 %Y %%</literal>
         would search for current year combined with the original PQF (%%).
        </para>
+      <para>
+       This setting can also be used as a more general alternative to
+       pz:pqf_prefix -- a way of embedding the submitted query
+       anywhere in the string rather than appending it to a prefix.  For
+       example, if it is desired to omit all records satisfying the
+       query <literal>@attr 1=pica.bib 0007</literal> then this
+       subquery can be combined with the submitted query as the second
+       argument of <literal>@not</literal> by using the
+       pz:pqf_strftime value <literal>@not %% @attr 1=pica.bib
+       0007</literal>.
+      </para>
       </listitem>
      </varlistentry>
      
@@ -1209,7 +1425,6 @@
        </para>
       </listitem>
      </varlistentry>
-
      <varlistentry>
       <term>pz:block_timeout</term>
       <listitem>
@@ -1219,6 +1434,29 @@
        </para>
       </listitem>
      </varlistentry>
+    <varlistentry>
+     <term>pz:termlist_term_count</term>
+     <listitem>
+      <para>
+       Specifies number of facet terms to be requested from the target. 
+       The default is unspecified, i.e. server-decided. Also see pz:facetmap.
+      </para>
+     </listitem>
+    </varlistentry>
+    <varlistentry>
+     <term>pz:termlist_term_factor</term>
+     <listitem>
+      <para>
+       Specifies whether to use a factor for pazpar2 generated facets (1) or not (0). 
+       When mixing locally generated (by the downloaded (pz:maxrecs) samples)
+       facets with native (target-generated) facets, the latter will dominate the facet list
+       since they are generated based on the complete result set. 
+       By scaling up the facet count using the ratio between total hit count and the sample size, 
+       the total facet count can be approximated and thus better compared with native facets. 
+       This is not enabled by default.
+      </para>
+     </listitem>
+    </varlistentry>
  
      <varlistentry>
       <term>pz:facetmap:<replaceable>name</replaceable></term>
@@ -1230,7 +1468,7 @@
        </para>
        <note>
         <para>
-       At this point only SOLR targets have been tested with this
+       At this point only Solr targets have been tested with this
         facility.
         </para>
        </note>
@@ -1242,20 +1480,31 @@
       <listitem>
        <para>
         Specifies attributes for limiting a search to a field - using
-       the limit parameter for search. In some cases the mapping of 
+       the limit parameter for search. It can be used to filter locally
+       or remotely (search in a target). In some cases the mapping of 
         a field to a value is identical to an existing cclmap field; in
         other cases the field must be specified in a different way - for
         example to match a complete field (rather than parts of a subfield).
        </para>
        <para>
-       The value of limitmap may have one of two forms: referral to
-       an exisiting CCL field or a raw PQF string. Leading string
-       determines type; either <literal>ccl:</literal> for CCL field or
-       <literal>rpn:</literal> for PQF/RPN.
+       The value of limitmap may have one of three forms: referral to
+       an existing CCL field, a raw PQF string or a local limit. Leading string
+       determines type; either <literal>ccl:</literal> for CCL field, 
+       <literal>rpn:</literal> for PQF/RPN, or <literal>local:</literal>
+       for filtering in Pazpar2. The local filtering may be followed
+       by a metadata field (default is to use the name of the
+       limitmap itself).
+      </para>
+      <para>
+       For Pazpar2 version 1.6.23 and later the limitmap may include multiple
+       specifications, separated by <literal>,</literal> (comma).
+       For example:
+       <literal>ccl:title,local:ltitle,rpn:@attr 1=4</literal>.
        </para>
        <note>
         <para>
         The limitmap facility is supported for Pazpar2 version 1.6.0.
+       Local filtering is supported in Pazpar2 1.6.6.
         </para>
        </note>
       </listitem>