and uses LibXSLT for both indexing (extract) and retrieval (present).
During indexing the filter generates a Zebra indexing record via XSLT
which describes how Zebra is to index the record. Because the driver is
XSLT driven it can use any X-Path plus logic behind the scenes and is thus
more powerful than xelm/elm in .abs. The XSLT can accept parameters from
Zebra. For example, if a date is received the filter could make a date
index. The filter also allows splitting of XML records during
indexing, so that MARC collections can be indexed directly (but it
is quite limited and takes place before XSLT is invoked: XSLT requires
a DOM structure in memory). Refer to the example test case in test/xslt.
dnl Zebra, Index Data ApS, 1995-2005
-dnl $Id: configure.in,v 1.118 2005-04-26 08:11:22 adam Exp $
+dnl $Id: configure.in,v 1.119 2005-04-28 08:20:39 adam Exp $
dnl
AC_INIT(include/idzebra/version.h)
AM_INIT_AUTOMAKE(idzebra,1.4.0)
dnl ------ YAZ
YAZ_INIT($yazflag,2.1.3)
YAZ_DOC
+dnl ----- libXSLT
+AC_SUBST(XSLT_LIBS)
+AC_SUBST(XSLT_CFLAGS)
+xsltdir=yes
+AC_ARG_WITH(xslt,[[ --with-xslt[=PREFIX] use libxslt in PREFIX]],xsltdir=$withval)
+if test "$xsltdir" = "yes"; then
+ for d in /usr /usr/local; do
+ if test -x $d/bin/xslt-config; then
+ xsltdir=$d
+ fi
+ done
+fi
+if test "$xsltdir" != "no"; then
+ AC_MSG_CHECKING(for libXSLT)
+ if test -x $xsltdir/bin/xslt-config; then
+ XSLT_LIBS=`$xsltdir/bin/xslt-config --libs`
+ XSLT_CFLAGS=`$xsltdir/bin/xslt-config --cflags`
+ XSLT_VER=`$xsltdir/bin/xslt-config --version`
+ AC_MSG_RESULT($XSLT_VER)
+ AC_DEFINE(HAVE_XSLT)
+ else
+ AC_MSG_RESULT(Not found)
+ fi
+fi
dnl ------ Look for Tcl
dnl See if user has specified location of tclConfig.sh; otherwise
dnl see if tclConfig.sh exists in same prefix lcoation as tclsh; otherwise
ZEBRA_MODULE(grs-regx,shared,[ --enable-mod-grs-regx REGX/TCL filter])
ZEBRA_MODULE(grs-marc,shared,[ --enable-mod-grs-marc MARC filter])
ZEBRA_MODULE(grs-danbib,shared,[ --enable-mod-grs-danbib DanBib filter (DBC)])
-ZEBRA_MODULE(safari,shared,[ --enable-mod-safari Safari filter (DBC)])
+ZEBRA_MODULE(safari,shared, [ --enable-mod-safari Safari filter (DBC)])
if test "$ac_cv_header_expat_h" = "yes"; then
def="shared"
else
def="no"
fi
ZEBRA_MODULE(grs-xml,[$def], [ --enable-mod-grs-xml XML filter (Expat based)])
-ZEBRA_MODULE(alvis,shared, [ --enable-mod-alvis ALVIS XML filter])
+if test "$XSLT_VER"; then
+ def="shared"
+else
+ def="no"
+fi
+ZEBRA_MODULE(xslt,[$def], [ --enable-mod-xslt XSLT filter])
+ZEBRA_MODULE(alvis,shared, [ --enable-mod-alvis ALVIS filter])
dnl ------ ANSI C Header files
AC_STDC_HEADERS
if test "$ac_cv_header_stdc" = "no"; then
doc/zebraphp.dsl
doc/tkl.xsl
test/Makefile test/gils/Makefile test/usmarc/Makefile test/api/Makefile
+ test/xslt/Makefile
test/xpath/Makefile
test/rusmarc/Makefile test/cddb/Makefile test/malxml/Makefile
test/mbox/Makefile
-/* $Id: recctrl.h,v 1.9 2005-03-31 12:42:06 adam Exp $
+/* $Id: recctrl.h,v 1.10 2005-04-28 08:20:39 adam Exp $
Copyright (C) 1995-2005
Index Data ApS
void (*init)(struct recExtractCtrl *p, RecWord *w);
void *clientData;
void (*tokenAdd)(RecWord *w);
+ void (*setStoreData)(struct recExtractCtrl *p, void *buf, size_t size);
ZebraMaps zebra_maps;
+ int first_record;
int flagShowRecords;
int seqno[256];
char match_criteria[256];
-/* $Id: extract.c,v 1.178 2005-04-15 10:47:48 adam Exp $
+/* $Id: extract.c,v 1.179 2005-04-28 08:20:39 adam Exp $
Copyright (C) 1995-2005
Index Data ApS
}
}
+static void extract_set_store_data_prepare(struct recExtractCtrl *p);
+
static void extract_init (struct recExtractCtrl *p, RecWord *w)
{
w->zebra_maps = p->zebra_maps;
SYSNO *sysno, const char *fname,
int deleteFlag,
struct file_read_info *fi,
- int force_update)
+ int force_update,
+ RecType recType,
+ void *recTypeClientData)
{
RecordAttr *recordAttr;
int r;
SYSNO sysnotmp;
Record rec;
off_t recordOffset = 0;
- RecType recType;
- void *clientData;
- if (!(recType =
- recType_byName (zh->reg->recTypes, zh->res, zh->m_record_type,
- &clientData)))
- {
- yaz_log (YLOG_WARN, "No such record type: %s", zh->m_record_type);
- return 0;
- }
-
/* announce database */
if (zebraExplain_curDatabase (zh->reg->zei, zh->basenames[0]))
{
create_rec_keys_codec(&zh->reg->keys);
zh->reg->sortKeys.buf_used = 0;
+
recordOffset = fi->file_moffset;
+ extractCtrl.handle = zh;
extractCtrl.offset = fi->file_moffset;
extractCtrl.readf = file_read;
extractCtrl.seekf = file_seek;
extractCtrl.schemaAdd = extract_schema_add;
extractCtrl.dh = zh->reg->dh;
extractCtrl.match_criteria[0] = '\0';
- extractCtrl.handle = zh;
+ extractCtrl.first_record = fi->file_offset ? 0 : 1;
+
+ extract_set_store_data_prepare(&extractCtrl);
+
for (i = 0; i<256; i++)
{
if (zebra_maps_is_positioned(zh->reg->zebra_maps, i))
yaz_log_init_prefix2 (msg);
}
- r = (*recType->extract)(clientData, &extractCtrl);
+ r = (*recType->extract)(recTypeClientData, &extractCtrl);
yaz_log_init_prefix2 (0);
if (r == RECCTRL_EXTRACT_EOF)
/* update store data */
xfree (rec->info[recInfo_storeData]);
- if (zh->m_store_data)
+ if (zh->store_data_buf)
+ {
+ rec->size[recInfo_storeData] = zh->store_data_size;
+ rec->info[recInfo_storeData] = zh->store_data_buf;
+ zh->store_data_buf = 0;
+ file_end(fi, fi->file_offset);
+ }
+ else if (zh->m_store_data)
{
rec->size[recInfo_storeData] = recordAttr->recordSize;
rec->info[recInfo_storeData] = (char *)
char ext_res[128];
struct file_read_info *fi;
const char *original_record_type = 0;
+ RecType recType;
+ void *recTypeClientData;
if (!zh->m_group || !*zh->m_group)
*gprefix = '\0';
zh->m_record_id = res_get (zh->res, ext_res);
}
+ if (!(recType =
+ recType_byName (zh->reg->recTypes, zh->res, zh->m_record_type,
+ &recTypeClientData)))
+ {
+ yaz_log(YLOG_WARN, "No such record type: %s", zh->m_record_type);
+ return 0;
+ }
+
+ switch(recType->version)
+ {
+ case 0:
+ break;
+ default:
+ yaz_log(YLOG_WARN, "Bad filter version: %s", zh->m_record_type);
+ }
if (sysno && deleteFlag)
fd = -1;
else
do
{
file_begin (fi);
- r = file_extract_record (zh, sysno, fname, deleteFlag, fi, 1);
+ r = file_extract_record (zh, sysno, fname, deleteFlag, fi, 1,
+ recType, recTypeClientData);
} while (r && !sysno && fi->file_more);
file_read_stop (fi);
if (fd != -1)
extractCtrl.seekf = zebra_record_int_seek;
extractCtrl.tellf = zebra_record_int_tell;
extractCtrl.endf = zebra_record_int_end;
+ extractCtrl.first_record = 1;
extractCtrl.fh = &fc;
create_rec_keys_codec(&zh->reg->keys);
else
extractCtrl.seqno[i] = 0;
}
+ extract_set_store_data_prepare(&extractCtrl);
r = (*recType->extract)(clientData, &extractCtrl);
extractCtrl.flagShowRecords = 0;
extractCtrl.match_criteria[0] = '\0';
extractCtrl.handle = handle;
+ extractCtrl.first_record = 1;
+
+ extract_set_store_data_prepare(&extractCtrl);
if (n)
grs_extract_tree(&extractCtrl, n);
extract_add_incomplete_field(p);
}
+/* Store-data callback handed to record filters through
+   recExtractCtrl::setStoreData.  Whatever buffer the Zebra handle held
+   before is released; if both buf and sz are non-zero the handle then
+   receives its own copy of the sz bytes at buf, otherwise it is left
+   without a buffer (pointer and size both zero). */
+static void extract_set_store_data_cb(struct recExtractCtrl *p,
+                                      void *buf, size_t sz)
+{
+    ZebraHandle handle = (ZebraHandle) p->handle;
+
+    xfree(handle->store_data_buf);
+    if (!buf || !sz)
+    {
+        /* nothing to keep: leave the handle in the "no data" state */
+        handle->store_data_buf = 0;
+        handle->store_data_size = 0;
+        return;
+    }
+    handle->store_data_buf = xmalloc(sz);
+    handle->store_data_size = sz;
+    memcpy(handle->store_data_buf, buf, sz);
+}
+
+/* Called before a filter's extract function is invoked: installs the
+   setStoreData callback on the control block and discards any store
+   data still attached to the Zebra handle. */
+static void extract_set_store_data_prepare(struct recExtractCtrl *p)
+{
+    ZebraHandle handle = (ZebraHandle) p->handle;
+
+    p->setStoreData = extract_set_store_data_cb;
+
+    xfree(handle->store_data_buf);
+    handle->store_data_size = 0;
+    handle->store_data_buf = 0;
+}
+
void extract_schema_add (struct recExtractCtrl *p, Odr_oid *oid)
{
- ZebraHandle zh = (ZebraHandle) (p->handle);
+ ZebraHandle zh = (ZebraHandle) p->handle;
zebraExplain_addSchema (zh->reg->zei, oid);
}
-/* $Id: index.h,v 1.134 2005-04-25 11:54:08 adam Exp $
+/* $Id: index.h,v 1.135 2005-04-28 08:20:40 adam Exp $
Copyright (C) 1995-2005
Index Data ApS
int m_explain_database;
int m_flag_rw;
int m_file_verbose_limit;
+
+ void *store_data_buf;
+ size_t store_data_size;
};
struct rank_control {
-/* $Id: zebraapi.c,v 1.162 2005-04-26 08:11:22 adam Exp $
+/* $Id: zebraapi.c,v 1.163 2005-04-28 08:20:40 adam Exp $
Copyright (C) 1995-2005
Index Data ApS
zebra_mutex_cond_unlock (&zs->session_lock);
+ zh->store_data_buf = 0;
+
return zh;
}
ASSERTZH;
assert(name);
assert(value);
- yaz_log(log_level, "zebra_set_resource %s:%s",name,value);
+ yaz_log(log_level, "zebra_set_resource %s:%s", name, value);
zh->errCode = 0;
res_set(zh->res, name, value);
}
ASSERTZH;
assert(name);
assert(defaultvalue);
- v= res_get_def( zh->res, name, (char *)defaultvalue);
+ v = res_get_def (zh->res, name, (char *)defaultvalue);
zh->errCode = 0;
- yaz_log(log_level, "zebra_get_resource %s:%s",name,v);
+ yaz_log(log_level, "zebra_get_resource %s:%s", name, v);
return v;
}
-## $Id: Makefile.am,v 1.16 2005-03-31 12:42:06 adam Exp $
+## $Id: Makefile.am,v 1.17 2005-04-28 08:20:40 adam Exp $
common_libs = libidzebra-recctrl.la \
../data1/libidzebra-data1.la \
mod_alvis_la_SOURCES = alvis.c
mod_alvis_la_LDFLAGS = -rpath $(pkglibdir) -module -avoid-version
-mod_alvis_la_LADD =
+mod_alvis_la_LADD = $(XSLT_LIBS)
mod_alvis_la_LIBADD = $(common_libs) $(mod_alvis_la_LADD)
+# XSLT filter module: built from xslt.c, linked against the common
+# libraries plus its own extra libraries (libxslt).
+mod_xslt_la_SOURCES = xslt.c
+mod_xslt_la_LDFLAGS = -rpath $(pkglibdir) -module -avoid-version
+mod_xslt_la_LADD = $(XSLT_LIBS)
+mod_xslt_la_LIBADD = $(common_libs) $(mod_xslt_la_LADD)
+
pkglib_LTLIBRARIES = $(SHARED_MODULE_LA)
EXTRA_LTLIBRARIES = \
mod-grs-regx.la \
mod-grs-marc.la \
mod-grs-danbib.la \
mod-safari.la \
- mod-alvis.la
+ mod-alvis.la \
+ mod-xslt.la
# The common library
lib_LTLIBRARIES = libidzebra-recctrl.la
$(mod_grs_xml_la_SOURCES) \
$(mod_grs_marc_la_SOURCES) \
$(mod_grs_danbib_la_SOURCES) \
- $(mod_safari_la_SOURCES)
+ $(mod_safari_la_SOURCES) \
+ $(mod_alvis_la_SOURCES) \
+ $(mod_xslt_la_SOURCES)
-AM_CPPFLAGS = -I$(srcdir)/../include $(YAZINC) $(TCL_INCLUDE) -DDEFAULT_MODULE_PATH=\"$(pkglibdir)\"
+AM_CPPFLAGS = -I$(srcdir)/../include $(YAZINC) $(XSLT_CFLAGS) \
+ $(TCL_INCLUDE) -DDEFAULT_MODULE_PATH=\"$(pkglibdir)\"
-/* $Id: alvis.c,v 1.1 2005-03-31 12:42:06 adam Exp $
+/* $Id: alvis.c,v 1.2 2005-04-28 08:20:40 adam Exp $
Copyright (C) 1995-2005
Index Data ApS
}
-static void filter_destroy(void *clientData)
+static void filter_destroy (void *clientData)
{
struct filter_info *tinfo = clientData;
xfree (tinfo->sep);
xfree (tinfo);
}
-struct fi_info {
+struct buf_info {
struct recExtractCtrl *p;
char *buf;
int offset;
int max;
};
-static struct fi_info *fi_open(struct recExtractCtrl *p)
+static struct buf_info *buf_open (struct recExtractCtrl *p)
{
- struct fi_info *fi = (struct fi_info *) xmalloc (sizeof(*fi));
+ struct buf_info *fi = (struct buf_info *) xmalloc (sizeof(*fi));
fi->p = p;
fi->buf = (char *) xmalloc (4096);
return fi;
}
-static int fi_getchar(struct fi_info *fi, char *dst)
+static int buf_read (struct filter_info *tinfo, struct buf_info *fi, char *dst)
{
if (fi->offset >= fi->max)
{
return 0;
}
*dst = fi->buf[(fi->offset)++];
- return 1;
-}
-
-static int fi_gets(struct fi_info *fi, char *dst, int max)
-{
- int l;
- for (l = 0; l < max; l++)
+ if (tinfo->sep && *dst == *tinfo->sep)
{
- if (!fi_getchar(fi, dst+l))
- return 0;
- if (dst[l] == '\n')
- break;
+ off_t off = (*fi->p->tellf)(fi->p->fh);
+ (*fi->p->endf)(fi->p->fh, off - (fi->max - fi->offset));
+ return 0;
}
- dst[l] = '\0';
return 1;
}
-static void fi_close (struct fi_info *fi)
+static void buf_close (struct buf_info *fi)
{
xfree (fi->buf);
xfree (fi);
}
-static int filter_extract(void *clientData, struct recExtractCtrl *p)
+static int filter_extract (void *clientData, struct recExtractCtrl *p)
{
struct filter_info *tinfo = clientData;
- char line[512];
+ char w[512];
RecWord recWord;
- struct fi_info *fi = fi_open(p);
+ int r;
+ struct buf_info *fi = buf_open (p);
#if 0
yaz_log(YLOG_LOG, "filter_extract off=%ld",
xfree(tinfo->sep);
tinfo->sep = 0;
(*p->init)(p, &recWord);
-
- if (!fi_gets(fi, line, sizeof(line)-1))
- return RECCTRL_EXTRACT_ERROR_GENERIC;
- sscanf(line, "%255s", p->match_criteria);
-
recWord.reg_type = 'w';
- while (fi_gets(fi, line, sizeof(line)-1))
+ do
{
- int nor = 0;
- char field[40];
- char *cp;
-#if 0
- yaz_log(YLOG_LOG, "safari line: %s", line);
-#endif
- if (sscanf(line, ZINT_FORMAT " " ZINT_FORMAT " " ZINT_FORMAT " %39s %n",
- &recWord.record_id, &recWord.section_id, &recWord.seqno,
- field, &nor) < 4)
- {
- yaz_log(YLOG_WARN, "Bad safari record line: %s", line);
- return RECCTRL_EXTRACT_ERROR_GENERIC;
+ int i = 0;
+
+ r = buf_read (tinfo, fi, w);
+ while (r > 0 && i < 511 && w[i] != '\n' && w[i] != '\r')
+ {
+ i++;
+ r = buf_read (tinfo, fi, w + i);
}
- for (cp = line + nor; *cp == ' '; cp++)
- ;
- recWord.attrStr = field;
- recWord.term_buf = cp;
- recWord.term_len = strlen(cp);
- (*p->tokenAdd)(&recWord);
- }
- fi_close(fi);
+ if (i)
+ {
+ recWord.term_buf = w;
+ recWord.term_len = i;
+ (*p->tokenAdd)(&recWord);
+ }
+ } while (r > 0);
+ buf_close (fi);
return RECCTRL_EXTRACT_OK;
}
-/* $Id: recctrl.c,v 1.19 2005-03-31 12:42:07 adam Exp $
+/* $Id: recctrl.c,v 1.20 2005-04-28 08:20:40 adam Exp $
Copyright (C) 1995-2005
Index Data ApS
}
#endif
#ifdef IDZEBRA_STATIC_ALVIS
+#if HAVE_XSLT
if (1)
{
extern RecType idzebra_filter_alvis[];
recTypeClass_add (&rts, idzebra_filter_alvis, nmem, 0);
}
#endif
+#endif
+#ifdef IDZEBRA_STATIC_XSLT
+#if HAVE_XSLT
+ if (1)
+ {
+ extern RecType idzebra_filter_xslt[];
+ recTypeClass_add (&rts, idzebra_filter_xslt, nmem, 0);
+ }
+#endif
+#endif
#if HAVE_DLFCN_H
if (module_path)
--- /dev/null
+/* $Id: xslt.c,v 1.1 2005-04-28 08:20:40 adam Exp $
+ Copyright (C) 1995-2005
+ Index Data ApS
+
+This file is part of the Zebra server.
+
+Zebra is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Zebra; see the file LICENSE.zebra. If not, write to the
+Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA.
+*/
+
+#include <stdio.h>
+#include <assert.h>
+#include <ctype.h>
+#include <string.h>
+
+#include <yaz/diagbib1.h>
+#include <libxml/xmlreader.h>
+#include <libxslt/transform.h>
+
+#include <idzebra/util.h>
+#include <idzebra/recctrl.h>
+
+struct filter_info { /* private state of one XSLT filter instance (created by filter_init) */
+ xsltStylesheetPtr stylesheet_xsp; /* parsed stylesheet; 0 until filter_config has loaded one */
+ xmlTextReaderPtr reader; /* streaming XML reader kept open between filter_extract calls; 0 when none */
+ char *fname; /* file name of the stylesheet currently loaded; 0 before first filter_config */
+ int split_depth; /* element depth at which filter_extract cuts records; 0 disables the depth test */
+};
+
+static const char *zebra_index_ns = "http://indexdata.dk/zebra/indexing/1";
+
+/* Create the private state for one filter instance.  No stylesheet or
+   reader exists yet, and records are split at element depth 1.  Neither
+   res nor recType is consulted here.  Returns the new state, which is
+   later passed back as clientData. */
+static void *filter_init (Res res, RecType recType)
+{
+    struct filter_info *state =
+        (struct filter_info *) xmalloc(sizeof(struct filter_info));
+
+    state->split_depth = 1;
+    state->fname = 0;
+    state->reader = 0;
+    state->stylesheet_xsp = 0;
+    return state;
+}
+
+/* Select the stylesheet for this filter instance.  args is the
+   stylesheet file name; a null or empty string means "default.xsl".
+   The stylesheet is (re)parsed only when the name differs from the one
+   already loaded.  A parse failure leaves stylesheet_xsp as whatever
+   xsltParseStylesheetFile returned; res is not used. */
+static void filter_config(void *clientData, Res res, const char *args)
+{
+    struct filter_info *state = clientData;
+    const char *wanted = (args && *args) ? args : "default.xsl";
+
+    /* same file as last time: keep the parsed stylesheet */
+    if (state->fname && !strcmp(wanted, state->fname))
+        return;
+
+    xfree(state->fname);
+    state->fname = xstrdup(wanted);
+
+    if (state->stylesheet_xsp)
+        xsltFreeStylesheet(state->stylesheet_xsp);
+    state->stylesheet_xsp =
+        xsltParseStylesheetFile((const xmlChar *) state->fname);
+}
+
+/* Release everything owned by a filter instance: the XML reader (which
+   filter_extract leaves open between records and only frees itself at
+   end of input), the parsed stylesheet, the stylesheet file name and
+   the state structure itself. */
+static void filter_destroy(void *clientData)
+{
+    struct filter_info *tinfo = clientData;
+
+    /* a reader may still be open if extraction stopped before EOF */
+    if (tinfo->reader)
+        xmlFreeTextReader(tinfo->reader);
+    if (tinfo->stylesheet_xsp)
+        xsltFreeStylesheet(tinfo->stylesheet_xsp);
+    xfree(tinfo->fname);
+    xfree(tinfo);
+}
+
+/* Read callback given to the XML reader during extraction: context is
+   the extract control block, and the request is forwarded unchanged to
+   its readf function on its file handle. */
+static int ioread_ex(void *context, char *buffer, int len)
+{
+    struct recExtractCtrl *ctrl = (struct recExtractCtrl *) context;
+
+    return ctrl->readf(ctrl->fh, buffer, len);
+}
+
+/* Close callback given to the XML reader during extraction.  It does
+   nothing with the context and always reports success (0). */
+static int ioclose_ex(void *context)
+{
+    (void) context;
+    return 0;
+}
+
+/* Walk ptr and its following siblings, descending into each node's
+   children first, and hand the content of every text node to the
+   control's tokenAdd callback via recWord (term_buf/term_len). */
+static void index_field(struct filter_info *tinfo, struct recExtractCtrl *ctrl,
+                        xmlNodePtr ptr, RecWord *recWord)
+{
+    xmlNodePtr node = ptr;
+
+    while (node)
+    {
+        index_field(tinfo, ctrl, node->children, recWord);
+        if (node->type == XML_TEXT_NODE)
+        {
+            recWord->term_buf = node->content;
+            recWord->term_len = strlen(node->content);
+            (*ctrl->tokenAdd)(recWord);
+        }
+        node = node->next;
+    }
+}
+
+/* Walk ptr and its following siblings (children first) looking for
+   <index> elements in the Zebra indexing namespace (zebra_index_ns).
+   For each such element that carries a "field" attribute, the text
+   below the element is indexed under that field name via index_field.
+   Other attributes on <index> (e.g. "xpath") currently have no effect. */
+static void index_node(struct filter_info *tinfo, struct recExtractCtrl *ctrl,
+                       xmlNodePtr ptr, RecWord *recWord)
+{
+    for (; ptr; ptr = ptr->next)
+    {
+        char *field_str = 0;
+        struct _xmlAttr *attr;
+
+        index_node(tinfo, ctrl, ptr->children, recWord);
+
+        /* only elements from the indexing namespace are of interest;
+           libxml2 strings are xmlChar (unsigned char), hence the casts */
+        if (ptr->type != XML_ELEMENT_NODE || !ptr->ns ||
+            strcmp((const char *) ptr->ns->href, zebra_index_ns))
+            continue;
+        if (strcmp((const char *) ptr->name, "index"))
+            continue;
+
+        /* if "field" occurs more than once the last occurrence wins */
+        for (attr = ptr->properties; attr; attr = attr->next)
+        {
+            if (!strcmp((const char *) attr->name, "field")
+                && attr->children && attr->children->type == XML_TEXT_NODE)
+                field_str = (char *) attr->children->content;
+        }
+        if (field_str)
+        {
+            recWord->attrStr = field_str;
+            index_field(tinfo, ctrl, ptr->children, recWord);
+        }
+    }
+}
+
+/* Extract (index) the next record from the input.
+
+   On the first record of an input (p->first_record) a fresh streaming
+   XML reader is created on top of the control's read function.  The
+   reader is then advanced until a node at the split depth is found;
+   that subtree is copied into a document of its own, transformed with
+   the configured stylesheet, and the transform result is scanned for
+   index elements (index_node).  The untransformed record is serialized
+   and handed to p->setStoreData.
+
+   Returns RECCTRL_EXTRACT_OK when a record was processed,
+   RECCTRL_EXTRACT_EOF when the input is exhausted, and
+   RECCTRL_EXTRACT_ERROR_GENERIC when no reader or stylesheet is
+   available, when the record subtree cannot be built, or when the
+   reader reports a parse error.
+
+   NOTE(review): the reader keeps the p passed with the first record as
+   its I/O context for all later calls - confirm that the caller hands
+   in the same control structure for every record of a file. */
+static int filter_extract(void *clientData, struct recExtractCtrl *p)
+{
+    static const char *params[] = {
+        "schema", "'http://indexdata.dk/zebra/indexing/1'",
+        0
+    };
+    struct filter_info *tinfo = clientData;
+    RecWord recWord;
+    int ret;
+
+    if (p->first_record)
+    {
+        if (tinfo->reader)
+            xmlFreeTextReader(tinfo->reader);
+        tinfo->reader = xmlReaderForIO(ioread_ex, ioclose_ex,
+                                       p /* I/O handler */,
+                                       0 /* URL */,
+                                       0 /* encoding */,
+                                       XML_PARSE_XINCLUDE);
+    }
+    if (!tinfo->reader || !tinfo->stylesheet_xsp)
+        return RECCTRL_EXTRACT_ERROR_GENERIC;
+
+    (*p->init)(p, &recWord);
+    recWord.reg_type = 'w';
+
+    while ((ret = xmlTextReaderRead(tinfo->reader)) == 1)
+    {
+        int type = xmlTextReaderNodeType(tinfo->reader);
+        int depth = xmlTextReaderDepth(tinfo->reader);
+        xmlNodePtr ptr, ptr2;
+        xmlDocPtr doc, resDoc;
+        xmlChar *buf_out = 0;
+        int len_out = 0;
+
+        /* skip everything that is not a record root */
+        if (tinfo->split_depth != 0 &&
+            !(type == XML_READER_TYPE_ELEMENT && tinfo->split_depth == depth))
+            continue;
+
+        /* build a stand-alone document holding a copy of the record;
+           the expanded node itself stays owned by the reader */
+        ptr = xmlTextReaderExpand(tinfo->reader);
+        ptr2 = ptr ? xmlCopyNode(ptr, 1) : 0;
+        doc = ptr2 ? xmlNewDoc((const xmlChar *) "1.0") : 0;
+        if (!doc)
+        {
+            /* do not store an empty document as if it were a record */
+            if (ptr2)
+                xmlFreeNode(ptr2);
+            return RECCTRL_EXTRACT_ERROR_GENERIC;
+        }
+        xmlDocSetRootElement(doc, ptr2);
+
+        /* a failed transform yields no index terms; the record itself
+           is still stored below */
+        resDoc = xsltApplyStylesheet(tinfo->stylesheet_xsp, doc, params);
+        if (resDoc)
+        {
+            if (p->flagShowRecords)
+            {
+                xmlDocDumpMemory(resDoc, &buf_out, &len_out);
+                if (buf_out)
+                {
+                    fwrite(buf_out, len_out, 1, stdout);
+                    xmlFree(buf_out);
+                }
+            }
+            index_node(tinfo, p, xmlDocGetRootElement(resDoc), &recWord);
+            xmlFreeDoc(resDoc);
+        }
+
+        buf_out = 0;
+        len_out = 0;
+        xmlDocDumpMemory(doc, &buf_out, &len_out);
+        if (p->flagShowRecords && buf_out)
+            fwrite(buf_out, len_out, 1, stdout);
+        /* the callback takes its own copy, so the dump can be freed */
+        (*p->setStoreData)(p, buf_out, len_out);
+        if (buf_out)
+            xmlFree(buf_out);
+
+        xmlFreeDoc(doc);
+        return RECCTRL_EXTRACT_OK;
+    }
+    xmlFreeTextReader(tinfo->reader);
+    tinfo->reader = 0;
+    /* xmlTextReaderRead: 0 is end of input, negative is a parse error */
+    return ret < 0 ? RECCTRL_EXTRACT_ERROR_GENERIC : RECCTRL_EXTRACT_EOF;
+}
+
+/* Read callback given to libxml2 during retrieval: context is the
+   retrieve control block, and the request is forwarded unchanged to its
+   readf function on its file handle. */
+static int ioread_ret(void *context, char *buffer, int len)
+{
+    struct recRetrieveCtrl *ctrl = (struct recRetrieveCtrl *) context;
+
+    return ctrl->readf(ctrl->fh, buffer, len);
+}
+
+/* Close callback given to libxml2 during retrieval.  It does nothing
+   with the context and always reports success (0). */
+static int ioclose_ret(void *context)
+{
+    (void) context;
+    return 0;
+}
+
+/* Present a stored record: parse it as XML, run it through the
+   configured stylesheet with the stylesheet parameter "schema" set to
+   the requested element set name ('F' when none was given), and return
+   the serialized result in p->rec_buf / p->rec_len.
+
+   The requested syntax (p->input_format) selects the output syntax:
+   none or XML gives VAL_TEXT_XML, SUTRS gives VAL_SUTRS, anything else
+   is refused with YAZ_BIB1_RECORD_SYNTAX_UNSUPP.
+
+   Always returns 0; failures are reported through p->diagnostic. */
+static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
+{
+    /* built per call: a shared static array would carry one request's
+       element set name (and its per-request memory) into the next */
+    const char *params[3];
+    struct filter_info *tinfo = clientData;
+    xmlDocPtr resDoc;
+    xmlDocPtr doc;
+
+    params[0] = "schema";
+    params[1] = "'F'";
+    params[2] = 0;
+
+    if (p->comp)
+    {
+        const char *esn;
+        char *esn_quoted;
+        char quote = '\'';
+
+        if (p->comp->which != Z_RecordComp_simple
+            || p->comp->u.simple->which != Z_ElementSetNames_generic)
+        {
+            p->diagnostic = YAZ_BIB1_PRESENT_COMP_SPEC_PARAMETER_UNSUPP;
+            return 0;
+        }
+        esn = p->comp->u.simple->u.generic;
+
+        /* the value is evaluated as an XPath expression, so the name
+           must stay inside one string literal: pick a quote character
+           that does not occur in it, and refuse names using both */
+        if (strchr(esn, '\''))
+        {
+            if (strchr(esn, '"'))
+            {
+                p->diagnostic = YAZ_BIB1_PRESENT_COMP_SPEC_PARAMETER_UNSUPP;
+                return 0;
+            }
+            quote = '"';
+        }
+        /* two quote characters plus terminating NUL */
+        esn_quoted = odr_malloc(p->odr, 3 + strlen(esn));
+        sprintf(esn_quoted, "%c%s%c", quote, esn, quote);
+        params[1] = esn_quoted;
+    }
+    if (!tinfo->stylesheet_xsp)
+    {
+        p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
+        return 0;
+    }
+    doc = xmlReadIO(ioread_ret, ioclose_ret, p /* I/O handler */,
+                    0 /* URL */,
+                    0 /* encoding */,
+                    XML_PARSE_XINCLUDE);
+    if (!doc)
+    {
+        p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
+        return 0;
+    }
+    resDoc = xsltApplyStylesheet(tinfo->stylesheet_xsp, doc, params);
+    if (!resDoc)
+    {
+        p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
+    }
+    else if (p->input_format == VAL_NONE || p->input_format == VAL_TEXT_XML
+             || p->input_format == VAL_SUTRS)
+    {
+        /* the decision is made on the requested syntax (input_format);
+           output_format is what this function reports back */
+        xmlChar *buf_out = 0;
+        int len_out = 0;
+
+        xmlDocDumpMemory(resDoc, &buf_out, &len_out);
+        if (!buf_out)
+        {
+            p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
+        }
+        else
+        {
+            p->output_format =
+                (p->input_format == VAL_SUTRS) ? VAL_SUTRS : VAL_TEXT_XML;
+            p->rec_len = len_out;
+            p->rec_buf = odr_malloc(p->odr, p->rec_len);
+            memcpy(p->rec_buf, buf_out, p->rec_len);
+            xmlFree(buf_out);
+        }
+    }
+    else
+    {
+        p->diagnostic = YAZ_BIB1_RECORD_SYNTAX_UNSUPP;
+    }
+    if (resDoc)
+        xmlFreeDoc(resDoc);
+    xmlFreeDoc(doc);
+    return 0;
+}
+
+static struct recType filter_type = { /* descriptor tying the XSLT filter's callbacks together */
+ 0, /* presumably the filter version (the indexer only accepts 0) - confirm against struct recType */
+ "xslt", /* presumably the filter name used in recordType settings - confirm against struct recType */
+ filter_init, /* allocates per-instance state */
+ filter_config, /* selects/loads the stylesheet */
+ filter_destroy, /* releases per-instance state */
+ filter_extract, /* indexing entry point */
+ filter_retrieve /* presentation entry point */
+};
+
+RecType /* exported table of filters provided by this file */
+#ifdef IDZEBRA_STATIC_XSLT
+idzebra_filter_xslt /* name referenced by the static registration code (extern RecType idzebra_filter_xslt[]) */
+#else
+idzebra_filter /* name used otherwise; presumably the symbol looked up when loaded as a module - confirm */
+#endif
+
+[] = {
+ &filter_type,
+ 0, /* null entry; presumably terminates the list - confirm against recTypeClass_add */
+};
-SUBDIRS=codec api xpath gils malxml config usmarc dmoz sort sort2 xelm cddb \
- rusmarc zsh marcxml charmap mbox espec
+SUBDIRS=codec api xslt xpath gils malxml config usmarc dmoz sort \
+ sort2 xelm cddb rusmarc zsh marcxml charmap mbox espec
-# $Id: Makefile.am,v 1.3 2004-12-02 12:04:49 adam Exp $
+# $Id: Makefile.am,v 1.4 2005-04-28 08:20:41 adam Exp $
check_PROGRAMS = t1 t2
t1_SOURCES = t1.c
t2_SOURCES = t2.c
-AM_CPPFLAGS = -I$(top_srcdir)/include $(YAZINC)
+AM_CPPFLAGS = -I$(top_srcdir)/include -I$(srcdir)/../api $(YAZINC)
zebralibs = \
../../index/libidzebra-api.la \
-/* $Id: t1.c,v 1.4 2005-01-15 19:38:36 adam Exp $
+/* $Id: t1.c,v 1.5 2005-04-28 08:20:41 adam Exp $
Copyright (C) 1995-2005
Index Data ApS
02111-1307, USA.
*/
-#include "../api/testlib.h"
+#include "testlib.h"
int main(int argc, char **argv)
{
-/* $Id: t2.c,v 1.3 2005-01-15 19:38:37 adam Exp $
+/* $Id: t2.c,v 1.4 2005-04-28 08:20:41 adam Exp $
Copyright (C) 1995-2005
Index Data ApS
02111-1307, USA.
*/
-#include "../api/testlib.h"
+#include "testlib.h"
int main(int argc, char **argv)
{
--- /dev/null
+# $Id: Makefile.am,v 1.1 2005-04-28 08:20:41 adam Exp $
+
+check_PROGRAMS = xslt1
+TESTS = $(check_PROGRAMS)
+
+EXTRA_DIST=zebra.cfg marc-col.xml marc1.xsl
+
+xslt1_SOURCES = xslt1.c
+
+AM_CPPFLAGS = -I$(srcdir)/../api -I$(top_srcdir)/include $(YAZINC)
+
+zebralibs = \
+ ../../index/libidzebra-api.la \
+ ../../rset/libidzebra-rset.la \
+ ../../recctrl/libidzebra-recctrl.la \
+ ../../dict/libidzebra-dict.la \
+ ../../isams/libidzebra-isams.la \
+ ../../isamc/libidzebra-isamc.la \
+ ../../isamb/libidzebra-isamb.la \
+ ../../data1/libidzebra-data1.la \
+ ../../bfile/libidzebra-bfile.la \
+ ../../dfa/libidzebra-dfa.la \
+ ../../util/libidzebra-util.la
+
+LDADD = ../api/libtestlib.a $(zebralibs) $(YAZLALIB)
+
--- /dev/null
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ version="1.0">
+<!-- Identity transform stylesheet -->
+
+<xsl:output indent="yes"
+ method="xml"
+ version="1.0"
+ encoding="UTF-8"/>
+
+ <xsl:template match="node()|@*">
+ <xsl:copy>
+ <xsl:apply-templates select="@*|node()"/>
+ </xsl:copy>
+ </xsl:template>
+
+</xsl:stylesheet>
--- /dev/null
+<collection xmlns="http://www.loc.gov/MARC21/slim">
+<record xmlns="http://www.loc.gov/MARC21/slim">
+ <leader>00366nam 22001698a 4500</leader>
+ <controlfield tag="001"> 11224466 </controlfield>
+ <controlfield tag="003">DLC </controlfield>
+ <controlfield tag="005">00000000000000.0 </controlfield>
+ <controlfield tag="008">910710c19910701nju 00010 eng </controlfield>
+ <datafield tag="010" ind1=" " ind2=" ">
+ <subfield code="a"> 11224466 </subfield>
+ </datafield>
+ <datafield tag="040" ind1=" " ind2=" ">
+ <subfield code="a">DLC</subfield>
+ <subfield code="c">DLC</subfield>
+ </datafield>
+ <datafield tag="050" ind1="0" ind2="0">
+ <subfield code="a">123-xyz</subfield>
+ </datafield>
+ <datafield tag="100" ind1="1" ind2="0">
+ <subfield code="a">Jack Collins</subfield>
+ </datafield>
+ <datafield tag="245" ind1="1" ind2="0">
+ <subfield code="a">How to program a computer</subfield>
+ </datafield>
+ <datafield tag="260" ind1="1" ind2=" ">
+ <subfield code="a">Penguin</subfield>
+ </datafield>
+ <datafield tag="263" ind1=" " ind2=" ">
+ <subfield code="a">8710</subfield>
+ </datafield>
+ <datafield tag="300" ind1=" " ind2=" ">
+ <subfield code="a">p. cm.</subfield>
+ </datafield>
+</record>
+<record xmlns="http://www.loc.gov/MARC21/slim">
+ <leader>00366nam 22001698a 4500</leader>
+ <controlfield tag="001"> 11224467 </controlfield>
+ <controlfield tag="003">DLC </controlfield>
+ <controlfield tag="005">00000000000000.0 </controlfield>
+ <controlfield tag="008">910710c19910701nju 00010 eng </controlfield>
+ <datafield tag="010" ind1=" " ind2=" ">
+ <subfield code="a"> 11224467 </subfield>
+ </datafield>
+ <datafield tag="040" ind1=" " ind2=" ">
+ <subfield code="a">DLC</subfield>
+ <subfield code="c">DLC</subfield>
+ </datafield>
+ <datafield tag="050" ind1="0" ind2="0">
+ <subfield code="a">123-xyz</subfield>
+ </datafield>
+ <datafield tag="100" ind1="1" ind2="0">
+ <subfield code="a">Jack Collins</subfield>
+ </datafield>
+ <datafield tag="245" ind1="1" ind2="0">
+ <subfield code="a">How to program a computer</subfield>
+ </datafield>
+ <datafield tag="260" ind1="1" ind2=" ">
+ <subfield code="a">Penguin</subfield>
+ </datafield>
+ <datafield tag="263" ind1=" " ind2=" ">
+ <subfield code="a">8710</subfield>
+ </datafield>
+ <datafield tag="300" ind1=" " ind2=" ">
+ <subfield code="a">p. cm.</subfield>
+ </datafield>
+</record>
+<record xmlns="http://www.loc.gov/MARC21/slim">
+ <leader>01369cam 2200265 i 4500</leader>
+ <controlfield tag="001"> 73090924 //r82 </controlfield>
+ <controlfield tag="003">DLC </controlfield>
+ <controlfield tag="005">19820524000000.0 </controlfield>
+ <controlfield tag="008">760609s1974 nyua b 10110 eng </controlfield>
+ <datafield tag="010" ind1=" " ind2=" ">
+ <subfield code="a"> 73090924 //r82</subfield>
+ </datafield>
+ <datafield tag="040" ind1=" " ind2=" ">
+ <subfield code="a">DLC</subfield>
+ <subfield code="c">DLC</subfield>
+ <subfield code="d">DLC</subfield>
+ </datafield>
+ <datafield tag="050" ind1="0" ind2="0">
+ <subfield code="a">RC71.3</subfield>
+ <subfield code="b">.W67 1971</subfield>
+ </datafield>
+ <datafield tag="082" ind1="0" ind2="0">
+ <subfield code="a">616.07/575/02854</subfield>
+ </datafield>
+ <datafield tag="111" ind1="2" ind2="0">
+ <subfield code="a">Workshop on Computer Processing of Dynamic Images from an Anger Scintillation Camera,</subfield>
+ <subfield code="c">Washington University,</subfield>
+ <subfield code="d">1971.</subfield>
+ </datafield>
+ <datafield tag="245" ind1="1" ind2="0">
+ <subfield code="a">Computer processing of dynamic images from an Anger scintillation camera :</subfield>
+ <subfield code="b">the proceedings of a workshop /</subfield>
+ <subfield code="c">cosponsored by the Biomedical Computer Laboratory and the Nuclear Medicine Division, Department of Radiology, School of Medicine, Washington University, St. Louis, January 18-22, 1971 ; edited by Kenneth B. Larson, Jerome R. Cox, Jr.</subfield>
+ </datafield>
+ <datafield tag="260" ind1="0" ind2=" ">
+ <subfield code="a">New York :</subfield>
+ <subfield code="b">Society of Nuclear Medicine,</subfield>
+ <subfield code="c">[c1974]</subfield>
+ </datafield>
+ <datafield tag="300" ind1=" " ind2=" ">
+ <subfield code="a">xiv, p. :</subfield>
+ <subfield code="b">ill. ;</subfield>
+ <subfield code="c">24 cm.</subfield>
+ </datafield>
+ <datafield tag="504" ind1=" " ind2=" ">
+ <subfield code="a">Includes bibliographical references and index.</subfield>
+ </datafield>
+ <datafield tag="650" ind1=" " ind2="0">
+ <subfield code="a">Radioisotope scanning</subfield>
+ <subfield code="x">Data processing</subfield>
+ <subfield code="x">Congresses.</subfield>
+ </datafield>
+ <datafield tag="650" ind1=" " ind2="0">
+ <subfield code="a">Scintillation cameras</subfield>
+ <subfield code="x">Congresses.</subfield>
+ </datafield>
+ <datafield tag="650" ind1=" " ind2="0">
+ <subfield code="a">Imaging systems in medicine</subfield>
+ <subfield code="x">Data processing</subfield>
+ <subfield code="x">Congresses.</subfield>
+ </datafield>
+ <datafield tag="700" ind1="1" ind2="0">
+ <subfield code="a">Larson, Kenneth B.</subfield>
+ </datafield>
+ <datafield tag="700" ind1="1" ind2="0">
+ <subfield code="a">Cox, Jerome R. </subfield>
+ </datafield>
+ <datafield tag="710" ind1="2" ind2="0">
+ <subfield code="a">Washington University, St. Louis.</subfield>
+ <subfield code="b">Biomedical Computer Laboratory.</subfield>
+ </datafield>
+ <datafield tag="710" ind1="2" ind2="0">
+ <subfield code="a">Washington University, St. Louis.</subfield>
+ <subfield code="b">Nuclear Medicine Division.</subfield>
+ </datafield>
+</record>
+</collection>
--- /dev/null
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ xmlns:m="http://www.loc.gov/MARC21/slim"
+ xmlns:z="http://indexdata.dk/zebra/indexing/1"
+ version="1.0">
+
+<xsl:output indent="yes"
+ method="xml"
+ version="1.0"
+ encoding="UTF-8"/>
+
+  <!-- Index the content of control field 001 under the "control" field.
+       The tag is compared as a string ('001'); an unquoted 001 is the
+       number 1.  xsl:apply-templates has no "match" attribute: with no
+       "select" it processes the child nodes, which here emits the
+       field's text through the built-in rules. -->
+  <xsl:template match="/m:record/m:controlfield[@tag='001']">
+    <z:index field="control">
+      <xsl:apply-templates/>
+    </z:index>
+  </xsl:template>
+
+  <!-- Index the text of data field 245 (all its subfields) under the
+       "title" field.  The tag is compared as a string ('245') rather
+       than as a number.  xsl:apply-templates has no "match" attribute:
+       with no "select" it processes the child nodes, which here emits
+       the subfield text through the built-in rules. -->
+  <xsl:template match="/m:record/m:datafield[@tag='245']">
+    <z:index field="title">
+      <xsl:apply-templates/>
+    </z:index>
+  </xsl:template>
+
+</xsl:stylesheet>
--- /dev/null
+profilePath: ${srcdir:-.}/../../tab
+
+modulePath: ../../recctrl/.libs
+
+recordType: xslt.marc1.xsl