From: Dennis Schafroth Date: Wed, 10 Nov 2010 14:14:33 +0000 (+0100) Subject: Stripped down to only use English/porter for now X-Git-Url: http://lists.indexdata.dk/?a=commitdiff_plain;h=HEAD;p=libstemmer_c.git Stripped down to only use English/porter for now --- diff --git a/libstemmer/modules.h b/libstemmer/modules.h index 7a1f685..42e16f7 100644 --- a/libstemmer/modules.h +++ b/libstemmer/modules.h @@ -8,12 +8,15 @@ * russian, spanish, swedish, turkish */ +/* #include "../src_c/stem_ISO_8859_1_danish.h" #include "../src_c/stem_UTF_8_danish.h" #include "../src_c/stem_ISO_8859_1_dutch.h" #include "../src_c/stem_UTF_8_dutch.h" #include "../src_c/stem_ISO_8859_1_english.h" +*/ #include "../src_c/stem_UTF_8_english.h" +/* #include "../src_c/stem_ISO_8859_1_finnish.h" #include "../src_c/stem_UTF_8_finnish.h" #include "../src_c/stem_ISO_8859_1_french.h" @@ -27,7 +30,9 @@ #include "../src_c/stem_ISO_8859_1_norwegian.h" #include "../src_c/stem_UTF_8_norwegian.h" #include "../src_c/stem_ISO_8859_1_porter.h" +*/ #include "../src_c/stem_UTF_8_porter.h" +/* #include "../src_c/stem_ISO_8859_1_portuguese.h" #include "../src_c/stem_UTF_8_portuguese.h" #include "../src_c/stem_ISO_8859_2_romanian.h" @@ -39,7 +44,7 @@ #include "../src_c/stem_ISO_8859_1_swedish.h" #include "../src_c/stem_UTF_8_swedish.h" #include "../src_c/stem_UTF_8_turkish.h" - +*/ typedef enum { ENC_UNKNOWN=0, ENC_ISO_8859_1, @@ -53,9 +58,12 @@ struct stemmer_encoding { stemmer_encoding_t enc; }; static struct stemmer_encoding encodings[] = { + +/* {"ISO_8859_1", ENC_ISO_8859_1}, {"ISO_8859_2", ENC_ISO_8859_2}, {"KOI8_R", ENC_KOI8_R}, +*/ {"UTF_8", ENC_UTF_8}, {0,ENC_UNKNOWN} }; @@ -68,6 +76,7 @@ struct stemmer_modules { int (*stem)(struct SN_env *); }; static struct stemmer_modules modules[] = { +/* {"da", ENC_ISO_8859_1, danish_ISO_8859_1_create_env, danish_ISO_8859_1_close_env, danish_ISO_8859_1_stem}, {"da", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem}, {"dan", ENC_ISO_8859_1, danish_ISO_8859_1_create_env, danish_ISO_8859_1_close_env, danish_ISO_8859_1_stem}, @@ -83,11 +92,13 @@ static struct stemmer_modules modules[] = { {"dutch", ENC_ISO_8859_1, dutch_ISO_8859_1_create_env, dutch_ISO_8859_1_close_env, dutch_ISO_8859_1_stem}, {"dutch", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, {"en", ENC_ISO_8859_1, english_ISO_8859_1_create_env, english_ISO_8859_1_close_env, english_ISO_8859_1_stem}, +*/ {"en", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem}, - {"eng", ENC_ISO_8859_1, english_ISO_8859_1_create_env, english_ISO_8859_1_close_env, english_ISO_8859_1_stem}, +// {"eng", ENC_ISO_8859_1, english_ISO_8859_1_create_env, english_ISO_8859_1_close_env, english_ISO_8859_1_stem}, {"eng", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem}, - {"english", ENC_ISO_8859_1, english_ISO_8859_1_create_env, english_ISO_8859_1_close_env, english_ISO_8859_1_stem}, +// {"english", ENC_ISO_8859_1, english_ISO_8859_1_create_env, english_ISO_8859_1_close_env, english_ISO_8859_1_stem}, {"english", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem}, +/* {"es", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem}, {"es", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, {"esl", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem}, @@ -167,24 +178,31 @@ static struct stemmer_modules modules[] = { {"tr", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem}, {"tur", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem}, {"turkish", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem}, +*/ {0,ENC_UNKNOWN,0,0,0} }; static const char * algorithm_names[] = { - "danish", - "dutch", - "english", + /* + "danish", + "dutch", + */ + "english", + /* "finnish", "french", "german", "hungarian", "italian", - "norwegian", + "norwegian", +*/ "porter", +/* "portuguese", "romanian", "russian", "spanish", "swedish", - "turkish", + "turkish", +*/ 0 };