Initial import into git.
[galago.git] / cpp / galago / contrib / libstemmer_c / libstemmer / libstemmer_utf8.c
blob793a0816fd9e41df91f4806bc19b49531e06532f
2 #include <stdlib.h>
3 #include <string.h>
4 #include "../include/libstemmer.h"
5 #include "../runtime/api.h"
6 #include "modules_utf8.h"
/* State behind an sb_stemmer handle: the three entry points copied from
 * the selected algorithm module, and the Snowball environment they are
 * invoked on. */
struct sb_stemmer {
    struct SN_env *(*create)(void);  /* called once by sb_stemmer_new to obtain env */
    void (*close)(struct SN_env *);  /* called on env by sb_stemmer_delete */
    int (*stem)(struct SN_env *);    /* called on env by sb_stemmer_stem */

    struct SN_env *env;              /* result of create(); passed to stem/close */
};
16 extern const char **
17 sb_stemmer_list(void)
19 return algorithm_names;
22 static stemmer_encoding_t
23 sb_getenc(const char * charenc)
25 struct stemmer_encoding * encoding;
26 if (charenc == NULL) return ENC_UTF_8;
27 for (encoding = encodings; encoding->name != 0; encoding++) {
28 if (strcmp(encoding->name, charenc) == 0) break;
30 if (encoding->name == NULL) return ENC_UNKNOWN;
31 return encoding->enc;
34 extern struct sb_stemmer *
35 sb_stemmer_new(const char * algorithm, const char * charenc)
37 stemmer_encoding_t enc;
38 struct stemmer_modules * module;
39 struct sb_stemmer * stemmer =
40 (struct sb_stemmer *) malloc(sizeof(struct sb_stemmer));
41 if (stemmer == NULL) return NULL;
42 enc = sb_getenc(charenc);
43 if (enc == ENC_UNKNOWN) return NULL;
45 for (module = modules; module->name != 0; module++) {
46 if (strcmp(module->name, algorithm) == 0 && module->enc == enc) break;
48 if (module->name == NULL) return NULL;
50 stemmer->create = module->create;
51 stemmer->close = module->close;
52 stemmer->stem = module->stem;
54 stemmer->env = stemmer->create();
55 if (stemmer->env == NULL)
57 sb_stemmer_delete(stemmer);
58 return NULL;
61 return stemmer;
64 void
65 sb_stemmer_delete(struct sb_stemmer * stemmer)
67 if (stemmer == 0) return;
68 if (stemmer->close == 0) return;
69 stemmer->close(stemmer->env);
70 stemmer->close = 0;
71 free(stemmer);
74 const sb_symbol *
75 sb_stemmer_stem(struct sb_stemmer * stemmer, const sb_symbol * word, int size)
77 int ret;
78 if (SN_set_current(stemmer->env, size, (const symbol *)(word)))
80 stemmer->env->l = 0;
81 return NULL;
83 ret = stemmer->stem(stemmer->env);
84 if (ret < 0) return NULL;
85 stemmer->env->p[stemmer->env->l] = 0;
86 return (const sb_symbol *)(stemmer->env->p);
89 int
90 sb_stemmer_length(struct sb_stemmer * stemmer)
92 return stemmer->env->l;