1 # -*- coding: utf-8 -*-
3 # Copyright 2008 Zuza Software Foundation
5 # This file is part of translate.
7 # translate is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # translate is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with translate; if not, write to the Free Software
19 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""Interface for different indexing engines for the translate toolkit."""
34 """ TODO for indexing engines:
35 * get rid of jToolkit.glock dependency
36 * add partial matching at the beginning of a term
37 * do a proper cleanup - e.g.: the pylucene lockfiles remain in /tmp/
38 * do unittests for PyLucene v1.x
def _get_available_indexers():
    """get a list of the available supported indexing engines

    search through the translate.search.indexer package for modules derived from
    the CommonIndexer class

    Every readable ".py" file located next to this module is imported. A
    module contributes its classes only if it exposes a callable
    "is_available" that returns a true value.

    @return: all classes found that are derived from
        CommonIndexer.CommonDatabase (excluding that base class itself),
        in the order of the sorted module file names
    @rtype: list of CommonIndexer.CommonDatabase subclasses
    """
    result = []
    # get the package directory
    indexer_dir = os.path.dirname(os.path.abspath(__file__))
    # "__file__" usually carries a directory part and may end in ".pyc" or
    # ".pyo" - so only its bare module name can be compared with the entries
    # of the directory listing
    own_name = os.path.splitext(os.path.basename(__file__))[0]
    # sort the files in the directory by name - to make it determinable,
    # which indexing engine is chosen in case of multiple possibilities
    for mod_file in sorted(os.listdir(indexer_dir)):
        mod_path = os.path.join(indexer_dir, mod_file)
        if not (mod_file.endswith(".py") and os.path.isfile(mod_path)
                and os.access(mod_path, os.R_OK)):
            # no file / wrong extension / not readable -> skip it
            continue
        # strip the ".py" suffix
        mod_name = mod_file[:-3]
        if mod_name == own_name:
            # we should not import ourself
            continue
        # TODO - debug: "[Indexer]: trying to import indexing engines from '%s'" % mod_path
        try:
            module = __import__(mod_name, globals(), {})
        except ImportError:
            # maybe it is unusable or dependencies are missing
            continue
        # the module function "is_available" must return "True"
        is_available = getattr(module, "is_available", None)
        if not (callable(is_available) and is_available()):
            continue
        for item in dir(module):
            try:
                element = getattr(module, item)
            except TypeError:
                # this rarely happens: e.g. for 'item' being 'None'
                continue
            try:
                # the class must inherit CommonDatabase (without being the same)
                if issubclass(element, CommonIndexer.CommonDatabase) \
                        and element is not CommonIndexer.CommonDatabase:
                    # TODO: debug - "[Indexer]: indexing engine found in '%s': %s" % (mod_path, element)
                    result.append(element)
            except TypeError:
                # 'element' is not a class
                continue
    return result
94 def _sort_indexers_by_preference(indexer_classes
, pref_order
):
95 """sort a given list of indexer classes according to the given order
97 the list of preferred indexers are strings that should match the filenames
98 (without suppix ".py") of the respective modules (e.g.: XapianIndexer or
101 @param indexer_classes: the list of all available indexer classes
102 @type indexer_classes: list of CommonIndexer.CommonDatabase objects
103 @param pref_order: list of preferred indexer names
104 @type pref_order: str
105 @return: sorted list of indexer classes
106 @rtype: list of CommonIndexer.CommonDatabase objects
108 # define useful function for readability
109 get_indexer_name
= lambda indexer_class
: \
110 os
.path
.basename(indexer_class
.__module
__).split(".")[-1]
111 # use a copy to avoid side effects
112 avail_indexers
= indexer_classes
[:]
114 # go through all preferred items and move the matching indexers to 'result'
115 for choice
in pref_order
:
116 # find matching indexers
117 matches
= [ indexer
for indexer
in avail_indexers
118 if get_indexer_name(indexer
) == choice
]
119 # move all matching items to the 'result' queue
120 for match_item
in matches
:
121 result
.append(match_item
)
122 avail_indexers
.remove(match_item
)
123 # append the remaining indexers to the result
124 return result
+ avail_indexers
# store the available indexers - this is done only once during the first import
_AVAILABLE_INDEXERS = _get_available_indexers()

# True for a not-empty list - this should be used to check if indexing support
# is available
HAVE_INDEXER = bool(_AVAILABLE_INDEXERS)
def get_indexer(basedir, preference=None):
    """return an appropriate indexer for the given directory

    If the directory already exists, then we check, if one of the available
    indexers knows how to handle it (each one is tried with
    "create_allowed=False"). If none of them accepts the existing directory,
    then the whole directory is removed. Finally a database is created via
    the first indexer of the (preference-sorted) list.

    The following exceptions can be thrown:
        IndexError: there is no indexing engine available
        ValueError, OSError: raised by the first indexer while creating the
            database; the same errors raised while probing an existing
            directory are swallowed

    @param basedir: the parent directory of (possible) different indexing
        databases
    @type basedir: string
    @param preference: names of the preferred indexer modules (see
        _sort_indexers_by_preference); None means "no preference"
    @type preference: list of str
    @return: the result of calling the most appropriate indexer class with
        'basedir'
    @rtype: instance of a subclass of CommonIndexer.CommonDatabase
    """
    if not _AVAILABLE_INDEXERS:
        raise IndexError("Indexer: no indexing engines are available")
    if preference is None:
        preference = []
    # sort available indexers by preference
    preferred_indexers = _sort_indexers_by_preference(_AVAILABLE_INDEXERS,
                                                      preference)
    if os.path.exists(basedir):
        for index_class in preferred_indexers:
            try:
                # the first match is sufficient - but we do not want to
                # create a new database, if a database for another
                # indexing engine could exist. Thus we try it read-only first.
                return index_class(basedir, create_allowed=False)
            except (ValueError, OSError):
                # invalid type of database or some other error
                continue
        # we did not find an appropriate class that can handle the existing
        # location - so we remove the whole base directory
        shutil.rmtree(basedir, ignore_errors=True)
        # a single parenthesised argument prints the same text with the
        # print statement (Python 2) and the print function (Python 3)
        print("Deleting invalid indexing directory '%s'" % basedir)
    # the database does not exist or it was deleted (see above)
    # we choose the first available indexing engine
    return preferred_indexers[0](basedir)
182 if __name__
== "__main__":
183 # show all supported indexing engines (with fulfilled requirements)
184 for ONE_INDEX
in _AVAILABLE_INDEXERS
: