From 4182598a937f80f980593b5739a8f22ce63ad2cd Mon Sep 17 00:00:00 2001
From: =?utf8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?=
 <pclouds@gmail.com>
Date: Fri, 13 Feb 2004 11:07:10 +0000
Subject: [PATCH] Libvspell now using new engine. Everything is stored in
 WordEntries.

Possible mispelled positions are generated.
---
 libvspell/Doxyfile       | 175 +++++++++++
 libvspell/Makefile.am    |   1 +
 libvspell/Makefile.in    |  81 ++---
 libvspell/dictionary.cpp |  49 ++-
 libvspell/posgen.h       |  33 ++
 libvspell/sections.cpp   | 116 +++++++
 libvspell/sentence.cpp   | 133 +++++++-
 libvspell/spell.cpp      |   4 +-
 libvspell/spell.h        | 200 +++++++++++-
 libvspell/syllable.h     |   2 +-
 libvspell/wfst.cpp       | 800 ++++++++++++++++++++---------------------------
 libvspell/wfst.h         |  81 +++--
 libvspell/wordnode.h     |  15 +-
 libvspell/words.cpp      | 244 +++++++++++++++
 14 files changed, 1367 insertions(+), 567 deletions(-)
 create mode 100644 libvspell/Doxyfile
 create mode 100644 libvspell/posgen.h
 create mode 100644 libvspell/sections.cpp
 rewrite libvspell/wfst.cpp (83%)
 create mode 100644 libvspell/words.cpp

diff --git a/libvspell/Doxyfile b/libvspell/Doxyfile
new file mode 100644
index 0000000..4895160
--- /dev/null
+++ b/libvspell/Doxyfile
@@ -0,0 +1,175 @@
+# Doxyfile 1.2.12-20011209
+
+#---------------------------------------------------------------------------
+# General configuration options
+#---------------------------------------------------------------------------
+PROJECT_NAME           = LibVspell
+PROJECT_NUMBER         = 
+OUTPUT_DIRECTORY       = 
+OUTPUT_LANGUAGE        = English
+EXTRACT_ALL            = YES
+EXTRACT_PRIVATE        = YES
+EXTRACT_STATIC         = YES
+EXTRACT_LOCAL_CLASSES  = YES
+HIDE_UNDOC_MEMBERS     = NO
+HIDE_UNDOC_CLASSES     = NO
+BRIEF_MEMBER_DESC      = YES
+REPEAT_BRIEF           = YES
+ALWAYS_DETAILED_SEC    = NO
+INLINE_INHERITED_MEMB  = NO
+FULL_PATH_NAMES        = YES
+STRIP_FROM_PATH        = $(PWD)/
+INTERNAL_DOCS          = NO
+STRIP_CODE_COMMENTS    = YES
+CASE_SENSE_NAMES       = NO
+SHORT_NAMES            = NO
+HIDE_SCOPE_NAMES       = NO
+VERBATIM_HEADERS       = YES
+SHOW_INCLUDE_FILES     = YES
+JAVADOC_AUTOBRIEF      = YES
+INHERIT_DOCS           = YES
+INLINE_INFO            = YES
+SORT_MEMBER_DOCS       = YES
+DISTRIBUTE_GROUP_DOC   = NO
+TAB_SIZE               = 8
+GENERATE_TODOLIST      = YES
+GENERATE_TESTLIST      = YES
+GENERATE_BUGLIST       = YES
+ALIASES                = 
+ENABLED_SECTIONS       = 
+MAX_INITIALIZER_LINES  = 30
+OPTIMIZE_OUTPUT_FOR_C  = NO
+SHOW_USED_FILES        = YES
+#---------------------------------------------------------------------------
+# configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+QUIET                  = NO
+WARNINGS               = YES
+WARN_IF_UNDOCUMENTED   = YES
+WARN_FORMAT            = "$file:$line: $text"
+WARN_LOGFILE           = 
+#---------------------------------------------------------------------------
+# configuration options related to the input files
+#---------------------------------------------------------------------------
+INPUT                  = wfst.cpp dictionary.cpp distance.cpp sentence.cpp spell.cpp debug.h       distance.h    sentence.h  syllable.h  wordnode.h dictionary.h  propername.h  spell.h     wfst.h
+
+
+FILE_PATTERNS          = *.h 
+RECURSIVE              = NO
+EXCLUDE_PATTERNS       = 
+EXAMPLE_PATH           = 
+EXAMPLE_PATTERNS       = 
+EXAMPLE_RECURSIVE      = NO
+IMAGE_PATH             = 
+INPUT_FILTER           = 
+FILTER_SOURCE_FILES    = NO
+#---------------------------------------------------------------------------
+# configuration options related to source browsing
+#---------------------------------------------------------------------------
+SOURCE_BROWSER         = NO
+INLINE_SOURCES         = NO
+REFERENCED_BY_RELATION = YES
+REFERENCES_RELATION    = YES
+#---------------------------------------------------------------------------
+# configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+ALPHABETICAL_INDEX     = YES
+COLS_IN_ALPHA_INDEX    = 5
+IGNORE_PREFIX          = 
+#---------------------------------------------------------------------------
+# configuration options related to the HTML output
+#---------------------------------------------------------------------------
+GENERATE_HTML          = YES
+HTML_OUTPUT            = 
+HTML_HEADER            = 
+HTML_FOOTER            = 
+HTML_STYLESHEET        = 
+HTML_ALIGN_MEMBERS     = YES
+GENERATE_HTMLHELP      = NO
+GENERATE_CHI           = NO
+BINARY_TOC             = NO
+TOC_EXPAND             = NO
+DISABLE_INDEX          = NO
+ENUM_VALUES_PER_LINE   = 4
+GENERATE_TREEVIEW      = YES
+TREEVIEW_WIDTH         = 250
+#---------------------------------------------------------------------------
+# configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+GENERATE_LATEX         = NO
+LATEX_OUTPUT           = 
+COMPACT_LATEX          = NO
+PAPER_TYPE             = a4wide
+EXTRA_PACKAGES         = 
+LATEX_HEADER           = 
+PDF_HYPERLINKS         = YES
+USE_PDFLATEX           = NO
+LATEX_BATCHMODE        = NO
+#---------------------------------------------------------------------------
+# configuration options related to the RTF output
+#---------------------------------------------------------------------------
+GENERATE_RTF           = NO
+RTF_OUTPUT             = 
+COMPACT_RTF            = NO
+RTF_HYPERLINKS         = YES
+RTF_STYLESHEET_FILE    = 
+RTF_EXTENSIONS_FILE    = 
+#---------------------------------------------------------------------------
+# configuration options related to the man page output
+#---------------------------------------------------------------------------
+GENERATE_MAN           = NO
+MAN_OUTPUT             = 
+MAN_EXTENSION          = .3
+MAN_LINKS              = NO
+#---------------------------------------------------------------------------
+# configuration options related to the XML output
+#---------------------------------------------------------------------------
+GENERATE_XML           = NO
+#---------------------------------------------------------------------------
+# configuration options for the AutoGen Definitions output
+#---------------------------------------------------------------------------
+GENERATE_AUTOGEN_DEF   = NO
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor   
+#---------------------------------------------------------------------------
+ENABLE_PREPROCESSING   = YES
+MACRO_EXPANSION        = YES
+EXPAND_ONLY_PREDEF     = YES
+SEARCH_INCLUDES        = YES
+INCLUDE_PATH           = 
+INCLUDE_FILE_PATTERNS  = 
+PREDEFINED             = 
+EXPAND_AS_DEFINED      = 
+SKIP_FUNCTION_MACROS   = YES
+#---------------------------------------------------------------------------
+# Configuration::addtions related to external references   
+#---------------------------------------------------------------------------
+TAGFILES               = qtools_docs/qtools.tag=../../qtools_docs/html
+GENERATE_TAGFILE       = 
+ALLEXTERNALS           = NO
+PERL_PATH              = /usr/bin/perl
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool   
+#---------------------------------------------------------------------------
+CLASS_DIAGRAMS         = YES
+HAVE_DOT               = YES
+CLASS_GRAPH            = YES
+COLLABORATION_GRAPH    = YES
+TEMPLATE_RELATIONS     = YES
+HIDE_UNDOC_RELATIONS   = YES
+INCLUDE_GRAPH          = YES
+INCLUDED_BY_GRAPH      = YES
+GRAPHICAL_HIERARCHY    = YES
+CALL_GRAPH = YES
+DOT_IMAGE_FORMAT       = png
+DOT_PATH               = 
+DOTFILE_DIRS           = 
+MAX_DOT_GRAPH_WIDTH    = 1024
+MAX_DOT_GRAPH_HEIGHT   = 1024
+GENERATE_LEGEND        = YES
+DOT_CLEANUP            = YES
+UML_LOOK = NO
+#---------------------------------------------------------------------------
+# Configuration::addtions related to the search engine   
+#---------------------------------------------------------------------------
+SEARCHENGINE           = NO
diff --git a/libvspell/Makefile.am b/libvspell/Makefile.am
index 3153c2b..9a4ea62 100644
--- a/libvspell/Makefile.am
+++ b/libvspell/Makefile.am
@@ -7,6 +7,7 @@ libvspell_a_SOURCES = \
 	distance.cpp distance.h\
 	spell.cpp spell.h\
 	wfst.cpp wfst.h\
+	words.cpp sections.cpp\
 	debug.h
 INCLUDES = -I$(top_builddir)/libsrilm
 libvspell_a_LIBADD = $(top_builddir)/libsrilm/libsrilm.a
diff --git a/libvspell/Makefile.in b/libvspell/Makefile.in
index 71a3ac9..fef0bbf 100644
--- a/libvspell/Makefile.in
+++ b/libvspell/Makefile.in
@@ -35,7 +35,6 @@ POST_INSTALL = :
 NORMAL_UNINSTALL = :
 PRE_UNINSTALL = :
 POST_UNINSTALL = :
-host_triplet = @host@
 ACLOCAL = @ACLOCAL@
 AMDEP_FALSE = @AMDEP_FALSE@
 AMDEP_TRUE = @AMDEP_TRUE@
@@ -55,7 +54,6 @@ CXXFLAGS = @CXXFLAGS@
 CYGPATH_W = @CYGPATH_W@
 DEFS = @DEFS@
 DEPDIR = @DEPDIR@
-ECHO = @ECHO@
 ECHO_C = @ECHO_C@
 ECHO_N = @ECHO_N@
 ECHO_T = @ECHO_T@
@@ -68,7 +66,6 @@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
 LDFLAGS = @LDFLAGS@
 LIBOBJS = @LIBOBJS@
 LIBS = @LIBS@
-LIBTOOL = @LIBTOOL@
 LN_S = @LN_S@
 LTLIBOBJS = @LTLIBOBJS@
 MAKEINFO = @MAKEINFO@
@@ -81,6 +78,7 @@ PACKAGE_TARNAME = @PACKAGE_TARNAME@
 PACKAGE_VERSION = @PACKAGE_VERSION@
 PATH_SEPARATOR = @PATH_SEPARATOR@
 PKG_CONFIG = @PKG_CONFIG@
+POW_LIB = @POW_LIB@
 RANLIB = @RANLIB@
 SET_MAKE = @SET_MAKE@
 SHELL = @SHELL@
@@ -100,18 +98,10 @@ am__include = @am__include@
 am__leading_dot = @am__leading_dot@
 am__quote = @am__quote@
 bindir = @bindir@
-build = @build@
 build_alias = @build_alias@
-build_cpu = @build_cpu@
-build_os = @build_os@
-build_vendor = @build_vendor@
 datadir = @datadir@
 exec_prefix = @exec_prefix@
-host = @host@
 host_alias = @host_alias@
-host_cpu = @host_cpu@
-host_os = @host_os@
-host_vendor = @host_vendor@
 includedir = @includedir@
 infodir = @infodir@
 install_sh = @install_sh@
@@ -135,6 +125,7 @@ libvspell_a_SOURCES = \
 	distance.cpp distance.h\
 	spell.cpp spell.h\
 	wfst.cpp wfst.h\
+	words.cpp sections.cpp\
 	debug.h
 
 INCLUDES = -I$(top_builddir)/libsrilm
@@ -151,7 +142,8 @@ libvspell_a_AR = $(AR) cru
 libvspell_a_DEPENDENCIES = $(top_builddir)/libsrilm/libsrilm.a
 am_libvspell_a_OBJECTS = syllable.$(OBJEXT) dictionary.$(OBJEXT) \
 	propername.$(OBJEXT) sentence.$(OBJEXT) distance.$(OBJEXT) \
-	spell.$(OBJEXT) wfst.$(OBJEXT)
+	spell.$(OBJEXT) wfst.$(OBJEXT) words.$(OBJEXT) \
+	sections.$(OBJEXT)
 libvspell_a_OBJECTS = $(am_libvspell_a_OBJECTS)
 
 DEFAULT_INCLUDES =  -I. -I$(srcdir) -I$(top_builddir)
@@ -159,23 +151,18 @@ depcomp = $(SHELL) $(top_srcdir)/depcomp
 am__depfiles_maybe = depfiles
 @AMDEP_TRUE@DEP_FILES = ./$(DEPDIR)/dictionary.Po \
 @AMDEP_TRUE@	./$(DEPDIR)/distance.Po ./$(DEPDIR)/propername.Po \
-@AMDEP_TRUE@	./$(DEPDIR)/sentence.Po ./$(DEPDIR)/spell.Po \
-@AMDEP_TRUE@	./$(DEPDIR)/syllable.Po ./$(DEPDIR)/wfst.Po
+@AMDEP_TRUE@	./$(DEPDIR)/sections.Po ./$(DEPDIR)/sentence.Po \
+@AMDEP_TRUE@	./$(DEPDIR)/spell.Po ./$(DEPDIR)/syllable.Po \
+@AMDEP_TRUE@	./$(DEPDIR)/wfst.Po ./$(DEPDIR)/words.Po
 CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
 	$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
-LTCXXCOMPILE = $(LIBTOOL) --mode=compile $(CXX) $(DEFS) \
-	$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
-	$(AM_CXXFLAGS) $(CXXFLAGS)
 CXXLD = $(CXX)
-CXXLINK = $(LIBTOOL) --mode=link $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) \
-	$(AM_LDFLAGS) $(LDFLAGS) -o $@
+CXXLINK = $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \
+	-o $@
 COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
 	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
-LTCOMPILE = $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) \
-	$(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
 CCLD = $(CC)
-LINK = $(LIBTOOL) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
-	$(AM_LDFLAGS) $(LDFLAGS) -o $@
+LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
 DIST_SOURCES = $(libvspell_a_SOURCES)
 DATA = $(data_DATA)
 
@@ -185,7 +172,7 @@ SOURCES = $(libvspell_a_SOURCES)
 all: all-am
 
 .SUFFIXES:
-.SUFFIXES: .cpp .lo .o .obj
+.SUFFIXES: .cpp .o .obj
 $(srcdir)/Makefile.in:  Makefile.am  $(top_srcdir)/configure.in $(ACLOCAL_M4)
 	cd $(top_srcdir) && \
 	  $(AUTOMAKE) --gnu  libvspell/Makefile
@@ -237,10 +224,12 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dictionary.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/distance.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/propername.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sections.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sentence.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/spell.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/syllable.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/wfst.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/words.Po@am__quote@
 
 distclean-depend:
 	-rm -rf ./$(DEPDIR)
@@ -266,26 +255,6 @@ distclean-depend:
 @AMDEP_TRUE@@am__fastdepCXX_FALSE@	depfile='$(DEPDIR)/$*.Po' tmpdepfile='$(DEPDIR)/$*.TPo' @AMDEPBACKSLASH@
 @AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCXX_FALSE@	$(CXXCOMPILE) -c -o $@ `if test -f '$<'; then $(CYGPATH_W) '$<'; else $(CYGPATH_W) '$(srcdir)/$<'; fi`
-
-.cpp.lo:
-@am__fastdepCXX_TRUE@	if $(LTCXXCOMPILE) -MT $@ -MD -MP -MF "$(DEPDIR)/$*.Tpo" \
-@am__fastdepCXX_TRUE@	  -c -o $@ `test -f '$<' || echo '$(srcdir)/'`$<; \
-@am__fastdepCXX_TRUE@	then mv -f "$(DEPDIR)/$*.Tpo" "$(DEPDIR)/$*.Plo"; \
-@am__fastdepCXX_TRUE@	else rm -f "$(DEPDIR)/$*.Tpo"; exit 1; \
-@am__fastdepCXX_TRUE@	fi
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@	source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@	depfile='$(DEPDIR)/$*.Plo' tmpdepfile='$(DEPDIR)/$*.TPlo' @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@	$(LTCXXCOMPILE) -c -o $@ `test -f '$<' || echo '$(srcdir)/'`$<
-
-mostlyclean-libtool:
-	-rm -f *.lo
-
-clean-libtool:
-	-rm -rf .libs _libs
-
-distclean-libtool:
-	-rm -f libtool
 uninstall-info-am:
 dataDATA_INSTALL = $(INSTALL_DATA)
 install-dataDATA: $(data_DATA)
@@ -423,12 +392,12 @@ maintainer-clean-generic:
 	@echo "it deletes files that may require special tools to rebuild."
 clean: clean-am
 
-clean-am: clean-generic clean-libLIBRARIES clean-libtool mostlyclean-am
+clean-am: clean-generic clean-libLIBRARIES mostlyclean-am
 
 distclean: distclean-am
 
 distclean-am: clean-am distclean-compile distclean-depend \
-	distclean-generic distclean-libtool distclean-tags
+	distclean-generic distclean-tags
 
 dvi: dvi-am
 
@@ -454,8 +423,7 @@ maintainer-clean-am: distclean-am maintainer-clean-generic
 
 mostlyclean: mostlyclean-am
 
-mostlyclean-am: mostlyclean-compile mostlyclean-generic \
-	mostlyclean-libtool
+mostlyclean-am: mostlyclean-compile mostlyclean-generic
 
 pdf: pdf-am
 
@@ -469,15 +437,14 @@ uninstall-am: uninstall-dataDATA uninstall-info-am \
 	uninstall-libLIBRARIES
 
 .PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
-	clean-libLIBRARIES clean-libtool ctags distclean \
-	distclean-compile distclean-depend distclean-generic \
-	distclean-libtool distclean-tags distdir dvi dvi-am info \
-	info-am install install-am install-data install-data-am \
-	install-dataDATA install-exec install-exec-am install-info \
-	install-info-am install-libLIBRARIES install-man install-strip \
-	installcheck installcheck-am installdirs maintainer-clean \
-	maintainer-clean-generic mostlyclean mostlyclean-compile \
-	mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+	clean-libLIBRARIES ctags distclean distclean-compile \
+	distclean-depend distclean-generic distclean-tags distdir dvi \
+	dvi-am info info-am install install-am install-data \
+	install-data-am install-dataDATA install-exec install-exec-am \
+	install-info install-info-am install-libLIBRARIES install-man \
+	install-strip installcheck installcheck-am installdirs \
+	maintainer-clean maintainer-clean-generic mostlyclean \
+	mostlyclean-compile mostlyclean-generic pdf pdf-am ps ps-am \
 	tags uninstall uninstall-am uninstall-dataDATA \
 	uninstall-info-am uninstall-libLIBRARIES
 
diff --git a/libvspell/dictionary.cpp b/libvspell/dictionary.cpp
index b27fec4..e3330f8 100644
--- a/libvspell/dictionary.cpp
+++ b/libvspell/dictionary.cpp
@@ -1,5 +1,6 @@
 #include "config.h"		// -*- tab-width: 2 -*-
 #include <iostream>
+#include <stdlib.h>
 #include <libsrilm/File.h>
 #include <fstream>
 #include <algorithm>
@@ -7,7 +8,6 @@
 #include "dictionary.h"
 #include "wordnode.h"
 #include "distance.h"
-#include <stdlib.h>
 #include <math.h>
 #ifndef _SArray_cc_
 #include <libsrilm/SArray.cc>
@@ -169,8 +169,12 @@ bool WordNode::load(const char* filename)
 		int i,nr_syllables = syllables.size();
 		node->info->syllables = new VocabIndex[nr_syllables+1];
 		node->info->syllables[nr_syllables] = Vocab_None;
-		for (int i = 0;i < nr_syllables;i ++)
+		//cerr << "Word " << toks[0] << "(" << node->info->id << "| ";
+		for (int i = 0;i < nr_syllables;i ++) {
 			node->info->syllables[i] = syllables[i];
+			//cerr << node->info->syllables[i] << ",";
+		}
+		//cerr << endl;
 		node->set_prob(atof(toks[2].c_str()));
 
 		if (csyllables != syllables) {
@@ -259,7 +263,7 @@ void WordNode::set_prob(float _prob)
 }
 	
 
-int WordNode::get_syllable_count()
+int WordNode::get_syllable_count() const
 {
 	ASSERT(info != NULL);
 	int n;
@@ -267,11 +271,11 @@ int WordNode::get_syllable_count()
 	return n;
 }
 
-void WordNode::get_syllables(vector<strid> &syllables)
+void WordNode::get_syllables(vector<strid> &syllables) const
 {
-	// ASSERT: info != NULL
+	ASSERT(info != NULL);
 	int n;
-	for (n = 0;info->syllables[n] != 0;n ++)
+	for (n = 0;info->syllables[n] != Vocab_None;n ++)
 		syllables.push_back(info->syllables[n]);
 }
 
@@ -358,6 +362,7 @@ bool FuzzyWordNode::fuzzy_get_next_with_syllable(strid str,
 }
 
 
+// the get_next()'s result must be included.
 bool FuzzyWordNode::fuzzy_get_next(strid str,
 																	 vector<DistanceNode>& _nodes) const
 {
@@ -425,8 +430,11 @@ strid StringArchive::operator[] (VocabString s)
 		return vi;
 	if (blocked) {
 		vi = rest->getIndex(s);
-		if (vi == Vocab_None) 
-			return rest->addWord(s)+dict.numWords();
+		if (vi == Vocab_None) {
+			int i = rest->addWord(s)+dict.numWords();
+			cerr << "New word " << s << " as " << i << endl;
+			return i;
+		}
 		return vi+dict.numWords();
 	}
 	return dict.addWord(s);
@@ -477,6 +485,31 @@ strpair make_strpair(strid str)
 	pair.cid = sarch[st];
 	return pair;
 }
+
+void WordNode::dump_next(std::ostream &os) const
+{
+	node_map_iterator iter(*nodes);
+	strid id;
+	WordNodePtr *pnode;
+	while ((pnode = iter.next(id))) {
+		os << (*pnode)->id << " ";
+	}
+}
+
+std::ostream& operator << (std::ostream &os,const WordNode::DistanceNode &dnode)
+{
+	os << *dnode.node;
+	return os;
+}
+std::ostream& operator << (std::ostream &os,const WordNode &node)
+{
+	std::vector<strid> syll;
+  node.get_syllables(syll);
+	for (int i = 0;i < syll.size();i ++)
+		os << sarch[syll[i]] << "(" << syll[i] << ") ";
+	return os;
+}
+
 /*
 	}
 */
diff --git a/libvspell/posgen.h b/libvspell/posgen.h
new file mode 100644
index 0000000..678abbd
--- /dev/null
+++ b/libvspell/posgen.h
@@ -0,0 +1,33 @@
+#ifndef __POSGEN_H__ // -*- tab-width: 2 mode: c++ -*-
+#define __POSGEN_H__
+
+#ifndef __SPELL_H__
+#include "spell.h"
+#endif
+
+#ifndef __VECTOR__
+#include <vector>
+#endif
+
+/**
+	 Position generator.
+ */
+
+class Generator
+{
+private:
+	int ii,i,nr_misspelled;
+	std::vector<int> misspelled_pos;
+	bool run;
+
+	int len;
+	SentenceRef st;
+
+public:
+	void init(const Sentence &st);
+	bool step(std::vector<int> &pos,int &len);
+	void done();
+};
+
+
+#endif
diff --git a/libvspell/sections.cpp b/libvspell/sections.cpp
new file mode 100644
index 0000000..7a9b5bf
--- /dev/null
+++ b/libvspell/sections.cpp
@@ -0,0 +1,116 @@
+#include "config.h"							// -*- tab-width: 2 -*-
+#include "wfst.h"
+#include <iterator>
+#include <algorithm>
+#include <sstream>
+#include <iostream>
+#include <fstream>
+#include <boost/format.hpp>
+#include <set>
+
+
+using namespace std;
+
+extern int ngram_length;
+
+/**
+ * dump a Sections
+ */
+
+std::ostream& operator << (std::ostream &os,const Sections &me)
+{
+	using namespace boost;
+	const Sentence &_sent = *me.st;
+	int i,ii,nn,n = _sent.get_syllable_count();
+	ii = 0;
+	nn = me.size();
+	for (i = 0;i < n;i ++) {
+		if (ii < nn && me[ii].start == i)
+			os << "[";
+		os << format("%s") % sarch[_sent[i].id];
+		if (ii < nn && me[ii].start+me[ii].len-1 == i) {
+			os << "]";
+			ii ++;
+		}
+		os << " ";
+	}
+	return os;
+}
+
+/**
+ Split Words into Sections
+ \param words input
+ \param sects output
+ */
+
+void Sections::construct(const Words &words)
+{
+	Sections& sects = *this;
+
+	// mark all possible words. All bounds left is really bounds.
+	// because we need at most n-1 boundary syllable for n-gram
+	// if two ambiguous sections is near than n-1 syllables, then merge them.
+	int i,ii,n,nn;
+
+	sects.st = words.st;
+
+	n = words.get_word_count();
+
+	vector<int> bound(n);
+
+	for (i = 0;i < n;i ++) {
+		nn = words.get_len(i);
+		for (ii = 0;ii < nn-2;ii ++)
+			bound[ii+i] = 1;
+	}
+	bound[bound.size()-1] = 1; // it's obvious there is a boundary in the end
+
+	//copy(bound.begin(),bound.end(),ostream_iterator<int>(cerr," "));
+
+	// "tokenize" _sent
+	int pos,len = bound.size();
+	Section sect;
+
+	sect.start = 0;
+	sect.len = 0;
+
+	for (pos = 0;pos < len;pos ++) {
+		// ignore "1" boundaries
+		if (bound[pos])
+			continue;
+
+		bool is_section;
+		// just write down and figure out what the formulas mean
+		sect.len = pos - sect.start + 1;
+		is_section = words.get_len(sect.start) > 2 || 
+			(words.get_len(sect.start) >= 2 && 
+			 words.get_fuzzy_count(sect.start,1) > 0);
+
+		if (is_section) {
+			// now merge two sections (this and the previous) if needed
+			if (!sects.empty()) {
+				Section &prev = sects.back();
+				if (sect.start - (prev.start + prev.len) < ngram_length-1)
+					prev.len = pos - prev.start + 1; // merge
+				else
+					sects.push_back(sect); // not merge
+			} else
+				sects.push_back(sect);	// sects is empty -> nothing to merge
+		}
+		sect.start = pos+1;
+	}
+
+	if (sects.empty()) {
+		sect.start=0;
+		sect.len = n-sect.start;
+		sects.push_back(sect);
+	}
+}
+
+std::ostream& operator <<(std::ostream &os,const Segmentation &seg)
+{
+	int i,n = seg.size();
+	for (i = 0;i < n;i ++)
+		os << "[" << seg[i].node << "] ";
+	return os;
+}
diff --git a/libvspell/sentence.cpp b/libvspell/sentence.cpp
index d6e7b1a..757a9c6 100644
--- a/libvspell/sentence.cpp
+++ b/libvspell/sentence.cpp
@@ -1,4 +1,5 @@
-#include "sentence.h"
+#include "sentence.h"						// -*- tab-width: 2 -*-
+#include "spell.h"
 using namespace std;
 
 void sentences_split(const string &_input,vector<string> &output)
@@ -36,3 +37,133 @@ void sentences_split(const string &_input,vector<string> &output)
   if (!input.empty())
     output.push_back(input);
 }
+
+void Sentence::standardize()
+{
+}
+
+/**
+ Split punctiations off the token
+ \param s input token
+ \param ret a sequence of tokens
+*/
+
+void Sentence::tokenize_punctuation(const string &s,vector<string> &ret)
+{
+	int npos,pos = 0,start = 0;
+	int len = s.size();
+	while (start < len) {
+		if (pos < len) {
+			npos = s.find_first_of("!#()'\";:.,?/",pos);
+			if (npos == string::npos)
+				npos = len;
+			pos = npos+1;
+			if (npos < len) {					// TODO: some checks here
+				// floating point
+				if ((s[npos] == '.' || s[npos] == ',') &&
+						(npos+1 < len && s[npos+1] >= '0' && s[npos+1] <= '9'))
+					continue;							// skip the dot/comma
+
+				// date
+				if ((s[npos] == '/') &&
+						(npos+1 < len && s[npos+1] >= '0' && s[npos+1] <= '9'))
+					continue;
+
+				// only split dot when it's in the end.
+				if (s[npos] == '.' && npos+1 != len)
+					continue;
+			}
+		} else
+			npos = len;
+
+		ret.push_back(s.substr(start,npos-start));
+		if (npos < len)
+			ret.push_back(s.substr(npos,1));
+		start = npos+1;
+	}
+}
+
+/**
+ Convert a string to a sequence of token
+*/
+
+void Sentence::tokenize()
+{
+	string &st = sent_;
+	int pos = 0,len = st.size();
+	bool started = false;					// we have found start of a new word
+	Syllable sy;
+	sy.span = 1;
+	sy.sent_ = this;
+	sy.sid = -1;
+
+	while (pos < len) {
+		if (started) {
+			pos = st.find(' ',sy.start);
+			if (pos == string::npos)
+				pos = len;
+			vector<string> ss;
+			int i,n;
+			tokenize_punctuation(st.substr(sy.start,pos-sy.start),ss);
+			n = ss.size();
+			for (i = 0;i < n;i ++) {
+				string &s = ss[i];
+				sy.id = sarch[s];
+				transform(s.begin(),s.end(),s.begin(),viet_tolower);
+				sy.cid = sarch[s];
+				syllables.push_back(sy);
+				sy.start += s.size();
+			}
+			started = false;
+		} else {
+			if (st[pos] == ' ')
+				pos++;
+			else {
+				started = true;
+				sy.start = pos;
+			}
+		}
+	}
+}
+
+
+/**
+	 Dump a Sentence
+*/
+
+ostream& operator <<(ostream &os, const Sentence &st)
+{
+	int cc,i,n = st.get_syllable_count();
+	/*
+		for (i = 0;i < n;i ++) {
+		cout << *st[i] << " ";
+		if (items[i].flags & SEGM_SEPARATOR)
+		cout << "| ";
+		}
+		cout << endl;
+	*/
+	/*
+	for (cc = i = 0;i < n;i ++) {
+		int c = st.words[st[i]].node(st).node->get_syllable_count();
+		for (int k = 0;k < c;k ++) {
+			if (k) os << "_";
+			os << sarch[st[cc+k].get_id()];
+		}
+		cc += c;
+		os << " ";
+	}
+	os << prob << endl;
+	*/
+}
+
+/*
+	std::string& Sentence::const_iterator::operator++()
+	{
+	}
+
+	std::string Sentence::const_iterator::operator++(int)
+	{
+	}
+*/
+
+
diff --git a/libvspell/spell.cpp b/libvspell/spell.cpp
index a64c97f..f51ed33 100644
--- a/libvspell/spell.cpp
+++ b/libvspell/spell.cpp
@@ -54,12 +54,12 @@ void spell_check1(Sentence &st,Suggestions &sugg)
 
 void spell_check2(Sentence &st,Segmentation &seg,Suggestions &sugg)
 {
-	int i,n = seg.items.size();
+	int i,n = seg.size();
 	int cc = 0;
 
 	for (i = 0;i < n;i ++) {
 		vector<strid> sylls;
-		int len = seg.items[i].state->get_syllable_count();
+		int len = seg[i].node->get_syllable_count();
 		if (len == 1) {
 			cc += len;
 			continue;
diff --git a/libvspell/spell.h b/libvspell/spell.h
index 28ca0b7..d0b4121 100644
--- a/libvspell/spell.h
+++ b/libvspell/spell.h
@@ -5,6 +5,10 @@
 #include "dictionary.h"
 #endif
 
+#ifndef __WORDNODE_H__
+#include "wordnode.h"
+#endif
+
 #ifndef __VECTOR__
 #include <vector>
 #endif
@@ -12,6 +16,173 @@
 #include <string>
 #endif
 
+class Sentence;
+
+/**
+	 Store information of a word in a sentence.
+ */
+
+struct WordEntry {
+	int pos;											/// syllable index
+	int len;											/// word len
+	int fuzid; /// to identify different WordEntry with the same pos/len.
+																/// fuzid is a mask of fuzzy/exact match.
+	int id;												/// index in WordEntries
+
+	WordNode::DistanceNode node;	/// Word content
+
+	bool operator < (const WordEntry &we) const {
+		return pos != we.pos ? pos < we.pos :
+			(len != we.len ? len < we.len : 
+			 (fuzid != we.fuzid ? fuzid < we.fuzid : node.node < we.node.node));
+	}
+};
+
+std::ostream& operator << (std::ostream &os,const WordEntry &we);
+
+typedef WordEntry* WordEntryRef;
+typedef std::vector<WordEntry> WordEntries;
+typedef std::vector<WordEntryRef> WordEntryRefs;
+//struct WordEntries:public std::vector<WordEntry> {
+//};
+
+/**
+	 Store all WordEntry pointers which started at a specified pos, 
+	 and have specified len
+ */
+
+struct WordInfo {
+	WordEntry* exact_match;				
+	WordEntryRefs fuzzy_match;	 
+};
+
+/**
+	 Store WordInfo(s) which have a specified length
+ */
+
+class WordInfos : public std::vector<WordInfo*> {
+public:
+	int exact_len;
+	WordEntryRefs fuzzy_map; /// contains all WordEntry which are fuzzy at this position
+	WordEntryRefs we;	/// contains all WordEntry which started at this pos
+};
+
+/**
+	 Store WordInfos(s) started at specified positions.
+ */
+
+class Words:public std::vector<WordInfos*> {
+public:
+
+	WordEntries *we;
+	Sentence *st;
+
+	void construct(const Sentence &st);
+
+	/// Get the number of available positions, from 0 to n-1
+	int get_word_count() const { 
+		return size(); 
+	}
+
+	/**
+		 Get maximal length of words at specified position.
+		 \param i specify a position in sentence
+	*/
+	int get_len(int i) const {
+		return (*this)[i]->size(); 
+	}
+
+	/**
+		 Get the length of the exact words at specified position.
+		 \param i specify a position in sentence
+	*/
+	int get_exact_len(int i) const {
+		return (*this)[i]->exact_len; 
+	}
+
+	/**
+		 Get fuzzy map at specified position.
+		 \param i specify a position
+	 */
+
+	const WordEntryRefs& get_fuzzy_map(int i) const {
+		return (*this)[i]->fuzzy_map; 
+	}
+
+	/**
+		 Get number of fuzzy words at specified (pos,len).
+		 \param i specify pos
+		 \param l specify len
+	 */
+	int get_fuzzy_count(int i,int l) const { 
+		return (*(*this)[i])[l]->fuzzy_match.size();
+	}
+
+	/**
+		 Get the first WordEntry at specified pos.
+		 \param pos specify a position.
+	 */
+
+	WordEntryRefs& get_we(int pos) {
+		return (*this)[pos]->we;
+	}
+
+	const WordEntry& get_we_fuzzy(int i,int l,int f) const {
+		return *(*(*this)[i])[l]->fuzzy_match[f];
+	}
+
+	const WordEntry* get_we_exact(int i,int l) const {
+		return (*(*this)[i])[l]->exact_match;
+	}
+
+	/**
+		 Get the Node of a specified fuzzy match  (pos,len,fuz)
+		 \param i is pos
+		 \param l is len
+		 \param f is fuz
+	 */
+
+	WordNode::DistanceNode& get_fuzzy(int i,int l,int f) {
+		return (*(*this)[i])[l]->fuzzy_match[f]->node;
+	}
+
+	/// a const version of get_fuzzy()
+	const WordNode::DistanceNode& get_fuzzy(int i,int l,int f) const{
+		return (*(*this)[i])[l]->fuzzy_match[f]->node;
+	}
+
+	~Words();											// WARN: destroy all.
+
+	/**
+		 Construct Words based on member we.
+		 we must be valid.
+	 */
+
+	void construct();	
+
+	/**
+		 Construct Words based on another Words.
+		 Only exact matches are copied.
+		 \param w specify the "template" Words
+	 */
+
+	void based_on(const Words &w);
+
+	/**
+		 Add WordEntry w into Words.
+	 */
+
+	void add(WordEntry &w);
+  
+	friend std::ostream& operator << (std::ostream& os,const Words &w);
+}; 
+// Words[from][len].fuzzy_match[i]
+
+/**
+	 Sentence is used to store a sequence of syllables.
+	 Sentence and Words will keep all necessary info for a spelling checker.
+ */
+
 class Sentence
 {
 public:
@@ -51,26 +222,31 @@ public:
   //  Syllable& operator[] (int i) { return syllables[i]; }
   bool is_contiguous(int i);		// i & i+1 is contiguous ?
   void merge(int i);
+
 };
 
-struct Segmentation
-{
-  struct Item {
-    int flags;			// Separator mark
-    int distance;		// from ed() or fuzzy syllable
-    WordNodePtr state; // used to get prob.
+typedef Sentence* SentenceRef;
 
-    Item():flags(0),distance(0),state(NULL) {}
-  };
+/**
+	 Segmentation store a sequence of WordEntry index.
+	 From WordEntry we can get the real word.
+ */
 
-  std::vector<Item> items;
-  float prob;			// total prob
-  int distance;			// total distance
+struct Segmentation : public std::vector<int> 
+{
+	WordEntries *we;							/// WordEntries associated with Segmentation
+  float prob;										/// total prob
+  int distance;									/// total distance
 
   Segmentation():prob(0),distance(0) {}
-  void print(std::ostream &os,const Sentence &st);
+	const WordEntry& operator[] (int id) const {
+		return (*we)[std::vector<int>::operator[](id)];
+	}
+	friend std::ostream& operator <<(std::ostream &os,const Segmentation &seg);
 };
 
+typedef std::vector<Segmentation> Segmentations;
+
 struct Suggestion {
   int id;
   std::vector<strid> suggestions;
diff --git a/libvspell/syllable.h b/libvspell/syllable.h
index 78f2220..a66d389 100644
--- a/libvspell/syllable.h
+++ b/libvspell/syllable.h
@@ -47,7 +47,7 @@ public:
   void standardize(std::string str);
   void print();
   strid to_id();
-  string to_str();
+  std::string to_str();
 };
 
 extern char *vowels[];
diff --git a/libvspell/wfst.cpp b/libvspell/wfst.cpp
dissimilarity index 83%
index 702e2dd..db47658 100644
--- a/libvspell/wfst.cpp
+++ b/libvspell/wfst.cpp
@@ -1,454 +1,346 @@
-#include "config.h" // -*- tab-width: 2 -*-
-#include "wfst.h"
-#include <iterator>
-#include <algorithm>
-#include <sstream>
-#include <iostream>
-#include <fstream>
-
-using namespace std;
-
-
-void Sentence::standardize()
-{
-}
-
-void Sentence::tokenize_punctuation(const string &s,vector<string> &ret)
-{
-	int npos,pos = 0,start = 0;
-	int len = s.size();
-	while (start < len) {
-		if (pos < len) {
-			npos = s.find_first_of("!#()'\";:.,?/",pos);
-			if (npos == string::npos)
-				npos = len;
-			pos = npos+1;
-			if (npos < len) {		// TODO: some checks here
-				// floating point
-				if ((s[npos] == '.' || s[npos] == ',') &&
-						(npos+1 < len && s[npos+1] >= '0' && s[npos+1] <= '9'))
-					continue;			// skip the dot/comma
-
-				// date
-				if ((s[npos] == '/') &&
-						(npos+1 < len && s[npos+1] >= '0' && s[npos+1] <= '9'))
-					continue;
-
-				// only split dot when it's in the end.
-				if (s[npos] == '.' && npos+1 != len)
-					continue;
-			}
-		} else
-			npos = len;
-
-		ret.push_back(s.substr(start,npos-start));
-		if (npos < len)
-			ret.push_back(s.substr(npos,1));
-		start = npos+1;
-	}
-}
-
-void Sentence::tokenize()
-{
-	string &st = sent_;
-	int pos = 0,len = st.size();
-	bool started = false;		// we have found start of a new word
-	Syllable sy;
-	sy.span = 1;
-	sy.sent_ = this;
-	sy.sid = -1;
-
-	while (pos < len) {
-		if (started) {
-			pos = st.find(' ',sy.start);
-			if (pos == string::npos)
-				pos = len;
-			vector<string> ss;
-			int i,n;
-			tokenize_punctuation(st.substr(sy.start,pos-sy.start),ss);
-			n = ss.size();
-			for (i = 0;i < n;i ++) {
-				string &s = ss[i];
-				sy.id = sarch[s];
-				transform(s.begin(),s.end(),s.begin(),viet_tolower);
-				sy.cid = sarch[s];
-				syllables.push_back(sy);
-				sy.start += s.size();
-			}
-			started = false;
-		} else {
-			if (st[pos] == ' ')
-				pos++;
-			else {
-				started = true;
-				sy.start = pos;
-			}
-		}
-	}
-}
-
-/*
-	std::string& Sentence::const_iterator::operator++()
-	{
-	}
-
-	std::string Sentence::const_iterator::operator++(int)
-	{
-	}
-*/
-
-class SegmentationComparator
-{
-public:
-	bool operator() (const Segmentation &seg1,const Segmentation &seg2) {
-		return seg1.prob > seg2.prob;
-	}
-};
-
-void WFST::segment_best(const Sentence &_sent,
-												const Words &words,
-												Segmentation &seps)
-{
-	vector<Segmentation> segms;
-
-	// Try to split _sent into small parts
-	// mark all multiple-syllable words
-	// those leaved unmarked are obviously word-segmented correctly
-	vector<int> marks(_sent.get_syllable_count());
-	vector<int> bound(_sent.get_syllable_count());
-	int i,ii,n,nn;
-	n = words.get_word_count();
-	for (i = 0;i < n;i ++) {
-		nn = words.get_len(i);
-		if (nn > 1)
-			for (ii = 0;ii < nn;ii ++) 
-				marks[i+ii] = 1;
-		for (ii = 0;ii < nn-1;ii ++)
-			bound[ii+i] = 1;
-	}
-
-	for (i = 0;i < n;i ++) {
-		printf("%s",sarch[_sent[i].id]);
-		if (i+1<n) {
-			if (!bound[i])
-				printf("|");
-			else
-				printf(" ");
-		}
-	}
-	printf("\n");
-	return;
-
-	/*
-		for (i = 0;i < marks.size();i ++)
-		if (marks[i])
-		cout << sarch[_sent[i].get_id()] << " ";
-		else
-		cout << "*" << sarch[_sent[i].get_id()] << "* ";
-		cout << endl;
-	*/
-
-	// "tokenize" _sent
-	int pos = 0,len = marks.size();
-	bool started = false;
-	int start = 0;
-
-	seps.items.reserve(20);
-
-	while (pos < len) {
-		if (!marks[pos]) {
-			seps.items.push_back(Segmentation::Item());
-			Segmentation::Item &item = seps.items.back();
-			item.state = words.get_fuzzy(pos,0,0).node;
-			item.distance = 0/*FIXME*/;
-			item.flags |= SEGM_SEPARATOR;
-			seps.prob += item.state->get_prob();
-			int len = seps.items.size();
-			if (len > 1 && ngram_enabled) {
-				VocabIndex vi[2];
-				vi[0] = seps.items[len-1].state->get_id();
-				vi[1] = Vocab_None;
-				seps.prob += -ngram.wordProb(seps.items[len-2].state->get_id(),vi);
-			}
-			seps.distance += item.distance;
-			pos++;
-		} else {
-			start = pos;
-			while (pos < len && marks[pos] == 1) pos ++;
-			segms.clear();
-			segment_all1(_sent,words,start,pos,segms);
-			if (!segms.empty()) {
-				int len = seps.items.size();
-				if (len > 0 && ngram_enabled) {
-					VocabIndex vi[2];
-					vi[0] = segms[0].items[0].state->get_id();
-					vi[1] = Vocab_None;
-					seps.prob += -ngram.wordProb(seps.items[len-1].state->get_id(),vi);
-				}
-				seps.items.insert(seps.items.end(),
-													segms[0].items.begin(),
-													segms[0].items.end());
-				seps.prob += segms[0].prob;
-				seps.distance += segms[0].distance;
-			}
-		}
-	}
-
-	/*
-		vector<Segmentation>::iterator i;
-		for (i = segms.begin();i != segms.end(); ++i)
-		i->print(_sent);
-	*/
-}
-
-
-// find all possible words
-// words must be cleared before calling this function
-void WFST::get_all_words(const Sentence &sent,Words &words)
-{
-	int i,n,ii,nn,k;
-	vector<WordNode::DistanceNode> states,old_states;
-	vector<strid> syll;
-
-	states.reserve(10);
-	old_states.reserve(10);
-
-	n = sent.get_syllable_count();
-
-	syll.resize(n);
-	for (i = 0;i < n;i ++)
-		syll[i] = sent[i].get_cid();
-
-	for (i = 0;i < n;i ++) {
-
-		// make a word
-		words.push_back(new WordInfos());
-		WordInfos &winfos = *words.back();
-
-		// finding all possible words started by the i-th syllable.
-
-		// init states with root node
-		states.clear();
-		states.push_back(WordNode::DistanceNode(wl));
-
-		k = 0;
-		while (!states.empty() && i+k < n) {
-
-			// make a winfo
-			winfos.push_back(new WordInfo());
-			WordInfo &winfo = *winfos.back();
-
-			// save states
-			old_states = states;
-
-			// clear states, ready for new states
-			states.clear();
-
-			// get next new states -> states
-			nn = old_states.size();
-			for (ii = 0;ii < nn;ii ++) {
-				WordNode::DistanceNode &state = old_states[ii];
-				WordNodePtr exact_node = state.node->get_next(syll[i+k]);
-				if (exact_node) states.push_back(exact_node);
-				state.node->fuzzy_get_next(syll[i+k],states);
-			}
-
-			// fill winfo
-			nn = states.size();
-			for (ii = 0;ii < nn;ii ++)
-				if (states[ii].node->get_prob() >= 0) {
-					winfo.fuzzy_match.push_back(states[ii]);
-				}
-
-			// unknown word
-			if (k == 0 && winfo.fuzzy_match.empty()) {
-				WordNode::DistanceNode dn;
-				dn.node = wl->get_next(unk_id);
-				dn.distance = 0;
-				winfo.fuzzy_match.push_back(dn);
-			}
-
-			// remove duplicated items
-			sort(winfo.fuzzy_match.begin(),winfo.fuzzy_match.end());
-			winfo.fuzzy_match.erase(unique(winfo.fuzzy_match.begin(),
-																		 winfo.fuzzy_match.end()),
-															winfo.fuzzy_match.end());
-
-			k ++;
-		}
-		// remove the last if it is empty
-		while (!winfos.empty() && winfos.back()->fuzzy_match.empty())
-			winfos.pop_back();
-	}
-
-}
-
-void Words::print() const
-{
-	int i, nn = get_word_count();
-	for (i = 0;i < nn;i ++) {
-		int nnn = get_len(i);
-		cerr << "From " << i << endl;
-		for (int ii = 0;ii < nnn;ii ++) {
-			int nnnn = get_fuzzy_count(i,ii);
-			cerr << "Len " << ii << endl;
-			for (int iii = 0;iii < nnnn;iii ++) {
-				cerr << get_fuzzy(i,ii,iii).node->get_id( ) << "-" << sarch[get_fuzzy(i,ii,iii).node->get_id()] << " ";
-				cerr << get_fuzzy(i,ii,iii).node->get_syllable_count() << " ";
-				cerr << get_fuzzy(i,ii,iii).distance << " ";
-				cerr << get_fuzzy(i,ii,iii).node->get_prob() << endl;
-			}
-		}
-	}
-}
-
-// WFST (Weighted Finite State Transducer) implementation
-// TUNE: think about genetic/greedy. Vietnamese is almost one-syllable words..
-// we find where is likely a start of a word, then try to construct word
-// and check if others are still valid words.
-
-// the last item is always the incompleted item. We will try to complete
-// a word from the item. If we reach the end of sentence, we will remove it
-// from segs
-
-struct Trace
-{
-	Segmentation s;
-	int next_syllable;
-	Trace():next_syllable(0) {}
-};
-
-void WFST::segment_all(const Sentence &sent,vector<Segmentation> &result)
-{
-	Words words;
-	get_all_words(sent,words);
-	segment_all1(sent,words,0,sent.get_syllable_count(),result);
-	/*
-		int nn = words.size();
-		for (i = 0;i < nn;i ++) {
-		int nnn = words[i].size();
-		cerr << "From " << i << endl;
-		for (int ii = 0;ii < nnn;ii ++) {
-		int nnnn = words[i][ii].fuzzy_match.size();
-		cerr << "Len " << ii << endl;
-		for (int iii = 0;iii < nnnn;iii ++) {
-		cerr << words[i][ii].fuzzy_match[iii].distance << " ";
-		cerr << words[i][ii].fuzzy_match[iii].node->get_prob() << endl;
-		}
-		}
-		}
-	*/
-
-}
-
-void WFST::segment_all1(const Sentence &sent,
-												const Words &words,
-												int from,int to,
-												std::vector<Segmentation> &result)
-{
-	int nr_syllables = to;
-	vector<Trace> segs;		// used to store all incomplete seg
-	int i;
-	int count = 0;
-	Trace trace;
-
-	trace.next_syllable = from;
-	//Segmentation start_seg;
-	segs.reserve(100);
-	segs.push_back(trace);	// empty seg
-
-	while (!segs.empty()) {
-		// get one
-		trace = segs.back();
-		segs.pop_back();
-		Segmentation seg = trace.s;
-		int next_syllable = trace.next_syllable;
-//		cerr << segs.size() << " " << count << endl;
-
-		// segmentation completed. throw it away
-		// TUNE: do we need these? -> yes we do
-		// in case of bad/incomplete segmentations
-		if (next_syllable == nr_syllables)
-			continue;
-
-		int nr_size_1 = words.get_len(next_syllable);
-
-		// word with the length of (size_1+1) syllables
-		for (int size_1 = 0;size_1 < nr_size_1; size_1 ++) {
-			int nr_fuzzy = words.get_fuzzy_count(next_syllable,size_1);
-
-			// number of words
-			for (i = 0;i < nr_fuzzy;i ++) {
-				const WordNode::DistanceNode &next_state = 
-					words.get_fuzzy(next_syllable,size_1,i);
-
-				// New segmentation for longer incomplete word
-				Trace newseg;
-				newseg = trace;
-				newseg.s.items.push_back(Segmentation::Item());
-				Segmentation::Item &item = newseg.s.items.back();
-				item.state = next_state.node; // move to the next node
-				item.distance = 0/*FIXME*/;
-				item.flags |= SEGM_SEPARATOR;
-
-				// Segmentation's parameters
-				newseg.s.distance += item.distance;
-				newseg.s.prob += item.state->get_prob();
-				int len = newseg.s.items.size();
-				if (len > 1 && ngram_enabled) {
-					VocabIndex vi[2];
-					vi[0] = newseg.s.items[len-1].state->get_id();
-					vi[1] = Vocab_None;
-					newseg.s.prob += -ngram.wordProb(newseg.s.items[len-2].state->get_id(),vi);
-				}
-				newseg.next_syllable += size_1+1;
-				if (newseg.next_syllable == nr_syllables) {
-					result.push_back(newseg.s);
-					push_heap(result.begin(),result.end(),SegmentationComparator());
-					//cerr << "New: ";newseg.s.print(sent);
-					//cerr << "Candidate: ";result[0].print(sent);
-					count ++;
-				} else {
-					segs.push_back(newseg);
-					//cerr << "New seg: ";
-					//newseg.print(sent);
-				}
-			}
-		}
-	} // end while
-}
-
-
-void Segmentation::print(ostream &os, const Sentence &st)
-{
-	int cc,i,n = items.size();
-	/*
-		for (i = 0;i < n;i ++) {
-		cout << *st[i] << " ";
-		if (items[i].flags & SEGM_SEPARATOR)
-		cout << "| ";
-		}
-		cout << endl;
-	*/
-	for (cc = i = 0;i < n;i ++) {
-		int c = items[i].state->get_syllable_count();
-		for (int k = 0;k < c;k ++) {
-			if (k) os << "_";
-			os << sarch[st[cc+k].get_id()];
-		}
-		cc += c;
-		os << " ";
-	}
-	os << prob << endl;
-}
-
-Words::~Words()
-{
-	int pos,nr_pos = get_word_count();
-	for (pos = 0;pos < nr_pos;pos ++) {
-		int len,nr_len = get_len(pos);
-		for (len = 0;len < nr_len;len ++)
-			delete (*(*this)[pos])[len];
-		delete (*this)[pos];
-	}
-}
+#include "config.h"							// -*- tab-width: 2 -*-
+#include "wfst.h"
+#include <iterator>
+#include <algorithm>
+#include <sstream>
+#include <iostream>
+#include <fstream>
+#include <boost/format.hpp>
+#include <set>
+#include "posgen.h"
+
+using namespace std;
+
+int ngram_length = 2;						// for lazy men ;)
+
+
+/*
+	Create a "template" segmentation. Mark all sections in template to sects
+ */
+/*
+void WFST::create_base_segmentation(const Words &words,
+																		Sections &sects,
+																		Segmentation &seg)
+{
+	int i,ii,n,nn;
+	i = ii = 0;
+	n = words.get_word_count();
+	nn = sects.size();
+
+	while (i < n)
+		if (ii < nn && sects[ii].start == i) {
+			seg.items.push_back(Segmentation::Item());
+			sects[ii].segment = seg.items.size()-1;
+			ii ++;
+			i += sects[ii].len;
+		} else {
+			// only word(i,0,0) exists
+			seg.items.push_back(Segmentation::Item());
+			seg.items.back().pos = i;
+			i ++;
+		}
+}
+*/
+
+void Generator::init(const Sentence &_st)
+{
+  nr_misspelled = 3;		// REMEMBER THIS NUMBER
+	misspelled_pos.resize(nr_misspelled);
+  run = true;
+
+	st = &(Sentence&)_st;
+
+	len = st->get_syllable_count();
+
+  // initialize
+  for (i = 0;i < nr_misspelled;i ++)
+    misspelled_pos[i] = i;
+
+  i = 0;
+}
+
+void Generator::done()
+{
+}
+
+/**
+	Generate every possible 3-misspelled-positions. 
+	Then call WFST::generate_misspelled_words.
+ */
+
+bool Generator::step(vector<int> &_pos,int &_len)
+{
+	while (run) {
+
+		// move to the next counter
+		if (i+1 < nr_misspelled && misspelled_pos[i] < len) {
+			i ++;
+
+			// do something here with misspelled_pos[i]
+			_pos = misspelled_pos;
+			_len = i;
+			return true;
+		}
+
+		// the last counter is not full
+		if (misspelled_pos[i] < len) {
+			// do something here with misspelled_pos[nr_misspelled]
+			_pos = misspelled_pos;
+			_len = nr_misspelled;
+			misspelled_pos[i] ++;
+			return true;
+		}
+
+		// the last counter is full, step back
+		while (i >= 0 && misspelled_pos[i] == len) i --;
+		if (i < 0) 
+			run = false;
+		else {
+			misspelled_pos[i] ++;
+			for (ii = i+1;ii < nr_misspelled;ii ++)
+				misspelled_pos[ii] = misspelled_pos[ii-1]+1;
+		}
+	}
+	return false;
+}
+
+/**
+	Generate new Words info based on misspelled position info.
+	Call WFST::get_sections() to split into sections
+	then call WFST::segment_all1() to do real segmentation.
+	\param pos contains possibly misspelled position.
+	\param len specify the actual length pos. Don't use pos.size()
+*/
+void WFST::generate_misspelled_words(const vector<int> &pos,int len)
+{
+	const Words &words = *p_words;
+	Words w;
+
+	w.based_on(words);
+	
+	// 2. Compute the score, jump to 1. if score is too low (pruning 1)
+
+	// create new (more compact) Words structure
+	int i,j,n = p_st->get_syllable_count();
+	for (i = 0;i < len;i ++) {
+		const WordEntryRefs &fmap = words.get_fuzzy_map(pos[i]);
+		WordEntryRefs::const_iterator iter;
+		for (iter = fmap.begin();iter != fmap.end(); ++iter)
+			w.add(**iter);
+	}
+
+	//cerr << w << endl;
+
+	// 4. Call get_sections
+	Sections sects;
+	sects.construct(words);
+
+	// 5. Call create_base_segmentation
+	//Segmentation base_seg;
+	//create_base_segmentation(words,sects,base_seg);
+
+	// 6. Call segment_all1 for each sections.
+	n = sects.size();
+	for (i = 0;i < n;i ++) {
+		Segmentation seg;
+		Segmentor segtor;
+
+		segtor.init(*p_st,					// Sentence
+								w,							// Words
+								sects[i].start, // from
+								sects[i].start+sects[i].len-1);	// to
+
+		while (segtor.step(seg)) {
+			// compute ngram. take the best seg.
+			seg.prob = 0;
+			VocabIndex *vi = new VocabIndex[ngram_length];
+			vi[ngram_length] = Vocab_None;
+			for (int i = ngram_length-1;i < seg.size();i ++) {
+				for (int j = 0;j < ngram_length-1;j++)
+					vi[j] = seg[i-1-j].node.node->get_id();
+				seg.prob += -ngram.wordProb(seg[i].node.node->get_id(),vi);
+			}
+
+			cout << seg << " " << seg.prob << endl;
+			// merge seg to base seg.
+		
+			// 6.1. Recompute the score after each section processed. (pruning 2)
+		}
+
+	}
+
+
+}
+
+/**
+	Create the best segmentation for a sentence.
+	\param _sent specify the sentence
+	\param words store Words info
+	\param seps return the best segmentation
+ */
+
+void WFST::segment_best(const Sentence &_sent,
+												const Words &words,
+												Segmentation &seps)
+{
+	int i,ii,n,nn;
+
+	p_st = &_sent;
+	p_words = &words;
+
+	// in test mode, generate all positions where misspelled can appear, 
+	// then create a new Words for them, re get_sections, 
+	// create_base_segmentation and call segment_all1 for each sections.
+
+	// 1. Generate mispelled positions (pruning 0 - GA)
+	// 2. Compute the score, jump to 1. if score is too low (pruning 1)
+	// 3. Make a new Words based on the original Words
+	// 4. Call get_sections
+	// 5. Call create_base_segmentation
+	// 6. Call segment_all1 for each sections.
+	// 6.1. Recompute the score after each section processed. (pruning 2)
+
+	// 1. Bai toan hoan vi, tinh chap C(k,n) with modification. C(1,n)+C(2,n)+...+C(k,n)
+	Generator gen;
+
+	gen.init(_sent);
+	vector<int> pos;
+	int len;
+	while (gen.step(pos,len)) {
+		cerr << "POS :";
+		for (int i = 0;i < len;i ++) cerr << pos[i];
+		cerr << endl;
+		generate_misspelled_words(pos,len);
+	}
+	gen.done();
+
+}
+
+// WFST (Weighted Finite State Transducer) implementation
+// TUNE: think about genetic/greedy. Vietnamese is almost one-syllable words..
+// we find where is likely a start of a word, then try to construct word
+// and check if others are still valid words.
+
+// the last item is always the incompleted item. We will try to complete
+// a word from the item. If we reach the end of sentence, we will remove it
+// from segs
+
+// obsolete
+void WFST::segment_all(const Sentence &sent,vector<Segmentation> &result)
+{
+	Words words;
+	words.construct(sent);
+	//	segment_all1(sent,words,0,sent.get_syllable_count(),result);a
+	/*
+		int nn = words.size();
+		for (i = 0;i < nn;i ++) {
+		int nnn = words[i].size();
+		cerr << "From " << i << endl;
+		for (int ii = 0;ii < nnn;ii ++) {
+		int nnnn = words[i][ii].fuzzy_match.size();
+		cerr << "Len " << ii << endl;
+		for (int iii = 0;iii < nnnn;iii ++) {
+		cerr << words[i][ii].fuzzy_match[iii].distance << " ";
+		cerr << words[i][ii].fuzzy_match[iii].node->get_prob() << endl;
+		}
+		}
+		}
+	*/
+
+}
+
+/**
+ * Segmentation comparator
+ */
+
+class SegmentationComparator
+{
+public:
+	bool operator() (const Segmentation &seg1,const Segmentation &seg2) {
+		return seg1.prob > seg2.prob;
+	}
+};
+
+
+
+
+
+
+void Segmentor::init(const Sentence &sent,
+										 Words &words,
+										 int from,
+										 int to)
+{
+	nr_syllables = to+1;
+
+	segs.clear();
+	segs.reserve(100);
+
+	Trace trace;
+	trace.next_syllable = from;
+	trace.s.we = words.we;
+	segs.push_back(trace);	// empty seg
+
+	_sent = &(Sentence&)sent;
+	_words = &words;
+}
+
+bool Segmentor::step(Segmentation &result)
+{
+	Sentence &sent = *_sent;
+	Words &words = *_words;
+	while (!segs.empty()) {
+		// get one
+		Trace trace = segs.back();
+		segs.pop_back();
+
+		Segmentation seg = trace.s;
+		int next_syllable = trace.next_syllable;
+
+		// segmentation completed. throw it away
+		if (next_syllable >= nr_syllables)
+			continue;
+
+		//WordEntries::iterator i_entry;
+		WordEntryRefs &wes = words.get_we(next_syllable);
+		int ii,nn = wes.size();
+		for (ii = 0;ii < nn;ii ++) {
+			WordEntryRef i_entry = wes[ii];
+
+			// New segmentation for longer incomplete word
+			Trace newtrace;
+			newtrace = trace;
+			newtrace.s.push_back(i_entry->id);
+			newtrace.s.prob += i_entry->node.node->get_prob();
+
+			/*-it's better to compute ngram outside this function
+			if (ngram_enabled) {
+				if (newtrace.s.size() >= ngram_length) {
+					VocabIndex *vi = new VocabIndex[ngram_length];
+					vi[0] = newtrace.s.items[len-1].node(sent).node->get_id();
+					vi[1] = Vocab_None;
+					newtrace.s.prob += -ngram.wordProb(newtrace.s.items[len-2].node(sent).node->get_id(),vi);
+					delete[] vi;
+				}
+			}
+			*/
+
+			newtrace.next_syllable += i_entry->len;
+			if (newtrace.next_syllable == nr_syllables) {
+				//cout << count << " " << newtrace.s << endl;
+				result = newtrace.s;
+				return true;
+				//result.push_back(newtrace.s);
+				//push_heap(result.begin(),result.end(),SegmentationComparator());
+				//count ++;
+			} else {
+				segs.push_back(newtrace);
+			}
+		}
+	} // end while
+	return false;
+}
+
+void Segmentor::done()
+{
+}
diff --git a/libvspell/wfst.h b/libvspell/wfst.h
index d37693d..39680d9 100644
--- a/libvspell/wfst.h
+++ b/libvspell/wfst.h
@@ -19,30 +19,48 @@
 
 #define SEGM_SEPARATOR 1
 
-struct WordInfo {
-	WordNode::DistanceNode exact_match;
-	std::vector<WordNode::DistanceNode> fuzzy_match;
+struct Section {
+	int segment;
+	int start;
+	int len;
 };
 
-typedef std::vector<WordInfo*> WordInfos;
+class Sections: public std::vector<Section> {
+public:
+	SentenceRef st;
+	void construct(const Words &words);
+};
+
+std::ostream& operator << (std::ostream &os,const Sections &s);
+
+/**
+	 Segmentor takes a Sentence, a Words and a range, then try to generate
+	 all possible Segmentation.
+ */
+
+class Segmentor
+{
+private:
+	struct Trace
+	{
+		Segmentation s;
+		int next_syllable;
+		Trace():next_syllable(0) {}
+	};
+	int nr_syllables;
+	std::vector<Trace> segs;
+	SentenceRef _sent;
+	Words *_words;
+	int from,to;
 
-class Words:public std::vector<WordInfos*> {
 public:
-	int get_word_count() const { return size(); }
-	int get_len(int i) const { return (*this)[i]->size(); }
-	int get_fuzzy_count(int i,int l) const { 
-		return (*(*this)[i])[l]->fuzzy_match.size();
-	}
-	WordNode::DistanceNode& get_fuzzy(int i,int l,int f) {
-		return (*(*this)[i])[l]->fuzzy_match[f];
-	}
-	const WordNode::DistanceNode& get_fuzzy(int i,int l,int f) const{
-		return (*(*this)[i])[l]->fuzzy_match[f];
-	}
-	~Words();											// WARN: destroy all.
-	void print() const;
+	void init(const Sentence &sent,
+						Words &words,
+						int from,
+						int to);
+	bool step(Segmentation &seg);
+	void done();
 };
-// Words[from][len].fuzzy_match[i]
 
 class WFST
 {
@@ -60,19 +78,26 @@ public:
 
 	void enable_ngram(bool enable = true) { ngram_enabled = enable; }
 
-	void get_all_words(const Sentence &sent,
-				 Words &words);
 	void segment_best(const Sentence &sent,
-				const Words &words,
-				Segmentation &seps);
+										const Words &words,
+										Segmentation &seps);
 	void segment_all(const Sentence &sent,
 			 std::vector<Segmentation> &result);
-private:
+
+	//private:
+public:													// for testing purpose
+	void generate_misspelled_words(const std::vector<int> &pos,int len);
 	void segment_all1(const Sentence &sent,
-				const Words &words,
-				int from,int to,
-				std::vector<Segmentation> &result);
-	
+										Words &words,
+										int from,
+										int to,
+										//const Segmentation &base_seg,
+										//int seg_id,
+										std::vector<Segmentation> &result);
+
+	// variables needed when run wfst
+	const Sentence *p_st;
+	const Words *p_words;
 };
 
 #endif
diff --git a/libvspell/wordnode.h b/libvspell/wordnode.h
index 3b0906c..e48f555 100644
--- a/libvspell/wordnode.h
+++ b/libvspell/wordnode.h
@@ -31,6 +31,7 @@ protected:
   typedef SArrayIter<strid,WordNodePtr> node_map_iterator;
   node_map *nodes;
   WordInfo *info;
+	VocabIndex id;
 
 public:
   struct DistanceNode {
@@ -43,16 +44,18 @@ public:
     int operator < (const DistanceNode &dn1) const {
       return (int)node+distance < (int)dn1.node+dn1.distance;
     }
+		WordNode& operator* () const { return *node; }
+		WordNode* operator-> () const { return node; }
   };
 
   void recalculate();
 
 public:
 
-  WordNode(strid _syllable):info(NULL),nodes(new node_map) {}
+  WordNode(strid _syllable):info(NULL),nodes(new node_map),id(_syllable) {}
   //    ~WordNode();
 
-  // strpair get_syllable() const { return syllable; }
+  VocabIndex get_syllable() const { return id; }
   virtual WordNodePtr get_next(strid str) const;
   void inc_a() { ASSERT(info != NULL); info->a++; }
   void inc_b() { ASSERT(info != NULL); info->b++; }
@@ -65,10 +68,12 @@ public:
   float get_prob() const { return info ? info->prob : -1; }
   void set_prob(float _prob);
 
-  int get_syllable_count();
-  void get_syllables(std::vector<strid> &syllables);
+  int get_syllable_count() const;
+  void get_syllables(std::vector<strid> &syllables) const;
   WordNodePtr follow_syllables(const std::vector<strid> &syllables);
 
+	void dump_next(std::ostream &os) const;
+
   bool load(const char* filename);
   bool save(const char* filename);
 };
@@ -92,5 +97,7 @@ public:
 };
 
 WordNodePtr get_root();
+std::ostream& operator << (std::ostream &os,const WordNode::DistanceNode &dnode);
+std::ostream& operator << (std::ostream &os,const WordNode &node);
 
 #endif
diff --git a/libvspell/words.cpp b/libvspell/words.cpp
new file mode 100644
index 0000000..025b730
--- /dev/null
+++ b/libvspell/words.cpp
@@ -0,0 +1,244 @@
+#include "config.h"							// -*- tab-width: 2 -*-
+#include "spell.h"
+#include <iterator>
+#include <algorithm>
+#include <sstream>
+#include <iostream>
+#include <fstream>
+#include <boost/format.hpp>
+#include <set>
+
+
+using namespace std;
+
+
+/**
+	Store information used by WFST::get_all_words()
+*/
+struct WordState {
+
+	/** 
+		 the currently processing node
+	 */
+	WordNode::DistanceNode dnode;
+
+	int fuzid;
+
+	WordState(const WordNode::DistanceNode &n):dnode(n),fuzid(0) {}
+};
+
+/**
+	Find all possible words
+	\param sent specify the input sentence
+	\param w must be cleared before calling this function
+*/
+void Words::construct(const Sentence &sent)
+{
+	Words &w = *this;
+	int i,n,ii,nn,k,nnn,iii;
+	vector<strid> syll;
+	set<WordEntry> we;
+
+	w.st = &(Sentence&)sent;
+
+	vector<WordState> states;
+	vector<WordState> old_states(states);
+	states.reserve(10);
+	old_states.reserve(10);
+
+	n = sent.get_syllable_count();
+
+	syll.resize(n);
+	for (i = 0;i < n;i ++) {
+		syll[i] = sent[i].get_cid();
+		//cerr << syll[i] << " ";
+	}
+	//cerr << endl;
+
+	for (i = 0;i < n;i ++) {			// pos
+
+		// finding all possible words started at the i-th syllable.
+
+		// init states with root node
+		states.clear();
+		states.push_back(WordNode::DistanceNode(get_root()));
+
+		for (k = 0;k+i < n && !states.empty(); k ++) { // length of k
+			// save states
+			old_states = states;
+
+			/*
+			cerr << "State:" << i << " " << k << endl;
+			int jj,jnn = states.size();
+			for (jj = 0;jj < jnn;jj ++) {
+				cerr << sarch[states[jj].dnode.node->get_syllable()] << endl;
+				if (states[jj].dnode.node != wl) {
+					states[jj].dnode.node->dump_next(cerr);
+					cerr << endl;
+				}
+			}
+			*/
+
+			// clear states, ready for new states
+			states.clear();
+
+			// get next new states -> states
+			// those in old_states which has reached end will be removed from states
+			nn = old_states.size();
+			bool has_words = false;
+			for (ii = 0;ii < nn;ii ++) {
+				WordNode::DistanceNode &state = old_states[ii].dnode;
+				WordNodePtr exact_node = state.node->get_next(syll[i+k]);
+				vector<WordNode::DistanceNode> nodes;
+				//nodes.clear();
+				//if (exact_node)
+				//nodes.push_back(exact_node);
+				state.node->fuzzy_get_next(syll[i+k],nodes);
+				if (exact_node && 
+						find(nodes.begin(),nodes.end(),exact_node) == nodes.end())
+					nodes.push_back(exact_node);
+				nnn = nodes.size();
+				for (iii = 0;iii < nnn;iii ++) {
+					states.push_back(old_states[ii]);
+					states.back().dnode = nodes[iii];
+					if (nodes[iii].node != exact_node)
+						states.back().fuzid |=  1 << k;
+
+					// get completed words
+					if (states.back().dnode.node->get_prob() >= 0) {
+						WordEntry e;
+						e.pos = i;
+						e.len = k+1;
+						e.fuzid = states.back().fuzid;
+						e.node = states.back().dnode;
+						we.insert(e);
+						has_words = true;
+						//winfo.fuzzy_match.push_back(states[ii]);
+					}
+				}
+			}
+
+
+			// this is an unknown syllable, we presume this is an unknown word
+			if (k == 0 && !has_words) {
+				WordEntry e;
+				e.pos = i;
+				e.len = 1;
+				e.fuzid = 1;
+				e.node = get_root()->get_next(unk_id);
+				we.insert(e);
+			}
+		}	// end of k
+	}	// end of i
+
+	//copy(we.begin(),we.end(),ostream_iterator<WordEntry>(cerr));
+	// copy to real _we
+	n = we.size();
+	w.we = new WordEntries;
+	w.we->reserve(n);
+	copy(we.begin(),we.end(),back_insert_iterator<vector<WordEntry> >(*w.we));
+	for (i = 0;i < n;i ++)
+		(*w.we)[i].id = i;
+
+	// build Words structure
+	w.construct();
+}
+
+/**
+	 Post-process after initialize core member of Words
+ */
+
+void Words::construct()
+{
+	int i_we,n_we = we->size();
+
+	for (i_we = 0;i_we < n_we;i_we ++)
+		add((*we)[i_we]);
+}
+/**
+	 Dump a Words
+ */
+ostream& operator << (ostream &os, const Words &w)
+{
+	int i, nn = w.get_word_count();
+	for (i = 0;i < nn;i ++) {
+		int nnn = w.get_len(i);
+		for (int ii = 0;ii < nnn;ii ++) {
+			int nnnn = w.get_fuzzy_count(i,ii);
+			if (w.get_we_exact(i,ii))
+				os << *w.get_we_exact(i,ii) << endl;
+			for (int iii = 0;iii < nnnn;iii ++)
+				os << w.get_we_fuzzy(i,ii,iii) << endl;
+		}
+	}
+	return os;
+}
+
+Words::~Words()
+{
+	int pos,nr_pos = get_word_count();
+	for (pos = 0;pos < nr_pos;pos ++) {
+		int len,nr_len = get_len(pos);
+		for (len = 0;len < nr_len;len ++)
+			delete (*(*this)[pos])[len];
+		delete (*this)[pos];
+	}
+}
+
+std::ostream& operator << (std::ostream &os,const WordEntry &we)
+{
+	using namespace boost;
+	os << format("%d %d %x ") % we.pos % we.len % we.fuzid << we.node;
+	return os;
+}
+
+
+/**
+	 Construct a Words based on another Words. 
+	 Keep only exact matches.
+ */
+
+void Words::based_on(const Words &w)
+{
+	Words &me = *this;
+	we = w.we;
+	me.st = w.st;
+
+	int i_pos,n_pos = w.get_word_count();
+
+	for (i_pos = 0;i_pos < n_pos;i_pos ++) {
+		int i_len,n_len = w.get_len(i_pos);
+		for (i_len = 0;i_len < n_len;i_len ++)
+			if ((*w[i_pos])[i_len]->exact_match)
+				add(*(*w[i_pos])[i_len]->exact_match);
+	}
+}
+
+void Words::add(WordEntry &w)
+{
+	Words &me = *this;
+	while (me.size() <= w.pos)
+		me.push_back(new WordInfos);
+
+	WordInfos &wis = *me[w.pos];
+
+	while (wis.size() <= w.len) {
+		wis.push_back(new WordInfo);
+		wis.back()->exact_match = NULL;
+	}
+
+	WordInfo &wi = *wis[w.len];
+	if (!w.fuzid)								// exact match
+		wi.exact_match = &w;
+	else {
+		wi.fuzzy_match.push_back(&w);
+		for (int j = 0;j < w.len;j ++)
+			if (w.fuzid & (1 << j)) {
+				while (me.size() <= j+w.pos)
+					me.push_back(new WordInfos);
+				me[j+w.pos]->fuzzy_map.push_back(&w);
+			}
+	}
+
+	wis.we.push_back(&w);
+}
-- 
2.11.4.GIT