From ac37937b45af4abed9697efb61b7d401f1b294f7 Mon Sep 17 00:00:00 2001 From: JanLehnardt Date: Sat, 3 Feb 2007 12:12:45 +0000 Subject: [PATCH] new protocol, cleaner file structure and bugfixes for fulltext indexing git-svn-id: http://couchdb.googlecode.com/svn/trunk@127 efc57d8c-411c-0410-91b3-cfdf4dd2bc77 --- CouchProjects/Lucene/CouchConfig.java | 45 ++++ CouchProjects/Lucene/CouchDbDirFilter.java | 18 ++ CouchProjects/Lucene/CouchDocument.java | 96 ++++++++ CouchProjects/Lucene/LuceneIndexer.java | 282 ++++------------------ CouchProjects/Lucene/SAXCouchDocumentBuilder.java | 77 ++++++ 5 files changed, 283 insertions(+), 235 deletions(-) create mode 100644 CouchProjects/Lucene/CouchConfig.java create mode 100644 CouchProjects/Lucene/CouchDbDirFilter.java create mode 100644 CouchProjects/Lucene/CouchDocument.java create mode 100644 CouchProjects/Lucene/SAXCouchDocumentBuilder.java diff --git a/CouchProjects/Lucene/CouchConfig.java b/CouchProjects/Lucene/CouchConfig.java new file mode 100644 index 0000000..68c5bdf --- /dev/null +++ b/CouchProjects/Lucene/CouchConfig.java @@ -0,0 +1,45 @@ +import java.util.*; +class CouchConfig +{ +/* private CouchDocument[] documents; +*/ + private Hashtable documents; + private long updateSequence; + + public CouchConfig() + { + documents = new Hashtable(); + updateSequence = 0; + } + + public void setUpdateSequence(long newUpdateSequence) + { + updateSequence = newUpdateSequence; + } + + public long getUpdateSequence() + { + return updateSequence; + } + + public void addDocument(CouchDocument document) + { + String field; +// System.out.println(document); + field = document.getField("__couchdb_database", 0); +// System.out.println(field); + if(field != null) { + documents.put(field, document); + } + } + + public Hashtable getDocuments() + { + return documents; + } + + public boolean hasDb(String db) + { + return documents.containsKey(db); + } +} diff --git a/CouchProjects/Lucene/CouchDbDirFilter.java 
b/CouchProjects/Lucene/CouchDbDirFilter.java new file mode 100644 index 0000000..c6ff900 --- /dev/null +++ b/CouchProjects/Lucene/CouchDbDirFilter.java @@ -0,0 +1,18 @@ +/* + (c) 2007 Jan Lehnardt + + This is free software, see LICENSE for details. + + LuceneIndexer creates a lucene index by incrementally + fetching changes from a CouchDB server. It is managed + by the CouchDb daemon. +*/ +import java.io.*; + +class CouchDbDirFilter implements FilenameFilter +{ + public boolean accept(File dir, String name) + { + return new File(dir, name).isFile(); + } +} \ No newline at end of file diff --git a/CouchProjects/Lucene/CouchDocument.java b/CouchProjects/Lucene/CouchDocument.java new file mode 100644 index 0000000..6c37be3 --- /dev/null +++ b/CouchProjects/Lucene/CouchDocument.java @@ -0,0 +1,96 @@ +import java.util.*; + +class CouchDocument +{ + private String docId; + private String revision; + private boolean deleted = false; + private Hashtable fields; + private long updateSequence = -1; + private boolean delete = false; + public CouchDocument() + { + + } + + public CouchDocument(String docId, String revision) + { + this.docId = docId; + this.revision = revision; + fields = new Hashtable(3, (float)0.75); + Hashtable idHash = new Hashtable(3, (float)0.75); + Hashtable revHash = new Hashtable(3, (float)0.75); + idHash.put(0, docId); + revHash.put(0, revision); + fields.put("__couchdb_document_id", idHash); + fields.put("__couchdb_document_revision", revHash); + } + + public void setUpdateSequence(long sequence) + { + updateSequence = sequence; + } + + public long getUpdateSequence() + { + return updateSequence; + } + + public String toString() + { + return "CouchDocument: " + docId + " Revision: " + revision + " update_seq: "+ updateSequence + " fields: " + fields.toString(); + } + + public Hashtable getFields() + { + return fields; + } + + public void addField(String name, String value) + { + if(name == null) {return;} +// System.out.println("DEBUG: add field: 
"+name+" with value: "+value + fields.size()); + Hashtable field; + if((fields.size() == 0) || (!fields.containsKey(name))) { + field = new Hashtable(3, (float)0.75); + field.put(0, value); + } else { + field = (Hashtable)fields.get(name); + field.put(field.size() + 1, value); + } + fields.put(name, field); + } + + public String getField(String name, int idx) + { + if((name == null) || !fields.containsKey(name)) { + return null; + } + + Hashtable field = (Hashtable)fields.get(name); +// System.out.println(field); + return (String)field.get(idx); + } + + public String getDocId() + { + return docId; + } + + public void setDelete(boolean doDelete) + { +/* System.out.println("set as:" + doDelete + " for hash code: " + hashCode());*/ + delete = doDelete; +/* System.out.println("set as:" + delete + " for hash code: " + hashCode());*/ + + } + public boolean getDelete() + { + return delete; + } + + public String getRevision() + { + return revision; + } +} diff --git a/CouchProjects/Lucene/LuceneIndexer.java b/CouchProjects/Lucene/LuceneIndexer.java index 5653efe..45471bb 100644 --- a/CouchProjects/Lucene/LuceneIndexer.java +++ b/CouchProjects/Lucene/LuceneIndexer.java @@ -46,40 +46,63 @@ public class LuceneIndexer public static void main(String[] args) throws Exception { - System.out.println("Lucene Indexer launched"); - - BufferedReader in = new BufferedReader(new InputStreamReader(System.in)); - +/* BufferedWriter out = new BufferedWriter(new FileWriter("LuceneIndexer.log")); + out.write("indexer started");out.flush(); +*/ String db; - +/* out.write("indexer about to read config");out.flush();*/ + readConfig(); - while((db = in.readLine()) != null) { - if(db.equals("couchdbfulltext")) { - System.out.println("refresh config"); - readConfig(); - } +/* out.write("indexer read config: " + configuration.getDocuments());out.flush();*/ + + BufferedReader in = new BufferedReader(new InputStreamReader(System.in)); + try { + while((db = in.readLine()) != null) { +/* 
out.write("indexer got a poke");out.flush();*/ + + if(db.equals("couchdbfulltext")) { +/* System.out.println("refresh config"); + +*/ readConfig(); +/* out.write("indexer refreshed config");out.flush();*/ + + } + +/* out.write("indexer has table: " + db + "?");*/ + + if(!configuration.hasDb(db)) { +/* out.write("... no wait for input");out.flush();*/ + + continue; + } - if(!configuration.hasDb(db)) { - continue; - } +/* out.write("yeppa");out.flush();*/ + - createIndexDir(db); - long revision = getRevisionForDb(db); - indexChanges(db, revision); - System.out.println(db + " to revision: " + revision); + createIndexDir(db); + long revision = getRevisionForDb(db); + indexChanges(db, revision); +/* System.out.println(db + " to revision: " + revision);*/ + } + } catch (IOException e) { +/* out.write("indexer caught IO exception: " + e.getMessage());out.flush();*/ + } - System.out.println("Lucene Indexer stopped"); +/* System.out.println("Lucene Indexer stopped");*/ +/* out.write("indexer stopped");out.flush();*/ + +/* out.close();*/ + } - public static CouchConfig readConfig() throws Exception + public static void readConfig() throws Exception { //get all docs in /$ftconfig //return array of config docs - if(configuration == null) { - configuration = new CouchConfig(); - } - + configuration = null; + configuration = new CouchConfig(); + CouchDocument[] changedDocuments = getChangedDocumentsSinceRevision("couchdbfulltext", 0); for(int i = 0; i < changedDocuments.length; i++) { @@ -87,8 +110,6 @@ public class LuceneIndexer document = loadDocumentData(document, "couchdbfulltext"); configuration.addDocument(document); } - - return configuration; } public static void indexChanges(String db, long revision) throws Exception @@ -99,7 +120,7 @@ public class LuceneIndexer CouchDocument[] changedDocuments = getChangedDocumentsSinceRevision(db, revision); if(changedDocuments.length == 0) { - System.out.println("Index is up-to date at sequence_id: " + revision); +// 
System.out.println("Index is up-to date at sequence_id: " + revision); return; } @@ -282,212 +303,3 @@ public class LuceneIndexer } } -class SAXCouchDocumentBuilder extends DefaultHandler -{ - private ArrayList documents = new ArrayList(); - private String currentField; - private String cdataBuffer = ""; - private boolean buildFull = false; - private CouchDocument currentDocument; - private long replicationRevision = -1; - - public void startElement(String namespace, String localname, String qname, Attributes attributes) throws SAXException - { - if(qname.equals("doc_info")) { - CouchDocument document = new CouchDocument(attributes.getValue("id"), attributes.getValue("rev")); - document.setUpdateSequence((long)Integer.parseInt(attributes.getValue("update_seq"))); - String delete = attributes.getValue("deleted"); - if(delete != null) { -/* System.out.println(delete);*/ - document.setDelete((boolean)(delete.equals("true"))); - } - -// System.out.println(document); -/* for(int idx = 0; idx < attributes.getLength(); idx++) { - System.out.println(attributes.getQName(idx) + ": "+ attributes.getValue(idx)); - }*/ - - documents.add(document); - } - - if(qname.equals("doc")) { - CouchDocument document = new CouchDocument(attributes.getValue("id"), attributes.getValue("rev")); - currentDocument = document; - buildFull = true; - - } - - if(qname.equals("field")) { - currentField = attributes.getValue("name"); - } - - } - - public void endElement(String namespace, String localname, String qname) throws SAXException - { - if(!buildFull) {return;} - currentDocument.addField(currentField, cdataBuffer); - cdataBuffer = ""; - } - - public void characters(char[] ch, int start, int len) { - cdataBuffer = cdataBuffer + new String(ch, start, len); - } - - public long getRevision() - { - return replicationRevision; - } - - public CouchDocument getDocument() - { - - return currentDocument; - } - - public CouchDocument[] getDocuments() - { - CouchDocument[] type = 
(CouchDocument[])Array.newInstance(CouchDocument.class, documents.size()); - return documents.toArray(type); - } -} - -class CouchDocument -{ - private String docId; - private String revision; - private boolean deleted = false; - private Hashtable fields; - private long updateSequence = -1; - private boolean delete = false; - public CouchDocument() - { - - } - - public CouchDocument(String docId, String revision) - { - this.docId = docId; - this.revision = revision; - fields = new Hashtable(3, (float)0.75); - Hashtable idHash = new Hashtable(3, (float)0.75); - Hashtable revHash = new Hashtable(3, (float)0.75); - idHash.put(0, docId); - revHash.put(0, revision); - fields.put("__couchdb_document_id", idHash); - fields.put("__couchdb_document_revision", revHash); - } - - public void setUpdateSequence(long sequence) - { - updateSequence = sequence; - } - - public long getUpdateSequence() - { - return updateSequence; - } - - public String toString() - { - return "CouchDocument: " + docId + " Revision: " + revision + " update_seq: "+ updateSequence + " fields: " + fields.toString(); - } - - public Hashtable getFields() - { - return fields; - } - - public void addField(String name, String value) - { - if(name == null) {return;} -// System.out.println("DEBUG: add field: "+name+" with value: "+value + fields.size()); - Hashtable field; - if((fields.size() == 0) || (!fields.containsKey(name))) { - field = new Hashtable(3, (float)0.75); - field.put(0, value); - } else { - field = (Hashtable)fields.get(name); - field.put(field.size() + 1, value); - } - fields.put(name, field); - } - - public String getField(String name, int idx) - { - if((name == null) || !fields.containsKey(name)) { - return null; - } - - Hashtable field = (Hashtable)fields.get(name); -// System.out.println(field); - return (String)field.get(idx); - } - - public String getDocId() - { - return docId; - } - - public void setDelete(boolean doDelete) - { -/* System.out.println("set as:" + doDelete + " for hash 
code: " + hashCode());*/ - delete = doDelete; -/* System.out.println("set as:" + delete + " for hash code: " + hashCode());*/ - - } - public boolean getDelete() - { - return delete; - } - - public String getRevision() - { - return revision; - } -} - -class CouchConfig -{ -/* private CouchDocument[] documents; -*/ - private Hashtable documents; - private long updateSequence; - - public CouchConfig() - { - documents = new Hashtable(); - updateSequence = 0; - } - - public void setUpdateSequence(long newUpdateSequence) - { - updateSequence = newUpdateSequence; - } - - public long getUpdateSequence() - { - return updateSequence; - } - - public void addDocument(CouchDocument document) - { - String field; -// System.out.println(document); - field = document.getField("__couchdb_database", 0); -// System.out.println(field); - if(field != null) { - documents.put(field, document); - } - } - - public Hashtable getDocuments() - { - return documents; - } - - public boolean hasDb(String db) - { - return documents.containsKey(db); - } -} diff --git a/CouchProjects/Lucene/SAXCouchDocumentBuilder.java b/CouchProjects/Lucene/SAXCouchDocumentBuilder.java new file mode 100644 index 0000000..98598fd --- /dev/null +++ b/CouchProjects/Lucene/SAXCouchDocumentBuilder.java @@ -0,0 +1,77 @@ +import org.xml.sax.*; +import org.xml.sax.helpers.*; +import javax.xml.parsers.*; +import java.util.*; +import java.lang.reflect.*; + +class SAXCouchDocumentBuilder extends DefaultHandler +{ + private ArrayList documents = new ArrayList(); + private String currentField; + private String cdataBuffer = ""; + private boolean buildFull = false; + private CouchDocument currentDocument; + private long replicationRevision = -1; + + public void startElement(String namespace, String localname, String qname, Attributes attributes) throws SAXException + { + if(qname.equals("doc_info")) { + CouchDocument document = new CouchDocument(attributes.getValue("id"), attributes.getValue("rev")); + 
document.setUpdateSequence((long)Integer.parseInt(attributes.getValue("update_seq"))); + String delete = attributes.getValue("deleted"); + if(delete != null) { +/* System.out.println(delete);*/ + document.setDelete((boolean)(delete.equals("true"))); + } + +// System.out.println(document); +/* for(int idx = 0; idx < attributes.getLength(); idx++) { + System.out.println(attributes.getQName(idx) + ": "+ attributes.getValue(idx)); + }*/ + + documents.add(document); + } + + if(qname.equals("doc")) { + CouchDocument document = new CouchDocument(attributes.getValue("id"), attributes.getValue("rev")); + currentDocument = document; + buildFull = true; + + } + + if(qname.equals("field")) { + currentField = attributes.getValue("name"); + } + + } + + public void endElement(String namespace, String localname, String qname) throws SAXException + { + if(!buildFull) {return;} + currentDocument.addField(currentField, cdataBuffer); + cdataBuffer = ""; + } + + public void characters(char[] ch, int start, int len) { + cdataBuffer = cdataBuffer + new String(ch, start, len); + } + + public long getRevision() + { + return replicationRevision; + } + + public CouchDocument getDocument() + { + + return currentDocument; + } + + public CouchDocument[] getDocuments() + { + CouchDocument[] type = (CouchDocument[])Array.newInstance(CouchDocument.class, documents.size()); + return documents.toArray(type); + } +} + + -- 2.11.4.GIT