Initial import into git.
[galago.git] / java / galago / src / galago / parse / PostingsReducer.java
blob3c05f97f4061d8517b0302821a76a8f58eb9f465
1 /*
2 * PostingsReducer
4 * May 5, 2007 -- Trevor Strohman
6 * BSD License (http://www.galagosearch.org/license)
7 */
9 package galago.parse;
11 import galago.Sorts;
12 import galago.tupleflow.Reducer;
13 import galago.types.DocumentLengthWordCount;
14 import java.io.IOException;
15 import java.util.ArrayList;
16 import java.util.Collections;
17 import java.util.List;
19 /**
21 * @author trevor
23 public class PostingsReducer implements Reducer<DocumentLengthWordCount> {
24 public ArrayList<DocumentLengthWordCount> reduce(List<DocumentLengthWordCount> input) throws IOException {
25 Collections.sort(input, new DocumentLengthWordCount().getOrder( "+document", "+word" ).lessThan() );
26 List<DocumentLengthWordCount> sorted = input;
28 DocumentLengthWordCount o = null;
29 ArrayList<DocumentLengthWordCount> newList = new ArrayList<DocumentLengthWordCount>();
31 for( DocumentLengthWordCount wc : input ) {
32 if( o != null && wc.word.equals(o.word) && wc.document.equals(o.document) ) {
33 o.count += wc.count;
34 } else {
35 if( o != null )
36 newList.add(o);
37 o = wc;
41 return newList;