Initial import into git.
[galago.git] / java / pig-galago / src / com / yahoo / pig / data / DataBag.java
blob7236bff34324be05e964c0d4e668f0a7f7d21916
1 /*
2 * Copyright (c) 2007 Yahoo! Inc. All rights reserved.
3 * See accompanying LICENSE file.
4 */
5 package com.yahoo.pig.data;
7 import java.io.IOException;
8 import java.util.ArrayList;
9 import java.util.Collections;
10 import java.util.Comparator;
11 import java.util.Iterator;
12 import java.util.List;
14 import com.yahoo.pig.impl.eval.EvalItem;
15 import com.yahoo.pig.impl.eval.EvalItemList;
16 import com.yahoo.pig.impl.eval.StarEvalItem;
/**
 * A collection of Tuples.
 */
21 public class DataBag extends DataCollector implements Datum{
22 protected List<Tuple> content;
/**
 * Creates an empty bag backed by a newly allocated {@link ArrayList}.
 */
public DataBag() {
    this.content = new ArrayList<Tuple>();
}
/**
 * Creates a bag that uses the supplied list as its backing store.
 * The list is stored by reference; no copy is made.
 *
 * @param c the list of tuples this bag will hold
 */
public DataBag(List<Tuple> c) {
    this.content = c;
}
/**
 * Creates a bag that initially holds exactly one tuple.
 *
 * @param t the single tuple to place in the new bag
 */
public DataBag(Tuple t) {
    List<Tuple> single = new ArrayList<Tuple>();
    single.add(t);
    this.content = single;
}
37 public int cardinality() {
38 return content.size();
41 public boolean isEmpty() {
42 return content.size() == 0;
45 public int compareTo(Object other) {
46 if (other instanceof DataAtom) return +1;
47 if (other instanceof Tuple) return -1;
48 if (other instanceof DataBag){
49 DataBag bOther = (DataBag) other;
50 if (this.cardinality() != bOther.cardinality()) {
51 return (this.cardinality() - bOther.cardinality());
54 // same cardinality, so compare tuple by tuple ...
55 this.sort();
56 bOther.sort();
58 Iterator<Tuple> thisIt = this.content();
59 Iterator<Tuple> otherIt = bOther.content();
60 while (thisIt.hasNext() && otherIt.hasNext()) {
61 Tuple thisT = thisIt.next();
62 Tuple otherT = otherIt.next();
64 int c = thisT.compareTo(otherT);
65 if (c != 0) return c;
68 return 0; // if we got this far, they must be equal
69 }else{
70 return -1;
75 public boolean equals(Datum other) {
76 return (compareTo(other) == 0);
79 public void sort() {
80 EvalItem item = new StarEvalItem(null);
81 EvalItemList itemList = new EvalItemList(null);
82 itemList.add(item);
84 sort(itemList);
87 public void sort(EvalItemList spec) {
88 Collections.sort(content, spec.getComparator());
91 public void arrange(EvalItemList spec) {
92 sort(spec);
95 public void distinct(EvalItemList spec) {
96 Comparator<Tuple> comparator = spec.getComparator();
98 Collections.sort(content, comparator);
100 Tuple lastTup = null;
101 for (Iterator<Tuple> it = content.iterator(); it.hasNext(); ) {
102 Tuple thisTup = it.next();
104 if (lastTup == null) {
105 lastTup = thisTup;
106 continue;
109 if (comparator.compare(thisTup, lastTup) == 0) {
110 it.remove();
111 } else {
112 lastTup = thisTup;
117 public Iterator<Tuple> content() {
118 return content.iterator();
122 public void add(Tuple t) {
123 //if (t!=null)
124 content.add(t);
127 public void addAll(DataBag b) {
128 if (b instanceof BigDataBag) {
129 Iterator<Tuple> it = b.content();
130 while (it.hasNext()) {
131 content.add(it.next());
133 } else {
134 content.addAll(b.content);
138 public void remove(Tuple d) {
139 content.remove(d);
143 * Returns the value of field i. Since there may be more than one tuple in the bag, this
144 * function throws an exception if it is not the case that all tuples agree on this field
146 public DataAtom getField(int i) throws IOException {
147 DataAtom val = null;
149 for (Iterator<Tuple> it = content(); it.hasNext();) {
150 DataAtom currentVal = it.next().getAtomField(i);
152 if (val == null) {
153 val = currentVal;
154 } else {
155 if (!val.strval().equals(currentVal.strval()))
156 throw new IOException("Cannot call getField on a databag unless all tuples agree.");
160 if (val == null)
161 throw new IOException("Cannot call getField on an empty databag.");
163 return val;
166 public void clear(){
167 content = new ArrayList<Tuple>();
168 Runtime.getRuntime().gc();
171 public String toString() {
172 StringBuffer sb = new StringBuffer();
173 sb.append('{');
174 for (Iterator it = content(); it.hasNext();) {
175 sb.append(it.next().toString());
176 if (it.hasNext())
177 sb.append(", ");
179 sb.append('}');
180 return sb.toString();