2 * Copyright (c) 2007 Yahoo! Inc. All rights reserved.
3 * See accompanying LICENSE file.
5 package com
.yahoo
.pig
.data
;
7 import java
.io
.IOException
;
8 import java
.util
.ArrayList
;
9 import java
.util
.Collections
;
10 import java
.util
.Comparator
;
11 import java
.util
.Iterator
;
12 import java
.util
.List
;
14 import com
.yahoo
.pig
.impl
.eval
.EvalItem
;
15 import com
.yahoo
.pig
.impl
.eval
.EvalItemList
;
16 import com
.yahoo
.pig
.impl
.eval
.StarEvalItem
;
19 * A collection of Tuples
21 public class DataBag
extends DataCollector
implements Datum
{
// Backing list that holds the tuples of this bag. It is protected, so
// subclasses can read and replace it directly.
22 protected List
<Tuple
> content
;
25 content
= new ArrayList
<Tuple
>();
// Creates a bag from a caller-supplied list of tuples.
// NOTE(review): presumably the list is adopted as the backing store rather
// than copied -- confirm before relying on aliasing behaviour.
28 public DataBag(List
<Tuple
> c
) {
// Creates a bag for a single tuple, starting from fresh ArrayList storage.
// NOTE(review): presumably t is then added as the only element -- confirm.
32 public DataBag(Tuple t
) {
33 content
= new ArrayList
<Tuple
>();
37 public int cardinality() {
38 return content
.size();
41 public boolean isEmpty() {
42 return content
.size() == 0;
// Orders this bag relative to another object.
//  - Against a DataAtom the bag compares as greater (+1).
//  - Against a Tuple the bag compares as smaller (-1).
//  - Against another DataBag the cardinalities are compared first; bags of
//    equal cardinality are then walked in parallel, tuple by tuple.
45 public int compareTo(Object other
) {
46 if (other
instanceof DataAtom
) return +1;
47 if (other
instanceof Tuple
) return -1;
48 if (other
instanceof DataBag
){
49 DataBag bOther
= (DataBag
) other
;
// Different sizes: the sign of the size difference decides the order.
// Both sizes are non-negative list sizes, so the subtraction cannot overflow.
50 if (this.cardinality() != bOther
.cardinality()) {
51 return (this.cardinality() - bOther
.cardinality());
54 // same cardinality, so compare tuple by tuple ...
58 Iterator
<Tuple
> thisIt
= this.content();
59 Iterator
<Tuple
> otherIt
= bOther
.content();
// Advance both iterators in lock step and compare the paired tuples.
60 while (thisIt
.hasNext() && otherIt
.hasNext()) {
61 Tuple thisT
= thisIt
.next();
62 Tuple otherT
= otherIt
.next();
// NOTE(review): presumably a non-zero c is returned immediately -- confirm.
64 int c
= thisT
.compareTo(otherT
);
68 return 0; // if we got this far, they must be equal
75 public boolean equals(Datum other
) {
76 return (compareTo(other
) == 0);
80 EvalItem item
= new StarEvalItem(null);
81 EvalItemList itemList
= new EvalItemList(null);
87 public void sort(EvalItemList spec
) {
88 Collections
.sort(content
, spec
.getComparator());
// Arranges the bag's tuples according to the given spec.
// NOTE(review): presumably this delegates to sort(spec) -- confirm.
91 public void arrange(EvalItemList spec
) {
// Finds duplicate tuples under the ordering defined by spec.
// The backing list is first sorted in place with spec's comparator, so that
// tuples comparing as equal become adjacent; the sorted list is then walked
// once, comparing each tuple with lastTup.
// NOTE(review): presumably a tuple equal to lastTup is dropped through the
// iterator and lastTup otherwise advances -- confirm.
95 public void distinct(EvalItemList spec
) {
96 Comparator
<Tuple
> comparator
= spec
.getComparator();
// Side effect: reorders the bag's contents.
98 Collections
.sort(content
, comparator
);
100 Tuple lastTup
= null;
101 for (Iterator
<Tuple
> it
= content
.iterator(); it
.hasNext(); ) {
102 Tuple thisTup
= it
.next();
// First tuple of the walk: nothing to compare against yet.
104 if (lastTup
== null) {
// A comparator result of 0 marks thisTup as a duplicate of lastTup.
109 if (comparator
.compare(thisTup
, lastTup
) == 0) {
117 public Iterator
<Tuple
> content() {
118 return content
.iterator();
// Adds one tuple to the bag.
// NOTE(review): presumably appends t to the backing list; whether a null t
// is accepted should be confirmed.
122 public void add(Tuple t
) {
// Adds the tuples of another bag to this one.
// A BigDataBag argument is drained through its content() iterator, one tuple
// at a time; the other path appends the argument's backing list with a
// single List.addAll call.
// NOTE(review): presumably the bulk append is the else-branch of the
// BigDataBag test, so each tuple is added exactly once -- confirm.
127 public void addAll(DataBag b
) {
128 if (b
instanceof BigDataBag
) {
129 Iterator
<Tuple
> it
= b
.content();
130 while (it
.hasNext()) {
131 content
.add(it
.next());
// Bulk path: reads the other bag's backing list directly.
134 content
.addAll(b
.content
);
// Removes a tuple from the bag.
// NOTE(review): presumably delegates to the backing list's remove(Object),
// which would remove only the first matching element -- confirm.
138 public void remove(Tuple d
) {
143 * Returns the value of field i. Since the bag may hold more than one tuple, this
144 * function throws an IOException unless all tuples agree on this field, or if the bag is empty.
// Reads field i from every tuple in the bag and checks that they all agree.
// Throws IOException when two tuples disagree on the field, and also when
// the bag is empty.
146 public DataAtom
getField(int i
) throws IOException
{
149 for (Iterator
<Tuple
> it
= content(); it
.hasNext();) {
150 DataAtom currentVal
= it
.next().getAtomField(i
);
// Agreement is judged on the string form (strval) of the two atoms.
// NOTE(review): val presumably holds the value taken from the first tuple -- confirm.
155 if (!val
.strval().equals(currentVal
.strval()))
156 throw new IOException("Cannot call getField on a databag unless all tuples agree.");
// NOTE(review): presumably reached only when no tuple was seen (val still unset) -- confirm.
161 throw new IOException("Cannot call getField on an empty databag.");
167 content
= new ArrayList
<Tuple
>();
168 Runtime
.getRuntime().gc();
// Builds a string form of the bag by appending each tuple's toString()
// output to a StringBuffer, in iteration order.
// NOTE(review): any delimiters or separators written around the tuples
// should be confirmed against the full method body.
171 public String
toString() {
172 StringBuffer sb
= new StringBuffer();
// Raw Iterator: elements are only used through Object.toString().
174 for (Iterator it
= content(); it
.hasNext();) {
175 sb
.append(it
.next().toString());
180 return sb
.toString();