2 * Copyright (c) 2007 Yahoo! Inc. All rights reserved.
3 * See accompanying LICENSE file.
5 package com
.yahoo
.pig
.builtin
;
7 import java
.io
.IOException
;
8 import java
.util
.Iterator
;
10 import com
.yahoo
.pig
.BagEvalFunc
;
11 import com
.yahoo
.pig
.data
.DataAtom
;
12 import com
.yahoo
.pig
.data
.DataBag
;
13 import com
.yahoo
.pig
.data
.DataCollector
;
14 import com
.yahoo
.pig
.data
.Tuple
;
17 * DIFF compares the fields of a tuple with arity 2. If the fields are DataBags, it
18 * will emit any Tuples that are in one of the DataBags but not the other. If the
19 * fields are values, it will emit tuples with values that do not match.
24 public class DIFF
extends BagEvalFunc
{
26 * Compares a tuple with two fields. Emits any differences.
27 * @param input a tuple with exactly two fields.
28 * @throws IOException if there are not exactly two fields in a tuple
31 public void exec(Tuple input
, DataCollector output
) throws IOException
{
32 if (input
.arity() != 2) {
33 throw new IOException("DIFF must compare two fields not " + input
.arity());
35 if (input
.getField(0) instanceof DataBag
) {
36 DataBag field1
= input
.getBagField(0);
37 DataBag field2
= input
.getBagField(1);
38 Iterator
<Tuple
> it1
= field1
.content();
39 checkInBag(field2
, it1
, output
);
40 Iterator
<Tuple
> it2
= field2
.content();
41 checkInBag(field1
, it2
, output
);
43 DataAtom d1
= input
.getAtomField(0);
44 DataAtom d2
= input
.getAtomField(1);
46 output
.add(new Tuple(d1
));
47 output
.add(new Tuple(d2
));
52 private void checkInBag(DataBag bag
, Iterator
<Tuple
> iterator
, DataCollector emitTo
) throws IOException
{
53 while(iterator
.hasNext()) {
54 Tuple t
= iterator
.next();
55 Iterator
<Tuple
> it2
= bag
.content();
56 boolean found
= false;
57 while(it2
.hasNext()) {
58 if (t
.equals(it2
.next())) {