2 * Copyright (c) 2001 - 2005
3 * Yuan Wang. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Redistributions in any form must be accompanied by information on
14 * how to obtain complete source code for the X-Diff software and any
15 * accompanying software that uses the X-Diff software. The source code
16 * must either be included in the distribution or be available for no
17 * more than the cost of distribution plus a nominal fee, and must be
18 * freely redistributable under reasonable conditions. For an executable
19 * file, complete source code means the source code for all modules it
20 * contains. It does not include source code for modules or files that
21 * typically accompany the major components of the operating system on
22 * which the executable file runs.
24 * THIS SOFTWARE IS PROVIDED BY YUAN WANG "AS IS" AND ANY EXPRESS OR IMPLIED
25 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT,
27 * ARE DISCLAIMED. IN NO EVENT SHALL YUAN WANG BE LIABLE FOR ANY DIRECT,
28 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
40 #include <xercesc/sax/InputSource.hpp>
41 #include <xercesc/framework/LocalFileInputSource.hpp>
43 using namespace xercesc
;
55 const int XDiff::_CIRCUIT_SIZE
= 2048;
56 const int XDiff::_MATRIX_SIZE
= 1024;
57 const int XDiff::_ATTRIBUTE_SIZE
= 1024;
58 const int XDiff::_TEXT_SIZE
= 1024;
60 XDiff::XDiff(const char* input1
, const char* input2
,
61 bool optimal
, bool greedy
, double no_match_threshold
, int sample_count
)
63 // open the first file.
64 XMLCh
*wstr1
= XMLString::transcode(input1
);
65 InputSource
*source1
= new LocalFileInputSource(wstr1
);
66 source1
->setPublicId(wstr1
);
67 if (wstr1
) XMLString::release(&wstr1
);
69 // open the second file.
70 XMLCh
*wstr2
= XMLString::transcode(input2
);
71 InputSource
*source2
= new LocalFileInputSource(wstr2
);
72 source2
->setPublicId(wstr2
);
73 if (wstr2
) XMLString::release(&wstr2
);
75 init(*source1
, *source2
, optimal
, greedy
, no_match_threshold
, sample_count
);
77 if (source2
) delete source2
;
78 if (source1
) delete source1
;
82 void XDiff::init(const InputSource
&source1
, const InputSource
&source2
,
83 bool optimal
, bool greedy
, double no_match_threshold
, int sample_count
)
85 struct timeval
*tv0
= new struct timeval
;
86 struct timeval
*tv1
= new struct timeval
;
87 struct timeval
*tv2
= new struct timeval
;
88 struct timeval
*tv3
= new struct timeval
;
89 struct timezone
*tz
= new struct timezone
;
98 _leastCostMatrix
= NULL
;
101 _sampleCount
= sample_count
;
102 _NO_MATCH_THRESHOLD
= no_match_threshold
;
107 gettimeofday(tv0
, tz
);
109 // parse the first file.
110 XParser
*parser1
= new XParser();
111 _xtree1
= parser1
->parse(source1
);
113 gettimeofday(tv1
, tz
);
115 // parse the second file.
116 XParser
*parser2
= new XParser();
117 _xtree2
= parser2
->parse(source2
);
119 gettimeofday(tv2
, tz
);
121 // check both root nodes.
122 int root1
= _xtree1
->getRoot();
123 int root2
= _xtree2
->getRoot();
124 if (_xtree1
->getHashValue(root1
) == _xtree2
->getHashValue(root2
))
126 cerr
<< "No difference!" << endl
;
127 cerr
<< "Execution time:\t\t" << _diffTime(tv0
, tv2
) << " s\n";
128 cerr
<< "Parsing " << XMLString::transcode(source1
.getPublicId())
129 << " ( " << XMLString::transcode(source1
.getSystemId()) << " )"
130 << ":\t" << _diffTime(tv0
, tv1
) << " s\n";
131 cerr
<< "Parsing " << XMLString::transcode(source2
.getPublicId())
132 << " ( " << XMLString::transcode(source2
.getSystemId()) << " )"
133 << ":\t" << _diffTime(tv1
, tv2
) << " s\n";
137 if (_xtree1
->getTag(root1
).compare(_xtree2
->getValue(root2
)) != 0)
139 cerr
<< "The root is changed!" << endl
;
140 _xtree1
->addMatching(root1
, XTree::DELETE
, root2
);
141 _xtree2
->addMatching(root2
, XTree::INSERT
, root1
);
145 _attrList1
= new int[_ATTRIBUTE_SIZE
];
146 _attrList2
= new int[_ATTRIBUTE_SIZE
];
147 _textList1
= new int[_TEXT_SIZE
];
148 _textList2
= new int[_TEXT_SIZE
];
149 _attrMatch
= new bool[_ATTRIBUTE_SIZE
];
150 _textMatch1
= new bool[_TEXT_SIZE
];
151 _textMatch2
= new bool[_TEXT_SIZE
];
152 _attrHash
= new unsigned long long[_ATTRIBUTE_SIZE
];
153 _textHash
= new unsigned long long[_TEXT_SIZE
];
154 _attrTag
= new string
[_ATTRIBUTE_SIZE
];
155 _leastCostMatrix
= new int*[_MATRIX_SIZE
];
156 _pathMatrix
= new int*[_MATRIX_SIZE
];
157 _circuit
= new int[_CIRCUIT_SIZE
];
158 for (int i
= 0; i
< _MATRIX_SIZE
; i
++)
160 _leastCostMatrix
[i
] = new int[_MATRIX_SIZE
];
161 _pathMatrix
[i
] = new int[_MATRIX_SIZE
];
163 _xtree1
->addMatching(root1
, XTree::CHANGE
, root2
);
164 _xtree2
->addMatching(root2
, XTree::CHANGE
, root1
);
165 _seed
= (unsigned int)(tv0
->tv_usec
& 0xffffffff);
166 _xlut
= new XLut((_xtree1
->getNodeCount() > 0xffff) ||
167 (_xtree2
->getNodeCount() > 0xffff));
168 xdiff(root1
, root2
, false);
171 gettimeofday(tv3
, tz
);
173 cerr
<< "Difference detected!" << endl
;
174 cerr
<< "Parsing " << XMLString::transcode(source1
.getPublicId())
175 << " ( " << XMLString::transcode(source1
.getSystemId()) << " )"
176 << ":\t" << _diffTime(tv0
, tv1
) << " s\n";
177 cerr
<< "Parsing " << XMLString::transcode(source2
.getPublicId())
178 << " ( " << XMLString::transcode(source2
.getSystemId()) << " )"
179 << ":\t" << _diffTime(tv1
, tv2
) << " s\n";
180 cerr
<< "Diffing:\t\t" << _diffTime(tv2
, tv3
) << " s\n";
182 delete _xlut
; _xlut
= NULL
;
183 delete[] _attrList1
; _attrList1
= NULL
;
184 delete[] _attrList2
; _attrList2
= NULL
;
185 delete[] _attrMatch
; _attrMatch
= NULL
;
186 delete[] _attrHash
; _attrHash
= NULL
;
187 delete[] _attrTag
; _attrTag
= NULL
;
188 delete[] _circuit
; _circuit
= NULL
;
189 for (int i
= 0; i
< _MATRIX_SIZE
; i
++)
191 if (_leastCostMatrix
) delete[] _leastCostMatrix
[i
];
192 if (_pathMatrix
) delete[] _pathMatrix
[i
];
194 delete[] _leastCostMatrix
; _leastCostMatrix
= NULL
;
195 delete[] _pathMatrix
; _pathMatrix
= NULL
;
211 void XDiff::xdiff(int pid1
, int pid2
, bool matchFlag
)
214 int attrCount1
= 0, attrCount2
= 0;
215 int attr1
= _xtree1
->getFirstAttribute(pid1
);
216 while (attr1
!= XTree::NULL_NODE
)
218 _attrList1
[attrCount1
++] = attr1
;
219 attr1
= _xtree1
->getNextAttribute(attr1
);
221 int attr2
= _xtree2
->getFirstAttribute(pid2
);
222 while (attr2
!= XTree::NULL_NODE
)
224 _attrList2
[attrCount2
++] = attr2
;
225 attr2
= _xtree2
->getNextAttribute(attr2
);
230 diffAttributes(attrCount1
, attrCount2
);
233 for (int i
= 0; i
< attrCount1
; i
++)
234 _xtree1
->addMatching(_attrList1
[i
],
238 else if (attrCount2
> 0) // attrCount1 == 0
240 for (int i
= 0; i
< attrCount2
; i
++)
241 _xtree2
->addMatching(_attrList2
[i
], XTree::NO_MATCH
);
244 // Match element nodes.
245 int count1
= _xtree1
->getChildrenCount(pid1
) - attrCount1
;
246 int count2
= _xtree2
->getChildrenCount(pid2
) - attrCount2
;
248 if ((count1
== 0) && (count2
== 0))
251 else if (count1
== 0)
253 int node2
= _xtree2
->getFirstChild(pid2
);
254 _xtree2
->addMatching(node2
, XTree::NO_MATCH
);
255 for (int i
= 1; i
< count2
; i
++)
257 node2
= _xtree2
->getNextSibling(node2
);
258 _xtree2
->addMatching(attr2
, XTree::NO_MATCH
);
261 else if (count2
== 0)
263 int node1
= _xtree1
->getFirstChild(pid1
);
264 _xtree1
->addMatching(node1
, XTree::NO_MATCH
);
265 for (int i
= 0; i
< count1
; i
++)
267 node1
= _xtree1
->getNextSibling(node1
);
268 _xtree1
->addMatching(node1
, XTree::NO_MATCH
);
271 else if ((count1
== 1) && (count2
== 1))
273 int node1
= _xtree1
->getFirstChild(pid1
);
274 int node2
= _xtree2
->getFirstChild(pid2
);
276 if (_xtree1
->getHashValue(node1
) == _xtree2
->getHashValue(node2
))
279 bool isE1
= _xtree1
->isElement(node1
);
280 bool isE2
= _xtree2
->isElement(node2
);
284 if (_xtree1
->getTag(node1
).compare(_xtree2
->getTag(node2
)) == 0)
286 _xtree1
->addMatching(node1
, XTree::CHANGE
, node2
);
287 _xtree2
->addMatching(node2
, XTree::CHANGE
, node1
);
288 xdiff(node1
, node2
, matchFlag
);
292 _xtree1
->addMatching(node1
, XTree::NO_MATCH
);
293 _xtree2
->addMatching(node2
, XTree::NO_MATCH
);
296 else if (!isE1
&& !isE2
)
298 _xtree1
->addMatching(node1
, XTree::CHANGE
, node2
);
299 _xtree2
->addMatching(node2
, XTree::CHANGE
, node1
);
303 _xtree1
->addMatching(node1
, XTree::NO_MATCH
);
304 _xtree2
->addMatching(node2
, XTree::NO_MATCH
);
309 int *elements1
= new int[count1
];
310 int *elements2
= new int[count2
];
311 int elementCount1
= 0, textCount1
= 0;
312 int elementCount2
= 0, textCount2
= 0;
314 int child1
= _xtree1
->getFirstChild(pid1
);
315 if (_xtree1
->isElement(child1
))
316 elements1
[elementCount1
++] = child1
;
318 _textList1
[textCount1
++] = child1
;
319 for (int i
= 1; i
< count1
; i
++)
321 child1
= _xtree1
->getNextSibling(child1
);
322 if (_xtree1
->isElement(child1
))
323 elements1
[elementCount1
++] = child1
;
325 _textList1
[textCount1
++] = child1
;
328 int child2
= _xtree2
->getFirstChild(pid2
);
329 if (_xtree2
->isElement(child2
))
330 elements2
[elementCount2
++] = child2
;
332 _textList2
[textCount2
++] = child2
;
333 for (int i
= 1; i
< count2
; i
++)
335 child2
= _xtree2
->getNextSibling(child2
);
336 if (_xtree2
->isElement(child2
))
337 elements2
[elementCount2
++] = child2
;
339 _textList2
[textCount2
++] = child2
;
346 diffText(textCount1
, textCount2
);
349 for (int i
= 0; i
< textCount1
; i
++)
350 _xtree1
->addMatching(_textList1
[i
],
354 else if (textCount2
> 0) // textCount1 == 0
356 for (int i
= 0; i
< textCount2
; i
++)
357 _xtree2
->addMatching(_textList2
[i
],
361 bool *matched1
= new bool[elementCount1
];
362 bool *matched2
= new bool[elementCount2
];
363 int mcount
= _matchFilter(elements1
, elements2
,
365 elementCount1
, elementCount2
);
367 if ((elementCount1
== mcount
) &&
368 (elementCount2
== mcount
))
371 else if (elementCount1
== mcount
)
373 for (int i
= 0; i
< elementCount2
; i
++)
376 _xtree2
->addMatching(elements2
[i
],
380 else if (elementCount2
== mcount
)
382 for (int i
= 0; i
< elementCount1
; i
++)
385 _xtree1
->addMatching(elements1
[i
],
391 // Write the list of unmatched nodes.
392 int ucount1
= elementCount1
- mcount
;
393 int ucount2
= elementCount2
- mcount
;
394 int *unmatched1
= new int[ucount1
];
395 int *unmatched2
= new int[ucount2
];
396 int muc1
= 0, muc2
= 0;
399 while ((muc1
< ucount1
) && (muc2
< ucount2
))
401 for (; (start
< elementCount1
) && matched1
[start
]; start
++);
402 string startTag
= _xtree1
->getTag(elements1
[start
]);
403 int uele1
= 0, uele2
= 0;
405 unmatched1
[uele1
++] = elements1
[start
];
406 matched1
[start
++] = true;
408 for (int i
= start
; (i
< elementCount1
) && (muc1
< ucount1
); i
++)
410 if (!matched1
[i
] && (startTag
.compare(_xtree1
->getTag(elements1
[i
])) == 0))
414 unmatched1
[uele1
++] = elements1
[i
];
418 for (int i
= 0; (i
< elementCount2
) && (muc2
< ucount2
); i
++)
420 if (!matched2
[i
] && (startTag
.compare(_xtree2
->getTag(elements2
[i
])) == 0))
424 unmatched2
[uele2
++] = elements2
[i
];
430 for (int i
= 0; i
< uele1
; i
++)
431 _xtree1
->addMatching(unmatched1
[i
], XTree::NO_MATCH
);
435 if ((uele1
== 1) && (uele2
== 1))
437 _xtree1
->addMatching(unmatched1
[0], XTree::CHANGE
, unmatched2
[0]);
438 _xtree2
->addMatching(unmatched2
[0], XTree::CHANGE
, unmatched1
[0]);
440 unmatched2
[0], matchFlag
);
442 // To find minimal-cost matching between those unmatched elements (with the same tag name.
443 else if (uele1
>= uele2
)
445 if ((uele2
<= _sampleCount
) ||
447 matchListO(unmatched1
, unmatched2
, uele1
, uele2
, true, matchFlag
);
449 matchList(unmatched1
, unmatched2
, uele1
, uele2
, true, matchFlag
);
453 if ((uele1
<= _sampleCount
) ||
455 matchListO(unmatched2
, unmatched1
, uele2
, uele1
, false, matchFlag
);
457 matchList(unmatched2
, unmatched1
, uele2
, uele1
, false, matchFlag
);
464 for (int i
= start
; i
< elementCount1
; i
++)
467 _xtree1
->addMatching(elements1
[i
], XTree::NO_MATCH
);
470 else if (muc2
< ucount2
)
472 for (int i
= 0; i
< elementCount2
; i
++)
475 _xtree2
->addMatching(elements2
[i
], XTree::NO_MATCH
);
490 void XDiff::diffAttributes(int attrCount1
, int attrCount2
)
492 if ((attrCount1
== 1) && (attrCount2
== 1))
494 int a1
= _attrList1
[0];
495 int a2
= _attrList2
[0];
496 if (_xtree1
->getHashValue(a1
) == _xtree2
->getHashValue(a2
))
499 if (_xtree1
->getTag(a1
).compare(_xtree2
->getTag(a2
)) == 0)
501 _xtree1
->addMatching(a1
, XTree::CHANGE
, a2
);
502 _xtree2
->addMatching(a2
, XTree::CHANGE
, a1
);
503 _xtree1
->addMatching(_xtree1
->getFirstChild(a1
),
505 _xtree2
->getFirstChild(a2
));
506 _xtree2
->addMatching(_xtree2
->getFirstChild(a2
),
508 _xtree1
->getFirstChild(a1
));
512 _xtree1
->addMatching(a1
, XTree::NO_MATCH
);
513 _xtree2
->addMatching(a2
, XTree::NO_MATCH
);
518 for (int i
= 0; i
< attrCount2
; i
++)
520 _attrHash
[i
] = _xtree2
->getHashValue(_attrList2
[i
]);
521 _attrTag
[i
] = _xtree2
->getTag(_attrList2
[i
]);
522 _attrMatch
[i
] = false;
526 for (int i
= 0; i
< attrCount1
; i
++)
528 int attr1
= _attrList1
[i
];
529 unsigned long long ah1
= _xtree1
->getHashValue(attr1
);
530 string tag1
= _xtree1
->getTag(attr1
);
533 for (int j
= 0; j
< attrCount2
; j
++)
535 int attr2
= _attrList2
[j
];
538 else if (ah1
== _attrHash
[j
])
540 _attrMatch
[j
] = true;
545 else if (tag1
.compare(_attrTag
[j
]) == 0)
547 _attrMatch
[j
] = true;
550 _xtree1
->addMatching(attr1
, XTree::CHANGE
, attr2
);
551 _xtree2
->addMatching(attr2
, XTree::CHANGE
, attr1
);
552 _xtree1
->addMatching(_xtree1
->getFirstChild(attr1
), XTree::CHANGE
, _xtree2
->getFirstChild(attr2
));
553 _xtree2
->addMatching(_xtree2
->getFirstChild(attr2
), XTree::CHANGE
, _xtree1
->getFirstChild(attr1
));
561 _xtree1
->addMatching(attr1
, XTree::NO_MATCH
);
564 if (matchCount
!= attrCount2
)
566 for (int i
= 0; i
< attrCount2
; i
++)
569 _xtree2
->addMatching(_attrList2
[i
], XTree::NO_MATCH
);
574 void XDiff::diffText(int textCount1
, int textCount2
)
576 for (int i
= 0; i
< textCount1
; i
++)
577 _textMatch1
[i
] = false;
578 for (int i
= 0; i
< textCount2
; i
++)
580 _textMatch2
[i
] = false;
581 _textHash
[i
] = _xtree2
->getHashValue(_textList2
[i
]);
585 for (int i
= 0; i
< textCount1
; i
++)
587 unsigned long long hash1
= _xtree1
->getHashValue(_textList1
[i
]);
588 for (int j
= 0; j
< textCount2
; j
++)
590 if (!_textMatch2
[j
] && (hash1
== _textHash
[j
]))
592 _textMatch1
[i
] = true;
593 _textMatch2
[j
] = true;
599 if (mcount
== textCount2
)
603 if ((mcount
< textCount1
) && (textCount1
<= textCount2
))
605 for (int i
= 0, j
= 0;
606 (i
< textCount1
) && (mcount
< textCount1
); i
++)
610 for (; _textMatch2
[j
]; j
++);
611 _xtree1
->addMatching(_textList1
[i
], XTree::CHANGE
,
613 _textMatch1
[i
] = true;
614 _xtree2
->addMatching(_textList2
[j
], XTree::CHANGE
,
616 _textMatch2
[j
] = true;
620 else if ((mcount
< textCount2
) && (textCount2
< textCount1
))
622 for (int i
= 0, j
= 0;
623 (i
< textCount2
) && (mcount
< textCount2
); i
++)
627 for (; _textMatch1
[j
]; j
++);
628 _xtree2
->addMatching(_textList2
[i
], XTree::CHANGE
,
630 _textMatch2
[i
] = true;
631 _xtree1
->addMatching(_textList1
[j
], XTree::CHANGE
,
633 _textMatch1
[j
] = true;
638 if (mcount
< textCount1
)
640 for (int i
= 0; i
< textCount1
; i
++)
643 _xtree1
->addMatching(_textList1
[i
],
647 else if (mcount
< textCount2
)
649 for (int i
= 0; i
< textCount2
; i
++)
652 _xtree2
->addMatching(_textList2
[i
],
658 int XDiff::_matchFilter(int *elements1
, int *elements2
, bool *matched1
,
659 bool *matched2
, int count1
, int count2
)
661 unsigned long long *value1
= new unsigned long long[count1
];
662 unsigned long long *value2
= new unsigned long long[count2
];
664 for (int i
= 0; i
< count1
; i
++)
666 value1
[i
] = _xtree1
->getHashValue(elements1
[i
]);
669 for (int i
= 0; i
< count2
; i
++)
671 value2
[i
] = _xtree2
->getHashValue(elements2
[i
]);
676 for (int i
= 0; i
< count2
; i
++)
677 for (int j
= 0; j
< count1
; j
++)
679 if (!matched1
[j
] && !matched2
[i
] &&
680 (value1
[j
] == value2
[i
]))
695 void XDiff::matchListO(int *nodes1
, int *nodes2
, int count1
, int count2
,
696 bool treeOrder
, bool matchFlag
)
698 int **distanceMatrix
= new int*[count1
+1];
699 int *matching1
= new int[count1
];
700 int *matching2
= new int[count2
];
703 distanceMatrix
[count1
] = new int[count2
+1];
704 for (int i
= 0; i
< count2
; i
++)
705 distanceMatrix
[count1
][i
] = (treeOrder
? _xtree2
->getDecendentsCount(nodes2
[i
]) : _xtree1
->getDecendentsCount(nodes2
[i
])) + 1;
707 for (int i
= 0; i
< count1
; i
++)
709 distanceMatrix
[i
] = new int[count2
+1];
710 int deleteCost
= (treeOrder
? _xtree1
->getDecendentsCount(nodes1
[i
]) : _xtree2
->getDecendentsCount(nodes1
[i
])) + 1;
711 for (int j
= 0; j
< count2
; j
++)
715 dist
= treeOrder
? _xlut
->get(nodes1
[i
], nodes2
[j
]) : _xlut
->get(nodes2
[j
], nodes1
[i
]);
718 dist
= treeOrder
? distance(nodes1
[i
], nodes2
[j
], true, XTree::NO_CONNECTION
) : distance(nodes2
[j
], nodes1
[i
], true, XTree::NO_CONNECTION
);
721 if (!_oFlag
&& (dist
> 1) && (dist
>= _NO_MATCH_THRESHOLD
* (deleteCost
+ distanceMatrix
[count1
][j
])))
722 dist
= XTree::NO_CONNECTION
;
723 if (dist
< XTree::NO_CONNECTION
)
726 _xlut
->add(nodes1
[i
],
730 _xlut
->add(nodes2
[j
],
735 distanceMatrix
[i
][j
] = dist
;
738 distanceMatrix
[i
][count2
] = deleteCost
;
741 // compute the minimal cost matching.
742 findMatching(count1
, count2
, distanceMatrix
, matching1
, matching2
);
744 for (int i
= 0; i
< count1
; i
++)
746 if (matching1
[i
] == XTree::NO_MATCH
)
749 _xtree1
->addMatching(nodes1
[i
], XTree::NO_MATCH
);
751 _xtree2
->addMatching(nodes1
[i
], XTree::NO_MATCH
);
756 _xtree1
->addMatching(nodes1
[i
], XTree::CHANGE
,
757 nodes2
[matching1
[i
]]);
759 _xtree2
->addMatching(nodes1
[i
], XTree::CHANGE
,
760 nodes2
[matching1
[i
]]);
764 for (int i
= 0; i
< count2
; i
++)
766 if (matching2
[i
] == XTree::NO_MATCH
)
769 _xtree2
->addMatching(nodes2
[i
], XTree::NO_MATCH
);
771 _xtree1
->addMatching(nodes2
[i
], XTree::NO_MATCH
);
776 _xtree2
->addMatching(nodes2
[i
], XTree::CHANGE
,
777 nodes1
[matching2
[i
]]);
779 _xtree1
->addMatching(nodes2
[i
], XTree::CHANGE
,
780 nodes1
[matching2
[i
]]);
784 for (int i
= 0; i
< count1
; i
++)
786 if (matching1
[i
] != XTree::NO_MATCH
)
788 int todo1
= nodes1
[i
];
789 int todo2
= nodes2
[matching1
[i
]];
792 if (_xtree1
->isElement(todo1
) &&
793 _xtree2
->isElement(todo2
))
794 xdiff(todo1
, todo2
, true);
798 if (_xtree1
->isElement(todo2
) &&
799 _xtree2
->isElement(todo1
))
800 xdiff(todo2
, todo1
, true);
807 for (int i
= 0; i
<= count1
; i
++)
808 delete[] distanceMatrix
[i
];
809 delete[] distanceMatrix
;
812 void XDiff::matchList(int *nodes1
, int *nodes2
, int count1
, int count2
,
813 bool treeOrder
, bool matchFlag
)
815 int *matching1
= new int[count1
];
816 int *matching2
= new int[count2
];
817 for (int i
= 0; i
< count1
; i
++)
818 matching1
[i
] = XTree::NO_MATCH
;
819 for (int i
= 0; i
< count2
; i
++)
820 matching2
[i
] = XTree::NO_MATCH
;
824 for (int i
= 0; i
< count1
; i
++)
826 for (int j
= 0; j
< count2
; j
++)
828 int d
= treeOrder
? _xlut
->get(nodes1
[i
], nodes2
[j
]) : _xlut
->get(nodes2
[j
], nodes1
[i
]);
829 if (d
!= XTree::NO_CONNECTION
)
842 int matchingThreshold
= 0;
843 for (int i
= 0; ((i
< _sampleCount
) && (scount2
< count2
)); scount2
++)
847 int snode
= (int)((long long)_seed
* (count2
- scount2
) / (RAND_MAX
+ 1)) + scount2
;
848 int dist
= XTree::NO_CONNECTION
;
849 int bestmatch
= XTree::NULL_NODE
;
850 for (int j
= scount1
; j
< count1
; j
++)
852 int d
= treeOrder
? distance(nodes1
[j
], nodes2
[snode
], false, dist
) : distance(nodes2
[snode
], nodes1
[j
], false, dist
);
862 int deleteCost
= (treeOrder
? _xtree2
->getDecendentsCount(nodes2
[snode
]) : _xtree1
->getDecendentsCount(nodes2
[snode
])) + 1;
864 (dist
> _NO_MATCH_THRESHOLD
* deleteCost
))
866 int tmp
= nodes2
[snode
];
867 nodes2
[snode
] = nodes2
[scount2
];
868 nodes2
[scount2
] = tmp
;
872 int tmp
= nodes1
[bestmatch
];
873 nodes1
[bestmatch
] = nodes1
[scount1
];
874 nodes1
[scount1
] = tmp
;
876 nodes2
[snode
] = nodes2
[scount2
];
877 nodes2
[scount2
] = tmp
;
880 _xlut
->add(nodes1
[scount1
], nodes2
[scount2
], dist
);
882 _xlut
->add(nodes2
[scount2
], nodes1
[scount1
], dist
);
883 matching1
[scount1
] = scount2
;
884 matching2
[scount2
] = scount1
;
888 if (matchingThreshold
< dist
)
889 matchingThreshold
= dist
;
893 for (;scount2
< count2
; scount2
++)
895 int dist
= XTree::NO_CONNECTION
;
896 int bestmatch
= XTree::NO_MATCH
;
897 for (int i
= scount1
; i
< count1
; i
++)
899 int d
= treeOrder
? distance(nodes1
[i
], nodes2
[scount2
], false, dist
) : distance(nodes2
[scount2
], nodes1
[i
], false, dist
);
900 if (d
<= matchingThreshold
)
913 if (bestmatch
!= XTree::NO_MATCH
)
915 int tmp
= nodes1
[bestmatch
];
916 nodes1
[bestmatch
] = nodes1
[scount1
];
917 nodes1
[scount1
] = tmp
;
920 _xlut
->add(nodes1
[scount1
], nodes2
[scount2
], dist
);
922 _xlut
->add(nodes2
[scount2
], nodes1
[scount1
], dist
);
923 matching1
[scount1
] = scount2
;
924 matching2
[scount2
] = scount1
;
931 for (int i
= 0; i
< count1
; i
++)
933 if (matching1
[i
] == XTree::NO_MATCH
)
936 _xtree1
->addMatching(nodes1
[i
], XTree::NO_MATCH
);
938 _xtree2
->addMatching(nodes1
[i
], XTree::NO_MATCH
);
943 _xtree1
->addMatching(nodes1
[i
], XTree::CHANGE
,
944 nodes2
[matching1
[i
]]);
946 _xtree2
->addMatching(nodes1
[i
], XTree::CHANGE
,
947 nodes2
[matching1
[i
]]);
951 for (int i
= 0; i
< count2
; i
++)
953 if (matching2
[i
] == XTree::NO_MATCH
)
956 _xtree2
->addMatching(nodes2
[i
], XTree::NO_MATCH
);
958 _xtree1
->addMatching(nodes2
[i
], XTree::NO_MATCH
);
963 _xtree2
->addMatching(nodes2
[i
], XTree::CHANGE
,
964 nodes1
[matching2
[i
]]);
966 _xtree1
->addMatching(nodes2
[i
], XTree::CHANGE
,
967 nodes1
[matching2
[i
]]);
971 for (int i
= 0; i
< count1
; i
++)
973 if (matching1
[i
] != XTree::NO_MATCH
)
975 int todo1
= nodes1
[i
];
976 int todo2
= nodes2
[matching1
[i
]];
979 if (_xtree1
->isElement(todo1
) &&
980 _xtree2
->isElement(todo2
))
981 xdiff(todo1
, todo2
, true);
985 if (_xtree1
->isElement(todo2
) &&
986 _xtree2
->isElement(todo1
))
987 xdiff(todo2
, todo1
, true);
996 int XDiff::distance(int eid1
, int eid2
, bool toRecord
, int threshold
)
998 bool isE1
= _xtree1
->isElement(eid1
);
999 bool isE2
= _xtree2
->isElement(eid2
);
1002 if (_xtree1
->getTag(eid1
).compare(_xtree2
->getTag(eid2
)) != 0)
1003 return XTree::NO_CONNECTION
;
1006 int dist
= _xdiff(eid1
, eid2
, threshold
);
1007 if (toRecord
&& (dist
< XTree::NO_CONNECTION
))
1008 _xlut
->add(eid1
, eid2
, dist
);
1012 else if (!isE1
&& !isE2
)
1015 return XTree::NO_CONNECTION
;
1018 int XDiff::_xdiff(int pid1
, int pid2
, int threshold
)
1023 int attrCount1
= 0, attrCount2
= 0;
1024 int attr1
= _xtree1
->getFirstAttribute(pid1
);
1025 while (attr1
!= XTree::NULL_NODE
)
1027 _attrList1
[attrCount1
++] = attr1
;
1028 attr1
= _xtree1
->getNextAttribute(attr1
);
1030 int attr2
= _xtree2
->getFirstAttribute(pid2
);
1031 while (attr2
!= XTree::NULL_NODE
)
1033 _attrList2
[attrCount2
++] = attr2
;
1034 attr2
= _xtree2
->getNextAttribute(attr2
);
1037 if (attrCount1
== 0)
1038 dist
= attrCount2
* 2;
1039 else if (attrCount2
== 0)
1040 dist
= attrCount1
* 2;
1042 dist
= _diffAttributes(attrCount1
, attrCount2
);
1043 if (!_gFlag
&& (dist
>= threshold
))
1044 return XTree::NO_CONNECTION
;
1046 // Match element nodes.
1047 int count1
= _xtree1
->getChildrenCount(pid1
) - attrCount1
;
1048 int count2
= _xtree2
->getChildrenCount(pid2
) - attrCount2
;
1052 int node2
= _xtree2
->getFirstChild(pid2
);
1053 while (node2
!= XTree::NULL_NODE
)
1055 dist
+= _xtree2
->getDecendentsCount(node2
) + 1;
1056 if (!_gFlag
&& (dist
>= threshold
))
1057 return XTree::NO_CONNECTION
;
1058 node2
= _xtree2
->getNextSibling(node2
);
1061 else if (count2
== 0)
1063 int node1
= _xtree1
->getFirstChild(pid1
);
1064 while (node1
!= XTree::NULL_NODE
)
1066 dist
+= _xtree1
->getDecendentsCount(node1
) + 1;
1067 if (!_gFlag
&& (dist
>= threshold
))
1068 return XTree::NO_CONNECTION
;
1069 node1
= _xtree1
->getNextSibling(node1
);
1072 else if ((count1
== 1) && (count2
== 1))
1074 int node1
= _xtree1
->getFirstChild(pid1
);
1075 int node2
= _xtree2
->getFirstChild(pid2
);
1077 if (_xtree1
->getHashValue(node1
) == _xtree2
->getHashValue(node2
))
1080 bool isE1
= _xtree1
->isElement(node1
);
1081 bool isE2
= _xtree2
->isElement(node2
);
1085 if (_xtree1
->getTag(node1
).compare(_xtree2
->getTag(node2
)) == 0)
1086 dist
+= _xdiff(node1
, node2
, threshold
- dist
);
1088 dist
+= _xtree1
->getDecendentsCount(node1
) +
1089 _xtree2
->getDecendentsCount(node2
) + 2;
1091 else if (!isE1
&& !isE2
)
1094 dist
+= _xtree1
->getDecendentsCount(node1
) +
1095 _xtree2
->getDecendentsCount(node2
) + 2;
1099 int *elements1
= new int[count1
];
1100 int *elements2
= new int[count2
];
1101 int elementCount1
= 0, textCount1
= 0;
1102 int elementCount2
= 0, textCount2
= 0;
1104 int child1
= _xtree1
->getFirstChild(pid1
);
1105 if (_xtree1
->isElement(child1
))
1106 elements1
[elementCount1
++] = child1
;
1108 _textList1
[textCount1
++] = child1
;
1109 for (int i
= 1; i
< count1
; i
++)
1111 child1
= _xtree1
->getNextSibling(child1
);
1112 if (_xtree1
->isElement(child1
))
1113 elements1
[elementCount1
++] = child1
;
1115 _textList1
[textCount1
++] = child1
;
1118 int child2
= _xtree2
->getFirstChild(pid2
);
1119 if (_xtree2
->isElement(child2
))
1120 elements2
[elementCount2
++] = child2
;
1122 _textList2
[textCount2
++] = child2
;
1123 for (int i
= 1; i
< count2
; i
++)
1125 child2
= _xtree2
->getNextSibling(child2
);
1126 if (_xtree2
->isElement(child2
))
1127 elements2
[elementCount2
++] = child2
;
1129 _textList2
[textCount2
++] = child2
;
1132 // Match text nodes.
1133 if (textCount1
== 0)
1137 else if (textCount2
== 0)
1142 dist
+= _diffText(textCount1
, textCount2
);
1144 if (_gFlag
&& (dist
>= threshold
))
1145 return XTree::NO_CONNECTION
;
1147 bool *matched1
= new bool[elementCount1
];
1148 bool *matched2
= new bool[elementCount2
];
1149 int mcount
= _matchFilter(elements1
, elements2
,
1151 elementCount1
, elementCount2
);
1153 if ((elementCount1
== mcount
) &&
1154 (elementCount2
== mcount
))
1157 else if (elementCount1
== mcount
)
1159 for (int i
= 0; i
< elementCount2
; i
++)
1163 dist
+= _xtree2
->getDecendentsCount(elements2
[i
]) + 1;
1164 if (_gFlag
&& (dist
>= threshold
))
1166 dist
= XTree::NO_CONNECTION
;
1172 else if (elementCount2
== mcount
)
1174 for (int i
= 0; i
< elementCount1
; i
++)
1178 dist
+= _xtree1
->getDecendentsCount(elements1
[i
]) + 1;
1179 if (_gFlag
&& (dist
>= threshold
))
1181 dist
= XTree::NO_CONNECTION
;
1187 else //if ((count1 > mcount) && (count2 > mcount))
1189 // Write the list of unmatched nodes.
1190 int ucount1
= elementCount1
- mcount
;
1191 int ucount2
= elementCount2
- mcount
;
1192 int *unmatched1
= new int[ucount1
];
1193 int *unmatched2
= new int[ucount2
];
1194 int muc1
= 0, muc2
= 0, start
= 0;
1196 while ((muc1
< ucount1
) && (muc2
< ucount2
))
1198 for (; (start
< elementCount1
) && matched1
[start
]; start
++);
1199 string startTag
= _xtree1
->getTag(elements1
[start
]);
1200 int uele1
= 0, uele2
= 0;
1202 unmatched1
[uele1
++] = elements1
[start
];
1203 matched1
[start
++] = true;
1205 for (int i
= start
; (i
< elementCount1
) && (muc1
< ucount1
); i
++)
1207 if (!matched1
[i
] && (startTag
.compare(_xtree1
->getTag(elements1
[i
])) == 0))
1211 unmatched1
[uele1
++] = elements1
[i
];
1215 for (int i
= 0; (i
< elementCount2
) && (muc2
< ucount2
); i
++)
1217 if (!matched2
[i
] && (startTag
.compare(_xtree2
->getTag(elements2
[i
])) == 0))
1221 unmatched2
[uele2
++] = elements2
[i
];
1227 for (int i
= 0; i
< uele1
; i
++)
1228 dist
+= _xtree1
->getDecendentsCount(unmatched1
[i
]);
1233 if ((uele1 == 1) && (uele2 == 1))
1235 dist += _xdiff(unmatched1[0],
1239 // To find minimal-cost matching between those unmatched elements (with the same tag name.
1240 else if (uele1 >= uele2)
1244 if ((uele2
<= _sampleCount
) ||
1246 dist
+= _matchListO(unmatched1
, unmatched2
, uele1
, uele2
, true);
1248 dist
+= _matchList(unmatched1
, unmatched2
, uele1
, uele2
, true, threshold
- dist
);
1252 if ((uele1
<= _sampleCount
) ||
1254 dist
+= _matchListO(unmatched2
, unmatched1
, uele2
, uele1
, false);
1256 dist
+= _matchList(unmatched2
, unmatched1
, uele2
, uele1
, false, threshold
- dist
);
1260 if (_gFlag
&& (dist
>= threshold
))
1262 dist
= XTree::NO_CONNECTION
;
1267 if (dist
< XTree::NO_CONNECTION
)
1271 for (int i
= start
; i
< elementCount1
; i
++)
1275 dist
=+ _xtree1
->getDecendentsCount(elements1
[i
]);
1276 if (_gFlag
&& (dist
>= threshold
))
1278 dist
= XTree::NO_CONNECTION
;
1284 else if (muc2
< ucount2
)
1286 for (int i
= 0; i
< elementCount2
; i
++)
1290 dist
+= _xtree2
->getDecendentsCount(elements2
[i
]);
1291 if (_gFlag
&& (dist
>= threshold
))
1293 dist
= XTree::NO_CONNECTION
;
1301 delete[] unmatched1
;
1302 delete[] unmatched2
;
1311 if (!_gFlag
|| (dist
< threshold
))
1314 return XTree::NO_CONNECTION
;
1317 int XDiff::_diffAttributes(int attrCount1
, int attrCount2
)
1319 if ((attrCount1
== 1) && (attrCount2
== 1))
1321 int a1
= _attrList1
[0];
1322 int a2
= _attrList2
[0];
1323 if (_xtree1
->getHashValue(a1
) == _xtree2
->getHashValue(a2
))
1326 if (_xtree1
->getTag(a1
).compare(_xtree2
->getTag(a2
)) == 0)
1333 for (int i
= 0; i
< attrCount2
; i
++)
1335 _attrHash
[i
] = _xtree2
->getHashValue(_attrList2
[i
]);
1336 _attrTag
[i
] = _xtree2
->getTag(_attrList2
[i
]);
1337 _attrMatch
[i
] = false;
1341 for (int i
= 0; i
< attrCount1
; i
++)
1343 int attr1
= _attrList1
[i
];
1344 unsigned long long ah1
= _xtree1
->getHashValue(attr1
);
1345 string tag1
= _xtree1
->getTag(attr1
);
1348 for (int j
= 0; j
< attrCount2
; j
++)
1352 else if (ah1
== _attrHash
[j
])
1354 _attrMatch
[j
] = true;
1359 else if (tag1
.compare(_attrTag
[j
]) == 0)
1361 _attrMatch
[j
] = true;
1373 dist
+= (attrCount2
- matchCount
) * 2;
1377 int XDiff::_diffText(int textCount1
, int textCount2
)
1379 for (int i
= 0; i
< textCount2
; i
++)
1381 _textMatch2
[i
] = false;
1382 _textHash
[i
] = _xtree2
->getHashValue(_textList2
[i
]);
1386 for (int i
= 0; i
< textCount1
; i
++)
1388 unsigned long long hash1
= _xtree1
->getHashValue(_textList1
[i
]);
1389 for (int j
= 0; j
< textCount2
; j
++)
1391 if (!_textMatch2
[j
] && (hash1
== _textHash
[j
]))
1393 _textMatch2
[j
] = true;
1399 if (mcount
== textCount2
)
1403 if (textCount1
>= textCount2
)
1404 return textCount1
- mcount
;
1406 return textCount2
- mcount
;
1409 int XDiff::_matchListO(int *nodes1
, int *nodes2
, int count1
, int count2
,
1412 int **distanceMatrix
= new int*[count1
+1];
1413 int *matching1
= new int[count1
];
1414 int *matching2
= new int[count2
];
1417 distanceMatrix
[count1
] = new int[count2
+1];
1418 for (int i
= 0; i
< count2
; i
++)
1419 distanceMatrix
[count1
][i
] = (treeOrder
? _xtree2
->getDecendentsCount(nodes2
[i
]) : _xtree1
->getDecendentsCount(nodes2
[i
])) + 1;
1421 for (int i
= 0; i
< count1
; i
++)
1423 distanceMatrix
[i
] = new int[count2
+1];
1424 int deleteCost
= (treeOrder
? _xtree1
->getDecendentsCount(nodes1
[i
]) : _xtree2
->getDecendentsCount(nodes1
[i
])) + 1;
1425 for (int j
= 0; j
< count2
; j
++)
1427 int dist
= treeOrder
? distance(nodes1
[i
], nodes2
[j
], true, XTree::NO_CONNECTION
) : distance(nodes2
[j
], nodes1
[i
], true, XTree::NO_CONNECTION
);
1429 // the default mode.
1430 if (!_oFlag
&& (dist
> 1) &&
1431 (dist
< XTree::NO_CONNECTION
) &&
1432 (dist
>= _NO_MATCH_THRESHOLD
*
1433 (deleteCost
+ distanceMatrix
[count1
][j
])))
1434 dist
= XTree::NO_CONNECTION
;
1436 if (dist
< XTree::NO_CONNECTION
)
1439 _xlut
->add(nodes1
[i
], nodes2
[j
], dist
);
1441 _xlut
->add(nodes2
[j
], nodes1
[i
], dist
);
1443 distanceMatrix
[i
][j
] = dist
;
1446 distanceMatrix
[i
][count2
] = deleteCost
;
1449 // compute the minimal cost matching.
1450 int dist_value
= findMatching(count1
, count2
, distanceMatrix
,
1451 matching1
, matching2
);
1455 for (int i
= 0; i
<= count1
; i
++)
1456 delete[] distanceMatrix
[i
];
1457 delete[] distanceMatrix
;
1462 int XDiff::_matchList(int *nodes1
, int *nodes2
, int count1
, int count2
,
1463 bool treeOrder
, int threshold
)
1465 int *matching1
= new int[count1
];
1466 int *matching2
= new int[count2
];
1467 for (int i
= 0; i
< count1
; i
++)
1468 matching1
[i
] = XTree::NULL_NODE
;
1469 for (int i
= 0; i
< count2
; i
++)
1470 matching2
[i
] = XTree::NULL_NODE
;
1475 int matchingThreshold
= 0;
1477 for (int i
= 0; ((i
< _sampleCount
) && (scount2
< count2
)); scount2
++)
1479 int snode
= rand() % (count2
- scount2
) + scount2
;
1480 int dist
= XTree::NO_CONNECTION
;
1481 int bestmatch
= XTree::NULL_NODE
;
1482 for (int j
= scount1
; j
< count1
; j
++)
1484 int d
= treeOrder
? distance(nodes1
[j
], nodes2
[snode
], false, threshold
- Distance
) : distance(nodes2
[snode
], nodes1
[j
], false, threshold
- Distance
);
1494 int deleteCost
= (treeOrder
? _xtree2
->getDecendentsCount(nodes2
[snode
]) : _xtree1
->getDecendentsCount(nodes2
[snode
])) + 1;
1496 if ((dist
> 1) && (dist
> _NO_MATCH_THRESHOLD
* deleteCost
))
1498 int tmp
= nodes2
[snode
];
1499 nodes2
[snode
] = nodes2
[scount2
];
1500 nodes2
[scount2
] = tmp
;
1501 Distance
+= deleteCost
;
1505 int tmp
= nodes1
[bestmatch
];
1506 nodes1
[bestmatch
] = nodes1
[scount1
];
1507 nodes1
[scount1
] = tmp
;
1508 tmp
= nodes2
[snode
];
1509 nodes2
[snode
] = nodes2
[scount2
];
1510 nodes2
[scount2
] = tmp
;
1513 _xlut
->add(nodes1
[scount1
], nodes2
[scount2
], dist
);
1515 _xlut
->add(nodes2
[scount2
], nodes1
[scount1
], dist
);
1516 matching1
[scount1
] = scount2
;
1517 matching2
[scount2
] = scount1
;
1521 if (matchingThreshold
< dist
)
1522 matchingThreshold
= dist
;
1526 if (Distance
>= threshold
)
1530 return XTree::NO_CONNECTION
;
1534 for (;scount2
< count2
; scount2
++)
1536 int deleteCost
= (treeOrder
? _xtree2
->getDecendentsCount(nodes2
[scount2
]) : _xtree1
->getDecendentsCount(nodes2
[scount2
])) + 1;
1537 int dist
= XTree::NO_CONNECTION
;
1538 int bestmatch
= XTree::NULL_NODE
;
1539 for (int i
= scount1
; i
< count1
; i
++)
1541 int d
= treeOrder
? distance(nodes1
[i
], nodes2
[scount2
], false, threshold
- Distance
) : distance(nodes2
[scount2
], nodes1
[i
], false, threshold
- Distance
);
1542 if (d
<= matchingThreshold
)
1548 else if ((d
== 1) || (d
< _NO_MATCH_THRESHOLD
* dist
))
1555 if (bestmatch
== XTree::NO_MATCH
)
1557 Distance
+= deleteCost
;
1561 int tmp
= nodes1
[bestmatch
];
1562 nodes1
[bestmatch
] = nodes1
[scount1
];
1563 nodes1
[scount1
] = tmp
;
1566 _xlut
->add(nodes1
[scount1
], nodes2
[scount2
], dist
);
1568 _xlut
->add(nodes2
[scount2
], nodes1
[scount1
], dist
);
1570 matching1
[scount1
] = scount2
;
1571 matching2
[scount2
] = scount1
;
1576 if (Distance
>= threshold
)
1580 return XTree::NO_CONNECTION
;
1584 for (int i
= 0; i
< count1
; i
++)
1586 if (matching1
[i
] == XTree::NO_MATCH
)
1588 Distance
+= (treeOrder
? _xtree1
->getDecendentsCount(nodes1
[i
]) : _xtree2
->getDecendentsCount(nodes1
[i
])) + 1;
1589 if (Distance
>= threshold
)
1593 return XTree::NO_CONNECTION
;
1603 int XDiff::findMatching(int count1
, int count2
, int** dist
,
1604 int* matching1
, int* matching2
)
1609 if (dist
[0][0] < XTree::NO_CONNECTION
)
1616 matching1
[0] = XTree::DELETE
;
1617 matching2
[0] = XTree::DELETE
;
1622 else if (count2
== 1)
1626 int mindist
= XTree::NO_CONNECTION
;
1627 matching2
[0] = XTree::DELETE
;
1629 for (int i
= 0; i
< count1
; i
++)
1631 matching1
[i
] = XTree::DELETE
;
1632 if (mindist
> dist
[i
][0])
1634 mindist
= dist
[i
][0];
1638 // Suppose we delete every node on list1.
1639 distance
+= dist
[i
][1];
1642 if (mindist
< XTree::NO_CONNECTION
)
1644 matching1
[mate
] = 0;
1645 matching2
[0] = mate
;
1646 distance
+= mindist
- dist
[mate
][1];
1650 // Add the delete cost of the single node on list2.
1651 distance
+= dist
[count1
][0];
1656 else if ((count1
== 2) && (count2
== 2))
1658 int distance1
= dist
[0][0] + dist
[1][1];
1659 int distance2
= dist
[0][1] + dist
[1][0];
1660 if (distance1
< distance2
)
1662 if (dist
[0][0] < XTree::NO_CONNECTION
)
1666 distance1
= dist
[0][0];
1670 matching1
[0] = XTree::DELETE
;
1671 matching2
[0] = XTree::DELETE
;
1672 distance1
= dist
[0][2] + dist
[2][0];
1675 if (dist
[1][1] < XTree::NO_CONNECTION
)
1679 distance1
+= dist
[1][1];
1683 matching1
[1] = XTree::DELETE
;
1684 matching2
[1] = XTree::DELETE
;
1685 distance1
+= dist
[1][2] + dist
[2][1];
1692 if (dist
[0][1] < XTree::NO_CONNECTION
)
1696 distance2
= dist
[0][1];
1700 matching1
[0] = XTree::DELETE
;
1701 matching2
[1] = XTree::DELETE
;
1702 distance2
= dist
[0][2] + dist
[2][1];
1705 if (dist
[1][0] < XTree::NO_CONNECTION
)
1709 distance2
+= dist
[1][0];
1713 matching1
[1] = XTree::DELETE
;
1714 matching2
[0] = XTree::DELETE
;
1715 distance2
+= dist
[1][2] + dist
[2][0];
1723 return optimalMatching(count1
, count2
, dist
,
1724 matching1
, matching2
);
1728 int XDiff::optimalMatching(int count1
, int count2
, int** dist
,
1729 int* matching1
, int* matching2
)
1731 // Initialize matching.
1732 // Initial guess will be pair-matching between two lists.
1733 // Others will be insertion or deletion
1734 for (int i
= 0; i
< count2
; i
++)
1736 for (int i
= count2
; i
< count1
; i
++)
1737 matching1
[i
] = XTree::DELETE
;
1739 // Three artificial nodes: "start", "end" and "delete".
1740 int count
= count1
+ count2
+ 3;
1742 // Initialize least cost matrix and path matrix.
1743 // Both have been initialized at the very beginning.
1748 // Construct least cost matrix.
1749 constructLCM(dist
, matching1
, count1
, count2
);
1751 // Initialize path matrix.
1752 for (int i
= 0; i
< count
; i
++)
1753 for (int j
= 0; j
< count
; j
++)
1754 _pathMatrix
[i
][j
] = i
;
1756 // Search negative cost circuit.
1757 int clen
= searchNCC(count
);
1761 for (int i
= 0, next
= 0; i
< clen
- 1; i
++)
1763 int n1
= _circuit
[next
];
1764 next
= _circuit
[next
+1];
1765 // Node in node list 1.
1766 if ((n1
> 0) && (n1
<= count1
))
1769 int nid2
= _circuit
[next
] - count1
- 1;
1771 nid2
= XTree::DELETE
;
1773 matching1
[nid1
] = nid2
;
1782 // Suppose all insertion on list2
1783 for (int i
= 0; i
< count2
; i
++)
1785 matching2
[i
] = XTree::INSERT
;
1786 distance
+= dist
[count1
][i
];
1789 // update distance by looking at matching pairs.
1790 for (int i
= 0; i
< count1
; i
++)
1792 int mmm
= matching1
[i
];
1793 if (mmm
== XTree::DELETE
)
1794 distance
+= dist
[i
][count2
];
1798 distance
+= dist
[i
][mmm
] -
1806 void XDiff::constructLCM(int** costMatrix
, int* matching
,
1807 int nodeCount1
, int nodeCount2
)
1809 // Three artificial nodes: "start", "end" and "delete".
1810 int nodeCount
= nodeCount1
+ nodeCount2
+ 3;
1813 for (int i
= 0; i
< nodeCount
; i
++)
1815 for (int j
= 0; j
< nodeCount
; j
++)
1816 _leastCostMatrix
[i
][j
] = XTree::NO_CONNECTION
;
1819 _leastCostMatrix
[i
][i
] = 0;
1822 // Between start node and nodes in list 1.
1823 // Start -> node1 = Infinity; node1 -> Start = -0.
1824 for (int i
= 0; i
< nodeCount1
; i
++)
1825 _leastCostMatrix
[i
+1][0] = 0;
1827 // Between nodes in list2 and the end node.
1828 // Unless matched (later), node2 -> end = 0;
1829 // end -> node2 = Infinity.
1830 for (int i
= 0; i
< nodeCount2
; i
++)
1831 _leastCostMatrix
[i
+nodeCount1
+1][nodeCount
-1] = 0;
1833 int deleteCount
= 0;
1835 // Between nodes in list1 and nodes in list2.
1836 // For matched, node1 -> node2 = Infinity;
1837 // node2 -> node1 = -1 * distance
1838 // For unmatched, node1 -> node2 = distance;
1839 // node2 -> node1 = Infinity
1840 for (int i
= 0; i
< nodeCount1
; i
++)
1845 // According to cost matrix.
1846 for (int j
= 0; j
< nodeCount2
; j
++)
1848 node2
= j
+ nodeCount1
+ 1;
1849 _leastCostMatrix
[node1
][node2
] = costMatrix
[i
][j
];
1852 // According to matching.
1853 if (matching
[i
] == XTree::DELETE
)
1857 // node1 -> Delete = Infinity;
1858 // Delete -> node1 = -1 * DELETE_COST
1859 _leastCostMatrix
[nodeCount
-2][node1
] = -1 * costMatrix
[i
][nodeCount2
];
1863 node2
= matching
[i
] + nodeCount1
+ 1;
1865 // Between node1 and node2.
1866 _leastCostMatrix
[node1
][node2
] = XTree::NO_CONNECTION
;
1867 _leastCostMatrix
[node2
][node1
] = costMatrix
[i
][matching
[i
]] * -1;
1869 // Between node1 and delete.
1870 _leastCostMatrix
[node1
][nodeCount
-2] = costMatrix
[i
][nodeCount2
];
1872 // Between node2 and end.
1873 _leastCostMatrix
[node2
][nodeCount
-1] = XTree::NO_CONNECTION
;
1874 _leastCostMatrix
[nodeCount
-1][node2
] = costMatrix
[nodeCount1
][matching
[i
]];
1878 // Between the "Delete" and the "End".
1879 // If delete all, delete -> end = Infinity; end -> delete = 0.
1880 if (deleteCount
== nodeCount1
)
1881 _leastCostMatrix
[nodeCount
-1][nodeCount
-2] = 0;
1882 // if no delete, delete -> end = 0; end -> delete = Infinity.
1883 else if (deleteCount
== 0)
1884 _leastCostMatrix
[nodeCount
-2][nodeCount
-1] = 0;
1888 _leastCostMatrix
[nodeCount
-2][nodeCount
-1] = 0;
1889 _leastCostMatrix
[nodeCount
-1][nodeCount
-2] = 0;
1893 int XDiff::searchNCC(int nodeCount
)
1895 for (int k
= 0; k
< nodeCount
; k
++)
1897 for (int i
= 0; i
< nodeCount
; i
++)
1900 (_leastCostMatrix
[i
][k
] != XTree::NO_CONNECTION
))
1902 for (int j
= 0; j
< nodeCount
; j
++)
1905 (_leastCostMatrix
[k
][j
] != XTree::NO_CONNECTION
))
1907 int less
= _leastCostMatrix
[i
][k
] +
1908 _leastCostMatrix
[k
][j
];
1909 if (less
< _leastCostMatrix
[i
][j
])
1911 _leastCostMatrix
[i
][j
] = less
;
1912 _pathMatrix
[i
][j
] = k
;
1915 if ((i
== j
) && (less
< 0))
1917 int clen
= 0; // the length of the circuit.
1919 // Locate the circuit.
1920 //circuit.addElement(new Integer(i));
1924 //circuit.addElement(new Integer(pathMatrix[i][i]));
1925 _circuit
[2] = _pathMatrix
[i
][i
];
1928 //circuit.addElement(new Integer(i));
1939 for (int cit
= 0, n
= 0; cit
< clen
- 1; cit
++)
1941 int left
= _circuit
[n
];
1942 int next
= _circuit
[n
+1];
1943 int right
= (next
== -1)? -1 : _circuit
[next
];
1945 //int middle = pathMatrix[circuit[n-1]][circuit[n]];
1946 int middle
= _pathMatrix
[left
][right
];
1950 //circuit.insert( cit, middle );
1951 _circuit
[clen
*2] = middle
;
1952 _circuit
[clen
*2+1] = next
;
1953 _circuit
[n
+1] = clen
*2;
1974 double XDiff::_diffTime(const struct timeval
*time1
,
1975 const struct timeval
*time2
)
1977 long sec
= time2
->tv_sec
- time1
->tv_sec
;
1978 long usec
= time2
->tv_usec
- time1
->tv_usec
;
1985 return 0.000001 * usec
+ sec
;