1 //===- ASTDiff.cpp - AST differencing implementation-----------*- C++ -*- -===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file contains definitons for the AST differencing interface.
11 //===----------------------------------------------------------------------===//
13 #include "clang/Tooling/ASTDiff/ASTDiff.h"
14 #include "clang/AST/ParentMapContext.h"
15 #include "clang/AST/RecursiveASTVisitor.h"
16 #include "clang/Basic/SourceManager.h"
17 #include "clang/Lex/Lexer.h"
18 #include "llvm/ADT/PriorityQueue.h"
23 #include <unordered_set>
26 using namespace clang
;
32 /// Maps nodes of the left tree to ones on the right, and vice versa.
36 Mapping(Mapping
&&Other
) = default;
37 Mapping
&operator=(Mapping
&&Other
) = default;
39 Mapping(size_t Size
) {
40 SrcToDst
= std::make_unique
<NodeId
[]>(Size
);
41 DstToSrc
= std::make_unique
<NodeId
[]>(Size
);
44 void link(NodeId Src
, NodeId Dst
) {
45 SrcToDst
[Src
] = Dst
, DstToSrc
[Dst
] = Src
;
48 NodeId
getDst(NodeId Src
) const { return SrcToDst
[Src
]; }
49 NodeId
getSrc(NodeId Dst
) const { return DstToSrc
[Dst
]; }
50 bool hasSrc(NodeId Src
) const { return getDst(Src
).isValid(); }
51 bool hasDst(NodeId Dst
) const { return getSrc(Dst
).isValid(); }
54 std::unique_ptr
<NodeId
[]> SrcToDst
, DstToSrc
;
56 } // end anonymous namespace
60 SyntaxTree::Impl
&T1
, &T2
;
63 Impl(SyntaxTree::Impl
&T1
, SyntaxTree::Impl
&T2
,
64 const ComparisonOptions
&Options
);
66 /// Matches nodes one-by-one based on their similarity.
67 void computeMapping();
69 // Compute Change for each node based on similarity.
70 void computeChangeKinds(Mapping
&M
);
72 NodeId
getMapped(const std::unique_ptr
<SyntaxTree::Impl
> &Tree
,
75 return TheMapping
.getDst(Id
);
76 assert(&*Tree
== &T2
&& "Invalid tree.");
77 return TheMapping
.getSrc(Id
);
81 // Returns true if the two subtrees are identical.
82 bool identical(NodeId Id1
, NodeId Id2
) const;
84 // Returns false if the nodes must not be mached.
85 bool isMatchingPossible(NodeId Id1
, NodeId Id2
) const;
87 // Returns true if the nodes' parents are matched.
88 bool haveSameParents(const Mapping
&M
, NodeId Id1
, NodeId Id2
) const;
90 // Uses an optimal albeit slow algorithm to compute a mapping between two
91 // subtrees, but only if both have fewer nodes than MaxSize.
92 void addOptimalMapping(Mapping
&M
, NodeId Id1
, NodeId Id2
) const;
94 // Computes the ratio of common descendants between the two nodes.
95 // Descendants are only considered to be equal when they are mapped in M.
96 double getJaccardSimilarity(const Mapping
&M
, NodeId Id1
, NodeId Id2
) const;
98 // Returns the node that has the highest degree of similarity.
99 NodeId
findCandidate(const Mapping
&M
, NodeId Id1
) const;
101 // Returns a mapping of identical subtrees.
102 Mapping
matchTopDown() const;
104 // Tries to match any yet unmapped nodes, in a bottom-up fashion.
105 void matchBottomUp(Mapping
&M
) const;
107 const ComparisonOptions
&Options
;
109 friend class ZhangShashaMatcher
;
112 /// Represents the AST of a TranslationUnit.
113 class SyntaxTree::Impl
{
115 Impl(SyntaxTree
*Parent
, ASTContext
&AST
);
116 /// Constructs a tree from an AST node.
117 Impl(SyntaxTree
*Parent
, Decl
*N
, ASTContext
&AST
);
118 Impl(SyntaxTree
*Parent
, Stmt
*N
, ASTContext
&AST
);
120 Impl(SyntaxTree
*Parent
,
121 std::enable_if_t
<std::is_base_of_v
<Stmt
, T
>, T
> *Node
, ASTContext
&AST
)
122 : Impl(Parent
, dyn_cast
<Stmt
>(Node
), AST
) {}
124 Impl(SyntaxTree
*Parent
,
125 std::enable_if_t
<std::is_base_of_v
<Decl
, T
>, T
> *Node
, ASTContext
&AST
)
126 : Impl(Parent
, dyn_cast
<Decl
>(Node
), AST
) {}
130 PrintingPolicy TypePP
;
131 /// Nodes in preorder.
132 std::vector
<Node
> Nodes
;
133 std::vector
<NodeId
> Leaves
;
134 // Maps preorder indices to postorder ones.
135 std::vector
<int> PostorderIds
;
136 std::vector
<NodeId
> NodesBfs
;
138 int getSize() const { return Nodes
.size(); }
139 NodeId
getRootId() const { return 0; }
140 PreorderIterator
begin() const { return getRootId(); }
141 PreorderIterator
end() const { return getSize(); }
143 const Node
&getNode(NodeId Id
) const { return Nodes
[Id
]; }
144 Node
&getMutableNode(NodeId Id
) { return Nodes
[Id
]; }
145 bool isValidNodeId(NodeId Id
) const { return Id
>= 0 && Id
< getSize(); }
146 void addNode(Node
&N
) { Nodes
.push_back(N
); }
147 int getNumberOfDescendants(NodeId Id
) const;
148 bool isInSubtree(NodeId Id
, NodeId SubtreeRoot
) const;
149 int findPositionInParent(NodeId Id
, bool Shifted
= false) const;
151 std::string
getRelativeName(const NamedDecl
*ND
,
152 const DeclContext
*Context
) const;
153 std::string
getRelativeName(const NamedDecl
*ND
) const;
155 std::string
getNodeValue(NodeId Id
) const;
156 std::string
getNodeValue(const Node
&Node
) const;
157 std::string
getDeclValue(const Decl
*D
) const;
158 std::string
getStmtValue(const Stmt
*S
) const;
162 void setLeftMostDescendants();
165 static bool isSpecializedNodeExcluded(const Decl
*D
) { return D
->isImplicit(); }
166 static bool isSpecializedNodeExcluded(const Stmt
*S
) { return false; }
167 static bool isSpecializedNodeExcluded(CXXCtorInitializer
*I
) {
168 return !I
->isWritten();
172 static bool isNodeExcluded(const SourceManager
&SrcMgr
, T
*N
) {
175 SourceLocation SLoc
= N
->getSourceRange().getBegin();
176 if (SLoc
.isValid()) {
177 // Ignore everything from other files.
178 if (!SrcMgr
.isInMainFile(SLoc
))
181 if (SLoc
!= SrcMgr
.getSpellingLoc(SLoc
))
184 return isSpecializedNodeExcluded(N
);
188 // Sets Height, Parent and Children for each node.
189 struct PreorderVisitor
: public RecursiveASTVisitor
<PreorderVisitor
> {
190 int Id
= 0, Depth
= 0;
192 SyntaxTree::Impl
&Tree
;
194 PreorderVisitor(SyntaxTree::Impl
&Tree
) : Tree(Tree
) {}
196 template <class T
> std::tuple
<NodeId
, NodeId
> PreTraverse(T
*ASTNode
) {
198 Tree
.Nodes
.emplace_back();
199 Node
&N
= Tree
.getMutableNode(MyId
);
202 N
.ASTNode
= DynTypedNode::create(*ASTNode
);
203 assert(!N
.ASTNode
.getNodeKind().isNone() &&
204 "Expected nodes to have a valid kind.");
205 if (Parent
.isValid()) {
206 Node
&P
= Tree
.getMutableNode(Parent
);
207 P
.Children
.push_back(MyId
);
212 return std::make_tuple(MyId
, Tree
.getNode(MyId
).Parent
);
214 void PostTraverse(std::tuple
<NodeId
, NodeId
> State
) {
215 NodeId MyId
, PreviousParent
;
216 std::tie(MyId
, PreviousParent
) = State
;
217 assert(MyId
.isValid() && "Expecting to only traverse valid nodes.");
218 Parent
= PreviousParent
;
220 Node
&N
= Tree
.getMutableNode(MyId
);
221 N
.RightMostDescendant
= Id
- 1;
222 assert(N
.RightMostDescendant
>= 0 &&
223 N
.RightMostDescendant
< Tree
.getSize() &&
224 "Rightmost descendant must be a valid tree node.");
226 Tree
.Leaves
.push_back(MyId
);
228 for (NodeId Child
: N
.Children
)
229 N
.Height
= std::max(N
.Height
, 1 + Tree
.getNode(Child
).Height
);
231 bool TraverseDecl(Decl
*D
) {
232 if (isNodeExcluded(Tree
.AST
.getSourceManager(), D
))
234 auto SavedState
= PreTraverse(D
);
235 RecursiveASTVisitor
<PreorderVisitor
>::TraverseDecl(D
);
236 PostTraverse(SavedState
);
239 bool TraverseStmt(Stmt
*S
) {
240 if (auto *E
= dyn_cast_or_null
<Expr
>(S
))
241 S
= E
->IgnoreImplicit();
242 if (isNodeExcluded(Tree
.AST
.getSourceManager(), S
))
244 auto SavedState
= PreTraverse(S
);
245 RecursiveASTVisitor
<PreorderVisitor
>::TraverseStmt(S
);
246 PostTraverse(SavedState
);
249 bool TraverseType(QualType T
) { return true; }
250 bool TraverseConstructorInitializer(CXXCtorInitializer
*Init
) {
251 if (isNodeExcluded(Tree
.AST
.getSourceManager(), Init
))
253 auto SavedState
= PreTraverse(Init
);
254 RecursiveASTVisitor
<PreorderVisitor
>::TraverseConstructorInitializer(Init
);
255 PostTraverse(SavedState
);
259 } // end anonymous namespace
261 SyntaxTree::Impl::Impl(SyntaxTree
*Parent
, ASTContext
&AST
)
262 : Parent(Parent
), AST(AST
), TypePP(AST
.getLangOpts()) {
263 TypePP
.AnonymousTagLocations
= false;
266 SyntaxTree::Impl::Impl(SyntaxTree
*Parent
, Decl
*N
, ASTContext
&AST
)
267 : Impl(Parent
, AST
) {
268 PreorderVisitor
PreorderWalker(*this);
269 PreorderWalker
.TraverseDecl(N
);
273 SyntaxTree::Impl::Impl(SyntaxTree
*Parent
, Stmt
*N
, ASTContext
&AST
)
274 : Impl(Parent
, AST
) {
275 PreorderVisitor
PreorderWalker(*this);
276 PreorderWalker
.TraverseStmt(N
);
280 static std::vector
<NodeId
> getSubtreePostorder(const SyntaxTree::Impl
&Tree
,
282 std::vector
<NodeId
> Postorder
;
283 std::function
<void(NodeId
)> Traverse
= [&](NodeId Id
) {
284 const Node
&N
= Tree
.getNode(Id
);
285 for (NodeId Child
: N
.Children
)
287 Postorder
.push_back(Id
);
293 static std::vector
<NodeId
> getSubtreeBfs(const SyntaxTree::Impl
&Tree
,
295 std::vector
<NodeId
> Ids
;
298 while (Expanded
< Ids
.size())
299 for (NodeId Child
: Tree
.getNode(Ids
[Expanded
++]).Children
)
300 Ids
.push_back(Child
);
304 void SyntaxTree::Impl::initTree() {
305 setLeftMostDescendants();
307 PostorderIds
.resize(getSize());
308 std::function
<void(NodeId
)> PostorderTraverse
= [&](NodeId Id
) {
309 for (NodeId Child
: getNode(Id
).Children
)
310 PostorderTraverse(Child
);
311 PostorderIds
[Id
] = PostorderId
;
314 PostorderTraverse(getRootId());
315 NodesBfs
= getSubtreeBfs(*this, getRootId());
318 void SyntaxTree::Impl::setLeftMostDescendants() {
319 for (NodeId Leaf
: Leaves
) {
320 getMutableNode(Leaf
).LeftMostDescendant
= Leaf
;
321 NodeId Parent
, Cur
= Leaf
;
322 while ((Parent
= getNode(Cur
).Parent
).isValid() &&
323 getNode(Parent
).Children
[0] == Cur
) {
325 getMutableNode(Cur
).LeftMostDescendant
= Leaf
;
330 int SyntaxTree::Impl::getNumberOfDescendants(NodeId Id
) const {
331 return getNode(Id
).RightMostDescendant
- Id
+ 1;
334 bool SyntaxTree::Impl::isInSubtree(NodeId Id
, NodeId SubtreeRoot
) const {
335 return Id
>= SubtreeRoot
&& Id
<= getNode(SubtreeRoot
).RightMostDescendant
;
338 int SyntaxTree::Impl::findPositionInParent(NodeId Id
, bool Shifted
) const {
339 NodeId Parent
= getNode(Id
).Parent
;
340 if (Parent
.isInvalid())
342 const auto &Siblings
= getNode(Parent
).Children
;
344 for (size_t I
= 0, E
= Siblings
.size(); I
< E
; ++I
) {
346 Position
+= getNode(Siblings
[I
]).Shift
;
347 if (Siblings
[I
] == Id
) {
352 llvm_unreachable("Node not found in parent's children.");
355 // Returns the qualified name of ND. If it is subordinate to Context,
356 // then the prefix of the latter is removed from the returned value.
358 SyntaxTree::Impl::getRelativeName(const NamedDecl
*ND
,
359 const DeclContext
*Context
) const {
360 std::string Val
= ND
->getQualifiedNameAsString();
361 std::string ContextPrefix
;
364 if (auto *Namespace
= dyn_cast
<NamespaceDecl
>(Context
))
365 ContextPrefix
= Namespace
->getQualifiedNameAsString();
366 else if (auto *Record
= dyn_cast
<RecordDecl
>(Context
))
367 ContextPrefix
= Record
->getQualifiedNameAsString();
368 else if (AST
.getLangOpts().CPlusPlus11
)
369 if (auto *Tag
= dyn_cast
<TagDecl
>(Context
))
370 ContextPrefix
= Tag
->getQualifiedNameAsString();
371 // Strip the qualifier, if Val refers to something in the current scope.
372 // But leave one leading ':' in place, so that we know that this is a
374 if (!ContextPrefix
.empty() && StringRef(Val
).starts_with(ContextPrefix
))
375 Val
= Val
.substr(ContextPrefix
.size() + 1);
379 std::string
SyntaxTree::Impl::getRelativeName(const NamedDecl
*ND
) const {
380 return getRelativeName(ND
, ND
->getDeclContext());
383 static const DeclContext
*getEnclosingDeclContext(ASTContext
&AST
,
386 const auto &Parents
= AST
.getParents(*S
);
389 const auto &P
= Parents
[0];
390 if (const auto *D
= P
.get
<Decl
>())
391 return D
->getDeclContext();
397 static std::string
getInitializerValue(const CXXCtorInitializer
*Init
,
398 const PrintingPolicy
&TypePP
) {
399 if (Init
->isAnyMemberInitializer())
400 return std::string(Init
->getAnyMember()->getName());
401 if (Init
->isBaseInitializer())
402 return QualType(Init
->getBaseClass(), 0).getAsString(TypePP
);
403 if (Init
->isDelegatingInitializer())
404 return Init
->getTypeSourceInfo()->getType().getAsString(TypePP
);
405 llvm_unreachable("Unknown initializer type");
408 std::string
SyntaxTree::Impl::getNodeValue(NodeId Id
) const {
409 return getNodeValue(getNode(Id
));
412 std::string
SyntaxTree::Impl::getNodeValue(const Node
&N
) const {
413 const DynTypedNode
&DTN
= N
.ASTNode
;
414 if (auto *S
= DTN
.get
<Stmt
>())
415 return getStmtValue(S
);
416 if (auto *D
= DTN
.get
<Decl
>())
417 return getDeclValue(D
);
418 if (auto *Init
= DTN
.get
<CXXCtorInitializer
>())
419 return getInitializerValue(Init
, TypePP
);
420 llvm_unreachable("Fatal: unhandled AST node.\n");
423 std::string
SyntaxTree::Impl::getDeclValue(const Decl
*D
) const {
425 if (auto *V
= dyn_cast
<ValueDecl
>(D
))
426 return getRelativeName(V
) + "(" + V
->getType().getAsString(TypePP
) + ")";
427 if (auto *N
= dyn_cast
<NamedDecl
>(D
))
428 Value
+= getRelativeName(N
) + ";";
429 if (auto *T
= dyn_cast
<TypedefNameDecl
>(D
))
430 return Value
+ T
->getUnderlyingType().getAsString(TypePP
) + ";";
431 if (auto *T
= dyn_cast
<TypeDecl
>(D
))
432 if (T
->getTypeForDecl())
434 T
->getTypeForDecl()->getCanonicalTypeInternal().getAsString(TypePP
) +
436 if (auto *U
= dyn_cast
<UsingDirectiveDecl
>(D
))
437 return std::string(U
->getNominatedNamespace()->getName());
438 if (auto *A
= dyn_cast
<AccessSpecDecl
>(D
)) {
439 CharSourceRange
Range(A
->getSourceRange(), false);
441 Lexer::getSourceText(Range
, AST
.getSourceManager(), AST
.getLangOpts()));
446 std::string
SyntaxTree::Impl::getStmtValue(const Stmt
*S
) const {
447 if (auto *U
= dyn_cast
<UnaryOperator
>(S
))
448 return std::string(UnaryOperator::getOpcodeStr(U
->getOpcode()));
449 if (auto *B
= dyn_cast
<BinaryOperator
>(S
))
450 return std::string(B
->getOpcodeStr());
451 if (auto *M
= dyn_cast
<MemberExpr
>(S
))
452 return getRelativeName(M
->getMemberDecl());
453 if (auto *I
= dyn_cast
<IntegerLiteral
>(S
)) {
454 SmallString
<256> Str
;
455 I
->getValue().toString(Str
, /*Radix=*/10, /*Signed=*/false);
456 return std::string(Str
.str());
458 if (auto *F
= dyn_cast
<FloatingLiteral
>(S
)) {
459 SmallString
<256> Str
;
460 F
->getValue().toString(Str
);
461 return std::string(Str
.str());
463 if (auto *D
= dyn_cast
<DeclRefExpr
>(S
))
464 return getRelativeName(D
->getDecl(), getEnclosingDeclContext(AST
, S
));
465 if (auto *String
= dyn_cast
<StringLiteral
>(S
))
466 return std::string(String
->getString());
467 if (auto *B
= dyn_cast
<CXXBoolLiteralExpr
>(S
))
468 return B
->getValue() ? "true" : "false";
472 /// Identifies a node in a subtree by its postorder offset, starting at 1.
476 explicit SNodeId(int Id
) : Id(Id
) {}
477 explicit SNodeId() = default;
479 operator int() const { return Id
; }
480 SNodeId
&operator++() { return ++Id
, *this; }
481 SNodeId
&operator--() { return --Id
, *this; }
482 SNodeId
operator+(int Other
) const { return SNodeId(Id
+ Other
); }
488 const SyntaxTree::Impl
&Tree
;
489 /// Maps SNodeIds to original ids.
490 std::vector
<NodeId
> RootIds
;
491 /// Maps subtree nodes to their leftmost descendants wtihin the subtree.
492 std::vector
<SNodeId
> LeftMostDescendants
;
495 std::vector
<SNodeId
> KeyRoots
;
497 Subtree(const SyntaxTree::Impl
&Tree
, NodeId SubtreeRoot
) : Tree(Tree
) {
498 RootIds
= getSubtreePostorder(Tree
, SubtreeRoot
);
499 int NumLeaves
= setLeftMostDescendants();
500 computeKeyRoots(NumLeaves
);
502 int getSize() const { return RootIds
.size(); }
503 NodeId
getIdInRoot(SNodeId Id
) const {
504 assert(Id
> 0 && Id
<= getSize() && "Invalid subtree node index.");
505 return RootIds
[Id
- 1];
507 const Node
&getNode(SNodeId Id
) const {
508 return Tree
.getNode(getIdInRoot(Id
));
510 SNodeId
getLeftMostDescendant(SNodeId Id
) const {
511 assert(Id
> 0 && Id
<= getSize() && "Invalid subtree node index.");
512 return LeftMostDescendants
[Id
- 1];
514 /// Returns the postorder index of the leftmost descendant in the subtree.
515 NodeId
getPostorderOffset() const {
516 return Tree
.PostorderIds
[getIdInRoot(SNodeId(1))];
518 std::string
getNodeValue(SNodeId Id
) const {
519 return Tree
.getNodeValue(getIdInRoot(Id
));
523 /// Returns the number of leafs in the subtree.
524 int setLeftMostDescendants() {
526 LeftMostDescendants
.resize(getSize());
527 for (int I
= 0; I
< getSize(); ++I
) {
529 const Node
&N
= getNode(SI
);
530 NumLeaves
+= N
.isLeaf();
531 assert(I
== Tree
.PostorderIds
[getIdInRoot(SI
)] - getPostorderOffset() &&
532 "Postorder traversal in subtree should correspond to traversal in "
533 "the root tree by a constant offset.");
534 LeftMostDescendants
[I
] = SNodeId(Tree
.PostorderIds
[N
.LeftMostDescendant
] -
535 getPostorderOffset());
539 void computeKeyRoots(int Leaves
) {
540 KeyRoots
.resize(Leaves
);
541 std::unordered_set
<int> Visited
;
543 for (SNodeId
I(getSize()); I
> 0; --I
) {
544 SNodeId LeftDesc
= getLeftMostDescendant(I
);
545 if (Visited
.count(LeftDesc
))
547 assert(K
>= 0 && "K should be non-negative");
549 Visited
.insert(LeftDesc
);
555 /// Implementation of Zhang and Shasha's Algorithm for tree edit distance.
556 /// Computes an optimal mapping between two trees using only insertion,
557 /// deletion and update as edit actions (similar to the Levenshtein distance).
558 class ZhangShashaMatcher
{
559 const ASTDiff::Impl
&DiffImpl
;
562 std::unique_ptr
<std::unique_ptr
<double[]>[]> TreeDist
, ForestDist
;
565 ZhangShashaMatcher(const ASTDiff::Impl
&DiffImpl
, const SyntaxTree::Impl
&T1
,
566 const SyntaxTree::Impl
&T2
, NodeId Id1
, NodeId Id2
)
567 : DiffImpl(DiffImpl
), S1(T1
, Id1
), S2(T2
, Id2
) {
568 TreeDist
= std::make_unique
<std::unique_ptr
<double[]>[]>(
569 size_t(S1
.getSize()) + 1);
570 ForestDist
= std::make_unique
<std::unique_ptr
<double[]>[]>(
571 size_t(S1
.getSize()) + 1);
572 for (int I
= 0, E
= S1
.getSize() + 1; I
< E
; ++I
) {
573 TreeDist
[I
] = std::make_unique
<double[]>(size_t(S2
.getSize()) + 1);
574 ForestDist
[I
] = std::make_unique
<double[]>(size_t(S2
.getSize()) + 1);
578 std::vector
<std::pair
<NodeId
, NodeId
>> getMatchingNodes() {
579 std::vector
<std::pair
<NodeId
, NodeId
>> Matches
;
580 std::vector
<std::pair
<SNodeId
, SNodeId
>> TreePairs
;
584 bool RootNodePair
= true;
586 TreePairs
.emplace_back(SNodeId(S1
.getSize()), SNodeId(S2
.getSize()));
588 while (!TreePairs
.empty()) {
589 SNodeId LastRow
, LastCol
, FirstRow
, FirstCol
, Row
, Col
;
590 std::tie(LastRow
, LastCol
) = TreePairs
.back();
591 TreePairs
.pop_back();
594 computeForestDist(LastRow
, LastCol
);
597 RootNodePair
= false;
599 FirstRow
= S1
.getLeftMostDescendant(LastRow
);
600 FirstCol
= S2
.getLeftMostDescendant(LastCol
);
605 while (Row
> FirstRow
|| Col
> FirstCol
) {
606 if (Row
> FirstRow
&&
607 ForestDist
[Row
- 1][Col
] + 1 == ForestDist
[Row
][Col
]) {
609 } else if (Col
> FirstCol
&&
610 ForestDist
[Row
][Col
- 1] + 1 == ForestDist
[Row
][Col
]) {
613 SNodeId LMD1
= S1
.getLeftMostDescendant(Row
);
614 SNodeId LMD2
= S2
.getLeftMostDescendant(Col
);
615 if (LMD1
== S1
.getLeftMostDescendant(LastRow
) &&
616 LMD2
== S2
.getLeftMostDescendant(LastCol
)) {
617 NodeId Id1
= S1
.getIdInRoot(Row
);
618 NodeId Id2
= S2
.getIdInRoot(Col
);
619 assert(DiffImpl
.isMatchingPossible(Id1
, Id2
) &&
620 "These nodes must not be matched.");
621 Matches
.emplace_back(Id1
, Id2
);
625 TreePairs
.emplace_back(Row
, Col
);
636 /// We use a simple cost model for edit actions, which seems good enough.
637 /// Simple cost model for edit actions. This seems to make the matching
638 /// algorithm perform reasonably well.
639 /// The values range between 0 and 1, or infinity if this edit action should
640 /// always be avoided.
641 static constexpr double DeletionCost
= 1;
642 static constexpr double InsertionCost
= 1;
644 double getUpdateCost(SNodeId Id1
, SNodeId Id2
) {
645 if (!DiffImpl
.isMatchingPossible(S1
.getIdInRoot(Id1
), S2
.getIdInRoot(Id2
)))
646 return std::numeric_limits
<double>::max();
647 return S1
.getNodeValue(Id1
) != S2
.getNodeValue(Id2
);
650 void computeTreeDist() {
651 for (SNodeId Id1
: S1
.KeyRoots
)
652 for (SNodeId Id2
: S2
.KeyRoots
)
653 computeForestDist(Id1
, Id2
);
656 void computeForestDist(SNodeId Id1
, SNodeId Id2
) {
657 assert(Id1
> 0 && Id2
> 0 && "Expecting offsets greater than 0.");
658 SNodeId LMD1
= S1
.getLeftMostDescendant(Id1
);
659 SNodeId LMD2
= S2
.getLeftMostDescendant(Id2
);
661 ForestDist
[LMD1
][LMD2
] = 0;
662 for (SNodeId D1
= LMD1
+ 1; D1
<= Id1
; ++D1
) {
663 ForestDist
[D1
][LMD2
] = ForestDist
[D1
- 1][LMD2
] + DeletionCost
;
664 for (SNodeId D2
= LMD2
+ 1; D2
<= Id2
; ++D2
) {
665 ForestDist
[LMD1
][D2
] = ForestDist
[LMD1
][D2
- 1] + InsertionCost
;
666 SNodeId DLMD1
= S1
.getLeftMostDescendant(D1
);
667 SNodeId DLMD2
= S2
.getLeftMostDescendant(D2
);
668 if (DLMD1
== LMD1
&& DLMD2
== LMD2
) {
669 double UpdateCost
= getUpdateCost(D1
, D2
);
671 std::min({ForestDist
[D1
- 1][D2
] + DeletionCost
,
672 ForestDist
[D1
][D2
- 1] + InsertionCost
,
673 ForestDist
[D1
- 1][D2
- 1] + UpdateCost
});
674 TreeDist
[D1
][D2
] = ForestDist
[D1
][D2
];
677 std::min({ForestDist
[D1
- 1][D2
] + DeletionCost
,
678 ForestDist
[D1
][D2
- 1] + InsertionCost
,
679 ForestDist
[DLMD1
][DLMD2
] + TreeDist
[D1
][D2
]});
686 ASTNodeKind
Node::getType() const { return ASTNode
.getNodeKind(); }
688 StringRef
Node::getTypeLabel() const { return getType().asStringRef(); }
690 std::optional
<std::string
> Node::getQualifiedIdentifier() const {
691 if (auto *ND
= ASTNode
.get
<NamedDecl
>()) {
692 if (ND
->getDeclName().isIdentifier())
693 return ND
->getQualifiedNameAsString();
698 std::optional
<StringRef
> Node::getIdentifier() const {
699 if (auto *ND
= ASTNode
.get
<NamedDecl
>()) {
700 if (ND
->getDeclName().isIdentifier())
701 return ND
->getName();
707 // Compares nodes by their depth.
709 const SyntaxTree::Impl
&Tree
;
710 HeightLess(const SyntaxTree::Impl
&Tree
) : Tree(Tree
) {}
711 bool operator()(NodeId Id1
, NodeId Id2
) const {
712 return Tree
.getNode(Id1
).Height
< Tree
.getNode(Id2
).Height
;
715 } // end anonymous namespace
718 // Priority queue for nodes, sorted descendingly by their height.
720 const SyntaxTree::Impl
&Tree
;
722 std::vector
<NodeId
> Container
;
723 PriorityQueue
<NodeId
, std::vector
<NodeId
>, HeightLess
> List
;
726 PriorityList(const SyntaxTree::Impl
&Tree
)
727 : Tree(Tree
), Cmp(Tree
), List(Cmp
, Container
) {}
729 void push(NodeId id
) { List
.push(id
); }
731 std::vector
<NodeId
> pop() {
733 std::vector
<NodeId
> Result
;
736 while (peekMax() == Max
) {
737 Result
.push_back(List
.top());
740 // TODO this is here to get a stable output, not a good heuristic
744 int peekMax() const {
747 return Tree
.getNode(List
.top()).Height
;
749 void open(NodeId Id
) {
750 for (NodeId Child
: Tree
.getNode(Id
).Children
)
754 } // end anonymous namespace
756 bool ASTDiff::Impl::identical(NodeId Id1
, NodeId Id2
) const {
757 const Node
&N1
= T1
.getNode(Id1
);
758 const Node
&N2
= T2
.getNode(Id2
);
759 if (N1
.Children
.size() != N2
.Children
.size() ||
760 !isMatchingPossible(Id1
, Id2
) ||
761 T1
.getNodeValue(Id1
) != T2
.getNodeValue(Id2
))
763 for (size_t Id
= 0, E
= N1
.Children
.size(); Id
< E
; ++Id
)
764 if (!identical(N1
.Children
[Id
], N2
.Children
[Id
]))
769 bool ASTDiff::Impl::isMatchingPossible(NodeId Id1
, NodeId Id2
) const {
770 return Options
.isMatchingAllowed(T1
.getNode(Id1
), T2
.getNode(Id2
));
773 bool ASTDiff::Impl::haveSameParents(const Mapping
&M
, NodeId Id1
,
775 NodeId P1
= T1
.getNode(Id1
).Parent
;
776 NodeId P2
= T2
.getNode(Id2
).Parent
;
777 return (P1
.isInvalid() && P2
.isInvalid()) ||
778 (P1
.isValid() && P2
.isValid() && M
.getDst(P1
) == P2
);
781 void ASTDiff::Impl::addOptimalMapping(Mapping
&M
, NodeId Id1
,
783 if (std::max(T1
.getNumberOfDescendants(Id1
), T2
.getNumberOfDescendants(Id2
)) >
786 ZhangShashaMatcher
Matcher(*this, T1
, T2
, Id1
, Id2
);
787 std::vector
<std::pair
<NodeId
, NodeId
>> R
= Matcher
.getMatchingNodes();
788 for (const auto &Tuple
: R
) {
789 NodeId Src
= Tuple
.first
;
790 NodeId Dst
= Tuple
.second
;
791 if (!M
.hasSrc(Src
) && !M
.hasDst(Dst
))
796 double ASTDiff::Impl::getJaccardSimilarity(const Mapping
&M
, NodeId Id1
,
798 int CommonDescendants
= 0;
799 const Node
&N1
= T1
.getNode(Id1
);
800 // Count the common descendants, excluding the subtree root.
801 for (NodeId Src
= Id1
+ 1; Src
<= N1
.RightMostDescendant
; ++Src
) {
802 NodeId Dst
= M
.getDst(Src
);
803 CommonDescendants
+= int(Dst
.isValid() && T2
.isInSubtree(Dst
, Id2
));
805 // We need to subtract 1 to get the number of descendants excluding the root.
806 double Denominator
= T1
.getNumberOfDescendants(Id1
) - 1 +
807 T2
.getNumberOfDescendants(Id2
) - 1 - CommonDescendants
;
808 // CommonDescendants is less than the size of one subtree.
809 assert(Denominator
>= 0 && "Expected non-negative denominator.");
810 if (Denominator
== 0)
812 return CommonDescendants
/ Denominator
;
815 NodeId
ASTDiff::Impl::findCandidate(const Mapping
&M
, NodeId Id1
) const {
817 double HighestSimilarity
= 0.0;
818 for (NodeId Id2
: T2
) {
819 if (!isMatchingPossible(Id1
, Id2
))
823 double Similarity
= getJaccardSimilarity(M
, Id1
, Id2
);
824 if (Similarity
>= Options
.MinSimilarity
&& Similarity
> HighestSimilarity
) {
825 HighestSimilarity
= Similarity
;
832 void ASTDiff::Impl::matchBottomUp(Mapping
&M
) const {
833 std::vector
<NodeId
> Postorder
= getSubtreePostorder(T1
, T1
.getRootId());
834 for (NodeId Id1
: Postorder
) {
835 if (Id1
== T1
.getRootId() && !M
.hasSrc(T1
.getRootId()) &&
836 !M
.hasDst(T2
.getRootId())) {
837 if (isMatchingPossible(T1
.getRootId(), T2
.getRootId())) {
838 M
.link(T1
.getRootId(), T2
.getRootId());
839 addOptimalMapping(M
, T1
.getRootId(), T2
.getRootId());
843 bool Matched
= M
.hasSrc(Id1
);
844 const Node
&N1
= T1
.getNode(Id1
);
845 bool MatchedChildren
= llvm::any_of(
846 N1
.Children
, [&](NodeId Child
) { return M
.hasSrc(Child
); });
847 if (Matched
|| !MatchedChildren
)
849 NodeId Id2
= findCandidate(M
, Id1
);
852 addOptimalMapping(M
, Id1
, Id2
);
857 Mapping
ASTDiff::Impl::matchTopDown() const {
861 Mapping
M(T1
.getSize() + T2
.getSize());
863 L1
.push(T1
.getRootId());
864 L2
.push(T2
.getRootId());
867 while (std::min(Max1
= L1
.peekMax(), Max2
= L2
.peekMax()) >
870 for (NodeId Id
: L1
.pop())
875 for (NodeId Id
: L2
.pop())
879 std::vector
<NodeId
> H1
, H2
;
882 for (NodeId Id1
: H1
) {
883 for (NodeId Id2
: H2
) {
884 if (identical(Id1
, Id2
) && !M
.hasSrc(Id1
) && !M
.hasDst(Id2
)) {
885 for (int I
= 0, E
= T1
.getNumberOfDescendants(Id1
); I
< E
; ++I
)
886 M
.link(Id1
+ I
, Id2
+ I
);
890 for (NodeId Id1
: H1
) {
894 for (NodeId Id2
: H2
) {
902 ASTDiff::Impl::Impl(SyntaxTree::Impl
&T1
, SyntaxTree::Impl
&T2
,
903 const ComparisonOptions
&Options
)
904 : T1(T1
), T2(T2
), Options(Options
) {
906 computeChangeKinds(TheMapping
);
909 void ASTDiff::Impl::computeMapping() {
910 TheMapping
= matchTopDown();
911 if (Options
.StopAfterTopDown
)
913 matchBottomUp(TheMapping
);
916 void ASTDiff::Impl::computeChangeKinds(Mapping
&M
) {
917 for (NodeId Id1
: T1
) {
918 if (!M
.hasSrc(Id1
)) {
919 T1
.getMutableNode(Id1
).Change
= Delete
;
920 T1
.getMutableNode(Id1
).Shift
-= 1;
923 for (NodeId Id2
: T2
) {
924 if (!M
.hasDst(Id2
)) {
925 T2
.getMutableNode(Id2
).Change
= Insert
;
926 T2
.getMutableNode(Id2
).Shift
-= 1;
929 for (NodeId Id1
: T1
.NodesBfs
) {
930 NodeId Id2
= M
.getDst(Id1
);
933 if (!haveSameParents(M
, Id1
, Id2
) ||
934 T1
.findPositionInParent(Id1
, true) !=
935 T2
.findPositionInParent(Id2
, true)) {
936 T1
.getMutableNode(Id1
).Shift
-= 1;
937 T2
.getMutableNode(Id2
).Shift
-= 1;
940 for (NodeId Id2
: T2
.NodesBfs
) {
941 NodeId Id1
= M
.getSrc(Id2
);
944 Node
&N1
= T1
.getMutableNode(Id1
);
945 Node
&N2
= T2
.getMutableNode(Id2
);
948 if (!haveSameParents(M
, Id1
, Id2
) ||
949 T1
.findPositionInParent(Id1
, true) !=
950 T2
.findPositionInParent(Id2
, true)) {
951 N1
.Change
= N2
.Change
= Move
;
953 if (T1
.getNodeValue(Id1
) != T2
.getNodeValue(Id2
)) {
954 N1
.Change
= N2
.Change
= (N1
.Change
== Move
? UpdateMove
: Update
);
959 ASTDiff::ASTDiff(SyntaxTree
&T1
, SyntaxTree
&T2
,
960 const ComparisonOptions
&Options
)
961 : DiffImpl(std::make_unique
<Impl
>(*T1
.TreeImpl
, *T2
.TreeImpl
, Options
)) {}
963 ASTDiff::~ASTDiff() = default;
965 NodeId
ASTDiff::getMapped(const SyntaxTree
&SourceTree
, NodeId Id
) const {
966 return DiffImpl
->getMapped(SourceTree
.TreeImpl
, Id
);
969 SyntaxTree::SyntaxTree(ASTContext
&AST
)
970 : TreeImpl(std::make_unique
<SyntaxTree::Impl
>(
971 this, AST
.getTranslationUnitDecl(), AST
)) {}
973 SyntaxTree::~SyntaxTree() = default;
975 const ASTContext
&SyntaxTree::getASTContext() const { return TreeImpl
->AST
; }
977 const Node
&SyntaxTree::getNode(NodeId Id
) const {
978 return TreeImpl
->getNode(Id
);
981 int SyntaxTree::getSize() const { return TreeImpl
->getSize(); }
982 NodeId
SyntaxTree::getRootId() const { return TreeImpl
->getRootId(); }
983 SyntaxTree::PreorderIterator
SyntaxTree::begin() const {
984 return TreeImpl
->begin();
986 SyntaxTree::PreorderIterator
SyntaxTree::end() const { return TreeImpl
->end(); }
988 int SyntaxTree::findPositionInParent(NodeId Id
) const {
989 return TreeImpl
->findPositionInParent(Id
);
992 std::pair
<unsigned, unsigned>
993 SyntaxTree::getSourceRangeOffsets(const Node
&N
) const {
994 const SourceManager
&SrcMgr
= TreeImpl
->AST
.getSourceManager();
995 SourceRange Range
= N
.ASTNode
.getSourceRange();
996 SourceLocation BeginLoc
= Range
.getBegin();
997 SourceLocation EndLoc
= Lexer::getLocForEndOfToken(
998 Range
.getEnd(), /*Offset=*/0, SrcMgr
, TreeImpl
->AST
.getLangOpts());
999 if (auto *ThisExpr
= N
.ASTNode
.get
<CXXThisExpr
>()) {
1000 if (ThisExpr
->isImplicit())
1003 unsigned Begin
= SrcMgr
.getFileOffset(SrcMgr
.getExpansionLoc(BeginLoc
));
1004 unsigned End
= SrcMgr
.getFileOffset(SrcMgr
.getExpansionLoc(EndLoc
));
1005 return {Begin
, End
};
1008 std::string
SyntaxTree::getNodeValue(NodeId Id
) const {
1009 return TreeImpl
->getNodeValue(Id
);
1012 std::string
SyntaxTree::getNodeValue(const Node
&N
) const {
1013 return TreeImpl
->getNodeValue(N
);
1016 } // end namespace diff
1017 } // end namespace clang