2 * Copyright (C) 2008, Google Inc.
6 * Redistribution and use in source and binary forms, with or
7 * without modification, are permitted provided that the following
10 * - Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
13 * - Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following
15 * disclaimer in the documentation and/or other materials provided
16 * with the distribution.
18 * - Neither the name of the Git Development Community nor the
19 * names of its contributors may be used to endorse or promote
20 * products derived from this software without specific prior
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
24 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
25 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
28 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
29 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
30 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
31 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
32 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
33 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
35 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 package org
.spearce
.jgit
.patch
;
40 import static org
.spearce
.jgit
.lib
.Constants
.encodeASCII
;
41 import static org
.spearce
.jgit
.util
.RawParseUtils
.decode
;
42 import static org
.spearce
.jgit
.util
.RawParseUtils
.decodeNoFallback
;
43 import static org
.spearce
.jgit
.util
.RawParseUtils
.extractBinaryString
;
44 import static org
.spearce
.jgit
.util
.RawParseUtils
.match
;
45 import static org
.spearce
.jgit
.util
.RawParseUtils
.nextLF
;
46 import static org
.spearce
.jgit
.util
.RawParseUtils
.parseBase10
;
48 import java
.io
.IOException
;
49 import java
.nio
.charset
.CharacterCodingException
;
50 import java
.nio
.charset
.Charset
;
51 import java
.util
.ArrayList
;
52 import java
.util
.Collections
;
53 import java
.util
.List
;
55 import org
.spearce
.jgit
.diff
.EditList
;
56 import org
.spearce
.jgit
.lib
.AbbreviatedObjectId
;
57 import org
.spearce
.jgit
.lib
.Constants
;
58 import org
.spearce
.jgit
.lib
.FileMode
;
59 import org
.spearce
.jgit
.util
.QuotedString
;
60 import org
.spearce
.jgit
.util
.RawParseUtils
;
61 import org
.spearce
.jgit
.util
.TemporaryBuffer
;
63 /** Patch header describing an action for a single file path. */
64 public class FileHeader
{
65 /** Magical file name used for file adds or deletes. */
66 public static final String DEV_NULL
= "/dev/null";
68 private static final byte[] OLD_MODE
= encodeASCII("old mode ");
70 private static final byte[] NEW_MODE
= encodeASCII("new mode ");
72 static final byte[] DELETED_FILE_MODE
= encodeASCII("deleted file mode ");
74 static final byte[] NEW_FILE_MODE
= encodeASCII("new file mode ");
76 private static final byte[] COPY_FROM
= encodeASCII("copy from ");
78 private static final byte[] COPY_TO
= encodeASCII("copy to ");
80 private static final byte[] RENAME_OLD
= encodeASCII("rename old ");
82 private static final byte[] RENAME_NEW
= encodeASCII("rename new ");
84 private static final byte[] RENAME_FROM
= encodeASCII("rename from ");
86 private static final byte[] RENAME_TO
= encodeASCII("rename to ");
88 private static final byte[] SIMILARITY_INDEX
= encodeASCII("similarity index ");
90 private static final byte[] DISSIMILARITY_INDEX
= encodeASCII("dissimilarity index ");
92 static final byte[] INDEX
= encodeASCII("index ");
94 static final byte[] OLD_NAME
= encodeASCII("--- ");
96 static final byte[] NEW_NAME
= encodeASCII("+++ ");
98 /** General type of change a single file-level patch describes. */
99 public static enum ChangeType
{
100 /** Add a new file to the project */
103 /** Modify an existing file in the project (content and/or mode) */
106 /** Delete an existing file from the project */
109 /** Rename an existing file to a new location */
112 /** Copy an existing file to a new location, keeping the original */
116 /** Type of patch used by this file. */
117 public static enum PatchType
{
118 /** A traditional unified diff style patch of a text file. */
121 /** An empty patch with a message "Binary files ... differ" */
124 /** A Git binary patch, holding pre and post image deltas */
128 /** Buffer holding the patch data for this file. */
131 /** Offset within {@link #buf} to the "diff ..." line. */
132 final int startOffset
;
134 /** Position 1 past the end of this file within {@link #buf}. */
137 /** File name of the old (pre-image). */
138 private String oldName
;
140 /** File name of the new (post-image). */
141 private String newName
;
143 /** Old mode of the file, if described by the patch, else null. */
144 private FileMode oldMode
;
146 /** New mode of the file, if described by the patch, else null. */
147 protected FileMode newMode
;
149 /** General type of change indicated by the patch. */
150 protected ChangeType changeType
;
152 /** Similarity score if {@link #changeType} is a copy or rename. */
155 /** ObjectId listed on the index line for the old (pre-image) */
156 private AbbreviatedObjectId oldId
;
158 /** ObjectId listed on the index line for the new (post-image) */
159 protected AbbreviatedObjectId newId
;
161 /** Type of patch used to modify this file */
164 /** The hunks of this file */
165 private List
<HunkHeader
> hunks
;
167 /** If {@link #patchType} is {@link PatchType#GIT_BINARY}, the new image */
168 BinaryHunk forwardBinaryHunk
;
170 /** If {@link #patchType} is {@link PatchType#GIT_BINARY}, the old image */
171 BinaryHunk reverseBinaryHunk
;
/**
 * Create a header for the file-level patch that begins at the given offset.
 * <p>
 * The header starts out as a {@link ChangeType#MODIFY} described by a
 * {@link PatchType#UNIFIED} patch; parsing may later refine both.
 *
 * @param b
 *            buffer holding the patch data. The array is kept by reference,
 *            not copied.
 * @param offset
 *            position within <code>b</code> of the "diff ..." line.
 */
FileHeader(final byte[] b, final int offset) {
	buf = b;
	startOffset = offset;
	changeType = ChangeType.MODIFY; // unless otherwise designated
	patchType = PatchType.UNIFIED;
}
180 int getParentCount() {
184 /** @return the byte array holding this file's patch script. */
185 public byte[] getBuffer() {
189 /** @return offset the start of this file's script in {@link #getBuffer()}. */
190 public int getStartOffset() {
194 /** @return offset one past the end of the file script. */
195 public int getEndOffset() {
200 * Convert the patch script for this file into a string.
202 * The default character encoding ({@link Constants#CHARSET}) is assumed for
203 * both the old and new files.
205 * @return the patch script, as a Unicode string.
207 public String
getScriptText() {
208 return getScriptText(null, null);
212 * Convert the patch script for this file into a string.
215 * hint character set to decode the old lines with.
217 * hint character set to decode the new lines with.
218 * @return the patch script, as a Unicode string.
220 public String
getScriptText(Charset oldCharset
, Charset newCharset
) {
221 return getScriptText(new Charset
[] { oldCharset
, newCharset
});
224 String
getScriptText(Charset
[] charsetGuess
) {
225 if (getHunks().isEmpty()) {
226 // If we have no hunks then we can safely assume the entire
227 // patch is a binary style patch, or a meta-data only style
228 // patch. Either way the encoding of the headers should be
229 // strictly 7-bit US-ASCII and the body is either 7-bit ASCII
230 // (due to the base 85 encoding used for a BinaryHunk) or is
231 // arbitrary noise we have chosen to ignore and not understand
232 // (e.g. the message "Binary files ... differ").
234 return extractBinaryString(buf
, startOffset
, endOffset
);
237 if (charsetGuess
!= null && charsetGuess
.length
!= getParentCount() + 1)
238 throw new IllegalArgumentException("Expected "
239 + (getParentCount() + 1) + " character encoding guesses");
241 if (trySimpleConversion(charsetGuess
)) {
242 Charset cs
= charsetGuess
!= null ? charsetGuess
[0] : null;
244 cs
= Constants
.CHARSET
;
246 return decodeNoFallback(cs
, buf
, startOffset
, endOffset
);
247 } catch (CharacterCodingException cee
) {
248 // Try the much slower, more-memory intensive version which
249 // can handle a character set conversion patch.
253 final StringBuilder r
= new StringBuilder(endOffset
- startOffset
);
255 // Always treat the headers as US-ASCII; Git file names are encoded
256 // in a C style escape if any character has the high-bit set.
258 final int hdrEnd
= getHunks().get(0).getStartOffset();
259 for (int ptr
= startOffset
; ptr
< hdrEnd
;) {
260 final int eol
= Math
.min(hdrEnd
, nextLF(buf
, ptr
));
261 r
.append(extractBinaryString(buf
, ptr
, eol
));
265 final String
[] files
= extractFileLines(charsetGuess
);
266 final int[] offsets
= new int[files
.length
];
267 for (final HunkHeader h
: getHunks())
268 h
.extractFileLines(r
, files
, offsets
);
272 private static boolean trySimpleConversion(final Charset
[] charsetGuess
) {
273 if (charsetGuess
== null)
275 for (int i
= 1; i
< charsetGuess
.length
; i
++) {
276 if (charsetGuess
[i
] != charsetGuess
[0])
282 private String
[] extractFileLines(final Charset
[] csGuess
) {
283 final TemporaryBuffer
[] tmp
= new TemporaryBuffer
[getParentCount() + 1];
285 for (int i
= 0; i
< tmp
.length
; i
++)
286 tmp
[i
] = new TemporaryBuffer();
287 for (final HunkHeader h
: getHunks())
288 h
.extractFileLines(tmp
);
290 final String
[] r
= new String
[tmp
.length
];
291 for (int i
= 0; i
< tmp
.length
; i
++) {
292 Charset cs
= csGuess
!= null ? csGuess
[i
] : null;
294 cs
= Constants
.CHARSET
;
295 r
[i
] = RawParseUtils
.decode(cs
, tmp
[i
].toByteArray());
298 } catch (IOException ioe
) {
299 throw new RuntimeException("Cannot convert script to text", ioe
);
301 for (final TemporaryBuffer b
: tmp
) {
309 * Get the old name associated with this file.
311 * The meaning of the old name can differ depending on the semantic meaning
314 * <li><i>file add</i>: always <code>/dev/null</code></li>
315 * <li><i>file modify</i>: always {@link #getNewName()}</li>
316 * <li><i>file delete</i>: always the file being deleted</li>
317 * <li><i>file copy</i>: source file the copy originates from</li>
318 * <li><i>file rename</i>: source file the rename originates from</li>
321 * @return old name for this file.
323 public String
getOldName() {
328 * Get the new name associated with this file.
330 * The meaning of the new name can differ depending on the semantic meaning
333 * <li><i>file add</i>: always the file being created</li>
334 * <li><i>file modify</i>: always {@link #getOldName()}</li>
335 * <li><i>file delete</i>: always <code>/dev/null</code></li>
336 * <li><i>file copy</i>: destination file the copy ends up at</li>
337 * <li><i>file rename</i>: destination file the rename ends up at/li>
340 * @return new name for this file.
342 public String
getNewName() {
346 /** @return the old file mode, if described in the patch */
347 public FileMode
getOldMode() {
351 /** @return the new file mode, if described in the patch */
352 public FileMode
getNewMode() {
356 /** @return the type of change this patch makes on {@link #getNewName()} */
357 public ChangeType
getChangeType() {
362 * @return similarity score between {@link #getOldName()} and
363 * {@link #getNewName()} if {@link #getChangeType()} is
364 * {@link ChangeType#COPY} or {@link ChangeType#RENAME}.
366 public int getScore() {
371 * Get the old object id from the <code>index</code>.
373 * @return the object id; null if there is no index line
375 public AbbreviatedObjectId
getOldId() {
380 * Get the new object id from the <code>index</code>.
382 * @return the object id; null if there is no index line
384 public AbbreviatedObjectId
getNewId() {
388 /** @return style of patch used to modify this file */
389 public PatchType
getPatchType() {
393 /** @return true if this patch modifies metadata about a file */
394 public boolean hasMetaDataChanges() {
395 return changeType
!= ChangeType
.MODIFY
|| newMode
!= oldMode
;
398 /** @return hunks altering this file; in order of appearance in patch */
399 public List
<?
extends HunkHeader
> getHunks() {
401 return Collections
.emptyList();
405 void addHunk(final HunkHeader h
) {
406 if (h
.getFileHeader() != this)
407 throw new IllegalArgumentException("Hunk belongs to another file");
409 hunks
= new ArrayList
<HunkHeader
>();
413 HunkHeader
newHunkHeader(final int offset
) {
414 return new HunkHeader(this, offset
);
417 /** @return if a {@link PatchType#GIT_BINARY}, the new-image delta/literal */
418 public BinaryHunk
getForwardBinaryHunk() {
419 return forwardBinaryHunk
;
422 /** @return if a {@link PatchType#GIT_BINARY}, the old-image delta/literal */
423 public BinaryHunk
getReverseBinaryHunk() {
424 return reverseBinaryHunk
;
427 /** @return a list describing the content edits performed on this file. */
428 public EditList
toEditList() {
429 final EditList r
= new EditList();
430 for (final HunkHeader hunk
: hunks
)
431 r
.addAll(hunk
.toEditList());
436 * Parse a "diff --git" or "diff --cc" line.
439 * first character after the "diff --git " or "diff --cc " part.
441 * one past the last position to parse.
442 * @return first character after the LF at the end of the line; -1 on error.
444 int parseGitFileName(int ptr
, final int end
) {
445 final int eol
= nextLF(buf
, ptr
);
451 // buffer[ptr..eol] looks like "a/foo b/foo\n". After the first
452 // A regex to match this is "^[^/]+/(.*?) [^/+]+/\1\n$". There
453 // is only one way to split the line such that text to the left
454 // of the space matches the text to the right, excluding the part
455 // before the first slash.
458 final int aStart
= nextLF(buf
, ptr
, '/');
463 final int sp
= nextLF(buf
, ptr
, ' ');
465 // We can't split the header, it isn't valid.
466 // This may be OK if this is a rename patch.
470 final int bStart
= nextLF(buf
, sp
, '/');
474 // If buffer[aStart..sp - 1] = buffer[bStart..eol - 1]
475 // we have a valid split.
477 if (eq(aStart
, sp
- 1, bStart
, eol
- 1)) {
478 if (buf
[bol
] == '"') {
479 // We're a double quoted name. The region better end
480 // in a double quote too, and we need to decode the
481 // characters before reading the name.
483 if (buf
[sp
- 2] != '"') {
486 oldName
= QuotedString
.GIT_PATH
.dequote(buf
, bol
, sp
- 1);
487 oldName
= p1(oldName
);
489 oldName
= decode(Constants
.CHARSET
, buf
, aStart
, sp
- 1);
495 // This split wasn't correct. Move past the space and try
496 // another split as the space must be part of the file name.
504 int parseGitHeaders(int ptr
, final int end
) {
506 final int eol
= nextLF(buf
, ptr
);
507 if (isHunkHdr(buf
, ptr
, eol
) >= 1) {
508 // First hunk header; break out and parse them later.
511 } else if (match(buf
, ptr
, OLD_NAME
) >= 0) {
512 parseOldName(ptr
, eol
);
514 } else if (match(buf
, ptr
, NEW_NAME
) >= 0) {
515 parseNewName(ptr
, eol
);
517 } else if (match(buf
, ptr
, OLD_MODE
) >= 0) {
518 oldMode
= parseFileMode(ptr
+ OLD_MODE
.length
, eol
);
520 } else if (match(buf
, ptr
, NEW_MODE
) >= 0) {
521 newMode
= parseFileMode(ptr
+ NEW_MODE
.length
, eol
);
523 } else if (match(buf
, ptr
, DELETED_FILE_MODE
) >= 0) {
524 oldMode
= parseFileMode(ptr
+ DELETED_FILE_MODE
.length
, eol
);
525 newMode
= FileMode
.MISSING
;
526 changeType
= ChangeType
.DELETE
;
528 } else if (match(buf
, ptr
, NEW_FILE_MODE
) >= 0) {
529 parseNewFileMode(ptr
, eol
);
531 } else if (match(buf
, ptr
, COPY_FROM
) >= 0) {
532 oldName
= parseName(oldName
, ptr
+ COPY_FROM
.length
, eol
);
533 changeType
= ChangeType
.COPY
;
535 } else if (match(buf
, ptr
, COPY_TO
) >= 0) {
536 newName
= parseName(newName
, ptr
+ COPY_TO
.length
, eol
);
537 changeType
= ChangeType
.COPY
;
539 } else if (match(buf
, ptr
, RENAME_OLD
) >= 0) {
540 oldName
= parseName(oldName
, ptr
+ RENAME_OLD
.length
, eol
);
541 changeType
= ChangeType
.RENAME
;
543 } else if (match(buf
, ptr
, RENAME_NEW
) >= 0) {
544 newName
= parseName(newName
, ptr
+ RENAME_NEW
.length
, eol
);
545 changeType
= ChangeType
.RENAME
;
547 } else if (match(buf
, ptr
, RENAME_FROM
) >= 0) {
548 oldName
= parseName(oldName
, ptr
+ RENAME_FROM
.length
, eol
);
549 changeType
= ChangeType
.RENAME
;
551 } else if (match(buf
, ptr
, RENAME_TO
) >= 0) {
552 newName
= parseName(newName
, ptr
+ RENAME_TO
.length
, eol
);
553 changeType
= ChangeType
.RENAME
;
555 } else if (match(buf
, ptr
, SIMILARITY_INDEX
) >= 0) {
556 score
= parseBase10(buf
, ptr
+ SIMILARITY_INDEX
.length
, null);
558 } else if (match(buf
, ptr
, DISSIMILARITY_INDEX
) >= 0) {
559 score
= parseBase10(buf
, ptr
+ DISSIMILARITY_INDEX
.length
, null);
561 } else if (match(buf
, ptr
, INDEX
) >= 0) {
562 parseIndexLine(ptr
+ INDEX
.length
, eol
);
565 // Probably an empty patch (stat dirty).
574 void parseOldName(int ptr
, final int eol
) {
575 oldName
= p1(parseName(oldName
, ptr
+ OLD_NAME
.length
, eol
));
576 if (oldName
== DEV_NULL
)
577 changeType
= ChangeType
.ADD
;
580 void parseNewName(int ptr
, final int eol
) {
581 newName
= p1(parseName(newName
, ptr
+ NEW_NAME
.length
, eol
));
582 if (newName
== DEV_NULL
)
583 changeType
= ChangeType
.DELETE
;
586 void parseNewFileMode(int ptr
, final int eol
) {
587 oldMode
= FileMode
.MISSING
;
588 newMode
= parseFileMode(ptr
+ NEW_FILE_MODE
.length
, eol
);
589 changeType
= ChangeType
.ADD
;
592 int parseTraditionalHeaders(int ptr
, final int end
) {
594 final int eol
= nextLF(buf
, ptr
);
595 if (isHunkHdr(buf
, ptr
, eol
) >= 1) {
596 // First hunk header; break out and parse them later.
599 } else if (match(buf
, ptr
, OLD_NAME
) >= 0) {
600 parseOldName(ptr
, eol
);
602 } else if (match(buf
, ptr
, NEW_NAME
) >= 0) {
603 parseNewName(ptr
, eol
);
606 // Possibly an empty patch.
615 private String
parseName(final String expect
, int ptr
, final int end
) {
620 if (buf
[ptr
] == '"') {
621 // New style GNU diff format
623 r
= QuotedString
.GIT_PATH
.dequote(buf
, ptr
, end
- 1);
625 // Older style GNU diff format, an optional tab ends the name.
628 while (ptr
< tab
&& buf
[tab
- 1] != '\t')
632 r
= decode(Constants
.CHARSET
, buf
, ptr
, tab
- 1);
635 if (r
.equals(DEV_NULL
))
640 private static String
p1(final String r
) {
641 final int s
= r
.indexOf('/');
642 return s
> 0 ? r
.substring(s
+ 1) : r
;
645 FileMode
parseFileMode(int ptr
, final int end
) {
647 while (ptr
< end
- 1) {
649 tmp
+= buf
[ptr
++] - '0';
651 return FileMode
.fromBits(tmp
);
654 void parseIndexLine(int ptr
, final int end
) {
655 // "index $asha1..$bsha1[ $mode]" where $asha1 and $bsha1
656 // can be unique abbreviations
658 final int dot2
= nextLF(buf
, ptr
, '.');
659 final int mode
= nextLF(buf
, dot2
, ' ');
661 oldId
= AbbreviatedObjectId
.fromString(buf
, ptr
, dot2
- 1);
662 newId
= AbbreviatedObjectId
.fromString(buf
, dot2
+ 1, mode
- 1);
665 newMode
= oldMode
= parseFileMode(mode
, end
);
668 private boolean eq(int aPtr
, int aEnd
, int bPtr
, int bEnd
) {
669 if (aEnd
- aPtr
!= bEnd
- bPtr
) {
672 while (aPtr
< aEnd
) {
673 if (buf
[aPtr
++] != buf
[bPtr
++])
680 * Determine if this is a patch hunk header.
685 * first position in the buffer to evaluate
687 * last position to consider; usually the end of the buffer (
688 * <code>buf.length</code>) or the first position on the next
689 * line. This is only used to avoid very long runs of '@' from
690 * killing the scan loop.
691 * @return the number of "ancestor revisions" in the hunk header. A
692 * traditional two-way diff ("@@ -...") returns 1; a combined diff
693 * for a 3 way-merge returns 3. If this is not a hunk header, 0 is
696 static int isHunkHdr(final byte[] buf
, final int start
, final int end
) {
698 while (ptr
< end
&& buf
[ptr
] == '@')
702 if (ptr
== end
|| buf
[ptr
++] != ' ')
704 if (ptr
== end
|| buf
[ptr
++] != '-')
706 return (ptr
- 3) - start
;