Remove System.out.println from RevWalkFilterTest
[egit/chris.git] / org.spearce.jgit / src / org / spearce / jgit / patch / FileHeader.java
blobc64f7421952e21ee6ae64359e265a930e78fe4e0
1 /*
2 * Copyright (C) 2008, Google Inc.
4 * All rights reserved.
6 * Redistribution and use in source and binary forms, with or
7 * without modification, are permitted provided that the following
8 * conditions are met:
10 * - Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
13 * - Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following
15 * disclaimer in the documentation and/or other materials provided
16 * with the distribution.
18 * - Neither the name of the Git Development Community nor the
19 * names of its contributors may be used to endorse or promote
20 * products derived from this software without specific prior
21 * written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
24 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
25 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
28 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
29 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
30 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
31 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
32 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
33 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
35 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 package org.spearce.jgit.patch;
40 import static org.spearce.jgit.lib.Constants.encodeASCII;
41 import static org.spearce.jgit.util.RawParseUtils.decode;
42 import static org.spearce.jgit.util.RawParseUtils.decodeNoFallback;
43 import static org.spearce.jgit.util.RawParseUtils.extractBinaryString;
44 import static org.spearce.jgit.util.RawParseUtils.match;
45 import static org.spearce.jgit.util.RawParseUtils.nextLF;
46 import static org.spearce.jgit.util.RawParseUtils.parseBase10;
48 import java.io.IOException;
49 import java.nio.charset.CharacterCodingException;
50 import java.nio.charset.Charset;
51 import java.util.ArrayList;
52 import java.util.Collections;
53 import java.util.List;
55 import org.spearce.jgit.diff.EditList;
56 import org.spearce.jgit.lib.AbbreviatedObjectId;
57 import org.spearce.jgit.lib.Constants;
58 import org.spearce.jgit.lib.FileMode;
59 import org.spearce.jgit.util.QuotedString;
60 import org.spearce.jgit.util.RawParseUtils;
61 import org.spearce.jgit.util.TemporaryBuffer;
63 /** Patch header describing an action for a single file path. */
64 public class FileHeader {
65 /** Magical file name used for file adds or deletes. */
66 public static final String DEV_NULL = "/dev/null";
68 private static final byte[] OLD_MODE = encodeASCII("old mode ");
70 private static final byte[] NEW_MODE = encodeASCII("new mode ");
72 static final byte[] DELETED_FILE_MODE = encodeASCII("deleted file mode ");
74 static final byte[] NEW_FILE_MODE = encodeASCII("new file mode ");
76 private static final byte[] COPY_FROM = encodeASCII("copy from ");
78 private static final byte[] COPY_TO = encodeASCII("copy to ");
80 private static final byte[] RENAME_OLD = encodeASCII("rename old ");
82 private static final byte[] RENAME_NEW = encodeASCII("rename new ");
84 private static final byte[] RENAME_FROM = encodeASCII("rename from ");
86 private static final byte[] RENAME_TO = encodeASCII("rename to ");
88 private static final byte[] SIMILARITY_INDEX = encodeASCII("similarity index ");
90 private static final byte[] DISSIMILARITY_INDEX = encodeASCII("dissimilarity index ");
92 static final byte[] INDEX = encodeASCII("index ");
94 static final byte[] OLD_NAME = encodeASCII("--- ");
96 static final byte[] NEW_NAME = encodeASCII("+++ ");
/** Broad category of the file-level action a single patch describes. */
public enum ChangeType {
    /** A new file is added to the project. */
    ADD,

    /** An existing file has its content and/or mode modified. */
    MODIFY,

    /** An existing file is deleted from the project. */
    DELETE,

    /** An existing file is renamed to a new location. */
    RENAME,

    /** An existing file is copied to a new location; the original stays. */
    COPY
}
/** Style of patch body used for a file. */
public enum PatchType {
    /** Traditional unified diff of a text file. */
    UNIFIED,

    /** Empty patch carrying only the message "Binary files ... differ". */
    BINARY,

    /** Git binary patch holding pre and post image deltas. */
    GIT_BINARY
}
128 /** Buffer holding the patch data for this file. */
129 final byte[] buf;
131 /** Offset within {@link #buf} to the "diff ..." line. */
132 final int startOffset;
134 /** Position 1 past the end of this file within {@link #buf}. */
135 int endOffset;
137 /** File name of the old (pre-image). */
138 private String oldName;
140 /** File name of the new (post-image). */
141 private String newName;
143 /** Old mode of the file, if described by the patch, else null. */
144 private FileMode oldMode;
146 /** New mode of the file, if described by the patch, else null. */
147 protected FileMode newMode;
149 /** General type of change indicated by the patch. */
150 protected ChangeType changeType;
152 /** Similarity score if {@link #changeType} is a copy or rename. */
153 private int score;
155 /** ObjectId listed on the index line for the old (pre-image) */
156 private AbbreviatedObjectId oldId;
158 /** ObjectId listed on the index line for the new (post-image) */
159 protected AbbreviatedObjectId newId;
161 /** Type of patch used to modify this file */
162 PatchType patchType;
164 /** The hunks of this file */
165 private List<HunkHeader> hunks;
167 /** If {@link #patchType} is {@link PatchType#GIT_BINARY}, the new image */
168 BinaryHunk forwardBinaryHunk;
170 /** If {@link #patchType} is {@link PatchType#GIT_BINARY}, the old image */
171 BinaryHunk reverseBinaryHunk;
/**
 * Create a header for the file script starting at a position in a buffer.
 * <p>
 * The header starts out as a {@link ChangeType#MODIFY} change using a
 * {@link PatchType#UNIFIED} patch.
 *
 * @param b
 *            buffer holding the patch data; the reference is kept, not
 *            copied.
 * @param offset
 *            position within <code>b</code> recorded as the start offset.
 */
FileHeader(final byte[] b, final int offset) {
    this.buf = b;
    this.startOffset = offset;
    // A plain modification, unless otherwise designated.
    this.changeType = ChangeType.MODIFY;
    this.patchType = PatchType.UNIFIED;
}
180 int getParentCount() {
181 return 1;
184 /** @return the byte array holding this file's patch script. */
185 public byte[] getBuffer() {
186 return buf;
189 /** @return offset the start of this file's script in {@link #getBuffer()}. */
190 public int getStartOffset() {
191 return startOffset;
194 /** @return offset one past the end of the file script. */
195 public int getEndOffset() {
196 return endOffset;
200 * Convert the patch script for this file into a string.
201 * <p>
202 * The default character encoding ({@link Constants#CHARSET}) is assumed for
203 * both the old and new files.
205 * @return the patch script, as a Unicode string.
207 public String getScriptText() {
208 return getScriptText(null, null);
212 * Convert the patch script for this file into a string.
214 * @param oldCharset
215 * hint character set to decode the old lines with.
216 * @param newCharset
217 * hint character set to decode the new lines with.
218 * @return the patch script, as a Unicode string.
220 public String getScriptText(Charset oldCharset, Charset newCharset) {
221 return getScriptText(new Charset[] { oldCharset, newCharset });
224 String getScriptText(Charset[] charsetGuess) {
225 if (getHunks().isEmpty()) {
226 // If we have no hunks then we can safely assume the entire
227 // patch is a binary style patch, or a meta-data only style
228 // patch. Either way the encoding of the headers should be
229 // strictly 7-bit US-ASCII and the body is either 7-bit ASCII
230 // (due to the base 85 encoding used for a BinaryHunk) or is
231 // arbitrary noise we have chosen to ignore and not understand
232 // (e.g. the message "Binary files ... differ").
234 return extractBinaryString(buf, startOffset, endOffset);
237 if (charsetGuess != null && charsetGuess.length != getParentCount() + 1)
238 throw new IllegalArgumentException("Expected "
239 + (getParentCount() + 1) + " character encoding guesses");
241 if (trySimpleConversion(charsetGuess)) {
242 Charset cs = charsetGuess != null ? charsetGuess[0] : null;
243 if (cs == null)
244 cs = Constants.CHARSET;
245 try {
246 return decodeNoFallback(cs, buf, startOffset, endOffset);
247 } catch (CharacterCodingException cee) {
248 // Try the much slower, more-memory intensive version which
249 // can handle a character set conversion patch.
253 final StringBuilder r = new StringBuilder(endOffset - startOffset);
255 // Always treat the headers as US-ASCII; Git file names are encoded
256 // in a C style escape if any character has the high-bit set.
258 final int hdrEnd = getHunks().get(0).getStartOffset();
259 for (int ptr = startOffset; ptr < hdrEnd;) {
260 final int eol = Math.min(hdrEnd, nextLF(buf, ptr));
261 r.append(extractBinaryString(buf, ptr, eol));
262 ptr = eol;
265 final String[] files = extractFileLines(charsetGuess);
266 final int[] offsets = new int[files.length];
267 for (final HunkHeader h : getHunks())
268 h.extractFileLines(r, files, offsets);
269 return r.toString();
272 private static boolean trySimpleConversion(final Charset[] charsetGuess) {
273 if (charsetGuess == null)
274 return true;
275 for (int i = 1; i < charsetGuess.length; i++) {
276 if (charsetGuess[i] != charsetGuess[0])
277 return false;
279 return true;
282 private String[] extractFileLines(final Charset[] csGuess) {
283 final TemporaryBuffer[] tmp = new TemporaryBuffer[getParentCount() + 1];
284 try {
285 for (int i = 0; i < tmp.length; i++)
286 tmp[i] = new TemporaryBuffer();
287 for (final HunkHeader h : getHunks())
288 h.extractFileLines(tmp);
290 final String[] r = new String[tmp.length];
291 for (int i = 0; i < tmp.length; i++) {
292 Charset cs = csGuess != null ? csGuess[i] : null;
293 if (cs == null)
294 cs = Constants.CHARSET;
295 r[i] = RawParseUtils.decode(cs, tmp[i].toByteArray());
297 return r;
298 } catch (IOException ioe) {
299 throw new RuntimeException("Cannot convert script to text", ioe);
300 } finally {
301 for (final TemporaryBuffer b : tmp) {
302 if (b != null)
303 b.destroy();
309 * Get the old name associated with this file.
310 * <p>
311 * The meaning of the old name can differ depending on the semantic meaning
312 * of this patch:
313 * <ul>
314 * <li><i>file add</i>: always <code>/dev/null</code></li>
315 * <li><i>file modify</i>: always {@link #getNewName()}</li>
316 * <li><i>file delete</i>: always the file being deleted</li>
317 * <li><i>file copy</i>: source file the copy originates from</li>
318 * <li><i>file rename</i>: source file the rename originates from</li>
319 * </ul>
321 * @return old name for this file.
323 public String getOldName() {
324 return oldName;
328 * Get the new name associated with this file.
329 * <p>
330 * The meaning of the new name can differ depending on the semantic meaning
331 * of this patch:
332 * <ul>
333 * <li><i>file add</i>: always the file being created</li>
334 * <li><i>file modify</i>: always {@link #getOldName()}</li>
335 * <li><i>file delete</i>: always <code>/dev/null</code></li>
336 * <li><i>file copy</i>: destination file the copy ends up at</li>
337 * <li><i>file rename</i>: destination file the rename ends up at/li>
338 * </ul>
340 * @return new name for this file.
342 public String getNewName() {
343 return newName;
346 /** @return the old file mode, if described in the patch */
347 public FileMode getOldMode() {
348 return oldMode;
351 /** @return the new file mode, if described in the patch */
352 public FileMode getNewMode() {
353 return newMode;
356 /** @return the type of change this patch makes on {@link #getNewName()} */
357 public ChangeType getChangeType() {
358 return changeType;
362 * @return similarity score between {@link #getOldName()} and
363 * {@link #getNewName()} if {@link #getChangeType()} is
364 * {@link ChangeType#COPY} or {@link ChangeType#RENAME}.
366 public int getScore() {
367 return score;
371 * Get the old object id from the <code>index</code>.
373 * @return the object id; null if there is no index line
375 public AbbreviatedObjectId getOldId() {
376 return oldId;
380 * Get the new object id from the <code>index</code>.
382 * @return the object id; null if there is no index line
384 public AbbreviatedObjectId getNewId() {
385 return newId;
388 /** @return style of patch used to modify this file */
389 public PatchType getPatchType() {
390 return patchType;
393 /** @return true if this patch modifies metadata about a file */
394 public boolean hasMetaDataChanges() {
395 return changeType != ChangeType.MODIFY || newMode != oldMode;
398 /** @return hunks altering this file; in order of appearance in patch */
399 public List<? extends HunkHeader> getHunks() {
400 if (hunks == null)
401 return Collections.emptyList();
402 return hunks;
405 void addHunk(final HunkHeader h) {
406 if (h.getFileHeader() != this)
407 throw new IllegalArgumentException("Hunk belongs to another file");
408 if (hunks == null)
409 hunks = new ArrayList<HunkHeader>();
410 hunks.add(h);
413 HunkHeader newHunkHeader(final int offset) {
414 return new HunkHeader(this, offset);
417 /** @return if a {@link PatchType#GIT_BINARY}, the new-image delta/literal */
418 public BinaryHunk getForwardBinaryHunk() {
419 return forwardBinaryHunk;
422 /** @return if a {@link PatchType#GIT_BINARY}, the old-image delta/literal */
423 public BinaryHunk getReverseBinaryHunk() {
424 return reverseBinaryHunk;
427 /** @return a list describing the content edits performed on this file. */
428 public EditList toEditList() {
429 final EditList r = new EditList();
430 for (final HunkHeader hunk : hunks)
431 r.addAll(hunk.toEditList());
432 return r;
436 * Parse a "diff --git" or "diff --cc" line.
438 * @param ptr
439 * first character after the "diff --git " or "diff --cc " part.
440 * @param end
441 * one past the last position to parse.
442 * @return first character after the LF at the end of the line; -1 on error.
444 int parseGitFileName(int ptr, final int end) {
445 final int eol = nextLF(buf, ptr);
446 final int bol = ptr;
447 if (eol >= end) {
448 return -1;
451 // buffer[ptr..eol] looks like "a/foo b/foo\n". After the first
452 // A regex to match this is "^[^/]+/(.*?) [^/+]+/\1\n$". There
453 // is only one way to split the line such that text to the left
454 // of the space matches the text to the right, excluding the part
455 // before the first slash.
458 final int aStart = nextLF(buf, ptr, '/');
459 if (aStart >= eol)
460 return eol;
462 while (ptr < eol) {
463 final int sp = nextLF(buf, ptr, ' ');
464 if (sp >= eol) {
465 // We can't split the header, it isn't valid.
466 // This may be OK if this is a rename patch.
468 return eol;
470 final int bStart = nextLF(buf, sp, '/');
471 if (bStart >= eol)
472 return eol;
474 // If buffer[aStart..sp - 1] = buffer[bStart..eol - 1]
475 // we have a valid split.
477 if (eq(aStart, sp - 1, bStart, eol - 1)) {
478 if (buf[bol] == '"') {
479 // We're a double quoted name. The region better end
480 // in a double quote too, and we need to decode the
481 // characters before reading the name.
483 if (buf[sp - 2] != '"') {
484 return eol;
486 oldName = QuotedString.GIT_PATH.dequote(buf, bol, sp - 1);
487 oldName = p1(oldName);
488 } else {
489 oldName = decode(Constants.CHARSET, buf, aStart, sp - 1);
491 newName = oldName;
492 return eol;
495 // This split wasn't correct. Move past the space and try
496 // another split as the space must be part of the file name.
498 ptr = sp;
501 return eol;
504 int parseGitHeaders(int ptr, final int end) {
505 while (ptr < end) {
506 final int eol = nextLF(buf, ptr);
507 if (isHunkHdr(buf, ptr, eol) >= 1) {
508 // First hunk header; break out and parse them later.
509 break;
511 } else if (match(buf, ptr, OLD_NAME) >= 0) {
512 parseOldName(ptr, eol);
514 } else if (match(buf, ptr, NEW_NAME) >= 0) {
515 parseNewName(ptr, eol);
517 } else if (match(buf, ptr, OLD_MODE) >= 0) {
518 oldMode = parseFileMode(ptr + OLD_MODE.length, eol);
520 } else if (match(buf, ptr, NEW_MODE) >= 0) {
521 newMode = parseFileMode(ptr + NEW_MODE.length, eol);
523 } else if (match(buf, ptr, DELETED_FILE_MODE) >= 0) {
524 oldMode = parseFileMode(ptr + DELETED_FILE_MODE.length, eol);
525 newMode = FileMode.MISSING;
526 changeType = ChangeType.DELETE;
528 } else if (match(buf, ptr, NEW_FILE_MODE) >= 0) {
529 parseNewFileMode(ptr, eol);
531 } else if (match(buf, ptr, COPY_FROM) >= 0) {
532 oldName = parseName(oldName, ptr + COPY_FROM.length, eol);
533 changeType = ChangeType.COPY;
535 } else if (match(buf, ptr, COPY_TO) >= 0) {
536 newName = parseName(newName, ptr + COPY_TO.length, eol);
537 changeType = ChangeType.COPY;
539 } else if (match(buf, ptr, RENAME_OLD) >= 0) {
540 oldName = parseName(oldName, ptr + RENAME_OLD.length, eol);
541 changeType = ChangeType.RENAME;
543 } else if (match(buf, ptr, RENAME_NEW) >= 0) {
544 newName = parseName(newName, ptr + RENAME_NEW.length, eol);
545 changeType = ChangeType.RENAME;
547 } else if (match(buf, ptr, RENAME_FROM) >= 0) {
548 oldName = parseName(oldName, ptr + RENAME_FROM.length, eol);
549 changeType = ChangeType.RENAME;
551 } else if (match(buf, ptr, RENAME_TO) >= 0) {
552 newName = parseName(newName, ptr + RENAME_TO.length, eol);
553 changeType = ChangeType.RENAME;
555 } else if (match(buf, ptr, SIMILARITY_INDEX) >= 0) {
556 score = parseBase10(buf, ptr + SIMILARITY_INDEX.length, null);
558 } else if (match(buf, ptr, DISSIMILARITY_INDEX) >= 0) {
559 score = parseBase10(buf, ptr + DISSIMILARITY_INDEX.length, null);
561 } else if (match(buf, ptr, INDEX) >= 0) {
562 parseIndexLine(ptr + INDEX.length, eol);
564 } else {
565 // Probably an empty patch (stat dirty).
566 break;
569 ptr = eol;
571 return ptr;
574 void parseOldName(int ptr, final int eol) {
575 oldName = p1(parseName(oldName, ptr + OLD_NAME.length, eol));
576 if (oldName == DEV_NULL)
577 changeType = ChangeType.ADD;
580 void parseNewName(int ptr, final int eol) {
581 newName = p1(parseName(newName, ptr + NEW_NAME.length, eol));
582 if (newName == DEV_NULL)
583 changeType = ChangeType.DELETE;
586 void parseNewFileMode(int ptr, final int eol) {
587 oldMode = FileMode.MISSING;
588 newMode = parseFileMode(ptr + NEW_FILE_MODE.length, eol);
589 changeType = ChangeType.ADD;
592 int parseTraditionalHeaders(int ptr, final int end) {
593 while (ptr < end) {
594 final int eol = nextLF(buf, ptr);
595 if (isHunkHdr(buf, ptr, eol) >= 1) {
596 // First hunk header; break out and parse them later.
597 break;
599 } else if (match(buf, ptr, OLD_NAME) >= 0) {
600 parseOldName(ptr, eol);
602 } else if (match(buf, ptr, NEW_NAME) >= 0) {
603 parseNewName(ptr, eol);
605 } else {
606 // Possibly an empty patch.
607 break;
610 ptr = eol;
612 return ptr;
615 private String parseName(final String expect, int ptr, final int end) {
616 if (ptr == end)
617 return expect;
619 String r;
620 if (buf[ptr] == '"') {
621 // New style GNU diff format
623 r = QuotedString.GIT_PATH.dequote(buf, ptr, end - 1);
624 } else {
625 // Older style GNU diff format, an optional tab ends the name.
627 int tab = end;
628 while (ptr < tab && buf[tab - 1] != '\t')
629 tab--;
630 if (ptr == tab)
631 tab = end;
632 r = decode(Constants.CHARSET, buf, ptr, tab - 1);
635 if (r.equals(DEV_NULL))
636 r = DEV_NULL;
637 return r;
640 private static String p1(final String r) {
641 final int s = r.indexOf('/');
642 return s > 0 ? r.substring(s + 1) : r;
645 FileMode parseFileMode(int ptr, final int end) {
646 int tmp = 0;
647 while (ptr < end - 1) {
648 tmp <<= 3;
649 tmp += buf[ptr++] - '0';
651 return FileMode.fromBits(tmp);
654 void parseIndexLine(int ptr, final int end) {
655 // "index $asha1..$bsha1[ $mode]" where $asha1 and $bsha1
656 // can be unique abbreviations
658 final int dot2 = nextLF(buf, ptr, '.');
659 final int mode = nextLF(buf, dot2, ' ');
661 oldId = AbbreviatedObjectId.fromString(buf, ptr, dot2 - 1);
662 newId = AbbreviatedObjectId.fromString(buf, dot2 + 1, mode - 1);
664 if (mode < end)
665 newMode = oldMode = parseFileMode(mode, end);
668 private boolean eq(int aPtr, int aEnd, int bPtr, int bEnd) {
669 if (aEnd - aPtr != bEnd - bPtr) {
670 return false;
672 while (aPtr < aEnd) {
673 if (buf[aPtr++] != buf[bPtr++])
674 return false;
676 return true;
680 * Determine if this is a patch hunk header.
682 * @param buf
683 * the buffer to scan
684 * @param start
685 * first position in the buffer to evaluate
686 * @param end
687 * last position to consider; usually the end of the buffer (
688 * <code>buf.length</code>) or the first position on the next
689 * line. This is only used to avoid very long runs of '@' from
690 * killing the scan loop.
691 * @return the number of "ancestor revisions" in the hunk header. A
692 * traditional two-way diff ("@@ -...") returns 1; a combined diff
693 * for a 3 way-merge returns 3. If this is not a hunk header, 0 is
694 * returned instead.
696 static int isHunkHdr(final byte[] buf, final int start, final int end) {
697 int ptr = start;
698 while (ptr < end && buf[ptr] == '@')
699 ptr++;
700 if (ptr - start < 2)
701 return 0;
702 if (ptr == end || buf[ptr++] != ' ')
703 return 0;
704 if (ptr == end || buf[ptr++] != '-')
705 return 0;
706 return (ptr - 3) - start;