Added dirty-detection to WorkingTreeIterator
[jgit.git] / org.eclipse.jgit / src / org / eclipse / jgit / treewalk / WorkingTreeIterator.java
blobcd06c2498f011729f1b90e229b8622a4edd5f21e
1 /*
2 * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org>
3 * Copyright (C) 2010, Christian Halstrick <christian.halstrick@sap.com>
4 * Copyright (C) 2010, Matthias Sohn <matthias.sohn@sap.com>
5 * and other copyright owners as documented in the project's IP log.
7 * This program and the accompanying materials are made available
8 * under the terms of the Eclipse Distribution License v1.0 which
9 * accompanies this distribution, is reproduced below, and is
10 * available at http://www.eclipse.org/org/documents/edl-v10.php
12 * All rights reserved.
14 * Redistribution and use in source and binary forms, with or
15 * without modification, are permitted provided that the following
16 * conditions are met:
18 * - Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
21 * - Redistributions in binary form must reproduce the above
22 * copyright notice, this list of conditions and the following
23 * disclaimer in the documentation and/or other materials provided
24 * with the distribution.
26 * - Neither the name of the Eclipse Foundation, Inc. nor the
27 * names of its contributors may be used to endorse or promote
28 * products derived from this software without specific prior
29 * written permission.
31 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
32 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
33 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
34 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
35 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
36 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
37 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
38 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
39 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
40 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
41 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
43 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
46 package org.eclipse.jgit.treewalk;
48 import java.io.IOException;
49 import java.io.InputStream;
50 import java.nio.ByteBuffer;
51 import java.nio.CharBuffer;
52 import java.nio.charset.CharacterCodingException;
53 import java.nio.charset.CharsetEncoder;
54 import java.security.MessageDigest;
55 import java.text.MessageFormat;
56 import java.util.Arrays;
57 import java.util.Comparator;
59 import org.eclipse.jgit.JGitText;
60 import org.eclipse.jgit.dircache.DirCache;
61 import org.eclipse.jgit.dircache.DirCacheEntry;
62 import org.eclipse.jgit.errors.CorruptObjectException;
63 import org.eclipse.jgit.lib.Constants;
64 import org.eclipse.jgit.lib.FileMode;
65 import org.eclipse.jgit.util.FS;
67 /**
68 * Walks a working directory tree as part of a {@link TreeWalk}.
69 * <p>
70 * Most applications will want to use the standard implementation of this
71 * iterator, {@link FileTreeIterator}, as that does all IO through the standard
72 * <code>java.io</code> package. Plugins for a Java based IDE may however wish
73 * to create their own implementations of this class to allow traversal of the
74 * IDE's project space, as well as benefit from any caching the IDE may have.
76 * @see FileTreeIterator
78 public abstract class WorkingTreeIterator extends AbstractTreeIterator {
79 /** An empty entry array, suitable for {@link #init(Entry[])}. */
80 protected static final Entry[] EOF = {};
82 /** Size we perform file IO in if we have to read and hash a file. */
83 private static final int BUFFER_SIZE = 2048;
85 /** The {@link #idBuffer()} for the current entry. */
86 private byte[] contentId;
88 /** Index within {@link #entries} that {@link #contentId} came from. */
89 private int contentIdFromPtr;
91 /** Buffer used to perform {@link #contentId} computations. */
92 private byte[] contentReadBuffer;
94 /** Digest computer for {@link #contentId} computations. */
95 private MessageDigest contentDigest;
97 /** File name character encoder. */
98 private final CharsetEncoder nameEncoder;
100 /** List of entries obtained from the subclass. */
101 private Entry[] entries;
103 /** Total number of entries in {@link #entries} that are valid. */
104 private int entryCnt;
106 /** Current position within {@link #entries}. */
107 private int ptr;
109 /** Create a new iterator with no parent. */
110 protected WorkingTreeIterator() {
111 super();
112 nameEncoder = Constants.CHARSET.newEncoder();
116 * Create a new iterator with no parent and a prefix.
117 * <p>
118 * The prefix path supplied is inserted in front of all paths generated by
119 * this iterator. It is intended to be used when an iterator is being
120 * created for a subsection of an overall repository and needs to be
121 * combined with other iterators that are created to run over the entire
122 * repository namespace.
124 * @param prefix
125 * position of this iterator in the repository tree. The value
126 * may be null or the empty string to indicate the prefix is the
127 * root of the repository. A trailing slash ('/') is
128 * automatically appended if the prefix does not end in '/'.
130 protected WorkingTreeIterator(final String prefix) {
131 super(prefix);
132 nameEncoder = Constants.CHARSET.newEncoder();
136 * Create an iterator for a subtree of an existing iterator.
138 * @param p
139 * parent tree iterator.
141 protected WorkingTreeIterator(final WorkingTreeIterator p) {
142 super(p);
143 nameEncoder = p.nameEncoder;
146 @Override
147 public byte[] idBuffer() {
148 if (contentIdFromPtr == ptr)
149 return contentId;
150 switch (mode & FileMode.TYPE_MASK) {
151 case FileMode.TYPE_FILE:
152 contentIdFromPtr = ptr;
153 return contentId = idBufferBlob(entries[ptr]);
154 case FileMode.TYPE_SYMLINK:
155 // Java does not support symbolic links, so we should not
156 // have reached this particular part of the walk code.
158 return zeroid;
159 case FileMode.TYPE_GITLINK:
160 // TODO: Support obtaining current HEAD SHA-1 from nested repository
162 return zeroid;
164 return zeroid;
167 private void initializeDigest() {
168 if (contentDigest != null)
169 return;
171 if (parent == null) {
172 contentReadBuffer = new byte[BUFFER_SIZE];
173 contentDigest = Constants.newMessageDigest();
174 } else {
175 final WorkingTreeIterator p = (WorkingTreeIterator) parent;
176 p.initializeDigest();
177 contentReadBuffer = p.contentReadBuffer;
178 contentDigest = p.contentDigest;
182 private static final byte[] digits = { '0', '1', '2', '3', '4', '5', '6',
183 '7', '8', '9' };
185 private static final byte[] hblob = Constants
186 .encodedTypeString(Constants.OBJ_BLOB);
188 private byte[] idBufferBlob(final Entry e) {
189 try {
190 final InputStream is = e.openInputStream();
191 if (is == null)
192 return zeroid;
193 try {
194 initializeDigest();
196 contentDigest.reset();
197 contentDigest.update(hblob);
198 contentDigest.update((byte) ' ');
200 final long blobLength = e.getLength();
201 long sz = blobLength;
202 if (sz == 0) {
203 contentDigest.update((byte) '0');
204 } else {
205 final int bufn = contentReadBuffer.length;
206 int p = bufn;
207 do {
208 contentReadBuffer[--p] = digits[(int) (sz % 10)];
209 sz /= 10;
210 } while (sz > 0);
211 contentDigest.update(contentReadBuffer, p, bufn - p);
213 contentDigest.update((byte) 0);
215 for (;;) {
216 final int r = is.read(contentReadBuffer);
217 if (r <= 0)
218 break;
219 contentDigest.update(contentReadBuffer, 0, r);
220 sz += r;
222 if (sz != blobLength)
223 return zeroid;
224 return contentDigest.digest();
225 } finally {
226 try {
227 is.close();
228 } catch (IOException err2) {
229 // Suppress any error related to closing an input
230 // stream. We don't care, we should not have any
231 // outstanding data to flush or anything like that.
234 } catch (IOException err) {
235 // Can't read the file? Don't report the failure either.
237 return zeroid;
241 @Override
242 public int idOffset() {
243 return 0;
246 @Override
247 public boolean first() {
248 return ptr == 0;
251 @Override
252 public boolean eof() {
253 return ptr == entryCnt;
256 @Override
257 public void next(final int delta) throws CorruptObjectException {
258 ptr += delta;
259 if (!eof())
260 parseEntry();
263 @Override
264 public void back(final int delta) throws CorruptObjectException {
265 ptr -= delta;
266 parseEntry();
269 private void parseEntry() {
270 final Entry e = entries[ptr];
271 mode = e.getMode().getBits();
273 final int nameLen = e.encodedNameLen;
274 ensurePathCapacity(pathOffset + nameLen, pathOffset);
275 System.arraycopy(e.encodedName, 0, path, pathOffset, nameLen);
276 pathLen = pathOffset + nameLen;
280 * Get the byte length of this entry.
282 * @return size of this file, in bytes.
284 public long getEntryLength() {
285 return current().getLength();
289 * Get the last modified time of this entry.
291 * @return last modified time of this file, in milliseconds since the epoch
292 * (Jan 1, 1970 UTC).
294 public long getEntryLastModified() {
295 return current().getLastModified();
298 private static final Comparator<Entry> ENTRY_CMP = new Comparator<Entry>() {
299 public int compare(final Entry o1, final Entry o2) {
300 final byte[] a = o1.encodedName;
301 final byte[] b = o2.encodedName;
302 final int aLen = o1.encodedNameLen;
303 final int bLen = o2.encodedNameLen;
304 int cPos;
306 for (cPos = 0; cPos < aLen && cPos < bLen; cPos++) {
307 final int cmp = (a[cPos] & 0xff) - (b[cPos] & 0xff);
308 if (cmp != 0)
309 return cmp;
312 if (cPos < aLen)
313 return (a[cPos] & 0xff) - lastPathChar(o2);
314 if (cPos < bLen)
315 return lastPathChar(o1) - (b[cPos] & 0xff);
316 return lastPathChar(o1) - lastPathChar(o2);
320 static int lastPathChar(final Entry e) {
321 return e.getMode() == FileMode.TREE ? '/' : '\0';
325 * Constructor helper.
327 * @param list
328 * files in the subtree of the work tree this iterator operates
329 * on
331 protected void init(final Entry[] list) {
332 // Filter out nulls, . and .. as these are not valid tree entries,
333 // also cache the encoded forms of the path names for efficient use
334 // later on during sorting and iteration.
336 entries = list;
337 int i, o;
339 for (i = 0, o = 0; i < entries.length; i++) {
340 final Entry e = entries[i];
341 if (e == null)
342 continue;
343 final String name = e.getName();
344 if (".".equals(name) || "..".equals(name))
345 continue;
346 if (Constants.DOT_GIT.equals(name))
347 continue;
348 if (i != o)
349 entries[o] = e;
350 e.encodeName(nameEncoder);
351 o++;
353 entryCnt = o;
354 Arrays.sort(entries, 0, entryCnt, ENTRY_CMP);
356 contentIdFromPtr = -1;
357 ptr = 0;
358 if (!eof())
359 parseEntry();
363 * Obtain the current entry from this iterator.
365 * @return the currently selected entry.
367 protected Entry current() {
368 return entries[ptr];
372 * Checks whether this entry differs from a given entry from the
373 * {@link DirCache}.
375 * File status information is used and if status is same we consider the
376 * file identical to the state in the working directory. Native git uses
377 * more stat fields than we have accessible in Java.
379 * @param entry
380 * the entry from the dircache we want to compare against
381 * @param forceContentCheck
382 * True if the actual file content should be checked if
383 * modification time differs.
384 * @param checkFilemode
385 * whether the executable-bit in the filemode should be checked
386 * to detect modifications
387 * @param fs
388 * The filesystem this repo uses. Needed to find out whether the
389 * executable-bits are supported
391 * @return true if content is most likely different.
393 public boolean isModified(DirCacheEntry entry, boolean forceContentCheck,
394 boolean checkFilemode, FS fs) {
395 if (entry.isAssumeValid())
396 return false;
398 if (entry.isUpdateNeeded())
399 return true;
401 if (getEntryLength() != entry.getLength())
402 return true;
404 // determine difference in mode-bits of file and index-entry. In the
405 // bitwise presentation of modeDiff we'll have a '1' when the two modes
406 // differ at this position.
407 int modeDiff = getEntryRawMode() ^ entry.getRawMode();
408 // ignore the executable file bits if checkFilemode tells me to do so.
409 // Ignoring is done by setting the bits representing a EXECUTABLE_FILE
410 // to '0' in modeDiff
411 if (!checkFilemode)
412 modeDiff &= ~FileMode.EXECUTABLE_FILE.getBits();
413 if (modeDiff != 0)
414 // report a modification if the modes still (after potentially
415 // ignoring EXECUTABLE_FILE bits) differ
416 return true;
418 // Git under windows only stores seconds so we round the timestamp
419 // Java gives us if it looks like the timestamp in index is seconds
420 // only. Otherwise we compare the timestamp at millisecond precision.
421 long cacheLastModified = entry.getLastModified();
422 long fileLastModified = getEntryLastModified();
423 if (cacheLastModified % 1000 == 0)
424 fileLastModified = fileLastModified - fileLastModified % 1000;
425 if (forceContentCheck) {
426 if (fileLastModified == cacheLastModified)
427 return false; // Same time, don't check content.
428 else
429 return !getEntryObjectId().equals(entry.getObjectId());
430 } else {
431 // No content check forced, assume dirty if stat differs.
432 return fileLastModified != cacheLastModified;
436 /** A single entry within a working directory tree. */
437 protected static abstract class Entry {
438 byte[] encodedName;
440 int encodedNameLen;
442 void encodeName(final CharsetEncoder enc) {
443 final ByteBuffer b;
444 try {
445 b = enc.encode(CharBuffer.wrap(getName()));
446 } catch (CharacterCodingException e) {
447 // This should so never happen.
448 throw new RuntimeException(MessageFormat.format(JGitText.get().unencodeableFile, getName()));
451 encodedNameLen = b.limit();
452 if (b.hasArray() && b.arrayOffset() == 0)
453 encodedName = b.array();
454 else
455 b.get(encodedName = new byte[encodedNameLen]);
458 public String toString() {
459 return getMode().toString() + " " + getName();
463 * Get the type of this entry.
464 * <p>
465 * <b>Note: Efficient implementation required.</b>
466 * <p>
467 * The implementation of this method must be efficient. If a subclass
468 * needs to compute the value they should cache the reference within an
469 * instance member instead.
471 * @return a file mode constant from {@link FileMode}.
473 public abstract FileMode getMode();
476 * Get the byte length of this entry.
477 * <p>
478 * <b>Note: Efficient implementation required.</b>
479 * <p>
480 * The implementation of this method must be efficient. If a subclass
481 * needs to compute the value they should cache the reference within an
482 * instance member instead.
484 * @return size of this file, in bytes.
486 public abstract long getLength();
489 * Get the last modified time of this entry.
490 * <p>
491 * <b>Note: Efficient implementation required.</b>
492 * <p>
493 * The implementation of this method must be efficient. If a subclass
494 * needs to compute the value they should cache the reference within an
495 * instance member instead.
497 * @return time since the epoch (in ms) of the last change.
499 public abstract long getLastModified();
502 * Get the name of this entry within its directory.
503 * <p>
504 * Efficient implementations are not required. The caller will obtain
505 * the name only once and cache it once obtained.
507 * @return name of the entry.
509 public abstract String getName();
512 * Obtain an input stream to read the file content.
513 * <p>
514 * Efficient implementations are not required. The caller will usually
515 * obtain the stream only once per entry, if at all.
516 * <p>
517 * The input stream should not use buffering if the implementation can
518 * avoid it. The caller will buffer as necessary to perform efficient
519 * block IO operations.
520 * <p>
521 * The caller will close the stream once complete.
523 * @return a stream to read from the file.
524 * @throws IOException
525 * the file could not be opened for reading.
527 public abstract InputStream openInputStream() throws IOException;