Merge changes I39bfefee,I47795987,I70d120fb,I58cc5e01,I96bee7b9
[jgit.git] / org.eclipse.jgit / src / org / eclipse / jgit / treewalk / AbstractTreeIterator.java
bloba54b3e9cfa5d18adda2f981a8d987eacaa4d784e
1 /*
2 * Copyright (C) 2008-2009, Google Inc.
3 * Copyright (C) 2007, Robin Rosenberg <robin.rosenberg@dewire.com>
4 * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org>
5 * and other copyright owners as documented in the project's IP log.
7 * This program and the accompanying materials are made available
8 * under the terms of the Eclipse Distribution License v1.0 which
9 * accompanies this distribution, is reproduced below, and is
10 * available at http://www.eclipse.org/org/documents/edl-v10.php
12 * All rights reserved.
14 * Redistribution and use in source and binary forms, with or
15 * without modification, are permitted provided that the following
16 * conditions are met:
18 * - Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
21 * - Redistributions in binary form must reproduce the above
22 * copyright notice, this list of conditions and the following
23 * disclaimer in the documentation and/or other materials provided
24 * with the distribution.
26 * - Neither the name of the Eclipse Foundation, Inc. nor the
27 * names of its contributors may be used to endorse or promote
28 * products derived from this software without specific prior
29 * written permission.
31 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
32 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
33 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
34 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
35 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
36 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
37 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
38 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
39 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
40 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
41 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
43 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
46 package org.eclipse.jgit.treewalk;
48 import java.io.IOException;
49 import java.nio.ByteBuffer;
50 import java.nio.CharBuffer;
52 import org.eclipse.jgit.errors.CorruptObjectException;
53 import org.eclipse.jgit.errors.IncorrectObjectTypeException;
54 import org.eclipse.jgit.lib.Constants;
55 import org.eclipse.jgit.lib.FileMode;
56 import org.eclipse.jgit.lib.MutableObjectId;
57 import org.eclipse.jgit.lib.ObjectId;
58 import org.eclipse.jgit.lib.ObjectReader;
59 import org.eclipse.jgit.treewalk.filter.TreeFilter;
61 /**
62 * Walks a Git tree (directory) in Git sort order.
63 * <p>
64 * A new iterator instance should be positioned on the first entry, or at eof.
65 * Data for the first entry (if not at eof) should be available immediately.
66 * <p>
67 * Implementors must walk a tree in the Git sort order, which has the following
68 * odd sorting:
69 * <ol>
70 * <li>A.c</li>
71 * <li>A/c</li>
72 * <li>A0c</li>
73 * </ol>
74 * <p>
75 * In the second item, <code>A</code> is the name of a subtree and
76 * <code>c</code> is a file within that subtree. The other two items are files
77 * in the root level tree.
79 * @see CanonicalTreeParser
81 public abstract class AbstractTreeIterator {
82 /** Default size for the {@link #path} buffer. */
83 protected static final int DEFAULT_PATH_SIZE = 128;
85 /** A dummy object id buffer that matches the zero ObjectId. */
86 protected static final byte[] zeroid = new byte[Constants.OBJECT_ID_LENGTH];
88 /** Iterator for the parent tree; null if we are the root iterator. */
89 final AbstractTreeIterator parent;
91 /** The iterator this current entry is path equal to. */
92 AbstractTreeIterator matches;
94 /**
95 * Number of entries we moved forward to force a D/F conflict match.
97 * @see NameConflictTreeWalk
99 int matchShift;
102 * Mode bits for the current entry.
103 * <p>
104 * A numerical value from FileMode is usually faster for an iterator to
105 * obtain from its data source so this is the preferred representation.
107 * @see org.eclipse.jgit.lib.FileMode
109 protected int mode;
112 * Path buffer for the current entry.
113 * <p>
114 * This buffer is pre-allocated at the start of walking and is shared from
115 * parent iterators down into their subtree iterators. The sharing allows
116 * the current entry to always be a full path from the root, while each
117 * subtree only needs to populate the part that is under their control.
119 protected byte[] path;
122 * Position within {@link #path} this iterator starts writing at.
123 * <p>
124 * This is the first offset in {@link #path} that this iterator must
125 * populate during {@link #next}. At the root level (when {@link #parent}
126 * is null) this is 0. For a subtree iterator the index before this position
127 * should have the value '/'.
129 protected final int pathOffset;
132 * Total length of the current entry's complete path from the root.
133 * <p>
134 * This is the number of bytes within {@link #path} that pertain to the
135 * current entry. Values at this index through the end of the array are
136 * garbage and may be randomly populated from prior entries.
138 protected int pathLen;
140 /** Create a new iterator with no parent. */
141 protected AbstractTreeIterator() {
142 parent = null;
143 path = new byte[DEFAULT_PATH_SIZE];
144 pathOffset = 0;
148 * Create a new iterator with no parent and a prefix.
149 * <p>
150 * The prefix path supplied is inserted in front of all paths generated by
151 * this iterator. It is intended to be used when an iterator is being
152 * created for a subsection of an overall repository and needs to be
153 * combined with other iterators that are created to run over the entire
154 * repository namespace.
156 * @param prefix
157 * position of this iterator in the repository tree. The value
158 * may be null or the empty string to indicate the prefix is the
159 * root of the repository. A trailing slash ('/') is
160 * automatically appended if the prefix does not end in '/'.
162 protected AbstractTreeIterator(final String prefix) {
163 parent = null;
165 if (prefix != null && prefix.length() > 0) {
166 final ByteBuffer b;
168 b = Constants.CHARSET.encode(CharBuffer.wrap(prefix));
169 pathLen = b.limit();
170 path = new byte[Math.max(DEFAULT_PATH_SIZE, pathLen + 1)];
171 b.get(path, 0, pathLen);
172 if (path[pathLen - 1] != '/')
173 path[pathLen++] = '/';
174 pathOffset = pathLen;
175 } else {
176 path = new byte[DEFAULT_PATH_SIZE];
177 pathOffset = 0;
182 * Create a new iterator with no parent and a prefix.
183 * <p>
184 * The prefix path supplied is inserted in front of all paths generated by
185 * this iterator. It is intended to be used when an iterator is being
186 * created for a subsection of an overall repository and needs to be
187 * combined with other iterators that are created to run over the entire
188 * repository namespace.
190 * @param prefix
191 * position of this iterator in the repository tree. The value
192 * may be null or the empty array to indicate the prefix is the
193 * root of the repository. A trailing slash ('/') is
194 * automatically appended if the prefix does not end in '/'.
196 protected AbstractTreeIterator(final byte[] prefix) {
197 parent = null;
199 if (prefix != null && prefix.length > 0) {
200 pathLen = prefix.length;
201 path = new byte[Math.max(DEFAULT_PATH_SIZE, pathLen + 1)];
202 System.arraycopy(prefix, 0, path, 0, pathLen);
203 if (path[pathLen - 1] != '/')
204 path[pathLen++] = '/';
205 pathOffset = pathLen;
206 } else {
207 path = new byte[DEFAULT_PATH_SIZE];
208 pathOffset = 0;
213 * Create an iterator for a subtree of an existing iterator.
215 * @param p
216 * parent tree iterator.
218 protected AbstractTreeIterator(final AbstractTreeIterator p) {
219 parent = p;
220 path = p.path;
221 pathOffset = p.pathLen + 1;
223 try {
224 path[pathOffset - 1] = '/';
225 } catch (ArrayIndexOutOfBoundsException e) {
226 growPath(p.pathLen);
227 path[pathOffset - 1] = '/';
232 * Create an iterator for a subtree of an existing iterator.
233 * <p>
234 * The caller is responsible for setting up the path of the child iterator.
236 * @param p
237 * parent tree iterator.
238 * @param childPath
239 * path array to be used by the child iterator. This path must
240 * contain the path from the top of the walk to the first child
241 * and must end with a '/'.
242 * @param childPathOffset
243 * position within <code>childPath</code> where the child can
244 * insert its data. The value at
245 * <code>childPath[childPathOffset-1]</code> must be '/'.
247 protected AbstractTreeIterator(final AbstractTreeIterator p,
248 final byte[] childPath, final int childPathOffset) {
249 parent = p;
250 path = childPath;
251 pathOffset = childPathOffset;
255 * Grow the path buffer larger.
257 * @param len
258 * number of live bytes in the path buffer. This many bytes will
259 * be moved into the larger buffer.
261 protected void growPath(final int len) {
262 setPathCapacity(path.length << 1, len);
266 * Ensure that path is capable to hold at least {@code capacity} bytes
268 * @param capacity
269 * the amount of bytes to hold
270 * @param len
271 * the amount of live bytes in path buffer
273 protected void ensurePathCapacity(final int capacity, final int len) {
274 if (path.length >= capacity)
275 return;
276 final byte[] o = path;
277 int current = o.length;
278 int newCapacity = current;
279 while (newCapacity < capacity && newCapacity > 0)
280 newCapacity <<= 1;
281 setPathCapacity(newCapacity, len);
285 * Set path buffer capacity to the specified size
287 * @param capacity
288 * the new size
289 * @param len
290 * the amount of bytes to copy
292 private void setPathCapacity(int capacity, int len) {
293 final byte[] o = path;
294 final byte[] n = new byte[capacity];
295 System.arraycopy(o, 0, n, 0, len);
296 for (AbstractTreeIterator p = this; p != null && p.path == o; p = p.parent)
297 p.path = n;
301 * Compare the path of this current entry to another iterator's entry.
303 * @param p
304 * the other iterator to compare the path against.
305 * @return -1 if this entry sorts first; 0 if the entries are equal; 1 if
306 * p's entry sorts first.
308 public int pathCompare(final AbstractTreeIterator p) {
309 return pathCompare(p, p.mode);
312 int pathCompare(final AbstractTreeIterator p, final int pMode) {
313 final byte[] a = path;
314 final byte[] b = p.path;
315 final int aLen = pathLen;
316 final int bLen = p.pathLen;
317 int cPos;
319 // Its common when we are a subtree for both parents to match;
320 // when this happens everything in path[0..cPos] is known to
321 // be equal and does not require evaluation again.
323 cPos = alreadyMatch(this, p);
325 for (; cPos < aLen && cPos < bLen; cPos++) {
326 final int cmp = (a[cPos] & 0xff) - (b[cPos] & 0xff);
327 if (cmp != 0)
328 return cmp;
331 if (cPos < aLen)
332 return (a[cPos] & 0xff) - lastPathChar(pMode);
333 if (cPos < bLen)
334 return lastPathChar(mode) - (b[cPos] & 0xff);
335 return lastPathChar(mode) - lastPathChar(pMode);
338 private static int alreadyMatch(AbstractTreeIterator a,
339 AbstractTreeIterator b) {
340 for (;;) {
341 final AbstractTreeIterator ap = a.parent;
342 final AbstractTreeIterator bp = b.parent;
343 if (ap == null || bp == null)
344 return 0;
345 if (ap.matches == bp.matches)
346 return a.pathOffset;
347 a = ap;
348 b = bp;
352 private static int lastPathChar(final int mode) {
353 return FileMode.TREE.equals(mode) ? '/' : '\0';
357 * Check if the current entry of both iterators has the same id.
358 * <p>
359 * This method is faster than {@link #getEntryObjectId()} as it does not
360 * require copying the bytes out of the buffers. A direct {@link #idBuffer}
361 * compare operation is performed.
363 * @param otherIterator
364 * the other iterator to test against.
365 * @return true if both iterators have the same object id; false otherwise.
367 public boolean idEqual(final AbstractTreeIterator otherIterator) {
368 return ObjectId.equals(idBuffer(), idOffset(),
369 otherIterator.idBuffer(), otherIterator.idOffset());
373 * Get the object id of the current entry.
375 * @return an object id for the current entry.
377 public ObjectId getEntryObjectId() {
378 return ObjectId.fromRaw(idBuffer(), idOffset());
382 * Obtain the ObjectId for the current entry.
384 * @param out
385 * buffer to copy the object id into.
387 public void getEntryObjectId(final MutableObjectId out) {
388 out.fromRaw(idBuffer(), idOffset());
391 /** @return the file mode of the current entry. */
392 public FileMode getEntryFileMode() {
393 return FileMode.fromBits(mode);
396 /** @return the file mode of the current entry as bits */
397 public int getEntryRawMode() {
398 return mode;
401 /** @return path of the current entry, as a string. */
402 public String getEntryPathString() {
403 return TreeWalk.pathOf(this);
407 * Get the current entry's path hash code.
408 * <p>
409 * This method computes a hash code on the fly for this path, the hash is
410 * suitable to cluster objects that may have similar paths together.
412 * @return path hash code; any integer may be returned.
414 public int getEntryPathHashCode() {
415 int hash = 0;
416 for (int i = Math.max(0, pathLen - 16); i < pathLen; i++) {
417 byte c = path[i];
418 if (c != ' ')
419 hash = (hash >>> 2) + (c << 24);
421 return hash;
425 * Get the byte array buffer object IDs must be copied out of.
426 * <p>
427 * The id buffer contains the bytes necessary to construct an ObjectId for
428 * the current entry of this iterator. The buffer can be the same buffer for
429 * all entries, or it can be a unique buffer per-entry. Implementations are
430 * encouraged to expose their private buffer whenever possible to reduce
431 * garbage generation and copying costs.
433 * @return byte array the implementation stores object IDs within.
434 * @see #getEntryObjectId()
436 public abstract byte[] idBuffer();
439 * Get the position within {@link #idBuffer()} of this entry's ObjectId.
441 * @return offset into the array returned by {@link #idBuffer()} where the
442 * ObjectId must be copied out of.
444 public abstract int idOffset();
447 * Create a new iterator for the current entry's subtree.
448 * <p>
449 * The parent reference of the iterator must be <code>this</code>,
450 * otherwise the caller would not be able to exit out of the subtree
451 * iterator correctly and return to continue walking <code>this</code>.
453 * @param reader
454 * reader to load the tree data from.
455 * @return a new parser that walks over the current subtree.
456 * @throws IncorrectObjectTypeException
457 * the current entry is not actually a tree and cannot be parsed
458 * as though it were a tree.
459 * @throws IOException
460 * a loose object or pack file could not be read.
462 public abstract AbstractTreeIterator createSubtreeIterator(
463 ObjectReader reader) throws IncorrectObjectTypeException,
464 IOException;
467 * Create a new iterator as though the current entry were a subtree.
469 * @return a new empty tree iterator.
471 public EmptyTreeIterator createEmptyTreeIterator() {
472 return new EmptyTreeIterator(this);
476 * Create a new iterator for the current entry's subtree.
477 * <p>
478 * The parent reference of the iterator must be <code>this</code>, otherwise
479 * the caller would not be able to exit out of the subtree iterator
480 * correctly and return to continue walking <code>this</code>.
482 * @param reader
483 * reader to load the tree data from.
484 * @param idBuffer
485 * temporary ObjectId buffer for use by this method.
486 * @return a new parser that walks over the current subtree.
487 * @throws IncorrectObjectTypeException
488 * the current entry is not actually a tree and cannot be parsed
489 * as though it were a tree.
490 * @throws IOException
491 * a loose object or pack file could not be read.
493 public AbstractTreeIterator createSubtreeIterator(
494 final ObjectReader reader, final MutableObjectId idBuffer)
495 throws IncorrectObjectTypeException, IOException {
496 return createSubtreeIterator(reader);
500 * Is this tree iterator positioned on its first entry?
501 * <p>
502 * An iterator is positioned on the first entry if <code>back(1)</code>
503 * would be an invalid request as there is no entry before the current one.
504 * <p>
505 * An empty iterator (one with no entries) will be
506 * <code>first() &amp;&amp; eof()</code>.
508 * @return true if the iterator is positioned on the first entry.
510 public abstract boolean first();
513 * Is this tree iterator at its EOF point (no more entries)?
514 * <p>
515 * An iterator is at EOF if there is no current entry.
517 * @return true if we have walked all entries and have none left.
519 public abstract boolean eof();
522 * Move to next entry, populating this iterator with the entry data.
523 * <p>
524 * The delta indicates how many moves forward should occur. The most common
525 * delta is 1 to move to the next entry.
526 * <p>
527 * Implementations must populate the following members:
528 * <ul>
529 * <li>{@link #mode}</li>
530 * <li>{@link #path} (from {@link #pathOffset} to {@link #pathLen})</li>
531 * <li>{@link #pathLen}</li>
532 * </ul>
533 * as well as any implementation dependent information necessary to
534 * accurately return data from {@link #idBuffer()} and {@link #idOffset()}
535 * when demanded.
537 * @param delta
538 * number of entries to move the iterator by. Must be a positive,
539 * non-zero integer.
540 * @throws CorruptObjectException
541 * the tree is invalid.
543 public abstract void next(int delta) throws CorruptObjectException;
546 * Move to prior entry, populating this iterator with the entry data.
547 * <p>
548 * The delta indicates how many moves backward should occur.The most common
549 * delta is 1 to move to the prior entry.
550 * <p>
551 * Implementations must populate the following members:
552 * <ul>
553 * <li>{@link #mode}</li>
554 * <li>{@link #path} (from {@link #pathOffset} to {@link #pathLen})</li>
555 * <li>{@link #pathLen}</li>
556 * </ul>
557 * as well as any implementation dependent information necessary to
558 * accurately return data from {@link #idBuffer()} and {@link #idOffset()}
559 * when demanded.
561 * @param delta
562 * number of entries to move the iterator by. Must be a positive,
563 * non-zero integer.
564 * @throws CorruptObjectException
565 * the tree is invalid.
567 public abstract void back(int delta) throws CorruptObjectException;
570 * Advance to the next tree entry, populating this iterator with its data.
571 * <p>
572 * This method behaves like <code>seek(1)</code> but is called by
573 * {@link TreeWalk} only if a {@link TreeFilter} was used and ruled out the
574 * current entry from the results. In such cases this tree iterator may
575 * perform special behavior.
577 * @throws CorruptObjectException
578 * the tree is invalid.
580 public void skip() throws CorruptObjectException {
581 next(1);
585 * Indicates to the iterator that no more entries will be read.
586 * <p>
587 * This is only invoked by TreeWalk when the iteration is aborted early due
588 * to a {@link org.eclipse.jgit.errors.StopWalkException} being thrown from
589 * within a TreeFilter.
591 public void stopWalk() {
592 // Do nothing by default. Most iterators do not care.
596 * @return the length of the name component of the path for the current entry
598 public int getNameLength() {
599 return pathLen - pathOffset;
603 * Get the name component of the current entry path into the provided buffer.
605 * @param buffer the buffer to get the name into, it is assumed that buffer can hold the name
606 * @param offset the offset of the name in the buffer
607 * @see #getNameLength()
609 public void getName(byte[] buffer, int offset) {
610 System.arraycopy(path, pathOffset, buffer, offset, pathLen - pathOffset);