Squash OffsetCache into WindowCache
[jgit.git] / org.eclipse.jgit / src / org / eclipse / jgit / lib / WindowCache.java
blob b2c79c1089e6baf08fa8a6343e04b31b2a570701
1 /*
2 * Copyright (C) 2008-2009, Google Inc.
3 * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org>
4 * and other copyright owners as documented in the project's IP log.
6 * This program and the accompanying materials are made available
7 * under the terms of the Eclipse Distribution License v1.0 which
8 * accompanies this distribution, is reproduced below, and is
9 * available at http://www.eclipse.org/org/documents/edl-v10.php
11 * All rights reserved.
13 * Redistribution and use in source and binary forms, with or
14 * without modification, are permitted provided that the following
15 * conditions are met:
17 * - Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
20 * - Redistributions in binary form must reproduce the above
21 * copyright notice, this list of conditions and the following
22 * disclaimer in the documentation and/or other materials provided
23 * with the distribution.
25 * - Neither the name of the Eclipse Foundation, Inc. nor the
26 * names of its contributors may be used to endorse or promote
27 * products derived from this software without specific prior
28 * written permission.
30 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
31 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
32 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
33 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
35 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
36 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
37 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
38 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
39 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
40 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
41 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
42 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45 package org.eclipse.jgit.lib;
47 import java.io.IOException;
48 import java.lang.ref.ReferenceQueue;
49 import java.lang.ref.SoftReference;
50 import java.util.Random;
51 import java.util.concurrent.atomic.AtomicInteger;
52 import java.util.concurrent.atomic.AtomicLong;
53 import java.util.concurrent.atomic.AtomicReferenceArray;
54 import java.util.concurrent.locks.ReentrantLock;
56 /**
57 * Caches slices of a {@link PackFile} in memory for faster read access.
58 * <p>
59 * The WindowCache serves as a Java based "buffer cache", loading segments of a
60 * PackFile into the JVM heap prior to use. As JGit often wants to do reads of
61 * only tiny slices of a file, the WindowCache tries to smooth out these tiny
62 * reads into larger block-sized IO operations.
63 * <p>
64 * Whenever a cache miss occurs, {@link #load(PackFile, long)} is invoked by
65 * exactly one thread for the given <code>(PackFile,position)</code> key tuple.
66 * This is ensured by an array of locks, with the tuple hashed to a lock
67 * instance.
68 * <p>
69 * During a miss, older entries are evicted from the cache so long as
70 * {@link #isFull()} returns true.
71 * <p>
72 * Its too expensive during object access to be 100% accurate with a least
73 * recently used (LRU) algorithm. Strictly ordering every read is a lot of
74 * overhead that typically doesn't yield a corresponding benefit to the
75 * application.
76 * <p>
77 * This cache implements a loose LRU policy by randomly picking a window
78 * comprised of roughly 10% of the cache, and evicting the oldest accessed entry
79 * within that window.
80 * <p>
81 * Entities created by the cache are held under SoftReferences, permitting the
82 * Java runtime's garbage collector to evict entries when heap memory gets low.
83 * Most JREs implement a loose least recently used algorithm for this eviction.
84 * <p>
85 * The internal hash table does not expand at runtime, instead it is fixed in
86 * size at cache creation time. The internal lock table used to gate load
87 * invocations is also fixed in size.
88 * <p>
89 * The key tuple is passed through to methods as a pair of parameters rather
90 * than as a single Object, thus reducing the transient memory allocations of
91 * callers. It is more efficient to avoid the allocation, as we can't be 100%
92 * sure that a JIT would be able to stack-allocate a key tuple.
93 * <p>
94 * This cache has an implementation rule such that:
95 * <ul>
96 * <li>{@link #load(PackFile, long)} is invoked by at most one thread at a time
97 * for a given <code>(PackFile,position)</code> tuple.</li>
98 * <li>For every <code>load()</code> invocation there is exactly one
99 * {@link #createRef(PackFile, long, ByteWindow)} invocation to wrap a
100 * SoftReference around the cached entity.</li>
101 * <li>For every Reference created by <code>createRef()</code> there will be
102 * exactly one call to {@link #clear(Ref)} to cleanup any resources associated
103 * with the (now expired) cached entity.</li>
104 * </ul>
105 * <p>
106 * Therefore, it is safe to perform resource accounting increments during the
107 * {@link #load(PackFile, long)} or
108 * {@link #createRef(PackFile, long, ByteWindow)} methods, and matching
109 * decrements during {@link #clear(Ref)}. Implementors may need to override
110 * {@link #createRef(PackFile, long, ByteWindow)} in order to embed additional
111 * accounting information into an implementation specific {@link Ref} subclass,
112 * as the cached entity may have already been evicted by the JRE's garbage
113 * collector.
114 * <p>
115 * To maintain higher concurrency workloads, during eviction only one thread
116 * performs the eviction work, while other threads can continue to insert new
117 * objects in parallel. This means that the cache can be temporarily over limit,
118 * especially if the nominated eviction thread is being starved relative to the
119 * other threads.
121 public class WindowCache {
122 private static final int bits(int newSize) {
123 if (newSize < 4096)
124 throw new IllegalArgumentException("Invalid window size");
125 if (Integer.bitCount(newSize) != 1)
126 throw new IllegalArgumentException("Window size must be power of 2");
127 return Integer.numberOfTrailingZeros(newSize);
/** Random source used to pick the starting bucket of an eviction window. */
private static final Random rng = new Random();

/** Current singleton; volatile because reconfigure() swaps it live. */
private static volatile WindowCache cache;

static {
	// Install a default-configured cache at class load time.
	reconfigure(new WindowCacheConfig());
}
139 * Modify the configuration of the window cache.
140 * <p>
141 * The new configuration is applied immediately. If the new limits are
142 * smaller than what what is currently cached, older entries will be purged
143 * as soon as possible to allow the cache to meet the new limit.
145 * @param packedGitLimit
146 * maximum number of bytes to hold within this instance.
147 * @param packedGitWindowSize
148 * number of bytes per window within the cache.
149 * @param packedGitMMAP
150 * true to enable use of mmap when creating windows.
151 * @param deltaBaseCacheLimit
152 * number of bytes to hold in the delta base cache.
153 * @deprecated Use {@link WindowCacheConfig} instead.
155 public static void reconfigure(final int packedGitLimit,
156 final int packedGitWindowSize, final boolean packedGitMMAP,
157 final int deltaBaseCacheLimit) {
158 final WindowCacheConfig c = new WindowCacheConfig();
159 c.setPackedGitLimit(packedGitLimit);
160 c.setPackedGitWindowSize(packedGitWindowSize);
161 c.setPackedGitMMAP(packedGitMMAP);
162 c.setDeltaBaseCacheLimit(deltaBaseCacheLimit);
163 reconfigure(c);
167 * Modify the configuration of the window cache.
168 * <p>
169 * The new configuration is applied immediately. If the new limits are
170 * smaller than what what is currently cached, older entries will be purged
171 * as soon as possible to allow the cache to meet the new limit.
173 * @param cfg
174 * the new window cache configuration.
175 * @throws IllegalArgumentException
176 * the cache configuration contains one or more invalid
177 * settings, usually too low of a limit.
179 public static void reconfigure(final WindowCacheConfig cfg) {
180 final WindowCache nc = new WindowCache(cfg);
181 final WindowCache oc = cache;
182 if (oc != null)
183 oc.removeAll();
184 cache = nc;
185 UnpackedObjectCache.reconfigure(cfg);
188 static WindowCache getInstance() {
189 return cache;
192 static final ByteWindow get(final PackFile pack, final long offset)
193 throws IOException {
194 final WindowCache c = cache;
195 final ByteWindow r = c.getOrLoad(pack, c.toStart(offset));
196 if (c != cache) {
197 // The cache was reconfigured while we were using the old one
198 // to load this window. The window is still valid, but our
199 // cache may think its still live. Ensure the window is removed
200 // from the old cache so resources can be released.
202 c.removeAll();
204 return r;
207 static final void purge(final PackFile pack) {
208 cache.removeAll(pack);
211 /** ReferenceQueue to cleanup released and garbage collected windows. */
212 private final ReferenceQueue<ByteWindow> queue;
214 /** Number of entries in {@link #table}. */
215 private final int tableSize;
217 /** Access clock for loose LRU. */
218 private final AtomicLong clock;
220 /** Hash bucket directory; entries are chained below. */
221 private final AtomicReferenceArray<Entry> table;
223 /** Locks to prevent concurrent loads for same (PackFile,position). */
224 private final Lock[] locks;
226 /** Lock to elect the eviction thread after a load occurs. */
227 private final ReentrantLock evictLock;
229 /** Number of {@link #table} buckets to scan for an eviction window. */
230 private final int evictBatch;
232 private final int maxFiles;
234 private final long maxBytes;
236 private final boolean mmap;
238 private final int windowSizeShift;
240 private final int windowSize;
242 private final AtomicInteger openFiles;
244 private final AtomicLong openBytes;
/**
 * Create a cache sized according to the supplied configuration.
 *
 * @param cfg
 *            configuration supplying limits and window geometry.
 * @throws IllegalArgumentException
 *             a limit is out of range (table/lock counts below 1, open
 *             files below 1, or a byte limit smaller than one window).
 */
private WindowCache(final WindowCacheConfig cfg) {
	tableSize = tableSize(cfg);
	final int lockCount = lockCount(cfg);
	if (tableSize < 1)
		throw new IllegalArgumentException("tSize must be >= 1");
	if (lockCount < 1)
		throw new IllegalArgumentException("lockCount must be >= 1");

	queue = new ReferenceQueue<ByteWindow>();
	clock = new AtomicLong(1);
	table = new AtomicReferenceArray<Entry>(tableSize);
	locks = new Lock[lockCount];
	for (int i = 0; i < locks.length; i++)
		locks[i] = new Lock();
	evictLock = new ReentrantLock();

	// Scan roughly 10% of the table per eviction pass, clamped to [4, 64]
	// and never more than the table itself.
	int eb = (int) (tableSize * .1);
	if (64 < eb)
		eb = 64;
	else if (eb < 4)
		eb = 4;
	if (tableSize < eb)
		eb = tableSize;
	evictBatch = eb;

	maxFiles = cfg.getPackedGitOpenFiles();
	maxBytes = cfg.getPackedGitLimit();
	mmap = cfg.isPackedGitMMAP();
	windowSizeShift = bits(cfg.getPackedGitWindowSize());
	windowSize = 1 << windowSizeShift;

	openFiles = new AtomicInteger();
	openBytes = new AtomicLong();

	if (maxFiles < 1)
		throw new IllegalArgumentException("Open files must be >= 1");
	// The check permits windowSize == maxBytes, so the message says "<=".
	if (maxBytes < windowSize)
		throw new IllegalArgumentException("Window size must be <= limit");
}
286 int getOpenFiles() {
287 return openFiles.get();
290 long getOpenBytes() {
291 return openBytes.get();
294 private int hash(final int packHash, final long off) {
295 return packHash + (int) (off >>> windowSizeShift);
298 private ByteWindow load(final PackFile pack, final long offset)
299 throws IOException {
300 if (pack.beginWindowCache())
301 openFiles.incrementAndGet();
302 try {
303 if (mmap)
304 return pack.mmap(offset, windowSize);
305 return pack.read(offset, windowSize);
306 } catch (IOException e) {
307 close(pack);
308 throw e;
309 } catch (RuntimeException e) {
310 close(pack);
311 throw e;
312 } catch (Error e) {
313 close(pack);
314 throw e;
318 private Ref createRef(final PackFile p, final long o, final ByteWindow v) {
319 final Ref ref = new Ref(p, o, v, queue);
320 openBytes.addAndGet(ref.size);
321 return ref;
324 private void clear(final Ref ref) {
325 openBytes.addAndGet(-ref.size);
326 close(ref.pack);
329 private void close(final PackFile pack) {
330 if (pack.endWindowCache())
331 openFiles.decrementAndGet();
334 private boolean isFull() {
335 return maxFiles < openFiles.get() || maxBytes < openBytes.get();
338 private long toStart(final long offset) {
339 return (offset >>> windowSizeShift) << windowSizeShift;
342 private static int tableSize(final WindowCacheConfig cfg) {
343 final int wsz = cfg.getPackedGitWindowSize();
344 final long limit = cfg.getPackedGitLimit();
345 if (wsz <= 0)
346 throw new IllegalArgumentException("Invalid window size");
347 if (limit < wsz)
348 throw new IllegalArgumentException("Window size must be < limit");
349 return (int) Math.min(5 * (limit / wsz) / 2, 2000000000);
352 private static int lockCount(final WindowCacheConfig cfg) {
353 return Math.max(cfg.getPackedGitOpenFiles(), 32);
357 * Lookup a cached object, creating and loading it if it doesn't exist.
359 * @param pack
360 * the pack that "contains" the cached object.
361 * @param position
362 * offset within <code>pack</code> of the object.
363 * @return the object reference.
364 * @throws IOException
365 * the object reference was not in the cache and could not be
366 * obtained by {@link #load(PackFile, long)}.
368 private ByteWindow getOrLoad(final PackFile pack, final long position)
369 throws IOException {
370 final int slot = slot(pack, position);
371 final Entry e1 = table.get(slot);
372 ByteWindow v = scan(e1, pack, position);
373 if (v != null)
374 return v;
376 synchronized (lock(pack, position)) {
377 Entry e2 = table.get(slot);
378 if (e2 != e1) {
379 v = scan(e2, pack, position);
380 if (v != null)
381 return v;
384 v = load(pack, position);
385 final Ref ref = createRef(pack, position, v);
386 hit(ref);
387 for (;;) {
388 final Entry n = new Entry(clean(e2), ref);
389 if (table.compareAndSet(slot, e2, n))
390 break;
391 e2 = table.get(slot);
395 if (evictLock.tryLock()) {
396 try {
397 gc();
398 evict();
399 } finally {
400 evictLock.unlock();
404 return v;
407 private ByteWindow scan(Entry n, final PackFile pack, final long position) {
408 for (; n != null; n = n.next) {
409 final Ref r = n.ref;
410 if (r.pack == pack && r.position == position) {
411 final ByteWindow v = r.get();
412 if (v != null) {
413 hit(r);
414 return v;
416 n.kill();
417 break;
420 return null;
423 private void hit(final Ref r) {
424 // We don't need to be 100% accurate here. Its sufficient that at least
425 // one thread performs the increment. Any other concurrent access at
426 // exactly the same time can simply use the same clock value.
428 // Consequently we attempt the set, but we don't try to recover should
429 // it fail. This is why we don't use getAndIncrement() here.
431 final long c = clock.get();
432 clock.compareAndSet(c, c + 1);
433 r.lastAccess = c;
436 private void evict() {
437 while (isFull()) {
438 int ptr = rng.nextInt(tableSize);
439 Entry old = null;
440 int slot = 0;
441 for (int b = evictBatch - 1; b >= 0; b--, ptr++) {
442 if (tableSize <= ptr)
443 ptr = 0;
444 for (Entry e = table.get(ptr); e != null; e = e.next) {
445 if (e.dead)
446 continue;
447 if (old == null || e.ref.lastAccess < old.ref.lastAccess) {
448 old = e;
449 slot = ptr;
453 if (old != null) {
454 old.kill();
455 gc();
456 final Entry e1 = table.get(slot);
457 table.compareAndSet(slot, e1, clean(e1));
463 * Clear every entry from the cache.
464 * <p>
465 * This is a last-ditch effort to clear out the cache, such as before it
466 * gets replaced by another cache that is configured differently. This
467 * method tries to force every cached entry through {@link #clear(Ref)} to
468 * ensure that resources are correctly accounted for and cleaned up by the
469 * subclass. A concurrent reader loading entries while this method is
470 * running may cause resource accounting failures.
472 private void removeAll() {
473 for (int s = 0; s < tableSize; s++) {
474 Entry e1;
475 do {
476 e1 = table.get(s);
477 for (Entry e = e1; e != null; e = e.next)
478 e.kill();
479 } while (!table.compareAndSet(s, e1, null));
481 gc();
485 * Clear all entries related to a single file.
486 * <p>
487 * Typically this method is invoked during {@link PackFile#close()}, when we
488 * know the pack is never going to be useful to us again (for example, it no
489 * longer exists on disk). A concurrent reader loading an entry from this
490 * same pack may cause the pack to become stuck in the cache anyway.
492 * @param pack
493 * the file to purge all entries of.
495 private void removeAll(final PackFile pack) {
496 for (int s = 0; s < tableSize; s++) {
497 final Entry e1 = table.get(s);
498 boolean hasDead = false;
499 for (Entry e = e1; e != null; e = e.next) {
500 if (e.ref.pack == pack) {
501 e.kill();
502 hasDead = true;
503 } else if (e.dead)
504 hasDead = true;
506 if (hasDead)
507 table.compareAndSet(s, e1, clean(e1));
509 gc();
512 @SuppressWarnings("unchecked")
513 private void gc() {
514 Ref r;
515 while ((r = (Ref) queue.poll()) != null) {
516 // Sun's Java 5 and 6 implementation have a bug where a Reference
517 // can be enqueued and dequeued twice on the same reference queue
518 // due to a race condition within ReferenceQueue.enqueue(Reference).
520 // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6837858
522 // We CANNOT permit a Reference to come through us twice, as it will
523 // skew the resource counters we maintain. Our canClear() check here
524 // provides a way to skip the redundant dequeues, if any.
526 if (r.canClear()) {
527 clear(r);
529 boolean found = false;
530 final int s = slot(r.pack, r.position);
531 final Entry e1 = table.get(s);
532 for (Entry n = e1; n != null; n = n.next) {
533 if (n.ref == r) {
534 n.dead = true;
535 found = true;
536 break;
539 if (found)
540 table.compareAndSet(s, e1, clean(e1));
545 private int slot(final PackFile pack, final long position) {
546 return (hash(pack.hash, position) >>> 1) % tableSize;
549 private Lock lock(final PackFile pack, final long position) {
550 return locks[(hash(pack.hash, position) >>> 1) % locks.length];
553 private static Entry clean(Entry top) {
554 while (top != null && top.dead) {
555 top.ref.enqueue();
556 top = top.next;
558 if (top == null)
559 return null;
560 final Entry n = clean(top.next);
561 return n == top.next ? top : new Entry(n, top.ref);
564 private static class Entry {
565 /** Next entry in the hash table's chain list. */
566 final Entry next;
568 /** The referenced object. */
569 final Ref ref;
572 * Marked true when ref.get() returns null and the ref is dead.
573 * <p>
574 * A true here indicates that the ref is no longer accessible, and that
575 * we therefore need to eventually purge this Entry object out of the
576 * bucket's chain.
578 volatile boolean dead;
580 Entry(final Entry n, final Ref r) {
581 next = n;
582 ref = r;
585 final void kill() {
586 dead = true;
587 ref.enqueue();
591 /** A soft reference wrapped around a cached object. */
592 private static class Ref extends SoftReference<ByteWindow> {
593 final PackFile pack;
595 final long position;
597 final int size;
599 long lastAccess;
601 private boolean cleared;
603 protected Ref(final PackFile pack, final long position,
604 final ByteWindow v, final ReferenceQueue<ByteWindow> queue) {
605 super(v, queue);
606 this.pack = pack;
607 this.position = position;
608 this.size = v.size();
611 final synchronized boolean canClear() {
612 if (cleared)
613 return false;
614 cleared = true;
615 return true;
619 private static final class Lock {
620 // Used only for its implicit monitor.