3 * THE REGENTS OF THE UNIVERSITY OF MICHIGAN
6 * Permission is granted to use, copy, create derivative works
7 * and redistribute this software and such derivative works
8 * for any purpose, so long as the name of The University of
9 * Michigan is not used in any advertising or publicity
10 * pertaining to the use or distribution of this software
11 * without specific, written prior authorization. If the
12 * above copyright notice or any other identification of the
13 * University of Michigan is included in any copy of any
14 * portion of this software, then the disclaimer below must
17 * THIS SOFTWARE IS PROVIDED AS IS, WITHOUT REPRESENTATION
18 * FROM THE UNIVERSITY OF MICHIGAN AS TO ITS FITNESS FOR ANY
19 * PURPOSE, AND WITHOUT WARRANTY BY THE UNIVERSITY OF
20 * MICHIGAN OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING
21 * WITHOUT LIMITATION THE IMPLIED WARRANTIES OF
22 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
23 * REGENTS OF THE UNIVERSITY OF MICHIGAN SHALL NOT BE LIABLE
24 * FOR ANY DAMAGES, INCLUDING SPECIAL, INDIRECT, INCIDENTAL, OR
25 * CONSEQUENTIAL DAMAGES, WITH RESPECT TO ANY CLAIM ARISING
26 * OUT OF OR IN CONNECTION WITH THE USE OF THE SOFTWARE, EVEN
27 * IF IT HAS BEEN OR IS HEREAFTER ADVISED OF THE POSSIBILITY OF
32 * Portions Copyright (c) 2008
33 * The Linux Box Corporation
36 * Permission is granted to use, copy, create derivative works
37 * and redistribute this software and such derivative works
38 * for any purpose, so long as the name of the Linux Box
39 * Corporation is not used in any advertising or publicity
40 * pertaining to the use or distribution of this software
41 * without specific, written prior authorization. If the
42 * above copyright notice or any other identification of the
43 * Linux Box Corporation is included in any copy of any
44 * portion of this software, then the disclaimer below must
47 * This software is provided as is, without representation
48 * from the Linux Box Corporation as to its fitness for any
49 * purpose, and without warranty by the Linux Box Corporation
50 * of any kind, either express or implied, including
51 * without limitation the implied warranties of
52 * merchantability and fitness for a particular purpose. The
53 * Linux Box Corporation shall not be liable for any damages,
54 * including special, indirect, incidental, or consequential
55 * damages, with respect to any claim arising out of or in
56 * connection with the use of the software, even if it has been
57 * or is hereafter advised of the possibility of such damages.
61 #include <afsconfig.h>
62 #include "afs/param.h"
63 #if defined(AFS_CACHE_BYPASS) || defined(UKERNEL)
64 #include "afs/afs_bypasscache.h"
70 #include "afs/sysincludes.h" /* Standard vendor system headers */
71 #include "afs/afsincludes.h" /* Afs-based standard headers */
72 #include "afs/afs_stats.h" /* statistics */
73 #include "afs/nfsclient.h"
74 #include "rx/rx_globals.h"
/*
 * afs_min: yield the smaller of A and B.
 *
 * The entire expansion is wrapped in parentheses so the macro can be used
 * as an operand of a larger expression (e.g. 2 * afs_min(a, b) or
 * afs_min(a, b) + 1) without the conditional operator absorbing the
 * neighbouring operands.
 *
 * NOTE: an argument may be evaluated twice; do not pass expressions with
 * side effects.
 */
#define afs_min(A,B) (((A)<(B)) ? (A) : (B))
80 /* conditional GLOCK macros */
81 #define COND_GLOCK(var) \
83 var = ISAFS_GLOCK(); \
88 #define COND_RE_GUNLOCK(var) \
95 /* conditional GUNLOCK macros */
97 #define COND_GUNLOCK(var) \
99 var = ISAFS_GLOCK(); \
104 #define COND_RE_GLOCK(var) \
111 int cache_bypass_strategy
= NEVER_BYPASS_CACHE
;
112 afs_size_t cache_bypass_threshold
= AFS_CACHE_BYPASS_DISABLED
; /* file size > threshold triggers bypass */
113 int cache_bypass_prefetch
= 1; /* Should we do prefetching ? */
115 extern afs_rwlock_t afs_xcbhash
;
118 * This is almost exactly like the PFlush() routine in afs_pioctl.c,
119 * but that routine is static. We are about to change a file from
120 * normal caching to bypassing the cache. Therefore, we want to
121 * free up any cache space in use by the file, and throw out any
122 * existing VM pages for the file. We keep track of the number of
123 * times we go back and forth from caching to bypass.
126 afs_TransitionToBypass(struct vcache
*avc
,
127 afs_ucred_t
*acred
, int aflags
)
137 if (aflags
& TRANSChangeDesiredBit
)
139 if (aflags
& TRANSSetManualBit
)
144 ObtainWriteLock(&avc
->lock
, 925);
146 * Someone may have beat us to doing the transition - we had no lock
147 * when we checked the flag earlier. No cause to panic, just return.
149 if (avc
->cachingStates
& FCSBypass
)
152 /* If we never cached this, just change state */
153 if (setDesire
&& (!(avc
->cachingStates
& FCSBypass
))) {
154 avc
->cachingStates
|= FCSBypass
;
158 /* cg2v, try to store any chunks not written 20071204 */
159 if (avc
->execsOrWriters
> 0) {
160 struct vrequest
*treq
= NULL
;
162 code
= afs_CreateReq(&treq
, acred
);
164 code
= afs_StoreAllSegments(avc
, treq
, AFS_SYNC
| AFS_LASTSTORE
);
165 afs_DestroyReq(treq
);
169 /* also cg2v, don't dequeue the callback */
170 /* next reference will re-stat */
171 afs_StaleVCacheFlags(avc
, AFS_STALEVC_NOCB
, CDirty
);
172 /* now find the disk cache entries */
173 afs_TryToSmush(avc
, acred
, 1);
174 if (avc
->linkData
&& !(avc
->f
.states
& CCore
)) {
175 afs_osi_Free(avc
->linkData
, strlen(avc
->linkData
) + 1);
176 avc
->linkData
= NULL
;
179 avc
->cachingStates
|= FCSBypass
; /* Set the bypass flag */
181 avc
->cachingStates
|= FCSDesireBypass
;
183 avc
->cachingStates
|= FCSManuallySet
;
184 avc
->cachingTransitions
++;
187 ReleaseWriteLock(&avc
->lock
);
192 * This is almost exactly like the PFlush() routine in afs_pioctl.c,
193 * but that routine is static. We are about to change a file from
194 * bypassing caching to normal caching. Therefore, we want to
195 * throw out any existing VM pages for the file. We keep track of
196 * the number of times we go back and forth from caching to bypass.
199 afs_TransitionToCaching(struct vcache
*avc
,
209 if (aflags
& TRANSChangeDesiredBit
)
211 if (aflags
& TRANSSetManualBit
)
215 ObtainWriteLock(&avc
->lock
, 926);
217 * Someone may have beat us to doing the transition - we had no lock
218 * when we checked the flag earlier. No cause to panic, just return.
220 if (!(avc
->cachingStates
& FCSBypass
))
223 /* Ok, we actually do need to flush */
224 /* next reference will re-stat cache entry */
225 afs_StaleVCacheFlags(avc
, 0, CDirty
);
227 /* now find the disk cache entries */
228 afs_TryToSmush(avc
, acred
, 1);
229 if (avc
->linkData
&& !(avc
->f
.states
& CCore
)) {
230 afs_osi_Free(avc
->linkData
, strlen(avc
->linkData
) + 1);
231 avc
->linkData
= NULL
;
234 avc
->cachingStates
&= ~(FCSBypass
); /* Reset the bypass flag */
236 avc
->cachingStates
&= ~(FCSDesireBypass
);
238 avc
->cachingStates
|= FCSManuallySet
;
239 avc
->cachingTransitions
++;
242 ReleaseWriteLock(&avc
->lock
);
246 /* In the case where there's an error in afs_NoCacheFetchProc or
247 * afs_PrefetchNoCache, all of the pages they've been passed need
251 typedef void * bypass_page_t
;
253 #define unlock_and_release_pages(auio)
254 #define release_full_page(pp, pageoff)
257 typedef struct page
* bypass_page_t
;
259 #define unlock_and_release_pages(auio) \
261 struct iovec *ciov; \
264 afs_int32 iovno = 0; \
265 ciov = auio->uio_iov; \
266 iovmax = auio->uio_iovcnt - 1; \
267 pp = (bypass_page_t) ciov->iov_base; \
270 if (PageLocked(pp)) \
272 put_page(pp); /* decrement refcount */ \
277 ciov = (auio->uio_iov + iovno); \
278 pp = (bypass_page_t) ciov->iov_base; \
282 #define release_full_page(pp, pageoff) \
284 /* this is appropriate when no caller intends to unlock \
285 * and release the page */ \
286 SetPageUptodate(pp); \
290 afs_warn("afs_NoCacheFetchProc: page not locked!\n"); \
291 put_page(pp); /* decrement refcount */ \
296 afs_bypass_copy_page(bypass_page_t pp
, int pageoff
, struct iovec
*rxiov
,
297 int iovno
, int iovoff
, struct uio
*auio
, int curiov
, int partial
)
303 dolen
= auio
->uio_iov
[curiov
].iov_len
- pageoff
;
305 dolen
= rxiov
[iovno
].iov_len
- iovoff
;
307 #if !defined(UKERNEL)
308 # if defined(KMAP_ATOMIC_TAKES_NO_KM_TYPE)
309 address
= kmap_atomic(pp
);
311 address
= kmap_atomic(pp
, KM_USER0
);
316 memcpy(address
+ pageoff
, (char *)(rxiov
[iovno
].iov_base
) + iovoff
, dolen
);
317 #if !defined(UKERNEL)
318 # if defined(KMAP_ATOMIC_TAKES_NO_KM_TYPE)
319 kunmap_atomic(address
);
321 kunmap_atomic(address
, KM_USER0
);
326 /* no-cache prefetch routine */
328 afs_NoCacheFetchProc(struct rx_call
*acall
,
331 afs_int32 release_pages
,
336 int moredata
, iovno
, iovoff
, iovmax
, result
, locked
;
345 rxiov
= osi_AllocSmallSpace(sizeof(struct iovec
) * RX_MAXIOVECS
);
346 ciov
= auio
->uio_iov
;
347 pp
= (bypass_page_t
) ciov
->iov_base
;
348 iovmax
= auio
->uio_iovcnt
- 1;
349 iovno
= iovoff
= result
= 0;
352 COND_GUNLOCK(locked
);
353 code
= rx_Read(acall
, (char *)&length
, sizeof(afs_int32
));
354 COND_RE_GLOCK(locked
);
355 if (code
!= sizeof(afs_int32
)) {
357 afs_warn("Preread error. code: %d instead of %d\n",
358 code
, (int)sizeof(afs_int32
));
359 unlock_and_release_pages(auio
);
362 length
= ntohl(length
);
366 afs_warn("Preread error. Got length %d, which is greater than size %d\n",
368 unlock_and_release_pages(auio
);
372 /* If we get a 0 length reply, time to cleanup and return */
374 unlock_and_release_pages(auio
);
380 * The fetch protocol is extended for the AFS/DFS translator
381 * to allow multiple blocks of data, each with its own length,
382 * to be returned. As long as the top bit is set, there are more
385 * We do not do this for AFS file servers because they sometimes
386 * return large negative numbers as the transfer size.
388 if (avc
->f
.states
& CForeign
) {
389 moredata
= length
& 0x80000000;
390 length
&= ~0x80000000;
395 for (curpage
= 0; curpage
<= iovmax
; curpage
++) {
397 /* properly, this should track uio_resid, not a fixed page size! */
398 while (pageoff
< auio
->uio_iov
[curpage
].iov_len
) {
399 /* If no more iovs, issue new read. */
401 COND_GUNLOCK(locked
);
402 bytes
= rx_Readv(acall
, rxiov
, &nio
, RX_MAXIOVECS
, length
);
403 COND_RE_GLOCK(locked
);
405 afs_warn("afs_NoCacheFetchProc: rx_Read error. Return code was %d\n", bytes
);
407 unlock_and_release_pages(auio
);
409 } else if (bytes
== 0) {
410 /* we failed to read the full length */
412 afs_warn("afs_NoCacheFetchProc: rx_Read returned zero. Aborting.\n");
413 unlock_and_release_pages(auio
);
417 auio
->uio_resid
-= bytes
;
420 pp
= (bypass_page_t
)auio
->uio_iov
[curpage
].iov_base
;
421 if (pageoff
+ (rxiov
[iovno
].iov_len
- iovoff
) <= auio
->uio_iov
[curpage
].iov_len
) {
422 /* Copy entire (or rest of) current iovec into current page */
424 afs_bypass_copy_page(pp
, pageoff
, rxiov
, iovno
, iovoff
, auio
, curpage
, 0);
425 length
-= (rxiov
[iovno
].iov_len
- iovoff
);
426 pageoff
+= rxiov
[iovno
].iov_len
- iovoff
;
430 /* Copy only what's needed to fill current page */
432 afs_bypass_copy_page(pp
, pageoff
, rxiov
, iovno
, iovoff
, auio
, curpage
, 1);
433 length
-= (auio
->uio_iov
[curpage
].iov_len
- pageoff
);
434 iovoff
+= auio
->uio_iov
[curpage
].iov_len
- pageoff
;
435 pageoff
= auio
->uio_iov
[curpage
].iov_len
;
438 /* we filled a page, or this is the last page. conditionally release it */
439 if (pp
&& ((pageoff
== auio
->uio_iov
[curpage
].iov_len
&&
440 release_pages
) || (length
== 0 && iovno
>= nio
)))
441 release_full_page(pp
, pageoff
);
443 if (length
== 0 && iovno
>= nio
)
450 osi_FreeSmallSpace(rxiov
);
455 /* dispatch a no-cache read request */
457 afs_ReadNoCache(struct vcache
*avc
,
458 struct nocache_read_request
*bparms
,
463 struct brequest
*breq
;
464 struct vrequest
*areq
= NULL
;
468 afs_warn("afs_ReadNoCache VCache Error!\n");
473 /* the receiver will free areq */
474 code
= afs_CreateReq(&areq
, acred
);
476 afs_warn("afs_ReadNoCache afs_CreateReq error!\n");
478 code
= afs_VerifyVCache(avc
, areq
);
480 afs_warn("afs_ReadNoCache Failed to verify VCache!\n");
486 code
= afs_CheckCode(code
, areq
, 11); /* failed to get it */
492 /* and queue this one */
496 breq
= afs_BQueue(BOP_FETCH_NOCACHE
, avc
, B_DONTWAIT
, 0, acred
, 1, 1,
497 bparms
, (void *)0, (void *)0);
502 afs_osi_Wait(10 * bcnt
, 0, 0);
514 /* If there's a problem before we queue the request, we need to
515 * do everything that would normally happen when the request was
516 * processed, like unlocking the pages and freeing memory.
518 unlock_and_release_pages(bparms
->auio
);
520 afs_DestroyReq(areq
);
522 osi_Free(bparms
->auio
->uio_iov
,
523 bparms
->auio
->uio_iovcnt
* sizeof(struct iovec
));
524 osi_Free(bparms
->auio
, sizeof(struct uio
));
525 osi_Free(bparms
, sizeof(struct nocache_read_request
));
530 /* Cannot have static linkage--called from BPrefetch (afs_daemons) */
532 afs_PrefetchNoCache(struct vcache
*avc
,
534 struct nocache_read_request
*bparms
)
538 struct iovec
*iovecp
;
540 struct vrequest
*areq
;
542 struct rx_connection
*rxconn
;
543 #ifdef AFS_64BIT_CLIENT
544 afs_int32 length_hi
, bytes
, locked
;
548 struct rx_call
*tcall
;
550 struct AFSVolSync tsync
;
551 struct AFSFetchStatus OutStatus
;
552 struct AFSCallBack CallBack
;
554 struct tlocal1
*tcallspec
;
559 iovecp
= auio
->uio_iov
;
562 tcallspec
= osi_Alloc(sizeof(struct tlocal1
));
564 tc
= afs_Conn(&avc
->f
.fid
, areq
, SHARED_LOCK
/* ignored */, &rxconn
);
566 avc
->callback
= tc
->parent
->srvr
->server
;
567 tcall
= rx_NewCall(rxconn
);
568 #ifdef AFS_64BIT_CLIENT
569 if (!afs_serverHasNo64Bit(tc
)) {
570 code
= StartRXAFS_FetchData64(tcall
,
571 (struct AFSFid
*) &avc
->f
.fid
.Fid
,
575 COND_GUNLOCK(locked
);
576 bytes
= rx_Read(tcall
, (char *)&length_hi
,
578 COND_RE_GLOCK(locked
);
580 if (bytes
!= sizeof(afs_int32
)) {
582 COND_GUNLOCK(locked
);
583 code
= rx_EndCall(tcall
, RX_PROTOCOL_ERROR
);
584 COND_RE_GLOCK(locked
);
588 } /* afs_serverHasNo64Bit */
589 if (code
== RXGEN_OPCODE
|| afs_serverHasNo64Bit(tc
)) {
590 if (auio
->uio_offset
> 0x7FFFFFFF) {
594 pos
= auio
->uio_offset
;
595 COND_GUNLOCK(locked
);
597 tcall
= rx_NewCall(rxconn
);
598 code
= StartRXAFS_FetchData(tcall
,
599 (struct AFSFid
*) &avc
->f
.fid
.Fid
,
600 pos
, bparms
->length
);
601 COND_RE_GLOCK(locked
);
603 afs_serverSetNo64Bit(tc
);
606 code
= StartRXAFS_FetchData(tcall
,
607 (struct AFSFid
*) &avc
->f
.fid
.Fid
,
608 auio
->uio_offset
, bparms
->length
);
611 code
= afs_NoCacheFetchProc(tcall
, avc
, auio
,
612 1 /* release_pages */,
615 afs_warn("BYPASS: StartRXAFS_FetchData failed: %d\n", code
);
616 unlock_and_release_pages(auio
);
617 afs_PutConn(tc
, rxconn
, SHARED_LOCK
);
621 code
= EndRXAFS_FetchData(tcall
, &tcallspec
->OutStatus
,
622 &tcallspec
->CallBack
,
625 afs_warn("BYPASS: NoCacheFetchProc failed: %d\n", code
);
627 code
= rx_EndCall(tcall
, code
);
629 afs_warn("BYPASS: No connection.\n");
631 unlock_and_release_pages(auio
);
634 } while (afs_Analyze(tc
, rxconn
, code
, &avc
->f
.fid
, areq
,
635 AFS_STATS_FS_RPCIDX_FETCHDATA
,
639 * Copy appropriate fields into vcache
643 afs_ProcessFS(avc
, &tcallspec
->OutStatus
, areq
);
645 osi_Free(areq
, sizeof(struct vrequest
));
646 osi_Free(tcallspec
, sizeof(struct tlocal1
));
647 osi_Free(bparms
, sizeof(struct nocache_read_request
));
649 /* in UKERNEL, the "pages" are passed in */
650 osi_Free(iovecp
, auio
->uio_iovcnt
* sizeof(struct iovec
));
651 osi_Free(auio
, sizeof(struct uio
));