fs: use kmem_cache_zalloc instead
[pv_ops_mirror.git] / fs / xfs / xfs_iomap.c
blobbf57b75acb905aa3a5836fa75e65d859a112f453
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_bit.h"
21 #include "xfs_log.h"
22 #include "xfs_inum.h"
23 #include "xfs_trans.h"
24 #include "xfs_sb.h"
25 #include "xfs_ag.h"
26 #include "xfs_dir2.h"
27 #include "xfs_alloc.h"
28 #include "xfs_dmapi.h"
29 #include "xfs_quota.h"
30 #include "xfs_mount.h"
31 #include "xfs_bmap_btree.h"
32 #include "xfs_alloc_btree.h"
33 #include "xfs_ialloc_btree.h"
34 #include "xfs_dir2_sf.h"
35 #include "xfs_attr_sf.h"
36 #include "xfs_dinode.h"
37 #include "xfs_inode.h"
38 #include "xfs_ialloc.h"
39 #include "xfs_btree.h"
40 #include "xfs_bmap.h"
41 #include "xfs_rtalloc.h"
42 #include "xfs_error.h"
43 #include "xfs_itable.h"
44 #include "xfs_rw.h"
45 #include "xfs_acl.h"
46 #include "xfs_attr.h"
47 #include "xfs_buf_item.h"
48 #include "xfs_trans_space.h"
49 #include "xfs_utils.h"
50 #include "xfs_iomap.h"
#if defined(XFS_RW_TRACE)
/*
 * Log an xfs_iomap entry event into the inode's i_rwtrace buffer.
 *
 * Nothing is recorded when the inode has no i_rwtrace buffer.  The 64-bit
 * values (on-disk size, offset, io_new_size) are logged as separate high
 * and low 32-bit halves; the remaining slots are passed as NULL.
 */
void
xfs_iomap_enter_trace(
	int		tag,
	xfs_iocore_t	*io,
	xfs_off_t	offset,
	ssize_t		count)
{
	xfs_inode_t	*ip = XFS_IO_INODE(io);

	if (!ip->i_rwtrace)
		return;

	ktrace_enter(ip->i_rwtrace,
		(void *)((unsigned long)tag),
		(void *)ip,
		(void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)),
		(void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)),
		(void *)((unsigned long)((offset >> 32) & 0xffffffff)),
		(void *)((unsigned long)(offset & 0xffffffff)),
		(void *)((unsigned long)count),
		(void *)((unsigned long)((io->io_new_size >> 32) & 0xffffffff)),
		(void *)((unsigned long)(io->io_new_size & 0xffffffff)),
		(void *)((unsigned long)current_pid()),
		(void *)NULL,
		(void *)NULL,
		(void *)NULL,
		(void *)NULL,
		(void *)NULL,
		(void *)NULL);
}

/*
 * Log a completed mapping into the inode's i_rwtrace buffer: the request
 * (offset, count, flags), the resulting xfs_iomap_t fields and the
 * underlying block-map record.
 *
 * Nothing is recorded when the inode has no i_rwtrace buffer.
 */
void
xfs_iomap_map_trace(
	int		tag,
	xfs_iocore_t	*io,
	xfs_off_t	offset,
	ssize_t		count,
	xfs_iomap_t	*iomapp,
	xfs_bmbt_irec_t	*imapp,
	int		flags)
{
	xfs_inode_t	*ip = XFS_IO_INODE(io);

	if (!ip->i_rwtrace)
		return;

	ktrace_enter(ip->i_rwtrace,
		(void *)((unsigned long)tag),
		(void *)ip,
		(void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)),
		(void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)),
		(void *)((unsigned long)((offset >> 32) & 0xffffffff)),
		(void *)((unsigned long)(offset & 0xffffffff)),
		(void *)((unsigned long)count),
		(void *)((unsigned long)flags),
		(void *)((unsigned long)((iomapp->iomap_offset >> 32) & 0xffffffff)),
		(void *)((unsigned long)(iomapp->iomap_offset & 0xffffffff)),
		(void *)((unsigned long)(iomapp->iomap_delta)),
		(void *)((unsigned long)(iomapp->iomap_bsize)),
		(void *)((unsigned long)(iomapp->iomap_bn)),
		(void *)(__psint_t)(imapp->br_startoff),
		(void *)((unsigned long)(imapp->br_blockcount)),
		(void *)(__psint_t)(imapp->br_startblock));
}
#else
/* Tracing compiled out: both hooks expand to nothing (arguments unevaluated). */
#define	xfs_iomap_enter_trace(tag, io, offset, count)
#define	xfs_iomap_map_trace(tag, io, offset, count, iomapp, imapp, flags)
#endif
/*
 * Round the byte offset 'off' down to a multiple of 2^m_writeio_log.
 * 'mp' is parenthesised so that any pointer expression (e.g. &mount) can be
 * passed without changing how the member access binds.
 */
#define XFS_WRITEIO_ALIGN(mp,off)	(((off) >> (mp)->m_writeio_log) \
						<< (mp)->m_writeio_log)
/* Size of the imap[] array used by xfs_iomap_write_allocate(). */
#define XFS_STRAT_WRITE_IMAPS	2
/* Size of the imap[] array used by xfs_iomap_write_delay(). */
#define XFS_WRITE_IMAPS		XFS_BMAP_MAX_NMAP
127 STATIC int
128 xfs_imap_to_bmap(
129 xfs_iocore_t *io,
130 xfs_off_t offset,
131 xfs_bmbt_irec_t *imap,
132 xfs_iomap_t *iomapp,
133 int imaps, /* Number of imap entries */
134 int iomaps, /* Number of iomap entries */
135 int flags)
137 xfs_mount_t *mp;
138 xfs_fsize_t nisize;
139 int pbm;
140 xfs_fsblock_t start_block;
142 mp = io->io_mount;
143 nisize = XFS_SIZE(mp, io);
144 if (io->io_new_size > nisize)
145 nisize = io->io_new_size;
147 for (pbm = 0; imaps && pbm < iomaps; imaps--, iomapp++, imap++, pbm++) {
148 iomapp->iomap_offset = XFS_FSB_TO_B(mp, imap->br_startoff);
149 iomapp->iomap_delta = offset - iomapp->iomap_offset;
150 iomapp->iomap_bsize = XFS_FSB_TO_B(mp, imap->br_blockcount);
151 iomapp->iomap_flags = flags;
153 if (io->io_flags & XFS_IOCORE_RT) {
154 iomapp->iomap_flags |= IOMAP_REALTIME;
155 iomapp->iomap_target = mp->m_rtdev_targp;
156 } else {
157 iomapp->iomap_target = mp->m_ddev_targp;
159 start_block = imap->br_startblock;
160 if (start_block == HOLESTARTBLOCK) {
161 iomapp->iomap_bn = IOMAP_DADDR_NULL;
162 iomapp->iomap_flags |= IOMAP_HOLE;
163 } else if (start_block == DELAYSTARTBLOCK) {
164 iomapp->iomap_bn = IOMAP_DADDR_NULL;
165 iomapp->iomap_flags |= IOMAP_DELAY;
166 } else {
167 iomapp->iomap_bn = XFS_FSB_TO_DB_IO(io, start_block);
168 if (ISUNWRITTEN(imap))
169 iomapp->iomap_flags |= IOMAP_UNWRITTEN;
172 if ((iomapp->iomap_offset + iomapp->iomap_bsize) >= nisize) {
173 iomapp->iomap_flags |= IOMAP_EOF;
176 offset += iomapp->iomap_bsize - iomapp->iomap_delta;
178 return pbm; /* Return the number filled */
182 xfs_iomap(
183 xfs_iocore_t *io,
184 xfs_off_t offset,
185 ssize_t count,
186 int flags,
187 xfs_iomap_t *iomapp,
188 int *niomaps)
190 xfs_mount_t *mp = io->io_mount;
191 xfs_fileoff_t offset_fsb, end_fsb;
192 int error = 0;
193 int lockmode = 0;
194 xfs_bmbt_irec_t imap;
195 int nimaps = 1;
196 int bmapi_flags = 0;
197 int iomap_flags = 0;
199 if (XFS_FORCED_SHUTDOWN(mp))
200 return XFS_ERROR(EIO);
202 switch (flags &
203 (BMAPI_READ | BMAPI_WRITE | BMAPI_ALLOCATE |
204 BMAPI_UNWRITTEN | BMAPI_DEVICE)) {
205 case BMAPI_READ:
206 xfs_iomap_enter_trace(XFS_IOMAP_READ_ENTER, io, offset, count);
207 lockmode = XFS_LCK_MAP_SHARED(mp, io);
208 bmapi_flags = XFS_BMAPI_ENTIRE;
209 break;
210 case BMAPI_WRITE:
211 xfs_iomap_enter_trace(XFS_IOMAP_WRITE_ENTER, io, offset, count);
212 lockmode = XFS_ILOCK_EXCL|XFS_EXTSIZE_WR;
213 if (flags & BMAPI_IGNSTATE)
214 bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE;
215 XFS_ILOCK(mp, io, lockmode);
216 break;
217 case BMAPI_ALLOCATE:
218 xfs_iomap_enter_trace(XFS_IOMAP_ALLOC_ENTER, io, offset, count);
219 lockmode = XFS_ILOCK_SHARED|XFS_EXTSIZE_RD;
220 bmapi_flags = XFS_BMAPI_ENTIRE;
221 /* Attempt non-blocking lock */
222 if (flags & BMAPI_TRYLOCK) {
223 if (!XFS_ILOCK_NOWAIT(mp, io, lockmode))
224 return XFS_ERROR(EAGAIN);
225 } else {
226 XFS_ILOCK(mp, io, lockmode);
228 break;
229 case BMAPI_UNWRITTEN:
230 goto phase2;
231 case BMAPI_DEVICE:
232 lockmode = XFS_LCK_MAP_SHARED(mp, io);
233 iomapp->iomap_target = io->io_flags & XFS_IOCORE_RT ?
234 mp->m_rtdev_targp : mp->m_ddev_targp;
235 error = 0;
236 *niomaps = 1;
237 goto out;
238 default:
239 BUG();
242 ASSERT(offset <= mp->m_maxioffset);
243 if ((xfs_fsize_t)offset + count > mp->m_maxioffset)
244 count = mp->m_maxioffset - offset;
245 end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
246 offset_fsb = XFS_B_TO_FSBT(mp, offset);
248 error = XFS_BMAPI(mp, NULL, io, offset_fsb,
249 (xfs_filblks_t)(end_fsb - offset_fsb),
250 bmapi_flags, NULL, 0, &imap,
251 &nimaps, NULL, NULL);
253 if (error)
254 goto out;
256 phase2:
257 switch (flags & (BMAPI_WRITE|BMAPI_ALLOCATE|BMAPI_UNWRITTEN)) {
258 case BMAPI_WRITE:
259 /* If we found an extent, return it */
260 if (nimaps &&
261 (imap.br_startblock != HOLESTARTBLOCK) &&
262 (imap.br_startblock != DELAYSTARTBLOCK)) {
263 xfs_iomap_map_trace(XFS_IOMAP_WRITE_MAP, io,
264 offset, count, iomapp, &imap, flags);
265 break;
268 if (flags & (BMAPI_DIRECT|BMAPI_MMAP)) {
269 error = XFS_IOMAP_WRITE_DIRECT(mp, io, offset,
270 count, flags, &imap, &nimaps, nimaps);
271 } else {
272 error = XFS_IOMAP_WRITE_DELAY(mp, io, offset, count,
273 flags, &imap, &nimaps);
275 if (!error) {
276 xfs_iomap_map_trace(XFS_IOMAP_ALLOC_MAP, io,
277 offset, count, iomapp, &imap, flags);
279 iomap_flags = IOMAP_NEW;
280 break;
281 case BMAPI_ALLOCATE:
282 /* If we found an extent, return it */
283 XFS_IUNLOCK(mp, io, lockmode);
284 lockmode = 0;
286 if (nimaps && !ISNULLSTARTBLOCK(imap.br_startblock)) {
287 xfs_iomap_map_trace(XFS_IOMAP_WRITE_MAP, io,
288 offset, count, iomapp, &imap, flags);
289 break;
292 error = XFS_IOMAP_WRITE_ALLOCATE(mp, io, offset, count,
293 &imap, &nimaps);
294 break;
295 case BMAPI_UNWRITTEN:
296 lockmode = 0;
297 error = XFS_IOMAP_WRITE_UNWRITTEN(mp, io, offset, count);
298 nimaps = 0;
299 break;
302 if (nimaps) {
303 *niomaps = xfs_imap_to_bmap(io, offset, &imap,
304 iomapp, nimaps, *niomaps, iomap_flags);
305 } else if (niomaps) {
306 *niomaps = 0;
309 out:
310 if (lockmode)
311 XFS_IUNLOCK(mp, io, lockmode);
312 return XFS_ERROR(error);
315 STATIC int
316 xfs_iomap_eof_align_last_fsb(
317 xfs_mount_t *mp,
318 xfs_iocore_t *io,
319 xfs_fsize_t isize,
320 xfs_extlen_t extsize,
321 xfs_fileoff_t *last_fsb)
323 xfs_fileoff_t new_last_fsb = 0;
324 xfs_extlen_t align;
325 int eof, error;
327 if (io->io_flags & XFS_IOCORE_RT)
330 * If mounted with the "-o swalloc" option, roundup the allocation
331 * request to a stripe width boundary if the file size is >=
332 * stripe width and we are allocating past the allocation eof.
334 else if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC) &&
335 (isize >= XFS_FSB_TO_B(mp, mp->m_swidth)))
336 new_last_fsb = roundup_64(*last_fsb, mp->m_swidth);
338 * Roundup the allocation request to a stripe unit (m_dalign) boundary
339 * if the file size is >= stripe unit size, and we are allocating past
340 * the allocation eof.
342 else if (mp->m_dalign && (isize >= XFS_FSB_TO_B(mp, mp->m_dalign)))
343 new_last_fsb = roundup_64(*last_fsb, mp->m_dalign);
346 * Always round up the allocation request to an extent boundary
347 * (when file on a real-time subvolume or has di_extsize hint).
349 if (extsize) {
350 if (new_last_fsb)
351 align = roundup_64(new_last_fsb, extsize);
352 else
353 align = extsize;
354 new_last_fsb = roundup_64(*last_fsb, align);
357 if (new_last_fsb) {
358 error = XFS_BMAP_EOF(mp, io, new_last_fsb, XFS_DATA_FORK, &eof);
359 if (error)
360 return error;
361 if (eof)
362 *last_fsb = new_last_fsb;
364 return 0;
367 STATIC int
368 xfs_flush_space(
369 xfs_inode_t *ip,
370 int *fsynced,
371 int *ioflags)
373 switch (*fsynced) {
374 case 0:
375 if (ip->i_delayed_blks) {
376 xfs_iunlock(ip, XFS_ILOCK_EXCL);
377 xfs_flush_inode(ip);
378 xfs_ilock(ip, XFS_ILOCK_EXCL);
379 *fsynced = 1;
380 } else {
381 *ioflags |= BMAPI_SYNC;
382 *fsynced = 2;
384 return 0;
385 case 1:
386 *fsynced = 2;
387 *ioflags |= BMAPI_SYNC;
388 return 0;
389 case 2:
390 xfs_iunlock(ip, XFS_ILOCK_EXCL);
391 xfs_flush_device(ip);
392 xfs_ilock(ip, XFS_ILOCK_EXCL);
393 *fsynced = 3;
394 return 0;
396 return 1;
399 STATIC int
400 xfs_cmn_err_fsblock_zero(
401 xfs_inode_t *ip,
402 xfs_bmbt_irec_t *imap)
404 xfs_cmn_err(XFS_PTAG_FSBLOCK_ZERO, CE_ALERT, ip->i_mount,
405 "Access to block zero in inode %llu "
406 "start_block: %llx start_off: %llx "
407 "blkcnt: %llx extent-state: %x\n",
408 (unsigned long long)ip->i_ino,
409 (unsigned long long)imap->br_startblock,
410 (unsigned long long)imap->br_startoff,
411 (unsigned long long)imap->br_blockcount,
412 imap->br_state);
413 return EFSCORRUPTED;
417 xfs_iomap_write_direct(
418 xfs_inode_t *ip,
419 xfs_off_t offset,
420 size_t count,
421 int flags,
422 xfs_bmbt_irec_t *ret_imap,
423 int *nmaps,
424 int found)
426 xfs_mount_t *mp = ip->i_mount;
427 xfs_iocore_t *io = &ip->i_iocore;
428 xfs_fileoff_t offset_fsb;
429 xfs_fileoff_t last_fsb;
430 xfs_filblks_t count_fsb, resaligned;
431 xfs_fsblock_t firstfsb;
432 xfs_extlen_t extsz, temp;
433 xfs_fsize_t isize;
434 int nimaps;
435 int bmapi_flag;
436 int quota_flag;
437 int rt;
438 xfs_trans_t *tp;
439 xfs_bmbt_irec_t imap;
440 xfs_bmap_free_t free_list;
441 uint qblocks, resblks, resrtextents;
442 int committed;
443 int error;
446 * Make sure that the dquots are there. This doesn't hold
447 * the ilock across a disk read.
449 error = XFS_QM_DQATTACH(ip->i_mount, ip, XFS_QMOPT_ILOCKED);
450 if (error)
451 return XFS_ERROR(error);
453 rt = XFS_IS_REALTIME_INODE(ip);
454 extsz = xfs_get_extsz_hint(ip);
456 isize = ip->i_size;
457 if (io->io_new_size > isize)
458 isize = io->io_new_size;
460 offset_fsb = XFS_B_TO_FSBT(mp, offset);
461 last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
462 if ((offset + count) > isize) {
463 error = xfs_iomap_eof_align_last_fsb(mp, io, isize, extsz,
464 &last_fsb);
465 if (error)
466 goto error_out;
467 } else {
468 if (found && (ret_imap->br_startblock == HOLESTARTBLOCK))
469 last_fsb = MIN(last_fsb, (xfs_fileoff_t)
470 ret_imap->br_blockcount +
471 ret_imap->br_startoff);
473 count_fsb = last_fsb - offset_fsb;
474 ASSERT(count_fsb > 0);
476 resaligned = count_fsb;
477 if (unlikely(extsz)) {
478 if ((temp = do_mod(offset_fsb, extsz)))
479 resaligned += temp;
480 if ((temp = do_mod(resaligned, extsz)))
481 resaligned += extsz - temp;
484 if (unlikely(rt)) {
485 resrtextents = qblocks = resaligned;
486 resrtextents /= mp->m_sb.sb_rextsize;
487 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
488 quota_flag = XFS_QMOPT_RES_RTBLKS;
489 } else {
490 resrtextents = 0;
491 resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
492 quota_flag = XFS_QMOPT_RES_REGBLKS;
496 * Allocate and setup the transaction
498 xfs_iunlock(ip, XFS_ILOCK_EXCL);
499 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
500 error = xfs_trans_reserve(tp, resblks,
501 XFS_WRITE_LOG_RES(mp), resrtextents,
502 XFS_TRANS_PERM_LOG_RES,
503 XFS_WRITE_LOG_COUNT);
505 * Check for running out of space, note: need lock to return
507 if (error)
508 xfs_trans_cancel(tp, 0);
509 xfs_ilock(ip, XFS_ILOCK_EXCL);
510 if (error)
511 goto error_out;
513 error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip,
514 qblocks, 0, quota_flag);
515 if (error)
516 goto error1;
518 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
519 xfs_trans_ihold(tp, ip);
521 bmapi_flag = XFS_BMAPI_WRITE;
522 if ((flags & BMAPI_DIRECT) && (offset < ip->i_size || extsz))
523 bmapi_flag |= XFS_BMAPI_PREALLOC;
526 * Issue the xfs_bmapi() call to allocate the blocks
528 XFS_BMAP_INIT(&free_list, &firstfsb);
529 nimaps = 1;
530 error = XFS_BMAPI(mp, tp, io, offset_fsb, count_fsb, bmapi_flag,
531 &firstfsb, 0, &imap, &nimaps, &free_list, NULL);
532 if (error)
533 goto error0;
536 * Complete the transaction
538 error = xfs_bmap_finish(&tp, &free_list, &committed);
539 if (error)
540 goto error0;
541 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
542 if (error)
543 goto error_out;
546 * Copy any maps to caller's array and return any error.
548 if (nimaps == 0) {
549 error = ENOSPC;
550 goto error_out;
553 if (unlikely(!imap.br_startblock && !(io->io_flags & XFS_IOCORE_RT))) {
554 error = xfs_cmn_err_fsblock_zero(ip, &imap);
555 goto error_out;
558 *ret_imap = imap;
559 *nmaps = 1;
560 return 0;
562 error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
563 xfs_bmap_cancel(&free_list);
564 XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag);
566 error1: /* Just cancel transaction */
567 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
568 *nmaps = 0; /* nothing set-up here */
570 error_out:
571 return XFS_ERROR(error);
575 * If the caller is doing a write at the end of the file,
576 * then extend the allocation out to the file system's write
577 * iosize. We clean up any extra space left over when the
578 * file is closed in xfs_inactive().
580 * For sync writes, we are flushing delayed allocate space to
581 * try to make additional space available for allocation near
582 * the filesystem full boundary - preallocation hurts in that
583 * situation, of course.
585 STATIC int
586 xfs_iomap_eof_want_preallocate(
587 xfs_mount_t *mp,
588 xfs_iocore_t *io,
589 xfs_fsize_t isize,
590 xfs_off_t offset,
591 size_t count,
592 int ioflag,
593 xfs_bmbt_irec_t *imap,
594 int nimaps,
595 int *prealloc)
597 xfs_fileoff_t start_fsb;
598 xfs_filblks_t count_fsb;
599 xfs_fsblock_t firstblock;
600 int n, error, imaps;
602 *prealloc = 0;
603 if ((ioflag & BMAPI_SYNC) || (offset + count) <= isize)
604 return 0;
607 * If there are any real blocks past eof, then don't
608 * do any speculative allocation.
610 start_fsb = XFS_B_TO_FSBT(mp, ((xfs_ufsize_t)(offset + count - 1)));
611 count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
612 while (count_fsb > 0) {
613 imaps = nimaps;
614 firstblock = NULLFSBLOCK;
615 error = XFS_BMAPI(mp, NULL, io, start_fsb, count_fsb, 0,
616 &firstblock, 0, imap, &imaps, NULL, NULL);
617 if (error)
618 return error;
619 for (n = 0; n < imaps; n++) {
620 if ((imap[n].br_startblock != HOLESTARTBLOCK) &&
621 (imap[n].br_startblock != DELAYSTARTBLOCK))
622 return 0;
623 start_fsb += imap[n].br_blockcount;
624 count_fsb -= imap[n].br_blockcount;
627 *prealloc = 1;
628 return 0;
632 xfs_iomap_write_delay(
633 xfs_inode_t *ip,
634 xfs_off_t offset,
635 size_t count,
636 int ioflag,
637 xfs_bmbt_irec_t *ret_imap,
638 int *nmaps)
640 xfs_mount_t *mp = ip->i_mount;
641 xfs_iocore_t *io = &ip->i_iocore;
642 xfs_fileoff_t offset_fsb;
643 xfs_fileoff_t last_fsb;
644 xfs_off_t aligned_offset;
645 xfs_fileoff_t ioalign;
646 xfs_fsblock_t firstblock;
647 xfs_extlen_t extsz;
648 xfs_fsize_t isize;
649 int nimaps;
650 xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS];
651 int prealloc, fsynced = 0;
652 int error;
654 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0);
657 * Make sure that the dquots are there. This doesn't hold
658 * the ilock across a disk read.
660 error = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED);
661 if (error)
662 return XFS_ERROR(error);
664 extsz = xfs_get_extsz_hint(ip);
665 offset_fsb = XFS_B_TO_FSBT(mp, offset);
667 retry:
668 isize = ip->i_size;
669 if (io->io_new_size > isize)
670 isize = io->io_new_size;
672 error = xfs_iomap_eof_want_preallocate(mp, io, isize, offset, count,
673 ioflag, imap, XFS_WRITE_IMAPS, &prealloc);
674 if (error)
675 return error;
677 if (prealloc) {
678 aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
679 ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
680 last_fsb = ioalign + mp->m_writeio_blocks;
681 } else {
682 last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
685 if (prealloc || extsz) {
686 error = xfs_iomap_eof_align_last_fsb(mp, io, isize, extsz,
687 &last_fsb);
688 if (error)
689 return error;
692 nimaps = XFS_WRITE_IMAPS;
693 firstblock = NULLFSBLOCK;
694 error = XFS_BMAPI(mp, NULL, io, offset_fsb,
695 (xfs_filblks_t)(last_fsb - offset_fsb),
696 XFS_BMAPI_DELAY | XFS_BMAPI_WRITE |
697 XFS_BMAPI_ENTIRE, &firstblock, 1, imap,
698 &nimaps, NULL, NULL);
699 if (error && (error != ENOSPC))
700 return XFS_ERROR(error);
703 * If bmapi returned us nothing, and if we didn't get back EDQUOT,
704 * then we must have run out of space - flush delalloc, and retry..
706 if (nimaps == 0) {
707 xfs_iomap_enter_trace(XFS_IOMAP_WRITE_NOSPACE,
708 io, offset, count);
709 if (xfs_flush_space(ip, &fsynced, &ioflag))
710 return XFS_ERROR(ENOSPC);
712 error = 0;
713 goto retry;
716 if (unlikely(!imap[0].br_startblock && !(io->io_flags & XFS_IOCORE_RT)))
717 return xfs_cmn_err_fsblock_zero(ip, &imap[0]);
719 *ret_imap = imap[0];
720 *nmaps = 1;
722 return 0;
726 * Pass in a delayed allocate extent, convert it to real extents;
727 * return to the caller the extent we create which maps on top of
728 * the originating callers request.
730 * Called without a lock on the inode.
733 xfs_iomap_write_allocate(
734 xfs_inode_t *ip,
735 xfs_off_t offset,
736 size_t count,
737 xfs_bmbt_irec_t *map,
738 int *retmap)
740 xfs_mount_t *mp = ip->i_mount;
741 xfs_iocore_t *io = &ip->i_iocore;
742 xfs_fileoff_t offset_fsb, last_block;
743 xfs_fileoff_t end_fsb, map_start_fsb;
744 xfs_fsblock_t first_block;
745 xfs_bmap_free_t free_list;
746 xfs_filblks_t count_fsb;
747 xfs_bmbt_irec_t imap[XFS_STRAT_WRITE_IMAPS];
748 xfs_trans_t *tp;
749 int i, nimaps, committed;
750 int error = 0;
751 int nres;
753 *retmap = 0;
756 * Make sure that the dquots are there.
758 if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
759 return XFS_ERROR(error);
761 offset_fsb = XFS_B_TO_FSBT(mp, offset);
762 count_fsb = map->br_blockcount;
763 map_start_fsb = map->br_startoff;
765 XFS_STATS_ADD(xs_xstrat_bytes, XFS_FSB_TO_B(mp, count_fsb));
767 while (count_fsb != 0) {
769 * Set up a transaction with which to allocate the
770 * backing store for the file. Do allocations in a
771 * loop until we get some space in the range we are
772 * interested in. The other space that might be allocated
773 * is in the delayed allocation extent on which we sit
774 * but before our buffer starts.
777 nimaps = 0;
778 while (nimaps == 0) {
779 tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
780 tp->t_flags |= XFS_TRANS_RESERVE;
781 nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
782 error = xfs_trans_reserve(tp, nres,
783 XFS_WRITE_LOG_RES(mp),
784 0, XFS_TRANS_PERM_LOG_RES,
785 XFS_WRITE_LOG_COUNT);
786 if (error) {
787 xfs_trans_cancel(tp, 0);
788 return XFS_ERROR(error);
790 xfs_ilock(ip, XFS_ILOCK_EXCL);
791 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
792 xfs_trans_ihold(tp, ip);
794 XFS_BMAP_INIT(&free_list, &first_block);
796 nimaps = XFS_STRAT_WRITE_IMAPS;
798 * Ensure we don't go beyond eof - it is possible
799 * the extents changed since we did the read call,
800 * we dropped the ilock in the interim.
803 end_fsb = XFS_B_TO_FSB(mp, ip->i_size);
804 xfs_bmap_last_offset(NULL, ip, &last_block,
805 XFS_DATA_FORK);
806 last_block = XFS_FILEOFF_MAX(last_block, end_fsb);
807 if ((map_start_fsb + count_fsb) > last_block) {
808 count_fsb = last_block - map_start_fsb;
809 if (count_fsb == 0) {
810 error = EAGAIN;
811 goto trans_cancel;
815 /* Go get the actual blocks */
816 error = XFS_BMAPI(mp, tp, io, map_start_fsb, count_fsb,
817 XFS_BMAPI_WRITE, &first_block, 1,
818 imap, &nimaps, &free_list, NULL);
819 if (error)
820 goto trans_cancel;
822 error = xfs_bmap_finish(&tp, &free_list, &committed);
823 if (error)
824 goto trans_cancel;
826 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
827 if (error)
828 goto error0;
830 xfs_iunlock(ip, XFS_ILOCK_EXCL);
834 * See if we were able to allocate an extent that
835 * covers at least part of the callers request
837 for (i = 0; i < nimaps; i++) {
838 if (unlikely(!imap[i].br_startblock &&
839 !(io->io_flags & XFS_IOCORE_RT)))
840 return xfs_cmn_err_fsblock_zero(ip, &imap[i]);
841 if ((offset_fsb >= imap[i].br_startoff) &&
842 (offset_fsb < (imap[i].br_startoff +
843 imap[i].br_blockcount))) {
844 *map = imap[i];
845 *retmap = 1;
846 XFS_STATS_INC(xs_xstrat_quick);
847 return 0;
849 count_fsb -= imap[i].br_blockcount;
852 /* So far we have not mapped the requested part of the
853 * file, just surrounding data, try again.
855 nimaps--;
856 map_start_fsb = imap[nimaps].br_startoff +
857 imap[nimaps].br_blockcount;
860 trans_cancel:
861 xfs_bmap_cancel(&free_list);
862 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
863 error0:
864 xfs_iunlock(ip, XFS_ILOCK_EXCL);
865 return XFS_ERROR(error);
869 xfs_iomap_write_unwritten(
870 xfs_inode_t *ip,
871 xfs_off_t offset,
872 size_t count)
874 xfs_mount_t *mp = ip->i_mount;
875 xfs_iocore_t *io = &ip->i_iocore;
876 xfs_fileoff_t offset_fsb;
877 xfs_filblks_t count_fsb;
878 xfs_filblks_t numblks_fsb;
879 xfs_fsblock_t firstfsb;
880 int nimaps;
881 xfs_trans_t *tp;
882 xfs_bmbt_irec_t imap;
883 xfs_bmap_free_t free_list;
884 uint resblks;
885 int committed;
886 int error;
888 xfs_iomap_enter_trace(XFS_IOMAP_UNWRITTEN,
889 &ip->i_iocore, offset, count);
891 offset_fsb = XFS_B_TO_FSBT(mp, offset);
892 count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
893 count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb);
895 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;
897 do {
899 * set up a transaction to convert the range of extents
900 * from unwritten to real. Do allocations in a loop until
901 * we have covered the range passed in.
903 tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
904 tp->t_flags |= XFS_TRANS_RESERVE;
905 error = xfs_trans_reserve(tp, resblks,
906 XFS_WRITE_LOG_RES(mp), 0,
907 XFS_TRANS_PERM_LOG_RES,
908 XFS_WRITE_LOG_COUNT);
909 if (error) {
910 xfs_trans_cancel(tp, 0);
911 return XFS_ERROR(error);
914 xfs_ilock(ip, XFS_ILOCK_EXCL);
915 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
916 xfs_trans_ihold(tp, ip);
919 * Modify the unwritten extent state of the buffer.
921 XFS_BMAP_INIT(&free_list, &firstfsb);
922 nimaps = 1;
923 error = XFS_BMAPI(mp, tp, io, offset_fsb, count_fsb,
924 XFS_BMAPI_WRITE|XFS_BMAPI_CONVERT, &firstfsb,
925 1, &imap, &nimaps, &free_list, NULL);
926 if (error)
927 goto error_on_bmapi_transaction;
929 error = xfs_bmap_finish(&(tp), &(free_list), &committed);
930 if (error)
931 goto error_on_bmapi_transaction;
933 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
934 xfs_iunlock(ip, XFS_ILOCK_EXCL);
935 if (error)
936 return XFS_ERROR(error);
938 if (unlikely(!imap.br_startblock &&
939 !(io->io_flags & XFS_IOCORE_RT)))
940 return xfs_cmn_err_fsblock_zero(ip, &imap);
942 if ((numblks_fsb = imap.br_blockcount) == 0) {
944 * The numblks_fsb value should always get
945 * smaller, otherwise the loop is stuck.
947 ASSERT(imap.br_blockcount);
948 break;
950 offset_fsb += numblks_fsb;
951 count_fsb -= numblks_fsb;
952 } while (count_fsb > 0);
954 return 0;
956 error_on_bmapi_transaction:
957 xfs_bmap_cancel(&free_list);
958 xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT));
959 xfs_iunlock(ip, XFS_ILOCK_EXCL);
960 return XFS_ERROR(error);