// SPDX-License-Identifier: GPL-2.0-only
/* Network filesystem high-level (buffered) writeback.
 *
 * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 *
 * To support network filesystems with local caching, we manage a situation
 * that can be envisioned like the following:
 *
 *               +---+---+-----+-----+---+----------+
 *    Folios:    |   |   |     |     |   |          |
 *               +---+---+-----+-----+---+----------+
 *
 *                 +------+------+     +----+----+
 *    Upload:      |      |      |.....|    |    |
 *  (Stream 0)     +------+------+     +----+----+
 *
 *               +------+------+------+------+------+
 *    Cache:     |      |      |      |      |      |
 *  (Stream 1)   +------+------+------+------+------+
 *
 * Where we have a sequence of folios of varying sizes that we need to overlay
 * with multiple parallel streams of I/O requests, where the I/O requests in a
 * stream may also be of various sizes (in cifs, for example, the sizes are
 * negotiated with the server; in something like ceph, they may represent the
 * sizes of storage objects).
 *
 * The sequence in each stream may contain gaps and noncontiguous subrequests
 * may be glued together into single vectored write RPCs.
 */

#include <linux/export.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include "internal.h"

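/*
 * In the code below, the upload row of the diagram above corresponds to
 * wreq->io_streams[0] (writing data to the server) and the cache row to
 * wreq->io_streams[1] (writing data to the local cache); both are set up in
 * netfs_create_write_req().
 */
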
/*
 * Kill all dirty folios in the event of an unrecoverable error, starting with
 * a locked folio we've already obtained from writeback_iter().
 */
static void netfs_kill_dirty_pages(struct address_space *mapping,
				   struct writeback_control *wbc,
				   struct folio *folio)
{
	int error = 0;

	do {
		enum netfs_folio_trace why = netfs_folio_trace_kill;
		struct netfs_group *group = NULL;
		struct netfs_folio *finfo = NULL;
		void *priv;

		priv = folio_detach_private(folio);
		if (priv) {
			finfo = __netfs_folio_info(priv);
			if (finfo) {
				/* Kill folio from streaming write. */
				group = finfo->netfs_group;
				why = netfs_folio_trace_kill_s;
			} else {
				group = priv;
				if (group == NETFS_FOLIO_COPY_TO_CACHE) {
					/* Kill copy-to-cache folio */
					why = netfs_folio_trace_kill_cc;
					group = NULL;
				} else {
					/* Kill folio with group */
					why = netfs_folio_trace_kill_g;
				}
			}
		}

		trace_netfs_folio(folio, why);

		folio_start_writeback(folio);
		folio_unlock(folio);
		folio_end_writeback(folio);

		netfs_put_group(group);
		kfree(finfo);

	} while ((folio = writeback_iter(mapping, wbc, folio, &error)));
}

/*
 * Create a write request and set it up appropriately for the origin type.
 */
struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
						struct file *file,
						loff_t start,
						enum netfs_io_origin origin)
{
	struct netfs_io_request *wreq;
	struct netfs_inode *ictx;
	bool is_buffered = (origin == NETFS_WRITEBACK ||
			    origin == NETFS_WRITETHROUGH ||
			    origin == NETFS_PGPRIV2_COPY_TO_CACHE);

	wreq = netfs_alloc_request(mapping, file, start, 0, origin);
	if (IS_ERR(wreq))
		return wreq;

	_enter("R=%x", wreq->debug_id);

	ictx = netfs_inode(wreq->inode);
	if (is_buffered && netfs_is_cache_enabled(ictx))
		fscache_begin_write_operation(&wreq->cache_resources, netfs_i_cookie(ictx));

	wreq->cleaned_to = wreq->start;

	wreq->io_streams[0].stream_nr		= 0;
	wreq->io_streams[0].source		= NETFS_UPLOAD_TO_SERVER;
	wreq->io_streams[0].prepare_write	= ictx->ops->prepare_write;
	wreq->io_streams[0].issue_write		= ictx->ops->issue_write;
	wreq->io_streams[0].collected_to	= start;
	wreq->io_streams[0].transferred		= LONG_MAX;

	wreq->io_streams[1].stream_nr		= 1;
	wreq->io_streams[1].source		= NETFS_WRITE_TO_CACHE;
	wreq->io_streams[1].collected_to	= start;
	wreq->io_streams[1].transferred		= LONG_MAX;
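	/* The cache stream is only made available if we managed to begin a
	 * write operation on the cache above; otherwise it stays inactive and
	 * is skipped when data is advanced into the streams.
	 */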
	if (fscache_resources_valid(&wreq->cache_resources)) {
		wreq->io_streams[1].avail	= true;
		wreq->io_streams[1].active	= true;
		wreq->io_streams[1].prepare_write = wreq->cache_resources.ops->prepare_write_subreq;
		wreq->io_streams[1].issue_write = wreq->cache_resources.ops->issue_write;
	}

	return wreq;
}

/**
 * netfs_prepare_write_failed - Note write preparation failed
 * @subreq: The subrequest to mark
 *
 * Mark a subrequest to note that preparation for write failed.
 */
void netfs_prepare_write_failed(struct netfs_io_subrequest *subreq)
{
	__set_bit(NETFS_SREQ_FAILED, &subreq->flags);
	trace_netfs_sreq(subreq, netfs_sreq_trace_prep_failed);
}
EXPORT_SYMBOL(netfs_prepare_write_failed);

/*
 * Prepare a write subrequest.  We need to allocate a new subrequest
 * if we don't have one.
 */
static void netfs_prepare_write(struct netfs_io_request *wreq,
				struct netfs_io_stream *stream,
				loff_t start)
{
	struct netfs_io_subrequest *subreq;
	struct iov_iter *wreq_iter = &wreq->io_iter;

	/* Make sure we don't point the iterator at a used-up folio_queue
	 * struct being used as a placeholder to prevent the queue from
	 * collapsing.  In such a case, extend the queue.
	 */
	if (iov_iter_is_folioq(wreq_iter) &&
	    wreq_iter->folioq_slot >= folioq_nr_slots(wreq_iter->folioq)) {
		netfs_buffer_make_space(wreq);
	}

	subreq = netfs_alloc_subrequest(wreq);
	subreq->source		= stream->source;
	subreq->start		= start;
	subreq->stream_nr	= stream->stream_nr;
	subreq->io_iter		= *wreq_iter;

	_enter("R=%x[%x]", wreq->debug_id, subreq->debug_index);

	trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);

	stream->sreq_max_len	= UINT_MAX;
	stream->sreq_max_segs	= INT_MAX;
	switch (stream->source) {
	case NETFS_UPLOAD_TO_SERVER:
		netfs_stat(&netfs_n_wh_upload);
		stream->sreq_max_len = wreq->wsize;
		break;
	case NETFS_WRITE_TO_CACHE:
		netfs_stat(&netfs_n_wh_write);
		break;
	default:
		WARN_ON_ONCE(1);
		break;
	}

	if (stream->prepare_write)
		stream->prepare_write(subreq);

	__set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);

	/* We add to the end of the list whilst the collector may be walking
	 * the list.  The collector only goes forwards and uses the lock to
	 * remove entries off of the front.
	 */
	spin_lock_bh(&wreq->lock);
	list_add_tail(&subreq->rreq_link, &stream->subrequests);
	if (list_is_first(&subreq->rreq_link, &stream->subrequests)) {
		stream->front = subreq;
		if (!stream->active) {
			stream->collected_to = stream->front->start;
			/* Write list pointers before active flag */
			smp_store_release(&stream->active, true);
		}
	}

	spin_unlock_bh(&wreq->lock);

	stream->construct = subreq;
}

/*
 * Set the I/O iterator for the filesystem/cache to use and dispatch the I/O
 * operation.  The operation may be asynchronous and should call
 * netfs_write_subrequest_terminated() when complete.
 */
static void netfs_do_issue_write(struct netfs_io_stream *stream,
				 struct netfs_io_subrequest *subreq)
{
	struct netfs_io_request *wreq = subreq->rreq;

	_enter("R=%x[%x],%zx", wreq->debug_id, subreq->debug_index, subreq->len);

	if (test_bit(NETFS_SREQ_FAILED, &subreq->flags))
		return netfs_write_subrequest_terminated(subreq, subreq->error, false);

	trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
	stream->issue_write(subreq);
}

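/*
 * Reissue a subrequest, pointing its iterator at the as-yet untransferred
 * portion of the data to be drawn from *source.
 */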
void netfs_reissue_write(struct netfs_io_stream *stream,
			 struct netfs_io_subrequest *subreq,
			 struct iov_iter *source)
{
	size_t size = subreq->len - subreq->transferred;

	// TODO: Use encrypted buffer
	subreq->io_iter = *source;
	iov_iter_advance(source, size);
	iov_iter_truncate(&subreq->io_iter, size);

	__set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
	netfs_do_issue_write(stream, subreq);
}

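/*
 * Issue the subrequest currently under construction on a stream, if there is
 * one, and clear the construction point.
 */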
void netfs_issue_write(struct netfs_io_request *wreq,
		       struct netfs_io_stream *stream)
{
	struct netfs_io_subrequest *subreq = stream->construct;

	if (!subreq)
		return;
	stream->construct = NULL;
	subreq->io_iter.count = subreq->len;
	netfs_do_issue_write(stream, subreq);
}

/*
 * Add data to the write subrequest, dispatching each as we fill it up or if it
 * is discontiguous with the previous.  We only fill one part at a time so that
 * we can avoid overrunning the credits obtained (cifs) and try to parallelise
 * content-crypto preparation with network writes.
 */
int netfs_advance_write(struct netfs_io_request *wreq,
			struct netfs_io_stream *stream,
			loff_t start, size_t len, bool to_eof)
{
	struct netfs_io_subrequest *subreq = stream->construct;
	size_t part;

	if (!stream->avail) {
		_leave("no write");
		return len;
	}

	_enter("R=%x[%x]", wreq->debug_id, subreq ? subreq->debug_index : 0);

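	/* If the new data isn't contiguous with the subrequest we're currently
	 * building, flush that subrequest and start a new one.
	 */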
	if (subreq && start != subreq->start + subreq->len) {
		netfs_issue_write(wreq, stream);
		subreq = NULL;
	}

	if (!stream->construct)
		netfs_prepare_write(wreq, stream, start);
	subreq = stream->construct;

	part = umin(stream->sreq_max_len - subreq->len, len);
	_debug("part %zx/%zx %zx/%zx", subreq->len, stream->sreq_max_len, part, len);
	subreq->len += part;
	subreq->nr_segs++;
	stream->submit_extendable_to -= part;

	if (subreq->len >= stream->sreq_max_len ||
	    subreq->nr_segs >= stream->sreq_max_segs ||
	    to_eof) {
		netfs_issue_write(wreq, stream);
		subreq = NULL;
	}

	return part;
}

/*
 * Write some of a pending folio's data back to the server.
 */
static int netfs_write_folio(struct netfs_io_request *wreq,
			     struct writeback_control *wbc,
			     struct folio *folio)
{
	struct netfs_io_stream *upload = &wreq->io_streams[0];
	struct netfs_io_stream *cache  = &wreq->io_streams[1];
	struct netfs_io_stream *stream;
	struct netfs_group *fgroup; /* TODO: Use this with ceph */
	struct netfs_folio *finfo;
	size_t iter_off = 0;
	size_t fsize = folio_size(folio), flen = fsize, foff = 0;
	loff_t fpos = folio_pos(folio), i_size;
	bool to_eof = false, streamw = false;
	bool debug = false;

	_enter("");

	/* netfs_perform_write() may shift i_size around the page or from out
	 * of the page to beyond it, but cannot move i_size into or through the
	 * page since we have it locked.
	 */
	i_size = i_size_read(wreq->inode);

	if (fpos >= i_size) {
		/* mmap beyond eof. */
		_debug("beyond eof");
		folio_start_writeback(folio);
		folio_unlock(folio);
		wreq->nr_group_rel += netfs_folio_written_back(folio);
		netfs_put_group_many(wreq->group, wreq->nr_group_rel);
		wreq->nr_group_rel = 0;
		folio_end_writeback(folio);
		return 0;
	}

	if (fpos + fsize > wreq->i_size)
		wreq->i_size = i_size;

	fgroup = netfs_folio_group(folio);
	finfo = netfs_folio_info(folio);
	if (finfo) {
		foff = finfo->dirty_offset;
		flen = foff + finfo->dirty_len;
		streamw = true;
	}

	if (wreq->origin == NETFS_WRITETHROUGH) {
		to_eof = false;
		if (flen > i_size - fpos)
			flen = i_size - fpos;
	} else if (flen > i_size - fpos) {
		flen = i_size - fpos;
		if (!streamw)
			folio_zero_segment(folio, flen, fsize);
		to_eof = true;
	} else if (flen == i_size - fpos) {
		to_eof = true;
	}
	flen -= foff;

	_debug("folio %zx %zx %zx", foff, flen, fsize);

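	/* At this point, foff is the offset of the dirty data within the
	 * folio, flen is the length of that data and fsize is the size of the
	 * whole folio; to_eof indicates that the dirty region runs up to the
	 * current file size.
	 */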
	/* Deal with discontinuities in the stream of dirty pages.  These can
	 * arise from a number of sources:
	 *
	 * (1) Intervening non-dirty pages from random-access writes, multiple
	 *     flushers writing back different parts simultaneously and manual
	 *     syncing.
	 *
	 * (2) Partially-written pages from write-streaming.
	 *
	 * (3) Pages that belong to a different write-back group (eg. Ceph
	 *     snaps).
	 *
	 * (4) Actually-clean pages that were marked for write to the cache
	 *     when they were read.  Note that these appear as a special
	 *     write-back group.
	 */
	if (fgroup == NETFS_FOLIO_COPY_TO_CACHE) {
		netfs_issue_write(wreq, upload);
	} else if (fgroup != wreq->group) {
		/* We can't write this page to the server yet. */
		kdebug("wrong group");
		folio_redirty_for_writepage(wbc, folio);
		folio_unlock(folio);
		netfs_issue_write(wreq, upload);
		netfs_issue_write(wreq, cache);
		return 0;
	}

	if (foff > 0)
		netfs_issue_write(wreq, upload);
	if (streamw)
		netfs_issue_write(wreq, cache);

	/* Flip the page to the writeback state and unlock.  If we're called
	 * from write-through, then the page has already been put into the wb
	 * state.
	 */
	if (wreq->origin == NETFS_WRITEBACK)
		folio_start_writeback(folio);
	folio_unlock(folio);

	if (fgroup == NETFS_FOLIO_COPY_TO_CACHE) {
		if (!cache->avail) {
			trace_netfs_folio(folio, netfs_folio_trace_cancel_copy);
			netfs_issue_write(wreq, upload);
			netfs_folio_written_back(folio);
			return 0;
		}
		trace_netfs_folio(folio, netfs_folio_trace_store_copy);
	} else if (!upload->avail && !cache->avail) {
		trace_netfs_folio(folio, netfs_folio_trace_cancel_store);
		netfs_folio_written_back(folio);
		return 0;
	} else if (!upload->construct) {
		trace_netfs_folio(folio, netfs_folio_trace_store);
	} else {
		trace_netfs_folio(folio, netfs_folio_trace_store_plus);
	}

	/* Attach the folio to the rolling buffer. */
	netfs_buffer_append_folio(wreq, folio, false);

	/* Move the submission point forward to allow for write-streaming data
	 * not starting at the front of the page.  We don't do write-streaming
	 * with the cache as the cache requires DIO alignment.
	 *
	 * Also skip uploading for data that's been read and just needs copying
	 * to the cache.
	 */
	for (int s = 0; s < NR_IO_STREAMS; s++) {
		stream = &wreq->io_streams[s];
		stream->submit_off = foff;
		stream->submit_len = flen;
		if ((stream->source == NETFS_WRITE_TO_CACHE && streamw) ||
		    (stream->source == NETFS_UPLOAD_TO_SERVER &&
		     fgroup == NETFS_FOLIO_COPY_TO_CACHE)) {
			stream->submit_off = UINT_MAX;
			stream->submit_len = 0;
		}
	}

	/* Attach the folio to one or more subrequests.  For a big folio, we
	 * could end up with thousands of subrequests if the wsize is small -
	 * but we might need to wait during the creation of subrequests for
	 * network resources (eg. SMB credits).
	 */
	for (;;) {
		ssize_t part;
		size_t lowest_off = ULONG_MAX;
		int choose_s = -1;

		/* Always add to the lowest-submitted stream first. */
		for (int s = 0; s < NR_IO_STREAMS; s++) {
			stream = &wreq->io_streams[s];
			if (stream->submit_len > 0 &&
			    stream->submit_off < lowest_off) {
				lowest_off = stream->submit_off;
				choose_s = s;
			}
		}

		if (choose_s < 0)
			break;
		stream = &wreq->io_streams[choose_s];

		/* Advance the iterator(s). */
		if (stream->submit_off > iter_off) {
			iov_iter_advance(&wreq->io_iter, stream->submit_off - iter_off);
			iter_off = stream->submit_off;
		}

		atomic64_set(&wreq->issued_to, fpos + stream->submit_off);
		stream->submit_extendable_to = fsize - stream->submit_off;
		part = netfs_advance_write(wreq, stream, fpos + stream->submit_off,
					   stream->submit_len, to_eof);
		stream->submit_off += part;
		if (part > stream->submit_len)
			stream->submit_len = 0;
		else
			stream->submit_len -= part;
		if (part > 0)
			debug = true;
	}

	if (fsize > iter_off)
		iov_iter_advance(&wreq->io_iter, fsize - iter_off);
	atomic64_set(&wreq->issued_to, fpos + fsize);

	if (!debug)
		kdebug("R=%x: No submit", wreq->debug_id);

	if (foff + flen < fsize)
		for (int s = 0; s < NR_IO_STREAMS; s++)
			netfs_issue_write(wreq, &wreq->io_streams[s]);

	_leave(" = 0");
	return 0;
}

/*
 * End the issuing of writes, letting the collector know we're done.
 */
static void netfs_end_issue_write(struct netfs_io_request *wreq)
{
	bool needs_poke = true;

	smp_wmb(); /* Write subreq lists before ALL_QUEUED. */
	set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags);

	for (int s = 0; s < NR_IO_STREAMS; s++) {
		struct netfs_io_stream *stream = &wreq->io_streams[s];

		if (!stream->active)
			continue;
		if (!list_empty(&stream->subrequests))
			needs_poke = false;
		netfs_issue_write(wreq, stream);
	}

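	/* If none of the streams had any subrequests outstanding, nothing will
	 * come back through the collector of its own accord, so poke it now.
	 */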
	if (needs_poke)
		netfs_wake_write_collector(wreq, false);
}

/*
 * Write some of the pending data back to the server.
 */
int netfs_writepages(struct address_space *mapping,
		     struct writeback_control *wbc)
{
	struct netfs_inode *ictx = netfs_inode(mapping->host);
	struct netfs_io_request *wreq = NULL;
	struct folio *folio;
	int error = 0;

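	/* Writeback on this inode is serialised by wb_lock.  If we can't get
	 * it, only wait for it if this is data-integrity writeback; otherwise
	 * just skip this pass.
	 */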
	if (!mutex_trylock(&ictx->wb_lock)) {
		if (wbc->sync_mode == WB_SYNC_NONE) {
			netfs_stat(&netfs_n_wb_lock_skip);
			return 0;
		}
		netfs_stat(&netfs_n_wb_lock_wait);
		mutex_lock(&ictx->wb_lock);
	}

	/* Need the first folio to be able to set up the op. */
	folio = writeback_iter(mapping, wbc, NULL, &error);
	if (!folio)
		goto out;

	wreq = netfs_create_write_req(mapping, NULL, folio_pos(folio), NETFS_WRITEBACK);
	if (IS_ERR(wreq)) {
		error = PTR_ERR(wreq);
		goto couldnt_start;
	}

	trace_netfs_write(wreq, netfs_write_trace_writeback);
	netfs_stat(&netfs_n_wh_writepages);

	do {
		_debug("wbiter %lx %llx", folio->index, atomic64_read(&wreq->issued_to));

		/* It appears we don't have to handle cyclic writeback wrapping. */
		WARN_ON_ONCE(wreq && folio_pos(folio) < atomic64_read(&wreq->issued_to));

		if (netfs_folio_group(folio) != NETFS_FOLIO_COPY_TO_CACHE &&
		    unlikely(!test_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags))) {
			set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
			wreq->netfs_ops->begin_writeback(wreq);
		}

		error = netfs_write_folio(wreq, wbc, folio);
		if (error < 0)
			break;
	} while ((folio = writeback_iter(mapping, wbc, folio, &error)));

	netfs_end_issue_write(wreq);

	mutex_unlock(&ictx->wb_lock);

	netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
	_leave(" = %d", error);
	return error;

couldnt_start:
	netfs_kill_dirty_pages(mapping, wbc, folio);
out:
	mutex_unlock(&ictx->wb_lock);
	_leave(" = %d", error);
	return error;
}
EXPORT_SYMBOL(netfs_writepages);

/*
 * Begin a write operation for writing through the pagecache.
 */
struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len)
{
	struct netfs_io_request *wreq = NULL;
	struct netfs_inode *ictx = netfs_inode(file_inode(iocb->ki_filp));

	mutex_lock(&ictx->wb_lock);

	wreq = netfs_create_write_req(iocb->ki_filp->f_mapping, iocb->ki_filp,
				      iocb->ki_pos, NETFS_WRITETHROUGH);
	if (IS_ERR(wreq)) {
		mutex_unlock(&ictx->wb_lock);
		return wreq;
	}

	wreq->io_streams[0].avail = true;
	trace_netfs_write(wreq, netfs_write_trace_writethrough);
	return wreq;
}

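/*
 * Note that, on success, netfs_begin_writethrough() leaves ictx->wb_lock held;
 * it is dropped again by netfs_end_writethrough().
 */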
/*
 * Advance the state of the write operation used when writing through the
 * pagecache.  Data has been copied into the pagecache that we need to append
 * to the request.  If we've added more than wsize then we need to create a new
 * subrequest.
 */
int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc,
			       struct folio *folio, size_t copied, bool to_page_end,
			       struct folio **writethrough_cache)
{
	_enter("R=%x ic=%zu ws=%u cp=%zu tp=%u",
	       wreq->debug_id, wreq->iter.count, wreq->wsize, copied, to_page_end);

	if (!*writethrough_cache) {
		if (folio_test_dirty(folio))
			/* Sigh.  mmap. */
			folio_clear_dirty_for_io(folio);

		/* We can make multiple writes to the folio... */
		folio_start_writeback(folio);
		if (wreq->len == 0)
			trace_netfs_folio(folio, netfs_folio_trace_wthru);
		else
			trace_netfs_folio(folio, netfs_folio_trace_wthru_plus);
		*writethrough_cache = folio;
	}

	wreq->len += copied;
	if (!to_page_end)
		return 0;

	*writethrough_cache = NULL;
	return netfs_write_folio(wreq, wbc, folio);
}

/*
 * End a write operation used when writing through the pagecache.
 */
int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc,
			   struct folio *writethrough_cache)
{
	struct netfs_inode *ictx = netfs_inode(wreq->inode);
	int ret;

	_enter("R=%x", wreq->debug_id);

	if (writethrough_cache)
		netfs_write_folio(wreq, wbc, writethrough_cache);

	netfs_end_issue_write(wreq);

	mutex_unlock(&ictx->wb_lock);

	if (wreq->iocb) {
		ret = -EIOCBQUEUED;
	} else {
		wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS, TASK_UNINTERRUPTIBLE);
		ret = wreq->error;
	}
	netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
	return ret;
}

/*
 * Write data to the server without going through the pagecache and without
 * writing it to the local cache.
 */
int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t len)
{
	struct netfs_io_stream *upload = &wreq->io_streams[0];
	ssize_t part;
	loff_t start = wreq->start;
	int error = 0;

	_enter("%zx", len);

	if (wreq->origin == NETFS_DIO_WRITE)
		inode_dio_begin(wreq->inode);

	while (len) {
		// TODO: Prepare content encryption

		_debug("unbuffered %zx", len);
		part = netfs_advance_write(wreq, upload, start, len, false);
		start += part;
		len -= part;
		iov_iter_advance(&wreq->io_iter, part);
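		/* If issuing has been paused (NETFS_RREQ_PAUSE is set), wait
		 * for the pause to clear before issuing any more.
		 */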
		if (test_bit(NETFS_RREQ_PAUSE, &wreq->flags)) {
			trace_netfs_rreq(wreq, netfs_rreq_trace_wait_pause);
			wait_on_bit(&wreq->flags, NETFS_RREQ_PAUSE, TASK_UNINTERRUPTIBLE);
		}
		if (test_bit(NETFS_RREQ_FAILED, &wreq->flags))
			break;
	}

	netfs_end_issue_write(wreq);
	_leave(" = %d", error);
	return error;
}