io_uring: ensure finish_wait() is always called in __io_uring_task_cancel()
[linux/fpc-iii.git] / fs / cifs / file.c
blob6d001905c8e51af170559f67b26609af5fb48afc
/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
24 #include <linux/fs.h>
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <linux/mm.h>
37 #include <asm/div64.h>
38 #include "cifsfs.h"
39 #include "cifspdu.h"
40 #include "cifsglob.h"
41 #include "cifsproto.h"
42 #include "cifs_unicode.h"
43 #include "cifs_debug.h"
44 #include "cifs_fs_sb.h"
45 #include "fscache.h"
46 #include "smbdirect.h"
47 #include "fs_context.h"
49 static inline int cifs_convert_flags(unsigned int flags)
51 if ((flags & O_ACCMODE) == O_RDONLY)
52 return GENERIC_READ;
53 else if ((flags & O_ACCMODE) == O_WRONLY)
54 return GENERIC_WRITE;
55 else if ((flags & O_ACCMODE) == O_RDWR) {
56 /* GENERIC_ALL is too much permission to request
57 can cause unnecessary access denied on create */
58 /* return GENERIC_ALL; */
59 return (GENERIC_READ | GENERIC_WRITE);
62 return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
63 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
64 FILE_READ_DATA);
67 static u32 cifs_posix_convert_flags(unsigned int flags)
69 u32 posix_flags = 0;
71 if ((flags & O_ACCMODE) == O_RDONLY)
72 posix_flags = SMB_O_RDONLY;
73 else if ((flags & O_ACCMODE) == O_WRONLY)
74 posix_flags = SMB_O_WRONLY;
75 else if ((flags & O_ACCMODE) == O_RDWR)
76 posix_flags = SMB_O_RDWR;
78 if (flags & O_CREAT) {
79 posix_flags |= SMB_O_CREAT;
80 if (flags & O_EXCL)
81 posix_flags |= SMB_O_EXCL;
82 } else if (flags & O_EXCL)
83 cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
84 current->comm, current->tgid);
86 if (flags & O_TRUNC)
87 posix_flags |= SMB_O_TRUNC;
88 /* be safe and imply O_SYNC for O_DSYNC */
89 if (flags & O_DSYNC)
90 posix_flags |= SMB_O_SYNC;
91 if (flags & O_DIRECTORY)
92 posix_flags |= SMB_O_DIRECTORY;
93 if (flags & O_NOFOLLOW)
94 posix_flags |= SMB_O_NOFOLLOW;
95 if (flags & O_DIRECT)
96 posix_flags |= SMB_O_DIRECT;
98 return posix_flags;
101 static inline int cifs_get_disposition(unsigned int flags)
103 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
104 return FILE_CREATE;
105 else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
106 return FILE_OVERWRITE_IF;
107 else if ((flags & O_CREAT) == O_CREAT)
108 return FILE_OPEN_IF;
109 else if ((flags & O_TRUNC) == O_TRUNC)
110 return FILE_OVERWRITE;
111 else
112 return FILE_OPEN;
115 int cifs_posix_open(char *full_path, struct inode **pinode,
116 struct super_block *sb, int mode, unsigned int f_flags,
117 __u32 *poplock, __u16 *pnetfid, unsigned int xid)
119 int rc;
120 FILE_UNIX_BASIC_INFO *presp_data;
121 __u32 posix_flags = 0;
122 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
123 struct cifs_fattr fattr;
124 struct tcon_link *tlink;
125 struct cifs_tcon *tcon;
127 cifs_dbg(FYI, "posix open %s\n", full_path);
129 presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
130 if (presp_data == NULL)
131 return -ENOMEM;
133 tlink = cifs_sb_tlink(cifs_sb);
134 if (IS_ERR(tlink)) {
135 rc = PTR_ERR(tlink);
136 goto posix_open_ret;
139 tcon = tlink_tcon(tlink);
140 mode &= ~current_umask();
142 posix_flags = cifs_posix_convert_flags(f_flags);
143 rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
144 poplock, full_path, cifs_sb->local_nls,
145 cifs_remap(cifs_sb));
146 cifs_put_tlink(tlink);
148 if (rc)
149 goto posix_open_ret;
151 if (presp_data->Type == cpu_to_le32(-1))
152 goto posix_open_ret; /* open ok, caller does qpathinfo */
154 if (!pinode)
155 goto posix_open_ret; /* caller does not need info */
157 cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
159 /* get new inode and set it up */
160 if (*pinode == NULL) {
161 cifs_fill_uniqueid(sb, &fattr);
162 *pinode = cifs_iget(sb, &fattr);
163 if (!*pinode) {
164 rc = -ENOMEM;
165 goto posix_open_ret;
167 } else {
168 cifs_fattr_to_inode(*pinode, &fattr);
171 posix_open_ret:
172 kfree(presp_data);
173 return rc;
176 static int
177 cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
178 struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
179 struct cifs_fid *fid, unsigned int xid)
181 int rc;
182 int desired_access;
183 int disposition;
184 int create_options = CREATE_NOT_DIR;
185 FILE_ALL_INFO *buf;
186 struct TCP_Server_Info *server = tcon->ses->server;
187 struct cifs_open_parms oparms;
189 if (!server->ops->open)
190 return -ENOSYS;
192 desired_access = cifs_convert_flags(f_flags);
194 /*********************************************************************
195 * open flag mapping table:
197 * POSIX Flag CIFS Disposition
198 * ---------- ----------------
199 * O_CREAT FILE_OPEN_IF
200 * O_CREAT | O_EXCL FILE_CREATE
201 * O_CREAT | O_TRUNC FILE_OVERWRITE_IF
202 * O_TRUNC FILE_OVERWRITE
203 * none of the above FILE_OPEN
205 * Note that there is not a direct match between disposition
206 * FILE_SUPERSEDE (ie create whether or not file exists although
207 * O_CREAT | O_TRUNC is similar but truncates the existing
208 * file rather than creating a new file as FILE_SUPERSEDE does
209 * (which uses the attributes / metadata passed in on open call)
211 *? O_SYNC is a reasonable match to CIFS writethrough flag
212 *? and the read write flags match reasonably. O_LARGEFILE
213 *? is irrelevant because largefile support is always used
214 *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
215 * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
216 *********************************************************************/
218 disposition = cifs_get_disposition(f_flags);
220 /* BB pass O_SYNC flag through on file attributes .. BB */
222 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
223 if (!buf)
224 return -ENOMEM;
226 /* O_SYNC also has bit for O_DSYNC so following check picks up either */
227 if (f_flags & O_SYNC)
228 create_options |= CREATE_WRITE_THROUGH;
230 if (f_flags & O_DIRECT)
231 create_options |= CREATE_NO_BUFFER;
233 oparms.tcon = tcon;
234 oparms.cifs_sb = cifs_sb;
235 oparms.desired_access = desired_access;
236 oparms.create_options = cifs_create_options(cifs_sb, create_options);
237 oparms.disposition = disposition;
238 oparms.path = full_path;
239 oparms.fid = fid;
240 oparms.reconnect = false;
242 rc = server->ops->open(xid, &oparms, oplock, buf);
244 if (rc)
245 goto out;
247 /* TODO: Add support for calling posix query info but with passing in fid */
248 if (tcon->unix_ext)
249 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
250 xid);
251 else
252 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
253 xid, fid);
255 if (rc) {
256 server->ops->close(xid, tcon, fid);
257 if (rc == -ESTALE)
258 rc = -EOPENSTALE;
261 out:
262 kfree(buf);
263 return rc;
266 static bool
267 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
269 struct cifs_fid_locks *cur;
270 bool has_locks = false;
272 down_read(&cinode->lock_sem);
273 list_for_each_entry(cur, &cinode->llist, llist) {
274 if (!list_empty(&cur->locks)) {
275 has_locks = true;
276 break;
279 up_read(&cinode->lock_sem);
280 return has_locks;
/*
 * Acquire @sem for writing by retrying down_write_trylock(), sleeping 10 ms
 * between attempts, until it succeeds.
 */
void
cifs_down_write(struct rw_semaphore *sem)
{
	for (;;) {
		if (down_write_trylock(sem))
			return;
		msleep(10);
	}
}
290 static void cifsFileInfo_put_work(struct work_struct *work);
292 struct cifsFileInfo *
293 cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
294 struct tcon_link *tlink, __u32 oplock)
296 struct dentry *dentry = file_dentry(file);
297 struct inode *inode = d_inode(dentry);
298 struct cifsInodeInfo *cinode = CIFS_I(inode);
299 struct cifsFileInfo *cfile;
300 struct cifs_fid_locks *fdlocks;
301 struct cifs_tcon *tcon = tlink_tcon(tlink);
302 struct TCP_Server_Info *server = tcon->ses->server;
304 cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
305 if (cfile == NULL)
306 return cfile;
308 fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
309 if (!fdlocks) {
310 kfree(cfile);
311 return NULL;
314 INIT_LIST_HEAD(&fdlocks->locks);
315 fdlocks->cfile = cfile;
316 cfile->llist = fdlocks;
318 cfile->count = 1;
319 cfile->pid = current->tgid;
320 cfile->uid = current_fsuid();
321 cfile->dentry = dget(dentry);
322 cfile->f_flags = file->f_flags;
323 cfile->invalidHandle = false;
324 cfile->tlink = cifs_get_tlink(tlink);
325 INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
326 INIT_WORK(&cfile->put, cifsFileInfo_put_work);
327 mutex_init(&cfile->fh_mutex);
328 spin_lock_init(&cfile->file_info_lock);
330 cifs_sb_active(inode->i_sb);
333 * If the server returned a read oplock and we have mandatory brlocks,
334 * set oplock level to None.
336 if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
337 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
338 oplock = 0;
341 cifs_down_write(&cinode->lock_sem);
342 list_add(&fdlocks->llist, &cinode->llist);
343 up_write(&cinode->lock_sem);
345 spin_lock(&tcon->open_file_lock);
346 if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
347 oplock = fid->pending_open->oplock;
348 list_del(&fid->pending_open->olist);
350 fid->purge_cache = false;
351 server->ops->set_fid(cfile, fid, oplock);
353 list_add(&cfile->tlist, &tcon->openFileList);
354 atomic_inc(&tcon->num_local_opens);
356 /* if readable file instance put first in list*/
357 spin_lock(&cinode->open_file_lock);
358 if (file->f_mode & FMODE_READ)
359 list_add(&cfile->flist, &cinode->openFileList);
360 else
361 list_add_tail(&cfile->flist, &cinode->openFileList);
362 spin_unlock(&cinode->open_file_lock);
363 spin_unlock(&tcon->open_file_lock);
365 if (fid->purge_cache)
366 cifs_zap_mapping(inode);
368 file->private_data = cfile;
369 return cfile;
372 struct cifsFileInfo *
373 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
375 spin_lock(&cifs_file->file_info_lock);
376 cifsFileInfo_get_locked(cifs_file);
377 spin_unlock(&cifs_file->file_info_lock);
378 return cifs_file;
381 static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
383 struct inode *inode = d_inode(cifs_file->dentry);
384 struct cifsInodeInfo *cifsi = CIFS_I(inode);
385 struct cifsLockInfo *li, *tmp;
386 struct super_block *sb = inode->i_sb;
389 * Delete any outstanding lock records. We'll lose them when the file
390 * is closed anyway.
392 cifs_down_write(&cifsi->lock_sem);
393 list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
394 list_del(&li->llist);
395 cifs_del_lock_waiters(li);
396 kfree(li);
398 list_del(&cifs_file->llist->llist);
399 kfree(cifs_file->llist);
400 up_write(&cifsi->lock_sem);
402 cifs_put_tlink(cifs_file->tlink);
403 dput(cifs_file->dentry);
404 cifs_sb_deactive(sb);
405 kfree(cifs_file);
408 static void cifsFileInfo_put_work(struct work_struct *work)
410 struct cifsFileInfo *cifs_file = container_of(work,
411 struct cifsFileInfo, put);
413 cifsFileInfo_put_final(cifs_file);
/**
 * cifsFileInfo_put - release a reference of file priv data
 *
 * Convenience wrapper around _cifsFileInfo_put() that always asks it to
 * wait for the oplock handler and to offload the final teardown.
 *
 * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
	const bool wait_oplock_handler = true;
	const bool offload = true;

	_cifsFileInfo_put(cifs_file, wait_oplock_handler, offload);
}
429 * _cifsFileInfo_put - release a reference of file priv data
431 * This may involve closing the filehandle @cifs_file out on the
432 * server. Must be called without holding tcon->open_file_lock,
433 * cinode->open_file_lock and cifs_file->file_info_lock.
435 * If @wait_for_oplock_handler is true and we are releasing the last
436 * reference, wait for any running oplock break handler of the file
437 * and cancel any pending one.
439 * @cifs_file: cifs/smb3 specific info (eg refcounts) for an open file
440 * @wait_oplock_handler: must be false if called from oplock_break_handler
441 * @offload: not offloaded on close and oplock breaks
444 void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
445 bool wait_oplock_handler, bool offload)
447 struct inode *inode = d_inode(cifs_file->dentry);
448 struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
449 struct TCP_Server_Info *server = tcon->ses->server;
450 struct cifsInodeInfo *cifsi = CIFS_I(inode);
451 struct super_block *sb = inode->i_sb;
452 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
453 struct cifs_fid fid;
454 struct cifs_pending_open open;
455 bool oplock_break_cancelled;
457 spin_lock(&tcon->open_file_lock);
458 spin_lock(&cifsi->open_file_lock);
459 spin_lock(&cifs_file->file_info_lock);
460 if (--cifs_file->count > 0) {
461 spin_unlock(&cifs_file->file_info_lock);
462 spin_unlock(&cifsi->open_file_lock);
463 spin_unlock(&tcon->open_file_lock);
464 return;
466 spin_unlock(&cifs_file->file_info_lock);
468 if (server->ops->get_lease_key)
469 server->ops->get_lease_key(inode, &fid);
471 /* store open in pending opens to make sure we don't miss lease break */
472 cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
474 /* remove it from the lists */
475 list_del(&cifs_file->flist);
476 list_del(&cifs_file->tlist);
477 atomic_dec(&tcon->num_local_opens);
479 if (list_empty(&cifsi->openFileList)) {
480 cifs_dbg(FYI, "closing last open instance for inode %p\n",
481 d_inode(cifs_file->dentry));
483 * In strict cache mode we need invalidate mapping on the last
484 * close because it may cause a error when we open this file
485 * again and get at least level II oplock.
487 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
488 set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
489 cifs_set_oplock_level(cifsi, 0);
492 spin_unlock(&cifsi->open_file_lock);
493 spin_unlock(&tcon->open_file_lock);
495 oplock_break_cancelled = wait_oplock_handler ?
496 cancel_work_sync(&cifs_file->oplock_break) : false;
498 if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
499 struct TCP_Server_Info *server = tcon->ses->server;
500 unsigned int xid;
502 xid = get_xid();
503 if (server->ops->close_getattr)
504 server->ops->close_getattr(xid, tcon, cifs_file);
505 else if (server->ops->close)
506 server->ops->close(xid, tcon, &cifs_file->fid);
507 _free_xid(xid);
510 if (oplock_break_cancelled)
511 cifs_done_oplock_break(cifsi);
513 cifs_del_pending_open(&open);
515 if (offload)
516 queue_work(fileinfo_put_wq, &cifs_file->put);
517 else
518 cifsFileInfo_put_final(cifs_file);
521 int cifs_open(struct inode *inode, struct file *file)
524 int rc = -EACCES;
525 unsigned int xid;
526 __u32 oplock;
527 struct cifs_sb_info *cifs_sb;
528 struct TCP_Server_Info *server;
529 struct cifs_tcon *tcon;
530 struct tcon_link *tlink;
531 struct cifsFileInfo *cfile = NULL;
532 char *full_path = NULL;
533 bool posix_open_ok = false;
534 struct cifs_fid fid;
535 struct cifs_pending_open open;
537 xid = get_xid();
539 cifs_sb = CIFS_SB(inode->i_sb);
540 tlink = cifs_sb_tlink(cifs_sb);
541 if (IS_ERR(tlink)) {
542 free_xid(xid);
543 return PTR_ERR(tlink);
545 tcon = tlink_tcon(tlink);
546 server = tcon->ses->server;
548 full_path = build_path_from_dentry(file_dentry(file));
549 if (full_path == NULL) {
550 rc = -ENOMEM;
551 goto out;
554 cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
555 inode, file->f_flags, full_path);
557 if (file->f_flags & O_DIRECT &&
558 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
559 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
560 file->f_op = &cifs_file_direct_nobrl_ops;
561 else
562 file->f_op = &cifs_file_direct_ops;
565 if (server->oplocks)
566 oplock = REQ_OPLOCK;
567 else
568 oplock = 0;
570 if (!tcon->broken_posix_open && tcon->unix_ext &&
571 cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
572 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
573 /* can not refresh inode info since size could be stale */
574 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
575 cifs_sb->ctx->file_mode /* ignored */,
576 file->f_flags, &oplock, &fid.netfid, xid);
577 if (rc == 0) {
578 cifs_dbg(FYI, "posix open succeeded\n");
579 posix_open_ok = true;
580 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
581 if (tcon->ses->serverNOS)
582 cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
583 tcon->ses->serverName,
584 tcon->ses->serverNOS);
585 tcon->broken_posix_open = true;
586 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
587 (rc != -EOPNOTSUPP)) /* path not found or net err */
588 goto out;
590 * Else fallthrough to retry open the old way on network i/o
591 * or DFS errors.
595 if (server->ops->get_lease_key)
596 server->ops->get_lease_key(inode, &fid);
598 cifs_add_pending_open(&fid, tlink, &open);
600 if (!posix_open_ok) {
601 if (server->ops->get_lease_key)
602 server->ops->get_lease_key(inode, &fid);
604 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
605 file->f_flags, &oplock, &fid, xid);
606 if (rc) {
607 cifs_del_pending_open(&open);
608 goto out;
612 cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
613 if (cfile == NULL) {
614 if (server->ops->close)
615 server->ops->close(xid, tcon, &fid);
616 cifs_del_pending_open(&open);
617 rc = -ENOMEM;
618 goto out;
621 cifs_fscache_set_inode_cookie(inode, file);
623 if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
625 * Time to set mode which we can not set earlier due to
626 * problems creating new read-only files.
628 struct cifs_unix_set_info_args args = {
629 .mode = inode->i_mode,
630 .uid = INVALID_UID, /* no change */
631 .gid = INVALID_GID, /* no change */
632 .ctime = NO_CHANGE_64,
633 .atime = NO_CHANGE_64,
634 .mtime = NO_CHANGE_64,
635 .device = 0,
637 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
638 cfile->pid);
641 out:
642 kfree(full_path);
643 free_xid(xid);
644 cifs_put_tlink(tlink);
645 return rc;
648 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
651 * Try to reacquire byte range locks that were released when session
652 * to server was lost.
654 static int
655 cifs_relock_file(struct cifsFileInfo *cfile)
657 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
658 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
659 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
660 int rc = 0;
662 down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
663 if (cinode->can_cache_brlcks) {
664 /* can cache locks - no need to relock */
665 up_read(&cinode->lock_sem);
666 return rc;
669 if (cap_unix(tcon->ses) &&
670 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
671 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
672 rc = cifs_push_posix_locks(cfile);
673 else
674 rc = tcon->ses->server->ops->push_mand_locks(cfile);
676 up_read(&cinode->lock_sem);
677 return rc;
680 static int
681 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
683 int rc = -EACCES;
684 unsigned int xid;
685 __u32 oplock;
686 struct cifs_sb_info *cifs_sb;
687 struct cifs_tcon *tcon;
688 struct TCP_Server_Info *server;
689 struct cifsInodeInfo *cinode;
690 struct inode *inode;
691 char *full_path = NULL;
692 int desired_access;
693 int disposition = FILE_OPEN;
694 int create_options = CREATE_NOT_DIR;
695 struct cifs_open_parms oparms;
697 xid = get_xid();
698 mutex_lock(&cfile->fh_mutex);
699 if (!cfile->invalidHandle) {
700 mutex_unlock(&cfile->fh_mutex);
701 rc = 0;
702 free_xid(xid);
703 return rc;
706 inode = d_inode(cfile->dentry);
707 cifs_sb = CIFS_SB(inode->i_sb);
708 tcon = tlink_tcon(cfile->tlink);
709 server = tcon->ses->server;
712 * Can not grab rename sem here because various ops, including those
713 * that already have the rename sem can end up causing writepage to get
714 * called and if the server was down that means we end up here, and we
715 * can never tell if the caller already has the rename_sem.
717 full_path = build_path_from_dentry(cfile->dentry);
718 if (full_path == NULL) {
719 rc = -ENOMEM;
720 mutex_unlock(&cfile->fh_mutex);
721 free_xid(xid);
722 return rc;
725 cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
726 inode, cfile->f_flags, full_path);
728 if (tcon->ses->server->oplocks)
729 oplock = REQ_OPLOCK;
730 else
731 oplock = 0;
733 if (tcon->unix_ext && cap_unix(tcon->ses) &&
734 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
735 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
737 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
738 * original open. Must mask them off for a reopen.
740 unsigned int oflags = cfile->f_flags &
741 ~(O_CREAT | O_EXCL | O_TRUNC);
743 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
744 cifs_sb->ctx->file_mode /* ignored */,
745 oflags, &oplock, &cfile->fid.netfid, xid);
746 if (rc == 0) {
747 cifs_dbg(FYI, "posix reopen succeeded\n");
748 oparms.reconnect = true;
749 goto reopen_success;
752 * fallthrough to retry open the old way on errors, especially
753 * in the reconnect path it is important to retry hard
757 desired_access = cifs_convert_flags(cfile->f_flags);
759 /* O_SYNC also has bit for O_DSYNC so following check picks up either */
760 if (cfile->f_flags & O_SYNC)
761 create_options |= CREATE_WRITE_THROUGH;
763 if (cfile->f_flags & O_DIRECT)
764 create_options |= CREATE_NO_BUFFER;
766 if (server->ops->get_lease_key)
767 server->ops->get_lease_key(inode, &cfile->fid);
769 oparms.tcon = tcon;
770 oparms.cifs_sb = cifs_sb;
771 oparms.desired_access = desired_access;
772 oparms.create_options = cifs_create_options(cifs_sb, create_options);
773 oparms.disposition = disposition;
774 oparms.path = full_path;
775 oparms.fid = &cfile->fid;
776 oparms.reconnect = true;
779 * Can not refresh inode by passing in file_info buf to be returned by
780 * ops->open and then calling get_inode_info with returned buf since
781 * file might have write behind data that needs to be flushed and server
782 * version of file size can be stale. If we knew for sure that inode was
783 * not dirty locally we could do this.
785 rc = server->ops->open(xid, &oparms, &oplock, NULL);
786 if (rc == -ENOENT && oparms.reconnect == false) {
787 /* durable handle timeout is expired - open the file again */
788 rc = server->ops->open(xid, &oparms, &oplock, NULL);
789 /* indicate that we need to relock the file */
790 oparms.reconnect = true;
793 if (rc) {
794 mutex_unlock(&cfile->fh_mutex);
795 cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
796 cifs_dbg(FYI, "oplock: %d\n", oplock);
797 goto reopen_error_exit;
800 reopen_success:
801 cfile->invalidHandle = false;
802 mutex_unlock(&cfile->fh_mutex);
803 cinode = CIFS_I(inode);
805 if (can_flush) {
806 rc = filemap_write_and_wait(inode->i_mapping);
807 if (!is_interrupt_error(rc))
808 mapping_set_error(inode->i_mapping, rc);
810 if (tcon->posix_extensions)
811 rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
812 else if (tcon->unix_ext)
813 rc = cifs_get_inode_info_unix(&inode, full_path,
814 inode->i_sb, xid);
815 else
816 rc = cifs_get_inode_info(&inode, full_path, NULL,
817 inode->i_sb, xid, NULL);
820 * Else we are writing out data to server already and could deadlock if
821 * we tried to flush data, and since we do not know if we have data that
822 * would invalidate the current end of file on the server we can not go
823 * to the server to get the new inode info.
827 * If the server returned a read oplock and we have mandatory brlocks,
828 * set oplock level to None.
830 if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
831 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
832 oplock = 0;
835 server->ops->set_fid(cfile, &cfile->fid, oplock);
836 if (oparms.reconnect)
837 cifs_relock_file(cfile);
839 reopen_error_exit:
840 kfree(full_path);
841 free_xid(xid);
842 return rc;
845 int cifs_close(struct inode *inode, struct file *file)
847 if (file->private_data != NULL) {
848 _cifsFileInfo_put(file->private_data, true, false);
849 file->private_data = NULL;
852 /* return code from the ->release op is always ignored */
853 return 0;
856 void
857 cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
859 struct cifsFileInfo *open_file;
860 struct list_head *tmp;
861 struct list_head *tmp1;
862 struct list_head tmp_list;
864 if (!tcon->use_persistent || !tcon->need_reopen_files)
865 return;
867 tcon->need_reopen_files = false;
869 cifs_dbg(FYI, "Reopen persistent handles\n");
870 INIT_LIST_HEAD(&tmp_list);
872 /* list all files open on tree connection, reopen resilient handles */
873 spin_lock(&tcon->open_file_lock);
874 list_for_each(tmp, &tcon->openFileList) {
875 open_file = list_entry(tmp, struct cifsFileInfo, tlist);
876 if (!open_file->invalidHandle)
877 continue;
878 cifsFileInfo_get(open_file);
879 list_add_tail(&open_file->rlist, &tmp_list);
881 spin_unlock(&tcon->open_file_lock);
883 list_for_each_safe(tmp, tmp1, &tmp_list) {
884 open_file = list_entry(tmp, struct cifsFileInfo, rlist);
885 if (cifs_reopen_file(open_file, false /* do not flush */))
886 tcon->need_reopen_files = true;
887 list_del_init(&open_file->rlist);
888 cifsFileInfo_put(open_file);
892 int cifs_closedir(struct inode *inode, struct file *file)
894 int rc = 0;
895 unsigned int xid;
896 struct cifsFileInfo *cfile = file->private_data;
897 struct cifs_tcon *tcon;
898 struct TCP_Server_Info *server;
899 char *buf;
901 cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
903 if (cfile == NULL)
904 return rc;
906 xid = get_xid();
907 tcon = tlink_tcon(cfile->tlink);
908 server = tcon->ses->server;
910 cifs_dbg(FYI, "Freeing private data in close dir\n");
911 spin_lock(&cfile->file_info_lock);
912 if (server->ops->dir_needs_close(cfile)) {
913 cfile->invalidHandle = true;
914 spin_unlock(&cfile->file_info_lock);
915 if (server->ops->close_dir)
916 rc = server->ops->close_dir(xid, tcon, &cfile->fid);
917 else
918 rc = -ENOSYS;
919 cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
920 /* not much we can do if it fails anyway, ignore rc */
921 rc = 0;
922 } else
923 spin_unlock(&cfile->file_info_lock);
925 buf = cfile->srch_inf.ntwrk_buf_start;
926 if (buf) {
927 cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
928 cfile->srch_inf.ntwrk_buf_start = NULL;
929 if (cfile->srch_inf.smallBuf)
930 cifs_small_buf_release(buf);
931 else
932 cifs_buf_release(buf);
935 cifs_put_tlink(cfile->tlink);
936 kfree(file->private_data);
937 file->private_data = NULL;
938 /* BB can we lock the filestruct while this is going on? */
939 free_xid(xid);
940 return rc;
943 static struct cifsLockInfo *
944 cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
946 struct cifsLockInfo *lock =
947 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
948 if (!lock)
949 return lock;
950 lock->offset = offset;
951 lock->length = length;
952 lock->type = type;
953 lock->pid = current->tgid;
954 lock->flags = flags;
955 INIT_LIST_HEAD(&lock->blist);
956 init_waitqueue_head(&lock->block_q);
957 return lock;
960 void
961 cifs_del_lock_waiters(struct cifsLockInfo *lock)
963 struct cifsLockInfo *li, *tmp;
964 list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
965 list_del_init(&li->blist);
966 wake_up(&li->block_q);
970 #define CIFS_LOCK_OP 0
971 #define CIFS_READ_OP 1
972 #define CIFS_WRITE_OP 2
974 /* @rw_check : 0 - no op, 1 - read, 2 - write */
975 static bool
976 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
977 __u64 length, __u8 type, __u16 flags,
978 struct cifsFileInfo *cfile,
979 struct cifsLockInfo **conf_lock, int rw_check)
981 struct cifsLockInfo *li;
982 struct cifsFileInfo *cur_cfile = fdlocks->cfile;
983 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
985 list_for_each_entry(li, &fdlocks->locks, llist) {
986 if (offset + length <= li->offset ||
987 offset >= li->offset + li->length)
988 continue;
989 if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
990 server->ops->compare_fids(cfile, cur_cfile)) {
991 /* shared lock prevents write op through the same fid */
992 if (!(li->type & server->vals->shared_lock_type) ||
993 rw_check != CIFS_WRITE_OP)
994 continue;
996 if ((type & server->vals->shared_lock_type) &&
997 ((server->ops->compare_fids(cfile, cur_cfile) &&
998 current->tgid == li->pid) || type == li->type))
999 continue;
1000 if (rw_check == CIFS_LOCK_OP &&
1001 (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
1002 server->ops->compare_fids(cfile, cur_cfile))
1003 continue;
1004 if (conf_lock)
1005 *conf_lock = li;
1006 return true;
1008 return false;
1011 bool
1012 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1013 __u8 type, __u16 flags,
1014 struct cifsLockInfo **conf_lock, int rw_check)
1016 bool rc = false;
1017 struct cifs_fid_locks *cur;
1018 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1020 list_for_each_entry(cur, &cinode->llist, llist) {
1021 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
1022 flags, cfile, conf_lock,
1023 rw_check);
1024 if (rc)
1025 break;
1028 return rc;
1032 * Check if there is another lock that prevents us to set the lock (mandatory
1033 * style). If such a lock exists, update the flock structure with its
1034 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1035 * or leave it the same if we can't. Returns 0 if we don't need to request to
1036 * the server or 1 otherwise.
1038 static int
1039 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1040 __u8 type, struct file_lock *flock)
1042 int rc = 0;
1043 struct cifsLockInfo *conf_lock;
1044 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1045 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1046 bool exist;
1048 down_read(&cinode->lock_sem);
1050 exist = cifs_find_lock_conflict(cfile, offset, length, type,
1051 flock->fl_flags, &conf_lock,
1052 CIFS_LOCK_OP);
1053 if (exist) {
1054 flock->fl_start = conf_lock->offset;
1055 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
1056 flock->fl_pid = conf_lock->pid;
1057 if (conf_lock->type & server->vals->shared_lock_type)
1058 flock->fl_type = F_RDLCK;
1059 else
1060 flock->fl_type = F_WRLCK;
1061 } else if (!cinode->can_cache_brlcks)
1062 rc = 1;
1063 else
1064 flock->fl_type = F_UNLCK;
1066 up_read(&cinode->lock_sem);
1067 return rc;
1070 static void
1071 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
1073 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1074 cifs_down_write(&cinode->lock_sem);
1075 list_add_tail(&lock->llist, &cfile->llist->locks);
1076 up_write(&cinode->lock_sem);
1080 * Set the byte-range lock (mandatory style). Returns:
1081 * 1) 0, if we set the lock and don't need to request to the server;
1082 * 2) 1, if no locks prevent us but we need to request to the server;
1083 * 3) -EACCES, if there is a lock that prevents us and wait is false.
1085 static int
1086 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
1087 bool wait)
1089 struct cifsLockInfo *conf_lock;
1090 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1091 bool exist;
1092 int rc = 0;
1094 try_again:
1095 exist = false;
1096 cifs_down_write(&cinode->lock_sem);
1098 exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
1099 lock->type, lock->flags, &conf_lock,
1100 CIFS_LOCK_OP);
1101 if (!exist && cinode->can_cache_brlcks) {
1102 list_add_tail(&lock->llist, &cfile->llist->locks);
1103 up_write(&cinode->lock_sem);
1104 return rc;
1107 if (!exist)
1108 rc = 1;
1109 else if (!wait)
1110 rc = -EACCES;
1111 else {
1112 list_add_tail(&lock->blist, &conf_lock->blist);
1113 up_write(&cinode->lock_sem);
1114 rc = wait_event_interruptible(lock->block_q,
1115 (lock->blist.prev == &lock->blist) &&
1116 (lock->blist.next == &lock->blist));
1117 if (!rc)
1118 goto try_again;
1119 cifs_down_write(&cinode->lock_sem);
1120 list_del_init(&lock->blist);
1123 up_write(&cinode->lock_sem);
1124 return rc;
1128 * Check if there is another lock that prevents us to set the lock (posix
1129 * style). If such a lock exists, update the flock structure with its
1130 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1131 * or leave it the same if we can't. Returns 0 if we don't need to request to
1132 * the server or 1 otherwise.
1134 static int
1135 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1137 int rc = 0;
1138 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1139 unsigned char saved_type = flock->fl_type;
1141 if ((flock->fl_flags & FL_POSIX) == 0)
1142 return 1;
1144 down_read(&cinode->lock_sem);
1145 posix_test_lock(file, flock);
1147 if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1148 flock->fl_type = saved_type;
1149 rc = 1;
1152 up_read(&cinode->lock_sem);
1153 return rc;
1157 * Set the byte-range lock (posix style). Returns:
1158 * 1) <0, if the error occurs while setting the lock;
1159 * 2) 0, if we set the lock and don't need to request to the server;
1160 * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
1161 * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
1163 static int
1164 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1166 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1167 int rc = FILE_LOCK_DEFERRED + 1;
1169 if ((flock->fl_flags & FL_POSIX) == 0)
1170 return rc;
1172 cifs_down_write(&cinode->lock_sem);
1173 if (!cinode->can_cache_brlcks) {
1174 up_write(&cinode->lock_sem);
1175 return rc;
1178 rc = posix_lock_file(file, flock, NULL);
1179 up_write(&cinode->lock_sem);
1180 return rc;
1184 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1186 unsigned int xid;
1187 int rc = 0, stored_rc;
1188 struct cifsLockInfo *li, *tmp;
1189 struct cifs_tcon *tcon;
1190 unsigned int num, max_num, max_buf;
1191 LOCKING_ANDX_RANGE *buf, *cur;
1192 static const int types[] = {
1193 LOCKING_ANDX_LARGE_FILES,
1194 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1196 int i;
1198 xid = get_xid();
1199 tcon = tlink_tcon(cfile->tlink);
1202 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1203 * and check it before using.
1205 max_buf = tcon->ses->server->maxBuf;
1206 if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
1207 free_xid(xid);
1208 return -EINVAL;
1211 BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1212 PAGE_SIZE);
1213 max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1214 PAGE_SIZE);
1215 max_num = (max_buf - sizeof(struct smb_hdr)) /
1216 sizeof(LOCKING_ANDX_RANGE);
1217 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1218 if (!buf) {
1219 free_xid(xid);
1220 return -ENOMEM;
1223 for (i = 0; i < 2; i++) {
1224 cur = buf;
1225 num = 0;
1226 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1227 if (li->type != types[i])
1228 continue;
1229 cur->Pid = cpu_to_le16(li->pid);
1230 cur->LengthLow = cpu_to_le32((u32)li->length);
1231 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1232 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1233 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1234 if (++num == max_num) {
1235 stored_rc = cifs_lockv(xid, tcon,
1236 cfile->fid.netfid,
1237 (__u8)li->type, 0, num,
1238 buf);
1239 if (stored_rc)
1240 rc = stored_rc;
1241 cur = buf;
1242 num = 0;
1243 } else
1244 cur++;
1247 if (num) {
1248 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1249 (__u8)types[i], 0, num, buf);
1250 if (stored_rc)
1251 rc = stored_rc;
1255 kfree(buf);
1256 free_xid(xid);
1257 return rc;
1260 static __u32
1261 hash_lockowner(fl_owner_t owner)
1263 return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1266 struct lock_to_push {
1267 struct list_head llist;
1268 __u64 offset;
1269 __u64 length;
1270 __u32 pid;
1271 __u16 netfid;
1272 __u8 type;
1275 static int
1276 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1278 struct inode *inode = d_inode(cfile->dentry);
1279 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1280 struct file_lock *flock;
1281 struct file_lock_context *flctx = inode->i_flctx;
1282 unsigned int count = 0, i;
1283 int rc = 0, xid, type;
1284 struct list_head locks_to_send, *el;
1285 struct lock_to_push *lck, *tmp;
1286 __u64 length;
1288 xid = get_xid();
1290 if (!flctx)
1291 goto out;
1293 spin_lock(&flctx->flc_lock);
1294 list_for_each(el, &flctx->flc_posix) {
1295 count++;
1297 spin_unlock(&flctx->flc_lock);
1299 INIT_LIST_HEAD(&locks_to_send);
1302 * Allocating count locks is enough because no FL_POSIX locks can be
1303 * added to the list while we are holding cinode->lock_sem that
1304 * protects locking operations of this inode.
1306 for (i = 0; i < count; i++) {
1307 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1308 if (!lck) {
1309 rc = -ENOMEM;
1310 goto err_out;
1312 list_add_tail(&lck->llist, &locks_to_send);
1315 el = locks_to_send.next;
1316 spin_lock(&flctx->flc_lock);
1317 list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1318 if (el == &locks_to_send) {
1320 * The list ended. We don't have enough allocated
1321 * structures - something is really wrong.
1323 cifs_dbg(VFS, "Can't push all brlocks!\n");
1324 break;
1326 length = 1 + flock->fl_end - flock->fl_start;
1327 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1328 type = CIFS_RDLCK;
1329 else
1330 type = CIFS_WRLCK;
1331 lck = list_entry(el, struct lock_to_push, llist);
1332 lck->pid = hash_lockowner(flock->fl_owner);
1333 lck->netfid = cfile->fid.netfid;
1334 lck->length = length;
1335 lck->type = type;
1336 lck->offset = flock->fl_start;
1338 spin_unlock(&flctx->flc_lock);
1340 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1341 int stored_rc;
1343 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1344 lck->offset, lck->length, NULL,
1345 lck->type, 0);
1346 if (stored_rc)
1347 rc = stored_rc;
1348 list_del(&lck->llist);
1349 kfree(lck);
1352 out:
1353 free_xid(xid);
1354 return rc;
1355 err_out:
1356 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1357 list_del(&lck->llist);
1358 kfree(lck);
1360 goto out;
1363 static int
1364 cifs_push_locks(struct cifsFileInfo *cfile)
1366 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1367 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1368 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1369 int rc = 0;
1371 /* we are going to update can_cache_brlcks here - need a write access */
1372 cifs_down_write(&cinode->lock_sem);
1373 if (!cinode->can_cache_brlcks) {
1374 up_write(&cinode->lock_sem);
1375 return rc;
1378 if (cap_unix(tcon->ses) &&
1379 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1380 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1381 rc = cifs_push_posix_locks(cfile);
1382 else
1383 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1385 cinode->can_cache_brlcks = false;
1386 up_write(&cinode->lock_sem);
1387 return rc;
1390 static void
1391 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1392 bool *wait_flag, struct TCP_Server_Info *server)
1394 if (flock->fl_flags & FL_POSIX)
1395 cifs_dbg(FYI, "Posix\n");
1396 if (flock->fl_flags & FL_FLOCK)
1397 cifs_dbg(FYI, "Flock\n");
1398 if (flock->fl_flags & FL_SLEEP) {
1399 cifs_dbg(FYI, "Blocking lock\n");
1400 *wait_flag = true;
1402 if (flock->fl_flags & FL_ACCESS)
1403 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1404 if (flock->fl_flags & FL_LEASE)
1405 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1406 if (flock->fl_flags &
1407 (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1408 FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1409 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1411 *type = server->vals->large_lock_type;
1412 if (flock->fl_type == F_WRLCK) {
1413 cifs_dbg(FYI, "F_WRLCK\n");
1414 *type |= server->vals->exclusive_lock_type;
1415 *lock = 1;
1416 } else if (flock->fl_type == F_UNLCK) {
1417 cifs_dbg(FYI, "F_UNLCK\n");
1418 *type |= server->vals->unlock_lock_type;
1419 *unlock = 1;
1420 /* Check if unlock includes more than one lock range */
1421 } else if (flock->fl_type == F_RDLCK) {
1422 cifs_dbg(FYI, "F_RDLCK\n");
1423 *type |= server->vals->shared_lock_type;
1424 *lock = 1;
1425 } else if (flock->fl_type == F_EXLCK) {
1426 cifs_dbg(FYI, "F_EXLCK\n");
1427 *type |= server->vals->exclusive_lock_type;
1428 *lock = 1;
1429 } else if (flock->fl_type == F_SHLCK) {
1430 cifs_dbg(FYI, "F_SHLCK\n");
1431 *type |= server->vals->shared_lock_type;
1432 *lock = 1;
1433 } else
1434 cifs_dbg(FYI, "Unknown type of lock\n");
1437 static int
1438 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1439 bool wait_flag, bool posix_lck, unsigned int xid)
1441 int rc = 0;
1442 __u64 length = 1 + flock->fl_end - flock->fl_start;
1443 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1444 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1445 struct TCP_Server_Info *server = tcon->ses->server;
1446 __u16 netfid = cfile->fid.netfid;
1448 if (posix_lck) {
1449 int posix_lock_type;
1451 rc = cifs_posix_lock_test(file, flock);
1452 if (!rc)
1453 return rc;
1455 if (type & server->vals->shared_lock_type)
1456 posix_lock_type = CIFS_RDLCK;
1457 else
1458 posix_lock_type = CIFS_WRLCK;
1459 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1460 hash_lockowner(flock->fl_owner),
1461 flock->fl_start, length, flock,
1462 posix_lock_type, wait_flag);
1463 return rc;
1466 rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1467 if (!rc)
1468 return rc;
1470 /* BB we could chain these into one lock request BB */
1471 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1472 1, 0, false);
1473 if (rc == 0) {
1474 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1475 type, 0, 1, false);
1476 flock->fl_type = F_UNLCK;
1477 if (rc != 0)
1478 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1479 rc);
1480 return 0;
1483 if (type & server->vals->shared_lock_type) {
1484 flock->fl_type = F_WRLCK;
1485 return 0;
1488 type &= ~server->vals->exclusive_lock_type;
1490 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1491 type | server->vals->shared_lock_type,
1492 1, 0, false);
1493 if (rc == 0) {
1494 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1495 type | server->vals->shared_lock_type, 0, 1, false);
1496 flock->fl_type = F_RDLCK;
1497 if (rc != 0)
1498 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1499 rc);
1500 } else
1501 flock->fl_type = F_WRLCK;
1503 return 0;
1506 void
1507 cifs_move_llist(struct list_head *source, struct list_head *dest)
1509 struct list_head *li, *tmp;
1510 list_for_each_safe(li, tmp, source)
1511 list_move(li, dest);
1514 void
1515 cifs_free_llist(struct list_head *llist)
1517 struct cifsLockInfo *li, *tmp;
1518 list_for_each_entry_safe(li, tmp, llist, llist) {
1519 cifs_del_lock_waiters(li);
1520 list_del(&li->llist);
1521 kfree(li);
1526 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1527 unsigned int xid)
1529 int rc = 0, stored_rc;
1530 static const int types[] = {
1531 LOCKING_ANDX_LARGE_FILES,
1532 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1534 unsigned int i;
1535 unsigned int max_num, num, max_buf;
1536 LOCKING_ANDX_RANGE *buf, *cur;
1537 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1538 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1539 struct cifsLockInfo *li, *tmp;
1540 __u64 length = 1 + flock->fl_end - flock->fl_start;
1541 struct list_head tmp_llist;
1543 INIT_LIST_HEAD(&tmp_llist);
1546 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1547 * and check it before using.
1549 max_buf = tcon->ses->server->maxBuf;
1550 if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1551 return -EINVAL;
1553 BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1554 PAGE_SIZE);
1555 max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1556 PAGE_SIZE);
1557 max_num = (max_buf - sizeof(struct smb_hdr)) /
1558 sizeof(LOCKING_ANDX_RANGE);
1559 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1560 if (!buf)
1561 return -ENOMEM;
1563 cifs_down_write(&cinode->lock_sem);
1564 for (i = 0; i < 2; i++) {
1565 cur = buf;
1566 num = 0;
1567 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1568 if (flock->fl_start > li->offset ||
1569 (flock->fl_start + length) <
1570 (li->offset + li->length))
1571 continue;
1572 if (current->tgid != li->pid)
1573 continue;
1574 if (types[i] != li->type)
1575 continue;
1576 if (cinode->can_cache_brlcks) {
1578 * We can cache brlock requests - simply remove
1579 * a lock from the file's list.
1581 list_del(&li->llist);
1582 cifs_del_lock_waiters(li);
1583 kfree(li);
1584 continue;
1586 cur->Pid = cpu_to_le16(li->pid);
1587 cur->LengthLow = cpu_to_le32((u32)li->length);
1588 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1589 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1590 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1592 * We need to save a lock here to let us add it again to
1593 * the file's list if the unlock range request fails on
1594 * the server.
1596 list_move(&li->llist, &tmp_llist);
1597 if (++num == max_num) {
1598 stored_rc = cifs_lockv(xid, tcon,
1599 cfile->fid.netfid,
1600 li->type, num, 0, buf);
1601 if (stored_rc) {
1603 * We failed on the unlock range
1604 * request - add all locks from the tmp
1605 * list to the head of the file's list.
1607 cifs_move_llist(&tmp_llist,
1608 &cfile->llist->locks);
1609 rc = stored_rc;
1610 } else
1612 * The unlock range request succeed -
1613 * free the tmp list.
1615 cifs_free_llist(&tmp_llist);
1616 cur = buf;
1617 num = 0;
1618 } else
1619 cur++;
1621 if (num) {
1622 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1623 types[i], num, 0, buf);
1624 if (stored_rc) {
1625 cifs_move_llist(&tmp_llist,
1626 &cfile->llist->locks);
1627 rc = stored_rc;
1628 } else
1629 cifs_free_llist(&tmp_llist);
1633 up_write(&cinode->lock_sem);
1634 kfree(buf);
1635 return rc;
1638 static int
1639 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1640 bool wait_flag, bool posix_lck, int lock, int unlock,
1641 unsigned int xid)
1643 int rc = 0;
1644 __u64 length = 1 + flock->fl_end - flock->fl_start;
1645 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1646 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1647 struct TCP_Server_Info *server = tcon->ses->server;
1648 struct inode *inode = d_inode(cfile->dentry);
1650 if (posix_lck) {
1651 int posix_lock_type;
1653 rc = cifs_posix_lock_set(file, flock);
1654 if (rc <= FILE_LOCK_DEFERRED)
1655 return rc;
1657 if (type & server->vals->shared_lock_type)
1658 posix_lock_type = CIFS_RDLCK;
1659 else
1660 posix_lock_type = CIFS_WRLCK;
1662 if (unlock == 1)
1663 posix_lock_type = CIFS_UNLCK;
1665 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1666 hash_lockowner(flock->fl_owner),
1667 flock->fl_start, length,
1668 NULL, posix_lock_type, wait_flag);
1669 goto out;
1672 if (lock) {
1673 struct cifsLockInfo *lock;
1675 lock = cifs_lock_init(flock->fl_start, length, type,
1676 flock->fl_flags);
1677 if (!lock)
1678 return -ENOMEM;
1680 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1681 if (rc < 0) {
1682 kfree(lock);
1683 return rc;
1685 if (!rc)
1686 goto out;
1689 * Windows 7 server can delay breaking lease from read to None
1690 * if we set a byte-range lock on a file - break it explicitly
1691 * before sending the lock to the server to be sure the next
1692 * read won't conflict with non-overlapted locks due to
1693 * pagereading.
1695 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1696 CIFS_CACHE_READ(CIFS_I(inode))) {
1697 cifs_zap_mapping(inode);
1698 cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1699 inode);
1700 CIFS_I(inode)->oplock = 0;
1703 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1704 type, 1, 0, wait_flag);
1705 if (rc) {
1706 kfree(lock);
1707 return rc;
1710 cifs_lock_add(cfile, lock);
1711 } else if (unlock)
1712 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1714 out:
1715 if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
1717 * If this is a request to remove all locks because we
1718 * are closing the file, it doesn't matter if the
1719 * unlocking failed as both cifs.ko and the SMB server
1720 * remove the lock on file close
1722 if (rc) {
1723 cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
1724 if (!(flock->fl_flags & FL_CLOSE))
1725 return rc;
1727 rc = locks_lock_file_wait(file, flock);
1729 return rc;
1732 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
1734 int rc, xid;
1735 int lock = 0, unlock = 0;
1736 bool wait_flag = false;
1737 bool posix_lck = false;
1738 struct cifs_sb_info *cifs_sb;
1739 struct cifs_tcon *tcon;
1740 struct cifsFileInfo *cfile;
1741 __u32 type;
1743 rc = -EACCES;
1744 xid = get_xid();
1746 if (!(fl->fl_flags & FL_FLOCK))
1747 return -ENOLCK;
1749 cfile = (struct cifsFileInfo *)file->private_data;
1750 tcon = tlink_tcon(cfile->tlink);
1752 cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
1753 tcon->ses->server);
1754 cifs_sb = CIFS_FILE_SB(file);
1756 if (cap_unix(tcon->ses) &&
1757 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1758 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1759 posix_lck = true;
1761 if (!lock && !unlock) {
1763 * if no lock or unlock then nothing to do since we do not
1764 * know what it is
1766 free_xid(xid);
1767 return -EOPNOTSUPP;
1770 rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
1771 xid);
1772 free_xid(xid);
1773 return rc;
1778 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1780 int rc, xid;
1781 int lock = 0, unlock = 0;
1782 bool wait_flag = false;
1783 bool posix_lck = false;
1784 struct cifs_sb_info *cifs_sb;
1785 struct cifs_tcon *tcon;
1786 struct cifsFileInfo *cfile;
1787 __u32 type;
1789 rc = -EACCES;
1790 xid = get_xid();
1792 cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1793 cmd, flock->fl_flags, flock->fl_type,
1794 flock->fl_start, flock->fl_end);
1796 cfile = (struct cifsFileInfo *)file->private_data;
1797 tcon = tlink_tcon(cfile->tlink);
1799 cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1800 tcon->ses->server);
1801 cifs_sb = CIFS_FILE_SB(file);
1803 if (cap_unix(tcon->ses) &&
1804 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1805 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1806 posix_lck = true;
1808 * BB add code here to normalize offset and length to account for
1809 * negative length which we can not accept over the wire.
1811 if (IS_GETLK(cmd)) {
1812 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1813 free_xid(xid);
1814 return rc;
1817 if (!lock && !unlock) {
1819 * if no lock or unlock then nothing to do since we do not
1820 * know what it is
1822 free_xid(xid);
1823 return -EOPNOTSUPP;
1826 rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1827 xid);
1828 free_xid(xid);
1829 return rc;
1833 * update the file size (if needed) after a write. Should be called with
1834 * the inode->i_lock held
1836 void
1837 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1838 unsigned int bytes_written)
1840 loff_t end_of_write = offset + bytes_written;
1842 if (end_of_write > cifsi->server_eof)
1843 cifsi->server_eof = end_of_write;
1846 static ssize_t
1847 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1848 size_t write_size, loff_t *offset)
1850 int rc = 0;
1851 unsigned int bytes_written = 0;
1852 unsigned int total_written;
1853 struct cifs_tcon *tcon;
1854 struct TCP_Server_Info *server;
1855 unsigned int xid;
1856 struct dentry *dentry = open_file->dentry;
1857 struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1858 struct cifs_io_parms io_parms = {0};
1860 cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1861 write_size, *offset, dentry);
1863 tcon = tlink_tcon(open_file->tlink);
1864 server = tcon->ses->server;
1866 if (!server->ops->sync_write)
1867 return -ENOSYS;
1869 xid = get_xid();
1871 for (total_written = 0; write_size > total_written;
1872 total_written += bytes_written) {
1873 rc = -EAGAIN;
1874 while (rc == -EAGAIN) {
1875 struct kvec iov[2];
1876 unsigned int len;
1878 if (open_file->invalidHandle) {
1879 /* we could deadlock if we called
1880 filemap_fdatawait from here so tell
1881 reopen_file not to flush data to
1882 server now */
1883 rc = cifs_reopen_file(open_file, false);
1884 if (rc != 0)
1885 break;
1888 len = min(server->ops->wp_retry_size(d_inode(dentry)),
1889 (unsigned int)write_size - total_written);
1890 /* iov[0] is reserved for smb header */
1891 iov[1].iov_base = (char *)write_data + total_written;
1892 iov[1].iov_len = len;
1893 io_parms.pid = pid;
1894 io_parms.tcon = tcon;
1895 io_parms.offset = *offset;
1896 io_parms.length = len;
1897 rc = server->ops->sync_write(xid, &open_file->fid,
1898 &io_parms, &bytes_written, iov, 1);
1900 if (rc || (bytes_written == 0)) {
1901 if (total_written)
1902 break;
1903 else {
1904 free_xid(xid);
1905 return rc;
1907 } else {
1908 spin_lock(&d_inode(dentry)->i_lock);
1909 cifs_update_eof(cifsi, *offset, bytes_written);
1910 spin_unlock(&d_inode(dentry)->i_lock);
1911 *offset += bytes_written;
1915 cifs_stats_bytes_written(tcon, total_written);
1917 if (total_written > 0) {
1918 spin_lock(&d_inode(dentry)->i_lock);
1919 if (*offset > d_inode(dentry)->i_size)
1920 i_size_write(d_inode(dentry), *offset);
1921 spin_unlock(&d_inode(dentry)->i_lock);
1923 mark_inode_dirty_sync(d_inode(dentry));
1924 free_xid(xid);
1925 return total_written;
1928 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1929 bool fsuid_only)
1931 struct cifsFileInfo *open_file = NULL;
1932 struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1934 /* only filter by fsuid on multiuser mounts */
1935 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1936 fsuid_only = false;
1938 spin_lock(&cifs_inode->open_file_lock);
1939 /* we could simply get the first_list_entry since write-only entries
1940 are always at the end of the list but since the first entry might
1941 have a close pending, we go through the whole list */
1942 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1943 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1944 continue;
1945 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1946 if (!open_file->invalidHandle) {
1947 /* found a good file */
1948 /* lock it so it will not be closed on us */
1949 cifsFileInfo_get(open_file);
1950 spin_unlock(&cifs_inode->open_file_lock);
1951 return open_file;
1952 } /* else might as well continue, and look for
1953 another, or simply have the caller reopen it
1954 again rather than trying to fix this handle */
1955 } else /* write only file */
1956 break; /* write only files are last so must be done */
1958 spin_unlock(&cifs_inode->open_file_lock);
1959 return NULL;
1962 /* Return -EBADF if no handle is found and general rc otherwise */
1964 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
1965 struct cifsFileInfo **ret_file)
1967 struct cifsFileInfo *open_file, *inv_file = NULL;
1968 struct cifs_sb_info *cifs_sb;
1969 bool any_available = false;
1970 int rc = -EBADF;
1971 unsigned int refind = 0;
1972 bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
1973 bool with_delete = flags & FIND_WR_WITH_DELETE;
1974 *ret_file = NULL;
1977 * Having a null inode here (because mapping->host was set to zero by
1978 * the VFS or MM) should not happen but we had reports of on oops (due
1979 * to it being zero) during stress testcases so we need to check for it
1982 if (cifs_inode == NULL) {
1983 cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
1984 dump_stack();
1985 return rc;
1988 cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1990 /* only filter by fsuid on multiuser mounts */
1991 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1992 fsuid_only = false;
1994 spin_lock(&cifs_inode->open_file_lock);
1995 refind_writable:
1996 if (refind > MAX_REOPEN_ATT) {
1997 spin_unlock(&cifs_inode->open_file_lock);
1998 return rc;
2000 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2001 if (!any_available && open_file->pid != current->tgid)
2002 continue;
2003 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2004 continue;
2005 if (with_delete && !(open_file->fid.access & DELETE))
2006 continue;
2007 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2008 if (!open_file->invalidHandle) {
2009 /* found a good writable file */
2010 cifsFileInfo_get(open_file);
2011 spin_unlock(&cifs_inode->open_file_lock);
2012 *ret_file = open_file;
2013 return 0;
2014 } else {
2015 if (!inv_file)
2016 inv_file = open_file;
2020 /* couldn't find useable FH with same pid, try any available */
2021 if (!any_available) {
2022 any_available = true;
2023 goto refind_writable;
2026 if (inv_file) {
2027 any_available = false;
2028 cifsFileInfo_get(inv_file);
2031 spin_unlock(&cifs_inode->open_file_lock);
2033 if (inv_file) {
2034 rc = cifs_reopen_file(inv_file, false);
2035 if (!rc) {
2036 *ret_file = inv_file;
2037 return 0;
2040 spin_lock(&cifs_inode->open_file_lock);
2041 list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2042 spin_unlock(&cifs_inode->open_file_lock);
2043 cifsFileInfo_put(inv_file);
2044 ++refind;
2045 inv_file = NULL;
2046 spin_lock(&cifs_inode->open_file_lock);
2047 goto refind_writable;
2050 return rc;
2053 struct cifsFileInfo *
2054 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2056 struct cifsFileInfo *cfile;
2057 int rc;
2059 rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2060 if (rc)
2061 cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2063 return cfile;
2067 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2068 int flags,
2069 struct cifsFileInfo **ret_file)
2071 struct list_head *tmp;
2072 struct cifsFileInfo *cfile;
2073 struct cifsInodeInfo *cinode;
2074 char *full_path;
2076 *ret_file = NULL;
2078 spin_lock(&tcon->open_file_lock);
2079 list_for_each(tmp, &tcon->openFileList) {
2080 cfile = list_entry(tmp, struct cifsFileInfo,
2081 tlist);
2082 full_path = build_path_from_dentry(cfile->dentry);
2083 if (full_path == NULL) {
2084 spin_unlock(&tcon->open_file_lock);
2085 return -ENOMEM;
2087 if (strcmp(full_path, name)) {
2088 kfree(full_path);
2089 continue;
2092 kfree(full_path);
2093 cinode = CIFS_I(d_inode(cfile->dentry));
2094 spin_unlock(&tcon->open_file_lock);
2095 return cifs_get_writable_file(cinode, flags, ret_file);
2098 spin_unlock(&tcon->open_file_lock);
2099 return -ENOENT;
2103 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2104 struct cifsFileInfo **ret_file)
2106 struct list_head *tmp;
2107 struct cifsFileInfo *cfile;
2108 struct cifsInodeInfo *cinode;
2109 char *full_path;
2111 *ret_file = NULL;
2113 spin_lock(&tcon->open_file_lock);
2114 list_for_each(tmp, &tcon->openFileList) {
2115 cfile = list_entry(tmp, struct cifsFileInfo,
2116 tlist);
2117 full_path = build_path_from_dentry(cfile->dentry);
2118 if (full_path == NULL) {
2119 spin_unlock(&tcon->open_file_lock);
2120 return -ENOMEM;
2122 if (strcmp(full_path, name)) {
2123 kfree(full_path);
2124 continue;
2127 kfree(full_path);
2128 cinode = CIFS_I(d_inode(cfile->dentry));
2129 spin_unlock(&tcon->open_file_lock);
2130 *ret_file = find_readable_file(cinode, 0);
2131 return *ret_file ? 0 : -ENOENT;
2134 spin_unlock(&tcon->open_file_lock);
2135 return -ENOENT;
2138 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2140 struct address_space *mapping = page->mapping;
2141 loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2142 char *write_data;
2143 int rc = -EFAULT;
2144 int bytes_written = 0;
2145 struct inode *inode;
2146 struct cifsFileInfo *open_file;
2148 if (!mapping || !mapping->host)
2149 return -EFAULT;
2151 inode = page->mapping->host;
2153 offset += (loff_t)from;
2154 write_data = kmap(page);
2155 write_data += from;
2157 if ((to > PAGE_SIZE) || (from > to)) {
2158 kunmap(page);
2159 return -EIO;
2162 /* racing with truncate? */
2163 if (offset > mapping->host->i_size) {
2164 kunmap(page);
2165 return 0; /* don't care */
2168 /* check to make sure that we are not extending the file */
2169 if (mapping->host->i_size - offset < (loff_t)to)
2170 to = (unsigned)(mapping->host->i_size - offset);
2172 rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2173 &open_file);
2174 if (!rc) {
2175 bytes_written = cifs_write(open_file, open_file->pid,
2176 write_data, to - from, &offset);
2177 cifsFileInfo_put(open_file);
2178 /* Does mm or vfs already set times? */
2179 inode->i_atime = inode->i_mtime = current_time(inode);
2180 if ((bytes_written > 0) && (offset))
2181 rc = 0;
2182 else if (bytes_written < 0)
2183 rc = bytes_written;
2184 else
2185 rc = -EFAULT;
2186 } else {
2187 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2188 if (!is_retryable_error(rc))
2189 rc = -EIO;
2192 kunmap(page);
2193 return rc;
2196 static struct cifs_writedata *
2197 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2198 pgoff_t end, pgoff_t *index,
2199 unsigned int *found_pages)
2201 struct cifs_writedata *wdata;
2203 wdata = cifs_writedata_alloc((unsigned int)tofind,
2204 cifs_writev_complete);
2205 if (!wdata)
2206 return NULL;
2208 *found_pages = find_get_pages_range_tag(mapping, index, end,
2209 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2210 return wdata;
/*
 * Walk the first found_pages entries of wdata->pages and lock, clean and tag
 * for writeback a run of consecutive pages.
 *
 * Returns the number of pages accepted (they occupy wdata->pages[0..n)).
 * References on every remaining candidate are dropped and their slots are
 * set to NULL. *next is advanced to the index following the last accepted
 * page. *done is set when a page lies beyond end (non-cyclic writeback) or at
 * or past the inode size. When nothing is accepted, *index is moved to just
 * after the first candidate page.
 */
2213 static unsigned int
2214 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
2215 struct address_space *mapping,
2216 struct writeback_control *wbc,
2217 pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2219 unsigned int nr_pages = 0, i;
2220 struct page *page;
2222 for (i = 0; i < found_pages; i++) {
2223 page = wdata->pages[i];
2225 * At this point we hold neither the i_pages lock nor the
2226 * page lock: the page may be truncated or invalidated
2227 * (changing page->mapping to NULL), or even swizzled
2228 * back from swapper_space to tmpfs file mapping
/*
 * Only the first page of the run is waited for; a later page that cannot be
 * locked immediately ends the run.
 */
2231 if (nr_pages == 0)
2232 lock_page(page);
2233 else if (!trylock_page(page))
2234 break;
/* the page no longer belongs to this mapping: stop here */
2236 if (unlikely(page->mapping != mapping)) {
2237 unlock_page(page);
2238 break;
/* past the requested range in non-cyclic mode: tell the caller to finish */
2241 if (!wbc->range_cyclic && page->index > end) {
2242 *done = true;
2243 unlock_page(page);
2244 break;
/* *next == 0 means no page has been accepted yet, so any index is allowed */
2247 if (*next && (page->index != *next)) {
2248 /* Not next consecutive page */
2249 unlock_page(page);
2250 break;
2253 if (wbc->sync_mode != WB_SYNC_NONE)
2254 wait_on_page_writeback(page);
/* skip pages still under writeback or that are no longer dirty */
2256 if (PageWriteback(page) ||
2257 !clear_page_dirty_for_io(page)) {
2258 unlock_page(page);
2259 break;
2263 * This actually clears the dirty bit in the radix tree.
2264 * See cifs_writepage() for more commentary.
2266 set_page_writeback(page);
/* page starts at or beyond i_size: undo the writeback tag and stop */
2267 if (page_offset(page) >= i_size_read(mapping->host)) {
2268 *done = true;
2269 unlock_page(page);
2270 end_page_writeback(page);
2271 break;
/* accepted pages stay locked; the caller unlocks them after submission */
2274 wdata->pages[i] = page;
2275 *next = page->index + 1;
2276 ++nr_pages;
2279 /* reset index to refind any pages skipped */
2280 if (nr_pages == 0)
2281 *index = wdata->pages[0]->index + 1;
2283 /* put any pages we aren't going to use */
2284 for (i = nr_pages; i < found_pages; i++) {
2285 put_page(wdata->pages[i]);
2286 wdata->pages[i] = NULL;
2289 return nr_pages;
2292 static int
2293 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2294 struct address_space *mapping, struct writeback_control *wbc)
2296 int rc;
2298 wdata->sync_mode = wbc->sync_mode;
2299 wdata->nr_pages = nr_pages;
2300 wdata->offset = page_offset(wdata->pages[0]);
2301 wdata->pagesz = PAGE_SIZE;
2302 wdata->tailsz = min(i_size_read(mapping->host) -
2303 page_offset(wdata->pages[nr_pages - 1]),
2304 (loff_t)PAGE_SIZE);
2305 wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2306 wdata->pid = wdata->cfile->pid;
2308 rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2309 if (rc)
2310 return rc;
2312 if (wdata->cfile->invalidHandle)
2313 rc = -EAGAIN;
2314 else
2315 rc = wdata->server->ops->async_writev(wdata,
2316 cifs_writedata_release);
2318 return rc;
/*
 * Write back dirty pages of mapping in batches sized by the write credits
 * obtained from the server, submitting each batch as an asynchronous write.
 *
 * When the mount's wsize is smaller than a page, the work is delegated to
 * generic_writepages(). Otherwise each loop iteration takes a writable file
 * handle, waits for credits, collects up to tofind dirty pages, prepares
 * them with wdata_prepare_pages() and sends them with wdata_send_pages().
 *
 * Returns the first error recorded in saved_rc if there is one, otherwise
 * the rc of the last operation performed.
 */
2321 static int cifs_writepages(struct address_space *mapping,
2322 struct writeback_control *wbc)
2324 struct inode *inode = mapping->host;
2325 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2326 struct TCP_Server_Info *server;
2327 bool done = false, scanned = false, range_whole = false;
2328 pgoff_t end, index;
2329 struct cifs_writedata *wdata;
2330 struct cifsFileInfo *cfile = NULL;
2331 int rc = 0;
2332 int saved_rc = 0;
2333 unsigned int xid;
2336 * If wsize is smaller than the page cache size, default to writing
2337 * one page at a time via cifs_writepage
2339 if (cifs_sb->ctx->wsize < PAGE_SIZE)
2340 return generic_writepages(mapping, wbc);
2342 xid = get_xid();
/* pick the page-index window to scan */
2343 if (wbc->range_cyclic) {
2344 index = mapping->writeback_index; /* Start from prev offset */
2345 end = -1;
2346 } else {
2347 index = wbc->range_start >> PAGE_SHIFT;
2348 end = wbc->range_end >> PAGE_SHIFT;
2349 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2350 range_whole = true;
/* an explicit range is never wrapped back to index 0 below */
2351 scanned = true;
2353 server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2355 retry:
2356 while (!done && index <= end) {
2357 unsigned int i, nr_pages, found_pages, wsize;
2358 pgoff_t next = 0, tofind, saved_index = index;
2359 struct cifs_credits credits_on_stack;
2360 struct cifs_credits *credits = &credits_on_stack;
2361 int get_file_rc = 0;
/* drop a handle left over from an iteration that did not hand it to a wdata */
2363 if (cfile)
2364 cifsFileInfo_put(cfile);
2366 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2368 /* in case of an error store it to return later */
2369 if (rc)
2370 get_file_rc = rc;
2372 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2373 &wsize, credits);
2374 if (rc != 0) {
2375 done = true;
2376 break;
/* number of pages to look for: bounded by the granted wsize and by the window */
2379 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2381 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2382 &found_pages);
2383 if (!wdata) {
2384 rc = -ENOMEM;
2385 done = true;
2386 add_credits_and_wake_if(server, credits, 0);
2387 break;
/* no dirty pages left in the window: release wdata and credits, then stop */
2390 if (found_pages == 0) {
2391 kref_put(&wdata->refcount, cifs_writedata_release);
2392 add_credits_and_wake_if(server, credits, 0);
2393 break;
2396 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2397 end, &index, &next, &done);
2399 /* nothing to write? */
2400 if (nr_pages == 0) {
2401 kref_put(&wdata->refcount, cifs_writedata_release);
2402 add_credits_and_wake_if(server, credits, 0);
2403 continue;
/*
 * wdata now carries the credits and the handle pointer; the local cfile is
 * cleared so the puts at the loop top and at function exit skip it.
 */
2406 wdata->credits = credits_on_stack;
2407 wdata->cfile = cfile;
2408 wdata->server = server;
2409 cfile = NULL;
2411 if (!wdata->cfile) {
2412 cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2413 get_file_rc);
2414 if (is_retryable_error(get_file_rc))
2415 rc = get_file_rc;
2416 else
2417 rc = -EBADF;
2418 } else
2419 rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
/* the pages were left locked by wdata_prepare_pages() */
2421 for (i = 0; i < nr_pages; ++i)
2422 unlock_page(wdata->pages[i]);
2424 /* send failure -- clean up the mess */
2425 if (rc != 0) {
2426 add_credits_and_wake_if(server, &wdata->credits, 0);
2427 for (i = 0; i < nr_pages; ++i) {
2428 if (is_retryable_error(rc))
2429 redirty_page_for_writepage(wbc,
2430 wdata->pages[i]);
2431 else
2432 SetPageError(wdata->pages[i]);
2433 end_page_writeback(wdata->pages[i]);
2434 put_page(wdata->pages[i]);
2436 if (!is_retryable_error(rc))
2437 mapping_set_error(mapping, rc);
/* drop this function's reference on wdata */
2439 kref_put(&wdata->refcount, cifs_writedata_release);
/* data-integrity writeback: rewind to the start of this batch and try again */
2441 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2442 index = saved_index;
2443 continue;
2446 /* Return immediately if we received a signal during writing */
2447 if (is_interrupt_error(rc)) {
2448 done = true;
2449 break;
/* remember only the first failure; later batches are still attempted */
2452 if (rc != 0 && saved_rc == 0)
2453 saved_rc = rc;
2455 wbc->nr_to_write -= nr_pages;
2456 if (wbc->nr_to_write <= 0)
2457 done = true;
2459 index = next;
2462 if (!scanned && !done) {
2464 * We hit the last page and there is more work to be done: wrap
2465 * back to the start of the file
2467 scanned = true;
2468 index = 0;
2469 goto retry;
2472 if (saved_rc != 0)
2473 rc = saved_rc;
/* record where the next cyclic writeback pass should resume */
2475 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2476 mapping->writeback_index = index;
2478 if (cfile)
2479 cifsFileInfo_put(cfile);
2480 free_xid(xid);
2481 return rc;
2484 static int
2485 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2487 int rc;
2488 unsigned int xid;
2490 xid = get_xid();
2491 /* BB add check for wbc flags */
2492 get_page(page);
2493 if (!PageUptodate(page))
2494 cifs_dbg(FYI, "ppw - page not up to date\n");
2497 * Set the "writeback" flag, and clear "dirty" in the radix tree.
2499 * A writepage() implementation always needs to do either this,
2500 * or re-dirty the page with "redirty_page_for_writepage()" in
2501 * the case of a failure.
2503 * Just unlocking the page will cause the radix tree tag-bits
2504 * to fail to update with the state of the page correctly.
2506 set_page_writeback(page);
2507 retry_write:
2508 rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2509 if (is_retryable_error(rc)) {
2510 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2511 goto retry_write;
2512 redirty_page_for_writepage(wbc, page);
2513 } else if (rc != 0) {
2514 SetPageError(page);
2515 mapping_set_error(page->mapping, rc);
2516 } else {
2517 SetPageUptodate(page);
2519 end_page_writeback(page);
2520 put_page(page);
2521 free_xid(xid);
2522 return rc;
/*
 * ->writepage entry point: write the locked page with
 * cifs_writepage_locked(), then unlock it whatever the outcome.
 */
static int cifs_writepage(struct page *page, struct writeback_control *wbc)
{
	int result;

	result = cifs_writepage_locked(page, wbc);
	unlock_page(page);

	return result;
}
2532 static int cifs_write_end(struct file *file, struct address_space *mapping,
2533 loff_t pos, unsigned len, unsigned copied,
2534 struct page *page, void *fsdata)
2536 int rc;
2537 struct inode *inode = mapping->host;
2538 struct cifsFileInfo *cfile = file->private_data;
2539 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2540 __u32 pid;
2542 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2543 pid = cfile->pid;
2544 else
2545 pid = current->tgid;
2547 cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2548 page, pos, copied);
2550 if (PageChecked(page)) {
2551 if (copied == len)
2552 SetPageUptodate(page);
2553 ClearPageChecked(page);
2554 } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2555 SetPageUptodate(page);
2557 if (!PageUptodate(page)) {
2558 char *page_data;
2559 unsigned offset = pos & (PAGE_SIZE - 1);
2560 unsigned int xid;
2562 xid = get_xid();
2563 /* this is probably better than directly calling
2564 partialpage_write since in this function the file handle is
2565 known which we might as well leverage */
2566 /* BB check if anything else missing out of ppw
2567 such as updating last write time */
2568 page_data = kmap(page);
2569 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2570 /* if (rc < 0) should we set writebehind rc? */
2571 kunmap(page);
2573 free_xid(xid);
2574 } else {
2575 rc = copied;
2576 pos += copied;
2577 set_page_dirty(page);
2580 if (rc > 0) {
2581 spin_lock(&inode->i_lock);
2582 if (pos > inode->i_size)
2583 i_size_write(inode, pos);
2584 spin_unlock(&inode->i_lock);
2587 unlock_page(page);
2588 put_page(page);
2590 return rc;
2593 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2594 int datasync)
2596 unsigned int xid;
2597 int rc = 0;
2598 struct cifs_tcon *tcon;
2599 struct TCP_Server_Info *server;
2600 struct cifsFileInfo *smbfile = file->private_data;
2601 struct inode *inode = file_inode(file);
2602 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2604 rc = file_write_and_wait_range(file, start, end);
2605 if (rc) {
2606 trace_cifs_fsync_err(inode->i_ino, rc);
2607 return rc;
2610 xid = get_xid();
2612 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2613 file, datasync);
2615 if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2616 rc = cifs_zap_mapping(inode);
2617 if (rc) {
2618 cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2619 rc = 0; /* don't care about it in fsync */
2623 tcon = tlink_tcon(smbfile->tlink);
2624 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2625 server = tcon->ses->server;
2626 if (server->ops->flush)
2627 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2628 else
2629 rc = -ENOSYS;
2632 free_xid(xid);
2633 return rc;
2636 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2638 unsigned int xid;
2639 int rc = 0;
2640 struct cifs_tcon *tcon;
2641 struct TCP_Server_Info *server;
2642 struct cifsFileInfo *smbfile = file->private_data;
2643 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2645 rc = file_write_and_wait_range(file, start, end);
2646 if (rc) {
2647 trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
2648 return rc;
2651 xid = get_xid();
2653 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2654 file, datasync);
2656 tcon = tlink_tcon(smbfile->tlink);
2657 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2658 server = tcon->ses->server;
2659 if (server->ops->flush)
2660 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2661 else
2662 rc = -ENOSYS;
2665 free_xid(xid);
2666 return rc;
2670 * As file closes, flush all cached write data for this inode checking
2671 * for write behind errors.
2673 int cifs_flush(struct file *file, fl_owner_t id)
2675 struct inode *inode = file_inode(file);
2676 int rc = 0;
2678 if (file->f_mode & FMODE_WRITE)
2679 rc = filemap_write_and_wait(inode->i_mapping);
2681 cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2682 if (rc)
2683 trace_cifs_flush_err(inode->i_ino, rc);
2684 return rc;
2687 static int
2688 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2690 int rc = 0;
2691 unsigned long i;
2693 for (i = 0; i < num_pages; i++) {
2694 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2695 if (!pages[i]) {
2697 * save number of pages we have already allocated and
2698 * return with ENOMEM error
2700 num_pages = i;
2701 rc = -ENOMEM;
2702 break;
2706 if (rc) {
2707 for (i = 0; i < num_pages; i++)
2708 put_page(pages[i]);
2710 return rc;
2713 static inline
2714 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2716 size_t num_pages;
2717 size_t clen;
2719 clen = min_t(const size_t, len, wsize);
2720 num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2722 if (cur_len)
2723 *cur_len = clen;
2725 return num_pages;
2728 static void
2729 cifs_uncached_writedata_release(struct kref *refcount)
2731 int i;
2732 struct cifs_writedata *wdata = container_of(refcount,
2733 struct cifs_writedata, refcount);
2735 kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2736 for (i = 0; i < wdata->nr_pages; i++)
2737 put_page(wdata->pages[i]);
2738 cifs_writedata_release(refcount);
2741 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2743 static void
2744 cifs_uncached_writev_complete(struct work_struct *work)
2746 struct cifs_writedata *wdata = container_of(work,
2747 struct cifs_writedata, work);
2748 struct inode *inode = d_inode(wdata->cfile->dentry);
2749 struct cifsInodeInfo *cifsi = CIFS_I(inode);
2751 spin_lock(&inode->i_lock);
2752 cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2753 if (cifsi->server_eof > inode->i_size)
2754 i_size_write(inode, cifsi->server_eof);
2755 spin_unlock(&inode->i_lock);
2757 complete(&wdata->done);
2758 collect_uncached_write_data(wdata->ctx);
2759 /* the below call can possibly free the last ref to aio ctx */
2760 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2763 static int
2764 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2765 size_t *len, unsigned long *num_pages)
2767 size_t save_len, copied, bytes, cur_len = *len;
2768 unsigned long i, nr_pages = *num_pages;
2770 save_len = cur_len;
2771 for (i = 0; i < nr_pages; i++) {
2772 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2773 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2774 cur_len -= copied;
2776 * If we didn't copy as much as we expected, then that
2777 * may mean we trod into an unmapped area. Stop copying
2778 * at that point. On the next pass through the big
2779 * loop, we'll likely end up getting a zero-length
2780 * write and bailing out of it.
2782 if (copied < bytes)
2783 break;
2785 cur_len = save_len - cur_len;
2786 *len = cur_len;
2789 * If we have no data to send, then that probably means that
2790 * the copy above failed altogether. That's most likely because
2791 * the address in the iovec was bogus. Return -EFAULT and let
2792 * the caller free anything we allocated and bail out.
2794 if (!cur_len)
2795 return -EFAULT;
2798 * i + 1 now represents the number of pages we actually used in
2799 * the copy phase above.
2801 *num_pages = i + 1;
2802 return 0;
/*
 * Resend an already built wdata as a single request.
 *
 * The outer loop repeats for as long as rc is -EAGAIN. On a successful
 * submission the wdata is appended to wdata_list and 0 is returned. On any
 * other outcome a reference on the wdata is dropped and the error is
 * returned. The ctx argument is not referenced in this function.
 */
2805 static int
2806 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
2807 struct cifs_aio_ctx *ctx)
2809 unsigned int wsize;
2810 struct cifs_credits credits;
2811 int rc;
2812 struct TCP_Server_Info *server = wdata->server;
2814 do {
/* reopen a stale handle first; -EAGAIN from the reopen restarts the outer loop */
2815 if (wdata->cfile->invalidHandle) {
2816 rc = cifs_reopen_file(wdata->cfile, false);
2817 if (rc == -EAGAIN)
2818 continue;
2819 else if (rc)
2820 break;
2825 * Wait for credits to resend this wdata.
2826 * Note: we are attempting to resend the whole wdata not in
2827 * segments
2829 do {
2830 rc = server->ops->wait_mtu_credits(server, wdata->bytes,
2831 &wsize, &credits);
2832 if (rc)
2833 goto fail;
/* grant too small for the whole wdata: give it back, sleep one second, ask again */
2835 if (wsize < wdata->bytes) {
2836 add_credits_and_wake_if(server, &credits, 0);
2837 msleep(1000);
2839 } while (wsize < wdata->bytes);
2840 wdata->credits = credits;
2842 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2844 if (!rc) {
/* the handle may have been invalidated while waiting for credits */
2845 if (wdata->cfile->invalidHandle)
2846 rc = -EAGAIN;
2847 else {
2848 #ifdef CONFIG_CIFS_SMB_DIRECT
/* deregister the memory registration left on this wdata before sending again */
2849 if (wdata->mr) {
2850 wdata->mr->need_invalidate = true;
2851 smbd_deregister_mr(wdata->mr);
2852 wdata->mr = NULL;
2854 #endif
2855 rc = server->ops->async_writev(wdata,
2856 cifs_uncached_writedata_release);
2860 /* If the write was successfully sent, we are done */
2861 if (!rc) {
2862 list_add_tail(&wdata->list, wdata_list);
2863 return 0;
2866 /* Roll back credits and retry if needed */
2867 add_credits_and_wake_if(server, &wdata->credits, 0);
2868 } while (rc == -EAGAIN);
2870 fail:
/* every failure path ends here and drops a reference on the wdata */
2871 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2872 return rc;
2875 static int
2876 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2877 struct cifsFileInfo *open_file,
2878 struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2879 struct cifs_aio_ctx *ctx)
2881 int rc = 0;
2882 size_t cur_len;
2883 unsigned long nr_pages, num_pages, i;
2884 struct cifs_writedata *wdata;
2885 struct iov_iter saved_from = *from;
2886 loff_t saved_offset = offset;
2887 pid_t pid;
2888 struct TCP_Server_Info *server;
2889 struct page **pagevec;
2890 size_t start;
2891 unsigned int xid;
2893 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2894 pid = open_file->pid;
2895 else
2896 pid = current->tgid;
2898 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
2899 xid = get_xid();
2901 do {
2902 unsigned int wsize;
2903 struct cifs_credits credits_on_stack;
2904 struct cifs_credits *credits = &credits_on_stack;
2906 if (open_file->invalidHandle) {
2907 rc = cifs_reopen_file(open_file, false);
2908 if (rc == -EAGAIN)
2909 continue;
2910 else if (rc)
2911 break;
2914 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2915 &wsize, credits);
2916 if (rc)
2917 break;
2919 cur_len = min_t(const size_t, len, wsize);
2921 if (ctx->direct_io) {
2922 ssize_t result;
2924 result = iov_iter_get_pages_alloc(
2925 from, &pagevec, cur_len, &start);
2926 if (result < 0) {
2927 cifs_dbg(VFS,
2928 "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
2929 result, iov_iter_type(from),
2930 from->iov_offset, from->count);
2931 dump_stack();
2933 rc = result;
2934 add_credits_and_wake_if(server, credits, 0);
2935 break;
2937 cur_len = (size_t)result;
2938 iov_iter_advance(from, cur_len);
2940 nr_pages =
2941 (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
2943 wdata = cifs_writedata_direct_alloc(pagevec,
2944 cifs_uncached_writev_complete);
2945 if (!wdata) {
2946 rc = -ENOMEM;
2947 add_credits_and_wake_if(server, credits, 0);
2948 break;
2952 wdata->page_offset = start;
2953 wdata->tailsz =
2954 nr_pages > 1 ?
2955 cur_len - (PAGE_SIZE - start) -
2956 (nr_pages - 2) * PAGE_SIZE :
2957 cur_len;
2958 } else {
2959 nr_pages = get_numpages(wsize, len, &cur_len);
2960 wdata = cifs_writedata_alloc(nr_pages,
2961 cifs_uncached_writev_complete);
2962 if (!wdata) {
2963 rc = -ENOMEM;
2964 add_credits_and_wake_if(server, credits, 0);
2965 break;
2968 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2969 if (rc) {
2970 kvfree(wdata->pages);
2971 kfree(wdata);
2972 add_credits_and_wake_if(server, credits, 0);
2973 break;
2976 num_pages = nr_pages;
2977 rc = wdata_fill_from_iovec(
2978 wdata, from, &cur_len, &num_pages);
2979 if (rc) {
2980 for (i = 0; i < nr_pages; i++)
2981 put_page(wdata->pages[i]);
2982 kvfree(wdata->pages);
2983 kfree(wdata);
2984 add_credits_and_wake_if(server, credits, 0);
2985 break;
2989 * Bring nr_pages down to the number of pages we
2990 * actually used, and free any pages that we didn't use.
2992 for ( ; nr_pages > num_pages; nr_pages--)
2993 put_page(wdata->pages[nr_pages - 1]);
2995 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2998 wdata->sync_mode = WB_SYNC_ALL;
2999 wdata->nr_pages = nr_pages;
3000 wdata->offset = (__u64)offset;
3001 wdata->cfile = cifsFileInfo_get(open_file);
3002 wdata->server = server;
3003 wdata->pid = pid;
3004 wdata->bytes = cur_len;
3005 wdata->pagesz = PAGE_SIZE;
3006 wdata->credits = credits_on_stack;
3007 wdata->ctx = ctx;
3008 kref_get(&ctx->refcount);
3010 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3012 if (!rc) {
3013 if (wdata->cfile->invalidHandle)
3014 rc = -EAGAIN;
3015 else
3016 rc = server->ops->async_writev(wdata,
3017 cifs_uncached_writedata_release);
3020 if (rc) {
3021 add_credits_and_wake_if(server, &wdata->credits, 0);
3022 kref_put(&wdata->refcount,
3023 cifs_uncached_writedata_release);
3024 if (rc == -EAGAIN) {
3025 *from = saved_from;
3026 iov_iter_advance(from, offset - saved_offset);
3027 continue;
3029 break;
3032 list_add_tail(&wdata->list, wdata_list);
3033 offset += cur_len;
3034 len -= cur_len;
3035 } while (len > 0);
3037 free_xid(xid);
3038 return rc;
/*
 * Gather the results of the uncached write requests queued on ctx->list.
 *
 * Runs under ctx->aio_mutex. Returns early, without completing the ctx, if
 * the list is empty or if a request that must be waited for has not yet
 * completed. A request that failed with -EAGAIN is resent and the scan is
 * restarted. Once every request is accounted for, ctx->rc is set to the
 * total bytes written or to the error, and the waiter is notified through
 * the iocb's ki_complete callback or through ctx->done.
 */
3041 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3043 struct cifs_writedata *wdata, *tmp;
3044 struct cifs_tcon *tcon;
3045 struct cifs_sb_info *cifs_sb;
3046 struct dentry *dentry = ctx->cfile->dentry;
3047 int rc;
3049 tcon = tlink_tcon(ctx->cfile->tlink);
3050 cifs_sb = CIFS_SB(dentry->d_sb);
3052 mutex_lock(&ctx->aio_mutex);
/* nothing queued: leave without signalling completion */
3054 if (list_empty(&ctx->list)) {
3055 mutex_unlock(&ctx->aio_mutex);
3056 return;
3059 rc = ctx->rc;
3061 * Wait for and collect replies for any successful sends in order of
3062 * increasing offset. Once an error is hit, then return without waiting
3063 * for any more replies.
3065 restart_loop:
3066 list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3067 if (!rc) {
/* non-blocking check: if this request is still in flight, bail out for now */
3068 if (!try_wait_for_completion(&wdata->done)) {
3069 mutex_unlock(&ctx->aio_mutex);
3070 return;
3073 if (wdata->result)
3074 rc = wdata->result;
3075 else
3076 ctx->total_len += wdata->bytes;
3078 /* resend call if it's a retryable error */
3079 if (rc == -EAGAIN) {
3080 struct list_head tmp_list;
3081 struct iov_iter tmp_from = ctx->iter;
3083 INIT_LIST_HEAD(&tmp_list);
3084 list_del_init(&wdata->list);
/* direct I/O resends the same wdata; otherwise new requests are built from the iterator */
3086 if (ctx->direct_io)
3087 rc = cifs_resend_wdata(
3088 wdata, &tmp_list, ctx);
3089 else {
3090 iov_iter_advance(&tmp_from,
3091 wdata->offset - ctx->pos);
3093 rc = cifs_write_from_iter(wdata->offset,
3094 wdata->bytes, &tmp_from,
3095 ctx->cfile, cifs_sb, &tmp_list,
3096 ctx);
3098 kref_put(&wdata->refcount,
3099 cifs_uncached_writedata_release);
/* put the requests created by the resend on the ctx list and rescan from the head */
3102 list_splice(&tmp_list, &ctx->list);
3103 goto restart_loop;
/* this request is accounted for (or skipped after an error): unlink and release it */
3106 list_del_init(&wdata->list);
3107 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3110 cifs_stats_bytes_written(tcon, ctx->total_len);
3111 set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3113 ctx->rc = (rc == 0) ? ctx->total_len : rc;
3115 mutex_unlock(&ctx->aio_mutex);
/* async callers are notified through the iocb; sync callers wait on ctx->done */
3117 if (ctx->iocb && ctx->iocb->ki_complete)
3118 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3119 else
3120 complete(&ctx->done);
/*
 * Common implementation of the uncached write paths.
 *
 * Builds an aio context, submits the data with cifs_write_from_iter() and,
 * for a synchronous iocb, waits for completion. With direct set, user pages
 * are written in place unless the iterator is a kvec, in which case the
 * non-direct path is used.
 *
 * Returns -EIOCBQUEUED for an asynchronous iocb once at least one request
 * was queued. For a synchronous iocb it returns the number of bytes written
 * (advancing iocb->ki_pos), or an error when nothing was written.
 */
3123 static ssize_t __cifs_writev(
3124 struct kiocb *iocb, struct iov_iter *from, bool direct)
3126 struct file *file = iocb->ki_filp;
3127 ssize_t total_written = 0;
3128 struct cifsFileInfo *cfile;
3129 struct cifs_tcon *tcon;
3130 struct cifs_sb_info *cifs_sb;
3131 struct cifs_aio_ctx *ctx;
/*
 * NOTE(review): saved_from and len are captured before generic_write_checks()
 * runs below; if that call can shorten or reposition the iterator, the values
 * used later would be the pre-check ones - confirm.
 */
3132 struct iov_iter saved_from = *from;
3133 size_t len = iov_iter_count(from);
3134 int rc;
3137 * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
3138 * In this case, fall back to non-direct write function.
3139 * this could be improved by getting pages directly in ITER_KVEC
3141 if (direct && iov_iter_is_kvec(from)) {
3142 cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
3143 direct = false;
3146 rc = generic_write_checks(iocb, from);
3147 if (rc <= 0)
3148 return rc;
3150 cifs_sb = CIFS_FILE_SB(file);
3151 cfile = file->private_data;
3152 tcon = tlink_tcon(cfile->tlink);
/* this path is only usable when the server ops provide async_writev */
3154 if (!tcon->ses->server->ops->async_writev)
3155 return -ENOSYS;
3157 ctx = cifs_aio_ctx_alloc();
3158 if (!ctx)
3159 return -ENOMEM;
3161 ctx->cfile = cifsFileInfo_get(cfile);
/* ctx->iocb is set only for async I/O; the collector uses it to pick the notification path */
3163 if (!is_sync_kiocb(iocb))
3164 ctx->iocb = iocb;
3166 ctx->pos = iocb->ki_pos;
3168 if (direct) {
3169 ctx->direct_io = true;
3170 ctx->iter = *from;
3171 ctx->len = len;
3172 } else {
3173 rc = setup_aio_ctx_iter(ctx, from, WRITE);
3174 if (rc) {
3175 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3176 return rc;
3180 /* grab a lock here because write response handlers can access ctx */
3181 mutex_lock(&ctx->aio_mutex);
3183 rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
3184 cfile, cifs_sb, &ctx->list, ctx);
3187 * If at least one write was successfully sent, then discard any rc
3188 * value from the later writes. If the other write succeeds, then
3189 * we'll end up returning whatever was written. If it fails, then
3190 * we'll get a new rc value from that.
3192 if (!list_empty(&ctx->list))
3193 rc = 0;
3195 mutex_unlock(&ctx->aio_mutex);
/* nothing was queued: release the ctx and report the submission error */
3197 if (rc) {
3198 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3199 return rc;
/* async iocb: drop this function's ctx reference and let the completion path report the result */
3202 if (!is_sync_kiocb(iocb)) {
3203 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3204 return -EIOCBQUEUED;
3207 rc = wait_for_completion_killable(&ctx->done);
3208 if (rc) {
/* wait was interrupted: record -EINTR in ctx under the mutex and read what was collected so far */
3209 mutex_lock(&ctx->aio_mutex);
3210 ctx->rc = rc = -EINTR;
3211 total_written = ctx->total_len;
3212 mutex_unlock(&ctx->aio_mutex);
3213 } else {
3214 rc = ctx->rc;
3215 total_written = ctx->total_len;
3218 kref_put(&ctx->refcount, cifs_aio_ctx_release);
/* the error (or 0) is reported only when no bytes at all were written */
3220 if (unlikely(!total_written))
3221 return rc;
3223 iocb->ki_pos += total_written;
3224 return total_written;
3227 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3229 return __cifs_writev(iocb, from, true);
3232 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3234 return __cifs_writev(iocb, from, false);
3237 static ssize_t
3238 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3240 struct file *file = iocb->ki_filp;
3241 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3242 struct inode *inode = file->f_mapping->host;
3243 struct cifsInodeInfo *cinode = CIFS_I(inode);
3244 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3245 ssize_t rc;
3247 inode_lock(inode);
3249 * We need to hold the sem to be sure nobody modifies lock list
3250 * with a brlock that prevents writing.
3252 down_read(&cinode->lock_sem);
3254 rc = generic_write_checks(iocb, from);
3255 if (rc <= 0)
3256 goto out;
3258 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3259 server->vals->exclusive_lock_type, 0,
3260 NULL, CIFS_WRITE_OP))
3261 rc = __generic_file_write_iter(iocb, from);
3262 else
3263 rc = -EACCES;
3264 out:
3265 up_read(&cinode->lock_sem);
3266 inode_unlock(inode);
3268 if (rc > 0)
3269 rc = generic_write_sync(iocb, rc);
3270 return rc;
3273 ssize_t
3274 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3276 struct inode *inode = file_inode(iocb->ki_filp);
3277 struct cifsInodeInfo *cinode = CIFS_I(inode);
3278 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3279 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3280 iocb->ki_filp->private_data;
3281 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3282 ssize_t written;
3284 written = cifs_get_writer(cinode);
3285 if (written)
3286 return written;
3288 if (CIFS_CACHE_WRITE(cinode)) {
3289 if (cap_unix(tcon->ses) &&
3290 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3291 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3292 written = generic_file_write_iter(iocb, from);
3293 goto out;
3295 written = cifs_writev(iocb, from);
3296 goto out;
3299 * For non-oplocked files in strict cache mode we need to write the data
3300 * to the server exactly from the pos to pos+len-1 rather than flush all
3301 * affected pages because it may cause a error with mandatory locks on
3302 * these pages but not on the region from pos to ppos+len-1.
3304 written = cifs_user_writev(iocb, from);
3305 if (CIFS_CACHE_READ(cinode)) {
3307 * We have read level caching and we have just sent a write
3308 * request to the server thus making data in the cache stale.
3309 * Zap the cache and set oplock/lease level to NONE to avoid
3310 * reading stale data from the cache. All subsequent read
3311 * operations will read new data from the server.
3313 cifs_zap_mapping(inode);
3314 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3315 inode);
3316 cinode->oplock = 0;
3318 out:
3319 cifs_put_writer(cinode);
3320 return written;
3323 static struct cifs_readdata *
3324 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3326 struct cifs_readdata *rdata;
3328 rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3329 if (rdata != NULL) {
3330 rdata->pages = pages;
3331 kref_init(&rdata->refcount);
3332 INIT_LIST_HEAD(&rdata->list);
3333 init_completion(&rdata->done);
3334 INIT_WORK(&rdata->work, complete);
3337 return rdata;
3340 static struct cifs_readdata *
3341 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3343 struct page **pages =
3344 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3345 struct cifs_readdata *ret = NULL;
3347 if (pages) {
3348 ret = cifs_readdata_direct_alloc(pages, complete);
3349 if (!ret)
3350 kfree(pages);
3353 return ret;
3356 void
3357 cifs_readdata_release(struct kref *refcount)
3359 struct cifs_readdata *rdata = container_of(refcount,
3360 struct cifs_readdata, refcount);
3361 #ifdef CONFIG_CIFS_SMB_DIRECT
3362 if (rdata->mr) {
3363 smbd_deregister_mr(rdata->mr);
3364 rdata->mr = NULL;
3366 #endif
3367 if (rdata->cfile)
3368 cifsFileInfo_put(rdata->cfile);
3370 kvfree(rdata->pages);
3371 kfree(rdata);
3374 static int
3375 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3377 int rc = 0;
3378 struct page *page;
3379 unsigned int i;
3381 for (i = 0; i < nr_pages; i++) {
3382 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3383 if (!page) {
3384 rc = -ENOMEM;
3385 break;
3387 rdata->pages[i] = page;
3390 if (rc) {
3391 unsigned int nr_page_failed = i;
3393 for (i = 0; i < nr_page_failed; i++) {
3394 put_page(rdata->pages[i]);
3395 rdata->pages[i] = NULL;
3398 return rc;
3401 static void
3402 cifs_uncached_readdata_release(struct kref *refcount)
3404 struct cifs_readdata *rdata = container_of(refcount,
3405 struct cifs_readdata, refcount);
3406 unsigned int i;
3408 kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3409 for (i = 0; i < rdata->nr_pages; i++) {
3410 put_page(rdata->pages[i]);
3412 cifs_readdata_release(refcount);
3416 * cifs_readdata_to_iov - copy data from pages in response to an iovec
3417 * @rdata: the readdata response with list of pages holding data
3418 * @iter: destination for our data
3420 * This function copies data from a list of pages in a readdata response into
3421 * an array of iovecs. It will first calculate where the data should go
3422 * based on the info in the readdata and then copy the data into that spot.
3424 static int
3425 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3427 size_t remaining = rdata->got_bytes;
3428 unsigned int i;
3430 for (i = 0; i < rdata->nr_pages; i++) {
3431 struct page *page = rdata->pages[i];
3432 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3433 size_t written;
3435 if (unlikely(iov_iter_is_pipe(iter))) {
3436 void *addr = kmap_atomic(page);
3438 written = copy_to_iter(addr, copy, iter);
3439 kunmap_atomic(addr);
3440 } else
3441 written = copy_page_to_iter(page, 0, copy, iter);
3442 remaining -= written;
3443 if (written < copy && iov_iter_count(iter) > 0)
3444 break;
3446 return remaining ? -EFAULT : 0;
3449 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3451 static void
3452 cifs_uncached_readv_complete(struct work_struct *work)
3454 struct cifs_readdata *rdata = container_of(work,
3455 struct cifs_readdata, work);
3457 complete(&rdata->done);
3458 collect_uncached_read_data(rdata->ctx);
3459 /* the below call can possibly free the last ref to aio ctx */
3460 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
/*
 * Distribute len bytes of read data over the pages of rdata.
 *
 * The data comes from iter when it is non-NULL. Otherwise, for a request
 * with an SMB Direct memory registration (CONFIG_CIFS_SMB_DIRECT), the bytes
 * are only counted; in all other cases they are read from the server socket.
 * Pages for which no data is left are released and removed from the request.
 *
 * Updates rdata->got_bytes, rdata->tailsz and rdata->nr_pages. Returns the
 * number of bytes obtained when that is positive and the last result is not
 * -ECONNABORTED; otherwise returns the last result.
 */
3463 static int
3464 uncached_fill_pages(struct TCP_Server_Info *server,
3465 struct cifs_readdata *rdata, struct iov_iter *iter,
3466 unsigned int len)
3468 int result = 0;
3469 unsigned int i;
3470 unsigned int nr_pages = rdata->nr_pages;
3471 unsigned int page_offset = rdata->page_offset;
3473 rdata->got_bytes = 0;
3474 rdata->tailsz = PAGE_SIZE;
/* iterate over the page count captured above; rdata->nr_pages shrinks as pages are dropped */
3475 for (i = 0; i < nr_pages; i++) {
3476 struct page *page = rdata->pages[i];
3477 size_t n;
3478 unsigned int segment_size = rdata->pagesz;
/* only the first page starts at a non-zero offset */
3480 if (i == 0)
3481 segment_size -= page_offset;
3482 else
3483 page_offset = 0;
3486 if (len <= 0) {
3487 /* no need to hold page hostage */
3488 rdata->pages[i] = NULL;
3489 rdata->nr_pages--;
3490 put_page(page);
3491 continue;
/* n is the amount for this page; a partial final page sets tailsz */
3494 n = len;
3495 if (len >= segment_size)
3496 /* enough data to fill the page */
3497 n = segment_size;
3498 else
3499 rdata->tailsz = len;
3500 len -= n;
3502 if (iter)
3503 result = copy_page_from_iter(
3504 page, page_offset, n, iter);
3505 #ifdef CONFIG_CIFS_SMB_DIRECT
/* with a memory registration present the bytes are counted without any copy here */
3506 else if (rdata->mr)
3507 result = n;
3508 #endif
3509 else
3510 result = cifs_read_page_from_socket(
3511 server, page, page_offset, n);
3512 if (result < 0)
3513 break;
3515 rdata->got_bytes += result;
3518 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3519 rdata->got_bytes : result;
3522 static int
3523 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3524 struct cifs_readdata *rdata, unsigned int len)
3526 return uncached_fill_pages(server, rdata, NULL, len);
3529 static int
3530 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3531 struct cifs_readdata *rdata,
3532 struct iov_iter *iter)
3534 return uncached_fill_pages(server, rdata, iter, iter->count);
3537 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3538 struct list_head *rdata_list,
3539 struct cifs_aio_ctx *ctx)
3541 unsigned int rsize;
3542 struct cifs_credits credits;
3543 int rc;
3544 struct TCP_Server_Info *server;
3546 /* XXX: should we pick a new channel here? */
3547 server = rdata->server;
3549 do {
3550 if (rdata->cfile->invalidHandle) {
3551 rc = cifs_reopen_file(rdata->cfile, true);
3552 if (rc == -EAGAIN)
3553 continue;
3554 else if (rc)
3555 break;
3559 * Wait for credits to resend this rdata.
3560 * Note: we are attempting to resend the whole rdata not in
3561 * segments
3563 do {
3564 rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3565 &rsize, &credits);
3567 if (rc)
3568 goto fail;
3570 if (rsize < rdata->bytes) {
3571 add_credits_and_wake_if(server, &credits, 0);
3572 msleep(1000);
3574 } while (rsize < rdata->bytes);
3575 rdata->credits = credits;
3577 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3578 if (!rc) {
3579 if (rdata->cfile->invalidHandle)
3580 rc = -EAGAIN;
3581 else {
3582 #ifdef CONFIG_CIFS_SMB_DIRECT
3583 if (rdata->mr) {
3584 rdata->mr->need_invalidate = true;
3585 smbd_deregister_mr(rdata->mr);
3586 rdata->mr = NULL;
3588 #endif
3589 rc = server->ops->async_readv(rdata);
3593 /* If the read was successfully sent, we are done */
3594 if (!rc) {
3595 /* Add to aio pending list */
3596 list_add_tail(&rdata->list, rdata_list);
3597 return 0;
3600 /* Roll back credits and retry if needed */
3601 add_credits_and_wake_if(server, &rdata->credits, 0);
3602 } while (rc == -EAGAIN);
3604 fail:
3605 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3606 return rc;
3609 static int
3610 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3611 struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3612 struct cifs_aio_ctx *ctx)
3614 struct cifs_readdata *rdata;
3615 unsigned int npages, rsize;
3616 struct cifs_credits credits_on_stack;
3617 struct cifs_credits *credits = &credits_on_stack;
3618 size_t cur_len;
3619 int rc;
3620 pid_t pid;
3621 struct TCP_Server_Info *server;
3622 struct page **pagevec;
3623 size_t start;
3624 struct iov_iter direct_iov = ctx->iter;
3626 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3628 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3629 pid = open_file->pid;
3630 else
3631 pid = current->tgid;
3633 if (ctx->direct_io)
3634 iov_iter_advance(&direct_iov, offset - ctx->pos);
3636 do {
3637 if (open_file->invalidHandle) {
3638 rc = cifs_reopen_file(open_file, true);
3639 if (rc == -EAGAIN)
3640 continue;
3641 else if (rc)
3642 break;
3645 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
3646 &rsize, credits);
3647 if (rc)
3648 break;
3650 cur_len = min_t(const size_t, len, rsize);
3652 if (ctx->direct_io) {
3653 ssize_t result;
3655 result = iov_iter_get_pages_alloc(
3656 &direct_iov, &pagevec,
3657 cur_len, &start);
3658 if (result < 0) {
3659 cifs_dbg(VFS,
3660 "Couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
3661 result, iov_iter_type(&direct_iov),
3662 direct_iov.iov_offset,
3663 direct_iov.count);
3664 dump_stack();
3666 rc = result;
3667 add_credits_and_wake_if(server, credits, 0);
3668 break;
3670 cur_len = (size_t)result;
3671 iov_iter_advance(&direct_iov, cur_len);
3673 rdata = cifs_readdata_direct_alloc(
3674 pagevec, cifs_uncached_readv_complete);
3675 if (!rdata) {
3676 add_credits_and_wake_if(server, credits, 0);
3677 rc = -ENOMEM;
3678 break;
3681 npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
3682 rdata->page_offset = start;
3683 rdata->tailsz = npages > 1 ?
3684 cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
3685 cur_len;
3687 } else {
3689 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3690 /* allocate a readdata struct */
3691 rdata = cifs_readdata_alloc(npages,
3692 cifs_uncached_readv_complete);
3693 if (!rdata) {
3694 add_credits_and_wake_if(server, credits, 0);
3695 rc = -ENOMEM;
3696 break;
3699 rc = cifs_read_allocate_pages(rdata, npages);
3700 if (rc) {
3701 kvfree(rdata->pages);
3702 kfree(rdata);
3703 add_credits_and_wake_if(server, credits, 0);
3704 break;
3707 rdata->tailsz = PAGE_SIZE;
3710 rdata->server = server;
3711 rdata->cfile = cifsFileInfo_get(open_file);
3712 rdata->nr_pages = npages;
3713 rdata->offset = offset;
3714 rdata->bytes = cur_len;
3715 rdata->pid = pid;
3716 rdata->pagesz = PAGE_SIZE;
3717 rdata->read_into_pages = cifs_uncached_read_into_pages;
3718 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3719 rdata->credits = credits_on_stack;
3720 rdata->ctx = ctx;
3721 kref_get(&ctx->refcount);
3723 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3725 if (!rc) {
3726 if (rdata->cfile->invalidHandle)
3727 rc = -EAGAIN;
3728 else
3729 rc = server->ops->async_readv(rdata);
3732 if (rc) {
3733 add_credits_and_wake_if(server, &rdata->credits, 0);
3734 kref_put(&rdata->refcount,
3735 cifs_uncached_readdata_release);
3736 if (rc == -EAGAIN) {
3737 iov_iter_revert(&direct_iov, cur_len);
3738 continue;
3740 break;
3743 list_add_tail(&rdata->list, rdata_list);
3744 offset += cur_len;
3745 len -= cur_len;
3746 } while (len > 0);
3748 return rc;
3751 static void
3752 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3754 struct cifs_readdata *rdata, *tmp;
3755 struct iov_iter *to = &ctx->iter;
3756 struct cifs_sb_info *cifs_sb;
3757 int rc;
3759 cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3761 mutex_lock(&ctx->aio_mutex);
3763 if (list_empty(&ctx->list)) {
3764 mutex_unlock(&ctx->aio_mutex);
3765 return;
3768 rc = ctx->rc;
3769 /* the loop below should proceed in the order of increasing offsets */
3770 again:
3771 list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
3772 if (!rc) {
3773 if (!try_wait_for_completion(&rdata->done)) {
3774 mutex_unlock(&ctx->aio_mutex);
3775 return;
3778 if (rdata->result == -EAGAIN) {
3779 /* resend call if it's a retryable error */
3780 struct list_head tmp_list;
3781 unsigned int got_bytes = rdata->got_bytes;
3783 list_del_init(&rdata->list);
3784 INIT_LIST_HEAD(&tmp_list);
3787 * Got a part of data and then reconnect has
3788 * happened -- fill the buffer and continue
3789 * reading.
3791 if (got_bytes && got_bytes < rdata->bytes) {
3792 rc = 0;
3793 if (!ctx->direct_io)
3794 rc = cifs_readdata_to_iov(rdata, to);
3795 if (rc) {
3796 kref_put(&rdata->refcount,
3797 cifs_uncached_readdata_release);
3798 continue;
3802 if (ctx->direct_io) {
3804 * Re-use rdata as this is a
3805 * direct I/O
3807 rc = cifs_resend_rdata(
3808 rdata,
3809 &tmp_list, ctx);
3810 } else {
3811 rc = cifs_send_async_read(
3812 rdata->offset + got_bytes,
3813 rdata->bytes - got_bytes,
3814 rdata->cfile, cifs_sb,
3815 &tmp_list, ctx);
3817 kref_put(&rdata->refcount,
3818 cifs_uncached_readdata_release);
3821 list_splice(&tmp_list, &ctx->list);
3823 goto again;
3824 } else if (rdata->result)
3825 rc = rdata->result;
3826 else if (!ctx->direct_io)
3827 rc = cifs_readdata_to_iov(rdata, to);
3829 /* if there was a short read -- discard anything left */
3830 if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3831 rc = -ENODATA;
3833 ctx->total_len += rdata->got_bytes;
3835 list_del_init(&rdata->list);
3836 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3839 if (!ctx->direct_io)
3840 ctx->total_len = ctx->len - iov_iter_count(to);
3842 /* mask nodata case */
3843 if (rc == -ENODATA)
3844 rc = 0;
3846 ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
3848 mutex_unlock(&ctx->aio_mutex);
3850 if (ctx->iocb && ctx->iocb->ki_complete)
3851 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3852 else
3853 complete(&ctx->done);
3856 static ssize_t __cifs_readv(
3857 struct kiocb *iocb, struct iov_iter *to, bool direct)
3859 size_t len;
3860 struct file *file = iocb->ki_filp;
3861 struct cifs_sb_info *cifs_sb;
3862 struct cifsFileInfo *cfile;
3863 struct cifs_tcon *tcon;
3864 ssize_t rc, total_read = 0;
3865 loff_t offset = iocb->ki_pos;
3866 struct cifs_aio_ctx *ctx;
3869 * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
3870 * fall back to data copy read path
3871 * this could be improved by getting pages directly in ITER_KVEC
3873 if (direct && iov_iter_is_kvec(to)) {
3874 cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
3875 direct = false;
3878 len = iov_iter_count(to);
3879 if (!len)
3880 return 0;
3882 cifs_sb = CIFS_FILE_SB(file);
3883 cfile = file->private_data;
3884 tcon = tlink_tcon(cfile->tlink);
3886 if (!tcon->ses->server->ops->async_readv)
3887 return -ENOSYS;
3889 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3890 cifs_dbg(FYI, "attempting read on write only file instance\n");
3892 ctx = cifs_aio_ctx_alloc();
3893 if (!ctx)
3894 return -ENOMEM;
3896 ctx->cfile = cifsFileInfo_get(cfile);
3898 if (!is_sync_kiocb(iocb))
3899 ctx->iocb = iocb;
3901 if (iter_is_iovec(to))
3902 ctx->should_dirty = true;
3904 if (direct) {
3905 ctx->pos = offset;
3906 ctx->direct_io = true;
3907 ctx->iter = *to;
3908 ctx->len = len;
3909 } else {
3910 rc = setup_aio_ctx_iter(ctx, to, READ);
3911 if (rc) {
3912 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3913 return rc;
3915 len = ctx->len;
3918 /* grab a lock here due to read response handlers can access ctx */
3919 mutex_lock(&ctx->aio_mutex);
3921 rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
3923 /* if at least one read request send succeeded, then reset rc */
3924 if (!list_empty(&ctx->list))
3925 rc = 0;
3927 mutex_unlock(&ctx->aio_mutex);
3929 if (rc) {
3930 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3931 return rc;
3934 if (!is_sync_kiocb(iocb)) {
3935 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3936 return -EIOCBQUEUED;
3939 rc = wait_for_completion_killable(&ctx->done);
3940 if (rc) {
3941 mutex_lock(&ctx->aio_mutex);
3942 ctx->rc = rc = -EINTR;
3943 total_read = ctx->total_len;
3944 mutex_unlock(&ctx->aio_mutex);
3945 } else {
3946 rc = ctx->rc;
3947 total_read = ctx->total_len;
3950 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3952 if (total_read) {
3953 iocb->ki_pos += total_read;
3954 return total_read;
3956 return rc;
3959 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
3961 return __cifs_readv(iocb, to, true);
3964 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
3966 return __cifs_readv(iocb, to, false);
3969 ssize_t
3970 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3972 struct inode *inode = file_inode(iocb->ki_filp);
3973 struct cifsInodeInfo *cinode = CIFS_I(inode);
3974 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3975 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3976 iocb->ki_filp->private_data;
3977 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3978 int rc = -EACCES;
3981 * In strict cache mode we need to read from the server all the time
3982 * if we don't have level II oplock because the server can delay mtime
3983 * change - so we can't make a decision about inode invalidating.
3984 * And we can also fail with pagereading if there are mandatory locks
3985 * on pages affected by this read but not on the region from pos to
3986 * pos+len-1.
3988 if (!CIFS_CACHE_READ(cinode))
3989 return cifs_user_readv(iocb, to);
3991 if (cap_unix(tcon->ses) &&
3992 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3993 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3994 return generic_file_read_iter(iocb, to);
3997 * We need to hold the sem to be sure nobody modifies lock list
3998 * with a brlock that prevents reading.
4000 down_read(&cinode->lock_sem);
4001 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4002 tcon->ses->server->vals->shared_lock_type,
4003 0, NULL, CIFS_READ_OP))
4004 rc = generic_file_read_iter(iocb, to);
4005 up_read(&cinode->lock_sem);
4006 return rc;
4009 static ssize_t
4010 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4012 int rc = -EACCES;
4013 unsigned int bytes_read = 0;
4014 unsigned int total_read;
4015 unsigned int current_read_size;
4016 unsigned int rsize;
4017 struct cifs_sb_info *cifs_sb;
4018 struct cifs_tcon *tcon;
4019 struct TCP_Server_Info *server;
4020 unsigned int xid;
4021 char *cur_offset;
4022 struct cifsFileInfo *open_file;
4023 struct cifs_io_parms io_parms = {0};
4024 int buf_type = CIFS_NO_BUFFER;
4025 __u32 pid;
4027 xid = get_xid();
4028 cifs_sb = CIFS_FILE_SB(file);
4030 /* FIXME: set up handlers for larger reads and/or convert to async */
4031 rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4033 if (file->private_data == NULL) {
4034 rc = -EBADF;
4035 free_xid(xid);
4036 return rc;
4038 open_file = file->private_data;
4039 tcon = tlink_tcon(open_file->tlink);
4040 server = cifs_pick_channel(tcon->ses);
4042 if (!server->ops->sync_read) {
4043 free_xid(xid);
4044 return -ENOSYS;
4047 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4048 pid = open_file->pid;
4049 else
4050 pid = current->tgid;
4052 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4053 cifs_dbg(FYI, "attempting read on write only file instance\n");
4055 for (total_read = 0, cur_offset = read_data; read_size > total_read;
4056 total_read += bytes_read, cur_offset += bytes_read) {
4057 do {
4058 current_read_size = min_t(uint, read_size - total_read,
4059 rsize);
4061 * For windows me and 9x we do not want to request more
4062 * than it negotiated since it will refuse the read
4063 * then.
4065 if (!(tcon->ses->capabilities &
4066 tcon->ses->server->vals->cap_large_files)) {
4067 current_read_size = min_t(uint,
4068 current_read_size, CIFSMaxBufSize);
4070 if (open_file->invalidHandle) {
4071 rc = cifs_reopen_file(open_file, true);
4072 if (rc != 0)
4073 break;
4075 io_parms.pid = pid;
4076 io_parms.tcon = tcon;
4077 io_parms.offset = *offset;
4078 io_parms.length = current_read_size;
4079 io_parms.server = server;
4080 rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4081 &bytes_read, &cur_offset,
4082 &buf_type);
4083 } while (rc == -EAGAIN);
4085 if (rc || (bytes_read == 0)) {
4086 if (total_read) {
4087 break;
4088 } else {
4089 free_xid(xid);
4090 return rc;
4092 } else {
4093 cifs_stats_bytes_read(tcon, total_read);
4094 *offset += bytes_read;
4097 free_xid(xid);
4098 return total_read;
4102 * If the page is mmap'ed into a process' page tables, then we need to make
4103 * sure that it doesn't change while being written back.
4105 static vm_fault_t
4106 cifs_page_mkwrite(struct vm_fault *vmf)
4108 struct page *page = vmf->page;
4110 lock_page(page);
4111 return VM_FAULT_LOCKED;
4114 static const struct vm_operations_struct cifs_file_vm_ops = {
4115 .fault = filemap_fault,
4116 .map_pages = filemap_map_pages,
4117 .page_mkwrite = cifs_page_mkwrite,
4120 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4122 int xid, rc = 0;
4123 struct inode *inode = file_inode(file);
4125 xid = get_xid();
4127 if (!CIFS_CACHE_READ(CIFS_I(inode)))
4128 rc = cifs_zap_mapping(inode);
4129 if (!rc)
4130 rc = generic_file_mmap(file, vma);
4131 if (!rc)
4132 vma->vm_ops = &cifs_file_vm_ops;
4134 free_xid(xid);
4135 return rc;
4138 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4140 int rc, xid;
4142 xid = get_xid();
4144 rc = cifs_revalidate_file(file);
4145 if (rc)
4146 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4147 rc);
4148 if (!rc)
4149 rc = generic_file_mmap(file, vma);
4150 if (!rc)
4151 vma->vm_ops = &cifs_file_vm_ops;
4153 free_xid(xid);
4154 return rc;
4157 static void
4158 cifs_readv_complete(struct work_struct *work)
4160 unsigned int i, got_bytes;
4161 struct cifs_readdata *rdata = container_of(work,
4162 struct cifs_readdata, work);
4164 got_bytes = rdata->got_bytes;
4165 for (i = 0; i < rdata->nr_pages; i++) {
4166 struct page *page = rdata->pages[i];
4168 lru_cache_add(page);
4170 if (rdata->result == 0 ||
4171 (rdata->result == -EAGAIN && got_bytes)) {
4172 flush_dcache_page(page);
4173 SetPageUptodate(page);
4176 unlock_page(page);
4178 if (rdata->result == 0 ||
4179 (rdata->result == -EAGAIN && got_bytes))
4180 cifs_readpage_to_fscache(rdata->mapping->host, page);
4182 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
4184 put_page(page);
4185 rdata->pages[i] = NULL;
4187 kref_put(&rdata->refcount, cifs_readdata_release);
4190 static int
4191 readpages_fill_pages(struct TCP_Server_Info *server,
4192 struct cifs_readdata *rdata, struct iov_iter *iter,
4193 unsigned int len)
4195 int result = 0;
4196 unsigned int i;
4197 u64 eof;
4198 pgoff_t eof_index;
4199 unsigned int nr_pages = rdata->nr_pages;
4200 unsigned int page_offset = rdata->page_offset;
4202 /* determine the eof that the server (probably) has */
4203 eof = CIFS_I(rdata->mapping->host)->server_eof;
4204 eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
4205 cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
4207 rdata->got_bytes = 0;
4208 rdata->tailsz = PAGE_SIZE;
4209 for (i = 0; i < nr_pages; i++) {
4210 struct page *page = rdata->pages[i];
4211 unsigned int to_read = rdata->pagesz;
4212 size_t n;
4214 if (i == 0)
4215 to_read -= page_offset;
4216 else
4217 page_offset = 0;
4219 n = to_read;
4221 if (len >= to_read) {
4222 len -= to_read;
4223 } else if (len > 0) {
4224 /* enough for partial page, fill and zero the rest */
4225 zero_user(page, len + page_offset, to_read - len);
4226 n = rdata->tailsz = len;
4227 len = 0;
4228 } else if (page->index > eof_index) {
4230 * The VFS will not try to do readahead past the
4231 * i_size, but it's possible that we have outstanding
4232 * writes with gaps in the middle and the i_size hasn't
4233 * caught up yet. Populate those with zeroed out pages
4234 * to prevent the VFS from repeatedly attempting to
4235 * fill them until the writes are flushed.
4237 zero_user(page, 0, PAGE_SIZE);
4238 lru_cache_add(page);
4239 flush_dcache_page(page);
4240 SetPageUptodate(page);
4241 unlock_page(page);
4242 put_page(page);
4243 rdata->pages[i] = NULL;
4244 rdata->nr_pages--;
4245 continue;
4246 } else {
4247 /* no need to hold page hostage */
4248 lru_cache_add(page);
4249 unlock_page(page);
4250 put_page(page);
4251 rdata->pages[i] = NULL;
4252 rdata->nr_pages--;
4253 continue;
4256 if (iter)
4257 result = copy_page_from_iter(
4258 page, page_offset, n, iter);
4259 #ifdef CONFIG_CIFS_SMB_DIRECT
4260 else if (rdata->mr)
4261 result = n;
4262 #endif
4263 else
4264 result = cifs_read_page_from_socket(
4265 server, page, page_offset, n);
4266 if (result < 0)
4267 break;
4269 rdata->got_bytes += result;
4272 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
4273 rdata->got_bytes : result;
4276 static int
4277 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
4278 struct cifs_readdata *rdata, unsigned int len)
4280 return readpages_fill_pages(server, rdata, NULL, len);
4283 static int
4284 cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
4285 struct cifs_readdata *rdata,
4286 struct iov_iter *iter)
4288 return readpages_fill_pages(server, rdata, iter, iter->count);
4291 static int
4292 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
4293 unsigned int rsize, struct list_head *tmplist,
4294 unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
4296 struct page *page, *tpage;
4297 unsigned int expected_index;
4298 int rc;
4299 gfp_t gfp = readahead_gfp_mask(mapping);
4301 INIT_LIST_HEAD(tmplist);
4303 page = lru_to_page(page_list);
4306 * Lock the page and put it in the cache. Since no one else
4307 * should have access to this page, we're safe to simply set
4308 * PG_locked without checking it first.
4310 __SetPageLocked(page);
4311 rc = add_to_page_cache_locked(page, mapping,
4312 page->index, gfp);
4314 /* give up if we can't stick it in the cache */
4315 if (rc) {
4316 __ClearPageLocked(page);
4317 return rc;
4320 /* move first page to the tmplist */
4321 *offset = (loff_t)page->index << PAGE_SHIFT;
4322 *bytes = PAGE_SIZE;
4323 *nr_pages = 1;
4324 list_move_tail(&page->lru, tmplist);
4326 /* now try and add more pages onto the request */
4327 expected_index = page->index + 1;
4328 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
4329 /* discontinuity ? */
4330 if (page->index != expected_index)
4331 break;
4333 /* would this page push the read over the rsize? */
4334 if (*bytes + PAGE_SIZE > rsize)
4335 break;
4337 __SetPageLocked(page);
4338 rc = add_to_page_cache_locked(page, mapping, page->index, gfp);
4339 if (rc) {
4340 __ClearPageLocked(page);
4341 break;
4343 list_move_tail(&page->lru, tmplist);
4344 (*bytes) += PAGE_SIZE;
4345 expected_index++;
4346 (*nr_pages)++;
4348 return rc;
4351 static int cifs_readpages(struct file *file, struct address_space *mapping,
4352 struct list_head *page_list, unsigned num_pages)
4354 int rc;
4355 int err = 0;
4356 struct list_head tmplist;
4357 struct cifsFileInfo *open_file = file->private_data;
4358 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
4359 struct TCP_Server_Info *server;
4360 pid_t pid;
4361 unsigned int xid;
4363 xid = get_xid();
4365 * Reads as many pages as possible from fscache. Returns -ENOBUFS
4366 * immediately if the cookie is negative
4368 * After this point, every page in the list might have PG_fscache set,
4369 * so we will need to clean that up off of every page we don't use.
4371 rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
4372 &num_pages);
4373 if (rc == 0) {
4374 free_xid(xid);
4375 return rc;
4378 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4379 pid = open_file->pid;
4380 else
4381 pid = current->tgid;
4383 rc = 0;
4384 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4386 cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4387 __func__, file, mapping, num_pages);
4390 * Start with the page at end of list and move it to private
4391 * list. Do the same with any following pages until we hit
4392 * the rsize limit, hit an index discontinuity, or run out of
4393 * pages. Issue the async read and then start the loop again
4394 * until the list is empty.
4396 * Note that list order is important. The page_list is in
4397 * the order of declining indexes. When we put the pages in
4398 * the rdata->pages, then we want them in increasing order.
4400 while (!list_empty(page_list) && !err) {
4401 unsigned int i, nr_pages, bytes, rsize;
4402 loff_t offset;
4403 struct page *page, *tpage;
4404 struct cifs_readdata *rdata;
4405 struct cifs_credits credits_on_stack;
4406 struct cifs_credits *credits = &credits_on_stack;
4408 if (open_file->invalidHandle) {
4409 rc = cifs_reopen_file(open_file, true);
4410 if (rc == -EAGAIN)
4411 continue;
4412 else if (rc)
4413 break;
4416 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4417 &rsize, credits);
4418 if (rc)
4419 break;
4422 * Give up immediately if rsize is too small to read an entire
4423 * page. The VFS will fall back to readpage. We should never
4424 * reach this point however since we set ra_pages to 0 when the
4425 * rsize is smaller than a cache page.
4427 if (unlikely(rsize < PAGE_SIZE)) {
4428 add_credits_and_wake_if(server, credits, 0);
4429 free_xid(xid);
4430 return 0;
4433 nr_pages = 0;
4434 err = readpages_get_pages(mapping, page_list, rsize, &tmplist,
4435 &nr_pages, &offset, &bytes);
4436 if (!nr_pages) {
4437 add_credits_and_wake_if(server, credits, 0);
4438 break;
4441 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
4442 if (!rdata) {
4443 /* best to give up if we're out of mem */
4444 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4445 list_del(&page->lru);
4446 lru_cache_add(page);
4447 unlock_page(page);
4448 put_page(page);
4450 rc = -ENOMEM;
4451 add_credits_and_wake_if(server, credits, 0);
4452 break;
4455 rdata->cfile = cifsFileInfo_get(open_file);
4456 rdata->server = server;
4457 rdata->mapping = mapping;
4458 rdata->offset = offset;
4459 rdata->bytes = bytes;
4460 rdata->pid = pid;
4461 rdata->pagesz = PAGE_SIZE;
4462 rdata->tailsz = PAGE_SIZE;
4463 rdata->read_into_pages = cifs_readpages_read_into_pages;
4464 rdata->copy_into_pages = cifs_readpages_copy_into_pages;
4465 rdata->credits = credits_on_stack;
4467 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4468 list_del(&page->lru);
4469 rdata->pages[rdata->nr_pages++] = page;
4472 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4474 if (!rc) {
4475 if (rdata->cfile->invalidHandle)
4476 rc = -EAGAIN;
4477 else
4478 rc = server->ops->async_readv(rdata);
4481 if (rc) {
4482 add_credits_and_wake_if(server, &rdata->credits, 0);
4483 for (i = 0; i < rdata->nr_pages; i++) {
4484 page = rdata->pages[i];
4485 lru_cache_add(page);
4486 unlock_page(page);
4487 put_page(page);
4489 /* Fallback to the readpage in error/reconnect cases */
4490 kref_put(&rdata->refcount, cifs_readdata_release);
4491 break;
4494 kref_put(&rdata->refcount, cifs_readdata_release);
4497 /* Any pages that have been shown to fscache but didn't get added to
4498 * the pagecache must be uncached before they get returned to the
4499 * allocator.
4501 cifs_fscache_readpages_cancel(mapping->host, page_list);
4502 free_xid(xid);
4503 return rc;
4507 * cifs_readpage_worker must be called with the page pinned
4509 static int cifs_readpage_worker(struct file *file, struct page *page,
4510 loff_t *poffset)
4512 char *read_data;
4513 int rc;
4515 /* Is the page cached? */
4516 rc = cifs_readpage_from_fscache(file_inode(file), page);
4517 if (rc == 0)
4518 goto read_complete;
4520 read_data = kmap(page);
4521 /* for reads over a certain size could initiate async read ahead */
4523 rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4525 if (rc < 0)
4526 goto io_error;
4527 else
4528 cifs_dbg(FYI, "Bytes read %d\n", rc);
4530 /* we do not want atime to be less than mtime, it broke some apps */
4531 file_inode(file)->i_atime = current_time(file_inode(file));
4532 if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)))
4533 file_inode(file)->i_atime = file_inode(file)->i_mtime;
4534 else
4535 file_inode(file)->i_atime = current_time(file_inode(file));
4537 if (PAGE_SIZE > rc)
4538 memset(read_data + rc, 0, PAGE_SIZE - rc);
4540 flush_dcache_page(page);
4541 SetPageUptodate(page);
4543 /* send this page to the cache */
4544 cifs_readpage_to_fscache(file_inode(file), page);
4546 rc = 0;
4548 io_error:
4549 kunmap(page);
4550 unlock_page(page);
4552 read_complete:
4553 return rc;
4556 static int cifs_readpage(struct file *file, struct page *page)
4558 loff_t offset = (loff_t)page->index << PAGE_SHIFT;
4559 int rc = -EACCES;
4560 unsigned int xid;
4562 xid = get_xid();
4564 if (file->private_data == NULL) {
4565 rc = -EBADF;
4566 free_xid(xid);
4567 return rc;
4570 cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
4571 page, (int)offset, (int)offset);
4573 rc = cifs_readpage_worker(file, page, &offset);
4575 free_xid(xid);
4576 return rc;
4579 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4581 struct cifsFileInfo *open_file;
4583 spin_lock(&cifs_inode->open_file_lock);
4584 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4585 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4586 spin_unlock(&cifs_inode->open_file_lock);
4587 return 1;
4590 spin_unlock(&cifs_inode->open_file_lock);
4591 return 0;
4594 /* We do not want to update the file size from server for inodes
4595 open for write - to avoid races with writepage extending
4596 the file - in the future we could consider allowing
4597 refreshing the inode only on increases in the file size
4598 but this is tricky to do without racing with writebehind
4599 page caching in the current Linux kernel design */
4600 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4602 if (!cifsInode)
4603 return true;
4605 if (is_inode_writable(cifsInode)) {
4606 /* This inode is open for write at least once */
4607 struct cifs_sb_info *cifs_sb;
4609 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
4610 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4611 /* since no page cache to corrupt on directio
4612 we can change size safely */
4613 return true;
4616 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
4617 return true;
4619 return false;
4620 } else
4621 return true;
4624 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4625 loff_t pos, unsigned len, unsigned flags,
4626 struct page **pagep, void **fsdata)
4628 int oncethru = 0;
4629 pgoff_t index = pos >> PAGE_SHIFT;
4630 loff_t offset = pos & (PAGE_SIZE - 1);
4631 loff_t page_start = pos & PAGE_MASK;
4632 loff_t i_size;
4633 struct page *page;
4634 int rc = 0;
4636 cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4638 start:
4639 page = grab_cache_page_write_begin(mapping, index, flags);
4640 if (!page) {
4641 rc = -ENOMEM;
4642 goto out;
4645 if (PageUptodate(page))
4646 goto out;
4649 * If we write a full page it will be up to date, no need to read from
4650 * the server. If the write is short, we'll end up doing a sync write
4651 * instead.
4653 if (len == PAGE_SIZE)
4654 goto out;
4657 * optimize away the read when we have an oplock, and we're not
4658 * expecting to use any of the data we'd be reading in. That
4659 * is, when the page lies beyond the EOF, or straddles the EOF
4660 * and the write will cover all of the existing data.
4662 if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4663 i_size = i_size_read(mapping->host);
4664 if (page_start >= i_size ||
4665 (offset == 0 && (pos + len) >= i_size)) {
4666 zero_user_segments(page, 0, offset,
4667 offset + len,
4668 PAGE_SIZE);
4670 * PageChecked means that the parts of the page
4671 * to which we're not writing are considered up
4672 * to date. Once the data is copied to the
4673 * page, it can be set uptodate.
4675 SetPageChecked(page);
4676 goto out;
4680 if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4682 * might as well read a page, it is fast enough. If we get
4683 * an error, we don't need to return it. cifs_write_end will
4684 * do a sync write instead since PG_uptodate isn't set.
4686 cifs_readpage_worker(file, page, &page_start);
4687 put_page(page);
4688 oncethru = 1;
4689 goto start;
4690 } else {
4691 /* we could try using another file handle if there is one -
4692 but how would we lock it to prevent close of that handle
4693 racing with this read? In any case
4694 this will be written out by write_end so is fine */
4696 out:
4697 *pagep = page;
4698 return rc;
4701 static int cifs_release_page(struct page *page, gfp_t gfp)
4703 if (PagePrivate(page))
4704 return 0;
4706 return cifs_fscache_release_page(page, gfp);
4709 static void cifs_invalidate_page(struct page *page, unsigned int offset,
4710 unsigned int length)
4712 struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4714 if (offset == 0 && length == PAGE_SIZE)
4715 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
4718 static int cifs_launder_page(struct page *page)
4720 int rc = 0;
4721 loff_t range_start = page_offset(page);
4722 loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
4723 struct writeback_control wbc = {
4724 .sync_mode = WB_SYNC_ALL,
4725 .nr_to_write = 0,
4726 .range_start = range_start,
4727 .range_end = range_end,
4730 cifs_dbg(FYI, "Launder page: %p\n", page);
4732 if (clear_page_dirty_for_io(page))
4733 rc = cifs_writepage_locked(page, &wbc);
4735 cifs_fscache_invalidate_page(page, page->mapping->host);
4736 return rc;
4739 void cifs_oplock_break(struct work_struct *work)
4741 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4742 oplock_break);
4743 struct inode *inode = d_inode(cfile->dentry);
4744 struct cifsInodeInfo *cinode = CIFS_I(inode);
4745 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4746 struct TCP_Server_Info *server = tcon->ses->server;
4747 int rc = 0;
4748 bool purge_cache = false;
4750 wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4751 TASK_UNINTERRUPTIBLE);
4753 server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
4754 cfile->oplock_epoch, &purge_cache);
4756 if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4757 cifs_has_mand_locks(cinode)) {
4758 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4759 inode);
4760 cinode->oplock = 0;
4763 if (inode && S_ISREG(inode->i_mode)) {
4764 if (CIFS_CACHE_READ(cinode))
4765 break_lease(inode, O_RDONLY);
4766 else
4767 break_lease(inode, O_WRONLY);
4768 rc = filemap_fdatawrite(inode->i_mapping);
4769 if (!CIFS_CACHE_READ(cinode) || purge_cache) {
4770 rc = filemap_fdatawait(inode->i_mapping);
4771 mapping_set_error(inode->i_mapping, rc);
4772 cifs_zap_mapping(inode);
4774 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4775 if (CIFS_CACHE_WRITE(cinode))
4776 goto oplock_break_ack;
4779 rc = cifs_push_locks(cfile);
4780 if (rc)
4781 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4783 oplock_break_ack:
4785 * releasing stale oplock after recent reconnect of smb session using
4786 * a now incorrect file handle is not a data integrity issue but do
4787 * not bother sending an oplock release if session to server still is
4788 * disconnected since oplock already released by the server
4790 if (!cfile->oplock_break_cancelled) {
4791 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
4792 cinode);
4793 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4795 _cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
4796 cifs_done_oplock_break(cinode);
4800 * The presence of cifs_direct_io() in the address space ops vector
4801 * allowes open() O_DIRECT flags which would have failed otherwise.
4803 * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
4804 * so this method should never be called.
4806 * Direct IO is not yet supported in the cached mode.
4808 static ssize_t
4809 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4812 * FIXME
4813 * Eventually need to support direct IO for non forcedirectio mounts
4815 return -EINVAL;
4818 static int cifs_swap_activate(struct swap_info_struct *sis,
4819 struct file *swap_file, sector_t *span)
4821 struct cifsFileInfo *cfile = swap_file->private_data;
4822 struct inode *inode = swap_file->f_mapping->host;
4823 unsigned long blocks;
4824 long long isize;
4826 cifs_dbg(FYI, "swap activate\n");
4828 spin_lock(&inode->i_lock);
4829 blocks = inode->i_blocks;
4830 isize = inode->i_size;
4831 spin_unlock(&inode->i_lock);
4832 if (blocks*512 < isize) {
4833 pr_warn("swap activate: swapfile has holes\n");
4834 return -EINVAL;
4836 *span = sis->pages;
4838 pr_warn_once("Swap support over SMB3 is experimental\n");
4841 * TODO: consider adding ACL (or documenting how) to prevent other
4842 * users (on this or other systems) from reading it
4846 /* TODO: add sk_set_memalloc(inet) or similar */
4848 if (cfile)
4849 cfile->swapfile = true;
4851 * TODO: Since file already open, we can't open with DENY_ALL here
4852 * but we could add call to grab a byte range lock to prevent others
4853 * from reading or writing the file
4856 return 0;
4859 static void cifs_swap_deactivate(struct file *file)
4861 struct cifsFileInfo *cfile = file->private_data;
4863 cifs_dbg(FYI, "swap deactivate\n");
4865 /* TODO: undo sk_set_memalloc(inet) will eventually be needed */
4867 if (cfile)
4868 cfile->swapfile = false;
4870 /* do we need to unpin (or unlock) the file */
4873 const struct address_space_operations cifs_addr_ops = {
4874 .readpage = cifs_readpage,
4875 .readpages = cifs_readpages,
4876 .writepage = cifs_writepage,
4877 .writepages = cifs_writepages,
4878 .write_begin = cifs_write_begin,
4879 .write_end = cifs_write_end,
4880 .set_page_dirty = __set_page_dirty_nobuffers,
4881 .releasepage = cifs_release_page,
4882 .direct_IO = cifs_direct_io,
4883 .invalidatepage = cifs_invalidate_page,
4884 .launder_page = cifs_launder_page,
4886 * TODO: investigate and if useful we could add an cifs_migratePage
4887 * helper (under an CONFIG_MIGRATION) in the future, and also
4888 * investigate and add an is_dirty_writeback helper if needed
4890 .swap_activate = cifs_swap_activate,
4891 .swap_deactivate = cifs_swap_deactivate,
4895 * cifs_readpages requires the server to support a buffer large enough to
4896 * contain the header plus one complete page of data. Otherwise, we need
4897 * to leave cifs_readpages out of the address space operations.
4899 const struct address_space_operations cifs_addr_ops_smallbuf = {
4900 .readpage = cifs_readpage,
4901 .writepage = cifs_writepage,
4902 .writepages = cifs_writepages,
4903 .write_begin = cifs_write_begin,
4904 .write_end = cifs_write_end,
4905 .set_page_dirty = __set_page_dirty_nobuffers,
4906 .releasepage = cifs_release_page,
4907 .invalidatepage = cifs_invalidate_page,
4908 .launder_page = cifs_launder_page,