io_uring: don't use 'fd' for openat/openat2/statx
[linux/fpc-iii.git] / fs / cifs / file.c
blob5920820bfbd0770be0bc18223ba9c31367f0a2da
1 /*
2 * fs/cifs/file.c
4 * vfs operations that deal with files
6 * Copyright (C) International Business Machines Corp., 2002,2010
7 * Author(s): Steve French (sfrench@us.ibm.com)
8 * Jeremy Allison (jra@samba.org)
10 * This library is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU Lesser General Public License as published
12 * by the Free Software Foundation; either version 2.1 of the License, or
13 * (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
18 * the GNU Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this library; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 #include <linux/fs.h>
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <linux/mm.h>
37 #include <asm/div64.h>
38 #include "cifsfs.h"
39 #include "cifspdu.h"
40 #include "cifsglob.h"
41 #include "cifsproto.h"
42 #include "cifs_unicode.h"
43 #include "cifs_debug.h"
44 #include "cifs_fs_sb.h"
45 #include "fscache.h"
46 #include "smbdirect.h"
48 static inline int cifs_convert_flags(unsigned int flags)
50 if ((flags & O_ACCMODE) == O_RDONLY)
51 return GENERIC_READ;
52 else if ((flags & O_ACCMODE) == O_WRONLY)
53 return GENERIC_WRITE;
54 else if ((flags & O_ACCMODE) == O_RDWR) {
55 /* GENERIC_ALL is too much permission to request
56 can cause unnecessary access denied on create */
57 /* return GENERIC_ALL; */
58 return (GENERIC_READ | GENERIC_WRITE);
61 return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
62 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
63 FILE_READ_DATA);
66 static u32 cifs_posix_convert_flags(unsigned int flags)
68 u32 posix_flags = 0;
70 if ((flags & O_ACCMODE) == O_RDONLY)
71 posix_flags = SMB_O_RDONLY;
72 else if ((flags & O_ACCMODE) == O_WRONLY)
73 posix_flags = SMB_O_WRONLY;
74 else if ((flags & O_ACCMODE) == O_RDWR)
75 posix_flags = SMB_O_RDWR;
77 if (flags & O_CREAT) {
78 posix_flags |= SMB_O_CREAT;
79 if (flags & O_EXCL)
80 posix_flags |= SMB_O_EXCL;
81 } else if (flags & O_EXCL)
82 cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
83 current->comm, current->tgid);
85 if (flags & O_TRUNC)
86 posix_flags |= SMB_O_TRUNC;
87 /* be safe and imply O_SYNC for O_DSYNC */
88 if (flags & O_DSYNC)
89 posix_flags |= SMB_O_SYNC;
90 if (flags & O_DIRECTORY)
91 posix_flags |= SMB_O_DIRECTORY;
92 if (flags & O_NOFOLLOW)
93 posix_flags |= SMB_O_NOFOLLOW;
94 if (flags & O_DIRECT)
95 posix_flags |= SMB_O_DIRECT;
97 return posix_flags;
100 static inline int cifs_get_disposition(unsigned int flags)
102 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
103 return FILE_CREATE;
104 else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
105 return FILE_OVERWRITE_IF;
106 else if ((flags & O_CREAT) == O_CREAT)
107 return FILE_OPEN_IF;
108 else if ((flags & O_TRUNC) == O_TRUNC)
109 return FILE_OVERWRITE;
110 else
111 return FILE_OPEN;
114 int cifs_posix_open(char *full_path, struct inode **pinode,
115 struct super_block *sb, int mode, unsigned int f_flags,
116 __u32 *poplock, __u16 *pnetfid, unsigned int xid)
118 int rc;
119 FILE_UNIX_BASIC_INFO *presp_data;
120 __u32 posix_flags = 0;
121 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
122 struct cifs_fattr fattr;
123 struct tcon_link *tlink;
124 struct cifs_tcon *tcon;
126 cifs_dbg(FYI, "posix open %s\n", full_path);
128 presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
129 if (presp_data == NULL)
130 return -ENOMEM;
132 tlink = cifs_sb_tlink(cifs_sb);
133 if (IS_ERR(tlink)) {
134 rc = PTR_ERR(tlink);
135 goto posix_open_ret;
138 tcon = tlink_tcon(tlink);
139 mode &= ~current_umask();
141 posix_flags = cifs_posix_convert_flags(f_flags);
142 rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
143 poplock, full_path, cifs_sb->local_nls,
144 cifs_remap(cifs_sb));
145 cifs_put_tlink(tlink);
147 if (rc)
148 goto posix_open_ret;
150 if (presp_data->Type == cpu_to_le32(-1))
151 goto posix_open_ret; /* open ok, caller does qpathinfo */
153 if (!pinode)
154 goto posix_open_ret; /* caller does not need info */
156 cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
158 /* get new inode and set it up */
159 if (*pinode == NULL) {
160 cifs_fill_uniqueid(sb, &fattr);
161 *pinode = cifs_iget(sb, &fattr);
162 if (!*pinode) {
163 rc = -ENOMEM;
164 goto posix_open_ret;
166 } else {
167 cifs_fattr_to_inode(*pinode, &fattr);
170 posix_open_ret:
171 kfree(presp_data);
172 return rc;
175 static int
176 cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
177 struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
178 struct cifs_fid *fid, unsigned int xid)
180 int rc;
181 int desired_access;
182 int disposition;
183 int create_options = CREATE_NOT_DIR;
184 FILE_ALL_INFO *buf;
185 struct TCP_Server_Info *server = tcon->ses->server;
186 struct cifs_open_parms oparms;
188 if (!server->ops->open)
189 return -ENOSYS;
191 desired_access = cifs_convert_flags(f_flags);
193 /*********************************************************************
194 * open flag mapping table:
196 * POSIX Flag CIFS Disposition
197 * ---------- ----------------
198 * O_CREAT FILE_OPEN_IF
199 * O_CREAT | O_EXCL FILE_CREATE
200 * O_CREAT | O_TRUNC FILE_OVERWRITE_IF
201 * O_TRUNC FILE_OVERWRITE
202 * none of the above FILE_OPEN
204 * Note that there is not a direct match between disposition
205 * FILE_SUPERSEDE (ie create whether or not file exists although
206 * O_CREAT | O_TRUNC is similar but truncates the existing
207 * file rather than creating a new file as FILE_SUPERSEDE does
208 * (which uses the attributes / metadata passed in on open call)
210 *? O_SYNC is a reasonable match to CIFS writethrough flag
211 *? and the read write flags match reasonably. O_LARGEFILE
212 *? is irrelevant because largefile support is always used
213 *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
214 * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
215 *********************************************************************/
217 disposition = cifs_get_disposition(f_flags);
219 /* BB pass O_SYNC flag through on file attributes .. BB */
221 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
222 if (!buf)
223 return -ENOMEM;
225 /* O_SYNC also has bit for O_DSYNC so following check picks up either */
226 if (f_flags & O_SYNC)
227 create_options |= CREATE_WRITE_THROUGH;
229 if (f_flags & O_DIRECT)
230 create_options |= CREATE_NO_BUFFER;
232 oparms.tcon = tcon;
233 oparms.cifs_sb = cifs_sb;
234 oparms.desired_access = desired_access;
235 oparms.create_options = cifs_create_options(cifs_sb, create_options);
236 oparms.disposition = disposition;
237 oparms.path = full_path;
238 oparms.fid = fid;
239 oparms.reconnect = false;
241 rc = server->ops->open(xid, &oparms, oplock, buf);
243 if (rc)
244 goto out;
246 if (tcon->unix_ext)
247 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
248 xid);
249 else
250 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
251 xid, fid);
253 if (rc) {
254 server->ops->close(xid, tcon, fid);
255 if (rc == -ESTALE)
256 rc = -EOPENSTALE;
259 out:
260 kfree(buf);
261 return rc;
264 static bool
265 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
267 struct cifs_fid_locks *cur;
268 bool has_locks = false;
270 down_read(&cinode->lock_sem);
271 list_for_each_entry(cur, &cinode->llist, llist) {
272 if (!list_empty(&cur->locks)) {
273 has_locks = true;
274 break;
277 up_read(&cinode->lock_sem);
278 return has_locks;
/*
 * Take @sem for writing by polling down_write_trylock(), sleeping 10ms
 * between attempts, instead of calling down_write().
 */
void
cifs_down_write(struct rw_semaphore *sem)
{
	for (;;) {
		if (down_write_trylock(sem))
			return;
		msleep(10);
	}
}
288 static void cifsFileInfo_put_work(struct work_struct *work);
290 struct cifsFileInfo *
291 cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
292 struct tcon_link *tlink, __u32 oplock)
294 struct dentry *dentry = file_dentry(file);
295 struct inode *inode = d_inode(dentry);
296 struct cifsInodeInfo *cinode = CIFS_I(inode);
297 struct cifsFileInfo *cfile;
298 struct cifs_fid_locks *fdlocks;
299 struct cifs_tcon *tcon = tlink_tcon(tlink);
300 struct TCP_Server_Info *server = tcon->ses->server;
302 cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
303 if (cfile == NULL)
304 return cfile;
306 fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
307 if (!fdlocks) {
308 kfree(cfile);
309 return NULL;
312 INIT_LIST_HEAD(&fdlocks->locks);
313 fdlocks->cfile = cfile;
314 cfile->llist = fdlocks;
316 cfile->count = 1;
317 cfile->pid = current->tgid;
318 cfile->uid = current_fsuid();
319 cfile->dentry = dget(dentry);
320 cfile->f_flags = file->f_flags;
321 cfile->invalidHandle = false;
322 cfile->tlink = cifs_get_tlink(tlink);
323 INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
324 INIT_WORK(&cfile->put, cifsFileInfo_put_work);
325 mutex_init(&cfile->fh_mutex);
326 spin_lock_init(&cfile->file_info_lock);
328 cifs_sb_active(inode->i_sb);
331 * If the server returned a read oplock and we have mandatory brlocks,
332 * set oplock level to None.
334 if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
335 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
336 oplock = 0;
339 cifs_down_write(&cinode->lock_sem);
340 list_add(&fdlocks->llist, &cinode->llist);
341 up_write(&cinode->lock_sem);
343 spin_lock(&tcon->open_file_lock);
344 if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
345 oplock = fid->pending_open->oplock;
346 list_del(&fid->pending_open->olist);
348 fid->purge_cache = false;
349 server->ops->set_fid(cfile, fid, oplock);
351 list_add(&cfile->tlist, &tcon->openFileList);
352 atomic_inc(&tcon->num_local_opens);
354 /* if readable file instance put first in list*/
355 spin_lock(&cinode->open_file_lock);
356 if (file->f_mode & FMODE_READ)
357 list_add(&cfile->flist, &cinode->openFileList);
358 else
359 list_add_tail(&cfile->flist, &cinode->openFileList);
360 spin_unlock(&cinode->open_file_lock);
361 spin_unlock(&tcon->open_file_lock);
363 if (fid->purge_cache)
364 cifs_zap_mapping(inode);
366 file->private_data = cfile;
367 return cfile;
370 struct cifsFileInfo *
371 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
373 spin_lock(&cifs_file->file_info_lock);
374 cifsFileInfo_get_locked(cifs_file);
375 spin_unlock(&cifs_file->file_info_lock);
376 return cifs_file;
379 static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
381 struct inode *inode = d_inode(cifs_file->dentry);
382 struct cifsInodeInfo *cifsi = CIFS_I(inode);
383 struct cifsLockInfo *li, *tmp;
384 struct super_block *sb = inode->i_sb;
387 * Delete any outstanding lock records. We'll lose them when the file
388 * is closed anyway.
390 cifs_down_write(&cifsi->lock_sem);
391 list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
392 list_del(&li->llist);
393 cifs_del_lock_waiters(li);
394 kfree(li);
396 list_del(&cifs_file->llist->llist);
397 kfree(cifs_file->llist);
398 up_write(&cifsi->lock_sem);
400 cifs_put_tlink(cifs_file->tlink);
401 dput(cifs_file->dentry);
402 cifs_sb_deactive(sb);
403 kfree(cifs_file);
406 static void cifsFileInfo_put_work(struct work_struct *work)
408 struct cifsFileInfo *cifs_file = container_of(work,
409 struct cifsFileInfo, put);
411 cifsFileInfo_put_final(cifs_file);
415 * cifsFileInfo_put - release a reference of file priv data
417 * Always potentially wait for oplock handler. See _cifsFileInfo_put().
419 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
421 _cifsFileInfo_put(cifs_file, true, true);
425 * _cifsFileInfo_put - release a reference of file priv data
427 * This may involve closing the filehandle @cifs_file out on the
428 * server. Must be called without holding tcon->open_file_lock,
429 * cinode->open_file_lock and cifs_file->file_info_lock.
431 * If @wait_for_oplock_handler is true and we are releasing the last
432 * reference, wait for any running oplock break handler of the file
433 * and cancel any pending one. If calling this function from the
434 * oplock break handler, you need to pass false.
437 void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
438 bool wait_oplock_handler, bool offload)
440 struct inode *inode = d_inode(cifs_file->dentry);
441 struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
442 struct TCP_Server_Info *server = tcon->ses->server;
443 struct cifsInodeInfo *cifsi = CIFS_I(inode);
444 struct super_block *sb = inode->i_sb;
445 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
446 struct cifs_fid fid;
447 struct cifs_pending_open open;
448 bool oplock_break_cancelled;
450 spin_lock(&tcon->open_file_lock);
451 spin_lock(&cifsi->open_file_lock);
452 spin_lock(&cifs_file->file_info_lock);
453 if (--cifs_file->count > 0) {
454 spin_unlock(&cifs_file->file_info_lock);
455 spin_unlock(&cifsi->open_file_lock);
456 spin_unlock(&tcon->open_file_lock);
457 return;
459 spin_unlock(&cifs_file->file_info_lock);
461 if (server->ops->get_lease_key)
462 server->ops->get_lease_key(inode, &fid);
464 /* store open in pending opens to make sure we don't miss lease break */
465 cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
467 /* remove it from the lists */
468 list_del(&cifs_file->flist);
469 list_del(&cifs_file->tlist);
470 atomic_dec(&tcon->num_local_opens);
472 if (list_empty(&cifsi->openFileList)) {
473 cifs_dbg(FYI, "closing last open instance for inode %p\n",
474 d_inode(cifs_file->dentry));
476 * In strict cache mode we need invalidate mapping on the last
477 * close because it may cause a error when we open this file
478 * again and get at least level II oplock.
480 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
481 set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
482 cifs_set_oplock_level(cifsi, 0);
485 spin_unlock(&cifsi->open_file_lock);
486 spin_unlock(&tcon->open_file_lock);
488 oplock_break_cancelled = wait_oplock_handler ?
489 cancel_work_sync(&cifs_file->oplock_break) : false;
491 if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
492 struct TCP_Server_Info *server = tcon->ses->server;
493 unsigned int xid;
495 xid = get_xid();
496 if (server->ops->close_getattr)
497 server->ops->close_getattr(xid, tcon, cifs_file);
498 else if (server->ops->close)
499 server->ops->close(xid, tcon, &cifs_file->fid);
500 _free_xid(xid);
503 if (oplock_break_cancelled)
504 cifs_done_oplock_break(cifsi);
506 cifs_del_pending_open(&open);
508 if (offload)
509 queue_work(fileinfo_put_wq, &cifs_file->put);
510 else
511 cifsFileInfo_put_final(cifs_file);
514 int cifs_open(struct inode *inode, struct file *file)
517 int rc = -EACCES;
518 unsigned int xid;
519 __u32 oplock;
520 struct cifs_sb_info *cifs_sb;
521 struct TCP_Server_Info *server;
522 struct cifs_tcon *tcon;
523 struct tcon_link *tlink;
524 struct cifsFileInfo *cfile = NULL;
525 char *full_path = NULL;
526 bool posix_open_ok = false;
527 struct cifs_fid fid;
528 struct cifs_pending_open open;
530 xid = get_xid();
532 cifs_sb = CIFS_SB(inode->i_sb);
533 tlink = cifs_sb_tlink(cifs_sb);
534 if (IS_ERR(tlink)) {
535 free_xid(xid);
536 return PTR_ERR(tlink);
538 tcon = tlink_tcon(tlink);
539 server = tcon->ses->server;
541 full_path = build_path_from_dentry(file_dentry(file));
542 if (full_path == NULL) {
543 rc = -ENOMEM;
544 goto out;
547 cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
548 inode, file->f_flags, full_path);
550 if (file->f_flags & O_DIRECT &&
551 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
552 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
553 file->f_op = &cifs_file_direct_nobrl_ops;
554 else
555 file->f_op = &cifs_file_direct_ops;
558 if (server->oplocks)
559 oplock = REQ_OPLOCK;
560 else
561 oplock = 0;
563 if (!tcon->broken_posix_open && tcon->unix_ext &&
564 cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
565 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
566 /* can not refresh inode info since size could be stale */
567 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
568 cifs_sb->mnt_file_mode /* ignored */,
569 file->f_flags, &oplock, &fid.netfid, xid);
570 if (rc == 0) {
571 cifs_dbg(FYI, "posix open succeeded\n");
572 posix_open_ok = true;
573 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
574 if (tcon->ses->serverNOS)
575 cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
576 tcon->ses->serverName,
577 tcon->ses->serverNOS);
578 tcon->broken_posix_open = true;
579 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
580 (rc != -EOPNOTSUPP)) /* path not found or net err */
581 goto out;
583 * Else fallthrough to retry open the old way on network i/o
584 * or DFS errors.
588 if (server->ops->get_lease_key)
589 server->ops->get_lease_key(inode, &fid);
591 cifs_add_pending_open(&fid, tlink, &open);
593 if (!posix_open_ok) {
594 if (server->ops->get_lease_key)
595 server->ops->get_lease_key(inode, &fid);
597 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
598 file->f_flags, &oplock, &fid, xid);
599 if (rc) {
600 cifs_del_pending_open(&open);
601 goto out;
605 cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
606 if (cfile == NULL) {
607 if (server->ops->close)
608 server->ops->close(xid, tcon, &fid);
609 cifs_del_pending_open(&open);
610 rc = -ENOMEM;
611 goto out;
614 cifs_fscache_set_inode_cookie(inode, file);
616 if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
618 * Time to set mode which we can not set earlier due to
619 * problems creating new read-only files.
621 struct cifs_unix_set_info_args args = {
622 .mode = inode->i_mode,
623 .uid = INVALID_UID, /* no change */
624 .gid = INVALID_GID, /* no change */
625 .ctime = NO_CHANGE_64,
626 .atime = NO_CHANGE_64,
627 .mtime = NO_CHANGE_64,
628 .device = 0,
630 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
631 cfile->pid);
634 out:
635 kfree(full_path);
636 free_xid(xid);
637 cifs_put_tlink(tlink);
638 return rc;
641 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
644 * Try to reacquire byte range locks that were released when session
645 * to server was lost.
647 static int
648 cifs_relock_file(struct cifsFileInfo *cfile)
650 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
651 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
652 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
653 int rc = 0;
655 down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
656 if (cinode->can_cache_brlcks) {
657 /* can cache locks - no need to relock */
658 up_read(&cinode->lock_sem);
659 return rc;
662 if (cap_unix(tcon->ses) &&
663 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
664 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
665 rc = cifs_push_posix_locks(cfile);
666 else
667 rc = tcon->ses->server->ops->push_mand_locks(cfile);
669 up_read(&cinode->lock_sem);
670 return rc;
673 static int
674 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
676 int rc = -EACCES;
677 unsigned int xid;
678 __u32 oplock;
679 struct cifs_sb_info *cifs_sb;
680 struct cifs_tcon *tcon;
681 struct TCP_Server_Info *server;
682 struct cifsInodeInfo *cinode;
683 struct inode *inode;
684 char *full_path = NULL;
685 int desired_access;
686 int disposition = FILE_OPEN;
687 int create_options = CREATE_NOT_DIR;
688 struct cifs_open_parms oparms;
690 xid = get_xid();
691 mutex_lock(&cfile->fh_mutex);
692 if (!cfile->invalidHandle) {
693 mutex_unlock(&cfile->fh_mutex);
694 rc = 0;
695 free_xid(xid);
696 return rc;
699 inode = d_inode(cfile->dentry);
700 cifs_sb = CIFS_SB(inode->i_sb);
701 tcon = tlink_tcon(cfile->tlink);
702 server = tcon->ses->server;
705 * Can not grab rename sem here because various ops, including those
706 * that already have the rename sem can end up causing writepage to get
707 * called and if the server was down that means we end up here, and we
708 * can never tell if the caller already has the rename_sem.
710 full_path = build_path_from_dentry(cfile->dentry);
711 if (full_path == NULL) {
712 rc = -ENOMEM;
713 mutex_unlock(&cfile->fh_mutex);
714 free_xid(xid);
715 return rc;
718 cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
719 inode, cfile->f_flags, full_path);
721 if (tcon->ses->server->oplocks)
722 oplock = REQ_OPLOCK;
723 else
724 oplock = 0;
726 if (tcon->unix_ext && cap_unix(tcon->ses) &&
727 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
728 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
730 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
731 * original open. Must mask them off for a reopen.
733 unsigned int oflags = cfile->f_flags &
734 ~(O_CREAT | O_EXCL | O_TRUNC);
736 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
737 cifs_sb->mnt_file_mode /* ignored */,
738 oflags, &oplock, &cfile->fid.netfid, xid);
739 if (rc == 0) {
740 cifs_dbg(FYI, "posix reopen succeeded\n");
741 oparms.reconnect = true;
742 goto reopen_success;
745 * fallthrough to retry open the old way on errors, especially
746 * in the reconnect path it is important to retry hard
750 desired_access = cifs_convert_flags(cfile->f_flags);
752 /* O_SYNC also has bit for O_DSYNC so following check picks up either */
753 if (cfile->f_flags & O_SYNC)
754 create_options |= CREATE_WRITE_THROUGH;
756 if (cfile->f_flags & O_DIRECT)
757 create_options |= CREATE_NO_BUFFER;
759 if (server->ops->get_lease_key)
760 server->ops->get_lease_key(inode, &cfile->fid);
762 oparms.tcon = tcon;
763 oparms.cifs_sb = cifs_sb;
764 oparms.desired_access = desired_access;
765 oparms.create_options = cifs_create_options(cifs_sb, create_options);
766 oparms.disposition = disposition;
767 oparms.path = full_path;
768 oparms.fid = &cfile->fid;
769 oparms.reconnect = true;
772 * Can not refresh inode by passing in file_info buf to be returned by
773 * ops->open and then calling get_inode_info with returned buf since
774 * file might have write behind data that needs to be flushed and server
775 * version of file size can be stale. If we knew for sure that inode was
776 * not dirty locally we could do this.
778 rc = server->ops->open(xid, &oparms, &oplock, NULL);
779 if (rc == -ENOENT && oparms.reconnect == false) {
780 /* durable handle timeout is expired - open the file again */
781 rc = server->ops->open(xid, &oparms, &oplock, NULL);
782 /* indicate that we need to relock the file */
783 oparms.reconnect = true;
786 if (rc) {
787 mutex_unlock(&cfile->fh_mutex);
788 cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
789 cifs_dbg(FYI, "oplock: %d\n", oplock);
790 goto reopen_error_exit;
793 reopen_success:
794 cfile->invalidHandle = false;
795 mutex_unlock(&cfile->fh_mutex);
796 cinode = CIFS_I(inode);
798 if (can_flush) {
799 rc = filemap_write_and_wait(inode->i_mapping);
800 if (!is_interrupt_error(rc))
801 mapping_set_error(inode->i_mapping, rc);
803 if (tcon->unix_ext)
804 rc = cifs_get_inode_info_unix(&inode, full_path,
805 inode->i_sb, xid);
806 else
807 rc = cifs_get_inode_info(&inode, full_path, NULL,
808 inode->i_sb, xid, NULL);
811 * Else we are writing out data to server already and could deadlock if
812 * we tried to flush data, and since we do not know if we have data that
813 * would invalidate the current end of file on the server we can not go
814 * to the server to get the new inode info.
818 * If the server returned a read oplock and we have mandatory brlocks,
819 * set oplock level to None.
821 if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
822 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
823 oplock = 0;
826 server->ops->set_fid(cfile, &cfile->fid, oplock);
827 if (oparms.reconnect)
828 cifs_relock_file(cfile);
830 reopen_error_exit:
831 kfree(full_path);
832 free_xid(xid);
833 return rc;
836 int cifs_close(struct inode *inode, struct file *file)
838 if (file->private_data != NULL) {
839 _cifsFileInfo_put(file->private_data, true, false);
840 file->private_data = NULL;
843 /* return code from the ->release op is always ignored */
844 return 0;
847 void
848 cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
850 struct cifsFileInfo *open_file;
851 struct list_head *tmp;
852 struct list_head *tmp1;
853 struct list_head tmp_list;
855 if (!tcon->use_persistent || !tcon->need_reopen_files)
856 return;
858 tcon->need_reopen_files = false;
860 cifs_dbg(FYI, "Reopen persistent handles");
861 INIT_LIST_HEAD(&tmp_list);
863 /* list all files open on tree connection, reopen resilient handles */
864 spin_lock(&tcon->open_file_lock);
865 list_for_each(tmp, &tcon->openFileList) {
866 open_file = list_entry(tmp, struct cifsFileInfo, tlist);
867 if (!open_file->invalidHandle)
868 continue;
869 cifsFileInfo_get(open_file);
870 list_add_tail(&open_file->rlist, &tmp_list);
872 spin_unlock(&tcon->open_file_lock);
874 list_for_each_safe(tmp, tmp1, &tmp_list) {
875 open_file = list_entry(tmp, struct cifsFileInfo, rlist);
876 if (cifs_reopen_file(open_file, false /* do not flush */))
877 tcon->need_reopen_files = true;
878 list_del_init(&open_file->rlist);
879 cifsFileInfo_put(open_file);
883 int cifs_closedir(struct inode *inode, struct file *file)
885 int rc = 0;
886 unsigned int xid;
887 struct cifsFileInfo *cfile = file->private_data;
888 struct cifs_tcon *tcon;
889 struct TCP_Server_Info *server;
890 char *buf;
892 cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
894 if (cfile == NULL)
895 return rc;
897 xid = get_xid();
898 tcon = tlink_tcon(cfile->tlink);
899 server = tcon->ses->server;
901 cifs_dbg(FYI, "Freeing private data in close dir\n");
902 spin_lock(&cfile->file_info_lock);
903 if (server->ops->dir_needs_close(cfile)) {
904 cfile->invalidHandle = true;
905 spin_unlock(&cfile->file_info_lock);
906 if (server->ops->close_dir)
907 rc = server->ops->close_dir(xid, tcon, &cfile->fid);
908 else
909 rc = -ENOSYS;
910 cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
911 /* not much we can do if it fails anyway, ignore rc */
912 rc = 0;
913 } else
914 spin_unlock(&cfile->file_info_lock);
916 buf = cfile->srch_inf.ntwrk_buf_start;
917 if (buf) {
918 cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
919 cfile->srch_inf.ntwrk_buf_start = NULL;
920 if (cfile->srch_inf.smallBuf)
921 cifs_small_buf_release(buf);
922 else
923 cifs_buf_release(buf);
926 cifs_put_tlink(cfile->tlink);
927 kfree(file->private_data);
928 file->private_data = NULL;
929 /* BB can we lock the filestruct while this is going on? */
930 free_xid(xid);
931 return rc;
934 static struct cifsLockInfo *
935 cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
937 struct cifsLockInfo *lock =
938 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
939 if (!lock)
940 return lock;
941 lock->offset = offset;
942 lock->length = length;
943 lock->type = type;
944 lock->pid = current->tgid;
945 lock->flags = flags;
946 INIT_LIST_HEAD(&lock->blist);
947 init_waitqueue_head(&lock->block_q);
948 return lock;
951 void
952 cifs_del_lock_waiters(struct cifsLockInfo *lock)
954 struct cifsLockInfo *li, *tmp;
955 list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
956 list_del_init(&li->blist);
957 wake_up(&li->block_q);
961 #define CIFS_LOCK_OP 0
962 #define CIFS_READ_OP 1
963 #define CIFS_WRITE_OP 2
965 /* @rw_check : 0 - no op, 1 - read, 2 - write */
966 static bool
967 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
968 __u64 length, __u8 type, __u16 flags,
969 struct cifsFileInfo *cfile,
970 struct cifsLockInfo **conf_lock, int rw_check)
972 struct cifsLockInfo *li;
973 struct cifsFileInfo *cur_cfile = fdlocks->cfile;
974 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
976 list_for_each_entry(li, &fdlocks->locks, llist) {
977 if (offset + length <= li->offset ||
978 offset >= li->offset + li->length)
979 continue;
980 if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
981 server->ops->compare_fids(cfile, cur_cfile)) {
982 /* shared lock prevents write op through the same fid */
983 if (!(li->type & server->vals->shared_lock_type) ||
984 rw_check != CIFS_WRITE_OP)
985 continue;
987 if ((type & server->vals->shared_lock_type) &&
988 ((server->ops->compare_fids(cfile, cur_cfile) &&
989 current->tgid == li->pid) || type == li->type))
990 continue;
991 if (rw_check == CIFS_LOCK_OP &&
992 (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
993 server->ops->compare_fids(cfile, cur_cfile))
994 continue;
995 if (conf_lock)
996 *conf_lock = li;
997 return true;
999 return false;
1002 bool
1003 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1004 __u8 type, __u16 flags,
1005 struct cifsLockInfo **conf_lock, int rw_check)
1007 bool rc = false;
1008 struct cifs_fid_locks *cur;
1009 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1011 list_for_each_entry(cur, &cinode->llist, llist) {
1012 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
1013 flags, cfile, conf_lock,
1014 rw_check);
1015 if (rc)
1016 break;
1019 return rc;
1023 * Check if there is another lock that prevents us to set the lock (mandatory
1024 * style). If such a lock exists, update the flock structure with its
1025 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1026 * or leave it the same if we can't. Returns 0 if we don't need to request to
1027 * the server or 1 otherwise.
1029 static int
1030 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1031 __u8 type, struct file_lock *flock)
1033 int rc = 0;
1034 struct cifsLockInfo *conf_lock;
1035 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1036 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1037 bool exist;
1039 down_read(&cinode->lock_sem);
1041 exist = cifs_find_lock_conflict(cfile, offset, length, type,
1042 flock->fl_flags, &conf_lock,
1043 CIFS_LOCK_OP);
1044 if (exist) {
1045 flock->fl_start = conf_lock->offset;
1046 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
1047 flock->fl_pid = conf_lock->pid;
1048 if (conf_lock->type & server->vals->shared_lock_type)
1049 flock->fl_type = F_RDLCK;
1050 else
1051 flock->fl_type = F_WRLCK;
1052 } else if (!cinode->can_cache_brlcks)
1053 rc = 1;
1054 else
1055 flock->fl_type = F_UNLCK;
1057 up_read(&cinode->lock_sem);
1058 return rc;
1061 static void
1062 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
1064 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1065 cifs_down_write(&cinode->lock_sem);
1066 list_add_tail(&lock->llist, &cfile->llist->locks);
1067 up_write(&cinode->lock_sem);
1071 * Set the byte-range lock (mandatory style). Returns:
1072 * 1) 0, if we set the lock and don't need to request to the server;
1073 * 2) 1, if no locks prevent us but we need to request to the server;
1074 * 3) -EACCES, if there is a lock that prevents us and wait is false.
/*
 * Try to record @lock in @cfile's list of byte-range locks.
 *
 * The conflict lookup and any list update are done with the inode's
 * lock_sem held for writing. When a conflicting lock exists and @wait is
 * true, @lock is queued on the conflicting lock's blist and the caller
 * sleeps (interruptibly) until @lock's blist entry points back at itself,
 * after which the whole check is repeated from try_again.
 */
1076 static int
1077 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
1078 bool wait)
1080 struct cifsLockInfo *conf_lock;
1081 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1082 bool exist;
1083 int rc = 0;
1085 try_again:
1086 exist = false;
1087 cifs_down_write(&cinode->lock_sem);
1089 exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
1090 lock->type, lock->flags, &conf_lock,
1091 CIFS_LOCK_OP);
/* No conflict and brlocks are cacheable: keep the lock locally, rc is 0. */
1092 if (!exist && cinode->can_cache_brlcks) {
1093 list_add_tail(&lock->llist, &cfile->llist->locks);
1094 up_write(&cinode->lock_sem);
1095 return rc;
/* No conflict, but brlocks are not cacheable: tell the caller to go on. */
1098 if (!exist)
1099 rc = 1;
1100 else if (!wait)
1101 rc = -EACCES;
1102 else {
/* Queue behind the conflicting lock and drop lock_sem before sleeping. */
1103 list_add_tail(&lock->blist, &conf_lock->blist);
1104 up_write(&cinode->lock_sem);
/* Wake condition: our blist node is linked only to itself again. */
1105 rc = wait_event_interruptible(lock->block_q,
1106 (lock->blist.prev == &lock->blist) &&
1107 (lock->blist.next == &lock->blist));
1108 if (!rc)
1109 goto try_again;
/* Non-zero wait result: unlink ourselves from the blocker's list. */
1110 cifs_down_write(&cinode->lock_sem);
1111 list_del_init(&lock->blist);
1114 up_write(&cinode->lock_sem);
1115 return rc;
1119 * Check if there is another lock that prevents us to set the lock (posix
1120 * style). If such a lock exists, update the flock structure with its
1121 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1122 * or leave it the same if we can't. Returns 0 if we don't need to request to
1123 * the server or 1 otherwise.
1125 static int
1126 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1128 int rc = 0;
1129 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1130 unsigned char saved_type = flock->fl_type;
1132 if ((flock->fl_flags & FL_POSIX) == 0)
1133 return 1;
1135 down_read(&cinode->lock_sem);
1136 posix_test_lock(file, flock);
1138 if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1139 flock->fl_type = saved_type;
1140 rc = 1;
1143 up_read(&cinode->lock_sem);
1144 return rc;
1148 * Set the byte-range lock (posix style). Returns:
1149 * 1) 0, if we set the lock and don't need to request to the server;
1150 * 2) 1, if we need to request to the server;
1151 * 3) <0, if the error occurs while setting the lock.
1153 static int
1154 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1156 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1157 int rc = 1;
1159 if ((flock->fl_flags & FL_POSIX) == 0)
1160 return rc;
1162 try_again:
1163 cifs_down_write(&cinode->lock_sem);
1164 if (!cinode->can_cache_brlcks) {
1165 up_write(&cinode->lock_sem);
1166 return rc;
1169 rc = posix_lock_file(file, flock, NULL);
1170 up_write(&cinode->lock_sem);
1171 if (rc == FILE_LOCK_DEFERRED) {
1172 rc = wait_event_interruptible(flock->fl_wait,
1173 list_empty(&flock->fl_blocked_member));
1174 if (!rc)
1175 goto try_again;
1176 locks_delete_block(flock);
1178 return rc;
/*
 * Send every lock on cfile->llist->locks to the server with cifs_lockv().
 *
 * Ranges are packed into a temporary LOCKING_ANDX_RANGE array and flushed
 * whenever max_num entries have accumulated, plus once more for a partial
 * batch. One pass over the list is made per entry in types[], so each
 * request only carries locks of a single type.
 *
 * Returns -EINVAL if the server's maxBuf is too small for one range,
 * -ENOMEM if the array cannot be allocated, otherwise 0 or the most recent
 * non-zero cifs_lockv() result.
 */
1182 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1184 unsigned int xid;
1185 int rc = 0, stored_rc;
1186 struct cifsLockInfo *li, *tmp;
1187 struct cifs_tcon *tcon;
1188 unsigned int num, max_num, max_buf;
1189 LOCKING_ANDX_RANGE *buf, *cur;
/* The two lock types pushed: without and with LOCKING_ANDX_SHARED_LOCK. */
1190 static const int types[] = {
1191 LOCKING_ANDX_LARGE_FILES,
1192 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1194 int i;
1196 xid = get_xid();
1197 tcon = tlink_tcon(cfile->tlink);
1200 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1201 * and check it before using.
1203 max_buf = tcon->ses->server->maxBuf;
1204 if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
1205 free_xid(xid);
1206 return -EINVAL;
1209 BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1210 PAGE_SIZE);
/*
 * NOTE(review): sizeof(struct smb_hdr) is subtracted here and again when
 * max_num is computed below - confirm the double subtraction is intended
 * and cannot underflow for a small maxBuf.
 */
1211 max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1212 PAGE_SIZE);
1213 max_num = (max_buf - sizeof(struct smb_hdr)) /
1214 sizeof(LOCKING_ANDX_RANGE);
1215 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1216 if (!buf) {
1217 free_xid(xid);
1218 return -ENOMEM;
1221 for (i = 0; i < 2; i++) {
/* restart the batch for this lock type */
1222 cur = buf;
1223 num = 0;
1224 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1225 if (li->type != types[i])
1226 continue;
/* split the 64-bit offset/length into the 32-bit wire fields */
1227 cur->Pid = cpu_to_le16(li->pid);
1228 cur->LengthLow = cpu_to_le32((u32)li->length);
1229 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1230 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1231 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
/* array is full: flush it (li->type equals types[i] here) and start over */
1232 if (++num == max_num) {
1233 stored_rc = cifs_lockv(xid, tcon,
1234 cfile->fid.netfid,
1235 (__u8)li->type, 0, num,
1236 buf);
1237 if (stored_rc)
1238 rc = stored_rc;
1239 cur = buf;
1240 num = 0;
1241 } else
1242 cur++;
/* flush the final, partially filled batch for this type */
1245 if (num) {
1246 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1247 (__u8)types[i], 0, num, buf);
1248 if (stored_rc)
1249 rc = stored_rc;
1253 kfree(buf);
1254 free_xid(xid);
1255 return rc;
1258 static __u32
1259 hash_lockowner(fl_owner_t owner)
1261 return cifs_lock_secret ^ hash32_ptr((const void *)owner);
/*
 * Snapshot of one posix lock, filled in by cifs_push_posix_locks() while
 * it holds flc_lock and sent to the server after the lock is dropped.
 */
1264 struct lock_to_push {
/* link in the local list of snapshots */
1265 struct list_head llist;
/* start of the range (flock->fl_start) */
1266 __u64 offset;
/* range length (1 + fl_end - fl_start) */
1267 __u64 length;
/* hash_lockowner() of the lock's owner */
1268 __u32 pid;
/* cfile->fid.netfid of the handle being pushed */
1269 __u16 netfid;
/* CIFS_RDLCK or CIFS_WRLCK */
1270 __u8 type;
1273 static int
1274 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1276 struct inode *inode = d_inode(cfile->dentry);
1277 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1278 struct file_lock *flock;
1279 struct file_lock_context *flctx = inode->i_flctx;
1280 unsigned int count = 0, i;
1281 int rc = 0, xid, type;
1282 struct list_head locks_to_send, *el;
1283 struct lock_to_push *lck, *tmp;
1284 __u64 length;
1286 xid = get_xid();
1288 if (!flctx)
1289 goto out;
1291 spin_lock(&flctx->flc_lock);
1292 list_for_each(el, &flctx->flc_posix) {
1293 count++;
1295 spin_unlock(&flctx->flc_lock);
1297 INIT_LIST_HEAD(&locks_to_send);
1300 * Allocating count locks is enough because no FL_POSIX locks can be
1301 * added to the list while we are holding cinode->lock_sem that
1302 * protects locking operations of this inode.
1304 for (i = 0; i < count; i++) {
1305 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1306 if (!lck) {
1307 rc = -ENOMEM;
1308 goto err_out;
1310 list_add_tail(&lck->llist, &locks_to_send);
1313 el = locks_to_send.next;
1314 spin_lock(&flctx->flc_lock);
1315 list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1316 if (el == &locks_to_send) {
1318 * The list ended. We don't have enough allocated
1319 * structures - something is really wrong.
1321 cifs_dbg(VFS, "Can't push all brlocks!\n");
1322 break;
1324 length = 1 + flock->fl_end - flock->fl_start;
1325 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1326 type = CIFS_RDLCK;
1327 else
1328 type = CIFS_WRLCK;
1329 lck = list_entry(el, struct lock_to_push, llist);
1330 lck->pid = hash_lockowner(flock->fl_owner);
1331 lck->netfid = cfile->fid.netfid;
1332 lck->length = length;
1333 lck->type = type;
1334 lck->offset = flock->fl_start;
1336 spin_unlock(&flctx->flc_lock);
1338 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1339 int stored_rc;
1341 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1342 lck->offset, lck->length, NULL,
1343 lck->type, 0);
1344 if (stored_rc)
1345 rc = stored_rc;
1346 list_del(&lck->llist);
1347 kfree(lck);
1350 out:
1351 free_xid(xid);
1352 return rc;
1353 err_out:
1354 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1355 list_del(&lck->llist);
1356 kfree(lck);
1358 goto out;
1361 static int
1362 cifs_push_locks(struct cifsFileInfo *cfile)
1364 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1365 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1366 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1367 int rc = 0;
1369 /* we are going to update can_cache_brlcks here - need a write access */
1370 cifs_down_write(&cinode->lock_sem);
1371 if (!cinode->can_cache_brlcks) {
1372 up_write(&cinode->lock_sem);
1373 return rc;
1376 if (cap_unix(tcon->ses) &&
1377 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1378 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1379 rc = cifs_push_posix_locks(cfile);
1380 else
1381 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1383 cinode->can_cache_brlcks = false;
1384 up_write(&cinode->lock_sem);
1385 return rc;
1388 static void
1389 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1390 bool *wait_flag, struct TCP_Server_Info *server)
1392 if (flock->fl_flags & FL_POSIX)
1393 cifs_dbg(FYI, "Posix\n");
1394 if (flock->fl_flags & FL_FLOCK)
1395 cifs_dbg(FYI, "Flock\n");
1396 if (flock->fl_flags & FL_SLEEP) {
1397 cifs_dbg(FYI, "Blocking lock\n");
1398 *wait_flag = true;
1400 if (flock->fl_flags & FL_ACCESS)
1401 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1402 if (flock->fl_flags & FL_LEASE)
1403 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1404 if (flock->fl_flags &
1405 (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1406 FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1407 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1409 *type = server->vals->large_lock_type;
1410 if (flock->fl_type == F_WRLCK) {
1411 cifs_dbg(FYI, "F_WRLCK\n");
1412 *type |= server->vals->exclusive_lock_type;
1413 *lock = 1;
1414 } else if (flock->fl_type == F_UNLCK) {
1415 cifs_dbg(FYI, "F_UNLCK\n");
1416 *type |= server->vals->unlock_lock_type;
1417 *unlock = 1;
1418 /* Check if unlock includes more than one lock range */
1419 } else if (flock->fl_type == F_RDLCK) {
1420 cifs_dbg(FYI, "F_RDLCK\n");
1421 *type |= server->vals->shared_lock_type;
1422 *lock = 1;
1423 } else if (flock->fl_type == F_EXLCK) {
1424 cifs_dbg(FYI, "F_EXLCK\n");
1425 *type |= server->vals->exclusive_lock_type;
1426 *lock = 1;
1427 } else if (flock->fl_type == F_SHLCK) {
1428 cifs_dbg(FYI, "F_SHLCK\n");
1429 *type |= server->vals->shared_lock_type;
1430 *lock = 1;
1431 } else
1432 cifs_dbg(FYI, "Unknown type of lock\n");
1435 static int
1436 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1437 bool wait_flag, bool posix_lck, unsigned int xid)
1439 int rc = 0;
1440 __u64 length = 1 + flock->fl_end - flock->fl_start;
1441 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1442 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1443 struct TCP_Server_Info *server = tcon->ses->server;
1444 __u16 netfid = cfile->fid.netfid;
1446 if (posix_lck) {
1447 int posix_lock_type;
1449 rc = cifs_posix_lock_test(file, flock);
1450 if (!rc)
1451 return rc;
1453 if (type & server->vals->shared_lock_type)
1454 posix_lock_type = CIFS_RDLCK;
1455 else
1456 posix_lock_type = CIFS_WRLCK;
1457 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1458 hash_lockowner(flock->fl_owner),
1459 flock->fl_start, length, flock,
1460 posix_lock_type, wait_flag);
1461 return rc;
1464 rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1465 if (!rc)
1466 return rc;
1468 /* BB we could chain these into one lock request BB */
1469 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1470 1, 0, false);
1471 if (rc == 0) {
1472 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1473 type, 0, 1, false);
1474 flock->fl_type = F_UNLCK;
1475 if (rc != 0)
1476 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1477 rc);
1478 return 0;
1481 if (type & server->vals->shared_lock_type) {
1482 flock->fl_type = F_WRLCK;
1483 return 0;
1486 type &= ~server->vals->exclusive_lock_type;
1488 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1489 type | server->vals->shared_lock_type,
1490 1, 0, false);
1491 if (rc == 0) {
1492 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1493 type | server->vals->shared_lock_type, 0, 1, false);
1494 flock->fl_type = F_RDLCK;
1495 if (rc != 0)
1496 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1497 rc);
1498 } else
1499 flock->fl_type = F_WRLCK;
1501 return 0;
1504 void
1505 cifs_move_llist(struct list_head *source, struct list_head *dest)
1507 struct list_head *li, *tmp;
1508 list_for_each_safe(li, tmp, source)
1509 list_move(li, dest);
1512 void
1513 cifs_free_llist(struct list_head *llist)
1515 struct cifsLockInfo *li, *tmp;
1516 list_for_each_entry_safe(li, tmp, llist, llist) {
1517 cifs_del_lock_waiters(li);
1518 list_del(&li->llist);
1519 kfree(li);
/*
 * Drop the locks on cfile's list that lie entirely inside the range
 * described by @flock and whose pid equals current->tgid.
 *
 * While brlocks can be cached the matching entries are simply freed.
 * Otherwise they are packed into a LOCKING_ANDX_RANGE array and sent to
 * the server with cifs_lockv() in batches of up to max_num. Entries of a
 * batch are parked on tmp_llist: they are freed when the request succeeds
 * and moved back onto the file's list when it fails.
 *
 * Returns -EINVAL if maxBuf is too small for one range, -ENOMEM if the
 * array cannot be allocated, otherwise 0 or the most recent non-zero
 * cifs_lockv() result.
 */
1524 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1525 unsigned int xid)
1527 int rc = 0, stored_rc;
/* One pass per lock type: without and with LOCKING_ANDX_SHARED_LOCK. */
1528 static const int types[] = {
1529 LOCKING_ANDX_LARGE_FILES,
1530 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1532 unsigned int i;
1533 unsigned int max_num, num, max_buf;
1534 LOCKING_ANDX_RANGE *buf, *cur;
1535 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1536 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1537 struct cifsLockInfo *li, *tmp;
1538 __u64 length = 1 + flock->fl_end - flock->fl_start;
1539 struct list_head tmp_llist;
1541 INIT_LIST_HEAD(&tmp_llist);
1544 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1545 * and check it before using.
1547 max_buf = tcon->ses->server->maxBuf;
1548 if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1549 return -EINVAL;
1551 BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1552 PAGE_SIZE);
/*
 * NOTE(review): sizeof(struct smb_hdr) is subtracted here and again in the
 * max_num computation below - confirm this is intended.
 */
1553 max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1554 PAGE_SIZE);
1555 max_num = (max_buf - sizeof(struct smb_hdr)) /
1556 sizeof(LOCKING_ANDX_RANGE);
1557 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1558 if (!buf)
1559 return -ENOMEM;
/* lock_sem is held for writing across the whole scan, including requests */
1561 cifs_down_write(&cinode->lock_sem);
1562 for (i = 0; i < 2; i++) {
1563 cur = buf;
1564 num = 0;
1565 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
/* skip locks that are not fully contained in the unlock range */
1566 if (flock->fl_start > li->offset ||
1567 (flock->fl_start + length) <
1568 (li->offset + li->length))
1569 continue;
/* skip locks whose pid is not the calling thread group */
1570 if (current->tgid != li->pid)
1571 continue;
/* skip locks of the type handled by the other pass */
1572 if (types[i] != li->type)
1573 continue;
1574 if (cinode->can_cache_brlcks) {
1576 * We can cache brlock requests - simply remove
1577 * a lock from the file's list.
1579 list_del(&li->llist);
1580 cifs_del_lock_waiters(li);
1581 kfree(li);
1582 continue;
1584 cur->Pid = cpu_to_le16(li->pid);
1585 cur->LengthLow = cpu_to_le32((u32)li->length);
1586 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1587 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1588 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1590 * We need to save a lock here to let us add it again to
1591 * the file's list if the unlock range request fails on
1592 * the server.
1594 list_move(&li->llist, &tmp_llist);
/*
 * Batch is full. Here num is passed before the literal 0, the reverse of
 * cifs_push_mandatory_locks - presumably that slot is the unlock count;
 * confirm against cifs_lockv().
 */
1595 if (++num == max_num) {
1596 stored_rc = cifs_lockv(xid, tcon,
1597 cfile->fid.netfid,
1598 li->type, num, 0, buf);
1599 if (stored_rc) {
1601 * We failed on the unlock range
1602 * request - add all locks from the tmp
1603 * list to the head of the file's list.
1605 cifs_move_llist(&tmp_llist,
1606 &cfile->llist->locks);
1607 rc = stored_rc;
1608 } else
1610 * The unlock range request succeed -
1611 * free the tmp list.
1613 cifs_free_llist(&tmp_llist);
1614 cur = buf;
1615 num = 0;
1616 } else
1617 cur++;
/* send the final, partially filled batch for this type */
1619 if (num) {
1620 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1621 types[i], num, 0, buf);
1622 if (stored_rc) {
1623 cifs_move_llist(&tmp_llist,
1624 &cfile->llist->locks);
1625 rc = stored_rc;
1626 } else
1627 cifs_free_llist(&tmp_llist);
1631 up_write(&cinode->lock_sem);
1632 kfree(buf);
1633 return rc;
/*
 * Set or clear a byte-range lock.
 *
 * Posix path: cifs_posix_lock_set() is tried first; 0 or a negative value
 * is returned to the caller directly, any other result sends the request
 * to the server with CIFSSMBPosixLock().
 *
 * Mandatory path: a lock request allocates a cifsLockInfo, offers it to
 * cifs_lock_add_if(), and only when that returns a positive value sends
 * the lock to the server (server->ops->mand_lock) before adding it to the
 * file's list. An unlock request is handed to
 * server->ops->mand_unlock_range.
 *
 * For FL_POSIX / FL_FLOCK requests that reach the out label the result is
 * finally mirrored into the VFS with locks_lock_file_wait().
 */
1636 static int
1637 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1638 bool wait_flag, bool posix_lck, int lock, int unlock,
1639 unsigned int xid)
1641 int rc = 0;
1642 __u64 length = 1 + flock->fl_end - flock->fl_start;
1643 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1644 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1645 struct TCP_Server_Info *server = tcon->ses->server;
1646 struct inode *inode = d_inode(cfile->dentry);
1648 if (posix_lck) {
1649 int posix_lock_type;
1651 rc = cifs_posix_lock_set(file, flock);
/* 0 or an error: done, and this return bypasses the out label */
1652 if (!rc || rc < 0)
1653 return rc;
1655 if (type & server->vals->shared_lock_type)
1656 posix_lock_type = CIFS_RDLCK;
1657 else
1658 posix_lock_type = CIFS_WRLCK;
/* an unlock request overrides the read/write choice made above */
1660 if (unlock == 1)
1661 posix_lock_type = CIFS_UNLCK;
1663 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1664 hash_lockowner(flock->fl_owner),
1665 flock->fl_start, length,
1666 NULL, posix_lock_type, wait_flag);
1667 goto out;
1670 if (lock) {
/* note: this local pointer shadows the int parameter 'lock' in this scope */
1671 struct cifsLockInfo *lock;
1673 lock = cifs_lock_init(flock->fl_start, length, type,
1674 flock->fl_flags);
1675 if (!lock)
1676 return -ENOMEM;
1678 rc = cifs_lock_add_if(cfile, lock, wait_flag);
/* negative result: the lock was not added to the list, so free it here */
1679 if (rc < 0) {
1680 kfree(lock);
1681 return rc;
/* zero result: nothing to send to the server */
1683 if (!rc)
1684 goto out;
1687 * Windows 7 server can delay breaking lease from read to None
1688 * if we set a byte-range lock on a file - break it explicitly
1689 * before sending the lock to the server to be sure the next
1690 * read won't conflict with non-overlapted locks due to
1691 * pagereading.
1693 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1694 CIFS_CACHE_READ(CIFS_I(inode))) {
1695 cifs_zap_mapping(inode);
1696 cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1697 inode);
1698 CIFS_I(inode)->oplock = 0;
1701 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1702 type, 1, 0, wait_flag);
/* server refused the lock: it was never added to the list, free it */
1703 if (rc) {
1704 kfree(lock);
1705 return rc;
1708 cifs_lock_add(cfile, lock);
1709 } else if (unlock)
1710 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1712 out:
1713 if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
1715 * If this is a request to remove all locks because we
1716 * are closing the file, it doesn't matter if the
1717 * unlocking failed as both cifs.ko and the SMB server
1718 * remove the lock on file close
1720 if (rc) {
1721 cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
1722 if (!(flock->fl_flags & FL_CLOSE))
1723 return rc;
/* reached on success, or on failure of an FL_CLOSE request */
1725 rc = locks_lock_file_wait(file, flock);
1727 return rc;
1730 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
1732 int rc, xid;
1733 int lock = 0, unlock = 0;
1734 bool wait_flag = false;
1735 bool posix_lck = false;
1736 struct cifs_sb_info *cifs_sb;
1737 struct cifs_tcon *tcon;
1738 struct cifsFileInfo *cfile;
1739 __u32 type;
1741 rc = -EACCES;
1742 xid = get_xid();
1744 if (!(fl->fl_flags & FL_FLOCK))
1745 return -ENOLCK;
1747 cfile = (struct cifsFileInfo *)file->private_data;
1748 tcon = tlink_tcon(cfile->tlink);
1750 cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
1751 tcon->ses->server);
1752 cifs_sb = CIFS_FILE_SB(file);
1754 if (cap_unix(tcon->ses) &&
1755 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1756 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1757 posix_lck = true;
1759 if (!lock && !unlock) {
1761 * if no lock or unlock then nothing to do since we do not
1762 * know what it is
1764 free_xid(xid);
1765 return -EOPNOTSUPP;
1768 rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
1769 xid);
1770 free_xid(xid);
1771 return rc;
1776 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1778 int rc, xid;
1779 int lock = 0, unlock = 0;
1780 bool wait_flag = false;
1781 bool posix_lck = false;
1782 struct cifs_sb_info *cifs_sb;
1783 struct cifs_tcon *tcon;
1784 struct cifsFileInfo *cfile;
1785 __u32 type;
1787 rc = -EACCES;
1788 xid = get_xid();
1790 cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1791 cmd, flock->fl_flags, flock->fl_type,
1792 flock->fl_start, flock->fl_end);
1794 cfile = (struct cifsFileInfo *)file->private_data;
1795 tcon = tlink_tcon(cfile->tlink);
1797 cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1798 tcon->ses->server);
1799 cifs_sb = CIFS_FILE_SB(file);
1801 if (cap_unix(tcon->ses) &&
1802 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1803 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1804 posix_lck = true;
1806 * BB add code here to normalize offset and length to account for
1807 * negative length which we can not accept over the wire.
1809 if (IS_GETLK(cmd)) {
1810 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1811 free_xid(xid);
1812 return rc;
1815 if (!lock && !unlock) {
1817 * if no lock or unlock then nothing to do since we do not
1818 * know what it is
1820 free_xid(xid);
1821 return -EOPNOTSUPP;
1824 rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1825 xid);
1826 free_xid(xid);
1827 return rc;
1831 * update the file size (if needed) after a write. Should be called with
1832 * the inode->i_lock held
1834 void
1835 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1836 unsigned int bytes_written)
1838 loff_t end_of_write = offset + bytes_written;
1840 if (end_of_write > cifsi->server_eof)
1841 cifsi->server_eof = end_of_write;
/*
 * Synchronously write @write_size bytes from @write_data to @open_file,
 * starting at *@offset, using server->ops->sync_write.
 *
 * The data is sent in chunks of at most wp_retry_size() bytes. A chunk is
 * retried for as long as the result is -EAGAIN, reopening the handle first
 * when it has been marked invalid. *@offset is advanced and the cached
 * server EOF updated after every successful chunk.
 *
 * Returns the number of bytes written. If the very first chunk fails or
 * writes nothing, the sync_write/reopen result is returned instead (which
 * is 0 when sync_write succeeded but wrote zero bytes). Returns -ENOSYS
 * when the server has no sync_write operation.
 */
1844 static ssize_t
1845 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1846 size_t write_size, loff_t *offset)
1848 int rc = 0;
1849 unsigned int bytes_written = 0;
1850 unsigned int total_written;
1851 struct cifs_tcon *tcon;
1852 struct TCP_Server_Info *server;
1853 unsigned int xid;
1854 struct dentry *dentry = open_file->dentry;
1855 struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1856 struct cifs_io_parms io_parms;
1858 cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1859 write_size, *offset, dentry);
1861 tcon = tlink_tcon(open_file->tlink);
1862 server = tcon->ses->server;
1864 if (!server->ops->sync_write)
1865 return -ENOSYS;
1867 xid = get_xid();
/* one iteration per chunk; total_written grows by what the chunk wrote */
1869 for (total_written = 0; write_size > total_written;
1870 total_written += bytes_written) {
/* prime rc so the retry loop below runs at least once */
1871 rc = -EAGAIN;
1872 while (rc == -EAGAIN) {
1873 struct kvec iov[2];
1874 unsigned int len;
1876 if (open_file->invalidHandle) {
1877 /* we could deadlock if we called
1878 filemap_fdatawait from here so tell
1879 reopen_file not to flush data to
1880 server now */
1881 rc = cifs_reopen_file(open_file, false);
1882 if (rc != 0)
1883 break;
/* chunk size: the smaller of the retry size and the bytes still to write */
1886 len = min(server->ops->wp_retry_size(d_inode(dentry)),
1887 (unsigned int)write_size - total_written);
1888 /* iov[0] is reserved for smb header */
1889 iov[1].iov_base = (char *)write_data + total_written;
1890 iov[1].iov_len = len;
1891 io_parms.pid = pid;
1892 io_parms.tcon = tcon;
1893 io_parms.offset = *offset;
1894 io_parms.length = len;
1895 rc = server->ops->sync_write(xid, &open_file->fid,
1896 &io_parms, &bytes_written, iov, 1);
/* error or empty write: keep earlier progress if any, else report rc */
1898 if (rc || (bytes_written == 0)) {
1899 if (total_written)
1900 break;
1901 else {
1902 free_xid(xid);
1903 return rc;
1905 } else {
/* cifs_update_eof() is called with i_lock held */
1906 spin_lock(&d_inode(dentry)->i_lock);
1907 cifs_update_eof(cifsi, *offset, bytes_written);
1908 spin_unlock(&d_inode(dentry)->i_lock);
1909 *offset += bytes_written;
1913 cifs_stats_bytes_written(tcon, total_written);
/* grow the in-core file size if the write went past it */
1915 if (total_written > 0) {
1916 spin_lock(&d_inode(dentry)->i_lock);
1917 if (*offset > d_inode(dentry)->i_size)
1918 i_size_write(d_inode(dentry), *offset);
1919 spin_unlock(&d_inode(dentry)->i_lock);
1921 mark_inode_dirty_sync(d_inode(dentry));
1922 free_xid(xid);
1923 return total_written;
1926 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1927 bool fsuid_only)
1929 struct cifsFileInfo *open_file = NULL;
1930 struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1932 /* only filter by fsuid on multiuser mounts */
1933 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1934 fsuid_only = false;
1936 spin_lock(&cifs_inode->open_file_lock);
1937 /* we could simply get the first_list_entry since write-only entries
1938 are always at the end of the list but since the first entry might
1939 have a close pending, we go through the whole list */
1940 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1941 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1942 continue;
1943 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1944 if (!open_file->invalidHandle) {
1945 /* found a good file */
1946 /* lock it so it will not be closed on us */
1947 cifsFileInfo_get(open_file);
1948 spin_unlock(&cifs_inode->open_file_lock);
1949 return open_file;
1950 } /* else might as well continue, and look for
1951 another, or simply have the caller reopen it
1952 again rather than trying to fix this handle */
1953 } else /* write only file */
1954 break; /* write only files are last so must be done */
1956 spin_unlock(&cifs_inode->open_file_lock);
1957 return NULL;
1960 /* Return -EBADF if no handle is found and general rc otherwise */
/*
 * Find an open handle on @cifs_inode that was opened for writing and
 * return it, with a reference held, through @ret_file.
 *
 * @flags: FIND_WR_FSUID_ONLY limits the search to handles whose uid is
 * current_fsuid() (honoured on multiuser mounts only); FIND_WR_WITH_DELETE
 * requires DELETE in the handle's access mask.
 *
 * The list is scanned first for handles whose pid equals current->tgid and
 * then for any handle. If only an invalid handle is found, it is reopened
 * outside the spinlock; a failed reopen moves that handle to the tail of
 * the list and restarts the search, with the number of restarts bounded by
 * MAX_REOPEN_ATT.
 */
1962 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
1963 struct cifsFileInfo **ret_file)
1965 struct cifsFileInfo *open_file, *inv_file = NULL;
1966 struct cifs_sb_info *cifs_sb;
1967 bool any_available = false;
1968 int rc = -EBADF;
1969 unsigned int refind = 0;
1970 bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
1971 bool with_delete = flags & FIND_WR_WITH_DELETE;
1972 *ret_file = NULL;
1975 * Having a null inode here (because mapping->host was set to zero by
1976 * the VFS or MM) should not happen but we had reports of on oops (due
1977 * to it being zero) during stress testcases so we need to check for it
1980 if (cifs_inode == NULL) {
1981 cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
1982 dump_stack();
1983 return rc;
1986 cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1988 /* only filter by fsuid on multiuser mounts */
1989 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1990 fsuid_only = false;
1992 spin_lock(&cifs_inode->open_file_lock);
1993 refind_writable:
/* too many failed reopen attempts: give up with the last rc */
1994 if (refind > MAX_REOPEN_ATT) {
1995 spin_unlock(&cifs_inode->open_file_lock);
1996 return rc;
1998 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
/* first pass only: restrict to handles opened by this thread group */
1999 if (!any_available && open_file->pid != current->tgid)
2000 continue;
2001 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2002 continue;
2003 if (with_delete && !(open_file->fid.access & DELETE))
2004 continue;
2005 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2006 if (!open_file->invalidHandle) {
2007 /* found a good writable file */
2008 cifsFileInfo_get(open_file);
2009 spin_unlock(&cifs_inode->open_file_lock);
2010 *ret_file = open_file;
2011 return 0;
2012 } else {
/* remember the first invalid writable handle as a reopen candidate */
2013 if (!inv_file)
2014 inv_file = open_file;
2018 /* couldn't find useable FH with same pid, try any available */
2019 if (!any_available) {
2020 any_available = true;
2021 goto refind_writable;
/* pin the candidate before the spinlock is dropped for the reopen */
2024 if (inv_file) {
2025 any_available = false;
2026 cifsFileInfo_get(inv_file);
2029 spin_unlock(&cifs_inode->open_file_lock);
2031 if (inv_file) {
2032 rc = cifs_reopen_file(inv_file, false);
2033 if (!rc) {
2034 *ret_file = inv_file;
2035 return 0;
/* reopen failed: push this handle to the tail, drop our reference, retry */
2038 spin_lock(&cifs_inode->open_file_lock);
2039 list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2040 spin_unlock(&cifs_inode->open_file_lock);
2041 cifsFileInfo_put(inv_file);
2042 ++refind;
2043 inv_file = NULL;
/* the refind_writable label expects open_file_lock to be held */
2044 spin_lock(&cifs_inode->open_file_lock);
2045 goto refind_writable;
2048 return rc;
2051 struct cifsFileInfo *
2052 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2054 struct cifsFileInfo *cfile;
2055 int rc;
2057 rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2058 if (rc)
2059 cifs_dbg(FYI, "couldn't find writable handle rc=%d", rc);
2061 return cfile;
2065 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2066 int flags,
2067 struct cifsFileInfo **ret_file)
2069 struct list_head *tmp;
2070 struct cifsFileInfo *cfile;
2071 struct cifsInodeInfo *cinode;
2072 char *full_path;
2074 *ret_file = NULL;
2076 spin_lock(&tcon->open_file_lock);
2077 list_for_each(tmp, &tcon->openFileList) {
2078 cfile = list_entry(tmp, struct cifsFileInfo,
2079 tlist);
2080 full_path = build_path_from_dentry(cfile->dentry);
2081 if (full_path == NULL) {
2082 spin_unlock(&tcon->open_file_lock);
2083 return -ENOMEM;
2085 if (strcmp(full_path, name)) {
2086 kfree(full_path);
2087 continue;
2090 kfree(full_path);
2091 cinode = CIFS_I(d_inode(cfile->dentry));
2092 spin_unlock(&tcon->open_file_lock);
2093 return cifs_get_writable_file(cinode, flags, ret_file);
2096 spin_unlock(&tcon->open_file_lock);
2097 return -ENOENT;
2101 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2102 struct cifsFileInfo **ret_file)
2104 struct list_head *tmp;
2105 struct cifsFileInfo *cfile;
2106 struct cifsInodeInfo *cinode;
2107 char *full_path;
2109 *ret_file = NULL;
2111 spin_lock(&tcon->open_file_lock);
2112 list_for_each(tmp, &tcon->openFileList) {
2113 cfile = list_entry(tmp, struct cifsFileInfo,
2114 tlist);
2115 full_path = build_path_from_dentry(cfile->dentry);
2116 if (full_path == NULL) {
2117 spin_unlock(&tcon->open_file_lock);
2118 return -ENOMEM;
2120 if (strcmp(full_path, name)) {
2121 kfree(full_path);
2122 continue;
2125 kfree(full_path);
2126 cinode = CIFS_I(d_inode(cfile->dentry));
2127 spin_unlock(&tcon->open_file_lock);
2128 *ret_file = find_readable_file(cinode, 0);
2129 return *ret_file ? 0 : -ENOENT;
2132 spin_unlock(&tcon->open_file_lock);
2133 return -ENOENT;
2136 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2138 struct address_space *mapping = page->mapping;
2139 loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2140 char *write_data;
2141 int rc = -EFAULT;
2142 int bytes_written = 0;
2143 struct inode *inode;
2144 struct cifsFileInfo *open_file;
2146 if (!mapping || !mapping->host)
2147 return -EFAULT;
2149 inode = page->mapping->host;
2151 offset += (loff_t)from;
2152 write_data = kmap(page);
2153 write_data += from;
2155 if ((to > PAGE_SIZE) || (from > to)) {
2156 kunmap(page);
2157 return -EIO;
2160 /* racing with truncate? */
2161 if (offset > mapping->host->i_size) {
2162 kunmap(page);
2163 return 0; /* don't care */
2166 /* check to make sure that we are not extending the file */
2167 if (mapping->host->i_size - offset < (loff_t)to)
2168 to = (unsigned)(mapping->host->i_size - offset);
2170 rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2171 &open_file);
2172 if (!rc) {
2173 bytes_written = cifs_write(open_file, open_file->pid,
2174 write_data, to - from, &offset);
2175 cifsFileInfo_put(open_file);
2176 /* Does mm or vfs already set times? */
2177 inode->i_atime = inode->i_mtime = current_time(inode);
2178 if ((bytes_written > 0) && (offset))
2179 rc = 0;
2180 else if (bytes_written < 0)
2181 rc = bytes_written;
2182 else
2183 rc = -EFAULT;
2184 } else {
2185 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2186 if (!is_retryable_error(rc))
2187 rc = -EIO;
2190 kunmap(page);
2191 return rc;
2194 static struct cifs_writedata *
2195 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2196 pgoff_t end, pgoff_t *index,
2197 unsigned int *found_pages)
2199 struct cifs_writedata *wdata;
2201 wdata = cifs_writedata_alloc((unsigned int)tofind,
2202 cifs_writev_complete);
2203 if (!wdata)
2204 return NULL;
2206 *found_pages = find_get_pages_range_tag(mapping, index, end,
2207 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2208 return wdata;
/*
 * Walk the @found_pages pages in wdata->pages in order, locking each one
 * and switching it from dirty to writeback. The walk stops at the first
 * page that cannot be used, so the pages kept form one run starting at
 * index 0 of the array.
 *
 * *@next carries the page index expected for the next usable page, and
 * *@done is set when the walk has passed @end (non-cyclic writeback) or
 * the inode size. References on the pages that are not kept are dropped
 * and their slots cleared.
 *
 * Returns the number of pages that were prepared.
 */
2211 static unsigned int
2212 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
2213 struct address_space *mapping,
2214 struct writeback_control *wbc,
2215 pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2217 unsigned int nr_pages = 0, i;
2218 struct page *page;
2220 for (i = 0; i < found_pages; i++) {
2221 page = wdata->pages[i];
2223 * At this point we hold neither the i_pages lock nor the
2224 * page lock: the page may be truncated or invalidated
2225 * (changing page->mapping to NULL), or even swizzled
2226 * back from swapper_space to tmpfs file mapping
/* block for the first page only; later pages end the run if contended */
2229 if (nr_pages == 0)
2230 lock_page(page);
2231 else if (!trylock_page(page))
2232 break;
/* page no longer belongs to this mapping */
2234 if (unlikely(page->mapping != mapping)) {
2235 unlock_page(page);
2236 break;
/* past the requested range: tell the caller to stop */
2239 if (!wbc->range_cyclic && page->index > end) {
2240 *done = true;
2241 unlock_page(page);
2242 break;
2245 if (*next && (page->index != *next)) {
2246 /* Not next consecutive page */
2247 unlock_page(page);
2248 break;
/* for anything but WB_SYNC_NONE, wait for writeback in flight to finish */
2251 if (wbc->sync_mode != WB_SYNC_NONE)
2252 wait_on_page_writeback(page);
/* still under writeback, or no longer dirty: end the run here */
2254 if (PageWriteback(page) ||
2255 !clear_page_dirty_for_io(page)) {
2256 unlock_page(page);
2257 break;
2261 * This actually clears the dirty bit in the radix tree.
2262 * See cifs_writepage() for more commentary.
2264 set_page_writeback(page);
/* page starts at or beyond i_size: undo the writeback state and stop */
2265 if (page_offset(page) >= i_size_read(mapping->host)) {
2266 *done = true;
2267 unlock_page(page);
2268 end_page_writeback(page);
2269 break;
/* page was read from this same slot at the top of the loop */
2272 wdata->pages[i] = page;
2273 *next = page->index + 1;
2274 ++nr_pages;
2277 /* reset index to refind any pages skipped */
/* NOTE(review): reads pages[0], so presumably callers pass found_pages > 0 */
2278 if (nr_pages == 0)
2279 *index = wdata->pages[0]->index + 1;
2281 /* put any pages we aren't going to use */
2282 for (i = nr_pages; i < found_pages; i++) {
2283 put_page(wdata->pages[i]);
2284 wdata->pages[i] = NULL;
2287 return nr_pages;
2290 static int
2291 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2292 struct address_space *mapping, struct writeback_control *wbc)
2294 int rc;
2295 struct TCP_Server_Info *server =
2296 tlink_tcon(wdata->cfile->tlink)->ses->server;
2298 wdata->sync_mode = wbc->sync_mode;
2299 wdata->nr_pages = nr_pages;
2300 wdata->offset = page_offset(wdata->pages[0]);
2301 wdata->pagesz = PAGE_SIZE;
2302 wdata->tailsz = min(i_size_read(mapping->host) -
2303 page_offset(wdata->pages[nr_pages - 1]),
2304 (loff_t)PAGE_SIZE);
2305 wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2306 wdata->pid = wdata->cfile->pid;
2308 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2309 if (rc)
2310 return rc;
2312 if (wdata->cfile->invalidHandle)
2313 rc = -EAGAIN;
2314 else
2315 rc = server->ops->async_writev(wdata, cifs_writedata_release);
2317 return rc;
2320 static int cifs_writepages(struct address_space *mapping,
2321 struct writeback_control *wbc)
2323 struct inode *inode = mapping->host;
2324 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2325 struct TCP_Server_Info *server;
2326 bool done = false, scanned = false, range_whole = false;
2327 pgoff_t end, index;
2328 struct cifs_writedata *wdata;
2329 struct cifsFileInfo *cfile = NULL;
2330 int rc = 0;
2331 int saved_rc = 0;
2332 unsigned int xid;
2335 * If wsize is smaller than the page cache size, default to writing
2336 * one page at a time via cifs_writepage
2338 if (cifs_sb->wsize < PAGE_SIZE)
2339 return generic_writepages(mapping, wbc);
2341 xid = get_xid();
2342 if (wbc->range_cyclic) {
2343 index = mapping->writeback_index; /* Start from prev offset */
2344 end = -1;
2345 } else {
2346 index = wbc->range_start >> PAGE_SHIFT;
2347 end = wbc->range_end >> PAGE_SHIFT;
2348 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2349 range_whole = true;
2350 scanned = true;
2352 server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2353 retry:
2354 while (!done && index <= end) {
2355 unsigned int i, nr_pages, found_pages, wsize;
2356 pgoff_t next = 0, tofind, saved_index = index;
2357 struct cifs_credits credits_on_stack;
2358 struct cifs_credits *credits = &credits_on_stack;
2359 int get_file_rc = 0;
2361 if (cfile)
2362 cifsFileInfo_put(cfile);
2364 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2366 /* in case of an error store it to return later */
2367 if (rc)
2368 get_file_rc = rc;
2370 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2371 &wsize, credits);
2372 if (rc != 0) {
2373 done = true;
2374 break;
2377 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2379 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2380 &found_pages);
2381 if (!wdata) {
2382 rc = -ENOMEM;
2383 done = true;
2384 add_credits_and_wake_if(server, credits, 0);
2385 break;
2388 if (found_pages == 0) {
2389 kref_put(&wdata->refcount, cifs_writedata_release);
2390 add_credits_and_wake_if(server, credits, 0);
2391 break;
2394 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2395 end, &index, &next, &done);
2397 /* nothing to write? */
2398 if (nr_pages == 0) {
2399 kref_put(&wdata->refcount, cifs_writedata_release);
2400 add_credits_and_wake_if(server, credits, 0);
2401 continue;
2404 wdata->credits = credits_on_stack;
2405 wdata->cfile = cfile;
2406 cfile = NULL;
2408 if (!wdata->cfile) {
2409 cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2410 get_file_rc);
2411 if (is_retryable_error(get_file_rc))
2412 rc = get_file_rc;
2413 else
2414 rc = -EBADF;
2415 } else
2416 rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2418 for (i = 0; i < nr_pages; ++i)
2419 unlock_page(wdata->pages[i]);
2421 /* send failure -- clean up the mess */
2422 if (rc != 0) {
2423 add_credits_and_wake_if(server, &wdata->credits, 0);
2424 for (i = 0; i < nr_pages; ++i) {
2425 if (is_retryable_error(rc))
2426 redirty_page_for_writepage(wbc,
2427 wdata->pages[i]);
2428 else
2429 SetPageError(wdata->pages[i]);
2430 end_page_writeback(wdata->pages[i]);
2431 put_page(wdata->pages[i]);
2433 if (!is_retryable_error(rc))
2434 mapping_set_error(mapping, rc);
2436 kref_put(&wdata->refcount, cifs_writedata_release);
2438 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2439 index = saved_index;
2440 continue;
2443 /* Return immediately if we received a signal during writing */
2444 if (is_interrupt_error(rc)) {
2445 done = true;
2446 break;
2449 if (rc != 0 && saved_rc == 0)
2450 saved_rc = rc;
2452 wbc->nr_to_write -= nr_pages;
2453 if (wbc->nr_to_write <= 0)
2454 done = true;
2456 index = next;
2459 if (!scanned && !done) {
2461 * We hit the last page and there is more work to be done: wrap
2462 * back to the start of the file
2464 scanned = true;
2465 index = 0;
2466 goto retry;
2469 if (saved_rc != 0)
2470 rc = saved_rc;
2472 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2473 mapping->writeback_index = index;
2475 if (cfile)
2476 cifsFileInfo_put(cfile);
2477 free_xid(xid);
2478 return rc;
2481 static int
2482 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2484 int rc;
2485 unsigned int xid;
2487 xid = get_xid();
2488 /* BB add check for wbc flags */
2489 get_page(page);
2490 if (!PageUptodate(page))
2491 cifs_dbg(FYI, "ppw - page not up to date\n");
2494 * Set the "writeback" flag, and clear "dirty" in the radix tree.
2496 * A writepage() implementation always needs to do either this,
2497 * or re-dirty the page with "redirty_page_for_writepage()" in
2498 * the case of a failure.
2500 * Just unlocking the page will cause the radix tree tag-bits
2501 * to fail to update with the state of the page correctly.
2503 set_page_writeback(page);
2504 retry_write:
2505 rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2506 if (is_retryable_error(rc)) {
2507 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2508 goto retry_write;
2509 redirty_page_for_writepage(wbc, page);
2510 } else if (rc != 0) {
2511 SetPageError(page);
2512 mapping_set_error(page->mapping, rc);
2513 } else {
2514 SetPageUptodate(page);
2516 end_page_writeback(page);
2517 put_page(page);
2518 free_xid(xid);
2519 return rc;
/*
 * Write a single page via cifs_writepage_locked() and unlock it afterwards,
 * whatever the outcome. Returns the result of the write.
 */
static int cifs_writepage(struct page *page, struct writeback_control *wbc)
{
	int result;

	result = cifs_writepage_locked(page, wbc);
	unlock_page(page);

	return result;
}
2529 static int cifs_write_end(struct file *file, struct address_space *mapping,
2530 loff_t pos, unsigned len, unsigned copied,
2531 struct page *page, void *fsdata)
2533 int rc;
2534 struct inode *inode = mapping->host;
2535 struct cifsFileInfo *cfile = file->private_data;
2536 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2537 __u32 pid;
2539 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2540 pid = cfile->pid;
2541 else
2542 pid = current->tgid;
2544 cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2545 page, pos, copied);
2547 if (PageChecked(page)) {
2548 if (copied == len)
2549 SetPageUptodate(page);
2550 ClearPageChecked(page);
2551 } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2552 SetPageUptodate(page);
2554 if (!PageUptodate(page)) {
2555 char *page_data;
2556 unsigned offset = pos & (PAGE_SIZE - 1);
2557 unsigned int xid;
2559 xid = get_xid();
2560 /* this is probably better than directly calling
2561 partialpage_write since in this function the file handle is
2562 known which we might as well leverage */
2563 /* BB check if anything else missing out of ppw
2564 such as updating last write time */
2565 page_data = kmap(page);
2566 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2567 /* if (rc < 0) should we set writebehind rc? */
2568 kunmap(page);
2570 free_xid(xid);
2571 } else {
2572 rc = copied;
2573 pos += copied;
2574 set_page_dirty(page);
2577 if (rc > 0) {
2578 spin_lock(&inode->i_lock);
2579 if (pos > inode->i_size)
2580 i_size_write(inode, pos);
2581 spin_unlock(&inode->i_lock);
2584 unlock_page(page);
2585 put_page(page);
2587 return rc;
2590 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2591 int datasync)
2593 unsigned int xid;
2594 int rc = 0;
2595 struct cifs_tcon *tcon;
2596 struct TCP_Server_Info *server;
2597 struct cifsFileInfo *smbfile = file->private_data;
2598 struct inode *inode = file_inode(file);
2599 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2601 rc = file_write_and_wait_range(file, start, end);
2602 if (rc) {
2603 trace_cifs_fsync_err(inode->i_ino, rc);
2604 return rc;
2607 xid = get_xid();
2609 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2610 file, datasync);
2612 if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2613 rc = cifs_zap_mapping(inode);
2614 if (rc) {
2615 cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2616 rc = 0; /* don't care about it in fsync */
2620 tcon = tlink_tcon(smbfile->tlink);
2621 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2622 server = tcon->ses->server;
2623 if (server->ops->flush)
2624 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2625 else
2626 rc = -ENOSYS;
2629 free_xid(xid);
2630 return rc;
2633 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2635 unsigned int xid;
2636 int rc = 0;
2637 struct cifs_tcon *tcon;
2638 struct TCP_Server_Info *server;
2639 struct cifsFileInfo *smbfile = file->private_data;
2640 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2642 rc = file_write_and_wait_range(file, start, end);
2643 if (rc) {
2644 trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
2645 return rc;
2648 xid = get_xid();
2650 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2651 file, datasync);
2653 tcon = tlink_tcon(smbfile->tlink);
2654 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2655 server = tcon->ses->server;
2656 if (server->ops->flush)
2657 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2658 else
2659 rc = -ENOSYS;
2662 free_xid(xid);
2663 return rc;
2667 * As file closes, flush all cached write data for this inode checking
2668 * for write behind errors.
2670 int cifs_flush(struct file *file, fl_owner_t id)
2672 struct inode *inode = file_inode(file);
2673 int rc = 0;
2675 if (file->f_mode & FMODE_WRITE)
2676 rc = filemap_write_and_wait(inode->i_mapping);
2678 cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2679 if (rc)
2680 trace_cifs_flush_err(inode->i_ino, rc);
2681 return rc;
2684 static int
2685 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2687 int rc = 0;
2688 unsigned long i;
2690 for (i = 0; i < num_pages; i++) {
2691 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2692 if (!pages[i]) {
2694 * save number of pages we have already allocated and
2695 * return with ENOMEM error
2697 num_pages = i;
2698 rc = -ENOMEM;
2699 break;
2703 if (rc) {
2704 for (i = 0; i < num_pages; i++)
2705 put_page(pages[i]);
2707 return rc;
2710 static inline
2711 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2713 size_t num_pages;
2714 size_t clen;
2716 clen = min_t(const size_t, len, wsize);
2717 num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2719 if (cur_len)
2720 *cur_len = clen;
2722 return num_pages;
2725 static void
2726 cifs_uncached_writedata_release(struct kref *refcount)
2728 int i;
2729 struct cifs_writedata *wdata = container_of(refcount,
2730 struct cifs_writedata, refcount);
2732 kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2733 for (i = 0; i < wdata->nr_pages; i++)
2734 put_page(wdata->pages[i]);
2735 cifs_writedata_release(refcount);
2738 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2740 static void
2741 cifs_uncached_writev_complete(struct work_struct *work)
2743 struct cifs_writedata *wdata = container_of(work,
2744 struct cifs_writedata, work);
2745 struct inode *inode = d_inode(wdata->cfile->dentry);
2746 struct cifsInodeInfo *cifsi = CIFS_I(inode);
2748 spin_lock(&inode->i_lock);
2749 cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2750 if (cifsi->server_eof > inode->i_size)
2751 i_size_write(inode, cifsi->server_eof);
2752 spin_unlock(&inode->i_lock);
2754 complete(&wdata->done);
2755 collect_uncached_write_data(wdata->ctx);
2756 /* the below call can possibly free the last ref to aio ctx */
2757 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2760 static int
2761 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2762 size_t *len, unsigned long *num_pages)
2764 size_t save_len, copied, bytes, cur_len = *len;
2765 unsigned long i, nr_pages = *num_pages;
2767 save_len = cur_len;
2768 for (i = 0; i < nr_pages; i++) {
2769 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2770 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2771 cur_len -= copied;
2773 * If we didn't copy as much as we expected, then that
2774 * may mean we trod into an unmapped area. Stop copying
2775 * at that point. On the next pass through the big
2776 * loop, we'll likely end up getting a zero-length
2777 * write and bailing out of it.
2779 if (copied < bytes)
2780 break;
2782 cur_len = save_len - cur_len;
2783 *len = cur_len;
2786 * If we have no data to send, then that probably means that
2787 * the copy above failed altogether. That's most likely because
2788 * the address in the iovec was bogus. Return -EFAULT and let
2789 * the caller free anything we allocated and bail out.
2791 if (!cur_len)
2792 return -EFAULT;
2795 * i + 1 now represents the number of pages we actually used in
2796 * the copy phase above.
2798 *num_pages = i + 1;
2799 return 0;
2802 static int
2803 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
2804 struct cifs_aio_ctx *ctx)
2806 unsigned int wsize;
2807 struct cifs_credits credits;
2808 int rc;
2809 struct TCP_Server_Info *server =
2810 tlink_tcon(wdata->cfile->tlink)->ses->server;
2812 do {
2813 if (wdata->cfile->invalidHandle) {
2814 rc = cifs_reopen_file(wdata->cfile, false);
2815 if (rc == -EAGAIN)
2816 continue;
2817 else if (rc)
2818 break;
2823 * Wait for credits to resend this wdata.
2824 * Note: we are attempting to resend the whole wdata not in
2825 * segments
2827 do {
2828 rc = server->ops->wait_mtu_credits(server, wdata->bytes,
2829 &wsize, &credits);
2830 if (rc)
2831 goto fail;
2833 if (wsize < wdata->bytes) {
2834 add_credits_and_wake_if(server, &credits, 0);
2835 msleep(1000);
2837 } while (wsize < wdata->bytes);
2838 wdata->credits = credits;
2840 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2842 if (!rc) {
2843 if (wdata->cfile->invalidHandle)
2844 rc = -EAGAIN;
2845 else {
2846 #ifdef CONFIG_CIFS_SMB_DIRECT
2847 if (wdata->mr) {
2848 wdata->mr->need_invalidate = true;
2849 smbd_deregister_mr(wdata->mr);
2850 wdata->mr = NULL;
2852 #endif
2853 rc = server->ops->async_writev(wdata,
2854 cifs_uncached_writedata_release);
2858 /* If the write was successfully sent, we are done */
2859 if (!rc) {
2860 list_add_tail(&wdata->list, wdata_list);
2861 return 0;
2864 /* Roll back credits and retry if needed */
2865 add_credits_and_wake_if(server, &wdata->credits, 0);
2866 } while (rc == -EAGAIN);
2868 fail:
2869 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2870 return rc;
2873 static int
2874 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2875 struct cifsFileInfo *open_file,
2876 struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2877 struct cifs_aio_ctx *ctx)
2879 int rc = 0;
2880 size_t cur_len;
2881 unsigned long nr_pages, num_pages, i;
2882 struct cifs_writedata *wdata;
2883 struct iov_iter saved_from = *from;
2884 loff_t saved_offset = offset;
2885 pid_t pid;
2886 struct TCP_Server_Info *server;
2887 struct page **pagevec;
2888 size_t start;
2889 unsigned int xid;
2891 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2892 pid = open_file->pid;
2893 else
2894 pid = current->tgid;
2896 server = tlink_tcon(open_file->tlink)->ses->server;
2897 xid = get_xid();
2899 do {
2900 unsigned int wsize;
2901 struct cifs_credits credits_on_stack;
2902 struct cifs_credits *credits = &credits_on_stack;
2904 if (open_file->invalidHandle) {
2905 rc = cifs_reopen_file(open_file, false);
2906 if (rc == -EAGAIN)
2907 continue;
2908 else if (rc)
2909 break;
2912 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2913 &wsize, credits);
2914 if (rc)
2915 break;
2917 cur_len = min_t(const size_t, len, wsize);
2919 if (ctx->direct_io) {
2920 ssize_t result;
2922 result = iov_iter_get_pages_alloc(
2923 from, &pagevec, cur_len, &start);
2924 if (result < 0) {
2925 cifs_dbg(VFS,
2926 "direct_writev couldn't get user pages "
2927 "(rc=%zd) iter type %d iov_offset %zd "
2928 "count %zd\n",
2929 result, iov_iter_type(from),
2930 from->iov_offset, from->count);
2931 dump_stack();
2933 rc = result;
2934 add_credits_and_wake_if(server, credits, 0);
2935 break;
2937 cur_len = (size_t)result;
2938 iov_iter_advance(from, cur_len);
2940 nr_pages =
2941 (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
2943 wdata = cifs_writedata_direct_alloc(pagevec,
2944 cifs_uncached_writev_complete);
2945 if (!wdata) {
2946 rc = -ENOMEM;
2947 add_credits_and_wake_if(server, credits, 0);
2948 break;
2952 wdata->page_offset = start;
2953 wdata->tailsz =
2954 nr_pages > 1 ?
2955 cur_len - (PAGE_SIZE - start) -
2956 (nr_pages - 2) * PAGE_SIZE :
2957 cur_len;
2958 } else {
2959 nr_pages = get_numpages(wsize, len, &cur_len);
2960 wdata = cifs_writedata_alloc(nr_pages,
2961 cifs_uncached_writev_complete);
2962 if (!wdata) {
2963 rc = -ENOMEM;
2964 add_credits_and_wake_if(server, credits, 0);
2965 break;
2968 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2969 if (rc) {
2970 kvfree(wdata->pages);
2971 kfree(wdata);
2972 add_credits_and_wake_if(server, credits, 0);
2973 break;
2976 num_pages = nr_pages;
2977 rc = wdata_fill_from_iovec(
2978 wdata, from, &cur_len, &num_pages);
2979 if (rc) {
2980 for (i = 0; i < nr_pages; i++)
2981 put_page(wdata->pages[i]);
2982 kvfree(wdata->pages);
2983 kfree(wdata);
2984 add_credits_and_wake_if(server, credits, 0);
2985 break;
2989 * Bring nr_pages down to the number of pages we
2990 * actually used, and free any pages that we didn't use.
2992 for ( ; nr_pages > num_pages; nr_pages--)
2993 put_page(wdata->pages[nr_pages - 1]);
2995 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2998 wdata->sync_mode = WB_SYNC_ALL;
2999 wdata->nr_pages = nr_pages;
3000 wdata->offset = (__u64)offset;
3001 wdata->cfile = cifsFileInfo_get(open_file);
3002 wdata->pid = pid;
3003 wdata->bytes = cur_len;
3004 wdata->pagesz = PAGE_SIZE;
3005 wdata->credits = credits_on_stack;
3006 wdata->ctx = ctx;
3007 kref_get(&ctx->refcount);
3009 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3011 if (!rc) {
3012 if (wdata->cfile->invalidHandle)
3013 rc = -EAGAIN;
3014 else
3015 rc = server->ops->async_writev(wdata,
3016 cifs_uncached_writedata_release);
3019 if (rc) {
3020 add_credits_and_wake_if(server, &wdata->credits, 0);
3021 kref_put(&wdata->refcount,
3022 cifs_uncached_writedata_release);
3023 if (rc == -EAGAIN) {
3024 *from = saved_from;
3025 iov_iter_advance(from, offset - saved_offset);
3026 continue;
3028 break;
3031 list_add_tail(&wdata->list, wdata_list);
3032 offset += cur_len;
3033 len -= cur_len;
3034 } while (len > 0);
3036 free_xid(xid);
3037 return rc;
3040 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3042 struct cifs_writedata *wdata, *tmp;
3043 struct cifs_tcon *tcon;
3044 struct cifs_sb_info *cifs_sb;
3045 struct dentry *dentry = ctx->cfile->dentry;
3046 int rc;
3048 tcon = tlink_tcon(ctx->cfile->tlink);
3049 cifs_sb = CIFS_SB(dentry->d_sb);
3051 mutex_lock(&ctx->aio_mutex);
3053 if (list_empty(&ctx->list)) {
3054 mutex_unlock(&ctx->aio_mutex);
3055 return;
3058 rc = ctx->rc;
3060 * Wait for and collect replies for any successful sends in order of
3061 * increasing offset. Once an error is hit, then return without waiting
3062 * for any more replies.
3064 restart_loop:
3065 list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3066 if (!rc) {
3067 if (!try_wait_for_completion(&wdata->done)) {
3068 mutex_unlock(&ctx->aio_mutex);
3069 return;
3072 if (wdata->result)
3073 rc = wdata->result;
3074 else
3075 ctx->total_len += wdata->bytes;
3077 /* resend call if it's a retryable error */
3078 if (rc == -EAGAIN) {
3079 struct list_head tmp_list;
3080 struct iov_iter tmp_from = ctx->iter;
3082 INIT_LIST_HEAD(&tmp_list);
3083 list_del_init(&wdata->list);
3085 if (ctx->direct_io)
3086 rc = cifs_resend_wdata(
3087 wdata, &tmp_list, ctx);
3088 else {
3089 iov_iter_advance(&tmp_from,
3090 wdata->offset - ctx->pos);
3092 rc = cifs_write_from_iter(wdata->offset,
3093 wdata->bytes, &tmp_from,
3094 ctx->cfile, cifs_sb, &tmp_list,
3095 ctx);
3097 kref_put(&wdata->refcount,
3098 cifs_uncached_writedata_release);
3101 list_splice(&tmp_list, &ctx->list);
3102 goto restart_loop;
3105 list_del_init(&wdata->list);
3106 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3109 cifs_stats_bytes_written(tcon, ctx->total_len);
3110 set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3112 ctx->rc = (rc == 0) ? ctx->total_len : rc;
3114 mutex_unlock(&ctx->aio_mutex);
3116 if (ctx->iocb && ctx->iocb->ki_complete)
3117 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3118 else
3119 complete(&ctx->done);
3122 static ssize_t __cifs_writev(
3123 struct kiocb *iocb, struct iov_iter *from, bool direct)
3125 struct file *file = iocb->ki_filp;
3126 ssize_t total_written = 0;
3127 struct cifsFileInfo *cfile;
3128 struct cifs_tcon *tcon;
3129 struct cifs_sb_info *cifs_sb;
3130 struct cifs_aio_ctx *ctx;
3131 struct iov_iter saved_from = *from;
3132 size_t len = iov_iter_count(from);
3133 int rc;
3136 * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
3137 * In this case, fall back to non-direct write function.
3138 * this could be improved by getting pages directly in ITER_KVEC
3140 if (direct && iov_iter_is_kvec(from)) {
3141 cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
3142 direct = false;
3145 rc = generic_write_checks(iocb, from);
3146 if (rc <= 0)
3147 return rc;
3149 cifs_sb = CIFS_FILE_SB(file);
3150 cfile = file->private_data;
3151 tcon = tlink_tcon(cfile->tlink);
3153 if (!tcon->ses->server->ops->async_writev)
3154 return -ENOSYS;
3156 ctx = cifs_aio_ctx_alloc();
3157 if (!ctx)
3158 return -ENOMEM;
3160 ctx->cfile = cifsFileInfo_get(cfile);
3162 if (!is_sync_kiocb(iocb))
3163 ctx->iocb = iocb;
3165 ctx->pos = iocb->ki_pos;
3167 if (direct) {
3168 ctx->direct_io = true;
3169 ctx->iter = *from;
3170 ctx->len = len;
3171 } else {
3172 rc = setup_aio_ctx_iter(ctx, from, WRITE);
3173 if (rc) {
3174 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3175 return rc;
3179 /* grab a lock here due to read response handlers can access ctx */
3180 mutex_lock(&ctx->aio_mutex);
3182 rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
3183 cfile, cifs_sb, &ctx->list, ctx);
3186 * If at least one write was successfully sent, then discard any rc
3187 * value from the later writes. If the other write succeeds, then
3188 * we'll end up returning whatever was written. If it fails, then
3189 * we'll get a new rc value from that.
3191 if (!list_empty(&ctx->list))
3192 rc = 0;
3194 mutex_unlock(&ctx->aio_mutex);
3196 if (rc) {
3197 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3198 return rc;
3201 if (!is_sync_kiocb(iocb)) {
3202 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3203 return -EIOCBQUEUED;
3206 rc = wait_for_completion_killable(&ctx->done);
3207 if (rc) {
3208 mutex_lock(&ctx->aio_mutex);
3209 ctx->rc = rc = -EINTR;
3210 total_written = ctx->total_len;
3211 mutex_unlock(&ctx->aio_mutex);
3212 } else {
3213 rc = ctx->rc;
3214 total_written = ctx->total_len;
3217 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3219 if (unlikely(!total_written))
3220 return rc;
3222 iocb->ki_pos += total_written;
3223 return total_written;
3226 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3228 return __cifs_writev(iocb, from, true);
3231 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3233 return __cifs_writev(iocb, from, false);
3236 static ssize_t
3237 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3239 struct file *file = iocb->ki_filp;
3240 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3241 struct inode *inode = file->f_mapping->host;
3242 struct cifsInodeInfo *cinode = CIFS_I(inode);
3243 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3244 ssize_t rc;
3246 inode_lock(inode);
3248 * We need to hold the sem to be sure nobody modifies lock list
3249 * with a brlock that prevents writing.
3251 down_read(&cinode->lock_sem);
3253 rc = generic_write_checks(iocb, from);
3254 if (rc <= 0)
3255 goto out;
3257 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3258 server->vals->exclusive_lock_type, 0,
3259 NULL, CIFS_WRITE_OP))
3260 rc = __generic_file_write_iter(iocb, from);
3261 else
3262 rc = -EACCES;
3263 out:
3264 up_read(&cinode->lock_sem);
3265 inode_unlock(inode);
3267 if (rc > 0)
3268 rc = generic_write_sync(iocb, rc);
3269 return rc;
3272 ssize_t
3273 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3275 struct inode *inode = file_inode(iocb->ki_filp);
3276 struct cifsInodeInfo *cinode = CIFS_I(inode);
3277 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3278 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3279 iocb->ki_filp->private_data;
3280 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3281 ssize_t written;
3283 written = cifs_get_writer(cinode);
3284 if (written)
3285 return written;
3287 if (CIFS_CACHE_WRITE(cinode)) {
3288 if (cap_unix(tcon->ses) &&
3289 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3290 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3291 written = generic_file_write_iter(iocb, from);
3292 goto out;
3294 written = cifs_writev(iocb, from);
3295 goto out;
3298 * For non-oplocked files in strict cache mode we need to write the data
3299 * to the server exactly from the pos to pos+len-1 rather than flush all
3300 * affected pages because it may cause a error with mandatory locks on
3301 * these pages but not on the region from pos to ppos+len-1.
3303 written = cifs_user_writev(iocb, from);
3304 if (CIFS_CACHE_READ(cinode)) {
3306 * We have read level caching and we have just sent a write
3307 * request to the server thus making data in the cache stale.
3308 * Zap the cache and set oplock/lease level to NONE to avoid
3309 * reading stale data from the cache. All subsequent read
3310 * operations will read new data from the server.
3312 cifs_zap_mapping(inode);
3313 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3314 inode);
3315 cinode->oplock = 0;
3317 out:
3318 cifs_put_writer(cinode);
3319 return written;
3322 static struct cifs_readdata *
3323 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3325 struct cifs_readdata *rdata;
3327 rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3328 if (rdata != NULL) {
3329 rdata->pages = pages;
3330 kref_init(&rdata->refcount);
3331 INIT_LIST_HEAD(&rdata->list);
3332 init_completion(&rdata->done);
3333 INIT_WORK(&rdata->work, complete);
3336 return rdata;
3339 static struct cifs_readdata *
3340 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3342 struct page **pages =
3343 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3344 struct cifs_readdata *ret = NULL;
3346 if (pages) {
3347 ret = cifs_readdata_direct_alloc(pages, complete);
3348 if (!ret)
3349 kfree(pages);
3352 return ret;
3355 void
3356 cifs_readdata_release(struct kref *refcount)
3358 struct cifs_readdata *rdata = container_of(refcount,
3359 struct cifs_readdata, refcount);
3360 #ifdef CONFIG_CIFS_SMB_DIRECT
3361 if (rdata->mr) {
3362 smbd_deregister_mr(rdata->mr);
3363 rdata->mr = NULL;
3365 #endif
3366 if (rdata->cfile)
3367 cifsFileInfo_put(rdata->cfile);
3369 kvfree(rdata->pages);
3370 kfree(rdata);
3373 static int
3374 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3376 int rc = 0;
3377 struct page *page;
3378 unsigned int i;
3380 for (i = 0; i < nr_pages; i++) {
3381 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3382 if (!page) {
3383 rc = -ENOMEM;
3384 break;
3386 rdata->pages[i] = page;
3389 if (rc) {
3390 unsigned int nr_page_failed = i;
3392 for (i = 0; i < nr_page_failed; i++) {
3393 put_page(rdata->pages[i]);
3394 rdata->pages[i] = NULL;
3397 return rc;
3400 static void
3401 cifs_uncached_readdata_release(struct kref *refcount)
3403 struct cifs_readdata *rdata = container_of(refcount,
3404 struct cifs_readdata, refcount);
3405 unsigned int i;
3407 kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3408 for (i = 0; i < rdata->nr_pages; i++) {
3409 put_page(rdata->pages[i]);
3411 cifs_readdata_release(refcount);
3415 * cifs_readdata_to_iov - copy data from pages in response to an iovec
3416 * @rdata: the readdata response with list of pages holding data
3417 * @iter: destination for our data
3419 * This function copies data from a list of pages in a readdata response into
3420 * an array of iovecs. It will first calculate where the data should go
3421 * based on the info in the readdata and then copy the data into that spot.
3423 static int
3424 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3426 size_t remaining = rdata->got_bytes;
3427 unsigned int i;
3429 for (i = 0; i < rdata->nr_pages; i++) {
3430 struct page *page = rdata->pages[i];
3431 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3432 size_t written;
3434 if (unlikely(iov_iter_is_pipe(iter))) {
3435 void *addr = kmap_atomic(page);
3437 written = copy_to_iter(addr, copy, iter);
3438 kunmap_atomic(addr);
3439 } else
3440 written = copy_page_to_iter(page, 0, copy, iter);
3441 remaining -= written;
3442 if (written < copy && iov_iter_count(iter) > 0)
3443 break;
3445 return remaining ? -EFAULT : 0;
3448 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3450 static void
3451 cifs_uncached_readv_complete(struct work_struct *work)
3453 struct cifs_readdata *rdata = container_of(work,
3454 struct cifs_readdata, work);
3456 complete(&rdata->done);
3457 collect_uncached_read_data(rdata->ctx);
3458 /* the below call can possibly free the last ref to aio ctx */
3459 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3462 static int
3463 uncached_fill_pages(struct TCP_Server_Info *server,
3464 struct cifs_readdata *rdata, struct iov_iter *iter,
3465 unsigned int len)
3467 int result = 0;
3468 unsigned int i;
3469 unsigned int nr_pages = rdata->nr_pages;
3470 unsigned int page_offset = rdata->page_offset;
3472 rdata->got_bytes = 0;
3473 rdata->tailsz = PAGE_SIZE;
3474 for (i = 0; i < nr_pages; i++) {
3475 struct page *page = rdata->pages[i];
3476 size_t n;
3477 unsigned int segment_size = rdata->pagesz;
3479 if (i == 0)
3480 segment_size -= page_offset;
3481 else
3482 page_offset = 0;
3485 if (len <= 0) {
3486 /* no need to hold page hostage */
3487 rdata->pages[i] = NULL;
3488 rdata->nr_pages--;
3489 put_page(page);
3490 continue;
3493 n = len;
3494 if (len >= segment_size)
3495 /* enough data to fill the page */
3496 n = segment_size;
3497 else
3498 rdata->tailsz = len;
3499 len -= n;
3501 if (iter)
3502 result = copy_page_from_iter(
3503 page, page_offset, n, iter);
3504 #ifdef CONFIG_CIFS_SMB_DIRECT
3505 else if (rdata->mr)
3506 result = n;
3507 #endif
3508 else
3509 result = cifs_read_page_from_socket(
3510 server, page, page_offset, n);
3511 if (result < 0)
3512 break;
3514 rdata->got_bytes += result;
3517 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3518 rdata->got_bytes : result;
3521 static int
3522 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3523 struct cifs_readdata *rdata, unsigned int len)
3525 return uncached_fill_pages(server, rdata, NULL, len);
3528 static int
3529 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3530 struct cifs_readdata *rdata,
3531 struct iov_iter *iter)
3533 return uncached_fill_pages(server, rdata, iter, iter->count);
3536 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3537 struct list_head *rdata_list,
3538 struct cifs_aio_ctx *ctx)
3540 unsigned int rsize;
3541 struct cifs_credits credits;
3542 int rc;
3543 struct TCP_Server_Info *server =
3544 tlink_tcon(rdata->cfile->tlink)->ses->server;
3546 do {
3547 if (rdata->cfile->invalidHandle) {
3548 rc = cifs_reopen_file(rdata->cfile, true);
3549 if (rc == -EAGAIN)
3550 continue;
3551 else if (rc)
3552 break;
3556 * Wait for credits to resend this rdata.
3557 * Note: we are attempting to resend the whole rdata not in
3558 * segments
3560 do {
3561 rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3562 &rsize, &credits);
3564 if (rc)
3565 goto fail;
3567 if (rsize < rdata->bytes) {
3568 add_credits_and_wake_if(server, &credits, 0);
3569 msleep(1000);
3571 } while (rsize < rdata->bytes);
3572 rdata->credits = credits;
3574 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3575 if (!rc) {
3576 if (rdata->cfile->invalidHandle)
3577 rc = -EAGAIN;
3578 else {
3579 #ifdef CONFIG_CIFS_SMB_DIRECT
3580 if (rdata->mr) {
3581 rdata->mr->need_invalidate = true;
3582 smbd_deregister_mr(rdata->mr);
3583 rdata->mr = NULL;
3585 #endif
3586 rc = server->ops->async_readv(rdata);
3590 /* If the read was successfully sent, we are done */
3591 if (!rc) {
3592 /* Add to aio pending list */
3593 list_add_tail(&rdata->list, rdata_list);
3594 return 0;
3597 /* Roll back credits and retry if needed */
3598 add_credits_and_wake_if(server, &rdata->credits, 0);
3599 } while (rc == -EAGAIN);
3601 fail:
3602 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3603 return rc;
3606 static int
3607 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3608 struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3609 struct cifs_aio_ctx *ctx)
3611 struct cifs_readdata *rdata;
3612 unsigned int npages, rsize;
3613 struct cifs_credits credits_on_stack;
3614 struct cifs_credits *credits = &credits_on_stack;
3615 size_t cur_len;
3616 int rc;
3617 pid_t pid;
3618 struct TCP_Server_Info *server;
3619 struct page **pagevec;
3620 size_t start;
3621 struct iov_iter direct_iov = ctx->iter;
3623 server = tlink_tcon(open_file->tlink)->ses->server;
3625 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3626 pid = open_file->pid;
3627 else
3628 pid = current->tgid;
3630 if (ctx->direct_io)
3631 iov_iter_advance(&direct_iov, offset - ctx->pos);
3633 do {
3634 if (open_file->invalidHandle) {
3635 rc = cifs_reopen_file(open_file, true);
3636 if (rc == -EAGAIN)
3637 continue;
3638 else if (rc)
3639 break;
3642 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3643 &rsize, credits);
3644 if (rc)
3645 break;
3647 cur_len = min_t(const size_t, len, rsize);
3649 if (ctx->direct_io) {
3650 ssize_t result;
3652 result = iov_iter_get_pages_alloc(
3653 &direct_iov, &pagevec,
3654 cur_len, &start);
3655 if (result < 0) {
3656 cifs_dbg(VFS,
3657 "couldn't get user pages (rc=%zd)"
3658 " iter type %d"
3659 " iov_offset %zd count %zd\n",
3660 result, iov_iter_type(&direct_iov),
3661 direct_iov.iov_offset,
3662 direct_iov.count);
3663 dump_stack();
3665 rc = result;
3666 add_credits_and_wake_if(server, credits, 0);
3667 break;
3669 cur_len = (size_t)result;
3670 iov_iter_advance(&direct_iov, cur_len);
3672 rdata = cifs_readdata_direct_alloc(
3673 pagevec, cifs_uncached_readv_complete);
3674 if (!rdata) {
3675 add_credits_and_wake_if(server, credits, 0);
3676 rc = -ENOMEM;
3677 break;
3680 npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
3681 rdata->page_offset = start;
3682 rdata->tailsz = npages > 1 ?
3683 cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
3684 cur_len;
3686 } else {
3688 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3689 /* allocate a readdata struct */
3690 rdata = cifs_readdata_alloc(npages,
3691 cifs_uncached_readv_complete);
3692 if (!rdata) {
3693 add_credits_and_wake_if(server, credits, 0);
3694 rc = -ENOMEM;
3695 break;
3698 rc = cifs_read_allocate_pages(rdata, npages);
3699 if (rc) {
3700 kvfree(rdata->pages);
3701 kfree(rdata);
3702 add_credits_and_wake_if(server, credits, 0);
3703 break;
3706 rdata->tailsz = PAGE_SIZE;
3709 rdata->cfile = cifsFileInfo_get(open_file);
3710 rdata->nr_pages = npages;
3711 rdata->offset = offset;
3712 rdata->bytes = cur_len;
3713 rdata->pid = pid;
3714 rdata->pagesz = PAGE_SIZE;
3715 rdata->read_into_pages = cifs_uncached_read_into_pages;
3716 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3717 rdata->credits = credits_on_stack;
3718 rdata->ctx = ctx;
3719 kref_get(&ctx->refcount);
3721 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3723 if (!rc) {
3724 if (rdata->cfile->invalidHandle)
3725 rc = -EAGAIN;
3726 else
3727 rc = server->ops->async_readv(rdata);
3730 if (rc) {
3731 add_credits_and_wake_if(server, &rdata->credits, 0);
3732 kref_put(&rdata->refcount,
3733 cifs_uncached_readdata_release);
3734 if (rc == -EAGAIN) {
3735 iov_iter_revert(&direct_iov, cur_len);
3736 continue;
3738 break;
3741 list_add_tail(&rdata->list, rdata_list);
3742 offset += cur_len;
3743 len -= cur_len;
3744 } while (len > 0);
3746 return rc;
/*
 * Gather the results of the read requests queued on @ctx.
 *
 * Walks ctx->list under ctx->aio_mutex. If a request has not completed yet
 * the function returns early without finishing the context; it is called
 * again from cifs_uncached_readv_complete() each time a request completes.
 * Requests that failed with -EAGAIN are resent and the walk restarts.
 * Once every request has been consumed, ctx->rc is set to the byte count or
 * the error and the waiter is notified (ki_complete for an iocb with a
 * completion callback, otherwise ctx->done).
 */
static void
collect_uncached_read_data(struct cifs_aio_ctx *ctx)
{
	struct cifs_readdata *rdata, *tmp;
	struct iov_iter *to = &ctx->iter;
	struct cifs_sb_info *cifs_sb;
	int rc;

	cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);

	mutex_lock(&ctx->aio_mutex);

	/* nothing queued: nothing to collect */
	if (list_empty(&ctx->list)) {
		mutex_unlock(&ctx->aio_mutex);
		return;
	}

	rc = ctx->rc;
	/* the loop below should proceed in the order of increasing offsets */
again:
	list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
		if (!rc) {
			/* an unfinished request: stop here, a later call resumes */
			if (!try_wait_for_completion(&rdata->done)) {
				mutex_unlock(&ctx->aio_mutex);
				return;
			}

			if (rdata->result == -EAGAIN) {
				/* resend call if it's a retryable error */
				struct list_head tmp_list;
				unsigned int got_bytes = rdata->got_bytes;

				list_del_init(&rdata->list);
				INIT_LIST_HEAD(&tmp_list);

				/*
				 * Got a part of data and then reconnect has
				 * happened -- fill the buffer and continue
				 * reading.
				 */
				if (got_bytes && got_bytes < rdata->bytes) {
					rc = 0;
					if (!ctx->direct_io)
						rc = cifs_readdata_to_iov(rdata, to);
					if (rc) {
						kref_put(&rdata->refcount,
							cifs_uncached_readdata_release);
						continue;
					}
				}

				if (ctx->direct_io) {
					/*
					 * Re-use rdata as this is a
					 * direct I/O
					 */
					rc = cifs_resend_rdata(
						rdata,
						&tmp_list, ctx);
				} else {
					/* issue new requests for the missing remainder */
					rc = cifs_send_async_read(
						rdata->offset + got_bytes,
						rdata->bytes - got_bytes,
						rdata->cfile, cifs_sb,
						&tmp_list, ctx);

					kref_put(&rdata->refcount,
						cifs_uncached_readdata_release);
				}

				/* put the resent requests at the head of the list */
				list_splice(&tmp_list, &ctx->list);

				/* the list changed: restart the walk from its head */
				goto again;
			} else if (rdata->result)
				rc = rdata->result;
			else if (!ctx->direct_io)
				rc = cifs_readdata_to_iov(rdata, to);

			/* if there was a short read -- discard anything left */
			if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
				rc = -ENODATA;

			ctx->total_len += rdata->got_bytes;
		}
		/* reached with rc != 0 as well: remaining requests are just dropped */
		list_del_init(&rdata->list);
		kref_put(&rdata->refcount, cifs_uncached_readdata_release);
	}

	/* for copied reads the total is what was consumed from the iterator */
	if (!ctx->direct_io)
		ctx->total_len = ctx->len - iov_iter_count(to);

	/* mask nodata case */
	if (rc == -ENODATA)
		rc = 0;

	ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;

	mutex_unlock(&ctx->aio_mutex);

	if (ctx->iocb && ctx->iocb->ki_complete)
		ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
	else
		complete(&ctx->done);
}
3854 static ssize_t __cifs_readv(
3855 struct kiocb *iocb, struct iov_iter *to, bool direct)
3857 size_t len;
3858 struct file *file = iocb->ki_filp;
3859 struct cifs_sb_info *cifs_sb;
3860 struct cifsFileInfo *cfile;
3861 struct cifs_tcon *tcon;
3862 ssize_t rc, total_read = 0;
3863 loff_t offset = iocb->ki_pos;
3864 struct cifs_aio_ctx *ctx;
3867 * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
3868 * fall back to data copy read path
3869 * this could be improved by getting pages directly in ITER_KVEC
3871 if (direct && iov_iter_is_kvec(to)) {
3872 cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
3873 direct = false;
3876 len = iov_iter_count(to);
3877 if (!len)
3878 return 0;
3880 cifs_sb = CIFS_FILE_SB(file);
3881 cfile = file->private_data;
3882 tcon = tlink_tcon(cfile->tlink);
3884 if (!tcon->ses->server->ops->async_readv)
3885 return -ENOSYS;
3887 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3888 cifs_dbg(FYI, "attempting read on write only file instance\n");
3890 ctx = cifs_aio_ctx_alloc();
3891 if (!ctx)
3892 return -ENOMEM;
3894 ctx->cfile = cifsFileInfo_get(cfile);
3896 if (!is_sync_kiocb(iocb))
3897 ctx->iocb = iocb;
3899 if (iter_is_iovec(to))
3900 ctx->should_dirty = true;
3902 if (direct) {
3903 ctx->pos = offset;
3904 ctx->direct_io = true;
3905 ctx->iter = *to;
3906 ctx->len = len;
3907 } else {
3908 rc = setup_aio_ctx_iter(ctx, to, READ);
3909 if (rc) {
3910 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3911 return rc;
3913 len = ctx->len;
3916 /* grab a lock here due to read response handlers can access ctx */
3917 mutex_lock(&ctx->aio_mutex);
3919 rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
3921 /* if at least one read request send succeeded, then reset rc */
3922 if (!list_empty(&ctx->list))
3923 rc = 0;
3925 mutex_unlock(&ctx->aio_mutex);
3927 if (rc) {
3928 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3929 return rc;
3932 if (!is_sync_kiocb(iocb)) {
3933 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3934 return -EIOCBQUEUED;
3937 rc = wait_for_completion_killable(&ctx->done);
3938 if (rc) {
3939 mutex_lock(&ctx->aio_mutex);
3940 ctx->rc = rc = -EINTR;
3941 total_read = ctx->total_len;
3942 mutex_unlock(&ctx->aio_mutex);
3943 } else {
3944 rc = ctx->rc;
3945 total_read = ctx->total_len;
3948 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3950 if (total_read) {
3951 iocb->ki_pos += total_read;
3952 return total_read;
3954 return rc;
3957 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
3959 return __cifs_readv(iocb, to, true);
3962 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
3964 return __cifs_readv(iocb, to, false);
3967 ssize_t
3968 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3970 struct inode *inode = file_inode(iocb->ki_filp);
3971 struct cifsInodeInfo *cinode = CIFS_I(inode);
3972 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3973 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3974 iocb->ki_filp->private_data;
3975 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3976 int rc = -EACCES;
3979 * In strict cache mode we need to read from the server all the time
3980 * if we don't have level II oplock because the server can delay mtime
3981 * change - so we can't make a decision about inode invalidating.
3982 * And we can also fail with pagereading if there are mandatory locks
3983 * on pages affected by this read but not on the region from pos to
3984 * pos+len-1.
3986 if (!CIFS_CACHE_READ(cinode))
3987 return cifs_user_readv(iocb, to);
3989 if (cap_unix(tcon->ses) &&
3990 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3991 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3992 return generic_file_read_iter(iocb, to);
3995 * We need to hold the sem to be sure nobody modifies lock list
3996 * with a brlock that prevents reading.
3998 down_read(&cinode->lock_sem);
3999 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4000 tcon->ses->server->vals->shared_lock_type,
4001 0, NULL, CIFS_READ_OP))
4002 rc = generic_file_read_iter(iocb, to);
4003 up_read(&cinode->lock_sem);
4004 return rc;
4007 static ssize_t
4008 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4010 int rc = -EACCES;
4011 unsigned int bytes_read = 0;
4012 unsigned int total_read;
4013 unsigned int current_read_size;
4014 unsigned int rsize;
4015 struct cifs_sb_info *cifs_sb;
4016 struct cifs_tcon *tcon;
4017 struct TCP_Server_Info *server;
4018 unsigned int xid;
4019 char *cur_offset;
4020 struct cifsFileInfo *open_file;
4021 struct cifs_io_parms io_parms;
4022 int buf_type = CIFS_NO_BUFFER;
4023 __u32 pid;
4025 xid = get_xid();
4026 cifs_sb = CIFS_FILE_SB(file);
4028 /* FIXME: set up handlers for larger reads and/or convert to async */
4029 rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
4031 if (file->private_data == NULL) {
4032 rc = -EBADF;
4033 free_xid(xid);
4034 return rc;
4036 open_file = file->private_data;
4037 tcon = tlink_tcon(open_file->tlink);
4038 server = tcon->ses->server;
4040 if (!server->ops->sync_read) {
4041 free_xid(xid);
4042 return -ENOSYS;
4045 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4046 pid = open_file->pid;
4047 else
4048 pid = current->tgid;
4050 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4051 cifs_dbg(FYI, "attempting read on write only file instance\n");
4053 for (total_read = 0, cur_offset = read_data; read_size > total_read;
4054 total_read += bytes_read, cur_offset += bytes_read) {
4055 do {
4056 current_read_size = min_t(uint, read_size - total_read,
4057 rsize);
4059 * For windows me and 9x we do not want to request more
4060 * than it negotiated since it will refuse the read
4061 * then.
4063 if ((tcon->ses) && !(tcon->ses->capabilities &
4064 tcon->ses->server->vals->cap_large_files)) {
4065 current_read_size = min_t(uint,
4066 current_read_size, CIFSMaxBufSize);
4068 if (open_file->invalidHandle) {
4069 rc = cifs_reopen_file(open_file, true);
4070 if (rc != 0)
4071 break;
4073 io_parms.pid = pid;
4074 io_parms.tcon = tcon;
4075 io_parms.offset = *offset;
4076 io_parms.length = current_read_size;
4077 rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4078 &bytes_read, &cur_offset,
4079 &buf_type);
4080 } while (rc == -EAGAIN);
4082 if (rc || (bytes_read == 0)) {
4083 if (total_read) {
4084 break;
4085 } else {
4086 free_xid(xid);
4087 return rc;
4089 } else {
4090 cifs_stats_bytes_read(tcon, total_read);
4091 *offset += bytes_read;
4094 free_xid(xid);
4095 return total_read;
4099 * If the page is mmap'ed into a process' page tables, then we need to make
4100 * sure that it doesn't change while being written back.
4102 static vm_fault_t
4103 cifs_page_mkwrite(struct vm_fault *vmf)
4105 struct page *page = vmf->page;
4107 lock_page(page);
4108 return VM_FAULT_LOCKED;
4111 static const struct vm_operations_struct cifs_file_vm_ops = {
4112 .fault = filemap_fault,
4113 .map_pages = filemap_map_pages,
4114 .page_mkwrite = cifs_page_mkwrite,
4117 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4119 int xid, rc = 0;
4120 struct inode *inode = file_inode(file);
4122 xid = get_xid();
4124 if (!CIFS_CACHE_READ(CIFS_I(inode)))
4125 rc = cifs_zap_mapping(inode);
4126 if (!rc)
4127 rc = generic_file_mmap(file, vma);
4128 if (!rc)
4129 vma->vm_ops = &cifs_file_vm_ops;
4131 free_xid(xid);
4132 return rc;
4135 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4137 int rc, xid;
4139 xid = get_xid();
4141 rc = cifs_revalidate_file(file);
4142 if (rc)
4143 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4144 rc);
4145 if (!rc)
4146 rc = generic_file_mmap(file, vma);
4147 if (!rc)
4148 vma->vm_ops = &cifs_file_vm_ops;
4150 free_xid(xid);
4151 return rc;
4154 static void
4155 cifs_readv_complete(struct work_struct *work)
4157 unsigned int i, got_bytes;
4158 struct cifs_readdata *rdata = container_of(work,
4159 struct cifs_readdata, work);
4161 got_bytes = rdata->got_bytes;
4162 for (i = 0; i < rdata->nr_pages; i++) {
4163 struct page *page = rdata->pages[i];
4165 lru_cache_add_file(page);
4167 if (rdata->result == 0 ||
4168 (rdata->result == -EAGAIN && got_bytes)) {
4169 flush_dcache_page(page);
4170 SetPageUptodate(page);
4173 unlock_page(page);
4175 if (rdata->result == 0 ||
4176 (rdata->result == -EAGAIN && got_bytes))
4177 cifs_readpage_to_fscache(rdata->mapping->host, page);
4179 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
4181 put_page(page);
4182 rdata->pages[i] = NULL;
4184 kref_put(&rdata->refcount, cifs_readdata_release);
4187 static int
4188 readpages_fill_pages(struct TCP_Server_Info *server,
4189 struct cifs_readdata *rdata, struct iov_iter *iter,
4190 unsigned int len)
4192 int result = 0;
4193 unsigned int i;
4194 u64 eof;
4195 pgoff_t eof_index;
4196 unsigned int nr_pages = rdata->nr_pages;
4197 unsigned int page_offset = rdata->page_offset;
4199 /* determine the eof that the server (probably) has */
4200 eof = CIFS_I(rdata->mapping->host)->server_eof;
4201 eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
4202 cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
4204 rdata->got_bytes = 0;
4205 rdata->tailsz = PAGE_SIZE;
4206 for (i = 0; i < nr_pages; i++) {
4207 struct page *page = rdata->pages[i];
4208 unsigned int to_read = rdata->pagesz;
4209 size_t n;
4211 if (i == 0)
4212 to_read -= page_offset;
4213 else
4214 page_offset = 0;
4216 n = to_read;
4218 if (len >= to_read) {
4219 len -= to_read;
4220 } else if (len > 0) {
4221 /* enough for partial page, fill and zero the rest */
4222 zero_user(page, len + page_offset, to_read - len);
4223 n = rdata->tailsz = len;
4224 len = 0;
4225 } else if (page->index > eof_index) {
4227 * The VFS will not try to do readahead past the
4228 * i_size, but it's possible that we have outstanding
4229 * writes with gaps in the middle and the i_size hasn't
4230 * caught up yet. Populate those with zeroed out pages
4231 * to prevent the VFS from repeatedly attempting to
4232 * fill them until the writes are flushed.
4234 zero_user(page, 0, PAGE_SIZE);
4235 lru_cache_add_file(page);
4236 flush_dcache_page(page);
4237 SetPageUptodate(page);
4238 unlock_page(page);
4239 put_page(page);
4240 rdata->pages[i] = NULL;
4241 rdata->nr_pages--;
4242 continue;
4243 } else {
4244 /* no need to hold page hostage */
4245 lru_cache_add_file(page);
4246 unlock_page(page);
4247 put_page(page);
4248 rdata->pages[i] = NULL;
4249 rdata->nr_pages--;
4250 continue;
4253 if (iter)
4254 result = copy_page_from_iter(
4255 page, page_offset, n, iter);
4256 #ifdef CONFIG_CIFS_SMB_DIRECT
4257 else if (rdata->mr)
4258 result = n;
4259 #endif
4260 else
4261 result = cifs_read_page_from_socket(
4262 server, page, page_offset, n);
4263 if (result < 0)
4264 break;
4266 rdata->got_bytes += result;
4269 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
4270 rdata->got_bytes : result;
4273 static int
4274 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
4275 struct cifs_readdata *rdata, unsigned int len)
4277 return readpages_fill_pages(server, rdata, NULL, len);
4280 static int
4281 cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
4282 struct cifs_readdata *rdata,
4283 struct iov_iter *iter)
4285 return readpages_fill_pages(server, rdata, iter, iter->count);
4288 static int
4289 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
4290 unsigned int rsize, struct list_head *tmplist,
4291 unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
4293 struct page *page, *tpage;
4294 unsigned int expected_index;
4295 int rc;
4296 gfp_t gfp = readahead_gfp_mask(mapping);
4298 INIT_LIST_HEAD(tmplist);
4300 page = lru_to_page(page_list);
4303 * Lock the page and put it in the cache. Since no one else
4304 * should have access to this page, we're safe to simply set
4305 * PG_locked without checking it first.
4307 __SetPageLocked(page);
4308 rc = add_to_page_cache_locked(page, mapping,
4309 page->index, gfp);
4311 /* give up if we can't stick it in the cache */
4312 if (rc) {
4313 __ClearPageLocked(page);
4314 return rc;
4317 /* move first page to the tmplist */
4318 *offset = (loff_t)page->index << PAGE_SHIFT;
4319 *bytes = PAGE_SIZE;
4320 *nr_pages = 1;
4321 list_move_tail(&page->lru, tmplist);
4323 /* now try and add more pages onto the request */
4324 expected_index = page->index + 1;
4325 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
4326 /* discontinuity ? */
4327 if (page->index != expected_index)
4328 break;
4330 /* would this page push the read over the rsize? */
4331 if (*bytes + PAGE_SIZE > rsize)
4332 break;
4334 __SetPageLocked(page);
4335 if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
4336 __ClearPageLocked(page);
4337 break;
4339 list_move_tail(&page->lru, tmplist);
4340 (*bytes) += PAGE_SIZE;
4341 expected_index++;
4342 (*nr_pages)++;
4344 return rc;
/*
 * readpages address-space operation.
 *
 * First tries to satisfy the pages from fscache. Whatever is left is read
 * from the server: contiguous runs of pages are pulled off @page_list,
 * packed into one read request sized by the credits granted, and sent as
 * an async read whose completion handler is cifs_readv_complete(). The
 * loop stops at the first error; pages still on @page_list are then only
 * cancelled from fscache before returning.
 */
static int cifs_readpages(struct file *file, struct address_space *mapping,
	struct list_head *page_list, unsigned num_pages)
{
	int rc;
	struct list_head tmplist;
	struct cifsFileInfo *open_file = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
	struct TCP_Server_Info *server;
	pid_t pid;
	unsigned int xid;

	xid = get_xid();
	/*
	 * Reads as many pages as possible from fscache. Returns -ENOBUFS
	 * immediately if the cookie is negative
	 *
	 * After this point, every page in the list might have PG_fscache set,
	 * so we will need to clean that up off of every page we don't use.
	 */
	rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
					 &num_pages);
	if (rc == 0) {
		free_xid(xid);
		return rc;
	}

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	rc = 0;
	server = tlink_tcon(open_file->tlink)->ses->server;

	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
		 __func__, file, mapping, num_pages);

	/*
	 * Start with the page at end of list and move it to private
	 * list. Do the same with any following pages until we hit
	 * the rsize limit, hit an index discontinuity, or run out of
	 * pages. Issue the async read and then start the loop again
	 * until the list is empty.
	 *
	 * Note that list order is important. The page_list is in
	 * the order of declining indexes. When we put the pages in
	 * the rdata->pages, then we want them in increasing order.
	 */
	while (!list_empty(page_list)) {
		unsigned int i, nr_pages, bytes, rsize;
		loff_t offset;
		struct page *page, *tpage;
		struct cifs_readdata *rdata;
		struct cifs_credits credits_on_stack;
		struct cifs_credits *credits = &credits_on_stack;

		if (open_file->invalidHandle) {
			rc = cifs_reopen_file(open_file, true);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
						   &rsize, credits);
		if (rc)
			break;

		/*
		 * Give up immediately if rsize is too small to read an entire
		 * page. The VFS will fall back to readpage. We should never
		 * reach this point however since we set ra_pages to 0 when the
		 * rsize is smaller than a cache page.
		 */
		if (unlikely(rsize < PAGE_SIZE)) {
			add_credits_and_wake_if(server, credits, 0);
			free_xid(xid);
			return 0;
		}

		rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
					 &nr_pages, &offset, &bytes);
		if (rc) {
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
		if (!rdata) {
			/* best to give up if we're out of mem */
			list_for_each_entry_safe(page, tpage, &tmplist, lru) {
				list_del(&page->lru);
				lru_cache_add_file(page);
				unlock_page(page);
				put_page(page);
			}
			rc = -ENOMEM;
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->mapping = mapping;
		rdata->offset = offset;
		rdata->bytes = bytes;
		rdata->pid = pid;
		rdata->pagesz = PAGE_SIZE;
		rdata->tailsz = PAGE_SIZE;
		rdata->read_into_pages = cifs_readpages_read_into_pages;
		rdata->copy_into_pages = cifs_readpages_copy_into_pages;
		rdata->credits = credits_on_stack;

		/* hand the collected pages over to the request, in list order */
		list_for_each_entry_safe(page, tpage, &tmplist, lru) {
			list_del(&page->lru);
			rdata->pages[rdata->nr_pages++] = page;
		}

		rc = adjust_credits(server, &rdata->credits, rdata->bytes);

		if (!rc) {
			if (rdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else
				rc = server->ops->async_readv(rdata);
		}

		if (rc) {
			/* not sent: return the credits and release every page */
			add_credits_and_wake_if(server, &rdata->credits, 0);
			for (i = 0; i < rdata->nr_pages; i++) {
				page = rdata->pages[i];
				lru_cache_add_file(page);
				unlock_page(page);
				put_page(page);
			}
			/* Fallback to the readpage in error/reconnect cases */
			kref_put(&rdata->refcount, cifs_readdata_release);
			break;
		}

		/* sent: drop this function's reference on the request */
		kref_put(&rdata->refcount, cifs_readdata_release);
	}

	/* Any pages that have been shown to fscache but didn't get added to
	 * the pagecache must be uncached before they get returned to the
	 * allocator.
	 */
	cifs_fscache_readpages_cancel(mapping->host, page_list);
	free_xid(xid);
	return rc;
}
4500 * cifs_readpage_worker must be called with the page pinned
4502 static int cifs_readpage_worker(struct file *file, struct page *page,
4503 loff_t *poffset)
4505 char *read_data;
4506 int rc;
4508 /* Is the page cached? */
4509 rc = cifs_readpage_from_fscache(file_inode(file), page);
4510 if (rc == 0)
4511 goto read_complete;
4513 read_data = kmap(page);
4514 /* for reads over a certain size could initiate async read ahead */
4516 rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4518 if (rc < 0)
4519 goto io_error;
4520 else
4521 cifs_dbg(FYI, "Bytes read %d\n", rc);
4523 /* we do not want atime to be less than mtime, it broke some apps */
4524 file_inode(file)->i_atime = current_time(file_inode(file));
4525 if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)))
4526 file_inode(file)->i_atime = file_inode(file)->i_mtime;
4527 else
4528 file_inode(file)->i_atime = current_time(file_inode(file));
4530 if (PAGE_SIZE > rc)
4531 memset(read_data + rc, 0, PAGE_SIZE - rc);
4533 flush_dcache_page(page);
4534 SetPageUptodate(page);
4536 /* send this page to the cache */
4537 cifs_readpage_to_fscache(file_inode(file), page);
4539 rc = 0;
4541 io_error:
4542 kunmap(page);
4543 unlock_page(page);
4545 read_complete:
4546 return rc;
4549 static int cifs_readpage(struct file *file, struct page *page)
4551 loff_t offset = (loff_t)page->index << PAGE_SHIFT;
4552 int rc = -EACCES;
4553 unsigned int xid;
4555 xid = get_xid();
4557 if (file->private_data == NULL) {
4558 rc = -EBADF;
4559 free_xid(xid);
4560 return rc;
4563 cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
4564 page, (int)offset, (int)offset);
4566 rc = cifs_readpage_worker(file, page, &offset);
4568 free_xid(xid);
4569 return rc;
4572 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4574 struct cifsFileInfo *open_file;
4576 spin_lock(&cifs_inode->open_file_lock);
4577 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4578 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4579 spin_unlock(&cifs_inode->open_file_lock);
4580 return 1;
4583 spin_unlock(&cifs_inode->open_file_lock);
4584 return 0;
4587 /* We do not want to update the file size from server for inodes
4588 open for write - to avoid races with writepage extending
4589 the file - in the future we could consider allowing
4590 refreshing the inode only on increases in the file size
4591 but this is tricky to do without racing with writebehind
4592 page caching in the current Linux kernel design */
4593 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4595 if (!cifsInode)
4596 return true;
4598 if (is_inode_writable(cifsInode)) {
4599 /* This inode is open for write at least once */
4600 struct cifs_sb_info *cifs_sb;
4602 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
4603 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4604 /* since no page cache to corrupt on directio
4605 we can change size safely */
4606 return true;
4609 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
4610 return true;
4612 return false;
4613 } else
4614 return true;
/*
 * write_begin address-space operation.
 *
 * Grabs (and returns via @pagep) the page-cache page covering @pos. The
 * page is read from the server at most once, and only when it is not
 * already up to date, the write does not cover the whole page, the read
 * cannot be optimised away, and the file is not open write-only.
 *
 * Returns 0, or -ENOMEM when no page could be obtained (in which case
 * *@pagep is NULL).
 */
static int cifs_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	int oncethru = 0;
	pgoff_t index = pos >> PAGE_SHIFT;
	loff_t offset = pos & (PAGE_SIZE - 1);
	loff_t page_start = pos & PAGE_MASK;
	loff_t i_size;
	struct page *page;
	int rc = 0;

	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);

start:
	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page) {
		rc = -ENOMEM;
		goto out;
	}

	if (PageUptodate(page))
		goto out;

	/*
	 * If we write a full page it will be up to date, no need to read from
	 * the server. If the write is short, we'll end up doing a sync write
	 * instead.
	 */
	if (len == PAGE_SIZE)
		goto out;

	/*
	 * optimize away the read when we have an oplock, and we're not
	 * expecting to use any of the data we'd be reading in. That
	 * is, when the page lies beyond the EOF, or straddles the EOF
	 * and the write will cover all of the existing data.
	 */
	if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
		i_size = i_size_read(mapping->host);
		if (page_start >= i_size ||
		    (offset == 0 && (pos + len) >= i_size)) {
			/* zero everything outside [offset, offset + len) */
			zero_user_segments(page, 0, offset,
					   offset + len,
					   PAGE_SIZE);
			/*
			 * PageChecked means that the parts of the page
			 * to which we're not writing are considered up
			 * to date. Once the data is copied to the
			 * page, it can be set uptodate.
			 */
			SetPageChecked(page);
			goto out;
		}
	}

	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
		/*
		 * might as well read a page, it is fast enough. If we get
		 * an error, we don't need to return it. cifs_write_end will
		 * do a sync write instead since PG_uptodate isn't set.
		 */
		cifs_readpage_worker(file, page, &page_start);
		put_page(page);
		/* grab the page again, but never read it a second time */
		oncethru = 1;
		goto start;
	} else {
		/* we could try using another file handle if there is one -
		   but how would we lock it to prevent close of that handle
		   racing with this read? In any case
		   this will be written out by write_end so is fine */
	}
out:
	*pagep = page;
	return rc;
}
4694 static int cifs_release_page(struct page *page, gfp_t gfp)
4696 if (PagePrivate(page))
4697 return 0;
4699 return cifs_fscache_release_page(page, gfp);
4702 static void cifs_invalidate_page(struct page *page, unsigned int offset,
4703 unsigned int length)
4705 struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4707 if (offset == 0 && length == PAGE_SIZE)
4708 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
4711 static int cifs_launder_page(struct page *page)
4713 int rc = 0;
4714 loff_t range_start = page_offset(page);
4715 loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
4716 struct writeback_control wbc = {
4717 .sync_mode = WB_SYNC_ALL,
4718 .nr_to_write = 0,
4719 .range_start = range_start,
4720 .range_end = range_end,
4723 cifs_dbg(FYI, "Launder page: %p\n", page);
4725 if (clear_page_dirty_for_io(page))
4726 rc = cifs_writepage_locked(page, &wbc);
4728 cifs_fscache_invalidate_page(page, page->mapping->host);
4729 return rc;
/*
 * Work handler that processes an oplock break for one open file.
 *
 * Waits for pending writers, lets the server ops downgrade the cached
 * oplock state, flushes (and, where caching is lost or a purge is
 * requested, waits for and zaps) the page cache of regular files, pushes
 * cached byte-range locks to the server unless write caching is retained,
 * acknowledges the break unless it was cancelled, and finally drops the
 * file reference and signals that the break is done.
 */
void cifs_oplock_break(struct work_struct *work)
{
	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
						  oplock_break);
	struct inode *inode = d_inode(cfile->dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	int rc = 0;
	bool purge_cache = false;

	/* block until the CIFS_INODE_PENDING_WRITERS bit is clear */
	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
			TASK_UNINTERRUPTIBLE);

	server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
				      cfile->oplock_epoch, &purge_cache);

	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
						cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
			 inode);
		cinode->oplock = 0;
	}

	if (inode && S_ISREG(inode->i_mode)) {
		if (CIFS_CACHE_READ(cinode))
			break_lease(inode, O_RDONLY);
		else
			break_lease(inode, O_WRONLY);
		/* start writeback of dirty pages */
		rc = filemap_fdatawrite(inode->i_mapping);
		if (!CIFS_CACHE_READ(cinode) || purge_cache) {
			/* read caching lost or purge requested: wait, then drop the cache */
			rc = filemap_fdatawait(inode->i_mapping);
			mapping_set_error(inode->i_mapping, rc);
			cifs_zap_mapping(inode);
		}
		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
		/* write caching retained: skip pushing the locks */
		if (CIFS_CACHE_WRITE(cinode))
			goto oplock_break_ack;
	}

	rc = cifs_push_locks(cfile);
	if (rc)
		cifs_dbg(VFS, "Push locks rc = %d\n", rc);

oplock_break_ack:
	/*
	 * releasing stale oplock after recent reconnect of smb session using
	 * a now incorrect file handle is not a data integrity issue but do
	 * not bother sending an oplock release if session to server still is
	 * disconnected since oplock already released by the server
	 */
	if (!cfile->oplock_break_cancelled) {
		rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
							     cinode);
		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
	}
	_cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
	cifs_done_oplock_break(cinode);
}
/*
 * The presence of cifs_direct_io() in the address space ops vector
 * allows open() O_DIRECT flags which would have failed otherwise.
 *
 * In the non-cached mode (mount with cache=none), we shunt off direct
 * read and write requests so this method should never be called.
 *
 * Direct IO is not yet supported in the cached mode.
 *
 * Always returns -EINVAL; neither argument is examined.
 */
static ssize_t
cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
{
	/*
	 * FIXME
	 * Eventually need to support direct IO for non forcedirectio mounts
	 */
	return -EINVAL;
}
4812 const struct address_space_operations cifs_addr_ops = {
4813 .readpage = cifs_readpage,
4814 .readpages = cifs_readpages,
4815 .writepage = cifs_writepage,
4816 .writepages = cifs_writepages,
4817 .write_begin = cifs_write_begin,
4818 .write_end = cifs_write_end,
4819 .set_page_dirty = __set_page_dirty_nobuffers,
4820 .releasepage = cifs_release_page,
4821 .direct_IO = cifs_direct_io,
4822 .invalidatepage = cifs_invalidate_page,
4823 .launder_page = cifs_launder_page,
4827 * cifs_readpages requires the server to support a buffer large enough to
4828 * contain the header plus one complete page of data. Otherwise, we need
4829 * to leave cifs_readpages out of the address space operations.
4831 const struct address_space_operations cifs_addr_ops_smallbuf = {
4832 .readpage = cifs_readpage,
4833 .writepage = cifs_writepage,
4834 .writepages = cifs_writepages,
4835 .write_begin = cifs_write_begin,
4836 .write_end = cifs_write_end,
4837 .set_page_dirty = __set_page_dirty_nobuffers,
4838 .releasepage = cifs_release_page,
4839 .invalidatepage = cifs_invalidate_page,
4840 .launder_page = cifs_launder_page,