/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/mm.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"

static inline int cifs_convert_flags(unsigned int flags)
{
	if ((flags & O_ACCMODE) == O_RDONLY)
		return GENERIC_READ;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		return GENERIC_WRITE;
	else if ((flags & O_ACCMODE) == O_RDWR) {
		/* GENERIC_ALL is too much permission to request;
		   it can cause unnecessary access-denied errors on create */
		/* return GENERIC_ALL; */
		return (GENERIC_READ | GENERIC_WRITE);
	}

	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
		FILE_READ_DATA);
}

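/*
 * Example for cifs_convert_flags(): O_RDWR maps to GENERIC_READ |
 * GENERIC_WRITE above, while O_CREAT and friends do not affect the access
 * mask at all - they are handled through the create disposition (see
 * cifs_get_disposition() below). The final return is only reached for an
 * unexpected O_ACCMODE value and asks for an explicit, conservative set of
 * rights instead of GENERIC_ALL.
 */
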
static u32 cifs_posix_convert_flags(unsigned int flags)
{
	u32 posix_flags = 0;

	if ((flags & O_ACCMODE) == O_RDONLY)
		posix_flags = SMB_O_RDONLY;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		posix_flags = SMB_O_WRONLY;
	else if ((flags & O_ACCMODE) == O_RDWR)
		posix_flags = SMB_O_RDWR;

	if (flags & O_CREAT) {
		posix_flags |= SMB_O_CREAT;
		if (flags & O_EXCL)
			posix_flags |= SMB_O_EXCL;
	} else if (flags & O_EXCL)
		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
			 current->comm, current->tgid);

	if (flags & O_TRUNC)
		posix_flags |= SMB_O_TRUNC;
	/* be safe and imply O_SYNC for O_DSYNC */
	if (flags & O_DSYNC)
		posix_flags |= SMB_O_SYNC;
	if (flags & O_DIRECTORY)
		posix_flags |= SMB_O_DIRECTORY;
	if (flags & O_NOFOLLOW)
		posix_flags |= SMB_O_NOFOLLOW;
	if (flags & O_DIRECT)
		posix_flags |= SMB_O_DIRECT;

	return posix_flags;
}

static inline int cifs_get_disposition(unsigned int flags)
{
	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
		return FILE_CREATE;
	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
		return FILE_OVERWRITE_IF;
	else if ((flags & O_CREAT) == O_CREAT)
		return FILE_OPEN_IF;
	else if ((flags & O_TRUNC) == O_TRUNC)
		return FILE_OVERWRITE;
	else
		return FILE_OPEN;
}

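/*
 * cifs_get_disposition() example: open(path, O_WRONLY | O_CREAT | O_TRUNC)
 * yields FILE_OVERWRITE_IF - create the file if it does not exist, truncate
 * it if it does. The order of the checks matters: O_CREAT | O_EXCL must be
 * tested before the weaker O_CREAT-only and O_TRUNC-only cases.
 */
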
int cifs_posix_open(char *full_path, struct inode **pinode,
		    struct super_block *sb, int mode, unsigned int f_flags,
		    __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
	int rc;
	FILE_UNIX_BASIC_INFO *presp_data;
	__u32 posix_flags = 0;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	cifs_dbg(FYI, "posix open %s\n", full_path);

	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
	if (presp_data == NULL)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto posix_open_ret;
	}

	tcon = tlink_tcon(tlink);
	mode &= ~current_umask();

	posix_flags = cifs_posix_convert_flags(f_flags);
	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_remap(cifs_sb));
	cifs_put_tlink(tlink);

	if (rc)
		goto posix_open_ret;

	if (presp_data->Type == cpu_to_le32(-1))
		goto posix_open_ret; /* open ok, caller does qpathinfo */

	if (!pinode)
		goto posix_open_ret; /* caller does not need info */

	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

	/* get new inode and set it up */
	if (*pinode == NULL) {
		cifs_fill_uniqueid(sb, &fattr);
		*pinode = cifs_iget(sb, &fattr);
		if (!*pinode) {
			rc = -ENOMEM;
			goto posix_open_ret;
		}
	} else {
		cifs_fattr_to_inode(*pinode, &fattr);
	}

posix_open_ret:
	kfree(presp_data);
	return rc;
}

static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
	     struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
	     struct cifs_fid *fid, unsigned int xid)
{
	int rc;
	int desired_access;
	int disposition;
	int create_options = CREATE_NOT_DIR;
	FILE_ALL_INFO *buf;
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifs_open_parms oparms;

	if (!server->ops->open)
		return -ENOSYS;

	desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *	POSIX Flag            CIFS Disposition
 *	----------            ----------------
 *	O_CREAT               FILE_OPEN_IF
 *	O_CREAT | O_EXCL      FILE_CREATE
 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *	O_TRUNC               FILE_OVERWRITE
 *	none of the above     FILE_OPEN
 *
 *	Note that there is not a direct match between disposition
 *	FILE_SUPERSEDE (ie create whether or not the file exists) and
 *	O_CREAT | O_TRUNC: the latter is similar but truncates the
 *	existing file rather than creating a new file as FILE_SUPERSEDE
 *	does (which uses the attributes / metadata passed in on the open
 *	call).
 *
 *?  O_SYNC is a reasonable match to CIFS writethrough flag
 *?  and the read write flags match reasonably.  O_LARGEFILE
 *?  is irrelevant because largefile support is always used
 *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *	 O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 *********************************************************************/

	disposition = cifs_get_disposition(f_flags);

	/* BB pass O_SYNC flag through on file attributes .. BB */

	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	if (backup_cred(cifs_sb))
		create_options |= CREATE_OPEN_BACKUP_INTENT;

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = create_options;
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = fid;
	oparms.reconnect = false;

	rc = server->ops->open(xid, &oparms, oplock, buf);

	if (rc)
		goto out;

	if (tcon->unix_ext)
		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
					      xid);
	else
		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
					 xid, fid);

out:
	kfree(buf);
	return rc;
}

static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
	struct cifs_fid_locks *cur;
	bool has_locks = false;

	down_read(&cinode->lock_sem);
	list_for_each_entry(cur, &cinode->llist, llist) {
		if (!list_empty(&cur->locks)) {
			has_locks = true;
			break;
		}
	}
	up_read(&cinode->lock_sem);
	return has_locks;
}

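/*
 * cifs_has_mand_locks() is used by cifs_new_fileinfo() and
 * cifs_reopen_file() to downgrade a read oplock to None: cached reads are
 * no longer safe once mandatory byte-range locks exist on the inode.
 */
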
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
		  struct tcon_link *tlink, __u32 oplock)
{
	struct dentry *dentry = file_dentry(file);
	struct inode *inode = d_inode(dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifsFileInfo *cfile;
	struct cifs_fid_locks *fdlocks;
	struct cifs_tcon *tcon = tlink_tcon(tlink);
	struct TCP_Server_Info *server = tcon->ses->server;

	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
	if (cfile == NULL)
		return cfile;

	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
	if (!fdlocks) {
		kfree(cfile);
		return NULL;
	}

	INIT_LIST_HEAD(&fdlocks->locks);
	fdlocks->cfile = cfile;
	cfile->llist = fdlocks;
	down_write(&cinode->lock_sem);
	list_add(&fdlocks->llist, &cinode->llist);
	up_write(&cinode->lock_sem);

	cfile->count = 1;
	cfile->pid = current->tgid;
	cfile->uid = current_fsuid();
	cfile->dentry = dget(dentry);
	cfile->f_flags = file->f_flags;
	cfile->invalidHandle = false;
	cfile->tlink = cifs_get_tlink(tlink);
	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
	mutex_init(&cfile->fh_mutex);
	spin_lock_init(&cfile->file_info_lock);

	cifs_sb_active(inode->i_sb);

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	spin_lock(&tcon->open_file_lock);
	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
		oplock = fid->pending_open->oplock;
	list_del(&fid->pending_open->olist);

	fid->purge_cache = false;
	server->ops->set_fid(cfile, fid, oplock);

	list_add(&cfile->tlist, &tcon->openFileList);
	atomic_inc(&tcon->num_local_opens);

	/* if readable file instance, put it first in the list */
	spin_lock(&cinode->open_file_lock);
	if (file->f_mode & FMODE_READ)
		list_add(&cfile->flist, &cinode->openFileList);
	else
		list_add_tail(&cfile->flist, &cinode->openFileList);
	spin_unlock(&cinode->open_file_lock);
	spin_unlock(&tcon->open_file_lock);

	if (fid->purge_cache)
		cifs_zap_mapping(inode);

	file->private_data = cfile;
	return cfile;
}

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
	spin_lock(&cifs_file->file_info_lock);
	cifsFileInfo_get_locked(cifs_file);
	spin_unlock(&cifs_file->file_info_lock);
	return cifs_file;
}

/**
 * cifsFileInfo_put - release a reference of file priv data
 *
 * Always potentially wait for oplock handler. See _cifsFileInfo_put().
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
	_cifsFileInfo_put(cifs_file, true);
}

/**
 * _cifsFileInfo_put - release a reference of file priv data
 *
 * This may involve closing the filehandle @cifs_file out on the
 * server. Must be called without holding tcon->open_file_lock and
 * cifs_file->file_info_lock.
 *
 * If @wait_oplock_handler is true and we are releasing the last
 * reference, wait for any running oplock break handler of the file
 * and cancel any pending one. If calling this function from the
 * oplock break handler, you need to pass false.
 */
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct super_block *sb = inode->i_sb;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifsLockInfo *li, *tmp;
	struct cifs_fid fid;
	struct cifs_pending_open open;
	bool oplock_break_cancelled;

	spin_lock(&tcon->open_file_lock);

	spin_lock(&cifs_file->file_info_lock);
	if (--cifs_file->count > 0) {
		spin_unlock(&cifs_file->file_info_lock);
		spin_unlock(&tcon->open_file_lock);
		return;
	}
	spin_unlock(&cifs_file->file_info_lock);

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/* store open in pending opens to make sure we don't miss lease break */
	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

	/* remove it from the lists */
	spin_lock(&cifsi->open_file_lock);
	list_del(&cifs_file->flist);
	spin_unlock(&cifsi->open_file_lock);
	list_del(&cifs_file->tlist);
	atomic_dec(&tcon->num_local_opens);

	if (list_empty(&cifsi->openFileList)) {
		cifs_dbg(FYI, "closing last open instance for inode %p\n",
			 d_inode(cifs_file->dentry));
		/*
		 * In strict cache mode we need to invalidate the mapping on
		 * the last close because it may cause an error when we open
		 * this file again and get at least a level II oplock.
		 */
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
		cifs_set_oplock_level(cifsi, 0);
	}

	spin_unlock(&tcon->open_file_lock);

	oplock_break_cancelled = wait_oplock_handler ?
		cancel_work_sync(&cifs_file->oplock_break) : false;

	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
		struct TCP_Server_Info *server = tcon->ses->server;
		unsigned int xid;

		xid = get_xid();
		if (server->ops->close)
			server->ops->close(xid, tcon, &cifs_file->fid);
		_free_xid(xid);
	}

	if (oplock_break_cancelled)
		cifs_done_oplock_break(cifsi);

	cifs_del_pending_open(&open);

	/*
	 * Delete any outstanding lock records. We'll lose them when the file
	 * is closed anyway.
	 */
	down_write(&cifsi->lock_sem);
	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
		list_del(&li->llist);
		cifs_del_lock_waiters(li);
		kfree(li);
	}
	list_del(&cifs_file->llist->llist);
	kfree(cifs_file->llist);
	up_write(&cifsi->lock_sem);

	cifs_put_tlink(cifs_file->tlink);
	dput(cifs_file->dentry);
	cifs_sb_deactive(sb);
	kfree(cifs_file);
}

int cifs_open(struct inode *inode, struct file *file)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct TCP_Server_Info *server;
	struct cifs_tcon *tcon;
	struct tcon_link *tlink;
	struct cifsFileInfo *cfile = NULL;
	char *full_path = NULL;
	bool posix_open_ok = false;
	struct cifs_fid fid;
	struct cifs_pending_open open;

	xid = get_xid();

	cifs_sb = CIFS_SB(inode->i_sb);
	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		free_xid(xid);
		return PTR_ERR(tlink);
	}
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	full_path = build_path_from_dentry(file_dentry(file));
	if (full_path == NULL) {
		rc = -ENOMEM;
		goto out;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
		 inode, file->f_flags, full_path);

	if (file->f_flags & O_DIRECT &&
	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
			file->f_op = &cifs_file_direct_nobrl_ops;
		else
			file->f_op = &cifs_file_direct_ops;
	}

	if (server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (!tcon->broken_posix_open && tcon->unix_ext &&
	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/* can not refresh inode info since size could be stale */
		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				cifs_sb->mnt_file_mode /* ignored */,
				file->f_flags, &oplock, &fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix open succeeded\n");
			posix_open_ok = true;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			if (tcon->ses->serverNOS)
				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
					 tcon->ses->serverName,
					 tcon->ses->serverNOS);
			tcon->broken_posix_open = true;
		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
			 (rc != -EOPNOTSUPP)) /* path not found or net err */
			goto out;
		/*
		 * Else fallthrough to retry open the old way on network i/o
		 * or DFS errors.
		 */
	}

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	cifs_add_pending_open(&fid, tlink, &open);

	if (!posix_open_ok) {
		if (server->ops->get_lease_key)
			server->ops->get_lease_key(inode, &fid);

		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
				  file->f_flags, &oplock, &fid, xid);
		if (rc) {
			cifs_del_pending_open(&open);
			goto out;
		}
	}

	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
	if (cfile == NULL) {
		if (server->ops->close)
			server->ops->close(xid, tcon, &fid);
		cifs_del_pending_open(&open);
		rc = -ENOMEM;
		goto out;
	}

	cifs_fscache_set_inode_cookie(inode, file);

	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
		/*
		 * Time to set mode which we can not set earlier due to
		 * problems creating new read-only files.
		 */
		struct cifs_unix_set_info_args args = {
			.mode	= inode->i_mode,
			.uid	= INVALID_UID, /* no change */
			.gid	= INVALID_GID, /* no change */
			.ctime	= NO_CHANGE_64,
			.atime	= NO_CHANGE_64,
			.mtime	= NO_CHANGE_64,
			.device	= 0,
		};
		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
				       cfile->pid);
	}

out:
	kfree(full_path);
	free_xid(xid);
	cifs_put_tlink(tlink);
	return rc;
}

static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
	if (cinode->can_cache_brlcks) {
		/* can cache locks - no need to relock */
		up_read(&cinode->lock_sem);
		return rc;
	}

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	up_read(&cinode->lock_sem);
	return rc;
}

static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsInodeInfo *cinode;
	struct inode *inode;
	char *full_path = NULL;
	int desired_access;
	int disposition = FILE_OPEN;
	int create_options = CREATE_NOT_DIR;
	struct cifs_open_parms oparms;

	xid = get_xid();
	mutex_lock(&cfile->fh_mutex);
	if (!cfile->invalidHandle) {
		mutex_unlock(&cfile->fh_mutex);
		rc = 0;
		free_xid(xid);
		return rc;
	}

	inode = d_inode(cfile->dentry);
	cifs_sb = CIFS_SB(inode->i_sb);
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	/*
	 * Can not grab rename sem here because various ops, including those
	 * that already have the rename sem can end up causing writepage to get
	 * called and if the server was down that means we end up here, and we
	 * can never tell if the caller already has the rename_sem.
	 */
	full_path = build_path_from_dentry(cfile->dentry);
	if (full_path == NULL) {
		rc = -ENOMEM;
		mutex_unlock(&cfile->fh_mutex);
		free_xid(xid);
		return rc;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
		 inode, cfile->f_flags, full_path);

	if (tcon->ses->server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (tcon->unix_ext && cap_unix(tcon->ses) &&
	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/*
		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
		 * original open. Must mask them off for a reopen.
		 */
		unsigned int oflags = cfile->f_flags &
						~(O_CREAT | O_EXCL | O_TRUNC);

		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
				     cifs_sb->mnt_file_mode /* ignored */,
				     oflags, &oplock, &cfile->fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix reopen succeeded\n");
			oparms.reconnect = true;
			goto reopen_success;
		}
		/*
		 * fallthrough to retry open the old way on errors, especially
		 * in the reconnect path it is important to retry hard
		 */
	}

	desired_access = cifs_convert_flags(cfile->f_flags);

	if (backup_cred(cifs_sb))
		create_options |= CREATE_OPEN_BACKUP_INTENT;

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &cfile->fid);

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = create_options;
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = &cfile->fid;
	oparms.reconnect = true;

	/*
	 * Can not refresh inode by passing in file_info buf to be returned by
	 * ops->open and then calling get_inode_info with returned buf since
	 * file might have write behind data that needs to be flushed and server
	 * version of file size can be stale. If we knew for sure that inode was
	 * not dirty locally we could do this.
	 */
	rc = server->ops->open(xid, &oparms, &oplock, NULL);
	if (rc == -ENOENT && oparms.reconnect == false) {
		/* durable handle timeout is expired - open the file again */
		rc = server->ops->open(xid, &oparms, &oplock, NULL);
		/* indicate that we need to relock the file */
		oparms.reconnect = true;
	}

	if (rc) {
		mutex_unlock(&cfile->fh_mutex);
		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
		cifs_dbg(FYI, "oplock: %d\n", oplock);
		goto reopen_error_exit;
	}

reopen_success:
	cfile->invalidHandle = false;
	mutex_unlock(&cfile->fh_mutex);
	cinode = CIFS_I(inode);

	if (can_flush) {
		rc = filemap_write_and_wait(inode->i_mapping);
		if (!is_interrupt_error(rc))
			mapping_set_error(inode->i_mapping, rc);

		if (tcon->unix_ext)
			rc = cifs_get_inode_info_unix(&inode, full_path,
						      inode->i_sb, xid);
		else
			rc = cifs_get_inode_info(&inode, full_path, NULL,
						 inode->i_sb, xid, NULL);
	}
	/*
	 * Else we are writing out data to server already and could deadlock if
	 * we tried to flush data, and since we do not know if we have data that
	 * would invalidate the current end of file on the server we can not go
	 * to the server to get the new inode info.
	 */

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	server->ops->set_fid(cfile, &cfile->fid, oplock);
	if (oparms.reconnect)
		cifs_relock_file(cfile);

reopen_error_exit:
	kfree(full_path);
	free_xid(xid);
	return rc;
}

int cifs_close(struct inode *inode, struct file *file)
{
	if (file->private_data != NULL) {
		cifsFileInfo_put(file->private_data);
		file->private_data = NULL;
	}

	/* return code from the ->release op is always ignored */
	return 0;
}

void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
	struct cifsFileInfo *open_file;
	struct list_head *tmp;
	struct list_head *tmp1;
	struct list_head tmp_list;

	if (!tcon->use_persistent || !tcon->need_reopen_files)
		return;

	tcon->need_reopen_files = false;

	cifs_dbg(FYI, "Reopen persistent handles\n");
	INIT_LIST_HEAD(&tmp_list);

	/* list all files open on tree connection, reopen resilient handles */
	spin_lock(&tcon->open_file_lock);
	list_for_each(tmp, &tcon->openFileList) {
		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
		if (!open_file->invalidHandle)
			continue;
		cifsFileInfo_get(open_file);
		list_add_tail(&open_file->rlist, &tmp_list);
	}
	spin_unlock(&tcon->open_file_lock);

	list_for_each_safe(tmp, tmp1, &tmp_list) {
		open_file = list_entry(tmp, struct cifsFileInfo, rlist);
		if (cifs_reopen_file(open_file, false /* do not flush */))
			tcon->need_reopen_files = true;
		list_del_init(&open_file->rlist);
		cifsFileInfo_put(open_file);
	}
}

int cifs_closedir(struct inode *inode, struct file *file)
{
	int rc = 0;
	unsigned int xid;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	char *buf;

	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

	if (cfile == NULL)
		return rc;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	cifs_dbg(FYI, "Freeing private data in close dir\n");
	spin_lock(&cfile->file_info_lock);
	if (server->ops->dir_needs_close(cfile)) {
		cfile->invalidHandle = true;
		spin_unlock(&cfile->file_info_lock);
		if (server->ops->close_dir)
			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
		else
			rc = -ENOSYS;
		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
		/* not much we can do if it fails anyway, ignore rc */
		rc = 0;
	} else
		spin_unlock(&cfile->file_info_lock);

	buf = cfile->srch_inf.ntwrk_buf_start;
	if (buf) {
		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
		cfile->srch_inf.ntwrk_buf_start = NULL;
		if (cfile->srch_inf.smallBuf)
			cifs_small_buf_release(buf);
		else
			cifs_buf_release(buf);
	}

	cifs_put_tlink(cfile->tlink);
	kfree(file->private_data);
	file->private_data = NULL;
	/* BB can we lock the filestruct while this is going on? */
	free_xid(xid);
	return rc;
}

static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
{
	struct cifsLockInfo *lock =
		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
	if (!lock)
		return lock;
	lock->offset = offset;
	lock->length = length;
	lock->type = type;
	lock->pid = current->tgid;
	lock->flags = flags;
	INIT_LIST_HEAD(&lock->blist);
	init_waitqueue_head(&lock->block_q);
	return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
		list_del_init(&li->blist);
		wake_up(&li->block_q);
	}
}

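/*
 * The waiters cifs_del_lock_waiters() wakes are blocked in
 * cifs_lock_add_if(); each one re-checks for conflicts under lock_sem once
 * its blist entry becomes empty.
 */
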
#define CIFS_LOCK_OP	0
#define CIFS_READ_OP	1
#define CIFS_WRITE_OP	2

/* @rw_check : CIFS_LOCK_OP - lock op, CIFS_READ_OP - read, CIFS_WRITE_OP - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
			    __u64 length, __u8 type, __u16 flags,
			    struct cifsFileInfo *cfile,
			    struct cifsLockInfo **conf_lock, int rw_check)
{
	struct cifsLockInfo *li;
	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

	list_for_each_entry(li, &fdlocks->locks, llist) {
		if (offset + length <= li->offset ||
		    offset >= li->offset + li->length)
			continue;
		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
		    server->ops->compare_fids(cfile, cur_cfile)) {
			/* shared lock prevents write op through the same fid */
			if (!(li->type & server->vals->shared_lock_type) ||
			    rw_check != CIFS_WRITE_OP)
				continue;
		}
		if ((type & server->vals->shared_lock_type) &&
		    ((server->ops->compare_fids(cfile, cur_cfile) &&
		     current->tgid == li->pid) || type == li->type))
			continue;
		if (rw_check == CIFS_LOCK_OP &&
		    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
		    server->ops->compare_fids(cfile, cur_cfile))
			continue;
		if (conf_lock)
			*conf_lock = li;
		return true;
	}
	return false;
}

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
			__u8 type, __u16 flags,
			struct cifsLockInfo **conf_lock, int rw_check)
{
	bool rc = false;
	struct cifs_fid_locks *cur;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

	list_for_each_entry(cur, &cinode->llist, llist) {
		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
						 flags, cfile, conf_lock,
						 rw_check);
		if (rc)
			break;
	}

	return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
	       __u8 type, struct file_lock *flock)
{
	int rc = 0;
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	bool exist;

	down_read(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, offset, length, type,
					flock->fl_flags, &conf_lock,
					CIFS_LOCK_OP);
	if (exist) {
		flock->fl_start = conf_lock->offset;
		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
		flock->fl_pid = conf_lock->pid;
		if (conf_lock->type & server->vals->shared_lock_type)
			flock->fl_type = F_RDLCK;
		else
			flock->fl_type = F_WRLCK;
	} else if (!cinode->can_cache_brlcks)
		rc = 1;
	else
		flock->fl_type = F_UNLCK;

	up_read(&cinode->lock_sem);
	return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	down_write(&cinode->lock_sem);
	list_add_tail(&lock->llist, &cfile->llist->locks);
	up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
		 bool wait)
{
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	bool exist;
	int rc = 0;

try_again:
	exist = false;
	down_write(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
					lock->type, lock->flags, &conf_lock,
					CIFS_LOCK_OP);
	if (!exist && cinode->can_cache_brlcks) {
		list_add_tail(&lock->llist, &cfile->llist->locks);
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (!exist)
		rc = 1;
	else if (!wait)
		rc = -EACCES;
	else {
		list_add_tail(&lock->blist, &conf_lock->blist);
		up_write(&cinode->lock_sem);
		rc = wait_event_interruptible(lock->block_q,
					(lock->blist.prev == &lock->blist) &&
					(lock->blist.next == &lock->blist));
		if (!rc)
			goto try_again;
		down_write(&cinode->lock_sem);
		list_del_init(&lock->blist);
	}

	up_write(&cinode->lock_sem);
	return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with its
 * properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
	int rc = 0;
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	unsigned char saved_type = flock->fl_type;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return 1;

	down_read(&cinode->lock_sem);
	posix_test_lock(file, flock);

	if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
		flock->fl_type = saved_type;
		rc = 1;
	}

	up_read(&cinode->lock_sem);
	return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if we need to request to the server;
 * 3) <0, if the error occurs while setting the lock.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	int rc = 1;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return rc;

try_again:
	down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

	rc = posix_lock_file(file, flock, NULL);
	up_write(&cinode->lock_sem);
	if (rc == FILE_LOCK_DEFERRED) {
		rc = wait_event_interruptible(flock->fl_wait, !flock->fl_blocker);
		if (!rc)
			goto try_again;
		locks_delete_block(flock);
	}
	return rc;
}

int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
	unsigned int xid;
	int rc = 0, stored_rc;
	struct cifsLockInfo *li, *tmp;
	struct cifs_tcon *tcon;
	unsigned int num, max_num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
	int i;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
		free_xid(xid);
		return -EINVAL;
	}

	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
		     PAGE_SIZE);
	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
			PAGE_SIZE);
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf) {
		free_xid(xid);
		return -ENOMEM;
	}

	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (li->type != types[i])
				continue;
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       (__u8)li->type, 0, num,
						       buf);
				if (stored_rc)
					rc = stored_rc;
				cur = buf;
				num = 0;
			} else
				cur++;
		}

		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       (__u8)types[i], 0, num, buf);
			if (stored_rc)
				rc = stored_rc;
		}
	}

	kfree(buf);
	free_xid(xid);
	return rc;
}

static __u32
hash_lockowner(fl_owner_t owner)
{
	return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}

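/*
 * hash_lockowner(): fl_owner_t is a kernel pointer, so it is mixed with the
 * randomly generated cifs_lock_secret before being used as the lock "pid"
 * sent to the server - stable per owner, without exposing the raw pointer
 * value on the wire.
 */
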
struct lock_to_push {
	struct list_head llist;
	__u64 offset;
	__u64 length;
	__u32 pid;
	__u16 netfid;
	__u8 type;
};

static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
	struct inode *inode = d_inode(cfile->dentry);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct file_lock *flock;
	struct file_lock_context *flctx = inode->i_flctx;
	unsigned int count = 0, i;
	int rc = 0, xid, type;
	struct list_head locks_to_send, *el;
	struct lock_to_push *lck, *tmp;
	__u64 length;

	xid = get_xid();

	if (!flctx)
		goto out;

	spin_lock(&flctx->flc_lock);
	list_for_each(el, &flctx->flc_posix) {
		count++;
	}
	spin_unlock(&flctx->flc_lock);

	INIT_LIST_HEAD(&locks_to_send);

	/*
	 * Allocating count locks is enough because no FL_POSIX locks can be
	 * added to the list while we are holding cinode->lock_sem that
	 * protects locking operations of this inode.
	 */
	for (i = 0; i < count; i++) {
		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
		if (!lck) {
			rc = -ENOMEM;
			goto err_out;
		}
		list_add_tail(&lck->llist, &locks_to_send);
	}

	el = locks_to_send.next;
	spin_lock(&flctx->flc_lock);
	list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
		if (el == &locks_to_send) {
			/*
			 * The list ended. We don't have enough allocated
			 * structures - something is really wrong.
			 */
			cifs_dbg(VFS, "Can't push all brlocks!\n");
			break;
		}
		length = 1 + flock->fl_end - flock->fl_start;
		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
			type = CIFS_RDLCK;
		else
			type = CIFS_WRLCK;
		lck = list_entry(el, struct lock_to_push, llist);
		lck->pid = hash_lockowner(flock->fl_owner);
		lck->netfid = cfile->fid.netfid;
		lck->length = length;
		lck->type = type;
		lck->offset = flock->fl_start;
		el = el->next;
	}
	spin_unlock(&flctx->flc_lock);

	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		int stored_rc;

		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
					     lck->offset, lck->length, NULL,
					     lck->type, 0);
		if (stored_rc)
			rc = stored_rc;
		list_del(&lck->llist);
		kfree(lck);
	}

out:
	free_xid(xid);
	return rc;
err_out:
	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		list_del(&lck->llist);
		kfree(lck);
	}
	goto out;
}

static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	/* we are going to update can_cache_brlcks here - need a write access */
	down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	cinode->can_cache_brlcks = false;
	up_write(&cinode->lock_sem);
	return rc;
}

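/*
 * Note that cifs_push_locks() clears can_cache_brlcks even if pushing the
 * cached locks failed, so subsequent lock requests go straight to the
 * server rather than being cached locally.
 */
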
static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
		bool *wait_flag, struct TCP_Server_Info *server)
{
	if (flock->fl_flags & FL_POSIX)
		cifs_dbg(FYI, "Posix\n");
	if (flock->fl_flags & FL_FLOCK)
		cifs_dbg(FYI, "Flock\n");
	if (flock->fl_flags & FL_SLEEP) {
		cifs_dbg(FYI, "Blocking lock\n");
		*wait_flag = true;
	}
	if (flock->fl_flags & FL_ACCESS)
		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
	if (flock->fl_flags & FL_LEASE)
		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
	if (flock->fl_flags &
	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
	       FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
		cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);

	*type = server->vals->large_lock_type;
	if (flock->fl_type == F_WRLCK) {
		cifs_dbg(FYI, "F_WRLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_UNLCK) {
		cifs_dbg(FYI, "F_UNLCK\n");
		*type |= server->vals->unlock_lock_type;
		*unlock = 1;
		/* Check if unlock includes more than one lock range */
	} else if (flock->fl_type == F_RDLCK) {
		cifs_dbg(FYI, "F_RDLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_EXLCK) {
		cifs_dbg(FYI, "F_EXLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_SHLCK) {
		cifs_dbg(FYI, "F_SHLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else
		cifs_dbg(FYI, "Unknown type of lock\n");
}

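/*
 * On return from cifs_read_flock(), *type holds the dialect-specific lock
 * type bits from server->vals, *lock / *unlock tell the caller which
 * request to issue, and *wait_flag is set for blocking (FL_SLEEP) requests.
 */
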
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	__u16 netfid = cfile->fid.netfid;

	if (posix_lck) {
		int posix_lock_type;

		rc = cifs_posix_lock_test(file, flock);
		if (!rc)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;
		rc = CIFSSMBPosixLock(xid, tcon, netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length, flock,
				      posix_lock_type, wait_flag);
		return rc;
	}

	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
	if (!rc)
		return rc;

	/* BB we could chain these into one lock request BB */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 0, 1, false);
		flock->fl_type = F_UNLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
		return 0;
	}

	if (type & server->vals->shared_lock_type) {
		flock->fl_type = F_WRLCK;
		return 0;
	}

	type &= ~server->vals->exclusive_lock_type;

	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
				    type | server->vals->shared_lock_type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
			type | server->vals->shared_lock_type, 0, 1, false);
		flock->fl_type = F_RDLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
	} else
		flock->fl_type = F_WRLCK;

	return 0;
}

void
cifs_move_llist(struct list_head *source, struct list_head *dest)
{
	struct list_head *li, *tmp;
	list_for_each_safe(li, tmp, source)
		list_move(li, dest);
}

void
cifs_free_llist(struct list_head *llist)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, llist, llist) {
		cifs_del_lock_waiters(li);
		list_del(&li->llist);
		kfree(li);
	}
}

int
cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
		  unsigned int xid)
{
	int rc = 0, stored_rc;
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
	unsigned int i;
	unsigned int max_num, num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifsLockInfo *li, *tmp;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct list_head tmp_llist;

	INIT_LIST_HEAD(&tmp_llist);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
		return -EINVAL;

	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
		     PAGE_SIZE);
	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
			PAGE_SIZE);
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	down_write(&cinode->lock_sem);
	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (flock->fl_start > li->offset ||
			    (flock->fl_start + length) <
			    (li->offset + li->length))
				continue;
			if (current->tgid != li->pid)
				continue;
			if (types[i] != li->type)
				continue;
			if (cinode->can_cache_brlcks) {
				/*
				 * We can cache brlock requests - simply remove
				 * a lock from the file's list.
				 */
				list_del(&li->llist);
				cifs_del_lock_waiters(li);
				kfree(li);
				continue;
			}
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			/*
			 * We need to save a lock here to let us add it again to
			 * the file's list if the unlock range request fails on
			 * the server.
			 */
			list_move(&li->llist, &tmp_llist);
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       li->type, num, 0, buf);
				if (stored_rc) {
					/*
					 * We failed on the unlock range
					 * request - add all locks from the tmp
					 * list to the head of the file's list.
					 */
					cifs_move_llist(&tmp_llist,
							&cfile->llist->locks);
					rc = stored_rc;
				} else
					/*
					 * The unlock range request succeeded -
					 * free the tmp list.
					 */
					cifs_free_llist(&tmp_llist);
				cur = buf;
				num = 0;
			} else
				cur++;
		}
		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       types[i], num, 0, buf);
			if (stored_rc) {
				cifs_move_llist(&tmp_llist,
						&cfile->llist->locks);
				rc = stored_rc;
			} else
				cifs_free_llist(&tmp_llist);
		}
	}

	up_write(&cinode->lock_sem);
	kfree(buf);
	return rc;
}

static int
cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, int lock, int unlock,
	   unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct inode *inode = d_inode(cfile->dentry);

	if (posix_lck) {
		int posix_lock_type;

		rc = cifs_posix_lock_set(file, flock);
		if (!rc || rc < 0)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;

		if (unlock == 1)
			posix_lock_type = CIFS_UNLCK;

		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length,
				      NULL, posix_lock_type, wait_flag);
		goto out;
	}

	if (lock) {
		struct cifsLockInfo *lock;

		lock = cifs_lock_init(flock->fl_start, length, type,
				      flock->fl_flags);
		if (!lock)
			return -ENOMEM;

		rc = cifs_lock_add_if(cfile, lock, wait_flag);
		if (rc < 0) {
			kfree(lock);
			return rc;
		}
		if (!rc)
			goto out;

		/*
		 * Windows 7 server can delay breaking lease from read to None
		 * if we set a byte-range lock on a file - break it explicitly
		 * before sending the lock to the server to be sure the next
		 * read won't conflict with non-overlapping locks due to
		 * page reading.
		 */
		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
					CIFS_CACHE_READ(CIFS_I(inode))) {
			cifs_zap_mapping(inode);
			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
				 inode);
			CIFS_I(inode)->oplock = 0;
		}

		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 1, 0, wait_flag);
		if (rc) {
			kfree(lock);
			return rc;
		}

		cifs_lock_add(cfile, lock);
	} else if (unlock)
		rc = server->ops->mand_unlock_range(cfile, flock, xid);

out:
	if (flock->fl_flags & FL_POSIX) {
		/*
		 * If this is a request to remove all locks because we
		 * are closing the file, it doesn't matter if the
		 * unlocking failed as both cifs.ko and the SMB server
		 * remove the lock on file close
		 */
		if (rc) {
			cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
			if (!(flock->fl_flags & FL_CLOSE))
				return rc;
		}
		rc = locks_lock_file_wait(file, flock);
	}
	return rc;
}

int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
{
	int rc, xid;
	int lock = 0, unlock = 0;
	bool wait_flag = false;
	bool posix_lck = false;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct cifsInodeInfo *cinode;
	struct cifsFileInfo *cfile;
	__u16 netfid;
	__u32 type;

	rc = -EACCES;
	xid = get_xid();

	cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
		 cmd, flock->fl_flags, flock->fl_type,
		 flock->fl_start, flock->fl_end);

	cfile = (struct cifsFileInfo *)file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
			tcon->ses->server);
	cifs_sb = CIFS_FILE_SB(file);
	netfid = cfile->fid.netfid;
	cinode = CIFS_I(file_inode(file));

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		posix_lck = true;
	/*
	 * BB add code here to normalize offset and length to account for
	 * negative length which we can not accept over the wire.
	 */
	if (IS_GETLK(cmd)) {
		rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
		free_xid(xid);
		return rc;
	}

	if (!lock && !unlock) {
		/*
		 * if no lock or unlock then nothing to do since we do not
		 * know what it is
		 */
		free_xid(xid);
		return -EOPNOTSUPP;
	}

	rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
			xid);
	free_xid(xid);
	return rc;
}

/*
 * update the file size (if needed) after a write. Should be called with
 * the inode->i_lock held
 */
void
cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
		unsigned int bytes_written)
{
	loff_t end_of_write = offset + bytes_written;

	if (end_of_write > cifsi->server_eof)
		cifsi->server_eof = end_of_write;
}

static ssize_t
cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
	   size_t write_size, loff_t *offset)
{
	int rc = 0;
	unsigned int bytes_written = 0;
	unsigned int total_written;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	struct dentry *dentry = open_file->dentry;
	struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
	struct cifs_io_parms io_parms;

	cifs_sb = CIFS_SB(dentry->d_sb);

	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
		 write_size, *offset, dentry);

	tcon = tlink_tcon(open_file->tlink);
	server = tcon->ses->server;

	if (!server->ops->sync_write)
		return -ENOSYS;

	xid = get_xid();

	for (total_written = 0; write_size > total_written;
	     total_written += bytes_written) {
		rc = -EAGAIN;
		while (rc == -EAGAIN) {
			struct kvec iov[2];
			unsigned int len;

			if (open_file->invalidHandle) {
				/* we could deadlock if we called
				   filemap_fdatawait from here so tell
				   reopen_file not to flush data to
				   server now */
				rc = cifs_reopen_file(open_file, false);
				if (rc != 0)
					break;
			}

			len = min(server->ops->wp_retry_size(d_inode(dentry)),
				  (unsigned int)write_size - total_written);
			/* iov[0] is reserved for smb header */
			iov[1].iov_base = (char *)write_data + total_written;
			iov[1].iov_len = len;
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = len;
			rc = server->ops->sync_write(xid, &open_file->fid,
						     &io_parms, &bytes_written,
						     iov, 1);
		}
		if (rc || (bytes_written == 0)) {
			if (total_written)
				break;
			else {
				free_xid(xid);
				return rc;
			}
		} else {
			spin_lock(&d_inode(dentry)->i_lock);
			cifs_update_eof(cifsi, *offset, bytes_written);
			spin_unlock(&d_inode(dentry)->i_lock);
			*offset += bytes_written;
		}
	}

	cifs_stats_bytes_written(tcon, total_written);

	if (total_written > 0) {
		spin_lock(&d_inode(dentry)->i_lock);
		if (*offset > d_inode(dentry)->i_size)
			i_size_write(d_inode(dentry), *offset);
		spin_unlock(&d_inode(dentry)->i_lock);
	}
	mark_inode_dirty_sync(d_inode(dentry));
	free_xid(xid);
	return total_written;
}

struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
					bool fsuid_only)
{
	struct cifsFileInfo *open_file = NULL;
	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
	struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&tcon->open_file_lock);
	/* we could simply get the first_list_entry since write-only entries
	   are always at the end of the list but since the first entry might
	   have a close pending, we go through the whole list */
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
			if (!open_file->invalidHandle) {
				/* found a good file */
				/* lock it so it will not be closed on us */
				cifsFileInfo_get(open_file);
				spin_unlock(&tcon->open_file_lock);
				return open_file;
			} /* else might as well continue, and look for
			     another, or simply have the caller reopen it
			     again rather than trying to fix this handle */
		} else /* write only file */
			break; /* write only files are last so must be done */
	}
	spin_unlock(&tcon->open_file_lock);
	return NULL;
}

/* Return -EBADF if no handle is found and general rc otherwise */
int
cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only,
		       struct cifsFileInfo **ret_file)
{
	struct cifsFileInfo *open_file, *inv_file = NULL;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	bool any_available = false;
	int rc = -EBADF;
	unsigned int refind = 0;

	*ret_file = NULL;

	/*
	 * Having a null inode here (because mapping->host was set to zero by
	 * the VFS or MM) should not happen but we had reports of an oops (due
	 * to it being zero) during stress testcases so we need to check for it
	 */

	if (cifs_inode == NULL) {
		cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
		dump_stack();
		return rc;
	}

	cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
	tcon = cifs_sb_master_tcon(cifs_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&tcon->open_file_lock);
refind_writable:
	if (refind > MAX_REOPEN_ATT) {
		spin_unlock(&tcon->open_file_lock);
		return rc;
	}
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (!any_available && open_file->pid != current->tgid)
			continue;
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
			if (!open_file->invalidHandle) {
				/* found a good writable file */
				cifsFileInfo_get(open_file);
				spin_unlock(&tcon->open_file_lock);
				*ret_file = open_file;
				return 0;
			} else {
				if (!inv_file)
					inv_file = open_file;
			}
		}
	}
	/* couldn't find usable FH with same pid, try any available */
	if (!any_available) {
		any_available = true;
		goto refind_writable;
	}

	if (inv_file) {
		any_available = false;
		cifsFileInfo_get(inv_file);
	}

	spin_unlock(&tcon->open_file_lock);

	if (inv_file) {
		rc = cifs_reopen_file(inv_file, false);
		if (!rc) {
			*ret_file = inv_file;
			return 0;
		}

		spin_lock(&cifs_inode->open_file_lock);
		list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
		spin_unlock(&cifs_inode->open_file_lock);
		cifsFileInfo_put(inv_file);
		++refind;
		inv_file = NULL;
		spin_lock(&tcon->open_file_lock);
		goto refind_writable;
	}

	return rc;
}

struct cifsFileInfo *
find_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only)
{
	struct cifsFileInfo *cfile;
	int rc;

	rc = cifs_get_writable_file(cifs_inode, fsuid_only, &cfile);
	if (rc)
		cifs_dbg(FYI, "couldn't find writable handle rc=%d\n", rc);

	return cfile;
}

1983 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1985 struct address_space *mapping = page->mapping;
1986 loff_t offset = (loff_t)page->index << PAGE_SHIFT;
1987 char *write_data;
1988 int rc = -EFAULT;
1989 int bytes_written = 0;
1990 struct inode *inode;
1991 struct cifsFileInfo *open_file;
1993 if (!mapping || !mapping->host)
1994 return -EFAULT;
1996 inode = page->mapping->host;
1998 offset += (loff_t)from;
1999 write_data = kmap(page);
2000 write_data += from;
2002 if ((to > PAGE_SIZE) || (from > to)) {
2003 kunmap(page);
2004 return -EIO;
2007 /* racing with truncate? */
2008 if (offset > mapping->host->i_size) {
2009 kunmap(page);
2010 return 0; /* don't care */
2013 /* check to make sure that we are not extending the file */
2014 if (mapping->host->i_size - offset < (loff_t)to)
2015 to = (unsigned)(mapping->host->i_size - offset);
2017 rc = cifs_get_writable_file(CIFS_I(mapping->host), false, &open_file);
2018 if (!rc) {
2019 bytes_written = cifs_write(open_file, open_file->pid,
2020 write_data, to - from, &offset);
2021 cifsFileInfo_put(open_file);
2022 /* Does mm or vfs already set times? */
2023 inode->i_atime = inode->i_mtime = current_time(inode);
2024 if ((bytes_written > 0) && (offset))
2025 rc = 0;
2026 else if (bytes_written < 0)
2027 rc = bytes_written;
2028 else
2029 rc = -EFAULT;
2030 } else {
2031 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2032 if (!is_retryable_error(rc))
2033 rc = -EIO;
2036 kunmap(page);
2037 return rc;
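/*
 * The next three helpers make up the cifs_writepages() pipeline:
 * wdata_alloc_and_fillpages() gathers up to "tofind" dirty pages from
 * the page cache, wdata_prepare_pages() locks a contiguous run of them
 * and marks them for writeback, and wdata_send_pages() fills in the
 * cifs_writedata descriptor and issues the asynchronous write.
 */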
2040 static struct cifs_writedata *
2041 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2042 pgoff_t end, pgoff_t *index,
2043 unsigned int *found_pages)
2045 struct cifs_writedata *wdata;
2047 wdata = cifs_writedata_alloc((unsigned int)tofind,
2048 cifs_writev_complete);
2049 if (!wdata)
2050 return NULL;
2052 *found_pages = find_get_pages_range_tag(mapping, index, end,
2053 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2054 return wdata;
2057 static unsigned int
2058 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
2059 struct address_space *mapping,
2060 struct writeback_control *wbc,
2061 pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2063 unsigned int nr_pages = 0, i;
2064 struct page *page;
2066 for (i = 0; i < found_pages; i++) {
2067 page = wdata->pages[i];
2069 * At this point we hold neither the i_pages lock nor the
2070 * page lock: the page may be truncated or invalidated
2071 * (changing page->mapping to NULL), or even swizzled
2072 * back from swapper_space to tmpfs file mapping
2075 if (nr_pages == 0)
2076 lock_page(page);
2077 else if (!trylock_page(page))
2078 break;
2080 if (unlikely(page->mapping != mapping)) {
2081 unlock_page(page);
2082 break;
2085 if (!wbc->range_cyclic && page->index > end) {
2086 *done = true;
2087 unlock_page(page);
2088 break;
2091 if (*next && (page->index != *next)) {
2092 /* Not next consecutive page */
2093 unlock_page(page);
2094 break;
2097 if (wbc->sync_mode != WB_SYNC_NONE)
2098 wait_on_page_writeback(page);
2100 if (PageWriteback(page) ||
2101 !clear_page_dirty_for_io(page)) {
2102 unlock_page(page);
2103 break;
2107 * This actually clears the dirty bit in the radix tree.
2108 * See cifs_writepage() for more commentary.
2110 set_page_writeback(page);
2111 if (page_offset(page) >= i_size_read(mapping->host)) {
2112 *done = true;
2113 unlock_page(page);
2114 end_page_writeback(page);
2115 break;
2118 wdata->pages[i] = page;
2119 *next = page->index + 1;
2120 ++nr_pages;
2123 /* reset index to refind any pages skipped */
2124 if (nr_pages == 0)
2125 *index = wdata->pages[0]->index + 1;
2127 /* put any pages we aren't going to use */
2128 for (i = nr_pages; i < found_pages; i++) {
2129 put_page(wdata->pages[i]);
2130 wdata->pages[i] = NULL;
2133 return nr_pages;
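/*
 * wdata_send_pages() sizes the request from the prepared page run:
 * tailsz = min(i_size - offset_of_last_page, PAGE_SIZE) and
 * bytes = (nr_pages - 1) * PAGE_SIZE + tailsz.  For example (assuming
 * 4K pages), with nr_pages == 3 and i_size ending 100 bytes into the
 * last page, bytes = 2 * 4096 + 100 = 8292, so the write never extends
 * past the current end of file.
 */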
2136 static int
2137 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2138 struct address_space *mapping, struct writeback_control *wbc)
2140 int rc;
2141 struct TCP_Server_Info *server =
2142 tlink_tcon(wdata->cfile->tlink)->ses->server;
2144 wdata->sync_mode = wbc->sync_mode;
2145 wdata->nr_pages = nr_pages;
2146 wdata->offset = page_offset(wdata->pages[0]);
2147 wdata->pagesz = PAGE_SIZE;
2148 wdata->tailsz = min(i_size_read(mapping->host) -
2149 page_offset(wdata->pages[nr_pages - 1]),
2150 (loff_t)PAGE_SIZE);
2151 wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2152 wdata->pid = wdata->cfile->pid;
2154 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2155 if (rc)
2156 return rc;
2158 if (wdata->cfile->invalidHandle)
2159 rc = -EAGAIN;
2160 else
2161 rc = server->ops->async_writev(wdata, cifs_writedata_release);
2163 return rc;
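/*
 * Credit-handling invariant in cifs_writepages() below: each loop pass
 * obtains send credits from server->ops->wait_mtu_credits(), and every
 * exit path either hands them to async_writev() along with the wdata
 * or returns them via add_credits_and_wake_if(), so credits are not
 * leaked on allocation failures, empty page runs, or send errors.
 */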
2166 static int cifs_writepages(struct address_space *mapping,
2167 struct writeback_control *wbc)
2169 struct inode *inode = mapping->host;
2170 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2171 struct TCP_Server_Info *server;
2172 bool done = false, scanned = false, range_whole = false;
2173 pgoff_t end, index;
2174 struct cifs_writedata *wdata;
2175 struct cifsFileInfo *cfile = NULL;
2176 int rc = 0;
2177 int saved_rc = 0;
2178 unsigned int xid;
2181 * If wsize is smaller than the page cache size, default to writing
2182 * one page at a time via cifs_writepage
2184 if (cifs_sb->wsize < PAGE_SIZE)
2185 return generic_writepages(mapping, wbc);
2187 xid = get_xid();
2188 if (wbc->range_cyclic) {
2189 index = mapping->writeback_index; /* Start from prev offset */
2190 end = -1;
2191 } else {
2192 index = wbc->range_start >> PAGE_SHIFT;
2193 end = wbc->range_end >> PAGE_SHIFT;
2194 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2195 range_whole = true;
2196 scanned = true;
2198 server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2199 retry:
2200 while (!done && index <= end) {
2201 unsigned int i, nr_pages, found_pages, wsize;
2202 pgoff_t next = 0, tofind, saved_index = index;
2203 struct cifs_credits credits_on_stack;
2204 struct cifs_credits *credits = &credits_on_stack;
2205 int get_file_rc = 0;
2207 if (cfile)
2208 cifsFileInfo_put(cfile);
2210 rc = cifs_get_writable_file(CIFS_I(inode), false, &cfile);
2212 /* in case of an error store it to return later */
2213 if (rc)
2214 get_file_rc = rc;
2216 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2217 &wsize, credits);
2218 if (rc != 0) {
2219 done = true;
2220 break;
2223 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2225 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2226 &found_pages);
2227 if (!wdata) {
2228 rc = -ENOMEM;
2229 done = true;
2230 add_credits_and_wake_if(server, credits, 0);
2231 break;
2234 if (found_pages == 0) {
2235 kref_put(&wdata->refcount, cifs_writedata_release);
2236 add_credits_and_wake_if(server, credits, 0);
2237 break;
2240 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2241 end, &index, &next, &done);
2243 /* nothing to write? */
2244 if (nr_pages == 0) {
2245 kref_put(&wdata->refcount, cifs_writedata_release);
2246 add_credits_and_wake_if(server, credits, 0);
2247 continue;
2250 wdata->credits = credits_on_stack;
2251 wdata->cfile = cfile;
2252 cfile = NULL;
2254 if (!wdata->cfile) {
2255 cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2256 get_file_rc);
2257 if (is_retryable_error(get_file_rc))
2258 rc = get_file_rc;
2259 else
2260 rc = -EBADF;
2261 } else
2262 rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2264 for (i = 0; i < nr_pages; ++i)
2265 unlock_page(wdata->pages[i]);
2267 /* send failure -- clean up the mess */
2268 if (rc != 0) {
2269 add_credits_and_wake_if(server, &wdata->credits, 0);
2270 for (i = 0; i < nr_pages; ++i) {
2271 if (is_retryable_error(rc))
2272 redirty_page_for_writepage(wbc,
2273 wdata->pages[i]);
2274 else
2275 SetPageError(wdata->pages[i]);
2276 end_page_writeback(wdata->pages[i]);
2277 put_page(wdata->pages[i]);
2279 if (!is_retryable_error(rc))
2280 mapping_set_error(mapping, rc);
2282 kref_put(&wdata->refcount, cifs_writedata_release);
2284 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2285 index = saved_index;
2286 continue;
2289 /* Return immediately if we received a signal during writing */
2290 if (is_interrupt_error(rc)) {
2291 done = true;
2292 break;
2295 if (rc != 0 && saved_rc == 0)
2296 saved_rc = rc;
2298 wbc->nr_to_write -= nr_pages;
2299 if (wbc->nr_to_write <= 0)
2300 done = true;
2302 index = next;
2305 if (!scanned && !done) {
2307 * We hit the last page and there is more work to be done: wrap
2308 * back to the start of the file
2310 scanned = true;
2311 index = 0;
2312 goto retry;
2315 if (saved_rc != 0)
2316 rc = saved_rc;
2318 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2319 mapping->writeback_index = index;
2321 if (cfile)
2322 cifsFileInfo_put(cfile);
2323 free_xid(xid);
2324 return rc;
2327 static int
2328 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2330 int rc;
2331 unsigned int xid;
2333 xid = get_xid();
2334 /* BB add check for wbc flags */
2335 get_page(page);
2336 if (!PageUptodate(page))
2337 cifs_dbg(FYI, "ppw - page not up to date\n");
2340 * Set the "writeback" flag, and clear "dirty" in the radix tree.
2342 * A writepage() implementation always needs to do either this,
2343 * or re-dirty the page with "redirty_page_for_writepage()" in
2344 * the case of a failure.
2346 * Just unlocking the page would leave the radix tree tag-bits
2347 * out of sync with the actual state of the page.
2349 set_page_writeback(page);
2350 retry_write:
2351 rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2352 if (is_retryable_error(rc)) {
2353 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2354 goto retry_write;
2355 redirty_page_for_writepage(wbc, page);
2356 } else if (rc != 0) {
2357 SetPageError(page);
2358 mapping_set_error(page->mapping, rc);
2359 } else {
2360 SetPageUptodate(page);
2362 end_page_writeback(page);
2363 put_page(page);
2364 free_xid(xid);
2365 return rc;
2368 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2370 int rc = cifs_writepage_locked(page, wbc);
2371 unlock_page(page);
2372 return rc;
2375 static int cifs_write_end(struct file *file, struct address_space *mapping,
2376 loff_t pos, unsigned len, unsigned copied,
2377 struct page *page, void *fsdata)
2379 int rc;
2380 struct inode *inode = mapping->host;
2381 struct cifsFileInfo *cfile = file->private_data;
2382 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2383 __u32 pid;
2385 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2386 pid = cfile->pid;
2387 else
2388 pid = current->tgid;
2390 cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2391 page, pos, copied);
2393 if (PageChecked(page)) {
2394 if (copied == len)
2395 SetPageUptodate(page);
2396 ClearPageChecked(page);
2397 } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2398 SetPageUptodate(page);
2400 if (!PageUptodate(page)) {
2401 char *page_data;
2402 unsigned offset = pos & (PAGE_SIZE - 1);
2403 unsigned int xid;
2405 xid = get_xid();
2406 /* this is probably better than directly calling
2407 partialpage_write since in this function the file handle
2408 is known, which we might as well leverage */
2409 /* BB check if anything else missing out of ppw
2410 such as updating last write time */
2411 page_data = kmap(page);
2412 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2413 /* if (rc < 0) should we set writebehind rc? */
2414 kunmap(page);
2416 free_xid(xid);
2417 } else {
2418 rc = copied;
2419 pos += copied;
2420 set_page_dirty(page);
2423 if (rc > 0) {
2424 spin_lock(&inode->i_lock);
2425 if (pos > inode->i_size)
2426 i_size_write(inode, pos);
2427 spin_unlock(&inode->i_lock);
2430 unlock_page(page);
2431 put_page(page);
2433 return rc;
2436 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2437 int datasync)
2439 unsigned int xid;
2440 int rc = 0;
2441 struct cifs_tcon *tcon;
2442 struct TCP_Server_Info *server;
2443 struct cifsFileInfo *smbfile = file->private_data;
2444 struct inode *inode = file_inode(file);
2445 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2447 rc = file_write_and_wait_range(file, start, end);
2448 if (rc)
2449 return rc;
2451 xid = get_xid();
2453 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2454 file, datasync);
2456 if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2457 rc = cifs_zap_mapping(inode);
2458 if (rc) {
2459 cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2460 rc = 0; /* don't care about it in fsync */
2464 tcon = tlink_tcon(smbfile->tlink);
2465 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2466 server = tcon->ses->server;
2467 if (server->ops->flush)
2468 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2469 else
2470 rc = -ENOSYS;
2473 free_xid(xid);
2474 return rc;
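/*
 * cifs_fsync() is the non-strict variant of cifs_strict_fsync() above:
 * it likewise flushes dirty pages and asks the server to flush the
 * handle, but does not zap the local page cache when read caching has
 * not been granted.
 */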
2477 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2479 unsigned int xid;
2480 int rc = 0;
2481 struct cifs_tcon *tcon;
2482 struct TCP_Server_Info *server;
2483 struct cifsFileInfo *smbfile = file->private_data;
2484 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2486 rc = file_write_and_wait_range(file, start, end);
2487 if (rc)
2488 return rc;
2490 xid = get_xid();
2492 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2493 file, datasync);
2495 tcon = tlink_tcon(smbfile->tlink);
2496 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2497 server = tcon->ses->server;
2498 if (server->ops->flush)
2499 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2500 else
2501 rc = -ENOSYS;
2504 free_xid(xid);
2505 return rc;
2509 * As the file closes, flush all cached write data for this inode,
2510 * checking for write-behind errors.
2512 int cifs_flush(struct file *file, fl_owner_t id)
2514 struct inode *inode = file_inode(file);
2515 int rc = 0;
2517 if (file->f_mode & FMODE_WRITE)
2518 rc = filemap_write_and_wait(inode->i_mapping);
2520 cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2522 return rc;
2525 static int
2526 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2528 int rc = 0;
2529 unsigned long i;
2531 for (i = 0; i < num_pages; i++) {
2532 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2533 if (!pages[i]) {
2535 * save number of pages we have already allocated and
2536 * return with ENOMEM error
2538 num_pages = i;
2539 rc = -ENOMEM;
2540 break;
2544 if (rc) {
2545 for (i = 0; i < num_pages; i++)
2546 put_page(pages[i]);
2548 return rc;
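/*
 * get_numpages() below clamps the request length to the negotiated
 * wsize and reports how many pages are needed to hold it.  Example
 * (assuming 4K pages): wsize = 64K and len = 100K yields
 * *cur_len = 64K and DIV_ROUND_UP(65536, 4096) = 16 pages.
 */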
2551 static inline
2552 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2554 size_t num_pages;
2555 size_t clen;
2557 clen = min_t(const size_t, len, wsize);
2558 num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2560 if (cur_len)
2561 *cur_len = clen;
2563 return num_pages;
2566 static void
2567 cifs_uncached_writedata_release(struct kref *refcount)
2569 int i;
2570 struct cifs_writedata *wdata = container_of(refcount,
2571 struct cifs_writedata, refcount);
2573 kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2574 for (i = 0; i < wdata->nr_pages; i++)
2575 put_page(wdata->pages[i]);
2576 cifs_writedata_release(refcount);
2579 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2581 static void
2582 cifs_uncached_writev_complete(struct work_struct *work)
2584 struct cifs_writedata *wdata = container_of(work,
2585 struct cifs_writedata, work);
2586 struct inode *inode = d_inode(wdata->cfile->dentry);
2587 struct cifsInodeInfo *cifsi = CIFS_I(inode);
2589 spin_lock(&inode->i_lock);
2590 cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2591 if (cifsi->server_eof > inode->i_size)
2592 i_size_write(inode, cifsi->server_eof);
2593 spin_unlock(&inode->i_lock);
2595 complete(&wdata->done);
2596 collect_uncached_write_data(wdata->ctx);
2597 /* the below call can possibly free the last ref to aio ctx */
2598 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2601 static int
2602 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2603 size_t *len, unsigned long *num_pages)
2605 size_t save_len, copied, bytes, cur_len = *len;
2606 unsigned long i, nr_pages = *num_pages;
2608 save_len = cur_len;
2609 for (i = 0; i < nr_pages; i++) {
2610 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2611 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2612 cur_len -= copied;
2614 * If we didn't copy as much as we expected, then that
2615 * may mean we trod into an unmapped area. Stop copying
2616 * at that point. On the next pass through the big
2617 * loop, we'll likely end up getting a zero-length
2618 * write and bailing out of it.
2620 if (copied < bytes)
2621 break;
2623 cur_len = save_len - cur_len;
2624 *len = cur_len;
2627 * If we have no data to send, then that probably means that
2628 * the copy above failed altogether. That's most likely because
2629 * the address in the iovec was bogus. Return -EFAULT and let
2630 * the caller free anything we allocated and bail out.
2632 if (!cur_len)
2633 return -EFAULT;
2636 * i + 1 now represents the number of pages we actually used in
2637 * the copy phase above.
2639 *num_pages = i + 1;
2640 return 0;
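/*
 * cifs_resend_wdata() retries a previously built wdata after a
 * reconnect: it reopens an invalidated handle if needed, sleeps in one
 * second steps until wait_mtu_credits() grants enough credit to cover
 * wdata->bytes in a single request, then reissues async_writev(),
 * looping while the send fails with -EAGAIN.
 */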
2643 static int
2644 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
2645 struct cifs_aio_ctx *ctx)
2647 unsigned int wsize;
2648 struct cifs_credits credits;
2649 int rc;
2650 struct TCP_Server_Info *server =
2651 tlink_tcon(wdata->cfile->tlink)->ses->server;
2653 do {
2654 if (wdata->cfile->invalidHandle) {
2655 rc = cifs_reopen_file(wdata->cfile, false);
2656 if (rc == -EAGAIN)
2657 continue;
2658 else if (rc)
2659 break;
2664 * Wait for credits to resend this wdata.
2665 * Note: we are attempting to resend the whole wdata rather
2666 * than in segments
2668 do {
2669 rc = server->ops->wait_mtu_credits(server, wdata->bytes,
2670 &wsize, &credits);
2671 if (rc)
2672 goto fail;
2674 if (wsize < wdata->bytes) {
2675 add_credits_and_wake_if(server, &credits, 0);
2676 msleep(1000);
2678 } while (wsize < wdata->bytes);
2679 wdata->credits = credits;
2681 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2683 if (!rc) {
2684 if (wdata->cfile->invalidHandle)
2685 rc = -EAGAIN;
2686 else
2687 rc = server->ops->async_writev(wdata,
2688 cifs_uncached_writedata_release);
2691 /* If the write was successfully sent, we are done */
2692 if (!rc) {
2693 list_add_tail(&wdata->list, wdata_list);
2694 return 0;
2697 /* Roll back credits and retry if needed */
2698 add_credits_and_wake_if(server, &wdata->credits, 0);
2699 } while (rc == -EAGAIN);
2701 fail:
2702 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2703 return rc;
2706 static int
2707 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2708 struct cifsFileInfo *open_file,
2709 struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2710 struct cifs_aio_ctx *ctx)
2712 int rc = 0;
2713 size_t cur_len;
2714 unsigned long nr_pages, num_pages, i;
2715 struct cifs_writedata *wdata;
2716 struct iov_iter saved_from = *from;
2717 loff_t saved_offset = offset;
2718 pid_t pid;
2719 struct TCP_Server_Info *server;
2720 struct page **pagevec;
2721 size_t start;
2722 unsigned int xid;
2724 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2725 pid = open_file->pid;
2726 else
2727 pid = current->tgid;
2729 server = tlink_tcon(open_file->tlink)->ses->server;
2730 xid = get_xid();
2732 do {
2733 unsigned int wsize;
2734 struct cifs_credits credits_on_stack;
2735 struct cifs_credits *credits = &credits_on_stack;
2737 if (open_file->invalidHandle) {
2738 rc = cifs_reopen_file(open_file, false);
2739 if (rc == -EAGAIN)
2740 continue;
2741 else if (rc)
2742 break;
2745 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2746 &wsize, credits);
2747 if (rc)
2748 break;
2750 cur_len = min_t(const size_t, len, wsize);
2752 if (ctx->direct_io) {
2753 ssize_t result;
2755 result = iov_iter_get_pages_alloc(
2756 from, &pagevec, cur_len, &start);
2757 if (result < 0) {
2758 cifs_dbg(VFS,
2759 "direct_writev couldn't get user pages "
2760 "(rc=%zd) iter type %d iov_offset %zd "
2761 "count %zd\n",
2762 result, from->type,
2763 from->iov_offset, from->count);
2764 dump_stack();
2766 rc = result;
2767 add_credits_and_wake_if(server, credits, 0);
2768 break;
2770 cur_len = (size_t)result;
2771 iov_iter_advance(from, cur_len);
2773 nr_pages =
2774 (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
2776 wdata = cifs_writedata_direct_alloc(pagevec,
2777 cifs_uncached_writev_complete);
2778 if (!wdata) {
2779 rc = -ENOMEM;
2780 add_credits_and_wake_if(server, credits, 0);
2781 break;
2785 wdata->page_offset = start;
2786 wdata->tailsz =
2787 nr_pages > 1 ?
2788 cur_len - (PAGE_SIZE - start) -
2789 (nr_pages - 2) * PAGE_SIZE :
2790 cur_len;
2791 } else {
2792 nr_pages = get_numpages(wsize, len, &cur_len);
2793 wdata = cifs_writedata_alloc(nr_pages,
2794 cifs_uncached_writev_complete);
2795 if (!wdata) {
2796 rc = -ENOMEM;
2797 add_credits_and_wake_if(server, credits, 0);
2798 break;
2801 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2802 if (rc) {
2803 kvfree(wdata->pages);
2804 kfree(wdata);
2805 add_credits_and_wake_if(server, credits, 0);
2806 break;
2809 num_pages = nr_pages;
2810 rc = wdata_fill_from_iovec(
2811 wdata, from, &cur_len, &num_pages);
2812 if (rc) {
2813 for (i = 0; i < nr_pages; i++)
2814 put_page(wdata->pages[i]);
2815 kvfree(wdata->pages);
2816 kfree(wdata);
2817 add_credits_and_wake_if(server, credits, 0);
2818 break;
2822 * Bring nr_pages down to the number of pages we
2823 * actually used, and free any pages that we didn't use.
2825 for ( ; nr_pages > num_pages; nr_pages--)
2826 put_page(wdata->pages[nr_pages - 1]);
2828 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2831 wdata->sync_mode = WB_SYNC_ALL;
2832 wdata->nr_pages = nr_pages;
2833 wdata->offset = (__u64)offset;
2834 wdata->cfile = cifsFileInfo_get(open_file);
2835 wdata->pid = pid;
2836 wdata->bytes = cur_len;
2837 wdata->pagesz = PAGE_SIZE;
2838 wdata->credits = credits_on_stack;
2839 wdata->ctx = ctx;
2840 kref_get(&ctx->refcount);
2842 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2844 if (!rc) {
2845 if (wdata->cfile->invalidHandle)
2846 rc = -EAGAIN;
2847 else
2848 rc = server->ops->async_writev(wdata,
2849 cifs_uncached_writedata_release);
2852 if (rc) {
2853 add_credits_and_wake_if(server, &wdata->credits, 0);
2854 kref_put(&wdata->refcount,
2855 cifs_uncached_writedata_release);
2856 if (rc == -EAGAIN) {
2857 *from = saved_from;
2858 iov_iter_advance(from, offset - saved_offset);
2859 continue;
2861 break;
2864 list_add_tail(&wdata->list, wdata_list);
2865 offset += cur_len;
2866 len -= cur_len;
2867 } while (len > 0);
2869 free_xid(xid);
2870 return rc;
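/*
 * collect_uncached_write_data() reaps completed wdatas in order of
 * increasing offset under ctx->aio_mutex, resending any that failed
 * with -EAGAIN.  When everything has been collected, ctx->rc holds the
 * total bytes written on success or the first hard error, and the
 * iocb is completed if the request was asynchronous.
 */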
2873 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
2875 struct cifs_writedata *wdata, *tmp;
2876 struct cifs_tcon *tcon;
2877 struct cifs_sb_info *cifs_sb;
2878 struct dentry *dentry = ctx->cfile->dentry;
2879 int rc;
2881 tcon = tlink_tcon(ctx->cfile->tlink);
2882 cifs_sb = CIFS_SB(dentry->d_sb);
2884 mutex_lock(&ctx->aio_mutex);
2886 if (list_empty(&ctx->list)) {
2887 mutex_unlock(&ctx->aio_mutex);
2888 return;
2891 rc = ctx->rc;
2893 * Wait for and collect replies for any successful sends in order of
2894 * increasing offset. Once an error is hit, return without waiting
2895 * for any more replies.
2897 restart_loop:
2898 list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
2899 if (!rc) {
2900 if (!try_wait_for_completion(&wdata->done)) {
2901 mutex_unlock(&ctx->aio_mutex);
2902 return;
2905 if (wdata->result)
2906 rc = wdata->result;
2907 else
2908 ctx->total_len += wdata->bytes;
2910 /* resend call if it's a retryable error */
2911 if (rc == -EAGAIN) {
2912 struct list_head tmp_list;
2913 struct iov_iter tmp_from = ctx->iter;
2915 INIT_LIST_HEAD(&tmp_list);
2916 list_del_init(&wdata->list);
2918 if (ctx->direct_io)
2919 rc = cifs_resend_wdata(
2920 wdata, &tmp_list, ctx);
2921 else {
2922 iov_iter_advance(&tmp_from,
2923 wdata->offset - ctx->pos);
2925 rc = cifs_write_from_iter(wdata->offset,
2926 wdata->bytes, &tmp_from,
2927 ctx->cfile, cifs_sb, &tmp_list,
2928 ctx);
2930 kref_put(&wdata->refcount,
2931 cifs_uncached_writedata_release);
2934 list_splice(&tmp_list, &ctx->list);
2935 goto restart_loop;
2938 list_del_init(&wdata->list);
2939 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2942 cifs_stats_bytes_written(tcon, ctx->total_len);
2943 set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
2945 ctx->rc = (rc == 0) ? ctx->total_len : rc;
2947 mutex_unlock(&ctx->aio_mutex);
2949 if (ctx->iocb && ctx->iocb->ki_complete)
2950 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
2951 else
2952 complete(&ctx->done);
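/*
 * __cifs_writev() is the common entry point for uncached and direct
 * writes: it builds a cifs_aio_ctx, queues the writes through
 * cifs_write_from_iter(), then either returns -EIOCBQUEUED for an
 * async kiocb or waits (killably) for collect_uncached_write_data()
 * to complete the context.
 */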
2955 static ssize_t __cifs_writev(
2956 struct kiocb *iocb, struct iov_iter *from, bool direct)
2958 struct file *file = iocb->ki_filp;
2959 ssize_t total_written = 0;
2960 struct cifsFileInfo *cfile;
2961 struct cifs_tcon *tcon;
2962 struct cifs_sb_info *cifs_sb;
2963 struct cifs_aio_ctx *ctx;
2964 struct iov_iter saved_from = *from;
2965 size_t len = iov_iter_count(from);
2966 int rc;
2969 * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
2970 * In this case, fall back to non-direct write function.
2971 * This could be improved by getting pages directly in ITER_KVEC.
2973 if (direct && from->type & ITER_KVEC) {
2974 cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
2975 direct = false;
2978 rc = generic_write_checks(iocb, from);
2979 if (rc <= 0)
2980 return rc;
2982 cifs_sb = CIFS_FILE_SB(file);
2983 cfile = file->private_data;
2984 tcon = tlink_tcon(cfile->tlink);
2986 if (!tcon->ses->server->ops->async_writev)
2987 return -ENOSYS;
2989 ctx = cifs_aio_ctx_alloc();
2990 if (!ctx)
2991 return -ENOMEM;
2993 ctx->cfile = cifsFileInfo_get(cfile);
2995 if (!is_sync_kiocb(iocb))
2996 ctx->iocb = iocb;
2998 ctx->pos = iocb->ki_pos;
3000 if (direct) {
3001 ctx->direct_io = true;
3002 ctx->iter = *from;
3003 ctx->len = len;
3004 } else {
3005 rc = setup_aio_ctx_iter(ctx, from, WRITE);
3006 if (rc) {
3007 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3008 return rc;
3012 /* grab a lock here because write response handlers can access ctx */
3013 mutex_lock(&ctx->aio_mutex);
3015 rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
3016 cfile, cifs_sb, &ctx->list, ctx);
3019 * If at least one write was successfully sent, then discard any rc
3020 * value from the later writes. If the other write succeeds, then
3021 * we'll end up returning whatever was written. If it fails, then
3022 * we'll get a new rc value from that.
3024 if (!list_empty(&ctx->list))
3025 rc = 0;
3027 mutex_unlock(&ctx->aio_mutex);
3029 if (rc) {
3030 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3031 return rc;
3034 if (!is_sync_kiocb(iocb)) {
3035 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3036 return -EIOCBQUEUED;
3039 rc = wait_for_completion_killable(&ctx->done);
3040 if (rc) {
3041 mutex_lock(&ctx->aio_mutex);
3042 ctx->rc = rc = -EINTR;
3043 total_written = ctx->total_len;
3044 mutex_unlock(&ctx->aio_mutex);
3045 } else {
3046 rc = ctx->rc;
3047 total_written = ctx->total_len;
3050 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3052 if (unlikely(!total_written))
3053 return rc;
3055 iocb->ki_pos += total_written;
3056 return total_written;
3059 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3061 return __cifs_writev(iocb, from, true);
3064 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3066 return __cifs_writev(iocb, from, false);
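/*
 * cifs_writev() takes lock_sem shared so that no mandatory brlock that
 * conflicts with the write can be inserted while the write is in
 * flight; if a conflicting lock already exists the write fails with
 * -EACCES instead.
 */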
3069 static ssize_t
3070 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3072 struct file *file = iocb->ki_filp;
3073 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3074 struct inode *inode = file->f_mapping->host;
3075 struct cifsInodeInfo *cinode = CIFS_I(inode);
3076 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3077 ssize_t rc;
3079 inode_lock(inode);
3081 * We need to hold the sem to be sure nobody modifies lock list
3082 * with a brlock that prevents writing.
3084 down_read(&cinode->lock_sem);
3086 rc = generic_write_checks(iocb, from);
3087 if (rc <= 0)
3088 goto out;
3090 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3091 server->vals->exclusive_lock_type, 0,
3092 NULL, CIFS_WRITE_OP))
3093 rc = __generic_file_write_iter(iocb, from);
3094 else
3095 rc = -EACCES;
3096 out:
3097 up_read(&cinode->lock_sem);
3098 inode_unlock(inode);
3100 if (rc > 0)
3101 rc = generic_write_sync(iocb, rc);
3102 return rc;
3105 ssize_t
3106 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3108 struct inode *inode = file_inode(iocb->ki_filp);
3109 struct cifsInodeInfo *cinode = CIFS_I(inode);
3110 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3111 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3112 iocb->ki_filp->private_data;
3113 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3114 ssize_t written;
3116 written = cifs_get_writer(cinode);
3117 if (written)
3118 return written;
3120 if (CIFS_CACHE_WRITE(cinode)) {
3121 if (cap_unix(tcon->ses) &&
3122 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3123 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3124 written = generic_file_write_iter(iocb, from);
3125 goto out;
3127 written = cifs_writev(iocb, from);
3128 goto out;
3131 * For non-oplocked files in strict cache mode we need to write the data
3132 * to the server exactly from the pos to pos+len-1 rather than flush all
3133 * affected pages because it may cause an error with mandatory locks on
3134 * these pages but not on the region from pos to pos+len-1.
3136 written = cifs_user_writev(iocb, from);
3137 if (CIFS_CACHE_READ(cinode)) {
3139 * We have read level caching and we have just sent a write
3140 * request to the server thus making data in the cache stale.
3141 * Zap the cache and set oplock/lease level to NONE to avoid
3142 * reading stale data from the cache. All subsequent read
3143 * operations will read new data from the server.
3145 cifs_zap_mapping(inode);
3146 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3147 inode);
3148 cinode->oplock = 0;
3150 out:
3151 cifs_put_writer(cinode);
3152 return written;
3155 static struct cifs_readdata *
3156 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3158 struct cifs_readdata *rdata;
3160 rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3161 if (rdata != NULL) {
3162 rdata->pages = pages;
3163 kref_init(&rdata->refcount);
3164 INIT_LIST_HEAD(&rdata->list);
3165 init_completion(&rdata->done);
3166 INIT_WORK(&rdata->work, complete);
3169 return rdata;
3172 static struct cifs_readdata *
3173 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3175 struct page **pages =
3176 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3177 struct cifs_readdata *ret = NULL;
3179 if (pages) {
3180 ret = cifs_readdata_direct_alloc(pages, complete);
3181 if (!ret)
3182 kfree(pages);
3185 return ret;
3188 void
3189 cifs_readdata_release(struct kref *refcount)
3191 struct cifs_readdata *rdata = container_of(refcount,
3192 struct cifs_readdata, refcount);
3193 #ifdef CONFIG_CIFS_SMB_DIRECT
3194 if (rdata->mr) {
3195 smbd_deregister_mr(rdata->mr);
3196 rdata->mr = NULL;
3198 #endif
3199 if (rdata->cfile)
3200 cifsFileInfo_put(rdata->cfile);
3202 kvfree(rdata->pages);
3203 kfree(rdata);
3206 static int
3207 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3209 int rc = 0;
3210 struct page *page;
3211 unsigned int i;
3213 for (i = 0; i < nr_pages; i++) {
3214 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3215 if (!page) {
3216 rc = -ENOMEM;
3217 break;
3219 rdata->pages[i] = page;
3222 if (rc) {
3223 unsigned int nr_page_failed = i;
3225 for (i = 0; i < nr_page_failed; i++) {
3226 put_page(rdata->pages[i]);
3227 rdata->pages[i] = NULL;
3230 return rc;
3233 static void
3234 cifs_uncached_readdata_release(struct kref *refcount)
3236 struct cifs_readdata *rdata = container_of(refcount,
3237 struct cifs_readdata, refcount);
3238 unsigned int i;
3240 kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3241 for (i = 0; i < rdata->nr_pages; i++) {
3242 put_page(rdata->pages[i]);
3244 cifs_readdata_release(refcount);
3248 * cifs_readdata_to_iov - copy data from pages in response to an iovec
3249 * @rdata: the readdata response with list of pages holding data
3250 * @iter: destination for our data
3252 * This function copies data from a list of pages in a readdata response into
3253 * an array of iovecs. It will first calculate where the data should go
3254 * based on the info in the readdata and then copy the data into that spot.
3256 static int
3257 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3259 size_t remaining = rdata->got_bytes;
3260 unsigned int i;
3262 for (i = 0; i < rdata->nr_pages; i++) {
3263 struct page *page = rdata->pages[i];
3264 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3265 size_t written;
3267 if (unlikely(iov_iter_is_pipe(iter))) {
3268 void *addr = kmap_atomic(page);
3270 written = copy_to_iter(addr, copy, iter);
3271 kunmap_atomic(addr);
3272 } else
3273 written = copy_page_to_iter(page, 0, copy, iter);
3274 remaining -= written;
3275 if (written < copy && iov_iter_count(iter) > 0)
3276 break;
3278 return remaining ? -EFAULT : 0;
3281 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3283 static void
3284 cifs_uncached_readv_complete(struct work_struct *work)
3286 struct cifs_readdata *rdata = container_of(work,
3287 struct cifs_readdata, work);
3289 complete(&rdata->done);
3290 collect_uncached_read_data(rdata->ctx);
3291 /* the below call can possibly free the last ref to aio ctx */
3292 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3295 static int
3296 uncached_fill_pages(struct TCP_Server_Info *server,
3297 struct cifs_readdata *rdata, struct iov_iter *iter,
3298 unsigned int len)
3300 int result = 0;
3301 unsigned int i;
3302 unsigned int nr_pages = rdata->nr_pages;
3303 unsigned int page_offset = rdata->page_offset;
3305 rdata->got_bytes = 0;
3306 rdata->tailsz = PAGE_SIZE;
3307 for (i = 0; i < nr_pages; i++) {
3308 struct page *page = rdata->pages[i];
3309 size_t n;
3310 unsigned int segment_size = rdata->pagesz;
3312 if (i == 0)
3313 segment_size -= page_offset;
3314 else
3315 page_offset = 0;
3318 if (len <= 0) {
3319 /* no need to hold page hostage */
3320 rdata->pages[i] = NULL;
3321 rdata->nr_pages--;
3322 put_page(page);
3323 continue;
3326 n = len;
3327 if (len >= segment_size)
3328 /* enough data to fill the page */
3329 n = segment_size;
3330 else
3331 rdata->tailsz = len;
3332 len -= n;
3334 if (iter)
3335 result = copy_page_from_iter(
3336 page, page_offset, n, iter);
3337 #ifdef CONFIG_CIFS_SMB_DIRECT
3338 else if (rdata->mr)
3339 result = n;
3340 #endif
3341 else
3342 result = cifs_read_page_from_socket(
3343 server, page, page_offset, n);
3344 if (result < 0)
3345 break;
3347 rdata->got_bytes += result;
3350 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3351 rdata->got_bytes : result;
3354 static int
3355 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3356 struct cifs_readdata *rdata, unsigned int len)
3358 return uncached_fill_pages(server, rdata, NULL, len);
3361 static int
3362 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3363 struct cifs_readdata *rdata,
3364 struct iov_iter *iter)
3366 return uncached_fill_pages(server, rdata, iter, iter->count);
3369 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3370 struct list_head *rdata_list,
3371 struct cifs_aio_ctx *ctx)
3373 unsigned int rsize;
3374 struct cifs_credits credits;
3375 int rc;
3376 struct TCP_Server_Info *server =
3377 tlink_tcon(rdata->cfile->tlink)->ses->server;
3379 do {
3380 if (rdata->cfile->invalidHandle) {
3381 rc = cifs_reopen_file(rdata->cfile, true);
3382 if (rc == -EAGAIN)
3383 continue;
3384 else if (rc)
3385 break;
3389 * Wait for credits to resend this rdata.
3390 * Note: we are attempting to resend the whole rdata rather
3391 * than in segments
3393 do {
3394 rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3395 &rsize, &credits);
3397 if (rc)
3398 goto fail;
3400 if (rsize < rdata->bytes) {
3401 add_credits_and_wake_if(server, &credits, 0);
3402 msleep(1000);
3404 } while (rsize < rdata->bytes);
3405 rdata->credits = credits;
3407 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3408 if (!rc) {
3409 if (rdata->cfile->invalidHandle)
3410 rc = -EAGAIN;
3411 else
3412 rc = server->ops->async_readv(rdata);
3415 /* If the read was successfully sent, we are done */
3416 if (!rc) {
3417 /* Add to aio pending list */
3418 list_add_tail(&rdata->list, rdata_list);
3419 return 0;
3422 /* Roll back credits and retry if needed */
3423 add_credits_and_wake_if(server, &rdata->credits, 0);
3424 } while (rc == -EAGAIN);
3426 fail:
3427 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3428 return rc;
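/*
 * cifs_send_async_read() is the read-side counterpart of
 * cifs_write_from_iter(): it slices [offset, offset + len) into
 * rsize-bounded rdatas (pinning user pages directly for direct I/O,
 * or allocating bounce pages otherwise), charges credits, and queues
 * each rdata via async_readv().
 */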
3431 static int
3432 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3433 struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3434 struct cifs_aio_ctx *ctx)
3436 struct cifs_readdata *rdata;
3437 unsigned int npages, rsize;
3438 struct cifs_credits credits_on_stack;
3439 struct cifs_credits *credits = &credits_on_stack;
3440 size_t cur_len;
3441 int rc;
3442 pid_t pid;
3443 struct TCP_Server_Info *server;
3444 struct page **pagevec;
3445 size_t start;
3446 struct iov_iter direct_iov = ctx->iter;
3448 server = tlink_tcon(open_file->tlink)->ses->server;
3450 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3451 pid = open_file->pid;
3452 else
3453 pid = current->tgid;
3455 if (ctx->direct_io)
3456 iov_iter_advance(&direct_iov, offset - ctx->pos);
3458 do {
3459 if (open_file->invalidHandle) {
3460 rc = cifs_reopen_file(open_file, true);
3461 if (rc == -EAGAIN)
3462 continue;
3463 else if (rc)
3464 break;
3467 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3468 &rsize, credits);
3469 if (rc)
3470 break;
3472 cur_len = min_t(const size_t, len, rsize);
3474 if (ctx->direct_io) {
3475 ssize_t result;
3477 result = iov_iter_get_pages_alloc(
3478 &direct_iov, &pagevec,
3479 cur_len, &start);
3480 if (result < 0) {
3481 cifs_dbg(VFS,
3482 "couldn't get user pages (rc=%zd)"
3483 " iter type %d"
3484 " iov_offset %zd count %zd\n",
3485 result, direct_iov.type,
3486 direct_iov.iov_offset,
3487 direct_iov.count);
3488 dump_stack();
3490 rc = result;
3491 add_credits_and_wake_if(server, credits, 0);
3492 break;
3494 cur_len = (size_t)result;
3495 iov_iter_advance(&direct_iov, cur_len);
3497 rdata = cifs_readdata_direct_alloc(
3498 pagevec, cifs_uncached_readv_complete);
3499 if (!rdata) {
3500 add_credits_and_wake_if(server, credits, 0);
3501 rc = -ENOMEM;
3502 break;
3505 npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
3506 rdata->page_offset = start;
3507 rdata->tailsz = npages > 1 ?
3508 cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
3509 cur_len;
3511 } else {
3513 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3514 /* allocate a readdata struct */
3515 rdata = cifs_readdata_alloc(npages,
3516 cifs_uncached_readv_complete);
3517 if (!rdata) {
3518 add_credits_and_wake_if(server, credits, 0);
3519 rc = -ENOMEM;
3520 break;
3523 rc = cifs_read_allocate_pages(rdata, npages);
3524 if (rc) {
3525 kvfree(rdata->pages);
3526 kfree(rdata);
3527 add_credits_and_wake_if(server, credits, 0);
3528 break;
3531 rdata->tailsz = PAGE_SIZE;
3534 rdata->cfile = cifsFileInfo_get(open_file);
3535 rdata->nr_pages = npages;
3536 rdata->offset = offset;
3537 rdata->bytes = cur_len;
3538 rdata->pid = pid;
3539 rdata->pagesz = PAGE_SIZE;
3540 rdata->read_into_pages = cifs_uncached_read_into_pages;
3541 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3542 rdata->credits = credits_on_stack;
3543 rdata->ctx = ctx;
3544 kref_get(&ctx->refcount);
3546 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3548 if (!rc) {
3549 if (rdata->cfile->invalidHandle)
3550 rc = -EAGAIN;
3551 else
3552 rc = server->ops->async_readv(rdata);
3555 if (rc) {
3556 add_credits_and_wake_if(server, &rdata->credits, 0);
3557 kref_put(&rdata->refcount,
3558 cifs_uncached_readdata_release);
3559 if (rc == -EAGAIN) {
3560 iov_iter_revert(&direct_iov, cur_len);
3561 continue;
3563 break;
3566 list_add_tail(&rdata->list, rdata_list);
3567 offset += cur_len;
3568 len -= cur_len;
3569 } while (len > 0);
3571 return rc;
3574 static void
3575 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3577 struct cifs_readdata *rdata, *tmp;
3578 struct iov_iter *to = &ctx->iter;
3579 struct cifs_sb_info *cifs_sb;
3580 struct cifs_tcon *tcon;
3581 int rc;
3583 tcon = tlink_tcon(ctx->cfile->tlink);
3584 cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3586 mutex_lock(&ctx->aio_mutex);
3588 if (list_empty(&ctx->list)) {
3589 mutex_unlock(&ctx->aio_mutex);
3590 return;
3593 rc = ctx->rc;
3594 /* the loop below should proceed in the order of increasing offsets */
3595 again:
3596 list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
3597 if (!rc) {
3598 if (!try_wait_for_completion(&rdata->done)) {
3599 mutex_unlock(&ctx->aio_mutex);
3600 return;
3603 if (rdata->result == -EAGAIN) {
3604 /* resend call if it's a retryable error */
3605 struct list_head tmp_list;
3606 unsigned int got_bytes = rdata->got_bytes;
3608 list_del_init(&rdata->list);
3609 INIT_LIST_HEAD(&tmp_list);
3612 * We got part of the data and then a reconnect
3613 * happened -- fill the buffer and continue
3614 * reading.
3616 if (got_bytes && got_bytes < rdata->bytes) {
3617 rc = 0;
3618 if (!ctx->direct_io)
3619 rc = cifs_readdata_to_iov(rdata, to);
3620 if (rc) {
3621 kref_put(&rdata->refcount,
3622 cifs_uncached_readdata_release);
3623 continue;
3627 if (ctx->direct_io) {
3629 * Re-use rdata as this is a
3630 * direct I/O
3632 rc = cifs_resend_rdata(
3633 rdata,
3634 &tmp_list, ctx);
3635 } else {
3636 rc = cifs_send_async_read(
3637 rdata->offset + got_bytes,
3638 rdata->bytes - got_bytes,
3639 rdata->cfile, cifs_sb,
3640 &tmp_list, ctx);
3642 kref_put(&rdata->refcount,
3643 cifs_uncached_readdata_release);
3646 list_splice(&tmp_list, &ctx->list);
3648 goto again;
3649 } else if (rdata->result)
3650 rc = rdata->result;
3651 else if (!ctx->direct_io)
3652 rc = cifs_readdata_to_iov(rdata, to);
3654 /* if there was a short read -- discard anything left */
3655 if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3656 rc = -ENODATA;
3658 ctx->total_len += rdata->got_bytes;
3660 list_del_init(&rdata->list);
3661 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3664 if (!ctx->direct_io)
3665 ctx->total_len = ctx->len - iov_iter_count(to);
3667 /* mask nodata case */
3668 if (rc == -ENODATA)
3669 rc = 0;
3671 ctx->rc = (rc == 0) ? ctx->total_len : rc;
3673 mutex_unlock(&ctx->aio_mutex);
3675 if (ctx->iocb && ctx->iocb->ki_complete)
3676 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3677 else
3678 complete(&ctx->done);
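/*
 * __cifs_readv() mirrors __cifs_writev() above: it prepares a
 * cifs_aio_ctx, fans the read out through cifs_send_async_read(), and
 * either returns -EIOCBQUEUED for an async kiocb or waits for
 * collect_uncached_read_data() to finish before advancing ki_pos.
 */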
3681 static ssize_t __cifs_readv(
3682 struct kiocb *iocb, struct iov_iter *to, bool direct)
3684 size_t len;
3685 struct file *file = iocb->ki_filp;
3686 struct cifs_sb_info *cifs_sb;
3687 struct cifsFileInfo *cfile;
3688 struct cifs_tcon *tcon;
3689 ssize_t rc, total_read = 0;
3690 loff_t offset = iocb->ki_pos;
3691 struct cifs_aio_ctx *ctx;
3694 * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
3695 * fall back to the data copy read path.
3696 * This could be improved by getting pages directly in ITER_KVEC.
3698 if (direct && to->type & ITER_KVEC) {
3699 cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
3700 direct = false;
3703 len = iov_iter_count(to);
3704 if (!len)
3705 return 0;
3707 cifs_sb = CIFS_FILE_SB(file);
3708 cfile = file->private_data;
3709 tcon = tlink_tcon(cfile->tlink);
3711 if (!tcon->ses->server->ops->async_readv)
3712 return -ENOSYS;
3714 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3715 cifs_dbg(FYI, "attempting read on write only file instance\n");
3717 ctx = cifs_aio_ctx_alloc();
3718 if (!ctx)
3719 return -ENOMEM;
3721 ctx->cfile = cifsFileInfo_get(cfile);
3723 if (!is_sync_kiocb(iocb))
3724 ctx->iocb = iocb;
3726 if (iter_is_iovec(to))
3727 ctx->should_dirty = true;
3729 if (direct) {
3730 ctx->pos = offset;
3731 ctx->direct_io = true;
3732 ctx->iter = *to;
3733 ctx->len = len;
3734 } else {
3735 rc = setup_aio_ctx_iter(ctx, to, READ);
3736 if (rc) {
3737 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3738 return rc;
3740 len = ctx->len;
3743 /* grab a lock here because read response handlers can access ctx */
3744 mutex_lock(&ctx->aio_mutex);
3746 rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
3748 /* if at least one read request send succeeded, then reset rc */
3749 if (!list_empty(&ctx->list))
3750 rc = 0;
3752 mutex_unlock(&ctx->aio_mutex);
3754 if (rc) {
3755 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3756 return rc;
3759 if (!is_sync_kiocb(iocb)) {
3760 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3761 return -EIOCBQUEUED;
3764 rc = wait_for_completion_killable(&ctx->done);
3765 if (rc) {
3766 mutex_lock(&ctx->aio_mutex);
3767 ctx->rc = rc = -EINTR;
3768 total_read = ctx->total_len;
3769 mutex_unlock(&ctx->aio_mutex);
3770 } else {
3771 rc = ctx->rc;
3772 total_read = ctx->total_len;
3775 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3777 if (total_read) {
3778 iocb->ki_pos += total_read;
3779 return total_read;
3781 return rc;
3784 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
3786 return __cifs_readv(iocb, to, true);
3789 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
3791 return __cifs_readv(iocb, to, false);
3794 ssize_t
3795 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3797 struct inode *inode = file_inode(iocb->ki_filp);
3798 struct cifsInodeInfo *cinode = CIFS_I(inode);
3799 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3800 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3801 iocb->ki_filp->private_data;
3802 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3803 int rc = -EACCES;
3806 * In strict cache mode we need to read from the server all the time
3807 * if we don't have level II oplock because the server can delay mtime
3808 * change - so we can't decide whether to invalidate the inode.
3809 * And page reading can also fail if there are mandatory locks
3810 * on pages affected by this read but not on the region from pos to
3811 * pos+len-1.
3813 if (!CIFS_CACHE_READ(cinode))
3814 return cifs_user_readv(iocb, to);
3816 if (cap_unix(tcon->ses) &&
3817 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3818 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3819 return generic_file_read_iter(iocb, to);
3822 * We need to hold the sem to be sure nobody modifies lock list
3823 * with a brlock that prevents reading.
3825 down_read(&cinode->lock_sem);
3826 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
3827 tcon->ses->server->vals->shared_lock_type,
3828 0, NULL, CIFS_READ_OP))
3829 rc = generic_file_read_iter(iocb, to);
3830 up_read(&cinode->lock_sem);
3831 return rc;
3834 static ssize_t
3835 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
3837 int rc = -EACCES;
3838 unsigned int bytes_read = 0;
3839 unsigned int total_read;
3840 unsigned int current_read_size;
3841 unsigned int rsize;
3842 struct cifs_sb_info *cifs_sb;
3843 struct cifs_tcon *tcon;
3844 struct TCP_Server_Info *server;
3845 unsigned int xid;
3846 char *cur_offset;
3847 struct cifsFileInfo *open_file;
3848 struct cifs_io_parms io_parms;
3849 int buf_type = CIFS_NO_BUFFER;
3850 __u32 pid;
3852 xid = get_xid();
3853 cifs_sb = CIFS_FILE_SB(file);
3855 /* FIXME: set up handlers for larger reads and/or convert to async */
3856 rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
3858 if (file->private_data == NULL) {
3859 rc = -EBADF;
3860 free_xid(xid);
3861 return rc;
3863 open_file = file->private_data;
3864 tcon = tlink_tcon(open_file->tlink);
3865 server = tcon->ses->server;
3867 if (!server->ops->sync_read) {
3868 free_xid(xid);
3869 return -ENOSYS;
3872 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3873 pid = open_file->pid;
3874 else
3875 pid = current->tgid;
3877 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3878 cifs_dbg(FYI, "attempting read on write only file instance\n");
3880 for (total_read = 0, cur_offset = read_data; read_size > total_read;
3881 total_read += bytes_read, cur_offset += bytes_read) {
3882 do {
3883 current_read_size = min_t(uint, read_size - total_read,
3884 rsize);
3886 * For Windows ME and 9x we do not want to request more
3887 * than was negotiated, since the server will then refuse
3888 * the read.
3890 if ((tcon->ses) && !(tcon->ses->capabilities &
3891 tcon->ses->server->vals->cap_large_files)) {
3892 current_read_size = min_t(uint,
3893 current_read_size, CIFSMaxBufSize);
3895 if (open_file->invalidHandle) {
3896 rc = cifs_reopen_file(open_file, true);
3897 if (rc != 0)
3898 break;
3900 io_parms.pid = pid;
3901 io_parms.tcon = tcon;
3902 io_parms.offset = *offset;
3903 io_parms.length = current_read_size;
3904 rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
3905 &bytes_read, &cur_offset,
3906 &buf_type);
3907 } while (rc == -EAGAIN);
3909 if (rc || (bytes_read == 0)) {
3910 if (total_read) {
3911 break;
3912 } else {
3913 free_xid(xid);
3914 return rc;
3916 } else {
3917 cifs_stats_bytes_read(tcon, total_read);
3918 *offset += bytes_read;
3921 free_xid(xid);
3922 return total_read;
3926 * If the page is mmap'ed into a process' page tables, then we need to make
3927 * sure that it doesn't change while being written back.
3929 static vm_fault_t
3930 cifs_page_mkwrite(struct vm_fault *vmf)
3932 struct page *page = vmf->page;
3934 lock_page(page);
3935 return VM_FAULT_LOCKED;
3938 static const struct vm_operations_struct cifs_file_vm_ops = {
3939 .fault = filemap_fault,
3940 .map_pages = filemap_map_pages,
3941 .page_mkwrite = cifs_page_mkwrite,
3944 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3946 int xid, rc = 0;
3947 struct inode *inode = file_inode(file);
3949 xid = get_xid();
3951 if (!CIFS_CACHE_READ(CIFS_I(inode)))
3952 rc = cifs_zap_mapping(inode);
3953 if (!rc)
3954 rc = generic_file_mmap(file, vma);
3955 if (!rc)
3956 vma->vm_ops = &cifs_file_vm_ops;
3958 free_xid(xid);
3959 return rc;
3962 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3964 int rc, xid;
3966 xid = get_xid();
3968 rc = cifs_revalidate_file(file);
3969 if (rc)
3970 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3971 rc);
3972 if (!rc)
3973 rc = generic_file_mmap(file, vma);
3974 if (!rc)
3975 vma->vm_ops = &cifs_file_vm_ops;
3977 free_xid(xid);
3978 return rc;
3981 static void
3982 cifs_readv_complete(struct work_struct *work)
3984 unsigned int i, got_bytes;
3985 struct cifs_readdata *rdata = container_of(work,
3986 struct cifs_readdata, work);
3988 got_bytes = rdata->got_bytes;
3989 for (i = 0; i < rdata->nr_pages; i++) {
3990 struct page *page = rdata->pages[i];
3992 lru_cache_add_file(page);
3994 if (rdata->result == 0 ||
3995 (rdata->result == -EAGAIN && got_bytes)) {
3996 flush_dcache_page(page);
3997 SetPageUptodate(page);
4000 unlock_page(page);
4002 if (rdata->result == 0 ||
4003 (rdata->result == -EAGAIN && got_bytes))
4004 cifs_readpage_to_fscache(rdata->mapping->host, page);
4006 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
4008 put_page(page);
4009 rdata->pages[i] = NULL;
4011 kref_put(&rdata->refcount, cifs_readdata_release);
4014 static int
4015 readpages_fill_pages(struct TCP_Server_Info *server,
4016 struct cifs_readdata *rdata, struct iov_iter *iter,
4017 unsigned int len)
4019 int result = 0;
4020 unsigned int i;
4021 u64 eof;
4022 pgoff_t eof_index;
4023 unsigned int nr_pages = rdata->nr_pages;
4024 unsigned int page_offset = rdata->page_offset;
4026 /* determine the eof that the server (probably) has */
4027 eof = CIFS_I(rdata->mapping->host)->server_eof;
4028 eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
4029 cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
4031 rdata->got_bytes = 0;
4032 rdata->tailsz = PAGE_SIZE;
4033 for (i = 0; i < nr_pages; i++) {
4034 struct page *page = rdata->pages[i];
4035 unsigned int to_read = rdata->pagesz;
4036 size_t n;
4038 if (i == 0)
4039 to_read -= page_offset;
4040 else
4041 page_offset = 0;
4043 n = to_read;
4045 if (len >= to_read) {
4046 len -= to_read;
4047 } else if (len > 0) {
4048 /* enough for partial page, fill and zero the rest */
4049 zero_user(page, len + page_offset, to_read - len);
4050 n = rdata->tailsz = len;
4051 len = 0;
4052 } else if (page->index > eof_index) {
4054 * The VFS will not try to do readahead past the
4055 * i_size, but it's possible that we have outstanding
4056 * writes with gaps in the middle and the i_size hasn't
4057 * caught up yet. Populate those with zeroed out pages
4058 * to prevent the VFS from repeatedly attempting to
4059 * fill them until the writes are flushed.
4061 zero_user(page, 0, PAGE_SIZE);
4062 lru_cache_add_file(page);
4063 flush_dcache_page(page);
4064 SetPageUptodate(page);
4065 unlock_page(page);
4066 put_page(page);
4067 rdata->pages[i] = NULL;
4068 rdata->nr_pages--;
4069 continue;
4070 } else {
4071 /* no need to hold page hostage */
4072 lru_cache_add_file(page);
4073 unlock_page(page);
4074 put_page(page);
4075 rdata->pages[i] = NULL;
4076 rdata->nr_pages--;
4077 continue;
4080 if (iter)
4081 result = copy_page_from_iter(
4082 page, page_offset, n, iter);
4083 #ifdef CONFIG_CIFS_SMB_DIRECT
4084 else if (rdata->mr)
4085 result = n;
4086 #endif
4087 else
4088 result = cifs_read_page_from_socket(
4089 server, page, page_offset, n);
4090 if (result < 0)
4091 break;
4093 rdata->got_bytes += result;
4096 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
4097 rdata->got_bytes : result;
4100 static int
4101 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
4102 struct cifs_readdata *rdata, unsigned int len)
4104 return readpages_fill_pages(server, rdata, NULL, len);
4107 static int
4108 cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
4109 struct cifs_readdata *rdata,
4110 struct iov_iter *iter)
4112 return readpages_fill_pages(server, rdata, iter, iter->count);
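/*
 * readpages_get_pages() batches contiguous pages from the readahead
 * list into a single request, stopping at an index discontinuity or
 * as soon as one more page would exceed rsize.  Example (assuming 4K
 * pages): with rsize = 64K one rdata covers at most 16 consecutive
 * pages.
 */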
4115 static int
4116 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
4117 unsigned int rsize, struct list_head *tmplist,
4118 unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
4120 struct page *page, *tpage;
4121 unsigned int expected_index;
4122 int rc;
4123 gfp_t gfp = readahead_gfp_mask(mapping);
4125 INIT_LIST_HEAD(tmplist);
4127 page = lru_to_page(page_list);
4130 * Lock the page and put it in the cache. Since no one else
4131 * should have access to this page, we're safe to simply set
4132 * PG_locked without checking it first.
4134 __SetPageLocked(page);
4135 rc = add_to_page_cache_locked(page, mapping,
4136 page->index, gfp);
4138 /* give up if we can't stick it in the cache */
4139 if (rc) {
4140 __ClearPageLocked(page);
4141 return rc;
4144 /* move first page to the tmplist */
4145 *offset = (loff_t)page->index << PAGE_SHIFT;
4146 *bytes = PAGE_SIZE;
4147 *nr_pages = 1;
4148 list_move_tail(&page->lru, tmplist);
4150 /* now try and add more pages onto the request */
4151 expected_index = page->index + 1;
4152 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
4153 /* discontinuity ? */
4154 if (page->index != expected_index)
4155 break;
4157 /* would this page push the read over the rsize? */
4158 if (*bytes + PAGE_SIZE > rsize)
4159 break;
4161 __SetPageLocked(page);
4162 if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
4163 __ClearPageLocked(page);
4164 break;
4166 list_move_tail(&page->lru, tmplist);
4167 (*bytes) += PAGE_SIZE;
4168 expected_index++;
4169 (*nr_pages)++;
4171 return rc;
4174 static int cifs_readpages(struct file *file, struct address_space *mapping,
4175 struct list_head *page_list, unsigned num_pages)
4177 int rc;
4178 struct list_head tmplist;
4179 struct cifsFileInfo *open_file = file->private_data;
4180 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
4181 struct TCP_Server_Info *server;
4182 pid_t pid;
4183 unsigned int xid;
4185 xid = get_xid();
4187 * Reads as many pages as possible from fscache. Returns -ENOBUFS
4188 * immediately if the cookie is negative.
4190 * After this point, every page in the list might have PG_fscache set,
4191 * so we will need to clean that up off of every page we don't use.
4193 rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
4194 &num_pages);
4195 if (rc == 0) {
4196 free_xid(xid);
4197 return rc;
4200 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4201 pid = open_file->pid;
4202 else
4203 pid = current->tgid;
4205 rc = 0;
4206 server = tlink_tcon(open_file->tlink)->ses->server;
4208 cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4209 __func__, file, mapping, num_pages);
4212 * Start with the page at end of list and move it to private
4213 * list. Do the same with any following pages until we hit
4214 * the rsize limit, hit an index discontinuity, or run out of
4215 * pages. Issue the async read and then start the loop again
4216 * until the list is empty.
4218 * Note that list order is important. The page_list is in
4219 * the order of declining indexes. When we put the pages in
4220 * the rdata->pages, then we want them in increasing order.
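	/*
	 * Illustration: if page_list holds indexes 7,6,5,4, the tail page
	 * (index 4) seeds each batch and pages 5,6,7 are appended after it,
	 * so rdata->pages ends up in increasing index order as required.
	 */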
	while (!list_empty(page_list)) {
		unsigned int i, nr_pages, bytes, rsize;
		loff_t offset;
		struct page *page, *tpage;
		struct cifs_readdata *rdata;
		struct cifs_credits credits_on_stack;
		struct cifs_credits *credits = &credits_on_stack;

		if (open_file->invalidHandle) {
			rc = cifs_reopen_file(open_file, true);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
						   &rsize, credits);
		if (rc)
			break;

		/*
		 * Give up immediately if rsize is too small to read an entire
		 * page. The VFS will fall back to readpage. We should never
		 * reach this point however since we set ra_pages to 0 when the
		 * rsize is smaller than a cache page.
		 */
		if (unlikely(rsize < PAGE_SIZE)) {
			add_credits_and_wake_if(server, credits, 0);
			free_xid(xid);
			return 0;
		}

		rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
					 &nr_pages, &offset, &bytes);
		if (rc) {
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
		if (!rdata) {
			/* best to give up if we're out of mem */
			list_for_each_entry_safe(page, tpage, &tmplist, lru) {
				list_del(&page->lru);
				lru_cache_add_file(page);
				unlock_page(page);
				put_page(page);
			}
			rc = -ENOMEM;
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->mapping = mapping;
		rdata->offset = offset;
		rdata->bytes = bytes;
		rdata->pid = pid;
		rdata->pagesz = PAGE_SIZE;
		rdata->tailsz = PAGE_SIZE;
		rdata->read_into_pages = cifs_readpages_read_into_pages;
		rdata->copy_into_pages = cifs_readpages_copy_into_pages;
		rdata->credits = credits_on_stack;

		list_for_each_entry_safe(page, tpage, &tmplist, lru) {
			list_del(&page->lru);
			rdata->pages[rdata->nr_pages++] = page;
		}

		rc = adjust_credits(server, &rdata->credits, rdata->bytes);

		if (!rc) {
			if (rdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else
				rc = server->ops->async_readv(rdata);
		}

		if (rc) {
			add_credits_and_wake_if(server, &rdata->credits, 0);
			for (i = 0; i < rdata->nr_pages; i++) {
				page = rdata->pages[i];
				lru_cache_add_file(page);
				unlock_page(page);
				put_page(page);
			}
			/* Fallback to the readpage in error/reconnect cases */
			kref_put(&rdata->refcount, cifs_readdata_release);
			break;
		}

		kref_put(&rdata->refcount, cifs_readdata_release);
	}

	/*
	 * Any pages that have been shown to fscache but didn't get added to
	 * the pagecache must be uncached before they get returned to the
	 * allocator.
	 */
	cifs_fscache_readpages_cancel(mapping->host, page_list);
	free_xid(xid);
	return rc;
}
/*
 * cifs_readpage_worker must be called with the page pinned.
 */
static int cifs_readpage_worker(struct file *file, struct page *page,
	loff_t *poffset)
{
	char *read_data;
	int rc;

	/* Is the page cached? */
	rc = cifs_readpage_from_fscache(file_inode(file), page);
	if (rc == 0)
		goto read_complete;

	read_data = kmap(page);
	/* for reads over a certain size we could initiate async read ahead */

	rc = cifs_read(file, read_data, PAGE_SIZE, poffset);

	if (rc < 0)
		goto io_error;
	else
		cifs_dbg(FYI, "Bytes read %d\n", rc);

	/* we do not want atime to be less than mtime, it broke some apps */
	file_inode(file)->i_atime = current_time(file_inode(file));
	if (timespec64_compare(&(file_inode(file)->i_atime),
			       &(file_inode(file)->i_mtime)))
		file_inode(file)->i_atime = file_inode(file)->i_mtime;
	else
		file_inode(file)->i_atime = current_time(file_inode(file));

	if (PAGE_SIZE > rc)
		memset(read_data + rc, 0, PAGE_SIZE - rc);

	flush_dcache_page(page);
	SetPageUptodate(page);

	/* send this page to the cache */
	cifs_readpage_to_fscache(file_inode(file), page);

	rc = 0;

io_error:
	kunmap(page);
	unlock_page(page);

read_complete:
	return rc;
}
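
/*
 * Note on the atime logic above: timespec64_compare() returns nonzero
 * whenever the two timestamps differ, so after a successful read atime is
 * clamped to mtime any time they are not equal; only when they already
 * match does atime advance to the current time.
 *
 * cifs_readpage() below is the single-page ->readpage entry point, and
 * also the path the VFS falls back to when cifs_readpages() cannot be
 * used (e.g. rsize smaller than a page, or the small-buffer
 * address_space_operations at the end of this file that omit ->readpages).
 */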
static int cifs_readpage(struct file *file, struct page *page)
{
	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
	int rc = -EACCES;
	unsigned int xid;

	xid = get_xid();

	if (file->private_data == NULL) {
		rc = -EBADF;
		free_xid(xid);
		return rc;
	}

	cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
		 page, (int)offset, (int)offset);

	rc = cifs_readpage_worker(file, page, &offset);

	free_xid(xid);
	return rc;
}
static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
{
	struct cifsFileInfo *open_file;
	struct cifs_tcon *tcon =
		cifs_sb_master_tcon(CIFS_SB(cifs_inode->vfs_inode.i_sb));

	spin_lock(&tcon->open_file_lock);
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
			spin_unlock(&tcon->open_file_lock);
			return 1;
		}
	}
	spin_unlock(&tcon->open_file_lock);
	return 0;
}
/*
 * We do not want to update the file size from the server for inodes open
 * for write, to avoid races with writepage extending the file. In the
 * future we could consider allowing a refresh of the inode only on
 * increases in the file size, but this is tricky to do without racing
 * with writebehind page caching in the current Linux kernel design.
 */
bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
{
	if (!cifsInode)
		return true;

	if (is_inode_writable(cifsInode)) {
		/* This inode is open for write at least once */
		struct cifs_sb_info *cifs_sb;

		cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
			/* since there is no page cache to corrupt on
			   directio, we can change the size safely */
			return true;
		}

		if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
			return true;

		return false;
	} else
		return true;
}
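
/*
 * Example of the policy above: with the inode open for write and a cached
 * i_size of 8192, a server-reported end_of_file of 16384 is accepted as
 * genuine growth we have not cached, while 4096 is rejected because it may
 * merely predate writebehind data we have not flushed yet.
 */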
static int cifs_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	int oncethru = 0;
	pgoff_t index = pos >> PAGE_SHIFT;
	loff_t offset = pos & (PAGE_SIZE - 1);
	loff_t page_start = pos & PAGE_MASK;
	loff_t i_size;
	struct page *page;
	int rc = 0;

	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);

start:
	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page) {
		rc = -ENOMEM;
		goto out;
	}

	if (PageUptodate(page))
		goto out;

	/*
	 * If we write a full page it will be up to date, no need to read from
	 * the server. If the write is short, we'll end up doing a sync write
	 * instead.
	 */
	if (len == PAGE_SIZE)
		goto out;

	/*
	 * optimize away the read when we have an oplock, and we're not
	 * expecting to use any of the data we'd be reading in. That
	 * is, when the page lies beyond the EOF, or straddles the EOF
	 * and the write will cover all of the existing data.
	 */
	if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
		i_size = i_size_read(mapping->host);
		if (page_start >= i_size ||
		    (offset == 0 && (pos + len) >= i_size)) {
			zero_user_segments(page, 0, offset,
					   offset + len,
					   PAGE_SIZE);
			/*
			 * PageChecked means that the parts of the page
			 * to which we're not writing are considered up
			 * to date. Once the data is copied to the
			 * page, it can be set uptodate.
			 */
			SetPageChecked(page);
			goto out;
		}
	}

	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
		/*
		 * might as well read a page, it is fast enough. If we get
		 * an error, we don't need to return it. cifs_write_end will
		 * do a sync write instead since PG_uptodate isn't set.
		 */
		cifs_readpage_worker(file, page, &page_start);
		put_page(page);
		oncethru = 1;
		goto start;
	} else {
		/*
		 * We could try using another file handle if there is one,
		 * but how would we lock it to prevent a close of that handle
		 * racing with this read? In any case this will be written
		 * out by write_end so it is fine.
		 */
	}
out:
	*pagep = page;
	return rc;
}
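
/*
 * Interplay with cifs_write_end(): when the page above is left
 * !PG_uptodate (short write, no oplock, or the one-shot read attempt
 * failed), write_end falls back to a synchronous write of just the copied
 * bytes instead of marking the whole page uptodate.
 */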
static int cifs_release_page(struct page *page, gfp_t gfp)
{
	if (PagePrivate(page))
		return 0;

	return cifs_fscache_release_page(page, gfp);
}
static void cifs_invalidate_page(struct page *page, unsigned int offset,
				 unsigned int length)
{
	struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);

	if (offset == 0 && length == PAGE_SIZE)
		cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
}
static int cifs_launder_page(struct page *page)
{
	int rc = 0;
	loff_t range_start = page_offset(page);
	loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 0,
		.range_start = range_start,
		.range_end = range_end,
	};

	cifs_dbg(FYI, "Launder page: %p\n", page);

	if (clear_page_dirty_for_io(page))
		rc = cifs_writepage_locked(page, &wbc);

	cifs_fscache_invalidate_page(page, page->mapping->host);
	return rc;
}
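
/*
 * Work item run when the server breaks an oplock/lease: wait out pending
 * writers, downgrade the cached oplock state, flush (and, for a full
 * break, invalidate) the page cache, re-push byte-range locks, and
 * finally acknowledge the break to the server unless it was cancelled.
 */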
void cifs_oplock_break(struct work_struct *work)
{
	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
						  oplock_break);
	struct inode *inode = d_inode(cfile->dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	int rc = 0;

	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
			TASK_UNINTERRUPTIBLE);

	server->ops->downgrade_oplock(server, cinode,
		test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));

	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
						cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
			 inode);
		cinode->oplock = 0;
	}

	if (inode && S_ISREG(inode->i_mode)) {
		if (CIFS_CACHE_READ(cinode))
			break_lease(inode, O_RDONLY);
		else
			break_lease(inode, O_WRONLY);
		rc = filemap_fdatawrite(inode->i_mapping);
		if (!CIFS_CACHE_READ(cinode)) {
			rc = filemap_fdatawait(inode->i_mapping);
			mapping_set_error(inode->i_mapping, rc);
			cifs_zap_mapping(inode);
		}
		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
	}

	rc = cifs_push_locks(cfile);
	if (rc)
		cifs_dbg(VFS, "Push locks rc = %d\n", rc);

	/*
	 * Releasing a stale oplock after a recent reconnect of the SMB
	 * session, using a now-incorrect file handle, is not a data
	 * integrity issue, but do not bother sending an oplock release if
	 * the session to the server is still disconnected, since the oplock
	 * has already been released by the server.
	 */
	if (!cfile->oplock_break_cancelled) {
		rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
							     cinode);
		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
	}
	_cifsFileInfo_put(cfile, false /* do not wait for ourselves */);
	cifs_done_oplock_break(cinode);
}
/*
 * The presence of cifs_direct_io() in the address space ops vector
 * allows open() with the O_DIRECT flag, which would have failed otherwise.
 *
 * In the non-cached mode (mount with cache=none), we shunt off direct
 * read and write requests, so this method should never be called.
 *
 * Direct IO is not yet supported in the cached mode.
 */
static ssize_t
cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
{
	/*
	 * FIXME
	 * Eventually need to support direct IO for non-forcedirectio mounts
	 */
	return -EINVAL;
}
const struct address_space_operations cifs_addr_ops = {
	.readpage = cifs_readpage,
	.readpages = cifs_readpages,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.direct_IO = cifs_direct_io,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
};
/*
 * cifs_readpages requires the server to support a buffer large enough to
 * contain the header plus one complete page of data. Otherwise, we need
 * to leave cifs_readpages out of the address space operations.
 */
const struct address_space_operations cifs_addr_ops_smallbuf = {
	.readpage = cifs_readpage,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
};
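
/*
 * Relative to cifs_addr_ops, the small-buffer table above also leaves out
 * .direct_IO; only the large-buffer configuration advertises that method.
 */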