/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"
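
/*
 * Map the POSIX O_ACCMODE bits from open(2) onto the generic NT/SMB
 * desired-access bits that are requested from the server.
 */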
static inline int cifs_convert_flags(unsigned int flags)
{
	if ((flags & O_ACCMODE) == O_RDONLY)
		return GENERIC_READ;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		return GENERIC_WRITE;
	else if ((flags & O_ACCMODE) == O_RDWR) {
		/* GENERIC_ALL is too much permission to request
		   can cause unnecessary access denied on create */
		/* return GENERIC_ALL; */
		return (GENERIC_READ | GENERIC_WRITE);
	}

	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
		FILE_READ_DATA);
}
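
/*
 * Translate Linux open(2) flags into the SMB_O_* flags used by the
 * CIFS POSIX extensions open call.
 */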
static u32 cifs_posix_convert_flags(unsigned int flags)
{
	u32 posix_flags = 0;

	if ((flags & O_ACCMODE) == O_RDONLY)
		posix_flags = SMB_O_RDONLY;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		posix_flags = SMB_O_WRONLY;
	else if ((flags & O_ACCMODE) == O_RDWR)
		posix_flags = SMB_O_RDWR;

	if (flags & O_CREAT) {
		posix_flags |= SMB_O_CREAT;
		if (flags & O_EXCL)
			posix_flags |= SMB_O_EXCL;
	} else if (flags & O_EXCL)
		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
			 current->comm, current->tgid);

	if (flags & O_TRUNC)
		posix_flags |= SMB_O_TRUNC;
	/* be safe and imply O_SYNC for O_DSYNC */
	if (flags & O_DSYNC)
		posix_flags |= SMB_O_SYNC;
	if (flags & O_DIRECTORY)
		posix_flags |= SMB_O_DIRECTORY;
	if (flags & O_NOFOLLOW)
		posix_flags |= SMB_O_NOFOLLOW;
	if (flags & O_DIRECT)
		posix_flags |= SMB_O_DIRECT;

	return posix_flags;
}
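
/*
 * Choose the SMB create disposition matching the O_CREAT / O_EXCL /
 * O_TRUNC combination passed in by the VFS.
 */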
static inline int cifs_get_disposition(unsigned int flags)
{
	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
		return FILE_CREATE;
	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
		return FILE_OVERWRITE_IF;
	else if ((flags & O_CREAT) == O_CREAT)
		return FILE_OPEN_IF;
	else if ((flags & O_TRUNC) == O_TRUNC)
		return FILE_OVERWRITE;
	else
		return FILE_OPEN;
}
int cifs_posix_open(char *full_path, struct inode **pinode,
		    struct super_block *sb, int mode, unsigned int f_flags,
		    __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
	int rc;
	FILE_UNIX_BASIC_INFO *presp_data;
	__u32 posix_flags = 0;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	cifs_dbg(FYI, "posix open %s\n", full_path);

	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
	if (presp_data == NULL)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto posix_open_ret;
	}

	tcon = tlink_tcon(tlink);
	mode &= ~current_umask();

	posix_flags = cifs_posix_convert_flags(f_flags);
	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_remap(cifs_sb));
	cifs_put_tlink(tlink);

	if (rc)
		goto posix_open_ret;

	if (presp_data->Type == cpu_to_le32(-1))
		goto posix_open_ret; /* open ok, caller does qpathinfo */

	if (!pinode)
		goto posix_open_ret; /* caller does not need info */

	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

	/* get new inode and set it up */
	if (*pinode == NULL) {
		cifs_fill_uniqueid(sb, &fattr);
		*pinode = cifs_iget(sb, &fattr);
		if (!*pinode) {
			rc = -ENOMEM;
			goto posix_open_ret;
		}
	} else {
		cifs_fattr_to_inode(*pinode, &fattr);
	}

posix_open_ret:
	kfree(presp_data);
	return rc;
}
static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
	     struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
	     struct cifs_fid *fid, unsigned int xid)
{
	int rc;
	int desired_access;
	int disposition;
	int create_options = CREATE_NOT_DIR;
	FILE_ALL_INFO *buf;
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifs_open_parms oparms;

	if (!server->ops->open)
		return -ENOSYS;

	desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *	POSIX Flag            CIFS Disposition
 *	----------            ----------------
 *	O_CREAT               FILE_OPEN_IF
 *	O_CREAT | O_EXCL      FILE_CREATE
 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *	O_TRUNC               FILE_OVERWRITE
 *	none of the above     FILE_OPEN
 *
 *	Note that there is not a direct match between disposition
 *	FILE_SUPERSEDE (ie create whether or not file exists although
 *	O_CREAT | O_TRUNC is similar but truncates the existing
 *	file rather than creating a new file as FILE_SUPERSEDE does
 *	(which uses the attributes / metadata passed in on open call)
 *?
 *?  O_SYNC is a reasonable match to CIFS writethrough flag
 *?  and the read write flags match reasonably.  O_LARGEFILE
 *?  is irrelevant because largefile support is always used
 *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *	 O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 *********************************************************************/

	disposition = cifs_get_disposition(f_flags);

	/* BB pass O_SYNC flag through on file attributes .. BB */

	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	if (backup_cred(cifs_sb))
		create_options |= CREATE_OPEN_BACKUP_INTENT;

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = create_options;
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = fid;
	oparms.reconnect = false;

	rc = server->ops->open(xid, &oparms, oplock, buf);

	if (rc)
		goto out;

	if (tcon->unix_ext)
		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
					      xid);
	else
		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
					 xid, fid);

	if (rc) {
		server->ops->close(xid, tcon, fid);
		if (rc == -ESTALE)
			rc = -EOPENSTALE;
	}

out:
	kfree(buf);
	return rc;
}
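
/*
 * Return true if this inode has at least one cached mandatory byte-range
 * lock on any of its open file handles.
 */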
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
	struct cifs_fid_locks *cur;
	bool has_locks = false;

	down_read(&cinode->lock_sem);
	list_for_each_entry(cur, &cinode->llist, llist) {
		if (!list_empty(&cur->locks)) {
			has_locks = true;
			break;
		}
	}
	up_read(&cinode->lock_sem);
	return has_locks;
}
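
/*
 * Take lock_sem for writing without sleeping inside down_write(): spin on
 * down_write_trylock() with a short msleep() between attempts, presumably
 * so a writer cannot block behind reconnect paths that hold lock_sem for
 * reading.
 */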
void
cifs_down_write(struct rw_semaphore *sem)
{
	while (!down_write_trylock(sem))
		msleep(10);
}
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
		  struct tcon_link *tlink, __u32 oplock)
{
	struct dentry *dentry = file_dentry(file);
	struct inode *inode = d_inode(dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifsFileInfo *cfile;
	struct cifs_fid_locks *fdlocks;
	struct cifs_tcon *tcon = tlink_tcon(tlink);
	struct TCP_Server_Info *server = tcon->ses->server;

	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
	if (cfile == NULL)
		return cfile;

	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
	if (!fdlocks) {
		kfree(cfile);
		return NULL;
	}

	INIT_LIST_HEAD(&fdlocks->locks);
	fdlocks->cfile = cfile;
	cfile->llist = fdlocks;

	cfile->count = 1;
	cfile->pid = current->tgid;
	cfile->uid = current_fsuid();
	cfile->dentry = dget(dentry);
	cfile->f_flags = file->f_flags;
	cfile->invalidHandle = false;
	cfile->tlink = cifs_get_tlink(tlink);
	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
	mutex_init(&cfile->fh_mutex);
	spin_lock_init(&cfile->file_info_lock);

	cifs_sb_active(inode->i_sb);

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	cifs_down_write(&cinode->lock_sem);
	list_add(&fdlocks->llist, &cinode->llist);
	up_write(&cinode->lock_sem);

	spin_lock(&tcon->open_file_lock);
	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
		oplock = fid->pending_open->oplock;
	list_del(&fid->pending_open->olist);

	fid->purge_cache = false;
	server->ops->set_fid(cfile, fid, oplock);

	list_add(&cfile->tlist, &tcon->openFileList);

	/* if readable file instance put first in list */
	spin_lock(&cinode->open_file_lock);
	if (file->f_mode & FMODE_READ)
		list_add(&cfile->flist, &cinode->openFileList);
	else
		list_add_tail(&cfile->flist, &cinode->openFileList);
	spin_unlock(&cinode->open_file_lock);
	spin_unlock(&tcon->open_file_lock);

	if (fid->purge_cache)
		cifs_zap_mapping(inode);

	file->private_data = cfile;
	return cfile;
}
struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
	spin_lock(&cifs_file->file_info_lock);
	cifsFileInfo_get_locked(cifs_file);
	spin_unlock(&cifs_file->file_info_lock);
	return cifs_file;
}
/**
 * cifsFileInfo_put - release a reference of file priv data
 *
 * Always potentially wait for oplock handler. See _cifsFileInfo_put().
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
	_cifsFileInfo_put(cifs_file, true);
}
/**
 * _cifsFileInfo_put - release a reference of file priv data
 *
 * This may involve closing the filehandle @cifs_file out on the
 * server. Must be called without holding tcon->open_file_lock and
 * cifs_file->file_info_lock.
 *
 * If @wait_oplock_handler is true and we are releasing the last
 * reference, wait for any running oplock break handler of the file
 * and cancel any pending one. If calling this function from the
 * oplock break handler, you need to pass false.
 */
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct super_block *sb = inode->i_sb;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifsLockInfo *li, *tmp;
	struct cifs_fid fid;
	struct cifs_pending_open open;
	bool oplock_break_cancelled;

	spin_lock(&tcon->open_file_lock);
	spin_lock(&cifsi->open_file_lock);
	spin_lock(&cifs_file->file_info_lock);
	if (--cifs_file->count > 0) {
		spin_unlock(&cifs_file->file_info_lock);
		spin_unlock(&cifsi->open_file_lock);
		spin_unlock(&tcon->open_file_lock);
		return;
	}
	spin_unlock(&cifs_file->file_info_lock);

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/* store open in pending opens to make sure we don't miss lease break */
	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

	/* remove it from the lists */
	list_del(&cifs_file->flist);
	list_del(&cifs_file->tlist);

	if (list_empty(&cifsi->openFileList)) {
		cifs_dbg(FYI, "closing last open instance for inode %p\n",
			 d_inode(cifs_file->dentry));
		/*
		 * In strict cache mode we need invalidate mapping on the last
		 * close because it may cause an error when we open this file
		 * again and get at least level II oplock.
		 */
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
		cifs_set_oplock_level(cifsi, 0);
	}

	spin_unlock(&cifsi->open_file_lock);
	spin_unlock(&tcon->open_file_lock);

	oplock_break_cancelled = wait_oplock_handler ?
		cancel_work_sync(&cifs_file->oplock_break) : false;

	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
		struct TCP_Server_Info *server = tcon->ses->server;
		unsigned int xid;

		xid = get_xid();
		if (server->ops->close)
			server->ops->close(xid, tcon, &cifs_file->fid);
		_free_xid(xid);
	}

	if (oplock_break_cancelled)
		cifs_done_oplock_break(cifsi);

	cifs_del_pending_open(&open);

	/*
	 * Delete any outstanding lock records. We'll lose them when the file
	 * is closed anyway.
	 */
	cifs_down_write(&cifsi->lock_sem);
	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
		list_del(&li->llist);
		cifs_del_lock_waiters(li);
		kfree(li);
	}
	list_del(&cifs_file->llist->llist);
	kfree(cifs_file->llist);
	up_write(&cifsi->lock_sem);

	cifs_put_tlink(cifs_file->tlink);
	dput(cifs_file->dentry);
	cifs_sb_deactive(sb);
	kfree(cifs_file);
}
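
/*
 * VFS ->open() entry point for regular files: try a POSIX-extensions open
 * first when the server advertises support for it, otherwise fall back to
 * a regular NT-style open.
 */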
int cifs_open(struct inode *inode, struct file *file)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct TCP_Server_Info *server;
	struct cifs_tcon *tcon;
	struct tcon_link *tlink;
	struct cifsFileInfo *cfile = NULL;
	char *full_path = NULL;
	bool posix_open_ok = false;
	struct cifs_fid fid;
	struct cifs_pending_open open;

	xid = get_xid();

	cifs_sb = CIFS_SB(inode->i_sb);
	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		free_xid(xid);
		return PTR_ERR(tlink);
	}
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	full_path = build_path_from_dentry(file_dentry(file));
	if (full_path == NULL) {
		rc = -ENOMEM;
		goto out;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
		 inode, file->f_flags, full_path);

	if (file->f_flags & O_DIRECT &&
	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
			file->f_op = &cifs_file_direct_nobrl_ops;
		else
			file->f_op = &cifs_file_direct_ops;
	}

	if (server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (!tcon->broken_posix_open && tcon->unix_ext &&
	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/* can not refresh inode info since size could be stale */
		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				cifs_sb->mnt_file_mode /* ignored */,
				file->f_flags, &oplock, &fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix open succeeded\n");
			posix_open_ok = true;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			if (tcon->ses->serverNOS)
				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
					 tcon->ses->serverName,
					 tcon->ses->serverNOS);
			tcon->broken_posix_open = true;
		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
			 (rc != -EOPNOTSUPP)) /* path not found or net err */
			goto out;
		/*
		 * Else fallthrough to retry open the old way on network i/o
		 * or DFS errors.
		 */
	}

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	cifs_add_pending_open(&fid, tlink, &open);

	if (!posix_open_ok) {
		if (server->ops->get_lease_key)
			server->ops->get_lease_key(inode, &fid);

		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
				  file->f_flags, &oplock, &fid, xid);
		if (rc) {
			cifs_del_pending_open(&open);
			goto out;
		}
	}

	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
	if (cfile == NULL) {
		if (server->ops->close)
			server->ops->close(xid, tcon, &fid);
		cifs_del_pending_open(&open);
		rc = -ENOMEM;
		goto out;
	}

	cifs_fscache_set_inode_cookie(inode, file);

	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
		/*
		 * Time to set mode which we can not set earlier due to
		 * problems creating new read-only files.
		 */
		struct cifs_unix_set_info_args args = {
			.mode	= inode->i_mode,
			.uid	= INVALID_UID, /* no change */
			.gid	= INVALID_GID, /* no change */
			.ctime	= NO_CHANGE_64,
			.atime	= NO_CHANGE_64,
			.mtime	= NO_CHANGE_64,
			.device	= 0,
		};
		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
				       cfile->pid);
	}

out:
	kfree(full_path);
	free_xid(xid);
	cifs_put_tlink(tlink);
	return rc;
}
static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
	if (cinode->can_cache_brlcks) {
		/* can cache locks - no need to relock */
		up_read(&cinode->lock_sem);
		return rc;
	}

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	up_read(&cinode->lock_sem);
	return rc;
}
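
/*
 * Reopen a file handle whose server handle has been invalidated (for
 * example after a reconnect). If @can_flush is true, dirty pages are
 * written back and the inode info refreshed before the handle is reused.
 */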
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsInodeInfo *cinode;
	struct inode *inode;
	char *full_path = NULL;
	int desired_access;
	int disposition = FILE_OPEN;
	int create_options = CREATE_NOT_DIR;
	struct cifs_open_parms oparms;

	xid = get_xid();
	mutex_lock(&cfile->fh_mutex);
	if (!cfile->invalidHandle) {
		mutex_unlock(&cfile->fh_mutex);
		rc = 0;
		free_xid(xid);
		return rc;
	}

	inode = d_inode(cfile->dentry);
	cifs_sb = CIFS_SB(inode->i_sb);
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	/*
	 * Can not grab rename sem here because various ops, including those
	 * that already have the rename sem can end up causing writepage to get
	 * called and if the server was down that means we end up here, and we
	 * can never tell if the caller already has the rename_sem.
	 */
	full_path = build_path_from_dentry(cfile->dentry);
	if (full_path == NULL) {
		rc = -ENOMEM;
		mutex_unlock(&cfile->fh_mutex);
		free_xid(xid);
		return rc;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
		 inode, cfile->f_flags, full_path);

	if (tcon->ses->server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (tcon->unix_ext && cap_unix(tcon->ses) &&
	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
	     le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/*
		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
		 * original open. Must mask them off for a reopen.
		 */
		unsigned int oflags = cfile->f_flags &
						~(O_CREAT | O_EXCL | O_TRUNC);

		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
				     cifs_sb->mnt_file_mode /* ignored */,
				     oflags, &oplock, &cfile->fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix reopen succeeded\n");
			oparms.reconnect = true;
			goto reopen_success;
		}
		/*
		 * fallthrough to retry open the old way on errors, especially
		 * in the reconnect path it is important to retry hard
		 */
	}

	desired_access = cifs_convert_flags(cfile->f_flags);

	if (backup_cred(cifs_sb))
		create_options |= CREATE_OPEN_BACKUP_INTENT;

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (cfile->f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (cfile->f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &cfile->fid);

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = create_options;
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = &cfile->fid;
	oparms.reconnect = true;

	/*
	 * Can not refresh inode by passing in file_info buf to be returned by
	 * ops->open and then calling get_inode_info with returned buf since
	 * file might have write behind data that needs to be flushed and server
	 * version of file size can be stale. If we knew for sure that inode was
	 * not dirty locally we could do this.
	 */
	rc = server->ops->open(xid, &oparms, &oplock, NULL);
	if (rc == -ENOENT && oparms.reconnect == false) {
		/* durable handle timeout is expired - open the file again */
		rc = server->ops->open(xid, &oparms, &oplock, NULL);
		/* indicate that we need to relock the file */
		oparms.reconnect = true;
	}

	if (rc) {
		mutex_unlock(&cfile->fh_mutex);
		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
		cifs_dbg(FYI, "oplock: %d\n", oplock);
		goto reopen_error_exit;
	}

reopen_success:
	cfile->invalidHandle = false;
	mutex_unlock(&cfile->fh_mutex);
	cinode = CIFS_I(inode);

	if (can_flush) {
		rc = filemap_write_and_wait(inode->i_mapping);
		if (!is_interrupt_error(rc))
			mapping_set_error(inode->i_mapping, rc);

		if (tcon->unix_ext)
			rc = cifs_get_inode_info_unix(&inode, full_path,
						      inode->i_sb, xid);
		else
			rc = cifs_get_inode_info(&inode, full_path, NULL,
						 inode->i_sb, xid, NULL);
	}
	/*
	 * Else we are writing out data to server already and could deadlock if
	 * we tried to flush data, and since we do not know if we have data that
	 * would invalidate the current end of file on the server we can not go
	 * to the server to get the new inode info.
	 */

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	server->ops->set_fid(cfile, &cfile->fid, oplock);
	if (oparms.reconnect)
		cifs_relock_file(cfile);

reopen_error_exit:
	kfree(full_path);
	free_xid(xid);
	return rc;
}
int cifs_close(struct inode *inode, struct file *file)
{
	if (file->private_data != NULL) {
		cifsFileInfo_put(file->private_data);
		file->private_data = NULL;
	}

	/* return code from the ->release op is always ignored */
	return 0;
}
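
/*
 * After a reconnect, walk the tree connection's list of open files and
 * reopen every invalidated handle, so persistent handles survive the
 * loss of the session.
 */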
void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
	struct cifsFileInfo *open_file;
	struct list_head *tmp;
	struct list_head *tmp1;
	struct list_head tmp_list;

	if (!tcon->use_persistent || !tcon->need_reopen_files)
		return;

	tcon->need_reopen_files = false;

	cifs_dbg(FYI, "Reopen persistent handles");
	INIT_LIST_HEAD(&tmp_list);

	/* list all files open on tree connection, reopen resilient handles */
	spin_lock(&tcon->open_file_lock);
	list_for_each(tmp, &tcon->openFileList) {
		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
		if (!open_file->invalidHandle)
			continue;
		cifsFileInfo_get(open_file);
		list_add_tail(&open_file->rlist, &tmp_list);
	}
	spin_unlock(&tcon->open_file_lock);

	list_for_each_safe(tmp, tmp1, &tmp_list) {
		open_file = list_entry(tmp, struct cifsFileInfo, rlist);
		if (cifs_reopen_file(open_file, false /* do not flush */))
			tcon->need_reopen_files = true;
		list_del_init(&open_file->rlist);
		cifsFileInfo_put(open_file);
	}
}
int cifs_closedir(struct inode *inode, struct file *file)
{
	int rc = 0;
	unsigned int xid;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	char *buf;

	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

	if (cfile == NULL)
		return rc;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	cifs_dbg(FYI, "Freeing private data in close dir\n");
	spin_lock(&cfile->file_info_lock);
	if (server->ops->dir_needs_close(cfile)) {
		cfile->invalidHandle = true;
		spin_unlock(&cfile->file_info_lock);
		if (server->ops->close_dir)
			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
		else
			rc = -ENOSYS;
		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
		/* not much we can do if it fails anyway, ignore rc */
		rc = 0;
	} else
		spin_unlock(&cfile->file_info_lock);

	buf = cfile->srch_inf.ntwrk_buf_start;
	if (buf) {
		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
		cfile->srch_inf.ntwrk_buf_start = NULL;
		if (cfile->srch_inf.smallBuf)
			cifs_small_buf_release(buf);
		else
			cifs_buf_release(buf);
	}

	cifs_put_tlink(cfile->tlink);
	kfree(file->private_data);
	file->private_data = NULL;
	/* BB can we lock the filestruct while this is going on? */
	free_xid(xid);
	return rc;
}
static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type)
{
	struct cifsLockInfo *lock =
		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
	if (!lock)
		return lock;
	lock->offset = offset;
	lock->length = length;
	lock->type = type;
	lock->pid = current->tgid;
	INIT_LIST_HEAD(&lock->blist);
	init_waitqueue_head(&lock->block_q);
	return lock;
}
void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
		list_del_init(&li->blist);
		wake_up(&li->block_q);
	}
}
#define CIFS_LOCK_OP	0
#define CIFS_READ_OP	1
#define CIFS_WRITE_OP	2

/* @rw_check : 0 - no op, 1 - read, 2 - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
			    __u64 length, __u8 type, struct cifsFileInfo *cfile,
			    struct cifsLockInfo **conf_lock, int rw_check)
{
	struct cifsLockInfo *li;
	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

	list_for_each_entry(li, &fdlocks->locks, llist) {
		if (offset + length <= li->offset ||
		    offset >= li->offset + li->length)
			continue;
		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
		    server->ops->compare_fids(cfile, cur_cfile)) {
			/* shared lock prevents write op through the same fid */
			if (!(li->type & server->vals->shared_lock_type) ||
			    rw_check != CIFS_WRITE_OP)
				continue;
		}
		if ((type & server->vals->shared_lock_type) &&
		    ((server->ops->compare_fids(cfile, cur_cfile) &&
		      current->tgid == li->pid) || type == li->type))
			continue;
		if (conf_lock)
			*conf_lock = li;
		return true;
	}
	return false;
}
bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
			__u8 type, struct cifsLockInfo **conf_lock,
			int rw_check)
{
	bool rc = false;
	struct cifs_fid_locks *cur;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

	list_for_each_entry(cur, &cinode->llist, llist) {
		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
						 cfile, conf_lock, rw_check);
		if (rc)
			break;
	}

	return rc;
}
/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure
 * with its properties. Otherwise, set the flock type to F_UNLCK if we can
 * cache brlocks, or leave it the same if we can't. Returns 0 if we don't
 * need to request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
	       __u8 type, struct file_lock *flock)
{
	int rc = 0;
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	bool exist;

	down_read(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, offset, length, type,
					&conf_lock, CIFS_LOCK_OP);
	if (exist) {
		flock->fl_start = conf_lock->offset;
		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
		flock->fl_pid = conf_lock->pid;
		if (conf_lock->type & server->vals->shared_lock_type)
			flock->fl_type = F_RDLCK;
		else
			flock->fl_type = F_WRLCK;
	} else if (!cinode->can_cache_brlcks)
		rc = 1;
	else
		flock->fl_type = F_UNLCK;

	up_read(&cinode->lock_sem);
	return rc;
}
static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	cifs_down_write(&cinode->lock_sem);
	list_add_tail(&lock->llist, &cfile->llist->locks);
	up_write(&cinode->lock_sem);
}
/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
		 bool wait)
{
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	bool exist;
	int rc = 0;

try_again:
	exist = false;
	cifs_down_write(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
					lock->type, &conf_lock, CIFS_LOCK_OP);
	if (!exist && cinode->can_cache_brlcks) {
		list_add_tail(&lock->llist, &cfile->llist->locks);
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (!exist)
		rc = 1;
	else if (!wait)
		rc = -EACCES;
	else {
		list_add_tail(&lock->blist, &conf_lock->blist);
		up_write(&cinode->lock_sem);
		rc = wait_event_interruptible(lock->block_q,
					(lock->blist.prev == &lock->blist) &&
					(lock->blist.next == &lock->blist));
		if (!rc)
			goto try_again;
		cifs_down_write(&cinode->lock_sem);
		list_del_init(&lock->blist);
	}

	up_write(&cinode->lock_sem);
	return rc;
}
/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can
 * cache brlocks, or leave it the same if we can't. Returns 0 if we don't
 * need to request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
	int rc = 0;
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	unsigned char saved_type = flock->fl_type;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return 1;

	down_read(&cinode->lock_sem);
	posix_test_lock(file, flock);

	if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
		flock->fl_type = saved_type;
		rc = 1;
	}

	up_read(&cinode->lock_sem);
	return rc;
}
/*
 * Set the byte-range lock (posix style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if we need to request to the server;
 * 3) <0, if an error occurs while setting the lock.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	int rc = 1;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return rc;

try_again:
	cifs_down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

	rc = posix_lock_file(file, flock, NULL);
	up_write(&cinode->lock_sem);
	if (rc == FILE_LOCK_DEFERRED) {
		rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
		if (!rc)
			goto try_again;
		posix_unblock_lock(flock);
	}
	return rc;
}
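
/*
 * Send all cached byte-range locks for this open file to the server,
 * packing as many LOCKING_ANDX ranges into each request as the
 * negotiated buffer size allows.
 */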
static int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
	unsigned int xid;
	int rc = 0, stored_rc;
	struct cifsLockInfo *li, *tmp;
	struct cifs_tcon *tcon;
	unsigned int num, max_num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
	int i;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
		free_xid(xid);
		return -EINVAL;
	}

	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
		     PAGE_SIZE);
	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
			PAGE_SIZE);
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf) {
		free_xid(xid);
		return -ENOMEM;
	}

	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (li->type != types[i])
				continue;
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       (__u8)li->type, 0, num,
						       buf);
				if (stored_rc)
					rc = stored_rc;
				cur = buf;
				num = 0;
			} else
				cur++;
		}

		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       (__u8)types[i], 0, num, buf);
			if (stored_rc)
				rc = stored_rc;
		}
	}

	kfree(buf);
	free_xid(xid);
	return rc;
}
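
/*
 * Hash the file_lock owner pointer with a per-module-load secret to get a
 * stable "pid" to put on the wire for posix locks, likely so raw kernel
 * pointers are never exposed to the server.
 */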
static __u32
hash_lockowner(fl_owner_t owner)
{
	return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}

struct lock_to_push {
	struct list_head llist;
	__u64 offset;
	__u64 length;
	__u32 pid;
	__u16 netfid;
	__u8 type;
};
static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
	struct inode *inode = d_inode(cfile->dentry);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct file_lock *flock;
	struct file_lock_context *flctx = inode->i_flctx;
	unsigned int count = 0, i;
	int rc = 0, xid, type;
	struct list_head locks_to_send, *el;
	struct lock_to_push *lck, *tmp;
	__u64 length;

	xid = get_xid();

	if (!flctx)
		goto out;

	spin_lock(&flctx->flc_lock);
	list_for_each(el, &flctx->flc_posix) {
		count++;
	}
	spin_unlock(&flctx->flc_lock);

	INIT_LIST_HEAD(&locks_to_send);

	/*
	 * Allocating count locks is enough because no FL_POSIX locks can be
	 * added to the list while we are holding cinode->lock_sem that
	 * protects locking operations of this inode.
	 */
	for (i = 0; i < count; i++) {
		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
		if (!lck) {
			rc = -ENOMEM;
			goto err_out;
		}
		list_add_tail(&lck->llist, &locks_to_send);
	}

	el = locks_to_send.next;
	spin_lock(&flctx->flc_lock);
	list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
		if (el == &locks_to_send) {
			/*
			 * The list ended. We don't have enough allocated
			 * structures - something is really wrong.
			 */
			cifs_dbg(VFS, "Can't push all brlocks!\n");
			break;
		}
		length = 1 + flock->fl_end - flock->fl_start;
		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
			type = CIFS_RDLCK;
		else
			type = CIFS_WRLCK;
		lck = list_entry(el, struct lock_to_push, llist);
		lck->pid = hash_lockowner(flock->fl_owner);
		lck->netfid = cfile->fid.netfid;
		lck->length = length;
		lck->type = type;
		lck->offset = flock->fl_start;
		el = el->next;
	}
	spin_unlock(&flctx->flc_lock);

	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		int stored_rc;

		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
					     lck->offset, lck->length, NULL,
					     lck->type, 0);
		if (stored_rc)
			rc = stored_rc;
		list_del(&lck->llist);
		kfree(lck);
	}

out:
	free_xid(xid);
	return rc;
err_out:
	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		list_del(&lck->llist);
		kfree(lck);
	}
	goto out;
}
static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	/* we are going to update can_cache_brlcks here - need a write access */
	cifs_down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	cinode->can_cache_brlcks = false;
	up_write(&cinode->lock_sem);
	return rc;
}
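
/*
 * Decode a VFS struct file_lock into the SMB lock type and the
 * lock/unlock/wait flags used by the lock routines below.
 */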
static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
		bool *wait_flag, struct TCP_Server_Info *server)
{
	if (flock->fl_flags & FL_POSIX)
		cifs_dbg(FYI, "Posix\n");
	if (flock->fl_flags & FL_FLOCK)
		cifs_dbg(FYI, "Flock\n");
	if (flock->fl_flags & FL_SLEEP) {
		cifs_dbg(FYI, "Blocking lock\n");
		*wait_flag = true;
	}
	if (flock->fl_flags & FL_ACCESS)
		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
	if (flock->fl_flags & FL_LEASE)
		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
	if (flock->fl_flags &
	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
	       FL_ACCESS | FL_LEASE | FL_CLOSE)))
		cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);

	*type = server->vals->large_lock_type;
	if (flock->fl_type == F_WRLCK) {
		cifs_dbg(FYI, "F_WRLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_UNLCK) {
		cifs_dbg(FYI, "F_UNLCK\n");
		*type |= server->vals->unlock_lock_type;
		*unlock = 1;
		/* Check if unlock includes more than one lock range */
	} else if (flock->fl_type == F_RDLCK) {
		cifs_dbg(FYI, "F_RDLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_EXLCK) {
		cifs_dbg(FYI, "F_EXLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_SHLCK) {
		cifs_dbg(FYI, "F_SHLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else
		cifs_dbg(FYI, "Unknown type of lock\n");
}
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	__u16 netfid = cfile->fid.netfid;

	if (posix_lck) {
		int posix_lock_type;

		rc = cifs_posix_lock_test(file, flock);
		if (!rc)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;
		rc = CIFSSMBPosixLock(xid, tcon, netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length, flock,
				      posix_lock_type, wait_flag);
		return rc;
	}

	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
	if (!rc)
		return rc;

	/* BB we could chain these into one lock request BB */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 0, 1, false);
		flock->fl_type = F_UNLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
		return 0;
	}

	if (type & server->vals->shared_lock_type) {
		flock->fl_type = F_WRLCK;
		return 0;
	}

	type &= ~server->vals->exclusive_lock_type;

	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
				    type | server->vals->shared_lock_type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
			type | server->vals->shared_lock_type, 0, 1, false);
		flock->fl_type = F_RDLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
	} else
		flock->fl_type = F_WRLCK;

	return 0;
}
void
cifs_move_llist(struct list_head *source, struct list_head *dest)
{
	struct list_head *li, *tmp;
	list_for_each_safe(li, tmp, source)
		list_move(li, dest);
}

void
cifs_free_llist(struct list_head *llist)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, llist, llist) {
		cifs_del_lock_waiters(li);
		list_del(&li->llist);
		kfree(li);
	}
}
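
/*
 * Unlock every cached byte-range lock that falls inside the given unlock
 * request, batching LOCKING_ANDX ranges per server call and restoring the
 * saved locks if a server request fails.
 */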
static int
cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
		  unsigned int xid)
{
	int rc = 0, stored_rc;
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
	unsigned int i;
	unsigned int max_num, num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifsLockInfo *li, *tmp;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct list_head tmp_llist;

	INIT_LIST_HEAD(&tmp_llist);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
		return -EINVAL;

	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
		     PAGE_SIZE);
	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
			PAGE_SIZE);
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	cifs_down_write(&cinode->lock_sem);
	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (flock->fl_start > li->offset ||
			    (flock->fl_start + length) <
			    (li->offset + li->length))
				continue;
			if (current->tgid != li->pid)
				continue;
			if (types[i] != li->type)
				continue;
			if (cinode->can_cache_brlcks) {
				/*
				 * We can cache brlock requests - simply remove
				 * a lock from the file's list.
				 */
				list_del(&li->llist);
				cifs_del_lock_waiters(li);
				kfree(li);
				continue;
			}
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			/*
			 * We need to save a lock here to let us add it again to
			 * the file's list if the unlock range request fails on
			 * the server.
			 */
			list_move(&li->llist, &tmp_llist);
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       li->type, num, 0, buf);
				if (stored_rc) {
					/*
					 * We failed on the unlock range
					 * request - add all locks from the tmp
					 * list to the head of the file's list.
					 */
					cifs_move_llist(&tmp_llist,
							&cfile->llist->locks);
					rc = stored_rc;
				} else
					/*
					 * The unlock range request succeeded -
					 * free the tmp list.
					 */
					cifs_free_llist(&tmp_llist);
				cur = buf;
				num = 0;
			} else
				cur++;
		}
		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       types[i], num, 0, buf);
			if (stored_rc) {
				cifs_move_llist(&tmp_llist,
						&cfile->llist->locks);
				rc = stored_rc;
			} else
				cifs_free_llist(&tmp_llist);
		}
	}

	up_write(&cinode->lock_sem);
	kfree(buf);
	return rc;
}
static int
cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, int lock, int unlock,
	   unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct inode *inode = d_inode(cfile->dentry);

	if (posix_lck) {
		int posix_lock_type;

		rc = cifs_posix_lock_set(file, flock);
		if (!rc || rc < 0)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;

		if (unlock == 1)
			posix_lock_type = CIFS_UNLCK;

		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length,
				      NULL, posix_lock_type, wait_flag);
		goto out;
	}

	if (lock) {
		struct cifsLockInfo *lock;

		lock = cifs_lock_init(flock->fl_start, length, type);
		if (!lock)
			return -ENOMEM;

		rc = cifs_lock_add_if(cfile, lock, wait_flag);
		if (rc < 0) {
			kfree(lock);
			return rc;
		}
		if (!rc)
			goto out;

		/*
		 * Windows 7 server can delay breaking lease from read to None
		 * if we set a byte-range lock on a file - break it explicitly
		 * before sending the lock to the server to be sure the next
		 * read won't conflict with non-overlapping locks due to
		 * pagereading.
		 */
		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
					CIFS_CACHE_READ(CIFS_I(inode))) {
			cifs_zap_mapping(inode);
			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
				 inode);
			CIFS_I(inode)->oplock = 0;
		}

		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 1, 0, wait_flag);
		if (rc) {
			kfree(lock);
			return rc;
		}

		cifs_lock_add(cfile, lock);
	} else if (unlock)
		rc = server->ops->mand_unlock_range(cfile, flock, xid);

out:
	if (flock->fl_flags & FL_POSIX) {
		/*
		 * If this is a request to remove all locks because we
		 * are closing the file, it doesn't matter if the
		 * unlocking failed as both cifs.ko and the SMB server
		 * remove the lock on file close
		 */
		if (rc) {
			cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
			if (!(flock->fl_flags & FL_CLOSE))
				return rc;
		}
		rc = locks_lock_file_wait(file, flock);
	}
	return rc;
}
int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
{
	int rc, xid;
	int lock = 0, unlock = 0;
	bool wait_flag = false;
	bool posix_lck = false;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct cifsInodeInfo *cinode;
	struct cifsFileInfo *cfile;
	__u16 netfid;
	__u32 type;

	rc = -EACCES;
	xid = get_xid();

	cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
		 cmd, flock->fl_flags, flock->fl_type,
		 flock->fl_start, flock->fl_end);

	cfile = (struct cifsFileInfo *)file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
			tcon->ses->server);
	cifs_sb = CIFS_FILE_SB(file);
	netfid = cfile->fid.netfid;
	cinode = CIFS_I(file_inode(file));

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		posix_lck = true;
	/*
	 * BB add code here to normalize offset and length to account for
	 * negative length which we can not accept over the wire.
	 */
	if (IS_GETLK(cmd)) {
		rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
		free_xid(xid);
		return rc;
	}

	if (!lock && !unlock) {
		/*
		 * if no lock or unlock then nothing to do since we do not
		 * know what it is
		 */
		free_xid(xid);
		return -EOPNOTSUPP;
	}

	rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
			xid);
	free_xid(xid);
	return rc;
}
/*
 * update the file size (if needed) after a write. Should be called with
 * the inode->i_lock held
 */
void
cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
		unsigned int bytes_written)
{
	loff_t end_of_write = offset + bytes_written;

	if (end_of_write > cifsi->server_eof)
		cifsi->server_eof = end_of_write;
}
static ssize_t
cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
	   size_t write_size, loff_t *offset)
{
	int rc = 0;
	unsigned int bytes_written = 0;
	unsigned int total_written;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	struct dentry *dentry = open_file->dentry;
	struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
	struct cifs_io_parms io_parms;

	cifs_sb = CIFS_SB(dentry->d_sb);

	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
		 write_size, *offset, dentry);

	tcon = tlink_tcon(open_file->tlink);
	server = tcon->ses->server;

	if (!server->ops->sync_write)
		return -ENOSYS;

	xid = get_xid();

	for (total_written = 0; write_size > total_written;
	     total_written += bytes_written) {
		rc = -EAGAIN;
		while (rc == -EAGAIN) {
			struct kvec iov[2];
			unsigned int len;

			if (open_file->invalidHandle) {
				/* we could deadlock if we called
				   filemap_fdatawait from here so tell
				   reopen_file not to flush data to
				   server now */
				rc = cifs_reopen_file(open_file, false);
				if (rc != 0)
					break;
			}

			len = min(server->ops->wp_retry_size(d_inode(dentry)),
				  (unsigned int)write_size - total_written);
			/* iov[0] is reserved for smb header */
			iov[1].iov_base = (char *)write_data + total_written;
			iov[1].iov_len = len;
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = len;
			rc = server->ops->sync_write(xid, &open_file->fid,
						     &io_parms, &bytes_written,
						     iov, 1);
		}
		if (rc || (bytes_written == 0)) {
			if (total_written)
				break;
			else {
				free_xid(xid);
				return rc;
			}
		} else {
			spin_lock(&d_inode(dentry)->i_lock);
			cifs_update_eof(cifsi, *offset, bytes_written);
			spin_unlock(&d_inode(dentry)->i_lock);
			*offset += bytes_written;
		}
	}

	cifs_stats_bytes_written(tcon, total_written);

	if (total_written > 0) {
		spin_lock(&d_inode(dentry)->i_lock);
		if (*offset > d_inode(dentry)->i_size)
			i_size_write(d_inode(dentry), *offset);
		spin_unlock(&d_inode(dentry)->i_lock);
	}
	mark_inode_dirty_sync(d_inode(dentry));
	free_xid(xid);
	return total_written;
}
struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
					bool fsuid_only)
{
	struct cifsFileInfo *open_file = NULL;
	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&cifs_inode->open_file_lock);
	/* we could simply get the first_list_entry since write-only entries
	   are always at the end of the list but since the first entry might
	   have a close pending, we go through the whole list */
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
			if (!open_file->invalidHandle) {
				/* found a good file */
				/* lock it so it will not be closed on us */
				cifsFileInfo_get(open_file);
				spin_unlock(&cifs_inode->open_file_lock);
				return open_file;
			} /* else might as well continue, and look for
			     another, or simply have the caller reopen it
			     again rather than trying to fix this handle */
		} else /* write only file */
			break; /* write only files are last so must be done */
	}
	spin_unlock(&cifs_inode->open_file_lock);
	return NULL;
}
struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
					bool fsuid_only)
{
	struct cifsFileInfo *open_file, *inv_file = NULL;
	struct cifs_sb_info *cifs_sb;
	bool any_available = false;
	int rc;
	unsigned int refind = 0;

	/* Having a null inode here (because mapping->host was set to zero by
	   the VFS or MM) should not happen but we had reports of an oops (due
	   to it being zero) during stress testcases so we need to check for it */

	if (cifs_inode == NULL) {
		cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
		dump_stack();
		return NULL;
	}

	cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&cifs_inode->open_file_lock);
refind_writable:
	if (refind > MAX_REOPEN_ATT) {
		spin_unlock(&cifs_inode->open_file_lock);
		return NULL;
	}
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (!any_available && open_file->pid != current->tgid)
			continue;
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
			if (!open_file->invalidHandle) {
				/* found a good writable file */
				cifsFileInfo_get(open_file);
				spin_unlock(&cifs_inode->open_file_lock);
				return open_file;
			} else {
				if (!inv_file)
					inv_file = open_file;
			}
		}
	}
	/* couldn't find usable FH with same pid, try any available */
	if (!any_available) {
		any_available = true;
		goto refind_writable;
	}

	if (inv_file) {
		any_available = false;
		cifsFileInfo_get(inv_file);
	}

	spin_unlock(&cifs_inode->open_file_lock);

	if (inv_file) {
		rc = cifs_reopen_file(inv_file, false);
		if (!rc)
			return inv_file;
		else {
			spin_lock(&cifs_inode->open_file_lock);
			list_move_tail(&inv_file->flist,
				       &cifs_inode->openFileList);
			spin_unlock(&cifs_inode->open_file_lock);
			cifsFileInfo_put(inv_file);
			++refind;
			inv_file = NULL;
			spin_lock(&cifs_inode->open_file_lock);
			goto refind_writable;
		}
	}

	return NULL;
}
1969 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1971 struct address_space *mapping = page->mapping;
1972 loff_t offset = (loff_t)page->index << PAGE_SHIFT;
1973 char *write_data;
1974 int rc = -EFAULT;
1975 int bytes_written = 0;
1976 struct inode *inode;
1977 struct cifsFileInfo *open_file;
1979 if (!mapping || !mapping->host)
1980 return -EFAULT;
1982 inode = page->mapping->host;
1984 offset += (loff_t)from;
1985 write_data = kmap(page);
1986 write_data += from;
1988 if ((to > PAGE_SIZE) || (from > to)) {
1989 kunmap(page);
1990 return -EIO;
1993 /* racing with truncate? */
1994 if (offset > mapping->host->i_size) {
1995 kunmap(page);
1996 return 0; /* don't care */
1999 /* check to make sure that we are not extending the file */
2000 if (mapping->host->i_size - offset < (loff_t)to)
2001 to = (unsigned)(mapping->host->i_size - offset);
2003 open_file = find_writable_file(CIFS_I(mapping->host), false);
2004 if (open_file) {
2005 bytes_written = cifs_write(open_file, open_file->pid,
2006 write_data, to - from, &offset);
2007 cifsFileInfo_put(open_file);
2008 /* Does mm or vfs already set times? */
2009 inode->i_atime = inode->i_mtime = current_time(inode);
2010 if ((bytes_written > 0) && (offset))
2011 rc = 0;
2012 else if (bytes_written < 0)
2013 rc = bytes_written;
2014 } else {
2015 cifs_dbg(FYI, "No writable filehandles for inode\n");
2016 rc = -EIO;
2019 kunmap(page);
2020 return rc;
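/*
 * The next three helpers implement the stages of cifs_writepages():
 * wdata_alloc_and_fillpages() gathers up to @tofind dirty pages,
 * wdata_prepare_pages() locks a contiguous run of them and tags them
 * for writeback, and wdata_send_pages() issues the async write.
 */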
2023 static struct cifs_writedata *
2024 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2025 pgoff_t end, pgoff_t *index,
2026 unsigned int *found_pages)
2028 struct cifs_writedata *wdata;
2030 wdata = cifs_writedata_alloc((unsigned int)tofind,
2031 cifs_writev_complete);
2032 if (!wdata)
2033 return NULL;
2035 *found_pages = find_get_pages_range_tag(mapping, index, end,
2036 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2037 return wdata;
2040 static unsigned int
2041 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
2042 struct address_space *mapping,
2043 struct writeback_control *wbc,
2044 pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2046 unsigned int nr_pages = 0, i;
2047 struct page *page;
2049 for (i = 0; i < found_pages; i++) {
2050 page = wdata->pages[i];
2051 /*
2052 * At this point we hold neither the i_pages lock nor the
2053 * page lock: the page may be truncated or invalidated
2054 * (changing page->mapping to NULL), or even swizzled
2055 * back from swapper_space to tmpfs file mapping
2056 */
2058 if (nr_pages == 0)
2059 lock_page(page);
2060 else if (!trylock_page(page))
2061 break;
2063 if (unlikely(page->mapping != mapping)) {
2064 unlock_page(page);
2065 break;
2068 if (!wbc->range_cyclic && page->index > end) {
2069 *done = true;
2070 unlock_page(page);
2071 break;
2074 if (*next && (page->index != *next)) {
2075 /* Not next consecutive page */
2076 unlock_page(page);
2077 break;
2080 if (wbc->sync_mode != WB_SYNC_NONE)
2081 wait_on_page_writeback(page);
2083 if (PageWriteback(page) ||
2084 !clear_page_dirty_for_io(page)) {
2085 unlock_page(page);
2086 break;
2089 /*
2090 * This actually clears the dirty bit in the radix tree.
2091 * See cifs_writepage() for more commentary.
2092 */
2093 set_page_writeback(page);
2094 if (page_offset(page) >= i_size_read(mapping->host)) {
2095 *done = true;
2096 unlock_page(page);
2097 end_page_writeback(page);
2098 break;
2101 wdata->pages[i] = page;
2102 *next = page->index + 1;
2103 ++nr_pages;
2106 /* reset index to refind any pages skipped */
2107 if (nr_pages == 0)
2108 *index = wdata->pages[0]->index + 1;
2110 /* put any pages we aren't going to use */
2111 for (i = nr_pages; i < found_pages; i++) {
2112 put_page(wdata->pages[i]);
2113 wdata->pages[i] = NULL;
2116 return nr_pages;
2119 static int
2120 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2121 struct address_space *mapping, struct writeback_control *wbc)
2123 int rc = 0;
2124 struct TCP_Server_Info *server;
2125 unsigned int i;
2127 wdata->sync_mode = wbc->sync_mode;
2128 wdata->nr_pages = nr_pages;
2129 wdata->offset = page_offset(wdata->pages[0]);
2130 wdata->pagesz = PAGE_SIZE;
2131 wdata->tailsz = min(i_size_read(mapping->host) -
2132 page_offset(wdata->pages[nr_pages - 1]),
2133 (loff_t)PAGE_SIZE);
2134 wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
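/*
 * e.g. nr_pages == 3 with i_size ending 100 bytes into the last page
 * gives tailsz = 100 and bytes = 2 * PAGE_SIZE + 100.
 */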
2136 if (wdata->cfile != NULL)
2137 cifsFileInfo_put(wdata->cfile);
2138 wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
2139 if (!wdata->cfile) {
2140 cifs_dbg(VFS, "No writable handles for inode\n");
2141 rc = -EBADF;
2142 } else {
2143 wdata->pid = wdata->cfile->pid;
2144 server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2145 rc = server->ops->async_writev(wdata, cifs_writedata_release);
2148 for (i = 0; i < nr_pages; ++i)
2149 unlock_page(wdata->pages[i]);
2151 return rc;
2154 static int cifs_writepages(struct address_space *mapping,
2155 struct writeback_control *wbc)
2157 struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
2158 struct TCP_Server_Info *server;
2159 bool done = false, scanned = false, range_whole = false;
2160 pgoff_t end, index;
2161 struct cifs_writedata *wdata;
2162 int rc = 0;
2163 int saved_rc = 0;
2165 /*
2166 * If wsize is smaller than the page cache size, default to writing
2167 * one page at a time via cifs_writepage
2168 */
2169 if (cifs_sb->wsize < PAGE_SIZE)
2170 return generic_writepages(mapping, wbc);
2172 if (wbc->range_cyclic) {
2173 index = mapping->writeback_index; /* Start from prev offset */
2174 end = -1;
2175 } else {
2176 index = wbc->range_start >> PAGE_SHIFT;
2177 end = wbc->range_end >> PAGE_SHIFT;
2178 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2179 range_whole = true;
2180 scanned = true;
2182 server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2183 retry:
2184 while (!done && index <= end) {
2185 unsigned int i, nr_pages, found_pages, wsize, credits;
2186 pgoff_t next = 0, tofind, saved_index = index;
2188 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2189 &wsize, &credits);
2190 if (rc != 0) {
2191 done = true;
2192 break;
2195 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
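/* e.g. wsize = 64KB with PAGE_SIZE = 4KB caps each batch at 16 pages */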
2197 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2198 &found_pages);
2199 if (!wdata) {
2200 rc = -ENOMEM;
2201 done = true;
2202 add_credits_and_wake_if(server, credits, 0);
2203 break;
2206 if (found_pages == 0) {
2207 kref_put(&wdata->refcount, cifs_writedata_release);
2208 add_credits_and_wake_if(server, credits, 0);
2209 break;
2212 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2213 end, &index, &next, &done);
2215 /* nothing to write? */
2216 if (nr_pages == 0) {
2217 kref_put(&wdata->refcount, cifs_writedata_release);
2218 add_credits_and_wake_if(server, credits, 0);
2219 continue;
2222 wdata->credits = credits;
2224 rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2226 /* send failure -- clean up the mess */
2227 if (rc != 0) {
2228 add_credits_and_wake_if(server, wdata->credits, 0);
2229 for (i = 0; i < nr_pages; ++i) {
2230 if (is_retryable_error(rc))
2231 redirty_page_for_writepage(wbc,
2232 wdata->pages[i]);
2233 else
2234 SetPageError(wdata->pages[i]);
2235 end_page_writeback(wdata->pages[i]);
2236 put_page(wdata->pages[i]);
2238 if (!is_retryable_error(rc))
2239 mapping_set_error(mapping, rc);
2241 kref_put(&wdata->refcount, cifs_writedata_release);
2243 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2244 index = saved_index;
2245 continue;
2248 /* Return immediately if we received a signal during writing */
2249 if (is_interrupt_error(rc)) {
2250 done = true;
2251 break;
2254 if (rc != 0 && saved_rc == 0)
2255 saved_rc = rc;
2257 wbc->nr_to_write -= nr_pages;
2258 if (wbc->nr_to_write <= 0)
2259 done = true;
2261 index = next;
2264 if (!scanned && !done) {
2265 /*
2266 * We hit the last page and there is more work to be done: wrap
2267 * back to the start of the file
2268 */
2269 scanned = true;
2270 index = 0;
2271 goto retry;
2274 if (saved_rc != 0)
2275 rc = saved_rc;
2277 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2278 mapping->writeback_index = index;
2280 return rc;
2283 static int
2284 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2286 int rc;
2287 unsigned int xid;
2289 xid = get_xid();
2290 /* BB add check for wbc flags */
2291 get_page(page);
2292 if (!PageUptodate(page))
2293 cifs_dbg(FYI, "ppw - page not up to date\n");
2295 /*
2296 * Set the "writeback" flag, and clear "dirty" in the radix tree.
2297 *
2298 * A writepage() implementation always needs to do either this,
2299 * or re-dirty the page with "redirty_page_for_writepage()" in
2300 * the case of a failure.
2301 *
2302 * Just unlocking the page will cause the radix tree tag-bits
2303 * to fail to update with the state of the page correctly.
2304 */
2305 set_page_writeback(page);
2306 retry_write:
2307 rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2308 if (is_retryable_error(rc)) {
2309 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2310 goto retry_write;
2311 redirty_page_for_writepage(wbc, page);
2312 } else if (rc != 0) {
2313 SetPageError(page);
2314 mapping_set_error(page->mapping, rc);
2315 } else {
2316 SetPageUptodate(page);
2318 end_page_writeback(page);
2319 put_page(page);
2320 free_xid(xid);
2321 return rc;
2324 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2326 int rc = cifs_writepage_locked(page, wbc);
2327 unlock_page(page);
2328 return rc;
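/*
 * ->write_end for the cifs address space: @copied may be less than @len
 * if the copy from userspace faulted partway. If the page never became
 * uptodate we fall back to a synchronous cifs_write() of just the
 * copied bytes instead of dirtying the page.
 */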
2331 static int cifs_write_end(struct file *file, struct address_space *mapping,
2332 loff_t pos, unsigned len, unsigned copied,
2333 struct page *page, void *fsdata)
2335 int rc;
2336 struct inode *inode = mapping->host;
2337 struct cifsFileInfo *cfile = file->private_data;
2338 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2339 __u32 pid;
2341 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2342 pid = cfile->pid;
2343 else
2344 pid = current->tgid;
2346 cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2347 page, pos, copied);
2349 if (PageChecked(page)) {
2350 if (copied == len)
2351 SetPageUptodate(page);
2352 ClearPageChecked(page);
2353 } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2354 SetPageUptodate(page);
2356 if (!PageUptodate(page)) {
2357 char *page_data;
2358 unsigned offset = pos & (PAGE_SIZE - 1);
2359 unsigned int xid;
2361 xid = get_xid();
2362 /* this is probably better than directly calling
2363 cifs_partialpagewrite since in this function the file handle
2364 is known, which we might as well leverage */
2365 /* BB check if anything else missing out of ppw
2366 such as updating last write time */
2367 page_data = kmap(page);
2368 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2369 /* if (rc < 0) should we set writebehind rc? */
2370 kunmap(page);
2372 free_xid(xid);
2373 } else {
2374 rc = copied;
2375 pos += copied;
2376 set_page_dirty(page);
2379 if (rc > 0) {
2380 spin_lock(&inode->i_lock);
2381 if (pos > inode->i_size)
2382 i_size_write(inode, pos);
2383 spin_unlock(&inode->i_lock);
2386 unlock_page(page);
2387 put_page(page);
2389 return rc;
2392 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2393 int datasync)
2395 unsigned int xid;
2396 int rc = 0;
2397 struct cifs_tcon *tcon;
2398 struct TCP_Server_Info *server;
2399 struct cifsFileInfo *smbfile = file->private_data;
2400 struct inode *inode = file_inode(file);
2401 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2403 rc = file_write_and_wait_range(file, start, end);
2404 if (rc)
2405 return rc;
2406 inode_lock(inode);
2408 xid = get_xid();
2410 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2411 file, datasync);
2413 if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2414 rc = cifs_zap_mapping(inode);
2415 if (rc) {
2416 cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2417 rc = 0; /* don't care about it in fsync */
2421 tcon = tlink_tcon(smbfile->tlink);
2422 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2423 server = tcon->ses->server;
2424 if (server->ops->flush)
2425 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2426 else
2427 rc = -ENOSYS;
2430 free_xid(xid);
2431 inode_unlock(inode);
2432 return rc;
2435 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2437 unsigned int xid;
2438 int rc = 0;
2439 struct cifs_tcon *tcon;
2440 struct TCP_Server_Info *server;
2441 struct cifsFileInfo *smbfile = file->private_data;
2442 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2443 struct inode *inode = file->f_mapping->host;
2445 rc = file_write_and_wait_range(file, start, end);
2446 if (rc)
2447 return rc;
2448 inode_lock(inode);
2450 xid = get_xid();
2452 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2453 file, datasync);
2455 tcon = tlink_tcon(smbfile->tlink);
2456 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2457 server = tcon->ses->server;
2458 if (server->ops->flush)
2459 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2460 else
2461 rc = -ENOSYS;
2464 free_xid(xid);
2465 inode_unlock(inode);
2466 return rc;
2469 /*
2470 * As file closes, flush all cached write data for this inode checking
2471 * for write behind errors.
2472 */
2473 int cifs_flush(struct file *file, fl_owner_t id)
2475 struct inode *inode = file_inode(file);
2476 int rc = 0;
2478 if (file->f_mode & FMODE_WRITE)
2479 rc = filemap_write_and_wait(inode->i_mapping);
2481 cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2483 return rc;
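/*
 * Allocate @num_pages pages for an uncached write. On failure, every
 * page allocated so far is released and -ENOMEM is returned, so the
 * caller never sees a partially filled array.
 */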
2486 static int
2487 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2489 int rc = 0;
2490 unsigned long i;
2492 for (i = 0; i < num_pages; i++) {
2493 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2494 if (!pages[i]) {
2495 /*
2496 * save number of pages we have already allocated and
2497 * return with ENOMEM error
2498 */
2499 num_pages = i;
2500 rc = -ENOMEM;
2501 break;
2505 if (rc) {
2506 for (i = 0; i < num_pages; i++)
2507 put_page(pages[i]);
2509 return rc;
2512 static inline
2513 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2515 size_t num_pages;
2516 size_t clen;
2518 clen = min_t(const size_t, len, wsize);
2519 num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2521 if (cur_len)
2522 *cur_len = clen;
2524 return num_pages;
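/*
 * Example for get_numpages() above (illustrative numbers): wsize = 65536
 * and len = 200000 give clen = 65536 and DIV_ROUND_UP(65536, 4096) = 16
 * pages for this chunk.
 */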
2527 static void
2528 cifs_uncached_writedata_release(struct kref *refcount)
2530 int i;
2531 struct cifs_writedata *wdata = container_of(refcount,
2532 struct cifs_writedata, refcount);
2534 kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2535 for (i = 0; i < wdata->nr_pages; i++)
2536 put_page(wdata->pages[i]);
2537 cifs_writedata_release(refcount);
2540 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2542 static void
2543 cifs_uncached_writev_complete(struct work_struct *work)
2545 struct cifs_writedata *wdata = container_of(work,
2546 struct cifs_writedata, work);
2547 struct inode *inode = d_inode(wdata->cfile->dentry);
2548 struct cifsInodeInfo *cifsi = CIFS_I(inode);
2550 spin_lock(&inode->i_lock);
2551 cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2552 if (cifsi->server_eof > inode->i_size)
2553 i_size_write(inode, cifsi->server_eof);
2554 spin_unlock(&inode->i_lock);
2556 complete(&wdata->done);
2557 collect_uncached_write_data(wdata->ctx);
2558 /* the below call can possibly free the last ref to aio ctx */
2559 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2562 static int
2563 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2564 size_t *len, unsigned long *num_pages)
2566 size_t save_len, copied, bytes, cur_len = *len;
2567 unsigned long i, nr_pages = *num_pages;
2569 save_len = cur_len;
2570 for (i = 0; i < nr_pages; i++) {
2571 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2572 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2573 cur_len -= copied;
2574 /*
2575 * If we didn't copy as much as we expected, then that
2576 * may mean we trod into an unmapped area. Stop copying
2577 * at that point. On the next pass through the big
2578 * loop, we'll likely end up getting a zero-length
2579 * write and bailing out of it.
2580 */
2581 if (copied < bytes)
2582 break;
2584 cur_len = save_len - cur_len;
2585 *len = cur_len;
2587 /*
2588 * If we have no data to send, then that probably means that
2589 * the copy above failed altogether. That's most likely because
2590 * the address in the iovec was bogus. Return -EFAULT and let
2591 * the caller free anything we allocated and bail out.
2592 */
2593 if (!cur_len)
2594 return -EFAULT;
2596 /*
2597 * i + 1 now represents the number of pages we actually used in
2598 * the copy phase above.
2599 */
2600 *num_pages = i + 1;
2601 return 0;
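/*
 * Split an uncached write into wsize-sized chunks, copy each chunk out
 * of @from into freshly allocated pages and issue it as an async write.
 * Each submitted wdata is queued on @wdata_list so that
 * collect_uncached_write_data() can reap the results in offset order.
 */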
2604 static int
2605 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2606 struct cifsFileInfo *open_file,
2607 struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2608 struct cifs_aio_ctx *ctx)
2610 int rc = 0;
2611 size_t cur_len;
2612 unsigned long nr_pages, num_pages, i;
2613 struct cifs_writedata *wdata;
2614 struct iov_iter saved_from = *from;
2615 loff_t saved_offset = offset;
2616 pid_t pid;
2617 struct TCP_Server_Info *server;
2619 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2620 pid = open_file->pid;
2621 else
2622 pid = current->tgid;
2624 server = tlink_tcon(open_file->tlink)->ses->server;
2626 do {
2627 unsigned int wsize, credits;
2629 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2630 &wsize, &credits);
2631 if (rc)
2632 break;
2634 nr_pages = get_numpages(wsize, len, &cur_len);
2635 wdata = cifs_writedata_alloc(nr_pages,
2636 cifs_uncached_writev_complete);
2637 if (!wdata) {
2638 rc = -ENOMEM;
2639 add_credits_and_wake_if(server, credits, 0);
2640 break;
2643 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2644 if (rc) {
2645 kfree(wdata);
2646 add_credits_and_wake_if(server, credits, 0);
2647 break;
2650 num_pages = nr_pages;
2651 rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages);
2652 if (rc) {
2653 for (i = 0; i < nr_pages; i++)
2654 put_page(wdata->pages[i]);
2655 kfree(wdata);
2656 add_credits_and_wake_if(server, credits, 0);
2657 break;
2660 /*
2661 * Bring nr_pages down to the number of pages we actually used,
2662 * and free any pages that we didn't use.
2663 */
2664 for ( ; nr_pages > num_pages; nr_pages--)
2665 put_page(wdata->pages[nr_pages - 1]);
2667 wdata->sync_mode = WB_SYNC_ALL;
2668 wdata->nr_pages = nr_pages;
2669 wdata->offset = (__u64)offset;
2670 wdata->cfile = cifsFileInfo_get(open_file);
2671 wdata->pid = pid;
2672 wdata->bytes = cur_len;
2673 wdata->pagesz = PAGE_SIZE;
2674 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2675 wdata->credits = credits;
2676 wdata->ctx = ctx;
2677 kref_get(&ctx->refcount);
2679 if (!wdata->cfile->invalidHandle ||
2680 !(rc = cifs_reopen_file(wdata->cfile, false)))
2681 rc = server->ops->async_writev(wdata,
2682 cifs_uncached_writedata_release);
2683 if (rc) {
2684 add_credits_and_wake_if(server, wdata->credits, 0);
2685 kref_put(&wdata->refcount,
2686 cifs_uncached_writedata_release);
2687 if (rc == -EAGAIN) {
2688 *from = saved_from;
2689 iov_iter_advance(from, offset - saved_offset);
2690 continue;
2692 break;
2695 list_add_tail(&wdata->list, wdata_list);
2696 offset += cur_len;
2697 len -= cur_len;
2698 } while (len > 0);
2700 return rc;
2703 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
2705 struct cifs_writedata *wdata, *tmp;
2706 struct cifs_tcon *tcon;
2707 struct cifs_sb_info *cifs_sb;
2708 struct dentry *dentry = ctx->cfile->dentry;
2709 unsigned int i;
2710 int rc;
2712 tcon = tlink_tcon(ctx->cfile->tlink);
2713 cifs_sb = CIFS_SB(dentry->d_sb);
2715 mutex_lock(&ctx->aio_mutex);
2717 if (list_empty(&ctx->list)) {
2718 mutex_unlock(&ctx->aio_mutex);
2719 return;
2722 rc = ctx->rc;
2723 /*
2724 * Wait for and collect replies for any successful sends in order of
2725 * increasing offset. Once an error is hit, then return without waiting
2726 * for any more replies.
2727 */
2728 restart_loop:
2729 list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
2730 if (!rc) {
2731 if (!try_wait_for_completion(&wdata->done)) {
2732 mutex_unlock(&ctx->aio_mutex);
2733 return;
2736 if (wdata->result)
2737 rc = wdata->result;
2738 else
2739 ctx->total_len += wdata->bytes;
2741 /* resend call if it's a retryable error */
2742 if (rc == -EAGAIN) {
2743 struct list_head tmp_list;
2744 struct iov_iter tmp_from = ctx->iter;
2746 INIT_LIST_HEAD(&tmp_list);
2747 list_del_init(&wdata->list);
2749 iov_iter_advance(&tmp_from,
2750 wdata->offset - ctx->pos);
2752 rc = cifs_write_from_iter(wdata->offset,
2753 wdata->bytes, &tmp_from,
2754 ctx->cfile, cifs_sb, &tmp_list,
2755 ctx);
2757 list_splice(&tmp_list, &ctx->list);
2759 kref_put(&wdata->refcount,
2760 cifs_uncached_writedata_release);
2761 goto restart_loop;
2764 list_del_init(&wdata->list);
2765 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2768 for (i = 0; i < ctx->npages; i++)
2769 put_page(ctx->bv[i].bv_page);
2771 cifs_stats_bytes_written(tcon, ctx->total_len);
2772 set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
2774 ctx->rc = (rc == 0) ? ctx->total_len : rc;
2776 mutex_unlock(&ctx->aio_mutex);
2778 if (ctx->iocb && ctx->iocb->ki_complete)
2779 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
2780 else
2781 complete(&ctx->done);
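/*
 * Entry point for uncached writes. For a synchronous kiocb we block on
 * ctx->done and return the total bytes written; for an async kiocb we
 * return -EIOCBQUEUED and ki_complete() is invoked later from
 * collect_uncached_write_data().
 */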
2784 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
2786 struct file *file = iocb->ki_filp;
2787 ssize_t total_written = 0;
2788 struct cifsFileInfo *cfile;
2789 struct cifs_tcon *tcon;
2790 struct cifs_sb_info *cifs_sb;
2791 struct cifs_aio_ctx *ctx;
2792 struct iov_iter saved_from = *from;
2793 int rc;
2795 /*
2796 * BB - optimize the case when signing is disabled. We can drop this
2797 * extra memory-to-memory copying and use iovec buffers for constructing
2798 * the write request.
2799 */
2801 rc = generic_write_checks(iocb, from);
2802 if (rc <= 0)
2803 return rc;
2805 cifs_sb = CIFS_FILE_SB(file);
2806 cfile = file->private_data;
2807 tcon = tlink_tcon(cfile->tlink);
2809 if (!tcon->ses->server->ops->async_writev)
2810 return -ENOSYS;
2812 ctx = cifs_aio_ctx_alloc();
2813 if (!ctx)
2814 return -ENOMEM;
2816 ctx->cfile = cifsFileInfo_get(cfile);
2818 if (!is_sync_kiocb(iocb))
2819 ctx->iocb = iocb;
2821 ctx->pos = iocb->ki_pos;
2823 rc = setup_aio_ctx_iter(ctx, from, WRITE);
2824 if (rc) {
2825 kref_put(&ctx->refcount, cifs_aio_ctx_release);
2826 return rc;
2829 /* grab a lock here because write response handlers can access ctx */
2830 mutex_lock(&ctx->aio_mutex);
2832 rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
2833 cfile, cifs_sb, &ctx->list, ctx);
2835 /*
2836 * If at least one write was successfully sent, then discard any rc
2837 * value from the later writes. If the other write succeeds, then
2838 * we'll end up returning whatever was written. If it fails, then
2839 * we'll get a new rc value from that.
2840 */
2841 if (!list_empty(&ctx->list))
2842 rc = 0;
2844 mutex_unlock(&ctx->aio_mutex);
2846 if (rc) {
2847 kref_put(&ctx->refcount, cifs_aio_ctx_release);
2848 return rc;
2851 if (!is_sync_kiocb(iocb)) {
2852 kref_put(&ctx->refcount, cifs_aio_ctx_release);
2853 return -EIOCBQUEUED;
2856 rc = wait_for_completion_killable(&ctx->done);
2857 if (rc) {
2858 mutex_lock(&ctx->aio_mutex);
2859 ctx->rc = rc = -EINTR;
2860 total_written = ctx->total_len;
2861 mutex_unlock(&ctx->aio_mutex);
2862 } else {
2863 rc = ctx->rc;
2864 total_written = ctx->total_len;
2867 kref_put(&ctx->refcount, cifs_aio_ctx_release);
2869 if (unlikely(!total_written))
2870 return rc;
2872 iocb->ki_pos += total_written;
2873 return total_written;
2876 static ssize_t
2877 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
2879 struct file *file = iocb->ki_filp;
2880 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2881 struct inode *inode = file->f_mapping->host;
2882 struct cifsInodeInfo *cinode = CIFS_I(inode);
2883 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2884 ssize_t rc;
2886 inode_lock(inode);
2887 /*
2888 * We need to hold the sem to be sure nobody modifies lock list
2889 * with a brlock that prevents writing.
2890 */
2891 down_read(&cinode->lock_sem);
2893 rc = generic_write_checks(iocb, from);
2894 if (rc <= 0)
2895 goto out;
2897 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
2898 server->vals->exclusive_lock_type, NULL,
2899 CIFS_WRITE_OP))
2900 rc = __generic_file_write_iter(iocb, from);
2901 else
2902 rc = -EACCES;
2903 out:
2904 up_read(&cinode->lock_sem);
2905 inode_unlock(inode);
2907 if (rc > 0)
2908 rc = generic_write_sync(iocb, rc);
2909 return rc;
2912 ssize_t
2913 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
2915 struct inode *inode = file_inode(iocb->ki_filp);
2916 struct cifsInodeInfo *cinode = CIFS_I(inode);
2917 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2918 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2919 iocb->ki_filp->private_data;
2920 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2921 ssize_t written;
2923 written = cifs_get_writer(cinode);
2924 if (written)
2925 return written;
2927 if (CIFS_CACHE_WRITE(cinode)) {
2928 if (cap_unix(tcon->ses) &&
2929 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2930 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
2931 written = generic_file_write_iter(iocb, from);
2932 goto out;
2934 written = cifs_writev(iocb, from);
2935 goto out;
2937 /*
2938 * For non-oplocked files in strict cache mode we need to write the data
2939 * to the server exactly from the pos to pos+len-1 rather than flush all
2940 * affected pages because it may cause an error with mandatory locks on
2941 * these pages but not on the region from pos to pos+len-1.
2942 */
2943 written = cifs_user_writev(iocb, from);
2944 if (CIFS_CACHE_READ(cinode)) {
2945 /*
2946 * We have read level caching and we have just sent a write
2947 * request to the server thus making data in the cache stale.
2948 * Zap the cache and set oplock/lease level to NONE to avoid
2949 * reading stale data from the cache. All subsequent read
2950 * operations will read new data from the server.
2951 */
2952 cifs_zap_mapping(inode);
2953 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
2954 inode);
2955 cinode->oplock = 0;
2957 out:
2958 cifs_put_writer(cinode);
2959 return written;
2962 static struct cifs_readdata *
2963 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
2965 struct cifs_readdata *rdata;
2967 rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
2968 if (rdata != NULL) {
2969 rdata->pages = pages;
2970 kref_init(&rdata->refcount);
2971 INIT_LIST_HEAD(&rdata->list);
2972 init_completion(&rdata->done);
2973 INIT_WORK(&rdata->work, complete);
2976 return rdata;
2979 static struct cifs_readdata *
2980 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2982 struct page **pages =
2983 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
2984 struct cifs_readdata *ret = NULL;
2986 if (pages) {
2987 ret = cifs_readdata_direct_alloc(pages, complete);
2988 if (!ret)
2989 kfree(pages);
2992 return ret;
2995 void
2996 cifs_readdata_release(struct kref *refcount)
2998 struct cifs_readdata *rdata = container_of(refcount,
2999 struct cifs_readdata, refcount);
3000 #ifdef CONFIG_CIFS_SMB_DIRECT
3001 if (rdata->mr) {
3002 smbd_deregister_mr(rdata->mr);
3003 rdata->mr = NULL;
3005 #endif
3006 if (rdata->cfile)
3007 cifsFileInfo_put(rdata->cfile);
3009 kvfree(rdata->pages);
3010 kfree(rdata);
3013 static int
3014 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3016 int rc = 0;
3017 struct page *page;
3018 unsigned int i;
3020 for (i = 0; i < nr_pages; i++) {
3021 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3022 if (!page) {
3023 rc = -ENOMEM;
3024 break;
3026 rdata->pages[i] = page;
3029 if (rc) {
3030 unsigned int nr_page_failed = i;
3032 for (i = 0; i < nr_page_failed; i++) {
3033 put_page(rdata->pages[i]);
3034 rdata->pages[i] = NULL;
3037 return rc;
3040 static void
3041 cifs_uncached_readdata_release(struct kref *refcount)
3043 struct cifs_readdata *rdata = container_of(refcount,
3044 struct cifs_readdata, refcount);
3045 unsigned int i;
3047 kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3048 for (i = 0; i < rdata->nr_pages; i++) {
3049 put_page(rdata->pages[i]);
3050 rdata->pages[i] = NULL;
3052 cifs_readdata_release(refcount);
3055 /**
3056 * cifs_readdata_to_iov - copy data from pages in response to an iovec
3057 * @rdata: the readdata response with list of pages holding data
3058 * @iter: destination for our data
3059 *
3060 * This function copies data from a list of pages in a readdata response into
3061 * an array of iovecs. It will first calculate where the data should go
3062 * based on the info in the readdata and then copy the data into that spot.
3063 */
3064 static int
3065 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3067 size_t remaining = rdata->got_bytes;
3068 unsigned int i;
3070 for (i = 0; i < rdata->nr_pages; i++) {
3071 struct page *page = rdata->pages[i];
3072 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3073 size_t written;
3075 if (unlikely(iter->type & ITER_PIPE)) {
3076 void *addr = kmap_atomic(page);
3078 written = copy_to_iter(addr, copy, iter);
3079 kunmap_atomic(addr);
3080 } else
3081 written = copy_page_to_iter(page, 0, copy, iter);
3082 remaining -= written;
3083 if (written < copy && iov_iter_count(iter) > 0)
3084 break;
3086 return remaining ? -EFAULT : 0;
3089 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3091 static void
3092 cifs_uncached_readv_complete(struct work_struct *work)
3094 struct cifs_readdata *rdata = container_of(work,
3095 struct cifs_readdata, work);
3097 complete(&rdata->done);
3098 collect_uncached_read_data(rdata->ctx);
3099 /* the below call can possibly free the last ref to aio ctx */
3100 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3103 static int
3104 uncached_fill_pages(struct TCP_Server_Info *server,
3105 struct cifs_readdata *rdata, struct iov_iter *iter,
3106 unsigned int len)
3108 int result = 0;
3109 unsigned int i;
3110 unsigned int nr_pages = rdata->nr_pages;
3111 unsigned int page_offset = rdata->page_offset;
3113 rdata->got_bytes = 0;
3114 rdata->tailsz = PAGE_SIZE;
3115 for (i = 0; i < nr_pages; i++) {
3116 struct page *page = rdata->pages[i];
3117 size_t n;
3118 unsigned int segment_size = rdata->pagesz;
3120 if (i == 0)
3121 segment_size -= page_offset;
3122 else
3123 page_offset = 0;
3126 if (len <= 0) {
3127 /* no need to hold page hostage */
3128 rdata->pages[i] = NULL;
3129 rdata->nr_pages--;
3130 put_page(page);
3131 continue;
3134 n = len;
3135 if (len >= segment_size)
3136 /* enough data to fill the page */
3137 n = segment_size;
3138 else
3139 rdata->tailsz = len;
3140 len -= n;
3142 if (iter)
3143 result = copy_page_from_iter(
3144 page, page_offset, n, iter);
3145 #ifdef CONFIG_CIFS_SMB_DIRECT
3146 else if (rdata->mr)
3147 result = n;
3148 #endif
3149 else
3150 result = cifs_read_page_from_socket(
3151 server, page, page_offset, n);
3152 if (result < 0)
3153 break;
3155 rdata->got_bytes += result;
3158 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3159 rdata->got_bytes : result;
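/*
 * Note the return convention above: partial progress wins -- if any
 * bytes reached rdata->pages, the byte count is returned, except for
 * -ECONNABORTED, which must reach the caller so the read can be resent
 * after a reconnect.
 */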
3162 static int
3163 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3164 struct cifs_readdata *rdata, unsigned int len)
3166 return uncached_fill_pages(server, rdata, NULL, len);
3169 static int
3170 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3171 struct cifs_readdata *rdata,
3172 struct iov_iter *iter)
3174 return uncached_fill_pages(server, rdata, iter, iter->count);
3177 static int
3178 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3179 struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3180 struct cifs_aio_ctx *ctx)
3182 struct cifs_readdata *rdata;
3183 unsigned int npages, rsize, credits;
3184 size_t cur_len;
3185 int rc;
3186 pid_t pid;
3187 struct TCP_Server_Info *server;
3189 server = tlink_tcon(open_file->tlink)->ses->server;
3191 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3192 pid = open_file->pid;
3193 else
3194 pid = current->tgid;
3196 do {
3197 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3198 &rsize, &credits);
3199 if (rc)
3200 break;
3202 cur_len = min_t(const size_t, len, rsize);
3203 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3205 /* allocate a readdata struct */
3206 rdata = cifs_readdata_alloc(npages,
3207 cifs_uncached_readv_complete);
3208 if (!rdata) {
3209 add_credits_and_wake_if(server, credits, 0);
3210 rc = -ENOMEM;
3211 break;
3214 rc = cifs_read_allocate_pages(rdata, npages);
3215 if (rc)
3216 goto error;
3218 rdata->cfile = cifsFileInfo_get(open_file);
3219 rdata->nr_pages = npages;
3220 rdata->offset = offset;
3221 rdata->bytes = cur_len;
3222 rdata->pid = pid;
3223 rdata->pagesz = PAGE_SIZE;
3224 rdata->tailsz = PAGE_SIZE;
3225 rdata->read_into_pages = cifs_uncached_read_into_pages;
3226 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3227 rdata->credits = credits;
3228 rdata->ctx = ctx;
3229 kref_get(&ctx->refcount);
3231 if (!rdata->cfile->invalidHandle ||
3232 !(rc = cifs_reopen_file(rdata->cfile, true)))
3233 rc = server->ops->async_readv(rdata);
3234 error:
3235 if (rc) {
3236 add_credits_and_wake_if(server, rdata->credits, 0);
3237 kref_put(&rdata->refcount,
3238 cifs_uncached_readdata_release);
3239 if (rc == -EAGAIN)
3240 continue;
3241 break;
3244 list_add_tail(&rdata->list, rdata_list);
3245 offset += cur_len;
3246 len -= cur_len;
3247 } while (len > 0);
3249 return rc;
3252 static void
3253 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3255 struct cifs_readdata *rdata, *tmp;
3256 struct iov_iter *to = &ctx->iter;
3257 struct cifs_sb_info *cifs_sb;
3258 struct cifs_tcon *tcon;
3259 unsigned int i;
3260 int rc;
3262 tcon = tlink_tcon(ctx->cfile->tlink);
3263 cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3265 mutex_lock(&ctx->aio_mutex);
3267 if (list_empty(&ctx->list)) {
3268 mutex_unlock(&ctx->aio_mutex);
3269 return;
3272 rc = ctx->rc;
3273 /* the loop below should proceed in the order of increasing offsets */
3274 again:
3275 list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
3276 if (!rc) {
3277 if (!try_wait_for_completion(&rdata->done)) {
3278 mutex_unlock(&ctx->aio_mutex);
3279 return;
3282 if (rdata->result == -EAGAIN) {
3283 /* resend call if it's a retryable error */
3284 struct list_head tmp_list;
3285 unsigned int got_bytes = rdata->got_bytes;
3287 list_del_init(&rdata->list);
3288 INIT_LIST_HEAD(&tmp_list);
3290 /*
3291 * Got part of the data and then a reconnect
3292 * happened -- fill the buffer and continue
3293 * reading.
3294 */
3295 if (got_bytes && got_bytes < rdata->bytes) {
3296 rc = cifs_readdata_to_iov(rdata, to);
3297 if (rc) {
3298 kref_put(&rdata->refcount,
3299 cifs_uncached_readdata_release);
3300 continue;
3304 rc = cifs_send_async_read(
3305 rdata->offset + got_bytes,
3306 rdata->bytes - got_bytes,
3307 rdata->cfile, cifs_sb,
3308 &tmp_list, ctx);
3310 list_splice(&tmp_list, &ctx->list);
3312 kref_put(&rdata->refcount,
3313 cifs_uncached_readdata_release);
3314 goto again;
3315 } else if (rdata->result)
3316 rc = rdata->result;
3317 else
3318 rc = cifs_readdata_to_iov(rdata, to);
3320 /* if there was a short read -- discard anything left */
3321 if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3322 rc = -ENODATA;
3324 list_del_init(&rdata->list);
3325 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3328 for (i = 0; i < ctx->npages; i++) {
3329 if (ctx->should_dirty)
3330 set_page_dirty(ctx->bv[i].bv_page);
3331 put_page(ctx->bv[i].bv_page);
3334 ctx->total_len = ctx->len - iov_iter_count(to);
3336 cifs_stats_bytes_read(tcon, ctx->total_len);
3338 /* mask nodata case */
3339 if (rc == -ENODATA)
3340 rc = 0;
3342 ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
3344 mutex_unlock(&ctx->aio_mutex);
3346 if (ctx->iocb && ctx->iocb->ki_complete)
3347 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3348 else
3349 complete(&ctx->done);
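/*
 * Entry point for uncached reads; mirrors cifs_user_writev(). Chunked
 * async reads are queued by cifs_send_async_read() and reaped by
 * collect_uncached_read_data() in order of increasing offset.
 */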
3352 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
3354 struct file *file = iocb->ki_filp;
3355 ssize_t rc;
3356 size_t len;
3357 ssize_t total_read = 0;
3358 loff_t offset = iocb->ki_pos;
3359 struct cifs_sb_info *cifs_sb;
3360 struct cifs_tcon *tcon;
3361 struct cifsFileInfo *cfile;
3362 struct cifs_aio_ctx *ctx;
3364 len = iov_iter_count(to);
3365 if (!len)
3366 return 0;
3368 cifs_sb = CIFS_FILE_SB(file);
3369 cfile = file->private_data;
3370 tcon = tlink_tcon(cfile->tlink);
3372 if (!tcon->ses->server->ops->async_readv)
3373 return -ENOSYS;
3375 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3376 cifs_dbg(FYI, "attempting read on write only file instance\n");
3378 ctx = cifs_aio_ctx_alloc();
3379 if (!ctx)
3380 return -ENOMEM;
3382 ctx->cfile = cifsFileInfo_get(cfile);
3384 if (!is_sync_kiocb(iocb))
3385 ctx->iocb = iocb;
3387 if (to->type == ITER_IOVEC)
3388 ctx->should_dirty = true;
3390 rc = setup_aio_ctx_iter(ctx, to, READ);
3391 if (rc) {
3392 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3393 return rc;
3396 len = ctx->len;
3398 /* grab a lock here because read response handlers can access ctx */
3399 mutex_lock(&ctx->aio_mutex);
3401 rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
3403 /* if at least one read request was sent successfully, then reset rc */
3404 if (!list_empty(&ctx->list))
3405 rc = 0;
3407 mutex_unlock(&ctx->aio_mutex);
3409 if (rc) {
3410 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3411 return rc;
3414 if (!is_sync_kiocb(iocb)) {
3415 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3416 return -EIOCBQUEUED;
3419 rc = wait_for_completion_killable(&ctx->done);
3420 if (rc) {
3421 mutex_lock(&ctx->aio_mutex);
3422 ctx->rc = rc = -EINTR;
3423 total_read = ctx->total_len;
3424 mutex_unlock(&ctx->aio_mutex);
3425 } else {
3426 rc = ctx->rc;
3427 total_read = ctx->total_len;
3430 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3432 if (total_read) {
3433 iocb->ki_pos += total_read;
3434 return total_read;
3436 return rc;
3439 ssize_t
3440 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3442 struct inode *inode = file_inode(iocb->ki_filp);
3443 struct cifsInodeInfo *cinode = CIFS_I(inode);
3444 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3445 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3446 iocb->ki_filp->private_data;
3447 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3448 int rc = -EACCES;
3450 /*
3451 * In strict cache mode we need to read from the server all the time
3452 * if we don't have a level II oplock because the server can delay the
3453 * mtime change - so we can't make a decision about invalidating the
3454 * inode. Page reads can also fail if there are mandatory locks on
3455 * pages affected by this read but not on the region from pos to
3456 * pos+len-1.
3457 */
3458 if (!CIFS_CACHE_READ(cinode))
3459 return cifs_user_readv(iocb, to);
3461 if (cap_unix(tcon->ses) &&
3462 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3463 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3464 return generic_file_read_iter(iocb, to);
3466 /*
3467 * We need to hold the sem to be sure nobody modifies lock list
3468 * with a brlock that prevents reading.
3469 */
3470 down_read(&cinode->lock_sem);
3471 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
3472 tcon->ses->server->vals->shared_lock_type,
3473 NULL, CIFS_READ_OP))
3474 rc = generic_file_read_iter(iocb, to);
3475 up_read(&cinode->lock_sem);
3476 return rc;
3479 static ssize_t
3480 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
3482 int rc = -EACCES;
3483 unsigned int bytes_read = 0;
3484 unsigned int total_read;
3485 unsigned int current_read_size;
3486 unsigned int rsize;
3487 struct cifs_sb_info *cifs_sb;
3488 struct cifs_tcon *tcon;
3489 struct TCP_Server_Info *server;
3490 unsigned int xid;
3491 char *cur_offset;
3492 struct cifsFileInfo *open_file;
3493 struct cifs_io_parms io_parms;
3494 int buf_type = CIFS_NO_BUFFER;
3495 __u32 pid;
3497 xid = get_xid();
3498 cifs_sb = CIFS_FILE_SB(file);
3500 /* FIXME: set up handlers for larger reads and/or convert to async */
3501 rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
3503 if (file->private_data == NULL) {
3504 rc = -EBADF;
3505 free_xid(xid);
3506 return rc;
3508 open_file = file->private_data;
3509 tcon = tlink_tcon(open_file->tlink);
3510 server = tcon->ses->server;
3512 if (!server->ops->sync_read) {
3513 free_xid(xid);
3514 return -ENOSYS;
3517 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3518 pid = open_file->pid;
3519 else
3520 pid = current->tgid;
3522 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3523 cifs_dbg(FYI, "attempting read on write only file instance\n");
3525 for (total_read = 0, cur_offset = read_data; read_size > total_read;
3526 total_read += bytes_read, cur_offset += bytes_read) {
3527 do {
3528 current_read_size = min_t(uint, read_size - total_read,
3529 rsize);
3530 /*
3531 * For Windows ME and 9x we do not want to request more
3532 * than it negotiated since it will refuse the read
3533 * then.
3534 */
3535 if (!(tcon->ses->capabilities &
3536 tcon->ses->server->vals->cap_large_files)) {
3537 current_read_size = min_t(uint,
3538 current_read_size, CIFSMaxBufSize);
3540 if (open_file->invalidHandle) {
3541 rc = cifs_reopen_file(open_file, true);
3542 if (rc != 0)
3543 break;
3545 io_parms.pid = pid;
3546 io_parms.tcon = tcon;
3547 io_parms.offset = *offset;
3548 io_parms.length = current_read_size;
3549 rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
3550 &bytes_read, &cur_offset,
3551 &buf_type);
3552 } while (rc == -EAGAIN);
3554 if (rc || (bytes_read == 0)) {
3555 if (total_read) {
3556 break;
3557 } else {
3558 free_xid(xid);
3559 return rc;
3561 } else {
3562 cifs_stats_bytes_read(tcon, total_read);
3563 *offset += bytes_read;
3566 free_xid(xid);
3567 return total_read;
3570 /*
3571 * If the page is mmap'ed into a process' page tables, then we need to make
3572 * sure that it doesn't change while being written back.
3573 */
3574 static vm_fault_t
3575 cifs_page_mkwrite(struct vm_fault *vmf)
3577 struct page *page = vmf->page;
3579 lock_page(page);
3580 return VM_FAULT_LOCKED;
3583 static const struct vm_operations_struct cifs_file_vm_ops = {
3584 .fault = filemap_fault,
3585 .map_pages = filemap_map_pages,
3586 .page_mkwrite = cifs_page_mkwrite,
3587 };
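/*
 * Strict cache mmap: when we do not hold a read oplock the cached pages
 * may be stale, so zap the mapping first and let faults repopulate it
 * from the server through the vm_ops above.
 */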
3589 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3591 int xid, rc = 0;
3592 struct inode *inode = file_inode(file);
3594 xid = get_xid();
3596 if (!CIFS_CACHE_READ(CIFS_I(inode)))
3597 rc = cifs_zap_mapping(inode);
3598 if (!rc)
3599 rc = generic_file_mmap(file, vma);
3600 if (!rc)
3601 vma->vm_ops = &cifs_file_vm_ops;
3603 free_xid(xid);
3604 return rc;
3607 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3609 int rc, xid;
3611 xid = get_xid();
3613 rc = cifs_revalidate_file(file);
3614 if (rc)
3615 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3616 rc);
3617 if (!rc)
3618 rc = generic_file_mmap(file, vma);
3619 if (!rc)
3620 vma->vm_ops = &cifs_file_vm_ops;
3622 free_xid(xid);
3623 return rc;
3626 static void
3627 cifs_readv_complete(struct work_struct *work)
3629 unsigned int i, got_bytes;
3630 struct cifs_readdata *rdata = container_of(work,
3631 struct cifs_readdata, work);
3633 got_bytes = rdata->got_bytes;
3634 for (i = 0; i < rdata->nr_pages; i++) {
3635 struct page *page = rdata->pages[i];
3637 lru_cache_add_file(page);
3639 if (rdata->result == 0 ||
3640 (rdata->result == -EAGAIN && got_bytes)) {
3641 flush_dcache_page(page);
3642 SetPageUptodate(page);
3645 unlock_page(page);
3647 if (rdata->result == 0 ||
3648 (rdata->result == -EAGAIN && got_bytes))
3649 cifs_readpage_to_fscache(rdata->mapping->host, page);
3651 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
3653 put_page(page);
3654 rdata->pages[i] = NULL;
3656 kref_put(&rdata->refcount, cifs_readdata_release);
3659 static int
3660 readpages_fill_pages(struct TCP_Server_Info *server,
3661 struct cifs_readdata *rdata, struct iov_iter *iter,
3662 unsigned int len)
3664 int result = 0;
3665 unsigned int i;
3666 u64 eof;
3667 pgoff_t eof_index;
3668 unsigned int nr_pages = rdata->nr_pages;
3669 unsigned int page_offset = rdata->page_offset;
3671 /* determine the eof that the server (probably) has */
3672 eof = CIFS_I(rdata->mapping->host)->server_eof;
3673 eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
3674 cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
3676 rdata->got_bytes = 0;
3677 rdata->tailsz = PAGE_SIZE;
3678 for (i = 0; i < nr_pages; i++) {
3679 struct page *page = rdata->pages[i];
3680 unsigned int to_read = rdata->pagesz;
3681 size_t n;
3683 if (i == 0)
3684 to_read -= page_offset;
3685 else
3686 page_offset = 0;
3688 n = to_read;
3690 if (len >= to_read) {
3691 len -= to_read;
3692 } else if (len > 0) {
3693 /* enough for partial page, fill and zero the rest */
3694 zero_user(page, len + page_offset, to_read - len);
3695 n = rdata->tailsz = len;
3696 len = 0;
3697 } else if (page->index > eof_index) {
3698 /*
3699 * The VFS will not try to do readahead past the
3700 * i_size, but it's possible that we have outstanding
3701 * writes with gaps in the middle and the i_size hasn't
3702 * caught up yet. Populate those with zeroed out pages
3703 * to prevent the VFS from repeatedly attempting to
3704 * fill them until the writes are flushed.
3705 */
3706 zero_user(page, 0, PAGE_SIZE);
3707 lru_cache_add_file(page);
3708 flush_dcache_page(page);
3709 SetPageUptodate(page);
3710 unlock_page(page);
3711 put_page(page);
3712 rdata->pages[i] = NULL;
3713 rdata->nr_pages--;
3714 continue;
3715 } else {
3716 /* no need to hold page hostage */
3717 lru_cache_add_file(page);
3718 unlock_page(page);
3719 put_page(page);
3720 rdata->pages[i] = NULL;
3721 rdata->nr_pages--;
3722 continue;
3725 if (iter)
3726 result = copy_page_from_iter(
3727 page, page_offset, n, iter);
3728 #ifdef CONFIG_CIFS_SMB_DIRECT
3729 else if (rdata->mr)
3730 result = n;
3731 #endif
3732 else
3733 result = cifs_read_page_from_socket(
3734 server, page, page_offset, n);
3735 if (result < 0)
3736 break;
3738 rdata->got_bytes += result;
3741 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3742 rdata->got_bytes : result;
3745 static int
3746 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3747 struct cifs_readdata *rdata, unsigned int len)
3749 return readpages_fill_pages(server, rdata, NULL, len);
3752 static int
3753 cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
3754 struct cifs_readdata *rdata,
3755 struct iov_iter *iter)
3757 return readpages_fill_pages(server, rdata, iter, iter->count);
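/*
 * Peel a batch of pages off @page_list (kept by the VFS in order of
 * declining index) onto @tmplist, stopping at an index discontinuity or
 * once @rsize bytes are gathered; returns the starting @offset, @bytes
 * and @nr_pages of the batch.
 */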
3760 static int
3761 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
3762 unsigned int rsize, struct list_head *tmplist,
3763 unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
3765 struct page *page, *tpage;
3766 unsigned int expected_index;
3767 int rc;
3768 gfp_t gfp = readahead_gfp_mask(mapping);
3770 INIT_LIST_HEAD(tmplist);
3772 page = list_entry(page_list->prev, struct page, lru);
3774 /*
3775 * Lock the page and put it in the cache. Since no one else
3776 * should have access to this page, we're safe to simply set
3777 * PG_locked without checking it first.
3778 */
3779 __SetPageLocked(page);
3780 rc = add_to_page_cache_locked(page, mapping,
3781 page->index, gfp);
3783 /* give up if we can't stick it in the cache */
3784 if (rc) {
3785 __ClearPageLocked(page);
3786 return rc;
3789 /* move first page to the tmplist */
3790 *offset = (loff_t)page->index << PAGE_SHIFT;
3791 *bytes = PAGE_SIZE;
3792 *nr_pages = 1;
3793 list_move_tail(&page->lru, tmplist);
3795 /* now try and add more pages onto the request */
3796 expected_index = page->index + 1;
3797 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3798 /* discontinuity ? */
3799 if (page->index != expected_index)
3800 break;
3802 /* would this page push the read over the rsize? */
3803 if (*bytes + PAGE_SIZE > rsize)
3804 break;
3806 __SetPageLocked(page);
3807 if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
3808 __ClearPageLocked(page);
3809 break;
3811 list_move_tail(&page->lru, tmplist);
3812 (*bytes) += PAGE_SIZE;
3813 expected_index++;
3814 (*nr_pages)++;
3816 return rc;
3819 static int cifs_readpages(struct file *file, struct address_space *mapping,
3820 struct list_head *page_list, unsigned num_pages)
3822 int rc;
3823 struct list_head tmplist;
3824 struct cifsFileInfo *open_file = file->private_data;
3825 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3826 struct TCP_Server_Info *server;
3827 pid_t pid;
3829 /*
3830 * Reads as many pages as possible from fscache. Returns -ENOBUFS
3831 * immediately if the cookie is negative
3832 *
3833 * After this point, every page in the list might have PG_fscache set,
3834 * so we will need to clean that up off of every page we don't use.
3835 */
3836 rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
3837 &num_pages);
3838 if (rc == 0)
3839 return rc;
3841 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3842 pid = open_file->pid;
3843 else
3844 pid = current->tgid;
3846 rc = 0;
3847 server = tlink_tcon(open_file->tlink)->ses->server;
3849 cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
3850 __func__, file, mapping, num_pages);
3852 /*
3853 * Start with the page at end of list and move it to private
3854 * list. Do the same with any following pages until we hit
3855 * the rsize limit, hit an index discontinuity, or run out of
3856 * pages. Issue the async read and then start the loop again
3857 * until the list is empty.
3858 *
3859 * Note that list order is important. The page_list is in
3860 * the order of declining indexes. When we put the pages in
3861 * the rdata->pages, then we want them in increasing order.
3862 */
3863 while (!list_empty(page_list)) {
3864 unsigned int i, nr_pages, bytes, rsize;
3865 loff_t offset;
3866 struct page *page, *tpage;
3867 struct cifs_readdata *rdata;
3868 unsigned credits;
3870 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3871 &rsize, &credits);
3872 if (rc)
3873 break;
3875 /*
3876 * Give up immediately if rsize is too small to read an entire
3877 * page. The VFS will fall back to readpage. We should never
3878 * reach this point however since we set ra_pages to 0 when the
3879 * rsize is smaller than a cache page.
3880 */
3881 if (unlikely(rsize < PAGE_SIZE)) {
3882 add_credits_and_wake_if(server, credits, 0);
3883 return 0;
3886 rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
3887 &nr_pages, &offset, &bytes);
3888 if (rc) {
3889 add_credits_and_wake_if(server, credits, 0);
3890 break;
3893 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3894 if (!rdata) {
3895 /* best to give up if we're out of mem */
3896 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3897 list_del(&page->lru);
3898 lru_cache_add_file(page);
3899 unlock_page(page);
3900 put_page(page);
3902 rc = -ENOMEM;
3903 add_credits_and_wake_if(server, credits, 0);
3904 break;
3907 rdata->cfile = cifsFileInfo_get(open_file);
3908 rdata->mapping = mapping;
3909 rdata->offset = offset;
3910 rdata->bytes = bytes;
3911 rdata->pid = pid;
3912 rdata->pagesz = PAGE_SIZE;
3913 rdata->tailsz = PAGE_SIZE;
3914 rdata->read_into_pages = cifs_readpages_read_into_pages;
3915 rdata->copy_into_pages = cifs_readpages_copy_into_pages;
3916 rdata->credits = credits;
3918 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3919 list_del(&page->lru);
3920 rdata->pages[rdata->nr_pages++] = page;
3923 if (!rdata->cfile->invalidHandle ||
3924 !(rc = cifs_reopen_file(rdata->cfile, true)))
3925 rc = server->ops->async_readv(rdata);
3926 if (rc) {
3927 add_credits_and_wake_if(server, rdata->credits, 0);
3928 for (i = 0; i < rdata->nr_pages; i++) {
3929 page = rdata->pages[i];
3930 lru_cache_add_file(page);
3931 unlock_page(page);
3932 put_page(page);
3934 /* Fallback to the readpage in error/reconnect cases */
3935 kref_put(&rdata->refcount, cifs_readdata_release);
3936 break;
3939 kref_put(&rdata->refcount, cifs_readdata_release);
3942 /* Any pages that have been shown to fscache but didn't get added to
3943 * the pagecache must be uncached before they get returned to the
3944 * allocator.
3945 */
3946 cifs_fscache_readpages_cancel(mapping->host, page_list);
3947 return rc;
3950 /*
3951 * cifs_readpage_worker must be called with the page pinned
3952 */
3953 static int cifs_readpage_worker(struct file *file, struct page *page,
3954 loff_t *poffset)
3956 char *read_data;
3957 int rc;
3959 /* Is the page cached? */
3960 rc = cifs_readpage_from_fscache(file_inode(file), page);
3961 if (rc == 0)
3962 goto read_complete;
3964 read_data = kmap(page);
3965 /* for reads over a certain size we could initiate async read ahead */
3967 rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
3969 if (rc < 0)
3970 goto io_error;
3971 else
3972 cifs_dbg(FYI, "Bytes read %d\n", rc);
3974 file_inode(file)->i_atime =
3975 current_time(file_inode(file));
3977 if (PAGE_SIZE > rc)
3978 memset(read_data + rc, 0, PAGE_SIZE - rc);
3980 flush_dcache_page(page);
3981 SetPageUptodate(page);
3983 /* send this page to the cache */
3984 cifs_readpage_to_fscache(file_inode(file), page);
3986 rc = 0;
3988 io_error:
3989 kunmap(page);
3990 unlock_page(page);
3992 read_complete:
3993 return rc;
3996 static int cifs_readpage(struct file *file, struct page *page)
3998 loff_t offset = (loff_t)page->index << PAGE_SHIFT;
3999 int rc = -EACCES;
4000 unsigned int xid;
4002 xid = get_xid();
4004 if (file->private_data == NULL) {
4005 rc = -EBADF;
4006 free_xid(xid);
4007 return rc;
4010 cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
4011 page, (int)offset, (int)offset);
4013 rc = cifs_readpage_worker(file, page, &offset);
4015 free_xid(xid);
4016 return rc;
4019 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4021 struct cifsFileInfo *open_file;
4023 spin_lock(&cifs_inode->open_file_lock);
4024 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4025 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4026 spin_unlock(&cifs_inode->open_file_lock);
4027 return 1;
4030 spin_unlock(&cifs_inode->open_file_lock);
4031 return 0;
4034 /* We do not want to update the file size from the server for inodes
4035 open for write - to avoid races with writepage extending
4036 the file. In the future we could consider allowing
4037 refreshing the inode only on increases in the file size,
4038 but this is tricky to do without racing with writebehind
4039 page caching in the current Linux kernel design */
4040 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4042 if (!cifsInode)
4043 return true;
4045 if (is_inode_writable(cifsInode)) {
4046 /* This inode is open for write at least once */
4047 struct cifs_sb_info *cifs_sb;
4049 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
4050 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4051 /* since no page cache to corrupt on directio
4052 we can change size safely */
4053 return true;
4056 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
4057 return true;
4059 return false;
4060 } else
4061 return true;
4064 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4065 loff_t pos, unsigned len, unsigned flags,
4066 struct page **pagep, void **fsdata)
4068 int oncethru = 0;
4069 pgoff_t index = pos >> PAGE_SHIFT;
4070 loff_t offset = pos & (PAGE_SIZE - 1);
4071 loff_t page_start = pos & PAGE_MASK;
4072 loff_t i_size;
4073 struct page *page;
4074 int rc = 0;
4076 cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4078 start:
4079 page = grab_cache_page_write_begin(mapping, index, flags);
4080 if (!page) {
4081 rc = -ENOMEM;
4082 goto out;
4085 if (PageUptodate(page))
4086 goto out;
4088 /*
4089 * If we write a full page it will be up to date, no need to read from
4090 * the server. If the write is short, we'll end up doing a sync write
4091 * instead.
4092 */
4093 if (len == PAGE_SIZE)
4094 goto out;
4096 /*
4097 * optimize away the read when we have an oplock, and we're not
4098 * expecting to use any of the data we'd be reading in. That
4099 * is, when the page lies beyond the EOF, or straddles the EOF
4100 * and the write will cover all of the existing data.
4101 */
4102 if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4103 i_size = i_size_read(mapping->host);
4104 if (page_start >= i_size ||
4105 (offset == 0 && (pos + len) >= i_size)) {
4106 zero_user_segments(page, 0, offset,
4107 offset + len,
4108 PAGE_SIZE);
4109 /*
4110 * PageChecked means that the parts of the page
4111 * to which we're not writing are considered up
4112 * to date. Once the data is copied to the
4113 * page, it can be set uptodate.
4114 */
4115 SetPageChecked(page);
4116 goto out;
4120 if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4121 /*
4122 * might as well read a page, it is fast enough. If we get
4123 * an error, we don't need to return it. cifs_write_end will
4124 * do a sync write instead since PG_uptodate isn't set.
4125 */
4126 cifs_readpage_worker(file, page, &page_start);
4127 put_page(page);
4128 oncethru = 1;
4129 goto start;
4130 } else {
4131 /* we could try using another file handle if there is one -
4132 but how would we lock it to prevent close of that handle
4133 racing with this read? In any case
4134 this will be written out by write_end so is fine */
4136 out:
4137 *pagep = page;
4138 return rc;
4141 static int cifs_release_page(struct page *page, gfp_t gfp)
4143 if (PagePrivate(page))
4144 return 0;
4146 return cifs_fscache_release_page(page, gfp);
4149 static void cifs_invalidate_page(struct page *page, unsigned int offset,
4150 unsigned int length)
4152 struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4154 if (offset == 0 && length == PAGE_SIZE)
4155 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
static int cifs_launder_page(struct page *page)
{
	int rc = 0;
	loff_t range_start = page_offset(page);
	loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 0,
		.range_start = range_start,
		.range_end = range_end,
	};

	cifs_dbg(FYI, "Launder page: %p\n", page);

	if (clear_page_dirty_for_io(page))
		rc = cifs_writepage_locked(page, &wbc);

	cifs_fscache_invalidate_page(page, page->mapping->host);
	return rc;
}
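
/*
 * Oplock-break worker: runs when the server revokes or downgrades this
 * file's oplock. The sequence below waits for in-flight writers, downgrades
 * the cached oplock state, flushes (and, if read caching is lost, zaps)
 * the page cache, pushes byte-range locks to the server, and finally
 * acknowledges the break unless it was cancelled.
 */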
void cifs_oplock_break(struct work_struct *work)
{
	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
						  oplock_break);
	struct inode *inode = d_inode(cfile->dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	int rc = 0;

	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
			TASK_UNINTERRUPTIBLE);

	server->ops->downgrade_oplock(server, cinode,
		test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));

	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
						cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
			 inode);
		cinode->oplock = 0;
	}

	if (inode && S_ISREG(inode->i_mode)) {
		if (CIFS_CACHE_READ(cinode))
			break_lease(inode, O_RDONLY);
		else
			break_lease(inode, O_WRONLY);
		rc = filemap_fdatawrite(inode->i_mapping);
		if (!CIFS_CACHE_READ(cinode)) {
			rc = filemap_fdatawait(inode->i_mapping);
			mapping_set_error(inode->i_mapping, rc);
			cifs_zap_mapping(inode);
		}
		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
	}

	rc = cifs_push_locks(cfile);
	if (rc)
		cifs_dbg(VFS, "Push locks rc = %d\n", rc);

	/*
	 * releasing stale oplock after recent reconnect of smb session using
	 * a now incorrect file handle is not a data integrity issue but do
	 * not bother sending an oplock release if session to server still is
	 * disconnected since oplock already released by the server
	 */
	if (!cfile->oplock_break_cancelled) {
		rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
							     cinode);
		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
	}
	_cifsFileInfo_put(cfile, false /* do not wait for ourself */);
	cifs_done_oplock_break(cinode);
}
/*
 * The presence of cifs_direct_io() in the address space ops vector
 * allows open() with the O_DIRECT flag, which would have failed otherwise.
 *
 * In the non-cached mode (mount with cache=none), we shunt off direct read
 * and write requests, so this method should never be called.
 *
 * Direct IO is not yet supported in the cached mode.
 */
static ssize_t
cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
{
	/*
	 * FIXME
	 * Eventually need to support direct IO for non forcedirectio mounts
	 */
	return -EINVAL;
}
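
/*
 * For reference (hedged, from memory of the surrounding tree): on
 * cache=none mounts the VFS is pointed at the direct file_operations
 * variant in fs/cifs/cifsfs.c, whose read/write iterators (e.g.
 * cifs_user_readv()/cifs_user_writev()) bypass the page cache entirely,
 * which is why this ->direct_IO stub is never reached on that path.
 */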
const struct address_space_operations cifs_addr_ops = {
	.readpage = cifs_readpage,
	.readpages = cifs_readpages,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.direct_IO = cifs_direct_io,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
};
/*
 * cifs_readpages requires the server to support a buffer large enough to
 * contain the header plus one complete page of data. Otherwise, we need
 * to leave cifs_readpages out of the address space operations.
 */
const struct address_space_operations cifs_addr_ops_smallbuf = {
	.readpage = cifs_readpage,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
};
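
/*
 * Selection sketch (hedged): the choice between the two ops tables above is
 * made when inode operations are set up (in fs/cifs/inode.c in this era),
 * roughly:
 *
 *	if (server->maxBuf < PAGE_SIZE + MAX_CIFS_HDR_SIZE)
 *		inode->i_data.a_ops = &cifs_addr_ops_smallbuf;
 *	else
 *		inode->i_data.a_ops = &cifs_addr_ops;
 *
 * i.e. if the negotiated buffer cannot hold a header plus a full page,
 * ->readpages is left out and reads fall back to single-page ->readpage.
 * The exact expression is reconstructed from memory; names may differ.
 */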