/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/mm.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"

static inline int cifs_convert_flags(unsigned int flags)
{
	if ((flags & O_ACCMODE) == O_RDONLY)
		return GENERIC_READ;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		return GENERIC_WRITE;
	else if ((flags & O_ACCMODE) == O_RDWR) {
		/* GENERIC_ALL is too much permission to request
		   can cause unnecessary access denied on create */
		/* return GENERIC_ALL; */
		return (GENERIC_READ | GENERIC_WRITE);
	}

	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
		FILE_READ_DATA);
}

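/*
 * Translate VFS open flags into the SMB_O_* flags used by the POSIX
 * extensions of the CIFS Unix protocol.
 */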
static u32 cifs_posix_convert_flags(unsigned int flags)
{
	u32 posix_flags = 0;

	if ((flags & O_ACCMODE) == O_RDONLY)
		posix_flags = SMB_O_RDONLY;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		posix_flags = SMB_O_WRONLY;
	else if ((flags & O_ACCMODE) == O_RDWR)
		posix_flags = SMB_O_RDWR;

	if (flags & O_CREAT) {
		posix_flags |= SMB_O_CREAT;
		if (flags & O_EXCL)
			posix_flags |= SMB_O_EXCL;
	} else if (flags & O_EXCL)
		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
			 current->comm, current->tgid);

	if (flags & O_TRUNC)
		posix_flags |= SMB_O_TRUNC;
	/* be safe and imply O_SYNC for O_DSYNC */
	if (flags & O_DSYNC)
		posix_flags |= SMB_O_SYNC;
	if (flags & O_DIRECTORY)
		posix_flags |= SMB_O_DIRECTORY;
	if (flags & O_NOFOLLOW)
		posix_flags |= SMB_O_NOFOLLOW;
	if (flags & O_DIRECT)
		posix_flags |= SMB_O_DIRECT;

	return posix_flags;
}

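/*
 * Choose the SMB create disposition that matches the O_CREAT / O_EXCL /
 * O_TRUNC combination in the open flags.
 */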
static inline int cifs_get_disposition(unsigned int flags)
{
	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
		return FILE_CREATE;
	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
		return FILE_OVERWRITE_IF;
	else if ((flags & O_CREAT) == O_CREAT)
		return FILE_OPEN_IF;
	else if ((flags & O_TRUNC) == O_TRUNC)
		return FILE_OVERWRITE;
	else
		return FILE_OPEN;
}

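/*
 * Open a file via the POSIX extensions and, when the caller passed a
 * pinode, instantiate or refresh the inode from the returned attributes.
 */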
int cifs_posix_open(char *full_path, struct inode **pinode,
		    struct super_block *sb, int mode, unsigned int f_flags,
		    __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
	int rc;
	FILE_UNIX_BASIC_INFO *presp_data;
	__u32 posix_flags = 0;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	cifs_dbg(FYI, "posix open %s\n", full_path);

	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
	if (presp_data == NULL)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto posix_open_ret;
	}

	tcon = tlink_tcon(tlink);
	mode &= ~current_umask();

	posix_flags = cifs_posix_convert_flags(f_flags);
	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_remap(cifs_sb));
	cifs_put_tlink(tlink);

	if (rc)
		goto posix_open_ret;

	if (presp_data->Type == cpu_to_le32(-1))
		goto posix_open_ret; /* open ok, caller does qpathinfo */

	if (!pinode)
		goto posix_open_ret; /* caller does not need info */

	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

	/* get new inode and set it up */
	if (*pinode == NULL) {
		cifs_fill_uniqueid(sb, &fattr);
		*pinode = cifs_iget(sb, &fattr);
		if (!*pinode) {
			rc = -ENOMEM;
			goto posix_open_ret;
		}
	} else {
		cifs_fattr_to_inode(*pinode, &fattr);
	}

posix_open_ret:
	kfree(presp_data);
	return rc;
}

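/*
 * Open a file the traditional (non-POSIX) way: map the VFS flags to an
 * NT access mask and create disposition, issue the open, then query the
 * resulting inode metadata.
 */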
static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
	     struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
	     struct cifs_fid *fid, unsigned int xid)
{
	int rc;
	int desired_access;
	int disposition;
	int create_options = CREATE_NOT_DIR;
	FILE_ALL_INFO *buf;
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifs_open_parms oparms;

	if (!server->ops->open)
		return -ENOSYS;

	desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *	POSIX Flag            CIFS Disposition
 *	----------            ----------------
 *	O_CREAT               FILE_OPEN_IF
 *	O_CREAT | O_EXCL      FILE_CREATE
 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *	O_TRUNC               FILE_OVERWRITE
 *	none of the above     FILE_OPEN
 *
 *	Note that there is not a direct match between disposition
 *	FILE_SUPERSEDE (ie create whether or not file exists although
 *	O_CREAT | O_TRUNC is similar but truncates the existing
 *	file rather than creating a new file as FILE_SUPERSEDE does
 *	(which uses the attributes / metadata passed in on open call)
 *?
 *?  O_SYNC is a reasonable match to CIFS writethrough flag
 *?  and the read write flags match reasonably.  O_LARGEFILE
 *?  is irrelevant because largefile support is always used
 *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *	 O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 *********************************************************************/

	disposition = cifs_get_disposition(f_flags);

	/* BB pass O_SYNC flag through on file attributes .. BB */

	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = cifs_create_options(cifs_sb, create_options);
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = fid;
	oparms.reconnect = false;

	rc = server->ops->open(xid, &oparms, oplock, buf);

	if (rc)
		goto out;

	if (tcon->unix_ext)
		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
					      xid);
	else
		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
					 xid, fid);

	if (rc) {
		server->ops->close(xid, tcon, fid);
		if (rc == -ESTALE)
			rc = -EOPENSTALE;
	}

out:
	kfree(buf);
	return rc;
}

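/* Check whether any fid open on this inode holds cached byte-range locks */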
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
	struct cifs_fid_locks *cur;
	bool has_locks = false;

	down_read(&cinode->lock_sem);
	list_for_each_entry(cur, &cinode->llist, llist) {
		if (!list_empty(&cur->locks)) {
			has_locks = true;
			break;
		}
	}
	up_read(&cinode->lock_sem);
	return has_locks;
}

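/*
 * Acquire the semaphore for write by polling with down_write_trylock()
 * in 10ms steps rather than blocking inside down_write() itself.
 */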
void
cifs_down_write(struct rw_semaphore *sem)
{
	while (!down_write_trylock(sem))
		msleep(10);
}

static void cifsFileInfo_put_work(struct work_struct *work);

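/*
 * Allocate and initialize the per-open-file private data and link it
 * into the inode's and tcon's open file lists.
 */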
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
		  struct tcon_link *tlink, __u32 oplock)
{
	struct dentry *dentry = file_dentry(file);
	struct inode *inode = d_inode(dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifsFileInfo *cfile;
	struct cifs_fid_locks *fdlocks;
	struct cifs_tcon *tcon = tlink_tcon(tlink);
	struct TCP_Server_Info *server = tcon->ses->server;

	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
	if (cfile == NULL)
		return cfile;

	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
	if (!fdlocks) {
		kfree(cfile);
		return NULL;
	}

	INIT_LIST_HEAD(&fdlocks->locks);
	fdlocks->cfile = cfile;
	cfile->llist = fdlocks;

	cfile->count = 1;
	cfile->pid = current->tgid;
	cfile->uid = current_fsuid();
	cfile->dentry = dget(dentry);
	cfile->f_flags = file->f_flags;
	cfile->invalidHandle = false;
	cfile->tlink = cifs_get_tlink(tlink);
	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
	mutex_init(&cfile->fh_mutex);
	spin_lock_init(&cfile->file_info_lock);

	cifs_sb_active(inode->i_sb);

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	cifs_down_write(&cinode->lock_sem);
	list_add(&fdlocks->llist, &cinode->llist);
	up_write(&cinode->lock_sem);

	spin_lock(&tcon->open_file_lock);
	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
		oplock = fid->pending_open->oplock;
	list_del(&fid->pending_open->olist);

	fid->purge_cache = false;
	server->ops->set_fid(cfile, fid, oplock);

	list_add(&cfile->tlist, &tcon->openFileList);
	atomic_inc(&tcon->num_local_opens);

	/* if readable file instance put first in list*/
	spin_lock(&cinode->open_file_lock);
	if (file->f_mode & FMODE_READ)
		list_add(&cfile->flist, &cinode->openFileList);
	else
		list_add_tail(&cfile->flist, &cinode->openFileList);
	spin_unlock(&cinode->open_file_lock);
	spin_unlock(&tcon->open_file_lock);

	if (fid->purge_cache)
		cifs_zap_mapping(inode);

	file->private_data = cfile;
	return cfile;
}

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
	spin_lock(&cifs_file->file_info_lock);
	cifsFileInfo_get_locked(cifs_file);
	spin_unlock(&cifs_file->file_info_lock);
	return cifs_file;
}

static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct cifsLockInfo *li, *tmp;
	struct super_block *sb = inode->i_sb;

	/*
	 * Delete any outstanding lock records. We'll lose them when the file
	 * is closed anyway.
	 */
	cifs_down_write(&cifsi->lock_sem);
	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
		list_del(&li->llist);
		cifs_del_lock_waiters(li);
		kfree(li);
	}
	list_del(&cifs_file->llist->llist);
	kfree(cifs_file->llist);
	up_write(&cifsi->lock_sem);

	cifs_put_tlink(cifs_file->tlink);
	dput(cifs_file->dentry);
	cifs_sb_deactive(sb);
	kfree(cifs_file);
}

static void cifsFileInfo_put_work(struct work_struct *work)
{
	struct cifsFileInfo *cifs_file = container_of(work,
			struct cifsFileInfo, put);

	cifsFileInfo_put_final(cifs_file);
}

/**
 * cifsFileInfo_put - release a reference of file priv data
 *
 * Always potentially wait for oplock handler. See _cifsFileInfo_put().
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
	_cifsFileInfo_put(cifs_file, true, true);
}

/**
 * _cifsFileInfo_put - release a reference of file priv data
 *
 * This may involve closing the filehandle @cifs_file out on the
 * server. Must be called without holding tcon->open_file_lock,
 * cinode->open_file_lock and cifs_file->file_info_lock.
 *
 * If @wait_oplock_handler is true and we are releasing the last
 * reference, wait for any running oplock break handler of the file
 * and cancel any pending one. If calling this function from the
 * oplock break handler, you need to pass false.
 *
 */
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
		       bool wait_oplock_handler, bool offload)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct super_block *sb = inode->i_sb;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fid fid;
	struct cifs_pending_open open;
	bool oplock_break_cancelled;

	spin_lock(&tcon->open_file_lock);
	spin_lock(&cifsi->open_file_lock);
	spin_lock(&cifs_file->file_info_lock);
	if (--cifs_file->count > 0) {
		spin_unlock(&cifs_file->file_info_lock);
		spin_unlock(&cifsi->open_file_lock);
		spin_unlock(&tcon->open_file_lock);
		return;
	}
	spin_unlock(&cifs_file->file_info_lock);

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/* store open in pending opens to make sure we don't miss lease break */
	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

	/* remove it from the lists */
	list_del(&cifs_file->flist);
	list_del(&cifs_file->tlist);
	atomic_dec(&tcon->num_local_opens);

	if (list_empty(&cifsi->openFileList)) {
		cifs_dbg(FYI, "closing last open instance for inode %p\n",
			 d_inode(cifs_file->dentry));
		/*
		 * In strict cache mode we need to invalidate the mapping on
		 * the last close because it may cause an error when we open
		 * this file again and get at least a level II oplock.
		 */
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
		cifs_set_oplock_level(cifsi, 0);
	}

	spin_unlock(&cifsi->open_file_lock);
	spin_unlock(&tcon->open_file_lock);

	oplock_break_cancelled = wait_oplock_handler ?
		cancel_work_sync(&cifs_file->oplock_break) : false;

	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
		struct TCP_Server_Info *server = tcon->ses->server;
		unsigned int xid;

		xid = get_xid();
		if (server->ops->close_getattr)
			server->ops->close_getattr(xid, tcon, cifs_file);
		else if (server->ops->close)
			server->ops->close(xid, tcon, &cifs_file->fid);
		_free_xid(xid);
	}

	if (oplock_break_cancelled)
		cifs_done_oplock_break(cifsi);

	cifs_del_pending_open(&open);

	if (offload)
		queue_work(fileinfo_put_wq, &cifs_file->put);
	else
		cifsFileInfo_put_final(cifs_file);
}

int cifs_open(struct inode *inode, struct file *file)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct TCP_Server_Info *server;
	struct cifs_tcon *tcon;
	struct tcon_link *tlink;
	struct cifsFileInfo *cfile = NULL;
	char *full_path = NULL;
	bool posix_open_ok = false;
	struct cifs_fid fid;
	struct cifs_pending_open open;

	xid = get_xid();

	cifs_sb = CIFS_SB(inode->i_sb);
	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		free_xid(xid);
		return PTR_ERR(tlink);
	}
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	full_path = build_path_from_dentry(file_dentry(file));
	if (full_path == NULL) {
		rc = -ENOMEM;
		goto out;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
		 inode, file->f_flags, full_path);

	if (file->f_flags & O_DIRECT &&
	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
			file->f_op = &cifs_file_direct_nobrl_ops;
		else
			file->f_op = &cifs_file_direct_ops;
	}

	if (server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (!tcon->broken_posix_open && tcon->unix_ext &&
	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/* can not refresh inode info since size could be stale */
		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				cifs_sb->mnt_file_mode /* ignored */,
				file->f_flags, &oplock, &fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix open succeeded\n");
			posix_open_ok = true;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			if (tcon->ses->serverNOS)
				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
					 tcon->ses->serverName,
					 tcon->ses->serverNOS);
			tcon->broken_posix_open = true;
		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
			 (rc != -EOPNOTSUPP)) /* path not found or net err */
			goto out;
		/*
		 * Else fallthrough to retry open the old way on network i/o
		 * or DFS errors.
		 */
	}

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	cifs_add_pending_open(&fid, tlink, &open);

	if (!posix_open_ok) {
		if (server->ops->get_lease_key)
			server->ops->get_lease_key(inode, &fid);

		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
				  file->f_flags, &oplock, &fid, xid);
		if (rc) {
			cifs_del_pending_open(&open);
			goto out;
		}
	}

	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
	if (cfile == NULL) {
		if (server->ops->close)
			server->ops->close(xid, tcon, &fid);
		cifs_del_pending_open(&open);
		rc = -ENOMEM;
		goto out;
	}

	cifs_fscache_set_inode_cookie(inode, file);

	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
		/*
		 * Time to set mode which we can not set earlier due to
		 * problems creating new read-only files.
		 */
		struct cifs_unix_set_info_args args = {
			.mode	= inode->i_mode,
			.uid	= INVALID_UID, /* no change */
			.gid	= INVALID_GID, /* no change */
			.ctime	= NO_CHANGE_64,
			.atime	= NO_CHANGE_64,
			.mtime	= NO_CHANGE_64,
			.device	= 0,
		};
		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
				       cfile->pid);
	}

out:
	kfree(full_path);
	free_xid(xid);
	cifs_put_tlink(tlink);
	return rc;
}

static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
	if (cinode->can_cache_brlcks) {
		/* can cache locks - no need to relock */
		up_read(&cinode->lock_sem);
		return rc;
	}

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	up_read(&cinode->lock_sem);
	return rc;
}

static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsInodeInfo *cinode;
	struct inode *inode;
	char *full_path = NULL;
	int desired_access;
	int disposition = FILE_OPEN;
	int create_options = CREATE_NOT_DIR;
	struct cifs_open_parms oparms;

	xid = get_xid();
	mutex_lock(&cfile->fh_mutex);
	if (!cfile->invalidHandle) {
		mutex_unlock(&cfile->fh_mutex);
		rc = 0;
		free_xid(xid);
		return rc;
	}

	inode = d_inode(cfile->dentry);
	cifs_sb = CIFS_SB(inode->i_sb);
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	/*
	 * Can not grab rename sem here because various ops, including those
	 * that already have the rename sem can end up causing writepage to get
	 * called and if the server was down that means we end up here, and we
	 * can never tell if the caller already has the rename_sem.
	 */
	full_path = build_path_from_dentry(cfile->dentry);
	if (full_path == NULL) {
		rc = -ENOMEM;
		mutex_unlock(&cfile->fh_mutex);
		free_xid(xid);
		return rc;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
		 inode, cfile->f_flags, full_path);

	if (tcon->ses->server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (tcon->unix_ext && cap_unix(tcon->ses) &&
	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/*
		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
		 * original open. Must mask them off for a reopen.
		 */
		unsigned int oflags = cfile->f_flags &
						~(O_CREAT | O_EXCL | O_TRUNC);

		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
				     cifs_sb->mnt_file_mode /* ignored */,
				     oflags, &oplock, &cfile->fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix reopen succeeded\n");
			oparms.reconnect = true;
			goto reopen_success;
		}
		/*
		 * fallthrough to retry open the old way on errors, especially
		 * in the reconnect path it is important to retry hard
		 */
	}

	desired_access = cifs_convert_flags(cfile->f_flags);

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (cfile->f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (cfile->f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &cfile->fid);

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = cifs_create_options(cifs_sb, create_options);
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = &cfile->fid;
	oparms.reconnect = true;

	/*
	 * Can not refresh inode by passing in file_info buf to be returned by
	 * ops->open and then calling get_inode_info with returned buf since
	 * file might have write behind data that needs to be flushed and server
	 * version of file size can be stale. If we knew for sure that inode was
	 * not dirty locally we could do this.
	 */
	rc = server->ops->open(xid, &oparms, &oplock, NULL);
	if (rc == -ENOENT && oparms.reconnect == false) {
		/* durable handle timeout is expired - open the file again */
		rc = server->ops->open(xid, &oparms, &oplock, NULL);
		/* indicate that we need to relock the file */
		oparms.reconnect = true;
	}

	if (rc) {
		mutex_unlock(&cfile->fh_mutex);
		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
		cifs_dbg(FYI, "oplock: %d\n", oplock);
		goto reopen_error_exit;
	}

reopen_success:
	cfile->invalidHandle = false;
	mutex_unlock(&cfile->fh_mutex);
	cinode = CIFS_I(inode);

	if (can_flush) {
		rc = filemap_write_and_wait(inode->i_mapping);
		if (!is_interrupt_error(rc))
			mapping_set_error(inode->i_mapping, rc);

		if (tcon->unix_ext)
			rc = cifs_get_inode_info_unix(&inode, full_path,
						      inode->i_sb, xid);
		else
			rc = cifs_get_inode_info(&inode, full_path, NULL,
						 inode->i_sb, xid, NULL);
	}
	/*
	 * Else we are writing out data to server already and could deadlock if
	 * we tried to flush data, and since we do not know if we have data that
	 * would invalidate the current end of file on the server we can not go
	 * to the server to get the new inode info.
	 */

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	server->ops->set_fid(cfile, &cfile->fid, oplock);
	if (oparms.reconnect)
		cifs_relock_file(cfile);

reopen_error_exit:
	kfree(full_path);
	free_xid(xid);
	return rc;
}

int cifs_close(struct inode *inode, struct file *file)
{
	if (file->private_data != NULL) {
		_cifsFileInfo_put(file->private_data, true, false);
		file->private_data = NULL;
	}

	/* return code from the ->release op is always ignored */
	return 0;
}

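/*
 * Walk the tcon's open file list and try to reopen any persistent
 * handles that were invalidated by a reconnect.
 */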
void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
	struct cifsFileInfo *open_file;
	struct list_head *tmp;
	struct list_head *tmp1;
	struct list_head tmp_list;

	if (!tcon->use_persistent || !tcon->need_reopen_files)
		return;

	tcon->need_reopen_files = false;

	cifs_dbg(FYI, "Reopen persistent handles\n");
	INIT_LIST_HEAD(&tmp_list);

	/* list all files open on tree connection, reopen resilient handles */
	spin_lock(&tcon->open_file_lock);
	list_for_each(tmp, &tcon->openFileList) {
		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
		if (!open_file->invalidHandle)
			continue;
		cifsFileInfo_get(open_file);
		list_add_tail(&open_file->rlist, &tmp_list);
	}
	spin_unlock(&tcon->open_file_lock);

	list_for_each_safe(tmp, tmp1, &tmp_list) {
		open_file = list_entry(tmp, struct cifsFileInfo, rlist);
		if (cifs_reopen_file(open_file, false /* do not flush */))
			tcon->need_reopen_files = true;
		list_del_init(&open_file->rlist);
		cifsFileInfo_put(open_file);
	}
}

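/* Release the search handle and its private data when a directory is closed */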
int cifs_closedir(struct inode *inode, struct file *file)
{
	int rc = 0;
	unsigned int xid;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	char *buf;

	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

	if (cfile == NULL)
		return rc;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	cifs_dbg(FYI, "Freeing private data in close dir\n");
	spin_lock(&cfile->file_info_lock);
	if (server->ops->dir_needs_close(cfile)) {
		cfile->invalidHandle = true;
		spin_unlock(&cfile->file_info_lock);
		if (server->ops->close_dir)
			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
		else
			rc = -ENOSYS;
		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
		/* not much we can do if it fails anyway, ignore rc */
		rc = 0;
	} else
		spin_unlock(&cfile->file_info_lock);

	buf = cfile->srch_inf.ntwrk_buf_start;
	if (buf) {
		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
		cfile->srch_inf.ntwrk_buf_start = NULL;
		if (cfile->srch_inf.smallBuf)
			cifs_small_buf_release(buf);
		else
			cifs_buf_release(buf);
	}

	cifs_put_tlink(cfile->tlink);
	kfree(file->private_data);
	file->private_data = NULL;
	/* BB can we lock the filestruct while this is going on? */
	free_xid(xid);
	return rc;
}

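/* Allocate and initialize a byte-range lock record for the given range */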
static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
{
	struct cifsLockInfo *lock =
		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
	if (!lock)
		return lock;
	lock->offset = offset;
	lock->length = length;
	lock->type = type;
	lock->pid = current->tgid;
	lock->flags = flags;
	INIT_LIST_HEAD(&lock->blist);
	init_waitqueue_head(&lock->block_q);
	return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
		list_del_init(&li->blist);
		wake_up(&li->block_q);
	}
}

#define CIFS_LOCK_OP	0
#define CIFS_READ_OP	1
#define CIFS_WRITE_OP	2

/* @rw_check : 0 - no op, 1 - read, 2 - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
			    __u64 length, __u8 type, __u16 flags,
			    struct cifsFileInfo *cfile,
			    struct cifsLockInfo **conf_lock, int rw_check)
{
	struct cifsLockInfo *li;
	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

	list_for_each_entry(li, &fdlocks->locks, llist) {
		if (offset + length <= li->offset ||
		    offset >= li->offset + li->length)
			continue;
		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
		    server->ops->compare_fids(cfile, cur_cfile)) {
			/* shared lock prevents write op through the same fid */
			if (!(li->type & server->vals->shared_lock_type) ||
			    rw_check != CIFS_WRITE_OP)
				continue;
		}
		if ((type & server->vals->shared_lock_type) &&
		    ((server->ops->compare_fids(cfile, cur_cfile) &&
		     current->tgid == li->pid) || type == li->type))
			continue;
		if (rw_check == CIFS_LOCK_OP &&
		    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
		    server->ops->compare_fids(cfile, cur_cfile))
			continue;
		if (conf_lock)
			*conf_lock = li;
		return true;
	}
	return false;
}

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
			__u8 type, __u16 flags,
			struct cifsLockInfo **conf_lock, int rw_check)
{
	bool rc = false;
	struct cifs_fid_locks *cur;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

	list_for_each_entry(cur, &cinode->llist, llist) {
		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
						 flags, cfile, conf_lock,
						 rw_check);
		if (rc)
			break;
	}

	return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure
 * with its properties. Otherwise, set the flock type to F_UNLCK if we can
 * cache brlocks, or leave it the same if we can't. Returns 0 if we don't
 * need to request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
	       __u8 type, struct file_lock *flock)
{
	int rc = 0;
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	bool exist;

	down_read(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, offset, length, type,
					flock->fl_flags, &conf_lock,
					CIFS_LOCK_OP);
	if (exist) {
		flock->fl_start = conf_lock->offset;
		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
		flock->fl_pid = conf_lock->pid;
		if (conf_lock->type & server->vals->shared_lock_type)
			flock->fl_type = F_RDLCK;
		else
			flock->fl_type = F_WRLCK;
	} else if (!cinode->can_cache_brlcks)
		rc = 1;
	else
		flock->fl_type = F_UNLCK;

	up_read(&cinode->lock_sem);
	return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	cifs_down_write(&cinode->lock_sem);
	list_add_tail(&lock->llist, &cfile->llist->locks);
	up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
		 bool wait)
{
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	bool exist;
	int rc = 0;

try_again:
	exist = false;
	cifs_down_write(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
					lock->type, lock->flags, &conf_lock,
					CIFS_LOCK_OP);
	if (!exist && cinode->can_cache_brlcks) {
		list_add_tail(&lock->llist, &cfile->llist->locks);
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (!exist)
		rc = 1;
	else if (!wait)
		rc = -EACCES;
	else {
		list_add_tail(&lock->blist, &conf_lock->blist);
		up_write(&cinode->lock_sem);
		rc = wait_event_interruptible(lock->block_q,
					(lock->blist.prev == &lock->blist) &&
					(lock->blist.next == &lock->blist));
		if (!rc)
			goto try_again;
		cifs_down_write(&cinode->lock_sem);
		list_del_init(&lock->blist);
	}

	up_write(&cinode->lock_sem);
	return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can
 * cache brlocks, or leave it the same if we can't. Returns 0 if we don't
 * need to request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
	int rc = 0;
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	unsigned char saved_type = flock->fl_type;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return 1;

	down_read(&cinode->lock_sem);
	posix_test_lock(file, flock);

	if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
		flock->fl_type = saved_type;
		rc = 1;
	}

	up_read(&cinode->lock_sem);
	return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if we need to request to the server;
 * 3) <0, if an error occurs while setting the lock.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	int rc = 1;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return rc;

try_again:
	cifs_down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

	rc = posix_lock_file(file, flock, NULL);
	up_write(&cinode->lock_sem);
	if (rc == FILE_LOCK_DEFERRED) {
		rc = wait_event_interruptible(flock->fl_wait, !flock->fl_blocker);
		if (!rc)
			goto try_again;
		locks_delete_block(flock);
	}
	return rc;
}

static int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
	unsigned int xid;
	int rc = 0, stored_rc;
	struct cifsLockInfo *li, *tmp;
	struct cifs_tcon *tcon;
	unsigned int num, max_num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
	int i;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
		free_xid(xid);
		return -EINVAL;
	}

	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
		     PAGE_SIZE);
	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
			PAGE_SIZE);
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf) {
		free_xid(xid);
		return -ENOMEM;
	}

	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (li->type != types[i])
				continue;
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       (__u8)li->type, 0, num,
						       buf);
				if (stored_rc)
					rc = stored_rc;
				cur = buf;
				num = 0;
			} else
				cur++;
		}
		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       (__u8)types[i], 0, num, buf);
			if (stored_rc)
				rc = stored_rc;
		}
	}

	kfree(buf);
	free_xid(xid);
	return rc;
}

static __u32
hash_lockowner(fl_owner_t owner)
{
	return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}

struct lock_to_push {
	struct list_head llist;
	__u64 offset;
	__u64 length;
	__u32 pid;
	__u16 netfid;
	__u8 type;
};

static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
	struct inode *inode = d_inode(cfile->dentry);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct file_lock *flock;
	struct file_lock_context *flctx = inode->i_flctx;
	unsigned int count = 0, i;
	int rc = 0, xid, type;
	struct list_head locks_to_send, *el;
	struct lock_to_push *lck, *tmp;
	__u64 length;

	xid = get_xid();

	if (!flctx)
		goto out;

	spin_lock(&flctx->flc_lock);
	list_for_each(el, &flctx->flc_posix) {
		count++;
	}
	spin_unlock(&flctx->flc_lock);

	INIT_LIST_HEAD(&locks_to_send);

	/*
	 * Allocating count locks is enough because no FL_POSIX locks can be
	 * added to the list while we are holding cinode->lock_sem that
	 * protects locking operations of this inode.
	 */
	for (i = 0; i < count; i++) {
		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
		if (!lck) {
			rc = -ENOMEM;
			goto err_out;
		}
		list_add_tail(&lck->llist, &locks_to_send);
	}

	el = locks_to_send.next;
	spin_lock(&flctx->flc_lock);
	list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
		if (el == &locks_to_send) {
			/*
			 * The list ended. We don't have enough allocated
			 * structures - something is really wrong.
			 */
			cifs_dbg(VFS, "Can't push all brlocks!\n");
			break;
		}
		length = 1 + flock->fl_end - flock->fl_start;
		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
			type = CIFS_RDLCK;
		else
			type = CIFS_WRLCK;
		lck = list_entry(el, struct lock_to_push, llist);
		lck->pid = hash_lockowner(flock->fl_owner);
		lck->netfid = cfile->fid.netfid;
		lck->length = length;
		lck->type = type;
		lck->offset = flock->fl_start;
		el = el->next;
	}
	spin_unlock(&flctx->flc_lock);

	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		int stored_rc;

		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
					     lck->offset, lck->length, NULL,
					     lck->type, 0);
		if (stored_rc)
			rc = stored_rc;
		list_del(&lck->llist);
		kfree(lck);
	}

out:
	free_xid(xid);
	return rc;
err_out:
	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		list_del(&lck->llist);
		kfree(lck);
	}
	goto out;
}

static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	/* we are going to update can_cache_brlcks here - need a write access */
	cifs_down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	cinode->can_cache_brlcks = false;
	up_write(&cinode->lock_sem);
	return rc;
}

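/*
 * Decode the flags and type of a VFS file_lock into the lock/unlock
 * operation and the server lock type to request.
 */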
static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
		bool *wait_flag, struct TCP_Server_Info *server)
{
	if (flock->fl_flags & FL_POSIX)
		cifs_dbg(FYI, "Posix\n");
	if (flock->fl_flags & FL_FLOCK)
		cifs_dbg(FYI, "Flock\n");
	if (flock->fl_flags & FL_SLEEP) {
		cifs_dbg(FYI, "Blocking lock\n");
		*wait_flag = true;
	}
	if (flock->fl_flags & FL_ACCESS)
		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
	if (flock->fl_flags & FL_LEASE)
		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
	if (flock->fl_flags &
	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
	       FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
		cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);

	*type = server->vals->large_lock_type;
	if (flock->fl_type == F_WRLCK) {
		cifs_dbg(FYI, "F_WRLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_UNLCK) {
		cifs_dbg(FYI, "F_UNLCK\n");
		*type |= server->vals->unlock_lock_type;
		*unlock = 1;
		/* Check if unlock includes more than one lock range */
	} else if (flock->fl_type == F_RDLCK) {
		cifs_dbg(FYI, "F_RDLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_EXLCK) {
		cifs_dbg(FYI, "F_EXLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_SHLCK) {
		cifs_dbg(FYI, "F_SHLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else
		cifs_dbg(FYI, "Unknown type of lock\n");
}

static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	__u16 netfid = cfile->fid.netfid;

	if (posix_lck) {
		int posix_lock_type;

		rc = cifs_posix_lock_test(file, flock);
		if (!rc)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;
		rc = CIFSSMBPosixLock(xid, tcon, netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length, flock,
				      posix_lock_type, wait_flag);
		return rc;
	}

	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
	if (!rc)
		return rc;

	/* BB we could chain these into one lock request BB */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 0, 1, false);
		flock->fl_type = F_UNLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
		return 0;
	}

	if (type & server->vals->shared_lock_type) {
		flock->fl_type = F_WRLCK;
		return 0;
	}

	type &= ~server->vals->exclusive_lock_type;

	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
				    type | server->vals->shared_lock_type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
			type | server->vals->shared_lock_type, 0, 1, false);
		flock->fl_type = F_RDLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
	} else
		flock->fl_type = F_WRLCK;

	return 0;
}

void
cifs_move_llist(struct list_head *source, struct list_head *dest)
{
	struct list_head *li, *tmp;
	list_for_each_safe(li, tmp, source)
		list_move(li, dest);
}

void
cifs_free_llist(struct list_head *llist)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, llist, llist) {
		cifs_del_lock_waiters(li);
		list_del(&li->llist);
		kfree(li);
	}
}

static int
cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
		  unsigned int xid)
{
	int rc = 0, stored_rc;
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
	unsigned int i;
	unsigned int max_num, num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifsLockInfo *li, *tmp;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct list_head tmp_llist;

	INIT_LIST_HEAD(&tmp_llist);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
		return -EINVAL;

	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
		     PAGE_SIZE);
	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
			PAGE_SIZE);
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	cifs_down_write(&cinode->lock_sem);
	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (flock->fl_start > li->offset ||
			    (flock->fl_start + length) <
			    (li->offset + li->length))
				continue;
			if (current->tgid != li->pid)
				continue;
			if (types[i] != li->type)
				continue;
			if (cinode->can_cache_brlcks) {
				/*
				 * We can cache brlock requests - simply remove
				 * a lock from the file's list.
				 */
				list_del(&li->llist);
				cifs_del_lock_waiters(li);
				kfree(li);
				continue;
			}
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			/*
			 * We need to save a lock here to let us add it again to
			 * the file's list if the unlock range request fails on
			 * the server.
			 */
			list_move(&li->llist, &tmp_llist);
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       li->type, num, 0, buf);
				if (stored_rc) {
					/*
					 * We failed on the unlock range
					 * request - add all locks from the tmp
					 * list to the head of the file's list.
					 */
					cifs_move_llist(&tmp_llist,
							&cfile->llist->locks);
					rc = stored_rc;
				} else
					/*
					 * The unlock range request succeeded -
					 * free the tmp list.
					 */
					cifs_free_llist(&tmp_llist);
				cur = buf;
				num = 0;
			} else
				cur++;
		}
		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       types[i], num, 0, buf);
			if (stored_rc) {
				cifs_move_llist(&tmp_llist,
						&cfile->llist->locks);
				rc = stored_rc;
			} else
				cifs_free_llist(&tmp_llist);
		}
	}

	up_write(&cinode->lock_sem);
	kfree(buf);
	return rc;
}

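/*
 * Handle an F_SETLK/F_SETLKW request: record the lock locally when it
 * can be cached, otherwise send the lock or unlock to the server.
 */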
static int
cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, int lock, int unlock,
	   unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct inode *inode = d_inode(cfile->dentry);

	if (posix_lck) {
		int posix_lock_type;

		rc = cifs_posix_lock_set(file, flock);
		if (!rc || rc < 0)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;

		if (unlock == 1)
			posix_lock_type = CIFS_UNLCK;

		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length,
				      NULL, posix_lock_type, wait_flag);
		goto out;
	}

	if (lock) {
		struct cifsLockInfo *lock;

		lock = cifs_lock_init(flock->fl_start, length, type,
				      flock->fl_flags);
		if (!lock)
			return -ENOMEM;

		rc = cifs_lock_add_if(cfile, lock, wait_flag);
		if (rc < 0) {
			kfree(lock);
			return rc;
		}
		if (!rc)
			goto out;

		/*
		 * Windows 7 server can delay breaking lease from read to None
		 * if we set a byte-range lock on a file - break it explicitly
		 * before sending the lock to the server to be sure the next
		 * read won't conflict with non-overlapping locks due to
		 * page reading.
		 */
		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
					CIFS_CACHE_READ(CIFS_I(inode))) {
			cifs_zap_mapping(inode);
			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
				 inode);
			CIFS_I(inode)->oplock = 0;
		}

		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 1, 0, wait_flag);
		if (rc) {
			kfree(lock);
			return rc;
		}

		cifs_lock_add(cfile, lock);
	} else if (unlock)
		rc = server->ops->mand_unlock_range(cfile, flock, xid);

out:
	if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
		/*
		 * If this is a request to remove all locks because we
		 * are closing the file, it doesn't matter if the
		 * unlocking failed as both cifs.ko and the SMB server
		 * remove the lock on file close
		 */
		if (rc) {
			cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
			if (!(flock->fl_flags & FL_CLOSE))
				return rc;
		}
		rc = locks_lock_file_wait(file, flock);
	}
	return rc;
}

int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
{
	int rc, xid;
	int lock = 0, unlock = 0;
	bool wait_flag = false;
	bool posix_lck = false;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct cifsFileInfo *cfile;
	__u32 type;

	rc = -EACCES;
	xid = get_xid();

	if (!(fl->fl_flags & FL_FLOCK)) {
		free_xid(xid);
		return -ENOLCK;
	}

	cfile = (struct cifsFileInfo *)file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
			tcon->ses->server);
	cifs_sb = CIFS_FILE_SB(file);

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		posix_lck = true;

	if (!lock && !unlock) {
		/*
		 * if no lock or unlock then nothing to do since we do not
		 * know what it is
		 */
		free_xid(xid);
		return -EOPNOTSUPP;
	}

	rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
			xid);
	free_xid(xid);
	return rc;
}

int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
{
	int rc, xid;
	int lock = 0, unlock = 0;
	bool wait_flag = false;
	bool posix_lck = false;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct cifsFileInfo *cfile;
	__u32 type;

	rc = -EACCES;
	xid = get_xid();

	cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
		 cmd, flock->fl_flags, flock->fl_type,
		 flock->fl_start, flock->fl_end);

	cfile = (struct cifsFileInfo *)file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
			tcon->ses->server);
	cifs_sb = CIFS_FILE_SB(file);

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		posix_lck = true;
	/*
	 * BB add code here to normalize offset and length to account for
	 * negative length which we can not accept over the wire.
	 */
	if (IS_GETLK(cmd)) {
		rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
		free_xid(xid);
		return rc;
	}

	if (!lock && !unlock) {
		/*
		 * if no lock or unlock then nothing to do since we do not
		 * know what it is
		 */
		free_xid(xid);
		return -EOPNOTSUPP;
	}

	rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
			xid);
	free_xid(xid);
	return rc;
}

/*
 * update the file size (if needed) after a write. Should be called with
 * the inode->i_lock held
 */
void
cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
		      unsigned int bytes_written)
{
	loff_t end_of_write = offset + bytes_written;

	if (end_of_write > cifsi->server_eof)
		cifsi->server_eof = end_of_write;
}

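/*
 * Write data to the server through an open file handle, retrying on
 * -EAGAIN and reopening an invalidated handle, then update the cached
 * end of file and inode size.
 */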
static ssize_t
cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
	   size_t write_size, loff_t *offset)
{
	int rc = 0;
	unsigned int bytes_written = 0;
	unsigned int total_written;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	struct dentry *dentry = open_file->dentry;
	struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
	struct cifs_io_parms io_parms;

	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
		 write_size, *offset, dentry);

	tcon = tlink_tcon(open_file->tlink);
	server = tcon->ses->server;

	if (!server->ops->sync_write)
		return -ENOSYS;

	xid = get_xid();

	for (total_written = 0; write_size > total_written;
	     total_written += bytes_written) {
		rc = -EAGAIN;
		while (rc == -EAGAIN) {
			struct kvec iov[2];
			unsigned int len;

			if (open_file->invalidHandle) {
				/* we could deadlock if we called
				   filemap_fdatawait from here so tell
				   reopen_file not to flush data to
				   server now */
				rc = cifs_reopen_file(open_file, false);
				if (rc != 0)
					break;
			}

			len = min(server->ops->wp_retry_size(d_inode(dentry)),
				  (unsigned int)write_size - total_written);
			/* iov[0] is reserved for smb header */
			iov[1].iov_base = (char *)write_data + total_written;
			iov[1].iov_len = len;
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = len;
			rc = server->ops->sync_write(xid, &open_file->fid,
					&io_parms, &bytes_written, iov, 1);
		}
		if (rc || (bytes_written == 0)) {
			if (total_written)
				break;
			else {
				free_xid(xid);
				return rc;
			}
		} else {
			spin_lock(&d_inode(dentry)->i_lock);
			cifs_update_eof(cifsi, *offset, bytes_written);
			spin_unlock(&d_inode(dentry)->i_lock);
			*offset += bytes_written;
		}
	}

	cifs_stats_bytes_written(tcon, total_written);

	if (total_written > 0) {
		spin_lock(&d_inode(dentry)->i_lock);
		if (*offset > d_inode(dentry)->i_size)
			i_size_write(d_inode(dentry), *offset);
		spin_unlock(&d_inode(dentry)->i_lock);
	}
	mark_inode_dirty_sync(d_inode(dentry));
	free_xid(xid);
	return total_written;
}

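/*
 * Find a readable open handle for the inode, preferring one that is not
 * marked invalid; takes a reference on the handle it returns.
 */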
struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
					bool fsuid_only)
{
	struct cifsFileInfo *open_file = NULL;
	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&cifs_inode->open_file_lock);
	/* we could simply get the first_list_entry since write-only entries
	   are always at the end of the list but since the first entry might
	   have a close pending, we go through the whole list */
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
			if (!open_file->invalidHandle) {
				/* found a good file */
				/* lock it so it will not be closed on us */
				cifsFileInfo_get(open_file);
				spin_unlock(&cifs_inode->open_file_lock);
				return open_file;
			} /* else might as well continue, and look for
			     another, or simply have the caller reopen it
			     again rather than trying to fix this handle */
		} else /* write only file */
			break; /* write only files are last so must be done */
	}
	spin_unlock(&cifs_inode->open_file_lock);
	return NULL;
}

1959 /* Return -EBADF if no handle is found and general rc otherwise */
1961 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only,
1962 struct cifsFileInfo **ret_file)
1964 struct cifsFileInfo *open_file, *inv_file = NULL;
1965 struct cifs_sb_info *cifs_sb;
1966 bool any_available = false;
1967 int rc = -EBADF;
1968 unsigned int refind = 0;
1970 *ret_file = NULL;
1973 * Having a null inode here (because mapping->host was set to zero by
1974 * the VFS or MM) should not happen but we had reports of on oops (due
1975 * to it being zero) during stress testcases so we need to check for it
1978 if (cifs_inode == NULL) {
1979 cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
1980 dump_stack();
1981 return rc;
1984 cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1986 /* only filter by fsuid on multiuser mounts */
1987 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1988 fsuid_only = false;
1990 spin_lock(&cifs_inode->open_file_lock);
1991 refind_writable:
1992 if (refind > MAX_REOPEN_ATT) {
1993 spin_unlock(&cifs_inode->open_file_lock);
1994 return rc;
1996 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1997 if (!any_available && open_file->pid != current->tgid)
1998 continue;
1999 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2000 continue;
2001 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2002 if (!open_file->invalidHandle) {
2003 /* found a good writable file */
2004 cifsFileInfo_get(open_file);
2005 spin_unlock(&cifs_inode->open_file_lock);
2006 *ret_file = open_file;
2007 return 0;
2008 } else {
2009 if (!inv_file)
2010 inv_file = open_file;
2014 /* couldn't find a usable FH with the same pid, try any available */
2015 if (!any_available) {
2016 any_available = true;
2017 goto refind_writable;
2020 if (inv_file) {
2021 any_available = false;
2022 cifsFileInfo_get(inv_file);
2025 spin_unlock(&cifs_inode->open_file_lock);
2027 if (inv_file) {
2028 rc = cifs_reopen_file(inv_file, false);
2029 if (!rc) {
2030 *ret_file = inv_file;
2031 return 0;
2034 spin_lock(&cifs_inode->open_file_lock);
2035 list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2036 spin_unlock(&cifs_inode->open_file_lock);
2037 cifsFileInfo_put(inv_file);
2038 ++refind;
2039 inv_file = NULL;
2040 spin_lock(&cifs_inode->open_file_lock);
2041 goto refind_writable;
2044 return rc;
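/*
* Convenience wrapper around cifs_get_writable_file() for callers that
* only want the handle (or NULL) rather than a return code.
*/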
2047 struct cifsFileInfo *
2048 find_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only)
2050 struct cifsFileInfo *cfile;
2051 int rc;
2053 rc = cifs_get_writable_file(cifs_inode, fsuid_only, &cfile);
2054 if (rc)
2055 cifs_dbg(FYI, "couldn't find writable handle rc=%d\n", rc);
2057 return cfile;
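/*
* Walk the tcon's open file list looking for a handle whose dentry
* resolves to @name, and return a writable handle for its inode.
* Returns -ENOENT if no open handle matches the path.
*/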
2061 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2062 struct cifsFileInfo **ret_file)
2064 struct list_head *tmp;
2065 struct cifsFileInfo *cfile;
2066 struct cifsInodeInfo *cinode;
2067 char *full_path;
2069 *ret_file = NULL;
2071 spin_lock(&tcon->open_file_lock);
2072 list_for_each(tmp, &tcon->openFileList) {
2073 cfile = list_entry(tmp, struct cifsFileInfo,
2074 tlist);
2075 full_path = build_path_from_dentry(cfile->dentry);
2076 if (full_path == NULL) {
2077 spin_unlock(&tcon->open_file_lock);
2078 return -ENOMEM;
2080 if (strcmp(full_path, name)) {
2081 kfree(full_path);
2082 continue;
2085 kfree(full_path);
2086 cinode = CIFS_I(d_inode(cfile->dentry));
2087 spin_unlock(&tcon->open_file_lock);
2088 return cifs_get_writable_file(cinode, false, ret_file);
2091 spin_unlock(&tcon->open_file_lock);
2092 return -ENOENT;
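/*
* Same idea as cifs_get_writable_path(), but returns a readable handle
* for the inode behind @name instead of a writable one.
*/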
2096 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2097 struct cifsFileInfo **ret_file)
2099 struct list_head *tmp;
2100 struct cifsFileInfo *cfile;
2101 struct cifsInodeInfo *cinode;
2102 char *full_path;
2104 *ret_file = NULL;
2106 spin_lock(&tcon->open_file_lock);
2107 list_for_each(tmp, &tcon->openFileList) {
2108 cfile = list_entry(tmp, struct cifsFileInfo,
2109 tlist);
2110 full_path = build_path_from_dentry(cfile->dentry);
2111 if (full_path == NULL) {
2112 spin_unlock(&tcon->open_file_lock);
2113 return -ENOMEM;
2115 if (strcmp(full_path, name)) {
2116 kfree(full_path);
2117 continue;
2120 kfree(full_path);
2121 cinode = CIFS_I(d_inode(cfile->dentry));
2122 spin_unlock(&tcon->open_file_lock);
2123 *ret_file = find_readable_file(cinode, false);
2124 return *ret_file ? 0 : -ENOENT;
2127 spin_unlock(&tcon->open_file_lock);
2128 return -ENOENT;
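/*
* Write out the dirty range [from, to) of a single page using any
* writable handle we can find for the inode, clamping the range so we
* never extend the file past the current i_size.
*/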
2131 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2133 struct address_space *mapping = page->mapping;
2134 loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2135 char *write_data;
2136 int rc = -EFAULT;
2137 int bytes_written = 0;
2138 struct inode *inode;
2139 struct cifsFileInfo *open_file;
2141 if (!mapping || !mapping->host)
2142 return -EFAULT;
2144 inode = page->mapping->host;
2146 offset += (loff_t)from;
2147 write_data = kmap(page);
2148 write_data += from;
2150 if ((to > PAGE_SIZE) || (from > to)) {
2151 kunmap(page);
2152 return -EIO;
2155 /* racing with truncate? */
2156 if (offset > mapping->host->i_size) {
2157 kunmap(page);
2158 return 0; /* don't care */
2161 /* check to make sure that we are not extending the file */
2162 if (mapping->host->i_size - offset < (loff_t)to)
2163 to = (unsigned)(mapping->host->i_size - offset);
2165 rc = cifs_get_writable_file(CIFS_I(mapping->host), false, &open_file);
2166 if (!rc) {
2167 bytes_written = cifs_write(open_file, open_file->pid,
2168 write_data, to - from, &offset);
2169 cifsFileInfo_put(open_file);
2170 /* Does mm or vfs already set times? */
2171 inode->i_atime = inode->i_mtime = current_time(inode);
2172 if ((bytes_written > 0) && (offset))
2173 rc = 0;
2174 else if (bytes_written < 0)
2175 rc = bytes_written;
2176 else
2177 rc = -EFAULT;
2178 } else {
2179 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2180 if (!is_retryable_error(rc))
2181 rc = -EIO;
2184 kunmap(page);
2185 return rc;
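/*
* Allocate a writedata structure and fill it with up to @tofind dirty
* pages from the mapping, starting at *index and not going past @end.
*/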
2188 static struct cifs_writedata *
2189 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2190 pgoff_t end, pgoff_t *index,
2191 unsigned int *found_pages)
2193 struct cifs_writedata *wdata;
2195 wdata = cifs_writedata_alloc((unsigned int)tofind,
2196 cifs_writev_complete);
2197 if (!wdata)
2198 return NULL;
2200 *found_pages = find_get_pages_range_tag(mapping, index, end,
2201 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2202 return wdata;
2205 static unsigned int
2206 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
2207 struct address_space *mapping,
2208 struct writeback_control *wbc,
2209 pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2211 unsigned int nr_pages = 0, i;
2212 struct page *page;
2214 for (i = 0; i < found_pages; i++) {
2215 page = wdata->pages[i];
2216 /*
2217 * At this point we hold neither the i_pages lock nor the
2218 * page lock: the page may be truncated or invalidated
2219 * (changing page->mapping to NULL), or even swizzled
2220 * back from swapper_space to tmpfs file mapping.
2221 */
2223 if (nr_pages == 0)
2224 lock_page(page);
2225 else if (!trylock_page(page))
2226 break;
2228 if (unlikely(page->mapping != mapping)) {
2229 unlock_page(page);
2230 break;
2233 if (!wbc->range_cyclic && page->index > end) {
2234 *done = true;
2235 unlock_page(page);
2236 break;
2239 if (*next && (page->index != *next)) {
2240 /* Not next consecutive page */
2241 unlock_page(page);
2242 break;
2245 if (wbc->sync_mode != WB_SYNC_NONE)
2246 wait_on_page_writeback(page);
2248 if (PageWriteback(page) ||
2249 !clear_page_dirty_for_io(page)) {
2250 unlock_page(page);
2251 break;
2254 /*
2255 * This actually clears the dirty bit in the radix tree.
2256 * See cifs_writepage() for more commentary.
2257 */
2258 set_page_writeback(page);
2259 if (page_offset(page) >= i_size_read(mapping->host)) {
2260 *done = true;
2261 unlock_page(page);
2262 end_page_writeback(page);
2263 break;
2266 wdata->pages[i] = page;
2267 *next = page->index + 1;
2268 ++nr_pages;
2271 /* reset index to refind any pages skipped */
2272 if (nr_pages == 0)
2273 *index = wdata->pages[0]->index + 1;
2275 /* put any pages we aren't going to use */
2276 for (i = nr_pages; i < found_pages; i++) {
2277 put_page(wdata->pages[i]);
2278 wdata->pages[i] = NULL;
2281 return nr_pages;
2284 static int
2285 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2286 struct address_space *mapping, struct writeback_control *wbc)
2288 int rc;
2289 struct TCP_Server_Info *server =
2290 tlink_tcon(wdata->cfile->tlink)->ses->server;
2292 wdata->sync_mode = wbc->sync_mode;
2293 wdata->nr_pages = nr_pages;
2294 wdata->offset = page_offset(wdata->pages[0]);
2295 wdata->pagesz = PAGE_SIZE;
2296 wdata->tailsz = min(i_size_read(mapping->host) -
2297 page_offset(wdata->pages[nr_pages - 1]),
2298 (loff_t)PAGE_SIZE);
2299 wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2300 wdata->pid = wdata->cfile->pid;
2302 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2303 if (rc)
2304 return rc;
2306 if (wdata->cfile->invalidHandle)
2307 rc = -EAGAIN;
2308 else
2309 rc = server->ops->async_writev(wdata, cifs_writedata_release);
2311 return rc;
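/*
* Write back dirty pages in wsize-sized batches: grab credits and a
* writable handle, gather a run of contiguous dirty pages, and send
* them with an async write, retrying retryable errors.
*/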
2314 static int cifs_writepages(struct address_space *mapping,
2315 struct writeback_control *wbc)
2317 struct inode *inode = mapping->host;
2318 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2319 struct TCP_Server_Info *server;
2320 bool done = false, scanned = false, range_whole = false;
2321 pgoff_t end, index;
2322 struct cifs_writedata *wdata;
2323 struct cifsFileInfo *cfile = NULL;
2324 int rc = 0;
2325 int saved_rc = 0;
2326 unsigned int xid;
2328 /*
2329 * If wsize is smaller than the page cache size, default to writing
2330 * one page at a time via cifs_writepage.
2331 */
2332 if (cifs_sb->wsize < PAGE_SIZE)
2333 return generic_writepages(mapping, wbc);
2335 xid = get_xid();
2336 if (wbc->range_cyclic) {
2337 index = mapping->writeback_index; /* Start from prev offset */
2338 end = -1;
2339 } else {
2340 index = wbc->range_start >> PAGE_SHIFT;
2341 end = wbc->range_end >> PAGE_SHIFT;
2342 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2343 range_whole = true;
2344 scanned = true;
2346 server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2347 retry:
2348 while (!done && index <= end) {
2349 unsigned int i, nr_pages, found_pages, wsize;
2350 pgoff_t next = 0, tofind, saved_index = index;
2351 struct cifs_credits credits_on_stack;
2352 struct cifs_credits *credits = &credits_on_stack;
2353 int get_file_rc = 0;
2355 if (cfile)
2356 cifsFileInfo_put(cfile);
2358 rc = cifs_get_writable_file(CIFS_I(inode), false, &cfile);
2360 /* in case of an error, store it to return later */
2361 if (rc)
2362 get_file_rc = rc;
2364 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2365 &wsize, credits);
2366 if (rc != 0) {
2367 done = true;
2368 break;
2371 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2373 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2374 &found_pages);
2375 if (!wdata) {
2376 rc = -ENOMEM;
2377 done = true;
2378 add_credits_and_wake_if(server, credits, 0);
2379 break;
2382 if (found_pages == 0) {
2383 kref_put(&wdata->refcount, cifs_writedata_release);
2384 add_credits_and_wake_if(server, credits, 0);
2385 break;
2388 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2389 end, &index, &next, &done);
2391 /* nothing to write? */
2392 if (nr_pages == 0) {
2393 kref_put(&wdata->refcount, cifs_writedata_release);
2394 add_credits_and_wake_if(server, credits, 0);
2395 continue;
2398 wdata->credits = credits_on_stack;
2399 wdata->cfile = cfile;
2400 cfile = NULL;
2402 if (!wdata->cfile) {
2403 cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2404 get_file_rc);
2405 if (is_retryable_error(get_file_rc))
2406 rc = get_file_rc;
2407 else
2408 rc = -EBADF;
2409 } else
2410 rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2412 for (i = 0; i < nr_pages; ++i)
2413 unlock_page(wdata->pages[i]);
2415 /* send failure -- clean up the mess */
2416 if (rc != 0) {
2417 add_credits_and_wake_if(server, &wdata->credits, 0);
2418 for (i = 0; i < nr_pages; ++i) {
2419 if (is_retryable_error(rc))
2420 redirty_page_for_writepage(wbc,
2421 wdata->pages[i]);
2422 else
2423 SetPageError(wdata->pages[i]);
2424 end_page_writeback(wdata->pages[i]);
2425 put_page(wdata->pages[i]);
2427 if (!is_retryable_error(rc))
2428 mapping_set_error(mapping, rc);
2430 kref_put(&wdata->refcount, cifs_writedata_release);
2432 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2433 index = saved_index;
2434 continue;
2437 /* Return immediately if we received a signal during writing */
2438 if (is_interrupt_error(rc)) {
2439 done = true;
2440 break;
2443 if (rc != 0 && saved_rc == 0)
2444 saved_rc = rc;
2446 wbc->nr_to_write -= nr_pages;
2447 if (wbc->nr_to_write <= 0)
2448 done = true;
2450 index = next;
2453 if (!scanned && !done) {
2454 /*
2455 * We hit the last page and there is more work to be done: wrap
2456 * back to the start of the file.
2457 */
2458 scanned = true;
2459 index = 0;
2460 goto retry;
2463 if (saved_rc != 0)
2464 rc = saved_rc;
2466 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2467 mapping->writeback_index = index;
2469 if (cfile)
2470 cifsFileInfo_put(cfile);
2471 free_xid(xid);
2472 return rc;
2475 static int
2476 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2478 int rc;
2479 unsigned int xid;
2481 xid = get_xid();
2482 /* BB add check for wbc flags */
2483 get_page(page);
2484 if (!PageUptodate(page))
2485 cifs_dbg(FYI, "ppw - page not up to date\n");
2487 /*
2488 * Set the "writeback" flag, and clear "dirty" in the radix tree.
2489 *
2490 * A writepage() implementation always needs to do either this,
2491 * or re-dirty the page with "redirty_page_for_writepage()" in
2492 * the case of a failure.
2493 *
2494 * Just unlocking the page would leave the radix tree tag-bits
2495 * out of sync with the true state of the page.
2496 */
2497 set_page_writeback(page);
2498 retry_write:
2499 rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2500 if (is_retryable_error(rc)) {
2501 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2502 goto retry_write;
2503 redirty_page_for_writepage(wbc, page);
2504 } else if (rc != 0) {
2505 SetPageError(page);
2506 mapping_set_error(page->mapping, rc);
2507 } else {
2508 SetPageUptodate(page);
2510 end_page_writeback(page);
2511 put_page(page);
2512 free_xid(xid);
2513 return rc;
2516 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2518 int rc = cifs_writepage_locked(page, wbc);
2519 unlock_page(page);
2520 return rc;
2523 static int cifs_write_end(struct file *file, struct address_space *mapping,
2524 loff_t pos, unsigned len, unsigned copied,
2525 struct page *page, void *fsdata)
2527 int rc;
2528 struct inode *inode = mapping->host;
2529 struct cifsFileInfo *cfile = file->private_data;
2530 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2531 __u32 pid;
2533 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2534 pid = cfile->pid;
2535 else
2536 pid = current->tgid;
2538 cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2539 page, pos, copied);
2541 if (PageChecked(page)) {
2542 if (copied == len)
2543 SetPageUptodate(page);
2544 ClearPageChecked(page);
2545 } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2546 SetPageUptodate(page);
2548 if (!PageUptodate(page)) {
2549 char *page_data;
2550 unsigned offset = pos & (PAGE_SIZE - 1);
2551 unsigned int xid;
2553 xid = get_xid();
2554 /* This is probably better than calling cifs_partialpagewrite()
2555 directly, since in this function the file handle is known and we
2556 might as well use it. */
2557 /* BB check if anything else is missing out of ppw,
2558 such as updating the last write time */
2559 page_data = kmap(page);
2560 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2561 /* if (rc < 0) should we set writebehind rc? */
2562 kunmap(page);
2564 free_xid(xid);
2565 } else {
2566 rc = copied;
2567 pos += copied;
2568 set_page_dirty(page);
2571 if (rc > 0) {
2572 spin_lock(&inode->i_lock);
2573 if (pos > inode->i_size)
2574 i_size_write(inode, pos);
2575 spin_unlock(&inode->i_lock);
2578 unlock_page(page);
2579 put_page(page);
2581 return rc;
2584 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2585 int datasync)
2587 unsigned int xid;
2588 int rc = 0;
2589 struct cifs_tcon *tcon;
2590 struct TCP_Server_Info *server;
2591 struct cifsFileInfo *smbfile = file->private_data;
2592 struct inode *inode = file_inode(file);
2593 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2595 rc = file_write_and_wait_range(file, start, end);
2596 if (rc) {
2597 trace_cifs_fsync_err(inode->i_ino, rc);
2598 return rc;
2601 xid = get_xid();
2603 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2604 file, datasync);
2606 if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2607 rc = cifs_zap_mapping(inode);
2608 if (rc) {
2609 cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2610 rc = 0; /* don't care about it in fsync */
2614 tcon = tlink_tcon(smbfile->tlink);
2615 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2616 server = tcon->ses->server;
2617 if (server->ops->flush)
2618 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2619 else
2620 rc = -ENOSYS;
2623 free_xid(xid);
2624 return rc;
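/*
* Same as cifs_strict_fsync() above, except that this variant does not
* invalidate the page cache before asking the server to flush.
*/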
2627 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2629 unsigned int xid;
2630 int rc = 0;
2631 struct cifs_tcon *tcon;
2632 struct TCP_Server_Info *server;
2633 struct cifsFileInfo *smbfile = file->private_data;
2634 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2636 rc = file_write_and_wait_range(file, start, end);
2637 if (rc) {
2638 trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
2639 return rc;
2642 xid = get_xid();
2644 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2645 file, datasync);
2647 tcon = tlink_tcon(smbfile->tlink);
2648 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2649 server = tcon->ses->server;
2650 if (server->ops->flush)
2651 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2652 else
2653 rc = -ENOSYS;
2656 free_xid(xid);
2657 return rc;
2660 /*
2661 * As the file closes, flush all cached write data for this inode,
2662 * checking for write-behind errors.
2663 */
2664 int cifs_flush(struct file *file, fl_owner_t id)
2666 struct inode *inode = file_inode(file);
2667 int rc = 0;
2669 if (file->f_mode & FMODE_WRITE)
2670 rc = filemap_write_and_wait(inode->i_mapping);
2672 cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2673 if (rc)
2674 trace_cifs_flush_err(inode->i_ino, rc);
2675 return rc;
2678 static int
2679 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2681 int rc = 0;
2682 unsigned long i;
2684 for (i = 0; i < num_pages; i++) {
2685 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2686 if (!pages[i]) {
2687 /*
2688 * save the number of pages we have already allocated and
2689 * return with an ENOMEM error
2690 */
2691 num_pages = i;
2692 rc = -ENOMEM;
2693 break;
2697 if (rc) {
2698 for (i = 0; i < num_pages; i++)
2699 put_page(pages[i]);
2701 return rc;
2704 static inline
2705 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2707 size_t num_pages;
2708 size_t clen;
2710 clen = min_t(const size_t, len, wsize);
2711 num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2713 if (cur_len)
2714 *cur_len = clen;
2716 return num_pages;
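/*
* Example: with a 64KB wsize, a 100KB request and 4KB pages, clen is
* clamped to 64KB (16 pages); the remaining 36KB is left for the next
* iteration of the caller's loop.
*/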
2719 static void
2720 cifs_uncached_writedata_release(struct kref *refcount)
2722 int i;
2723 struct cifs_writedata *wdata = container_of(refcount,
2724 struct cifs_writedata, refcount);
2726 kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2727 for (i = 0; i < wdata->nr_pages; i++)
2728 put_page(wdata->pages[i]);
2729 cifs_writedata_release(refcount);
2732 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2734 static void
2735 cifs_uncached_writev_complete(struct work_struct *work)
2737 struct cifs_writedata *wdata = container_of(work,
2738 struct cifs_writedata, work);
2739 struct inode *inode = d_inode(wdata->cfile->dentry);
2740 struct cifsInodeInfo *cifsi = CIFS_I(inode);
2742 spin_lock(&inode->i_lock);
2743 cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2744 if (cifsi->server_eof > inode->i_size)
2745 i_size_write(inode, cifsi->server_eof);
2746 spin_unlock(&inode->i_lock);
2748 complete(&wdata->done);
2749 collect_uncached_write_data(wdata->ctx);
2750 /* the call below may free the last reference to the aio ctx */
2751 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2754 static int
2755 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2756 size_t *len, unsigned long *num_pages)
2758 size_t save_len, copied, bytes, cur_len = *len;
2759 unsigned long i, nr_pages = *num_pages;
2761 save_len = cur_len;
2762 for (i = 0; i < nr_pages; i++) {
2763 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2764 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2765 cur_len -= copied;
2766 /*
2767 * If we didn't copy as much as we expected, then that
2768 * may mean we trod into an unmapped area. Stop copying
2769 * at that point. On the next pass through the big
2770 * loop, we'll likely end up getting a zero-length
2771 * write and bailing out of it.
2772 */
2773 if (copied < bytes)
2774 break;
2776 cur_len = save_len - cur_len;
2777 *len = cur_len;
2779 /*
2780 * If we have no data to send, then that probably means that
2781 * the copy above failed altogether. That's most likely because
2782 * the address in the iovec was bogus. Return -EFAULT and let
2783 * the caller free anything we allocated and bail out.
2784 */
2785 if (!cur_len)
2786 return -EFAULT;
2788 /*
2789 * i + 1 now represents the number of pages we actually used in
2790 * the copy phase above.
2791 */
2792 *num_pages = i + 1;
2793 return 0;
2796 static int
2797 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
2798 struct cifs_aio_ctx *ctx)
2800 unsigned int wsize;
2801 struct cifs_credits credits;
2802 int rc;
2803 struct TCP_Server_Info *server =
2804 tlink_tcon(wdata->cfile->tlink)->ses->server;
2806 do {
2807 if (wdata->cfile->invalidHandle) {
2808 rc = cifs_reopen_file(wdata->cfile, false);
2809 if (rc == -EAGAIN)
2810 continue;
2811 else if (rc)
2812 break;
2816 /*
2817 * Wait for credits to resend this wdata.
2818 * Note: we are attempting to resend the whole wdata, not in
2819 * segments.
2820 */
2821 do {
2822 rc = server->ops->wait_mtu_credits(server, wdata->bytes,
2823 &wsize, &credits);
2824 if (rc)
2825 goto fail;
2827 if (wsize < wdata->bytes) {
2828 add_credits_and_wake_if(server, &credits, 0);
2829 msleep(1000);
2831 } while (wsize < wdata->bytes);
2832 wdata->credits = credits;
2834 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2836 if (!rc) {
2837 if (wdata->cfile->invalidHandle)
2838 rc = -EAGAIN;
2839 else {
2840 #ifdef CONFIG_CIFS_SMB_DIRECT
2841 if (wdata->mr) {
2842 wdata->mr->need_invalidate = true;
2843 smbd_deregister_mr(wdata->mr);
2844 wdata->mr = NULL;
2846 #endif
2847 rc = server->ops->async_writev(wdata,
2848 cifs_uncached_writedata_release);
2852 /* If the write was successfully sent, we are done */
2853 if (!rc) {
2854 list_add_tail(&wdata->list, wdata_list);
2855 return 0;
2858 /* Roll back credits and retry if needed */
2859 add_credits_and_wake_if(server, &wdata->credits, 0);
2860 } while (rc == -EAGAIN);
2862 fail:
2863 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2864 return rc;
2867 static int
2868 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2869 struct cifsFileInfo *open_file,
2870 struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2871 struct cifs_aio_ctx *ctx)
2873 int rc = 0;
2874 size_t cur_len;
2875 unsigned long nr_pages, num_pages, i;
2876 struct cifs_writedata *wdata;
2877 struct iov_iter saved_from = *from;
2878 loff_t saved_offset = offset;
2879 pid_t pid;
2880 struct TCP_Server_Info *server;
2881 struct page **pagevec;
2882 size_t start;
2883 unsigned int xid;
2885 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2886 pid = open_file->pid;
2887 else
2888 pid = current->tgid;
2890 server = tlink_tcon(open_file->tlink)->ses->server;
2891 xid = get_xid();
2893 do {
2894 unsigned int wsize;
2895 struct cifs_credits credits_on_stack;
2896 struct cifs_credits *credits = &credits_on_stack;
2898 if (open_file->invalidHandle) {
2899 rc = cifs_reopen_file(open_file, false);
2900 if (rc == -EAGAIN)
2901 continue;
2902 else if (rc)
2903 break;
2906 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2907 &wsize, credits);
2908 if (rc)
2909 break;
2911 cur_len = min_t(const size_t, len, wsize);
2913 if (ctx->direct_io) {
2914 ssize_t result;
2916 result = iov_iter_get_pages_alloc(
2917 from, &pagevec, cur_len, &start);
2918 if (result < 0) {
2919 cifs_dbg(VFS,
2920 "direct_writev couldn't get user pages "
2921 "(rc=%zd) iter type %d iov_offset %zd "
2922 "count %zd\n",
2923 result, iov_iter_type(from),
2924 from->iov_offset, from->count);
2925 dump_stack();
2927 rc = result;
2928 add_credits_and_wake_if(server, credits, 0);
2929 break;
2931 cur_len = (size_t)result;
2932 iov_iter_advance(from, cur_len);
2934 nr_pages =
2935 (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
2937 wdata = cifs_writedata_direct_alloc(pagevec,
2938 cifs_uncached_writev_complete);
2939 if (!wdata) {
2940 rc = -ENOMEM;
2941 add_credits_and_wake_if(server, credits, 0);
2942 break;
2946 wdata->page_offset = start;
2947 wdata->tailsz =
2948 nr_pages > 1 ?
2949 cur_len - (PAGE_SIZE - start) -
2950 (nr_pages - 2) * PAGE_SIZE :
2951 cur_len;
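/*
* The tailsz above is what remains of cur_len in the final page:
* cur_len minus the partial first page (PAGE_SIZE - start) minus
* the nr_pages - 2 full pages in between.
*/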
2952 } else {
2953 nr_pages = get_numpages(wsize, len, &cur_len);
2954 wdata = cifs_writedata_alloc(nr_pages,
2955 cifs_uncached_writev_complete);
2956 if (!wdata) {
2957 rc = -ENOMEM;
2958 add_credits_and_wake_if(server, credits, 0);
2959 break;
2962 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2963 if (rc) {
2964 kvfree(wdata->pages);
2965 kfree(wdata);
2966 add_credits_and_wake_if(server, credits, 0);
2967 break;
2970 num_pages = nr_pages;
2971 rc = wdata_fill_from_iovec(
2972 wdata, from, &cur_len, &num_pages);
2973 if (rc) {
2974 for (i = 0; i < nr_pages; i++)
2975 put_page(wdata->pages[i]);
2976 kvfree(wdata->pages);
2977 kfree(wdata);
2978 add_credits_and_wake_if(server, credits, 0);
2979 break;
2982 /*
2983 * Bring nr_pages down to the number of pages we
2984 * actually used, and free any pages that we didn't use.
2985 */
2986 for ( ; nr_pages > num_pages; nr_pages--)
2987 put_page(wdata->pages[nr_pages - 1]);
2989 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2992 wdata->sync_mode = WB_SYNC_ALL;
2993 wdata->nr_pages = nr_pages;
2994 wdata->offset = (__u64)offset;
2995 wdata->cfile = cifsFileInfo_get(open_file);
2996 wdata->pid = pid;
2997 wdata->bytes = cur_len;
2998 wdata->pagesz = PAGE_SIZE;
2999 wdata->credits = credits_on_stack;
3000 wdata->ctx = ctx;
3001 kref_get(&ctx->refcount);
3003 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3005 if (!rc) {
3006 if (wdata->cfile->invalidHandle)
3007 rc = -EAGAIN;
3008 else
3009 rc = server->ops->async_writev(wdata,
3010 cifs_uncached_writedata_release);
3013 if (rc) {
3014 add_credits_and_wake_if(server, &wdata->credits, 0);
3015 kref_put(&wdata->refcount,
3016 cifs_uncached_writedata_release);
3017 if (rc == -EAGAIN) {
3018 *from = saved_from;
3019 iov_iter_advance(from, offset - saved_offset);
3020 continue;
3022 break;
3025 list_add_tail(&wdata->list, wdata_list);
3026 offset += cur_len;
3027 len -= cur_len;
3028 } while (len > 0);
3030 free_xid(xid);
3031 return rc;
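/*
* Called from the write completion work item: collect results in
* offset order, resend anything that failed with -EAGAIN, and complete
* the aio ctx once every wdata on the list has been accounted for.
*/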
3034 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3036 struct cifs_writedata *wdata, *tmp;
3037 struct cifs_tcon *tcon;
3038 struct cifs_sb_info *cifs_sb;
3039 struct dentry *dentry = ctx->cfile->dentry;
3040 int rc;
3042 tcon = tlink_tcon(ctx->cfile->tlink);
3043 cifs_sb = CIFS_SB(dentry->d_sb);
3045 mutex_lock(&ctx->aio_mutex);
3047 if (list_empty(&ctx->list)) {
3048 mutex_unlock(&ctx->aio_mutex);
3049 return;
3052 rc = ctx->rc;
3053 /*
3054 * Wait for and collect replies for any successful sends in order of
3055 * increasing offset. Once an error is hit, return without waiting
3056 * for any more replies.
3057 */
3058 restart_loop:
3059 list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3060 if (!rc) {
3061 if (!try_wait_for_completion(&wdata->done)) {
3062 mutex_unlock(&ctx->aio_mutex);
3063 return;
3066 if (wdata->result)
3067 rc = wdata->result;
3068 else
3069 ctx->total_len += wdata->bytes;
3071 /* resend call if it's a retryable error */
3072 if (rc == -EAGAIN) {
3073 struct list_head tmp_list;
3074 struct iov_iter tmp_from = ctx->iter;
3076 INIT_LIST_HEAD(&tmp_list);
3077 list_del_init(&wdata->list);
3079 if (ctx->direct_io)
3080 rc = cifs_resend_wdata(
3081 wdata, &tmp_list, ctx);
3082 else {
3083 iov_iter_advance(&tmp_from,
3084 wdata->offset - ctx->pos);
3086 rc = cifs_write_from_iter(wdata->offset,
3087 wdata->bytes, &tmp_from,
3088 ctx->cfile, cifs_sb, &tmp_list,
3089 ctx);
3091 kref_put(&wdata->refcount,
3092 cifs_uncached_writedata_release);
3095 list_splice(&tmp_list, &ctx->list);
3096 goto restart_loop;
3099 list_del_init(&wdata->list);
3100 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3103 cifs_stats_bytes_written(tcon, ctx->total_len);
3104 set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3106 ctx->rc = (rc == 0) ? ctx->total_len : rc;
3108 mutex_unlock(&ctx->aio_mutex);
3110 if (ctx->iocb && ctx->iocb->ki_complete)
3111 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3112 else
3113 complete(&ctx->done);
3116 static ssize_t __cifs_writev(
3117 struct kiocb *iocb, struct iov_iter *from, bool direct)
3119 struct file *file = iocb->ki_filp;
3120 ssize_t total_written = 0;
3121 struct cifsFileInfo *cfile;
3122 struct cifs_tcon *tcon;
3123 struct cifs_sb_info *cifs_sb;
3124 struct cifs_aio_ctx *ctx;
3125 struct iov_iter saved_from = *from;
3126 size_t len = iov_iter_count(from);
3127 int rc;
3129 /*
3130 * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
3131 * In this case, fall back to the non-direct write function.
3132 * This could be improved by getting pages directly for ITER_KVEC.
3133 */
3134 if (direct && iov_iter_is_kvec(from)) {
3135 cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
3136 direct = false;
3139 rc = generic_write_checks(iocb, from);
3140 if (rc <= 0)
3141 return rc;
3143 cifs_sb = CIFS_FILE_SB(file);
3144 cfile = file->private_data;
3145 tcon = tlink_tcon(cfile->tlink);
3147 if (!tcon->ses->server->ops->async_writev)
3148 return -ENOSYS;
3150 ctx = cifs_aio_ctx_alloc();
3151 if (!ctx)
3152 return -ENOMEM;
3154 ctx->cfile = cifsFileInfo_get(cfile);
3156 if (!is_sync_kiocb(iocb))
3157 ctx->iocb = iocb;
3159 ctx->pos = iocb->ki_pos;
3161 if (direct) {
3162 ctx->direct_io = true;
3163 ctx->iter = *from;
3164 ctx->len = len;
3165 } else {
3166 rc = setup_aio_ctx_iter(ctx, from, WRITE);
3167 if (rc) {
3168 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3169 return rc;
3173 /* grab a lock here because response handlers can access ctx */
3174 mutex_lock(&ctx->aio_mutex);
3176 rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
3177 cfile, cifs_sb, &ctx->list, ctx);
3179 /*
3180 * If at least one write was successfully sent, then discard any rc
3181 * value from the later writes. If the other write succeeds, then
3182 * we'll end up returning whatever was written. If it fails, then
3183 * we'll get a new rc value from that.
3184 */
3185 if (!list_empty(&ctx->list))
3186 rc = 0;
3188 mutex_unlock(&ctx->aio_mutex);
3190 if (rc) {
3191 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3192 return rc;
3195 if (!is_sync_kiocb(iocb)) {
3196 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3197 return -EIOCBQUEUED;
3200 rc = wait_for_completion_killable(&ctx->done);
3201 if (rc) {
3202 mutex_lock(&ctx->aio_mutex);
3203 ctx->rc = rc = -EINTR;
3204 total_written = ctx->total_len;
3205 mutex_unlock(&ctx->aio_mutex);
3206 } else {
3207 rc = ctx->rc;
3208 total_written = ctx->total_len;
3211 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3213 if (unlikely(!total_written))
3214 return rc;
3216 iocb->ki_pos += total_written;
3217 return total_written;
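/*
* Entry points for the uncached write path; both are thin wrappers
* around __cifs_writev() above, differing only in whether direct I/O
* is requested.
*/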
3220 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3222 return __cifs_writev(iocb, from, true);
3225 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3227 return __cifs_writev(iocb, from, false);
3230 static ssize_t
3231 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3233 struct file *file = iocb->ki_filp;
3234 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3235 struct inode *inode = file->f_mapping->host;
3236 struct cifsInodeInfo *cinode = CIFS_I(inode);
3237 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3238 ssize_t rc;
3240 inode_lock(inode);
3241 /*
3242 * We need to hold the sem to be sure nobody modifies the lock list
3243 * with a brlock that prevents writing.
3244 */
3245 down_read(&cinode->lock_sem);
3247 rc = generic_write_checks(iocb, from);
3248 if (rc <= 0)
3249 goto out;
3251 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3252 server->vals->exclusive_lock_type, 0,
3253 NULL, CIFS_WRITE_OP))
3254 rc = __generic_file_write_iter(iocb, from);
3255 else
3256 rc = -EACCES;
3257 out:
3258 up_read(&cinode->lock_sem);
3259 inode_unlock(inode);
3261 if (rc > 0)
3262 rc = generic_write_sync(iocb, rc);
3263 return rc;
3266 ssize_t
3267 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3269 struct inode *inode = file_inode(iocb->ki_filp);
3270 struct cifsInodeInfo *cinode = CIFS_I(inode);
3271 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3272 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3273 iocb->ki_filp->private_data;
3274 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3275 ssize_t written;
3277 written = cifs_get_writer(cinode);
3278 if (written)
3279 return written;
3281 if (CIFS_CACHE_WRITE(cinode)) {
3282 if (cap_unix(tcon->ses) &&
3283 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3284 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3285 written = generic_file_write_iter(iocb, from);
3286 goto out;
3288 written = cifs_writev(iocb, from);
3289 goto out;
3291 /*
3292 * For non-oplocked files in strict cache mode we need to write the data
3293 * to the server exactly from pos to pos+len-1 rather than flush all
3294 * affected pages, because that may cause an error with mandatory locks
3295 * on these pages but not on the region from pos to pos+len-1.
3296 */
3297 written = cifs_user_writev(iocb, from);
3298 if (CIFS_CACHE_READ(cinode)) {
3299 /*
3300 * We have read level caching and we have just sent a write
3301 * request to the server thus making data in the cache stale.
3302 * Zap the cache and set oplock/lease level to NONE to avoid
3303 * reading stale data from the cache. All subsequent read
3304 * operations will read new data from the server.
3305 */
3306 cifs_zap_mapping(inode);
3307 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3308 inode);
3309 cinode->oplock = 0;
3311 out:
3312 cifs_put_writer(cinode);
3313 return written;
3316 static struct cifs_readdata *
3317 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3319 struct cifs_readdata *rdata;
3321 rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3322 if (rdata != NULL) {
3323 rdata->pages = pages;
3324 kref_init(&rdata->refcount);
3325 INIT_LIST_HEAD(&rdata->list);
3326 init_completion(&rdata->done);
3327 INIT_WORK(&rdata->work, complete);
3330 return rdata;
3333 static struct cifs_readdata *
3334 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3336 struct page **pages =
3337 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3338 struct cifs_readdata *ret = NULL;
3340 if (pages) {
3341 ret = cifs_readdata_direct_alloc(pages, complete);
3342 if (!ret)
3343 kfree(pages);
3346 return ret;
3349 void
3350 cifs_readdata_release(struct kref *refcount)
3352 struct cifs_readdata *rdata = container_of(refcount,
3353 struct cifs_readdata, refcount);
3354 #ifdef CONFIG_CIFS_SMB_DIRECT
3355 if (rdata->mr) {
3356 smbd_deregister_mr(rdata->mr);
3357 rdata->mr = NULL;
3359 #endif
3360 if (rdata->cfile)
3361 cifsFileInfo_put(rdata->cfile);
3363 kvfree(rdata->pages);
3364 kfree(rdata);
3367 static int
3368 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3370 int rc = 0;
3371 struct page *page;
3372 unsigned int i;
3374 for (i = 0; i < nr_pages; i++) {
3375 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3376 if (!page) {
3377 rc = -ENOMEM;
3378 break;
3380 rdata->pages[i] = page;
3383 if (rc) {
3384 unsigned int nr_page_failed = i;
3386 for (i = 0; i < nr_page_failed; i++) {
3387 put_page(rdata->pages[i]);
3388 rdata->pages[i] = NULL;
3391 return rc;
3394 static void
3395 cifs_uncached_readdata_release(struct kref *refcount)
3397 struct cifs_readdata *rdata = container_of(refcount,
3398 struct cifs_readdata, refcount);
3399 unsigned int i;
3401 kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3402 for (i = 0; i < rdata->nr_pages; i++) {
3403 put_page(rdata->pages[i]);
3405 cifs_readdata_release(refcount);
3408 /**
3409 * cifs_readdata_to_iov - copy data from pages in response to an iovec
3410 * @rdata: the readdata response with list of pages holding data
3411 * @iter: destination for our data
3412 *
3413 * This function copies data from a list of pages in a readdata response into
3414 * an array of iovecs. It will first calculate where the data should go
3415 * based on the info in the readdata and then copy the data into that spot.
3416 */
3417 static int
3418 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3420 size_t remaining = rdata->got_bytes;
3421 unsigned int i;
3423 for (i = 0; i < rdata->nr_pages; i++) {
3424 struct page *page = rdata->pages[i];
3425 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3426 size_t written;
3428 if (unlikely(iov_iter_is_pipe(iter))) {
3429 void *addr = kmap_atomic(page);
3431 written = copy_to_iter(addr, copy, iter);
3432 kunmap_atomic(addr);
3433 } else
3434 written = copy_page_to_iter(page, 0, copy, iter);
3435 remaining -= written;
3436 if (written < copy && iov_iter_count(iter) > 0)
3437 break;
3439 return remaining ? -EFAULT : 0;
3442 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3444 static void
3445 cifs_uncached_readv_complete(struct work_struct *work)
3447 struct cifs_readdata *rdata = container_of(work,
3448 struct cifs_readdata, work);
3450 complete(&rdata->done);
3451 collect_uncached_read_data(rdata->ctx);
3452 /* the call below may free the last reference to the aio ctx */
3453 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3456 static int
3457 uncached_fill_pages(struct TCP_Server_Info *server,
3458 struct cifs_readdata *rdata, struct iov_iter *iter,
3459 unsigned int len)
3461 int result = 0;
3462 unsigned int i;
3463 unsigned int nr_pages = rdata->nr_pages;
3464 unsigned int page_offset = rdata->page_offset;
3466 rdata->got_bytes = 0;
3467 rdata->tailsz = PAGE_SIZE;
3468 for (i = 0; i < nr_pages; i++) {
3469 struct page *page = rdata->pages[i];
3470 size_t n;
3471 unsigned int segment_size = rdata->pagesz;
3473 if (i == 0)
3474 segment_size -= page_offset;
3475 else
3476 page_offset = 0;
3479 if (len <= 0) {
3480 /* no need to hold page hostage */
3481 rdata->pages[i] = NULL;
3482 rdata->nr_pages--;
3483 put_page(page);
3484 continue;
3487 n = len;
3488 if (len >= segment_size)
3489 /* enough data to fill the page */
3490 n = segment_size;
3491 else
3492 rdata->tailsz = len;
3493 len -= n;
3495 if (iter)
3496 result = copy_page_from_iter(
3497 page, page_offset, n, iter);
3498 #ifdef CONFIG_CIFS_SMB_DIRECT
3499 else if (rdata->mr)
3500 result = n;
3501 #endif
3502 else
3503 result = cifs_read_page_from_socket(
3504 server, page, page_offset, n);
3505 if (result < 0)
3506 break;
3508 rdata->got_bytes += result;
3511 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3512 rdata->got_bytes : result;
3515 static int
3516 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3517 struct cifs_readdata *rdata, unsigned int len)
3519 return uncached_fill_pages(server, rdata, NULL, len);
3522 static int
3523 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3524 struct cifs_readdata *rdata,
3525 struct iov_iter *iter)
3527 return uncached_fill_pages(server, rdata, iter, iter->count);
3530 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3531 struct list_head *rdata_list,
3532 struct cifs_aio_ctx *ctx)
3534 unsigned int rsize;
3535 struct cifs_credits credits;
3536 int rc;
3537 struct TCP_Server_Info *server =
3538 tlink_tcon(rdata->cfile->tlink)->ses->server;
3540 do {
3541 if (rdata->cfile->invalidHandle) {
3542 rc = cifs_reopen_file(rdata->cfile, true);
3543 if (rc == -EAGAIN)
3544 continue;
3545 else if (rc)
3546 break;
3549 /*
3550 * Wait for credits to resend this rdata.
3551 * Note: we are attempting to resend the whole rdata, not in
3552 * segments.
3553 */
3554 do {
3555 rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3556 &rsize, &credits);
3558 if (rc)
3559 goto fail;
3561 if (rsize < rdata->bytes) {
3562 add_credits_and_wake_if(server, &credits, 0);
3563 msleep(1000);
3565 } while (rsize < rdata->bytes);
3566 rdata->credits = credits;
3568 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3569 if (!rc) {
3570 if (rdata->cfile->invalidHandle)
3571 rc = -EAGAIN;
3572 else {
3573 #ifdef CONFIG_CIFS_SMB_DIRECT
3574 if (rdata->mr) {
3575 rdata->mr->need_invalidate = true;
3576 smbd_deregister_mr(rdata->mr);
3577 rdata->mr = NULL;
3579 #endif
3580 rc = server->ops->async_readv(rdata);
3584 /* If the read was successfully sent, we are done */
3585 if (!rc) {
3586 /* Add to aio pending list */
3587 list_add_tail(&rdata->list, rdata_list);
3588 return 0;
3591 /* Roll back credits and retry if needed */
3592 add_credits_and_wake_if(server, &rdata->credits, 0);
3593 } while (rc == -EAGAIN);
3595 fail:
3596 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3597 return rc;
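/*
* Split an uncached read into rsize-sized chunks, allocate (or, for
* direct I/O, pin) the pages for each chunk, and issue async reads,
* queueing each rdata on @rdata_list for the collection phase.
*/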
3600 static int
3601 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3602 struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3603 struct cifs_aio_ctx *ctx)
3605 struct cifs_readdata *rdata;
3606 unsigned int npages, rsize;
3607 struct cifs_credits credits_on_stack;
3608 struct cifs_credits *credits = &credits_on_stack;
3609 size_t cur_len;
3610 int rc;
3611 pid_t pid;
3612 struct TCP_Server_Info *server;
3613 struct page **pagevec;
3614 size_t start;
3615 struct iov_iter direct_iov = ctx->iter;
3617 server = tlink_tcon(open_file->tlink)->ses->server;
3619 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3620 pid = open_file->pid;
3621 else
3622 pid = current->tgid;
3624 if (ctx->direct_io)
3625 iov_iter_advance(&direct_iov, offset - ctx->pos);
3627 do {
3628 if (open_file->invalidHandle) {
3629 rc = cifs_reopen_file(open_file, true);
3630 if (rc == -EAGAIN)
3631 continue;
3632 else if (rc)
3633 break;
3636 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3637 &rsize, credits);
3638 if (rc)
3639 break;
3641 cur_len = min_t(const size_t, len, rsize);
3643 if (ctx->direct_io) {
3644 ssize_t result;
3646 result = iov_iter_get_pages_alloc(
3647 &direct_iov, &pagevec,
3648 cur_len, &start);
3649 if (result < 0) {
3650 cifs_dbg(VFS,
3651 "couldn't get user pages (rc=%zd)"
3652 " iter type %d"
3653 " iov_offset %zd count %zd\n",
3654 result, iov_iter_type(&direct_iov),
3655 direct_iov.iov_offset,
3656 direct_iov.count);
3657 dump_stack();
3659 rc = result;
3660 add_credits_and_wake_if(server, credits, 0);
3661 break;
3663 cur_len = (size_t)result;
3664 iov_iter_advance(&direct_iov, cur_len);
3666 rdata = cifs_readdata_direct_alloc(
3667 pagevec, cifs_uncached_readv_complete);
3668 if (!rdata) {
3669 add_credits_and_wake_if(server, credits, 0);
3670 rc = -ENOMEM;
3671 break;
3674 npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
3675 rdata->page_offset = start;
3676 rdata->tailsz = npages > 1 ?
3677 cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
3678 cur_len;
3680 } else {
3682 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3683 /* allocate a readdata struct */
3684 rdata = cifs_readdata_alloc(npages,
3685 cifs_uncached_readv_complete);
3686 if (!rdata) {
3687 add_credits_and_wake_if(server, credits, 0);
3688 rc = -ENOMEM;
3689 break;
3692 rc = cifs_read_allocate_pages(rdata, npages);
3693 if (rc) {
3694 kvfree(rdata->pages);
3695 kfree(rdata);
3696 add_credits_and_wake_if(server, credits, 0);
3697 break;
3700 rdata->tailsz = PAGE_SIZE;
3703 rdata->cfile = cifsFileInfo_get(open_file);
3704 rdata->nr_pages = npages;
3705 rdata->offset = offset;
3706 rdata->bytes = cur_len;
3707 rdata->pid = pid;
3708 rdata->pagesz = PAGE_SIZE;
3709 rdata->read_into_pages = cifs_uncached_read_into_pages;
3710 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3711 rdata->credits = credits_on_stack;
3712 rdata->ctx = ctx;
3713 kref_get(&ctx->refcount);
3715 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3717 if (!rc) {
3718 if (rdata->cfile->invalidHandle)
3719 rc = -EAGAIN;
3720 else
3721 rc = server->ops->async_readv(rdata);
3724 if (rc) {
3725 add_credits_and_wake_if(server, &rdata->credits, 0);
3726 kref_put(&rdata->refcount,
3727 cifs_uncached_readdata_release);
3728 if (rc == -EAGAIN) {
3729 iov_iter_revert(&direct_iov, cur_len);
3730 continue;
3732 break;
3735 list_add_tail(&rdata->list, rdata_list);
3736 offset += cur_len;
3737 len -= cur_len;
3738 } while (len > 0);
3740 return rc;
3743 static void
3744 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3746 struct cifs_readdata *rdata, *tmp;
3747 struct iov_iter *to = &ctx->iter;
3748 struct cifs_sb_info *cifs_sb;
3749 int rc;
3751 cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3753 mutex_lock(&ctx->aio_mutex);
3755 if (list_empty(&ctx->list)) {
3756 mutex_unlock(&ctx->aio_mutex);
3757 return;
3760 rc = ctx->rc;
3761 /* the loop below should proceed in the order of increasing offsets */
3762 again:
3763 list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
3764 if (!rc) {
3765 if (!try_wait_for_completion(&rdata->done)) {
3766 mutex_unlock(&ctx->aio_mutex);
3767 return;
3770 if (rdata->result == -EAGAIN) {
3771 /* resend call if it's a retryable error */
3772 struct list_head tmp_list;
3773 unsigned int got_bytes = rdata->got_bytes;
3775 list_del_init(&rdata->list);
3776 INIT_LIST_HEAD(&tmp_list);
3778 /*
3779 * We got part of the data and then a reconnect
3780 * happened -- fill the buffer and continue
3781 * reading.
3782 */
3783 if (got_bytes && got_bytes < rdata->bytes) {
3784 rc = 0;
3785 if (!ctx->direct_io)
3786 rc = cifs_readdata_to_iov(rdata, to);
3787 if (rc) {
3788 kref_put(&rdata->refcount,
3789 cifs_uncached_readdata_release);
3790 continue;
3794 if (ctx->direct_io) {
3795 /*
3796 * Re-use rdata as this is a
3797 * direct I/O.
3798 */
3799 rc = cifs_resend_rdata(
3800 rdata,
3801 &tmp_list, ctx);
3802 } else {
3803 rc = cifs_send_async_read(
3804 rdata->offset + got_bytes,
3805 rdata->bytes - got_bytes,
3806 rdata->cfile, cifs_sb,
3807 &tmp_list, ctx);
3809 kref_put(&rdata->refcount,
3810 cifs_uncached_readdata_release);
3813 list_splice(&tmp_list, &ctx->list);
3815 goto again;
3816 } else if (rdata->result)
3817 rc = rdata->result;
3818 else if (!ctx->direct_io)
3819 rc = cifs_readdata_to_iov(rdata, to);
3821 /* if there was a short read -- discard anything left */
3822 if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3823 rc = -ENODATA;
3825 ctx->total_len += rdata->got_bytes;
3827 list_del_init(&rdata->list);
3828 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3831 if (!ctx->direct_io)
3832 ctx->total_len = ctx->len - iov_iter_count(to);
3834 /* mask nodata case */
3835 if (rc == -ENODATA)
3836 rc = 0;
3838 ctx->rc = (rc == 0) ? ctx->total_len : rc;
3840 mutex_unlock(&ctx->aio_mutex);
3842 if (ctx->iocb && ctx->iocb->ki_complete)
3843 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3844 else
3845 complete(&ctx->done);
3848 static ssize_t __cifs_readv(
3849 struct kiocb *iocb, struct iov_iter *to, bool direct)
3851 size_t len;
3852 struct file *file = iocb->ki_filp;
3853 struct cifs_sb_info *cifs_sb;
3854 struct cifsFileInfo *cfile;
3855 struct cifs_tcon *tcon;
3856 ssize_t rc, total_read = 0;
3857 loff_t offset = iocb->ki_pos;
3858 struct cifs_aio_ctx *ctx;
3860 /*
3861 * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
3862 * so fall back to the data copy read path.
3863 * This could be improved by getting pages directly for ITER_KVEC.
3864 */
3865 if (direct && iov_iter_is_kvec(to)) {
3866 cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
3867 direct = false;
3870 len = iov_iter_count(to);
3871 if (!len)
3872 return 0;
3874 cifs_sb = CIFS_FILE_SB(file);
3875 cfile = file->private_data;
3876 tcon = tlink_tcon(cfile->tlink);
3878 if (!tcon->ses->server->ops->async_readv)
3879 return -ENOSYS;
3881 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3882 cifs_dbg(FYI, "attempting read on write only file instance\n");
3884 ctx = cifs_aio_ctx_alloc();
3885 if (!ctx)
3886 return -ENOMEM;
3888 ctx->cfile = cifsFileInfo_get(cfile);
3890 if (!is_sync_kiocb(iocb))
3891 ctx->iocb = iocb;
3893 if (iter_is_iovec(to))
3894 ctx->should_dirty = true;
3896 if (direct) {
3897 ctx->pos = offset;
3898 ctx->direct_io = true;
3899 ctx->iter = *to;
3900 ctx->len = len;
3901 } else {
3902 rc = setup_aio_ctx_iter(ctx, to, READ);
3903 if (rc) {
3904 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3905 return rc;
3907 len = ctx->len;
3910 /* grab a lock here because read response handlers can access ctx */
3911 mutex_lock(&ctx->aio_mutex);
3913 rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
3915 /* if at least one read request was sent successfully, reset rc */
3916 if (!list_empty(&ctx->list))
3917 rc = 0;
3919 mutex_unlock(&ctx->aio_mutex);
3921 if (rc) {
3922 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3923 return rc;
3926 if (!is_sync_kiocb(iocb)) {
3927 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3928 return -EIOCBQUEUED;
3931 rc = wait_for_completion_killable(&ctx->done);
3932 if (rc) {
3933 mutex_lock(&ctx->aio_mutex);
3934 ctx->rc = rc = -EINTR;
3935 total_read = ctx->total_len;
3936 mutex_unlock(&ctx->aio_mutex);
3937 } else {
3938 rc = ctx->rc;
3939 total_read = ctx->total_len;
3942 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3944 if (total_read) {
3945 iocb->ki_pos += total_read;
3946 return total_read;
3948 return rc;
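/*
* Entry points for the uncached read path; both are thin wrappers
* around __cifs_readv() above, differing only in whether direct I/O
* is requested.
*/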
3951 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
3953 return __cifs_readv(iocb, to, true);
3956 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
3958 return __cifs_readv(iocb, to, false);
3961 ssize_t
3962 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3964 struct inode *inode = file_inode(iocb->ki_filp);
3965 struct cifsInodeInfo *cinode = CIFS_I(inode);
3966 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3967 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3968 iocb->ki_filp->private_data;
3969 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3970 int rc = -EACCES;
3972 /*
3973 * In strict cache mode we need to read from the server all the time
3974 * if we don't have a level II oplock, because the server can delay
3975 * the mtime change, so we can't make a decision about invalidating
3976 * the inode. We can also fail reading pages if there are mandatory
3977 * locks on pages affected by this read but not on the region from
3978 * pos to pos+len-1.
3979 */
3980 if (!CIFS_CACHE_READ(cinode))
3981 return cifs_user_readv(iocb, to);
3983 if (cap_unix(tcon->ses) &&
3984 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3985 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3986 return generic_file_read_iter(iocb, to);
3988 /*
3989 * We need to hold the sem to be sure nobody modifies the lock list
3990 * with a brlock that prevents reading.
3991 */
3992 down_read(&cinode->lock_sem);
3993 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
3994 tcon->ses->server->vals->shared_lock_type,
3995 0, NULL, CIFS_READ_OP))
3996 rc = generic_file_read_iter(iocb, to);
3997 up_read(&cinode->lock_sem);
3998 return rc;
4001 static ssize_t
4002 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4004 int rc = -EACCES;
4005 unsigned int bytes_read = 0;
4006 unsigned int total_read;
4007 unsigned int current_read_size;
4008 unsigned int rsize;
4009 struct cifs_sb_info *cifs_sb;
4010 struct cifs_tcon *tcon;
4011 struct TCP_Server_Info *server;
4012 unsigned int xid;
4013 char *cur_offset;
4014 struct cifsFileInfo *open_file;
4015 struct cifs_io_parms io_parms;
4016 int buf_type = CIFS_NO_BUFFER;
4017 __u32 pid;
4019 xid = get_xid();
4020 cifs_sb = CIFS_FILE_SB(file);
4022 /* FIXME: set up handlers for larger reads and/or convert to async */
4023 rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
4025 if (file->private_data == NULL) {
4026 rc = -EBADF;
4027 free_xid(xid);
4028 return rc;
4030 open_file = file->private_data;
4031 tcon = tlink_tcon(open_file->tlink);
4032 server = tcon->ses->server;
4034 if (!server->ops->sync_read) {
4035 free_xid(xid);
4036 return -ENOSYS;
4039 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4040 pid = open_file->pid;
4041 else
4042 pid = current->tgid;
4044 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4045 cifs_dbg(FYI, "attempting read on write only file instance\n");
4047 for (total_read = 0, cur_offset = read_data; read_size > total_read;
4048 total_read += bytes_read, cur_offset += bytes_read) {
4049 do {
4050 current_read_size = min_t(uint, read_size - total_read,
4051 rsize);
4052 /*
4053 * For Windows ME and 9x we do not want to request more
4054 * than was negotiated, since the server will refuse the
4055 * read then.
4056 */
4057 if ((tcon->ses) && !(tcon->ses->capabilities &
4058 tcon->ses->server->vals->cap_large_files)) {
4059 current_read_size = min_t(uint,
4060 current_read_size, CIFSMaxBufSize);
4062 if (open_file->invalidHandle) {
4063 rc = cifs_reopen_file(open_file, true);
4064 if (rc != 0)
4065 break;
4067 io_parms.pid = pid;
4068 io_parms.tcon = tcon;
4069 io_parms.offset = *offset;
4070 io_parms.length = current_read_size;
4071 rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4072 &bytes_read, &cur_offset,
4073 &buf_type);
4074 } while (rc == -EAGAIN);
4076 if (rc || (bytes_read == 0)) {
4077 if (total_read) {
4078 break;
4079 } else {
4080 free_xid(xid);
4081 return rc;
4083 } else {
4084 cifs_stats_bytes_read(tcon, total_read);
4085 *offset += bytes_read;
4088 free_xid(xid);
4089 return total_read;
4092 /*
4093 * If the page is mmap'ed into a process's page tables, then we need to make
4094 * sure that it doesn't change while being written back.
4095 */
4096 static vm_fault_t
4097 cifs_page_mkwrite(struct vm_fault *vmf)
4099 struct page *page = vmf->page;
4101 lock_page(page);
4102 return VM_FAULT_LOCKED;
4105 static const struct vm_operations_struct cifs_file_vm_ops = {
4106 .fault = filemap_fault,
4107 .map_pages = filemap_map_pages,
4108 .page_mkwrite = cifs_page_mkwrite,
4111 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4113 int xid, rc = 0;
4114 struct inode *inode = file_inode(file);
4116 xid = get_xid();
4118 if (!CIFS_CACHE_READ(CIFS_I(inode)))
4119 rc = cifs_zap_mapping(inode);
4120 if (!rc)
4121 rc = generic_file_mmap(file, vma);
4122 if (!rc)
4123 vma->vm_ops = &cifs_file_vm_ops;
4125 free_xid(xid);
4126 return rc;
4129 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4131 int rc, xid;
4133 xid = get_xid();
4135 rc = cifs_revalidate_file(file);
4136 if (rc)
4137 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4138 rc);
4139 if (!rc)
4140 rc = generic_file_mmap(file, vma);
4141 if (!rc)
4142 vma->vm_ops = &cifs_file_vm_ops;
4144 free_xid(xid);
4145 return rc;
4148 static void
4149 cifs_readv_complete(struct work_struct *work)
4151 unsigned int i, got_bytes;
4152 struct cifs_readdata *rdata = container_of(work,
4153 struct cifs_readdata, work);
4155 got_bytes = rdata->got_bytes;
4156 for (i = 0; i < rdata->nr_pages; i++) {
4157 struct page *page = rdata->pages[i];
4159 lru_cache_add_file(page);
4161 if (rdata->result == 0 ||
4162 (rdata->result == -EAGAIN && got_bytes)) {
4163 flush_dcache_page(page);
4164 SetPageUptodate(page);
4167 unlock_page(page);
4169 if (rdata->result == 0 ||
4170 (rdata->result == -EAGAIN && got_bytes))
4171 cifs_readpage_to_fscache(rdata->mapping->host, page);
4173 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
4175 put_page(page);
4176 rdata->pages[i] = NULL;
4178 kref_put(&rdata->refcount, cifs_readdata_release);
4181 static int
4182 readpages_fill_pages(struct TCP_Server_Info *server,
4183 struct cifs_readdata *rdata, struct iov_iter *iter,
4184 unsigned int len)
4186 int result = 0;
4187 unsigned int i;
4188 u64 eof;
4189 pgoff_t eof_index;
4190 unsigned int nr_pages = rdata->nr_pages;
4191 unsigned int page_offset = rdata->page_offset;
4193 /* determine the eof that the server (probably) has */
4194 eof = CIFS_I(rdata->mapping->host)->server_eof;
4195 eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
4196 cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
4198 rdata->got_bytes = 0;
4199 rdata->tailsz = PAGE_SIZE;
4200 for (i = 0; i < nr_pages; i++) {
4201 struct page *page = rdata->pages[i];
4202 unsigned int to_read = rdata->pagesz;
4203 size_t n;
4205 if (i == 0)
4206 to_read -= page_offset;
4207 else
4208 page_offset = 0;
4210 n = to_read;
4212 if (len >= to_read) {
4213 len -= to_read;
4214 } else if (len > 0) {
4215 /* enough for partial page, fill and zero the rest */
4216 zero_user(page, len + page_offset, to_read - len);
4217 n = rdata->tailsz = len;
4218 len = 0;
4219 } else if (page->index > eof_index) {
4220 /*
4221 * The VFS will not try to do readahead past the
4222 * i_size, but it's possible that we have outstanding
4223 * writes with gaps in the middle and the i_size hasn't
4224 * caught up yet. Populate those with zeroed out pages
4225 * to prevent the VFS from repeatedly attempting to
4226 * fill them until the writes are flushed.
4227 */
4228 zero_user(page, 0, PAGE_SIZE);
4229 lru_cache_add_file(page);
4230 flush_dcache_page(page);
4231 SetPageUptodate(page);
4232 unlock_page(page);
4233 put_page(page);
4234 rdata->pages[i] = NULL;
4235 rdata->nr_pages--;
4236 continue;
4237 } else {
4238 /* no need to hold page hostage */
4239 lru_cache_add_file(page);
4240 unlock_page(page);
4241 put_page(page);
4242 rdata->pages[i] = NULL;
4243 rdata->nr_pages--;
4244 continue;
4247 if (iter)
4248 result = copy_page_from_iter(
4249 page, page_offset, n, iter);
4250 #ifdef CONFIG_CIFS_SMB_DIRECT
4251 else if (rdata->mr)
4252 result = n;
4253 #endif
4254 else
4255 result = cifs_read_page_from_socket(
4256 server, page, page_offset, n);
4257 if (result < 0)
4258 break;
4260 rdata->got_bytes += result;
4263 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
4264 rdata->got_bytes : result;
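/*
 * Return contract for readpages_fill_pages(): a positive byte count if
 * anything was received, even when the transfer then failed (except for
 * -ECONNABORTED), otherwise the error code.  Data lands in the pages by
 * one of three routes: copied from an iov_iter (e.g. an SMB3 payload
 * that was already decrypted into a buffer), left in place by an SMB
 * Direct RDMA transfer (rdata->mr), or read directly from the socket.
 */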
static int
cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
			       struct cifs_readdata *rdata, unsigned int len)
{
	return readpages_fill_pages(server, rdata, NULL, len);
}

static int
cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
			       struct cifs_readdata *rdata,
			       struct iov_iter *iter)
{
	return readpages_fill_pages(server, rdata, iter, iter->count);
}
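/*
 * These thin wrappers are what cifs_readpages() installs as
 * rdata->read_into_pages and rdata->copy_into_pages; the demultiplex
 * code invokes whichever matches how the response data arrived.
 */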
static int
readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
		    unsigned int rsize, struct list_head *tmplist,
		    unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
{
	struct page *page, *tpage;
	unsigned int expected_index;
	int rc;
	gfp_t gfp = readahead_gfp_mask(mapping);

	INIT_LIST_HEAD(tmplist);

	page = lru_to_page(page_list);

	/*
	 * Lock the page and put it in the cache. Since no one else
	 * should have access to this page, we're safe to simply set
	 * PG_locked without checking it first.
	 */
	__SetPageLocked(page);
	rc = add_to_page_cache_locked(page, mapping,
				      page->index, gfp);

	/* give up if we can't stick it in the cache */
	if (rc) {
		__ClearPageLocked(page);
		return rc;
	}

	/* move first page to the tmplist */
	*offset = (loff_t)page->index << PAGE_SHIFT;
	*bytes = PAGE_SIZE;
	*nr_pages = 1;
	list_move_tail(&page->lru, tmplist);

	/* now try and add more pages onto the request */
	expected_index = page->index + 1;
	list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
		/* discontinuity ? */
		if (page->index != expected_index)
			break;

		/* would this page push the read over the rsize? */
		if (*bytes + PAGE_SIZE > rsize)
			break;

		__SetPageLocked(page);
		if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
			__ClearPageLocked(page);
			break;
		}
		list_move_tail(&page->lru, tmplist);
		(*bytes) += PAGE_SIZE;
		expected_index++;
		(*nr_pages)++;
	}
	return rc;
}
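/*
 * Worked example (illustrative, assuming 4KiB pages and an rsize of
 * 16KiB): readahead hands us pages 7,6,5,4,3 in declining index order.
 * The code above moves pages 3,4,5,6 to tmplist (four pages reach the
 * 16KiB rsize cap), so the caller issues a single 16KiB read at file
 * offset 12KiB and page 7 stays on page_list for the next batch.
 */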
static int cifs_readpages(struct file *file, struct address_space *mapping,
			  struct list_head *page_list, unsigned num_pages)
{
	int rc;
	struct list_head tmplist;
	struct cifsFileInfo *open_file = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
	struct TCP_Server_Info *server;
	pid_t pid;
	unsigned int xid;

	xid = get_xid();
	/*
	 * Reads as many pages as possible from fscache. Returns -ENOBUFS
	 * immediately if the cookie is negative
	 *
	 * After this point, every page in the list might have PG_fscache set,
	 * so we will need to clean that up off of every page we don't use.
	 */
	rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
					 &num_pages);
	if (rc == 0) {
		free_xid(xid);
		return rc;
	}

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	rc = 0;
	server = tlink_tcon(open_file->tlink)->ses->server;

	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
		 __func__, file, mapping, num_pages);

	/*
	 * Start with the page at end of list and move it to private
	 * list. Do the same with any following pages until we hit
	 * the rsize limit, hit an index discontinuity, or run out of
	 * pages. Issue the async read and then start the loop again
	 * until the list is empty.
	 *
	 * Note that list order is important. The page_list is in
	 * the order of declining indexes. When we put the pages in
	 * the rdata->pages, then we want them in increasing order.
	 */
	while (!list_empty(page_list)) {
		unsigned int i, nr_pages, bytes, rsize;
		loff_t offset;
		struct page *page, *tpage;
		struct cifs_readdata *rdata;
		struct cifs_credits credits_on_stack;
		struct cifs_credits *credits = &credits_on_stack;

		if (open_file->invalidHandle) {
			rc = cifs_reopen_file(open_file, true);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
						   &rsize, credits);
		if (rc)
			break;

		/*
		 * Give up immediately if rsize is too small to read an entire
		 * page. The VFS will fall back to readpage. We should never
		 * reach this point however since we set ra_pages to 0 when the
		 * rsize is smaller than a cache page.
		 */
		if (unlikely(rsize < PAGE_SIZE)) {
			add_credits_and_wake_if(server, credits, 0);
			free_xid(xid);
			return 0;
		}

		rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
					 &nr_pages, &offset, &bytes);
		if (rc) {
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
		if (!rdata) {
			/* best to give up if we're out of mem */
			list_for_each_entry_safe(page, tpage, &tmplist, lru) {
				list_del(&page->lru);
				lru_cache_add_file(page);
				unlock_page(page);
				put_page(page);
			}
			rc = -ENOMEM;
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->mapping = mapping;
		rdata->offset = offset;
		rdata->bytes = bytes;
		rdata->pid = pid;
		rdata->pagesz = PAGE_SIZE;
		rdata->tailsz = PAGE_SIZE;
		rdata->read_into_pages = cifs_readpages_read_into_pages;
		rdata->copy_into_pages = cifs_readpages_copy_into_pages;
		rdata->credits = credits_on_stack;

		list_for_each_entry_safe(page, tpage, &tmplist, lru) {
			list_del(&page->lru);
			rdata->pages[rdata->nr_pages++] = page;
		}

		rc = adjust_credits(server, &rdata->credits, rdata->bytes);

		if (!rc) {
			if (rdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else
				rc = server->ops->async_readv(rdata);
		}

		if (rc) {
			add_credits_and_wake_if(server, &rdata->credits, 0);
			for (i = 0; i < rdata->nr_pages; i++) {
				page = rdata->pages[i];
				lru_cache_add_file(page);
				unlock_page(page);
				put_page(page);
			}
			/* Fallback to the readpage in error/reconnect cases */
			kref_put(&rdata->refcount, cifs_readdata_release);
			break;
		}

		kref_put(&rdata->refcount, cifs_readdata_release);
	}

	/* Any pages that have been shown to fscache but didn't get added to
	 * the pagecache must be uncached before they get returned to the
	 * allocator.
	 */
	cifs_fscache_readpages_cancel(mapping->host, page_list);
	free_xid(xid);
	return rc;
}
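/*
 * Per-batch lifecycle of the loop above: reserve up to rsize worth of
 * credits, pin a contiguous run of locked pages, then hand the rdata to
 * async_readv().  If submission fails, the pages are unlocked and
 * released here so the VFS can fall back to readpage; on success,
 * cifs_readv_complete() performs the equivalent cleanup when the
 * response arrives.
 */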
/*
 * cifs_readpage_worker must be called with the page pinned
 */
static int cifs_readpage_worker(struct file *file, struct page *page,
	loff_t *poffset)
{
	char *read_data;
	int rc;

	/* Is the page cached? */
	rc = cifs_readpage_from_fscache(file_inode(file), page);
	if (rc == 0)
		goto read_complete;

	read_data = kmap(page);
	/* for reads over a certain size could initiate async read ahead */

	rc = cifs_read(file, read_data, PAGE_SIZE, poffset);

	if (rc < 0)
		goto io_error;
	else
		cifs_dbg(FYI, "Bytes read %d\n", rc);

	/* we do not want atime to be less than mtime, it broke some apps */
	file_inode(file)->i_atime = current_time(file_inode(file));
	if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)))
		file_inode(file)->i_atime = file_inode(file)->i_mtime;
	else
		file_inode(file)->i_atime = current_time(file_inode(file));

	if (PAGE_SIZE > rc)
		memset(read_data + rc, 0, PAGE_SIZE - rc);

	flush_dcache_page(page);
	SetPageUptodate(page);

	/* send this page to the cache */
	cifs_readpage_to_fscache(file_inode(file), page);

	rc = 0;

io_error:
	kunmap(page);
	unlock_page(page);

read_complete:
	return rc;
}
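/*
 * A short read is not an error here: cifs_read() returns however many
 * bytes it got, the tail of the page is zero-filled above, and the
 * whole page can then safely be marked uptodate.
 */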
static int cifs_readpage(struct file *file, struct page *page)
{
	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
	int rc = -EACCES;
	unsigned int xid;

	xid = get_xid();

	if (file->private_data == NULL) {
		rc = -EBADF;
		free_xid(xid);
		return rc;
	}

	cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
		 page, (int)offset, (int)offset);

	rc = cifs_readpage_worker(file, page, &offset);

	free_xid(xid);
	return rc;
}
static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
{
	struct cifsFileInfo *open_file;

	spin_lock(&cifs_inode->open_file_lock);
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
			spin_unlock(&cifs_inode->open_file_lock);
			return 1;
		}
	}
	spin_unlock(&cifs_inode->open_file_lock);
	return 0;
}
/* We do not want to update the file size from server for inodes
   open for write - to avoid races with writepage extending
   the file - in the future we could consider allowing
   refreshing the inode only on increases in the file size
   but this is tricky to do without racing with writebehind
   page caching in the current Linux kernel design */
bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
{
	if (!cifsInode)
		return true;

	if (is_inode_writable(cifsInode)) {
		/* This inode is open for write at least once */
		struct cifs_sb_info *cifs_sb;

		cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
			/* since no page cache to corrupt on directio
			   we can change size safely */
			return true;
		}

		if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
			return true;

		return false;
	} else
		return true;
}
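/*
 * Illustrative use (not a verbatim quote of the caller): the attribute
 * revalidation path does roughly
 *
 *	if (is_size_safe_to_change(cifsInode, fattr->cf_eof))
 *		i_size_write(inode, fattr->cf_eof);
 *
 * i.e. the server-reported size is ignored whenever local writebehind
 * may still be extending the file.
 */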
static int cifs_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	int oncethru = 0;
	pgoff_t index = pos >> PAGE_SHIFT;
	loff_t offset = pos & (PAGE_SIZE - 1);
	loff_t page_start = pos & PAGE_MASK;
	loff_t i_size;
	struct page *page;
	int rc = 0;

	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);

start:
	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page) {
		rc = -ENOMEM;
		goto out;
	}

	if (PageUptodate(page))
		goto out;

	/*
	 * If we write a full page it will be up to date, no need to read from
	 * the server. If the write is short, we'll end up doing a sync write
	 * instead.
	 */
	if (len == PAGE_SIZE)
		goto out;

	/*
	 * optimize away the read when we have an oplock, and we're not
	 * expecting to use any of the data we'd be reading in. That
	 * is, when the page lies beyond the EOF, or straddles the EOF
	 * and the write will cover all of the existing data.
	 */
	if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
		i_size = i_size_read(mapping->host);
		if (page_start >= i_size ||
		    (offset == 0 && (pos + len) >= i_size)) {
			zero_user_segments(page, 0, offset,
					   offset + len,
					   PAGE_SIZE);
			/*
			 * PageChecked means that the parts of the page
			 * to which we're not writing are considered up
			 * to date. Once the data is copied to the
			 * page, it can be set uptodate.
			 */
			SetPageChecked(page);
			goto out;
		}
	}

	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
		/*
		 * might as well read a page, it is fast enough. If we get
		 * an error, we don't need to return it. cifs_write_end will
		 * do a sync write instead since PG_uptodate isn't set.
		 */
		cifs_readpage_worker(file, page, &page_start);
		put_page(page);
		oncethru = 1;
		goto start;
	} else {
		/* we could try using another file handle if there is one -
		   but how would we lock it to prevent close of that handle
		   racing with this read? In any case
		   this will be written out by write_end so is fine */
	}
out:
	*pagep = page;
	return rc;
}
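/*
 * Example of the PG_checked shortcut above: appending at a page-aligned
 * EOF while holding a read oplock.  The target page starts at i_size,
 * so there is nothing on the server worth reading; the bytes around
 * [offset, offset + len) are zeroed, PG_checked is set, and
 * cifs_write_end() can mark the page uptodate once the user data has
 * been copied in.
 */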
static int cifs_release_page(struct page *page, gfp_t gfp)
{
	if (PagePrivate(page))
		return 0;

	return cifs_fscache_release_page(page, gfp);
}
static void cifs_invalidate_page(struct page *page, unsigned int offset,
				 unsigned int length)
{
	struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);

	if (offset == 0 && length == PAGE_SIZE)
		cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
}
static int cifs_launder_page(struct page *page)
{
	int rc = 0;
	loff_t range_start = page_offset(page);
	loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 0,
		.range_start = range_start,
		.range_end = range_end,
	};

	cifs_dbg(FYI, "Launder page: %p\n", page);

	if (clear_page_dirty_for_io(page))
		rc = cifs_writepage_locked(page, &wbc);

	cifs_fscache_invalidate_page(page, page->mapping->host);
	return rc;
}
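/*
 * The wbc above deliberately has nr_to_write == 0: it only carries the
 * sync mode and byte range, since cifs_writepage_locked() writes this
 * one page itself rather than walking the mapping.  Laundering also
 * invalidates the fscache copy so stale data cannot be read back from
 * the cache later.
 */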
void cifs_oplock_break(struct work_struct *work)
{
	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
						  oplock_break);
	struct inode *inode = d_inode(cfile->dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	int rc = 0;
	bool purge_cache = false;

	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
			TASK_UNINTERRUPTIBLE);

	server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
				      cfile->oplock_epoch, &purge_cache);

	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
						cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
			 inode);
		cinode->oplock = 0;
	}

	if (inode && S_ISREG(inode->i_mode)) {
		if (CIFS_CACHE_READ(cinode))
			break_lease(inode, O_RDONLY);
		else
			break_lease(inode, O_WRONLY);
		rc = filemap_fdatawrite(inode->i_mapping);
		if (!CIFS_CACHE_READ(cinode) || purge_cache) {
			rc = filemap_fdatawait(inode->i_mapping);
			mapping_set_error(inode->i_mapping, rc);
			cifs_zap_mapping(inode);
		}
		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
		if (CIFS_CACHE_WRITE(cinode))
			goto oplock_break_ack;
	}

	rc = cifs_push_locks(cfile);
	if (rc)
		cifs_dbg(VFS, "Push locks rc = %d\n", rc);

oplock_break_ack:
	/*
	 * releasing stale oplock after recent reconnect of smb session using
	 * a now incorrect file handle is not a data integrity issue but do
	 * not bother sending an oplock release if session to server still is
	 * disconnected since oplock already released by the server
	 */
	if (!cfile->oplock_break_cancelled) {
		rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
							     cinode);
		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
	}
	_cifsFileInfo_put(cfile, false /* do not wait for ourselves */, false);
	cifs_done_oplock_break(cinode);
}
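/*
 * Ordering above matters: wait out in-flight writers, downgrade the
 * cached oplock state, flush and possibly purge the page cache, push
 * any cached byte-range locks back to the server, and only then send
 * the oplock acknowledgement, since the server holds off other openers
 * until the break is acknowledged.
 */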
 * The presence of cifs_direct_io() in the address space ops vector
 * allows open() O_DIRECT flags which would have failed otherwise.
 *
 * In the non-cached mode (mount with cache=none), we shunt off direct
 * read and write requests, so this method should never be called.
 *
 * Direct IO is not yet supported in the cached mode.
 */
static ssize_t
cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
{
	/*
	 * FIXME
	 * Eventually need to support direct IO for non forcedirectio mounts
	 */
	return -EINVAL;
}
const struct address_space_operations cifs_addr_ops = {
	.readpage = cifs_readpage,
	.readpages = cifs_readpages,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.direct_IO = cifs_direct_io,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
};
/*
 * cifs_readpages requires the server to support a buffer large enough to
 * contain the header plus one complete page of data.  Otherwise, we need
 * to leave cifs_readpages out of the address space operations.
 */
const struct address_space_operations cifs_addr_ops_smallbuf = {
	.readpage = cifs_readpage,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
};