/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/mm.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"
static inline int cifs_convert_flags(unsigned int flags)
{
	if ((flags & O_ACCMODE) == O_RDONLY)
		return GENERIC_READ;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		return GENERIC_WRITE;
	else if ((flags & O_ACCMODE) == O_RDWR) {
		/* GENERIC_ALL is too much permission to request
		   can cause unnecessary access denied on create */
		/* return GENERIC_ALL; */
		return (GENERIC_READ | GENERIC_WRITE);
	}

	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
		FILE_READ_DATA);
}

static u32 cifs_posix_convert_flags(unsigned int flags)
{
	u32 posix_flags = 0;

	if ((flags & O_ACCMODE) == O_RDONLY)
		posix_flags = SMB_O_RDONLY;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		posix_flags = SMB_O_WRONLY;
	else if ((flags & O_ACCMODE) == O_RDWR)
		posix_flags = SMB_O_RDWR;

	if (flags & O_CREAT) {
		posix_flags |= SMB_O_CREAT;
		if (flags & O_EXCL)
			posix_flags |= SMB_O_EXCL;
	} else if (flags & O_EXCL)
		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
			 current->comm, current->tgid);

	if (flags & O_TRUNC)
		posix_flags |= SMB_O_TRUNC;
	/* be safe and imply O_SYNC for O_DSYNC */
	if (flags & O_DSYNC)
		posix_flags |= SMB_O_SYNC;
	if (flags & O_DIRECTORY)
		posix_flags |= SMB_O_DIRECTORY;
	if (flags & O_NOFOLLOW)
		posix_flags |= SMB_O_NOFOLLOW;
	if (flags & O_DIRECT)
		posix_flags |= SMB_O_DIRECT;

	return posix_flags;
}

static inline int cifs_get_disposition(unsigned int flags)
{
	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
		return FILE_CREATE;
	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
		return FILE_OVERWRITE_IF;
	else if ((flags & O_CREAT) == O_CREAT)
		return FILE_OPEN_IF;
	else if ((flags & O_TRUNC) == O_TRUNC)
		return FILE_OVERWRITE;
	else
		return FILE_OPEN;
}
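/*
 * Example: an open(2) with O_CREAT | O_TRUNC maps to FILE_OVERWRITE_IF,
 * which truncates an existing file and creates a missing one - the
 * closest CIFS disposition to those POSIX flags (see the mapping table
 * in cifs_nt_open() below).
 */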
int cifs_posix_open(char *full_path, struct inode **pinode,
		    struct super_block *sb, int mode, unsigned int f_flags,
		    __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
	int rc;
	FILE_UNIX_BASIC_INFO *presp_data;
	__u32 posix_flags = 0;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	cifs_dbg(FYI, "posix open %s\n", full_path);

	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
	if (presp_data == NULL)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto posix_open_ret;
	}

	tcon = tlink_tcon(tlink);
	mode &= ~current_umask();

	posix_flags = cifs_posix_convert_flags(f_flags);
	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_remap(cifs_sb));
	cifs_put_tlink(tlink);

	if (rc)
		goto posix_open_ret;

	if (presp_data->Type == cpu_to_le32(-1))
		goto posix_open_ret; /* open ok, caller does qpathinfo */

	if (!pinode)
		goto posix_open_ret; /* caller does not need info */

	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

	/* get new inode and set it up */
	if (*pinode == NULL) {
		cifs_fill_uniqueid(sb, &fattr);
		*pinode = cifs_iget(sb, &fattr);
		if (!*pinode) {
			rc = -ENOMEM;
			goto posix_open_ret;
		}
	} else {
		cifs_fattr_to_inode(*pinode, &fattr);
	}

posix_open_ret:
	kfree(presp_data);
	return rc;
}
static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
	     struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
	     struct cifs_fid *fid, unsigned int xid)
{
	int rc;
	int desired_access;
	int disposition;
	int create_options = CREATE_NOT_DIR;
	FILE_ALL_INFO *buf;
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifs_open_parms oparms;

	if (!server->ops->open)
		return -ENOSYS;

	desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *	POSIX Flag            CIFS Disposition
 *	----------            ----------------
 *	O_CREAT               FILE_OPEN_IF
 *	O_CREAT | O_EXCL      FILE_CREATE
 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *	O_TRUNC               FILE_OVERWRITE
 *	none of the above     FILE_OPEN
 *
 *	Note that there is not a direct match between disposition
 *	FILE_SUPERSEDE (ie create whether or not file exists although
 *	O_CREAT | O_TRUNC is similar but truncates the existing
 *	file rather than creating a new file as FILE_SUPERSEDE does
 *	(which uses the attributes / metadata passed in on open call)
 *?
 *?  O_SYNC is a reasonable match to CIFS writethrough flag
 *?  and the read write flags match reasonably.  O_LARGEFILE
 *?  is irrelevant because largefile support is always used
 *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *	 O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 *********************************************************************/

	disposition = cifs_get_disposition(f_flags);

	/* BB pass O_SYNC flag through on file attributes .. BB */

	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = cifs_create_options(cifs_sb, create_options);
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = fid;
	oparms.reconnect = false;

	rc = server->ops->open(xid, &oparms, oplock, buf);

	if (rc)
		goto out;

	/* TODO: Add support for calling posix query info but with passing in fid */
	if (tcon->unix_ext)
		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
					      xid);
	else
		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
					 xid, fid);

	if (rc) {
		server->ops->close(xid, tcon, fid);
		if (rc == -ESTALE)
			rc = -EOPENSTALE;
	}

out:
	kfree(buf);
	return rc;
}
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
	struct cifs_fid_locks *cur;
	bool has_locks = false;

	down_read(&cinode->lock_sem);
	list_for_each_entry(cur, &cinode->llist, llist) {
		if (!list_empty(&cur->locks)) {
			has_locks = true;
			break;
		}
	}
	up_read(&cinode->lock_sem);
	return has_locks;
}

void
cifs_down_write(struct rw_semaphore *sem)
{
	while (!down_write_trylock(sem))
		msleep(10);
}
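/*
 * Note: writers of lock_sem in this file acquire it via the polling
 * helper above rather than a blocking down_write(); this avoids
 * deadlocking against readers that may hold lock_sem across the
 * reconnect/reopen paths.
 */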
static void cifsFileInfo_put_work(struct work_struct *work);

struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
		  struct tcon_link *tlink, __u32 oplock)
{
	struct dentry *dentry = file_dentry(file);
	struct inode *inode = d_inode(dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifsFileInfo *cfile;
	struct cifs_fid_locks *fdlocks;
	struct cifs_tcon *tcon = tlink_tcon(tlink);
	struct TCP_Server_Info *server = tcon->ses->server;

	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
	if (cfile == NULL)
		return cfile;

	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
	if (!fdlocks) {
		kfree(cfile);
		return NULL;
	}

	INIT_LIST_HEAD(&fdlocks->locks);
	fdlocks->cfile = cfile;
	cfile->llist = fdlocks;

	cfile->count = 1;
	cfile->pid = current->tgid;
	cfile->uid = current_fsuid();
	cfile->dentry = dget(dentry);
	cfile->f_flags = file->f_flags;
	cfile->invalidHandle = false;
	cfile->tlink = cifs_get_tlink(tlink);
	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
	mutex_init(&cfile->fh_mutex);
	spin_lock_init(&cfile->file_info_lock);

	cifs_sb_active(inode->i_sb);

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	cifs_down_write(&cinode->lock_sem);
	list_add(&fdlocks->llist, &cinode->llist);
	up_write(&cinode->lock_sem);

	spin_lock(&tcon->open_file_lock);
	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
		oplock = fid->pending_open->oplock;
	list_del(&fid->pending_open->olist);

	fid->purge_cache = false;
	server->ops->set_fid(cfile, fid, oplock);

	list_add(&cfile->tlist, &tcon->openFileList);
	atomic_inc(&tcon->num_local_opens);

	/* if readable file instance put first in list*/
	spin_lock(&cinode->open_file_lock);
	if (file->f_mode & FMODE_READ)
		list_add(&cfile->flist, &cinode->openFileList);
	else
		list_add_tail(&cfile->flist, &cinode->openFileList);
	spin_unlock(&cinode->open_file_lock);
	spin_unlock(&tcon->open_file_lock);

	if (fid->purge_cache)
		cifs_zap_mapping(inode);

	file->private_data = cfile;
	return cfile;
}
struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
	spin_lock(&cifs_file->file_info_lock);
	cifsFileInfo_get_locked(cifs_file);
	spin_unlock(&cifs_file->file_info_lock);
	return cifs_file;
}

static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct cifsLockInfo *li, *tmp;
	struct super_block *sb = inode->i_sb;

	/*
	 * Delete any outstanding lock records. We'll lose them when the file
	 * is closed anyway.
	 */
	cifs_down_write(&cifsi->lock_sem);
	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
		list_del(&li->llist);
		cifs_del_lock_waiters(li);
		kfree(li);
	}
	list_del(&cifs_file->llist->llist);
	kfree(cifs_file->llist);
	up_write(&cifsi->lock_sem);

	cifs_put_tlink(cifs_file->tlink);
	dput(cifs_file->dentry);
	cifs_sb_deactive(sb);
	kfree(cifs_file);
}

static void cifsFileInfo_put_work(struct work_struct *work)
{
	struct cifsFileInfo *cifs_file = container_of(work,
			struct cifsFileInfo, put);

	cifsFileInfo_put_final(cifs_file);
}
/**
 * cifsFileInfo_put - release a reference of file priv data
 *
 * Always potentially wait for oplock handler. See _cifsFileInfo_put().
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
	_cifsFileInfo_put(cifs_file, true, true);
}

/**
 * _cifsFileInfo_put - release a reference of file priv data
 *
 * This may involve closing the filehandle @cifs_file out on the
 * server. Must be called without holding tcon->open_file_lock,
 * cinode->open_file_lock and cifs_file->file_info_lock.
 *
 * If @wait_oplock_handler is true and we are releasing the last
 * reference, wait for any running oplock break handler of the file
 * and cancel any pending one. If calling this function from the
 * oplock break handler, you need to pass false.
 */
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
		       bool wait_oplock_handler, bool offload)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct super_block *sb = inode->i_sb;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fid fid;
	struct cifs_pending_open open;
	bool oplock_break_cancelled;

	spin_lock(&tcon->open_file_lock);
	spin_lock(&cifsi->open_file_lock);
	spin_lock(&cifs_file->file_info_lock);
	if (--cifs_file->count > 0) {
		spin_unlock(&cifs_file->file_info_lock);
		spin_unlock(&cifsi->open_file_lock);
		spin_unlock(&tcon->open_file_lock);
		return;
	}
	spin_unlock(&cifs_file->file_info_lock);

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/* store open in pending opens to make sure we don't miss lease break */
	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

	/* remove it from the lists */
	list_del(&cifs_file->flist);
	list_del(&cifs_file->tlist);
	atomic_dec(&tcon->num_local_opens);

	if (list_empty(&cifsi->openFileList)) {
		cifs_dbg(FYI, "closing last open instance for inode %p\n",
			 d_inode(cifs_file->dentry));
		/*
		 * In strict cache mode we need invalidate mapping on the last
		 * close because it may cause an error when we open this file
		 * again and get at least level II oplock.
		 */
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
		cifs_set_oplock_level(cifsi, 0);
	}

	spin_unlock(&cifsi->open_file_lock);
	spin_unlock(&tcon->open_file_lock);

	oplock_break_cancelled = wait_oplock_handler ?
		cancel_work_sync(&cifs_file->oplock_break) : false;

	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
		struct TCP_Server_Info *server = tcon->ses->server;
		unsigned int xid;

		xid = get_xid();
		if (server->ops->close_getattr)
			server->ops->close_getattr(xid, tcon, cifs_file);
		else if (server->ops->close)
			server->ops->close(xid, tcon, &cifs_file->fid);
		_free_xid(xid);
	}

	if (oplock_break_cancelled)
		cifs_done_oplock_break(cifsi);

	cifs_del_pending_open(&open);

	if (offload)
		queue_work(fileinfo_put_wq, &cifs_file->put);
	else
		cifsFileInfo_put_final(cifs_file);
}
int cifs_open(struct inode *inode, struct file *file)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct TCP_Server_Info *server;
	struct cifs_tcon *tcon;
	struct tcon_link *tlink;
	struct cifsFileInfo *cfile = NULL;
	char *full_path = NULL;
	bool posix_open_ok = false;
	struct cifs_fid fid;
	struct cifs_pending_open open;

	xid = get_xid();

	cifs_sb = CIFS_SB(inode->i_sb);
	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		free_xid(xid);
		return PTR_ERR(tlink);
	}
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	full_path = build_path_from_dentry(file_dentry(file));
	if (full_path == NULL) {
		rc = -ENOMEM;
		goto out;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
		 inode, file->f_flags, full_path);

	if (file->f_flags & O_DIRECT &&
	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
			file->f_op = &cifs_file_direct_nobrl_ops;
		else
			file->f_op = &cifs_file_direct_ops;
	}

	if (server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (!tcon->broken_posix_open && tcon->unix_ext &&
	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/* can not refresh inode info since size could be stale */
		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				cifs_sb->mnt_file_mode /* ignored */,
				file->f_flags, &oplock, &fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix open succeeded\n");
			posix_open_ok = true;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			if (tcon->ses->serverNOS)
				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
					 tcon->ses->serverName,
					 tcon->ses->serverNOS);
			tcon->broken_posix_open = true;
		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
			 (rc != -EOPNOTSUPP)) /* path not found or net err */
			goto out;
		/*
		 * Else fallthrough to retry open the old way on network i/o
		 * or DFS errors.
		 */
	}

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	cifs_add_pending_open(&fid, tlink, &open);

	if (!posix_open_ok) {
		if (server->ops->get_lease_key)
			server->ops->get_lease_key(inode, &fid);

		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
				  file->f_flags, &oplock, &fid, xid);
		if (rc) {
			cifs_del_pending_open(&open);
			goto out;
		}
	}

	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
	if (cfile == NULL) {
		if (server->ops->close)
			server->ops->close(xid, tcon, &fid);
		cifs_del_pending_open(&open);
		rc = -ENOMEM;
		goto out;
	}

	cifs_fscache_set_inode_cookie(inode, file);

	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
		/*
		 * Time to set mode which we can not set earlier due to
		 * problems creating new read-only files.
		 */
		struct cifs_unix_set_info_args args = {
			.mode	= inode->i_mode,
			.uid	= INVALID_UID, /* no change */
			.gid	= INVALID_GID, /* no change */
			.ctime	= NO_CHANGE_64,
			.atime	= NO_CHANGE_64,
			.mtime	= NO_CHANGE_64,
			.device	= 0,
		};
		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
				       cfile->pid);
	}

out:
	kfree(full_path);
	free_xid(xid);
	cifs_put_tlink(tlink);
	return rc;
}
static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
	if (cinode->can_cache_brlcks) {
		/* can cache locks - no need to relock */
		up_read(&cinode->lock_sem);
		return rc;
	}

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	up_read(&cinode->lock_sem);
	return rc;
}
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsInodeInfo *cinode;
	struct inode *inode;
	char *full_path = NULL;
	int desired_access;
	int disposition = FILE_OPEN;
	int create_options = CREATE_NOT_DIR;
	struct cifs_open_parms oparms;

	xid = get_xid();
	mutex_lock(&cfile->fh_mutex);
	if (!cfile->invalidHandle) {
		mutex_unlock(&cfile->fh_mutex);
		rc = 0;
		free_xid(xid);
		return rc;
	}

	inode = d_inode(cfile->dentry);
	cifs_sb = CIFS_SB(inode->i_sb);
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	/*
	 * Can not grab rename sem here because various ops, including those
	 * that already have the rename sem can end up causing writepage to get
	 * called and if the server was down that means we end up here, and we
	 * can never tell if the caller already has the rename_sem.
	 */
	full_path = build_path_from_dentry(cfile->dentry);
	if (full_path == NULL) {
		rc = -ENOMEM;
		mutex_unlock(&cfile->fh_mutex);
		free_xid(xid);
		return rc;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
		 inode, cfile->f_flags, full_path);

	if (tcon->ses->server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (tcon->unix_ext && cap_unix(tcon->ses) &&
	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/*
		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
		 * original open. Must mask them off for a reopen.
		 */
		unsigned int oflags = cfile->f_flags &
						~(O_CREAT | O_EXCL | O_TRUNC);

		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
				     cifs_sb->mnt_file_mode /* ignored */,
				     oflags, &oplock, &cfile->fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix reopen succeeded\n");
			oparms.reconnect = true;
			goto reopen_success;
		}
		/*
		 * fallthrough to retry open the old way on errors, especially
		 * in the reconnect path it is important to retry hard
		 */
	}

	desired_access = cifs_convert_flags(cfile->f_flags);

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (cfile->f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (cfile->f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &cfile->fid);

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = cifs_create_options(cifs_sb, create_options);
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = &cfile->fid;
	oparms.reconnect = true;

	/*
	 * Can not refresh inode by passing in file_info buf to be returned by
	 * ops->open and then calling get_inode_info with returned buf since
	 * file might have write behind data that needs to be flushed and server
	 * version of file size can be stale. If we knew for sure that inode was
	 * not dirty locally we could do this.
	 */
	rc = server->ops->open(xid, &oparms, &oplock, NULL);
	if (rc == -ENOENT && oparms.reconnect == false) {
		/* durable handle timeout is expired - open the file again */
		rc = server->ops->open(xid, &oparms, &oplock, NULL);
		/* indicate that we need to relock the file */
		oparms.reconnect = true;
	}

	if (rc) {
		mutex_unlock(&cfile->fh_mutex);
		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
		cifs_dbg(FYI, "oplock: %d\n", oplock);
		goto reopen_error_exit;
	}

reopen_success:
	cfile->invalidHandle = false;
	mutex_unlock(&cfile->fh_mutex);
	cinode = CIFS_I(inode);

	if (can_flush) {
		rc = filemap_write_and_wait(inode->i_mapping);
		if (!is_interrupt_error(rc))
			mapping_set_error(inode->i_mapping, rc);

		if (tcon->posix_extensions)
			rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
		else if (tcon->unix_ext)
			rc = cifs_get_inode_info_unix(&inode, full_path,
						      inode->i_sb, xid);
		else
			rc = cifs_get_inode_info(&inode, full_path, NULL,
						 inode->i_sb, xid, NULL);
	}
	/*
	 * Else we are writing out data to server already and could deadlock if
	 * we tried to flush data, and since we do not know if we have data that
	 * would invalidate the current end of file on the server we can not go
	 * to the server to get the new inode info.
	 */

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	server->ops->set_fid(cfile, &cfile->fid, oplock);
	if (oparms.reconnect)
		cifs_relock_file(cfile);

reopen_error_exit:
	kfree(full_path);
	free_xid(xid);
	return rc;
}
int cifs_close(struct inode *inode, struct file *file)
{
	if (file->private_data != NULL) {
		_cifsFileInfo_put(file->private_data, true, false);
		file->private_data = NULL;
	}

	/* return code from the ->release op is always ignored */
	return 0;
}
void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
	struct cifsFileInfo *open_file;
	struct list_head *tmp;
	struct list_head *tmp1;
	struct list_head tmp_list;

	if (!tcon->use_persistent || !tcon->need_reopen_files)
		return;

	tcon->need_reopen_files = false;

	cifs_dbg(FYI, "Reopen persistent handles\n");
	INIT_LIST_HEAD(&tmp_list);

	/* list all files open on tree connection, reopen resilient handles */
	spin_lock(&tcon->open_file_lock);
	list_for_each(tmp, &tcon->openFileList) {
		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
		if (!open_file->invalidHandle)
			continue;
		cifsFileInfo_get(open_file);
		list_add_tail(&open_file->rlist, &tmp_list);
	}
	spin_unlock(&tcon->open_file_lock);

	list_for_each_safe(tmp, tmp1, &tmp_list) {
		open_file = list_entry(tmp, struct cifsFileInfo, rlist);
		if (cifs_reopen_file(open_file, false /* do not flush */))
			tcon->need_reopen_files = true;
		list_del_init(&open_file->rlist);
		cifsFileInfo_put(open_file);
	}
}
int cifs_closedir(struct inode *inode, struct file *file)
{
	int rc = 0;
	unsigned int xid;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	char *buf;

	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

	if (cfile == NULL)
		return rc;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	cifs_dbg(FYI, "Freeing private data in close dir\n");
	spin_lock(&cfile->file_info_lock);
	if (server->ops->dir_needs_close(cfile)) {
		cfile->invalidHandle = true;
		spin_unlock(&cfile->file_info_lock);
		if (server->ops->close_dir)
			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
		else
			rc = -ENOSYS;
		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
		/* not much we can do if it fails anyway, ignore rc */
		rc = 0;
	} else
		spin_unlock(&cfile->file_info_lock);

	buf = cfile->srch_inf.ntwrk_buf_start;
	if (buf) {
		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
		cfile->srch_inf.ntwrk_buf_start = NULL;
		if (cfile->srch_inf.smallBuf)
			cifs_small_buf_release(buf);
		else
			cifs_buf_release(buf);
	}

	cifs_put_tlink(cfile->tlink);
	kfree(file->private_data);
	file->private_data = NULL;
	/* BB can we lock the filestruct while this is going on? */
	free_xid(xid);
	return rc;
}
static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
{
	struct cifsLockInfo *lock =
		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
	if (!lock)
		return lock;
	lock->offset = offset;
	lock->length = length;
	lock->type = type;
	lock->pid = current->tgid;
	lock->flags = flags;
	INIT_LIST_HEAD(&lock->blist);
	init_waitqueue_head(&lock->block_q);
	return lock;
}
void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
		list_del_init(&li->blist);
		wake_up(&li->block_q);
	}
}
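/*
 * Blocked waiters queue themselves on the blist of the conflicting lock
 * (see cifs_lock_add_if() below); deleting that lock unlinks and wakes
 * them so they can rescan for conflicts.
 */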
#define CIFS_LOCK_OP	0
#define CIFS_READ_OP	1
#define CIFS_WRITE_OP	2

/* @rw_check : 0 - no op, 1 - read, 2 - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
			    __u64 length, __u8 type, __u16 flags,
			    struct cifsFileInfo *cfile,
			    struct cifsLockInfo **conf_lock, int rw_check)
{
	struct cifsLockInfo *li;
	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

	list_for_each_entry(li, &fdlocks->locks, llist) {
		if (offset + length <= li->offset ||
		    offset >= li->offset + li->length)
			continue;
		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
		    server->ops->compare_fids(cfile, cur_cfile)) {
			/* shared lock prevents write op through the same fid */
			if (!(li->type & server->vals->shared_lock_type) ||
			    rw_check != CIFS_WRITE_OP)
				continue;
		}
		if ((type & server->vals->shared_lock_type) &&
		    ((server->ops->compare_fids(cfile, cur_cfile) &&
		     current->tgid == li->pid) || type == li->type))
			continue;
		if (rw_check == CIFS_LOCK_OP &&
		    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
		    server->ops->compare_fids(cfile, cur_cfile))
			continue;
		if (conf_lock)
			*conf_lock = li;
		return true;
	}
	return false;
}
bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
			__u8 type, __u16 flags,
			struct cifsLockInfo **conf_lock, int rw_check)
{
	bool rc = false;
	struct cifs_fid_locks *cur;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

	list_for_each_entry(cur, &cinode->llist, llist) {
		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
						 flags, cfile, conf_lock,
						 rw_check);
		if (rc)
			break;
	}

	return rc;
}
/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
	       __u8 type, struct file_lock *flock)
{
	int rc = 0;
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	bool exist;

	down_read(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, offset, length, type,
					flock->fl_flags, &conf_lock,
					CIFS_LOCK_OP);
	if (exist) {
		flock->fl_start = conf_lock->offset;
		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
		flock->fl_pid = conf_lock->pid;
		if (conf_lock->type & server->vals->shared_lock_type)
			flock->fl_type = F_RDLCK;
		else
			flock->fl_type = F_WRLCK;
	} else if (!cinode->can_cache_brlcks)
		rc = 1;
	else
		flock->fl_type = F_UNLCK;

	up_read(&cinode->lock_sem);
	return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	cifs_down_write(&cinode->lock_sem);
	list_add_tail(&lock->llist, &cfile->llist->locks);
	up_write(&cinode->lock_sem);
}
/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
		 bool wait)
{
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	bool exist;
	int rc = 0;

try_again:
	exist = false;
	cifs_down_write(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
					lock->type, lock->flags, &conf_lock,
					CIFS_LOCK_OP);
	if (!exist && cinode->can_cache_brlcks) {
		list_add_tail(&lock->llist, &cfile->llist->locks);
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (!exist)
		rc = 1;
	else if (!wait)
		rc = -EACCES;
	else {
		list_add_tail(&lock->blist, &conf_lock->blist);
		up_write(&cinode->lock_sem);
		rc = wait_event_interruptible(lock->block_q,
					(lock->blist.prev == &lock->blist) &&
					(lock->blist.next == &lock->blist));
		if (!rc)
			goto try_again;
		cifs_down_write(&cinode->lock_sem);
		list_del_init(&lock->blist);
	}

	up_write(&cinode->lock_sem);
	return rc;
}
/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
	int rc = 0;
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	unsigned char saved_type = flock->fl_type;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return 1;

	down_read(&cinode->lock_sem);
	posix_test_lock(file, flock);

	if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
		flock->fl_type = saved_type;
		rc = 1;
	}

	up_read(&cinode->lock_sem);
	return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if we need to request to the server;
 * 3) <0, if the error occurs while setting the lock.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	int rc = 1;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return rc;

try_again:
	cifs_down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

	rc = posix_lock_file(file, flock, NULL);
	up_write(&cinode->lock_sem);
	if (rc == FILE_LOCK_DEFERRED) {
		rc = wait_event_interruptible(flock->fl_wait,
					list_empty(&flock->fl_blocked_member));
		if (!rc)
			goto try_again;
		locks_delete_block(flock);
	}
	return rc;
}
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
	unsigned int xid;
	int rc = 0, stored_rc;
	struct cifsLockInfo *li, *tmp;
	struct cifs_tcon *tcon;
	unsigned int num, max_num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
	int i;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
		free_xid(xid);
		return -EINVAL;
	}

	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
		     PAGE_SIZE);
	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
			PAGE_SIZE);
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf) {
		free_xid(xid);
		return -ENOMEM;
	}

	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (li->type != types[i])
				continue;
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       (__u8)li->type, 0, num,
						       buf);
				if (stored_rc)
					rc = stored_rc;
				cur = buf;
				num = 0;
			} else
				cur++;
		}
		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       (__u8)types[i], 0, num, buf);
			if (stored_rc)
				rc = stored_rc;
		}
	}

	kfree(buf);
	free_xid(xid);
	return rc;
}
static __u32
hash_lockowner(fl_owner_t owner)
{
	return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}
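/*
 * POSIX lock owners are kernel pointers; hash32_ptr() folds them to 32
 * bits and the random cifs_lock_secret (initialized at module load) is
 * XORed in so that raw kernel addresses are never sent to the server as
 * lock pids.
 */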
struct lock_to_push {
	struct list_head llist;
	__u64 offset;
	__u64 length;
	__u32 pid;
	__u16 netfid;
	__u8 type;
};

static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
	struct inode *inode = d_inode(cfile->dentry);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct file_lock *flock;
	struct file_lock_context *flctx = inode->i_flctx;
	unsigned int count = 0, i;
	int rc = 0, xid, type;
	struct list_head locks_to_send, *el;
	struct lock_to_push *lck, *tmp;
	__u64 length;

	xid = get_xid();

	if (!flctx)
		goto out;

	spin_lock(&flctx->flc_lock);
	list_for_each(el, &flctx->flc_posix) {
		count++;
	}
	spin_unlock(&flctx->flc_lock);

	INIT_LIST_HEAD(&locks_to_send);

	/*
	 * Allocating count locks is enough because no FL_POSIX locks can be
	 * added to the list while we are holding cinode->lock_sem that
	 * protects locking operations of this inode.
	 */
	for (i = 0; i < count; i++) {
		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
		if (!lck) {
			rc = -ENOMEM;
			goto err_out;
		}
		list_add_tail(&lck->llist, &locks_to_send);
	}

	el = locks_to_send.next;
	spin_lock(&flctx->flc_lock);
	list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
		if (el == &locks_to_send) {
			/*
			 * The list ended. We don't have enough allocated
			 * structures - something is really wrong.
			 */
			cifs_dbg(VFS, "Can't push all brlocks!\n");
			break;
		}
		length = 1 + flock->fl_end - flock->fl_start;
		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
			type = CIFS_RDLCK;
		else
			type = CIFS_WRLCK;
		lck = list_entry(el, struct lock_to_push, llist);
		lck->pid = hash_lockowner(flock->fl_owner);
		lck->netfid = cfile->fid.netfid;
		lck->length = length;
		lck->type = type;
		lck->offset = flock->fl_start;
		/* advance to the next preallocated entry */
		el = el->next;
	}
	spin_unlock(&flctx->flc_lock);

	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		int stored_rc;

		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
					     lck->offset, lck->length, NULL,
					     lck->type, 0);
		if (stored_rc)
			rc = stored_rc;
		list_del(&lck->llist);
		kfree(lck);
	}

out:
	free_xid(xid);
	return rc;
err_out:
	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		list_del(&lck->llist);
		kfree(lck);
	}
	goto out;
}
static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	/* we are going to update can_cache_brlcks here - need a write access */
	cifs_down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	cinode->can_cache_brlcks = false;
	up_write(&cinode->lock_sem);
	return rc;
}
static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
		bool *wait_flag, struct TCP_Server_Info *server)
{
	if (flock->fl_flags & FL_POSIX)
		cifs_dbg(FYI, "Posix\n");
	if (flock->fl_flags & FL_FLOCK)
		cifs_dbg(FYI, "Flock\n");
	if (flock->fl_flags & FL_SLEEP) {
		cifs_dbg(FYI, "Blocking lock\n");
		*wait_flag = true;
	}
	if (flock->fl_flags & FL_ACCESS)
		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
	if (flock->fl_flags & FL_LEASE)
		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
	if (flock->fl_flags &
	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
	       FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
		cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);

	*type = server->vals->large_lock_type;
	if (flock->fl_type == F_WRLCK) {
		cifs_dbg(FYI, "F_WRLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_UNLCK) {
		cifs_dbg(FYI, "F_UNLCK\n");
		*type |= server->vals->unlock_lock_type;
		*unlock = 1;
		/* Check if unlock includes more than one lock range */
	} else if (flock->fl_type == F_RDLCK) {
		cifs_dbg(FYI, "F_RDLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_EXLCK) {
		cifs_dbg(FYI, "F_EXLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_SHLCK) {
		cifs_dbg(FYI, "F_SHLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else
		cifs_dbg(FYI, "Unknown type of lock\n");
}
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	__u16 netfid = cfile->fid.netfid;

	if (posix_lck) {
		int posix_lock_type;

		rc = cifs_posix_lock_test(file, flock);
		if (!rc)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;
		rc = CIFSSMBPosixLock(xid, tcon, netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length, flock,
				      posix_lock_type, wait_flag);
		return rc;
	}

	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
	if (!rc)
		return rc;

	/* BB we could chain these into one lock request BB */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 0, 1, false);
		flock->fl_type = F_UNLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
		return 0;
	}

	if (type & server->vals->shared_lock_type) {
		flock->fl_type = F_WRLCK;
		return 0;
	}

	type &= ~server->vals->exclusive_lock_type;

	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
				    type | server->vals->shared_lock_type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
			type | server->vals->shared_lock_type, 0, 1, false);
		flock->fl_type = F_RDLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
	} else
		flock->fl_type = F_WRLCK;

	return 0;
}
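/*
 * Note on the probe above: SMB has no "test lock" operation, so the
 * mandatory-style F_GETLK path tries to take the lock and, on success,
 * immediately unlocks it and reports F_UNLCK; a failure is reported as
 * a conflicting lock of the other type.
 */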
void
cifs_move_llist(struct list_head *source, struct list_head *dest)
{
	struct list_head *li, *tmp;
	list_for_each_safe(li, tmp, source)
		list_move(li, dest);
}

void
cifs_free_llist(struct list_head *llist)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, llist, llist) {
		cifs_del_lock_waiters(li);
		list_del(&li->llist);
		kfree(li);
	}
}
int
cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
		  unsigned int xid)
{
	int rc = 0, stored_rc;
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
	unsigned int i;
	unsigned int max_num, num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifsLockInfo *li, *tmp;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct list_head tmp_llist;

	INIT_LIST_HEAD(&tmp_llist);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
		return -EINVAL;

	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
		     PAGE_SIZE);
	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
			PAGE_SIZE);
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	cifs_down_write(&cinode->lock_sem);
	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (flock->fl_start > li->offset ||
			    (flock->fl_start + length) <
			    (li->offset + li->length))
				continue;
			if (current->tgid != li->pid)
				continue;
			if (types[i] != li->type)
				continue;
			if (cinode->can_cache_brlcks) {
				/*
				 * We can cache brlock requests - simply remove
				 * a lock from the file's list.
				 */
				list_del(&li->llist);
				cifs_del_lock_waiters(li);
				kfree(li);
				continue;
			}
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			/*
			 * We need to save a lock here to let us add it again to
			 * the file's list if the unlock range request fails on
			 * the server.
			 */
			list_move(&li->llist, &tmp_llist);
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       li->type, num, 0, buf);
				if (stored_rc) {
					/*
					 * We failed on the unlock range
					 * request - add all locks from the tmp
					 * list to the head of the file's list.
					 */
					cifs_move_llist(&tmp_llist,
							&cfile->llist->locks);
					rc = stored_rc;
				} else
					/*
					 * The unlock range request succeeded -
					 * free the tmp list.
					 */
					cifs_free_llist(&tmp_llist);
				cur = buf;
				num = 0;
			} else
				cur++;
		}
		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       types[i], num, 0, buf);
			if (stored_rc) {
				cifs_move_llist(&tmp_llist,
						&cfile->llist->locks);
				rc = stored_rc;
			} else
				cifs_free_llist(&tmp_llist);
		}
	}

	up_write(&cinode->lock_sem);
	kfree(buf);
	return rc;
}
static int
cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, int lock, int unlock,
	   unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct inode *inode = d_inode(cfile->dentry);

	if (posix_lck) {
		int posix_lock_type;

		rc = cifs_posix_lock_set(file, flock);
		if (!rc || rc < 0)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;

		if (unlock == 1)
			posix_lock_type = CIFS_UNLCK;

		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length,
				      NULL, posix_lock_type, wait_flag);
		goto out;
	}

	if (lock) {
		struct cifsLockInfo *lock;

		lock = cifs_lock_init(flock->fl_start, length, type,
				      flock->fl_flags);
		if (!lock)
			return -ENOMEM;

		rc = cifs_lock_add_if(cfile, lock, wait_flag);
		if (rc < 0) {
			kfree(lock);
			return rc;
		}
		if (!rc)
			goto out;

		/*
		 * Windows 7 server can delay breaking lease from read to None
		 * if we set a byte-range lock on a file - break it explicitly
		 * before sending the lock to the server to be sure the next
		 * read won't conflict with non-overlapping locks due to
		 * page reading.
		 */
		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
					CIFS_CACHE_READ(CIFS_I(inode))) {
			cifs_zap_mapping(inode);
			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
				 inode);
			CIFS_I(inode)->oplock = 0;
		}

		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 1, 0, wait_flag);
		if (rc) {
			kfree(lock);
			return rc;
		}

		cifs_lock_add(cfile, lock);
	} else if (unlock)
		rc = server->ops->mand_unlock_range(cfile, flock, xid);

out:
	if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
		/*
		 * If this is a request to remove all locks because we
		 * are closing the file, it doesn't matter if the
		 * unlocking failed as both cifs.ko and the SMB server
		 * remove the lock on file close
		 */
		if (rc) {
			cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
			if (!(flock->fl_flags & FL_CLOSE))
				return rc;
		}
		rc = locks_lock_file_wait(file, flock);
	}
	return rc;
}
int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
{
	int rc, xid;
	int lock = 0, unlock = 0;
	bool wait_flag = false;
	bool posix_lck = false;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct cifsFileInfo *cfile;
	__u32 type;

	rc = -EACCES;
	xid = get_xid();

	if (!(fl->fl_flags & FL_FLOCK)) {
		free_xid(xid);
		return -ENOLCK;
	}

	cfile = (struct cifsFileInfo *)file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
			tcon->ses->server);
	cifs_sb = CIFS_FILE_SB(file);

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		posix_lck = true;

	if (!lock && !unlock) {
		/*
		 * if no lock or unlock then nothing to do since we do not
		 * know what it is
		 */
		free_xid(xid);
		return -EOPNOTSUPP;
	}

	rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
			xid);
	free_xid(xid);
	return rc;
}
int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
{
	int rc, xid;
	int lock = 0, unlock = 0;
	bool wait_flag = false;
	bool posix_lck = false;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct cifsFileInfo *cfile;
	__u32 type;

	rc = -EACCES;
	xid = get_xid();

	cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
		 cmd, flock->fl_flags, flock->fl_type,
		 flock->fl_start, flock->fl_end);

	cfile = (struct cifsFileInfo *)file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
			tcon->ses->server);
	cifs_sb = CIFS_FILE_SB(file);

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		posix_lck = true;
	/*
	 * BB add code here to normalize offset and length to account for
	 * negative length which we can not accept over the wire.
	 */
	if (IS_GETLK(cmd)) {
		rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
		free_xid(xid);
		return rc;
	}

	if (!lock && !unlock) {
		/*
		 * if no lock or unlock then nothing to do since we do not
		 * know what it is
		 */
		free_xid(xid);
		return -EOPNOTSUPP;
	}

	rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
			xid);
	free_xid(xid);
	return rc;
}
/*
 * update the file size (if needed) after a write. Should be called with
 * the inode->i_lock held
 */
void
cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
		unsigned int bytes_written)
{
	loff_t end_of_write = offset + bytes_written;

	if (end_of_write > cifsi->server_eof)
		cifsi->server_eof = end_of_write;
}
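/*
 * server_eof above is the client's record of the file size on the
 * server, kept separately from i_size; both are updated under
 * inode->i_lock by writers such as cifs_write() below.
 */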
static ssize_t
cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
	   size_t write_size, loff_t *offset)
{
	int rc = 0;
	unsigned int bytes_written = 0;
	unsigned int total_written;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	struct dentry *dentry = open_file->dentry;
	struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
	struct cifs_io_parms io_parms = {0};

	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
		 write_size, *offset, dentry);

	tcon = tlink_tcon(open_file->tlink);
	server = tcon->ses->server;

	if (!server->ops->sync_write)
		return -ENOSYS;

	xid = get_xid();

	for (total_written = 0; write_size > total_written;
	     total_written += bytes_written) {
		rc = -EAGAIN;
		while (rc == -EAGAIN) {
			struct kvec iov[2];
			unsigned int len;

			if (open_file->invalidHandle) {
				/* we could deadlock if we called
				   filemap_fdatawait from here so tell
				   reopen_file not to flush data to
				   server now */
				rc = cifs_reopen_file(open_file, false);
				if (rc != 0)
					break;
			}

			len = min(server->ops->wp_retry_size(d_inode(dentry)),
				  (unsigned int)write_size - total_written);
			/* iov[0] is reserved for smb header */
			iov[1].iov_base = (char *)write_data + total_written;
			iov[1].iov_len = len;
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = len;
			rc = server->ops->sync_write(xid, &open_file->fid,
						     &io_parms, &bytes_written,
						     iov, 1);
		}
		if (rc || (bytes_written == 0)) {
			if (total_written)
				break;
			else {
				free_xid(xid);
				return rc;
			}
		} else {
			spin_lock(&d_inode(dentry)->i_lock);
			cifs_update_eof(cifsi, *offset, bytes_written);
			spin_unlock(&d_inode(dentry)->i_lock);
			*offset += bytes_written;
		}
	}

	cifs_stats_bytes_written(tcon, total_written);

	if (total_written > 0) {
		spin_lock(&d_inode(dentry)->i_lock);
		if (*offset > d_inode(dentry)->i_size)
			i_size_write(d_inode(dentry), *offset);
		spin_unlock(&d_inode(dentry)->i_lock);
	}
	mark_inode_dirty_sync(d_inode(dentry));
	free_xid(xid);
	return total_written;
}
struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
					bool fsuid_only)
{
	struct cifsFileInfo *open_file = NULL;
	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&cifs_inode->open_file_lock);
	/* we could simply get the first_list_entry since write-only entries
	   are always at the end of the list but since the first entry might
	   have a close pending, we go through the whole list */
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
			if (!open_file->invalidHandle) {
				/* found a good file */
				/* lock it so it will not be closed on us */
				cifsFileInfo_get(open_file);
				spin_unlock(&cifs_inode->open_file_lock);
				return open_file;
			} /* else might as well continue, and look for
			     another, or simply have the caller reopen it
			     again rather than trying to fix this handle */
		} else /* write only file */
			break; /* write only files are last so must be done */
	}
	spin_unlock(&cifs_inode->open_file_lock);
	return NULL;
}
1963 /* Return -EBADF if no handle is found and general rc otherwise */
1965 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
1966 struct cifsFileInfo **ret_file)
1968 struct cifsFileInfo *open_file, *inv_file = NULL;
1969 struct cifs_sb_info *cifs_sb;
1970 bool any_available = false;
1971 int rc = -EBADF;
1972 unsigned int refind = 0;
1973 bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
1974 bool with_delete = flags & FIND_WR_WITH_DELETE;
1975 *ret_file = NULL;
1978 * Having a null inode here (because mapping->host was set to zero by
1979 * the VFS or MM) should not happen but we had reports of on oops (due
1980 * to it being zero) during stress testcases so we need to check for it
1983 if (cifs_inode == NULL) {
1984 cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
1985 dump_stack();
1986 return rc;
1989 cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1991 /* only filter by fsuid on multiuser mounts */
1992 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1993 fsuid_only = false;
1995 spin_lock(&cifs_inode->open_file_lock);
1996 refind_writable:
1997 if (refind > MAX_REOPEN_ATT) {
1998 spin_unlock(&cifs_inode->open_file_lock);
1999 return rc;
2001 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2002 if (!any_available && open_file->pid != current->tgid)
2003 continue;
2004 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2005 continue;
2006 if (with_delete && !(open_file->fid.access & DELETE))
2007 continue;
2008 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2009 if (!open_file->invalidHandle) {
2010 /* found a good writable file */
2011 cifsFileInfo_get(open_file);
2012 spin_unlock(&cifs_inode->open_file_lock);
2013 *ret_file = open_file;
2014 return 0;
2015 } else {
2016 if (!inv_file)
2017 inv_file = open_file;
2021 /* couldn't find a usable FH with the same pid, try any available */
2022 if (!any_available) {
2023 any_available = true;
2024 goto refind_writable;
2027 if (inv_file) {
2028 any_available = false;
2029 cifsFileInfo_get(inv_file);
2032 spin_unlock(&cifs_inode->open_file_lock);
2034 if (inv_file) {
2035 rc = cifs_reopen_file(inv_file, false);
2036 if (!rc) {
2037 *ret_file = inv_file;
2038 return 0;
2041 spin_lock(&cifs_inode->open_file_lock);
2042 list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2043 spin_unlock(&cifs_inode->open_file_lock);
2044 cifsFileInfo_put(inv_file);
2045 ++refind;
2046 inv_file = NULL;
2047 spin_lock(&cifs_inode->open_file_lock);
2048 goto refind_writable;
2051 return rc;
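/*
 * The search above proceeds in widening passes: first only handles
 * opened by the current tgid are considered, then (once
 * any_available is set) handles from any process, and finally a
 * remembered invalidated handle is reopened outside the spinlock.
 * Each reopen attempt bumps refind, and the loop gives up after
 * MAX_REOPEN_ATT tries, so refind_writable cannot spin forever.
 */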
2054 struct cifsFileInfo *
2055 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2057 struct cifsFileInfo *cfile;
2058 int rc;
2060 rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2061 if (rc)
2062 cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2064 return cfile;
2068 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2069 int flags,
2070 struct cifsFileInfo **ret_file)
2072 struct list_head *tmp;
2073 struct cifsFileInfo *cfile;
2074 struct cifsInodeInfo *cinode;
2075 char *full_path;
2077 *ret_file = NULL;
2079 spin_lock(&tcon->open_file_lock);
2080 list_for_each(tmp, &tcon->openFileList) {
2081 cfile = list_entry(tmp, struct cifsFileInfo,
2082 tlist);
2083 full_path = build_path_from_dentry(cfile->dentry);
2084 if (full_path == NULL) {
2085 spin_unlock(&tcon->open_file_lock);
2086 return -ENOMEM;
2088 if (strcmp(full_path, name)) {
2089 kfree(full_path);
2090 continue;
2093 kfree(full_path);
2094 cinode = CIFS_I(d_inode(cfile->dentry));
2095 spin_unlock(&tcon->open_file_lock);
2096 return cifs_get_writable_file(cinode, flags, ret_file);
2099 spin_unlock(&tcon->open_file_lock);
2100 return -ENOENT;
2104 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2105 struct cifsFileInfo **ret_file)
2107 struct list_head *tmp;
2108 struct cifsFileInfo *cfile;
2109 struct cifsInodeInfo *cinode;
2110 char *full_path;
2112 *ret_file = NULL;
2114 spin_lock(&tcon->open_file_lock);
2115 list_for_each(tmp, &tcon->openFileList) {
2116 cfile = list_entry(tmp, struct cifsFileInfo,
2117 tlist);
2118 full_path = build_path_from_dentry(cfile->dentry);
2119 if (full_path == NULL) {
2120 spin_unlock(&tcon->open_file_lock);
2121 return -ENOMEM;
2123 if (strcmp(full_path, name)) {
2124 kfree(full_path);
2125 continue;
2128 kfree(full_path);
2129 cinode = CIFS_I(d_inode(cfile->dentry));
2130 spin_unlock(&tcon->open_file_lock);
2131 *ret_file = find_readable_file(cinode, 0);
2132 return *ret_file ? 0 : -ENOENT;
2135 spin_unlock(&tcon->open_file_lock);
2136 return -ENOENT;
2139 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2141 struct address_space *mapping = page->mapping;
2142 loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2143 char *write_data;
2144 int rc = -EFAULT;
2145 int bytes_written = 0;
2146 struct inode *inode;
2147 struct cifsFileInfo *open_file;
2149 if (!mapping || !mapping->host)
2150 return -EFAULT;
2152 inode = page->mapping->host;
2154 offset += (loff_t)from;
2155 write_data = kmap(page);
2156 write_data += from;
2158 if ((to > PAGE_SIZE) || (from > to)) {
2159 kunmap(page);
2160 return -EIO;
2163 /* racing with truncate? */
2164 if (offset > mapping->host->i_size) {
2165 kunmap(page);
2166 return 0; /* don't care */
2169 /* check to make sure that we are not extending the file */
2170 if (mapping->host->i_size - offset < (loff_t)to)
2171 to = (unsigned)(mapping->host->i_size - offset);
2173 rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2174 &open_file);
2175 if (!rc) {
2176 bytes_written = cifs_write(open_file, open_file->pid,
2177 write_data, to - from, &offset);
2178 cifsFileInfo_put(open_file);
2179 /* Does mm or vfs already set times? */
2180 inode->i_atime = inode->i_mtime = current_time(inode);
2181 if ((bytes_written > 0) && (offset))
2182 rc = 0;
2183 else if (bytes_written < 0)
2184 rc = bytes_written;
2185 else
2186 rc = -EFAULT;
2187 } else {
2188 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2189 if (!is_retryable_error(rc))
2190 rc = -EIO;
2193 kunmap(page);
2194 return rc;
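/*
 * The offset arithmetic above: page->index selects the PAGE_SIZE
 * slot in the file and "from" is the byte offset within that page.
 * For example, with 4 KiB pages, page->index == 3 and from == 100
 * place the write at file offset 3 * 4096 + 100 = 12388.
 */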
2197 static struct cifs_writedata *
2198 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2199 pgoff_t end, pgoff_t *index,
2200 unsigned int *found_pages)
2202 struct cifs_writedata *wdata;
2204 wdata = cifs_writedata_alloc((unsigned int)tofind,
2205 cifs_writev_complete);
2206 if (!wdata)
2207 return NULL;
2209 *found_pages = find_get_pages_range_tag(mapping, index, end,
2210 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2211 return wdata;
2214 static unsigned int
2215 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
2216 struct address_space *mapping,
2217 struct writeback_control *wbc,
2218 pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2220 unsigned int nr_pages = 0, i;
2221 struct page *page;
2223 for (i = 0; i < found_pages; i++) {
2224 page = wdata->pages[i];
2226 * At this point we hold neither the i_pages lock nor the
2227 * page lock: the page may be truncated or invalidated
2228 * (changing page->mapping to NULL), or even swizzled
2229 * back from swapper_space to tmpfs file mapping
2232 if (nr_pages == 0)
2233 lock_page(page);
2234 else if (!trylock_page(page))
2235 break;
2237 if (unlikely(page->mapping != mapping)) {
2238 unlock_page(page);
2239 break;
2242 if (!wbc->range_cyclic && page->index > end) {
2243 *done = true;
2244 unlock_page(page);
2245 break;
2248 if (*next && (page->index != *next)) {
2249 /* Not next consecutive page */
2250 unlock_page(page);
2251 break;
2254 if (wbc->sync_mode != WB_SYNC_NONE)
2255 wait_on_page_writeback(page);
2257 if (PageWriteback(page) ||
2258 !clear_page_dirty_for_io(page)) {
2259 unlock_page(page);
2260 break;
2264 * This actually clears the dirty bit in the radix tree.
2265 * See cifs_writepage() for more commentary.
2267 set_page_writeback(page);
2268 if (page_offset(page) >= i_size_read(mapping->host)) {
2269 *done = true;
2270 unlock_page(page);
2271 end_page_writeback(page);
2272 break;
2275 wdata->pages[i] = page;
2276 *next = page->index + 1;
2277 ++nr_pages;
2280 /* reset index to refind any pages skipped */
2281 if (nr_pages == 0)
2282 *index = wdata->pages[0]->index + 1;
2284 /* put any pages we aren't going to use */
2285 for (i = nr_pages; i < found_pages; i++) {
2286 put_page(wdata->pages[i]);
2287 wdata->pages[i] = NULL;
2290 return nr_pages;
2293 static int
2294 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2295 struct address_space *mapping, struct writeback_control *wbc)
2297 int rc;
2299 wdata->sync_mode = wbc->sync_mode;
2300 wdata->nr_pages = nr_pages;
2301 wdata->offset = page_offset(wdata->pages[0]);
2302 wdata->pagesz = PAGE_SIZE;
2303 wdata->tailsz = min(i_size_read(mapping->host) -
2304 page_offset(wdata->pages[nr_pages - 1]),
2305 (loff_t)PAGE_SIZE);
2306 wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2307 wdata->pid = wdata->cfile->pid;
2309 rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2310 if (rc)
2311 return rc;
2313 if (wdata->cfile->invalidHandle)
2314 rc = -EAGAIN;
2315 else
2316 rc = wdata->server->ops->async_writev(wdata,
2317 cifs_writedata_release);
2319 return rc;
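/*
 * wdata->bytes above counts nr_pages - 1 full pages plus the tail.
 * For example, three pages with i_size ending 1000 bytes into the
 * last one give, on a 4 KiB-page machine:
 *
 *	tailsz = min(i_size - page_offset(last), PAGE_SIZE) = 1000
 *	bytes  = (3 - 1) * 4096 + 1000 = 9192
 *
 * so the stale remainder of the last page is never sent.
 */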
2322 static int cifs_writepages(struct address_space *mapping,
2323 struct writeback_control *wbc)
2325 struct inode *inode = mapping->host;
2326 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2327 struct TCP_Server_Info *server;
2328 bool done = false, scanned = false, range_whole = false;
2329 pgoff_t end, index;
2330 struct cifs_writedata *wdata;
2331 struct cifsFileInfo *cfile = NULL;
2332 int rc = 0;
2333 int saved_rc = 0;
2334 unsigned int xid;
2337 * If wsize is smaller than the page cache size, default to writing
2338 * one page at a time via cifs_writepage
2340 if (cifs_sb->wsize < PAGE_SIZE)
2341 return generic_writepages(mapping, wbc);
2343 xid = get_xid();
2344 if (wbc->range_cyclic) {
2345 index = mapping->writeback_index; /* Start from prev offset */
2346 end = -1;
2347 } else {
2348 index = wbc->range_start >> PAGE_SHIFT;
2349 end = wbc->range_end >> PAGE_SHIFT;
2350 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2351 range_whole = true;
2352 scanned = true;
2354 server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2356 retry:
2357 while (!done && index <= end) {
2358 unsigned int i, nr_pages, found_pages, wsize;
2359 pgoff_t next = 0, tofind, saved_index = index;
2360 struct cifs_credits credits_on_stack;
2361 struct cifs_credits *credits = &credits_on_stack;
2362 int get_file_rc = 0;
2364 if (cfile)
2365 cifsFileInfo_put(cfile);
2367 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2369 /* in case of an error store it to return later */
2370 if (rc)
2371 get_file_rc = rc;
2373 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2374 &wsize, credits);
2375 if (rc != 0) {
2376 done = true;
2377 break;
2380 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2382 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2383 &found_pages);
2384 if (!wdata) {
2385 rc = -ENOMEM;
2386 done = true;
2387 add_credits_and_wake_if(server, credits, 0);
2388 break;
2391 if (found_pages == 0) {
2392 kref_put(&wdata->refcount, cifs_writedata_release);
2393 add_credits_and_wake_if(server, credits, 0);
2394 break;
2397 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2398 end, &index, &next, &done);
2400 /* nothing to write? */
2401 if (nr_pages == 0) {
2402 kref_put(&wdata->refcount, cifs_writedata_release);
2403 add_credits_and_wake_if(server, credits, 0);
2404 continue;
2407 wdata->credits = credits_on_stack;
2408 wdata->cfile = cfile;
2409 wdata->server = server;
2410 cfile = NULL;
2412 if (!wdata->cfile) {
2413 cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2414 get_file_rc);
2415 if (is_retryable_error(get_file_rc))
2416 rc = get_file_rc;
2417 else
2418 rc = -EBADF;
2419 } else
2420 rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2422 for (i = 0; i < nr_pages; ++i)
2423 unlock_page(wdata->pages[i]);
2425 /* send failure -- clean up the mess */
2426 if (rc != 0) {
2427 add_credits_and_wake_if(server, &wdata->credits, 0);
2428 for (i = 0; i < nr_pages; ++i) {
2429 if (is_retryable_error(rc))
2430 redirty_page_for_writepage(wbc,
2431 wdata->pages[i]);
2432 else
2433 SetPageError(wdata->pages[i]);
2434 end_page_writeback(wdata->pages[i]);
2435 put_page(wdata->pages[i]);
2437 if (!is_retryable_error(rc))
2438 mapping_set_error(mapping, rc);
2440 kref_put(&wdata->refcount, cifs_writedata_release);
2442 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2443 index = saved_index;
2444 continue;
2447 /* Return immediately if we received a signal during writing */
2448 if (is_interrupt_error(rc)) {
2449 done = true;
2450 break;
2453 if (rc != 0 && saved_rc == 0)
2454 saved_rc = rc;
2456 wbc->nr_to_write -= nr_pages;
2457 if (wbc->nr_to_write <= 0)
2458 done = true;
2460 index = next;
2463 if (!scanned && !done) {
2465 * We hit the last page and there is more work to be done: wrap
2466 * back to the start of the file
2468 scanned = true;
2469 index = 0;
2470 goto retry;
2473 if (saved_rc != 0)
2474 rc = saved_rc;
2476 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2477 mapping->writeback_index = index;
2479 if (cfile)
2480 cifsFileInfo_put(cfile);
2481 free_xid(xid);
2482 return rc;
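/*
 * One iteration of the retry loop above is, roughly:
 *
 *	wait_mtu_credits()          -> wsize for this batch
 *	wdata_alloc_and_fillpages() -> gather up to tofind dirty pages
 *	wdata_prepare_pages()       -> lock a contiguous run of them
 *	wdata_send_pages()          -> issue the async write
 *
 * tofind is bounded by wsize / PAGE_SIZE, so with, say, a 64 KiB
 * wsize and 4 KiB pages each batch covers at most 16 pages and a
 * larger dirty range simply takes more iterations.
 */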
2485 static int
2486 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2488 int rc;
2489 unsigned int xid;
2491 xid = get_xid();
2492 /* BB add check for wbc flags */
2493 get_page(page);
2494 if (!PageUptodate(page))
2495 cifs_dbg(FYI, "ppw - page not up to date\n");
2498 * Set the "writeback" flag, and clear "dirty" in the radix tree.
2500 * A writepage() implementation always needs to do either this,
2501 * or re-dirty the page with "redirty_page_for_writepage()" in
2502 * the case of a failure.
2504 * Just unlocking the page will cause the radix tree tag-bits
2505 * to fail to update with the state of the page correctly.
2507 set_page_writeback(page);
2508 retry_write:
2509 rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2510 if (is_retryable_error(rc)) {
2511 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2512 goto retry_write;
2513 redirty_page_for_writepage(wbc, page);
2514 } else if (rc != 0) {
2515 SetPageError(page);
2516 mapping_set_error(page->mapping, rc);
2517 } else {
2518 SetPageUptodate(page);
2520 end_page_writeback(page);
2521 put_page(page);
2522 free_xid(xid);
2523 return rc;
2526 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2528 int rc = cifs_writepage_locked(page, wbc);
2529 unlock_page(page);
2530 return rc;
2533 static int cifs_write_end(struct file *file, struct address_space *mapping,
2534 loff_t pos, unsigned len, unsigned copied,
2535 struct page *page, void *fsdata)
2537 int rc;
2538 struct inode *inode = mapping->host;
2539 struct cifsFileInfo *cfile = file->private_data;
2540 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2541 __u32 pid;
2543 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2544 pid = cfile->pid;
2545 else
2546 pid = current->tgid;
2548 cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2549 page, pos, copied);
2551 if (PageChecked(page)) {
2552 if (copied == len)
2553 SetPageUptodate(page);
2554 ClearPageChecked(page);
2555 } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2556 SetPageUptodate(page);
2558 if (!PageUptodate(page)) {
2559 char *page_data;
2560 unsigned offset = pos & (PAGE_SIZE - 1);
2561 unsigned int xid;
2563 xid = get_xid();
2564 /* this is probably better than directly calling
2565 partialpage_write since in this function the file handle is
2566 known, which we might as well leverage */
2567 /* BB check if anything else is missing from ppw,
2568 such as updating the last write time */
2569 page_data = kmap(page);
2570 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2571 /* if (rc < 0) should we set writebehind rc? */
2572 kunmap(page);
2574 free_xid(xid);
2575 } else {
2576 rc = copied;
2577 pos += copied;
2578 set_page_dirty(page);
2581 if (rc > 0) {
2582 spin_lock(&inode->i_lock);
2583 if (pos > inode->i_size)
2584 i_size_write(inode, pos);
2585 spin_unlock(&inode->i_lock);
2588 unlock_page(page);
2589 put_page(page);
2591 return rc;
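/*
 * Uptodate handling in write_end above, in short: a PG_checked
 * page becomes uptodate only when the whole requested range was
 * copied, a full-page copy always makes the page uptodate, and any
 * other partial copy of a non-uptodate page is written straight to
 * the server with cifs_write() rather than left dirty in the page
 * cache.
 */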
2594 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2595 int datasync)
2597 unsigned int xid;
2598 int rc = 0;
2599 struct cifs_tcon *tcon;
2600 struct TCP_Server_Info *server;
2601 struct cifsFileInfo *smbfile = file->private_data;
2602 struct inode *inode = file_inode(file);
2603 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2605 rc = file_write_and_wait_range(file, start, end);
2606 if (rc) {
2607 trace_cifs_fsync_err(inode->i_ino, rc);
2608 return rc;
2611 xid = get_xid();
2613 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2614 file, datasync);
2616 if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2617 rc = cifs_zap_mapping(inode);
2618 if (rc) {
2619 cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2620 rc = 0; /* don't care about it in fsync */
2624 tcon = tlink_tcon(smbfile->tlink);
2625 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2626 server = tcon->ses->server;
2627 if (server->ops->flush)
2628 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2629 else
2630 rc = -ENOSYS;
2633 free_xid(xid);
2634 return rc;
2637 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2639 unsigned int xid;
2640 int rc = 0;
2641 struct cifs_tcon *tcon;
2642 struct TCP_Server_Info *server;
2643 struct cifsFileInfo *smbfile = file->private_data;
2644 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2646 rc = file_write_and_wait_range(file, start, end);
2647 if (rc) {
2648 trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
2649 return rc;
2652 xid = get_xid();
2654 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2655 file, datasync);
2657 tcon = tlink_tcon(smbfile->tlink);
2658 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2659 server = tcon->ses->server;
2660 if (server->ops->flush)
2661 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2662 else
2663 rc = -ENOSYS;
2666 free_xid(xid);
2667 return rc;
2671 * As the file closes, flush all cached write data for this inode, checking
2672 * for write-behind errors.
2674 int cifs_flush(struct file *file, fl_owner_t id)
2676 struct inode *inode = file_inode(file);
2677 int rc = 0;
2679 if (file->f_mode & FMODE_WRITE)
2680 rc = filemap_write_and_wait(inode->i_mapping);
2682 cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2683 if (rc)
2684 trace_cifs_flush_err(inode->i_ino, rc);
2685 return rc;
2688 static int
2689 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2691 int rc = 0;
2692 unsigned long i;
2694 for (i = 0; i < num_pages; i++) {
2695 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2696 if (!pages[i]) {
2698 * save number of pages we have already allocated and
2699 * return with ENOMEM error
2701 num_pages = i;
2702 rc = -ENOMEM;
2703 break;
2707 if (rc) {
2708 for (i = 0; i < num_pages; i++)
2709 put_page(pages[i]);
2711 return rc;
2714 static inline
2715 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2717 size_t num_pages;
2718 size_t clen;
2720 clen = min_t(const size_t, len, wsize);
2721 num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2723 if (cur_len)
2724 *cur_len = clen;
2726 return num_pages;
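/*
 * For example, wsize = 65536 and len = 100000 clamp cur_len to
 * 65536, which spans DIV_ROUND_UP(65536, 4096) = 16 pages with
 * 4 KiB pages; a short final chunk of, say, 1000 bytes needs only
 * one page.
 */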
2729 static void
2730 cifs_uncached_writedata_release(struct kref *refcount)
2732 int i;
2733 struct cifs_writedata *wdata = container_of(refcount,
2734 struct cifs_writedata, refcount);
2736 kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2737 for (i = 0; i < wdata->nr_pages; i++)
2738 put_page(wdata->pages[i]);
2739 cifs_writedata_release(refcount);
2742 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2744 static void
2745 cifs_uncached_writev_complete(struct work_struct *work)
2747 struct cifs_writedata *wdata = container_of(work,
2748 struct cifs_writedata, work);
2749 struct inode *inode = d_inode(wdata->cfile->dentry);
2750 struct cifsInodeInfo *cifsi = CIFS_I(inode);
2752 spin_lock(&inode->i_lock);
2753 cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2754 if (cifsi->server_eof > inode->i_size)
2755 i_size_write(inode, cifsi->server_eof);
2756 spin_unlock(&inode->i_lock);
2758 complete(&wdata->done);
2759 collect_uncached_write_data(wdata->ctx);
2760 /* the below call can possibly free the last ref to aio ctx */
2761 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2764 static int
2765 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2766 size_t *len, unsigned long *num_pages)
2768 size_t save_len, copied, bytes, cur_len = *len;
2769 unsigned long i, nr_pages = *num_pages;
2771 save_len = cur_len;
2772 for (i = 0; i < nr_pages; i++) {
2773 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2774 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2775 cur_len -= copied;
2777 * If we didn't copy as much as we expected, then that
2778 * may mean we trod into an unmapped area. Stop copying
2779 * at that point. On the next pass through the big
2780 * loop, we'll likely end up getting a zero-length
2781 * write and bailing out of it.
2783 if (copied < bytes)
2784 break;
2786 cur_len = save_len - cur_len;
2787 *len = cur_len;
2790 * If we have no data to send, then that probably means that
2791 * the copy above failed altogether. That's most likely because
2792 * the address in the iovec was bogus. Return -EFAULT and let
2793 * the caller free anything we allocated and bail out.
2795 if (!cur_len)
2796 return -EFAULT;
2799 * i + 1 now represents the number of pages we actually used in
2800 * the copy phase above.
2802 *num_pages = i + 1;
2803 return 0;
2806 static int
2807 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
2808 struct cifs_aio_ctx *ctx)
2810 unsigned int wsize;
2811 struct cifs_credits credits;
2812 int rc;
2813 struct TCP_Server_Info *server = wdata->server;
2815 do {
2816 if (wdata->cfile->invalidHandle) {
2817 rc = cifs_reopen_file(wdata->cfile, false);
2818 if (rc == -EAGAIN)
2819 continue;
2820 else if (rc)
2821 break;
2826 * Wait for credits to resend this wdata.
2827 * Note: we are attempting to resend the whole wdata, not in
2828 * segments.
2830 do {
2831 rc = server->ops->wait_mtu_credits(server, wdata->bytes,
2832 &wsize, &credits);
2833 if (rc)
2834 goto fail;
2836 if (wsize < wdata->bytes) {
2837 add_credits_and_wake_if(server, &credits, 0);
2838 msleep(1000);
2840 } while (wsize < wdata->bytes);
2841 wdata->credits = credits;
2843 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2845 if (!rc) {
2846 if (wdata->cfile->invalidHandle)
2847 rc = -EAGAIN;
2848 else {
2849 #ifdef CONFIG_CIFS_SMB_DIRECT
2850 if (wdata->mr) {
2851 wdata->mr->need_invalidate = true;
2852 smbd_deregister_mr(wdata->mr);
2853 wdata->mr = NULL;
2855 #endif
2856 rc = server->ops->async_writev(wdata,
2857 cifs_uncached_writedata_release);
2861 /* If the write was successfully sent, we are done */
2862 if (!rc) {
2863 list_add_tail(&wdata->list, wdata_list);
2864 return 0;
2867 /* Roll back credits and retry if needed */
2868 add_credits_and_wake_if(server, &wdata->credits, 0);
2869 } while (rc == -EAGAIN);
2871 fail:
2872 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2873 return rc;
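/*
 * Note the shape of the resend above: a reconnect may have shrunk
 * the per-call limit, so the inner loop keeps re-requesting
 * credits (sleeping a second between attempts) until the server
 * grants at least wdata->bytes in one piece -- the wdata is
 * deliberately resent whole rather than re-split into segments.
 */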
2876 static int
2877 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2878 struct cifsFileInfo *open_file,
2879 struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2880 struct cifs_aio_ctx *ctx)
2882 int rc = 0;
2883 size_t cur_len;
2884 unsigned long nr_pages, num_pages, i;
2885 struct cifs_writedata *wdata;
2886 struct iov_iter saved_from = *from;
2887 loff_t saved_offset = offset;
2888 pid_t pid;
2889 struct TCP_Server_Info *server;
2890 struct page **pagevec;
2891 size_t start;
2892 unsigned int xid;
2894 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2895 pid = open_file->pid;
2896 else
2897 pid = current->tgid;
2899 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
2900 xid = get_xid();
2902 do {
2903 unsigned int wsize;
2904 struct cifs_credits credits_on_stack;
2905 struct cifs_credits *credits = &credits_on_stack;
2907 if (open_file->invalidHandle) {
2908 rc = cifs_reopen_file(open_file, false);
2909 if (rc == -EAGAIN)
2910 continue;
2911 else if (rc)
2912 break;
2915 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2916 &wsize, credits);
2917 if (rc)
2918 break;
2920 cur_len = min_t(const size_t, len, wsize);
2922 if (ctx->direct_io) {
2923 ssize_t result;
2925 result = iov_iter_get_pages_alloc(
2926 from, &pagevec, cur_len, &start);
2927 if (result < 0) {
2928 cifs_dbg(VFS,
2929 "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
2930 result, iov_iter_type(from),
2931 from->iov_offset, from->count);
2932 dump_stack();
2934 rc = result;
2935 add_credits_and_wake_if(server, credits, 0);
2936 break;
2938 cur_len = (size_t)result;
2939 iov_iter_advance(from, cur_len);
2941 nr_pages =
2942 (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
2944 wdata = cifs_writedata_direct_alloc(pagevec,
2945 cifs_uncached_writev_complete);
2946 if (!wdata) {
2947 rc = -ENOMEM;
2948 add_credits_and_wake_if(server, credits, 0);
2949 break;
2953 wdata->page_offset = start;
2954 wdata->tailsz =
2955 nr_pages > 1 ?
2956 cur_len - (PAGE_SIZE - start) -
2957 (nr_pages - 2) * PAGE_SIZE :
2958 cur_len;
2959 } else {
2960 nr_pages = get_numpages(wsize, len, &cur_len);
2961 wdata = cifs_writedata_alloc(nr_pages,
2962 cifs_uncached_writev_complete);
2963 if (!wdata) {
2964 rc = -ENOMEM;
2965 add_credits_and_wake_if(server, credits, 0);
2966 break;
2969 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2970 if (rc) {
2971 kvfree(wdata->pages);
2972 kfree(wdata);
2973 add_credits_and_wake_if(server, credits, 0);
2974 break;
2977 num_pages = nr_pages;
2978 rc = wdata_fill_from_iovec(
2979 wdata, from, &cur_len, &num_pages);
2980 if (rc) {
2981 for (i = 0; i < nr_pages; i++)
2982 put_page(wdata->pages[i]);
2983 kvfree(wdata->pages);
2984 kfree(wdata);
2985 add_credits_and_wake_if(server, credits, 0);
2986 break;
2990 * Bring nr_pages down to the number of pages we
2991 * actually used, and free any pages that we didn't use.
2993 for ( ; nr_pages > num_pages; nr_pages--)
2994 put_page(wdata->pages[nr_pages - 1]);
2996 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2999 wdata->sync_mode = WB_SYNC_ALL;
3000 wdata->nr_pages = nr_pages;
3001 wdata->offset = (__u64)offset;
3002 wdata->cfile = cifsFileInfo_get(open_file);
3003 wdata->server = server;
3004 wdata->pid = pid;
3005 wdata->bytes = cur_len;
3006 wdata->pagesz = PAGE_SIZE;
3007 wdata->credits = credits_on_stack;
3008 wdata->ctx = ctx;
3009 kref_get(&ctx->refcount);
3011 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3013 if (!rc) {
3014 if (wdata->cfile->invalidHandle)
3015 rc = -EAGAIN;
3016 else
3017 rc = server->ops->async_writev(wdata,
3018 cifs_uncached_writedata_release);
3021 if (rc) {
3022 add_credits_and_wake_if(server, &wdata->credits, 0);
3023 kref_put(&wdata->refcount,
3024 cifs_uncached_writedata_release);
3025 if (rc == -EAGAIN) {
3026 *from = saved_from;
3027 iov_iter_advance(from, offset - saved_offset);
3028 continue;
3030 break;
3033 list_add_tail(&wdata->list, wdata_list);
3034 offset += cur_len;
3035 len -= cur_len;
3036 } while (len > 0);
3038 free_xid(xid);
3039 return rc;
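/*
 * Tail sizing in the direct I/O branch above: with page offset
 * "start", the first page holds PAGE_SIZE - start bytes, middle
 * pages are full, and the tail takes the rest. For example,
 * cur_len = 10000 and start = 100 with 4 KiB pages give
 * nr_pages = 3 and tailsz = 10000 - (4096 - 100) - 4096 = 1908.
 */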
3042 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3044 struct cifs_writedata *wdata, *tmp;
3045 struct cifs_tcon *tcon;
3046 struct cifs_sb_info *cifs_sb;
3047 struct dentry *dentry = ctx->cfile->dentry;
3048 int rc;
3050 tcon = tlink_tcon(ctx->cfile->tlink);
3051 cifs_sb = CIFS_SB(dentry->d_sb);
3053 mutex_lock(&ctx->aio_mutex);
3055 if (list_empty(&ctx->list)) {
3056 mutex_unlock(&ctx->aio_mutex);
3057 return;
3060 rc = ctx->rc;
3062 * Wait for and collect replies for any successful sends in order of
3063 * increasing offset. Once an error is hit, return without waiting
3064 * for any more replies.
3066 restart_loop:
3067 list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3068 if (!rc) {
3069 if (!try_wait_for_completion(&wdata->done)) {
3070 mutex_unlock(&ctx->aio_mutex);
3071 return;
3074 if (wdata->result)
3075 rc = wdata->result;
3076 else
3077 ctx->total_len += wdata->bytes;
3079 /* resend call if it's a retryable error */
3080 if (rc == -EAGAIN) {
3081 struct list_head tmp_list;
3082 struct iov_iter tmp_from = ctx->iter;
3084 INIT_LIST_HEAD(&tmp_list);
3085 list_del_init(&wdata->list);
3087 if (ctx->direct_io)
3088 rc = cifs_resend_wdata(
3089 wdata, &tmp_list, ctx);
3090 else {
3091 iov_iter_advance(&tmp_from,
3092 wdata->offset - ctx->pos);
3094 rc = cifs_write_from_iter(wdata->offset,
3095 wdata->bytes, &tmp_from,
3096 ctx->cfile, cifs_sb, &tmp_list,
3097 ctx);
3099 kref_put(&wdata->refcount,
3100 cifs_uncached_writedata_release);
3103 list_splice(&tmp_list, &ctx->list);
3104 goto restart_loop;
3107 list_del_init(&wdata->list);
3108 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3111 cifs_stats_bytes_written(tcon, ctx->total_len);
3112 set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3114 ctx->rc = (rc == 0) ? ctx->total_len : rc;
3116 mutex_unlock(&ctx->aio_mutex);
3118 if (ctx->iocb && ctx->iocb->ki_complete)
3119 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3120 else
3121 complete(&ctx->done);
3124 static ssize_t __cifs_writev(
3125 struct kiocb *iocb, struct iov_iter *from, bool direct)
3127 struct file *file = iocb->ki_filp;
3128 ssize_t total_written = 0;
3129 struct cifsFileInfo *cfile;
3130 struct cifs_tcon *tcon;
3131 struct cifs_sb_info *cifs_sb;
3132 struct cifs_aio_ctx *ctx;
3133 struct iov_iter saved_from = *from;
3134 size_t len = iov_iter_count(from);
3135 int rc;
3138 * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
3139 * In this case, fall back to the non-direct write function.
3140 * This could be improved by getting pages directly in ITER_KVEC.
3142 if (direct && iov_iter_is_kvec(from)) {
3143 cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
3144 direct = false;
3147 rc = generic_write_checks(iocb, from);
3148 if (rc <= 0)
3149 return rc;
3151 cifs_sb = CIFS_FILE_SB(file);
3152 cfile = file->private_data;
3153 tcon = tlink_tcon(cfile->tlink);
3155 if (!tcon->ses->server->ops->async_writev)
3156 return -ENOSYS;
3158 ctx = cifs_aio_ctx_alloc();
3159 if (!ctx)
3160 return -ENOMEM;
3162 ctx->cfile = cifsFileInfo_get(cfile);
3164 if (!is_sync_kiocb(iocb))
3165 ctx->iocb = iocb;
3167 ctx->pos = iocb->ki_pos;
3169 if (direct) {
3170 ctx->direct_io = true;
3171 ctx->iter = *from;
3172 ctx->len = len;
3173 } else {
3174 rc = setup_aio_ctx_iter(ctx, from, WRITE);
3175 if (rc) {
3176 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3177 return rc;
3181 /* grab a lock here because response handlers can access ctx */
3182 mutex_lock(&ctx->aio_mutex);
3184 rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
3185 cfile, cifs_sb, &ctx->list, ctx);
3188 * If at least one write was successfully sent, then discard any rc
3189 * value from the later writes. If the remaining writes succeed, then
3190 * we'll end up returning whatever was written. If they fail, then
3191 * we'll get a new rc value from that.
3193 if (!list_empty(&ctx->list))
3194 rc = 0;
3196 mutex_unlock(&ctx->aio_mutex);
3198 if (rc) {
3199 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3200 return rc;
3203 if (!is_sync_kiocb(iocb)) {
3204 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3205 return -EIOCBQUEUED;
3208 rc = wait_for_completion_killable(&ctx->done);
3209 if (rc) {
3210 mutex_lock(&ctx->aio_mutex);
3211 ctx->rc = rc = -EINTR;
3212 total_written = ctx->total_len;
3213 mutex_unlock(&ctx->aio_mutex);
3214 } else {
3215 rc = ctx->rc;
3216 total_written = ctx->total_len;
3219 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3221 if (unlikely(!total_written))
3222 return rc;
3224 iocb->ki_pos += total_written;
3225 return total_written;
3228 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3230 return __cifs_writev(iocb, from, true);
3233 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3235 return __cifs_writev(iocb, from, false);
3238 static ssize_t
3239 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3241 struct file *file = iocb->ki_filp;
3242 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3243 struct inode *inode = file->f_mapping->host;
3244 struct cifsInodeInfo *cinode = CIFS_I(inode);
3245 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3246 ssize_t rc;
3248 inode_lock(inode);
3250 * We need to hold the sem to be sure nobody modifies the lock list
3251 * with a brlock that prevents writing.
3253 down_read(&cinode->lock_sem);
3255 rc = generic_write_checks(iocb, from);
3256 if (rc <= 0)
3257 goto out;
3259 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3260 server->vals->exclusive_lock_type, 0,
3261 NULL, CIFS_WRITE_OP))
3262 rc = __generic_file_write_iter(iocb, from);
3263 else
3264 rc = -EACCES;
3265 out:
3266 up_read(&cinode->lock_sem);
3267 inode_unlock(inode);
3269 if (rc > 0)
3270 rc = generic_write_sync(iocb, rc);
3271 return rc;
3274 ssize_t
3275 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3277 struct inode *inode = file_inode(iocb->ki_filp);
3278 struct cifsInodeInfo *cinode = CIFS_I(inode);
3279 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3280 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3281 iocb->ki_filp->private_data;
3282 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3283 ssize_t written;
3285 written = cifs_get_writer(cinode);
3286 if (written)
3287 return written;
3289 if (CIFS_CACHE_WRITE(cinode)) {
3290 if (cap_unix(tcon->ses) &&
3291 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3292 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3293 written = generic_file_write_iter(iocb, from);
3294 goto out;
3296 written = cifs_writev(iocb, from);
3297 goto out;
3300 * For non-oplocked files in strict cache mode we need to write the data
3301 * to the server exactly from pos to pos+len-1 rather than flush all
3302 * affected pages, because that may cause an error with mandatory locks on
3303 * these pages but not on the region from pos to pos+len-1.
3305 written = cifs_user_writev(iocb, from);
3306 if (CIFS_CACHE_READ(cinode)) {
3308 * We have read level caching and we have just sent a write
3309 * request to the server thus making data in the cache stale.
3310 * Zap the cache and set oplock/lease level to NONE to avoid
3311 * reading stale data from the cache. All subsequent read
3312 * operations will read new data from the server.
3314 cifs_zap_mapping(inode);
3315 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3316 inode);
3317 cinode->oplock = 0;
3319 out:
3320 cifs_put_writer(cinode);
3321 return written;
3324 static struct cifs_readdata *
3325 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3327 struct cifs_readdata *rdata;
3329 rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3330 if (rdata != NULL) {
3331 rdata->pages = pages;
3332 kref_init(&rdata->refcount);
3333 INIT_LIST_HEAD(&rdata->list);
3334 init_completion(&rdata->done);
3335 INIT_WORK(&rdata->work, complete);
3338 return rdata;
3341 static struct cifs_readdata *
3342 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3344 struct page **pages =
3345 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3346 struct cifs_readdata *ret = NULL;
3348 if (pages) {
3349 ret = cifs_readdata_direct_alloc(pages, complete);
3350 if (!ret)
3351 kfree(pages);
3354 return ret;
3357 void
3358 cifs_readdata_release(struct kref *refcount)
3360 struct cifs_readdata *rdata = container_of(refcount,
3361 struct cifs_readdata, refcount);
3362 #ifdef CONFIG_CIFS_SMB_DIRECT
3363 if (rdata->mr) {
3364 smbd_deregister_mr(rdata->mr);
3365 rdata->mr = NULL;
3367 #endif
3368 if (rdata->cfile)
3369 cifsFileInfo_put(rdata->cfile);
3371 kvfree(rdata->pages);
3372 kfree(rdata);
3375 static int
3376 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3378 int rc = 0;
3379 struct page *page;
3380 unsigned int i;
3382 for (i = 0; i < nr_pages; i++) {
3383 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3384 if (!page) {
3385 rc = -ENOMEM;
3386 break;
3388 rdata->pages[i] = page;
3391 if (rc) {
3392 unsigned int nr_page_failed = i;
3394 for (i = 0; i < nr_page_failed; i++) {
3395 put_page(rdata->pages[i]);
3396 rdata->pages[i] = NULL;
3399 return rc;
3402 static void
3403 cifs_uncached_readdata_release(struct kref *refcount)
3405 struct cifs_readdata *rdata = container_of(refcount,
3406 struct cifs_readdata, refcount);
3407 unsigned int i;
3409 kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3410 for (i = 0; i < rdata->nr_pages; i++) {
3411 put_page(rdata->pages[i]);
3413 cifs_readdata_release(refcount);
3417 * cifs_readdata_to_iov - copy data from pages in response to an iovec
3418 * @rdata: the readdata response with list of pages holding data
3419 * @iter: destination for our data
3421 * This function copies data from a list of pages in a readdata response into
3422 * an array of iovecs. It will first calculate where the data should go
3423 * based on the info in the readdata and then copy the data into that spot.
3425 static int
3426 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3428 size_t remaining = rdata->got_bytes;
3429 unsigned int i;
3431 for (i = 0; i < rdata->nr_pages; i++) {
3432 struct page *page = rdata->pages[i];
3433 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3434 size_t written;
3436 if (unlikely(iov_iter_is_pipe(iter))) {
3437 void *addr = kmap_atomic(page);
3439 written = copy_to_iter(addr, copy, iter);
3440 kunmap_atomic(addr);
3441 } else
3442 written = copy_page_to_iter(page, 0, copy, iter);
3443 remaining -= written;
3444 if (written < copy && iov_iter_count(iter) > 0)
3445 break;
3447 return remaining ? -EFAULT : 0;
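/*
 * The copy above walks the response pages in order; for example,
 * got_bytes = 9000 with 4 KiB pages copies 4096, 4096 and then 808
 * bytes. A short copy (written < copy) while the iterator still
 * has room indicates a fault, and the leftover "remaining" count
 * turns into -EFAULT.
 */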
3450 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3452 static void
3453 cifs_uncached_readv_complete(struct work_struct *work)
3455 struct cifs_readdata *rdata = container_of(work,
3456 struct cifs_readdata, work);
3458 complete(&rdata->done);
3459 collect_uncached_read_data(rdata->ctx);
3460 /* the below call can possibly free the last ref to aio ctx */
3461 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3464 static int
3465 uncached_fill_pages(struct TCP_Server_Info *server,
3466 struct cifs_readdata *rdata, struct iov_iter *iter,
3467 unsigned int len)
3469 int result = 0;
3470 unsigned int i;
3471 unsigned int nr_pages = rdata->nr_pages;
3472 unsigned int page_offset = rdata->page_offset;
3474 rdata->got_bytes = 0;
3475 rdata->tailsz = PAGE_SIZE;
3476 for (i = 0; i < nr_pages; i++) {
3477 struct page *page = rdata->pages[i];
3478 size_t n;
3479 unsigned int segment_size = rdata->pagesz;
3481 if (i == 0)
3482 segment_size -= page_offset;
3483 else
3484 page_offset = 0;
3487 if (len <= 0) {
3488 /* no need to hold page hostage */
3489 rdata->pages[i] = NULL;
3490 rdata->nr_pages--;
3491 put_page(page);
3492 continue;
3495 n = len;
3496 if (len >= segment_size)
3497 /* enough data to fill the page */
3498 n = segment_size;
3499 else
3500 rdata->tailsz = len;
3501 len -= n;
3503 if (iter)
3504 result = copy_page_from_iter(
3505 page, page_offset, n, iter);
3506 #ifdef CONFIG_CIFS_SMB_DIRECT
3507 else if (rdata->mr)
3508 result = n;
3509 #endif
3510 else
3511 result = cifs_read_page_from_socket(
3512 server, page, page_offset, n);
3513 if (result < 0)
3514 break;
3516 rdata->got_bytes += result;
3519 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3520 rdata->got_bytes : result;
3523 static int
3524 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3525 struct cifs_readdata *rdata, unsigned int len)
3527 return uncached_fill_pages(server, rdata, NULL, len);
3530 static int
3531 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3532 struct cifs_readdata *rdata,
3533 struct iov_iter *iter)
3535 return uncached_fill_pages(server, rdata, iter, iter->count);
3538 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3539 struct list_head *rdata_list,
3540 struct cifs_aio_ctx *ctx)
3542 unsigned int rsize;
3543 struct cifs_credits credits;
3544 int rc;
3545 struct TCP_Server_Info *server;
3547 /* XXX: should we pick a new channel here? */
3548 server = rdata->server;
3550 do {
3551 if (rdata->cfile->invalidHandle) {
3552 rc = cifs_reopen_file(rdata->cfile, true);
3553 if (rc == -EAGAIN)
3554 continue;
3555 else if (rc)
3556 break;
3560 * Wait for credits to resend this rdata.
3561 * Note: we are attempting to resend the whole rdata, not in
3562 * segments.
3564 do {
3565 rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3566 &rsize, &credits);
3568 if (rc)
3569 goto fail;
3571 if (rsize < rdata->bytes) {
3572 add_credits_and_wake_if(server, &credits, 0);
3573 msleep(1000);
3575 } while (rsize < rdata->bytes);
3576 rdata->credits = credits;
3578 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3579 if (!rc) {
3580 if (rdata->cfile->invalidHandle)
3581 rc = -EAGAIN;
3582 else {
3583 #ifdef CONFIG_CIFS_SMB_DIRECT
3584 if (rdata->mr) {
3585 rdata->mr->need_invalidate = true;
3586 smbd_deregister_mr(rdata->mr);
3587 rdata->mr = NULL;
3589 #endif
3590 rc = server->ops->async_readv(rdata);
3594 /* If the read was successfully sent, we are done */
3595 if (!rc) {
3596 /* Add to aio pending list */
3597 list_add_tail(&rdata->list, rdata_list);
3598 return 0;
3601 /* Roll back credits and retry if needed */
3602 add_credits_and_wake_if(server, &rdata->credits, 0);
3603 } while (rc == -EAGAIN);
3605 fail:
3606 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3607 return rc;
3610 static int
3611 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3612 struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3613 struct cifs_aio_ctx *ctx)
3615 struct cifs_readdata *rdata;
3616 unsigned int npages, rsize;
3617 struct cifs_credits credits_on_stack;
3618 struct cifs_credits *credits = &credits_on_stack;
3619 size_t cur_len;
3620 int rc;
3621 pid_t pid;
3622 struct TCP_Server_Info *server;
3623 struct page **pagevec;
3624 size_t start;
3625 struct iov_iter direct_iov = ctx->iter;
3627 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3629 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3630 pid = open_file->pid;
3631 else
3632 pid = current->tgid;
3634 if (ctx->direct_io)
3635 iov_iter_advance(&direct_iov, offset - ctx->pos);
3637 do {
3638 if (open_file->invalidHandle) {
3639 rc = cifs_reopen_file(open_file, true);
3640 if (rc == -EAGAIN)
3641 continue;
3642 else if (rc)
3643 break;
3646 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3647 &rsize, credits);
3648 if (rc)
3649 break;
3651 cur_len = min_t(const size_t, len, rsize);
3653 if (ctx->direct_io) {
3654 ssize_t result;
3656 result = iov_iter_get_pages_alloc(
3657 &direct_iov, &pagevec,
3658 cur_len, &start);
3659 if (result < 0) {
3660 cifs_dbg(VFS,
3661 "Couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
3662 result, iov_iter_type(&direct_iov),
3663 direct_iov.iov_offset,
3664 direct_iov.count);
3665 dump_stack();
3667 rc = result;
3668 add_credits_and_wake_if(server, credits, 0);
3669 break;
3671 cur_len = (size_t)result;
3672 iov_iter_advance(&direct_iov, cur_len);
3674 rdata = cifs_readdata_direct_alloc(
3675 pagevec, cifs_uncached_readv_complete);
3676 if (!rdata) {
3677 add_credits_and_wake_if(server, credits, 0);
3678 rc = -ENOMEM;
3679 break;
3682 npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
3683 rdata->page_offset = start;
3684 rdata->tailsz = npages > 1 ?
3685 cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
3686 cur_len;
3688 } else {
3690 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3691 /* allocate a readdata struct */
3692 rdata = cifs_readdata_alloc(npages,
3693 cifs_uncached_readv_complete);
3694 if (!rdata) {
3695 add_credits_and_wake_if(server, credits, 0);
3696 rc = -ENOMEM;
3697 break;
3700 rc = cifs_read_allocate_pages(rdata, npages);
3701 if (rc) {
3702 kvfree(rdata->pages);
3703 kfree(rdata);
3704 add_credits_and_wake_if(server, credits, 0);
3705 break;
3708 rdata->tailsz = PAGE_SIZE;
3711 rdata->server = server;
3712 rdata->cfile = cifsFileInfo_get(open_file);
3713 rdata->nr_pages = npages;
3714 rdata->offset = offset;
3715 rdata->bytes = cur_len;
3716 rdata->pid = pid;
3717 rdata->pagesz = PAGE_SIZE;
3718 rdata->read_into_pages = cifs_uncached_read_into_pages;
3719 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3720 rdata->credits = credits_on_stack;
3721 rdata->ctx = ctx;
3722 kref_get(&ctx->refcount);
3724 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3726 if (!rc) {
3727 if (rdata->cfile->invalidHandle)
3728 rc = -EAGAIN;
3729 else
3730 rc = server->ops->async_readv(rdata);
3733 if (rc) {
3734 add_credits_and_wake_if(server, &rdata->credits, 0);
3735 kref_put(&rdata->refcount,
3736 cifs_uncached_readdata_release);
3737 if (rc == -EAGAIN) {
3738 iov_iter_revert(&direct_iov, cur_len);
3739 continue;
3741 break;
3744 list_add_tail(&rdata->list, rdata_list);
3745 offset += cur_len;
3746 len -= cur_len;
3747 } while (len > 0);
3749 return rc;
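/*
 * Each rdata issued above carries its own credits and a reference
 * on the aio ctx; its completion work signals rdata->done and
 * re-enters collect_uncached_read_data(). On -EAGAIN the direct
 * iterator is reverted by cur_len so the same byte range is
 * retried with fresh credits.
 */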
3752 static void
3753 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3755 struct cifs_readdata *rdata, *tmp;
3756 struct iov_iter *to = &ctx->iter;
3757 struct cifs_sb_info *cifs_sb;
3758 int rc;
3760 cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3762 mutex_lock(&ctx->aio_mutex);
3764 if (list_empty(&ctx->list)) {
3765 mutex_unlock(&ctx->aio_mutex);
3766 return;
3769 rc = ctx->rc;
3770 /* the loop below should proceed in the order of increasing offsets */
3771 again:
3772 list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
3773 if (!rc) {
3774 if (!try_wait_for_completion(&rdata->done)) {
3775 mutex_unlock(&ctx->aio_mutex);
3776 return;
3779 if (rdata->result == -EAGAIN) {
3780 /* resend call if it's a retryable error */
3781 struct list_head tmp_list;
3782 unsigned int got_bytes = rdata->got_bytes;
3784 list_del_init(&rdata->list);
3785 INIT_LIST_HEAD(&tmp_list);
3788 * Got part of the data and then a reconnect has
3789 * happened -- fill the buffer and continue
3790 * reading.
3792 if (got_bytes && got_bytes < rdata->bytes) {
3793 rc = 0;
3794 if (!ctx->direct_io)
3795 rc = cifs_readdata_to_iov(rdata, to);
3796 if (rc) {
3797 kref_put(&rdata->refcount,
3798 cifs_uncached_readdata_release);
3799 continue;
3803 if (ctx->direct_io) {
3805 * Re-use rdata as this is a
3806 * direct I/O
3808 rc = cifs_resend_rdata(
3809 rdata,
3810 &tmp_list, ctx);
3811 } else {
3812 rc = cifs_send_async_read(
3813 rdata->offset + got_bytes,
3814 rdata->bytes - got_bytes,
3815 rdata->cfile, cifs_sb,
3816 &tmp_list, ctx);
3818 kref_put(&rdata->refcount,
3819 cifs_uncached_readdata_release);
3822 list_splice(&tmp_list, &ctx->list);
3824 goto again;
3825 } else if (rdata->result)
3826 rc = rdata->result;
3827 else if (!ctx->direct_io)
3828 rc = cifs_readdata_to_iov(rdata, to);
3830 /* if there was a short read -- discard anything left */
3831 if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3832 rc = -ENODATA;
3834 ctx->total_len += rdata->got_bytes;
3836 list_del_init(&rdata->list);
3837 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3840 if (!ctx->direct_io)
3841 ctx->total_len = ctx->len - iov_iter_count(to);
3843 /* mask nodata case */
3844 if (rc == -ENODATA)
3845 rc = 0;
3847 ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
3849 mutex_unlock(&ctx->aio_mutex);
3851 if (ctx->iocb && ctx->iocb->ki_complete)
3852 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3853 else
3854 complete(&ctx->done);
3857 static ssize_t __cifs_readv(
3858 struct kiocb *iocb, struct iov_iter *to, bool direct)
3860 size_t len;
3861 struct file *file = iocb->ki_filp;
3862 struct cifs_sb_info *cifs_sb;
3863 struct cifsFileInfo *cfile;
3864 struct cifs_tcon *tcon;
3865 ssize_t rc, total_read = 0;
3866 loff_t offset = iocb->ki_pos;
3867 struct cifs_aio_ctx *ctx;
3870 * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
3871 * so fall back to the data copy read path.
3872 * This could be improved by getting pages directly in ITER_KVEC.
3874 if (direct && iov_iter_is_kvec(to)) {
3875 cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
3876 direct = false;
3879 len = iov_iter_count(to);
3880 if (!len)
3881 return 0;
3883 cifs_sb = CIFS_FILE_SB(file);
3884 cfile = file->private_data;
3885 tcon = tlink_tcon(cfile->tlink);
3887 if (!tcon->ses->server->ops->async_readv)
3888 return -ENOSYS;
3890 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3891 cifs_dbg(FYI, "attempting read on write only file instance\n");
3893 ctx = cifs_aio_ctx_alloc();
3894 if (!ctx)
3895 return -ENOMEM;
3897 ctx->cfile = cifsFileInfo_get(cfile);
3899 if (!is_sync_kiocb(iocb))
3900 ctx->iocb = iocb;
3902 if (iter_is_iovec(to))
3903 ctx->should_dirty = true;
3905 if (direct) {
3906 ctx->pos = offset;
3907 ctx->direct_io = true;
3908 ctx->iter = *to;
3909 ctx->len = len;
3910 } else {
3911 rc = setup_aio_ctx_iter(ctx, to, READ);
3912 if (rc) {
3913 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3914 return rc;
3916 len = ctx->len;
3919 /* grab a lock here because read response handlers can access ctx */
3920 mutex_lock(&ctx->aio_mutex);
3922 rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
3924 /* if at least one read request was sent successfully, reset rc */
3925 if (!list_empty(&ctx->list))
3926 rc = 0;
3928 mutex_unlock(&ctx->aio_mutex);
3930 if (rc) {
3931 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3932 return rc;
3935 if (!is_sync_kiocb(iocb)) {
3936 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3937 return -EIOCBQUEUED;
3940 rc = wait_for_completion_killable(&ctx->done);
3941 if (rc) {
3942 mutex_lock(&ctx->aio_mutex);
3943 ctx->rc = rc = -EINTR;
3944 total_read = ctx->total_len;
3945 mutex_unlock(&ctx->aio_mutex);
3946 } else {
3947 rc = ctx->rc;
3948 total_read = ctx->total_len;
3951 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3953 if (total_read) {
3954 iocb->ki_pos += total_read;
3955 return total_read;
3957 return rc;
3960 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
3962 return __cifs_readv(iocb, to, true);
3965 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
3967 return __cifs_readv(iocb, to, false);
3970 ssize_t
3971 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3973 struct inode *inode = file_inode(iocb->ki_filp);
3974 struct cifsInodeInfo *cinode = CIFS_I(inode);
3975 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3976 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3977 iocb->ki_filp->private_data;
3978 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3979 int rc = -EACCES;
3982 * In strict cache mode we need to read from the server all the time
3983 * if we don't have level II oplock because the server can delay mtime
3984 * change - so we can't make a decision about invalidating the inode.
3985 * And we can also fail reading pages if there are mandatory locks
3986 * on pages affected by this read but not on the region from pos to
3987 * pos+len-1.
3989 if (!CIFS_CACHE_READ(cinode))
3990 return cifs_user_readv(iocb, to);
3992 if (cap_unix(tcon->ses) &&
3993 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3994 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3995 return generic_file_read_iter(iocb, to);
3998 * We need to hold the sem to be sure nobody modifies the lock list
3999 * with a brlock that prevents reading.
4001 down_read(&cinode->lock_sem);
4002 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4003 tcon->ses->server->vals->shared_lock_type,
4004 0, NULL, CIFS_READ_OP))
4005 rc = generic_file_read_iter(iocb, to);
4006 up_read(&cinode->lock_sem);
4007 return rc;
4010 static ssize_t
4011 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4013 int rc = -EACCES;
4014 unsigned int bytes_read = 0;
4015 unsigned int total_read;
4016 unsigned int current_read_size;
4017 unsigned int rsize;
4018 struct cifs_sb_info *cifs_sb;
4019 struct cifs_tcon *tcon;
4020 struct TCP_Server_Info *server;
4021 unsigned int xid;
4022 char *cur_offset;
4023 struct cifsFileInfo *open_file;
4024 struct cifs_io_parms io_parms = {0};
4025 int buf_type = CIFS_NO_BUFFER;
4026 __u32 pid;
4028 xid = get_xid();
4029 cifs_sb = CIFS_FILE_SB(file);
4031 /* FIXME: set up handlers for larger reads and/or convert to async */
4032 rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
4034 if (file->private_data == NULL) {
4035 rc = -EBADF;
4036 free_xid(xid);
4037 return rc;
4039 open_file = file->private_data;
4040 tcon = tlink_tcon(open_file->tlink);
4041 server = cifs_pick_channel(tcon->ses);
4043 if (!server->ops->sync_read) {
4044 free_xid(xid);
4045 return -ENOSYS;
4048 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4049 pid = open_file->pid;
4050 else
4051 pid = current->tgid;
4053 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4054 cifs_dbg(FYI, "attempting read on write only file instance\n");
4056 for (total_read = 0, cur_offset = read_data; read_size > total_read;
4057 total_read += bytes_read, cur_offset += bytes_read) {
4058 do {
4059 current_read_size = min_t(uint, read_size - total_read,
4060 rsize);
4062 * For Windows ME and 9x we do not want to request more
4063 * than the server negotiated, since it will refuse the read
4064 * then.
4066 if (!(tcon->ses->capabilities &
4067 tcon->ses->server->vals->cap_large_files)) {
4068 current_read_size = min_t(uint,
4069 current_read_size, CIFSMaxBufSize);
4071 if (open_file->invalidHandle) {
4072 rc = cifs_reopen_file(open_file, true);
4073 if (rc != 0)
4074 break;
4076 io_parms.pid = pid;
4077 io_parms.tcon = tcon;
4078 io_parms.offset = *offset;
4079 io_parms.length = current_read_size;
4080 io_parms.server = server;
4081 rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4082 &bytes_read, &cur_offset,
4083 &buf_type);
4084 } while (rc == -EAGAIN);
4086 if (rc || (bytes_read == 0)) {
4087 if (total_read) {
4088 break;
4089 } else {
4090 free_xid(xid);
4091 return rc;
4093 } else {
4094 cifs_stats_bytes_read(tcon, total_read);
4095 *offset += bytes_read;
4098 free_xid(xid);
4099 return total_read;
4103 * If the page is mmap'ed into a process' page tables, then we need to make
4104 * sure that it doesn't change while being written back.
4106 static vm_fault_t
4107 cifs_page_mkwrite(struct vm_fault *vmf)
4109 struct page *page = vmf->page;
4111 lock_page(page);
4112 return VM_FAULT_LOCKED;
4115 static const struct vm_operations_struct cifs_file_vm_ops = {
4116 .fault = filemap_fault,
4117 .map_pages = filemap_map_pages,
4118 .page_mkwrite = cifs_page_mkwrite,
4121 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4123 int xid, rc = 0;
4124 struct inode *inode = file_inode(file);
4126 xid = get_xid();
4128 if (!CIFS_CACHE_READ(CIFS_I(inode)))
4129 rc = cifs_zap_mapping(inode);
4130 if (!rc)
4131 rc = generic_file_mmap(file, vma);
4132 if (!rc)
4133 vma->vm_ops = &cifs_file_vm_ops;
4135 free_xid(xid);
4136 return rc;
4139 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4141 int rc, xid;
4143 xid = get_xid();
4145 rc = cifs_revalidate_file(file);
4146 if (rc)
4147 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4148 rc);
4149 if (!rc)
4150 rc = generic_file_mmap(file, vma);
4151 if (!rc)
4152 vma->vm_ops = &cifs_file_vm_ops;
4154 free_xid(xid);
4155 return rc;
4158 static void
4159 cifs_readv_complete(struct work_struct *work)
4161 unsigned int i, got_bytes;
4162 struct cifs_readdata *rdata = container_of(work,
4163 struct cifs_readdata, work);
4165 got_bytes = rdata->got_bytes;
4166 for (i = 0; i < rdata->nr_pages; i++) {
4167 struct page *page = rdata->pages[i];
4169 lru_cache_add(page);
4171 if (rdata->result == 0 ||
4172 (rdata->result == -EAGAIN && got_bytes)) {
4173 flush_dcache_page(page);
4174 SetPageUptodate(page);
4177 unlock_page(page);
4179 if (rdata->result == 0 ||
4180 (rdata->result == -EAGAIN && got_bytes))
4181 cifs_readpage_to_fscache(rdata->mapping->host, page);
4183 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
4185 put_page(page);
4186 rdata->pages[i] = NULL;
4188 kref_put(&rdata->refcount, cifs_readdata_release);

static int
readpages_fill_pages(struct TCP_Server_Info *server,
		     struct cifs_readdata *rdata, struct iov_iter *iter,
		     unsigned int len)
{
	int result = 0;
	unsigned int i;
	u64 eof;
	pgoff_t eof_index;
	unsigned int nr_pages = rdata->nr_pages;
	unsigned int page_offset = rdata->page_offset;

	/* determine the eof that the server (probably) has */
	eof = CIFS_I(rdata->mapping->host)->server_eof;
	eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
	cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);

	rdata->got_bytes = 0;
	rdata->tailsz = PAGE_SIZE;
	for (i = 0; i < nr_pages; i++) {
		struct page *page = rdata->pages[i];
		unsigned int to_read = rdata->pagesz;
		size_t n;

		if (i == 0)
			to_read -= page_offset;
		else
			page_offset = 0;

		n = to_read;

		if (len >= to_read) {
			len -= to_read;
		} else if (len > 0) {
			/* enough for partial page, fill and zero the rest */
			zero_user(page, len + page_offset, to_read - len);
			n = rdata->tailsz = len;
			len = 0;
		} else if (page->index > eof_index) {
			/*
			 * The VFS will not try to do readahead past the
			 * i_size, but it's possible that we have outstanding
			 * writes with gaps in the middle and the i_size hasn't
			 * caught up yet. Populate those with zeroed out pages
			 * to prevent the VFS from repeatedly attempting to
			 * fill them until the writes are flushed.
			 */
			zero_user(page, 0, PAGE_SIZE);
			lru_cache_add(page);
			flush_dcache_page(page);
			SetPageUptodate(page);
			unlock_page(page);
			put_page(page);
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			continue;
		} else {
			/* no need to hold page hostage */
			lru_cache_add(page);
			unlock_page(page);
			put_page(page);
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			continue;
		}

		if (iter)
			result = copy_page_from_iter(
					page, page_offset, n, iter);
#ifdef CONFIG_CIFS_SMB_DIRECT
		else if (rdata->mr)
			result = n;
#endif
		else
			result = cifs_read_page_from_socket(
					server, page, page_offset, n);
		if (result < 0)
			break;

		rdata->got_bytes += result;
	}

	return rdata->got_bytes > 0 && result != -ECONNABORTED ?
		rdata->got_bytes : result;
}
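
/*
 * Return convention for the filler above: a positive byte count if any
 * data made it into the pages (even alongside -EAGAIN, so the caller can
 * commit the partial read), otherwise the raw error. -ECONNABORTED is
 * always propagated so a dead connection is not mistaken for a short
 * read.
 */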

static int
cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
			       struct cifs_readdata *rdata, unsigned int len)
{
	return readpages_fill_pages(server, rdata, NULL, len);
}

static int
cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
			       struct cifs_readdata *rdata,
			       struct iov_iter *iter)
{
	return readpages_fill_pages(server, rdata, iter, iter->count);
}
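
/*
 * The two thin wrappers above adapt readpages_fill_pages() to its two
 * callers: read_into_pages pulls the payload straight off the socket,
 * while copy_into_pages is used when the transport has already staged
 * the data in an iov_iter (e.g. after SMB3 decryption).
 */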

static int
readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
		    unsigned int rsize, struct list_head *tmplist,
		    unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
{
	struct page *page, *tpage;
	unsigned int expected_index;
	int rc;
	gfp_t gfp = readahead_gfp_mask(mapping);

	INIT_LIST_HEAD(tmplist);

	page = lru_to_page(page_list);

	/*
	 * Lock the page and put it in the cache. Since no one else
	 * should have access to this page, we're safe to simply set
	 * PG_locked without checking it first.
	 */
	__SetPageLocked(page);
	rc = add_to_page_cache_locked(page, mapping,
				      page->index, gfp);

	/* give up if we can't stick it in the cache */
	if (rc) {
		__ClearPageLocked(page);
		return rc;
	}

	/* move first page to the tmplist */
	*offset = (loff_t)page->index << PAGE_SHIFT;
	*bytes = PAGE_SIZE;
	*nr_pages = 1;
	list_move_tail(&page->lru, tmplist);

	/* now try and add more pages onto the request */
	expected_index = page->index + 1;
	list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
		/* discontinuity? */
		if (page->index != expected_index)
			break;

		/* would this page push the read over the rsize? */
		if (*bytes + PAGE_SIZE > rsize)
			break;

		__SetPageLocked(page);
		rc = add_to_page_cache_locked(page, mapping, page->index, gfp);
		if (rc) {
			__ClearPageLocked(page);
			break;
		}
		list_move_tail(&page->lru, tmplist);
		(*bytes) += PAGE_SIZE;
		expected_index++;
		(*nr_pages)++;
	}
	return rc;
}
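
/*
 * Batching rule for the helper above: pages are taken from the tail of
 * page_list only while their indexes stay contiguous and the running
 * byte count fits within rsize. For example, with 4KiB pages and a
 * negotiated rsize of 1MiB, a single call gathers at most 256 pages
 * into one wire read.
 */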

static int cifs_readpages(struct file *file, struct address_space *mapping,
	struct list_head *page_list, unsigned num_pages)
{
	int rc;
	int err = 0;
	struct list_head tmplist;
	struct cifsFileInfo *open_file = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
	struct TCP_Server_Info *server;
	pid_t pid;
	unsigned int xid;

	xid = get_xid();
	/*
	 * Reads as many pages as possible from fscache. Returns -ENOBUFS
	 * immediately if the cookie is negative
	 *
	 * After this point, every page in the list might have PG_fscache set,
	 * so we will need to clean that up off of every page we don't use.
	 */
	rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
					 &num_pages);
	if (rc == 0) {
		free_xid(xid);
		return rc;
	}

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	rc = 0;
	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);

	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
		 __func__, file, mapping, num_pages);

	/*
	 * Start with the page at end of list and move it to private
	 * list. Do the same with any following pages until we hit
	 * the rsize limit, hit an index discontinuity, or run out of
	 * pages. Issue the async read and then start the loop again
	 * until the list is empty.
	 *
	 * Note that list order is important. The page_list is in
	 * the order of declining indexes. When we put the pages in
	 * the rdata->pages, then we want them in increasing order.
	 */
	while (!list_empty(page_list) && !err) {
		unsigned int i, nr_pages, bytes, rsize;
		loff_t offset;
		struct page *page, *tpage;
		struct cifs_readdata *rdata;
		struct cifs_credits credits_on_stack;
		struct cifs_credits *credits = &credits_on_stack;

		if (open_file->invalidHandle) {
			rc = cifs_reopen_file(open_file, true);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
						   &rsize, credits);
		if (rc)
			break;

		/*
		 * Give up immediately if rsize is too small to read an entire
		 * page. The VFS will fall back to readpage. We should never
		 * reach this point however since we set ra_pages to 0 when the
		 * rsize is smaller than a cache page.
		 */
		if (unlikely(rsize < PAGE_SIZE)) {
			add_credits_and_wake_if(server, credits, 0);
			free_xid(xid);
			return 0;
		}

		nr_pages = 0;
		err = readpages_get_pages(mapping, page_list, rsize, &tmplist,
					  &nr_pages, &offset, &bytes);
		if (!nr_pages) {
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
		if (!rdata) {
			/* best to give up if we're out of mem */
			list_for_each_entry_safe(page, tpage, &tmplist, lru) {
				list_del(&page->lru);
				lru_cache_add(page);
				unlock_page(page);
				put_page(page);
			}
			rc = -ENOMEM;
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->server = server;
		rdata->mapping = mapping;
		rdata->offset = offset;
		rdata->bytes = bytes;
		rdata->pid = pid;
		rdata->pagesz = PAGE_SIZE;
		rdata->tailsz = PAGE_SIZE;
		rdata->read_into_pages = cifs_readpages_read_into_pages;
		rdata->copy_into_pages = cifs_readpages_copy_into_pages;
		rdata->credits = credits_on_stack;

		list_for_each_entry_safe(page, tpage, &tmplist, lru) {
			list_del(&page->lru);
			rdata->pages[rdata->nr_pages++] = page;
		}

		rc = adjust_credits(server, &rdata->credits, rdata->bytes);

		if (!rc) {
			if (rdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else
				rc = server->ops->async_readv(rdata);
		}

		if (rc) {
			add_credits_and_wake_if(server, &rdata->credits, 0);
			for (i = 0; i < rdata->nr_pages; i++) {
				page = rdata->pages[i];
				lru_cache_add(page);
				unlock_page(page);
				put_page(page);
			}
			/* Fallback to the readpage in error/reconnect cases */
			kref_put(&rdata->refcount, cifs_readdata_release);
			break;
		}

		kref_put(&rdata->refcount, cifs_readdata_release);
	}

	/* Any pages that have been shown to fscache but didn't get added to
	 * the pagecache must be uncached before they get returned to the
	 * allocator.
	 */
	cifs_fscache_readpages_cancel(mapping->host, page_list);
	free_xid(xid);
	return rc;
}
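
/*
 * Credit handling in the loop above follows the usual cifs pattern:
 * wait_mtu_credits() reserves enough credits for an rsize-sized read
 * before pages are gathered, adjust_credits() trims the reservation to
 * the bytes actually batched, and every early-exit path hands the unused
 * reservation back via add_credits_and_wake_if() so other waiters can
 * make progress.
 */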

/*
 * cifs_readpage_worker must be called with the page pinned
 */
static int cifs_readpage_worker(struct file *file, struct page *page,
	loff_t *poffset)
{
	char *read_data;
	int rc;

	/* Is the page cached? */
	rc = cifs_readpage_from_fscache(file_inode(file), page);
	if (rc == 0)
		goto read_complete;

	read_data = kmap(page);
	/* for reads over a certain size could initiate async read ahead */

	rc = cifs_read(file, read_data, PAGE_SIZE, poffset);

	if (rc < 0)
		goto io_error;
	else
		cifs_dbg(FYI, "Bytes read %d\n", rc);

	/* we do not want atime to be less than mtime, it broke some apps */
	file_inode(file)->i_atime = current_time(file_inode(file));
	if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)))
		file_inode(file)->i_atime = file_inode(file)->i_mtime;
	else
		file_inode(file)->i_atime = current_time(file_inode(file));

	if (PAGE_SIZE > rc)
		memset(read_data + rc, 0, PAGE_SIZE - rc);

	flush_dcache_page(page);
	SetPageUptodate(page);

	/* send this page to the cache */
	cifs_readpage_to_fscache(file_inode(file), page);

	rc = 0;

io_error:
	kunmap(page);
	unlock_page(page);

read_complete:
	return rc;
}
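
/*
 * Note the asymmetric exits above: the io_error path unlocks the page
 * here, while a cache hit returns through read_complete with the page
 * left to fscache, whose completion handler is expected to mark it
 * uptodate and unlock it.
 */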

static int cifs_readpage(struct file *file, struct page *page)
{
	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
	int rc = -EACCES;
	unsigned int xid;

	xid = get_xid();

	if (file->private_data == NULL) {
		rc = -EBADF;
		free_xid(xid);
		return rc;
	}

	cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
		 page, (int)offset, (int)offset);

	rc = cifs_readpage_worker(file, page, &offset);

	free_xid(xid);
	return rc;
}

static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
{
	struct cifsFileInfo *open_file;

	spin_lock(&cifs_inode->open_file_lock);
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
			spin_unlock(&cifs_inode->open_file_lock);
			return 1;
		}
	}
	spin_unlock(&cifs_inode->open_file_lock);
	return 0;
}

/* We do not want to update the file size from server for inodes
   open for write - to avoid races with writepage extending
   the file - in the future we could consider allowing
   refreshing the inode only on increases in the file size
   but this is tricky to do without racing with writebehind
   page caching in the current Linux kernel design */
bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
{
	if (!cifsInode)
		return true;

	if (is_inode_writable(cifsInode)) {
		/* This inode is open for write at least once */
		struct cifs_sb_info *cifs_sb;

		cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
			/* since no page cache to corrupt on directio
			   we can change size safely */
			return true;
		}

		if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
			return true;

		return false;
	} else
		return true;
}

static int cifs_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	int oncethru = 0;
	pgoff_t index = pos >> PAGE_SHIFT;
	loff_t offset = pos & (PAGE_SIZE - 1);
	loff_t page_start = pos & PAGE_MASK;
	loff_t i_size;
	struct page *page;
	int rc = 0;

	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);

start:
	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page) {
		rc = -ENOMEM;
		goto out;
	}

	if (PageUptodate(page))
		goto out;

	/*
	 * If we write a full page it will be up to date, no need to read from
	 * the server. If the write is short, we'll end up doing a sync write
	 * instead.
	 */
	if (len == PAGE_SIZE)
		goto out;

	/*
	 * optimize away the read when we have an oplock, and we're not
	 * expecting to use any of the data we'd be reading in. That
	 * is, when the page lies beyond the EOF, or straddles the EOF
	 * and the write will cover all of the existing data.
	 */
	if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
		i_size = i_size_read(mapping->host);
		if (page_start >= i_size ||
		    (offset == 0 && (pos + len) >= i_size)) {
			zero_user_segments(page, 0, offset,
					   offset + len,
					   PAGE_SIZE);
			/*
			 * PageChecked means that the parts of the page
			 * to which we're not writing are considered up
			 * to date. Once the data is copied to the
			 * page, it can be set uptodate.
			 */
			SetPageChecked(page);
			goto out;
		}
	}

	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
		/*
		 * might as well read a page, it is fast enough. If we get
		 * an error, we don't need to return it. cifs_write_end will
		 * do a sync write instead since PG_uptodate isn't set.
		 */
		cifs_readpage_worker(file, page, &page_start);
		put_page(page);
		oncethru = 1;
		goto start;
	} else {
		/* we could try using another file handle if there is one -
		   but how would we lock it to prevent close of that handle
		   racing with this read? In any case
		   this will be written out by write_end so is fine */
	}
out:
	*pagep = page;
	return rc;
}
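
/*
 * To summarize the fast paths in cifs_write_begin(): an already-uptodate
 * page, a full-page write, and an oplocked write at or beyond EOF all
 * skip reading from the server; otherwise the page is read in once
 * (oncethru) for read-modify-write. If the page is still !uptodate after
 * a short copy, cifs_write_end() falls back to a synchronous write.
 */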

static int cifs_release_page(struct page *page, gfp_t gfp)
{
	if (PagePrivate(page))
		return 0;

	return cifs_fscache_release_page(page, gfp);
}

static void cifs_invalidate_page(struct page *page, unsigned int offset,
				 unsigned int length)
{
	struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);

	if (offset == 0 && length == PAGE_SIZE)
		cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
}

static int cifs_launder_page(struct page *page)
{
	int rc = 0;
	loff_t range_start = page_offset(page);
	loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 0,
		.range_start = range_start,
		.range_end = range_end,
	};

	cifs_dbg(FYI, "Launder page: %p\n", page);

	if (clear_page_dirty_for_io(page))
		rc = cifs_writepage_locked(page, &wbc);

	cifs_fscache_invalidate_page(page, page->mapping->host);
	return rc;
}
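
/*
 * ->launder_page is invoked with the page locked when the VM needs to
 * invalidate a page that is still dirty (for cifs typically via
 * cifs_zap_mapping() after an oplock break), so the single page is
 * written out synchronously using the one-page writeback_control above.
 */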

void cifs_oplock_break(struct work_struct *work)
{
	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
						  oplock_break);
	struct inode *inode = d_inode(cfile->dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	int rc = 0;
	bool purge_cache = false;

	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
			TASK_UNINTERRUPTIBLE);

	server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
				      cfile->oplock_epoch, &purge_cache);

	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
						cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
			 inode);
		cinode->oplock = 0;
	}

	if (inode && S_ISREG(inode->i_mode)) {
		if (CIFS_CACHE_READ(cinode))
			break_lease(inode, O_RDONLY);
		else
			break_lease(inode, O_WRONLY);
		rc = filemap_fdatawrite(inode->i_mapping);
		if (!CIFS_CACHE_READ(cinode) || purge_cache) {
			rc = filemap_fdatawait(inode->i_mapping);
			mapping_set_error(inode->i_mapping, rc);
			cifs_zap_mapping(inode);
		}
		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
		if (CIFS_CACHE_WRITE(cinode))
			goto oplock_break_ack;
	}

	rc = cifs_push_locks(cfile);
	if (rc)
		cifs_dbg(VFS, "Push locks rc = %d\n", rc);

oplock_break_ack:
	/*
	 * releasing stale oplock after recent reconnect of smb session using
	 * a now incorrect file handle is not a data integrity issue but do
	 * not bother sending an oplock release if session to server still is
	 * disconnected since oplock already released by the server
	 */
	if (!cfile->oplock_break_cancelled) {
		rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
							     cinode);
		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
	}
	_cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
	cifs_done_oplock_break(cinode);
}
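
/*
 * Ordering in the oplock break handler above: wait out in-flight
 * writers, downgrade the cached oplock state, flush the mapping (and,
 * if read caching was lost, wait and zap it), re-push byte-range locks
 * that were only cached locally, and only then acknowledge the break to
 * the server.
 */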

/*
 * The presence of cifs_direct_io() in the address space ops vector
 * allows open() O_DIRECT flags which would have failed otherwise.
 *
 * In the non-cached mode (mount with cache=none), we shunt off direct
 * read and write requests so this method should never be called.
 *
 * Direct IO is not yet supported in the cached mode.
 */
static ssize_t
cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
{
	/*
	 * FIXME
	 * Eventually need to support direct IO for non forcedirectio mounts
	 */
	return -EINVAL;
}
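
/*
 * On cache=none mounts the ->read_iter/->write_iter methods installed
 * for non-cached files service O_DIRECT requests themselves, which is
 * why this stub can simply reject anything that reaches it.
 */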

static int cifs_swap_activate(struct swap_info_struct *sis,
			      struct file *swap_file, sector_t *span)
{
	struct cifsFileInfo *cfile = swap_file->private_data;
	struct inode *inode = swap_file->f_mapping->host;
	unsigned long blocks;
	long long isize;

	cifs_dbg(FYI, "swap activate\n");

	spin_lock(&inode->i_lock);
	blocks = inode->i_blocks;
	isize = inode->i_size;
	spin_unlock(&inode->i_lock);
	if (blocks*512 < isize) {
		pr_warn("swap activate: swapfile has holes\n");
		return -EINVAL;
	}
	*span = sis->pages;

	pr_warn_once("Swap support over SMB3 is experimental\n");

	/*
	 * TODO: consider adding ACL (or documenting how) to prevent other
	 * users (on this or other systems) from reading it
	 */

	/* TODO: add sk_set_memalloc(inet) or similar */

	if (cfile)
		cfile->swapfile = true;
	/*
	 * TODO: Since file already open, we can't open with DENY_ALL here
	 * but we could add call to grab a byte range lock to prevent others
	 * from reading or writing the file
	 */

	return 0;
}
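
/*
 * The holes check above relies on i_blocks counting 512-byte units: a
 * fully allocated file needs at least DIV_ROUND_UP(isize, 512) blocks,
 * so blocks * 512 < isize can only be true if part of the file is
 * unallocated, and swapping on a file with holes would be unsafe.
 */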

static void cifs_swap_deactivate(struct file *file)
{
	struct cifsFileInfo *cfile = file->private_data;

	cifs_dbg(FYI, "swap deactivate\n");

	/* TODO: undo sk_set_memalloc(inet) will eventually be needed */

	if (cfile)
		cfile->swapfile = false;

	/* do we need to unpin (or unlock) the file */
}

const struct address_space_operations cifs_addr_ops = {
	.readpage = cifs_readpage,
	.readpages = cifs_readpages,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.direct_IO = cifs_direct_io,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
	/*
	 * TODO: investigate and if useful we could add a cifs_migratePage
	 * helper (under CONFIG_MIGRATION) in the future, and also
	 * investigate and add an is_dirty_writeback helper if needed
	 */
	.swap_activate = cifs_swap_activate,
	.swap_deactivate = cifs_swap_deactivate,
};

/*
 * cifs_readpages requires the server to support a buffer large enough to
 * contain the header plus one complete page of data. Otherwise, we need
 * to leave cifs_readpages out of the address space operations.
 */
const struct address_space_operations cifs_addr_ops_smallbuf = {
	.readpage = cifs_readpage,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
};