kaiser: fix perf crashes
[linux/fpc-iii.git] / fs / cifs / file.c
blobcf192f9ce25465950bb7c2069ae3bb117cdefd4f
1 /*
2 * fs/cifs/file.c
4 * vfs operations that deal with files
6 * Copyright (C) International Business Machines Corp., 2002,2010
7 * Author(s): Steve French (sfrench@us.ibm.com)
8 * Jeremy Allison (jra@samba.org)
10 * This library is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU Lesser General Public License as published
12 * by the Free Software Foundation; either version 2.1 of the License, or
13 * (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
18 * the GNU Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this library; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 #include <linux/fs.h>
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <asm/div64.h>
37 #include "cifsfs.h"
38 #include "cifspdu.h"
39 #include "cifsglob.h"
40 #include "cifsproto.h"
41 #include "cifs_unicode.h"
42 #include "cifs_debug.h"
43 #include "cifs_fs_sb.h"
44 #include "fscache.h"
47 static inline int cifs_convert_flags(unsigned int flags)
49 if ((flags & O_ACCMODE) == O_RDONLY)
50 return GENERIC_READ;
51 else if ((flags & O_ACCMODE) == O_WRONLY)
52 return GENERIC_WRITE;
53 else if ((flags & O_ACCMODE) == O_RDWR) {
54 /* GENERIC_ALL is too much permission to request
55 can cause unnecessary access denied on create */
56 /* return GENERIC_ALL; */
57 return (GENERIC_READ | GENERIC_WRITE);
60 return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
61 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
62 FILE_READ_DATA);
65 static u32 cifs_posix_convert_flags(unsigned int flags)
67 u32 posix_flags = 0;
69 if ((flags & O_ACCMODE) == O_RDONLY)
70 posix_flags = SMB_O_RDONLY;
71 else if ((flags & O_ACCMODE) == O_WRONLY)
72 posix_flags = SMB_O_WRONLY;
73 else if ((flags & O_ACCMODE) == O_RDWR)
74 posix_flags = SMB_O_RDWR;
76 if (flags & O_CREAT) {
77 posix_flags |= SMB_O_CREAT;
78 if (flags & O_EXCL)
79 posix_flags |= SMB_O_EXCL;
80 } else if (flags & O_EXCL)
81 cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
82 current->comm, current->tgid);
84 if (flags & O_TRUNC)
85 posix_flags |= SMB_O_TRUNC;
86 /* be safe and imply O_SYNC for O_DSYNC */
87 if (flags & O_DSYNC)
88 posix_flags |= SMB_O_SYNC;
89 if (flags & O_DIRECTORY)
90 posix_flags |= SMB_O_DIRECTORY;
91 if (flags & O_NOFOLLOW)
92 posix_flags |= SMB_O_NOFOLLOW;
93 if (flags & O_DIRECT)
94 posix_flags |= SMB_O_DIRECT;
96 return posix_flags;
99 static inline int cifs_get_disposition(unsigned int flags)
101 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
102 return FILE_CREATE;
103 else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
104 return FILE_OVERWRITE_IF;
105 else if ((flags & O_CREAT) == O_CREAT)
106 return FILE_OPEN_IF;
107 else if ((flags & O_TRUNC) == O_TRUNC)
108 return FILE_OVERWRITE;
109 else
110 return FILE_OPEN;
113 int cifs_posix_open(char *full_path, struct inode **pinode,
114 struct super_block *sb, int mode, unsigned int f_flags,
115 __u32 *poplock, __u16 *pnetfid, unsigned int xid)
117 int rc;
118 FILE_UNIX_BASIC_INFO *presp_data;
119 __u32 posix_flags = 0;
120 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
121 struct cifs_fattr fattr;
122 struct tcon_link *tlink;
123 struct cifs_tcon *tcon;
125 cifs_dbg(FYI, "posix open %s\n", full_path);
127 presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
128 if (presp_data == NULL)
129 return -ENOMEM;
131 tlink = cifs_sb_tlink(cifs_sb);
132 if (IS_ERR(tlink)) {
133 rc = PTR_ERR(tlink);
134 goto posix_open_ret;
137 tcon = tlink_tcon(tlink);
138 mode &= ~current_umask();
140 posix_flags = cifs_posix_convert_flags(f_flags);
141 rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
142 poplock, full_path, cifs_sb->local_nls,
143 cifs_remap(cifs_sb));
144 cifs_put_tlink(tlink);
146 if (rc)
147 goto posix_open_ret;
149 if (presp_data->Type == cpu_to_le32(-1))
150 goto posix_open_ret; /* open ok, caller does qpathinfo */
152 if (!pinode)
153 goto posix_open_ret; /* caller does not need info */
155 cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
157 /* get new inode and set it up */
158 if (*pinode == NULL) {
159 cifs_fill_uniqueid(sb, &fattr);
160 *pinode = cifs_iget(sb, &fattr);
161 if (!*pinode) {
162 rc = -ENOMEM;
163 goto posix_open_ret;
165 } else {
166 cifs_fattr_to_inode(*pinode, &fattr);
169 posix_open_ret:
170 kfree(presp_data);
171 return rc;
174 static int
175 cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
176 struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
177 struct cifs_fid *fid, unsigned int xid)
179 int rc;
180 int desired_access;
181 int disposition;
182 int create_options = CREATE_NOT_DIR;
183 FILE_ALL_INFO *buf;
184 struct TCP_Server_Info *server = tcon->ses->server;
185 struct cifs_open_parms oparms;
187 if (!server->ops->open)
188 return -ENOSYS;
190 desired_access = cifs_convert_flags(f_flags);
192 /*********************************************************************
193 * open flag mapping table:
195 * POSIX Flag CIFS Disposition
196 * ---------- ----------------
197 * O_CREAT FILE_OPEN_IF
198 * O_CREAT | O_EXCL FILE_CREATE
199 * O_CREAT | O_TRUNC FILE_OVERWRITE_IF
200 * O_TRUNC FILE_OVERWRITE
201 * none of the above FILE_OPEN
203 * Note that there is not a direct match between disposition
204 * FILE_SUPERSEDE (ie create whether or not file exists although
205 * O_CREAT | O_TRUNC is similar but truncates the existing
206 * file rather than creating a new file as FILE_SUPERSEDE does
207 * (which uses the attributes / metadata passed in on open call)
209 *? O_SYNC is a reasonable match to CIFS writethrough flag
210 *? and the read write flags match reasonably. O_LARGEFILE
211 *? is irrelevant because largefile support is always used
212 *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
213 * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
214 *********************************************************************/
216 disposition = cifs_get_disposition(f_flags);
218 /* BB pass O_SYNC flag through on file attributes .. BB */
220 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
221 if (!buf)
222 return -ENOMEM;
224 if (backup_cred(cifs_sb))
225 create_options |= CREATE_OPEN_BACKUP_INTENT;
227 /* O_SYNC also has bit for O_DSYNC so following check picks up either */
228 if (f_flags & O_SYNC)
229 create_options |= CREATE_WRITE_THROUGH;
231 if (f_flags & O_DIRECT)
232 create_options |= CREATE_NO_BUFFER;
234 oparms.tcon = tcon;
235 oparms.cifs_sb = cifs_sb;
236 oparms.desired_access = desired_access;
237 oparms.create_options = create_options;
238 oparms.disposition = disposition;
239 oparms.path = full_path;
240 oparms.fid = fid;
241 oparms.reconnect = false;
243 rc = server->ops->open(xid, &oparms, oplock, buf);
245 if (rc)
246 goto out;
248 if (tcon->unix_ext)
249 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
250 xid);
251 else
252 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
253 xid, fid);
255 out:
256 kfree(buf);
257 return rc;
260 static bool
261 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
263 struct cifs_fid_locks *cur;
264 bool has_locks = false;
266 down_read(&cinode->lock_sem);
267 list_for_each_entry(cur, &cinode->llist, llist) {
268 if (!list_empty(&cur->locks)) {
269 has_locks = true;
270 break;
273 up_read(&cinode->lock_sem);
274 return has_locks;
277 struct cifsFileInfo *
278 cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
279 struct tcon_link *tlink, __u32 oplock)
281 struct dentry *dentry = file_dentry(file);
282 struct inode *inode = d_inode(dentry);
283 struct cifsInodeInfo *cinode = CIFS_I(inode);
284 struct cifsFileInfo *cfile;
285 struct cifs_fid_locks *fdlocks;
286 struct cifs_tcon *tcon = tlink_tcon(tlink);
287 struct TCP_Server_Info *server = tcon->ses->server;
289 cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
290 if (cfile == NULL)
291 return cfile;
293 fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
294 if (!fdlocks) {
295 kfree(cfile);
296 return NULL;
299 INIT_LIST_HEAD(&fdlocks->locks);
300 fdlocks->cfile = cfile;
301 cfile->llist = fdlocks;
302 down_write(&cinode->lock_sem);
303 list_add(&fdlocks->llist, &cinode->llist);
304 up_write(&cinode->lock_sem);
306 cfile->count = 1;
307 cfile->pid = current->tgid;
308 cfile->uid = current_fsuid();
309 cfile->dentry = dget(dentry);
310 cfile->f_flags = file->f_flags;
311 cfile->invalidHandle = false;
312 cfile->tlink = cifs_get_tlink(tlink);
313 INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
314 mutex_init(&cfile->fh_mutex);
315 spin_lock_init(&cfile->file_info_lock);
317 cifs_sb_active(inode->i_sb);
320 * If the server returned a read oplock and we have mandatory brlocks,
321 * set oplock level to None.
323 if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
324 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
325 oplock = 0;
328 spin_lock(&tcon->open_file_lock);
329 if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
330 oplock = fid->pending_open->oplock;
331 list_del(&fid->pending_open->olist);
333 fid->purge_cache = false;
334 server->ops->set_fid(cfile, fid, oplock);
336 list_add(&cfile->tlist, &tcon->openFileList);
338 /* if readable file instance put first in list*/
339 if (file->f_mode & FMODE_READ)
340 list_add(&cfile->flist, &cinode->openFileList);
341 else
342 list_add_tail(&cfile->flist, &cinode->openFileList);
343 spin_unlock(&tcon->open_file_lock);
345 if (fid->purge_cache)
346 cifs_zap_mapping(inode);
348 file->private_data = cfile;
349 return cfile;
352 struct cifsFileInfo *
353 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
355 spin_lock(&cifs_file->file_info_lock);
356 cifsFileInfo_get_locked(cifs_file);
357 spin_unlock(&cifs_file->file_info_lock);
358 return cifs_file;
362 * Release a reference on the file private data. This may involve closing
363 * the filehandle out on the server. Must be called without holding
364 * tcon->open_file_lock and cifs_file->file_info_lock.
366 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
368 struct inode *inode = d_inode(cifs_file->dentry);
369 struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
370 struct TCP_Server_Info *server = tcon->ses->server;
371 struct cifsInodeInfo *cifsi = CIFS_I(inode);
372 struct super_block *sb = inode->i_sb;
373 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
374 struct cifsLockInfo *li, *tmp;
375 struct cifs_fid fid;
376 struct cifs_pending_open open;
377 bool oplock_break_cancelled;
379 spin_lock(&tcon->open_file_lock);
381 spin_lock(&cifs_file->file_info_lock);
382 if (--cifs_file->count > 0) {
383 spin_unlock(&cifs_file->file_info_lock);
384 spin_unlock(&tcon->open_file_lock);
385 return;
387 spin_unlock(&cifs_file->file_info_lock);
389 if (server->ops->get_lease_key)
390 server->ops->get_lease_key(inode, &fid);
392 /* store open in pending opens to make sure we don't miss lease break */
393 cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
395 /* remove it from the lists */
396 list_del(&cifs_file->flist);
397 list_del(&cifs_file->tlist);
399 if (list_empty(&cifsi->openFileList)) {
400 cifs_dbg(FYI, "closing last open instance for inode %p\n",
401 d_inode(cifs_file->dentry));
403 * In strict cache mode we need invalidate mapping on the last
404 * close because it may cause a error when we open this file
405 * again and get at least level II oplock.
407 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
408 set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
409 cifs_set_oplock_level(cifsi, 0);
412 spin_unlock(&tcon->open_file_lock);
414 oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);
416 if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
417 struct TCP_Server_Info *server = tcon->ses->server;
418 unsigned int xid;
420 xid = get_xid();
421 if (server->ops->close)
422 server->ops->close(xid, tcon, &cifs_file->fid);
423 _free_xid(xid);
426 if (oplock_break_cancelled)
427 cifs_done_oplock_break(cifsi);
429 cifs_del_pending_open(&open);
432 * Delete any outstanding lock records. We'll lose them when the file
433 * is closed anyway.
435 down_write(&cifsi->lock_sem);
436 list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
437 list_del(&li->llist);
438 cifs_del_lock_waiters(li);
439 kfree(li);
441 list_del(&cifs_file->llist->llist);
442 kfree(cifs_file->llist);
443 up_write(&cifsi->lock_sem);
445 cifs_put_tlink(cifs_file->tlink);
446 dput(cifs_file->dentry);
447 cifs_sb_deactive(sb);
448 kfree(cifs_file);
451 int cifs_open(struct inode *inode, struct file *file)
454 int rc = -EACCES;
455 unsigned int xid;
456 __u32 oplock;
457 struct cifs_sb_info *cifs_sb;
458 struct TCP_Server_Info *server;
459 struct cifs_tcon *tcon;
460 struct tcon_link *tlink;
461 struct cifsFileInfo *cfile = NULL;
462 char *full_path = NULL;
463 bool posix_open_ok = false;
464 struct cifs_fid fid;
465 struct cifs_pending_open open;
467 xid = get_xid();
469 cifs_sb = CIFS_SB(inode->i_sb);
470 tlink = cifs_sb_tlink(cifs_sb);
471 if (IS_ERR(tlink)) {
472 free_xid(xid);
473 return PTR_ERR(tlink);
475 tcon = tlink_tcon(tlink);
476 server = tcon->ses->server;
478 full_path = build_path_from_dentry(file_dentry(file));
479 if (full_path == NULL) {
480 rc = -ENOMEM;
481 goto out;
484 cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
485 inode, file->f_flags, full_path);
487 if (file->f_flags & O_DIRECT &&
488 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
489 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
490 file->f_op = &cifs_file_direct_nobrl_ops;
491 else
492 file->f_op = &cifs_file_direct_ops;
495 if (server->oplocks)
496 oplock = REQ_OPLOCK;
497 else
498 oplock = 0;
500 if (!tcon->broken_posix_open && tcon->unix_ext &&
501 cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
502 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
503 /* can not refresh inode info since size could be stale */
504 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
505 cifs_sb->mnt_file_mode /* ignored */,
506 file->f_flags, &oplock, &fid.netfid, xid);
507 if (rc == 0) {
508 cifs_dbg(FYI, "posix open succeeded\n");
509 posix_open_ok = true;
510 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
511 if (tcon->ses->serverNOS)
512 cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
513 tcon->ses->serverName,
514 tcon->ses->serverNOS);
515 tcon->broken_posix_open = true;
516 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
517 (rc != -EOPNOTSUPP)) /* path not found or net err */
518 goto out;
520 * Else fallthrough to retry open the old way on network i/o
521 * or DFS errors.
525 if (server->ops->get_lease_key)
526 server->ops->get_lease_key(inode, &fid);
528 cifs_add_pending_open(&fid, tlink, &open);
530 if (!posix_open_ok) {
531 if (server->ops->get_lease_key)
532 server->ops->get_lease_key(inode, &fid);
534 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
535 file->f_flags, &oplock, &fid, xid);
536 if (rc) {
537 cifs_del_pending_open(&open);
538 goto out;
542 cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
543 if (cfile == NULL) {
544 if (server->ops->close)
545 server->ops->close(xid, tcon, &fid);
546 cifs_del_pending_open(&open);
547 rc = -ENOMEM;
548 goto out;
551 cifs_fscache_set_inode_cookie(inode, file);
553 if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
555 * Time to set mode which we can not set earlier due to
556 * problems creating new read-only files.
558 struct cifs_unix_set_info_args args = {
559 .mode = inode->i_mode,
560 .uid = INVALID_UID, /* no change */
561 .gid = INVALID_GID, /* no change */
562 .ctime = NO_CHANGE_64,
563 .atime = NO_CHANGE_64,
564 .mtime = NO_CHANGE_64,
565 .device = 0,
567 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
568 cfile->pid);
571 out:
572 kfree(full_path);
573 free_xid(xid);
574 cifs_put_tlink(tlink);
575 return rc;
578 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
581 * Try to reacquire byte range locks that were released when session
582 * to server was lost.
584 static int
585 cifs_relock_file(struct cifsFileInfo *cfile)
587 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
588 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
589 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
590 int rc = 0;
592 down_read(&cinode->lock_sem);
593 if (cinode->can_cache_brlcks) {
594 /* can cache locks - no need to relock */
595 up_read(&cinode->lock_sem);
596 return rc;
599 if (cap_unix(tcon->ses) &&
600 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
601 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
602 rc = cifs_push_posix_locks(cfile);
603 else
604 rc = tcon->ses->server->ops->push_mand_locks(cfile);
606 up_read(&cinode->lock_sem);
607 return rc;
610 static int
611 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
613 int rc = -EACCES;
614 unsigned int xid;
615 __u32 oplock;
616 struct cifs_sb_info *cifs_sb;
617 struct cifs_tcon *tcon;
618 struct TCP_Server_Info *server;
619 struct cifsInodeInfo *cinode;
620 struct inode *inode;
621 char *full_path = NULL;
622 int desired_access;
623 int disposition = FILE_OPEN;
624 int create_options = CREATE_NOT_DIR;
625 struct cifs_open_parms oparms;
627 xid = get_xid();
628 mutex_lock(&cfile->fh_mutex);
629 if (!cfile->invalidHandle) {
630 mutex_unlock(&cfile->fh_mutex);
631 rc = 0;
632 free_xid(xid);
633 return rc;
636 inode = d_inode(cfile->dentry);
637 cifs_sb = CIFS_SB(inode->i_sb);
638 tcon = tlink_tcon(cfile->tlink);
639 server = tcon->ses->server;
642 * Can not grab rename sem here because various ops, including those
643 * that already have the rename sem can end up causing writepage to get
644 * called and if the server was down that means we end up here, and we
645 * can never tell if the caller already has the rename_sem.
647 full_path = build_path_from_dentry(cfile->dentry);
648 if (full_path == NULL) {
649 rc = -ENOMEM;
650 mutex_unlock(&cfile->fh_mutex);
651 free_xid(xid);
652 return rc;
655 cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
656 inode, cfile->f_flags, full_path);
658 if (tcon->ses->server->oplocks)
659 oplock = REQ_OPLOCK;
660 else
661 oplock = 0;
663 if (tcon->unix_ext && cap_unix(tcon->ses) &&
664 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
665 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
667 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
668 * original open. Must mask them off for a reopen.
670 unsigned int oflags = cfile->f_flags &
671 ~(O_CREAT | O_EXCL | O_TRUNC);
673 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
674 cifs_sb->mnt_file_mode /* ignored */,
675 oflags, &oplock, &cfile->fid.netfid, xid);
676 if (rc == 0) {
677 cifs_dbg(FYI, "posix reopen succeeded\n");
678 oparms.reconnect = true;
679 goto reopen_success;
682 * fallthrough to retry open the old way on errors, especially
683 * in the reconnect path it is important to retry hard
687 desired_access = cifs_convert_flags(cfile->f_flags);
689 if (backup_cred(cifs_sb))
690 create_options |= CREATE_OPEN_BACKUP_INTENT;
692 if (server->ops->get_lease_key)
693 server->ops->get_lease_key(inode, &cfile->fid);
695 oparms.tcon = tcon;
696 oparms.cifs_sb = cifs_sb;
697 oparms.desired_access = desired_access;
698 oparms.create_options = create_options;
699 oparms.disposition = disposition;
700 oparms.path = full_path;
701 oparms.fid = &cfile->fid;
702 oparms.reconnect = true;
705 * Can not refresh inode by passing in file_info buf to be returned by
706 * ops->open and then calling get_inode_info with returned buf since
707 * file might have write behind data that needs to be flushed and server
708 * version of file size can be stale. If we knew for sure that inode was
709 * not dirty locally we could do this.
711 rc = server->ops->open(xid, &oparms, &oplock, NULL);
712 if (rc == -ENOENT && oparms.reconnect == false) {
713 /* durable handle timeout is expired - open the file again */
714 rc = server->ops->open(xid, &oparms, &oplock, NULL);
715 /* indicate that we need to relock the file */
716 oparms.reconnect = true;
719 if (rc) {
720 mutex_unlock(&cfile->fh_mutex);
721 cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
722 cifs_dbg(FYI, "oplock: %d\n", oplock);
723 goto reopen_error_exit;
726 reopen_success:
727 cfile->invalidHandle = false;
728 mutex_unlock(&cfile->fh_mutex);
729 cinode = CIFS_I(inode);
731 if (can_flush) {
732 rc = filemap_write_and_wait(inode->i_mapping);
733 mapping_set_error(inode->i_mapping, rc);
735 if (tcon->unix_ext)
736 rc = cifs_get_inode_info_unix(&inode, full_path,
737 inode->i_sb, xid);
738 else
739 rc = cifs_get_inode_info(&inode, full_path, NULL,
740 inode->i_sb, xid, NULL);
743 * Else we are writing out data to server already and could deadlock if
744 * we tried to flush data, and since we do not know if we have data that
745 * would invalidate the current end of file on the server we can not go
746 * to the server to get the new inode info.
750 * If the server returned a read oplock and we have mandatory brlocks,
751 * set oplock level to None.
753 if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
754 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
755 oplock = 0;
758 server->ops->set_fid(cfile, &cfile->fid, oplock);
759 if (oparms.reconnect)
760 cifs_relock_file(cfile);
762 reopen_error_exit:
763 kfree(full_path);
764 free_xid(xid);
765 return rc;
768 int cifs_close(struct inode *inode, struct file *file)
770 if (file->private_data != NULL) {
771 cifsFileInfo_put(file->private_data);
772 file->private_data = NULL;
775 /* return code from the ->release op is always ignored */
776 return 0;
779 void
780 cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
782 struct cifsFileInfo *open_file;
783 struct list_head *tmp;
784 struct list_head *tmp1;
785 struct list_head tmp_list;
787 if (!tcon->use_persistent || !tcon->need_reopen_files)
788 return;
790 tcon->need_reopen_files = false;
792 cifs_dbg(FYI, "Reopen persistent handles");
793 INIT_LIST_HEAD(&tmp_list);
795 /* list all files open on tree connection, reopen resilient handles */
796 spin_lock(&tcon->open_file_lock);
797 list_for_each(tmp, &tcon->openFileList) {
798 open_file = list_entry(tmp, struct cifsFileInfo, tlist);
799 if (!open_file->invalidHandle)
800 continue;
801 cifsFileInfo_get(open_file);
802 list_add_tail(&open_file->rlist, &tmp_list);
804 spin_unlock(&tcon->open_file_lock);
806 list_for_each_safe(tmp, tmp1, &tmp_list) {
807 open_file = list_entry(tmp, struct cifsFileInfo, rlist);
808 if (cifs_reopen_file(open_file, false /* do not flush */))
809 tcon->need_reopen_files = true;
810 list_del_init(&open_file->rlist);
811 cifsFileInfo_put(open_file);
815 int cifs_closedir(struct inode *inode, struct file *file)
817 int rc = 0;
818 unsigned int xid;
819 struct cifsFileInfo *cfile = file->private_data;
820 struct cifs_tcon *tcon;
821 struct TCP_Server_Info *server;
822 char *buf;
824 cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
826 if (cfile == NULL)
827 return rc;
829 xid = get_xid();
830 tcon = tlink_tcon(cfile->tlink);
831 server = tcon->ses->server;
833 cifs_dbg(FYI, "Freeing private data in close dir\n");
834 spin_lock(&cfile->file_info_lock);
835 if (server->ops->dir_needs_close(cfile)) {
836 cfile->invalidHandle = true;
837 spin_unlock(&cfile->file_info_lock);
838 if (server->ops->close_dir)
839 rc = server->ops->close_dir(xid, tcon, &cfile->fid);
840 else
841 rc = -ENOSYS;
842 cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
843 /* not much we can do if it fails anyway, ignore rc */
844 rc = 0;
845 } else
846 spin_unlock(&cfile->file_info_lock);
848 buf = cfile->srch_inf.ntwrk_buf_start;
849 if (buf) {
850 cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
851 cfile->srch_inf.ntwrk_buf_start = NULL;
852 if (cfile->srch_inf.smallBuf)
853 cifs_small_buf_release(buf);
854 else
855 cifs_buf_release(buf);
858 cifs_put_tlink(cfile->tlink);
859 kfree(file->private_data);
860 file->private_data = NULL;
861 /* BB can we lock the filestruct while this is going on? */
862 free_xid(xid);
863 return rc;
866 static struct cifsLockInfo *
867 cifs_lock_init(__u64 offset, __u64 length, __u8 type)
869 struct cifsLockInfo *lock =
870 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
871 if (!lock)
872 return lock;
873 lock->offset = offset;
874 lock->length = length;
875 lock->type = type;
876 lock->pid = current->tgid;
877 INIT_LIST_HEAD(&lock->blist);
878 init_waitqueue_head(&lock->block_q);
879 return lock;
882 void
883 cifs_del_lock_waiters(struct cifsLockInfo *lock)
885 struct cifsLockInfo *li, *tmp;
886 list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
887 list_del_init(&li->blist);
888 wake_up(&li->block_q);
892 #define CIFS_LOCK_OP 0
893 #define CIFS_READ_OP 1
894 #define CIFS_WRITE_OP 2
896 /* @rw_check : 0 - no op, 1 - read, 2 - write */
897 static bool
898 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
899 __u64 length, __u8 type, struct cifsFileInfo *cfile,
900 struct cifsLockInfo **conf_lock, int rw_check)
902 struct cifsLockInfo *li;
903 struct cifsFileInfo *cur_cfile = fdlocks->cfile;
904 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
906 list_for_each_entry(li, &fdlocks->locks, llist) {
907 if (offset + length <= li->offset ||
908 offset >= li->offset + li->length)
909 continue;
910 if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
911 server->ops->compare_fids(cfile, cur_cfile)) {
912 /* shared lock prevents write op through the same fid */
913 if (!(li->type & server->vals->shared_lock_type) ||
914 rw_check != CIFS_WRITE_OP)
915 continue;
917 if ((type & server->vals->shared_lock_type) &&
918 ((server->ops->compare_fids(cfile, cur_cfile) &&
919 current->tgid == li->pid) || type == li->type))
920 continue;
921 if (conf_lock)
922 *conf_lock = li;
923 return true;
925 return false;
928 bool
929 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
930 __u8 type, struct cifsLockInfo **conf_lock,
931 int rw_check)
933 bool rc = false;
934 struct cifs_fid_locks *cur;
935 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
937 list_for_each_entry(cur, &cinode->llist, llist) {
938 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
939 cfile, conf_lock, rw_check);
940 if (rc)
941 break;
944 return rc;
948 * Check if there is another lock that prevents us to set the lock (mandatory
949 * style). If such a lock exists, update the flock structure with its
950 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
951 * or leave it the same if we can't. Returns 0 if we don't need to request to
952 * the server or 1 otherwise.
954 static int
955 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
956 __u8 type, struct file_lock *flock)
958 int rc = 0;
959 struct cifsLockInfo *conf_lock;
960 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
961 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
962 bool exist;
964 down_read(&cinode->lock_sem);
966 exist = cifs_find_lock_conflict(cfile, offset, length, type,
967 &conf_lock, CIFS_LOCK_OP);
968 if (exist) {
969 flock->fl_start = conf_lock->offset;
970 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
971 flock->fl_pid = conf_lock->pid;
972 if (conf_lock->type & server->vals->shared_lock_type)
973 flock->fl_type = F_RDLCK;
974 else
975 flock->fl_type = F_WRLCK;
976 } else if (!cinode->can_cache_brlcks)
977 rc = 1;
978 else
979 flock->fl_type = F_UNLCK;
981 up_read(&cinode->lock_sem);
982 return rc;
985 static void
986 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
988 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
989 down_write(&cinode->lock_sem);
990 list_add_tail(&lock->llist, &cfile->llist->locks);
991 up_write(&cinode->lock_sem);
995 * Set the byte-range lock (mandatory style). Returns:
996 * 1) 0, if we set the lock and don't need to request to the server;
997 * 2) 1, if no locks prevent us but we need to request to the server;
998 * 3) -EACCESS, if there is a lock that prevents us and wait is false.
1000 static int
1001 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
1002 bool wait)
1004 struct cifsLockInfo *conf_lock;
1005 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1006 bool exist;
1007 int rc = 0;
1009 try_again:
1010 exist = false;
1011 down_write(&cinode->lock_sem);
1013 exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
1014 lock->type, &conf_lock, CIFS_LOCK_OP);
1015 if (!exist && cinode->can_cache_brlcks) {
1016 list_add_tail(&lock->llist, &cfile->llist->locks);
1017 up_write(&cinode->lock_sem);
1018 return rc;
1021 if (!exist)
1022 rc = 1;
1023 else if (!wait)
1024 rc = -EACCES;
1025 else {
1026 list_add_tail(&lock->blist, &conf_lock->blist);
1027 up_write(&cinode->lock_sem);
1028 rc = wait_event_interruptible(lock->block_q,
1029 (lock->blist.prev == &lock->blist) &&
1030 (lock->blist.next == &lock->blist));
1031 if (!rc)
1032 goto try_again;
1033 down_write(&cinode->lock_sem);
1034 list_del_init(&lock->blist);
1037 up_write(&cinode->lock_sem);
1038 return rc;
1042 * Check if there is another lock that prevents us to set the lock (posix
1043 * style). If such a lock exists, update the flock structure with its
1044 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1045 * or leave it the same if we can't. Returns 0 if we don't need to request to
1046 * the server or 1 otherwise.
1048 static int
1049 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1051 int rc = 0;
1052 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1053 unsigned char saved_type = flock->fl_type;
1055 if ((flock->fl_flags & FL_POSIX) == 0)
1056 return 1;
1058 down_read(&cinode->lock_sem);
1059 posix_test_lock(file, flock);
1061 if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1062 flock->fl_type = saved_type;
1063 rc = 1;
1066 up_read(&cinode->lock_sem);
1067 return rc;
1071 * Set the byte-range lock (posix style). Returns:
1072 * 1) 0, if we set the lock and don't need to request to the server;
1073 * 2) 1, if we need to request to the server;
1074 * 3) <0, if the error occurs while setting the lock.
1076 static int
1077 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1079 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1080 int rc = 1;
1082 if ((flock->fl_flags & FL_POSIX) == 0)
1083 return rc;
1085 try_again:
1086 down_write(&cinode->lock_sem);
1087 if (!cinode->can_cache_brlcks) {
1088 up_write(&cinode->lock_sem);
1089 return rc;
1092 rc = posix_lock_file(file, flock, NULL);
1093 up_write(&cinode->lock_sem);
1094 if (rc == FILE_LOCK_DEFERRED) {
1095 rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
1096 if (!rc)
1097 goto try_again;
1098 posix_unblock_lock(flock);
1100 return rc;
1104 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1106 unsigned int xid;
1107 int rc = 0, stored_rc;
1108 struct cifsLockInfo *li, *tmp;
1109 struct cifs_tcon *tcon;
1110 unsigned int num, max_num, max_buf;
1111 LOCKING_ANDX_RANGE *buf, *cur;
1112 int types[] = {LOCKING_ANDX_LARGE_FILES,
1113 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1114 int i;
1116 xid = get_xid();
1117 tcon = tlink_tcon(cfile->tlink);
1120 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1121 * and check it for zero before using.
1123 max_buf = tcon->ses->server->maxBuf;
1124 if (!max_buf) {
1125 free_xid(xid);
1126 return -EINVAL;
1129 max_num = (max_buf - sizeof(struct smb_hdr)) /
1130 sizeof(LOCKING_ANDX_RANGE);
1131 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1132 if (!buf) {
1133 free_xid(xid);
1134 return -ENOMEM;
1137 for (i = 0; i < 2; i++) {
1138 cur = buf;
1139 num = 0;
1140 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1141 if (li->type != types[i])
1142 continue;
1143 cur->Pid = cpu_to_le16(li->pid);
1144 cur->LengthLow = cpu_to_le32((u32)li->length);
1145 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1146 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1147 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1148 if (++num == max_num) {
1149 stored_rc = cifs_lockv(xid, tcon,
1150 cfile->fid.netfid,
1151 (__u8)li->type, 0, num,
1152 buf);
1153 if (stored_rc)
1154 rc = stored_rc;
1155 cur = buf;
1156 num = 0;
1157 } else
1158 cur++;
1161 if (num) {
1162 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1163 (__u8)types[i], 0, num, buf);
1164 if (stored_rc)
1165 rc = stored_rc;
1169 kfree(buf);
1170 free_xid(xid);
1171 return rc;
1174 static __u32
1175 hash_lockowner(fl_owner_t owner)
1177 return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1180 struct lock_to_push {
1181 struct list_head llist;
1182 __u64 offset;
1183 __u64 length;
1184 __u32 pid;
1185 __u16 netfid;
1186 __u8 type;
1189 static int
1190 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1192 struct inode *inode = d_inode(cfile->dentry);
1193 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1194 struct file_lock *flock;
1195 struct file_lock_context *flctx = inode->i_flctx;
1196 unsigned int count = 0, i;
1197 int rc = 0, xid, type;
1198 struct list_head locks_to_send, *el;
1199 struct lock_to_push *lck, *tmp;
1200 __u64 length;
1202 xid = get_xid();
1204 if (!flctx)
1205 goto out;
1207 spin_lock(&flctx->flc_lock);
1208 list_for_each(el, &flctx->flc_posix) {
1209 count++;
1211 spin_unlock(&flctx->flc_lock);
1213 INIT_LIST_HEAD(&locks_to_send);
1216 * Allocating count locks is enough because no FL_POSIX locks can be
1217 * added to the list while we are holding cinode->lock_sem that
1218 * protects locking operations of this inode.
1220 for (i = 0; i < count; i++) {
1221 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1222 if (!lck) {
1223 rc = -ENOMEM;
1224 goto err_out;
1226 list_add_tail(&lck->llist, &locks_to_send);
1229 el = locks_to_send.next;
1230 spin_lock(&flctx->flc_lock);
1231 list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1232 if (el == &locks_to_send) {
1234 * The list ended. We don't have enough allocated
1235 * structures - something is really wrong.
1237 cifs_dbg(VFS, "Can't push all brlocks!\n");
1238 break;
1240 length = 1 + flock->fl_end - flock->fl_start;
1241 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1242 type = CIFS_RDLCK;
1243 else
1244 type = CIFS_WRLCK;
1245 lck = list_entry(el, struct lock_to_push, llist);
1246 lck->pid = hash_lockowner(flock->fl_owner);
1247 lck->netfid = cfile->fid.netfid;
1248 lck->length = length;
1249 lck->type = type;
1250 lck->offset = flock->fl_start;
1252 spin_unlock(&flctx->flc_lock);
1254 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1255 int stored_rc;
1257 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1258 lck->offset, lck->length, NULL,
1259 lck->type, 0);
1260 if (stored_rc)
1261 rc = stored_rc;
1262 list_del(&lck->llist);
1263 kfree(lck);
1266 out:
1267 free_xid(xid);
1268 return rc;
1269 err_out:
1270 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1271 list_del(&lck->llist);
1272 kfree(lck);
1274 goto out;
1277 static int
1278 cifs_push_locks(struct cifsFileInfo *cfile)
1280 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1281 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1282 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1283 int rc = 0;
1285 /* we are going to update can_cache_brlcks here - need a write access */
1286 down_write(&cinode->lock_sem);
1287 if (!cinode->can_cache_brlcks) {
1288 up_write(&cinode->lock_sem);
1289 return rc;
1292 if (cap_unix(tcon->ses) &&
1293 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1294 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1295 rc = cifs_push_posix_locks(cfile);
1296 else
1297 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1299 cinode->can_cache_brlcks = false;
1300 up_write(&cinode->lock_sem);
1301 return rc;
1304 static void
1305 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1306 bool *wait_flag, struct TCP_Server_Info *server)
1308 if (flock->fl_flags & FL_POSIX)
1309 cifs_dbg(FYI, "Posix\n");
1310 if (flock->fl_flags & FL_FLOCK)
1311 cifs_dbg(FYI, "Flock\n");
1312 if (flock->fl_flags & FL_SLEEP) {
1313 cifs_dbg(FYI, "Blocking lock\n");
1314 *wait_flag = true;
1316 if (flock->fl_flags & FL_ACCESS)
1317 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1318 if (flock->fl_flags & FL_LEASE)
1319 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1320 if (flock->fl_flags &
1321 (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1322 FL_ACCESS | FL_LEASE | FL_CLOSE)))
1323 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1325 *type = server->vals->large_lock_type;
1326 if (flock->fl_type == F_WRLCK) {
1327 cifs_dbg(FYI, "F_WRLCK\n");
1328 *type |= server->vals->exclusive_lock_type;
1329 *lock = 1;
1330 } else if (flock->fl_type == F_UNLCK) {
1331 cifs_dbg(FYI, "F_UNLCK\n");
1332 *type |= server->vals->unlock_lock_type;
1333 *unlock = 1;
1334 /* Check if unlock includes more than one lock range */
1335 } else if (flock->fl_type == F_RDLCK) {
1336 cifs_dbg(FYI, "F_RDLCK\n");
1337 *type |= server->vals->shared_lock_type;
1338 *lock = 1;
1339 } else if (flock->fl_type == F_EXLCK) {
1340 cifs_dbg(FYI, "F_EXLCK\n");
1341 *type |= server->vals->exclusive_lock_type;
1342 *lock = 1;
1343 } else if (flock->fl_type == F_SHLCK) {
1344 cifs_dbg(FYI, "F_SHLCK\n");
1345 *type |= server->vals->shared_lock_type;
1346 *lock = 1;
1347 } else
1348 cifs_dbg(FYI, "Unknown type of lock\n");
1351 static int
1352 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1353 bool wait_flag, bool posix_lck, unsigned int xid)
1355 int rc = 0;
1356 __u64 length = 1 + flock->fl_end - flock->fl_start;
1357 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1358 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1359 struct TCP_Server_Info *server = tcon->ses->server;
1360 __u16 netfid = cfile->fid.netfid;
1362 if (posix_lck) {
1363 int posix_lock_type;
1365 rc = cifs_posix_lock_test(file, flock);
1366 if (!rc)
1367 return rc;
1369 if (type & server->vals->shared_lock_type)
1370 posix_lock_type = CIFS_RDLCK;
1371 else
1372 posix_lock_type = CIFS_WRLCK;
1373 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1374 hash_lockowner(flock->fl_owner),
1375 flock->fl_start, length, flock,
1376 posix_lock_type, wait_flag);
1377 return rc;
1380 rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1381 if (!rc)
1382 return rc;
1384 /* BB we could chain these into one lock request BB */
1385 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1386 1, 0, false);
1387 if (rc == 0) {
1388 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1389 type, 0, 1, false);
1390 flock->fl_type = F_UNLCK;
1391 if (rc != 0)
1392 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1393 rc);
1394 return 0;
1397 if (type & server->vals->shared_lock_type) {
1398 flock->fl_type = F_WRLCK;
1399 return 0;
1402 type &= ~server->vals->exclusive_lock_type;
1404 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1405 type | server->vals->shared_lock_type,
1406 1, 0, false);
1407 if (rc == 0) {
1408 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1409 type | server->vals->shared_lock_type, 0, 1, false);
1410 flock->fl_type = F_RDLCK;
1411 if (rc != 0)
1412 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1413 rc);
1414 } else
1415 flock->fl_type = F_WRLCK;
1417 return 0;
1420 void
1421 cifs_move_llist(struct list_head *source, struct list_head *dest)
1423 struct list_head *li, *tmp;
1424 list_for_each_safe(li, tmp, source)
1425 list_move(li, dest);
1428 void
1429 cifs_free_llist(struct list_head *llist)
1431 struct cifsLockInfo *li, *tmp;
1432 list_for_each_entry_safe(li, tmp, llist, llist) {
1433 cifs_del_lock_waiters(li);
1434 list_del(&li->llist);
1435 kfree(li);
1440 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1441 unsigned int xid)
1443 int rc = 0, stored_rc;
1444 int types[] = {LOCKING_ANDX_LARGE_FILES,
1445 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1446 unsigned int i;
1447 unsigned int max_num, num, max_buf;
1448 LOCKING_ANDX_RANGE *buf, *cur;
1449 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1450 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1451 struct cifsLockInfo *li, *tmp;
1452 __u64 length = 1 + flock->fl_end - flock->fl_start;
1453 struct list_head tmp_llist;
1455 INIT_LIST_HEAD(&tmp_llist);
1458 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1459 * and check it for zero before using.
1461 max_buf = tcon->ses->server->maxBuf;
1462 if (!max_buf)
1463 return -EINVAL;
1465 max_num = (max_buf - sizeof(struct smb_hdr)) /
1466 sizeof(LOCKING_ANDX_RANGE);
1467 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1468 if (!buf)
1469 return -ENOMEM;
1471 down_write(&cinode->lock_sem);
1472 for (i = 0; i < 2; i++) {
1473 cur = buf;
1474 num = 0;
1475 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1476 if (flock->fl_start > li->offset ||
1477 (flock->fl_start + length) <
1478 (li->offset + li->length))
1479 continue;
1480 if (current->tgid != li->pid)
1481 continue;
1482 if (types[i] != li->type)
1483 continue;
1484 if (cinode->can_cache_brlcks) {
1486 * We can cache brlock requests - simply remove
1487 * a lock from the file's list.
1489 list_del(&li->llist);
1490 cifs_del_lock_waiters(li);
1491 kfree(li);
1492 continue;
1494 cur->Pid = cpu_to_le16(li->pid);
1495 cur->LengthLow = cpu_to_le32((u32)li->length);
1496 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1497 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1498 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1500 * We need to save a lock here to let us add it again to
1501 * the file's list if the unlock range request fails on
1502 * the server.
1504 list_move(&li->llist, &tmp_llist);
1505 if (++num == max_num) {
1506 stored_rc = cifs_lockv(xid, tcon,
1507 cfile->fid.netfid,
1508 li->type, num, 0, buf);
1509 if (stored_rc) {
1511 * We failed on the unlock range
1512 * request - add all locks from the tmp
1513 * list to the head of the file's list.
1515 cifs_move_llist(&tmp_llist,
1516 &cfile->llist->locks);
1517 rc = stored_rc;
1518 } else
1520 * The unlock range request succeed -
1521 * free the tmp list.
1523 cifs_free_llist(&tmp_llist);
1524 cur = buf;
1525 num = 0;
1526 } else
1527 cur++;
1529 if (num) {
1530 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1531 types[i], num, 0, buf);
1532 if (stored_rc) {
1533 cifs_move_llist(&tmp_llist,
1534 &cfile->llist->locks);
1535 rc = stored_rc;
1536 } else
1537 cifs_free_llist(&tmp_llist);
1541 up_write(&cinode->lock_sem);
1542 kfree(buf);
1543 return rc;
1546 static int
1547 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1548 bool wait_flag, bool posix_lck, int lock, int unlock,
1549 unsigned int xid)
1551 int rc = 0;
1552 __u64 length = 1 + flock->fl_end - flock->fl_start;
1553 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1554 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1555 struct TCP_Server_Info *server = tcon->ses->server;
1556 struct inode *inode = d_inode(cfile->dentry);
1558 if (posix_lck) {
1559 int posix_lock_type;
1561 rc = cifs_posix_lock_set(file, flock);
1562 if (!rc || rc < 0)
1563 return rc;
1565 if (type & server->vals->shared_lock_type)
1566 posix_lock_type = CIFS_RDLCK;
1567 else
1568 posix_lock_type = CIFS_WRLCK;
1570 if (unlock == 1)
1571 posix_lock_type = CIFS_UNLCK;
1573 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1574 hash_lockowner(flock->fl_owner),
1575 flock->fl_start, length,
1576 NULL, posix_lock_type, wait_flag);
1577 goto out;
1580 if (lock) {
1581 struct cifsLockInfo *lock;
1583 lock = cifs_lock_init(flock->fl_start, length, type);
1584 if (!lock)
1585 return -ENOMEM;
1587 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1588 if (rc < 0) {
1589 kfree(lock);
1590 return rc;
1592 if (!rc)
1593 goto out;
1596 * Windows 7 server can delay breaking lease from read to None
1597 * if we set a byte-range lock on a file - break it explicitly
1598 * before sending the lock to the server to be sure the next
1599 * read won't conflict with non-overlapted locks due to
1600 * pagereading.
1602 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1603 CIFS_CACHE_READ(CIFS_I(inode))) {
1604 cifs_zap_mapping(inode);
1605 cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1606 inode);
1607 CIFS_I(inode)->oplock = 0;
1610 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1611 type, 1, 0, wait_flag);
1612 if (rc) {
1613 kfree(lock);
1614 return rc;
1617 cifs_lock_add(cfile, lock);
1618 } else if (unlock)
1619 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1621 out:
1622 if (flock->fl_flags & FL_POSIX && !rc)
1623 rc = locks_lock_file_wait(file, flock);
1624 return rc;
1627 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1629 int rc, xid;
1630 int lock = 0, unlock = 0;
1631 bool wait_flag = false;
1632 bool posix_lck = false;
1633 struct cifs_sb_info *cifs_sb;
1634 struct cifs_tcon *tcon;
1635 struct cifsInodeInfo *cinode;
1636 struct cifsFileInfo *cfile;
1637 __u16 netfid;
1638 __u32 type;
1640 rc = -EACCES;
1641 xid = get_xid();
1643 cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1644 cmd, flock->fl_flags, flock->fl_type,
1645 flock->fl_start, flock->fl_end);
1647 cfile = (struct cifsFileInfo *)file->private_data;
1648 tcon = tlink_tcon(cfile->tlink);
1650 cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1651 tcon->ses->server);
1653 cifs_sb = CIFS_FILE_SB(file);
1654 netfid = cfile->fid.netfid;
1655 cinode = CIFS_I(file_inode(file));
1657 if (cap_unix(tcon->ses) &&
1658 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1659 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1660 posix_lck = true;
1662 * BB add code here to normalize offset and length to account for
1663 * negative length which we can not accept over the wire.
1665 if (IS_GETLK(cmd)) {
1666 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1667 free_xid(xid);
1668 return rc;
1671 if (!lock && !unlock) {
1673 * if no lock or unlock then nothing to do since we do not
1674 * know what it is
1676 free_xid(xid);
1677 return -EOPNOTSUPP;
1680 rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1681 xid);
1682 free_xid(xid);
1683 return rc;
1687 * update the file size (if needed) after a write. Should be called with
1688 * the inode->i_lock held
1690 void
1691 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1692 unsigned int bytes_written)
1694 loff_t end_of_write = offset + bytes_written;
1696 if (end_of_write > cifsi->server_eof)
1697 cifsi->server_eof = end_of_write;
1700 static ssize_t
1701 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1702 size_t write_size, loff_t *offset)
1704 int rc = 0;
1705 unsigned int bytes_written = 0;
1706 unsigned int total_written;
1707 struct cifs_sb_info *cifs_sb;
1708 struct cifs_tcon *tcon;
1709 struct TCP_Server_Info *server;
1710 unsigned int xid;
1711 struct dentry *dentry = open_file->dentry;
1712 struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1713 struct cifs_io_parms io_parms;
1715 cifs_sb = CIFS_SB(dentry->d_sb);
1717 cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1718 write_size, *offset, dentry);
1720 tcon = tlink_tcon(open_file->tlink);
1721 server = tcon->ses->server;
1723 if (!server->ops->sync_write)
1724 return -ENOSYS;
1726 xid = get_xid();
1728 for (total_written = 0; write_size > total_written;
1729 total_written += bytes_written) {
1730 rc = -EAGAIN;
1731 while (rc == -EAGAIN) {
1732 struct kvec iov[2];
1733 unsigned int len;
1735 if (open_file->invalidHandle) {
1736 /* we could deadlock if we called
1737 filemap_fdatawait from here so tell
1738 reopen_file not to flush data to
1739 server now */
1740 rc = cifs_reopen_file(open_file, false);
1741 if (rc != 0)
1742 break;
1745 len = min(server->ops->wp_retry_size(d_inode(dentry)),
1746 (unsigned int)write_size - total_written);
1747 /* iov[0] is reserved for smb header */
1748 iov[1].iov_base = (char *)write_data + total_written;
1749 iov[1].iov_len = len;
1750 io_parms.pid = pid;
1751 io_parms.tcon = tcon;
1752 io_parms.offset = *offset;
1753 io_parms.length = len;
1754 rc = server->ops->sync_write(xid, &open_file->fid,
1755 &io_parms, &bytes_written, iov, 1);
1757 if (rc || (bytes_written == 0)) {
1758 if (total_written)
1759 break;
1760 else {
1761 free_xid(xid);
1762 return rc;
1764 } else {
1765 spin_lock(&d_inode(dentry)->i_lock);
1766 cifs_update_eof(cifsi, *offset, bytes_written);
1767 spin_unlock(&d_inode(dentry)->i_lock);
1768 *offset += bytes_written;
1772 cifs_stats_bytes_written(tcon, total_written);
1774 if (total_written > 0) {
1775 spin_lock(&d_inode(dentry)->i_lock);
1776 if (*offset > d_inode(dentry)->i_size)
1777 i_size_write(d_inode(dentry), *offset);
1778 spin_unlock(&d_inode(dentry)->i_lock);
1780 mark_inode_dirty_sync(d_inode(dentry));
1781 free_xid(xid);
1782 return total_written;
1785 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1786 bool fsuid_only)
1788 struct cifsFileInfo *open_file = NULL;
1789 struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1790 struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
1792 /* only filter by fsuid on multiuser mounts */
1793 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1794 fsuid_only = false;
1796 spin_lock(&tcon->open_file_lock);
1797 /* we could simply get the first_list_entry since write-only entries
1798 are always at the end of the list but since the first entry might
1799 have a close pending, we go through the whole list */
1800 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1801 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1802 continue;
1803 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1804 if (!open_file->invalidHandle) {
1805 /* found a good file */
1806 /* lock it so it will not be closed on us */
1807 cifsFileInfo_get(open_file);
1808 spin_unlock(&tcon->open_file_lock);
1809 return open_file;
1810 } /* else might as well continue, and look for
1811 another, or simply have the caller reopen it
1812 again rather than trying to fix this handle */
1813 } else /* write only file */
1814 break; /* write only files are last so must be done */
1816 spin_unlock(&tcon->open_file_lock);
1817 return NULL;
1820 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1821 bool fsuid_only)
1823 struct cifsFileInfo *open_file, *inv_file = NULL;
1824 struct cifs_sb_info *cifs_sb;
1825 struct cifs_tcon *tcon;
1826 bool any_available = false;
1827 int rc;
1828 unsigned int refind = 0;
1830 /* Having a null inode here (because mapping->host was set to zero by
1831 the VFS or MM) should not happen but we had reports of on oops (due to
1832 it being zero) during stress testcases so we need to check for it */
1834 if (cifs_inode == NULL) {
1835 cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
1836 dump_stack();
1837 return NULL;
1840 cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1841 tcon = cifs_sb_master_tcon(cifs_sb);
1843 /* only filter by fsuid on multiuser mounts */
1844 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1845 fsuid_only = false;
1847 spin_lock(&tcon->open_file_lock);
1848 refind_writable:
1849 if (refind > MAX_REOPEN_ATT) {
1850 spin_unlock(&tcon->open_file_lock);
1851 return NULL;
1853 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1854 if (!any_available && open_file->pid != current->tgid)
1855 continue;
1856 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1857 continue;
1858 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1859 if (!open_file->invalidHandle) {
1860 /* found a good writable file */
1861 cifsFileInfo_get(open_file);
1862 spin_unlock(&tcon->open_file_lock);
1863 return open_file;
1864 } else {
1865 if (!inv_file)
1866 inv_file = open_file;
1870 /* couldn't find useable FH with same pid, try any available */
1871 if (!any_available) {
1872 any_available = true;
1873 goto refind_writable;
1876 if (inv_file) {
1877 any_available = false;
1878 cifsFileInfo_get(inv_file);
1881 spin_unlock(&tcon->open_file_lock);
1883 if (inv_file) {
1884 rc = cifs_reopen_file(inv_file, false);
1885 if (!rc)
1886 return inv_file;
1887 else {
1888 spin_lock(&tcon->open_file_lock);
1889 list_move_tail(&inv_file->flist,
1890 &cifs_inode->openFileList);
1891 spin_unlock(&tcon->open_file_lock);
1892 cifsFileInfo_put(inv_file);
1893 ++refind;
1894 inv_file = NULL;
1895 spin_lock(&tcon->open_file_lock);
1896 goto refind_writable;
1900 return NULL;
1903 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1905 struct address_space *mapping = page->mapping;
1906 loff_t offset = (loff_t)page->index << PAGE_SHIFT;
1907 char *write_data;
1908 int rc = -EFAULT;
1909 int bytes_written = 0;
1910 struct inode *inode;
1911 struct cifsFileInfo *open_file;
1913 if (!mapping || !mapping->host)
1914 return -EFAULT;
1916 inode = page->mapping->host;
1918 offset += (loff_t)from;
1919 write_data = kmap(page);
1920 write_data += from;
1922 if ((to > PAGE_SIZE) || (from > to)) {
1923 kunmap(page);
1924 return -EIO;
1927 /* racing with truncate? */
1928 if (offset > mapping->host->i_size) {
1929 kunmap(page);
1930 return 0; /* don't care */
1933 /* check to make sure that we are not extending the file */
1934 if (mapping->host->i_size - offset < (loff_t)to)
1935 to = (unsigned)(mapping->host->i_size - offset);
1937 open_file = find_writable_file(CIFS_I(mapping->host), false);
1938 if (open_file) {
1939 bytes_written = cifs_write(open_file, open_file->pid,
1940 write_data, to - from, &offset);
1941 cifsFileInfo_put(open_file);
1942 /* Does mm or vfs already set times? */
1943 inode->i_atime = inode->i_mtime = current_time(inode);
1944 if ((bytes_written > 0) && (offset))
1945 rc = 0;
1946 else if (bytes_written < 0)
1947 rc = bytes_written;
1948 } else {
1949 cifs_dbg(FYI, "No writeable filehandles for inode\n");
1950 rc = -EIO;
1953 kunmap(page);
1954 return rc;
1957 static struct cifs_writedata *
1958 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
1959 pgoff_t end, pgoff_t *index,
1960 unsigned int *found_pages)
1962 unsigned int nr_pages;
1963 struct page **pages;
1964 struct cifs_writedata *wdata;
1966 wdata = cifs_writedata_alloc((unsigned int)tofind,
1967 cifs_writev_complete);
1968 if (!wdata)
1969 return NULL;
1972 * find_get_pages_tag seems to return a max of 256 on each
1973 * iteration, so we must call it several times in order to
1974 * fill the array or the wsize is effectively limited to
1975 * 256 * PAGE_SIZE.
1977 *found_pages = 0;
1978 pages = wdata->pages;
1979 do {
1980 nr_pages = find_get_pages_tag(mapping, index,
1981 PAGECACHE_TAG_DIRTY, tofind,
1982 pages);
1983 *found_pages += nr_pages;
1984 tofind -= nr_pages;
1985 pages += nr_pages;
1986 } while (nr_pages && tofind && *index <= end);
1988 return wdata;
1991 static unsigned int
1992 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
1993 struct address_space *mapping,
1994 struct writeback_control *wbc,
1995 pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
1997 unsigned int nr_pages = 0, i;
1998 struct page *page;
2000 for (i = 0; i < found_pages; i++) {
2001 page = wdata->pages[i];
2003 * At this point we hold neither mapping->tree_lock nor
2004 * lock on the page itself: the page may be truncated or
2005 * invalidated (changing page->mapping to NULL), or even
2006 * swizzled back from swapper_space to tmpfs file
2007 * mapping
2010 if (nr_pages == 0)
2011 lock_page(page);
2012 else if (!trylock_page(page))
2013 break;
2015 if (unlikely(page->mapping != mapping)) {
2016 unlock_page(page);
2017 break;
2020 if (!wbc->range_cyclic && page->index > end) {
2021 *done = true;
2022 unlock_page(page);
2023 break;
2026 if (*next && (page->index != *next)) {
2027 /* Not next consecutive page */
2028 unlock_page(page);
2029 break;
2032 if (wbc->sync_mode != WB_SYNC_NONE)
2033 wait_on_page_writeback(page);
2035 if (PageWriteback(page) ||
2036 !clear_page_dirty_for_io(page)) {
2037 unlock_page(page);
2038 break;
2042 * This actually clears the dirty bit in the radix tree.
2043 * See cifs_writepage() for more commentary.
2045 set_page_writeback(page);
2046 if (page_offset(page) >= i_size_read(mapping->host)) {
2047 *done = true;
2048 unlock_page(page);
2049 end_page_writeback(page);
2050 break;
2053 wdata->pages[i] = page;
2054 *next = page->index + 1;
2055 ++nr_pages;
2058 /* reset index to refind any pages skipped */
2059 if (nr_pages == 0)
2060 *index = wdata->pages[0]->index + 1;
2062 /* put any pages we aren't going to use */
2063 for (i = nr_pages; i < found_pages; i++) {
2064 put_page(wdata->pages[i]);
2065 wdata->pages[i] = NULL;
2068 return nr_pages;
2071 static int
2072 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2073 struct address_space *mapping, struct writeback_control *wbc)
2075 int rc = 0;
2076 struct TCP_Server_Info *server;
2077 unsigned int i;
2079 wdata->sync_mode = wbc->sync_mode;
2080 wdata->nr_pages = nr_pages;
2081 wdata->offset = page_offset(wdata->pages[0]);
2082 wdata->pagesz = PAGE_SIZE;
2083 wdata->tailsz = min(i_size_read(mapping->host) -
2084 page_offset(wdata->pages[nr_pages - 1]),
2085 (loff_t)PAGE_SIZE);
2086 wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2088 if (wdata->cfile != NULL)
2089 cifsFileInfo_put(wdata->cfile);
2090 wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
2091 if (!wdata->cfile) {
2092 cifs_dbg(VFS, "No writable handles for inode\n");
2093 rc = -EBADF;
2094 } else {
2095 wdata->pid = wdata->cfile->pid;
2096 server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2097 rc = server->ops->async_writev(wdata, cifs_writedata_release);
2100 for (i = 0; i < nr_pages; ++i)
2101 unlock_page(wdata->pages[i]);
2103 return rc;
2106 static int cifs_writepages(struct address_space *mapping,
2107 struct writeback_control *wbc)
2109 struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
2110 struct TCP_Server_Info *server;
2111 bool done = false, scanned = false, range_whole = false;
2112 pgoff_t end, index;
2113 struct cifs_writedata *wdata;
2114 int rc = 0;
2117 * If wsize is smaller than the page cache size, default to writing
2118 * one page at a time via cifs_writepage
2120 if (cifs_sb->wsize < PAGE_SIZE)
2121 return generic_writepages(mapping, wbc);
2123 if (wbc->range_cyclic) {
2124 index = mapping->writeback_index; /* Start from prev offset */
2125 end = -1;
2126 } else {
2127 index = wbc->range_start >> PAGE_SHIFT;
2128 end = wbc->range_end >> PAGE_SHIFT;
2129 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2130 range_whole = true;
2131 scanned = true;
2133 server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2134 retry:
2135 while (!done && index <= end) {
2136 unsigned int i, nr_pages, found_pages, wsize, credits;
2137 pgoff_t next = 0, tofind, saved_index = index;
2139 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2140 &wsize, &credits);
2141 if (rc)
2142 break;
2144 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2146 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2147 &found_pages);
2148 if (!wdata) {
2149 rc = -ENOMEM;
2150 add_credits_and_wake_if(server, credits, 0);
2151 break;
2154 if (found_pages == 0) {
2155 kref_put(&wdata->refcount, cifs_writedata_release);
2156 add_credits_and_wake_if(server, credits, 0);
2157 break;
2160 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2161 end, &index, &next, &done);
2163 /* nothing to write? */
2164 if (nr_pages == 0) {
2165 kref_put(&wdata->refcount, cifs_writedata_release);
2166 add_credits_and_wake_if(server, credits, 0);
2167 continue;
2170 wdata->credits = credits;
2172 rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2174 /* send failure -- clean up the mess */
2175 if (rc != 0) {
2176 add_credits_and_wake_if(server, wdata->credits, 0);
2177 for (i = 0; i < nr_pages; ++i) {
2178 if (rc == -EAGAIN)
2179 redirty_page_for_writepage(wbc,
2180 wdata->pages[i]);
2181 else
2182 SetPageError(wdata->pages[i]);
2183 end_page_writeback(wdata->pages[i]);
2184 put_page(wdata->pages[i]);
2186 if (rc != -EAGAIN)
2187 mapping_set_error(mapping, rc);
2189 kref_put(&wdata->refcount, cifs_writedata_release);
2191 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2192 index = saved_index;
2193 continue;
2196 wbc->nr_to_write -= nr_pages;
2197 if (wbc->nr_to_write <= 0)
2198 done = true;
2200 index = next;
2203 if (!scanned && !done) {
2205 * We hit the last page and there is more work to be done: wrap
2206 * back to the start of the file
2208 scanned = true;
2209 index = 0;
2210 goto retry;
2213 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2214 mapping->writeback_index = index;
2216 return rc;
2219 static int
2220 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2222 int rc;
2223 unsigned int xid;
2225 xid = get_xid();
2226 /* BB add check for wbc flags */
2227 get_page(page);
2228 if (!PageUptodate(page))
2229 cifs_dbg(FYI, "ppw - page not up to date\n");
2232 * Set the "writeback" flag, and clear "dirty" in the radix tree.
2234 * A writepage() implementation always needs to do either this,
2235 * or re-dirty the page with "redirty_page_for_writepage()" in
2236 * the case of a failure.
2238 * Just unlocking the page will cause the radix tree tag-bits
2239 * to fail to update with the state of the page correctly.
2241 set_page_writeback(page);
2242 retry_write:
2243 rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2244 if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
2245 goto retry_write;
2246 else if (rc == -EAGAIN)
2247 redirty_page_for_writepage(wbc, page);
2248 else if (rc != 0)
2249 SetPageError(page);
2250 else
2251 SetPageUptodate(page);
2252 end_page_writeback(page);
2253 put_page(page);
2254 free_xid(xid);
2255 return rc;
/*
 * Write @page via cifs_writepage_locked(), then unlock it.
 * Returns the status of the write.
 */
static int cifs_writepage(struct page *page, struct writeback_control *wbc)
{
	int status;

	status = cifs_writepage_locked(page, wbc);
	unlock_page(page);

	return status;
}
2265 static int cifs_write_end(struct file *file, struct address_space *mapping,
2266 loff_t pos, unsigned len, unsigned copied,
2267 struct page *page, void *fsdata)
2269 int rc;
2270 struct inode *inode = mapping->host;
2271 struct cifsFileInfo *cfile = file->private_data;
2272 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2273 __u32 pid;
2275 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2276 pid = cfile->pid;
2277 else
2278 pid = current->tgid;
2280 cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2281 page, pos, copied);
2283 if (PageChecked(page)) {
2284 if (copied == len)
2285 SetPageUptodate(page);
2286 ClearPageChecked(page);
2287 } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2288 SetPageUptodate(page);
2290 if (!PageUptodate(page)) {
2291 char *page_data;
2292 unsigned offset = pos & (PAGE_SIZE - 1);
2293 unsigned int xid;
2295 xid = get_xid();
2296 /* this is probably better than directly calling
2297 partialpage_write since in this function the file handle is
2298 known which we might as well leverage */
2299 /* BB check if anything else missing out of ppw
2300 such as updating last write time */
2301 page_data = kmap(page);
2302 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2303 /* if (rc < 0) should we set writebehind rc? */
2304 kunmap(page);
2306 free_xid(xid);
2307 } else {
2308 rc = copied;
2309 pos += copied;
2310 set_page_dirty(page);
2313 if (rc > 0) {
2314 spin_lock(&inode->i_lock);
2315 if (pos > inode->i_size)
2316 i_size_write(inode, pos);
2317 spin_unlock(&inode->i_lock);
2320 unlock_page(page);
2321 put_page(page);
2323 return rc;
2326 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2327 int datasync)
2329 unsigned int xid;
2330 int rc = 0;
2331 struct cifs_tcon *tcon;
2332 struct TCP_Server_Info *server;
2333 struct cifsFileInfo *smbfile = file->private_data;
2334 struct inode *inode = file_inode(file);
2335 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2337 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2338 if (rc)
2339 return rc;
2340 inode_lock(inode);
2342 xid = get_xid();
2344 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2345 file, datasync);
2347 if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2348 rc = cifs_zap_mapping(inode);
2349 if (rc) {
2350 cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2351 rc = 0; /* don't care about it in fsync */
2355 tcon = tlink_tcon(smbfile->tlink);
2356 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2357 server = tcon->ses->server;
2358 if (server->ops->flush)
2359 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2360 else
2361 rc = -ENOSYS;
2364 free_xid(xid);
2365 inode_unlock(inode);
2366 return rc;
2369 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2371 unsigned int xid;
2372 int rc = 0;
2373 struct cifs_tcon *tcon;
2374 struct TCP_Server_Info *server;
2375 struct cifsFileInfo *smbfile = file->private_data;
2376 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2377 struct inode *inode = file->f_mapping->host;
2379 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2380 if (rc)
2381 return rc;
2382 inode_lock(inode);
2384 xid = get_xid();
2386 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2387 file, datasync);
2389 tcon = tlink_tcon(smbfile->tlink);
2390 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2391 server = tcon->ses->server;
2392 if (server->ops->flush)
2393 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2394 else
2395 rc = -ENOSYS;
2398 free_xid(xid);
2399 inode_unlock(inode);
2400 return rc;
2404 * As file closes, flush all cached write data for this inode checking
2405 * for write behind errors.
2407 int cifs_flush(struct file *file, fl_owner_t id)
2409 struct inode *inode = file_inode(file);
2410 int rc = 0;
2412 if (file->f_mode & FMODE_WRITE)
2413 rc = filemap_write_and_wait(inode->i_mapping);
2415 cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2417 return rc;
2420 static int
2421 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2423 int rc = 0;
2424 unsigned long i;
2426 for (i = 0; i < num_pages; i++) {
2427 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2428 if (!pages[i]) {
2430 * save number of pages we have already allocated and
2431 * return with ENOMEM error
2433 num_pages = i;
2434 rc = -ENOMEM;
2435 break;
2439 if (rc) {
2440 for (i = 0; i < num_pages; i++)
2441 put_page(pages[i]);
2443 return rc;
2446 static inline
2447 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2449 size_t num_pages;
2450 size_t clen;
2452 clen = min_t(const size_t, len, wsize);
2453 num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2455 if (cur_len)
2456 *cur_len = clen;
2458 return num_pages;
2461 static void
2462 cifs_uncached_writedata_release(struct kref *refcount)
2464 int i;
2465 struct cifs_writedata *wdata = container_of(refcount,
2466 struct cifs_writedata, refcount);
2468 for (i = 0; i < wdata->nr_pages; i++)
2469 put_page(wdata->pages[i]);
2470 cifs_writedata_release(refcount);
2473 static void
2474 cifs_uncached_writev_complete(struct work_struct *work)
2476 struct cifs_writedata *wdata = container_of(work,
2477 struct cifs_writedata, work);
2478 struct inode *inode = d_inode(wdata->cfile->dentry);
2479 struct cifsInodeInfo *cifsi = CIFS_I(inode);
2481 spin_lock(&inode->i_lock);
2482 cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2483 if (cifsi->server_eof > inode->i_size)
2484 i_size_write(inode, cifsi->server_eof);
2485 spin_unlock(&inode->i_lock);
2487 complete(&wdata->done);
2489 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2492 static int
2493 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2494 size_t *len, unsigned long *num_pages)
2496 size_t save_len, copied, bytes, cur_len = *len;
2497 unsigned long i, nr_pages = *num_pages;
2499 save_len = cur_len;
2500 for (i = 0; i < nr_pages; i++) {
2501 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2502 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2503 cur_len -= copied;
2505 * If we didn't copy as much as we expected, then that
2506 * may mean we trod into an unmapped area. Stop copying
2507 * at that point. On the next pass through the big
2508 * loop, we'll likely end up getting a zero-length
2509 * write and bailing out of it.
2511 if (copied < bytes)
2512 break;
2514 cur_len = save_len - cur_len;
2515 *len = cur_len;
2518 * If we have no data to send, then that probably means that
2519 * the copy above failed altogether. That's most likely because
2520 * the address in the iovec was bogus. Return -EFAULT and let
2521 * the caller free anything we allocated and bail out.
2523 if (!cur_len)
2524 return -EFAULT;
2527 * i + 1 now represents the number of pages we actually used in
2528 * the copy phase above.
2530 *num_pages = i + 1;
2531 return 0;
2534 static int
2535 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2536 struct cifsFileInfo *open_file,
2537 struct cifs_sb_info *cifs_sb, struct list_head *wdata_list)
2539 int rc = 0;
2540 size_t cur_len;
2541 unsigned long nr_pages, num_pages, i;
2542 struct cifs_writedata *wdata;
2543 struct iov_iter saved_from = *from;
2544 loff_t saved_offset = offset;
2545 pid_t pid;
2546 struct TCP_Server_Info *server;
2548 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2549 pid = open_file->pid;
2550 else
2551 pid = current->tgid;
2553 server = tlink_tcon(open_file->tlink)->ses->server;
2555 do {
2556 unsigned int wsize, credits;
2558 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2559 &wsize, &credits);
2560 if (rc)
2561 break;
2563 nr_pages = get_numpages(wsize, len, &cur_len);
2564 wdata = cifs_writedata_alloc(nr_pages,
2565 cifs_uncached_writev_complete);
2566 if (!wdata) {
2567 rc = -ENOMEM;
2568 add_credits_and_wake_if(server, credits, 0);
2569 break;
2572 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2573 if (rc) {
2574 kfree(wdata);
2575 add_credits_and_wake_if(server, credits, 0);
2576 break;
2579 num_pages = nr_pages;
2580 rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages);
2581 if (rc) {
2582 for (i = 0; i < nr_pages; i++)
2583 put_page(wdata->pages[i]);
2584 kfree(wdata);
2585 add_credits_and_wake_if(server, credits, 0);
2586 break;
2590 * Bring nr_pages down to the number of pages we actually used,
2591 * and free any pages that we didn't use.
2593 for ( ; nr_pages > num_pages; nr_pages--)
2594 put_page(wdata->pages[nr_pages - 1]);
2596 wdata->sync_mode = WB_SYNC_ALL;
2597 wdata->nr_pages = nr_pages;
2598 wdata->offset = (__u64)offset;
2599 wdata->cfile = cifsFileInfo_get(open_file);
2600 wdata->pid = pid;
2601 wdata->bytes = cur_len;
2602 wdata->pagesz = PAGE_SIZE;
2603 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2604 wdata->credits = credits;
2606 if (!wdata->cfile->invalidHandle ||
2607 !(rc = cifs_reopen_file(wdata->cfile, false)))
2608 rc = server->ops->async_writev(wdata,
2609 cifs_uncached_writedata_release);
2610 if (rc) {
2611 add_credits_and_wake_if(server, wdata->credits, 0);
2612 kref_put(&wdata->refcount,
2613 cifs_uncached_writedata_release);
2614 if (rc == -EAGAIN) {
2615 *from = saved_from;
2616 iov_iter_advance(from, offset - saved_offset);
2617 continue;
2619 break;
2622 list_add_tail(&wdata->list, wdata_list);
2623 offset += cur_len;
2624 len -= cur_len;
2625 } while (len > 0);
2627 return rc;
2630 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
2632 struct file *file = iocb->ki_filp;
2633 ssize_t total_written = 0;
2634 struct cifsFileInfo *open_file;
2635 struct cifs_tcon *tcon;
2636 struct cifs_sb_info *cifs_sb;
2637 struct cifs_writedata *wdata, *tmp;
2638 struct list_head wdata_list;
2639 struct iov_iter saved_from = *from;
2640 int rc;
2643 * BB - optimize the way when signing is disabled. We can drop this
2644 * extra memory-to-memory copying and use iovec buffers for constructing
2645 * write request.
2648 rc = generic_write_checks(iocb, from);
2649 if (rc <= 0)
2650 return rc;
2652 INIT_LIST_HEAD(&wdata_list);
2653 cifs_sb = CIFS_FILE_SB(file);
2654 open_file = file->private_data;
2655 tcon = tlink_tcon(open_file->tlink);
2657 if (!tcon->ses->server->ops->async_writev)
2658 return -ENOSYS;
2660 rc = cifs_write_from_iter(iocb->ki_pos, iov_iter_count(from), from,
2661 open_file, cifs_sb, &wdata_list);
2664 * If at least one write was successfully sent, then discard any rc
2665 * value from the later writes. If the other write succeeds, then
2666 * we'll end up returning whatever was written. If it fails, then
2667 * we'll get a new rc value from that.
2669 if (!list_empty(&wdata_list))
2670 rc = 0;
2673 * Wait for and collect replies for any successful sends in order of
2674 * increasing offset. Once an error is hit or we get a fatal signal
2675 * while waiting, then return without waiting for any more replies.
2677 restart_loop:
2678 list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2679 if (!rc) {
2680 /* FIXME: freezable too? */
2681 rc = wait_for_completion_killable(&wdata->done);
2682 if (rc)
2683 rc = -EINTR;
2684 else if (wdata->result)
2685 rc = wdata->result;
2686 else
2687 total_written += wdata->bytes;
2689 /* resend call if it's a retryable error */
2690 if (rc == -EAGAIN) {
2691 struct list_head tmp_list;
2692 struct iov_iter tmp_from = saved_from;
2694 INIT_LIST_HEAD(&tmp_list);
2695 list_del_init(&wdata->list);
2697 iov_iter_advance(&tmp_from,
2698 wdata->offset - iocb->ki_pos);
2700 rc = cifs_write_from_iter(wdata->offset,
2701 wdata->bytes, &tmp_from,
2702 open_file, cifs_sb, &tmp_list);
2704 list_splice(&tmp_list, &wdata_list);
2706 kref_put(&wdata->refcount,
2707 cifs_uncached_writedata_release);
2708 goto restart_loop;
2711 list_del_init(&wdata->list);
2712 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2715 if (unlikely(!total_written))
2716 return rc;
2718 iocb->ki_pos += total_written;
2719 set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(file_inode(file))->flags);
2720 cifs_stats_bytes_written(tcon, total_written);
2721 return total_written;
2724 static ssize_t
2725 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
2727 struct file *file = iocb->ki_filp;
2728 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2729 struct inode *inode = file->f_mapping->host;
2730 struct cifsInodeInfo *cinode = CIFS_I(inode);
2731 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2732 ssize_t rc;
2735 * We need to hold the sem to be sure nobody modifies lock list
2736 * with a brlock that prevents writing.
2738 down_read(&cinode->lock_sem);
2739 inode_lock(inode);
2741 rc = generic_write_checks(iocb, from);
2742 if (rc <= 0)
2743 goto out;
2745 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
2746 server->vals->exclusive_lock_type, NULL,
2747 CIFS_WRITE_OP))
2748 rc = __generic_file_write_iter(iocb, from);
2749 else
2750 rc = -EACCES;
2751 out:
2752 inode_unlock(inode);
2754 if (rc > 0)
2755 rc = generic_write_sync(iocb, rc);
2756 up_read(&cinode->lock_sem);
2757 return rc;
2760 ssize_t
2761 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
2763 struct inode *inode = file_inode(iocb->ki_filp);
2764 struct cifsInodeInfo *cinode = CIFS_I(inode);
2765 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2766 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2767 iocb->ki_filp->private_data;
2768 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2769 ssize_t written;
2771 written = cifs_get_writer(cinode);
2772 if (written)
2773 return written;
2775 if (CIFS_CACHE_WRITE(cinode)) {
2776 if (cap_unix(tcon->ses) &&
2777 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2778 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
2779 written = generic_file_write_iter(iocb, from);
2780 goto out;
2782 written = cifs_writev(iocb, from);
2783 goto out;
2786 * For non-oplocked files in strict cache mode we need to write the data
2787 * to the server exactly from the pos to pos+len-1 rather than flush all
2788 * affected pages because it may cause a error with mandatory locks on
2789 * these pages but not on the region from pos to ppos+len-1.
2791 written = cifs_user_writev(iocb, from);
2792 if (written > 0 && CIFS_CACHE_READ(cinode)) {
2794 * Windows 7 server can delay breaking level2 oplock if a write
2795 * request comes - break it on the client to prevent reading
2796 * an old data.
2798 cifs_zap_mapping(inode);
2799 cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
2800 inode);
2801 cinode->oplock = 0;
2803 out:
2804 cifs_put_writer(cinode);
2805 return written;
2808 static struct cifs_readdata *
2809 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2811 struct cifs_readdata *rdata;
2813 rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2814 GFP_KERNEL);
2815 if (rdata != NULL) {
2816 kref_init(&rdata->refcount);
2817 INIT_LIST_HEAD(&rdata->list);
2818 init_completion(&rdata->done);
2819 INIT_WORK(&rdata->work, complete);
2822 return rdata;
2825 void
2826 cifs_readdata_release(struct kref *refcount)
2828 struct cifs_readdata *rdata = container_of(refcount,
2829 struct cifs_readdata, refcount);
2831 if (rdata->cfile)
2832 cifsFileInfo_put(rdata->cfile);
2834 kfree(rdata);
2837 static int
2838 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2840 int rc = 0;
2841 struct page *page;
2842 unsigned int i;
2844 for (i = 0; i < nr_pages; i++) {
2845 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2846 if (!page) {
2847 rc = -ENOMEM;
2848 break;
2850 rdata->pages[i] = page;
2853 if (rc) {
2854 for (i = 0; i < nr_pages; i++) {
2855 put_page(rdata->pages[i]);
2856 rdata->pages[i] = NULL;
2859 return rc;
2862 static void
2863 cifs_uncached_readdata_release(struct kref *refcount)
2865 struct cifs_readdata *rdata = container_of(refcount,
2866 struct cifs_readdata, refcount);
2867 unsigned int i;
2869 for (i = 0; i < rdata->nr_pages; i++) {
2870 put_page(rdata->pages[i]);
2871 rdata->pages[i] = NULL;
2873 cifs_readdata_release(refcount);
2877 * cifs_readdata_to_iov - copy data from pages in response to an iovec
2878 * @rdata: the readdata response with list of pages holding data
2879 * @iter: destination for our data
2881 * This function copies data from a list of pages in a readdata response into
2882 * an array of iovecs. It will first calculate where the data should go
2883 * based on the info in the readdata and then copy the data into that spot.
2885 static int
2886 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
2888 size_t remaining = rdata->got_bytes;
2889 unsigned int i;
2891 for (i = 0; i < rdata->nr_pages; i++) {
2892 struct page *page = rdata->pages[i];
2893 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
2894 size_t written;
2896 if (unlikely(iter->type & ITER_PIPE)) {
2897 void *addr = kmap_atomic(page);
2899 written = copy_to_iter(addr, copy, iter);
2900 kunmap_atomic(addr);
2901 } else
2902 written = copy_page_to_iter(page, 0, copy, iter);
2903 remaining -= written;
2904 if (written < copy && iov_iter_count(iter) > 0)
2905 break;
2907 return remaining ? -EFAULT : 0;
2910 static void
2911 cifs_uncached_readv_complete(struct work_struct *work)
2913 struct cifs_readdata *rdata = container_of(work,
2914 struct cifs_readdata, work);
2916 complete(&rdata->done);
2917 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2920 static int
2921 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
2922 struct cifs_readdata *rdata, unsigned int len)
2924 int result = 0;
2925 unsigned int i;
2926 unsigned int nr_pages = rdata->nr_pages;
2928 rdata->got_bytes = 0;
2929 rdata->tailsz = PAGE_SIZE;
2930 for (i = 0; i < nr_pages; i++) {
2931 struct page *page = rdata->pages[i];
2932 size_t n;
2934 if (len <= 0) {
2935 /* no need to hold page hostage */
2936 rdata->pages[i] = NULL;
2937 rdata->nr_pages--;
2938 put_page(page);
2939 continue;
2941 n = len;
2942 if (len >= PAGE_SIZE) {
2943 /* enough data to fill the page */
2944 n = PAGE_SIZE;
2945 len -= n;
2946 } else {
2947 zero_user(page, len, PAGE_SIZE - len);
2948 rdata->tailsz = len;
2949 len = 0;
2951 result = cifs_read_page_from_socket(server, page, n);
2952 if (result < 0)
2953 break;
2955 rdata->got_bytes += result;
2958 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
2959 rdata->got_bytes : result;
2962 static int
2963 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
2964 struct cifs_sb_info *cifs_sb, struct list_head *rdata_list)
2966 struct cifs_readdata *rdata;
2967 unsigned int npages, rsize, credits;
2968 size_t cur_len;
2969 int rc;
2970 pid_t pid;
2971 struct TCP_Server_Info *server;
2973 server = tlink_tcon(open_file->tlink)->ses->server;
2975 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2976 pid = open_file->pid;
2977 else
2978 pid = current->tgid;
2980 do {
2981 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
2982 &rsize, &credits);
2983 if (rc)
2984 break;
2986 cur_len = min_t(const size_t, len, rsize);
2987 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2989 /* allocate a readdata struct */
2990 rdata = cifs_readdata_alloc(npages,
2991 cifs_uncached_readv_complete);
2992 if (!rdata) {
2993 add_credits_and_wake_if(server, credits, 0);
2994 rc = -ENOMEM;
2995 break;
2998 rc = cifs_read_allocate_pages(rdata, npages);
2999 if (rc)
3000 goto error;
3002 rdata->cfile = cifsFileInfo_get(open_file);
3003 rdata->nr_pages = npages;
3004 rdata->offset = offset;
3005 rdata->bytes = cur_len;
3006 rdata->pid = pid;
3007 rdata->pagesz = PAGE_SIZE;
3008 rdata->read_into_pages = cifs_uncached_read_into_pages;
3009 rdata->credits = credits;
3011 if (!rdata->cfile->invalidHandle ||
3012 !(rc = cifs_reopen_file(rdata->cfile, true)))
3013 rc = server->ops->async_readv(rdata);
3014 error:
3015 if (rc) {
3016 add_credits_and_wake_if(server, rdata->credits, 0);
3017 kref_put(&rdata->refcount,
3018 cifs_uncached_readdata_release);
3019 if (rc == -EAGAIN)
3020 continue;
3021 break;
3024 list_add_tail(&rdata->list, rdata_list);
3025 offset += cur_len;
3026 len -= cur_len;
3027 } while (len > 0);
3029 return rc;
3032 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
3034 struct file *file = iocb->ki_filp;
3035 ssize_t rc;
3036 size_t len;
3037 ssize_t total_read = 0;
3038 loff_t offset = iocb->ki_pos;
3039 struct cifs_sb_info *cifs_sb;
3040 struct cifs_tcon *tcon;
3041 struct cifsFileInfo *open_file;
3042 struct cifs_readdata *rdata, *tmp;
3043 struct list_head rdata_list;
3045 len = iov_iter_count(to);
3046 if (!len)
3047 return 0;
3049 INIT_LIST_HEAD(&rdata_list);
3050 cifs_sb = CIFS_FILE_SB(file);
3051 open_file = file->private_data;
3052 tcon = tlink_tcon(open_file->tlink);
3054 if (!tcon->ses->server->ops->async_readv)
3055 return -ENOSYS;
3057 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3058 cifs_dbg(FYI, "attempting read on write only file instance\n");
3060 rc = cifs_send_async_read(offset, len, open_file, cifs_sb, &rdata_list);
3062 /* if at least one read request send succeeded, then reset rc */
3063 if (!list_empty(&rdata_list))
3064 rc = 0;
3066 len = iov_iter_count(to);
3067 /* the loop below should proceed in the order of increasing offsets */
3068 again:
3069 list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
3070 if (!rc) {
3071 /* FIXME: freezable sleep too? */
3072 rc = wait_for_completion_killable(&rdata->done);
3073 if (rc)
3074 rc = -EINTR;
3075 else if (rdata->result == -EAGAIN) {
3076 /* resend call if it's a retryable error */
3077 struct list_head tmp_list;
3078 unsigned int got_bytes = rdata->got_bytes;
3080 list_del_init(&rdata->list);
3081 INIT_LIST_HEAD(&tmp_list);
3084 * Got a part of data and then reconnect has
3085 * happened -- fill the buffer and continue
3086 * reading.
3088 if (got_bytes && got_bytes < rdata->bytes) {
3089 rc = cifs_readdata_to_iov(rdata, to);
3090 if (rc) {
3091 kref_put(&rdata->refcount,
3092 cifs_uncached_readdata_release);
3093 continue;
3097 rc = cifs_send_async_read(
3098 rdata->offset + got_bytes,
3099 rdata->bytes - got_bytes,
3100 rdata->cfile, cifs_sb,
3101 &tmp_list);
3103 list_splice(&tmp_list, &rdata_list);
3105 kref_put(&rdata->refcount,
3106 cifs_uncached_readdata_release);
3107 goto again;
3108 } else if (rdata->result)
3109 rc = rdata->result;
3110 else
3111 rc = cifs_readdata_to_iov(rdata, to);
3113 /* if there was a short read -- discard anything left */
3114 if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3115 rc = -ENODATA;
3117 list_del_init(&rdata->list);
3118 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3121 total_read = len - iov_iter_count(to);
3123 cifs_stats_bytes_read(tcon, total_read);
3125 /* mask nodata case */
3126 if (rc == -ENODATA)
3127 rc = 0;
3129 if (total_read) {
3130 iocb->ki_pos += total_read;
3131 return total_read;
3133 return rc;
3136 ssize_t
3137 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3139 struct inode *inode = file_inode(iocb->ki_filp);
3140 struct cifsInodeInfo *cinode = CIFS_I(inode);
3141 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3142 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3143 iocb->ki_filp->private_data;
3144 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3145 int rc = -EACCES;
3148 * In strict cache mode we need to read from the server all the time
3149 * if we don't have level II oplock because the server can delay mtime
3150 * change - so we can't make a decision about inode invalidating.
3151 * And we can also fail with pagereading if there are mandatory locks
3152 * on pages affected by this read but not on the region from pos to
3153 * pos+len-1.
3155 if (!CIFS_CACHE_READ(cinode))
3156 return cifs_user_readv(iocb, to);
3158 if (cap_unix(tcon->ses) &&
3159 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3160 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3161 return generic_file_read_iter(iocb, to);
3164 * We need to hold the sem to be sure nobody modifies lock list
3165 * with a brlock that prevents reading.
3167 down_read(&cinode->lock_sem);
3168 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
3169 tcon->ses->server->vals->shared_lock_type,
3170 NULL, CIFS_READ_OP))
3171 rc = generic_file_read_iter(iocb, to);
3172 up_read(&cinode->lock_sem);
3173 return rc;
3176 static ssize_t
3177 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
3179 int rc = -EACCES;
3180 unsigned int bytes_read = 0;
3181 unsigned int total_read;
3182 unsigned int current_read_size;
3183 unsigned int rsize;
3184 struct cifs_sb_info *cifs_sb;
3185 struct cifs_tcon *tcon;
3186 struct TCP_Server_Info *server;
3187 unsigned int xid;
3188 char *cur_offset;
3189 struct cifsFileInfo *open_file;
3190 struct cifs_io_parms io_parms;
3191 int buf_type = CIFS_NO_BUFFER;
3192 __u32 pid;
3194 xid = get_xid();
3195 cifs_sb = CIFS_FILE_SB(file);
3197 /* FIXME: set up handlers for larger reads and/or convert to async */
3198 rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
3200 if (file->private_data == NULL) {
3201 rc = -EBADF;
3202 free_xid(xid);
3203 return rc;
3205 open_file = file->private_data;
3206 tcon = tlink_tcon(open_file->tlink);
3207 server = tcon->ses->server;
3209 if (!server->ops->sync_read) {
3210 free_xid(xid);
3211 return -ENOSYS;
3214 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3215 pid = open_file->pid;
3216 else
3217 pid = current->tgid;
3219 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3220 cifs_dbg(FYI, "attempting read on write only file instance\n");
3222 for (total_read = 0, cur_offset = read_data; read_size > total_read;
3223 total_read += bytes_read, cur_offset += bytes_read) {
3224 do {
3225 current_read_size = min_t(uint, read_size - total_read,
3226 rsize);
3228 * For windows me and 9x we do not want to request more
3229 * than it negotiated since it will refuse the read
3230 * then.
3232 if ((tcon->ses) && !(tcon->ses->capabilities &
3233 tcon->ses->server->vals->cap_large_files)) {
3234 current_read_size = min_t(uint,
3235 current_read_size, CIFSMaxBufSize);
3237 if (open_file->invalidHandle) {
3238 rc = cifs_reopen_file(open_file, true);
3239 if (rc != 0)
3240 break;
3242 io_parms.pid = pid;
3243 io_parms.tcon = tcon;
3244 io_parms.offset = *offset;
3245 io_parms.length = current_read_size;
3246 rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
3247 &bytes_read, &cur_offset,
3248 &buf_type);
3249 } while (rc == -EAGAIN);
3251 if (rc || (bytes_read == 0)) {
3252 if (total_read) {
3253 break;
3254 } else {
3255 free_xid(xid);
3256 return rc;
3258 } else {
3259 cifs_stats_bytes_read(tcon, total_read);
3260 *offset += bytes_read;
3263 free_xid(xid);
3264 return total_read;
3268 * If the page is mmap'ed into a process' page tables, then we need to make
3269 * sure that it doesn't change while being written back.
3271 static int
3272 cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
3274 struct page *page = vmf->page;
3276 lock_page(page);
3277 return VM_FAULT_LOCKED;
3280 static const struct vm_operations_struct cifs_file_vm_ops = {
3281 .fault = filemap_fault,
3282 .map_pages = filemap_map_pages,
3283 .page_mkwrite = cifs_page_mkwrite,
3286 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3288 int rc, xid;
3289 struct inode *inode = file_inode(file);
3291 xid = get_xid();
3293 if (!CIFS_CACHE_READ(CIFS_I(inode))) {
3294 rc = cifs_zap_mapping(inode);
3295 if (rc)
3296 return rc;
3299 rc = generic_file_mmap(file, vma);
3300 if (rc == 0)
3301 vma->vm_ops = &cifs_file_vm_ops;
3302 free_xid(xid);
3303 return rc;
3306 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3308 int rc, xid;
3310 xid = get_xid();
3311 rc = cifs_revalidate_file(file);
3312 if (rc) {
3313 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3314 rc);
3315 free_xid(xid);
3316 return rc;
3318 rc = generic_file_mmap(file, vma);
3319 if (rc == 0)
3320 vma->vm_ops = &cifs_file_vm_ops;
3321 free_xid(xid);
3322 return rc;
3325 static void
3326 cifs_readv_complete(struct work_struct *work)
3328 unsigned int i, got_bytes;
3329 struct cifs_readdata *rdata = container_of(work,
3330 struct cifs_readdata, work);
3332 got_bytes = rdata->got_bytes;
3333 for (i = 0; i < rdata->nr_pages; i++) {
3334 struct page *page = rdata->pages[i];
3336 lru_cache_add_file(page);
3338 if (rdata->result == 0 ||
3339 (rdata->result == -EAGAIN && got_bytes)) {
3340 flush_dcache_page(page);
3341 SetPageUptodate(page);
3344 unlock_page(page);
3346 if (rdata->result == 0 ||
3347 (rdata->result == -EAGAIN && got_bytes))
3348 cifs_readpage_to_fscache(rdata->mapping->host, page);
3350 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
3352 put_page(page);
3353 rdata->pages[i] = NULL;
3355 kref_put(&rdata->refcount, cifs_readdata_release);
3358 static int
3359 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3360 struct cifs_readdata *rdata, unsigned int len)
3362 int result = 0;
3363 unsigned int i;
3364 u64 eof;
3365 pgoff_t eof_index;
3366 unsigned int nr_pages = rdata->nr_pages;
3368 /* determine the eof that the server (probably) has */
3369 eof = CIFS_I(rdata->mapping->host)->server_eof;
3370 eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
3371 cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
3373 rdata->got_bytes = 0;
3374 rdata->tailsz = PAGE_SIZE;
3375 for (i = 0; i < nr_pages; i++) {
3376 struct page *page = rdata->pages[i];
3377 size_t n = PAGE_SIZE;
3379 if (len >= PAGE_SIZE) {
3380 len -= PAGE_SIZE;
3381 } else if (len > 0) {
3382 /* enough for partial page, fill and zero the rest */
3383 zero_user(page, len, PAGE_SIZE - len);
3384 n = rdata->tailsz = len;
3385 len = 0;
3386 } else if (page->index > eof_index) {
3388 * The VFS will not try to do readahead past the
3389 * i_size, but it's possible that we have outstanding
3390 * writes with gaps in the middle and the i_size hasn't
3391 * caught up yet. Populate those with zeroed out pages
3392 * to prevent the VFS from repeatedly attempting to
3393 * fill them until the writes are flushed.
3395 zero_user(page, 0, PAGE_SIZE);
3396 lru_cache_add_file(page);
3397 flush_dcache_page(page);
3398 SetPageUptodate(page);
3399 unlock_page(page);
3400 put_page(page);
3401 rdata->pages[i] = NULL;
3402 rdata->nr_pages--;
3403 continue;
3404 } else {
3405 /* no need to hold page hostage */
3406 lru_cache_add_file(page);
3407 unlock_page(page);
3408 put_page(page);
3409 rdata->pages[i] = NULL;
3410 rdata->nr_pages--;
3411 continue;
3414 result = cifs_read_page_from_socket(server, page, n);
3415 if (result < 0)
3416 break;
3418 rdata->got_bytes += result;
3421 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3422 rdata->got_bytes : result;
3425 static int
3426 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
3427 unsigned int rsize, struct list_head *tmplist,
3428 unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
3430 struct page *page, *tpage;
3431 unsigned int expected_index;
3432 int rc;
3433 gfp_t gfp = readahead_gfp_mask(mapping);
3435 INIT_LIST_HEAD(tmplist);
3437 page = list_entry(page_list->prev, struct page, lru);
3440 * Lock the page and put it in the cache. Since no one else
3441 * should have access to this page, we're safe to simply set
3442 * PG_locked without checking it first.
3444 __SetPageLocked(page);
3445 rc = add_to_page_cache_locked(page, mapping,
3446 page->index, gfp);
3448 /* give up if we can't stick it in the cache */
3449 if (rc) {
3450 __ClearPageLocked(page);
3451 return rc;
3454 /* move first page to the tmplist */
3455 *offset = (loff_t)page->index << PAGE_SHIFT;
3456 *bytes = PAGE_SIZE;
3457 *nr_pages = 1;
3458 list_move_tail(&page->lru, tmplist);
3460 /* now try and add more pages onto the request */
3461 expected_index = page->index + 1;
3462 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3463 /* discontinuity ? */
3464 if (page->index != expected_index)
3465 break;
3467 /* would this page push the read over the rsize? */
3468 if (*bytes + PAGE_SIZE > rsize)
3469 break;
3471 __SetPageLocked(page);
3472 if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
3473 __ClearPageLocked(page);
3474 break;
3476 list_move_tail(&page->lru, tmplist);
3477 (*bytes) += PAGE_SIZE;
3478 expected_index++;
3479 (*nr_pages)++;
3481 return rc;
3484 static int cifs_readpages(struct file *file, struct address_space *mapping,
3485 struct list_head *page_list, unsigned num_pages)
3487 int rc;
3488 struct list_head tmplist;
3489 struct cifsFileInfo *open_file = file->private_data;
3490 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3491 struct TCP_Server_Info *server;
3492 pid_t pid;
3495 * Reads as many pages as possible from fscache. Returns -ENOBUFS
3496 * immediately if the cookie is negative
3498 * After this point, every page in the list might have PG_fscache set,
3499 * so we will need to clean that up off of every page we don't use.
3501 rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
3502 &num_pages);
3503 if (rc == 0)
3504 return rc;
3506 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3507 pid = open_file->pid;
3508 else
3509 pid = current->tgid;
3511 rc = 0;
3512 server = tlink_tcon(open_file->tlink)->ses->server;
3514 cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
3515 __func__, file, mapping, num_pages);
3518 * Start with the page at end of list and move it to private
3519 * list. Do the same with any following pages until we hit
3520 * the rsize limit, hit an index discontinuity, or run out of
3521 * pages. Issue the async read and then start the loop again
3522 * until the list is empty.
3524 * Note that list order is important. The page_list is in
3525 * the order of declining indexes. When we put the pages in
3526 * the rdata->pages, then we want them in increasing order.
3528 while (!list_empty(page_list)) {
3529 unsigned int i, nr_pages, bytes, rsize;
3530 loff_t offset;
3531 struct page *page, *tpage;
3532 struct cifs_readdata *rdata;
3533 unsigned credits;
3535 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3536 &rsize, &credits);
3537 if (rc)
3538 break;
3541 * Give up immediately if rsize is too small to read an entire
3542 * page. The VFS will fall back to readpage. We should never
3543 * reach this point however since we set ra_pages to 0 when the
3544 * rsize is smaller than a cache page.
3546 if (unlikely(rsize < PAGE_SIZE)) {
3547 add_credits_and_wake_if(server, credits, 0);
3548 return 0;
3551 rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
3552 &nr_pages, &offset, &bytes);
3553 if (rc) {
3554 add_credits_and_wake_if(server, credits, 0);
3555 break;
3558 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3559 if (!rdata) {
3560 /* best to give up if we're out of mem */
3561 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3562 list_del(&page->lru);
3563 lru_cache_add_file(page);
3564 unlock_page(page);
3565 put_page(page);
3567 rc = -ENOMEM;
3568 add_credits_and_wake_if(server, credits, 0);
3569 break;
3572 rdata->cfile = cifsFileInfo_get(open_file);
3573 rdata->mapping = mapping;
3574 rdata->offset = offset;
3575 rdata->bytes = bytes;
3576 rdata->pid = pid;
3577 rdata->pagesz = PAGE_SIZE;
3578 rdata->read_into_pages = cifs_readpages_read_into_pages;
3579 rdata->credits = credits;
3581 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3582 list_del(&page->lru);
3583 rdata->pages[rdata->nr_pages++] = page;
3586 if (!rdata->cfile->invalidHandle ||
3587 !(rc = cifs_reopen_file(rdata->cfile, true)))
3588 rc = server->ops->async_readv(rdata);
3589 if (rc) {
3590 add_credits_and_wake_if(server, rdata->credits, 0);
3591 for (i = 0; i < rdata->nr_pages; i++) {
3592 page = rdata->pages[i];
3593 lru_cache_add_file(page);
3594 unlock_page(page);
3595 put_page(page);
3597 /* Fallback to the readpage in error/reconnect cases */
3598 kref_put(&rdata->refcount, cifs_readdata_release);
3599 break;
3602 kref_put(&rdata->refcount, cifs_readdata_release);
3605 /* Any pages that have been shown to fscache but didn't get added to
3606 * the pagecache must be uncached before they get returned to the
3607 * allocator.
3609 cifs_fscache_readpages_cancel(mapping->host, page_list);
3610 return rc;
3614 * cifs_readpage_worker must be called with the page pinned
3616 static int cifs_readpage_worker(struct file *file, struct page *page,
3617 loff_t *poffset)
3619 char *read_data;
3620 int rc;
3622 /* Is the page cached? */
3623 rc = cifs_readpage_from_fscache(file_inode(file), page);
3624 if (rc == 0)
3625 goto read_complete;
3627 read_data = kmap(page);
3628 /* for reads over a certain size could initiate async read ahead */
3630 rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
3632 if (rc < 0)
3633 goto io_error;
3634 else
3635 cifs_dbg(FYI, "Bytes read %d\n", rc);
3637 file_inode(file)->i_atime =
3638 current_time(file_inode(file));
3640 if (PAGE_SIZE > rc)
3641 memset(read_data + rc, 0, PAGE_SIZE - rc);
3643 flush_dcache_page(page);
3644 SetPageUptodate(page);
3646 /* send this page to the cache */
3647 cifs_readpage_to_fscache(file_inode(file), page);
3649 rc = 0;
3651 io_error:
3652 kunmap(page);
3653 unlock_page(page);
3655 read_complete:
3656 return rc;
3659 static int cifs_readpage(struct file *file, struct page *page)
3661 loff_t offset = (loff_t)page->index << PAGE_SHIFT;
3662 int rc = -EACCES;
3663 unsigned int xid;
3665 xid = get_xid();
3667 if (file->private_data == NULL) {
3668 rc = -EBADF;
3669 free_xid(xid);
3670 return rc;
3673 cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
3674 page, (int)offset, (int)offset);
3676 rc = cifs_readpage_worker(file, page, &offset);
3678 free_xid(xid);
3679 return rc;
3682 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3684 struct cifsFileInfo *open_file;
3685 struct cifs_tcon *tcon =
3686 cifs_sb_master_tcon(CIFS_SB(cifs_inode->vfs_inode.i_sb));
3688 spin_lock(&tcon->open_file_lock);
3689 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3690 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3691 spin_unlock(&tcon->open_file_lock);
3692 return 1;
3695 spin_unlock(&tcon->open_file_lock);
3696 return 0;
3699 /* We do not want to update the file size from server for inodes
3700 open for write - to avoid races with writepage extending
3701 the file - in the future we could consider allowing
3702 refreshing the inode only on increases in the file size
3703 but this is tricky to do without racing with writebehind
3704 page caching in the current Linux kernel design */
3705 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3707 if (!cifsInode)
3708 return true;
3710 if (is_inode_writable(cifsInode)) {
3711 /* This inode is open for write at least once */
3712 struct cifs_sb_info *cifs_sb;
3714 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3715 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3716 /* since no page cache to corrupt on directio
3717 we can change size safely */
3718 return true;
3721 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
3722 return true;
3724 return false;
3725 } else
3726 return true;
3729 static int cifs_write_begin(struct file *file, struct address_space *mapping,
3730 loff_t pos, unsigned len, unsigned flags,
3731 struct page **pagep, void **fsdata)
3733 int oncethru = 0;
3734 pgoff_t index = pos >> PAGE_SHIFT;
3735 loff_t offset = pos & (PAGE_SIZE - 1);
3736 loff_t page_start = pos & PAGE_MASK;
3737 loff_t i_size;
3738 struct page *page;
3739 int rc = 0;
3741 cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
3743 start:
3744 page = grab_cache_page_write_begin(mapping, index, flags);
3745 if (!page) {
3746 rc = -ENOMEM;
3747 goto out;
3750 if (PageUptodate(page))
3751 goto out;
3754 * If we write a full page it will be up to date, no need to read from
3755 * the server. If the write is short, we'll end up doing a sync write
3756 * instead.
3758 if (len == PAGE_SIZE)
3759 goto out;
3762 * optimize away the read when we have an oplock, and we're not
3763 * expecting to use any of the data we'd be reading in. That
3764 * is, when the page lies beyond the EOF, or straddles the EOF
3765 * and the write will cover all of the existing data.
3767 if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
3768 i_size = i_size_read(mapping->host);
3769 if (page_start >= i_size ||
3770 (offset == 0 && (pos + len) >= i_size)) {
3771 zero_user_segments(page, 0, offset,
3772 offset + len,
3773 PAGE_SIZE);
3775 * PageChecked means that the parts of the page
3776 * to which we're not writing are considered up
3777 * to date. Once the data is copied to the
3778 * page, it can be set uptodate.
3780 SetPageChecked(page);
3781 goto out;
3785 if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
3787 * might as well read a page, it is fast enough. If we get
3788 * an error, we don't need to return it. cifs_write_end will
3789 * do a sync write instead since PG_uptodate isn't set.
3791 cifs_readpage_worker(file, page, &page_start);
3792 put_page(page);
3793 oncethru = 1;
3794 goto start;
3795 } else {
3796 /* we could try using another file handle if there is one -
3797 but how would we lock it to prevent close of that handle
3798 racing with this read? In any case
3799 this will be written out by write_end so is fine */
3801 out:
3802 *pagep = page;
3803 return rc;
3806 static int cifs_release_page(struct page *page, gfp_t gfp)
3808 if (PagePrivate(page))
3809 return 0;
3811 return cifs_fscache_release_page(page, gfp);
3814 static void cifs_invalidate_page(struct page *page, unsigned int offset,
3815 unsigned int length)
3817 struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3819 if (offset == 0 && length == PAGE_SIZE)
3820 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
3823 static int cifs_launder_page(struct page *page)
3825 int rc = 0;
3826 loff_t range_start = page_offset(page);
3827 loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
3828 struct writeback_control wbc = {
3829 .sync_mode = WB_SYNC_ALL,
3830 .nr_to_write = 0,
3831 .range_start = range_start,
3832 .range_end = range_end,
3835 cifs_dbg(FYI, "Launder page: %p\n", page);
3837 if (clear_page_dirty_for_io(page))
3838 rc = cifs_writepage_locked(page, &wbc);
3840 cifs_fscache_invalidate_page(page, page->mapping->host);
3841 return rc;
3844 void cifs_oplock_break(struct work_struct *work)
3846 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
3847 oplock_break);
3848 struct inode *inode = d_inode(cfile->dentry);
3849 struct cifsInodeInfo *cinode = CIFS_I(inode);
3850 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3851 struct TCP_Server_Info *server = tcon->ses->server;
3852 int rc = 0;
3854 wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
3855 TASK_UNINTERRUPTIBLE);
3857 server->ops->downgrade_oplock(server, cinode,
3858 test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
3860 if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
3861 cifs_has_mand_locks(cinode)) {
3862 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
3863 inode);
3864 cinode->oplock = 0;
3867 if (inode && S_ISREG(inode->i_mode)) {
3868 if (CIFS_CACHE_READ(cinode))
3869 break_lease(inode, O_RDONLY);
3870 else
3871 break_lease(inode, O_WRONLY);
3872 rc = filemap_fdatawrite(inode->i_mapping);
3873 if (!CIFS_CACHE_READ(cinode)) {
3874 rc = filemap_fdatawait(inode->i_mapping);
3875 mapping_set_error(inode->i_mapping, rc);
3876 cifs_zap_mapping(inode);
3878 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
3881 rc = cifs_push_locks(cfile);
3882 if (rc)
3883 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
3886 * releasing stale oplock after recent reconnect of smb session using
3887 * a now incorrect file handle is not a data integrity issue but do
3888 * not bother sending an oplock release if session to server still is
3889 * disconnected since oplock already released by the server
3891 if (!cfile->oplock_break_cancelled) {
3892 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
3893 cinode);
3894 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
3896 cifs_done_oplock_break(cinode);
3900 * The presence of cifs_direct_io() in the address space ops vector
3901 * allowes open() O_DIRECT flags which would have failed otherwise.
3903 * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
3904 * so this method should never be called.
3906 * Direct IO is not yet supported in the cached mode.
3908 static ssize_t
3909 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
3912 * FIXME
3913 * Eventually need to support direct IO for non forcedirectio mounts
3915 return -EINVAL;
3919 const struct address_space_operations cifs_addr_ops = {
3920 .readpage = cifs_readpage,
3921 .readpages = cifs_readpages,
3922 .writepage = cifs_writepage,
3923 .writepages = cifs_writepages,
3924 .write_begin = cifs_write_begin,
3925 .write_end = cifs_write_end,
3926 .set_page_dirty = __set_page_dirty_nobuffers,
3927 .releasepage = cifs_release_page,
3928 .direct_IO = cifs_direct_io,
3929 .invalidatepage = cifs_invalidate_page,
3930 .launder_page = cifs_launder_page,
3934 * cifs_readpages requires the server to support a buffer large enough to
3935 * contain the header plus one complete page of data. Otherwise, we need
3936 * to leave cifs_readpages out of the address space operations.
3938 const struct address_space_operations cifs_addr_ops_smallbuf = {
3939 .readpage = cifs_readpage,
3940 .writepage = cifs_writepage,
3941 .writepages = cifs_writepages,
3942 .write_begin = cifs_write_begin,
3943 .write_end = cifs_write_end,
3944 .set_page_dirty = __set_page_dirty_nobuffers,
3945 .releasepage = cifs_release_page,
3946 .invalidatepage = cifs_invalidate_page,
3947 .launder_page = cifs_launder_page,