/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
static inline int cifs_convert_flags(unsigned int flags)
{
        if ((flags & O_ACCMODE) == O_RDONLY)
                return GENERIC_READ;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                return GENERIC_WRITE;
        else if ((flags & O_ACCMODE) == O_RDWR) {
                /* GENERIC_ALL is too much permission to request
                   can cause unnecessary access denied on create */
                /* return GENERIC_ALL; */
                return (GENERIC_READ | GENERIC_WRITE);
        }

        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
                FILE_READ_DATA);
}
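/*
 * Map VFS open flags to the SMB POSIX open flags used by the unix
 * extensions (see CIFSPOSIXCreate() below).
 */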
static u32 cifs_posix_convert_flags(unsigned int flags)
{
        u32 posix_flags = 0;

        if ((flags & O_ACCMODE) == O_RDONLY)
                posix_flags = SMB_O_RDONLY;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                posix_flags = SMB_O_WRONLY;
        else if ((flags & O_ACCMODE) == O_RDWR)
                posix_flags = SMB_O_RDWR;

        if (flags & O_CREAT) {
                posix_flags |= SMB_O_CREAT;
                if (flags & O_EXCL)
                        posix_flags |= SMB_O_EXCL;
        } else if (flags & O_EXCL)
                cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
                         current->comm, current->tgid);

        if (flags & O_TRUNC)
                posix_flags |= SMB_O_TRUNC;
        /* be safe and imply O_SYNC for O_DSYNC */
        if (flags & O_DSYNC)
                posix_flags |= SMB_O_SYNC;
        if (flags & O_DIRECTORY)
                posix_flags |= SMB_O_DIRECTORY;
        if (flags & O_NOFOLLOW)
                posix_flags |= SMB_O_NOFOLLOW;
        if (flags & O_DIRECT)
                posix_flags |= SMB_O_DIRECT;

        return posix_flags;
}
static inline int cifs_get_disposition(unsigned int flags)
{
        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                return FILE_CREATE;
        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                return FILE_OVERWRITE_IF;
        else if ((flags & O_CREAT) == O_CREAT)
                return FILE_OPEN_IF;
        else if ((flags & O_TRUNC) == O_TRUNC)
                return FILE_OVERWRITE;
        else
                return FILE_OPEN;
}
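/*
 * Open a file via the POSIX create/open call from the unix extensions;
 * on success, optionally set up or refresh the in-core inode from the
 * FILE_UNIX_BASIC_INFO metadata returned by the server.
 */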
int cifs_posix_open(char *full_path, struct inode **pinode,
                    struct super_block *sb, int mode, unsigned int f_flags,
                    __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
        int rc;
        FILE_UNIX_BASIC_INFO *presp_data;
        __u32 posix_flags = 0;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fattr fattr;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;

        cifs_dbg(FYI, "posix open %s\n", full_path);

        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
        if (presp_data == NULL)
                return -ENOMEM;

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                rc = PTR_ERR(tlink);
                goto posix_open_ret;
        }

        tcon = tlink_tcon(tlink);
        mode &= ~current_umask();

        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
                             cifs_remap(cifs_sb));
        cifs_put_tlink(tlink);

        if (rc)
                goto posix_open_ret;

        if (presp_data->Type == cpu_to_le32(-1))
                goto posix_open_ret; /* open ok, caller does qpathinfo */

        if (!pinode)
                goto posix_open_ret; /* caller does not need info */

        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

        /* get new inode and set it up */
        if (*pinode == NULL) {
                cifs_fill_uniqueid(sb, &fattr);
                *pinode = cifs_iget(sb, &fattr);
                if (!*pinode) {
                        rc = -ENOMEM;
                        goto posix_open_ret;
                }
        } else {
                cifs_fattr_to_inode(*pinode, &fattr);
        }

posix_open_ret:
        kfree(presp_data);
        return rc;
}
static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
             struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
             struct cifs_fid *fid, unsigned int xid)
{
        int rc;
        int desired_access;
        int disposition;
        int create_options = CREATE_NOT_DIR;
        FILE_ALL_INFO *buf;
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifs_open_parms oparms;

        if (!server->ops->open)
                return -ENOSYS;

        desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that there is no direct match for the disposition
 *      FILE_SUPERSEDE (ie create whether or not the file exists):
 *      O_CREAT | O_TRUNC is similar, but it truncates the existing
 *      file rather than creating a new file as FILE_SUPERSEDE does
 *      (which uses the attributes / metadata passed in on the open call).
 *?
 *?  O_SYNC is a reasonable match to the CIFS writethrough flag,
 *?  and the read write flags match reasonably.  O_LARGEFILE
 *?  is irrelevant because largefile support is always used
 *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *   O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 *********************************************************************/

        disposition = cifs_get_disposition(f_flags);

        /* BB pass O_SYNC flag through on file attributes .. BB */

        buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        /* O_SYNC also has bit for O_DSYNC so following check picks up either */
        if (f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = create_options;
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = fid;
        oparms.reconnect = false;

        rc = server->ops->open(xid, &oparms, oplock, buf);

        if (rc)
                goto out;

        if (tcon->unix_ext)
                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
                                              xid);
        else
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, fid);

out:
        kfree(buf);
        return rc;
}
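/*
 * Return true if any open instance of this inode currently holds
 * mandatory byte-range locks; walks every per-fid lock list under a
 * shared lock_sem.
 */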
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
        struct cifs_fid_locks *cur;
        bool has_locks = false;

        down_read(&cinode->lock_sem);
        list_for_each_entry(cur, &cinode->llist, llist) {
                if (!list_empty(&cur->locks)) {
                        has_locks = true;
                        break;
                }
        }
        up_read(&cinode->lock_sem);
        return has_locks;
}
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                  struct tcon_link *tlink, __u32 oplock)
{
        struct dentry *dentry = file->f_path.dentry;
        struct inode *inode = d_inode(dentry);
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;
        struct cifs_tcon *tcon = tlink_tcon(tlink);
        struct TCP_Server_Info *server = tcon->ses->server;

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
        if (!fdlocks) {
                kfree(cfile);
                return NULL;
        }

        INIT_LIST_HEAD(&fdlocks->locks);
        fdlocks->cfile = cfile;
        cfile->llist = fdlocks;
        down_write(&cinode->lock_sem);
        list_add(&fdlocks->llist, &cinode->llist);
        up_write(&cinode->lock_sem);

        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->tlink = cifs_get_tlink(tlink);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        mutex_init(&cfile->fh_mutex);
        spin_lock_init(&cfile->file_info_lock);

        cifs_sb_active(inode->i_sb);

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        spin_lock(&tcon->open_file_lock);
        if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);

        fid->purge_cache = false;
        server->ops->set_fid(cfile, fid, oplock);

        list_add(&cfile->tlist, &tcon->openFileList);

        /* if readable file instance put first in list*/
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&tcon->open_file_lock);

        if (fid->purge_cache)
                cifs_zap_mapping(inode);

        file->private_data = cfile;
        return cfile;
}
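/*
 * Take an extra reference on an open file handle; released again via
 * cifsFileInfo_put() below.
 */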
struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file->file_info_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file->file_info_lock);
        return cifs_file;
}
/*
 * Release a reference on the file private data. This may involve closing
 * the filehandle out on the server. Must be called without holding
 * tcon->open_file_lock and cifs_file->file_info_lock.
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct super_block *sb = inode->i_sb;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifsLockInfo *li, *tmp;
        struct cifs_fid fid;
        struct cifs_pending_open open;
        bool oplock_break_cancelled;

        spin_lock(&tcon->open_file_lock);

        spin_lock(&cifs_file->file_info_lock);
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file->file_info_lock);
                spin_unlock(&tcon->open_file_lock);
                return;
        }
        spin_unlock(&cifs_file->file_info_lock);

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /* store open in pending opens to make sure we don't miss lease break */
        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

        /* remove it from the lists */
        list_del(&cifs_file->flist);
        list_del(&cifs_file->tlist);

        if (list_empty(&cifsi->openFileList)) {
                cifs_dbg(FYI, "closing last open instance for inode %p\n",
                         d_inode(cifs_file->dentry));
                /*
                 * In strict cache mode we need invalidate mapping on the last
                 * close because it may cause an error when we open this file
                 * again and get at least a level II oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
                cifs_set_oplock_level(cifsi, 0);
        }

        spin_unlock(&tcon->open_file_lock);

        oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);

        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                struct TCP_Server_Info *server = tcon->ses->server;
                unsigned int xid;

                xid = get_xid();
                if (server->ops->close)
                        server->ops->close(xid, tcon, &cifs_file->fid);
                _free_xid(xid);
        }

        if (oplock_break_cancelled)
                cifs_done_oplock_break(cifsi);

        cifs_del_pending_open(&open);

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        down_write(&cifsi->lock_sem);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        list_del(&cifs_file->llist->llist);
        kfree(cifs_file->llist);
        up_write(&cifsi->lock_sem);

        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        cifs_sb_deactive(sb);
        kfree(cifs_file);
}
int cifs_open(struct inode *inode, struct file *file)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *server;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        char *full_path = NULL;
        bool posix_open_ok = false;
        struct cifs_fid fid;
        struct cifs_pending_open open;

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;

        full_path = build_path_from_dentry(file->f_path.dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
                goto out;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
                 inode, file->f_flags, full_path);

        if (file->f_flags & O_DIRECT &&
            cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
                        file->f_op = &cifs_file_direct_nobrl_ops;
                else
                        file->f_op = &cifs_file_direct_ops;
        }

        if (server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                    le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* can not refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                     cifs_sb->mnt_file_mode /* ignored */,
                                     file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix open succeeded\n");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
                                         tcon->ses->serverName,
                                         tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                           (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Else fallthrough to retry open the old way on network i/o
                 * or DFS errors.
                 */
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        cifs_add_pending_open(&fid, tlink, &open);

        if (!posix_open_ok) {
                if (server->ops->get_lease_key)
                        server->ops->get_lease_key(inode, &fid);

                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
                                  file->f_flags, &oplock, &fid, xid);
                if (rc) {
                        cifs_del_pending_open(&open);
                        goto out;
                }
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
        if (cfile == NULL) {
                if (server->ops->close)
                        server->ops->close(xid, tcon, &fid);
                cifs_del_pending_open(&open);
                rc = -ENOMEM;
                goto out;
        }

        cifs_fscache_set_inode_cookie(inode, file);

        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Time to set mode which we can not set earlier due to
                 * problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = INVALID_UID, /* no change */
                        .gid    = INVALID_GID, /* no change */
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }

out:
        kfree(full_path);
        free_xid(xid);
        cifs_put_tlink(tlink);
        return rc;
}
static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
        if (cinode->can_cache_brlcks) {
                /* can cache locks - no need to relock */
                up_read(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        up_read(&cinode->lock_sem);
        return rc;
}
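/*
 * Reopen a file handle that was invalidated, e.g. by reconnect after a
 * session loss; if can_flush is set, write back dirty data and refresh
 * the inode before the handle is reused.
 */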
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        char *full_path = NULL;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_open_parms oparms;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                rc = 0;
                free_xid(xid);
                return rc;
        }

        inode = d_inode(cfile->dentry);
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * Can not grab rename sem here because various ops, including those
         * that already have the rename sem can end up causing writepage to get
         * called and if the server was down that means we end up here, and we
         * can never tell if the caller already has the rename_sem.
         */
        full_path = build_path_from_dentry(cfile->dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return rc;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
                 inode, cfile->f_flags, full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
             le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                      ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->mnt_file_mode /* ignored */,
                                     oflags, &oplock, &cfile->fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix reopen succeeded\n");
                        oparms.reconnect = true;
                        goto reopen_success;
                }
                /*
                 * fallthrough to retry open the old way on errors, especially
                 * in the reconnect path it is important to retry hard
                 */
        }

        desired_access = cifs_convert_flags(cfile->f_flags);

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &cfile->fid);

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = create_options;
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = &cfile->fid;
        oparms.reconnect = true;

        /*
         * Can not refresh inode by passing in file_info buf to be returned by
         * ops->open and then calling get_inode_info with returned buf since
         * file might have write behind data that needs to be flushed and server
         * version of file size can be stale. If we knew for sure that inode was
         * not dirty locally we could do this.
         */
        rc = server->ops->open(xid, &oparms, &oplock, NULL);
        if (rc == -ENOENT && oparms.reconnect == false) {
                /* durable handle timeout is expired - open the file again */
                rc = server->ops->open(xid, &oparms, &oplock, NULL);
                /* indicate that we need to relock the file */
                oparms.reconnect = true;
        }

        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
                cifs_dbg(FYI, "oplock: %d\n", oplock);
                goto reopen_error_exit;
        }

reopen_success:
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
                mapping_set_error(inode->i_mapping, rc);

                if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Else we are writing out data to server already and could deadlock if
         * we tried to flush data, and since we do not know if we have data that
         * would invalidate the current end of file on the server we can not go
         * to the server to get the new inode info.
         */

        server->ops->set_fid(cfile, &cfile->fid, oplock);
        if (oparms.reconnect)
                cifs_relock_file(cfile);

reopen_error_exit:
        kfree(full_path);
        free_xid(xid);
        return rc;
}
int cifs_close(struct inode *inode, struct file *file)
{
        if (file->private_data != NULL) {
                cifsFileInfo_put(file->private_data);
                file->private_data = NULL;
        }

        /* return code from the ->release op is always ignored */
        return 0;
}
int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cifs_dbg(FYI, "Freeing private data in close dir\n");
        spin_lock(&cfile->file_info_lock);
        if (server->ops->dir_needs_close(cfile)) {
                cfile->invalidHandle = true;
                spin_unlock(&cfile->file_info_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cfile->file_info_lock);

        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}
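/*
 * Allocate and initialize a byte-range lock record for the current
 * task; the caller links it into the proper per-fid lock list.
 */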
static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type)
{
        struct cifsLockInfo *lock =
                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
        if (!lock)
                return lock;
        lock->offset = offset;
        lock->length = length;
        lock->type = type;
        lock->pid = current->tgid;
        INIT_LIST_HEAD(&lock->blist);
        init_waitqueue_head(&lock->block_q);
        return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
                list_del_init(&li->blist);
                wake_up(&li->block_q);
        }
}
#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/* @rw_check : 0 - no op, 1 - read, 2 - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                      current->tgid == li->pid) || type == li->type))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}
bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
                        __u8 type, struct cifsLockInfo **conf_lock,
                        int rw_check)
{
        bool rc = false;
        struct cifs_fid_locks *cur;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

        list_for_each_entry(cur, &cinode->llist, llist) {
                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
                                                 cfile, conf_lock, rw_check);
                if (rc)
                        break;
        }

        return rc;
}
/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        down_read(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        &conf_lock, CIFS_LOCK_OP);
        if (exist) {
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        up_read(&cinode->lock_sem);
        return rc;
}
static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}
/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        down_write(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, &conf_lock, CIFS_LOCK_OP);
        if (!exist && cinode->can_cache_brlcks) {
                list_add_tail(&lock->llist, &cfile->llist->locks);
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                list_add_tail(&lock->blist, &conf_lock->blist);
                up_write(&cinode->lock_sem);
                rc = wait_event_interruptible(lock->block_q,
                                              (lock->blist.prev == &lock->blist) &&
                                              (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }

        up_write(&cinode->lock_sem);
        return rc;
}
/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
        int rc = 0;
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        unsigned char saved_type = flock->fl_type;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return 1;

        down_read(&cinode->lock_sem);
        posix_test_lock(file, flock);

        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
                flock->fl_type = saved_type;
                rc = 1;
        }

        up_read(&cinode->lock_sem);
        return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if we need to request to the server;
 * 3) <0, if the error occurs while setting the lock.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        int rc = 1;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return rc;

try_again:
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        rc = posix_lock_file(file, flock, NULL);
        up_write(&cinode->lock_sem);
        if (rc == FILE_LOCK_DEFERRED) {
                rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
                if (!rc)
                        goto try_again;
                posix_unblock_lock(flock);
        }
        return rc;
}
static int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
        unsigned int xid;
        int rc = 0, stored_rc;
        struct cifsLockInfo *li, *tmp;
        struct cifs_tcon *tcon;
        unsigned int num, max_num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        int types[] = {LOCKING_ANDX_LARGE_FILES,
                       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
        int i;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it for zero before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (!max_buf) {
                free_xid(xid);
                return -EINVAL;
        }

        max_num = (max_buf - sizeof(struct smb_hdr)) /
                  sizeof(LOCKING_ANDX_RANGE);
        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf) {
                free_xid(xid);
                return -ENOMEM;
        }

        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (li->type != types[i])
                                continue;
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       (__u8)li->type, 0, num,
                                                       buf);
                                if (stored_rc)
                                        rc = stored_rc;
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }

                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               (__u8)types[i], 0, num, buf);
                        if (stored_rc)
                                rc = stored_rc;
                }
        }

        kfree(buf);
        free_xid(xid);
        return rc;
}
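/*
 * Snapshot of a POSIX byte-range lock, queued so the locks can be sent
 * to the server after flc_lock has been dropped.
 */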
struct lock_to_push {
        struct list_head llist;
        __u64 offset;
        __u64 length;
        __u32 pid;
        __u16 netfid;
        __u8 type;
};
static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
        struct inode *inode = d_inode(cfile->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct file_lock *flock;
        struct file_lock_context *flctx = inode->i_flctx;
        unsigned int count = 0, i;
        int rc = 0, xid, type;
        struct list_head locks_to_send, *el;
        struct lock_to_push *lck, *tmp;
        __u64 length;

        xid = get_xid();

        if (!flctx)
                goto out;

        spin_lock(&flctx->flc_lock);
        list_for_each(el, &flctx->flc_posix) {
                count++;
        }
        spin_unlock(&flctx->flc_lock);

        INIT_LIST_HEAD(&locks_to_send);

        /*
         * Allocating count locks is enough because no FL_POSIX locks can be
         * added to the list while we are holding cinode->lock_sem that
         * protects locking operations of this inode.
         */
        for (i = 0; i < count; i++) {
                lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
                if (!lck) {
                        rc = -ENOMEM;
                        goto err_out;
                }
                list_add_tail(&lck->llist, &locks_to_send);
        }

        el = locks_to_send.next;
        spin_lock(&flctx->flc_lock);
        list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
                if (el == &locks_to_send) {
                        /*
                         * The list ended. We don't have enough allocated
                         * structures - something is really wrong.
                         */
                        cifs_dbg(VFS, "Can't push all brlocks!\n");
                        break;
                }
                length = 1 + flock->fl_end - flock->fl_start;
                if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
                        type = CIFS_RDLCK;
                else
                        type = CIFS_WRLCK;
                lck = list_entry(el, struct lock_to_push, llist);
                lck->pid = flock->fl_pid;
                lck->netfid = cfile->fid.netfid;
                lck->length = length;
                lck->type = type;
                lck->offset = flock->fl_start;
                el = el->next;
        }
        spin_unlock(&flctx->flc_lock);

        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                int stored_rc;

                stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
                                             lck->offset, lck->length, NULL,
                                             lck->type, 0);
                if (stored_rc)
                        rc = stored_rc;
                list_del(&lck->llist);
                kfree(lck);
        }

out:
        free_xid(xid);
        return rc;
err_out:
        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                list_del(&lck->llist);
                kfree(lck);
        }
        goto out;
}
static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        /* we are going to update can_cache_brlcks here - need a write access */
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        cinode->can_cache_brlcks = false;
        up_write(&cinode->lock_sem);
        return rc;
}
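/*
 * Decode a struct file_lock into the server lock type, whether this is
 * a lock or an unlock request, and whether we may block waiting for it
 * (FL_SLEEP).
 */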
static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
                bool *wait_flag, struct TCP_Server_Info *server)
{
        if (flock->fl_flags & FL_POSIX)
                cifs_dbg(FYI, "Posix\n");
        if (flock->fl_flags & FL_FLOCK)
                cifs_dbg(FYI, "Flock\n");
        if (flock->fl_flags & FL_SLEEP) {
                cifs_dbg(FYI, "Blocking lock\n");
                *wait_flag = true;
        }
        if (flock->fl_flags & FL_ACCESS)
                cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
        if (flock->fl_flags & FL_LEASE)
                cifs_dbg(FYI, "Lease on file - not implemented yet\n");
        if (flock->fl_flags &
            (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
               FL_ACCESS | FL_LEASE | FL_CLOSE)))
                cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);

        *type = server->vals->large_lock_type;
        if (flock->fl_type == F_WRLCK) {
                cifs_dbg(FYI, "F_WRLCK\n");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_UNLCK) {
                cifs_dbg(FYI, "F_UNLCK\n");
                *type |= server->vals->unlock_lock_type;
                *unlock = 1;
                /* Check if unlock includes more than one lock range */
        } else if (flock->fl_type == F_RDLCK) {
                cifs_dbg(FYI, "F_RDLCK\n");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_EXLCK) {
                cifs_dbg(FYI, "F_EXLCK\n");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_SHLCK) {
                cifs_dbg(FYI, "F_SHLCK\n");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else
                cifs_dbg(FYI, "Unknown type of lock\n");
}
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
           bool wait_flag, bool posix_lck, unsigned int xid)
{
        int rc = 0;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        __u16 netfid = cfile->fid.netfid;

        if (posix_lck) {
                int posix_lock_type;

                rc = cifs_posix_lock_test(file, flock);
                if (!rc)
                        return rc;

                if (type & server->vals->shared_lock_type)
                        posix_lock_type = CIFS_RDLCK;
                else
                        posix_lock_type = CIFS_WRLCK;
                rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
                                      flock->fl_start, length, flock,
                                      posix_lock_type, wait_flag);
                return rc;
        }

        rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
        if (!rc)
                return rc;

        /* BB we could chain these into one lock request BB */
        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                            type, 0, 1, false);
                flock->fl_type = F_UNLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
                return 0;
        }

        if (type & server->vals->shared_lock_type) {
                flock->fl_type = F_WRLCK;
                return 0;
        }

        type &= ~server->vals->exclusive_lock_type;

        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                    type | server->vals->shared_lock_type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                            type | server->vals->shared_lock_type,
                                            0, 1, false);
                flock->fl_type = F_RDLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
        } else
                flock->fl_type = F_WRLCK;

        return 0;
}
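/*
 * List helpers used by the unlock path below: move saved lock records
 * between lists, or free a list of lock records, waking any blocked
 * waiters first.
 */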
void
cifs_move_llist(struct list_head *source, struct list_head *dest)
{
        struct list_head *li, *tmp;
        list_for_each_safe(li, tmp, source)
                list_move(li, dest);
}

void
cifs_free_llist(struct list_head *llist)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, llist, llist) {
                cifs_del_lock_waiters(li);
                list_del(&li->llist);
                kfree(li);
        }
}
static int
cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
                  unsigned int xid)
{
        int rc = 0, stored_rc;
        int types[] = {LOCKING_ANDX_LARGE_FILES,
                       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
        unsigned int i;
        unsigned int max_num, num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifsLockInfo *li, *tmp;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct list_head tmp_llist;

        INIT_LIST_HEAD(&tmp_llist);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it for zero before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (!max_buf)
                return -EINVAL;

        max_num = (max_buf - sizeof(struct smb_hdr)) /
                  sizeof(LOCKING_ANDX_RANGE);
        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        down_write(&cinode->lock_sem);
        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (flock->fl_start > li->offset ||
                            (flock->fl_start + length) <
                            (li->offset + li->length))
                                continue;
                        if (current->tgid != li->pid)
                                continue;
                        if (types[i] != li->type)
                                continue;
                        if (cinode->can_cache_brlcks) {
                                /*
                                 * We can cache brlock requests - simply remove
                                 * a lock from the file's list.
                                 */
                                list_del(&li->llist);
                                cifs_del_lock_waiters(li);
                                kfree(li);
                                continue;
                        }
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        /*
                         * We need to save a lock here to let us add it again to
                         * the file's list if the unlock range request fails on
                         * the server.
                         */
                        list_move(&li->llist, &tmp_llist);
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       li->type, num, 0, buf);
                                if (stored_rc) {
                                        /*
                                         * We failed on the unlock range
                                         * request - add all locks from the tmp
                                         * list to the head of the file's list.
                                         */
                                        cifs_move_llist(&tmp_llist,
                                                        &cfile->llist->locks);
                                        rc = stored_rc;
                                } else
                                        /*
                                         * The unlock range request succeeded -
                                         * free the tmp list.
                                         */
                                        cifs_free_llist(&tmp_llist);
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }
                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               types[i], num, 0, buf);
                        if (stored_rc) {
                                cifs_move_llist(&tmp_llist,
                                                &cfile->llist->locks);
                                rc = stored_rc;
                        } else
                                cifs_free_llist(&tmp_llist);
                }
        }

        up_write(&cinode->lock_sem);
        kfree(buf);
        return rc;
}
static int
cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
           bool wait_flag, bool posix_lck, int lock, int unlock,
           unsigned int xid)
{
        int rc = 0;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct inode *inode = d_inode(cfile->dentry);

        if (posix_lck) {
                int posix_lock_type;

                rc = cifs_posix_lock_set(file, flock);
                if (!rc || rc < 0)
                        return rc;

                if (type & server->vals->shared_lock_type)
                        posix_lock_type = CIFS_RDLCK;
                else
                        posix_lock_type = CIFS_WRLCK;

                if (unlock == 1)
                        posix_lock_type = CIFS_UNLCK;

                rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
                                      current->tgid, flock->fl_start, length,
                                      NULL, posix_lock_type, wait_flag);
                goto out;
        }

        if (lock) {
                struct cifsLockInfo *lock;

                lock = cifs_lock_init(flock->fl_start, length, type);
                if (!lock)
                        return -ENOMEM;

                rc = cifs_lock_add_if(cfile, lock, wait_flag);
                if (rc < 0) {
                        kfree(lock);
                        return rc;
                }
                if (!rc)
                        goto out;

                /*
                 * Windows 7 server can delay breaking lease from read to None
                 * if we set a byte-range lock on a file - break it explicitly
                 * before sending the lock to the server to be sure the next
                 * read won't conflict with non-overlapping locks due to
                 * page reading.
                 */
                if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
                    CIFS_CACHE_READ(CIFS_I(inode))) {
                        cifs_zap_mapping(inode);
                        cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
                                 inode);
                        CIFS_I(inode)->oplock = 0;
                }

                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                            type, 1, 0, wait_flag);
                if (rc) {
                        kfree(lock);
                        return rc;
                }

                cifs_lock_add(cfile, lock);
        } else if (unlock)
                rc = server->ops->mand_unlock_range(cfile, flock, xid);

out:
        if (flock->fl_flags & FL_POSIX && !rc)
                rc = locks_lock_file_wait(file, flock);
        return rc;
}
int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
{
        int rc, xid;
        int lock = 0, unlock = 0;
        bool wait_flag = false;
        bool posix_lck = false;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct cifsInodeInfo *cinode;
        struct cifsFileInfo *cfile;
        __u16 netfid;
        __u32 type;

        rc = -EACCES;
        xid = get_xid();

        cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
                 cmd, flock->fl_flags, flock->fl_type,
                 flock->fl_start, flock->fl_end);

        cfile = (struct cifsFileInfo *)file->private_data;
        tcon = tlink_tcon(cfile->tlink);

        cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
                        tcon->ses->server);

        cifs_sb = CIFS_FILE_SB(file);
        netfid = cfile->fid.netfid;
        cinode = CIFS_I(file_inode(file));

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                posix_lck = true;
        /*
         * BB add code here to normalize offset and length to account for
         * negative length which we can not accept over the wire.
         */
        if (IS_GETLK(cmd)) {
                rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
                free_xid(xid);
                return rc;
        }

        if (!lock && !unlock) {
                /*
                 * if no lock or unlock then nothing to do since we do not
                 * know what it is
                 */
                free_xid(xid);
                return -EOPNOTSUPP;
        }

        rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
                        xid);
        free_xid(xid);
        return rc;
}
/*
 * update the file size (if needed) after a write. Should be called with
 * the inode->i_lock held
 */
void
cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
                unsigned int bytes_written)
{
        loff_t end_of_write = offset + bytes_written;

        if (end_of_write > cifsi->server_eof)
                cifsi->server_eof = end_of_write;
}
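/*
 * Synchronously write write_size bytes at *offset through an open file
 * handle, reopening the handle and retrying after reconnect; returns
 * the number of bytes written or a negative error, and advances
 * *offset past the written data.
 */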
static ssize_t
cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
           size_t write_size, loff_t *offset)
{
        int rc = 0;
        unsigned int bytes_written = 0;
        unsigned int total_written;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        unsigned int xid;
        struct dentry *dentry = open_file->dentry;
        struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
        struct cifs_io_parms io_parms;

        cifs_sb = CIFS_SB(dentry->d_sb);

        cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
                 write_size, *offset, dentry);

        tcon = tlink_tcon(open_file->tlink);
        server = tcon->ses->server;

        if (!server->ops->sync_write)
                return -ENOSYS;

        xid = get_xid();

        for (total_written = 0; write_size > total_written;
             total_written += bytes_written) {
                rc = -EAGAIN;
                while (rc == -EAGAIN) {
                        struct kvec iov[2];
                        unsigned int len;

                        if (open_file->invalidHandle) {
                                /* we could deadlock if we called
                                   filemap_fdatawait from here so tell
                                   reopen_file not to flush data to
                                   server now */
                                rc = cifs_reopen_file(open_file, false);
                                if (rc != 0)
                                        break;
                        }

                        len = min(server->ops->wp_retry_size(d_inode(dentry)),
                                  (unsigned int)write_size - total_written);
                        /* iov[0] is reserved for smb header */
                        iov[1].iov_base = (char *)write_data + total_written;
                        iov[1].iov_len = len;
                        io_parms.pid = pid;
                        io_parms.tcon = tcon;
                        io_parms.offset = *offset;
                        io_parms.length = len;
                        rc = server->ops->sync_write(xid, &open_file->fid,
                                                     &io_parms, &bytes_written,
                                                     iov, 1);
                }
                if (rc || (bytes_written == 0)) {
                        if (total_written)
                                break;
                        else {
                                free_xid(xid);
                                return rc;
                        }
                } else {
                        spin_lock(&d_inode(dentry)->i_lock);
                        cifs_update_eof(cifsi, *offset, bytes_written);
                        spin_unlock(&d_inode(dentry)->i_lock);
                        *offset += bytes_written;
                }
        }

        cifs_stats_bytes_written(tcon, total_written);

        if (total_written > 0) {
                spin_lock(&d_inode(dentry)->i_lock);
                if (*offset > d_inode(dentry)->i_size)
                        i_size_write(d_inode(dentry), *offset);
                spin_unlock(&d_inode(dentry)->i_lock);
        }
        mark_inode_dirty_sync(d_inode(dentry));
        free_xid(xid);
        return total_written;
}
struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
                                        bool fsuid_only)
{
        struct cifsFileInfo *open_file = NULL;
        struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
        struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);

        /* only filter by fsuid on multiuser mounts */
        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
                fsuid_only = false;

        spin_lock(&tcon->open_file_lock);
        /* we could simply get the first_list_entry since write-only entries
           are always at the end of the list but since the first entry might
           have a close pending, we go through the whole list */
        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
                if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
                        continue;
                if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
                        if (!open_file->invalidHandle) {
                                /* found a good file */
                                /* lock it so it will not be closed on us */
                                cifsFileInfo_get(open_file);
                                spin_unlock(&tcon->open_file_lock);
                                return open_file;
                        } /* else might as well continue, and look for
                             another, or simply have the caller reopen it
                             again rather than trying to fix this handle */
                } else /* write only file */
                        break; /* write only files are last so must be done */
        }
        spin_unlock(&tcon->open_file_lock);
        return NULL;
}
struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
                                        bool fsuid_only)
{
        struct cifsFileInfo *open_file, *inv_file = NULL;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        bool any_available = false;
        int rc;
        unsigned int refind = 0;

        /* Having a null inode here (because mapping->host was set to zero by
           the VFS or MM) should not happen but we had reports of an oops (due
           to it being zero) during stress testcases so we need to check for it */

        if (cifs_inode == NULL) {
                cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
                dump_stack();
                return NULL;
        }

        cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
        tcon = cifs_sb_master_tcon(cifs_sb);

        /* only filter by fsuid on multiuser mounts */
        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
                fsuid_only = false;

        spin_lock(&tcon->open_file_lock);
refind_writable:
        if (refind > MAX_REOPEN_ATT) {
                spin_unlock(&tcon->open_file_lock);
                return NULL;
        }
        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
                if (!any_available && open_file->pid != current->tgid)
                        continue;
                if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
                        continue;
                if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
                        if (!open_file->invalidHandle) {
                                /* found a good writable file */
                                cifsFileInfo_get(open_file);
                                spin_unlock(&tcon->open_file_lock);
                                return open_file;
                        } else {
                                if (!inv_file)
                                        inv_file = open_file;
                        }
                }
        }
        /* couldn't find usable FH with same pid, try any available */
        if (!any_available) {
                any_available = true;
                goto refind_writable;
        }

        if (inv_file) {
                any_available = false;
                cifsFileInfo_get(inv_file);
        }

        spin_unlock(&tcon->open_file_lock);

        if (inv_file) {
                rc = cifs_reopen_file(inv_file, false);
                if (!rc)
                        return inv_file;
                else {
                        spin_lock(&tcon->open_file_lock);
                        list_move_tail(&inv_file->flist,
                                       &cifs_inode->openFileList);
                        spin_unlock(&tcon->open_file_lock);
                        cifsFileInfo_put(inv_file);
                        ++refind;
                        inv_file = NULL;
                        spin_lock(&tcon->open_file_lock);
                        goto refind_writable;
                }
        }

        return NULL;
}
static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
{
        struct address_space *mapping = page->mapping;
        loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
        char *write_data;
        int rc = -EFAULT;
        int bytes_written = 0;
        struct inode *inode;
        struct cifsFileInfo *open_file;

        if (!mapping || !mapping->host)
                return -EFAULT;

        inode = page->mapping->host;

        offset += (loff_t)from;
        write_data = kmap(page);
        write_data += from;

        if ((to > PAGE_CACHE_SIZE) || (from > to)) {
                kunmap(page);
                return -EIO;
        }

        /* racing with truncate? */
        if (offset > mapping->host->i_size) {
                kunmap(page);
                return 0; /* don't care */
        }

        /* check to make sure that we are not extending the file */
        if (mapping->host->i_size - offset < (loff_t)to)
                to = (unsigned)(mapping->host->i_size - offset);

        open_file = find_writable_file(CIFS_I(mapping->host), false);
        if (open_file) {
                bytes_written = cifs_write(open_file, open_file->pid,
                                           write_data, to - from, &offset);
                cifsFileInfo_put(open_file);
                /* Does mm or vfs already set times? */
                inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
                if ((bytes_written > 0) && (offset))
                        rc = 0;
                else if (bytes_written < 0)
                        rc = bytes_written;
        } else {
                cifs_dbg(FYI, "No writeable filehandles for inode\n");
                rc = -EIO;
        }

        kunmap(page);
        return rc;
}
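/*
 * Allocate a writedata structure and gather up to tofind dirty pages
 * starting at *index into its page array; *found_pages returns how
 * many were collected.
 */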
static struct cifs_writedata *
wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
                          pgoff_t end, pgoff_t *index,
                          unsigned int *found_pages)
{
        unsigned int nr_pages;
        struct page **pages;
        struct cifs_writedata *wdata;

        wdata = cifs_writedata_alloc((unsigned int)tofind,
                                     cifs_writev_complete);
        if (!wdata)
                return NULL;

        /*
         * find_get_pages_tag seems to return a max of 256 on each
         * iteration, so we must call it several times in order to
         * fill the array or the wsize is effectively limited to
         * 256 * PAGE_CACHE_SIZE.
         */
        *found_pages = 0;
        pages = wdata->pages;
        do {
                nr_pages = find_get_pages_tag(mapping, index,
                                              PAGECACHE_TAG_DIRTY, tofind,
                                              pages);
                *found_pages += nr_pages;
                tofind -= nr_pages;
                pages += nr_pages;
        } while (nr_pages && tofind && *index <= end);

        return wdata;
}
static unsigned int
wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
                    struct address_space *mapping,
                    struct writeback_control *wbc,
                    pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
{
        unsigned int nr_pages = 0, i;
        struct page *page;

        for (i = 0; i < found_pages; i++) {
                page = wdata->pages[i];
                /*
                 * At this point we hold neither mapping->tree_lock nor
                 * lock on the page itself: the page may be truncated or
                 * invalidated (changing page->mapping to NULL), or even
                 * swizzled back from swapper_space to tmpfs file
                 * mapping
                 */

                if (nr_pages == 0)
                        lock_page(page);
                else if (!trylock_page(page))
                        break;

                if (unlikely(page->mapping != mapping)) {
                        unlock_page(page);
                        break;
                }

                if (!wbc->range_cyclic && page->index > end) {
                        *done = true;
                        unlock_page(page);
                        break;
                }

                if (*next && (page->index != *next)) {
                        /* Not next consecutive page */
                        unlock_page(page);
                        break;
                }

                if (wbc->sync_mode != WB_SYNC_NONE)
                        wait_on_page_writeback(page);

                if (PageWriteback(page) ||
                    !clear_page_dirty_for_io(page)) {
                        unlock_page(page);
                        break;
                }

                /*
                 * This actually clears the dirty bit in the radix tree.
                 * See cifs_writepage() for more commentary.
                 */
                set_page_writeback(page);
                if (page_offset(page) >= i_size_read(mapping->host)) {
                        *done = true;
                        unlock_page(page);
                        end_page_writeback(page);
                        break;
                }

                wdata->pages[i] = page;
                *next = page->index + 1;
                ++nr_pages;
        }

        /* reset index to refind any pages skipped */
        if (nr_pages == 0)
                *index = wdata->pages[0]->index + 1;

        /* put any pages we aren't going to use */
        for (i = nr_pages; i < found_pages; i++) {
                page_cache_release(wdata->pages[i]);
                wdata->pages[i] = NULL;
        }

        return nr_pages;
}
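/*
 * Fill in the remaining writedata fields (offset, sizes, target handle)
 * and hand the request to the server's async write path; all gathered
 * pages are unlocked before returning.
 */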
2018 static int
2019 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2020 struct address_space *mapping, struct writeback_control *wbc)
2022 int rc = 0;
2023 struct TCP_Server_Info *server;
2024 unsigned int i;
2026 wdata->sync_mode = wbc->sync_mode;
2027 wdata->nr_pages = nr_pages;
2028 wdata->offset = page_offset(wdata->pages[0]);
2029 wdata->pagesz = PAGE_CACHE_SIZE;
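/* the last page may extend past EOF; clamp the tail so we only send the bytes that fall within i_size (at most one full page) */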
2030 wdata->tailsz = min(i_size_read(mapping->host) -
2031 page_offset(wdata->pages[nr_pages - 1]),
2032 (loff_t)PAGE_CACHE_SIZE);
2033 wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) + wdata->tailsz;
2035 if (wdata->cfile != NULL)
2036 cifsFileInfo_put(wdata->cfile);
2037 wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
2038 if (!wdata->cfile) {
2039 cifs_dbg(VFS, "No writable handles for inode\n");
2040 rc = -EBADF;
2041 } else {
2042 wdata->pid = wdata->cfile->pid;
2043 server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2044 rc = server->ops->async_writev(wdata, cifs_writedata_release);
2047 for (i = 0; i < nr_pages; ++i)
2048 unlock_page(wdata->pages[i]);
2050 return rc;
2053 static int cifs_writepages(struct address_space *mapping,
2054 struct writeback_control *wbc)
2056 struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
2057 struct TCP_Server_Info *server;
2058 bool done = false, scanned = false, range_whole = false;
2059 pgoff_t end, index;
2060 struct cifs_writedata *wdata;
2061 int rc = 0;
2064 * If wsize is smaller than the page cache size, default to writing
2065 * one page at a time via cifs_writepage
2067 if (cifs_sb->wsize < PAGE_CACHE_SIZE)
2068 return generic_writepages(mapping, wbc);
2070 if (wbc->range_cyclic) {
2071 index = mapping->writeback_index; /* Start from prev offset */
2072 end = -1;
2073 } else {
2074 index = wbc->range_start >> PAGE_CACHE_SHIFT;
2075 end = wbc->range_end >> PAGE_CACHE_SHIFT;
2076 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2077 range_whole = true;
2078 scanned = true;
2080 server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2081 retry:
2082 while (!done && index <= end) {
2083 unsigned int i, nr_pages, found_pages, wsize, credits;
2084 pgoff_t next = 0, tofind, saved_index = index;
2086 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2087 &wsize, &credits);
2088 if (rc)
2089 break;
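/* convert the granted wsize into a count of dirty pages to look for, without scanning past the end of the writeback range */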
2091 tofind = min((wsize / PAGE_CACHE_SIZE) - 1, end - index) + 1;
2093 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2094 &found_pages);
2095 if (!wdata) {
2096 rc = -ENOMEM;
2097 add_credits_and_wake_if(server, credits, 0);
2098 break;
2101 if (found_pages == 0) {
2102 kref_put(&wdata->refcount, cifs_writedata_release);
2103 add_credits_and_wake_if(server, credits, 0);
2104 break;
2107 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2108 end, &index, &next, &done);
2110 /* nothing to write? */
2111 if (nr_pages == 0) {
2112 kref_put(&wdata->refcount, cifs_writedata_release);
2113 add_credits_and_wake_if(server, credits, 0);
2114 continue;
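/* the request now owns the reserved credits; they are released in the send-failure path below */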
2117 wdata->credits = credits;
2119 rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2121 /* send failure -- clean up the mess */
2122 if (rc != 0) {
2123 add_credits_and_wake_if(server, wdata->credits, 0);
2124 for (i = 0; i < nr_pages; ++i) {
2125 if (rc == -EAGAIN)
2126 redirty_page_for_writepage(wbc,
2127 wdata->pages[i]);
2128 else
2129 SetPageError(wdata->pages[i]);
2130 end_page_writeback(wdata->pages[i]);
2131 page_cache_release(wdata->pages[i]);
2133 if (rc != -EAGAIN)
2134 mapping_set_error(mapping, rc);
2136 kref_put(&wdata->refcount, cifs_writedata_release);
2138 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2139 index = saved_index;
2140 continue;
2143 wbc->nr_to_write -= nr_pages;
2144 if (wbc->nr_to_write <= 0)
2145 done = true;
2147 index = next;
2150 if (!scanned && !done) {
2152 * We hit the last page and there is more work to be done: wrap
2153 * back to the start of the file
2155 scanned = true;
2156 index = 0;
2157 goto retry;
2160 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2161 mapping->writeback_index = index;
2163 return rc;
2166 static int
2167 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2169 int rc;
2170 unsigned int xid;
2172 xid = get_xid();
2173 /* BB add check for wbc flags */
2174 page_cache_get(page);
2175 if (!PageUptodate(page))
2176 cifs_dbg(FYI, "ppw - page not up to date\n");
2179 * Set the "writeback" flag, and clear "dirty" in the radix tree.
2181 * A writepage() implementation always needs to do either this,
2182 * or re-dirty the page with "redirty_page_for_writepage()" in
2183 * the case of a failure.
2185 * Just unlocking the page would leave the radix tree tag-bits
2186 * out of sync with the actual state of the page.
2188 set_page_writeback(page);
2189 retry_write:
2190 rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
2191 if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
2192 goto retry_write;
2193 else if (rc == -EAGAIN)
2194 redirty_page_for_writepage(wbc, page);
2195 else if (rc != 0)
2196 SetPageError(page);
2197 else
2198 SetPageUptodate(page);
2199 end_page_writeback(page);
2200 page_cache_release(page);
2201 free_xid(xid);
2202 return rc;
2205 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2207 int rc = cifs_writepage_locked(page, wbc);
2208 unlock_page(page);
2209 return rc;
2212 static int cifs_write_end(struct file *file, struct address_space *mapping,
2213 loff_t pos, unsigned len, unsigned copied,
2214 struct page *page, void *fsdata)
2216 int rc;
2217 struct inode *inode = mapping->host;
2218 struct cifsFileInfo *cfile = file->private_data;
2219 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2220 __u32 pid;
2222 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2223 pid = cfile->pid;
2224 else
2225 pid = current->tgid;
2227 cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2228 page, pos, copied);
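/* write_begin sets PageChecked when the parts of the page not being written were already zeroed; once the copy lands, the page is uptodate */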
2230 if (PageChecked(page)) {
2231 if (copied == len)
2232 SetPageUptodate(page);
2233 ClearPageChecked(page);
2234 } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
2235 SetPageUptodate(page);
2237 if (!PageUptodate(page)) {
2238 char *page_data;
2239 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
2240 unsigned int xid;
2242 xid = get_xid();
2243 /* this is probably better than directly calling
2244 partialpage_write since in this function the file handle is
2245 known which we might as well leverage */
2246 /* BB check if anything else missing out of ppw
2247 such as updating last write time */
2248 page_data = kmap(page);
2249 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2250 /* if (rc < 0) should we set writebehind rc? */
2251 kunmap(page);
2253 free_xid(xid);
2254 } else {
2255 rc = copied;
2256 pos += copied;
2257 set_page_dirty(page);
2260 if (rc > 0) {
2261 spin_lock(&inode->i_lock);
2262 if (pos > inode->i_size)
2263 i_size_write(inode, pos);
2264 spin_unlock(&inode->i_lock);
2267 unlock_page(page);
2268 page_cache_release(page);
2270 return rc;
2273 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2274 int datasync)
2276 unsigned int xid;
2277 int rc = 0;
2278 struct cifs_tcon *tcon;
2279 struct TCP_Server_Info *server;
2280 struct cifsFileInfo *smbfile = file->private_data;
2281 struct inode *inode = file_inode(file);
2282 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2284 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2285 if (rc)
2286 return rc;
2287 mutex_lock(&inode->i_mutex);
2289 xid = get_xid();
2291 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2292 file, datasync);
2294 if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2295 rc = cifs_zap_mapping(inode);
2296 if (rc) {
2297 cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2298 rc = 0; /* don't care about it in fsync */
2302 tcon = tlink_tcon(smbfile->tlink);
2303 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2304 server = tcon->ses->server;
2305 if (server->ops->flush)
2306 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2307 else
2308 rc = -ENOSYS;
2311 free_xid(xid);
2312 mutex_unlock(&inode->i_mutex);
2313 return rc;
2316 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2318 unsigned int xid;
2319 int rc = 0;
2320 struct cifs_tcon *tcon;
2321 struct TCP_Server_Info *server;
2322 struct cifsFileInfo *smbfile = file->private_data;
2323 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2324 struct inode *inode = file->f_mapping->host;
2326 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2327 if (rc)
2328 return rc;
2329 mutex_lock(&inode->i_mutex);
2331 xid = get_xid();
2333 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2334 file, datasync);
2336 tcon = tlink_tcon(smbfile->tlink);
2337 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2338 server = tcon->ses->server;
2339 if (server->ops->flush)
2340 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2341 else
2342 rc = -ENOSYS;
2345 free_xid(xid);
2346 mutex_unlock(&inode->i_mutex);
2347 return rc;
2351 * As the file closes, flush all cached write data for this inode,
2352 * checking for write-behind errors.
2354 int cifs_flush(struct file *file, fl_owner_t id)
2356 struct inode *inode = file_inode(file);
2357 int rc = 0;
2359 if (file->f_mode & FMODE_WRITE)
2360 rc = filemap_write_and_wait(inode->i_mapping);
2362 cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2364 return rc;
2367 static int
2368 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2370 int rc = 0;
2371 unsigned long i;
2373 for (i = 0; i < num_pages; i++) {
2374 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2375 if (!pages[i]) {
2377 * save number of pages we have already allocated and
2378 * return with ENOMEM error
2380 num_pages = i;
2381 rc = -ENOMEM;
2382 break;
2386 if (rc) {
2387 for (i = 0; i < num_pages; i++)
2388 put_page(pages[i]);
2390 return rc;
2393 static inline
2394 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2396 size_t num_pages;
2397 size_t clen;
2399 clen = min_t(const size_t, len, wsize);
2400 num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2402 if (cur_len)
2403 *cur_len = clen;
2405 return num_pages;
2408 static void
2409 cifs_uncached_writedata_release(struct kref *refcount)
2411 int i;
2412 struct cifs_writedata *wdata = container_of(refcount,
2413 struct cifs_writedata, refcount);
2415 for (i = 0; i < wdata->nr_pages; i++)
2416 put_page(wdata->pages[i]);
2417 cifs_writedata_release(refcount);
2420 static void
2421 cifs_uncached_writev_complete(struct work_struct *work)
2423 struct cifs_writedata *wdata = container_of(work,
2424 struct cifs_writedata, work);
2425 struct inode *inode = d_inode(wdata->cfile->dentry);
2426 struct cifsInodeInfo *cifsi = CIFS_I(inode);
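/* the write may have extended the file; push i_size out to the new server EOF under the inode lock */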
2428 spin_lock(&inode->i_lock);
2429 cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2430 if (cifsi->server_eof > inode->i_size)
2431 i_size_write(inode, cifsi->server_eof);
2432 spin_unlock(&inode->i_lock);
2434 complete(&wdata->done);
2436 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2439 static int
2440 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2441 size_t *len, unsigned long *num_pages)
2443 size_t save_len, copied, bytes, cur_len = *len;
2444 unsigned long i, nr_pages = *num_pages;
2446 save_len = cur_len;
2447 for (i = 0; i < nr_pages; i++) {
2448 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2449 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2450 cur_len -= copied;
2452 * If we didn't copy as much as we expected, then that
2453 * may mean we trod into an unmapped area. Stop copying
2454 * at that point. On the next pass through the big
2455 * loop, we'll likely end up getting a zero-length
2456 * write and bailing out of it.
2458 if (copied < bytes)
2459 break;
2461 cur_len = save_len - cur_len;
2462 *len = cur_len;
2465 * If we have no data to send, then that probably means that
2466 * the copy above failed altogether. That's most likely because
2467 * the address in the iovec was bogus. Return -EFAULT and let
2468 * the caller free anything we allocated and bail out.
2470 if (!cur_len)
2471 return -EFAULT;
2474 * i + 1 now represents the number of pages we actually used in
2475 * the copy phase above.
2477 *num_pages = i + 1;
2478 return 0;
2481 static int
2482 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2483 struct cifsFileInfo *open_file,
2484 struct cifs_sb_info *cifs_sb, struct list_head *wdata_list)
2486 int rc = 0;
2487 size_t cur_len;
2488 unsigned long nr_pages, num_pages, i;
2489 struct cifs_writedata *wdata;
2490 struct iov_iter saved_from;
2491 loff_t saved_offset = offset;
2492 pid_t pid;
2493 struct TCP_Server_Info *server;
2495 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2496 pid = open_file->pid;
2497 else
2498 pid = current->tgid;
2500 server = tlink_tcon(open_file->tlink)->ses->server;
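/* save the iterator state so an -EAGAIN retry can rewind and re-copy from the right position */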
2501 memcpy(&saved_from, from, sizeof(struct iov_iter));
2503 do {
2504 unsigned int wsize, credits;
2506 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2507 &wsize, &credits);
2508 if (rc)
2509 break;
2511 nr_pages = get_numpages(wsize, len, &cur_len);
2512 wdata = cifs_writedata_alloc(nr_pages,
2513 cifs_uncached_writev_complete);
2514 if (!wdata) {
2515 rc = -ENOMEM;
2516 add_credits_and_wake_if(server, credits, 0);
2517 break;
2520 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2521 if (rc) {
2522 kfree(wdata);
2523 add_credits_and_wake_if(server, credits, 0);
2524 break;
2527 num_pages = nr_pages;
2528 rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages);
2529 if (rc) {
2530 for (i = 0; i < nr_pages; i++)
2531 put_page(wdata->pages[i]);
2532 kfree(wdata);
2533 add_credits_and_wake_if(server, credits, 0);
2534 break;
2538 * Bring nr_pages down to the number of pages we actually used,
2539 * and free any pages that we didn't use.
2541 for ( ; nr_pages > num_pages; nr_pages--)
2542 put_page(wdata->pages[nr_pages - 1]);
2544 wdata->sync_mode = WB_SYNC_ALL;
2545 wdata->nr_pages = nr_pages;
2546 wdata->offset = (__u64)offset;
2547 wdata->cfile = cifsFileInfo_get(open_file);
2548 wdata->pid = pid;
2549 wdata->bytes = cur_len;
2550 wdata->pagesz = PAGE_SIZE;
2551 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2552 wdata->credits = credits;
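/* if the handle was invalidated by a reconnect, reopen it before issuing the async write */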
2554 if (!wdata->cfile->invalidHandle ||
2555 !(rc = cifs_reopen_file(wdata->cfile, false)))
2556 rc = server->ops->async_writev(wdata,
2557 cifs_uncached_writedata_release);
2558 if (rc) {
2559 add_credits_and_wake_if(server, wdata->credits, 0);
2560 kref_put(&wdata->refcount,
2561 cifs_uncached_writedata_release);
2562 if (rc == -EAGAIN) {
2563 memcpy(from, &saved_from,
2564 sizeof(struct iov_iter));
2565 iov_iter_advance(from, offset - saved_offset);
2566 continue;
2568 break;
2571 list_add_tail(&wdata->list, wdata_list);
2572 offset += cur_len;
2573 len -= cur_len;
2574 } while (len > 0);
2576 return rc;
2579 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
2581 struct file *file = iocb->ki_filp;
2582 ssize_t total_written = 0;
2583 struct cifsFileInfo *open_file;
2584 struct cifs_tcon *tcon;
2585 struct cifs_sb_info *cifs_sb;
2586 struct cifs_writedata *wdata, *tmp;
2587 struct list_head wdata_list;
2588 struct iov_iter saved_from;
2589 int rc;
2592 * BB - optimize for the case when signing is disabled: we could drop
2593 * this extra memory-to-memory copy and construct the write request
2594 * directly from the iovec buffers.
2597 rc = generic_write_checks(iocb, from);
2598 if (rc <= 0)
2599 return rc;
2601 INIT_LIST_HEAD(&wdata_list);
2602 cifs_sb = CIFS_FILE_SB(file);
2603 open_file = file->private_data;
2604 tcon = tlink_tcon(open_file->tlink);
2606 if (!tcon->ses->server->ops->async_writev)
2607 return -ENOSYS;
2609 memcpy(&saved_from, from, sizeof(struct iov_iter));
2611 rc = cifs_write_from_iter(iocb->ki_pos, iov_iter_count(from), from,
2612 open_file, cifs_sb, &wdata_list);
2615 * If at least one write was successfully sent, then discard any rc
2616 * value from the later writes. If the remaining writes succeed, then
2617 * we'll end up returning whatever was written. If they fail, then
2618 * we'll get a new rc value from that.
2620 if (!list_empty(&wdata_list))
2621 rc = 0;
2624 * Wait for and collect replies for any successful sends in order of
2625 * increasing offset. Once an error is hit or we get a fatal signal
2626 * while waiting, then return without waiting for any more replies.
2628 restart_loop:
2629 list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2630 if (!rc) {
2631 /* FIXME: freezable too? */
2632 rc = wait_for_completion_killable(&wdata->done);
2633 if (rc)
2634 rc = -EINTR;
2635 else if (wdata->result)
2636 rc = wdata->result;
2637 else
2638 total_written += wdata->bytes;
2640 /* resend call if it's a retryable error */
2641 if (rc == -EAGAIN) {
2642 struct list_head tmp_list;
2643 struct iov_iter tmp_from;
2645 INIT_LIST_HEAD(&tmp_list);
2646 list_del_init(&wdata->list);
2648 memcpy(&tmp_from, &saved_from,
2649 sizeof(struct iov_iter));
2650 iov_iter_advance(&tmp_from,
2651 wdata->offset - iocb->ki_pos);
2653 rc = cifs_write_from_iter(wdata->offset,
2654 wdata->bytes, &tmp_from,
2655 open_file, cifs_sb, &tmp_list);
2657 list_splice(&tmp_list, &wdata_list);
2659 kref_put(&wdata->refcount,
2660 cifs_uncached_writedata_release);
2661 goto restart_loop;
2664 list_del_init(&wdata->list);
2665 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2668 if (unlikely(!total_written))
2669 return rc;
2671 iocb->ki_pos += total_written;
2672 set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(file_inode(file))->flags);
2673 cifs_stats_bytes_written(tcon, total_written);
2674 return total_written;
2677 static ssize_t
2678 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
2680 struct file *file = iocb->ki_filp;
2681 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2682 struct inode *inode = file->f_mapping->host;
2683 struct cifsInodeInfo *cinode = CIFS_I(inode);
2684 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2685 ssize_t rc;
2688 * We need to hold the sem to be sure nobody modifies lock list
2689 * with a brlock that prevents writing.
2691 down_read(&cinode->lock_sem);
2692 mutex_lock(&inode->i_mutex);
2694 rc = generic_write_checks(iocb, from);
2695 if (rc <= 0)
2696 goto out;
2698 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
2699 server->vals->exclusive_lock_type, NULL,
2700 CIFS_WRITE_OP))
2701 rc = __generic_file_write_iter(iocb, from);
2702 else
2703 rc = -EACCES;
2704 out:
2705 mutex_unlock(&inode->i_mutex);
2707 if (rc > 0) {
2708 ssize_t err = generic_write_sync(file, iocb->ki_pos - rc, rc);
2709 if (err < 0)
2710 rc = err;
2712 up_read(&cinode->lock_sem);
2713 return rc;
2716 ssize_t
2717 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
2719 struct inode *inode = file_inode(iocb->ki_filp);
2720 struct cifsInodeInfo *cinode = CIFS_I(inode);
2721 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2722 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2723 iocb->ki_filp->private_data;
2724 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2725 ssize_t written;
2727 written = cifs_get_writer(cinode);
2728 if (written)
2729 return written;
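/* with a write oplock we can cache writes; POSIX-capable mounts take the generic path, others go through cifs_writev to honor mandatory brlocks */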
2731 if (CIFS_CACHE_WRITE(cinode)) {
2732 if (cap_unix(tcon->ses) &&
2733 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2734 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
2735 written = generic_file_write_iter(iocb, from);
2736 goto out;
2738 written = cifs_writev(iocb, from);
2739 goto out;
2742 * For non-oplocked files in strict cache mode we need to write the data
2743 * to the server exactly from pos to pos+len-1 rather than flush all
2744 * affected pages because that may cause an error with mandatory locks on
2745 * these pages but not on the region from pos to pos+len-1.
2747 written = cifs_user_writev(iocb, from);
2748 if (written > 0 && CIFS_CACHE_READ(cinode)) {
2750 * A Windows 7 server can delay breaking a level2 oplock when a write
2751 * request comes in - break it on the client to prevent reading
2752 * stale data.
2754 cifs_zap_mapping(inode);
2755 cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
2756 inode);
2757 cinode->oplock = 0;
2759 out:
2760 cifs_put_writer(cinode);
2761 return written;
2764 static struct cifs_readdata *
2765 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2767 struct cifs_readdata *rdata;
2769 rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2770 GFP_KERNEL);
2771 if (rdata != NULL) {
2772 kref_init(&rdata->refcount);
2773 INIT_LIST_HEAD(&rdata->list);
2774 init_completion(&rdata->done);
2775 INIT_WORK(&rdata->work, complete);
2778 return rdata;
2781 void
2782 cifs_readdata_release(struct kref *refcount)
2784 struct cifs_readdata *rdata = container_of(refcount,
2785 struct cifs_readdata, refcount);
2787 if (rdata->cfile)
2788 cifsFileInfo_put(rdata->cfile);
2790 kfree(rdata);
2793 static int
2794 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2796 int rc = 0;
2797 struct page *page;
2798 unsigned int i;
2800 for (i = 0; i < nr_pages; i++) {
2801 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2802 if (!page) {
2803 rc = -ENOMEM;
2804 break;
2806 rdata->pages[i] = page;
2809 if (rc) {
2810 for (i = 0; i < nr_pages; i++) {
2811 put_page(rdata->pages[i]);
2812 rdata->pages[i] = NULL;
2815 return rc;
2818 static void
2819 cifs_uncached_readdata_release(struct kref *refcount)
2821 struct cifs_readdata *rdata = container_of(refcount,
2822 struct cifs_readdata, refcount);
2823 unsigned int i;
2825 for (i = 0; i < rdata->nr_pages; i++) {
2826 put_page(rdata->pages[i]);
2827 rdata->pages[i] = NULL;
2829 cifs_readdata_release(refcount);
2833 * cifs_readdata_to_iov - copy data from pages in response to an iovec
2834 * @rdata: the readdata response with list of pages holding data
2835 * @iter: destination for our data
2837 * This function copies data from a list of pages in a readdata response into
2838 * an array of iovecs. It will first calculate where the data should go
2839 * based on the info in the readdata and then copy the data into that spot.
2841 static int
2842 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
2844 size_t remaining = rdata->got_bytes;
2845 unsigned int i;
2847 for (i = 0; i < rdata->nr_pages; i++) {
2848 struct page *page = rdata->pages[i];
2849 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
2850 size_t written = copy_page_to_iter(page, 0, copy, iter);
2851 remaining -= written;
2852 if (written < copy && iov_iter_count(iter) > 0)
2853 break;
2855 return remaining ? -EFAULT : 0;
2858 static void
2859 cifs_uncached_readv_complete(struct work_struct *work)
2861 struct cifs_readdata *rdata = container_of(work,
2862 struct cifs_readdata, work);
2864 complete(&rdata->done);
2865 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2868 static int
2869 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
2870 struct cifs_readdata *rdata, unsigned int len)
2872 int result = 0;
2873 unsigned int i;
2874 unsigned int nr_pages = rdata->nr_pages;
2875 struct kvec iov;
2877 rdata->got_bytes = 0;
2878 rdata->tailsz = PAGE_SIZE;
2879 for (i = 0; i < nr_pages; i++) {
2880 struct page *page = rdata->pages[i];
2882 if (len >= PAGE_SIZE) {
2883 /* enough data to fill the page */
2884 iov.iov_base = kmap(page);
2885 iov.iov_len = PAGE_SIZE;
2886 cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
2887 i, iov.iov_base, iov.iov_len);
2888 len -= PAGE_SIZE;
2889 } else if (len > 0) {
2890 /* enough for partial page, fill and zero the rest */
2891 iov.iov_base = kmap(page);
2892 iov.iov_len = len;
2893 cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
2894 i, iov.iov_base, iov.iov_len);
2895 memset(iov.iov_base + len, '\0', PAGE_SIZE - len);
2896 rdata->tailsz = len;
2897 len = 0;
2898 } else {
2899 /* no need to hold page hostage */
2900 rdata->pages[i] = NULL;
2901 rdata->nr_pages--;
2902 put_page(page);
2903 continue;
2906 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
2907 kunmap(page);
2908 if (result < 0)
2909 break;
2911 rdata->got_bytes += result;
2914 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
2915 rdata->got_bytes : result;
2918 static int
2919 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
2920 struct cifs_sb_info *cifs_sb, struct list_head *rdata_list)
2922 struct cifs_readdata *rdata;
2923 unsigned int npages, rsize, credits;
2924 size_t cur_len;
2925 int rc;
2926 pid_t pid;
2927 struct TCP_Server_Info *server;
2929 server = tlink_tcon(open_file->tlink)->ses->server;
2931 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2932 pid = open_file->pid;
2933 else
2934 pid = current->tgid;
2936 do {
2937 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
2938 &rsize, &credits);
2939 if (rc)
2940 break;
2942 cur_len = min_t(const size_t, len, rsize);
2943 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2945 /* allocate a readdata struct */
2946 rdata = cifs_readdata_alloc(npages,
2947 cifs_uncached_readv_complete);
2948 if (!rdata) {
2949 add_credits_and_wake_if(server, credits, 0);
2950 rc = -ENOMEM;
2951 break;
2954 rc = cifs_read_allocate_pages(rdata, npages);
2955 if (rc)
2956 goto error;
2958 rdata->cfile = cifsFileInfo_get(open_file);
2959 rdata->nr_pages = npages;
2960 rdata->offset = offset;
2961 rdata->bytes = cur_len;
2962 rdata->pid = pid;
2963 rdata->pagesz = PAGE_SIZE;
2964 rdata->read_into_pages = cifs_uncached_read_into_pages;
2965 rdata->credits = credits;
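/* as on the write path, reopen a stale handle before issuing the async read */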
2967 if (!rdata->cfile->invalidHandle ||
2968 !(rc = cifs_reopen_file(rdata->cfile, true)))
2969 rc = server->ops->async_readv(rdata);
2970 error:
2971 if (rc) {
2972 add_credits_and_wake_if(server, rdata->credits, 0);
2973 kref_put(&rdata->refcount,
2974 cifs_uncached_readdata_release);
2975 if (rc == -EAGAIN)
2976 continue;
2977 break;
2980 list_add_tail(&rdata->list, rdata_list);
2981 offset += cur_len;
2982 len -= cur_len;
2983 } while (len > 0);
2985 return rc;
2988 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
2990 struct file *file = iocb->ki_filp;
2991 ssize_t rc;
2992 size_t len;
2993 ssize_t total_read = 0;
2994 loff_t offset = iocb->ki_pos;
2995 struct cifs_sb_info *cifs_sb;
2996 struct cifs_tcon *tcon;
2997 struct cifsFileInfo *open_file;
2998 struct cifs_readdata *rdata, *tmp;
2999 struct list_head rdata_list;
3001 len = iov_iter_count(to);
3002 if (!len)
3003 return 0;
3005 INIT_LIST_HEAD(&rdata_list);
3006 cifs_sb = CIFS_FILE_SB(file);
3007 open_file = file->private_data;
3008 tcon = tlink_tcon(open_file->tlink);
3010 if (!tcon->ses->server->ops->async_readv)
3011 return -ENOSYS;
3013 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3014 cifs_dbg(FYI, "attempting read on write only file instance\n");
3016 rc = cifs_send_async_read(offset, len, open_file, cifs_sb, &rdata_list);
3018 /* if sending at least one read request succeeded, then reset rc */
3019 if (!list_empty(&rdata_list))
3020 rc = 0;
3022 len = iov_iter_count(to);
3023 /* the loop below should proceed in the order of increasing offsets */
3024 again:
3025 list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
3026 if (!rc) {
3027 /* FIXME: freezable sleep too? */
3028 rc = wait_for_completion_killable(&rdata->done);
3029 if (rc)
3030 rc = -EINTR;
3031 else if (rdata->result == -EAGAIN) {
3032 /* resend call if it's a retryable error */
3033 struct list_head tmp_list;
3034 unsigned int got_bytes = rdata->got_bytes;
3036 list_del_init(&rdata->list);
3037 INIT_LIST_HEAD(&tmp_list);
3040 * We got part of the data and then a reconnect
3041 * happened -- fill the buffer and continue
3042 * reading.
3044 if (got_bytes && got_bytes < rdata->bytes) {
3045 rc = cifs_readdata_to_iov(rdata, to);
3046 if (rc) {
3047 kref_put(&rdata->refcount,
3048 cifs_uncached_readdata_release);
3049 continue;
3053 rc = cifs_send_async_read(
3054 rdata->offset + got_bytes,
3055 rdata->bytes - got_bytes,
3056 rdata->cfile, cifs_sb,
3057 &tmp_list);
3059 list_splice(&tmp_list, &rdata_list);
3061 kref_put(&rdata->refcount,
3062 cifs_uncached_readdata_release);
3063 goto again;
3064 } else if (rdata->result)
3065 rc = rdata->result;
3066 else
3067 rc = cifs_readdata_to_iov(rdata, to);
3069 /* if there was a short read -- discard anything left */
3070 if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3071 rc = -ENODATA;
3073 list_del_init(&rdata->list);
3074 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3077 total_read = len - iov_iter_count(to);
3079 cifs_stats_bytes_read(tcon, total_read);
3081 /* mask nodata case */
3082 if (rc == -ENODATA)
3083 rc = 0;
3085 if (total_read) {
3086 iocb->ki_pos += total_read;
3087 return total_read;
3089 return rc;
3092 ssize_t
3093 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3095 struct inode *inode = file_inode(iocb->ki_filp);
3096 struct cifsInodeInfo *cinode = CIFS_I(inode);
3097 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3098 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3099 iocb->ki_filp->private_data;
3100 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3101 int rc = -EACCES;
3104 * In strict cache mode we need to read from the server all the time
3105 * if we don't have a level II oplock, because the server can delay the
3106 * mtime change - so we can't make a decision about invalidating the
3107 * inode. Reading from the page cache can also fail if there are
3108 * mandatory locks on pages affected by this read but not on the region
3109 * from pos to pos+len-1.
3111 if (!CIFS_CACHE_READ(cinode))
3112 return cifs_user_readv(iocb, to);
3114 if (cap_unix(tcon->ses) &&
3115 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3116 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3117 return generic_file_read_iter(iocb, to);
3120 * We need to hold the sem to be sure nobody modifies lock list
3121 * with a brlock that prevents reading.
3123 down_read(&cinode->lock_sem);
3124 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
3125 tcon->ses->server->vals->shared_lock_type,
3126 NULL, CIFS_READ_OP))
3127 rc = generic_file_read_iter(iocb, to);
3128 up_read(&cinode->lock_sem);
3129 return rc;
3132 static ssize_t
3133 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
3135 int rc = -EACCES;
3136 unsigned int bytes_read = 0;
3137 unsigned int total_read;
3138 unsigned int current_read_size;
3139 unsigned int rsize;
3140 struct cifs_sb_info *cifs_sb;
3141 struct cifs_tcon *tcon;
3142 struct TCP_Server_Info *server;
3143 unsigned int xid;
3144 char *cur_offset;
3145 struct cifsFileInfo *open_file;
3146 struct cifs_io_parms io_parms;
3147 int buf_type = CIFS_NO_BUFFER;
3148 __u32 pid;
3150 xid = get_xid();
3151 cifs_sb = CIFS_FILE_SB(file);
3153 /* FIXME: set up handlers for larger reads and/or convert to async */
3154 rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
3156 if (file->private_data == NULL) {
3157 rc = -EBADF;
3158 free_xid(xid);
3159 return rc;
3161 open_file = file->private_data;
3162 tcon = tlink_tcon(open_file->tlink);
3163 server = tcon->ses->server;
3165 if (!server->ops->sync_read) {
3166 free_xid(xid);
3167 return -ENOSYS;
3170 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3171 pid = open_file->pid;
3172 else
3173 pid = current->tgid;
3175 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3176 cifs_dbg(FYI, "attempting read on write only file instance\n");
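/* issue synchronous reads, at most rsize bytes at a time, retrying each chunk on -EAGAIN */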
3178 for (total_read = 0, cur_offset = read_data; read_size > total_read;
3179 total_read += bytes_read, cur_offset += bytes_read) {
3180 do {
3181 current_read_size = min_t(uint, read_size - total_read,
3182 rsize);
3184 * For Windows ME and 9x we do not want to request more
3185 * than the server negotiated, since it would refuse the
3186 * read then.
3188 if ((tcon->ses) && !(tcon->ses->capabilities &
3189 tcon->ses->server->vals->cap_large_files)) {
3190 current_read_size = min_t(uint,
3191 current_read_size, CIFSMaxBufSize);
3193 if (open_file->invalidHandle) {
3194 rc = cifs_reopen_file(open_file, true);
3195 if (rc != 0)
3196 break;
3198 io_parms.pid = pid;
3199 io_parms.tcon = tcon;
3200 io_parms.offset = *offset;
3201 io_parms.length = current_read_size;
3202 rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
3203 &bytes_read, &cur_offset,
3204 &buf_type);
3205 } while (rc == -EAGAIN);
3207 if (rc || (bytes_read == 0)) {
3208 if (total_read) {
3209 break;
3210 } else {
3211 free_xid(xid);
3212 return rc;
3214 } else {
3215 cifs_stats_bytes_read(tcon, total_read);
3216 *offset += bytes_read;
3219 free_xid(xid);
3220 return total_read;
3224 * If the page is mmap'ed into a process' page tables, then we need to make
3225 * sure that it doesn't change while being written back.
3227 static int
3228 cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
3230 struct page *page = vmf->page;
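/* return with the page locked; VM_FAULT_LOCKED tells the fault handler it is already locked */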
3232 lock_page(page);
3233 return VM_FAULT_LOCKED;
3236 static const struct vm_operations_struct cifs_file_vm_ops = {
3237 .fault = filemap_fault,
3238 .map_pages = filemap_map_pages,
3239 .page_mkwrite = cifs_page_mkwrite,
3242 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3244 int xid, rc = 0;
3245 struct inode *inode = file_inode(file);
3247 xid = get_xid();
3249 if (!CIFS_CACHE_READ(CIFS_I(inode)))
3250 rc = cifs_zap_mapping(inode);
3251 if (!rc)
3252 rc = generic_file_mmap(file, vma);
3253 if (!rc)
3254 vma->vm_ops = &cifs_file_vm_ops;
3256 free_xid(xid);
3257 return rc;
3260 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3262 int rc, xid;
3264 xid = get_xid();
3266 rc = cifs_revalidate_file(file);
3267 if (rc)
3268 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3269 rc);
3270 if (!rc)
3271 rc = generic_file_mmap(file, vma);
3272 if (!rc)
3273 vma->vm_ops = &cifs_file_vm_ops;
3275 free_xid(xid);
3276 return rc;
3279 static void
3280 cifs_readv_complete(struct work_struct *work)
3282 unsigned int i, got_bytes;
3283 struct cifs_readdata *rdata = container_of(work,
3284 struct cifs_readdata, work);
3286 got_bytes = rdata->got_bytes;
3287 for (i = 0; i < rdata->nr_pages; i++) {
3288 struct page *page = rdata->pages[i];
3290 lru_cache_add_file(page);
3292 if (rdata->result == 0 ||
3293 (rdata->result == -EAGAIN && got_bytes)) {
3294 flush_dcache_page(page);
3295 SetPageUptodate(page);
3298 unlock_page(page);
3300 if (rdata->result == 0 ||
3301 (rdata->result == -EAGAIN && got_bytes))
3302 cifs_readpage_to_fscache(rdata->mapping->host, page);
3304 got_bytes -= min_t(unsigned int, PAGE_CACHE_SIZE, got_bytes);
3306 page_cache_release(page);
3307 rdata->pages[i] = NULL;
3309 kref_put(&rdata->refcount, cifs_readdata_release);
3312 static int
3313 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3314 struct cifs_readdata *rdata, unsigned int len)
3316 int result = 0;
3317 unsigned int i;
3318 u64 eof;
3319 pgoff_t eof_index;
3320 unsigned int nr_pages = rdata->nr_pages;
3321 struct kvec iov;
3323 /* determine the eof that the server (probably) has */
3324 eof = CIFS_I(rdata->mapping->host)->server_eof;
3325 eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
3326 cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
3328 rdata->got_bytes = 0;
3329 rdata->tailsz = PAGE_CACHE_SIZE;
3330 for (i = 0; i < nr_pages; i++) {
3331 struct page *page = rdata->pages[i];
3333 if (len >= PAGE_CACHE_SIZE) {
3334 /* enough data to fill the page */
3335 iov.iov_base = kmap(page);
3336 iov.iov_len = PAGE_CACHE_SIZE;
3337 cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
3338 i, page->index, iov.iov_base, iov.iov_len);
3339 len -= PAGE_CACHE_SIZE;
3340 } else if (len > 0) {
3341 /* enough for partial page, fill and zero the rest */
3342 iov.iov_base = kmap(page);
3343 iov.iov_len = len;
3344 cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
3345 i, page->index, iov.iov_base, iov.iov_len);
3346 memset(iov.iov_base + len,
3347 '\0', PAGE_CACHE_SIZE - len);
3348 rdata->tailsz = len;
3349 len = 0;
3350 } else if (page->index > eof_index) {
3352 * The VFS will not try to do readahead past the
3353 * i_size, but it's possible that we have outstanding
3354 * writes with gaps in the middle and the i_size hasn't
3355 * caught up yet. Populate those with zeroed out pages
3356 * to prevent the VFS from repeatedly attempting to
3357 * fill them until the writes are flushed.
3359 zero_user(page, 0, PAGE_CACHE_SIZE);
3360 lru_cache_add_file(page);
3361 flush_dcache_page(page);
3362 SetPageUptodate(page);
3363 unlock_page(page);
3364 page_cache_release(page);
3365 rdata->pages[i] = NULL;
3366 rdata->nr_pages--;
3367 continue;
3368 } else {
3369 /* no need to hold page hostage */
3370 lru_cache_add_file(page);
3371 unlock_page(page);
3372 page_cache_release(page);
3373 rdata->pages[i] = NULL;
3374 rdata->nr_pages--;
3375 continue;
3378 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
3379 kunmap(page);
3380 if (result < 0)
3381 break;
3383 rdata->got_bytes += result;
3386 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3387 rdata->got_bytes : result;
3390 static int
3391 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
3392 unsigned int rsize, struct list_head *tmplist,
3393 unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
3395 struct page *page, *tpage;
3396 unsigned int expected_index;
3397 int rc;
3398 gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL);
3400 INIT_LIST_HEAD(tmplist);
3402 page = list_entry(page_list->prev, struct page, lru);
3405 * Lock the page and put it in the cache. Since no one else
3406 * should have access to this page, we're safe to simply set
3407 * PG_locked without checking it first.
3409 __set_page_locked(page);
3410 rc = add_to_page_cache_locked(page, mapping,
3411 page->index, gfp);
3413 /* give up if we can't stick it in the cache */
3414 if (rc) {
3415 __clear_page_locked(page);
3416 return rc;
3419 /* move first page to the tmplist */
3420 *offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3421 *bytes = PAGE_CACHE_SIZE;
3422 *nr_pages = 1;
3423 list_move_tail(&page->lru, tmplist);
3425 /* now try and add more pages onto the request */
3426 expected_index = page->index + 1;
3427 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3428 /* discontinuity ? */
3429 if (page->index != expected_index)
3430 break;
3432 /* would this page push the read over the rsize? */
3433 if (*bytes + PAGE_CACHE_SIZE > rsize)
3434 break;
3436 __set_page_locked(page);
3437 if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
3438 __clear_page_locked(page);
3439 break;
3441 list_move_tail(&page->lru, tmplist);
3442 (*bytes) += PAGE_CACHE_SIZE;
3443 expected_index++;
3444 (*nr_pages)++;
3446 return rc;
3449 static int cifs_readpages(struct file *file, struct address_space *mapping,
3450 struct list_head *page_list, unsigned num_pages)
3452 int rc;
3453 struct list_head tmplist;
3454 struct cifsFileInfo *open_file = file->private_data;
3455 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3456 struct TCP_Server_Info *server;
3457 pid_t pid;
3460 * Reads as many pages as possible from fscache. Returns -ENOBUFS
3461 * immediately if the cookie is negative
3463 * After this point, every page in the list might have PG_fscache set,
3464 * so we will need to clean that up off of every page we don't use.
3466 rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
3467 &num_pages);
3468 if (rc == 0)
3469 return rc;
3471 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3472 pid = open_file->pid;
3473 else
3474 pid = current->tgid;
3476 rc = 0;
3477 server = tlink_tcon(open_file->tlink)->ses->server;
3479 cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
3480 __func__, file, mapping, num_pages);
3483 * Start with the page at end of list and move it to private
3484 * list. Do the same with any following pages until we hit
3485 * the rsize limit, hit an index discontinuity, or run out of
3486 * pages. Issue the async read and then start the loop again
3487 * until the list is empty.
3489 * Note that list order is important. The page_list is in
3490 * the order of declining indexes. When we put the pages in
3491 * the rdata->pages, then we want them in increasing order.
3493 while (!list_empty(page_list)) {
3494 unsigned int i, nr_pages, bytes, rsize;
3495 loff_t offset;
3496 struct page *page, *tpage;
3497 struct cifs_readdata *rdata;
3498 unsigned credits;
3500 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3501 &rsize, &credits);
3502 if (rc)
3503 break;
3506 * Give up immediately if rsize is too small to read an entire
3507 * page. The VFS will fall back to readpage. We should never
3508 * reach this point, however, since we set ra_pages to 0 when the
3509 * rsize is smaller than a cache page.
3511 if (unlikely(rsize < PAGE_CACHE_SIZE)) {
3512 add_credits_and_wake_if(server, credits, 0);
3513 return 0;
3516 rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
3517 &nr_pages, &offset, &bytes);
3518 if (rc) {
3519 add_credits_and_wake_if(server, credits, 0);
3520 break;
3523 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3524 if (!rdata) {
3525 /* best to give up if we're out of mem */
3526 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3527 list_del(&page->lru);
3528 lru_cache_add_file(page);
3529 unlock_page(page);
3530 page_cache_release(page);
3532 rc = -ENOMEM;
3533 add_credits_and_wake_if(server, credits, 0);
3534 break;
3537 rdata->cfile = cifsFileInfo_get(open_file);
3538 rdata->mapping = mapping;
3539 rdata->offset = offset;
3540 rdata->bytes = bytes;
3541 rdata->pid = pid;
3542 rdata->pagesz = PAGE_CACHE_SIZE;
3543 rdata->read_into_pages = cifs_readpages_read_into_pages;
3544 rdata->credits = credits;
3546 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3547 list_del(&page->lru);
3548 rdata->pages[rdata->nr_pages++] = page;
3551 if (!rdata->cfile->invalidHandle ||
3552 !(rc = cifs_reopen_file(rdata->cfile, true)))
3553 rc = server->ops->async_readv(rdata);
3554 if (rc) {
3555 add_credits_and_wake_if(server, rdata->credits, 0);
3556 for (i = 0; i < rdata->nr_pages; i++) {
3557 page = rdata->pages[i];
3558 lru_cache_add_file(page);
3559 unlock_page(page);
3560 page_cache_release(page);
3562 /* Fall back to readpage in error/reconnect cases */
3563 kref_put(&rdata->refcount, cifs_readdata_release);
3564 break;
3567 kref_put(&rdata->refcount, cifs_readdata_release);
3570 /* Any pages that have been shown to fscache but didn't get added to
3571 * the pagecache must be uncached before they get returned to the
3572 * allocator.
3574 cifs_fscache_readpages_cancel(mapping->host, page_list);
3575 return rc;
3579 * cifs_readpage_worker must be called with the page pinned
3581 static int cifs_readpage_worker(struct file *file, struct page *page,
3582 loff_t *poffset)
3584 char *read_data;
3585 int rc;
3587 /* Is the page cached? */
3588 rc = cifs_readpage_from_fscache(file_inode(file), page);
3589 if (rc == 0)
3590 goto read_complete;
3592 read_data = kmap(page);
3593 /* for reads over a certain size we could initiate async read-ahead */
3595 rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
3597 if (rc < 0)
3598 goto io_error;
3599 else
3600 cifs_dbg(FYI, "Bytes read %d\n", rc);
3602 file_inode(file)->i_atime =
3603 current_fs_time(file_inode(file)->i_sb);
3605 if (PAGE_CACHE_SIZE > rc)
3606 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
3608 flush_dcache_page(page);
3609 SetPageUptodate(page);
3611 /* send this page to the cache */
3612 cifs_readpage_to_fscache(file_inode(file), page);
3614 rc = 0;
3616 io_error:
3617 kunmap(page);
3618 unlock_page(page);
3620 read_complete:
3621 return rc;
3624 static int cifs_readpage(struct file *file, struct page *page)
3626 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3627 int rc = -EACCES;
3628 unsigned int xid;
3630 xid = get_xid();
3632 if (file->private_data == NULL) {
3633 rc = -EBADF;
3634 free_xid(xid);
3635 return rc;
3638 cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
3639 page, (int)offset, (int)offset);
3641 rc = cifs_readpage_worker(file, page, &offset);
3643 free_xid(xid);
3644 return rc;
3647 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3649 struct cifsFileInfo *open_file;
3650 struct cifs_tcon *tcon =
3651 cifs_sb_master_tcon(CIFS_SB(cifs_inode->vfs_inode.i_sb));
3653 spin_lock(&tcon->open_file_lock);
3654 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3655 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3656 spin_unlock(&tcon->open_file_lock);
3657 return 1;
3660 spin_unlock(&tcon->open_file_lock);
3661 return 0;
3664 /* We do not want to update the file size from the server for inodes
3665 open for write, to avoid races with writepage extending
3666 the file. In the future we could consider allowing the inode
3667 to be refreshed only on increases in the file size,
3668 but this is tricky to do without racing with write-behind
3669 page caching in the current Linux kernel design */
3670 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3672 if (!cifsInode)
3673 return true;
3675 if (is_inode_writable(cifsInode)) {
3676 /* This inode is open for write at least once */
3677 struct cifs_sb_info *cifs_sb;
3679 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3680 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3681 /* since there is no page cache to corrupt on direct I/O,
3682 we can change the size safely */
3683 return true;
3686 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
3687 return true;
3689 return false;
3690 } else
3691 return true;
3694 static int cifs_write_begin(struct file *file, struct address_space *mapping,
3695 loff_t pos, unsigned len, unsigned flags,
3696 struct page **pagep, void **fsdata)
3698 int oncethru = 0;
3699 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
3700 loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
3701 loff_t page_start = pos & PAGE_MASK;
3702 loff_t i_size;
3703 struct page *page;
3704 int rc = 0;
3706 cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
3708 start:
3709 page = grab_cache_page_write_begin(mapping, index, flags);
3710 if (!page) {
3711 rc = -ENOMEM;
3712 goto out;
3715 if (PageUptodate(page))
3716 goto out;
3719 * If we write a full page it will be up to date, no need to read from
3720 * the server. If the write is short, we'll end up doing a sync write
3721 * instead.
3723 if (len == PAGE_CACHE_SIZE)
3724 goto out;
3727 * optimize away the read when we have an oplock, and we're not
3728 * expecting to use any of the data we'd be reading in. That
3729 * is, when the page lies beyond the EOF, or straddles the EOF
3730 * and the write will cover all of the existing data.
3732 if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
3733 i_size = i_size_read(mapping->host);
3734 if (page_start >= i_size ||
3735 (offset == 0 && (pos + len) >= i_size)) {
3736 zero_user_segments(page, 0, offset,
3737 offset + len,
3738 PAGE_CACHE_SIZE);
3740 * PageChecked means that the parts of the page
3741 * to which we're not writing are considered up
3742 * to date. Once the data is copied to the
3743 * page, it can be set uptodate.
3745 SetPageChecked(page);
3746 goto out;
3750 if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
3752 * might as well read a page, it is fast enough. If we get
3753 * an error, we don't need to return it. cifs_write_end will
3754 * do a sync write instead since PG_uptodate isn't set.
3756 cifs_readpage_worker(file, page, &page_start);
3757 page_cache_release(page);
3758 oncethru = 1;
3759 goto start;
3760 } else {
3761 /* we could try using another file handle if there is one -
3762 but how would we lock it to prevent a close of that handle
3763 racing with this read? In any case,
3764 this will be written out by write_end, so it is fine */
3766 out:
3767 *pagep = page;
3768 return rc;
3771 static int cifs_release_page(struct page *page, gfp_t gfp)
3773 if (PagePrivate(page))
3774 return 0;
3776 return cifs_fscache_release_page(page, gfp);
3779 static void cifs_invalidate_page(struct page *page, unsigned int offset,
3780 unsigned int length)
3782 struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3784 if (offset == 0 && length == PAGE_CACHE_SIZE)
3785 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
3788 static int cifs_launder_page(struct page *page)
3790 int rc = 0;
3791 loff_t range_start = page_offset(page);
3792 loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
3793 struct writeback_control wbc = {
3794 .sync_mode = WB_SYNC_ALL,
3795 .nr_to_write = 0,
3796 .range_start = range_start,
3797 .range_end = range_end,
3800 cifs_dbg(FYI, "Launder page: %p\n", page);
3802 if (clear_page_dirty_for_io(page))
3803 rc = cifs_writepage_locked(page, &wbc);
3805 cifs_fscache_invalidate_page(page, page->mapping->host);
3806 return rc;
3809 void cifs_oplock_break(struct work_struct *work)
3811 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
3812 oplock_break);
3813 struct inode *inode = d_inode(cfile->dentry);
3814 struct cifsInodeInfo *cinode = CIFS_I(inode);
3815 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3816 struct TCP_Server_Info *server = tcon->ses->server;
3817 int rc = 0;
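/* let any in-flight writers drain before downgrading the oplock */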
3819 wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
3820 TASK_UNINTERRUPTIBLE);
3822 server->ops->downgrade_oplock(server, cinode,
3823 test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
3825 if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
3826 cifs_has_mand_locks(cinode)) {
3827 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
3828 inode);
3829 cinode->oplock = 0;
3832 if (inode && S_ISREG(inode->i_mode)) {
3833 if (CIFS_CACHE_READ(cinode))
3834 break_lease(inode, O_RDONLY);
3835 else
3836 break_lease(inode, O_WRONLY);
3837 rc = filemap_fdatawrite(inode->i_mapping);
3838 if (!CIFS_CACHE_READ(cinode)) {
3839 rc = filemap_fdatawait(inode->i_mapping);
3840 mapping_set_error(inode->i_mapping, rc);
3841 cifs_zap_mapping(inode);
3843 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
3846 rc = cifs_push_locks(cfile);
3847 if (rc)
3848 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
3851 * Releasing a stale oplock after a recent reconnect of the SMB session,
3852 * using a now-incorrect file handle, is not a data integrity issue, but
3853 * do not bother sending an oplock release if the session to the server
3854 * is still disconnected, since the oplock was already released by the server
3856 if (!cfile->oplock_break_cancelled) {
3857 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
3858 cinode);
3859 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
3861 cifs_done_oplock_break(cinode);
3865 * The presence of cifs_direct_io() in the address space ops vector
3866 * allows the open() O_DIRECT flag, which would have failed otherwise.
3868 * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
3869 * so this method should never be called.
3871 * Direct IO is not yet supported in the cached mode.
3873 static ssize_t
3874 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
3877 * FIXME
3878 * Eventually we need to support direct IO for non-forcedirectio mounts
3880 return -EINVAL;
3884 const struct address_space_operations cifs_addr_ops = {
3885 .readpage = cifs_readpage,
3886 .readpages = cifs_readpages,
3887 .writepage = cifs_writepage,
3888 .writepages = cifs_writepages,
3889 .write_begin = cifs_write_begin,
3890 .write_end = cifs_write_end,
3891 .set_page_dirty = __set_page_dirty_nobuffers,
3892 .releasepage = cifs_release_page,
3893 .direct_IO = cifs_direct_io,
3894 .invalidatepage = cifs_invalidate_page,
3895 .launder_page = cifs_launder_page,
3899 * cifs_readpages requires the server to support a buffer large enough to
3900 * contain the header plus one complete page of data. Otherwise, we need
3901 * to leave cifs_readpages out of the address space operations.
3903 const struct address_space_operations cifs_addr_ops_smallbuf = {
3904 .readpage = cifs_readpage,
3905 .writepage = cifs_writepage,
3906 .writepages = cifs_writepages,
3907 .write_begin = cifs_write_begin,
3908 .write_end = cifs_write_end,
3909 .set_page_dirty = __set_page_dirty_nobuffers,
3910 .releasepage = cifs_release_page,
3911 .invalidatepage = cifs_invalidate_page,
3912 .launder_page = cifs_launder_page,