gro: Allow tunnel stacking in the case of FOU/GUE
[linux/fpc-iii.git] / fs / ocfs2 / namei.c
blob2077dbdd48835ed0bc8f947e447e166b661dc2d6
1 /* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
4 * namei.c
6 * Create and rename file, directory, symlinks
8 * Copyright (C) 2002, 2004 Oracle. All rights reserved.
10 * Portions of this code from linux/fs/ext3/dir.c
12 * Copyright (C) 1992, 1993, 1994, 1995
13 * Remy Card (card@masi.ibp.fr)
14 * Laboratoire MASI - Institut Blaise Pascal
15 * Universite Pierre et Marie Curie (Paris VI)
17 * from
19 * linux/fs/minix/dir.c
21 * Copyright (C) 1991, 1992 Linus Torvalds
23 * This program is free software; you can redistribute it and/or
24 * modify it under the terms of the GNU General Public
25 * License as published by the Free Software Foundation; either
26 * version 2 of the License, or (at your option) any later version.
28 * This program is distributed in the hope that it will be useful,
29 * but WITHOUT ANY WARRANTY; without even the implied warranty of
30 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
31 * General Public License for more details.
33 * You should have received a copy of the GNU General Public
34 * License along with this program; if not, write to the
35 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
36 * Boston, MA 02111-1307, USA.
39 #include <linux/fs.h>
40 #include <linux/types.h>
41 #include <linux/slab.h>
42 #include <linux/highmem.h>
43 #include <linux/quotaops.h>
45 #include <cluster/masklog.h>
47 #include "ocfs2.h"
49 #include "alloc.h"
50 #include "dcache.h"
51 #include "dir.h"
52 #include "dlmglue.h"
53 #include "extent_map.h"
54 #include "file.h"
55 #include "inode.h"
56 #include "journal.h"
57 #include "namei.h"
58 #include "suballoc.h"
59 #include "super.h"
60 #include "symlink.h"
61 #include "sysfile.h"
62 #include "uptodate.h"
63 #include "xattr.h"
64 #include "acl.h"
65 #include "ocfs2_trace.h"
67 #include "buffer_head_io.h"
/*
 * Forward declarations of file-local (static) helpers that are used before
 * their definitions.  ocfs2_mknod_locked(), ocfs2_double_lock() and
 * ocfs2_double_unlock() are defined further down; the orphan-dir and symlink
 * helpers are presumably defined later in the file as well -- confirm.
 */
69 static int ocfs2_mknod_locked(struct ocfs2_super *osb,
70 struct inode *dir,
71 struct inode *inode,
72 dev_t dev,
73 struct buffer_head **new_fe_bh,
74 struct buffer_head *parent_fe_bh,
75 handle_t *handle,
76 struct ocfs2_alloc_context *inode_ac);
78 static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
79 struct inode **ret_orphan_dir,
80 u64 blkno,
81 char *name,
82 struct ocfs2_dir_lookup_result *lookup,
83 bool dio);
85 static int ocfs2_orphan_add(struct ocfs2_super *osb,
86 handle_t *handle,
87 struct inode *inode,
88 struct buffer_head *fe_bh,
89 char *name,
90 struct ocfs2_dir_lookup_result *lookup,
91 struct inode *orphan_dir_inode,
92 bool dio);
94 static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
95 handle_t *handle,
96 struct inode *inode,
97 const char *symname);
99 static int ocfs2_double_lock(struct ocfs2_super *osb,
100 struct buffer_head **bh1,
101 struct inode *inode1,
102 struct buffer_head **bh2,
103 struct inode *inode2,
104 int rename);
106 static void ocfs2_double_unlock(struct inode *inode1, struct inode *inode2);
107 /* An orphan dir name is an 8 byte value, printed as a hex string */
/* 2 hex digits per byte of a u64, i.e. 16 characters (no terminating NUL). */
108 #define OCFS2_ORPHAN_NAMELEN ((int)(2 * sizeof(u64)))
/*
 * Name prefix and its length; the length must stay equal to the number of
 * characters in the prefix string.  NOTE(review): presumably prepended to
 * orphan names created with dio == true -- confirm against the users.
 */
109 #define OCFS2_DIO_ORPHAN_PREFIX "dio-"
110 #define OCFS2_DIO_ORPHAN_PREFIX_LEN 4
/*
 * ocfs2_lookup() - look up dentry's name in directory dir.
 *
 * Names longer than OCFS2_MAX_FILENAME_LEN are rejected with -ENAMETOOLONG.
 * Otherwise the cluster lock on dir is taken (level 0, OI_LS_PARENT
 * subclass) and the name is resolved to a block number.  On a hit the inode
 * is fetched with ocfs2_iget(), spliced into the dcache and given a dentry
 * lock; on a miss d_splice_alias() is called with a NULL inode and only the
 * dentry generation is attached.
 *
 * Returns the d_splice_alias() result, or an ERR_PTR() carrying the lock
 * error, -EACCES when ocfs2_iget() fails, or the error from
 * ocfs2_dentry_attach_lock().  flags is not used here.
 */
112 static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
113 unsigned int flags)
115 int status;
116 u64 blkno;
117 struct inode *inode = NULL;
118 struct dentry *ret;
119 struct ocfs2_inode_info *oi;
121 trace_ocfs2_lookup(dir, dentry, dentry->d_name.len,
122 dentry->d_name.name,
123 (unsigned long long)OCFS2_I(dir)->ip_blkno, 0);
125 if (dentry->d_name.len > OCFS2_MAX_FILENAME_LEN) {
126 ret = ERR_PTR(-ENAMETOOLONG);
127 goto bail;
130 status = ocfs2_inode_lock_nested(dir, NULL, 0, OI_LS_PARENT);
131 if (status < 0) {
/* -ENOENT is returned to the caller without being logged. */
132 if (status != -ENOENT)
133 mlog_errno(status);
134 ret = ERR_PTR(status);
135 goto bail;
138 status = ocfs2_lookup_ino_from_name(dir, dentry->d_name.name,
139 dentry->d_name.len, &blkno);
/* Name not found: fall through to bail_add with inode still NULL. */
140 if (status < 0)
141 goto bail_add;
143 inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0, 0);
/* Any ocfs2_iget() failure is reported to the caller as -EACCES. */
144 if (IS_ERR(inode)) {
145 ret = ERR_PTR(-EACCES);
146 goto bail_unlock;
149 oi = OCFS2_I(inode);
150 /* Clear any orphaned state... If we were able to look up the
151 * inode from a directory, it certainly can't be orphaned. We
152 * might have the bad state from a node which intended to
153 * orphan this inode but crashed before it could commit the
154 * unlink. */
155 spin_lock(&oi->ip_lock);
156 oi->ip_flags &= ~OCFS2_INODE_MAYBE_ORPHANED;
157 spin_unlock(&oi->ip_lock);
159 bail_add:
160 ret = d_splice_alias(inode, dentry);
162 if (inode) {
164 * If d_splice_alias() finds a DCACHE_DISCONNECTED
165 * dentry, it will d_move() it on top of ourse. The
166 * return value will indicate this however, so in
167 * those cases, we switch them around for the locking
168 * code.
170 * NOTE: This dentry already has ->d_op set from
171 * ocfs2_get_parent() and ocfs2_get_dentry()
/* Use the dentry returned by d_splice_alias() when it is a real pointer. */
173 if (!IS_ERR_OR_NULL(ret))
174 dentry = ret;
176 status = ocfs2_dentry_attach_lock(dentry, inode,
177 OCFS2_I(dir)->ip_blkno);
178 if (status) {
179 mlog_errno(status);
180 ret = ERR_PTR(status);
181 goto bail_unlock;
183 } else
184 ocfs2_dentry_attach_gen(dentry);
186 bail_unlock:
187 /* Don't drop the cluster lock until *after* the d_add --
188 * unlink on another node will message us to remove that
189 * dentry under this lock so otherwise we can race this with
190 * the downconvert thread and have a stale dentry. */
191 ocfs2_inode_unlock(dir, 0);
193 bail:
195 trace_ocfs2_lookup_ret(ret);
197 return ret;
200 static struct inode *ocfs2_get_init_inode(struct inode *dir, umode_t mode)
202 struct inode *inode;
204 inode = new_inode(dir->i_sb);
205 if (!inode) {
206 mlog(ML_ERROR, "new_inode failed!\n");
207 return NULL;
210 /* populate as many fields early on as possible - many of
211 * these are used by the support functions here and in
212 * callers. */
213 if (S_ISDIR(mode))
214 set_nlink(inode, 2);
215 inode_init_owner(inode, dir, mode);
216 dquot_initialize(inode);
217 return inode;
220 static void ocfs2_cleanup_add_entry_failure(struct ocfs2_super *osb,
221 struct dentry *dentry, struct inode *inode)
223 struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
225 ocfs2_simple_drop_lockres(osb, &dl->dl_lockres);
226 ocfs2_lock_res_free(&dl->dl_lockres);
227 BUG_ON(dl->dl_count != 1);
228 spin_lock(&dentry_attach_lock);
229 dentry->d_fsdata = NULL;
230 spin_unlock(&dentry_attach_lock);
231 kfree(dl);
232 iput(inode);
/*
 * ocfs2_mknod() - create a new inode of the given mode under dir and link it
 * in under dentry's name.  dev is passed through to ocfs2_mknod_locked().
 *
 * Outline:
 *   1. Lock dir (ocfs2_inode_lock() with ex = 1) and validate: -EMLINK when
 *      creating a directory in a parent already at ocfs2_link_max(),
 *      -ENOENT when the parent's on-disk link count is zero, and whatever
 *      ocfs2_check_dir_for_entry() reports for the name.
 *   2. Reserve everything up front: directory slot, inode, metadata blocks
 *      and clusters (sized for security/ACL xattrs and, for directories,
 *      extent/index space).
 *   3. Start a transaction, block signals, charge inode quota, then build
 *      the inode, fill a new directory if needed, set ACL and security
 *      xattr, attach the dentry lock and add the directory entry.
 *   4. On success hash the inode and instantiate the dentry.
 *
 * Everything funnels through "leave", which undoes the quota charge on
 * failure, commits the transaction and releases locks, buffers and
 * reservations.
 *
 * Returns 0 on success or a negative errno.
 */
235 static int ocfs2_mknod(struct inode *dir,
236 struct dentry *dentry,
237 umode_t mode,
238 dev_t dev)
240 int status = 0;
241 struct buffer_head *parent_fe_bh = NULL;
242 handle_t *handle = NULL;
243 struct ocfs2_super *osb;
244 struct ocfs2_dinode *dirfe;
245 struct buffer_head *new_fe_bh = NULL;
246 struct inode *inode = NULL;
247 struct ocfs2_alloc_context *inode_ac = NULL;
248 struct ocfs2_alloc_context *data_ac = NULL;
249 struct ocfs2_alloc_context *meta_ac = NULL;
250 int want_clusters = 0;
251 int want_meta = 0;
252 int xattr_credits = 0;
253 struct ocfs2_security_xattr_info si = {
254 .enable = 1,
256 int did_quota_inode = 0;
257 struct ocfs2_dir_lookup_result lookup = { NULL, };
258 sigset_t oldset;
259 int did_block_signals = 0;
260 struct ocfs2_dentry_lock *dl = NULL;
262 trace_ocfs2_mknod(dir, dentry, dentry->d_name.len, dentry->d_name.name,
263 (unsigned long long)OCFS2_I(dir)->ip_blkno,
264 (unsigned long)dev, mode);
266 dquot_initialize(dir);
268 /* get our super block */
269 osb = OCFS2_SB(dir->i_sb);
271 status = ocfs2_inode_lock(dir, &parent_fe_bh, 1);
/* Nothing is held yet, so a lock failure returns directly (no "leave"). */
272 if (status < 0) {
273 if (status != -ENOENT)
274 mlog_errno(status);
275 return status;
278 if (S_ISDIR(mode) && (dir->i_nlink >= ocfs2_link_max(osb))) {
279 status = -EMLINK;
280 goto leave;
283 dirfe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
284 if (!ocfs2_read_links_count(dirfe)) {
285 /* can't make a file in a deleted directory. */
286 status = -ENOENT;
287 goto leave;
290 status = ocfs2_check_dir_for_entry(dir, dentry->d_name.name,
291 dentry->d_name.len);
292 if (status)
293 goto leave;
295 /* get a spot inside the dir. */
296 status = ocfs2_prepare_dir_for_insert(osb, dir, parent_fe_bh,
297 dentry->d_name.name,
298 dentry->d_name.len, &lookup);
299 if (status < 0) {
300 mlog_errno(status);
301 goto leave;
304 /* reserve an inode spot */
305 status = ocfs2_reserve_new_inode(osb, &inode_ac);
306 if (status < 0) {
/* Out-of-space is an expected condition and is not logged. */
307 if (status != -ENOSPC)
308 mlog_errno(status);
309 goto leave;
312 inode = ocfs2_get_init_inode(dir, mode);
313 if (!inode) {
314 status = -ENOMEM;
315 mlog_errno(status);
316 goto leave;
319 /* get security xattr */
320 status = ocfs2_init_security_get(inode, dir, &dentry->d_name, &si);
321 if (status) {
/* -EOPNOTSUPP just disables the security xattr; other errors abort. */
322 if (status == -EOPNOTSUPP)
323 si.enable = 0;
324 else {
325 mlog_errno(status);
326 goto leave;
330 /* calculate meta data/clusters for setting security and acl xattr */
331 status = ocfs2_calc_xattr_init(dir, parent_fe_bh, mode,
332 &si, &want_clusters,
333 &xattr_credits, &want_meta);
334 if (status < 0) {
335 mlog_errno(status);
336 goto leave;
339 /* Reserve a cluster if creating an extent based directory. */
340 if (S_ISDIR(mode) && !ocfs2_supports_inline_data(osb)) {
341 want_clusters += 1;
343 /* Dir indexing requires extra space as well */
344 if (ocfs2_supports_indexed_dirs(osb))
345 want_meta++;
348 status = ocfs2_reserve_new_metadata_blocks(osb, want_meta, &meta_ac);
349 if (status < 0) {
350 if (status != -ENOSPC)
351 mlog_errno(status);
352 goto leave;
355 status = ocfs2_reserve_clusters(osb, want_clusters, &data_ac);
356 if (status < 0) {
357 if (status != -ENOSPC)
358 mlog_errno(status);
359 goto leave;
362 handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb,
363 S_ISDIR(mode),
364 xattr_credits));
365 if (IS_ERR(handle)) {
366 status = PTR_ERR(handle);
/* Reset to NULL so "leave" does not try to commit an error pointer. */
367 handle = NULL;
368 mlog_errno(status);
369 goto leave;
372 /* Starting to change things, restart is no longer possible. */
373 ocfs2_block_signals(&oldset);
374 did_block_signals = 1;
376 status = dquot_alloc_inode(inode);
377 if (status)
378 goto leave;
/* Remember the quota charge so "leave" can refund it on failure. */
379 did_quota_inode = 1;
381 /* do the real work now. */
382 status = ocfs2_mknod_locked(osb, dir, inode, dev,
383 &new_fe_bh, parent_fe_bh, handle,
384 inode_ac);
385 if (status < 0) {
386 mlog_errno(status);
387 goto leave;
390 if (S_ISDIR(mode)) {
391 status = ocfs2_fill_new_dir(osb, handle, dir, inode,
392 new_fe_bh, data_ac, meta_ac);
393 if (status < 0) {
394 mlog_errno(status);
395 goto leave;
398 status = ocfs2_journal_access_di(handle, INODE_CACHE(dir),
399 parent_fe_bh,
400 OCFS2_JOURNAL_ACCESS_WRITE);
401 if (status < 0) {
402 mlog_errno(status);
403 goto leave;
/* A new subdirectory adds one link to the parent, on disk and in memory. */
405 ocfs2_add_links_count(dirfe, 1);
406 ocfs2_journal_dirty(handle, parent_fe_bh);
407 inc_nlink(dir);
410 status = ocfs2_init_acl(handle, inode, dir, new_fe_bh, parent_fe_bh,
411 meta_ac, data_ac);
413 if (status < 0) {
414 mlog_errno(status);
415 goto leave;
418 if (si.enable) {
419 status = ocfs2_init_security_set(handle, inode, new_fe_bh, &si,
420 meta_ac, data_ac);
421 if (status < 0) {
422 mlog_errno(status);
423 goto leave;
428 * Do this before adding the entry to the directory. We add
429 * also set d_op after success so that ->d_iput() will cleanup
430 * the dentry lock even if ocfs2_add_entry() fails below.
432 status = ocfs2_dentry_attach_lock(dentry, inode,
433 OCFS2_I(dir)->ip_blkno);
434 if (status) {
435 mlog_errno(status);
436 goto leave;
/* A non-NULL dl tells the error path that a dentry lock must be torn down. */
439 dl = dentry->d_fsdata;
441 status = ocfs2_add_entry(handle, dentry, inode,
442 OCFS2_I(inode)->ip_blkno, parent_fe_bh,
443 &lookup);
444 if (status < 0) {
445 mlog_errno(status);
446 goto leave;
449 insert_inode_hash(inode);
450 d_instantiate(dentry, inode);
451 status = 0;
452 leave:
453 if (status < 0 && did_quota_inode)
454 dquot_free_inode(inode);
455 if (handle)
456 ocfs2_commit_trans(osb, handle);
458 ocfs2_inode_unlock(dir, 1);
459 if (did_block_signals)
460 ocfs2_unblock_signals(&oldset);
462 brelse(new_fe_bh);
463 brelse(parent_fe_bh);
464 kfree(si.value);
466 ocfs2_free_dir_lookup_result(&lookup);
468 if (inode_ac)
469 ocfs2_free_alloc_context(inode_ac);
471 if (data_ac)
472 ocfs2_free_alloc_context(data_ac);
474 if (meta_ac)
475 ocfs2_free_alloc_context(meta_ac);
478 * We should call iput after the i_mutex of the bitmap been
479 * unlocked in ocfs2_free_alloc_context, or the
480 * ocfs2_delete_inode will mutex_lock again.
482 if ((status < 0) && inode) {
/*
 * ocfs2_cleanup_add_entry_failure() does its own iput(); presumably that
 * drops the reference taken when the dentry lock was attached -- confirm.
 * The iput() below then drops this function's reference.
 */
483 if (dl)
484 ocfs2_cleanup_add_entry_failure(osb, dentry, inode);
486 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR;
487 clear_nlink(inode);
488 iput(inode);
/* Logs every non-zero status, including ones already logged above. */
491 if (status)
492 mlog_errno(status);
494 return status;
497 static int __ocfs2_mknod_locked(struct inode *dir,
498 struct inode *inode,
499 dev_t dev,
500 struct buffer_head **new_fe_bh,
501 struct buffer_head *parent_fe_bh,
502 handle_t *handle,
503 struct ocfs2_alloc_context *inode_ac,
504 u64 fe_blkno, u64 suballoc_loc, u16 suballoc_bit)
506 int status = 0;
507 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
508 struct ocfs2_dinode *fe = NULL;
509 struct ocfs2_extent_list *fel;
510 u16 feat;
511 struct ocfs2_inode_info *oi = OCFS2_I(inode);
513 *new_fe_bh = NULL;
515 /* populate as many fields early on as possible - many of
516 * these are used by the support functions here and in
517 * callers. */
518 inode->i_ino = ino_from_blkno(osb->sb, fe_blkno);
519 OCFS2_I(inode)->ip_blkno = fe_blkno;
520 spin_lock(&osb->osb_lock);
521 inode->i_generation = osb->s_next_generation++;
522 spin_unlock(&osb->osb_lock);
524 *new_fe_bh = sb_getblk(osb->sb, fe_blkno);
525 if (!*new_fe_bh) {
526 status = -ENOMEM;
527 mlog_errno(status);
528 goto leave;
530 ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), *new_fe_bh);
532 status = ocfs2_journal_access_di(handle, INODE_CACHE(inode),
533 *new_fe_bh,
534 OCFS2_JOURNAL_ACCESS_CREATE);
535 if (status < 0) {
536 mlog_errno(status);
537 goto leave;
540 fe = (struct ocfs2_dinode *) (*new_fe_bh)->b_data;
541 memset(fe, 0, osb->sb->s_blocksize);
543 fe->i_generation = cpu_to_le32(inode->i_generation);
544 fe->i_fs_generation = cpu_to_le32(osb->fs_generation);
545 fe->i_blkno = cpu_to_le64(fe_blkno);
546 fe->i_suballoc_loc = cpu_to_le64(suballoc_loc);
547 fe->i_suballoc_bit = cpu_to_le16(suballoc_bit);
548 fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot);
549 fe->i_uid = cpu_to_le32(i_uid_read(inode));
550 fe->i_gid = cpu_to_le32(i_gid_read(inode));
551 fe->i_mode = cpu_to_le16(inode->i_mode);
552 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
553 fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev));
555 ocfs2_set_links_count(fe, inode->i_nlink);
557 fe->i_last_eb_blk = 0;
558 strcpy(fe->i_signature, OCFS2_INODE_SIGNATURE);
559 fe->i_flags |= cpu_to_le32(OCFS2_VALID_FL);
560 fe->i_atime = fe->i_ctime = fe->i_mtime =
561 cpu_to_le64(CURRENT_TIME.tv_sec);
562 fe->i_mtime_nsec = fe->i_ctime_nsec = fe->i_atime_nsec =
563 cpu_to_le32(CURRENT_TIME.tv_nsec);
564 fe->i_dtime = 0;
567 * If supported, directories start with inline data. If inline
568 * isn't supported, but indexing is, we start them as indexed.
570 feat = le16_to_cpu(fe->i_dyn_features);
571 if (S_ISDIR(inode->i_mode) && ocfs2_supports_inline_data(osb)) {
572 fe->i_dyn_features = cpu_to_le16(feat | OCFS2_INLINE_DATA_FL);
574 fe->id2.i_data.id_count = cpu_to_le16(
575 ocfs2_max_inline_data_with_xattr(osb->sb, fe));
576 } else {
577 fel = &fe->id2.i_list;
578 fel->l_tree_depth = 0;
579 fel->l_next_free_rec = 0;
580 fel->l_count = cpu_to_le16(ocfs2_extent_recs_per_inode(osb->sb));
583 ocfs2_journal_dirty(handle, *new_fe_bh);
585 ocfs2_populate_inode(inode, fe, 1);
586 ocfs2_ci_set_new(osb, INODE_CACHE(inode));
587 if (!ocfs2_mount_local(osb)) {
588 status = ocfs2_create_new_inode_locks(inode);
589 if (status < 0)
590 mlog_errno(status);
593 oi->i_sync_tid = handle->h_transaction->t_tid;
594 oi->i_datasync_tid = handle->h_transaction->t_tid;
596 leave:
597 if (status < 0) {
598 if (*new_fe_bh) {
599 brelse(*new_fe_bh);
600 *new_fe_bh = NULL;
604 if (status)
605 mlog_errno(status);
606 return status;
609 static int ocfs2_mknod_locked(struct ocfs2_super *osb,
610 struct inode *dir,
611 struct inode *inode,
612 dev_t dev,
613 struct buffer_head **new_fe_bh,
614 struct buffer_head *parent_fe_bh,
615 handle_t *handle,
616 struct ocfs2_alloc_context *inode_ac)
618 int status = 0;
619 u64 suballoc_loc, fe_blkno = 0;
620 u16 suballoc_bit;
622 *new_fe_bh = NULL;
624 status = ocfs2_claim_new_inode(handle, dir, parent_fe_bh,
625 inode_ac, &suballoc_loc,
626 &suballoc_bit, &fe_blkno);
627 if (status < 0) {
628 mlog_errno(status);
629 return status;
632 return __ocfs2_mknod_locked(dir, inode, dev, new_fe_bh,
633 parent_fe_bh, handle, inode_ac,
634 fe_blkno, suballoc_loc, suballoc_bit);
637 static int ocfs2_mkdir(struct inode *dir,
638 struct dentry *dentry,
639 umode_t mode)
641 int ret;
643 trace_ocfs2_mkdir(dir, dentry, dentry->d_name.len, dentry->d_name.name,
644 OCFS2_I(dir)->ip_blkno, mode);
645 ret = ocfs2_mknod(dir, dentry, mode | S_IFDIR, 0);
646 if (ret)
647 mlog_errno(ret);
649 return ret;
652 static int ocfs2_create(struct inode *dir,
653 struct dentry *dentry,
654 umode_t mode,
655 bool excl)
657 int ret;
659 trace_ocfs2_create(dir, dentry, dentry->d_name.len, dentry->d_name.name,
660 (unsigned long long)OCFS2_I(dir)->ip_blkno, mode);
661 ret = ocfs2_mknod(dir, dentry, mode | S_IFREG, 0);
662 if (ret)
663 mlog_errno(ret);
665 return ret;
/*
 * ocfs2_link() - add a hard link named by dentry in dir to the inode behind
 * old_dentry.
 *
 * Directories are refused with -EPERM.  Both the source's parent and dir
 * are locked through ocfs2_double_lock(); the source name is then looked
 * up again under the lock and must still resolve to the same block number,
 * otherwise -ENOENT is returned.  After reserving a slot in dir and locking
 * the inode itself, the link count is bumped inside a transaction, the new
 * entry is added and the dentry lock attached.
 *
 * Returns 0 on success or a negative errno (-EMLINK when the inode is
 * already at ocfs2_link_max(), -ENOENT when dir has no links left).
 */
668 static int ocfs2_link(struct dentry *old_dentry,
669 struct inode *dir,
670 struct dentry *dentry)
672 handle_t *handle;
673 struct inode *inode = d_inode(old_dentry);
674 struct inode *old_dir = d_inode(old_dentry->d_parent);
675 int err;
676 struct buffer_head *fe_bh = NULL;
677 struct buffer_head *old_dir_bh = NULL;
678 struct buffer_head *parent_fe_bh = NULL;
679 struct ocfs2_dinode *fe = NULL;
680 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
681 struct ocfs2_dir_lookup_result lookup = { NULL, };
682 sigset_t oldset;
683 u64 old_de_ino;
685 trace_ocfs2_link((unsigned long long)OCFS2_I(inode)->ip_blkno,
686 old_dentry->d_name.len, old_dentry->d_name.name,
687 dentry->d_name.len, dentry->d_name.name);
689 if (S_ISDIR(inode->i_mode))
690 return -EPERM;
692 dquot_initialize(dir);
694 err = ocfs2_double_lock(osb, &old_dir_bh, old_dir,
695 &parent_fe_bh, dir, 0);
/* ocfs2_double_lock() leaves nothing locked on error, so return directly. */
696 if (err < 0) {
697 if (err != -ENOENT)
698 mlog_errno(err);
699 return err;
702 /* make sure both dirs have bhs
703 * get an extra ref on old_dir_bh if old==new */
704 if (!parent_fe_bh) {
705 if (old_dir_bh) {
706 parent_fe_bh = old_dir_bh;
707 get_bh(parent_fe_bh);
708 } else {
709 mlog(ML_ERROR, "%s: no old_dir_bh!\n", osb->uuid_str);
710 err = -EIO;
711 goto out;
715 if (!dir->i_nlink) {
716 err = -ENOENT;
717 goto out;
720 err = ocfs2_lookup_ino_from_name(old_dir, old_dentry->d_name.name,
721 old_dentry->d_name.len, &old_de_ino);
/* Any lookup failure is reported to the caller as -ENOENT. */
722 if (err) {
723 err = -ENOENT;
724 goto out;
728 * Check whether another node removed the source inode while we
729 * were in the vfs.
731 if (old_de_ino != OCFS2_I(inode)->ip_blkno) {
732 err = -ENOENT;
733 goto out;
736 err = ocfs2_check_dir_for_entry(dir, dentry->d_name.name,
737 dentry->d_name.len);
738 if (err)
739 goto out;
741 err = ocfs2_prepare_dir_for_insert(osb, dir, parent_fe_bh,
742 dentry->d_name.name,
743 dentry->d_name.len, &lookup);
744 if (err < 0) {
745 mlog_errno(err);
746 goto out;
749 err = ocfs2_inode_lock(inode, &fe_bh, 1);
750 if (err < 0) {
751 if (err != -ENOENT)
752 mlog_errno(err);
753 goto out;
756 fe = (struct ocfs2_dinode *) fe_bh->b_data;
757 if (ocfs2_read_links_count(fe) >= ocfs2_link_max(osb)) {
758 err = -EMLINK;
759 goto out_unlock_inode;
762 handle = ocfs2_start_trans(osb, ocfs2_link_credits(osb->sb));
763 if (IS_ERR(handle)) {
764 err = PTR_ERR(handle);
765 handle = NULL;
766 mlog_errno(err);
767 goto out_unlock_inode;
770 /* Starting to change things, restart is no longer possible. */
771 ocfs2_block_signals(&oldset);
773 err = ocfs2_journal_access_di(handle, INODE_CACHE(inode), fe_bh,
774 OCFS2_JOURNAL_ACCESS_WRITE);
775 if (err < 0) {
776 mlog_errno(err);
777 goto out_commit;
/* Bump the link count and ctime in memory, then mirror both on disk. */
780 inc_nlink(inode);
781 inode->i_ctime = CURRENT_TIME;
782 ocfs2_set_links_count(fe, inode->i_nlink);
783 fe->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
784 fe->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
785 ocfs2_journal_dirty(handle, fe_bh);
787 err = ocfs2_add_entry(handle, dentry, inode,
788 OCFS2_I(inode)->ip_blkno,
789 parent_fe_bh, &lookup);
790 if (err) {
/* Undo the link-count bump, on disk and in memory. */
791 ocfs2_add_links_count(fe, -1);
792 drop_nlink(inode);
793 mlog_errno(err);
794 goto out_commit;
797 err = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno);
798 if (err) {
799 mlog_errno(err);
800 goto out_commit;
/* The new dentry holds its own reference on the inode. */
803 ihold(inode);
804 d_instantiate(dentry, inode);
806 out_commit:
807 ocfs2_commit_trans(osb, handle);
808 ocfs2_unblock_signals(&oldset);
809 out_unlock_inode:
810 ocfs2_inode_unlock(inode, 1);
812 out:
813 ocfs2_double_unlock(old_dir, dir);
815 brelse(fe_bh);
816 brelse(parent_fe_bh);
817 brelse(old_dir_bh);
819 ocfs2_free_dir_lookup_result(&lookup);
/* Logs every non-zero err, including ones already logged above. */
821 if (err)
822 mlog_errno(err);
824 return err;
/*
 * Takes and drops an exclusive lock on the given dentry. This will
 * force other nodes to drop it.
 *
 * Returns 0 on success, or the (logged) error from ocfs2_dentry_lock().
 */
static int ocfs2_remote_dentry_delete(struct dentry *dentry)
{
	int err = ocfs2_dentry_lock(dentry, 1);

	if (err) {
		mlog_errno(err);
		return err;
	}

	ocfs2_dentry_unlock(dentry, 1);
	return 0;
}
844 static inline int ocfs2_inode_is_unlinkable(struct inode *inode)
846 if (S_ISDIR(inode->i_mode)) {
847 if (inode->i_nlink == 2)
848 return 1;
849 return 0;
852 if (inode->i_nlink == 1)
853 return 1;
854 return 0;
/*
 * ocfs2_unlink() - remove dentry's name from directory dir.
 *
 * The root inode is refused with -EPERM.  With dir locked, the name is
 * looked up on disk and must still point at dentry's inode (-ENOENT
 * otherwise).  The inode is then locked; directories must have a link
 * count of exactly 2 and be empty (-ENOTEMPTY otherwise).  After the
 * remote dentry delete, an orphan-dir slot is prepared when this is the
 * inode's last link, and inside a transaction the entry is deleted, link
 * counts are dropped and the inode is added to the orphan dir if needed.
 *
 * Returns 0 on success or a negative errno.
 */
857 static int ocfs2_unlink(struct inode *dir,
858 struct dentry *dentry)
860 int status;
861 int child_locked = 0;
862 bool is_unlinkable = false;
863 struct inode *inode = d_inode(dentry);
864 struct inode *orphan_dir = NULL;
865 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
866 u64 blkno;
867 struct ocfs2_dinode *fe = NULL;
868 struct buffer_head *fe_bh = NULL;
869 struct buffer_head *parent_node_bh = NULL;
870 handle_t *handle = NULL;
/* One extra byte beyond OCFS2_ORPHAN_NAMELEN, presumably for a NUL. */
871 char orphan_name[OCFS2_ORPHAN_NAMELEN + 1];
872 struct ocfs2_dir_lookup_result lookup = { NULL, };
873 struct ocfs2_dir_lookup_result orphan_insert = { NULL, };
875 trace_ocfs2_unlink(dir, dentry, dentry->d_name.len,
876 dentry->d_name.name,
877 (unsigned long long)OCFS2_I(dir)->ip_blkno,
878 (unsigned long long)OCFS2_I(inode)->ip_blkno);
880 dquot_initialize(dir);
882 BUG_ON(d_inode(dentry->d_parent) != dir);
884 if (inode == osb->root_inode)
885 return -EPERM;
887 status = ocfs2_inode_lock_nested(dir, &parent_node_bh, 1,
888 OI_LS_PARENT);
889 if (status < 0) {
890 if (status != -ENOENT)
891 mlog_errno(status);
892 return status;
895 status = ocfs2_find_files_on_disk(dentry->d_name.name,
896 dentry->d_name.len, &blkno, dir,
897 &lookup);
898 if (status < 0) {
899 if (status != -ENOENT)
900 mlog_errno(status);
901 goto leave;
/* The on-disk entry must still point at the inode the VFS handed us. */
904 if (OCFS2_I(inode)->ip_blkno != blkno) {
905 status = -ENOENT;
907 trace_ocfs2_unlink_noent(
908 (unsigned long long)OCFS2_I(inode)->ip_blkno,
909 (unsigned long long)blkno,
910 OCFS2_I(inode)->ip_flags);
911 goto leave;
914 status = ocfs2_inode_lock(inode, &fe_bh, 1);
915 if (status < 0) {
916 if (status != -ENOENT)
917 mlog_errno(status);
918 goto leave;
920 child_locked = 1;
922 if (S_ISDIR(inode->i_mode)) {
923 if (inode->i_nlink != 2 || !ocfs2_empty_dir(inode)) {
924 status = -ENOTEMPTY;
925 goto leave;
929 status = ocfs2_remote_dentry_delete(dentry);
930 if (status < 0) {
931 /* This remote delete should succeed under all normal
932 * circumstances. */
933 mlog_errno(status);
934 goto leave;
/* Last link: prepare the orphan-dir slot before the transaction starts. */
937 if (ocfs2_inode_is_unlinkable(inode)) {
938 status = ocfs2_prepare_orphan_dir(osb, &orphan_dir,
939 OCFS2_I(inode)->ip_blkno,
940 orphan_name, &orphan_insert,
941 false);
942 if (status < 0) {
943 mlog_errno(status);
944 goto leave;
946 is_unlinkable = true;
949 handle = ocfs2_start_trans(osb, ocfs2_unlink_credits(osb->sb));
950 if (IS_ERR(handle)) {
951 status = PTR_ERR(handle);
952 handle = NULL;
953 mlog_errno(status);
954 goto leave;
957 status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), fe_bh,
958 OCFS2_JOURNAL_ACCESS_WRITE);
959 if (status < 0) {
960 mlog_errno(status);
961 goto leave;
964 fe = (struct ocfs2_dinode *) fe_bh->b_data;
966 /* delete the name from the parent dir */
967 status = ocfs2_delete_entry(handle, dir, &lookup);
968 if (status < 0) {
969 mlog_errno(status);
970 goto leave;
/* A directory loses two links here, any other inode one. */
973 if (S_ISDIR(inode->i_mode))
974 drop_nlink(inode);
975 drop_nlink(inode);
976 ocfs2_set_links_count(fe, inode->i_nlink);
977 ocfs2_journal_dirty(handle, fe_bh);
979 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
/* Removing a subdirectory also removes one link from the parent. */
980 if (S_ISDIR(inode->i_mode))
981 drop_nlink(dir);
983 status = ocfs2_mark_inode_dirty(handle, dir, parent_node_bh);
984 if (status < 0) {
985 mlog_errno(status);
/* Restore the parent's in-memory link count dropped just above. */
986 if (S_ISDIR(inode->i_mode))
987 inc_nlink(dir);
988 goto leave;
991 if (is_unlinkable) {
992 status = ocfs2_orphan_add(osb, handle, inode, fe_bh,
993 orphan_name, &orphan_insert, orphan_dir, false);
994 if (status < 0)
995 mlog_errno(status);
998 leave:
999 if (handle)
1000 ocfs2_commit_trans(osb, handle);
1002 if (child_locked)
1003 ocfs2_inode_unlock(inode, 1);
1005 ocfs2_inode_unlock(dir, 1);
1007 if (orphan_dir) {
1008 /* This was locked for us in ocfs2_prepare_orphan_dir() */
1009 ocfs2_inode_unlock(orphan_dir, 1);
1010 mutex_unlock(&orphan_dir->i_mutex);
1011 iput(orphan_dir);
1014 brelse(fe_bh);
1015 brelse(parent_node_bh);
1017 ocfs2_free_dir_lookup_result(&orphan_insert);
1018 ocfs2_free_dir_lookup_result(&lookup);
/* -ENOTEMPTY and -ENOENT are not logged here. */
1020 if (status && (status != -ENOTEMPTY) && (status != -ENOENT))
1021 mlog_errno(status);
1023 return status;
1026 static int ocfs2_check_if_ancestor(struct ocfs2_super *osb,
1027 u64 src_inode_no, u64 dest_inode_no)
1029 int ret = 0, i = 0;
1030 u64 parent_inode_no = 0;
1031 u64 child_inode_no = src_inode_no;
1032 struct inode *child_inode;
1034 #define MAX_LOOKUP_TIMES 32
1035 while (1) {
1036 child_inode = ocfs2_iget(osb, child_inode_no, 0, 0);
1037 if (IS_ERR(child_inode)) {
1038 ret = PTR_ERR(child_inode);
1039 break;
1042 ret = ocfs2_inode_lock(child_inode, NULL, 0);
1043 if (ret < 0) {
1044 iput(child_inode);
1045 if (ret != -ENOENT)
1046 mlog_errno(ret);
1047 break;
1050 ret = ocfs2_lookup_ino_from_name(child_inode, "..", 2,
1051 &parent_inode_no);
1052 ocfs2_inode_unlock(child_inode, 0);
1053 iput(child_inode);
1054 if (ret < 0) {
1055 ret = -ENOENT;
1056 break;
1059 if (parent_inode_no == dest_inode_no) {
1060 ret = 1;
1061 break;
1064 if (parent_inode_no == osb->root_inode->i_ino) {
1065 ret = 0;
1066 break;
1069 child_inode_no = parent_inode_no;
1071 if (++i >= MAX_LOOKUP_TIMES) {
1072 mlog(ML_NOTICE, "max lookup times reached, filesystem "
1073 "may have nested directories, "
1074 "src inode: %llu, dest inode: %llu.\n",
1075 (unsigned long long)src_inode_no,
1076 (unsigned long long)dest_inode_no);
1077 ret = 0;
1078 break;
1082 return ret;
/*
 * ocfs2_double_lock() - take the cluster locks on inode1 and inode2.
 *
 * *bh1 and *bh2 are reset to NULL and then filled in by the lock calls.
 * When the two inodes have different block numbers, ancestry is checked in
 * both directions to pick the order: the caller's inode1 is locked first if
 * it is an ancestor of inode2, or if it has the lower block number and
 * inode2 is not its ancestor; otherwise the caller's inode2 goes first.
 * When the block numbers are equal only one lock is taken.  rename == 1
 * selects the OI_LS_RENAME1/OI_LS_RENAME2 lock subclasses, anything else
 * OI_LS_PARENT.
 *
 * Returns 0 on success or a negative errno; on error no lock is left held.
 */
1086 * The only place this should be used is rename and link!
1087 * if they have the same id, then the 1st one is the only one locked.
1089 static int ocfs2_double_lock(struct ocfs2_super *osb,
1090 struct buffer_head **bh1,
1091 struct inode *inode1,
1092 struct buffer_head **bh2,
1093 struct inode *inode2,
1094 int rename)
1096 int status;
1097 int inode1_is_ancestor, inode2_is_ancestor;
/* oi1/oi2 keep referring to the caller's original order after any swap. */
1098 struct ocfs2_inode_info *oi1 = OCFS2_I(inode1);
1099 struct ocfs2_inode_info *oi2 = OCFS2_I(inode2);
1100 struct buffer_head **tmpbh;
1101 struct inode *tmpinode;
1103 trace_ocfs2_double_lock((unsigned long long)oi1->ip_blkno,
1104 (unsigned long long)oi2->ip_blkno);
1106 if (*bh1)
1107 *bh1 = NULL;
1108 if (*bh2)
1109 *bh2 = NULL;
1111 /* we always want to lock the one with the lower lockid first.
1112 * and if they are nested, we lock ancestor first */
1113 if (oi1->ip_blkno != oi2->ip_blkno) {
/* Walk up from inode2: a result of 1 means inode1 is its ancestor. */
1114 inode1_is_ancestor = ocfs2_check_if_ancestor(osb, oi2->ip_blkno,
1115 oi1->ip_blkno);
1116 if (inode1_is_ancestor < 0) {
1117 status = inode1_is_ancestor;
1118 goto bail;
/* Walk up from inode1: a result of 1 means inode2 is its ancestor. */
1121 inode2_is_ancestor = ocfs2_check_if_ancestor(osb, oi1->ip_blkno,
1122 oi2->ip_blkno);
1123 if (inode2_is_ancestor < 0) {
1124 status = inode2_is_ancestor;
1125 goto bail;
/* After this swap "inode2" is whichever inode must be locked first. */
1128 if ((inode1_is_ancestor == 1) ||
1129 (oi1->ip_blkno < oi2->ip_blkno &&
1130 inode2_is_ancestor == 0)) {
1131 /* switch id1 and id2 around */
1132 tmpbh = bh2;
1133 bh2 = bh1;
1134 bh1 = tmpbh;
1136 tmpinode = inode2;
1137 inode2 = inode1;
1138 inode1 = tmpinode;
1140 /* lock id2 */
1141 status = ocfs2_inode_lock_nested(inode2, bh2, 1,
1142 rename == 1 ? OI_LS_RENAME1 : OI_LS_PARENT);
1143 if (status < 0) {
1144 if (status != -ENOENT)
1145 mlog_errno(status);
1146 goto bail;
1150 /* lock id1 */
1151 status = ocfs2_inode_lock_nested(inode1, bh1, 1,
1152 rename == 1 ? OI_LS_RENAME2 : OI_LS_PARENT);
1153 if (status < 0) {
1155 * An error return must mean that no cluster locks
1156 * were held on function exit.
/* inode2 was only locked above when the block numbers differ. */
1158 if (oi1->ip_blkno != oi2->ip_blkno) {
1159 ocfs2_inode_unlock(inode2, 1);
1160 brelse(*bh2);
1161 *bh2 = NULL;
1164 if (status != -ENOENT)
1165 mlog_errno(status);
1168 trace_ocfs2_double_lock_end(
1169 (unsigned long long)OCFS2_I(inode1)->ip_blkno,
1170 (unsigned long long)OCFS2_I(inode2)->ip_blkno);
1172 bail:
/* Logs every non-zero status, including -ENOENT and already-logged ones. */
1173 if (status)
1174 mlog_errno(status);
1175 return status;
/*
 * Counterpart to ocfs2_double_lock(): release the cluster lock (level 1)
 * on inode1 and, when inode2 is a different inode, on inode2 as well.
 */
static void ocfs2_double_unlock(struct inode *inode1, struct inode *inode2)
{
	ocfs2_inode_unlock(inode1, 1);

	if (inode2 == inode1)
		return;

	ocfs2_inode_unlock(inode2, 1);
}
1186 static int ocfs2_rename(struct inode *old_dir,
1187 struct dentry *old_dentry,
1188 struct inode *new_dir,
1189 struct dentry *new_dentry)
1191 int status = 0, rename_lock = 0, parents_locked = 0, target_exists = 0;
1192 int old_child_locked = 0, new_child_locked = 0, update_dot_dot = 0;
1193 struct inode *old_inode = d_inode(old_dentry);
1194 struct inode *new_inode = d_inode(new_dentry);
1195 struct inode *orphan_dir = NULL;
1196 struct ocfs2_dinode *newfe = NULL;
1197 char orphan_name[OCFS2_ORPHAN_NAMELEN + 1];
1198 struct buffer_head *newfe_bh = NULL;
1199 struct buffer_head *old_inode_bh = NULL;
1200 struct ocfs2_super *osb = NULL;
1201 u64 newfe_blkno, old_de_ino;
1202 handle_t *handle = NULL;
1203 struct buffer_head *old_dir_bh = NULL;
1204 struct buffer_head *new_dir_bh = NULL;
1205 u32 old_dir_nlink = old_dir->i_nlink;
1206 struct ocfs2_dinode *old_di;
1207 struct ocfs2_dir_lookup_result old_inode_dot_dot_res = { NULL, };
1208 struct ocfs2_dir_lookup_result target_lookup_res = { NULL, };
1209 struct ocfs2_dir_lookup_result old_entry_lookup = { NULL, };
1210 struct ocfs2_dir_lookup_result orphan_insert = { NULL, };
1211 struct ocfs2_dir_lookup_result target_insert = { NULL, };
1212 bool should_add_orphan = false;
1214 /* At some point it might be nice to break this function up a
1215 * bit. */
1217 trace_ocfs2_rename(old_dir, old_dentry, new_dir, new_dentry,
1218 old_dentry->d_name.len, old_dentry->d_name.name,
1219 new_dentry->d_name.len, new_dentry->d_name.name);
1221 dquot_initialize(old_dir);
1222 dquot_initialize(new_dir);
1224 osb = OCFS2_SB(old_dir->i_sb);
1226 if (new_inode) {
1227 if (!igrab(new_inode))
1228 BUG();
1231 /* Assume a directory hierarchy thusly:
1232 * a/b/c
1233 * a/d
1234 * a,b,c, and d are all directories.
1236 * from cwd of 'a' on both nodes:
1237 * node1: mv b/c d
1238 * node2: mv d b/c
1240 * And that's why, just like the VFS, we need a file system
1241 * rename lock. */
1242 if (old_dir != new_dir && S_ISDIR(old_inode->i_mode)) {
1243 status = ocfs2_rename_lock(osb);
1244 if (status < 0) {
1245 mlog_errno(status);
1246 goto bail;
1248 rename_lock = 1;
1250 /* here we cannot guarantee the inodes haven't just been
1251 * changed, so check if they are nested again */
1252 status = ocfs2_check_if_ancestor(osb, new_dir->i_ino,
1253 old_inode->i_ino);
1254 if (status < 0) {
1255 mlog_errno(status);
1256 goto bail;
1257 } else if (status == 1) {
1258 status = -EPERM;
1259 trace_ocfs2_rename_not_permitted(
1260 (unsigned long long)old_inode->i_ino,
1261 (unsigned long long)new_dir->i_ino);
1262 goto bail;
1266 /* if old and new are the same, this'll just do one lock. */
1267 status = ocfs2_double_lock(osb, &old_dir_bh, old_dir,
1268 &new_dir_bh, new_dir, 1);
1269 if (status < 0) {
1270 mlog_errno(status);
1271 goto bail;
1273 parents_locked = 1;
1275 /* make sure both dirs have bhs
1276 * get an extra ref on old_dir_bh if old==new */
1277 if (!new_dir_bh) {
1278 if (old_dir_bh) {
1279 new_dir_bh = old_dir_bh;
1280 get_bh(new_dir_bh);
1281 } else {
1282 mlog(ML_ERROR, "no old_dir_bh!\n");
1283 status = -EIO;
1284 goto bail;
1289 * Aside from allowing a meta data update, the locking here
1290 * also ensures that the downconvert thread on other nodes
1291 * won't have to concurrently downconvert the inode and the
1292 * dentry locks.
1294 status = ocfs2_inode_lock_nested(old_inode, &old_inode_bh, 1,
1295 OI_LS_PARENT);
1296 if (status < 0) {
1297 if (status != -ENOENT)
1298 mlog_errno(status);
1299 goto bail;
1301 old_child_locked = 1;
1303 status = ocfs2_remote_dentry_delete(old_dentry);
1304 if (status < 0) {
1305 mlog_errno(status);
1306 goto bail;
1309 if (S_ISDIR(old_inode->i_mode)) {
1310 u64 old_inode_parent;
1312 update_dot_dot = 1;
1313 status = ocfs2_find_files_on_disk("..", 2, &old_inode_parent,
1314 old_inode,
1315 &old_inode_dot_dot_res);
1316 if (status) {
1317 status = -EIO;
1318 goto bail;
1321 if (old_inode_parent != OCFS2_I(old_dir)->ip_blkno) {
1322 status = -EIO;
1323 goto bail;
1326 if (!new_inode && new_dir != old_dir &&
1327 new_dir->i_nlink >= ocfs2_link_max(osb)) {
1328 status = -EMLINK;
1329 goto bail;
1333 status = ocfs2_lookup_ino_from_name(old_dir, old_dentry->d_name.name,
1334 old_dentry->d_name.len,
1335 &old_de_ino);
1336 if (status) {
1337 status = -ENOENT;
1338 goto bail;
1342 * Check for inode number is _not_ due to possible IO errors.
1343 * We might rmdir the source, keep it as pwd of some process
1344 * and merrily kill the link to whatever was created under the
1345 * same name. Goodbye sticky bit ;-<
1347 if (old_de_ino != OCFS2_I(old_inode)->ip_blkno) {
1348 status = -ENOENT;
1349 goto bail;
1352 /* check if the target already exists (in which case we need
1353 * to delete it */
1354 status = ocfs2_find_files_on_disk(new_dentry->d_name.name,
1355 new_dentry->d_name.len,
1356 &newfe_blkno, new_dir,
1357 &target_lookup_res);
1358 /* The only error we allow here is -ENOENT because the new
1359 * file not existing is perfectly valid. */
1360 if ((status < 0) && (status != -ENOENT)) {
1361 /* If we cannot find the file specified we should just */
1362 /* return the error... */
1363 mlog_errno(status);
1364 goto bail;
1366 if (status == 0)
1367 target_exists = 1;
1369 if (!target_exists && new_inode) {
1371 * Target was unlinked by another node while we were
1372 * waiting to get to ocfs2_rename(). There isn't
1373 * anything we can do here to help the situation, so
1374 * bubble up the appropriate error.
1376 status = -ENOENT;
1377 goto bail;
1380 /* In case we need to overwrite an existing file, we blow it
1381 * away first */
1382 if (target_exists) {
1383 /* VFS didn't think there existed an inode here, but
1384 * someone else in the cluster must have raced our
1385 * rename to create one. Today we error cleanly, in
1386 * the future we should consider calling iget to build
1387 * a new struct inode for this entry. */
1388 if (!new_inode) {
1389 status = -EACCES;
1391 trace_ocfs2_rename_target_exists(new_dentry->d_name.len,
1392 new_dentry->d_name.name);
1393 goto bail;
1396 if (OCFS2_I(new_inode)->ip_blkno != newfe_blkno) {
1397 status = -EACCES;
1399 trace_ocfs2_rename_disagree(
1400 (unsigned long long)OCFS2_I(new_inode)->ip_blkno,
1401 (unsigned long long)newfe_blkno,
1402 OCFS2_I(new_inode)->ip_flags);
1403 goto bail;
1406 status = ocfs2_inode_lock(new_inode, &newfe_bh, 1);
1407 if (status < 0) {
1408 if (status != -ENOENT)
1409 mlog_errno(status);
1410 goto bail;
1412 new_child_locked = 1;
1414 status = ocfs2_remote_dentry_delete(new_dentry);
1415 if (status < 0) {
1416 mlog_errno(status);
1417 goto bail;
1420 newfe = (struct ocfs2_dinode *) newfe_bh->b_data;
1422 trace_ocfs2_rename_over_existing(
1423 (unsigned long long)newfe_blkno, newfe_bh, newfe_bh ?
1424 (unsigned long long)newfe_bh->b_blocknr : 0ULL);
1426 if (S_ISDIR(new_inode->i_mode) || (new_inode->i_nlink == 1)) {
1427 status = ocfs2_prepare_orphan_dir(osb, &orphan_dir,
1428 OCFS2_I(new_inode)->ip_blkno,
1429 orphan_name, &orphan_insert,
1430 false);
1431 if (status < 0) {
1432 mlog_errno(status);
1433 goto bail;
1435 should_add_orphan = true;
1437 } else {
1438 BUG_ON(d_inode(new_dentry->d_parent) != new_dir);
1440 status = ocfs2_check_dir_for_entry(new_dir,
1441 new_dentry->d_name.name,
1442 new_dentry->d_name.len);
1443 if (status)
1444 goto bail;
1446 status = ocfs2_prepare_dir_for_insert(osb, new_dir, new_dir_bh,
1447 new_dentry->d_name.name,
1448 new_dentry->d_name.len,
1449 &target_insert);
1450 if (status < 0) {
1451 mlog_errno(status);
1452 goto bail;
1456 handle = ocfs2_start_trans(osb, ocfs2_rename_credits(osb->sb));
1457 if (IS_ERR(handle)) {
1458 status = PTR_ERR(handle);
1459 handle = NULL;
1460 mlog_errno(status);
1461 goto bail;
1464 if (target_exists) {
1465 if (S_ISDIR(new_inode->i_mode)) {
1466 if (new_inode->i_nlink != 2 ||
1467 !ocfs2_empty_dir(new_inode)) {
1468 status = -ENOTEMPTY;
1469 goto bail;
1472 status = ocfs2_journal_access_di(handle, INODE_CACHE(new_inode),
1473 newfe_bh,
1474 OCFS2_JOURNAL_ACCESS_WRITE);
1475 if (status < 0) {
1476 mlog_errno(status);
1477 goto bail;
1480 /* change the dirent to point to the correct inode */
1481 status = ocfs2_update_entry(new_dir, handle, &target_lookup_res,
1482 old_inode);
1483 if (status < 0) {
1484 mlog_errno(status);
1485 goto bail;
1487 new_dir->i_version++;
1489 if (S_ISDIR(new_inode->i_mode))
1490 ocfs2_set_links_count(newfe, 0);
1491 else
1492 ocfs2_add_links_count(newfe, -1);
1493 ocfs2_journal_dirty(handle, newfe_bh);
1494 if (should_add_orphan) {
1495 status = ocfs2_orphan_add(osb, handle, new_inode,
1496 newfe_bh, orphan_name,
1497 &orphan_insert, orphan_dir, false);
1498 if (status < 0) {
1499 mlog_errno(status);
1500 goto bail;
1503 } else {
1504 /* if the name was not found in new_dir, add it now */
1505 status = ocfs2_add_entry(handle, new_dentry, old_inode,
1506 OCFS2_I(old_inode)->ip_blkno,
1507 new_dir_bh, &target_insert);
1510 old_inode->i_ctime = CURRENT_TIME;
1511 mark_inode_dirty(old_inode);
1513 status = ocfs2_journal_access_di(handle, INODE_CACHE(old_inode),
1514 old_inode_bh,
1515 OCFS2_JOURNAL_ACCESS_WRITE);
1516 if (status >= 0) {
1517 old_di = (struct ocfs2_dinode *) old_inode_bh->b_data;
1519 old_di->i_ctime = cpu_to_le64(old_inode->i_ctime.tv_sec);
1520 old_di->i_ctime_nsec = cpu_to_le32(old_inode->i_ctime.tv_nsec);
1521 ocfs2_journal_dirty(handle, old_inode_bh);
1522 } else
1523 mlog_errno(status);
1526 * Now that the name has been added to new_dir, remove the old name.
1528 * We don't keep any directory entry context around until now
1529 * because the insert might have changed the type of directory
1530 * we're dealing with.
1532 status = ocfs2_find_entry(old_dentry->d_name.name,
1533 old_dentry->d_name.len, old_dir,
1534 &old_entry_lookup);
1535 if (status)
1536 goto bail;
1538 status = ocfs2_delete_entry(handle, old_dir, &old_entry_lookup);
1539 if (status < 0) {
1540 mlog_errno(status);
1541 goto bail;
1544 if (new_inode) {
1545 drop_nlink(new_inode);
1546 new_inode->i_ctime = CURRENT_TIME;
1548 old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
1550 if (update_dot_dot) {
1551 status = ocfs2_update_entry(old_inode, handle,
1552 &old_inode_dot_dot_res, new_dir);
1553 drop_nlink(old_dir);
1554 if (new_inode) {
1555 drop_nlink(new_inode);
1556 } else {
1557 inc_nlink(new_dir);
1558 mark_inode_dirty(new_dir);
1561 mark_inode_dirty(old_dir);
1562 ocfs2_mark_inode_dirty(handle, old_dir, old_dir_bh);
1563 if (new_inode) {
1564 mark_inode_dirty(new_inode);
1565 ocfs2_mark_inode_dirty(handle, new_inode, newfe_bh);
1568 if (old_dir != new_dir) {
1569 /* Keep the same times on both directories.*/
1570 new_dir->i_ctime = new_dir->i_mtime = old_dir->i_ctime;
1573 * This will also pick up the i_nlink change from the
1574 * block above.
1576 ocfs2_mark_inode_dirty(handle, new_dir, new_dir_bh);
1579 if (old_dir_nlink != old_dir->i_nlink) {
1580 if (!old_dir_bh) {
1581 mlog(ML_ERROR, "need to change nlink for old dir "
1582 "%llu from %d to %d but bh is NULL!\n",
1583 (unsigned long long)OCFS2_I(old_dir)->ip_blkno,
1584 (int)old_dir_nlink, old_dir->i_nlink);
1585 } else {
1586 struct ocfs2_dinode *fe;
1587 status = ocfs2_journal_access_di(handle,
1588 INODE_CACHE(old_dir),
1589 old_dir_bh,
1590 OCFS2_JOURNAL_ACCESS_WRITE);
1591 fe = (struct ocfs2_dinode *) old_dir_bh->b_data;
1592 ocfs2_set_links_count(fe, old_dir->i_nlink);
1593 ocfs2_journal_dirty(handle, old_dir_bh);
1596 ocfs2_dentry_move(old_dentry, new_dentry, old_dir, new_dir);
1597 status = 0;
1598 bail:
1599 if (rename_lock)
1600 ocfs2_rename_unlock(osb);
1602 if (handle)
1603 ocfs2_commit_trans(osb, handle);
1605 if (parents_locked)
1606 ocfs2_double_unlock(old_dir, new_dir);
1608 if (old_child_locked)
1609 ocfs2_inode_unlock(old_inode, 1);
1611 if (new_child_locked)
1612 ocfs2_inode_unlock(new_inode, 1);
1614 if (orphan_dir) {
1615 /* This was locked for us in ocfs2_prepare_orphan_dir() */
1616 ocfs2_inode_unlock(orphan_dir, 1);
1617 mutex_unlock(&orphan_dir->i_mutex);
1618 iput(orphan_dir);
1621 if (new_inode)
1622 sync_mapping_buffers(old_inode->i_mapping);
1624 if (new_inode)
1625 iput(new_inode);
1627 ocfs2_free_dir_lookup_result(&target_lookup_res);
1628 ocfs2_free_dir_lookup_result(&old_entry_lookup);
1629 ocfs2_free_dir_lookup_result(&old_inode_dot_dot_res);
1630 ocfs2_free_dir_lookup_result(&orphan_insert);
1631 ocfs2_free_dir_lookup_result(&target_insert);
1633 brelse(newfe_bh);
1634 brelse(old_inode_bh);
1635 brelse(old_dir_bh);
1636 brelse(new_dir_bh);
1638 if (status)
1639 mlog_errno(status);
1641 return status;
1645 * we expect i_size = strlen(symname). Copy symname into the file
1646 * data, including the null terminator.
1648 static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
1649 handle_t *handle,
1650 struct inode *inode,
1651 const char *symname)
1653 struct buffer_head **bhs = NULL;
1654 const char *c;
1655 struct super_block *sb = osb->sb;
1656 u64 p_blkno, p_blocks;
1657 int virtual, blocks, status, i, bytes_left;
1659 bytes_left = i_size_read(inode) + 1;
1660 /* we can't trust i_blocks because we're actually going to
1661 * write i_size + 1 bytes. */
1662 blocks = (bytes_left + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
1664 trace_ocfs2_create_symlink_data((unsigned long long)inode->i_blocks,
1665 i_size_read(inode), blocks);
1667 /* Sanity check -- make sure we're going to fit. */
1668 if (bytes_left >
1669 ocfs2_clusters_to_bytes(sb, OCFS2_I(inode)->ip_clusters)) {
1670 status = -EIO;
1671 mlog_errno(status);
1672 goto bail;
1675 bhs = kcalloc(blocks, sizeof(struct buffer_head *), GFP_KERNEL);
1676 if (!bhs) {
1677 status = -ENOMEM;
1678 mlog_errno(status);
1679 goto bail;
1682 status = ocfs2_extent_map_get_blocks(inode, 0, &p_blkno, &p_blocks,
1683 NULL);
1684 if (status < 0) {
1685 mlog_errno(status);
1686 goto bail;
1689 /* links can never be larger than one cluster so we know this
1690 * is all going to be contiguous, but do a sanity check
1691 * anyway. */
1692 if ((p_blocks << sb->s_blocksize_bits) < bytes_left) {
1693 status = -EIO;
1694 mlog_errno(status);
1695 goto bail;
1698 virtual = 0;
1699 while(bytes_left > 0) {
1700 c = &symname[virtual * sb->s_blocksize];
1702 bhs[virtual] = sb_getblk(sb, p_blkno);
1703 if (!bhs[virtual]) {
1704 status = -ENOMEM;
1705 mlog_errno(status);
1706 goto bail;
1708 ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode),
1709 bhs[virtual]);
1711 status = ocfs2_journal_access(handle, INODE_CACHE(inode),
1712 bhs[virtual],
1713 OCFS2_JOURNAL_ACCESS_CREATE);
1714 if (status < 0) {
1715 mlog_errno(status);
1716 goto bail;
1719 memset(bhs[virtual]->b_data, 0, sb->s_blocksize);
1721 memcpy(bhs[virtual]->b_data, c,
1722 (bytes_left > sb->s_blocksize) ? sb->s_blocksize :
1723 bytes_left);
1725 ocfs2_journal_dirty(handle, bhs[virtual]);
1727 virtual++;
1728 p_blkno++;
1729 bytes_left -= sb->s_blocksize;
1732 status = 0;
1733 bail:
1735 if (bhs) {
1736 for(i = 0; i < blocks; i++)
1737 brelse(bhs[i]);
1738 kfree(bhs);
1741 if (status)
1742 mlog_errno(status);
1743 return status;
1746 static int ocfs2_symlink(struct inode *dir,
1747 struct dentry *dentry,
1748 const char *symname)
1750 int status, l, credits;
1751 u64 newsize;
1752 struct ocfs2_super *osb = NULL;
1753 struct inode *inode = NULL;
1754 struct super_block *sb;
1755 struct buffer_head *new_fe_bh = NULL;
1756 struct buffer_head *parent_fe_bh = NULL;
1757 struct ocfs2_dinode *fe = NULL;
1758 struct ocfs2_dinode *dirfe;
1759 handle_t *handle = NULL;
1760 struct ocfs2_alloc_context *inode_ac = NULL;
1761 struct ocfs2_alloc_context *data_ac = NULL;
1762 struct ocfs2_alloc_context *xattr_ac = NULL;
1763 int want_clusters = 0;
1764 int xattr_credits = 0;
1765 struct ocfs2_security_xattr_info si = {
1766 .enable = 1,
1768 int did_quota = 0, did_quota_inode = 0;
1769 struct ocfs2_dir_lookup_result lookup = { NULL, };
1770 sigset_t oldset;
1771 int did_block_signals = 0;
1772 struct ocfs2_dentry_lock *dl = NULL;
1774 trace_ocfs2_symlink_begin(dir, dentry, symname,
1775 dentry->d_name.len, dentry->d_name.name);
1777 dquot_initialize(dir);
1779 sb = dir->i_sb;
1780 osb = OCFS2_SB(sb);
1782 l = strlen(symname) + 1;
1784 credits = ocfs2_calc_symlink_credits(sb);
1786 /* lock the parent directory */
1787 status = ocfs2_inode_lock(dir, &parent_fe_bh, 1);
1788 if (status < 0) {
1789 if (status != -ENOENT)
1790 mlog_errno(status);
1791 return status;
1794 dirfe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
1795 if (!ocfs2_read_links_count(dirfe)) {
1796 /* can't make a file in a deleted directory. */
1797 status = -ENOENT;
1798 goto bail;
1801 status = ocfs2_check_dir_for_entry(dir, dentry->d_name.name,
1802 dentry->d_name.len);
1803 if (status)
1804 goto bail;
1806 status = ocfs2_prepare_dir_for_insert(osb, dir, parent_fe_bh,
1807 dentry->d_name.name,
1808 dentry->d_name.len, &lookup);
1809 if (status < 0) {
1810 mlog_errno(status);
1811 goto bail;
1814 status = ocfs2_reserve_new_inode(osb, &inode_ac);
1815 if (status < 0) {
1816 if (status != -ENOSPC)
1817 mlog_errno(status);
1818 goto bail;
1821 inode = ocfs2_get_init_inode(dir, S_IFLNK | S_IRWXUGO);
1822 if (!inode) {
1823 status = -ENOMEM;
1824 mlog_errno(status);
1825 goto bail;
1828 /* get security xattr */
1829 status = ocfs2_init_security_get(inode, dir, &dentry->d_name, &si);
1830 if (status) {
1831 if (status == -EOPNOTSUPP)
1832 si.enable = 0;
1833 else {
1834 mlog_errno(status);
1835 goto bail;
1839 /* calculate meta data/clusters for setting security xattr */
1840 if (si.enable) {
1841 status = ocfs2_calc_security_init(dir, &si, &want_clusters,
1842 &xattr_credits, &xattr_ac);
1843 if (status < 0) {
1844 mlog_errno(status);
1845 goto bail;
1849 /* don't reserve bitmap space for fast symlinks. */
1850 if (l > ocfs2_fast_symlink_chars(sb))
1851 want_clusters += 1;
1853 status = ocfs2_reserve_clusters(osb, want_clusters, &data_ac);
1854 if (status < 0) {
1855 if (status != -ENOSPC)
1856 mlog_errno(status);
1857 goto bail;
1860 handle = ocfs2_start_trans(osb, credits + xattr_credits);
1861 if (IS_ERR(handle)) {
1862 status = PTR_ERR(handle);
1863 handle = NULL;
1864 mlog_errno(status);
1865 goto bail;
1868 /* Starting to change things, restart is no longer possible. */
1869 ocfs2_block_signals(&oldset);
1870 did_block_signals = 1;
1872 status = dquot_alloc_inode(inode);
1873 if (status)
1874 goto bail;
1875 did_quota_inode = 1;
1877 trace_ocfs2_symlink_create(dir, dentry, dentry->d_name.len,
1878 dentry->d_name.name,
1879 (unsigned long long)OCFS2_I(dir)->ip_blkno,
1880 inode->i_mode);
1882 status = ocfs2_mknod_locked(osb, dir, inode,
1883 0, &new_fe_bh, parent_fe_bh, handle,
1884 inode_ac);
1885 if (status < 0) {
1886 mlog_errno(status);
1887 goto bail;
1890 fe = (struct ocfs2_dinode *) new_fe_bh->b_data;
1891 inode->i_rdev = 0;
1892 newsize = l - 1;
1893 inode->i_op = &ocfs2_symlink_inode_operations;
1894 if (l > ocfs2_fast_symlink_chars(sb)) {
1895 u32 offset = 0;
1897 status = dquot_alloc_space_nodirty(inode,
1898 ocfs2_clusters_to_bytes(osb->sb, 1));
1899 if (status)
1900 goto bail;
1901 did_quota = 1;
1902 inode->i_mapping->a_ops = &ocfs2_aops;
1903 status = ocfs2_add_inode_data(osb, inode, &offset, 1, 0,
1904 new_fe_bh,
1905 handle, data_ac, NULL,
1906 NULL);
1907 if (status < 0) {
1908 if (status != -ENOSPC && status != -EINTR) {
1909 mlog(ML_ERROR,
1910 "Failed to extend file to %llu\n",
1911 (unsigned long long)newsize);
1912 mlog_errno(status);
1913 status = -ENOSPC;
1915 goto bail;
1917 i_size_write(inode, newsize);
1918 inode->i_blocks = ocfs2_inode_sector_count(inode);
1919 } else {
1920 inode->i_mapping->a_ops = &ocfs2_fast_symlink_aops;
1921 memcpy((char *) fe->id2.i_symlink, symname, l);
1922 i_size_write(inode, newsize);
1923 inode->i_blocks = 0;
1926 status = ocfs2_mark_inode_dirty(handle, inode, new_fe_bh);
1927 if (status < 0) {
1928 mlog_errno(status);
1929 goto bail;
1932 if (!ocfs2_inode_is_fast_symlink(inode)) {
1933 status = ocfs2_create_symlink_data(osb, handle, inode,
1934 symname);
1935 if (status < 0) {
1936 mlog_errno(status);
1937 goto bail;
1941 if (si.enable) {
1942 status = ocfs2_init_security_set(handle, inode, new_fe_bh, &si,
1943 xattr_ac, data_ac);
1944 if (status < 0) {
1945 mlog_errno(status);
1946 goto bail;
1951 * Do this before adding the entry to the directory. We add
1952 * also set d_op after success so that ->d_iput() will cleanup
1953 * the dentry lock even if ocfs2_add_entry() fails below.
1955 status = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno);
1956 if (status) {
1957 mlog_errno(status);
1958 goto bail;
1961 dl = dentry->d_fsdata;
1963 status = ocfs2_add_entry(handle, dentry, inode,
1964 le64_to_cpu(fe->i_blkno), parent_fe_bh,
1965 &lookup);
1966 if (status < 0) {
1967 mlog_errno(status);
1968 goto bail;
1971 insert_inode_hash(inode);
1972 d_instantiate(dentry, inode);
1973 bail:
1974 if (status < 0 && did_quota)
1975 dquot_free_space_nodirty(inode,
1976 ocfs2_clusters_to_bytes(osb->sb, 1));
1977 if (status < 0 && did_quota_inode)
1978 dquot_free_inode(inode);
1979 if (handle)
1980 ocfs2_commit_trans(osb, handle);
1982 ocfs2_inode_unlock(dir, 1);
1983 if (did_block_signals)
1984 ocfs2_unblock_signals(&oldset);
1986 brelse(new_fe_bh);
1987 brelse(parent_fe_bh);
1988 kfree(si.value);
1989 ocfs2_free_dir_lookup_result(&lookup);
1990 if (inode_ac)
1991 ocfs2_free_alloc_context(inode_ac);
1992 if (data_ac)
1993 ocfs2_free_alloc_context(data_ac);
1994 if (xattr_ac)
1995 ocfs2_free_alloc_context(xattr_ac);
1996 if ((status < 0) && inode) {
1997 if (dl)
1998 ocfs2_cleanup_add_entry_failure(osb, dentry, inode);
2000 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR;
2001 clear_nlink(inode);
2002 iput(inode);
2005 if (status)
2006 mlog_errno(status);
2008 return status;
2011 static int ocfs2_blkno_stringify(u64 blkno, char *name)
2013 int status, namelen;
2015 namelen = snprintf(name, OCFS2_ORPHAN_NAMELEN + 1, "%016llx",
2016 (long long)blkno);
2017 if (namelen <= 0) {
2018 if (namelen)
2019 status = namelen;
2020 else
2021 status = -EINVAL;
2022 mlog_errno(status);
2023 goto bail;
2025 if (namelen != OCFS2_ORPHAN_NAMELEN) {
2026 status = -EINVAL;
2027 mlog_errno(status);
2028 goto bail;
2031 trace_ocfs2_blkno_stringify(blkno, name, namelen);
2033 status = 0;
2034 bail:
2035 if (status < 0)
2036 mlog_errno(status);
2037 return status;
2040 static int ocfs2_lookup_lock_orphan_dir(struct ocfs2_super *osb,
2041 struct inode **ret_orphan_dir,
2042 struct buffer_head **ret_orphan_dir_bh)
2044 struct inode *orphan_dir_inode;
2045 struct buffer_head *orphan_dir_bh = NULL;
2046 int ret = 0;
2048 orphan_dir_inode = ocfs2_get_system_file_inode(osb,
2049 ORPHAN_DIR_SYSTEM_INODE,
2050 osb->slot_num);
2051 if (!orphan_dir_inode) {
2052 ret = -ENOENT;
2053 mlog_errno(ret);
2054 return ret;
2057 mutex_lock(&orphan_dir_inode->i_mutex);
2059 ret = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
2060 if (ret < 0) {
2061 mutex_unlock(&orphan_dir_inode->i_mutex);
2062 iput(orphan_dir_inode);
2064 mlog_errno(ret);
2065 return ret;
2068 *ret_orphan_dir = orphan_dir_inode;
2069 *ret_orphan_dir_bh = orphan_dir_bh;
2071 return 0;
2074 static int __ocfs2_prepare_orphan_dir(struct inode *orphan_dir_inode,
2075 struct buffer_head *orphan_dir_bh,
2076 u64 blkno,
2077 char *name,
2078 struct ocfs2_dir_lookup_result *lookup,
2079 bool dio)
2081 int ret;
2082 struct ocfs2_super *osb = OCFS2_SB(orphan_dir_inode->i_sb);
2083 int namelen = dio ?
2084 (OCFS2_DIO_ORPHAN_PREFIX_LEN + OCFS2_ORPHAN_NAMELEN) :
2085 OCFS2_ORPHAN_NAMELEN;
2087 if (dio) {
2088 ret = snprintf(name, OCFS2_DIO_ORPHAN_PREFIX_LEN + 1, "%s",
2089 OCFS2_DIO_ORPHAN_PREFIX);
2090 if (ret != OCFS2_DIO_ORPHAN_PREFIX_LEN) {
2091 ret = -EINVAL;
2092 mlog_errno(ret);
2093 return ret;
2096 ret = ocfs2_blkno_stringify(blkno,
2097 name + OCFS2_DIO_ORPHAN_PREFIX_LEN);
2098 } else
2099 ret = ocfs2_blkno_stringify(blkno, name);
2100 if (ret < 0) {
2101 mlog_errno(ret);
2102 return ret;
2105 ret = ocfs2_prepare_dir_for_insert(osb, orphan_dir_inode,
2106 orphan_dir_bh, name,
2107 namelen, lookup);
2108 if (ret < 0) {
2109 mlog_errno(ret);
2110 return ret;
2113 return 0;
2117 * ocfs2_prepare_orphan_dir() - Prepare an orphan directory for
2118 * insertion of an orphan.
2119 * @osb: ocfs2 file system
2120 * @ret_orphan_dir: Orphan dir inode - returned locked!
2121 * @blkno: Actual block number of the inode to be inserted into orphan dir.
2122 * @lookup: dir lookup result, to be passed back into functions like
2123 * ocfs2_orphan_add
2125 * Returns zero on success and the ret_orphan_dir, name and lookup
2126 * fields will be populated.
2128 * Returns non-zero on failure.
2130 static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
2131 struct inode **ret_orphan_dir,
2132 u64 blkno,
2133 char *name,
2134 struct ocfs2_dir_lookup_result *lookup,
2135 bool dio)
2137 struct inode *orphan_dir_inode = NULL;
2138 struct buffer_head *orphan_dir_bh = NULL;
2139 int ret = 0;
2141 ret = ocfs2_lookup_lock_orphan_dir(osb, &orphan_dir_inode,
2142 &orphan_dir_bh);
2143 if (ret < 0) {
2144 mlog_errno(ret);
2145 return ret;
2148 ret = __ocfs2_prepare_orphan_dir(orphan_dir_inode, orphan_dir_bh,
2149 blkno, name, lookup, dio);
2150 if (ret < 0) {
2151 mlog_errno(ret);
2152 goto out;
2155 *ret_orphan_dir = orphan_dir_inode;
2157 out:
2158 brelse(orphan_dir_bh);
2160 if (ret) {
2161 ocfs2_inode_unlock(orphan_dir_inode, 1);
2162 mutex_unlock(&orphan_dir_inode->i_mutex);
2163 iput(orphan_dir_inode);
2166 if (ret)
2167 mlog_errno(ret);
2168 return ret;
2171 static int ocfs2_orphan_add(struct ocfs2_super *osb,
2172 handle_t *handle,
2173 struct inode *inode,
2174 struct buffer_head *fe_bh,
2175 char *name,
2176 struct ocfs2_dir_lookup_result *lookup,
2177 struct inode *orphan_dir_inode,
2178 bool dio)
2180 struct buffer_head *orphan_dir_bh = NULL;
2181 int status = 0;
2182 struct ocfs2_dinode *orphan_fe;
2183 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data;
2184 int namelen = dio ?
2185 (OCFS2_DIO_ORPHAN_PREFIX_LEN + OCFS2_ORPHAN_NAMELEN) :
2186 OCFS2_ORPHAN_NAMELEN;
2188 trace_ocfs2_orphan_add_begin(
2189 (unsigned long long)OCFS2_I(inode)->ip_blkno);
2191 status = ocfs2_read_inode_block(orphan_dir_inode, &orphan_dir_bh);
2192 if (status < 0) {
2193 mlog_errno(status);
2194 goto leave;
2197 status = ocfs2_journal_access_di(handle,
2198 INODE_CACHE(orphan_dir_inode),
2199 orphan_dir_bh,
2200 OCFS2_JOURNAL_ACCESS_WRITE);
2201 if (status < 0) {
2202 mlog_errno(status);
2203 goto leave;
2207 * We're going to journal the change of i_flags and i_orphaned_slot.
2208 * It's safe anyway, though some callers may duplicate the journaling.
2209 * Journaling within the func just make the logic look more
2210 * straightforward.
2212 status = ocfs2_journal_access_di(handle,
2213 INODE_CACHE(inode),
2214 fe_bh,
2215 OCFS2_JOURNAL_ACCESS_WRITE);
2216 if (status < 0) {
2217 mlog_errno(status);
2218 goto leave;
2221 /* we're a cluster, and nlink can change on disk from
2222 * underneath us... */
2223 orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data;
2224 if (S_ISDIR(inode->i_mode))
2225 ocfs2_add_links_count(orphan_fe, 1);
2226 set_nlink(orphan_dir_inode, ocfs2_read_links_count(orphan_fe));
2227 ocfs2_journal_dirty(handle, orphan_dir_bh);
2229 status = __ocfs2_add_entry(handle, orphan_dir_inode, name,
2230 namelen, inode,
2231 OCFS2_I(inode)->ip_blkno,
2232 orphan_dir_bh, lookup);
2233 if (status < 0) {
2234 mlog_errno(status);
2235 goto rollback;
2238 if (dio) {
2239 /* Update flag OCFS2_DIO_ORPHANED_FL and record the orphan
2240 * slot.
2242 fe->i_flags |= cpu_to_le32(OCFS2_DIO_ORPHANED_FL);
2243 fe->i_dio_orphaned_slot = cpu_to_le16(osb->slot_num);
2244 } else {
2245 fe->i_flags |= cpu_to_le32(OCFS2_ORPHANED_FL);
2246 OCFS2_I(inode)->ip_flags &= ~OCFS2_INODE_SKIP_ORPHAN_DIR;
2248 /* Record which orphan dir our inode now resides
2249 * in. delete_inode will use this to determine which orphan
2250 * dir to lock. */
2251 fe->i_orphaned_slot = cpu_to_le16(osb->slot_num);
2254 ocfs2_journal_dirty(handle, fe_bh);
2256 trace_ocfs2_orphan_add_end((unsigned long long)OCFS2_I(inode)->ip_blkno,
2257 osb->slot_num);
2259 rollback:
2260 if (status < 0) {
2261 if (S_ISDIR(inode->i_mode))
2262 ocfs2_add_links_count(orphan_fe, -1);
2263 set_nlink(orphan_dir_inode, ocfs2_read_links_count(orphan_fe));
2266 leave:
2267 brelse(orphan_dir_bh);
2269 return status;
2272 /* unlike orphan_add, we expect the orphan dir to already be locked here. */
2273 int ocfs2_orphan_del(struct ocfs2_super *osb,
2274 handle_t *handle,
2275 struct inode *orphan_dir_inode,
2276 struct inode *inode,
2277 struct buffer_head *orphan_dir_bh,
2278 bool dio)
2280 const int namelen = OCFS2_DIO_ORPHAN_PREFIX_LEN + OCFS2_ORPHAN_NAMELEN;
2281 char name[namelen + 1];
2282 struct ocfs2_dinode *orphan_fe;
2283 int status = 0;
2284 struct ocfs2_dir_lookup_result lookup = { NULL, };
2286 if (dio) {
2287 status = snprintf(name, OCFS2_DIO_ORPHAN_PREFIX_LEN + 1, "%s",
2288 OCFS2_DIO_ORPHAN_PREFIX);
2289 if (status != OCFS2_DIO_ORPHAN_PREFIX_LEN) {
2290 status = -EINVAL;
2291 mlog_errno(status);
2292 return status;
2295 status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno,
2296 name + OCFS2_DIO_ORPHAN_PREFIX_LEN);
2297 } else
2298 status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, name);
2299 if (status < 0) {
2300 mlog_errno(status);
2301 goto leave;
2304 trace_ocfs2_orphan_del(
2305 (unsigned long long)OCFS2_I(orphan_dir_inode)->ip_blkno,
2306 name, strlen(name));
2308 /* find it's spot in the orphan directory */
2309 status = ocfs2_find_entry(name, strlen(name), orphan_dir_inode,
2310 &lookup);
2311 if (status) {
2312 mlog_errno(status);
2313 goto leave;
2316 /* remove it from the orphan directory */
2317 status = ocfs2_delete_entry(handle, orphan_dir_inode, &lookup);
2318 if (status < 0) {
2319 mlog_errno(status);
2320 goto leave;
2323 status = ocfs2_journal_access_di(handle,
2324 INODE_CACHE(orphan_dir_inode),
2325 orphan_dir_bh,
2326 OCFS2_JOURNAL_ACCESS_WRITE);
2327 if (status < 0) {
2328 mlog_errno(status);
2329 goto leave;
2332 /* do the i_nlink dance! :) */
2333 orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data;
2334 if (S_ISDIR(inode->i_mode))
2335 ocfs2_add_links_count(orphan_fe, -1);
2336 set_nlink(orphan_dir_inode, ocfs2_read_links_count(orphan_fe));
2337 ocfs2_journal_dirty(handle, orphan_dir_bh);
2339 leave:
2340 ocfs2_free_dir_lookup_result(&lookup);
2342 if (status)
2343 mlog_errno(status);
2344 return status;
2348 * ocfs2_prep_new_orphaned_file() - Prepare the orphan dir to receive a newly
2349 * allocated file. This is different from the typical 'add to orphan dir'
2350 * operation in that the inode does not yet exist. This is a problem because
2351 * the orphan dir stringifies the inode block number to come up with it's
2352 * dirent. Obviously if the inode does not yet exist we have a chicken and egg
2353 * problem. This function works around it by calling deeper into the orphan
2354 * and suballoc code than other callers. Use this only by necessity.
2355 * @dir: The directory which this inode will ultimately wind up under - not the
2356 * orphan dir!
2357 * @dir_bh: buffer_head the @dir inode block
2358 * @orphan_name: string of length (CFS2_ORPHAN_NAMELEN + 1). Will be filled
2359 * with the string to be used for orphan dirent. Pass back to the orphan dir
2360 * code.
2361 * @ret_orphan_dir: orphan dir inode returned to be passed back into orphan
2362 * dir code.
2363 * @ret_di_blkno: block number where the new inode will be allocated.
2364 * @orphan_insert: Dir insert context to be passed back into orphan dir code.
2365 * @ret_inode_ac: Inode alloc context to be passed back to the allocator.
2367 * Returns zero on success and the ret_orphan_dir, name and lookup
2368 * fields will be populated.
2370 * Returns non-zero on failure.
2372 static int ocfs2_prep_new_orphaned_file(struct inode *dir,
2373 struct buffer_head *dir_bh,
2374 char *orphan_name,
2375 struct inode **ret_orphan_dir,
2376 u64 *ret_di_blkno,
2377 struct ocfs2_dir_lookup_result *orphan_insert,
2378 struct ocfs2_alloc_context **ret_inode_ac)
2380 int ret;
2381 u64 di_blkno;
2382 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
2383 struct inode *orphan_dir = NULL;
2384 struct buffer_head *orphan_dir_bh = NULL;
2385 struct ocfs2_alloc_context *inode_ac = NULL;
2387 ret = ocfs2_lookup_lock_orphan_dir(osb, &orphan_dir, &orphan_dir_bh);
2388 if (ret < 0) {
2389 mlog_errno(ret);
2390 return ret;
2393 /* reserve an inode spot */
2394 ret = ocfs2_reserve_new_inode(osb, &inode_ac);
2395 if (ret < 0) {
2396 if (ret != -ENOSPC)
2397 mlog_errno(ret);
2398 goto out;
2401 ret = ocfs2_find_new_inode_loc(dir, dir_bh, inode_ac,
2402 &di_blkno);
2403 if (ret) {
2404 mlog_errno(ret);
2405 goto out;
2408 ret = __ocfs2_prepare_orphan_dir(orphan_dir, orphan_dir_bh,
2409 di_blkno, orphan_name, orphan_insert,
2410 false);
2411 if (ret < 0) {
2412 mlog_errno(ret);
2413 goto out;
2416 out:
2417 if (ret == 0) {
2418 *ret_orphan_dir = orphan_dir;
2419 *ret_di_blkno = di_blkno;
2420 *ret_inode_ac = inode_ac;
2422 * orphan_name and orphan_insert are already up to
2423 * date via prepare_orphan_dir
2425 } else {
2426 /* Unroll reserve_new_inode* */
2427 if (inode_ac)
2428 ocfs2_free_alloc_context(inode_ac);
2430 /* Unroll orphan dir locking */
2431 mutex_unlock(&orphan_dir->i_mutex);
2432 ocfs2_inode_unlock(orphan_dir, 1);
2433 iput(orphan_dir);
2436 brelse(orphan_dir_bh);
2438 return ret;
2441 int ocfs2_create_inode_in_orphan(struct inode *dir,
2442 int mode,
2443 struct inode **new_inode)
2445 int status, did_quota_inode = 0;
2446 struct inode *inode = NULL;
2447 struct inode *orphan_dir = NULL;
2448 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
2449 struct ocfs2_dinode *di = NULL;
2450 handle_t *handle = NULL;
2451 char orphan_name[OCFS2_ORPHAN_NAMELEN + 1];
2452 struct buffer_head *parent_di_bh = NULL;
2453 struct buffer_head *new_di_bh = NULL;
2454 struct ocfs2_alloc_context *inode_ac = NULL;
2455 struct ocfs2_dir_lookup_result orphan_insert = { NULL, };
2456 u64 uninitialized_var(di_blkno), suballoc_loc;
2457 u16 suballoc_bit;
2459 status = ocfs2_inode_lock(dir, &parent_di_bh, 1);
2460 if (status < 0) {
2461 if (status != -ENOENT)
2462 mlog_errno(status);
2463 return status;
2466 status = ocfs2_prep_new_orphaned_file(dir, parent_di_bh,
2467 orphan_name, &orphan_dir,
2468 &di_blkno, &orphan_insert, &inode_ac);
2469 if (status < 0) {
2470 if (status != -ENOSPC)
2471 mlog_errno(status);
2472 goto leave;
2475 inode = ocfs2_get_init_inode(dir, mode);
2476 if (!inode) {
2477 status = -ENOMEM;
2478 mlog_errno(status);
2479 goto leave;
2482 handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb, 0, 0));
2483 if (IS_ERR(handle)) {
2484 status = PTR_ERR(handle);
2485 handle = NULL;
2486 mlog_errno(status);
2487 goto leave;
2490 status = dquot_alloc_inode(inode);
2491 if (status)
2492 goto leave;
2493 did_quota_inode = 1;
2495 status = ocfs2_claim_new_inode_at_loc(handle, dir, inode_ac,
2496 &suballoc_loc,
2497 &suballoc_bit, di_blkno);
2498 if (status < 0) {
2499 mlog_errno(status);
2500 goto leave;
2503 clear_nlink(inode);
2504 /* do the real work now. */
2505 status = __ocfs2_mknod_locked(dir, inode,
2506 0, &new_di_bh, parent_di_bh, handle,
2507 inode_ac, di_blkno, suballoc_loc,
2508 suballoc_bit);
2509 if (status < 0) {
2510 mlog_errno(status);
2511 goto leave;
2514 di = (struct ocfs2_dinode *)new_di_bh->b_data;
2515 status = ocfs2_orphan_add(osb, handle, inode, new_di_bh, orphan_name,
2516 &orphan_insert, orphan_dir, false);
2517 if (status < 0) {
2518 mlog_errno(status);
2519 goto leave;
2522 /* get open lock so that only nodes can't remove it from orphan dir. */
2523 status = ocfs2_open_lock(inode);
2524 if (status < 0)
2525 mlog_errno(status);
2527 insert_inode_hash(inode);
2528 leave:
2529 if (status < 0 && did_quota_inode)
2530 dquot_free_inode(inode);
2531 if (handle)
2532 ocfs2_commit_trans(osb, handle);
2534 if (orphan_dir) {
2535 /* This was locked for us in ocfs2_prepare_orphan_dir() */
2536 ocfs2_inode_unlock(orphan_dir, 1);
2537 mutex_unlock(&orphan_dir->i_mutex);
2538 iput(orphan_dir);
2541 if ((status < 0) && inode) {
2542 clear_nlink(inode);
2543 iput(inode);
2546 if (inode_ac)
2547 ocfs2_free_alloc_context(inode_ac);
2549 brelse(new_di_bh);
2551 if (!status)
2552 *new_inode = inode;
2554 ocfs2_free_dir_lookup_result(&orphan_insert);
2556 ocfs2_inode_unlock(dir, 1);
2557 brelse(parent_di_bh);
2558 return status;
2561 static int ocfs2_dio_orphan_recovered(struct inode *inode)
2563 int ret;
2564 struct buffer_head *di_bh = NULL;
2565 struct ocfs2_dinode *di = NULL;
2567 ret = ocfs2_inode_lock(inode, &di_bh, 1);
2568 if (ret < 0) {
2569 mlog_errno(ret);
2570 return 0;
2573 di = (struct ocfs2_dinode *) di_bh->b_data;
2574 ret = !(di->i_flags & cpu_to_le32(OCFS2_DIO_ORPHANED_FL));
2575 ocfs2_inode_unlock(inode, 1);
2576 brelse(di_bh);
2578 return ret;
2581 #define OCFS2_DIO_ORPHANED_FL_CHECK_INTERVAL 10000
2582 int ocfs2_add_inode_to_orphan(struct ocfs2_super *osb,
2583 struct inode *inode)
2585 char orphan_name[OCFS2_DIO_ORPHAN_PREFIX_LEN + OCFS2_ORPHAN_NAMELEN + 1];
2586 struct inode *orphan_dir_inode = NULL;
2587 struct ocfs2_dir_lookup_result orphan_insert = { NULL, };
2588 struct buffer_head *di_bh = NULL;
2589 int status = 0;
2590 handle_t *handle = NULL;
2591 struct ocfs2_dinode *di = NULL;
2593 restart:
2594 status = ocfs2_inode_lock(inode, &di_bh, 1);
2595 if (status < 0) {
2596 mlog_errno(status);
2597 goto bail;
2600 di = (struct ocfs2_dinode *) di_bh->b_data;
2602 * Another append dio crashed?
2603 * If so, wait for recovery first.
2605 if (unlikely(di->i_flags & cpu_to_le32(OCFS2_DIO_ORPHANED_FL))) {
2606 ocfs2_inode_unlock(inode, 1);
2607 brelse(di_bh);
2608 wait_event_interruptible_timeout(OCFS2_I(inode)->append_dio_wq,
2609 ocfs2_dio_orphan_recovered(inode),
2610 msecs_to_jiffies(OCFS2_DIO_ORPHANED_FL_CHECK_INTERVAL));
2611 goto restart;
2614 status = ocfs2_prepare_orphan_dir(osb, &orphan_dir_inode,
2615 OCFS2_I(inode)->ip_blkno,
2616 orphan_name,
2617 &orphan_insert,
2618 true);
2619 if (status < 0) {
2620 mlog_errno(status);
2621 goto bail_unlock_inode;
2624 handle = ocfs2_start_trans(osb,
2625 OCFS2_INODE_ADD_TO_ORPHAN_CREDITS);
2626 if (IS_ERR(handle)) {
2627 status = PTR_ERR(handle);
2628 goto bail_unlock_orphan;
2631 status = ocfs2_orphan_add(osb, handle, inode, di_bh, orphan_name,
2632 &orphan_insert, orphan_dir_inode, true);
2633 if (status)
2634 mlog_errno(status);
2636 ocfs2_commit_trans(osb, handle);
2638 bail_unlock_orphan:
2639 ocfs2_inode_unlock(orphan_dir_inode, 1);
2640 mutex_unlock(&orphan_dir_inode->i_mutex);
2641 iput(orphan_dir_inode);
2643 ocfs2_free_dir_lookup_result(&orphan_insert);
2645 bail_unlock_inode:
2646 ocfs2_inode_unlock(inode, 1);
2647 brelse(di_bh);
2649 bail:
2650 return status;
2653 int ocfs2_del_inode_from_orphan(struct ocfs2_super *osb,
2654 struct inode *inode, int update_isize,
2655 loff_t end)
2657 struct inode *orphan_dir_inode = NULL;
2658 struct buffer_head *orphan_dir_bh = NULL;
2659 struct buffer_head *di_bh = NULL;
2660 struct ocfs2_dinode *di = NULL;
2661 handle_t *handle = NULL;
2662 int status = 0;
2664 status = ocfs2_inode_lock(inode, &di_bh, 1);
2665 if (status < 0) {
2666 mlog_errno(status);
2667 goto bail;
2669 di = (struct ocfs2_dinode *) di_bh->b_data;
2671 orphan_dir_inode = ocfs2_get_system_file_inode(osb,
2672 ORPHAN_DIR_SYSTEM_INODE,
2673 le16_to_cpu(di->i_dio_orphaned_slot));
2674 if (!orphan_dir_inode) {
2675 status = -ENOENT;
2676 mlog_errno(status);
2677 goto bail_unlock_inode;
2680 mutex_lock(&orphan_dir_inode->i_mutex);
2681 status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
2682 if (status < 0) {
2683 mutex_unlock(&orphan_dir_inode->i_mutex);
2684 iput(orphan_dir_inode);
2685 mlog_errno(status);
2686 goto bail_unlock_inode;
2689 handle = ocfs2_start_trans(osb,
2690 OCFS2_INODE_DEL_FROM_ORPHAN_CREDITS);
2691 if (IS_ERR(handle)) {
2692 status = PTR_ERR(handle);
2693 goto bail_unlock_orphan;
2696 BUG_ON(!(di->i_flags & cpu_to_le32(OCFS2_DIO_ORPHANED_FL)));
2698 status = ocfs2_orphan_del(osb, handle, orphan_dir_inode,
2699 inode, orphan_dir_bh, true);
2700 if (status < 0) {
2701 mlog_errno(status);
2702 goto bail_commit;
2705 status = ocfs2_journal_access_di(handle,
2706 INODE_CACHE(inode),
2707 di_bh,
2708 OCFS2_JOURNAL_ACCESS_WRITE);
2709 if (status < 0) {
2710 mlog_errno(status);
2711 goto bail_commit;
2714 di->i_flags &= ~cpu_to_le32(OCFS2_DIO_ORPHANED_FL);
2715 di->i_dio_orphaned_slot = 0;
2717 if (update_isize) {
2718 status = ocfs2_set_inode_size(handle, inode, di_bh, end);
2719 if (status)
2720 mlog_errno(status);
2721 } else
2722 ocfs2_journal_dirty(handle, di_bh);
2724 bail_commit:
2725 ocfs2_commit_trans(osb, handle);
2727 bail_unlock_orphan:
2728 ocfs2_inode_unlock(orphan_dir_inode, 1);
2729 mutex_unlock(&orphan_dir_inode->i_mutex);
2730 brelse(orphan_dir_bh);
2731 iput(orphan_dir_inode);
2733 bail_unlock_inode:
2734 ocfs2_inode_unlock(inode, 1);
2735 brelse(di_bh);
2737 bail:
2738 return status;
2741 int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
2742 struct inode *inode,
2743 struct dentry *dentry)
2745 int status = 0;
2746 struct buffer_head *parent_di_bh = NULL;
2747 handle_t *handle = NULL;
2748 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
2749 struct ocfs2_dinode *dir_di, *di;
2750 struct inode *orphan_dir_inode = NULL;
2751 struct buffer_head *orphan_dir_bh = NULL;
2752 struct buffer_head *di_bh = NULL;
2753 struct ocfs2_dir_lookup_result lookup = { NULL, };
2755 trace_ocfs2_mv_orphaned_inode_to_new(dir, dentry,
2756 dentry->d_name.len, dentry->d_name.name,
2757 (unsigned long long)OCFS2_I(dir)->ip_blkno,
2758 (unsigned long long)OCFS2_I(inode)->ip_blkno);
2760 status = ocfs2_inode_lock(dir, &parent_di_bh, 1);
2761 if (status < 0) {
2762 if (status != -ENOENT)
2763 mlog_errno(status);
2764 return status;
2767 dir_di = (struct ocfs2_dinode *) parent_di_bh->b_data;
2768 if (!dir_di->i_links_count) {
2769 /* can't make a file in a deleted directory. */
2770 status = -ENOENT;
2771 goto leave;
2774 status = ocfs2_check_dir_for_entry(dir, dentry->d_name.name,
2775 dentry->d_name.len);
2776 if (status)
2777 goto leave;
2779 /* get a spot inside the dir. */
2780 status = ocfs2_prepare_dir_for_insert(osb, dir, parent_di_bh,
2781 dentry->d_name.name,
2782 dentry->d_name.len, &lookup);
2783 if (status < 0) {
2784 mlog_errno(status);
2785 goto leave;
2788 orphan_dir_inode = ocfs2_get_system_file_inode(osb,
2789 ORPHAN_DIR_SYSTEM_INODE,
2790 osb->slot_num);
2791 if (!orphan_dir_inode) {
2792 status = -ENOENT;
2793 mlog_errno(status);
2794 goto leave;
2797 mutex_lock(&orphan_dir_inode->i_mutex);
2799 status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
2800 if (status < 0) {
2801 mlog_errno(status);
2802 mutex_unlock(&orphan_dir_inode->i_mutex);
2803 iput(orphan_dir_inode);
2804 goto leave;
2807 status = ocfs2_read_inode_block(inode, &di_bh);
2808 if (status < 0) {
2809 mlog_errno(status);
2810 goto orphan_unlock;
2813 handle = ocfs2_start_trans(osb, ocfs2_rename_credits(osb->sb));
2814 if (IS_ERR(handle)) {
2815 status = PTR_ERR(handle);
2816 handle = NULL;
2817 mlog_errno(status);
2818 goto orphan_unlock;
2821 status = ocfs2_journal_access_di(handle, INODE_CACHE(inode),
2822 di_bh, OCFS2_JOURNAL_ACCESS_WRITE);
2823 if (status < 0) {
2824 mlog_errno(status);
2825 goto out_commit;
2828 status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode,
2829 orphan_dir_bh, false);
2830 if (status < 0) {
2831 mlog_errno(status);
2832 goto out_commit;
2835 di = (struct ocfs2_dinode *)di_bh->b_data;
2836 di->i_flags &= ~cpu_to_le32(OCFS2_ORPHANED_FL);
2837 di->i_orphaned_slot = 0;
2838 set_nlink(inode, 1);
2839 ocfs2_set_links_count(di, inode->i_nlink);
2840 ocfs2_update_inode_fsync_trans(handle, inode, 1);
2841 ocfs2_journal_dirty(handle, di_bh);
2843 status = ocfs2_add_entry(handle, dentry, inode,
2844 OCFS2_I(inode)->ip_blkno, parent_di_bh,
2845 &lookup);
2846 if (status < 0) {
2847 mlog_errno(status);
2848 goto out_commit;
2851 status = ocfs2_dentry_attach_lock(dentry, inode,
2852 OCFS2_I(dir)->ip_blkno);
2853 if (status) {
2854 mlog_errno(status);
2855 goto out_commit;
2858 d_instantiate(dentry, inode);
2859 status = 0;
2860 out_commit:
2861 ocfs2_commit_trans(osb, handle);
2862 orphan_unlock:
2863 ocfs2_inode_unlock(orphan_dir_inode, 1);
2864 mutex_unlock(&orphan_dir_inode->i_mutex);
2865 iput(orphan_dir_inode);
2866 leave:
2868 ocfs2_inode_unlock(dir, 1);
2870 brelse(di_bh);
2871 brelse(parent_di_bh);
2872 brelse(orphan_dir_bh);
2874 ocfs2_free_dir_lookup_result(&lookup);
2876 if (status)
2877 mlog_errno(status);
2879 return status;
2882 const struct inode_operations ocfs2_dir_iops = {
2883 .create = ocfs2_create,
2884 .lookup = ocfs2_lookup,
2885 .link = ocfs2_link,
2886 .unlink = ocfs2_unlink,
2887 .rmdir = ocfs2_unlink,
2888 .symlink = ocfs2_symlink,
2889 .mkdir = ocfs2_mkdir,
2890 .mknod = ocfs2_mknod,
2891 .rename = ocfs2_rename,
2892 .setattr = ocfs2_setattr,
2893 .getattr = ocfs2_getattr,
2894 .permission = ocfs2_permission,
2895 .setxattr = generic_setxattr,
2896 .getxattr = generic_getxattr,
2897 .listxattr = ocfs2_listxattr,
2898 .removexattr = generic_removexattr,
2899 .fiemap = ocfs2_fiemap,
2900 .get_acl = ocfs2_iop_get_acl,
2901 .set_acl = ocfs2_iop_set_acl,