1 // SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
 */
7 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
9 #include <linux/sched.h>
10 #include <linux/slab.h>
11 #include <linux/spinlock.h>
12 #include <linux/completion.h>
13 #include <linux/buffer_head.h>
14 #include <linux/kallsyms.h>
15 #include <linux/gfs2_ondisk.h>
26 #include "trace_gfs2.h"
28 static void gfs2_print_trans(struct gfs2_sbd
*sdp
, const struct gfs2_trans
*tr
)
30 fs_warn(sdp
, "Transaction created at: %pSR\n", (void *)tr
->tr_ip
);
31 fs_warn(sdp
, "blocks=%u revokes=%u reserved=%u touched=%u\n",
32 tr
->tr_blocks
, tr
->tr_revokes
, tr
->tr_reserved
,
33 test_bit(TR_TOUCHED
, &tr
->tr_flags
));
34 fs_warn(sdp
, "Buf %u/%u Databuf %u/%u Revoke %u\n",
35 tr
->tr_num_buf_new
, tr
->tr_num_buf_rm
,
36 tr
->tr_num_databuf_new
, tr
->tr_num_databuf_rm
,
40 int __gfs2_trans_begin(struct gfs2_trans
*tr
, struct gfs2_sbd
*sdp
,
41 unsigned int blocks
, unsigned int revokes
,
44 unsigned int extra_revokes
;
46 if (current
->journal_info
) {
47 gfs2_print_trans(sdp
, current
->journal_info
);
50 BUG_ON(blocks
== 0 && revokes
== 0);
52 if (!test_bit(SDF_JOURNAL_LIVE
, &sdp
->sd_flags
))
56 tr
->tr_blocks
= blocks
;
57 tr
->tr_revokes
= revokes
;
58 tr
->tr_reserved
= GFS2_LOG_FLUSH_MIN_BLOCKS
;
61 * The reserved blocks are either used for data or metadata.
62 * We can have mixed data and metadata, each with its own log
63 * descriptor block; see calc_reserved().
65 tr
->tr_reserved
+= blocks
+ 1 + DIV_ROUND_UP(blocks
- 1, databuf_limit(sdp
));
67 INIT_LIST_HEAD(&tr
->tr_databuf
);
68 INIT_LIST_HEAD(&tr
->tr_buf
);
69 INIT_LIST_HEAD(&tr
->tr_list
);
70 INIT_LIST_HEAD(&tr
->tr_ail1_list
);
71 INIT_LIST_HEAD(&tr
->tr_ail2_list
);
73 if (gfs2_assert_warn(sdp
, tr
->tr_reserved
<= sdp
->sd_jdesc
->jd_blocks
))
76 sb_start_intwrite(sdp
->sd_vfs
);
79 * Try the reservations under sd_log_flush_lock to prevent log flushes
80 * from creating inconsistencies between the number of allocated and
81 * reserved revokes. If that fails, do a full-block allocation outside
82 * of the lock to avoid stalling log flushes. Then, allot the
83 * appropriate number of blocks to revokes, use as many revokes locally
84 * as needed, and "release" the surplus into the revokes pool.
87 down_read(&sdp
->sd_log_flush_lock
);
88 if (gfs2_log_try_reserve(sdp
, tr
, &extra_revokes
))
90 up_read(&sdp
->sd_log_flush_lock
);
91 gfs2_log_reserve(sdp
, tr
, &extra_revokes
);
92 down_read(&sdp
->sd_log_flush_lock
);
95 gfs2_log_release_revokes(sdp
, extra_revokes
);
96 if (unlikely(!test_bit(SDF_JOURNAL_LIVE
, &sdp
->sd_flags
))) {
97 gfs2_log_release_revokes(sdp
, tr
->tr_revokes
);
98 up_read(&sdp
->sd_log_flush_lock
);
99 gfs2_log_release(sdp
, tr
->tr_reserved
);
100 sb_end_intwrite(sdp
->sd_vfs
);
104 current
->journal_info
= tr
;
109 int gfs2_trans_begin(struct gfs2_sbd
*sdp
, unsigned int blocks
,
110 unsigned int revokes
)
112 struct gfs2_trans
*tr
;
115 tr
= kmem_cache_zalloc(gfs2_trans_cachep
, GFP_NOFS
);
118 error
= __gfs2_trans_begin(tr
, sdp
, blocks
, revokes
, _RET_IP_
);
120 kmem_cache_free(gfs2_trans_cachep
, tr
);
124 void gfs2_trans_end(struct gfs2_sbd
*sdp
)
126 struct gfs2_trans
*tr
= current
->journal_info
;
129 current
->journal_info
= NULL
;
131 if (!test_bit(TR_TOUCHED
, &tr
->tr_flags
)) {
132 gfs2_log_release_revokes(sdp
, tr
->tr_revokes
);
133 up_read(&sdp
->sd_log_flush_lock
);
134 gfs2_log_release(sdp
, tr
->tr_reserved
);
135 if (!test_bit(TR_ONSTACK
, &tr
->tr_flags
))
136 gfs2_trans_free(sdp
, tr
);
137 sb_end_intwrite(sdp
->sd_vfs
);
141 gfs2_log_release_revokes(sdp
, tr
->tr_revokes
- tr
->tr_num_revoke
);
143 nbuf
= tr
->tr_num_buf_new
+ tr
->tr_num_databuf_new
;
144 nbuf
-= tr
->tr_num_buf_rm
;
145 nbuf
-= tr
->tr_num_databuf_rm
;
147 if (gfs2_assert_withdraw(sdp
, nbuf
<= tr
->tr_blocks
) ||
148 gfs2_assert_withdraw(sdp
, tr
->tr_num_revoke
<= tr
->tr_revokes
))
149 gfs2_print_trans(sdp
, tr
);
151 gfs2_log_commit(sdp
, tr
);
152 if (!test_bit(TR_ONSTACK
, &tr
->tr_flags
) &&
153 !test_bit(TR_ATTACHED
, &tr
->tr_flags
))
154 gfs2_trans_free(sdp
, tr
);
155 up_read(&sdp
->sd_log_flush_lock
);
157 if (sdp
->sd_vfs
->s_flags
& SB_SYNCHRONOUS
)
158 gfs2_log_flush(sdp
, NULL
, GFS2_LOG_HEAD_FLUSH_NORMAL
|
160 sb_end_intwrite(sdp
->sd_vfs
);
163 static struct gfs2_bufdata
*gfs2_alloc_bufdata(struct gfs2_glock
*gl
,
164 struct buffer_head
*bh
)
166 struct gfs2_bufdata
*bd
;
168 bd
= kmem_cache_zalloc(gfs2_bufdata_cachep
, GFP_NOFS
| __GFP_NOFAIL
);
171 INIT_LIST_HEAD(&bd
->bd_list
);
172 INIT_LIST_HEAD(&bd
->bd_ail_st_list
);
173 INIT_LIST_HEAD(&bd
->bd_ail_gl_list
);
179 * gfs2_trans_add_data - Add a databuf to the transaction.
180 * @gl: The inode glock associated with the buffer
181 * @bh: The buffer to add
183 * This is used in journaled data mode.
184 * We need to journal the data block in the same way as metadata in
185 * the functions above. The difference is that here we have a tag
186 * which is two __be64's being the block number (as per meta data)
187 * and a flag which says whether the data block needs escaping or
188 * not. This means we need a new log entry for each 251 or so data
189 * blocks, which isn't an enormous overhead but twice as much as
190 * for normal metadata blocks.
192 void gfs2_trans_add_data(struct gfs2_glock
*gl
, struct buffer_head
*bh
)
194 struct gfs2_trans
*tr
= current
->journal_info
;
195 struct gfs2_sbd
*sdp
= gl
->gl_name
.ln_sbd
;
196 struct gfs2_bufdata
*bd
;
199 if (buffer_pinned(bh
)) {
200 set_bit(TR_TOUCHED
, &tr
->tr_flags
);
206 gfs2_log_unlock(sdp
);
208 if (bh
->b_private
== NULL
)
209 bd
= gfs2_alloc_bufdata(gl
, bh
);
215 gfs2_assert(sdp
, bd
->bd_gl
== gl
);
216 set_bit(TR_TOUCHED
, &tr
->tr_flags
);
217 if (list_empty(&bd
->bd_list
)) {
218 set_bit(GLF_LFLUSH
, &bd
->bd_gl
->gl_flags
);
219 set_bit(GLF_DIRTY
, &bd
->bd_gl
->gl_flags
);
220 gfs2_pin(sdp
, bd
->bd_bh
);
221 tr
->tr_num_databuf_new
++;
222 list_add_tail(&bd
->bd_list
, &tr
->tr_databuf
);
224 gfs2_log_unlock(sdp
);
229 void gfs2_trans_add_meta(struct gfs2_glock
*gl
, struct buffer_head
*bh
)
232 struct gfs2_sbd
*sdp
= gl
->gl_name
.ln_sbd
;
233 struct super_block
*sb
= sdp
->sd_vfs
;
234 struct gfs2_bufdata
*bd
;
235 struct gfs2_meta_header
*mh
;
236 struct gfs2_trans
*tr
= current
->journal_info
;
237 bool withdraw
= false;
240 if (buffer_pinned(bh
)) {
241 set_bit(TR_TOUCHED
, &tr
->tr_flags
);
247 gfs2_log_unlock(sdp
);
249 lock_page(bh
->b_page
);
250 if (bh
->b_private
== NULL
)
251 bd
= gfs2_alloc_bufdata(gl
, bh
);
254 unlock_page(bh
->b_page
);
258 gfs2_assert(sdp
, bd
->bd_gl
== gl
);
259 set_bit(TR_TOUCHED
, &tr
->tr_flags
);
260 if (!list_empty(&bd
->bd_list
))
262 set_bit(GLF_LFLUSH
, &bd
->bd_gl
->gl_flags
);
263 set_bit(GLF_DIRTY
, &bd
->bd_gl
->gl_flags
);
264 mh
= (struct gfs2_meta_header
*)bd
->bd_bh
->b_data
;
265 if (unlikely(mh
->mh_magic
!= cpu_to_be32(GFS2_MAGIC
))) {
266 fs_err(sdp
, "Attempting to add uninitialised block to "
267 "journal (inplace block=%lld)\n",
268 (unsigned long long)bd
->bd_bh
->b_blocknr
);
271 if (gfs2_withdrawing_or_withdrawn(sdp
)) {
272 fs_info(sdp
, "GFS2:adding buf while withdrawn! 0x%llx\n",
273 (unsigned long long)bd
->bd_bh
->b_blocknr
);
276 if (unlikely(sb
->s_writers
.frozen
== SB_FREEZE_COMPLETE
)) {
277 fs_info(sdp
, "GFS2:adding buf while frozen\n");
281 gfs2_pin(sdp
, bd
->bd_bh
);
282 mh
->__pad0
= cpu_to_be64(0);
283 mh
->mh_jid
= cpu_to_be32(sdp
->sd_jdesc
->jd_jid
);
284 list_add(&bd
->bd_list
, &tr
->tr_buf
);
285 tr
->tr_num_buf_new
++;
287 gfs2_log_unlock(sdp
);
289 gfs2_assert_withdraw(sdp
, 0);
294 void gfs2_trans_add_revoke(struct gfs2_sbd
*sdp
, struct gfs2_bufdata
*bd
)
296 struct gfs2_trans
*tr
= current
->journal_info
;
298 BUG_ON(!list_empty(&bd
->bd_list
));
299 gfs2_add_revoke(sdp
, bd
);
300 set_bit(TR_TOUCHED
, &tr
->tr_flags
);
304 void gfs2_trans_remove_revoke(struct gfs2_sbd
*sdp
, u64 blkno
, unsigned int len
)
306 struct gfs2_bufdata
*bd
, *tmp
;
307 unsigned int n
= len
;
310 list_for_each_entry_safe(bd
, tmp
, &sdp
->sd_log_revokes
, bd_list
) {
311 if ((bd
->bd_blkno
>= blkno
) && (bd
->bd_blkno
< (blkno
+ len
))) {
312 list_del_init(&bd
->bd_list
);
313 gfs2_assert_withdraw(sdp
, sdp
->sd_log_num_revoke
);
314 sdp
->sd_log_num_revoke
--;
316 gfs2_glock_remove_revoke(bd
->bd_gl
);
317 kmem_cache_free(gfs2_bufdata_cachep
, bd
);
318 gfs2_log_release_revokes(sdp
, 1);
323 gfs2_log_unlock(sdp
);
326 void gfs2_trans_free(struct gfs2_sbd
*sdp
, struct gfs2_trans
*tr
)
331 gfs2_assert_warn(sdp
, list_empty(&tr
->tr_ail1_list
));
332 gfs2_assert_warn(sdp
, list_empty(&tr
->tr_ail2_list
));
333 gfs2_assert_warn(sdp
, list_empty(&tr
->tr_databuf
));
334 gfs2_assert_warn(sdp
, list_empty(&tr
->tr_buf
));
335 kmem_cache_free(gfs2_trans_cachep
, tr
);