#include <linux/ceph/ceph_debug.h>

#include <linux/bug.h>
#include <linux/err.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/types.h>

#include <linux/ceph/mdsmap.h>
#include <linux/ceph/messenger.h>
#include <linux/ceph/decode.h>

#include "super.h"


/*
 * choose a random mds that is "up" (i.e. has a state > 0), or -1.
 */
int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
{
	int n = 0;
	int i;

	/* special case for one mds */
	if (1 == m->m_max_mds && m->m_info[0].state > 0)
		return 0;

	/* count */
	for (i = 0; i < m->m_max_mds; i++)
		if (m->m_info[i].state > 0)
			n++;
	if (n == 0)
		return -1;

	/* pick: skip past n "up" ranks, then land on the next "up" one */
	n = prandom_u32() % n;
	for (i = 0; n > 0; i++, n--)
		while (m->m_info[i].state <= 0)
			i++;
	while (m->m_info[i].state <= 0)
		i++;

	return i;
}
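
/*
 * The __decode_and_drop_* helpers skip over wire-format fields the
 * client decodes but does not keep: each one bounds-checks the
 * remaining buffer and then just advances *p past the encoded bytes,
 * jumping to the caller's "bad" label on a short buffer.
 */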
#define __decode_and_drop_type(p, end, type, bad)		\
	do {							\
		if (*p + sizeof(type) > end)			\
			goto bad;				\
		*p += sizeof(type);				\
	} while (0)

#define __decode_and_drop_set(p, end, type, bad)		\
	do {							\
		u32 n;						\
		size_t need;					\
		ceph_decode_32_safe(p, end, n, bad);		\
		need = sizeof(type) * n;			\
		ceph_decode_need(p, end, need, bad);		\
		*p += need;					\
	} while (0)

#define __decode_and_drop_map(p, end, ktype, vtype, bad)	\
	do {							\
		u32 n;						\
		size_t need;					\
		ceph_decode_32_safe(p, end, n, bad);		\
		need = (sizeof(ktype) + sizeof(vtype)) * n;	\
		ceph_decode_need(p, end, need, bad);		\
		*p += need;					\
	} while (0)
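
/*
 * Skip a CompatSet: three feature sets (compat, ro_compat, incompat),
 * each encoded as a u64 feature mask followed by a map<u64, string>
 * of feature names, none of which the client uses.
 */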
static int __decode_and_drop_compat_set(void **p, void *end)
{
	int i;
	/* compat, ro_compat, incompat */
	for (i = 0; i < 3; i++) {
		u32 n;
		ceph_decode_need(p, end, sizeof(u64) + sizeof(u32), bad);
		/* mask */
		*p += sizeof(u64);
		/* names (map<u64, string>) */
		n = ceph_decode_32(p);
		while (n-- > 0) {
			u32 len;
			ceph_decode_need(p, end, sizeof(u64) + sizeof(u32),
					 bad);
			*p += sizeof(u64);
			len = ceph_decode_32(p);
			ceph_decode_need(p, end, len, bad);
			*p += len;
		}
	}
	return 0;
bad:
	return -1;
}

/*
 * Decode an MDS map
 *
 * Ignore any fields we don't care about (there are quite a few of
 * them).
 */
struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
{
	struct ceph_mdsmap *m;
	const void *start = *p;
	int i, j, n;
	int err = -EINVAL;
	u8 mdsmap_v, mdsmap_cv;
	u16 mdsmap_ev;

	m = kzalloc(sizeof(*m), GFP_NOFS);
	if (m == NULL)
		return ERR_PTR(-ENOMEM);
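
	/*
	 * Map version 4 and later wrap the payload in a (version,
	 * compat version, length) envelope; the length lets us clamp
	 * "end" so later bounds checks cannot run past the mdsmap
	 * portion of the message.
	 */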
	ceph_decode_need(p, end, 1 + 1, bad);
	mdsmap_v = ceph_decode_8(p);
	mdsmap_cv = ceph_decode_8(p);
	if (mdsmap_v >= 4) {
		u32 mdsmap_len;
		ceph_decode_32_safe(p, end, mdsmap_len, bad);
		if (end < *p + mdsmap_len)
			goto bad;
		end = *p + mdsmap_len;
	}

	ceph_decode_need(p, end, 8*sizeof(u32) + sizeof(u64), bad);
	m->m_epoch = ceph_decode_32(p);
	m->m_client_epoch = ceph_decode_32(p);
	m->m_last_failure = ceph_decode_32(p);
	m->m_root = ceph_decode_32(p);
	m->m_session_timeout = ceph_decode_32(p);
	m->m_session_autoclose = ceph_decode_32(p);
	m->m_max_file_size = ceph_decode_64(p);
	m->m_max_mds = ceph_decode_32(p);
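
	/*
	 * m_info is indexed by MDS rank, so it is sized by m_max_mds
	 * rather than by the number of records that follow; records
	 * for ranks outside [0, m_max_mds) are skipped below.
	 */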
	m->m_info = kcalloc(m->m_max_mds, sizeof(*m->m_info), GFP_NOFS);
	if (m->m_info == NULL)
		goto nomem;

	/* pick out active nodes from mds_info (state > 0) */
	n = ceph_decode_32(p);
	for (i = 0; i < n; i++) {
		u64 global_id;
		u32 namelen;
		s32 mds, inc, state;
		u64 state_seq;
		u8 info_v;
		void *info_end = NULL;
		struct ceph_entity_addr addr;
		u32 num_export_targets;
		void *pexport_targets = NULL;
		struct ceph_timespec laggy_since;
		struct ceph_mds_info *info;

		ceph_decode_need(p, end, sizeof(u64) + 1, bad);
		global_id = ceph_decode_64(p);
		info_v = ceph_decode_8(p);
		if (info_v >= 4) {
			u32 info_len;
			u8 info_cv;
			ceph_decode_need(p, end, 1 + sizeof(u32), bad);
			info_cv = ceph_decode_8(p);
			info_len = ceph_decode_32(p);
			info_end = *p + info_len;
			if (info_end > end)
				goto bad;
		}
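
		/*
		 * When info_end is known (info_v >= 4), trailing fields
		 * added by newer servers are skipped wholesale once the
		 * known fields below have been decoded.
		 */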
		ceph_decode_need(p, end, sizeof(u64) + sizeof(u32), bad);
		*p += sizeof(u64);
		namelen = ceph_decode_32(p);  /* skip mds name */
		*p += namelen;

		ceph_decode_need(p, end,
				 4*sizeof(u32) + sizeof(u64) +
				 sizeof(addr) + sizeof(struct ceph_timespec),
				 bad);
		mds = ceph_decode_32(p);
		inc = ceph_decode_32(p);
		state = ceph_decode_32(p);
		state_seq = ceph_decode_64(p);
		ceph_decode_copy(p, &addr, sizeof(addr));
		ceph_decode_addr(&addr);
		ceph_decode_copy(p, &laggy_since, sizeof(laggy_since));
		*p += sizeof(u32);	/* skip standby_for_rank */
		ceph_decode_32_safe(p, end, namelen, bad);
		*p += namelen;		/* skip standby_for_name */
		if (info_v >= 2) {
			ceph_decode_32_safe(p, end, num_export_targets, bad);
			pexport_targets = *p;
			*p += num_export_targets * sizeof(u32);
		} else {
			num_export_targets = 0;
		}

		if (info_end && *p != info_end) {
			if (*p > info_end)
				goto bad;
			*p = info_end;
		}

		dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s\n",
		     i+1, n, global_id, mds, inc,
		     ceph_pr_addr(&addr.in_addr),
		     ceph_mds_state_name(state));

		if (mds < 0 || mds >= m->m_max_mds || state <= 0)
			continue;

		info = &m->m_info[mds];
		info->global_id = global_id;
		info->state = state;
		info->addr = addr;
		info->laggy = (laggy_since.tv_sec != 0 ||
			       laggy_since.tv_nsec != 0);
		info->num_export_targets = num_export_targets;
		if (num_export_targets) {
			info->export_targets = kcalloc(num_export_targets,
						       sizeof(u32), GFP_NOFS);
			if (info->export_targets == NULL)
				goto nomem;
			for (j = 0; j < num_export_targets; j++)
				info->export_targets[j] =
				       ceph_decode_32(&pexport_targets);
		} else {
			info->export_targets = NULL;
		}
	}

	/* pg_pools */
	ceph_decode_32_safe(p, end, n, bad);
	m->m_num_data_pg_pools = n;
	m->m_data_pg_pools = kcalloc(n, sizeof(u64), GFP_NOFS);
	if (!m->m_data_pg_pools)
		goto nomem;
	ceph_decode_need(p, end, sizeof(u64)*(n+1), bad);
	for (i = 0; i < n; i++)
		m->m_data_pg_pools[i] = ceph_decode_64(p);
	m->m_cas_pg_pool = ceph_decode_64(p);
	m->m_enabled = m->m_epoch > 1;
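
	/*
	 * Older maps carry no explicit "enabled" flag, so epoch > 1 is
	 * used as a heuristic; extension version 8 and later encode the
	 * flag directly and overwrite this below.
	 */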
	mdsmap_ev = 1;
	if (mdsmap_v >= 2) {
		ceph_decode_16_safe(p, end, mdsmap_ev, bad_ext);
	}
	if (mdsmap_ev >= 3) {
		if (__decode_and_drop_compat_set(p, end) < 0)
			goto bad_ext;
	}
	/* metadata_pool */
	if (mdsmap_ev < 5) {
		__decode_and_drop_type(p, end, u32, bad_ext);
	} else {
		__decode_and_drop_type(p, end, u64, bad_ext);
	}

	/* created + modified + tableserver */
	__decode_and_drop_type(p, end, struct ceph_timespec, bad_ext);
	__decode_and_drop_type(p, end, struct ceph_timespec, bad_ext);
	__decode_and_drop_type(p, end, u32, bad_ext);

	/* in */
	{
		int num_laggy = 0;
		ceph_decode_32_safe(p, end, n, bad_ext);
		ceph_decode_need(p, end, sizeof(u32) * n, bad_ext);

		for (i = 0; i < n; i++) {
			s32 mds = ceph_decode_32(p);
			if (mds >= 0 && mds < m->m_max_mds) {
				if (m->m_info[mds].laggy)
					num_laggy++;
			}
		}
		m->m_num_laggy = num_laggy;
	}

	/* inc */
	__decode_and_drop_map(p, end, u32, u32, bad_ext);
	/* up */
	__decode_and_drop_map(p, end, u32, u64, bad_ext);
	/* failed */
	__decode_and_drop_set(p, end, u32, bad_ext);
	/* stopped */
	__decode_and_drop_set(p, end, u32, bad_ext);

	if (mdsmap_ev >= 4) {
		/* last_failure_osd_epoch */
		__decode_and_drop_type(p, end, u32, bad_ext);
	}
	if (mdsmap_ev >= 6) {
		/* ever_allowed_snaps */
		__decode_and_drop_type(p, end, u8, bad_ext);
		/* explicitly_allowed_snaps */
		__decode_and_drop_type(p, end, u8, bad_ext);
	}
	if (mdsmap_ev >= 7) {
		/* inline_data_enabled */
		__decode_and_drop_type(p, end, u8, bad_ext);
	}
	if (mdsmap_ev >= 8) {
		u32 name_len;
		/* enabled */
		ceph_decode_8_safe(p, end, m->m_enabled, bad_ext);
		/* fs_name */
		ceph_decode_32_safe(p, end, name_len, bad_ext);
		ceph_decode_need(p, end, name_len, bad_ext);
		*p += name_len;
	}
	/* damaged */
	if (mdsmap_ev >= 9) {
		size_t need;
		ceph_decode_32_safe(p, end, n, bad_ext);
		need = sizeof(u32) * n;
		ceph_decode_need(p, end, need, bad_ext);
		*p += need;
		m->m_damaged = n > 0;
	} else {
		m->m_damaged = false;
	}
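	/*
	 * Failures while decoding the optional extensions above are not
	 * fatal: bad_ext consumes the rest of the buffer and returns
	 * the map decoded so far.
	 */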
bad_ext:
	*p = end;
	dout("mdsmap_decode success epoch %u\n", m->m_epoch);
	return m;
nomem:
	err = -ENOMEM;
	goto out_err;
bad:
	pr_err("corrupt mdsmap\n");
	print_hex_dump(KERN_DEBUG, "mdsmap: ",
		       DUMP_PREFIX_OFFSET, 16, 1,
		       start, end - start, true);
out_err:
	ceph_mdsmap_destroy(m);
	return ERR_PTR(err);
}
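
/*
 * A minimal caller sketch, assuming the usual ERR_PTR convention (the
 * in-tree caller lives in fs/ceph/mds_client.c; the names below are
 * illustrative only):
 *
 *	struct ceph_mdsmap *newmap = ceph_mdsmap_decode(&p, end);
 *
 *	if (IS_ERR(newmap))
 *		return PTR_ERR(newmap);		// corrupt or short buffer
 *	...use newmap...
 *	ceph_mdsmap_destroy(newmap);		// frees m_info + export_targets
 */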

void ceph_mdsmap_destroy(struct ceph_mdsmap *m)
{
	int i;

	for (i = 0; i < m->m_max_mds; i++)
		kfree(m->m_info[i].export_targets);
	kfree(m->m_info);
	kfree(m->m_data_pg_pools);
	kfree(m);
}

bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m)
{
	int i, nr_active = 0;

	if (!m->m_enabled)
		return false;
	if (m->m_damaged)
		return false;
	if (m->m_num_laggy > 0)
		return false;
	for (i = 0; i < m->m_max_mds; i++) {
		if (m->m_info[i].state == CEPH_MDS_STATE_ACTIVE)
			nr_active++;
	}
	return nr_active > 0;
}