2 * Copyright (C) 2011+ Evgeniy Polyakov <zbr@ioremap.net>
/*
 * File name of the server-side script requested by
 * pohmelfs_read_latest_group(); it is passed as req.script_name.
 */
9 #define POHMELFS_READ_LATEST_GROUPS_SCRIPT "pohmelfs_read_latest_groups.py"
/*
 * Transaction init callback for metadata writes.
 *
 * Takes a reference on the struct pohmelfs_wait stored in t->priv by
 * calling pohmelfs_wait_get().
 *
 * NOTE(review): this copy of the file omits some source lines (braces and
 * the return statement), so the returned value is not visible here.
 */
11 static int pohmelfs_write_init(struct pohmelfs_trans
*t
)
13 struct pohmelfs_wait
*wait
= t
->priv
;
15 pohmelfs_wait_get(wait
);
/*
 * Transaction destroy callback for metadata writes.
 *
 * Drops a reference on the struct pohmelfs_wait stored in t->priv by
 * calling pohmelfs_wait_put(); this is the counterpart of the
 * pohmelfs_wait_get() call made in pohmelfs_write_init().
 */
19 static void pohmelfs_write_destroy(struct pohmelfs_trans
*t
)
21 struct pohmelfs_wait
*wait
= t
->priv
;
24 pohmelfs_wait_put(wait
);
/*
 * Transaction completion callback for metadata writes.
 *
 * @t:    transaction; t->priv holds the struct pohmelfs_wait of the waiter
 * @recv: receiving state; recv->cmd is the reply command header
 *
 * Logs the reply and stores cmd->status into wait->condition.
 *
 * NOTE(review): this copy of the file omits some source lines, among them
 * the body of the DNET_FLAGS_MORE branch, any wake-up of the waiter and the
 * return statement, so they are not documented here.
 */
27 static int pohmelfs_write_complete(struct pohmelfs_trans
*t
, struct pohmelfs_state
*recv
)
29 struct pohmelfs_wait
*wait
= t
->priv
;
30 struct pohmelfs_inode
*pi
= pohmelfs_inode(t
->inode
);
31 struct dnet_cmd
*cmd
= &recv
->cmd
;
/* Transaction number with the DNET_TRANS_REPLY bit masked off; used for logging only. */
32 unsigned long long trans
= cmd
->trans
& ~DNET_TRANS_REPLY
;
34 pr_debug("pohmelfs: %s: write complete: %llu, flags: %x, status: %d\n",
35 pohmelfs_dump_id(pi
->id
.id
), trans
, cmd
->flags
, cmd
->status
);
/*
 * NOTE(review): the action taken when DNET_FLAGS_MORE is set is not visible
 * in this copy; presumably an early return for non-final replies - TODO
 * confirm.
 */
37 if (cmd
->flags
& DNET_FLAGS_MORE
)
/* Publish the reply status to the waiter. */
40 wait
->condition
= cmd
->status
;
/*
 * Build the metadata blob for an inode and submit it as a DNET_CMD_WRITE.
 *
 * @pi:   inode whose metadata is written
 * @pio:  IO request to fill in and send
 * @wait: waiter object of the caller
 *
 * The blob is a sequence of struct dnet_meta records laid out back to back:
 * DNET_META_GROUPS (copy of psb->groups), DNET_META_NAMESPACE (copy of
 * psb->fsid), DNET_META_UPDATE (current time) and DNET_META_CHECK_STATUS
 * (left as zeroes from kzalloc). The request is flagged
 * OVERWRITE | META, asks for an ack, and uses the pohmelfs_write_init /
 * pohmelfs_write_destroy / pohmelfs_write_complete callbacks.
 *
 * NOTE(review): this copy of the file omits several source lines (local
 * declarations, allocation checks, how wait and the buffer are attached to
 * pio, the cleanup path and the return), so those parts are not documented
 * here.
 */
47 static int pohmelfs_send_write_metadata(struct pohmelfs_inode
*pi
, struct pohmelfs_io
*pio
, struct pohmelfs_wait
*wait
)
49 struct pohmelfs_sb
*psb
= pohmelfs_sb(pi
->vfs_inode
.i_sb
);
/* Timestamp stored in the DNET_META_UPDATE record below. */
50 struct timespec ts
= CURRENT_TIME
;
51 struct dnet_meta_update
*mu
;
/*
 * Four record headers plus the record payloads.
 * NOTE(review): the term from original line 59 is missing in this copy;
 * presumably it reserves psb->fsid_len bytes for the namespace record,
 * which is copied below - TODO confirm.
 */
56 size
= sizeof(struct dnet_meta
) * 4 +
57 sizeof(struct dnet_meta_check_status
) +
58 sizeof(struct dnet_meta_update
) +
60 psb
->group_num
* sizeof(int);
/* Zeroed buffer, allocated with GFP_NOIO. */
62 data
= kzalloc(size
, GFP_NOIO
);
/* Record 1: the group list of the superblock. */
69 m
->type
= DNET_META_GROUPS
;
70 m
->size
= psb
->group_num
* sizeof(int);
71 memcpy(m
->data
, psb
->groups
, m
->size
);
/*
 * Step to the next record, which starts right after the payload of the
 * current one. The size is read back through le32_to_cpu();
 * NOTE(review): presumably the header was converted to little-endian on an
 * omitted line - TODO confirm.
 */
74 m
= (struct dnet_meta
*)(m
->data
+ le32_to_cpu(m
->size
));
/* Record 2: namespace, filled from psb->fsid. */
75 m
->type
= DNET_META_NAMESPACE
;
76 m
->size
= psb
->fsid_len
;
77 memcpy(m
->data
, psb
->fsid
, psb
->fsid_len
);
80 m
= (struct dnet_meta
*)(m
->data
+ le32_to_cpu(m
->size
));
/* Record 3: update time taken from ts, then passed through dnet_convert_meta_update(). */
81 m
->type
= DNET_META_UPDATE
;
82 m
->size
= sizeof(struct dnet_meta_update
);
83 mu
= (struct dnet_meta_update
*)m
->data
;
84 mu
->tm
.tsec
= ts
.tv_sec
;
85 mu
->tm
.tnsec
= ts
.tv_nsec
;
86 dnet_convert_meta_update(mu
);
89 m
= (struct dnet_meta
*)(m
->data
+ le32_to_cpu(m
->size
));
/* Record 4: check status; only the header is set, the payload stays zeroed. */
90 m
->type
= DNET_META_CHECK_STATUS
;
91 m
->size
= sizeof(struct dnet_meta_check_status
);
92 /* do not fill, it will be updated on server */
/* Describe the request: metadata overwrite that must be acknowledged. */
97 pio
->cmd
= DNET_CMD_WRITE
;
98 pio
->ioflags
= DNET_IO_FLAGS_OVERWRITE
| DNET_IO_FLAGS_META
;
99 pio
->cflags
= DNET_FLAGS_NEED_ACK
;
/* Callbacks that manage the waiter reference and record the reply status. */
101 pio
->cb
.init
= pohmelfs_write_init
;
102 pio
->cb
.destroy
= pohmelfs_write_destroy
;
103 pio
->cb
.complete
= pohmelfs_write_complete
;
108 err
= pohmelfs_send_io(pio
);
/*
 * Completion callback for data write commands.
 *
 * @t:    transaction; t->wctl is the write control shared by the writes
 * @recv: receiving state; recv->cmd is the reply command header
 *
 * A reply with status 0 increments ctl->good_writes. The remaining visible
 * code logs the inode number, inode size and the offset/size taken from the
 * transaction command, together with the reply status, as a failed write.
 *
 * NOTE(review): this copy of the file omits some source lines (branch
 * bodies, braces, the else keyword if any, and the return), so the exact
 * branch structure is not visible here.
 */
118 static int pohmelfs_write_command_complete(struct pohmelfs_trans
*t
, struct pohmelfs_state
*recv
)
120 struct dnet_cmd
*cmd
= &recv
->cmd
;
121 struct pohmelfs_write_ctl
*ctl
= t
->wctl
;
/*
 * NOTE(review): the action taken when DNET_FLAGS_MORE is set is not visible
 * in this copy; presumably an early return - TODO confirm.
 */
123 if (cmd
->flags
& DNET_FLAGS_MORE
)
/* Count successful replies. */
126 if (cmd
->status
== 0)
127 atomic_inc(&ctl
->good_writes
);
/* Values used only by the failure log; size and offset are converted from little-endian. */
129 struct inode
*inode
= t
->inode
;
130 struct pohmelfs_inode
*pi
= pohmelfs_inode(inode
);
131 unsigned long long size
= le64_to_cpu(t
->cmd
.p
.io
.size
);
132 unsigned long long offset
= le64_to_cpu(t
->cmd
.p
.io
.offset
);
134 pr_debug("pohmelfs: %s: write failed: ino: %lu, isize: %llu, offset: %llu, size: %llu: %d\n",
135 pohmelfs_dump_id(pi
->id
.id
), inode
->i_ino
, inode
->i_size
, offset
, size
, cmd
->status
);
/*
 * Transaction init callback for data write commands.
 *
 * Takes a reference on the write control in t->wctl with kref_get().
 *
 * NOTE(review): the return statement is not visible in this copy of the
 * file.
 */
141 static int pohmelfs_write_command_init(struct pohmelfs_trans
*t
)
143 struct pohmelfs_write_ctl
*ctl
= t
->wctl
;
145 kref_get(&ctl
->refcnt
);
/*
 * Transaction destroy callback for data write commands.
 *
 * Drops the reference on t->wctl with kref_put();
 * pohmelfs_write_ctl_release is passed as the release function.
 */
149 static void pohmelfs_write_command_destroy(struct pohmelfs_trans
*t
)
151 struct pohmelfs_write_ctl
*ctl
= t
->wctl
;
153 kref_put(&ctl
->refcnt
, pohmelfs_write_ctl_release
);
/*
 * Send a DNET_CMD_WRITE for a range of an inode.
 *
 * @pi:     inode being written
 * @ctl:    write control shared by the writes
 * @offset: offset of the range, stored in pio->offset
 * @len:    length of the range
 *
 * The request always carries OVERWRITE | PLAIN_WRITE | PREPARE and asks for
 * an ack; pio->num is set to the inode size read on entry. The COMMIT flag
 * is added when offset + len equals that size. The IO descriptor comes from
 * pohmelfs_io_cache and is freed again after pohmelfs_send_io().
 *
 * NOTE(review): this copy of the file omits several source lines (the err
 * declaration, the allocation check, how pi, ctl and len are attached to
 * pio, the error labels and the return), so those parts are not documented
 * here.
 */
156 int pohmelfs_write_command(struct pohmelfs_inode
*pi
, struct pohmelfs_write_ctl
*ctl
, loff_t offset
, size_t len
)
159 struct inode
*inode
= &pi
->vfs_inode
;
160 struct pohmelfs_io
*pio
;
/* Inode size sampled once; used both as pio->num and for the commit test. */
161 uint64_t prepare_size
= i_size_read(&pi
->vfs_inode
);
163 pio
= kmem_cache_zalloc(pohmelfs_io_cache
, GFP_NOIO
);
171 pio
->cmd
= DNET_CMD_WRITE
;
172 pio
->offset
= offset
;
174 pio
->cflags
= DNET_FLAGS_NEED_ACK
;
177 * We always set prepare bit, since elliptics/eblob reuses existing (previously prepared/reserved) area
178 * But it also allows to 'miss' prepare message (for example if we sent prepare bit when node was offline)
180 pio
->ioflags
= DNET_IO_FLAGS_OVERWRITE
| DNET_IO_FLAGS_PLAIN_WRITE
| DNET_IO_FLAGS_PREPARE
;
182 pio
->num
= prepare_size
;
184 /* commit when whole inode is written */
185 if (offset
+ len
== prepare_size
) {
186 pio
->ioflags
|= DNET_IO_FLAGS_COMMIT
;
/* Callbacks that hold a reference on the write control and count good replies. */
191 pio
->cb
.complete
= pohmelfs_write_command_complete
;
192 pio
->cb
.init
= pohmelfs_write_command_init
;
193 pio
->cb
.destroy
= pohmelfs_write_command_destroy
;
195 pr_debug("pohmelfs_write_prepare_commit: %s: ino: %lu, offset: %llu, len: %zu, total size: %llu\n",
196 pohmelfs_dump_id(pi
->id
.id
), inode
->i_ino
, (unsigned long long)offset
, len
, inode
->i_size
);
198 err
= pohmelfs_send_io(pio
);
/* The IO descriptor is released here after the send call. */
203 kmem_cache_free(pohmelfs_io_cache
, pio
);
/*
 * Write the metadata of an inode and wait for the reply.
 *
 * @pi:   inode whose metadata is written
 * @sync: NOTE(review): the use of this argument is not visible in this copy
 *        of the file; presumably it selects whether to wait - TODO confirm
 *
 * Allocates a waiter and an IO descriptor, sends the metadata through
 * pohmelfs_send_write_metadata(), then sleeps until a completion status has
 * been posted or the timeout expires. A negative wait->condition is taken
 * as the error code. The IO descriptor is freed and the waiter reference
 * dropped before returning.
 *
 * NOTE(review): this copy of the file omits several source lines (err/ret
 * declarations, allocation checks, handling of the wait result, labels and
 * the return), so those parts are not documented here.
 */
208 int pohmelfs_metadata_inode(struct pohmelfs_inode
*pi
, int sync
)
210 struct inode
*inode
= &pi
->vfs_inode
;
211 struct pohmelfs_sb
*psb
= pohmelfs_sb(inode
->i_sb
);
212 struct pohmelfs_io
*pio
;
213 struct pohmelfs_wait
*wait
;
217 wait
= pohmelfs_wait_alloc(pi
);
223 pio
= kmem_cache_zalloc(pohmelfs_io_cache
, GFP_NOIO
);
229 err
= pohmelfs_send_write_metadata(pi
, pio
, wait
);
/*
 * Sleep (interruptibly) until a non-zero status has been stored in
 * wait->condition and the reference counter of wait has dropped to 2 or
 * less, or until psb->write_wait_timeout milliseconds have passed.
 * NOTE(review): this reads the internal counter of the kref directly.
 */
234 ret
= wait_event_interruptible_timeout(wait
->wq
,
235 wait
->condition
!= 0 && atomic_read(&wait
->refcnt
.refcount
) <= 2,
236 msecs_to_jiffies(psb
->write_wait_timeout
));
/* A negative status posted by the completion callback becomes the error code. */
244 if (wait
->condition
< 0) {
245 err
= wait
->condition
;
251 kmem_cache_free(pohmelfs_io_cache
, pio
);
253 pohmelfs_wait_put(wait
);
/*
 * fallocate handler, installed as .fallocate in pohmelfs_file_ops.
 *
 * @file:   file being operated on
 * @mode:   NOTE(review): no use of this argument is visible in this copy
 * @offset: start of the range
 * @len:    length of the range
 *
 * Sends a DNET_CMD_WRITE carrying only the PREPARE io flag, with pio->num
 * set to the current inode size, and asks for an ack. The IO descriptor
 * comes from pohmelfs_io_cache and is freed again after pohmelfs_send_io().
 *
 * NOTE(review): this copy of the file omits several source lines (the err
 * declaration, the body of the size test, the allocation check, how the
 * inode is attached to pio, labels and the return), so those parts are not
 * documented here.
 */
258 static long pohmelfs_fallocate(struct file
*file
, int mode
, loff_t offset
, loff_t len
)
260 struct inode
*inode
= file
->f_path
.dentry
->d_inode
;
261 struct pohmelfs_inode
*pi
= pohmelfs_inode(inode
);
262 struct pohmelfs_io
*pio
;
/*
 * Range ends before the current end of file.
 * NOTE(review): the body of this branch is not visible in this copy;
 * presumably the request is rejected or skipped - TODO confirm.
 */
265 if (offset
+ len
< i_size_read(inode
)) {
270 pio
= kmem_cache_zalloc(pohmelfs_io_cache
, GFP_NOIO
);
278 pio
->cmd
= DNET_CMD_WRITE
;
279 pio
->cflags
= DNET_FLAGS_NEED_ACK
;
280 pio
->ioflags
= DNET_IO_FLAGS_PREPARE
;
281 pio
->num
= i_size_read(inode
);
/* Logged at info level, unlike the other messages in this file which use pr_debug. */
283 pr_info("pohmelfs_fallocate: %s: ino: %lu, offset: %llu, len: %llu, total size: %llu\n",
284 pohmelfs_dump_id(pi
->id
.id
), inode
->i_ino
,
285 (unsigned long long)offset
, (unsigned long long)len
, inode
->i_size
);
287 err
= pohmelfs_send_io(pio
);
292 kmem_cache_free(pohmelfs_io_cache
, pio
);
/*
 * Control block for the read-latest script request: its size is used as
 * req.binary_size in pohmelfs_read_latest_group(), and its id member is
 * filled in by pohmelfs_read_latest().
 *
 * NOTE(review): the member list is not visible in this copy of the file.
 */
297 struct pohmelfs_latest_ctl
{
/*
 * Completion callback for the read-latest script request.
 *
 * @t:    transaction; t->priv is the waiter, t->recv_data the reply payload
 * @recv: receiving state; recv->cmd is the reply command header
 *
 * For a reply with DNET_FLAGS_MORE set, the payload following the
 * struct dnet_attr header is copied as an array of int into a newly
 * allocated pi->groups, and pi->group_num is set to the number of entries;
 * both updates happen under pi->lock. The status is then set to 1. Finally
 * the status (cmd->status unless overridden) is stored in wait->condition.
 *
 * NOTE(review): this copy of the file omits several source lines (error
 * branch bodies, braces, labels, any wake-up and the return), so the exact
 * control flow is not visible here.
 */
303 static int pohmelfs_read_latest_complete(struct pohmelfs_trans
*t
, struct pohmelfs_state
*recv
)
305 struct pohmelfs_inode
*pi
= pohmelfs_inode(t
->inode
);
306 struct pohmelfs_wait
*wait
= t
->priv
;
307 struct dnet_cmd
*cmd
= &recv
->cmd
;
308 int err
= cmd
->status
;
313 if (cmd
->flags
& DNET_FLAGS_MORE
) {
/*
 * NOTE(review): this message is printed before the size check below, so
 * the subtraction may wrap for a short reply - TODO confirm.
 */
314 pr_debug("pohmelfs: %s: read-latest: complete: group: %d, attr size: %lld\n",
315 pohmelfs_dump_id(cmd
->id
.id
), cmd
->id
.group_id
, cmd
->size
- sizeof(struct dnet_attr
));
/* The reply must hold the attribute header plus at least 4 more bytes. */
316 if (cmd
->size
< sizeof(struct dnet_attr
) + 4) {
321 mutex_lock(&pi
->lock
);
/*
 * NOTE(review): no release of a previous pi->groups buffer is visible in
 * this copy; verify that the old buffer is freed on the omitted line.
 */
323 pi
->groups
= kmalloc(cmd
->size
- sizeof(struct dnet_attr
), GFP_NOIO
);
/* Unlock on what is presumably the allocation failure path - TODO confirm. */
326 mutex_unlock(&pi
->lock
);
/* Number of int entries that follow the attribute header. */
330 pi
->group_num
= (cmd
->size
- sizeof(struct dnet_attr
)) / sizeof(int);
331 memcpy(pi
->groups
, t
->recv_data
+ sizeof(struct dnet_attr
), pi
->group_num
* sizeof(int));
333 pr_debug("pohmelfs: %s: read-latest: complete: group: %d, received: %d groups\n",
334 pohmelfs_dump_id(cmd
->id
.id
), cmd
->id
.group_id
, pi
->group_num
);
336 mutex_unlock(&pi
->lock
);
338 err
= 1; /* setting wait->condition to 'everything is ok' */
/* Publish the result to the waiter. */
343 wait
->condition
= err
;
/*
 * Send the read-latest script request to one group.
 *
 * @pi:       inode the request is made for
 * @r:        control block; NOTE(review): its attachment to the request is
 *            not visible in this copy of the file, only its size is
 * @group_id: group the request is addressed to
 *
 * Fills a zeroed struct pohmelfs_script_req with the script name, a fixed
 * object name, the size of the control block, the group id and the
 * completion callback, then returns the result of
 * pohmelfs_send_script_request().
 */
347 static int pohmelfs_read_latest_group(struct pohmelfs_inode
*pi
, struct pohmelfs_latest_ctl
*r
, int group_id
)
349 struct pohmelfs_script_req req
;
351 memset(&req
, 0, sizeof(struct pohmelfs_script_req
));
353 req
.script_name
= POHMELFS_READ_LATEST_GROUPS_SCRIPT
;
/* Length of the name without the terminating NUL byte. */
354 req
.script_namelen
= sizeof(POHMELFS_READ_LATEST_GROUPS_SCRIPT
) - 1;
356 req
.obj_name
= "noname";
360 req
.binary_size
= sizeof(struct pohmelfs_latest_ctl
);
363 req
.group_id
= group_id
;
365 req
.complete
= pohmelfs_read_latest_complete
;
367 return pohmelfs_send_script_request(pi
, &req
);
/*
 * Ask the groups of the superblock for the latest group list of an inode.
 *
 * @pi: inode to query
 *
 * Allocates a zeroed control block, sets its id from pi->id.id, then walks
 * psb->groups and calls pohmelfs_read_latest_group() for each entry with
 * the group id stored in r->id.group_id. err starts as -ENOENT.
 *
 * NOTE(review): this copy of the file omits several source lines (the
 * allocation check, what the loop does with err, freeing of r, labels and
 * the return), so those parts are not documented here.
 */
370 static int pohmelfs_read_latest(struct pohmelfs_inode
*pi
)
372 struct pohmelfs_latest_ctl
*r
;
373 struct pohmelfs_sb
*psb
= pohmelfs_sb(pi
->vfs_inode
.i_sb
);
374 int i
, err
= -ENOENT
;
376 r
= kzalloc(sizeof(struct pohmelfs_latest_ctl
), GFP_NOIO
);
382 dnet_setup_id(&r
->id
, 0, pi
->id
.id
);
/* One request per group configured on the superblock. */
384 for (i
= 0; i
< psb
->group_num
; ++i
) {
385 r
->id
.group_id
= psb
->groups
[i
];
387 err
= pohmelfs_read_latest_group(pi
, r
, psb
->groups
[i
]);
396 pr_debug("pohmelfs: %s: read-latest: %d groups\n", pohmelfs_dump_id(pi
->id
.id
), pi
->group_num
);
/*
 * open handler, installed as .open in pohmelfs_file_ops.
 *
 * Calls pohmelfs_read_latest() for the inode and then returns the result
 * of generic_file_open(). The return value of pohmelfs_read_latest() is
 * not used by the visible call.
 *
 * NOTE(review): this copy of the file omits some source lines, so any
 * condition guarding the pohmelfs_read_latest() call is not visible here.
 */
402 static int pohmelfs_file_open(struct inode
*inode
, struct file
*filp
)
404 struct pohmelfs_inode
*pi
= pohmelfs_inode(inode
);
407 pohmelfs_read_latest(pi
);
409 return generic_file_open(inode
, filp
);
/*
 * File operations table. Only .open and .fallocate point at functions
 * defined in this file; every other visible entry is a generic kernel
 * helper.
 *
 * NOTE(review): this copy of the file omits some source lines, so further
 * entries and the closing brace are not visible here.
 */
412 const struct file_operations pohmelfs_file_ops
= {
413 .open
= pohmelfs_file_open
,
415 .llseek
= generic_file_llseek
,
417 .read
= do_sync_read
,
418 .aio_read
= generic_file_aio_read
,
420 .mmap
= generic_file_mmap
,
422 .splice_read
= generic_file_splice_read
,
423 .splice_write
= generic_file_splice_write
,
425 .write
= do_sync_write
,
426 .aio_write
= generic_file_aio_write
,
428 .fallocate
= pohmelfs_fallocate
,
431 const struct inode_operations pohmelfs_file_inode_operations
= {