2 * Copyright (C) 2011+ Evgeniy Polyakov <zbr@ioremap.net>
/* pr_fmt() wraps a format string so that it starts with KBUILD_MODNAME followed by ": ". */
5 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
/* Script name that pohmelfs_read_latest_group() places in req.script_name. */
11 #define POHMELFS_READ_LATEST_GROUPS_SCRIPT "pohmelfs_read_latest_groups.py"
/*
 * Transaction init callback; pohmelfs_send_write_metadata() installs it
 * as pio->cb.init.
 *
 * Reads the pohmelfs_wait object stored in t->priv and hands it to
 * pohmelfs_wait_get().
 * NOTE(review): presumably this takes a reference that is dropped again
 * in pohmelfs_write_destroy() - confirm against pohmelfs_wait_get().
 */
13 static int pohmelfs_write_init(struct pohmelfs_trans
*t
)
15 struct pohmelfs_wait
*wait
= t
->priv
;
17 pohmelfs_wait_get(wait
);
/*
 * Transaction destroy callback; pohmelfs_send_write_metadata() installs
 * it as pio->cb.destroy.
 *
 * Reads the pohmelfs_wait object stored in t->priv and passes it to
 * pohmelfs_wait_put(), mirroring the pohmelfs_wait_get() call made in
 * pohmelfs_write_init().
 */
21 static void pohmelfs_write_destroy(struct pohmelfs_trans
*t
)
23 struct pohmelfs_wait
*wait
= t
->priv
;
26 pohmelfs_wait_put(wait
);
/*
 * Transaction completion callback for metadata writes; installed as
 * pio->cb.complete by pohmelfs_send_write_metadata().
 *
 * @t:    transaction; t->priv holds the pohmelfs_wait object and
 *        t->inode the VFS inode the write belongs to.
 * @recv: receiving state; only recv->cmd is examined here.
 *
 * Logs the reply and stores the command status in wait->condition,
 * which pohmelfs_metadata_inode() waits on.
 */
29 static int pohmelfs_write_complete(struct pohmelfs_trans
*t
, struct pohmelfs_state
*recv
)
31 struct pohmelfs_wait
*wait
= t
->priv
;
32 struct pohmelfs_inode
*pi
= pohmelfs_inode(t
->inode
);
33 struct dnet_cmd
*cmd
= &recv
->cmd
;
/* Transaction number with the DNET_TRANS_REPLY bit masked off. */
34 unsigned long long trans
= cmd
->trans
& ~DNET_TRANS_REPLY
;
36 pr_debug("%s: %llu, flags: %x, status: %d\n",
37 pohmelfs_dump_id(pi
->id
.id
), trans
, cmd
->flags
, cmd
->status
);
/*
 * DNET_FLAGS_MORE is tested here; the statement guarded by this test
 * is not part of the visible lines.
 */
39 if (cmd
->flags
& DNET_FLAGS_MORE
)
/* Publish the command status to the waiter. */
42 wait
->condition
= cmd
->status
;
/*
 * Build a metadata buffer for an inode and submit it as a write.
 *
 * @pi:   inode whose metadata is written; its superblock supplies the
 *        group list and the fsid.
 * @pio:  I/O descriptor that is filled in and passed to
 *        pohmelfs_send_io().
 * @wait: wait object (its use is not part of the visible lines).
 *
 * The buffer is a sequence of dnet_meta records laid out back to back,
 * in this order: DNET_META_GROUPS, DNET_META_NAMESPACE,
 * DNET_META_UPDATE, DNET_META_CHECK_STATUS.
 */
49 static int pohmelfs_send_write_metadata(struct pohmelfs_inode
*pi
, struct pohmelfs_io
*pio
, struct pohmelfs_wait
*wait
)
51 struct pohmelfs_sb
*psb
= pohmelfs_sb(pi
->vfs_inode
.i_sb
);
/* Timestamp recorded in the DNET_META_UPDATE record below. */
52 struct timespec ts
= CURRENT_TIME
;
53 struct dnet_meta_update
*mu
;
/*
 * Buffer size: four record headers plus the check-status payload, the
 * update payload and one int per group.
 * NOTE(review): a term for the namespace payload (psb->fsid_len) is not
 * among the visible lines - confirm it is included in the sum.
 */
58 size
= sizeof(struct dnet_meta
) * 4 +
59 sizeof(struct dnet_meta_check_status
) +
60 sizeof(struct dnet_meta_update
) +
62 psb
->group_num
* sizeof(int);
/* Zero-filled allocation with GFP_NOIO. */
64 data
= kzalloc(size
, GFP_NOIO
);
/* Record 1: the superblock's group list, copied verbatim. */
71 m
->type
= DNET_META_GROUPS
;
72 m
->size
= psb
->group_num
* sizeof(int);
73 memcpy(m
->data
, psb
->groups
, m
->size
);
/*
 * Step to the next record, which starts right after this payload.
 * NOTE(review): m->size was assigned in host byte order above but is
 * read back through le32_to_cpu() here; any conversion in between is
 * not visible in this view - confirm.
 */
76 m
= (struct dnet_meta
*)(m
->data
+ le32_to_cpu(m
->size
));
/* Record 2: the filesystem id (psb->fsid, psb->fsid_len bytes). */
77 m
->type
= DNET_META_NAMESPACE
;
78 m
->size
= psb
->fsid_len
;
79 memcpy(m
->data
, psb
->fsid
, psb
->fsid_len
);
82 m
= (struct dnet_meta
*)(m
->data
+ le32_to_cpu(m
->size
));
/* Record 3: update time taken from ts, then passed to dnet_convert_meta_update(). */
83 m
->type
= DNET_META_UPDATE
;
84 m
->size
= sizeof(struct dnet_meta_update
);
85 mu
= (struct dnet_meta_update
*)m
->data
;
86 mu
->tm
.tsec
= ts
.tv_sec
;
87 mu
->tm
.tnsec
= ts
.tv_nsec
;
88 dnet_convert_meta_update(mu
);
91 m
= (struct dnet_meta
*)(m
->data
+ le32_to_cpu(m
->size
));
/* Record 4: check-status; the payload stays zeroed from kzalloc(). */
92 m
->type
= DNET_META_CHECK_STATUS
;
93 m
->size
= sizeof(struct dnet_meta_check_status
);
94 /* do not fill, it will be updated on server */
/* Describe the request: an acknowledged overwrite flagged as metadata. */
99 pio
->cmd
= DNET_CMD_WRITE
;
100 pio
->ioflags
= DNET_IO_FLAGS_OVERWRITE
| DNET_IO_FLAGS_META
;
101 pio
->cflags
= DNET_FLAGS_NEED_ACK
;
/* Transaction callbacks defined earlier in this file. */
103 pio
->cb
.init
= pohmelfs_write_init
;
104 pio
->cb
.destroy
= pohmelfs_write_destroy
;
105 pio
->cb
.complete
= pohmelfs_write_complete
;
110 err
= pohmelfs_send_io(pio
);
/*
 * Transaction completion callback for data writes; installed as
 * pio->cb.complete by pohmelfs_write_command().
 *
 * @t:    transaction; t->wctl is the write control block, t->inode the
 *        inode being written, t->cmd.p.io the I/O attributes sent.
 * @recv: receiving state; only recv->cmd is examined here.
 *
 * A reply with status 0 increments ctl->good_writes. The remaining
 * visible lines build a "write failed" debug message.
 * NOTE(review): the branch structure joining the two (else / braces) is
 * not part of the visible lines.
 */
120 static int pohmelfs_write_command_complete(struct pohmelfs_trans
*t
, struct pohmelfs_state
*recv
)
122 struct dnet_cmd
*cmd
= &recv
->cmd
;
123 struct pohmelfs_write_ctl
*ctl
= t
->wctl
;
/* DNET_FLAGS_MORE is tested; the guarded statement is not visible here. */
125 if (cmd
->flags
& DNET_FLAGS_MORE
)
/* Count this reply as a successful write. */
128 if (cmd
->status
== 0)
129 atomic_inc(&ctl
->good_writes
);
131 struct inode
*inode
= t
->inode
;
132 struct pohmelfs_inode
*pi
= pohmelfs_inode(inode
);
/* Size and offset of the request, converted from little-endian. */
133 unsigned long long size
= le64_to_cpu(t
->cmd
.p
.io
.size
);
134 unsigned long long offset
= le64_to_cpu(t
->cmd
.p
.io
.offset
);
136 pr_debug("%s: write failed: ino: %lu, isize: %llu, offset: %llu, size: %llu: %d\n",
137 pohmelfs_dump_id(pi
->id
.id
), inode
->i_ino
,
138 inode
->i_size
, offset
, size
, cmd
->status
);
/*
 * Transaction init callback for data writes; installed as pio->cb.init
 * by pohmelfs_write_command().
 *
 * Calls kref_get() on the refcnt of the write control block found in
 * t->wctl; pohmelfs_write_command_destroy() makes the matching
 * kref_put() call.
 */
144 static int pohmelfs_write_command_init(struct pohmelfs_trans
*t
)
146 struct pohmelfs_write_ctl
*ctl
= t
->wctl
;
148 kref_get(&ctl
->refcnt
);
/*
 * Transaction destroy callback for data writes; installed as
 * pio->cb.destroy by pohmelfs_write_command().
 *
 * Calls kref_put() on the refcnt of the write control block found in
 * t->wctl, naming pohmelfs_write_ctl_release() as the release function.
 */
152 static void pohmelfs_write_command_destroy(struct pohmelfs_trans
*t
)
154 struct pohmelfs_write_ctl
*ctl
= t
->wctl
;
156 kref_put(&ctl
->refcnt
, pohmelfs_write_ctl_release
);
/*
 * Submit a write request covering [offset, offset + len) of an inode.
 *
 * @pi:     inode being written.
 * @ctl:    write control block (how it is attached to the request is
 *          not part of the visible lines).
 * @offset: byte offset of the range; stored in pio->offset.
 * @len:    length of the range in bytes.
 *
 * A zeroed pohmelfs_io is taken from pohmelfs_io_cache, filled in,
 * passed to pohmelfs_send_io() and returned to the cache afterwards.
 */
159 int pohmelfs_write_command(struct pohmelfs_inode
*pi
, struct pohmelfs_write_ctl
*ctl
, loff_t offset
, size_t len
)
162 struct inode
*inode
= &pi
->vfs_inode
;
163 struct pohmelfs_io
*pio
;
/* Inode size sampled once at entry; used for pio->num and the commit test below. */
164 uint64_t prepare_size
= i_size_read(&pi
->vfs_inode
);
166 pio
= kmem_cache_zalloc(pohmelfs_io_cache
, GFP_NOIO
);
174 pio
->cmd
= DNET_CMD_WRITE
;
175 pio
->offset
= offset
;
177 pio
->cflags
= DNET_FLAGS_NEED_ACK
;
180 * We always set prepare bit, since elliptics/eblob reuses existing (previously prepared/reserved) area
181 * But it also allows to 'miss' prepare message (for example if we sent prepare bit when node was offline)
183 pio
->ioflags
= DNET_IO_FLAGS_OVERWRITE
| DNET_IO_FLAGS_PLAIN_WRITE
| DNET_IO_FLAGS_PREPARE
;
/* pio->num carries the inode size sampled above. */
185 pio
->num
= prepare_size
;
187 /* commit when whole inode is written */
188 if (offset
+ len
== prepare_size
) {
189 pio
->ioflags
|= DNET_IO_FLAGS_COMMIT
;
/* Transaction callbacks defined earlier in this file. */
194 pio
->cb
.complete
= pohmelfs_write_command_complete
;
195 pio
->cb
.init
= pohmelfs_write_command_init
;
196 pio
->cb
.destroy
= pohmelfs_write_command_destroy
;
198 pr_debug("%s: ino: %lu, offset: %llu, len: %zu, total size: %llu\n",
199 pohmelfs_dump_id(pi
->id
.id
), inode
->i_ino
,
200 (unsigned long long)offset
, len
, inode
->i_size
);
202 err
= pohmelfs_send_io(pio
);
/* The descriptor goes back to the cache once pohmelfs_send_io() has returned. */
207 kmem_cache_free(pohmelfs_io_cache
, pio
);
/*
 * Write an inode's metadata and wait for the result.
 *
 * @pi:   inode whose metadata is sent.
 * @sync: not referenced in the visible lines.
 *
 * Allocates a wait object and an I/O descriptor, sends the metadata via
 * pohmelfs_send_write_metadata(), then sleeps (interruptibly, bounded
 * by psb->write_wait_timeout milliseconds) until the completion
 * callback has set wait->condition. A negative wait->condition becomes
 * the error value.
 */
212 int pohmelfs_metadata_inode(struct pohmelfs_inode
*pi
, int sync
)
214 struct inode
*inode
= &pi
->vfs_inode
;
215 struct pohmelfs_sb
*psb
= pohmelfs_sb(inode
->i_sb
);
216 struct pohmelfs_io
*pio
;
217 struct pohmelfs_wait
*wait
;
221 wait
= pohmelfs_wait_alloc(pi
);
227 pio
= kmem_cache_zalloc(pohmelfs_io_cache
, GFP_NOIO
);
233 err
= pohmelfs_send_write_metadata(pi
, pio
, wait
);
/*
 * Wake-up condition: a status has been stored in wait->condition AND
 * the wait object's reference count has fallen to 2 or below.
 * NOTE(review): the count is read straight out of the kref
 * (refcnt.refcount); presumably "<= 2" means all in-flight transactions
 * have released theirs - confirm who holds the remaining references.
 */
238 ret
= wait_event_interruptible_timeout(wait
->wq
,
239 wait
->condition
!= 0 && atomic_read(&wait
->refcnt
.refcount
) <= 2,
240 msecs_to_jiffies(psb
->write_wait_timeout
));
/* A negative condition is the status stored by pohmelfs_write_complete(). */
248 if (wait
->condition
< 0) {
249 err
= wait
->condition
;
255 kmem_cache_free(pohmelfs_io_cache
, pio
);
257 pohmelfs_wait_put(wait
);
/*
 * ->fallocate handler (wired up in pohmelfs_file_ops).
 *
 * @file:   file being operated on.
 * @mode:   not referenced in the visible lines.
 * @offset: start of the requested range.
 * @len:    length of the requested range.
 *
 * Sends a DNET_CMD_WRITE request carrying only the
 * DNET_IO_FLAGS_PREPARE flag, with pio->num set to the current inode
 * size.
 */
262 static long pohmelfs_fallocate(struct file
*file
, int mode
, loff_t offset
, loff_t len
)
264 struct inode
*inode
= file
->f_path
.dentry
->d_inode
;
265 struct pohmelfs_inode
*pi
= pohmelfs_inode(inode
);
266 struct pohmelfs_io
*pio
;
/*
 * The range ends before the current end of file; what is done in that
 * case is not part of the visible lines.
 */
269 if (offset
+ len
< i_size_read(inode
)) {
274 pio
= kmem_cache_zalloc(pohmelfs_io_cache
, GFP_NOIO
);
282 pio
->cmd
= DNET_CMD_WRITE
;
283 pio
->cflags
= DNET_FLAGS_NEED_ACK
;
284 pio
->ioflags
= DNET_IO_FLAGS_PREPARE
;
285 pio
->num
= i_size_read(inode
);
/* NOTE(review): the final argument(s) of this pr_debug() are not visible here. */
287 pr_debug("%s: ino: %lu, offset: %llu, len: %llu, total size: %llu\n",
288 pohmelfs_dump_id(pi
->id
.id
), inode
->i_ino
,
289 (unsigned long long)offset
, (unsigned long long)len
,
292 err
= pohmelfs_send_io(pio
);
297 kmem_cache_free(pohmelfs_io_cache
, pio
);
/*
 * Control structure used by the read-latest path. Its members are not
 * visible in this view; elsewhere in this file an `id` member is
 * initialised with dnet_setup_id() and sizeof(struct pohmelfs_latest_ctl)
 * is used as the script request's binary_size.
 */
302 struct pohmelfs_latest_ctl
{
/*
 * Completion callback for the read-latest script request; installed as
 * req.complete by pohmelfs_read_latest_group().
 *
 * @t:    transaction; t->inode is the inode, t->priv the wait object,
 *        t->recv_data the received payload.
 * @recv: receiving state; only recv->cmd is examined here.
 *
 * For a reply flagged DNET_FLAGS_MORE, the payload following the
 * dnet_attr header is treated as an array of int group ids and copied
 * into pi->groups / pi->group_num under pi->lock. The command status is
 * stored in wait->condition.
 */
308 static int pohmelfs_read_latest_complete(struct pohmelfs_trans
*t
, struct pohmelfs_state
*recv
)
310 struct pohmelfs_inode
*pi
= pohmelfs_inode(t
->inode
);
311 struct pohmelfs_wait
*wait
= t
->priv
;
312 struct dnet_cmd
*cmd
= &recv
->cmd
;
313 int err
= cmd
->status
;
318 if (cmd
->flags
& DNET_FLAGS_MORE
) {
319 pr_debug("%s: group: %d, attr size: %lld\n",
320 pohmelfs_dump_id(cmd
->id
.id
), cmd
->id
.group_id
,
321 cmd
->size
- sizeof(struct dnet_attr
));
/*
 * The reply must hold the dnet_attr header plus at least 4 more bytes;
 * the handling of a shorter reply is not part of the visible lines.
 */
322 if (cmd
->size
< sizeof(struct dnet_attr
) + 4) {
327 mutex_lock(&pi
->lock
);
/*
 * New group array sized to the payload.
 * NOTE(review): whether a previous pi->groups is freed first is not
 * visible in this view - confirm.
 */
329 pi
->groups
= kmalloc(cmd
->size
- sizeof(struct dnet_attr
), GFP_NOIO
);
/* NOTE(review): presumably the unlock on the allocation-failure path - confirm. */
332 mutex_unlock(&pi
->lock
);
/* Number of whole ints in the payload; exactly that many are copied. */
336 pi
->group_num
= (cmd
->size
- sizeof(struct dnet_attr
)) / sizeof(int);
337 memcpy(pi
->groups
, t
->recv_data
+ sizeof(struct dnet_attr
), pi
->group_num
* sizeof(int));
339 pr_debug("%s: group: %d, received: %d groups\n",
340 pohmelfs_dump_id(cmd
->id
.id
), cmd
->id
.group_id
,
343 mutex_unlock(&pi
->lock
);
/* Publish the status to the waiter. */
348 wait
->condition
= err
;
/*
 * Send the read-latest-groups script request to one group.
 *
 * @pi:       inode the request is issued for.
 * @r:        control structure; only its size is used in the visible
 *            lines.
 * @group_id: group the request is addressed to.
 *
 * Returns whatever pohmelfs_send_script_request() returns.
 */
354 static int pohmelfs_read_latest_group(struct pohmelfs_inode
*pi
, struct pohmelfs_latest_ctl
*r
, int group_id
)
356 struct pohmelfs_script_req req
;
/* Start from an all-zero request so fields not set below stay 0. */
358 memset(&req
, 0, sizeof(struct pohmelfs_script_req
));
360 req
.script_name
= POHMELFS_READ_LATEST_GROUPS_SCRIPT
;
/* sizeof() of a string literal counts the trailing NUL; "- 1" drops it. */
361 req
.script_namelen
= sizeof(POHMELFS_READ_LATEST_GROUPS_SCRIPT
) - 1;
363 req
.obj_name
= "noname";
367 req
.binary_size
= sizeof(struct pohmelfs_latest_ctl
);
370 req
.group_id
= group_id
;
373 req
.complete
= pohmelfs_read_latest_complete
;
375 return pohmelfs_send_script_request(pi
, &req
);
/*
 * Query the superblock's groups for this inode's group list.
 *
 * @pi: inode to query for.
 *
 * Allocates a zeroed pohmelfs_latest_ctl, sets its id from pi->id.id,
 * then walks psb->groups, calling pohmelfs_read_latest_group() for each
 * entry. err starts as -ENOENT.
 * NOTE(review): how the loop reacts to each call's result (stop on
 * first success, continue, ...) is not part of the visible lines.
 */
378 static int pohmelfs_read_latest(struct pohmelfs_inode
*pi
)
380 struct pohmelfs_latest_ctl
*r
;
381 struct pohmelfs_sb
*psb
= pohmelfs_sb(pi
->vfs_inode
.i_sb
);
382 int i
, err
= -ENOENT
;
384 r
= kzalloc(sizeof(struct pohmelfs_latest_ctl
), GFP_NOIO
);
390 dnet_setup_id(&r
->id
, 0, pi
->id
.id
);
392 for (i
= 0; i
< psb
->group_num
; ++i
) {
/* Address the control id to the group about to be queried. */
393 r
->id
.group_id
= psb
->groups
[i
];
395 err
= pohmelfs_read_latest_group(pi
, r
, psb
->groups
[i
]);
404 pr_debug("%s: %d groups\n", pohmelfs_dump_id(pi
->id
.id
), pi
->group_num
);
/*
 * ->open handler (wired up in pohmelfs_file_ops).
 *
 * If the inode has no group list yet and is not marked local, the list
 * is fetched with pohmelfs_read_latest(); its return value is not
 * checked. If pohmelfs_need_resync() reports true, cached pages for the
 * whole mapping (index 0 to -1) are invalidated. The open itself is
 * delegated to generic_file_open().
 */
410 static int pohmelfs_file_open(struct inode
*inode
, struct file
*filp
)
412 struct pohmelfs_inode
*pi
= pohmelfs_inode(inode
);
414 if (!pi
->group_num
&& !pi
->local
)
415 pohmelfs_read_latest(pi
);
417 if (pohmelfs_need_resync(pi
))
418 invalidate_mapping_pages(&inode
->i_data
, 0, -1);
420 return generic_file_open(inode
, filp
);
424 * We want fsync() to work on POHMELFS.
/*
 * ->fsync handler (wired up in pohmelfs_file_ops; also called from
 * pohmelfs_flush()).
 *
 * @filp:     file to sync.
 * @start:    first byte of the range to write back.
 * @end:      last byte of the range to write back.
 * @datasync: not referenced in the visible lines.
 *
 * Writes back and waits on the page range, then calls
 * sync_inode_metadata(inode, 1) while holding inode->i_mutex.
 */
426 static int pohmelfs_fsync(struct file
*filp
, loff_t start
, loff_t end
, int datasync
)
428 struct inode
*inode
= filp
->f_mapping
->host
;
429 int err
= filemap_write_and_wait_range(inode
->i_mapping
, start
, end
);
/*
 * NOTE(review): in the visible lines err from the data write-back is
 * overwritten by the metadata sync result; confirm that a guard on the
 * first error exists in the lines not shown.
 */
431 mutex_lock(&inode
->i_mutex
);
432 err
= sync_inode_metadata(inode
, 1);
433 mutex_unlock(&inode
->i_mutex
);
/*
 * NOTE(review): start/end are cast to unsigned long long but printed
 * with %lld (signed).
 */
435 pr_debug("%s: start: %lld, end: %lld, nrpages: %ld, dirty: %d: %d\n",
436 pohmelfs_dump_id(pohmelfs_inode(inode
)->id
.id
),
437 (unsigned long long)start
, (unsigned long long)end
,
438 inode
->i_mapping
->nrpages
,
439 mapping_cap_writeback_dirty(inode
->i_mapping
), err
);
/*
 * ->flush handler (wired up in pohmelfs_file_ops).
 *
 * @filp: file being flushed.
 * @id:   not referenced in the visible lines.
 *
 * When the superblock's sync_on_close option is set, the file is synced
 * through pohmelfs_fsync(). If that produced no error, the mapping's
 * AS_EIO flag is tested; what is done when it is set is not part of the
 * visible lines.
 */
443 static int pohmelfs_flush(struct file
*filp
, fl_owner_t id
)
445 struct inode
*inode
= filp
->f_mapping
->host
;
446 struct pohmelfs_sb
*psb
= pohmelfs_sb(inode
->i_sb
);
/*
 * NOTE(review): ~0ULL is passed for the loff_t `end` parameter, which
 * converts to -1 (less than start = 0); confirm the range helpers used
 * by pohmelfs_fsync() treat that as "whole file" - LLONG_MAX is the
 * usual spelling.
 */
449 if (psb
->sync_on_close
)
450 err
= pohmelfs_fsync(filp
, 0, ~0ULL, 1);
452 if (!err
&& test_bit(AS_EIO
, &inode
->i_mapping
->flags
))
456 pohmelfs_dump_id(pohmelfs_inode(inode
)->id
.id
), err
);
/*
 * File operations table for POHMELFS regular files.
 *
 * open, fallocate, fsync and flush use the pohmelfs_* handlers defined
 * in this file; every other entry points at a generic VFS / page-cache
 * helper.
 */
460 const struct file_operations pohmelfs_file_ops
= {
461 .open
= pohmelfs_file_open
,
463 .llseek
= generic_file_llseek
,
465 .read
= do_sync_read
,
466 .aio_read
= generic_file_aio_read
,
468 .mmap
= generic_file_mmap
,
470 .splice_read
= generic_file_splice_read
,
471 .splice_write
= generic_file_splice_write
,
473 .write
= do_sync_write
,
474 .aio_write
= generic_file_aio_write
,
476 .fallocate
= pohmelfs_fallocate
,
478 .fsync
= pohmelfs_fsync
,
479 .flush
= pohmelfs_flush
,
482 const struct inode_operations pohmelfs_file_inode_operations
= {