From d6ef5463a8df7afc191246bbce6dc28d63c3917e Mon Sep 17 00:00:00 2001
From: Kirill Smelkov <kirr@navytux.spb.ru>
Date: Tue, 20 Dec 2011 01:37:03 +0400
Subject: [PATCH] fs: Draft module to mount filesystem representing navymail
 store

Now one can mount navymail store, and access gboxes with usual mail clients,
such as mutt. Only read-only access is implemented.

The filesystem is implemented as a high-level FUSE module which exports gbox'es
in traditional mbox format, i.e. files content is the same as output of
`navymail export --original-order`.

In order to measure fuse overhead, I've timed plain `navymail export` and cat
of the same gbox from filesystem for sup-talk archives:

	navymail export --original-order sup-talk >/dev/null	0.69s
	cat <mountpoint>/mail/sup-talk >/dev/null		0.75s

So overhead the overhead is ~ 10%.

NOTE while implementing first draft I did not pay attention to simultaneous
     access and locking, only tried to get the thing up and running.

Signed-off-by: Kirill Smelkov <kirr@navytux.spb.ru>
---
 Makefile                                |   6 +-
 README                                  |   1 -
 fromgit/git.cocci                       |   1 +
 fs.c                                    | 535 ++++++++++++++++++++++++++++++++
 include/navymail/navymail-common-cmds.h |   1 +
 include/navymail/navymail.h             |   1 +
 t/t0002-fs.sh                           | 131 ++++++++
 t/xdd                                   |  67 ++++
 8 files changed, 739 insertions(+), 4 deletions(-)
 create mode 100644 fs.c
 create mode 100755 t/t0002-fs.sh
 create mode 100755 t/xdd
diff --git a/Makefile b/Makefile
index 9e64e86..55d4911 100644
--- a/Makefile
+++ b/Makefile
@@ -105,7 +105,7 @@ include .config.mk.git
 
 CPPFLAGS:= -Igit/ -Iinclude/
 CC	:= $(GIT_CC)
-CFLAGS	:= $(GIT_ALL_CFLAGS)
+CFLAGS	:= $(GIT_ALL_CFLAGS) $(FUSE_CFLAGS)
 # lib.a -> git/lib.a , but -lsomething stays the same
 GIT_LIBS:= $(foreach _,$(GIT_LIBS),$(if $(filter -%,$_),$_,git/$_))
 X	:= $(GIT_X)
@@ -123,8 +123,8 @@ fromgit/libgit.a: git/libgit.a fromgit/help.o fromgit/exec_cmd.o
 	ar r $$Atmp $(wordlist 2,$(words $+),$+)	&&\
 	mv -f $$Atmp $@
 
-navymail$X: navymail.o export.o gbox-walk.o fromgit/builtin/help.o compat/progexe.o fromgit/libgit.a
-	$(QUIET_CC)$(CC) $(CFLAGS) -o $@ $^ $(GIT_LDFLAGS) $(GIT_LIBS_wo_libgit)
+navymail$X: navymail.o export.o gbox-walk.o fs.o fromgit/builtin/help.o compat/progexe.o fromgit/libgit.a
+	$(QUIET_CC)$(CC) $(CFLAGS) -o $@ $^ $(GIT_LDFLAGS) $(GIT_LIBS_wo_libgit) $(FUSE_LIBS)
 
 # TODO automatically extract dependencies on compile
 %.o	: %.c
diff --git a/README b/README
index a15b339..5f95af3 100644
--- a/README
+++ b/README
@@ -18,7 +18,6 @@ TODO
 - add own header to msg
 - import -> C
 - filter out duplicate messages on import (only conditionally?)
-- mount gbox (fuse)
 - why import'ed mbox is only ~ 2.5 times smaller, compared to ~ 7-8 times
   smaller after simple gzip on the whole file?
 
diff --git a/fromgit/git.cocci b/fromgit/git.cocci
index fa73ae1..5588d81 100644
--- a/fromgit/git.cocci
+++ b/fromgit/git.cocci
@@ -145,6 +145,7 @@ identifier reword_usage.msg2;
 -	...
 +	{ "export",	cmd_export,	RUN_SETUP },
 +	{ "help",	cmd_help },
++	{ "mount",	cmd_mount,	RUN_SETUP },
 +	{ "version",	cmd_version },
    };
    ...
diff --git a/fs.c b/fs.c
new file mode 100644
index 0000000..cda3f64
--- /dev/null
+++ b/fs.c
@@ -0,0 +1,535 @@
+/* filesystem interface to navymail store
+ * Copyright (C) 2011  Kirill Smelkov <kirr@navytux.spb.ru>
+ *
+ * This program is free software: you can Use, Study, Modify and Redistribute it
+ * under the terms of the GNU General Public License version 2. This program is
+ * distributed WITHOUT ANY WARRANTY. See COPYING file for full License terms.
+ *
+ *
+ * XXX what about threading/locking?
+ */
+
+#include "git-compat-util.h"
+#include "parse-options.h"
+#include "argv-array.h"
+#include "refs.h"
+#include "navymail/navymail.h"
+#include "navymail/gbox-walk.h"
+
+#define FUSE_USE_VERSION 26
+#include <fuse.h>
+
+static const char * const  mount_usage[] = {
+	"navymail mount <mountpoint> [-- fuse-options]",
+	NULL
+};
+
+static const struct option mount_options[] = {
+	OPT_END()
+};
+
+
+
+/* just tell for_each_ref* caller, we were here, i.e. there were non-zero refs
+ * iterated */
+static int __refs_nonempty(const char *refname, const unsigned char *sha1,
+		int flags, void *cb_data)
+{
+	return 1;
+}
+
+static int navymailfs_getattr(const char *path, struct stat *st)
+{
+	int ret = 0;
+
+	memset(st, 0, sizeof(*st));
+
+	/* XXX dumb */
+	/* XXX what about st_nlink?
+	 * XXX what about st_uid/st_gid, st_{a,m,c}time ?
+	 */
+	if (!strcmp(path, "/")) {
+		st->st_mode  = S_IFDIR | 0755;
+		/* st->st_nlink = 2; */
+	}
+	else if (!strcmp(path, "/mail")) {
+		st->st_mode  = S_IFDIR | 0755;
+		/* st_nlink */
+	}
+	else if (!prefixcmp(path, "/mail/")) {
+		struct strbuf refname = STRBUF_INIT;
+		strbuf_addf(&refname, "refs%s", path);
+
+		/* XXX what to do if there are e.g.
+		 *
+		 * 	refs/mail/x  and  refs/mail/x/y
+		 *
+		 * ? i.e. here x is both file and directory - Git allows such refs...
+		 */
+
+		/* a file? */
+		if (ref_exists(refname.buf)) {
+			st->st_mode  = S_IFREG | 0444;
+			/* don't waste resources computing st_size. but then
+			 * this needs direct_io=1 for file to be readable */
+			st->st_size  = 0;
+		}
+
+		/* it could be a dir still */
+		else {
+			/* ensure there is / at tail */
+			if (refname.buf[refname.len - 1] != '/')
+				strbuf_addch(&refname, '/');
+
+			/* read refs with refname as prefix -- if there are
+			 * some, then refname is a dir */
+			if (for_each_ref_in(refname.buf, __refs_nonempty, NULL)) {
+				st->st_mode = S_IFDIR | 0755;
+				/* st_nlink */
+			}
+			else
+				ret = -ENOENT;
+		}
+
+		strbuf_release(&refname);
+	}
+	else
+		ret = -ENOENT;
+
+	return ret;
+}
+
+
+struct readdir_ctx {
+	void *buf;
+	fuse_fill_dir_t filler;
+
+	/* previous added entry. empty if no entries were added yet */
+	struct strbuf prev_entry;
+};
+
+static int __readdir_handle_ref(const char *refname, const unsigned char *sha1,
+		int flags, void *cb_data)
+{
+	struct readdir_ctx *ctx = (struct readdir_ctx *)cb_data;
+	struct strbuf entry = STRBUF_INIT;
+	const char *subdir_mark;
+
+	/* extract entry-name from this refname - a file or a subdirectory
+	 * (potentially from sub-sub-sub...directory */
+	if ((subdir_mark = strchr(refname, '/')))
+		strbuf_add(&entry, refname, subdir_mark-refname);
+	else
+		strbuf_addf(&entry, "%s", refname);
+
+	/* refs come here in sorted order, so we can avoid duplicates simply
+	 * checking against previous entry */
+	if (!strbuf_cmp(&entry, &ctx->prev_entry))
+		goto out;
+
+	/* now we know this dir is not empty - emit . and .. before first entry */
+	if (!ctx->prev_entry.len) {
+		ctx->filler(ctx->buf, ".",	NULL, 0);
+		ctx->filler(ctx->buf, "..",	NULL, 0);
+	}
+
+	/* ctx->prev_entry = entry */
+	strbuf_reset(&ctx->prev_entry);
+	strbuf_addbuf(&ctx->prev_entry, &entry);
+
+	/* emit entry */
+	ctx->filler(ctx->buf, entry.buf,  NULL, 0);
+
+out:
+	strbuf_release(&entry);
+	return 0;
+}
+
+static int navymailfs_readdir(const char *path, void *buf, fuse_fill_dir_t
+		filler, off_t offset, struct fuse_file_info *fi)
+{
+	int ret = 0;
+	struct readdir_ctx ctx = { .buf = buf,  .filler = filler,  .prev_entry = STRBUF_INIT };
+
+	(void) offset;
+	(void) fi;
+
+
+	/* XXX dumb */
+	if (!strcmp(path, "/")) {
+		filler(buf, ".",	NULL, 0);
+		filler(buf, "..",	NULL, 0);
+		filler(buf, "mail",	NULL, 0);
+	}
+	else if (!strcmp(path, "/mail") || !prefixcmp(path, "/mail/")) {
+		struct strbuf refprefix = STRBUF_INIT;
+		strbuf_addf(&refprefix, "refs%s", path);
+		if (refprefix.buf[refprefix.len - 1] != '/')
+			strbuf_addch(&refprefix, '/');	/* ensure there is / at tail */
+
+		/* read refs from refprefix (e.g. /refs/mail/dir/) */
+		for_each_ref_in(refprefix.buf, __readdir_handle_ref, &ctx);
+
+		if (!ctx.prev_entry.len)
+			ret = -ENOENT;	/* no entries for refprefix */
+
+		strbuf_release(&refprefix);
+	}
+	else
+		ret = -ENOENT;
+
+
+	strbuf_release(&ctx.prev_entry);
+	return ret;
+}
+
+
+
+
+/* index for looking up messages in gbox by offset */
+struct msgmap_index {
+	off_t *sumsize;			/* Si = sum_{j=0..i} size(msg_j) */
+	unsigned char (*sha1)[20];	/* sha1_i */
+
+	int nr;
+	int alloc;
+};
+
+struct gbox_ctx {
+	struct strbuf refname;
+	struct gbox_walk gbox_walk;
+	struct msgmap_index msgmap;
+
+	/* last read msg */
+	char *msg_buf;
+	unsigned long msg_size;
+	int msg_idx;	/* index in msgmap */
+};
+
+static int navymailfs_open(const char *path, struct fuse_file_info *fi)
+{
+	int ret = 0;
+	struct strbuf refname = STRBUF_INIT;
+	struct gbox_ctx *ctx = NULL;
+
+	/* XXX dumb */
+	if (!!prefixcmp(path, "/mail/")) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	strbuf_addf(&refname, "refs%s", path);
+
+	if (!ref_exists(refname.buf)) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	if ((fi->flags & (O_RDONLY | O_WRONLY | O_RDWR)) != O_RDONLY) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	/*
+	 * direct_io is used so that files with st_size=0 could be read
+	 * XXX how this affects performance?
+	 * XXX what about fi->keep_cache ?
+	 */
+	fi->direct_io = 1;
+
+	ctx = xcalloc(1, sizeof(*ctx));
+	strbuf_init(&ctx->refname, 0);
+	strbuf_addbuf(&ctx->refname, &refname);
+	ctx->gbox_walk.gbox = ctx->refname.buf;
+	ctx->gbox_walk.original_order = 1;
+	prepare_gbox_walk(&ctx->gbox_walk);
+
+	ctx->msg_idx = -1;
+
+	fi->fh = (intptr_t)ctx;
+
+out:
+	strbuf_release(&refname);
+	return ret;
+}
+
+
+static int navymailfs_release(const char *path, struct fuse_file_info *fi)
+{
+	struct gbox_ctx *ctx = (struct gbox_ctx *)(intptr_t)fi->fh;
+
+	strbuf_release(&ctx->refname);
+	free(ctx->msgmap.sumsize);
+	free(ctx->msgmap.sha1);
+
+	if (ctx->msg_idx != -1) {
+		free(ctx->msg_buf);
+		ctx->msg_buf  = NULL;
+		ctx->msg_size = 0;
+		ctx->msg_idx  = -1;
+	}
+
+	free(ctx);
+
+	return 0;
+}
+
+
+/* compute a*b/c  correctly handling overflow in a*b
+ *
+ * b = q*c+r	-> a*b/c = a*q + a*r/c
+ */
+static intmax_t imaxmuldiv(intmax_t a, intmax_t b, intmax_t c)
+{
+	imaxdiv_t _;
+
+	_ = imaxdiv(b, c);
+	return a*_.quot + a*_.rem/c;
+}
+
+
+#define	DEBUG_LOOKUP	0
+
+#if DEBUG_LOOKUP
+# define TRACE_LOOKUP(fmt...)	do { fprintf(stderr, fmt); } while (0)
+#else
+# define TRACE_LOOKUP(fmt...)	do {} while (0)
+#endif
+
+
+static int navymailfs_read(const char *path, char *buf, size_t size, off_t
+		offset, struct fuse_file_info *fi)
+{
+	struct gbox_ctx *ctx = (struct gbox_ctx *)(intptr_t)fi->fh;
+
+	int lo, hi, i, alloc;
+	off_t Si, Si_1, Slo_, Shi;
+	const unsigned char *sha1;
+	enum object_type type;
+	const char *read_start;
+	int read_size, read_total;
+
+	/* reading optimized for linear reads */
+
+	TRACE_LOOKUP("READ %s\toffset: %llu\tsize: %u\n", path, offset, size);
+
+
+	/* 1) lookup already-seen msg overlapping with offset.
+	 * after this phase, either
+	 *
+	 * - i indicates appropriate msg, or
+	 * - i points just-after msgmap tail	(i.e. we need to fetch next msgs)
+	 *
+	 *
+	 * ~~~~
+	 * some reminders before we begin:
+	 *
+	 *	Si = sum_{j=0..i} size(msg_j)
+	 *
+	 *	offset <  Si	<=> offset is in [0,i]		(1)
+	 *	offset >= Si	<=> offset is in [i+1,∞]	(2)
+	 *
+	 * in particular
+	 *
+	 * 	Si_1 <= offset < Si	<=> offset is in i'th msg	(3)
+	 */
+	lo = 0;
+	hi = ctx->msgmap.nr - 1;
+
+	/* if we are at tail - there is no chance to find appropriate chunk -
+	 * we'll have to skip search and start reading next msgs (see 2)
+	 */
+	if (hi==-1 || offset >= ctx->msgmap.sumsize[hi])
+		i = hi+1;
+
+	else {
+		/* now we know offset is somewhere in msgmap;
+		 * start searching at last read */
+		i = ctx->msg_idx != -1 ? ctx->msg_idx : hi;
+
+		while (1) {
+			Si   =  ctx->msgmap.sumsize[i];
+			Si_1 =  i >= 1 ? ctx->msgmap.sumsize[i-1] : 0;
+
+			TRACE_LOOKUP("\tlo: %i\thi: %i\ti: %i\t Si: %llu\tSi_1: %llu\n", lo, hi, i, Si, Si_1);
+
+
+			if (offset < Si) {
+				if (offset >= Si_1)
+					break;		/* found, see (3) */
+
+				/* offset < Si_1 , so because of (1) */
+				hi = i-1;
+			}
+			else
+				/* offset >= Si , see (2) */
+				lo = i+1;
+
+
+			/* now we know offset is somewhere in [lo,hi];
+			 * estimate new i, assuming messages are of approximately equal size.
+			 *
+			 * ~~~~
+			 * we have:
+			 *
+			 *	                    offset
+			 *	                      |
+			 *	      lo              v        hi
+			 *	    |----|----|-- ... --|----|----|
+			 *	   Olo<                          Ohi>
+			 *
+			 *	Olo< = Slo_1
+			 *	Ohi> = Shi - 1
+			 *
+			 *	Olo<  <= offset <= Ohi>		(4)	,i.e.
+			 *	Slo_1 <= offset <= Shi - 1	(5)
+			 *
+			 * then new guess, assuming uniformity would be as follows:
+			 *
+			 *	offset - Olo<      i - lo
+			 *	-------------  =  -------	(6)
+			 *	 Ohi> - Olo<      hi - lo
+			 *
+			 * which gives
+			 *
+			 *	                    offset - Slo_1
+			 * 	i = lo + (hi - lo)*-----------------	(7)
+			 *	                    Shi - Slo_1 - 1
+			 *
+			 * the ratio multiplier for (hi - lo) is in [0,1]
+			 * because of (5), so new i should be always in [lo,hi].
+			 *
+			 * The process should converge regardless of sizes
+			 * distribution, because at each step we tighten lo or
+			 * high by 1 in the above block.
+			 *
+			 * XXX maybe detect pathological cases, and do plain
+			 *     binary search then?
+			 */
+
+			Slo_ = lo >= 1 ? ctx->msgmap.sumsize[lo-1] : 0;
+			Shi  = ctx->msgmap.sumsize[hi];
+
+			/* compute
+			 *
+			 *	i = lo + (offset - Slo_)*(hi-lo)/(Shi-Slo_-1);
+			 *
+			 * but avoid overflow on multiply
+			 */
+			i = lo + imaxmuldiv( (offset - Slo_),(hi-lo),(Shi-Slo_-1) );
+		}
+	}
+
+
+	/* 2) now linearly read messages, until all read request is done
+	 * NOTE: we'll maybe have to first skip some msgs, if offset >= Si  (see 2)
+	 */
+	read_total = 0;
+
+	for (;size != 0; ++i) {
+		/* read i'th msg */
+		if (i != ctx->msg_idx) {
+			if (ctx->msg_idx != -1) {
+				free(ctx->msg_buf);
+				ctx->msg_buf  = NULL;
+				ctx->msg_size = 0;
+				ctx->msg_idx  = -1;
+			}
+
+			if (i == ctx->msgmap.nr) {
+				/* fetch next msg from gbox */
+				sha1 = next_gbox_entry(&ctx->gbox_walk);
+				if (!sha1)
+					break;	/* EOF */
+			}
+			else
+				sha1 = ctx->msgmap.sha1[i];
+
+			ctx->msg_buf = read_sha1_file(sha1, &type, &ctx->msg_size);
+			if (!ctx->msg_buf)
+				die("read_sha1_file(%s) -> NULL", sha1_to_hex(sha1));
+			if (type != OBJ_BLOB)
+				die("%s is not a blob", sha1_to_hex(sha1));
+
+
+			if (i == ctx->msgmap.nr) {
+				alloc = ctx->msgmap.alloc;
+				ALLOC_GROW(ctx->msgmap.sumsize, ctx->msgmap.nr +1, alloc);
+				ALLOC_GROW(ctx->msgmap.sha1,    ctx->msgmap.nr +1, ctx->msgmap.alloc);
+
+				ctx->msgmap.nr += 1;
+				ctx->msgmap.sumsize[i] = (i > 0 ? ctx->msgmap.sumsize[i-1] : 0)
+							 + ctx->msg_size;
+				hashcpy(ctx->msgmap.sha1[i], sha1);
+			}
+
+			ctx->msg_idx = i;
+		}
+
+		Si = ctx->msgmap.sumsize[i];
+		Si_1 =  i >= 1 ? ctx->msgmap.sumsize[i-1] : 0;
+
+
+		/* i'th msg overlaps - use it */
+		if (offset < Si) {
+			read_start = ctx->msg_buf  + (offset - Si_1);
+			read_size  = ctx->msg_size - (offset - Si_1);
+			read_size  = MIN(read_size, size);
+
+			memcpy(buf, read_start, read_size);
+			buf    += read_size;
+			size   -= read_size;
+			offset += read_size;
+			read_total += read_size;
+		}
+	}
+
+
+	return read_total;
+}
+
+static struct fuse_operations navymailfs_ops = {
+	.getattr	= navymailfs_getattr,
+
+#if 0
+	.opendir	=
+	.releasedir	=
+#endif
+	.readdir	= navymailfs_readdir,
+
+	.open		= navymailfs_open,
+	.release	= navymailfs_release,
+	.read		= navymailfs_read,
+
+};
+
+
+int cmd_mount(int argc, const char **argv, const char *prefix)
+{
+	int i, ret;
+	struct argv_array argvf;
+
+	argc = parse_options(argc, argv, NULL /*prefix?*/, mount_options, mount_usage, 0);
+
+	/* git_config(git_default_config, NULL); ? */
+
+	if (argc < 1)
+		usage_with_options(mount_usage, mount_options);
+
+	/* XXX only absolute NAVYMAIL_DIR path is ok, because when
+	 * daemonizing, fuse does chdir("/"). TODO make this automatically happen */
+	if (!is_absolute_path(getenv(NAVYMAIL_DIR_ENVIRONMENT)))
+		die("mount: TODO atm navymail-dir has to be absolute-path");
+
+	/* tail to fuse main loop */
+	argv_array_init(&argvf);
+	argv_array_push(&argvf, "navymail-mount");
+	for (i=0; i<argc; ++i)
+		argv_array_push(&argvf, argv[i]);
+
+	/* TODO hook fuse_session_exit to die (see exit_handler() in fuse_signals.c) */
+	ret = fuse_main(argvf.argc, /* XXX can't we avoid cast? */(char **)argvf.argv, &navymailfs_ops, NULL);
+
+	argv_array_clear(&argvf);
+	return ret;
+}
diff --git a/include/navymail/navymail-common-cmds.h b/include/navymail/navymail-common-cmds.h
index bb619d8..7f95e5f 100644
--- a/include/navymail/navymail-common-cmds.h
+++ b/include/navymail/navymail-common-cmds.h
@@ -9,4 +9,5 @@ static struct cmdname_help navymail_common_cmds[] = {
   {"import",	"Import mbox into navymail store"},
   {"init",	"Initialize navymail store"},
   {"export",	"Export mbox from navymail store"},
+  {"mount",	"Mount filesystem representing navymail store"},
 };
diff --git a/include/navymail/navymail.h b/include/navymail/navymail.h
index 3af41a0..f6c8020 100644
--- a/include/navymail/navymail.h
+++ b/include/navymail/navymail.h
@@ -40,5 +40,6 @@ const char *navymail_html_path();
  * XXX we don't need prefix - it has meaning only for worktree
  */
 extern int cmd_export	(int argc, const char **argv, const char *prefix);
+extern int cmd_mount	(int argc, const char **argv, const char *prefix);
 
 #endif
diff --git a/t/t0002-fs.sh b/t/t0002-fs.sh
new file mode 100755
index 0000000..fcd91f6
--- /dev/null
+++ b/t/t0002-fs.sh
@@ -0,0 +1,131 @@
+#!/bin/sh
+# Copyright (C) 2011 Kirill Smelkov <kirr@navytux.spb.ru>
+
+test_description='Test filesystem interface'
+. ./test-lib.sh
+
+
+D="$TEST_DIRECTORY/t0001"
+
+# XXX partially dup from t0001-importexport.sh
+test_expect_success 'import test mail' '
+	navymail import sup-talk	$D/sup-talk.mbox,1-2 &&
+	navymail import sup-talk	$D/sup-talk.mbox,3-4 &&
+	navymail import sup-talk	$D/sup-talk.mbox,5-17 &&
+	navymail import sup-talk2	$D/sup-talk.mbox,1-17 &&
+	navymail import x/zzz		$D/sup-talk.mbox,1-2 &&
+	navymail import x/yyy		$D/sup-talk.mbox,3-4 &&
+	navymail import x/y/qqq		$D/sup-talk.mbox,5-17 &&
+	navymail import x/y/ddd		$D/sup-talk.mbox,17-1
+'
+
+navymail_mountpath=
+navymail_mount() {
+	trap 'code=$?; navymail_umount; (exit $code); die' EXIT
+	navymail_mountpath="$(pwd)/$1"
+	navymail mount "$navymail_mountpath"
+}
+
+navymail_umount() {
+	test -n "$navymail_mountpath" || return
+
+	fusermount -u "$navymail_mountpath"
+	navymail_mountpath=
+}
+
+
+test_expect_success 'mount it' '
+	mkdir fs &&
+	>expect &&
+	ls -F fs >actual &&
+	test_cmp expect actual &&
+	navymail_mount fs
+'
+
+test_expect_success 'basic filesystem layout' '
+	echo "mail/" >expect &&
+	ls -F fs >actual &&
+	test_cmp expect actual &&
+	cat >expect <<EOF &&
+sup-talk
+sup-talk2
+x/
+EOF
+	ls -F fs/mail >actual &&
+	test_cmp expect actual &&
+	cat >expect <<EOF &&
+y/
+yyy
+zzz
+EOF
+	ls -F fs/mail/x >actual &&
+	test_cmp expect actual &&
+	cat >expect <<EOF &&
+ddd
+qqq
+EOF
+	ls -F fs/mail/x/y >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'read file content' '
+	cat fs/mail/sup-talk >read1 &&
+	cat fs/mail/sup-talk >read2 &&
+	cat fs/mail/sup-talk >read3 &&
+	test_cmp read1 read2 &&
+	test_cmp read2 read3 &&
+	navymail export --original-order sup-talk >sup-talk.mbox &&
+	cat fs/mail/sup-talk >sup-talk.read &&
+	test_cmp sup-talk.mbox sup-talk.read &&
+	test_cmp sup-talk.read read1 &&
+	test_cmp fs/mail/sup-talk2	$D/sup-talk.mbox,1-17 &&
+	test_cmp fs/mail/x/zzz		$D/sup-talk.mbox,1-2 &&
+	test_cmp fs/mail/x/yyy		$D/sup-talk.mbox,3-4 &&
+	test_cmp fs/mail/x/y/qqq	$D/sup-talk.mbox,5-17 &&
+	test_cmp fs/mail/x/y/ddd	$D/sup-talk.mbox,17-1
+'
+
+cat >expect <<EOF
+Date: Wed, 31 Oct 2007 09:42:58 +0100
+Date: Mon, 29 Oct 2007 16:40:37 -0500
+Date: Mon, 29 Oct 2007 14:43:07 -0700
+Message-ID: <1193694144-sup-4506@south>
+Subject: [ANN] Sup 0.2 released
+Received: from localhost.localdomain
+Sender: sup-talk-bounces@rubyforge.org
+- Add custom code to handle certain types of messages or to handle
+Date: Mon, 29 Oct 2007 18:55:59 -0700
+Date: Tue, 30 Oct 2007 15:55:18 +0000
+rrrr
+Date: Tue, 30 Oct 2007 15:55:18 +0000
+        from /Users/brendano/sw/sup/lib/sup/index.rb:323:in \`load_entry_for_id'
+EOF
+
+# tests already-seen msg lookup in navymailfs_read
+test_expect_success 'read file content seeky' '
+	$TEST_DIRECTORY/xdd	\
+		53843,38	\
+		  115,38	\
+		 5118,38	\
+		 5156,40	\
+		 5086,32	\
+		 6084,37	\
+		 7053,39	\
+		 8017,67	\
+		 8862,38	\
+		23731,38	\
+		 4994,1		\
+		 8709,1		\
+		13374,1		\
+		15883,1		\
+		15926,1		\
+		23731,38	\
+		12221,80	\
+	<fs/mail/sup-talk >actual &&
+	test_cmp expect actual
+'
+
+
+
+navymail_umount
+test_done
diff --git a/t/xdd b/t/xdd
new file mode 100755
index 0000000..e3a4232
--- /dev/null
+++ b/t/xdd
@@ -0,0 +1,67 @@
+#!/usr/bin/env python
+"""
+dd-alike which supports SEEK_SET seeking
+Copyright (C) 2011  Kirill Smelkov <kirr@navytux.spb.ru>
+
+This program is free software: you can Use, Study, Modify and Redistribute it
+under the terms of the GNU General Public License version 2. This program is
+distributed WITHOUT ANY WARRANTY. See COPYING file for full License terms.
+
+
+dd's skip uses SEEK_CUR, and also it checks whether current position is >
+st_size after, and errors if it is, which is not good for us - in our case all
+files have st_size=0, like in /proc.
+
+So here goes small utility
+$ xdd off1,size1 off2,size2 ...  <in >out
+"""
+import sys
+from os import read, write, lseek, SEEK_SET
+
+
+# safe read
+def xread(fd, size):
+	buf = ''
+
+	while size != 0:
+		chunk = read(fd, size)
+		if not chunk:
+			break
+
+		buf  += chunk
+		size -= len(chunk)
+
+	return buf
+
+# safe write
+def xwrite(fd, buf):
+	wrote = 0
+
+	while buf:
+		size = write(fd, buf)
+
+		buf    = buf[size:]
+		wrote += size
+
+	return wrote
+
+
+def main():
+	# (offset, size)
+	work = []
+
+	for arg in sys.argv[1:]:
+		offset, size = arg.split(',')
+		offset = int(offset)
+		size   = int(size)
+		work.append( (offset, size) )
+
+	for offset, size in work:
+		lseek(0, offset, SEEK_SET)
+		buf = xread(0, size)
+		xwrite(1, buf)
+
+
+
+if __name__ == '__main__':
+	main()
-- 
2.11.4.GIT