2 Copyright (C) 2012,2013,2014,2016,2018 rofl0r
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License along
15 with this program; if not, write to the Free Software Foundation, Inc.,
16 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
22 #define _XOPEN_SOURCE 700
23 #include "../lib/include/timelib.h"
24 #include "../lib/include/filelist.h"
25 #include "../lib/include/stringptrlist.h"
26 #include "../lib/include/filelib.h"
27 #include "../lib/include/strlib.h"
28 #include "../lib/include/logger.h"
29 #include "../lib/include/optparser.h"
30 #include "../lib/include/crc32c.h"
31 #include "../lib/include/format.h"
37 #include <arpa/inet.h>
45 typedef unsigned long long ull
;
47 #pragma RcB2 LINK "-lpthread"
51 ACT_SIMULATE_SYNC
= 2,
70 stringptrlist
* excludes
;
88 static progstate_s progstate
;
/* Returns nonzero when `file` is non-empty and its last character is '/',
 * i.e. the path is spelled with a trailing slash. The check is purely
 * textual; the filesystem is not consulted. */
90 static int isdir(stringptr
* file
) {
91 return file
->size
&& file
->ptr
[file
->size
-1] == '/';
/* Applies the modification and access times recorded in `st` to the path
 * named by `file`, using utime().
 * NOTE(review): the reaction to a utime() failure is not shown here. */
94 static void copyDate(stringptr
* file
, struct stat
* st
) {
96 ut
.modtime
= st
->st_mtime
;
97 ut
.actime
= st
->st_atime
;
98 if(utime(file
->ptr
, &ut
) == -1)
/* Writes '/' at s->ptr[s->size], i.e. into the slot directly behind the
 * current string contents. Used by callers to undo removeTrailingSlash().
 * NOTE(review): confirm that s->size is advanced afterwards and that the
 * buffer always has room for the extra byte. */
102 static void restoreTrailingSlash(stringptr
* s
) {
103 s
->ptr
[s
->size
] = '/';
/* Calls stringptr_shiftleft(s, 1) on the path. Callers store the int result
 * in `wasdir` and use it to decide whether restoreTrailingSlash() must be
 * called later.
 * NOTE(review): the condition guarding the shift and the return statements
 * are not shown here - confirm it only fires for paths ending in '/'. */
108 static inline int removeTrailingSlash(stringptr
* s
) {
110 stringptr_shiftleft(s
, 1);
/* Updates the timestamp of `dst` from `ss`. A trailing slash on `dst` is
 * stripped for the duration of the update and put back afterwards, so the
 * caller's string is left as it was passed in.
 * NOTE(review): the actual timestamp call between the two steps is not
 * shown here; presumably copyDate(dst, ss). */
116 static void updateTimestamp(stringptr
* dst
, struct stat
* ss
) {
117 int wasdir
= removeTrailingSlash(dst
);
119 if(wasdir
) restoreTrailingSlash(dst
);
/* Creates the directory `dst` with the mode bits taken from `ss`.
 * Only ACT_SYNC actually touches the filesystem; the other actions take the
 * early-out guarded by the first test.
 * Returns -1 when mkdir() fails with ENOSPC (disk full), which callers
 * propagate upwards to abort the run. */
122 static int makeDir(stringptr
* dst
, struct stat
* ss
) {
123 if(progstate
.action
!= ACT_SYNC
)
125 if(mkdir(dst
->ptr
, ss
->st_mode
) == -1) {
127 if(errno
== ENOSPC
) return -1;
/* Formats a transfer rate as "<int>.<2 digits> MB/s" into mbs_buf.
 *   mbs_buf, buf_size  destination buffer and its capacity
 *   bytes              amount of data transferred
 *   ms                 elapsed time in milliseconds
 * The rate is bytes/(1024*1024) divided by ms/1000, computed in float.
 * Callers pass the result straight on as a C string. */
133 static char* getMbsString(char* mbs_buf
, size_t buf_size
, uint64_t bytes
, long ms
) {
135 (((float) bytes
/ (1024.f
* 1024.f
)) /
136 ((float) ms
/ 1000.f
)) :
/* split into integer part and hundredths so that the value can be printed
 * with two unsigned conversions */
138 unsigned mbs_a
= (unsigned) mbs
;
139 unsigned mbs_b
= (unsigned)((mbs
- mbs_a
) * 100.f
);
140 ulz_snprintf(mbs_buf
, buf_size
, "%u.%.2u MB/s", mbs_a
, mbs_b
);
/* Reads `src` once, feeding every chunk into a CRC32C and, when `dst` is
 * given, writing the same chunk to `dst` (created/truncated with the
 * source's mode). The final checksum is stored in *crc_result. */
149 /* if dest is not passed, no copy will be produced
150 * returns 1 if successful, 0 on recoverable errors, -1 on disk full */
151 static int get_crc_and_copy(stringptr
* src
, stringptr
* dst
, struct stat
*src_stat
, crc_t
* crc_result
) {
152 int fds
, fdd
= -1, diskfull
= 0;
/* FIFOs have no content to checksum: the CRC is set to 0 and a FIFO with
 * the source's permission bits is created at dst.
 * NOTE(review): dst->ptr is dereferenced here without a visible NULL check,
 * yet checksum-only callers pass dst == NULL - confirm a FIFO can never
 * reach this branch in that case. */
161 if(S_ISFIFO(src_stat
->st_mode
)) {
162 crc_result
->asInt
= 0;
163 if(mkfifo(dst
->ptr
, src_stat
->st_mode
& ~S_IFMT
) == -1) {
172 if((fds
= open(src
->ptr
, O_RDONLY
)) == -1) {
/* common error path: ENOSPC is remembered so that the caller can tell
 * "disk full" (-1) apart from a recoverable failure (0) */
178 if(errno
== ENOSPC
) diskfull
= 1;
179 log_puts(2, err_data
);
180 log_puts(2, SPL(" "));
181 log_perror(err_func
);
185 if(errclose
&& fdd
!= -1) {
190 return diskfull
? -1 : 0;
192 if(dst
&& (fdd
= open(dst
->ptr
, O_WRONLY
| O_CREAT
| O_TRUNC
, src_stat
->st_mode
)) == -1) {
/* the loop is bounded by the size recorded in src_stat, not by EOF alone */
200 while(done
< (uint64_t) src_stat
->st_size
) {
201 ssize_t nread
= read(fds
, buf
, sizeof buf
);
207 } else if (nread
== 0)
210 ssize_t nwrote
= 0, nwrote_s
;
212 CRC32C_Update(&crc
, (const uint8_t*) buf
, nread
);
/* write() may be partial, so keep writing until the whole chunk is out */
214 if(fdd
!= -1) while(nwrote
< nread
) {
215 nwrote_s
= write(fdd
, buf
+ nwrote
, nread
- nwrote
);
228 if (fdd
!= -1) close(fdd
);
229 CRC32C_Final(crc_result
->asChar
, &crc
);
/* Carries out the configured action for one file that was found to need
 * syncing.
 *   src, dst   source path and the path that is written to
 *   src_stat   lstat() result of src
 *   reason     short text naming the rule that triggered the sync; it is
 *              echoed in the log line
 * ACT_SIMULATE_SYNC zeroes the CRC instead of reading the file, ACT_PRINT
 * logs the source name, and the copy path goes through get_crc_and_copy().
 * A copy result <= 0 is counted in progstate.total.errors. */
233 static int doSync(stringptr
* src
, stringptr
* dst
, struct stat
*src_stat
, char* reason
) {
236 struct timeval starttime
;
239 if(progstate
.action
== ACT_SIMULATE_SYNC
) {
240 crc_result
.asInt
= 0;
243 } else if(progstate
.action
== ACT_PRINT
) {
244 log_put(1, VARIS(src
), NULL
);
248 gettimestamp(&starttime
);
250 // we always compute the CRC, because it's nearly "for free",
251 // since the file has to be read anyway for the copy.
252 if((copyret
= get_crc_and_copy(src
, dst
, src_stat
, &crc_result
)) <= 0) {
253 progstate
.total
.errors
++;
/* carry the source's timestamps over to the fresh copy */
257 copyDate(dst
, src_stat
);
261 time_passed
= mspassed(&starttime
);
/* the CRC is passed through htonl() before being formatted as 8 hex digits */
264 ulz_snprintf(crc_str
, sizeof(crc_str
), "%.8x", htonl(crc_result
.asInt
));
266 // we do not use printf because it has a limited buffer size
267 log_put(1, VARISL("CRC: "), VARIC(crc_str
), VARISL(", "),
268 VARIS(src
), VARISL(" -> "), VARIS(dst
),
269 VARISL(" @"), VARIC(getMbsString(mbs_str
, sizeof(mbs_str
), src_stat
->st_size
, time_passed
)),
270 VARISL(" ("), VARIC(reason
), VARISL(")"),
/* statistics: bytes transferred and number of files */
273 progstate
.total
.copied
+= src_stat
->st_size
;
274 progstate
.total
.copies
+= 1;
280 struct stat
* file_stat
;
/* pthread entry point: checksums td->fn without producing a copy (dst is
 * NULL) and records failure in td->error.
 * NOTE(review): `!ret` only maps a return of 0 to an error; a -1 return
 * would be stored as "no error" - confirm -1 cannot occur when no
 * destination is written. */
285 static void* child_thread(void* data
) {
286 thread_data
* td
= (thread_data
*) data
;
287 td
->error
= !get_crc_and_copy(td
->fn
, NULL
, td
->file_stat
, td
->crc_res
);
/* Compares the CRC32C of src and dst; returns nonzero when they differ.
 * If either checksum cannot be computed the function returns 0, so the file
 * is treated as "not different".
 * When the two files live on different devices (st_dev differs) the source
 * is checksummed in a helper thread while the destination is checksummed on
 * the calling thread; on the same device both are read one after the other. */
291 static int checksumDiffers(stringptr
* src
, stringptr
* dst
, struct stat
* src_stat
, struct stat
* dst_stat
) {
292 crc_t crc_src
, crc_dst
;
293 // TODO: what happens if src is no fifo, but dst is ?
294 // TODO: handle S_ISBLK S_ISSOCK S_ISCHR
295 // (S_ISREG may be used to check whether regular file)
296 if(S_ISFIFO(src_stat
->st_mode
))
298 if((src_stat
->st_dev
!= dst_stat
->st_dev
)) {
299 pthread_attr_t ptattr
;
301 thread_data td
= {.fn
= src
, .file_stat
= src_stat
, .crc_res
= &crc_src
, .error
= 0};
/* pthread functions return the error number instead of setting errno, so it
 * is stored into errno by hand for the error reporting */
304 if((errno
= pthread_attr_init(&ptattr
))) {
305 errmsg
= "pthread_attr_init";
/* the helper thread gets a 128 KiB stack */
310 if((errno
= pthread_attr_setstacksize(&ptattr
, 128 * 1024))) {
/* NOTE(review): this is the pthread_attr_setstacksize failure path, but the
 * message names pthread_attr_init - looks like a copy-paste slip; the text
 * should read "pthread_attr_setstacksize". */
311 errmsg
= "pthread_attr_init";
314 if((errno
= pthread_create(&child
, &ptattr
, child_thread
, (void*) &td
))) {
315 errmsg
= "pthread_create";
/* destination side runs here, in parallel with the helper thread */
319 if(!get_crc_and_copy(dst
, NULL
, dst_stat
, &crc_dst
)) error
= 1;
321 if((errno
= pthread_join(child
, NULL
))) {
322 errmsg
= "pthread_join";
325 if((errno
= pthread_attr_destroy(&ptattr
))) {
326 errmsg
= "pthread_attr_destroy";
330 if(td
.error
|| error
) return 0;
/* same device: read both files sequentially */
333 if(!get_crc_and_copy(src
, NULL
, src_stat
, &crc_src
)) return 0;
334 if(!get_crc_and_copy(dst
, NULL
, dst_stat
, &crc_dst
)) return 0;
337 return crc_src
.asInt
!= crc_dst
.asInt
;
/* Lets the user-supplied compare script decide whether src and dst differ.
 * The script (progstate.script) is exec'ed with the two paths as arguments;
 * its exit status is returned, so a nonzero exit means "files differ".
 * A waitpid() failure terminates the program; an abnormal termination of the
 * script is reported on stderr.
 * NOTE(review): the process creation and the return value used after an
 * abnormal termination are not shown here. */
340 static int scriptDiffers(stringptr
* src
, stringptr
* dst
) {
344 ret
= waitpid(pid
, &r
, 0);
345 if(ret
== -1) { log_perror("waitpid"); exit(1); }
347 if(WIFEXITED(r
) == 0) {
348 ulz_fprintf(2, "compare script terminated abnormally while comparing %s and %s\n", src
->ptr
, dst
->ptr
);
351 return WEXITSTATUS(r
);
353 execl(progstate
.script
, progstate
.script
, src
->ptr
, dst
->ptr
, (char*)0);
/* Decides whether one source file has to be synced and, if so, hands it to
 * doSync().
 *   src   source file
 *   dst   file it is compared against
 *   diff  path that is actually written (same as dst unless a separate
 *         diff directory is in use)
 *   ss    lstat() result of src
 * The enabled checks form an else-if chain, so the first one that fires
 * wins: size, newer mtime, older mtime, checksum, compare script. A missing
 * destination is handled up front via checkExists. */
359 static int doFile(stringptr
* src
, stringptr
* dst
, stringptr
* diff
, struct stat
* ss
) {
362 if(progstate
.verbose
) ulz_fprintf(2, "file: %s\n", src
->ptr
);
363 if(stat(dst
->ptr
, &sd
) == -1) {
366 if(progstate
.checkExists
) {
/* note: the copy goes to `diff`, not to `dst` */
369 return doSync(src
, diff
, ss
, reason
);
374 log_puts(2, SPL(" "));
375 log_perror("stat dest");
379 if(progstate
.checkFileSize
&& ss
->st_size
!= sd
.st_size
) {
382 } else if (progstate
.checkDate
&& ss
->st_mtime
> sd
.st_mtime
) {
385 } else if (progstate
.checkDateOlder
&& ss
->st_mtime
< sd
.st_mtime
) {
388 } else if(progstate
.checkChecksum
&& checksumDiffers(src
, dst
, ss
, &sd
)) {
391 } else if(progstate
.script
&& scriptDiffers(src
, dst
)) {
/* warning only: reached when no rule fired and "older" syncing is off */
394 } else if (!progstate
.checkDateOlder
&& progstate
.warnNewer
&& ss
->st_mtime
< sd
.st_mtime
) {
395 ulz_fprintf(2, "dest is newer than source: %s , %s : %llu , %llu\n", src
->ptr
, dst
->ptr
, (ull
) ss
->st_mtime
, (ull
) sd
.st_mtime
);
398 progstate
.total
.skipped
+= 1;
/* Sets access and modification time of the symlink itself (not its target)
 * to the values in `ss`, using lutimes(). tv[0] carries the access time,
 * tv[1] the modification time; the nanosecond parts of `ss` are reduced to
 * microseconds. A failure is logged and otherwise ignored. */
402 static void setLinkTimestamp(stringptr
* link
, struct stat
* ss
) {
403 struct timeval tv
[2];
404 tv
[0].tv_sec
= ss
->st_atime
;
405 tv
[0].tv_usec
= ss
->st_atim
.tv_nsec
/ 1000;
406 tv
[1].tv_sec
= ss
->st_mtime
;
407 tv
[1].tv_usec
= ss
->st_mtim
.tv_nsec
/ 1000;
408 if(lutimes(link
->ptr
, tv
) == -1) {
409 log_perror("lutimes");
/* Recreates the symlink `src` at `dst`: the link target is read with
 * readlink(), anything already present at dst is unlinked, the new link is
 * created and its timestamps are set from `ss`. Only ACT_SYNC performs the
 * work; other actions jump to `skip`. progstate.total.symlink is bumped at
 * the end. */
413 // FIXME don't copy symlink if the target is equal
414 // FIXME don't increment progstate.total.symlink in case of failure
415 static void doLink(stringptr
* src
, stringptr
* dst
, struct stat
* ss
) {
420 if(progstate
.action
!= ACT_SYNC
) goto skip
;
/* one byte of buf is held back: readlink() does not NUL-terminate */
422 ret
= readlink(src
->ptr
, buf
, sizeof(buf
) - 1);
425 log_puts(2, SPL(" "));
426 log_perror("readlink");
430 log_puts(2, SPL(" "));
431 log_puts(2, SPL("readlink returned 0"));
436 wasdir
= removeTrailingSlash(dst
);
/* everything except "lstat failed with ENOENT" is treated as "dst exists" */
438 if(!(lstat(dst
->ptr
, &sd
) == -1 && errno
== ENOENT
)) {
439 //dst already exists, we need to unlink it for symlink to succeed
440 //if(S_ISLNK(sd.st_mode))
441 if(unlink(dst
->ptr
) == -1) {
443 log_puts(2, SPL(" "));
444 log_perror("unlink");
448 if(symlink(buf
, dst
->ptr
) == -1) {
450 log_puts(2, SPL(" -> "));
452 log_puts(2, SPL(" "));
453 log_perror("symlink");
456 log_puts(1, SPL(" >> "));
461 setLinkTimestamp(dst
, ss
);
464 restoreTrailingSlash(dst
);
466 progstate
.total
.symlink
+= 1;
/* Tells whether `dir` is on the exclude list. The path is canonicalised
 * with realpath() first and the result is looked up in progstate.excludes.
 * Returns 0 when no exclude list exists or when the path cannot be
 * resolved. */
469 static int excludelist_contains(stringptr
* dir
) {
470 char resolv
[PATH_MAX
];
471 if(!progstate
.excludes
) return 0;
472 if(!realpath(dir
->ptr
, resolv
)) return 0;
474 t
= stringptr_fromchar(resolv
, &temp
);
475 return stringptrlist_contains(progstate
.excludes
, t
);
/* Recursively processes the sub-directory `subd`, which is appended to
 * srcdir, dstdir and diffdir to form the three working paths.
 * The directory is left alone when its source or destination path is on the
 * exclude list. For every entry:
 *   - symlinks are recreated with doLink()
 *   - directories (entries listed with a trailing '/') are created in the
 *     diff tree if missing, descended into, and get their timestamp updated
 *     afterwards
 *   - everything else goes through doFile()
 * A configured glob filters symlinks and files; directories are always
 * descended. Returns -1 as soon as makeDir/doDir/doFile reported -1, which
 * the top level reports as "disk full". */
478 static int doDir(stringptr
* subd
) {
481 stringptr
*combined_src
= stringptr_concat(progstate
.srcdir
, subd
, NULL
);
482 stringptr
*combined_dst
= stringptr_concat(progstate
.dstdir
, subd
, NULL
);
483 stringptr
*combined_diff
= stringptr_concat(progstate
.diffdir
, subd
, NULL
);
485 if(excludelist_contains(combined_src
) || excludelist_contains(combined_dst
))
488 struct stat src_stat
;
490 if(!filelist_search(&f
, combined_src
, SPL("*"), FLF_EXCLUDE_PATH
| FLF_INCLUDE_HIDDEN
)) {
492 stringptr
* file_combined_src
;
493 stringptr
* file_combined_dst
;
494 stringptr
* file_combined_diff
;
495 sblist_iter(f
.files
, file
) {
496 file_combined_src
= stringptr_concat(combined_src
, file
, NULL
);
497 file_combined_dst
= stringptr_concat(combined_dst
, file
, NULL
);
498 file_combined_diff
= stringptr_concat(combined_diff
, file
, NULL
);
500 removeTrailingSlash(file_combined_src
); // remove trailing slash so stat doesnt resolve symlinks...
502 if(lstat(file_combined_src
->ptr
, &src_stat
) == -1) {
503 log_puts(2, file_combined_src
);
504 log_puts(2, SPL(" "));
507 if(S_ISLNK(src_stat
.st_mode
)) {
/* fnmatch() returns 0 on a match */
508 if(!progstate
.glob
|| !fnmatch(progstate
.glob
, file
->ptr
, 0))
509 doLink(file_combined_src
, file_combined_diff
, &src_stat
);
511 } else if(isdir(file
)) {
512 restoreTrailingSlash(file_combined_src
);
514 stringptr
*path_combined
= stringptr_concat(subd
, file
, NULL
);
/* create the directory in the diff tree only if it is not there yet */
515 if(progstate
.action
== ACT_SYNC
&& access(file_combined_diff
->ptr
, R_OK
) == -1 && errno
== ENOENT
) {
516 if(makeDir(file_combined_diff
, &src_stat
) == -1) ret
= -1;
518 // else updateTimestamp(file_combined_dst, &src_stat);
519 if(ret
!= -1 && doDir(path_combined
) == -1) ret
= -1;
520 stringptr_free(path_combined
);
/* timestamp is set after the recursion, once the directory's contents have
 * been written */
521 if(ret
!= -1 && progstate
.action
== ACT_SYNC
)
522 updateTimestamp(file_combined_diff
, &src_stat
);
524 if(!progstate
.glob
|| !fnmatch(progstate
.glob
, file
->ptr
, 0))
525 if(doFile(file_combined_src
, file_combined_dst
, file_combined_diff
, &src_stat
) == -1) ret
= -1;
528 stringptr_free(file_combined_src
);
529 stringptr_free(file_combined_dst
);
530 stringptr_free(file_combined_diff
);
535 log_put(2, VARISL("glob error: "), VARIS(combined_src
), NULL
);
539 stringptr_free(combined_src
);
540 stringptr_free(combined_dst
);
541 stringptr_free(combined_diff
);
/* Prints the run totals from progstate.total to stdout (fd 1): number of
 * copies, skipped files, symlinks, errors, bytes copied, and the overall
 * transfer rate derived from the bytes copied and `ms`, the elapsed time in
 * milliseconds. All counters are cast to unsigned long long to match the
 * %llu conversions. */
545 static void printStats(long ms
) {
547 ulz_fprintf(1, "copied: %llu\n"
551 "bytes copied: %llu\n"
554 (ull
) progstate
.total
.copies
,
555 (ull
) progstate
.total
.skipped
,
556 (ull
) progstate
.total
.symlink
,
557 (ull
) progstate
.total
.errors
,
558 (ull
) progstate
.total
.copied
,
560 getMbsString(mbs_buf
, sizeof(mbs_buf
), progstate
.total
.copied
, ms
)
/* Prints the usage text to stdout. The int result is handed back by main()
 * as the process exit status.
 * NOTE(review): the one-line options summary omits -p[rint], although the
 * flag is described in the list below and accepted by main(). */
564 static int syntax() {
565 log_puts(1, SPL("filesync OPTIONS srcdir dstdir [diffdir]\n\n"
566 "if diffdir is given, the program will check for files in destdir,\n"
567 "but will write into diffdir instead. this allows usage as a simple\n"
568 "incremental backup tool.\n\n"
569 "\toptions: -s[imulate] -e[xists] -d[ate] -o[lder] -f[ilesize] -c[hecksum] -w[arn] -v[erbose]\n"
570 "\t-s : only simulate and print to stdout (dry run)\n"
571 "\t note: will not print symlinks currently\n"
572 "\t-p : only print filenames of matching source files\n"
573 "\t-e : copy source files that dont exist on the dest side\n"
574 "\t-f : copy source files with different filesize\n"
575 "\t-d : copy source files with newer timestamp (modtime)\n"
576 "\t-o : copy source files with older timestamp (modtime)\n"
577 "\t-c : copy source files if checksums are different\n"
578 "\t-w : warn if dest is newer than src\n"
579 "\t-v : verbose: always print actual filename, even when skipping\n"
580 "\t--glob=\"*.o\" only sync files that match glob\n\n"
581 "\t--script=./foo.sh execute ./foo.sh to decide if files differ\n"
582 " the script will get passed both filenames and must return\n"
583 " true when they are equal, false if not\n\n"
584 "\t--exclude=... : colon-separated list of src directories to exclude\n"
585 " the directories must end with a slash, and must start identical\n"
586 " to the srcdir parameter (e.g. './foo/' if srcdir is '.')\n"
587 " files in excluded dirs are exempt from the 'skipped' statistics.\n\n"
588 "\t--exclude-file: enables support for directory-specific exclude file:\n"
589 " if on either src or dst side a file .filesync-exclude.conf is\n"
590 " encountered, its contents (line-based) will be interpreted as\n"
591 " paths relative from the directory containing it, and be excluded\n"
592 " from the sync process.\n"
593 " when using the option, check before every sync whether such a file exists and contents are relevant!\n\n"
595 "filesync will always use the rule that has the least\n"
596 "runtime cost, e.g. a CRC-check will only be done\n"
597 "if the file has the same size and modtime, if filesize check\n"
598 "or modtime check are also enabled.\n\n"
599 "WARNING: you should *always* redirect stdout and stderr\n"
600 "into some logfile. to see the actual state, attach with\n"
601 "tail -f or tee...\n"
602 "After a full run you can pipe the stdout.txt into the supplied\n"
603 "perl script which can check the CRCs, in case you want to\n"
604 "verify the copy. it is proposed that this run happens separately,\n"
605 "so that the copied files are no longer buffered.\n\n"
/* Adds the directories named in `dirs` to the global exclude list.
 *   dirs  list of path strings; it is freed before returning, so the caller
 *         must not use it afterwards
 *   base  prefix each entry is interpreted relative to; an empty base means
 *         the entries are used as given
 * Every entry is canonicalised with realpath() so that later lookups compare
 * resolved paths; entries that cannot be resolved are reported on stderr and
 * dropped. An existing progstate.excludes list is extended, otherwise a new
 * one is created. */
610 static void setup_excludes(stringptrlist
*dirs
, stringptr
*base
) {
611 stringptrlist
*ret
= progstate
.excludes
;
612 if(!ret
) ret
= stringptrlist_new(stringptrlist_getsize(dirs
));
614 sblist_iter(dirs
, curr
) {
615 if(!stringptr_getsize(curr
)) continue; // skip empty line (would result in base added)
616 char dir
[PATH_MAX
], resolv
[PATH_MAX
];
/* NOTE(review): the snprintf() result is not checked; an over-long
 * base + entry is truncated silently before it reaches realpath(). */
617 snprintf(dir
, sizeof dir
, "%s%s%s", stringptr_get(base
), stringptr_getsize(base
) ? "/" : "", stringptr_get(curr
));
618 if(realpath(dir
, resolv
)) {
619 SPDECLAREC(tmp
, resolv
);
620 stringptrlist_add_strdup(ret
, tmp
);
622 ulz_fprintf(2, "warning: couldn't resolve exclude path %s (%s)\n", dir
, strerror(errno
));
625 stringptrlist_free(dirs
);
626 progstate
.excludes
= ret
;
/* Loads "<base>/.filesync-exclude.conf", splits its contents into lines and
 * registers each line as an exclude path relative to `base` through
 * setup_excludes(). In verbose mode the file name is announced on stderr.
 * NOTE(review): the handling of a missing/unreadable file is not shown
 * here. */
629 static void read_exclude_file(stringptr
*base
) {
631 snprintf(fn
, sizeof fn
, "%s/.filesync-exclude.conf", stringptr_get(base
));
632 stringptr
*fc
= stringptr_fromfile(fn
);
634 if(progstate
.verbose
)
635 ulz_fprintf(2, "processing exclude file: %s\n", fn
);
636 stringptrlist
*lines
= stringptr_splitc(fc
, '\n');
637 setup_excludes(lines
, base
);
/* Program entry: parses the options, determines source, destination and the
 * optional diff directory, prepares the exclude list, runs the recursive
 * sync and prints the statistics.
 * Fewer than 4 argv entries (program name, two directories and at least one
 * more argument) prints the usage text instead. */
640 int main (int argc
, char** argv
) {
642 if(argc
< 4) return syntax();
644 int freedst
= 0, freediff
= 0;
645 int dirargs
= 0, i
, ret
= 0;
646 struct timeval starttime
;
647 struct stat src_stat
;
649 op_state op_b
, *op
= &op_b
;
651 op_init(op
, argc
, argv
);
/* default action is a real sync; -s and -p override it, and -p wins when
 * both are given because it is tested last */
653 progstate
.action
= ACT_SYNC
;
655 if(op_hasflag(op
, SPL("s")) || op_hasflag(op
, SPL("simulate")))
656 progstate
.action
= ACT_SIMULATE_SYNC
;
657 if(op_hasflag(op
, SPL("p")) || op_hasflag(op
, SPL("print")))
658 progstate
.action
= ACT_PRINT
;
/* every rule accepts a short and a long spelling */
660 progstate
.checkExists
= op_hasflag(op
, SPL("e")) || op_hasflag(op
, SPL("exists"));
661 progstate
.checkFileSize
= op_hasflag(op
, SPL("f")) || op_hasflag(op
, SPL("filesize"));
662 progstate
.checkDate
= op_hasflag(op
, SPL("d")) || op_hasflag(op
, SPL("date"));
663 progstate
.checkDateOlder
= op_hasflag(op
, SPL("o")) || op_hasflag(op
, SPL("older"));
664 progstate
.checkChecksum
= op_hasflag(op
, SPL("c")) || op_hasflag(op
, SPL("checksum"));
665 progstate
.warnNewer
= op_hasflag(op
, SPL("w")) || op_hasflag(op
, SPL("warn"));
666 progstate
.verbose
= op_hasflag(op
, SPL("v")) || op_hasflag(op
, SPL("verbose"));
667 progstate
.glob
= op_get(op
, SPL("glob"));
668 progstate
.script
= op_get(op
, SPL("script"));
669 progstate
.excludefile
= op_hasflag(op
, SPL("exclude-file"));
/* every argument that does not start with '-' counts as a directory */
671 for(i
= 1; i
< argc
; i
++)
672 if(argv
[i
][0] != '-') dirargs
++;
674 if(dirargs
< 2 || dirargs
> 3) {
675 log_puts(2, SPL("invalid arguments detected\n"));
/* the directories are taken from the tail of argv, so options have to
 * precede them on the command line */
679 startarg
= argc
- dirargs
;
681 memset(&progstate
.total
, 0, sizeof(totals
));
683 progstate
.srcdir
= stringptr_fromchar(argv
[startarg
], &progstate
.srcdir_b
);
684 progstate
.dstdir
= stringptr_fromchar(argv
[startarg
+1], &progstate
.dstdir_b
);
/* without a third directory the diff dir is the destination itself */
685 progstate
.diffdir
= stringptr_fromchar((dirargs
== 3) ? argv
[startarg
+2] : argv
[startarg
+1], &progstate
.diffdir_b
);
687 progstate
.excludes
= 0;
689 char *exc
= op_get(op
, SPL("exclude"));
692 stringptr_fromchar(exc
, &tmp
);
/* --exclude entries are colon-separated and used without a base prefix */
693 setup_excludes(stringptr_splitc(&tmp
, ':'), SPL(""));
/* a missing diff directory is created with the source directory's mode */
697 if(access(progstate
.diffdir
->ptr
, R_OK
) == -1) {
698 if(errno
== ENOENT
) {
699 if(stat(progstate
.srcdir
->ptr
, &src_stat
) == -1) {
703 makeDir(progstate
.diffdir
, &src_stat
);
705 log_perror("uncaught error while trying to access dest/diff dir");
/* the path concatenation in doDir() relies on a trailing slash */
710 if(!isdir(progstate
.dstdir
)) {
711 progstate
.dstdir
= stringptr_concat(progstate
.dstdir
, SPL("/"), NULL
);
715 if(!isdir(progstate
.diffdir
)) {
716 progstate
.diffdir
= stringptr_concat(progstate
.diffdir
, SPL("/"), NULL
);
720 if(progstate
.excludefile
) {
721 read_exclude_file(progstate
.srcdir
);
722 read_exclude_file(progstate
.dstdir
);
725 gettimestamp(&starttime
);
/* a source dir given without trailing slash starts the walk at "/" so that
 * the concatenated paths stay well-formed */
729 if(doDir(isdir(progstate
.srcdir
) ? SPL("") : SPL("/")) == -1) {
730 // TODO: also return error if other errors happened ?
731 log_puts(2, SPL("FATAL: disk full, premature termination!\n"));
735 if(progstate
.action
!= ACT_PRINT
) printStats(mspassed(&starttime
));
737 if(freedst
) stringptr_free(progstate
.dstdir
);
738 if(freediff
) stringptr_free(progstate
.diffdir
);
739 if(progstate
.excludes
) stringptrlist_free(progstate
.excludes
);