You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1403 lines
36 KiB
1403 lines
36 KiB
/* hardlink.c - Link multiple identical files together |
|
* |
|
* Copyright (C) 2008 - 2014 Julian Andres Klode <jak@jak-linux.org> |
|
* Copyright (C) 2021 Karel Zak <kzak@redhat.com> |
|
* |
|
* SPDX-License-Identifier: MIT |
|
* |
|
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|
* of this software and associated documentation files (the "Software"), to deal |
|
* in the Software without restriction, including without limitation the rights |
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|
* copies of the Software, and to permit persons to whom the Software is |
|
* furnished to do so, subject to the following conditions: |
|
* |
|
* The above copyright notice and this permission notice shall be included in |
|
* all copies or substantial portions of the Software. |
|
* |
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
|
* THE SOFTWARE. |
|
*/ |
|
#define _POSIX_C_SOURCE 200112L /* POSIX functions */ |
|
#define _XOPEN_SOURCE 600 /* nftw() */ |
|
|
|
#include <sys/types.h> /* stat */ |
|
#include <sys/stat.h> /* stat */ |
|
#include <sys/time.h> /* getrlimit, getrusage */ |
|
#include <sys/resource.h> /* getrlimit, getrusage */ |
|
#include <fcntl.h> /* posix_fadvise */ |
|
#include <ftw.h> /* ftw */ |
|
#include <search.h> /* tsearch() and friends */ |
|
#include <signal.h> /* SIG*, sigaction */ |
|
#include <getopt.h> /* getopt_long() */ |
|
#include <ctype.h> /* tolower() */ |
|
#include <sys/ioctl.h> |
|
|
|
#if defined(HAVE_LINUX_FIEMAP_H) && defined(HAVE_SYS_VFS_H) |
|
# include <linux/fs.h> |
|
# include <linux/fiemap.h> |
|
# ifdef FICLONE |
|
# define USE_REFLINK 1 |
|
# endif |
|
#endif |
|
|
|
#include "nls.h" |
|
#include "c.h" |
|
#include "xalloc.h" |
|
#include "strutils.h" |
|
#include "monotonic.h" |
|
#include "optutils.h" |
|
#include "fileeq.h" |
|
|
|
#ifdef USE_REFLINK |
|
# include "statfs_magic.h" |
|
#endif |
|
|
|
#include <regex.h> /* regcomp(), regexec() */ |
|
|
|
#if defined(HAVE_SYS_XATTR_H) && defined(HAVE_LLISTXATTR) && defined(HAVE_LGETXATTR) |
|
# include <sys/xattr.h> |
|
# define USE_XATTR 1 |
|
#endif |
|
|
|
static int quiet; /* don't print anything */ |
|
|
|
#ifdef USE_REFLINK |
|
enum { |
|
REFLINK_NEVER = 0, |
|
REFLINK_AUTO, |
|
REFLINK_ALWAYS |
|
}; |
|
static int reflink_mode = REFLINK_NEVER; |
|
static int reflinks_skip; |
|
#endif |
|
|
|
static struct ul_fileeq fileeq; |
|
|
|
/** |
|
* struct file - Information about a file |
|
* @st: The stat buffer associated with the file |
|
* @next: Next file with the same size |
|
* @basename: The offset off the basename in the filename |
|
* @path: The path of the file |
|
* |
|
* This contains all information we need about a file. |
|
*/ |
|
struct file { |
|
struct stat st; |
|
struct ul_fileeq_data data; |
|
|
|
struct file *next; |
|
struct link { |
|
struct link *next; |
|
int basename; |
|
#if __STDC_VERSION__ >= 199901L |
|
char path[]; |
|
#elif __GNUC__ |
|
char path[0]; |
|
#else |
|
char path[1]; |
|
#endif |
|
} *links; |
|
}; |
|
|
|
/** |
|
* enum log_level - Logging levels |
|
* @JLOG_SUMMARY: Default log level |
|
* @JLOG_INFO: Verbose logging (verbose == 1) |
|
* @JLOG_VERBOSE1: Verbosity 2 |
|
* @JLOG_VERBOSE2: Verbosity 3 |
|
*/ |
|
enum log_level { |
|
JLOG_SUMMARY, |
|
JLOG_INFO, |
|
JLOG_VERBOSE1, |
|
JLOG_VERBOSE2 |
|
}; |
|
|
|
/** |
|
* struct statistic - Statistics about the file |
|
* @started: Whether we are post command-line processing |
|
* @files: The number of files worked on |
|
* @linked: The number of files replaced by a hardlink to a master |
|
* @xattr_comparisons: The number of extended attribute comparisons |
|
* @comparisons: The number of comparisons |
|
* @saved: The (exaggerated) amount of space saved |
|
* @start_time: The time we started at |
|
*/ |
|
static struct statistics { |
|
int started; |
|
size_t files; |
|
size_t linked; |
|
size_t xattr_comparisons; |
|
size_t comparisons; |
|
size_t ignored_reflinks; |
|
double saved; |
|
struct timeval start_time; |
|
} stats; |
|
|
|
|
|
struct hdl_regex { |
|
regex_t re; /* POSIX compatible regex handler */ |
|
|
|
struct hdl_regex *next; |
|
}; |
|
|
|
/** |
|
* struct options - Processed command-line options |
|
* @include: A linked list of regular expressions for the --include option |
|
* @exclude: A linked list of regular expressions for the --exclude option |
|
* @verbosity: The verbosity. Should be one of #enum log_level |
|
* @respect_mode: Whether to respect file modes (default = TRUE) |
|
* @respect_owner: Whether to respect file owners (uid, gid; default = TRUE) |
|
* @respect_name: Whether to respect file names (default = FALSE) |
|
* @respect_time: Whether to respect file modification times (default = TRUE) |
|
* @respect_xattrs: Whether to respect extended attributes (default = FALSE) |
|
* @maximise: Chose the file with the highest link count as master |
|
* @minimise: Chose the file with the lowest link count as master |
|
* @keep_oldest: Choose the file with oldest timestamp as master (default = FALSE) |
|
* @dry_run: Specifies whether hardlink should not link files (default = FALSE) |
|
* @min_size: Minimum size of files to consider. (default = 1 byte) |
|
* @max_size: Maximum size of files to consider, 0 means umlimited. (default = 0 byte) |
|
*/ |
|
static struct options { |
|
struct hdl_regex *include; |
|
struct hdl_regex *exclude; |
|
|
|
const char *method; |
|
signed int verbosity; |
|
unsigned int respect_mode:1; |
|
unsigned int respect_owner:1; |
|
unsigned int respect_name:1; |
|
unsigned int respect_time:1; |
|
unsigned int respect_xattrs:1; |
|
unsigned int maximise:1; |
|
unsigned int minimise:1; |
|
unsigned int keep_oldest:1; |
|
unsigned int dry_run:1; |
|
uintmax_t min_size; |
|
uintmax_t max_size; |
|
size_t io_size; |
|
size_t cache_size; |
|
} opts = { |
|
/* default setting */ |
|
#ifdef USE_FILEEQ_CRYPTOAPI |
|
.method = "sha256", |
|
#else |
|
.method = "memcmp", |
|
#endif |
|
.respect_mode = TRUE, |
|
.respect_owner = TRUE, |
|
.respect_time = TRUE, |
|
.respect_xattrs = FALSE, |
|
.keep_oldest = FALSE, |
|
.min_size = 1, |
|
.cache_size = 10*1024*1024 |
|
}; |
|
|
|
/* |
|
* files |
|
* |
|
* A binary tree of files, managed using tsearch(). To see which nodes |
|
* are considered equal, see compare_nodes() |
|
*/ |
|
static void *files; |
|
static void *files_by_ino; |
|
|
|
/* |
|
* last_signal |
|
* |
|
* The last signal we received. We store the signal here in order to be able |
|
* to break out of loops gracefully and to return from our nftw() handler. |
|
*/ |
|
static int last_signal; |
|
|
|
|
|
#define is_log_enabled(_level) (quiet == 0 && (_level) <= (unsigned int)opts.verbosity) |
|
|
|
/** |
|
* jlog - Logging for hardlink |
|
* @level: The log level |
|
* @format: A format string for printf() |
|
*/ |
|
__attribute__((format(printf, 2, 3))) |
|
static void jlog(enum log_level level, const char *format, ...) |
|
{ |
|
va_list args; |
|
|
|
if (!is_log_enabled(level)) |
|
return; |
|
|
|
va_start(args, format); |
|
vfprintf(stdout, format, args); |
|
va_end(args); |
|
fputc('\n', stdout); |
|
} |
|
|
|
/** |
|
* CMP - Compare two numerical values, return 1, 0, or -1 |
|
* @a: First value |
|
* @b: Second value |
|
* |
|
* Used to compare two integers of any size while avoiding overflow. |
|
*/ |
|
#define CMP(a, b) ((a) > (b) ? 1 : ((a) < (b) ? -1 : 0)) |
|
|
|
/** |
|
* register_regex - Compile and insert a regular expression into list |
|
* @pregs: Pointer to a linked list of regular expressions |
|
* @regex: String containing the regular expression to be compiled |
|
*/ |
|
static void register_regex(struct hdl_regex **pregs, const char *regex) |
|
{ |
|
struct hdl_regex *link; |
|
int err; |
|
|
|
link = xmalloc(sizeof(*link)); |
|
|
|
if ((err = regcomp(&link->re, regex, REG_NOSUB | REG_EXTENDED)) != 0) { |
|
size_t size = regerror(err, &link->re, NULL, 0); |
|
char *buf = xmalloc(size + 1); |
|
|
|
regerror(err, &link->re, buf, size); |
|
|
|
errx(EXIT_FAILURE, _("could not compile regular expression %s: %s"), |
|
regex, buf); |
|
} |
|
link->next = *pregs; *pregs = link; |
|
} |
|
|
|
/** |
|
* match_any_regex - Match against multiple regular expressions |
|
* @pregs: A linked list of regular expressions |
|
* @what: The string to match against |
|
* |
|
* Checks whether any of the regular expressions in the list matches the |
|
* string. |
|
*/ |
|
static int match_any_regex(struct hdl_regex *pregs, const char *what) |
|
{ |
|
for (; pregs != NULL; pregs = pregs->next) { |
|
if (regexec(&pregs->re, what, 0, NULL, 0) == 0) |
|
return TRUE; |
|
} |
|
return FALSE; |
|
} |
|
|
|
/** |
|
* compare_nodes - Node comparison function |
|
* @_a: The first node (a #struct file) |
|
* @_b: The second node (a #struct file) |
|
* |
|
* Compare the two nodes for the binary tree. |
|
*/ |
|
static int compare_nodes(const void *_a, const void *_b) |
|
{ |
|
const struct file *a = _a; |
|
const struct file *b = _b; |
|
int diff = 0; |
|
|
|
if (diff == 0) |
|
diff = CMP(a->st.st_dev, b->st.st_dev); |
|
if (diff == 0) |
|
diff = CMP(a->st.st_size, b->st.st_size); |
|
|
|
return diff; |
|
} |
|
|
|
/** |
|
* compare_nodes_ino - Node comparison function |
|
* @_a: The first node (a #struct file) |
|
* @_b: The second node (a #struct file) |
|
* |
|
* Compare the two nodes for the binary tree. |
|
*/ |
|
static int compare_nodes_ino(const void *_a, const void *_b) |
|
{ |
|
const struct file *a = _a; |
|
const struct file *b = _b; |
|
int diff = 0; |
|
|
|
if (diff == 0) |
|
diff = CMP(a->st.st_dev, b->st.st_dev); |
|
if (diff == 0) |
|
diff = CMP(a->st.st_ino, b->st.st_ino); |
|
|
|
/* If opts.respect_name is used, we will restrict a struct file to |
|
* contain only links with the same basename to keep the rest simple. |
|
*/ |
|
if (diff == 0 && opts.respect_name) |
|
diff = strcmp(a->links->path + a->links->basename, |
|
b->links->path + b->links->basename); |
|
|
|
return diff; |
|
} |
|
|
|
/** |
|
* print_stats - Print statistics to stdout |
|
*/ |
|
static void print_stats(void) |
|
{ |
|
struct timeval end = { 0, 0 }, delta = { 0, 0 }; |
|
char *ssz; |
|
|
|
gettime_monotonic(&end); |
|
timersub(&end, &stats.start_time, &delta); |
|
|
|
jlog(JLOG_SUMMARY, "%-25s %s", _("Mode:"), |
|
opts.dry_run ? _("dry-run") : _("real")); |
|
jlog(JLOG_SUMMARY, "%-25s %s", _("Method:"), opts.method); |
|
jlog(JLOG_SUMMARY, "%-25s %zu", _("Files:"), stats.files); |
|
jlog(JLOG_SUMMARY, _("%-25s %zu files"), _("Linked:"), stats.linked); |
|
|
|
#ifdef USE_XATTR |
|
jlog(JLOG_SUMMARY, _("%-25s %zu xattrs"), _("Compared:"), |
|
stats.xattr_comparisons); |
|
#endif |
|
jlog(JLOG_SUMMARY, _("%-25s %zu files"), _("Compared:"), |
|
stats.comparisons); |
|
#ifdef USE_REFLINK |
|
if (reflinks_skip) |
|
jlog(JLOG_SUMMARY, _("%-25s %zu files"), _("Skipped reflinks:"), |
|
stats.ignored_reflinks); |
|
#endif |
|
ssz = size_to_human_string(SIZE_SUFFIX_3LETTER | |
|
SIZE_SUFFIX_SPACE | |
|
SIZE_DECIMAL_2DIGITS, stats.saved); |
|
|
|
jlog(JLOG_SUMMARY, "%-25s %s", _("Saved:"), ssz); |
|
free(ssz); |
|
|
|
jlog(JLOG_SUMMARY, _("%-25s %"PRId64".%06"PRId64" seconds"), _("Duration:"), |
|
(int64_t)delta.tv_sec, (int64_t)delta.tv_usec); |
|
} |
|
|
|
/** |
|
* handle_interrupt - Handle a signal |
|
* |
|
* Returns: %TRUE on SIGINT, SIGTERM; %FALSE on all other signals. |
|
*/ |
|
static int handle_interrupt(void) |
|
{ |
|
switch (last_signal) { |
|
case SIGINT: |
|
case SIGTERM: |
|
return TRUE; |
|
case SIGUSR1: |
|
print_stats(); |
|
putchar('\n'); |
|
break; |
|
} |
|
|
|
last_signal = 0; |
|
return FALSE; |
|
} |
|
|
|
#ifdef USE_XATTR |
|
|
|
/** |
|
* llistxattr_or_die - Wrapper for llistxattr() |
|
* |
|
* This does the same thing as llistxattr() except that it aborts if any error |
|
* other than "not supported" is detected. |
|
*/ |
|
static ssize_t llistxattr_or_die(const char *path, char *list, size_t size) |
|
{ |
|
ssize_t len = llistxattr(path, list, size); |
|
|
|
if (len < 0 && errno != ENOTSUP) |
|
err(EXIT_FAILURE, _("cannot get xattr names for %s"), path); |
|
|
|
return len; |
|
} |
|
|
|
/** |
|
* lgetxattr_or_die - Wrapper for lgetxattr() |
|
* |
|
* This does the same thing as lgetxattr() except that it aborts upon error. |
|
*/ |
|
static ssize_t lgetxattr_or_die(const char *path, |
|
const char *name, void *value, size_t size) |
|
{ |
|
ssize_t len = lgetxattr(path, name, value, size); |
|
|
|
if (len < 0) |
|
err(EXIT_FAILURE, _("cannot get xattr value of %s for %s"), |
|
name, path); |
|
|
|
return len; |
|
} |
|
|
|
/** |
|
* get_xattr_name_count - Count the number of xattr names |
|
* @names: a non-empty table of concatenated, null-terminated xattr names |
|
* @len: the total length of the table |
|
* |
|
* @Returns the number of xattr names |
|
*/ |
|
static int get_xattr_name_count(const char *const names, ssize_t len) |
|
{ |
|
int count = 0; |
|
const char *name; |
|
|
|
for (name = names; name < (names + len); name += strlen(name) + 1) |
|
count++; |
|
|
|
return count; |
|
} |
|
|
|
/** |
|
* cmp_xattr_name_ptrs - Compare two pointers to xattr names by comparing |
|
* the names they point to. |
|
*/ |
|
static int cmp_xattr_name_ptrs(const void *ptr1, const void *ptr2) |
|
{ |
|
return strcmp(*(char *const *)ptr1, *(char *const *)ptr2); |
|
} |
|
|
|
/** |
|
* get_sorted_xattr_name_table - Create a sorted table of xattr names. |
|
* @names - table of concatenated, null-terminated xattr names |
|
* @n - the number of names |
|
* |
|
* @Returns allocated table of pointers to the names, sorted alphabetically |
|
*/ |
|
static const char **get_sorted_xattr_name_table(const char *names, int n) |
|
{ |
|
const char **table = xmalloc(n * sizeof(char *)); |
|
int i; |
|
|
|
for (i = 0; i < n; i++) { |
|
table[i] = names; |
|
names += strlen(names) + 1; |
|
} |
|
|
|
qsort(table, n, sizeof(char *), cmp_xattr_name_ptrs); |
|
|
|
return table; |
|
} |
|
|
|
/** |
|
* file_xattrs_equal - Compare the extended attributes of two files |
|
* @a: The first file |
|
* @b: The second file |
|
* |
|
* @Returns: %TRUE if and only if extended attributes are equal |
|
*/ |
|
static int file_xattrs_equal(const struct file *a, const struct file *b) |
|
{ |
|
ssize_t len_a; |
|
ssize_t len_b; |
|
char *names_a = NULL; |
|
char *names_b = NULL; |
|
int n_a; |
|
int n_b; |
|
const char **name_ptrs_a = NULL; |
|
const char **name_ptrs_b = NULL; |
|
void *value_a = NULL; |
|
void *value_b = NULL; |
|
int ret = FALSE; |
|
int i; |
|
|
|
assert(a->links != NULL); |
|
assert(b->links != NULL); |
|
|
|
jlog(JLOG_VERBOSE1, _("Comparing xattrs of %s to %s"), a->links->path, |
|
b->links->path); |
|
|
|
stats.xattr_comparisons++; |
|
|
|
len_a = llistxattr_or_die(a->links->path, NULL, 0); |
|
len_b = llistxattr_or_die(b->links->path, NULL, 0); |
|
|
|
if (len_a <= 0 && len_b <= 0) |
|
return TRUE; // xattrs not supported or neither file has any |
|
|
|
if (len_a != len_b) |
|
return FALSE; // total lengths of xattr names differ |
|
|
|
names_a = xmalloc(len_a); |
|
names_b = xmalloc(len_b); |
|
|
|
len_a = llistxattr_or_die(a->links->path, names_a, len_a); |
|
len_b = llistxattr_or_die(b->links->path, names_b, len_b); |
|
assert((len_a > 0) && (len_a == len_b)); |
|
|
|
n_a = get_xattr_name_count(names_a, len_a); |
|
n_b = get_xattr_name_count(names_b, len_b); |
|
|
|
if (n_a != n_b) |
|
goto exit; // numbers of xattrs differ |
|
|
|
name_ptrs_a = get_sorted_xattr_name_table(names_a, n_a); |
|
name_ptrs_b = get_sorted_xattr_name_table(names_b, n_b); |
|
|
|
// We now have two sorted tables of xattr names. |
|
|
|
for (i = 0; i < n_a; i++) { |
|
if (handle_interrupt()) |
|
goto exit; // user wants to quit |
|
|
|
if (strcmp(name_ptrs_a[i], name_ptrs_b[i]) != 0) |
|
goto exit; // names at same slot differ |
|
|
|
len_a = |
|
lgetxattr_or_die(a->links->path, name_ptrs_a[i], NULL, 0); |
|
len_b = |
|
lgetxattr_or_die(b->links->path, name_ptrs_b[i], NULL, 0); |
|
|
|
if (len_a != len_b) |
|
goto exit; // xattrs with same name, different value lengths |
|
|
|
value_a = xmalloc(len_a); |
|
value_b = xmalloc(len_b); |
|
|
|
len_a = lgetxattr_or_die(a->links->path, name_ptrs_a[i], |
|
value_a, len_a); |
|
len_b = lgetxattr_or_die(b->links->path, name_ptrs_b[i], |
|
value_b, len_b); |
|
assert((len_a >= 0) && (len_a == len_b)); |
|
|
|
if (memcmp(value_a, value_b, len_a) != 0) |
|
goto exit; // xattrs with same name, different values |
|
|
|
free(value_a); |
|
free(value_b); |
|
value_a = NULL; |
|
value_b = NULL; |
|
} |
|
|
|
ret = TRUE; |
|
|
|
exit: |
|
free(names_a); |
|
free(names_b); |
|
free(name_ptrs_a); |
|
free(name_ptrs_b); |
|
free(value_a); |
|
free(value_b); |
|
return ret; |
|
} |
|
#else /* !USE_XATTR */ |
|
static int file_xattrs_equal(const struct file *a, const struct file *b) |
|
{ |
|
return TRUE; |
|
} |
|
#endif /* USE_XATTR */ |
|
|
|
/** |
|
* file_may_link_to - Check whether a file may replace another one |
|
* @a: The first file |
|
* @b: The second file |
|
* |
|
* Check whether the two files are considered equal attributes and can be |
|
* linked. This function does not compare content od the files! |
|
*/ |
|
static int file_may_link_to(const struct file *a, const struct file *b) |
|
{ |
|
return (a->st.st_size != 0 && |
|
a->st.st_size == b->st.st_size && |
|
a->links != NULL && b->links != NULL && |
|
a->st.st_dev == b->st.st_dev && |
|
a->st.st_ino != b->st.st_ino && |
|
(!opts.respect_mode || a->st.st_mode == b->st.st_mode) && |
|
(!opts.respect_owner || a->st.st_uid == b->st.st_uid) && |
|
(!opts.respect_owner || a->st.st_gid == b->st.st_gid) && |
|
(!opts.respect_time || a->st.st_mtime == b->st.st_mtime) && |
|
(!opts.respect_name |
|
|| strcmp(a->links->path + a->links->basename, |
|
b->links->path + b->links->basename) == 0) && |
|
(!opts.respect_xattrs || file_xattrs_equal(a, b))); |
|
} |
|
|
|
/** |
|
* file_compare - Compare two files to decide which should be master |
|
* @a: The first file |
|
* @b: The second file |
|
* |
|
* Check which of the files should be considered greater and thus serve |
|
* as the master when linking (the master is the file that all equal files |
|
* will be replaced with). |
|
*/ |
|
static int file_compare(const struct file *a, const struct file *b) |
|
{ |
|
int res = 0; |
|
if (a->st.st_dev == b->st.st_dev && a->st.st_ino == b->st.st_ino) |
|
return 0; |
|
|
|
if (res == 0 && opts.maximise) |
|
res = CMP(a->st.st_nlink, b->st.st_nlink); |
|
if (res == 0 && opts.minimise) |
|
res = CMP(b->st.st_nlink, a->st.st_nlink); |
|
if (res == 0) |
|
res = opts.keep_oldest ? CMP(b->st.st_mtime, a->st.st_mtime) |
|
: CMP(a->st.st_mtime, b->st.st_mtime); |
|
if (res == 0) |
|
res = CMP(b->st.st_ino, a->st.st_ino); |
|
|
|
return res; |
|
} |
|
|
|
#ifdef USE_REFLINK |
|
static inline int do_link(struct file *a, struct file *b, |
|
const char *new_name, int reflink) |
|
{ |
|
if (reflink) { |
|
int dest = -1, src = -1; |
|
|
|
dest = open(new_name, O_CREAT|O_WRONLY|O_TRUNC, 0600); |
|
if (dest < 0) |
|
goto fallback; |
|
if (fchmod(dest, b->st.st_mode) != 0) |
|
goto fallback; |
|
if (fchown(dest, b->st.st_uid, b->st.st_gid) != 0) |
|
goto fallback; |
|
src = open(a->links->path, O_RDONLY); |
|
if (src < 0) |
|
goto fallback; |
|
if (ioctl(dest, FICLONE, src) != 0) |
|
goto fallback; |
|
close(dest); |
|
close(src); |
|
return 0; |
|
fallback: |
|
if (dest >= 0) { |
|
close(dest); |
|
unlink(new_name); |
|
} |
|
if (src >= 0) |
|
close(src); |
|
|
|
if (reflink_mode == REFLINK_ALWAYS) |
|
return -errno; |
|
jlog(JLOG_VERBOSE2,_("Reflinking failed, fallback to hardlinking")); |
|
} |
|
|
|
return link(a->links->path, new_name); |
|
} |
|
#else |
|
static inline int do_link(struct file *a, |
|
struct file *b __attribute__((__unused__)), |
|
const char *new_name, |
|
int reflink __attribute__((__unused__))) |
|
{ |
|
return link(a->links->path, new_name); |
|
} |
|
#endif /* USE_REFLINK */ |
|
|
|
/** |
|
* file_link - Replace b with a link to a |
|
* @a: The first file |
|
* @b: The second file |
|
* |
|
* Link the file, replacing @b with the current one. The file is first |
|
* linked to a temporary name, and then renamed to the name of @b, making |
|
* the replace atomic (@b will always exist). |
|
*/ |
|
static int file_link(struct file *a, struct file *b, int reflink) |
|
{ |
|
|
|
file_link: |
|
assert(a->links != NULL); |
|
assert(b->links != NULL); |
|
|
|
if (is_log_enabled(JLOG_INFO)) { |
|
char *ssz = size_to_human_string(SIZE_SUFFIX_3LETTER | |
|
SIZE_SUFFIX_SPACE | |
|
SIZE_DECIMAL_2DIGITS, a->st.st_size); |
|
jlog(JLOG_INFO, _("%s%sLinking %s to %s (-%s)"), |
|
opts.dry_run ? _("[DryRun] ") : "", |
|
reflink ? "Ref" : "", |
|
a->links->path, b->links->path, |
|
ssz); |
|
free(ssz); |
|
} |
|
|
|
if (!opts.dry_run) { |
|
char *new_path; |
|
int failed = 1; |
|
|
|
xasprintf(&new_path, "%s.hardlink-temporary", b->links->path); |
|
|
|
if (do_link(a, b, new_path, reflink) != 0) |
|
warn(_("cannot link %s to %s"), a->links->path, new_path); |
|
|
|
else if (rename(new_path, b->links->path) != 0) { |
|
warn(_("cannot rename %s to %s"), a->links->path, new_path); |
|
unlink(new_path); |
|
} else |
|
failed = 0; |
|
|
|
free(new_path); |
|
if (failed) |
|
return FALSE; |
|
} |
|
|
|
/* Update statistics */ |
|
stats.linked++; |
|
|
|
/* Increase the link count of this file, and set stat() of other file */ |
|
a->st.st_nlink++; |
|
b->st.st_nlink--; |
|
|
|
if (b->st.st_nlink == 0) |
|
stats.saved += a->st.st_size; |
|
|
|
/* Move the link from file b to a */ |
|
{ |
|
struct link *new_link = b->links; |
|
|
|
b->links = b->links->next; |
|
new_link->next = a->links->next; |
|
a->links->next = new_link; |
|
} |
|
|
|
/* Do it again */ |
|
if (b->links) |
|
goto file_link; |
|
|
|
return TRUE; |
|
} |
|
|
|
static int has_fpath(struct file *node, const char *path) |
|
{ |
|
struct link *l; |
|
|
|
for (l = node->links; l; l = l->next) { |
|
if (strcmp(l->path, path) == 0) |
|
return 1; |
|
} |
|
|
|
return 0; |
|
} |
|
|
|
|
|
/** |
|
* inserter - Callback function for nftw() |
|
* @fpath: The path of the file being visited |
|
* @sb: The stat information of the file |
|
* @typeflag: The type flag |
|
* @ftwbuf: Contains current level of nesting and offset of basename |
|
* |
|
* Called by nftw() for the files. See the manual page for nftw() for |
|
* further information. |
|
*/ |
|
static int inserter(const char *fpath, const struct stat *sb, |
|
int typeflag, struct FTW *ftwbuf) |
|
{ |
|
struct file *fil; |
|
struct file **node; |
|
size_t pathlen; |
|
int included; |
|
int excluded; |
|
|
|
if (handle_interrupt()) |
|
return 1; |
|
if (typeflag == FTW_DNR || typeflag == FTW_NS) |
|
warn(_("cannot read %s"), fpath); |
|
if (typeflag != FTW_F || !S_ISREG(sb->st_mode)) |
|
return 0; |
|
|
|
included = match_any_regex(opts.include, fpath); |
|
excluded = match_any_regex(opts.exclude, fpath); |
|
|
|
if ((opts.exclude && excluded && !included) || |
|
(!opts.exclude && opts.include && !included)) |
|
return 0; |
|
|
|
stats.files++; |
|
|
|
if ((uintmax_t) sb->st_size < opts.min_size) { |
|
jlog(JLOG_VERBOSE1, |
|
_("Skipped %s (smaller than configured size)"), fpath); |
|
return 0; |
|
} |
|
|
|
jlog(JLOG_VERBOSE2, " %5zu: [%" PRIu64 "/%" PRIu64 "/%zu] %s", |
|
stats.files, sb->st_dev, sb->st_ino, |
|
(size_t) sb->st_nlink, fpath); |
|
|
|
if ((opts.max_size > 0) && ((uintmax_t) sb->st_size > opts.max_size)) { |
|
jlog(JLOG_VERBOSE1, |
|
_("Skipped %s (greater than configured size)"), fpath); |
|
return 0; |
|
} |
|
|
|
pathlen = strlen(fpath) + 1; |
|
|
|
fil = xcalloc(1, sizeof(*fil)); |
|
fil->links = xcalloc(1, sizeof(struct link) + pathlen); |
|
|
|
fil->st = *sb; |
|
fil->links->basename = ftwbuf->base; |
|
fil->links->next = NULL; |
|
|
|
memcpy(fil->links->path, fpath, pathlen); |
|
|
|
node = tsearch(fil, &files_by_ino, compare_nodes_ino); |
|
|
|
if (node == NULL) |
|
goto fail; |
|
|
|
if (*node != fil) { |
|
/* Already known inode, add link to inode information */ |
|
assert((*node)->st.st_dev == sb->st_dev); |
|
assert((*node)->st.st_ino == sb->st_ino); |
|
|
|
if (has_fpath(*node, fpath)) { |
|
jlog(JLOG_VERBOSE1, |
|
_("Skipped %s (specified more than once)"), fpath); |
|
free(fil->links); |
|
} else { |
|
fil->links->next = (*node)->links; |
|
(*node)->links = fil->links; |
|
} |
|
|
|
free(fil); |
|
} else { |
|
/* New inode, insert into by-size table */ |
|
node = tsearch(fil, &files, compare_nodes); |
|
|
|
if (node == NULL) |
|
goto fail; |
|
|
|
if (*node != fil) { |
|
struct file *l; |
|
|
|
if (file_compare(fil, *node) >= 0) { |
|
fil->next = *node; |
|
*node = fil; |
|
} else { |
|
for (l = *node; l != NULL; l = l->next) { |
|
if (l->next != NULL |
|
&& file_compare(fil, l->next) < 0) |
|
continue; |
|
|
|
fil->next = l->next; |
|
l->next = fil; |
|
|
|
break; |
|
} |
|
} |
|
} |
|
} |
|
|
|
return 0; |
|
|
|
fail: |
|
warn(_("cannot continue")); /* probably ENOMEM */ |
|
return 0; |
|
} |
|
|
|
#ifdef USE_REFLINK |
|
static int is_reflink_compatible(dev_t devno, const char *filename) |
|
{ |
|
static dev_t last_dev = 0; |
|
static int last_status = 0; |
|
|
|
if (last_dev != devno) { |
|
struct statfs vfs; |
|
|
|
if (statfs(filename, &vfs) != 0) |
|
return 0; |
|
|
|
last_dev = devno; |
|
switch (vfs.f_type) { |
|
case STATFS_BTRFS_MAGIC: |
|
case STATFS_XFS_MAGIC: |
|
last_status = 1; |
|
break; |
|
default: |
|
last_status = 0; |
|
break; |
|
} |
|
} |
|
|
|
return last_status; |
|
} |
|
|
|
static int is_reflink(struct file *xa, struct file *xb) |
|
{ |
|
int last = 0, rc = 0; |
|
char abuf[BUFSIZ] = { 0 }, |
|
bbuf[BUFSIZ] = { 0 }; |
|
|
|
struct fiemap *amap = (struct fiemap *) abuf, |
|
*bmap = (struct fiemap *) bbuf; |
|
|
|
int af = open(xa->links->path, O_RDONLY), |
|
bf = open(xb->links->path, O_RDONLY); |
|
|
|
if (af < 0 || bf < 0) |
|
goto done; |
|
|
|
do { |
|
size_t i; |
|
|
|
amap->fm_length = ~0ULL; |
|
amap->fm_flags = FIEMAP_FLAG_SYNC; |
|
amap->fm_extent_count = (sizeof(abuf) - sizeof(*amap)) / sizeof(struct fiemap_extent); |
|
|
|
bmap->fm_length = ~0ULL; |
|
bmap->fm_flags = FIEMAP_FLAG_SYNC; |
|
bmap->fm_extent_count = (sizeof(bbuf) - sizeof(*bmap)) / sizeof(struct fiemap_extent); |
|
|
|
if (ioctl(af, FS_IOC_FIEMAP, (unsigned long) amap) < 0) |
|
goto done; |
|
if (ioctl(bf, FS_IOC_FIEMAP, (unsigned long) bmap) < 0) |
|
goto done; |
|
|
|
if (amap->fm_mapped_extents != bmap->fm_mapped_extents) |
|
goto done; |
|
|
|
for (i = 0; i < amap->fm_mapped_extents; i++) { |
|
struct fiemap_extent *a = &amap->fm_extents[i]; |
|
struct fiemap_extent *b = &bmap->fm_extents[i]; |
|
|
|
if (a->fe_logical != b->fe_logical || |
|
a->fe_length != b->fe_length || |
|
a->fe_physical != b->fe_physical) |
|
goto done; |
|
if (!(a->fe_flags & FIEMAP_EXTENT_SHARED) || |
|
!(b->fe_flags & FIEMAP_EXTENT_SHARED)) |
|
goto done; |
|
if (a->fe_flags & FIEMAP_EXTENT_LAST) |
|
last = 1; |
|
} |
|
|
|
bmap->fm_start = amap->fm_start = |
|
amap->fm_extents[amap->fm_mapped_extents - 1].fe_logical + |
|
amap->fm_extents[amap->fm_mapped_extents - 1].fe_length; |
|
} while (last == 0); |
|
|
|
rc = 1; |
|
done: |
|
if (af >= 0) |
|
close(af); |
|
if (bf >= 0) |
|
close(bf); |
|
return rc; |
|
} |
|
#endif /* USE_REFLINK */ |
|
|
|
static inline size_t count_nodes(struct file *x) |
|
{ |
|
size_t ct = 0; |
|
|
|
for ( ; x != NULL; x = x->next) |
|
ct++; |
|
|
|
return ct; |
|
} |
|
|
|
/** |
|
* visitor - Callback for twalk() |
|
* @nodep: Pointer to a pointer to a #struct file |
|
* @which: At which point this visit is (preorder, postorder, endorder) |
|
* @depth: The depth of the node in the tree |
|
* |
|
* Visit the nodes in the binary tree. For each node, call hardlinker() |
|
* on each #struct file in the linked list of #struct file instances located |
|
* at that node. |
|
*/ |
|
static void visitor(const void *nodep, const VISIT which, const int depth) |
|
{ |
|
struct file *master = *(struct file **)nodep; |
|
struct file *begin = master; |
|
struct file *other; |
|
|
|
(void)depth; |
|
|
|
if (which != leaf && which != endorder) |
|
return; |
|
|
|
for (; master != NULL; master = master->next) { |
|
size_t nnodes, memsiz; |
|
int may_reflink = 0; |
|
|
|
if (handle_interrupt()) |
|
exit(EXIT_FAILURE); |
|
if (master->links == NULL) |
|
continue; |
|
|
|
/* calculate per file max memory use */ |
|
nnodes = count_nodes(master); |
|
if (!nnodes) |
|
continue; |
|
|
|
/* per-file cache size */ |
|
memsiz = opts.cache_size / nnodes; |
|
/* filesiz, readsiz, memsiz */ |
|
ul_fileeq_set_size(&fileeq, master->st.st_size, opts.io_size, memsiz); |
|
|
|
#ifdef USE_REFLINK |
|
if (reflink_mode || reflinks_skip) { |
|
may_reflink = |
|
reflink_mode == REFLINK_ALWAYS ? 1 : |
|
is_reflink_compatible(master->st.st_dev, |
|
master->links->path); |
|
} |
|
#endif |
|
for (other = master->next; other != NULL; other = other->next) { |
|
int eq; |
|
|
|
if (handle_interrupt()) |
|
exit(EXIT_FAILURE); |
|
|
|
assert(other != other->next); |
|
assert(other->st.st_size == master->st.st_size); |
|
|
|
if (!other->links) |
|
continue; |
|
|
|
/* check file attributes, etc. */ |
|
if (!file_may_link_to(master, other)) { |
|
jlog(JLOG_VERBOSE2, |
|
_("Skipped (attributes mismatch) %s"), other->links->path); |
|
continue; |
|
} |
|
#ifdef USE_REFLINK |
|
if (may_reflink && reflinks_skip && is_reflink(master, other)) { |
|
jlog(JLOG_VERBOSE2, |
|
_("Skipped (already reflink) %s"), other->links->path); |
|
stats.ignored_reflinks++; |
|
continue; |
|
} |
|
#endif |
|
/* initialize content comparison */ |
|
if (!ul_fileeq_data_associated(&master->data)) |
|
ul_fileeq_data_set_file(&master->data, master->links->path); |
|
if (!ul_fileeq_data_associated(&other->data)) |
|
ul_fileeq_data_set_file(&other->data, other->links->path); |
|
|
|
/* compare files */ |
|
eq = ul_fileeq(&fileeq, &master->data, &other->data); |
|
|
|
/* reduce number of open files, keep only master open */ |
|
ul_fileeq_data_close_file(&other->data); |
|
|
|
stats.comparisons++; |
|
|
|
if (!eq) { |
|
jlog(JLOG_VERBOSE2, |
|
_("Skipped (content mismatch) %s"), other->links->path); |
|
continue; |
|
} |
|
|
|
/* link files */ |
|
if (!file_link(master, other, may_reflink) && errno == EMLINK) { |
|
ul_fileeq_data_deinit(&master->data); |
|
master = other; |
|
} |
|
} |
|
|
|
/* don't keep master data in memory */ |
|
ul_fileeq_data_deinit(&master->data); |
|
} |
|
|
|
/* final cleanup */ |
|
for (other = begin; other != NULL; other = other->next) { |
|
if (ul_fileeq_data_associated(&other->data)) |
|
ul_fileeq_data_deinit(&other->data); |
|
} |
|
} |
|
|
|
/** |
|
* usage - Print the program help and exit |
|
*/ |
|
static void __attribute__((__noreturn__)) usage(void) |
|
{ |
|
FILE *out = stdout; |
|
|
|
fputs(USAGE_HEADER, out); |
|
fprintf(out, _(" %s [options] <directory>|<file> ...\n"), |
|
program_invocation_short_name); |
|
|
|
fputs(USAGE_SEPARATOR, out); |
|
fputs(_("Consolidate duplicate files using hardlinks.\n"), out); |
|
|
|
fputs(USAGE_OPTIONS, out); |
|
fputs(_(" -v, --verbose verbose output (repeat for more verbosity)\n"), out); |
|
fputs(_(" -q, --quiet quiet mode - don't print anything\n"), out); |
|
fputs(_(" -n, --dry-run don't actually link anything\n"), out); |
|
fputs(_(" -y, --method <name> file content comparison method\n"), out); |
|
|
|
fputs(_(" -f, --respect-name filenames have to be identical\n"), out); |
|
fputs(_(" -p, --ignore-mode ignore changes of file mode\n"), out); |
|
fputs(_(" -o, --ignore-owner ignore owner changes\n"), out); |
|
fputs(_(" -t, --ignore-time ignore timestamps (when testing for equality)\n"), out); |
|
fputs(_(" -c, --content compare only file contents, same as -pot\n"), out); |
|
#ifdef USE_XATTR |
|
fputs(_(" -X, --respect-xattrs respect extended attributes\n"), out); |
|
#endif |
|
#ifdef USE_REFLINK |
|
fputs(_(" --reflink[=<when>] create clone/CoW copies (auto, always, never)\n"), out); |
|
fputs(_(" --skip-reflinks skip already cloned files (enabled on --reflink)\n"), out); |
|
#endif |
|
fputs(_(" -m, --maximize maximize the hardlink count, remove the file with\n" |
|
" lowest hardlink count\n"), out); |
|
fputs(_(" -M, --minimize reverse the meaning of -m\n"), out); |
|
fputs(_(" -O, --keep-oldest keep the oldest file of multiple equal files\n" |
|
" (lower precedence than minimize/maximize)\n"), out); |
|
fputs(_(" -x, --exclude <regex> regular expression to exclude files\n"), out); |
|
fputs(_(" -i, --include <regex> regular expression to include files/dirs\n"), out); |
|
fputs(_(" -s, --minimum-size <size> minimum size for files.\n"), out); |
|
fputs(_(" -S, --maximum-size <size> maximum size for files.\n"), out); |
|
fputs(_(" -b, --io-size <size> I/O buffer size for file reading\n" |
|
" (speedup, using more RAM)\n"), out); |
|
fputs(_(" -r, --cache-size <size> memory limit for cached file content data\n"), out); |
|
|
|
fputs(USAGE_SEPARATOR, out); |
|
printf(USAGE_HELP_OPTIONS(28)); |
|
printf(USAGE_MAN_TAIL("hardlink(1)")); |
|
|
|
exit(EXIT_SUCCESS); |
|
} |
|
|
|
/** |
|
* parse_options - Parse the command line options |
|
* @argc: Number of options |
|
* @argv: Array of options |
|
*/ |
|
static int parse_options(int argc, char *argv[]) |
|
{ |
|
enum { |
|
OPT_REFLINK = CHAR_MAX + 1, |
|
OPT_SKIP_RELINKS |
|
}; |
|
static const char optstr[] = "VhvnfpotXcmMOx:y:i:r:S:s:b:q"; |
|
static const struct option long_options[] = { |
|
{"version", no_argument, NULL, 'V'}, |
|
{"help", no_argument, NULL, 'h'}, |
|
{"verbose", no_argument, NULL, 'v'}, |
|
{"dry-run", no_argument, NULL, 'n'}, |
|
{"respect-name", no_argument, NULL, 'f'}, |
|
{"ignore-mode", no_argument, NULL, 'p'}, |
|
{"ignore-owner", no_argument, NULL, 'o'}, |
|
{"ignore-time", no_argument, NULL, 't'}, |
|
{"respect-xattrs", no_argument, NULL, 'X'}, |
|
{"maximize", no_argument, NULL, 'm'}, |
|
{"minimize", no_argument, NULL, 'M'}, |
|
{"keep-oldest", no_argument, NULL, 'O'}, |
|
{"exclude", required_argument, NULL, 'x'}, |
|
{"include", required_argument, NULL, 'i'}, |
|
{"method", required_argument, NULL, 'y' }, |
|
{"minimum-size", required_argument, NULL, 's'}, |
|
{"maximum-size", required_argument, NULL, 'S'}, |
|
#ifdef USE_REFLINK |
|
{"reflink", optional_argument, NULL, OPT_REFLINK }, |
|
{"skip-reflinks", no_argument, NULL, OPT_SKIP_RELINKS }, |
|
#endif |
|
{"io-size", required_argument, NULL, 'b'}, |
|
{"content", no_argument, NULL, 'c'}, |
|
{"quiet", no_argument, NULL, 'q'}, |
|
{"cache-size", required_argument, NULL, 'r'}, |
|
{NULL, 0, NULL, 0} |
|
}; |
|
static const ul_excl_t excl[] = { |
|
{'q', 'v'}, |
|
{0} |
|
}; |
|
int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT; |
|
int c; |
|
|
|
while ((c = getopt_long(argc, argv, optstr, long_options, NULL)) != -1) { |
|
|
|
err_exclusive_options(c, long_options, excl, excl_st); |
|
|
|
switch (c) { |
|
case 'p': |
|
opts.respect_mode = FALSE; |
|
break; |
|
case 'o': |
|
opts.respect_owner = FALSE; |
|
break; |
|
case 't': |
|
opts.respect_time = FALSE; |
|
break; |
|
case 'X': |
|
opts.respect_xattrs = TRUE; |
|
break; |
|
case 'm': |
|
opts.maximise = TRUE; |
|
break; |
|
case 'M': |
|
opts.minimise = TRUE; |
|
break; |
|
case 'O': |
|
opts.keep_oldest = TRUE; |
|
break; |
|
case 'f': |
|
opts.respect_name = TRUE; |
|
break; |
|
case 'v': |
|
opts.verbosity++; |
|
break; |
|
case 'q': |
|
quiet = TRUE; |
|
break; |
|
case 'c': |
|
opts.respect_mode = FALSE; |
|
opts.respect_name = FALSE; |
|
opts.respect_owner = FALSE; |
|
opts.respect_time = FALSE; |
|
opts.respect_xattrs = FALSE; |
|
break; |
|
case 'n': |
|
opts.dry_run = 1; |
|
break; |
|
case 'x': |
|
register_regex(&opts.exclude, optarg); |
|
break; |
|
case 'y': |
|
opts.method = optarg; |
|
break; |
|
case 'i': |
|
register_regex(&opts.include, optarg); |
|
break; |
|
case 's': |
|
opts.min_size = strtosize_or_err(optarg, _("failed to parse minimum size")); |
|
break; |
|
case 'S': |
|
opts.max_size = strtosize_or_err(optarg, _("failed to parse maximum size")); |
|
break; |
|
case 'r': |
|
opts.cache_size = strtosize_or_err(optarg, _("failed to parse cache size")); |
|
break; |
|
case 'b': |
|
opts.io_size = strtosize_or_err(optarg, _("failed to parse I/O size")); |
|
break; |
|
#ifdef USE_REFLINK |
|
case OPT_REFLINK: |
|
reflink_mode = REFLINK_AUTO; |
|
if (optarg) { |
|
if (strcmp(optarg, "auto") == 0) |
|
reflink_mode = REFLINK_AUTO; |
|
else if (strcmp(optarg, "always") == 0) |
|
reflink_mode = REFLINK_ALWAYS; |
|
else if (strcmp(optarg, "never") == 0) |
|
reflink_mode = REFLINK_NEVER; |
|
else |
|
errx(EXIT_FAILURE, _("unsupported reflink mode; %s"), optarg); |
|
} |
|
if (reflink_mode != REFLINK_NEVER) |
|
reflinks_skip = 1; |
|
break; |
|
case OPT_SKIP_RELINKS: |
|
reflinks_skip = 1; |
|
break; |
|
#endif |
|
case 'h': |
|
usage(); |
|
case 'V': |
|
{ |
|
static const char *features[] = { |
|
#ifdef USE_REFLINK |
|
"reflink", |
|
#endif |
|
#ifdef USE_FILEEQ_CRYPTOAPI |
|
"cryptoapi", |
|
#endif |
|
NULL |
|
}; |
|
print_version_with_features(EXIT_SUCCESS, features); |
|
} |
|
default: |
|
errtryhelp(EXIT_FAILURE); |
|
} |
|
} |
|
|
|
return 0; |
|
} |
|
|
|
/** |
|
* to_be_called_atexit - Cleanup handler, also prints statistics. |
|
*/ |
|
static void to_be_called_atexit(void) |
|
{ |
|
if (stats.started) |
|
print_stats(); |
|
} |
|
|
|
/** |
|
* sighandler - Signal handler, sets the global last_signal variable |
|
* @i: The signal number |
|
*/ |
|
static void sighandler(int i) |
|
{ |
|
if (last_signal != SIGINT) |
|
last_signal = i; |
|
if (i == SIGINT) |
|
putchar('\n'); |
|
} |
|
|
|
int main(int argc, char *argv[]) |
|
{ |
|
struct sigaction sa; |
|
int rc; |
|
|
|
sa.sa_handler = sighandler; |
|
sa.sa_flags = SA_RESTART; |
|
sigfillset(&sa.sa_mask); |
|
|
|
/* If we receive a SIGINT, end the processing */ |
|
sigaction(SIGINT, &sa, NULL); |
|
sigaction(SIGUSR1, &sa, NULL); |
|
|
|
/* Localize messages, number formatting, and anything else. */ |
|
setlocale(LC_ALL, ""); |
|
bindtextdomain(PACKAGE, LOCALEDIR); |
|
textdomain(PACKAGE); |
|
|
|
if (atexit(to_be_called_atexit) != 0) |
|
err(EXIT_FAILURE, _("cannot register exit handler")); |
|
|
|
parse_options(argc, argv); |
|
|
|
if (optind == argc) |
|
errx(EXIT_FAILURE, _("no directory or file specified")); |
|
|
|
gettime_monotonic(&stats.start_time); |
|
|
|
rc = ul_fileeq_init(&fileeq, opts.method); |
|
if (rc != 0 && strcmp(opts.method, "memcmp") != 0) { |
|
jlog(JLOG_INFO, _("cannot initialize %s method, use 'memcmp' fallback"), opts.method); |
|
opts.method = "memcmp"; |
|
rc = ul_fileeq_init(&fileeq, opts.method); |
|
} |
|
if (rc < 0) |
|
err(EXIT_FAILURE, _("failed to initialize files comparior")); |
|
|
|
/* defautl I/O size */ |
|
if (!opts.io_size) { |
|
if (strcmp(opts.method, "memcmp") == 0) |
|
opts.io_size = 8*1024; |
|
else |
|
opts.io_size = 1024*1024; |
|
} |
|
|
|
stats.started = TRUE; |
|
|
|
jlog(JLOG_VERBOSE2, _("Scanning [device/inode/links]:")); |
|
for (; optind < argc; optind++) { |
|
char *path = realpath(argv[optind], NULL); |
|
|
|
if (!path) { |
|
warn(_("cannot get realpath: %s"), argv[optind]); |
|
continue; |
|
} |
|
if (nftw(path, inserter, 20, FTW_PHYS) == -1) |
|
warn(_("cannot process %s"), path); |
|
free(path); |
|
} |
|
|
|
twalk(files, visitor); |
|
|
|
ul_fileeq_deinit(&fileeq); |
|
return 0; |
|
}
|
|
|