Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implemented Feature #225 - Don't follow symlinks. #382

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions man/hashdeep.1
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,11 @@ open(). Specifying \fB-Fm\fR will use memory-mapped I/O which will be
faster on some platforms, but which (currently) will not work with
files that produce I/O errors.

.TP
\fB-R\fR
Don't follow symlinks; instead, hash the output of readlink. (Not available on
Windows.)



.TP
Expand Down
29 changes: 24 additions & 5 deletions src/dig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,19 @@ file_types file_metadata_t::decode_file_type(const struct __stat64 &sb)
*/
int file_metadata_t::stat(const tstring &fn,
file_metadata_t *m,
class display &ocb)
class display &ocb,
bool const is_symlink)
{
struct __stat64 sb;
if (::TSTAT(fn.c_str(),&sb))
if (ocb.opt_readlink && is_symlink)
{
if (::TLSTAT(fn.c_str(),&sb))
{
ocb.error_filename(fn,"%s",strerror(errno));
return -1;
}
}
else if (::TSTAT(fn.c_str(),&sb))
{
ocb.error_filename(fn,"%s",strerror(errno));
return -1;
Expand Down Expand Up @@ -564,6 +573,13 @@ void state::process_dir(const tstring &fn)
*/
bool state::should_hash_symlink(const tstring &fn, file_types *link_type)
{
/**
* When readlink option is set, all symlinks are to be hashed.
*/
if (ocb.opt_readlink) {
return true;
}

/**
* We must look at what this symlink points to before we process it.
* The file_type() function uses lstat to examine the file.
Expand Down Expand Up @@ -694,9 +710,11 @@ bool state::should_hash_expert(const tstring &fn, file_types type)
* but if it is called with a directory it recursively hashes it.
*/

bool state::should_hash(const tstring &fn)
bool state::should_hash(const tstring &fn, file_types &_type)
{
file_types type = state::file_type(fn,&ocb,0,0,0,0);

_type = type;

if (mode_expert)
return should_hash_expert(fn,type);
Expand Down Expand Up @@ -735,8 +753,9 @@ void state::dig_normal(const tstring &fn_) {
#endif
if (opt_debug)
ocb.status("*** cleaned:%s",global::make_utf8(fn).c_str());
if (should_hash(fn))
ocb.hash_file(fn);
file_types type;
if (should_hash(fn, type))
ocb.hash_file(fn, type);
}


Expand Down
51 changes: 44 additions & 7 deletions src/hash.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,17 @@ bool file_data_hasher_t::compute_hash(uint64_t request_start,uint64_t request_le
hc1->read_offset = request_start;
hc1->read_len = 0; // so far

unsigned char *readlink_buffer = 0;
bool const request_larger_than_buffer = request_len > file_data_hasher_t::MD5DEEP_IDEAL_BLOCK_SIZE;
if (ocb->opt_readlink && file_is_symlink) {
#ifndef _WIN32
if (request_larger_than_buffer) {
readlink_buffer = (unsigned char*)malloc(request_len);
readlink(file_name_to_hash.c_str(), (char*)readlink_buffer, request_len);
}
#endif
}

while (request_len>0){
// Clear the buffer in case we hit an error and need to pad the hash
// The use of MD5DEEP_IDEAL_BLOCK_SIZE means that we loop even for memory-mapped
Expand All @@ -107,10 +118,21 @@ bool file_data_hasher_t::compute_hash(uint64_t request_start,uint64_t request_le

ssize_t current_read_bytes = 0; // read the data into buffer

if(this->handle){
if (ocb->opt_readlink && file_is_symlink) {
#ifndef _WIN32
if (request_larger_than_buffer) {
memcpy(buffer_, readlink_buffer + hc1->read_len, toread);
}
else {
readlink(file_name_to_hash.c_str(), (char*)buffer_, toread);
}
current_read_bytes = toread;
#endif
}
else if(this->handle){
current_read_bytes = fread(buffer_, 1, toread, this->handle);
} else {
assert(this->fd!=0);
assert(this->fd!=-1);
if(this->base){
buffer = this->base + request_start;
current_read_bytes = min(toread,this->bounds - request_start); // can't read more than this
Expand Down Expand Up @@ -169,6 +191,12 @@ bool file_data_hasher_t::compute_hash(uint64_t request_start,uint64_t request_le
request_start += toread;
request_len -= toread;
}

if (readlink_buffer) {
free(readlink_buffer);
readlink_buffer = 0;
}

if (ocb->opt_estimate) ocb->clear_realtime_stats();
if (this->file_bytes == this->stat_bytes) this->eof = true; // end of the file
return true; // done hashing!
Expand Down Expand Up @@ -201,6 +229,8 @@ void file_data_hasher_t::hash()
{
file_data_hasher_t *fdht = this;

bool const readlink_this_file = ocb->opt_readlink && fdht->file_is_symlink;

/*
* If the handle is set, we are probably hashing stdin.
* If not, figure out file size and full file name for the handle
Expand All @@ -211,7 +241,7 @@ void file_data_hasher_t::hash()
//state::file_type(fdht->file_name_to_hash,ocb,&fdht->stat_bytes,
//&fdht->ctime,&fdht->mtime,&fdht->atime);
file_metadata_t m;
file_metadata_t::stat(fdht->file_name_to_hash,&m,*ocb);
file_metadata_t::stat(fdht->file_name_to_hash,&m,*ocb,fdht->file_is_symlink);
fdht->stat_bytes = m.size;
fdht->ctime = m.ctime;
fdht->mtime = m.mtime;
Expand All @@ -238,7 +268,11 @@ void file_data_hasher_t::hash()
}
}

switch(ocb->opt_iomode){
if (readlink_this_file) {
assert(fdht->fd == -1);
assert(fdht->handle == 0);
}
else switch(ocb->opt_iomode){
case iomode::buffered:
assert(fdht->handle==0);

Expand Down Expand Up @@ -360,7 +394,7 @@ void file_data_hasher_t::hash()
*/
fdht->file_bytes = 0;
if(fdht->handle) fseeko(fdht->handle, 0, SEEK_SET);
if(fdht->fd){
if(fdht->fd != -1){
lseek(this->fd,0,SEEK_SET);
}
fdht->eof = false; //
Expand All @@ -381,7 +415,7 @@ void file_data_hasher_t::hash()
while (fdht->eof==false) {

uint64_t request_len = fdht->stat_bytes; // by default, hash the file
if ( fdht->ocb->piecewise_size>0 ) {
if ( !readlink_this_file && fdht->ocb->piecewise_size>0 ) {
request_len = fdht->ocb->piecewise_size;
}

Expand Down Expand Up @@ -467,10 +501,13 @@ void worker::do_work(file_data_hasher_t *fdht)
* 2 - hash the fdht
* 3 - record it in stdout using display.
*/
void display::hash_file(const tstring &fn)
void display::hash_file(const tstring &fn, file_types const type)
{
file_data_hasher_t *fdht = new file_data_hasher_t(this);
fdht->file_name_to_hash = fn;
if (type == stat_symlink) {
fdht->file_is_symlink = true;
}

/**
* If we are using a thread pool, hash in another thread
Expand Down
9 changes: 8 additions & 1 deletion src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -614,7 +614,11 @@ int state::hashdeep_process_command_line(int argc_, char **argv_)
bool did_usage = false;
int i;

while ((i=getopt(argc_,argv_,"abc:CdeEF:f:o:I:i:MmXxtlk:rsp:wvVhW:0D:uj:")) != -1) {
while ((i=getopt(argc_,argv_,"abc:CdeEF:f:o:I:i:MmXxtlk:rsp:wvVhW:0D:uj:"
#ifndef _WIN32
"R"
#endif
)) != -1) {
switch (i)
{
case 'a':
Expand Down Expand Up @@ -683,6 +687,9 @@ int state::hashdeep_process_command_line(int argc_, char **argv_)
case 'b': ocb.mode_barename=true; break;
case 'l': ocb.opt_relative=true; break;
case 'e': ocb.opt_estimate = true; break;
#ifndef _WIN32
case 'R': ocb.opt_readlink = true; break;
#endif
case 'r': mode_recursive=true; break;
case 's': ocb.opt_silent = true; break;

Expand Down
12 changes: 8 additions & 4 deletions src/main.h
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ class file_metadata_t {
static file_types decode_file_type(const struct __stat64 &sb);

// stat a file, print an error and return -1 if it fails, otherwise return 0
static int stat(const filename_t &path,file_metadata_t *m,class display &ocb);
static int stat(const filename_t &path,file_metadata_t *m,class display &ocb, bool is_symlink = false);
class fileid_t { // uniquely defines a file on this system
public:
fileid_t():dev(0),ino(0){};
Expand Down Expand Up @@ -296,6 +296,7 @@ class file_data_hasher_t : public file_data_t {
}
static const size_t MD5DEEP_IDEAL_BLOCK_SIZE = 8192;
file_data_hasher_t(class display *ocb_):
file_is_symlink(false),
ocb(ocb_), // where we put results
handle(0),
fd(-1),
Expand All @@ -322,6 +323,7 @@ class file_data_hasher_t : public file_data_t {

/* The actual file to hash */
filename_t file_name_to_hash;
bool file_is_symlink;

/* Where the results go */
class display *ocb;
Expand Down Expand Up @@ -585,6 +587,7 @@ class display {
opt_display_hash(false),
opt_show_matched(false),
opt_case_sensitive(true),
opt_readlink(false),
opt_iomode(iomode::buffered), // by default, use buffered
#ifdef HAVE_PTHREAD
opt_threadcount(threadpool::numCPU()),
Expand Down Expand Up @@ -620,6 +623,7 @@ class display {
bool opt_display_hash;
bool opt_show_matched;
bool opt_case_sensitive;
bool opt_readlink;
int opt_iomode;
int opt_threadcount;

Expand Down Expand Up @@ -755,7 +759,7 @@ class display {
void finalize_matching();

/* hash.cpp: Actually trigger the hashing. */
void hash_file(const tstring &file_name);
void hash_file(const tstring &file_name, file_types type);
void hash_stdin();
void dump_hashlist(){ lock(); known.dump_hashlist(); unlock(); }
};
Expand Down Expand Up @@ -794,7 +798,7 @@ public:;

state():mode_recursive(false), // do we recurse?
mode_warn_only(false), // for loading hash files

// these determine which files get hashed
mode_expert(false),
mode_regular(false),
Expand Down Expand Up @@ -904,7 +908,7 @@ public:;
bool should_hash_symlink(const tstring &fn,file_types *link_type);
bool should_hash_winpe(const tstring &fn);
bool should_hash_expert(const tstring &fn, file_types type);
bool should_hash(const tstring &fn);
bool should_hash(const tstring &fn, file_types &type);

/* file_type returns the file type of a string.
* If an error is found and ocb is provided, send the error to ocb.
Expand Down