diff --git a/.clang-format b/.clang-format
new file mode 100644
index 0000000..c529bca
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,7 @@
+UseTab: false
+IndentWidth: 2
+BreakBeforeBraces: Allman
+AllowShortIfStatementsOnASingleLine: false
+IndentCaseLabels: false
+ColumnLimit: 120
+SortIncludes: false
\ No newline at end of file
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
new file mode 100644
index 0000000..6302f75
--- /dev/null
+++ b/.github/workflows/build.yml
@@ -0,0 +1,63 @@
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Setup clang
+        run: make apt-install
+
+      - name: Build
+        run: make build
+
+      - name: Run clang-tidy
+        run: make tidy
+
+      - name: Run clang-format
+        run: make format
+
+      - name: Upload build artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: fs-trace-linux-amd64
+          path: bin/fs-trace
+
+  publish-npm:
+    if: github.event_name == 'push' && github.ref == 'refs/heads/main'
+    needs: build
+    runs-on: ubuntu-latest
+    steps:
+      # package.json lives in the repository, so it must be checked out before publishing.
+      - uses: actions/checkout@v4
+
+      # bin/ is git-ignored; fetch the binary produced by the build job.
+      - name: Download build artifacts
+        uses: actions/download-artifact@v4
+        with:
+          name: fs-trace-linux-amd64
+          path: bin
+
+      # Artifacts do not preserve file modes.
+      - name: Restore executable bit
+        run: chmod +x bin/fs-trace
+
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: "18"
+          # Writes the .npmrc that makes npm read NODE_AUTH_TOKEN.
+          registry-url: "https://registry.npmjs.org"
+
+      - name: Publish to npm
+        run: npm publish
+        env:
+          NODE_AUTH_TOKEN: ${{secrets.NPM_TOKEN}}
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..6dd29b7
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+bin/
\ No newline at end of file
diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json
new file mode 100644
index 0000000..16b0ae1
--- /dev/null
+++ b/.vscode/c_cpp_properties.json
@@ -0,0 +1,18 @@
+{
+    "configurations": [
+        {
+            "name": "Linux",
+            "includePath": [
+                "${workspaceFolder}/**",
+                "${workspaceFolder}/node_modules/node-addon-api/**",
+                "/usr/local/share/nvm/versions/node/v18.19.1/include/node/**"
+            ],
+            "defines": [],
+            "compilerPath": "/usr/bin/clang",
+            "cStandard": "c17",
+            "cppStandard": "c++14",
+            "intelliSenseMode": "linux-clang-x64"
+        }
+    ],
+    "version": 4
+}
\ No newline at end of file
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..ac475df
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,20 @@
+# None of these targets name a file they produce, so always run their recipes.
+.PHONY: build build-debug tidy format apt-install
+
+build:
+	mkdir -p bin
+	clang -std=gnu11 -O3 -o bin/fs-trace fs-trace.c
+
+build-debug:
+	mkdir -p bin
+	clang -std=gnu11 -O0 -DDEBUG=1 -o bin/fs-trace fs-trace.c
+
+tidy:
+	clang-tidy fs-trace.c -- -std=gnu11
+
+format:
+	clang-format -style=file -i fs-trace.c trace_exec.c
+
+apt-install:
+	sudo apt-get update
+	sudo apt-get -y install clang clang-format clang-tidy
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e69de29
diff --git a/fs-trace.c b/fs-trace.c
new file mode 100644
index 0000000..8f15308
--- /dev/null
+++ b/fs-trace.c
@@ -0,0 +1,22 @@
+#include "trace_exec.c"
+#include <stdio.h>
+
+/*
+ * fs-trace: run a program under the tracer in trace_exec.c.
+ *
+ * Usage: fs-trace <program> [args...]
+ */
+int main(int argc, char *argv[])
+{
+  if (argc < 2)
+  {
+    fprintf(stderr, "Usage: %s <program> [args...]\n", argv[0]);
+    return 1;
+  }
+
+  // argv[1] is both the program to exec and the first element of its argument vector.
+  char *program = argv[1];
+  char **args = &argv[1];
+
+  return trace_exec(program, args);
+}
diff --git a/package-lock.json b/package-lock.json
new file mode 100644
index 0000000..387320e
--- /dev/null
+++ b/package-lock.json
@@ -0,0 +1,21 @@
+{
+  "name": "fs-trace",
+  "version": "0.0.1",
+  "lockfileVersion": 3,
+  "requires": true,
+  "packages": {
+    "": {
+      "name": "fs-trace",
+      "version": "0.0.1",
+      "cpu": [
+        "x64"
+      ],
+      "os": [
+        "linux"
+      ],
+      "bin": {
+        "fs-trace": "bin/fs-trace"
+      }
+    }
+  }
+}
diff --git a/package.json b/package.json
new file mode 100644
index 0000000..f89d804
--- /dev/null
+++ b/package.json
@@ -0,0 +1,22 @@
+{
+  "name": "fs-trace",
+  "version": "0.0.1",
+  "description": "Trace file system calls",
+  "os": [
+    "linux"
+  ],
+  "cpu": [
+    "x64"
+  ],
+  "bin": {
+    "fs-trace": "./bin/fs-trace"
+  },
+  "files": [
+    "bin/fs-trace",
+    "package.json"
+  ],
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/bmiddha/fs-trace.git"
+  }
+}
\ No newline at end of file
diff --git a/test.sh b/test.sh
new file mode 100644
index 0000000..d679837
--- /dev/null
+++ b/test.sh
@@ -0,0 +1,7 @@
+/usr/bin/mkdir /tmp/bar
+/usr/bin/echo foo >> /tmp/foo
+/usr/bin/echo foo > /tmp/foo
+/usr/bin/rm -r /tmp/bar
+/usr/bin/rm /tmp/foo
+test -f /tmp/foo222 && echo foo
+readlink /etc/localtime
diff --git a/trace_exec.c b/trace_exec.c
new file mode 100644
index 0000000..fefeebf
--- /dev/null
+++ b/trace_exec.c
@@ -0,0 +1,628 @@
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/openat2.h>
+#include <signal.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/ptrace.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+// Kept after <sys/ptrace.h>; provides struct ptrace_syscall_info.
+#include <linux/ptrace.h>
+
+// Descriptor the access log is written to. Nothing in this file opens it, so it has to be inherited.
+enum
+{
+  LOG_FD = 3
+};
+
+/*
+ * Write one access record ("<mode> <path>") to LOG_FD.
+ *
+ * path:    path the tracee touched; an empty path is not logged.
+ * mode:    tag emitted verbatim; this file uses "R", "W", "C" and "D".
+ * comment: name of the originating syscall, printed as a "# ..." line in DEBUG builds only; may be NULL.
+ *
+ * Paths under /proc/ and /dev/ are never logged, and "R" records under /usr/lib/ are dropped as well.
+ */
+void log_access(const char *path, const char *mode, const char *comment)
+{
+  if (path[0] == '\0')
+  {
+    return;
+  }
+
+  // log filtering
+  if (strncmp(path, "/proc/", 6) == 0 || strncmp(path, "/dev/", 5) == 0)
+  {
+    return;
+  }
+
+  if (mode[0] == 'R' && strncmp(path, "/usr/lib/", 9) == 0)
+  {
+    return;
+  }
+
+#ifdef DEBUG
+  if (comment != NULL)
+  {
+    dprintf(LOG_FD, "# %s\n%s %s\n", comment, mode, path);
+    return;
+  }
+#else
+  (void)comment;
+#endif
+  // Also reached in DEBUG builds without a comment, so that no record is silently lost.
+  dprintf(LOG_FD, "%s %s\n", mode, path);
+}
+
+/*
+ * Resolve descriptor fd of process proc_pid to a path by reading the /proc/<pid>/fd/<fd> link.
+ *
+ * buf must have room for PATH_MAX bytes. Returns the length of the NUL-terminated path, or -1 when fd is
+ * negative or the link cannot be read; buf then holds the empty string.
+ */
+int get_fd_path(pid_t proc_pid, long long fd, char *buf)
+{
+  char linkpath[64];
+  ssize_t n;
+
+  buf[0] = '\0';
+  if (fd < 0)
+  {
+    return -1;
+  }
+
+  snprintf(linkpath, sizeof(linkpath), "/proc/%d/fd/%lld", (int)proc_pid, fd);
+  n = readlink(linkpath, buf, PATH_MAX - 1);
+  if (n < 0)
+  {
+    return -1;
+  }
+
+  buf[n] = '\0';
+  return (int)n;
+}
+
+/*
+ * Copy buffer_size bytes from address addr of tracee pid into buffer, one word at a time.
+ *
+ * The buffer is zero-filled first, so bytes that could not be read (for example because the range runs
+ * into an unmapped page) come back as zeros instead of uninitialized memory.
+ */
+void read_struct_from_tracee(pid_t pid, unsigned long addr, void *buffer, size_t buffer_size)
+{
+  unsigned char *out = buffer;
+  size_t offset = 0;
+
+  memset(out, 0, buffer_size);
+  while (offset < buffer_size)
+  {
+    // PEEKDATA returns the word itself, so -1 only signals an error if this very call set errno.
+    errno = 0;
+    long word = ptrace(PTRACE_PEEKDATA, pid, (void *)(addr + offset), NULL);
+    if (word == -1 && errno != 0)
+    {
+      break;
+    }
+
+    size_t chunk = buffer_size - offset;
+    if (chunk > sizeof(word))
+    {
+      chunk = sizeof(word);
+    }
+    memcpy(out + offset, &word, chunk);
+    offset += chunk;
+  }
+}
+
+/*
+ * Copy the NUL-terminated string at address addr of tracee pid into buffer (capacity buffer_size).
+ *
+ * Words are fetched at aligned addresses, so a fetch never straddles a page boundary, and reading stops at
+ * the first NUL. The result is always terminated: truncated if the string does not fit, empty if nothing
+ * could be read.
+ */
+void read_cstring_from_tracee(pid_t pid, unsigned long addr, char *buffer, size_t buffer_size)
+{
+  unsigned long cursor = addr & ~(unsigned long)(sizeof(long) - 1);
+  size_t skip = addr - cursor; // bytes of the first word that precede the string
+  size_t used = 0;
+
+  if (buffer_size == 0)
+  {
+    return;
+  }
+
+  while (used < buffer_size - 1)
+  {
+    errno = 0;
+    long word = ptrace(PTRACE_PEEKDATA, pid, (void *)cursor, NULL);
+    if (word == -1 && errno != 0)
+    {
+      break;
+    }
+
+    const char *bytes = (const char *)&word;
+    for (size_t i = skip; i < sizeof(word) && used < buffer_size - 1; i++)
+    {
+      buffer[used] = bytes[i];
+      if (bytes[i] == '\0')
+      {
+        return;
+      }
+      used++;
+    }
+    skip = 0;
+    cursor += sizeof(word);
+  }
+
+  buffer[used] = '\0';
+}
+
+/*
+ * Read a NULL-terminated array of C strings (such as argv) from tracee pid.
+ *
+ * At most array_size strings are copied, each into its own buffer_array[i] of capacity buffer_size. When
+ * the array ends (or cannot be read) before array_size entries, the next entry is set to the empty string.
+ */
+void read_cstring_array_from_tracee(pid_t pid, unsigned long addr, char **buffer_array, size_t array_size,
+                                    size_t buffer_size)
+{
+  for (size_t i = 0; i < array_size; i++)
+  {
+    errno = 0;
+    long ptr = ptrace(PTRACE_PEEKDATA, pid, (void *)(addr + i * sizeof(unsigned long)), NULL);
+    if (ptr == 0 || (ptr == -1 && errno != 0))
+    {
+      buffer_array[i][0] = '\0'; // Null pointer or error, set empty string
+      break;
+    }
+    read_cstring_from_tracee(pid, (unsigned long)ptr, buffer_array[i], buffer_size);
+  }
+}
+
+/*
+ * Build the path named by the (dirfd, pathname) argument pair of an *at() syscall.
+ *
+ * Absolute pathnames are used as they are. Relative ones are appended to the directory behind dirfd, or to
+ * the tracee's working directory when dirfd is AT_FDCWD; if that directory cannot be determined the
+ * pathname is returned unchanged. An empty pathname yields the directory (or dirfd's target) itself.
+ */
+void parse_dirfd_pathname(pid_t pid, unsigned long dirfd, unsigned long pathname, char *fullpath, size_t fullpath_size)
+{
+  char path[PATH_MAX];
+  char dirpath[PATH_MAX];
+  // dirfd is an int in the syscall ABI, so only the low 32 bits of the register are meaningful.
+  int fd = (int)dirfd;
+
+  read_cstring_from_tracee(pid, pathname, path, sizeof(path));
+  if (path[0] == '/')
+  {
+    snprintf(fullpath, fullpath_size, "%s", path);
+    return;
+  }
+
+  dirpath[0] = '\0';
+  if (fd == AT_FDCWD)
+  {
+    char cwdlink[64];
+    snprintf(cwdlink, sizeof(cwdlink), "/proc/%d/cwd", (int)pid);
+    ssize_t n = readlink(cwdlink, dirpath, sizeof(dirpath) - 1);
+    dirpath[n < 0 ? 0 : n] = '\0';
+  }
+  else
+  {
+    get_fd_path(pid, fd, dirpath);
+  }
+
+  if (dirpath[0] == '\0')
+  {
+    snprintf(fullpath, fullpath_size, "%s", path);
+  }
+  else if (path[0] == '\0')
+  {
+    snprintf(fullpath, fullpath_size, "%s", dirpath);
+  }
+  else
+  {
+    snprintf(fullpath, fullpath_size, "%s/%s", dirpath, path);
+  }
+}
+
+// Read the pathname argument at addr of a non-*at() syscall; relative names are resolved against the
+// tracee's working directory, which is what those syscalls do.
+static void read_path_arg(pid_t pid, unsigned long addr, char *fullpath, size_t fullpath_size)
+{
+  parse_dirfd_pathname(pid, (unsigned long)AT_FDCWD, addr, fullpath, fullpath_size);
+}
+
+// Map open(2) flags to a log mode. O_RDONLY is 0, so the access mode has to be compared, not bit-tested.
+static const char *open_mode(unsigned long long flags)
+{
+  return (flags & O_ACCMODE) == O_RDONLY ? "R" : "W";
+}
+
+/*
+ * Log the path(s) named by the file system syscall that info describes, made by tracee pid.
+ *
+ * Only syscall-entry records are examined: at exit stops the same union bytes hold the return value, which
+ * must not be mistaken for a syscall number. Syscalls without a case below are ignored.
+ */
+void print_fs_syscall_info(pid_t pid, struct ptrace_syscall_info *info)
+{
+  char fullpath[PATH_MAX];
+
+  if (info->op != PTRACE_SYSCALL_INFO_ENTRY)
+  {
+    return;
+  }
+
+  switch (info->entry.nr)
+  {
+  // int execve(const char *pathname, char *const argv[], char *const envp[]);
+  case __NR_execve:
+  {
+    read_path_arg(pid, info->entry.args[0], fullpath, PATH_MAX);
+    log_access(fullpath, "R", "execve");
+    break;
+  }
+  // int execveat(int dirfd, const char *pathname, char *const argv[], char *const envp[], int flags);
+  case __NR_execveat:
+  {
+    parse_dirfd_pathname(pid, info->entry.args[0], info->entry.args[1], fullpath, PATH_MAX);
+    log_access(fullpath, "R", "execveat");
+    break;
+  }
+
+  // int open(const char *pathname, int flags);
+  // int open(const char *pathname, int flags, mode_t mode);
+  case __NR_open:
+  {
+    read_path_arg(pid, info->entry.args[0], fullpath, PATH_MAX);
+    log_access(fullpath, open_mode(info->entry.args[1]), "open");
+    break;
+  }
+  // int creat(const char *pathname, mode_t mode);
+  case __NR_creat:
+  {
+    read_path_arg(pid, info->entry.args[0], fullpath, PATH_MAX);
+    log_access(fullpath, "W", "creat");
+    break;
+  }
+  // int openat(int dirfd, const char *pathname, int flags);
+  // int openat(int dirfd, const char *pathname, int flags, mode_t mode);
+  case __NR_openat:
+  {
+    parse_dirfd_pathname(pid, info->entry.args[0], info->entry.args[1], fullpath, PATH_MAX);
+    log_access(fullpath, open_mode(info->entry.args[2]), "openat");
+    break;
+  }
+  // int openat2(int dirfd, const char *pathname, struct open_how *how, size_t size);
+  case __NR_openat2:
+  {
+    struct open_how how;
+    parse_dirfd_pathname(pid, info->entry.args[0], info->entry.args[1], fullpath, PATH_MAX);
+    read_struct_from_tracee(pid, info->entry.args[2], &how, sizeof(how));
+    log_access(fullpath, open_mode(how.flags), "openat2");
+    break;
+  }
+  // ssize_t readlink(const char *pathname, char *buf, size_t bufsiz);
+  case __NR_readlink:
+  {
+    read_path_arg(pid, info->entry.args[0], fullpath, PATH_MAX);
+    log_access(fullpath, "R", "readlink");
+    break;
+  }
+  // ssize_t readlinkat(int dirfd, const char *pathname, char *buf, size_t bufsiz);
+  case __NR_readlinkat:
+  {
+    parse_dirfd_pathname(pid, info->entry.args[0], info->entry.args[1], fullpath, PATH_MAX);
+    log_access(fullpath, "R", "readlinkat");
+    break;
+  }
+  // int lstat(const char *pathname, struct stat *statbuf);
+  case __NR_lstat:
+  // int stat(const char *pathname, struct stat *statbuf);
+  case __NR_stat:
+  {
+    read_path_arg(pid, info->entry.args[0], fullpath, PATH_MAX);
+    log_access(fullpath, "R", "stat");
+    break;
+  }
+  // int fstatat(int dirfd, const char *pathname, struct stat *statbuf, int flags);
+  case __NR_newfstatat:
+  {
+    parse_dirfd_pathname(pid, info->entry.args[0], info->entry.args[1], fullpath, PATH_MAX);
+    log_access(fullpath, "R", "fstatat");
+    break;
+  }
+
+  // int faccessat(int dirfd, const char *pathname, int mode, int flags);
+  case __NR_faccessat:
+  case __NR_faccessat2:
+  {
+    parse_dirfd_pathname(pid, info->entry.args[0], info->entry.args[1], fullpath, PATH_MAX);
+    log_access(fullpath, "R", "faccessat");
+    break;
+  }
+
+  // int access(const char *pathname, int mode);
+  case __NR_access:
+  {
+    read_path_arg(pid, info->entry.args[0], fullpath, PATH_MAX);
+    log_access(fullpath, "R", "access");
+    break;
+  }
+
+  // int unlinkat(int dirfd, const char *pathname, int flags);
+  case __NR_unlinkat:
+  {
+    parse_dirfd_pathname(pid, info->entry.args[0], info->entry.args[1], fullpath, PATH_MAX);
+    log_access(fullpath, "D", "unlinkat");
+    break;
+  }
+
+  // int unlink(const char *pathname);
+  case __NR_unlink:
+  {
+    read_path_arg(pid, info->entry.args[0], fullpath, PATH_MAX);
+    log_access(fullpath, "D", "unlink");
+    break;
+  }
+
+  // int rmdir(const char *pathname);
+  case __NR_rmdir:
+  {
+    read_path_arg(pid, info->entry.args[0], fullpath, PATH_MAX);
+    log_access(fullpath, "D", "rmdir");
+    break;
+  }
+
+  // int rename(const char *oldpath, const char *newpath);
+  case __NR_rename:
+  {
+    read_path_arg(pid, info->entry.args[0], fullpath, PATH_MAX);
+    log_access(fullpath, "D", "rename");
+    read_path_arg(pid, info->entry.args[1], fullpath, PATH_MAX);
+    log_access(fullpath, "W", "rename");
+    break;
+  }
+
+  // int renameat(int olddirfd, const char *oldpath, int newdirfd, const char *newpath);
+  // int renameat2(int olddirfd, const char *oldpath, int newdirfd, const char *newpath, unsigned int flags);
+  case __NR_renameat:
+  case __NR_renameat2:
+  {
+    parse_dirfd_pathname(pid, info->entry.args[0], info->entry.args[1], fullpath, PATH_MAX);
+    log_access(fullpath, "D", "renameat");
+    parse_dirfd_pathname(pid, info->entry.args[2], info->entry.args[3], fullpath, PATH_MAX);
+    log_access(fullpath, "W", "renameat");
+    break;
+  }
+  // long getdents(unsigned int fd, struct linux_dirent *dirp, unsigned int count);
+  // ssize_t getdents64(int fd, void *dirp, size_t count);
+  case __NR_getdents:
+  case __NR_getdents64:
+  {
+    int len = get_fd_path(pid, (int)info->entry.args[0], fullpath);
+    if (len < 0)
+    {
+      break;
+    }
+    // Bounded append: fullpath may already be PATH_MAX - 1 characters long.
+    snprintf(fullpath + len, sizeof(fullpath) - (size_t)len, "/*");
+    log_access(fullpath, "R", "getdents");
+    break;
+  }
+
+  // int chmod(const char *pathname, mode_t mode);
+  case __NR_chmod:
+  {
+    read_path_arg(pid, info->entry.args[0], fullpath, PATH_MAX);
+    log_access(fullpath, "W", "chmod");
+    break;
+  }
+
+  // ignoring fchmod: it works on an already open descriptor. NOTE(review): fchmodat (pathname) is not covered
+
+  // int symlink(const char *target, const char *linkpath);
+  case __NR_symlink:
+  {
+    read_path_arg(pid, info->entry.args[1], fullpath, PATH_MAX);
+    log_access(fullpath, "W", "symlink");
+    break;
+  }
+
+  // int symlinkat(const char *target, int newdirfd, const char *linkpath);
+  case __NR_symlinkat:
+  {
+    parse_dirfd_pathname(pid, info->entry.args[1], info->entry.args[2], fullpath, PATH_MAX);
+    log_access(fullpath, "W", "symlinkat");
+    break;
+  }
+
+  // int linkat(int olddirfd, const char *oldpath, int newdirfd, const char *newpath, int flags);
+  case __NR_linkat:
+  {
+    parse_dirfd_pathname(pid, info->entry.args[2], info->entry.args[3], fullpath, PATH_MAX);
+    log_access(fullpath, "W", "linkat");
+    break;
+  }
+
+  // int link(const char *oldpath, const char *newpath);
+  case __NR_link:
+  {
+    read_path_arg(pid, info->entry.args[1], fullpath, PATH_MAX);
+    log_access(fullpath, "W", "link");
+    break;
+  }
+
+  // int mkdir(const char *pathname, mode_t mode);
+  case __NR_mkdir:
+  {
+    read_path_arg(pid, info->entry.args[0], fullpath, PATH_MAX);
+    log_access(fullpath, "C", "mkdir");
+    break;
+  }
+
+  // int mkdirat(int dirfd, const char *pathname, mode_t mode);
+  case __NR_mkdirat:
+  {
+    parse_dirfd_pathname(pid, info->entry.args[0], info->entry.args[1], fullpath, PATH_MAX);
+    log_access(fullpath, "C", "mkdirat");
+    break;
+  }
+
+  // int utime(const char *filename, const struct utimbuf *times);
+  case __NR_utime:
+  // int utimes(const char *filename, const struct timeval times[2]);
+  case __NR_utimes:
+  {
+    read_path_arg(pid, info->entry.args[0], fullpath, PATH_MAX);
+    log_access(fullpath, "W", "utime");
+    break;
+  }
+
+  // int truncate(const char *path, off_t length);
+  case __NR_truncate:
+  {
+    read_path_arg(pid, info->entry.args[0], fullpath, PATH_MAX);
+    log_access(fullpath, "W", "truncate");
+    break;
+  }
+
+  // ignoring ftruncate because a file descriptor is already open for writing
+
+  default:
+    break;
+  }
+}
+
+/*
+ * Trace child_pid and every process it spawns until none is left, logging their file system syscalls.
+ *
+ * child_pid must be a child that stopped itself with SIGSTOP (see run_child). Returns 0 once all tracees
+ * are gone and -1 if tracing could not be set up or waitpid() reported something unexpected.
+ */
+int run_parent(pid_t child_pid)
+{
+  int status;
+  struct ptrace_syscall_info info;
+  unsigned long ptraceOptions = PTRACE_O_TRACESYSGOOD | PTRACE_O_TRACECLONE | PTRACE_O_TRACEFORK | PTRACE_O_TRACEVFORK |
+                                PTRACE_O_TRACEEXIT | PTRACE_O_EXITKILL;
+
+  // Wait for the child to stop itself, so that attaching cannot race with its exec.
+  if (waitpid(child_pid, &status, WUNTRACED) == -1 || !WIFSTOPPED(status))
+  {
+    fprintf(stderr, "tracee did not stop before exec\n");
+    return -1;
+  }
+
+  if (ptrace(PTRACE_SEIZE, child_pid, NULL, (void *)ptraceOptions) == -1)
+  {
+    fprintf(stderr, "PTRACE_SEIZE failed with error %s\n", strerror(errno));
+    kill(child_pid, SIGKILL);
+    return -1;
+  }
+
+  if (ptrace(PTRACE_INTERRUPT, child_pid, NULL, NULL) == -1)
+  {
+    fprintf(stderr, "PTRACE_INTERRUPT failed with error %s\n", strerror(errno));
+    kill(child_pid, SIGKILL);
+    return -1;
+  }
+
+  // Lift the job-control stop; from here on the child only pauses in ptrace-stops handled below.
+  kill(child_pid, SIGCONT);
+
+  while (1)
+  {
+    pid_t pid = waitpid(-1, &status, 0);
+
+    if (pid == -1)
+    {
+      if (errno == EINTR)
+      {
+        continue;
+      }
+      if (errno != ECHILD)
+      {
+        fprintf(stderr, "waitpid failed with error %s\n", strerror(errno));
+        return -1;
+      }
+      return 0;
+    }
+
+    if (WIFEXITED(status) || WIFSIGNALED(status))
+    {
+      continue;
+    }
+    else if (!WIFSTOPPED(status))
+    {
+      fprintf(stderr, "waitpid returned bad status %d\n", status);
+      return -1;
+    }
+
+    int stop_signal = WSTOPSIG(status);
+    int event = status >> 16;
+    long inject = 0;
+
+    if (stop_signal == (SIGTRAP | 0x80))
+    {
+      // Syscall stop (marked by PTRACE_O_TRACESYSGOOD); a positive result means info was filled in.
+      if (ptrace(PTRACE_GET_SYSCALL_INFO, pid, (void *)sizeof(info), &info) > 0)
+      {
+        print_fs_syscall_info(pid, &info);
+      }
+    }
+    else if (event == 0)
+    {
+      // Signal-delivery stop: hand the signal on to the tracee instead of swallowing it.
+      inject = stop_signal;
+    }
+    // Otherwise this is a PTRACE_EVENT_* stop, which carries no signal to deliver.
+
+    ptrace(PTRACE_SYSCALL, pid, NULL, (void *)inject);
+  }
+}
+
+/*
+ * Child half of trace_exec: stop until the parent has attached, then exec the program.
+ *
+ * Only returns (with -1) if execvp() failed.
+ */
+int run_child(char *program, char **args)
+{
+  raise(SIGSTOP);
+  return execvp(program, args);
+}
+
+/*
+ * Run program with argument vector args (args[0] is the program name) under the tracer.
+ *
+ * Returns 0 when tracing ran to completion and 1 if fork() or the tracer setup failed. The forked child
+ * never returns from here: it either becomes the program or exits with status 127.
+ */
+int trace_exec(char *program, char **args)
+{
+  pid_t child_pid = fork();
+
+  if (child_pid < 0)
+  {
+    fprintf(stderr, "fork failed with error %s\n", strerror(errno));
+    return 1;
+  }
+
+  if (child_pid == 0)
+  {
+    run_child(program, args);
+    fprintf(stderr, "execvp %s failed with error %s\n", program, strerror(errno));
+    _exit(127);
+  }
+
+  return run_parent(child_pid) == 0 ? 0 : 1;
+}