From 53e910abd22a890978bbf2120c5fd1ff4536a36b Mon Sep 17 00:00:00 2001 From: Chow Jia Ying Date: Sat, 12 Apr 2025 19:22:57 +0800 Subject: [PATCH 1/3] Add documentation for tree objects --- doc/commits.md | 2 +- doc/tree.md | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) create mode 100644 doc/tree.md diff --git a/doc/commits.md b/doc/commits.md index 238f3dc..40eef09 100644 --- a/doc/commits.md +++ b/doc/commits.md @@ -1,7 +1,7 @@ # Commits ## Structure of a commit -tree - tree commit +[tree](tree.md) - tree commit parent - reference to its parents author - author name committer - committer name diff --git a/doc/tree.md b/doc/tree.md new file mode 100644 index 0000000..301c01d --- /dev/null +++ b/doc/tree.md @@ -0,0 +1,36 @@ +# tree + +## What's in a tree? + +A tree describes the content of the current folder by associating blobs with paths. +It's a table with 3 columns: file mode, file path, SHA-1. +Each subfolder will be represented by its own tree object. + +## Tree format + +A tree is a concatenation of records of the format: +``` +[mode] space [path] 0x00 [sha-1] +``` + +## How does Git store version history in the worktree? + +1. Each branch is associated with one worktree object +2. Current versions of the worktree are associated with a blob object + +## How to parse a tree object? + +1. Not sure if there's a format header +2. Follow the tree format. +3. It can be parsed into: +- a mapping of a path to a file mode, and a SHA-1 hash +- when needed, the SHA-1 hash could be read + +## What do we do with trees? +1. Every commit object stores a reference to the tree, which represents the working object. +2. Need to modify the paths in the working tree. +3. Need to add to the working tree. +4. Need to delete from the working tree. +5. Show the mode, sha1, object type, from a given file path + +Since I'm using `git add`, `git rm` in terms of file paths, I will use the file path as a key and represent it in an unordered_map. 
\ No newline at end of file From 612f728e70e1954d94f3218f6cc691da488d924f Mon Sep 17 00:00:00 2001 From: Chow Jia Ying Date: Sat, 12 Apr 2025 19:32:10 +0800 Subject: [PATCH 2/3] Implement parsing of tree objects --- include/tree.h | 27 ++++++++++ include/util.h | 3 +- src/CMakeLists.txt | 2 +- src/tree.cpp | 49 ++++++++++++++++++ src/util.cpp | 126 +++++++++++++++++++++++++++------------------ 5 files changed, 154 insertions(+), 53 deletions(-) create mode 100644 include/tree.h create mode 100644 src/tree.cpp diff --git a/include/tree.h b/include/tree.h new file mode 100644 index 0000000..74f9e0e --- /dev/null +++ b/include/tree.h @@ -0,0 +1,27 @@ +#ifndef TREE_H +#define TREE_H + +#include +#include + +#include "object.h" + +class GitTree : public GitObject { +public: + GitTree(const std::string &data = std::string("")); + + void deserialise( + const std::string &data) override; // convert string format to data object + std::string + serialise(GitRepository &repo) override; // convert this to a string format + std::string print_matching_files( + const std::string &filePathPattern); // print tree entries that + // match the given file path + void init(); + +protected: + std::vector pathNames; + std::unordered_map> fileEntries; +}; + +#endif // TREE_H diff --git a/include/util.h b/include/util.h index 0c11d78..60dee01 100644 --- a/include/util.h +++ b/include/util.h @@ -8,5 +8,6 @@ namespace fs = boost::filesystem; std::string read_file(const fs::path &filePath); bool create_file(const fs::path &filePath, const std::string &content = ""); std::string sha1_hexdigest(const std::string &data); - +std::string binaryToHex(const std::string &binary); +std::string hexToBinary(const std::string &hexString); #endif // UTIL_H \ No newline at end of file diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b5c5e7d..0e19218 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -15,7 +15,7 @@ target_link_libraries(repository PRIVATE inih util boost_libraries) 
target_include_directories(repository PUBLIC ../include) target_compile_features(repository PUBLIC cxx_std_17) -add_library(object object.cpp blob.cpp commit.cpp) +add_library(object object.cpp blob.cpp commit.cpp tree.cpp) target_link_libraries(object PRIVATE repository boost_libraries) target_include_directories(object PUBLIC ../include) target_compile_features(object PUBLIC cxx_std_17) diff --git a/src/tree.cpp b/src/tree.cpp new file mode 100644 index 0000000..a4350bf --- /dev/null +++ b/src/tree.cpp @@ -0,0 +1,49 @@ +#include "tree.h" +#include "repository.h" +#include "util.h" +#include +#include + +GitTree::GitTree(const std::string &data) : GitObject() { + this->deserialise(data); +}; + +std::string GitTree::serialise(GitRepository &repo) { + std::stringstream ss; + for (const auto &path : this->pathNames) { + auto &[mode, sha] = this->fileEntries[path]; + ss << mode << " " << path << '\0' << hexToBinary(sha); + } + return ss.str(); +} + +void GitTree::deserialise(const std::string &data) { + int curr = 0; + while (curr < data.size()) { + int space = data.find(' ', curr); + int mode = std::stoi(data.substr(curr, space - curr)); + curr = space + 1; + space = data.find('\0', curr); + std::string path = data.substr(curr, space - curr); + curr = space + 1; + std::string sha = binaryToHex(data.substr(curr, 20)); + curr += 20; + this->fileEntries[path] = {mode, sha}; + pathNames.push_back(path); + } + // maintain sort order via path names to ensure + // consistent tree objects are generated each time + std::sort(pathNames.begin(), pathNames.end()); +} + +std::string GitTree::print_matching_files(const std::string &filePath) { + std::stringstream ss; + for (const auto &path : this->pathNames) { + // TODO: implement better file path matching + if (filePath.empty() || path == filePath) { + auto &[mode, sha] = this->fileEntries[path]; + ss << mode << " " << sha << " " << path << "\n"; + } + } + return ss.str(); +} diff --git a/src/util.cpp b/src/util.cpp index 
4c1cb5c..652c5ca 100644 --- a/src/util.cpp +++ b/src/util.cpp @@ -1,69 +1,93 @@ -#include -#include -#include -#include -#include #include #include +#include +#include +#include +#include +#include namespace fs = boost::filesystem; std::string read_file(const fs::path &filePath) { - try { - // Open the file - std::ifstream fileStream(filePath.string()); + try { + // Open the file + std::ifstream fileStream(filePath.string()); - // Check if the file is successfully opened - if (fileStream.is_open()) { - // Read the content of the file - std::stringstream buffer; - buffer << fileStream.rdbuf(); - return buffer.str(); - } else { - std::cerr << "Error opening the file for reading: " << filePath << std::endl; - return ""; - } - } catch (const std::exception& e) { - std::cerr << "Exception: " << e.what() << std::endl; - return ""; + // Check if the file is successfully opened + if (fileStream.is_open()) { + // Read the content of the file + std::stringstream buffer; + buffer << fileStream.rdbuf(); + return buffer.str(); + } else { + std::cerr << "Error opening the file for reading: " << filePath + << std::endl; + return ""; } + } catch (const std::exception &e) { + std::cerr << "Exception: " << e.what() << std::endl; + return ""; + } } -bool create_file(const fs::path &filePath, const std::string& content = "") { - try { - // Create the file - std::ofstream fileStream(filePath.string()); +bool create_file(const fs::path &filePath, const std::string &content = "") { + try { + // Create the file + std::ofstream fileStream(filePath.string()); - // Check if the file is successfully opened - if (fileStream.is_open()) { - // Write content to the file if provided - if (!content.empty()) { - fileStream << content; - } + // Check if the file is successfully opened + if (fileStream.is_open()) { + // Write content to the file if provided + if (!content.empty()) { + fileStream << content; + } - std::cout << "File created successfully: " << filePath << std::endl; - return true; - } 
else { - std::cerr << "Error opening the file for writing: " << filePath << std::endl; - return false; - } - } catch (const std::exception& e) { - std::cerr << "Exception: " << e.what() << std::endl; - return false; + std::cout << "File created successfully: " << filePath << std::endl; + return true; + } else { + std::cerr << "Error opening the file for writing: " << filePath + << std::endl; + return false; } + } catch (const std::exception &e) { + std::cerr << "Exception: " << e.what() << std::endl; + return false; + } } -std::string sha1_hexdigest(const std::string& data) { - boost::uuids::detail::sha1 sha1; - sha1.process_bytes(data.data(), data.size()); +std::string sha1_hexdigest(const std::string &data) { + boost::uuids::detail::sha1 sha1; + sha1.process_bytes(data.data(), data.size()); - unsigned int digest[5]; - sha1.get_digest(digest); + unsigned int digest[5]; + sha1.get_digest(digest); - std::stringstream ss; - ss << std::hex << std::setfill('0'); - for (unsigned int i : digest) { - ss << std::setw(8) << i; - } - return ss.str(); + std::stringstream ss; + ss << std::hex << std::setfill('0'); + for (unsigned int i : digest) { + ss << std::setw(8) << i; + } + return ss.str(); } + +std::string binaryToHex(const std::string &binaryData) { + std::ostringstream hexStream; + for (unsigned char byte : binaryData) { + hexStream << std::setw(2) << std::setfill('0') << std::hex + << static_cast(byte); + } + return hexStream.str(); +} + +std::string hexToBinary(const std::string &hexString) { + std::string binaryData; + for (size_t i = 0; i < hexString.size(); i += 2) { + // Take two hex characters at a time + std::string byteStr = hexString.substr(i, 2); + + // Convert hex pair to a single byte (char) + char byte = static_cast(std::stoi(byteStr, nullptr, 16)); + binaryData.push_back(byte); + } + return binaryData; +} \ No newline at end of file From d497aafed50ccf6396be0925b5ac2fd6fa9bc5e4 Mon Sep 17 00:00:00 2001 From: Chow Jia Ying Date: Sat, 12 Apr 2025 19:32:26 
+0800 Subject: [PATCH 3/3] Remove redundant newline for cat-file --- src/cat-file.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cat-file.cpp b/src/cat-file.cpp index a528f0c..a7ebe75 100644 --- a/src/cat-file.cpp +++ b/src/cat-file.cpp @@ -27,7 +27,7 @@ void catfile(std::vector &args) { if (repo) { GitObject *obj = GitObject::read(*repo, GitObject::find(*repo, hash, type)); - std::cout << obj->serialise(*repo) << "\n"; + std::cout << obj->serialise(*repo); } } catch (std::runtime_error &err) { std::cerr << err.what() << "\n";