Skip to content

Commit f1a0db6

Browse files
Implement parsing for tree objects (#40)
* Add documentation for tree objects * Implement parsing of tree objects * Remove redundant newline for cat-file
1 parent d4fbcc0 commit f1a0db6

File tree

8 files changed

+192
-55
lines changed

8 files changed

+192
-55
lines changed

doc/commits.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# Commits
22

33
## Structure of a commit
4-
tree - tree commit
4+
[tree](tree.md) - tree commit
55
parent - reference to its parents
66
author - author name
77
committer - committer name

doc/tree.md

+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# tree
2+
3+
## What's in a tree?
4+
5+
A tree describes the content of the current folder by associating blobs to paths.
6+
It's a table with 3 columns: file mode, file path, SHA-1.
7+
Each subfolder will be represented by its own tree object.
8+
9+
## Tree format
10+
11+
A tree is a concatenation of records of the format:
12+
```
13+
[mode] space [path] 0x00 [sha-1]
14+
```
15+
16+
## How does Git store version history in the worktree?
17+
18+
1. Each branch is associated with one worktree object
19+
2. Current versions of files in the worktree are associated with blob objects
20+
21+
## How to parse a tree object?
22+
23+
1. Not sure if there's a format header
24+
2. Follow the tree format.
25+
3. It can be parsed into:
26+
- a mapping of a path to a file mode, and a SHA-1 hash
27+
- when needed, the SHA-1 hash could be read
28+
29+
## What do we do with trees?
30+
1. Every commit object stores a reference to the tree, which represents the working object.
31+
2. Need to modify the paths in the working tree.
32+
3. Need to add to the working tree.
33+
4. Delete from the working tree
34+
5. Show the mode, sha1, object type, from a given file path
35+
36+
Since `git add` and `git rm` operate on file paths, I will use the file path as the key and store the entries in an `unordered_map`.

include/tree.h

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
#ifndef TREE_H
2+
#define TREE_H
3+
4+
#include <string>
5+
#include <unordered_map>
6+
7+
#include "object.h"
8+
9+
class GitTree : public GitObject {
10+
public:
11+
GitTree(const std::string &data = std::string(""));
12+
13+
void deserialise(
14+
const std::string &data) override; // convert string format to data object
15+
std::string
16+
serialise(GitRepository &repo) override; // convert this to a string format
17+
std::string print_matching_files(
18+
const std::string &filePathPattern); // print tree entries that
19+
// match the given file path
20+
void init();
21+
22+
protected:
23+
std::vector<std::string> pathNames;
24+
std::unordered_map<std::string, std::tuple<int, std::string>> fileEntries;
25+
};
26+
27+
#endif // TREE_H

include/util.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -8,5 +8,6 @@ namespace fs = boost::filesystem;
88
std::string read_file(const fs::path &filePath);
99
bool create_file(const fs::path &filePath, const std::string &content = "");
1010
std::string sha1_hexdigest(const std::string &data);
11-
11+
std::string binaryToHex(const std::string &binary);
12+
std::string hexToBinary(const std::string &hexString);
1213
#endif // UTIL_H

src/CMakeLists.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ target_link_libraries(repository PRIVATE inih util boost_libraries)
1515
target_include_directories(repository PUBLIC ../include)
1616
target_compile_features(repository PUBLIC cxx_std_17)
1717

18-
add_library(object object.cpp blob.cpp commit.cpp)
18+
add_library(object object.cpp blob.cpp commit.cpp tree.cpp)
1919
target_link_libraries(object PRIVATE repository boost_libraries)
2020
target_include_directories(object PUBLIC ../include)
2121
target_compile_features(object PUBLIC cxx_std_17)

src/cat-file.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ void catfile(std::vector<std::string> &args) {
2727
if (repo) {
2828
GitObject *obj =
2929
GitObject::read(*repo, GitObject::find(*repo, hash, type));
30-
std::cout << obj->serialise(*repo) << "\n";
30+
std::cout << obj->serialise(*repo);
3131
}
3232
} catch (std::runtime_error &err) {
3333
std::cerr << err.what() << "\n";

src/tree.cpp

+49
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
#include "tree.h"
2+
#include "repository.h"
3+
#include "util.h"
4+
#include <iostream>
5+
#include <sstream>
6+
7+
/// Builds a tree by parsing `data`, the serialised tree records.
///
/// @param data  tree records in the format consumed by deserialise().
GitTree::GitTree(const std::string &data) : GitObject() {
  // Qualified call: inside a constructor the call can only ever reach this
  // class's implementation, so spell that out instead of implying dispatch.
  GitTree::deserialise(data);
}
10+
11+
/// Serialises the tree as a concatenation of records of the form
/// `[mode] space [path] 0x00 [sha-1]`, emitted in the order of `pathNames`.
///
/// @param repo  unused by this implementation; present because the method
///              overrides the GitObject interface.
/// @return the serialised records (contains embedded NUL bytes).
std::string GitTree::serialise(GitRepository &repo) {
  (void)repo;

  std::string out;
  for (const auto &path : this->pathNames) {
    // find() instead of operator[]: serialising must never insert a
    // default-constructed entry into the map as a side effect.
    const auto entry = this->fileEntries.find(path);
    if (entry == this->fileEntries.end()) {
      continue;
    }
    const auto &[mode, sha] = entry->second;

    // std::to_string is locale-independent, unlike stream insertion, which
    // may add digit grouping under a non-"C" global locale.
    out += std::to_string(mode);
    out += ' ';
    out += path;
    out += '\0';
    out += hexToBinary(sha);
  }
  return out;
}
19+
20+
void GitTree::deserialise(const std::string &data) {
21+
int curr = 0;
22+
while (curr < data.size()) {
23+
int space = data.find(' ', curr);
24+
int mode = std::stoi(data.substr(curr, space - curr));
25+
curr = space + 1;
26+
space = data.find('\0', curr);
27+
std::string path = data.substr(curr, space - curr);
28+
curr = space + 1;
29+
std::string sha = binaryToHex(data.substr(curr, 20));
30+
curr += 20;
31+
this->fileEntries[path] = {mode, sha};
32+
pathNames.push_back(path);
33+
}
34+
// maintain sort order via path names to ensure
35+
// consistent tree objects are generated each time
36+
std::sort(pathNames.begin(), pathNames.end());
37+
}
38+
39+
std::string GitTree::print_matching_files(const std::string &filePath) {
40+
std::stringstream ss;
41+
for (const auto &path : this->pathNames) {
42+
// TODO: implement better file path matching
43+
if (filePath.empty() || path == filePath) {
44+
auto &[mode, sha] = this->fileEntries[path];
45+
ss << mode << " " << sha << " " << path << "\n";
46+
}
47+
}
48+
return ss.str();
49+
}

src/util.cpp

+75-51
Original file line numberDiff line numberDiff line change
@@ -1,69 +1,93 @@
1-
#include <iostream>
2-
#include <iomanip>
3-
#include <string>
4-
#include <fstream>
5-
#include <sstream>
61
#include <boost/filesystem.hpp>
72
#include <boost/uuid/detail/sha1.hpp>
3+
#include <fstream>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
88

99
namespace fs = boost::filesystem;
1010

1111
/// Reads the whole file at `filePath` into a string.
///
/// @param filePath  file to read.
/// @return the file's content, or an empty string if the file could not be
///         opened or an exception was raised (the failure is logged to
///         std::cerr in both cases).
// NOTE(review): the stream is opened in text mode; confirm this is intended
// if callers use it for binary object data.
std::string read_file(const fs::path &filePath) {
  try {
    std::ifstream input(filePath.string());

    // Bail out early when the file cannot be opened.
    if (!input.is_open()) {
      std::cerr << "Error opening the file for reading: " << filePath
                << std::endl;
      return "";
    }

    // Drain the entire stream buffer into memory.
    std::stringstream contents;
    contents << input.rdbuf();
    return contents.str();
  } catch (const std::exception &failure) {
    std::cerr << "Exception: " << failure.what() << std::endl;
    return "";
  }
}
3132

32-
/// Creates (or truncates) the file at `filePath` and writes `content` to it.
///
/// @param filePath  file to create.
/// @param content   text to write; nothing is written when it is empty.
/// @return true if the file was opened and the content was written
///         successfully; false otherwise (the failure is logged to
///         std::cerr).
bool create_file(const fs::path &filePath, const std::string &content = "") {
  try {
    std::ofstream fileStream(filePath.string());

    if (!fileStream.is_open()) {
      std::cerr << "Error opening the file for writing: " << filePath
                << std::endl;
      return false;
    }

    if (!content.empty()) {
      fileStream << content;
    }

    // Flush before reporting success: without this a failed write (for
    // example a full disk) would only surface in the destructor, after the
    // function had already returned true.
    fileStream.flush();
    if (!fileStream) {
      std::cerr << "Error writing to the file: " << filePath << std::endl;
      return false;
    }

    std::cout << "File created successfully: " << filePath << std::endl;
    return true;
  } catch (const std::exception &e) {
    std::cerr << "Exception: " << e.what() << std::endl;
    return false;
  }
}
5557

56-
std::string sha1_hexdigest(const std::string& data) {
57-
boost::uuids::detail::sha1 sha1;
58-
sha1.process_bytes(data.data(), data.size());
58+
std::string sha1_hexdigest(const std::string &data) {
59+
boost::uuids::detail::sha1 sha1;
60+
sha1.process_bytes(data.data(), data.size());
5961

60-
unsigned int digest[5];
61-
sha1.get_digest(digest);
62+
unsigned int digest[5];
63+
sha1.get_digest(digest);
6264

63-
std::stringstream ss;
64-
ss << std::hex << std::setfill('0');
65-
for (unsigned int i : digest) {
66-
ss << std::setw(8) << i;
67-
}
68-
return ss.str();
65+
std::stringstream ss;
66+
ss << std::hex << std::setfill('0');
67+
for (unsigned int i : digest) {
68+
ss << std::setw(8) << i;
69+
}
70+
return ss.str();
6971
}
72+
73+
/// Encodes raw bytes as lowercase hexadecimal text, two digits per byte.
///
/// @param binaryData  bytes to encode (may contain NUL bytes).
/// @return a string of length `2 * binaryData.size()`.
std::string binaryToHex(const std::string &binaryData) {
  // A digit table avoids per-byte stream formatting, which is slower and
  // depends on the global locale.
  static constexpr char kHexDigits[] = "0123456789abcdef";

  std::string hex;
  hex.reserve(binaryData.size() * 2);
  for (const unsigned char byte : binaryData) {
    hex.push_back(kHexDigits[byte >> 4]);
    hex.push_back(kHexDigits[byte & 0x0F]);
  }
  return hex;
}
81+
82+
/// Decodes hexadecimal text into raw bytes, two hex digits per byte.
///
/// @param hexString  hex digits (upper or lower case), even in number.
/// @return the decoded bytes; empty input yields an empty string.
/// @throws std::invalid_argument if the length is odd or any character is
///         not a hex digit.
std::string hexToBinary(const std::string &hexString) {
  // Strict per-character decoding: std::stoi would also accept signs,
  // leading whitespace and a "0x" prefix, silently producing wrong bytes.
  const auto nibble = [](char c) -> int {
    if (c >= '0' && c <= '9') {
      return c - '0';
    }
    if (c >= 'a' && c <= 'f') {
      return c - 'a' + 10;
    }
    if (c >= 'A' && c <= 'F') {
      return c - 'A' + 10;
    }
    return -1;
  };

  // An odd digit count would otherwise decode a lone trailing nibble as a
  // full byte.
  if (hexString.size() % 2 != 0) {
    throw std::invalid_argument("hexToBinary: odd number of hex digits");
  }

  std::string binaryData;
  binaryData.reserve(hexString.size() / 2);
  for (std::size_t i = 0; i < hexString.size(); i += 2) {
    const int high = nibble(hexString[i]);
    const int low = nibble(hexString[i + 1]);
    if (high < 0 || low < 0) {
      throw std::invalid_argument("hexToBinary: invalid hex digit");
    }
    binaryData.push_back(static_cast<char>((high << 4) | low));
  }
  return binaryData;
}

0 commit comments

Comments
 (0)