-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgit_repository.py
276 lines (217 loc) · 10.5 KB
/
git_repository.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
#!/usr/bin/env python3
"""
Implements a class that represents a Git repository
[1] Git Repository Format Versions:
https://github.com/git/git/blob/master/Documentation/technical/repository-version.txt
[2] git-config documentation:
https://git-scm.com/docs/git-config
[3] https://codewords.recurse.com/issues/three/unpacking-git-packfiles
"""
import configparser
import os
import glob
from pathlib import Path
class GitRepository:
    """A Git repository (worktree directory plus its `.git` directory)

    Attributes:
        worktree_dir - pathlib.Path of the top-level working directory
        git_dir      - pathlib.Path of the `.git` directory
        git_config   - configparser.ConfigParser holding `.git/config`

    LIMITATION: Packfiles [3] are not supported.
    """

    # Name of the Git metadata directory inside the worktree
    git_subdir_name = ".git"

    @staticmethod
    def get_root_git_dir(git_subdir):
        """ Find the root Git directory for the given repo subdirectory

        Git can be invoked from any subdirectory within a Git project. This
        method returns the root Git directory corresponding to the repository
        containing `git_subdir`.

        INPUT:
            git_subdir - a subdirectory within a Git repository (str or Path,
            absolute or relative to the current working directory)
        RETURN:
            Absolute path (pathlib.Path) of the Git repository root directory
            or None if not a Git repository
        """
        # Walk an *absolute* path. For a relative path such as Path("."),
        # `.parent` is the path itself, so an upward walk would stop before
        # ever leaving the current working directory.
        start = Path(os.path.abspath(git_subdir))

        # Check the directory itself first, then every ancestor up to the
        # file-system root.
        for candidate in (start, *start.parents):
            if (candidate / GitRepository.git_subdir_name).exists():
                return candidate

        return None

    def _match_object_files(self, object_hash: str):
        """Locate the loose-object file(s) matching a (possibly short) hash

        RETURN:
            (file_dir, matches) - the `.git/objects/xx` directory derived from
            the first two hash characters and the list of paths (str) in it
            whose names start with the remaining hash characters
        """
        file_dir = Path(self.git_dir) / "objects" / object_hash[0:2]

        # With 2 or fewer characters there is no file-name part left and the
        # pattern below would match the directory itself rather than an object.
        if len(object_hash) <= 2:
            return file_dir, []

        file_path = file_dir / object_hash[2:]
        # Escape the literal part so that glob metacharacters in the
        # repository path (e.g. "[") are not interpreted as a pattern.
        return file_dir, glob.glob(glob.escape(str(file_path)) + "*")

    def is_object_in_repo(self, object_hash: str):
        """Check whether this repo contains an object with SHA matching object_hash

        The hash may be shortened; it matches only if it identifies exactly
        one object.

        INPUT:
            object_hash - Git object hash to look for
        RETURN:
            True if `object_hash` exists (and is unambiguous), False otherwise
        """
        _, matches = self._match_object_files(object_hash)
        return len(matches) == 1

    def _read_head(self):
        """Parse .git/HEAD

        RETURN:
            (ref_file_name, sha) - for a symbolic HEAD ("ref: refs/heads/x")
            this is ("refs/heads/x", None); for a detached HEAD, which holds
            a raw SHA, this is (None, "<sha>")
        """
        head_file_path = os.path.join(self.git_dir, "HEAD")
        with open(head_file_path, "r", encoding='utf-8') as head_file:
            file_contents = head_file.read().strip()

        if file_contents.startswith("ref:"):
            return file_contents.split(':', 1)[1].strip(), None

        return None, file_contents

    def get_head_rev(self):
        """ Get the revision pointed to by .git/HEAD

        RETURN:
            The commit SHA that HEAD resolves to, or None if HEAD points at a
            branch that has no commits yet (its reference file is missing)
        """
        ref_file_name, detached_sha = self._read_head()

        # Detached HEAD: the file contains the SHA itself
        if ref_file_name is None:
            return detached_sha or None

        # Get the contents of the Git reference file pointed to by `.git/HEAD`
        ref_file_path = os.path.join(self.git_dir, ref_file_name)
        try:
            with open(ref_file_path, "r", encoding='utf-8') as ref_file:
                return ref_file.read().rstrip()
        except FileNotFoundError:
            return None

    def update_head_rev(self, new_head_sha):
        """ Update the revision that .git/HEAD resolves to

        For a symbolic HEAD the reference file it points to is (over)written;
        for a detached HEAD the SHA is written to .git/HEAD itself.

        INPUT:
            new_head_sha - new SHA for .git/HEAD
        """
        ref_file_name, _ = self._read_head()

        if ref_file_name is None:
            target_path = os.path.join(self.git_dir, "HEAD")
        else:
            target_path = os.path.join(self.git_dir, ref_file_name)
            # Branch names may contain "/" (e.g. refs/heads/feature/x), so the
            # parent directory is not guaranteed to exist yet.
            os.makedirs(os.path.dirname(target_path), exist_ok=True)

        with open(target_path, "w", encoding='utf-8') as ref_file:
            ref_file.write(new_head_sha)

    def get_object_path(self, object_hash: str):
        """ Get the directory and the full path of a Git object

        This method generates Git repository path based on the object hash. It
        assumes that the corresponding object already exists in the repo.

        Note that the object hash could be provided in its full form (e.g.
        2c250da0045dc138bf12e2f0217bd30d375b44d7), or in a shortened form (e.g.
        2c25). Both versions are accepted, but "short" hash must uniquely
        identify the object within this repo. The shortened form is expanded
        to its full form in the returned path.

        LIMITATION: Packfiles [3] are not supported.

        INPUT:
            object_hash - Git object hash to get the path for
        RETURN:
            dir, path - the directory and the full path of the object
            corresponding to the input Git object
        RAISES:
            AssertionError if `object_hash` does not identify exactly one
            object in this repo
        """
        file_dir, matches = self._match_object_files(object_hash)

        # Raised explicitly (rather than via `assert`) so that the check is
        # not stripped under `python -O`; the exception type is unchanged.
        if len(matches) != 1:
            raise AssertionError(
                f"Object {object_hash} not found (or ambiguous) in {self.git_dir}")

        return (file_dir, Path(matches[0]))

    def __init__(self, directory=".", force_init=True):
        """Open the Git repository containing `directory` or create a new one

        If `directory` (or one of its parent directories) already contains a
        `.git` directory, that repository is opened and its config is loaded.
        Otherwise a new, empty repository is created in `directory`.

        INPUT:
            directory - worktree directory (or a subdirectory of one)
            force_init - if True and the repository already exists, print the
            "Reinitialized ..." message like `git init` would
        """
        # The worktree directory for this repository
        self.worktree_dir = Path(os.path.normpath(directory))
        # The .git directory for this repository
        self.git_dir = self.worktree_dir / GitRepository.git_subdir_name
        # Contents of the Git config file for this repository (i.e. .git/config)
        self.git_config = None

        # Find the _top_ working/Git directory
        if not self.git_dir.is_dir():
            repo_path = GitRepository.get_root_git_dir(self.worktree_dir)
            if repo_path is not None:
                self.worktree_dir = repo_path
                self.git_dir = repo_path / GitRepository.git_subdir_name

        # Check whether this is an existing repo
        if self.git_dir.is_dir():
            if force_init:
                print(f"Reinitialized existing Git repository in "
                      f"{os.path.abspath(self.git_dir)}/")
            self.load_git_config()
            return

        # Create standard Git directories. Note that we are not creating:
        #   * .git/hooks
        #   * .git/info
        # as `git` would.
        # NOTE: create `.git` inside the worktree, not in the process' CWD.
        os.makedirs(self.git_dir)
        self.create_git_dir("branches")
        self.create_git_dir("objects")
        self.create_git_dir("refs", "tags")
        self.create_git_dir("refs", "heads")

        # Create .git/description
        with open(self.get_git_file_path("description"), "w", encoding='utf-8') as desc_file:
            desc_file.write(
                "Unnamed repository; edit this file 'description' "
                "to name the repository.\n")

        # Create .git/HEAD
        with open(self.get_git_file_path("HEAD"), "w", encoding='utf-8') as head_file:
            head_file.write("ref: refs/heads/master\n")

        # Create .git/config
        self.create_default_config()
        with open(self.get_git_file_path("config"), "w", encoding='utf-8') as config_file:
            self.git_config.write(config_file)

        # Print this for consistency with Git
        print(f"Initialized empty Git repository in {os.path.abspath(self.git_dir)}")

    @staticmethod
    def get_repo(git_repo_subdir):
        """Retrieve the Git repository for 'git_repo_subdir'

        It returns an instance of GitRepository that corresponds to the input
        Git repository subdirectory. If the input directory is not a
        (sub)directory in a Git repository, return None.

        INPUT:
            Directory for which to find a Git repository
        RETURN:
            GitRepository on success, None on failure
        """
        path = GitRepository.get_root_git_dir(git_repo_subdir)
        if path is None:
            return None

        # force_init=False: open silently, without the "Reinitialized" message
        return GitRepository(path, False)

    def create_git_dir(self, *directory):
        '''Create a sub-directory in the .git directory

        Treats every component in *directory as a subdirectory of the
        preceding item in the list, e.g. create_git_dir("refs", "heads")
        creates `.git/refs/heads`.

        RAISES:
            Exception if the directory already exists
        '''
        # The existence check must look inside .git, not relative to the CWD
        full_path = os.path.join(self.git_dir, *directory)
        if os.path.exists(full_path):
            raise Exception(f"Git dirctory {directory} already exists!")

        os.makedirs(full_path)

    def get_git_file_path(self, git_file):
        '''Get the path for the requested git_file

        This is just a convenience method to contextualise calls to
        os.path.join.
        '''
        return os.path.join(self.git_dir, git_file)

    def load_git_config(self):
        '''Load the Git config file for this repository

        This function also does a minimal sanity-check of the Git config file
        for this repository.

        RAISES:
            Exception if the config file is missing or declares an unsupported
            repository format version
        '''
        # Read the configuration file
        self.git_config = configparser.ConfigParser()
        config_file = self.get_git_file_path("config")

        # Check that it exists
        if not os.path.exists(config_file):
            raise Exception("Repository local configuration file missing")

        self.git_config.read([config_file])

        # Check the repository version - only Version '0' is supported, see [1]
        format_version = int(self.git_config.get("core", "repositoryformatversion"))
        if format_version != 0:
            raise Exception(f"Unsupported repositoryformatversion {format_version}")

    def create_default_config(self):
        '''Create the default Git config for this repository'''
        self.git_config = configparser.ConfigParser()

        # The Git config section for the following fields. `git init` only
        # creates the `core` section.
        self.git_config.add_section("core")

        # From [2], the repository format and layout version. `git init` sets
        # this to 0.
        self.git_config.set("core", "repositoryformatversion", "0")

        # From [2], filemode tells Git if the executable bit of files in the
        # working tree is to be honored. Git sets this to true. Gfg does not
        # (to keep things simple)
        self.git_config.set("core", "filemode", "false")

        # From [2], if true this repository is assumed to be bare and has no
        # working directory associated with it. Git sets this to `false` by
        # default.
        self.git_config.set("core", "bare", "false")