-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1 from FeBalla/dev
First Release
- Loading branch information
Showing
7 changed files
with
181 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,6 @@ | ||
.env | ||
logs | ||
|
||
# Byte-compiled / optimized / DLL files | ||
__pycache__/ | ||
*.py[cod] | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,28 @@ | ||
# github-repo-collecter | ||
Script to collect all repositories from a GitHub organization via GitHub API | ||
# GitHub Classroom Collector | ||
A simple Python script to collect all **private** repositories from a GitHub organization. It is intended for downloading all the submissions of a GitHub Classroom assignment, along with information such as the last commit date. | ||
|
||
## How to use? | ||
1. First of all, you need to install all the dependencies specified in `requirements.txt`. You can use: | ||
```bash | ||
pip install -r requirements.txt | ||
``` | ||
|
||
2. Now you need to create a `.env` file like [example.env](./example.env): | ||
- `PREFIX`: Sets a prefix for the repository names. This is useful to download only the repositories from a specific assignment. | ||
- `TOKEN`: Here you need to use a [personal access token](https://docs.github.com/es/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token) for your GitHub account. | ||
- `USER`: Sets the username of your GitHub account. | ||
- `ORGA`: Sets the organization's name where the repositories are located. | ||
- `PER_PAGE`: Represents how many repositories you want to get from a single API call (it works paginated). You can use 100 as default. | ||
- `PAGES`: Represents how many pages of size _PER_PAGE_ will be requested from the GitHub API. For example, if you have 500 repositories and _PER_PAGE=100_, then _PAGES_ should be 5. However, if you don't want to overthink it, just set a higher value; it will only take a little longer. | ||
|
||
3. Run the [main.py](./main.py) module and wait until all the repositories are downloaded. | ||
|
||
## Logs | ||
Once execution starts, a `logs` directory is created with 2 files: | ||
- `repos.txt`: Has the information of the repositories that will be cloned. Each line has the following format: | ||
```txt | ||
repository-name last-commit-sha last-commit-author last-commit-date | ||
``` | ||
|
||
- `runtime.txt`: Has the runtime logs with the clone results for each repository. | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
PREFIX = "my-assignment" | ||
TOKEN = "my-personal-access-token" | ||
USER = "MyUser" | ||
ORGA = "My-Organization" | ||
PER_PAGE = 100 | ||
PAGES = 1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
import os | ||
from datetime import datetime | ||
|
||
|
||
class Logger:
    '''Handle execution logs (repositories list, runtime messages, etc.)

    Two text files are created (truncating any previous content) inside
    `folder_name`:

    - `repos.txt`: one line per repository that is expected to be cloned.
    - `runtime.txt`: timestamped messages written while the script runs.

    Call `finalize()` when done, or use the instance in a `with` statement so
    the files are closed even if an error occurs.
    '''

    # Format of the `last_commit_date` strings found in the repositories data.
    _DATE_FORMAT = "%d/%m/%Y-%H:%M"

    def __init__(self, folder_name="logs", encoding="utf-8"):
        self.folder_name = folder_name
        os.makedirs(self.folder_name, exist_ok=True)

        self.repositories_data_file = open(
            os.path.join(self.folder_name, "repos.txt"), "w", encoding=encoding
        )
        try:
            print("Repositories expected to clone:", file=self.repositories_data_file)
            self.runtime_logs_file = open(
                os.path.join(self.folder_name, "runtime.txt"), "w", encoding=encoding
            )
        except Exception:
            # Don't leak the first handle when the second file can't be set up.
            self.repositories_data_file.close()
            raise

        print("Runtime logs:", file=self.runtime_logs_file)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.finalize()
        return False

    @classmethod
    def _chronological_key(cls, repo):
        '''Builds a sort key that orders repositories by their real commit time'''
        raw_date = repo["last_commit_date"]

        try:
            return (0, datetime.strptime(raw_date, cls._DATE_FORMAT), "")
        except (TypeError, ValueError):
            # Values in an unexpected format go last, ordered by their text.
            return (1, datetime.min, str(raw_date))

    def save_repositories_data(self, repositories_data):
        '''Writes a log-file with all the information of the repositories that will be cloned

        Lines are ordered from the oldest to the newest last commit. The dates
        are parsed before sorting because the day-first text format does not
        sort chronologically when compared as plain strings.
        '''
        ordered = sorted(repositories_data, key=self._chronological_key)

        for repo in ordered:
            print(
                repo["name"],
                repo["last_commit_sha"],
                repo["last_commit_author"],
                repo["last_commit_date"],
                file=self.repositories_data_file,
            )

        # Push the data to disk right away so it survives an abrupt stop.
        self.repositories_data_file.flush()
        os.fsync(self.repositories_data_file.fileno())

    def write_runtime_log(self, msg):
        '''Writes a runtime-log message in the log-file with the current time'''
        current_time = datetime.now().strftime("%H:%M:%S")

        print(f"({current_time}) {msg}", file=self.runtime_logs_file)
        self.runtime_logs_file.flush()
        os.fsync(self.runtime_logs_file.fileno())

    def finalize(self):
        '''Ends the logger handler instance, saving and closing the used files'''
        self.runtime_logs_file.close()
        self.repositories_data_file.close()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
import os | ||
from dotenv import dotenv_values | ||
from git import Repo, rmtree | ||
from utilities import get_assignment_repos_names, get_repository_information | ||
from logger import Logger | ||
|
||
# Settings come from the local `.env` file; clones are stored in a sibling
# directory named after the assignment prefix.
config = dotenv_values(".env")
SAVE_DIR = os.path.join("..", config["PREFIX"])

if __name__ == "__main__":
    CLONED = 0
    FAILED = 0

    print("(1/6) Initializing")
    logger = Logger()

    try:
        print(f"(2/6) Getting repositories list from {config['ORGA']} with prefix: {config['PREFIX']}")
        repo_names = get_assignment_repos_names(config["PREFIX"], int(config["PER_PAGE"]), int(config["PAGES"]))

        print("(3/6) Getting repositories data")
        repositories_data = []
        for name in repo_names:
            # A single unreadable repository must not abort the whole run.
            try:
                repositories_data.append(get_repository_information(name, config["PREFIX"]))
            except Exception as error:
                logger.write_runtime_log(f"Couldn't get data: {name}")
                logger.write_runtime_log(f"  Reason: {error!r}")
                FAILED += 1

        print("(4/6) Saving repositories data")
        logger.save_repositories_data(repositories_data)

        os.makedirs(SAVE_DIR, exist_ok=True)

        print("(5/6) Cloning repositories")
        for repo in repositories_data:
            target_dir = os.path.join(SAVE_DIR, repo["name"])

            try:
                # Clone without a working tree, check out the recorded commit,
                # then drop the `.git` folder so only that snapshot remains.
                cloned_repo = Repo.clone_from(repo["clone_url"], target_dir, no_checkout=True)
                cloned_repo.git.checkout(repo["last_commit_sha"])
                rmtree(os.path.join(target_dir, ".git"))

                logger.write_runtime_log(f"Cloned succesfully: {repo['name']}")
                CLONED += 1
            except Exception as error:
                # Keep going with the next repository, but record why this one failed.
                logger.write_runtime_log(f"Couldn't clone: {repo['name']}")
                logger.write_runtime_log(f"  Reason: {error!r}")
                FAILED += 1
    finally:
        # Close the log files even when one of the steps above raises.
        logger.finalize()

    print("(6/6) Process completed")
    print(f"- Cloned: {CLONED}")
    print(f"- Failed: {FAILED}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
GitPython==3.1.27 | ||
python-dotenv==0.20.0 | ||
python_dateutil==2.8.2 | ||
requests==2.26.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
from datetime import datetime | ||
from dotenv import dotenv_values | ||
from dateutil import tz | ||
import requests | ||
|
||
# Settings read from the local `.env` file; the functions below look up its ORGA, USER and TOKEN keys.
config = dotenv_values(".env") | ||
# Web host used to build the clone URL of each repository.
BASE_URL = "https://github.com" | ||
# REST API host used for the organization-repositories and commits requests.
API_URL = "https://api.github.com" | ||
|
||
|
||
def get_assignment_repos_names(repository_prefix="", per_page=100, total_pages=2):
    '''Gets names of all private repositories from a organization starting with `repository_prefix`

    Requests up to `total_pages` pages of `per_page` repositories each and
    stops early as soon as a page comes back empty. Returns the matching
    names as an alphabetically sorted list without duplicates.

    Raises `requests.HTTPError` when the API answers with an error status
    (e.g. invalid credentials), instead of failing later on an unexpected
    payload.
    '''
    assignment_repos = set()
    url = f"{API_URL}/orgs/{config['ORGA']}/repos"
    credentials = (config["USER"], config["TOKEN"])

    for page in range(1, total_pages + 1):
        query = {"type": "private", "per_page": per_page, "page": page}
        # The timeout keeps a stalled connection from hanging the script forever.
        response = requests.get(url, params=query, auth=credentials, timeout=30)
        response.raise_for_status()

        org_repos = response.json()
        if not org_repos:
            # An empty page means there is nothing left to list.
            break

        assignment_repos.update(
            repo["name"] for repo in org_repos if repo["name"].startswith(repository_prefix)
        )

    return sorted(assignment_repos)
|
||
|
||
def get_last_commit_date(commit_data):
    '''Returns the author date of a commit as local-time text (`dd/mm/YYYY-HH:MM`)

    `commit_data` is one commit entry as returned by the GitHub API; its
    author date is read as a UTC timestamp and shifted to the local timezone.
    '''
    raw_timestamp = commit_data["commit"]["author"]["date"]

    utc_moment = datetime.strptime(raw_timestamp, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=tz.tzutc())
    local_moment = utc_moment.astimezone(tz.tzlocal())

    return local_moment.strftime("%d/%m/%Y-%H:%M")
|
||
|
||
def get_repository_information(repository_name, repository_prefix=""):
    '''Gets information for a specific repository

    Returns a dict with the keys `name`, `clone_url`, `last_commit_sha`,
    `last_commit_author` and `last_commit_date`, built from the first commit
    listed by the API for the repository.

    `last_commit_author` is derived from the repository name by removing the
    leading `<repository_prefix>-` part; it is not read from the commit.

    Raises `requests.HTTPError` when the API answers with an error status and
    `ValueError` when the repository has no commits.
    '''
    url = f"{API_URL}/repos/{config['ORGA']}/{repository_name}/commits"
    # Only the first listed commit is used, so a single-item page is enough.
    response = requests.get(
        url,
        params={"per_page": 1},
        auth=(config["USER"], config["TOKEN"]),
        timeout=30,
    )
    response.raise_for_status()

    commits = response.json()
    if not commits:
        raise ValueError(f"Repository has no commits: {repository_name}")
    data = commits[0]

    # Strip only the leading "<prefix>-"; a plain replace would also remove
    # later occurrences (and every hyphen when the prefix is empty).
    prefix_marker = f"{repository_prefix}-"
    if repository_prefix and repository_name.startswith(prefix_marker):
        author = repository_name[len(prefix_marker):]
    else:
        author = repository_name

    repository_info = {
        "name": repository_name,
        "clone_url": f"{BASE_URL}/{config['ORGA']}/{repository_name}.git",
        "last_commit_sha": data["sha"],
        "last_commit_author": author,
        "last_commit_date": get_last_commit_date(data),
    }

    return repository_info