From 4b209b82c1c5ebe2d3edd20b35d36b595e4e73e8 Mon Sep 17 00:00:00 2001 From: wilsonfreitas Date: Wed, 30 Mar 2022 06:11:03 -0300 Subject: [PATCH] Organized indentation --- cranscrape.py | 14 +++--- parse.py | 123 +++++++++++++++++++++++++------------------------- topic.py | 3 +- 3 files changed, 72 insertions(+), 68 deletions(-) diff --git a/cranscrape.py b/cranscrape.py index 45b0eeee..62f2a2df 100644 --- a/cranscrape.py +++ b/cranscrape.py @@ -60,13 +60,15 @@ 'https://cran.r-project.org/web/packages/bizdays/index.html', ] + def get_data(url): - res = requests.get(url) - m = reu.search(res.text) - if m: - return dict(cran=url, github=m.group(0), repo=m.group(1)) - else: - return dict(cran=url, github='', repo='') + res = requests.get(url) + m = reu.search(res.text) + if m: + return dict(cran=url, github=m.group(0), repo=m.group(1)) + else: + return dict(cran=url, github='', repo='') + all_data = [get_data(url) for url in urls] df = pd.DataFrame(all_data) diff --git a/parse.py b/parse.py index 0945ea7f..8a51293c 100644 --- a/parse.py +++ b/parse.py @@ -10,83 +10,84 @@ # using an access token g = Github(os.environ['GITHUB_ACCESS_TOKEN']) + def extract_repo(url): - reu = re.compile('^https://github.com/([\w-]+/[-\w\.]+)$') - m = reu.match(url) - if m: - return m.group(1) - else: - return '' + reu = re.compile('^https://github.com/([\w-]+/[-\w\.]+)$') + m = reu.match(url) + if m: + return m.group(1) + else: + return '' def get_last_commit(repo): - try: - if repo: - r = g.get_repo(repo) - cs = r.get_commits() - return cs[0].commit.author.date.strftime('%Y-%m-%d') - else: - return '' - except: - print('ERROR' + repo) - return 'error' + try: + if repo: + r = g.get_repo(repo) + cs = r.get_commits() + return cs[0].commit.author.date.strftime('%Y-%m-%d') + else: + return '' + except: + print('ERROR' + repo) + return 'error' class Project(Thread): - def __init__(self, match, section): - super().__init__() - self._match = match - self.regs = None - self._section = section - - def run(self): - m = self._match - is_github = 'github.com' in m.group(2) - is_cran = 'cran.r-project.org' in m.group(2) - repo = extract_repo(m.group(2)) - last_commit = get_last_commit(repo) - self.regs = dict( - project=m.group(1), - section=self._section, - last_commit=last_commit, - url=m.group(2), - description=m.group(3), - github=is_github, - cran=is_cran, - repo=repo - ) + def __init__(self, match, section): + super().__init__() + self._match = match + self.regs = None + self._section = section + + def run(self): + m = self._match + is_github = 'github.com' in m.group(2) + is_cran = 'cran.r-project.org' in m.group(2) + repo = extract_repo(m.group(2)) + last_commit = get_last_commit(repo) + self.regs = dict( + project=m.group(1), + section=self._section, + last_commit=last_commit, + url=m.group(2), + description=m.group(3), + github=is_github, + cran=is_cran, + repo=repo + ) projects = [] with open('README.md', 'r', encoding='utf8') as f: - ret = re.compile('^(#+) (.*)$') - rex = re.compile('^\s*- \[(.*)\]\((.*)\) - (.*)$') - m_titles = [] - last_head_level = 0 - for line in f: - m = rex.match(line) - if m: - p = Project(m, ' > '.join(m_titles[1:])) - p.start() - projects.append(p) - else: - m = ret.match(line) - if m: - hrs = m.group(1) - if len(hrs) > last_head_level: - m_titles.append(m.group(2)) + ret = re.compile('^(#+) (.*)$') + rex = re.compile('^\s*- \[(.*)\]\((.*)\) - (.*)$') + m_titles = [] + last_head_level = 0 + for line in f: + m = rex.match(line) + if m: + p = Project(m, ' > '.join(m_titles[1:])) + p.start() + projects.append(p) else: - for n in range(last_head_level - len(hrs) + 1): - m_titles.pop() - m_titles.append(m.group(2)) - last_head_level = len(hrs) + m = ret.match(line) + if m: + hrs = m.group(1) + if len(hrs) > last_head_level: + m_titles.append(m.group(2)) + else: + for n in range(last_head_level - len(hrs) + 1): + m_titles.pop() + m_titles.append(m.group(2)) + last_head_level = len(hrs) while True: - checks = [not p.is_alive() for p in projects] - if all(checks): - break + checks = [not p.is_alive() for p in projects] + if all(checks): + break projects = [p.regs for p in projects] df = pd.DataFrame(projects) diff --git a/topic.py b/topic.py index 29c84586..714b7620 100644 --- a/topic.py +++ b/topic.py @@ -21,4 +21,5 @@ for repo in repos: if repo.stargazers_count < 1000: break - print(repo.name, repo.stargazers_count, repo.language, repo.html_url, repo.description, repo.updated_at, repo.archived) + print(repo.name, repo.stargazers_count, repo.language, repo.html_url, + repo.description, repo.updated_at, repo.archived)