Skip to content

Commit

Permalink
[#3] download date with seconds
Browse files: browse the repository at this point in the history
  • Loading branch information
kocimski committed Aug 15, 2020
1 parent 0c1e1bf commit c63d613
Show file tree
Hide file tree
Showing 8 changed files with 2,240 additions and 2,241 deletions.

Large diffs are not rendered by default.

932 changes: 0 additions & 932 deletions data/issues_data5ccef963-6b66-4871-8959-66abf8b8c498.csv

This file was deleted.

932 changes: 932 additions & 0 deletions data/issues_dataa758a624-b518-4b21-98ea-b8de45734bd1.csv

Large diffs are not rendered by default.

369 changes: 369 additions & 0 deletions data/mail_data5afb008d-62a5-4e14-a0fb-095977a91eeb.csv

Large diffs are not rendered by default.

369 changes: 0 additions & 369 deletions data/mails_data833c3053-e431-40c6-bb6a-24c4d19e9b67.csv

This file was deleted.

322 changes: 0 additions & 322 deletions data/users_data94b2e7c2-f2f3-40ba-a8cb-fc1f287feafa.csv

This file was deleted.

322 changes: 322 additions & 0 deletions data/users_datac5fc8ea1-4840-415b-a057-e06546b8e19f.csv

Large diffs are not rendered by default.

5 changes: 2 additions & 3 deletions src/load_data/load_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ def __download_data(data_download_url: str, dtype: str) -> List[Dict[str, str]]:
row_msg = driver.find_element_by_id(f"msg-{i}")
author = row_msg.find_element_by_class_name("author")
subject = row_msg.find_element_by_class_name("subject")
date = row_msg.find_element_by_class_name("date")
try:
content_url = subject.find_element_by_tag_name("a")
except NoSuchElementException:
Expand All @@ -66,7 +65,6 @@ def __download_data(data_download_url: str, dtype: str) -> List[Dict[str, str]]:
"id": record_id,
"author": author.text,
"subject": subject.text,
"date": date.text,
"content_url": content_url.get_attribute("href") or None,
"dtype": dtype
})
Expand Down Expand Up @@ -110,6 +108,7 @@ def __save_data_to_path(raw_data: List[Dict[str, str]], path: str) -> bool:
"""
data_to_save: List[Dict[str, str]] = []
for record in raw_data:
record["date"] = __get_xml_data(record.get("content_url"), "date")
record["content"] = __get_xml_data(record.get("content_url"), "contents")
data_to_save.append(record)
try:
Expand Down Expand Up @@ -160,6 +159,6 @@ def load_data(url: str, path: str, dtype: str) -> None:

urls = [DATA_USERS_DOWNLOAD_URL, DATA_COMMITS_DOWNLOAD_URL, DATA_ISSUES_DOWNLOAD_URL, DATA_MAIL_DOWNLOAD_URL]

for url, name in zip(urls, ["users", "commits", "issues", "mails"]):
for url, name in zip(urls, ["users", "commits", "issues", "mail"]):
filename = f"/{name}_data{uuid.uuid4()}.csv"
load_data(url, path=args.path + filename if args.path else OUTPUT_DIR + filename, dtype=name)

0 comments on commit c63d613

Please sign in to comment.