-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Add api_tools.py for calling the WikiTree API. Start with just a single method (to see if an id is a redirect, i.e. has been merged into another profile).
* Update graph_compare.py to use networkit.
- Loading branch information
Showing
6 changed files
with
168 additions
and
41 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
""" | ||
Tools for dealing with WikiTree API. | ||
""" | ||
|
||
import json | ||
import re | ||
import urllib.parse | ||
import urllib.request | ||
|
||
|
||
def api_req(**params):
    """Send a request to the WikiTree API and return the decoded JSON reply.

    The keyword arguments are form-encoded and sent as the request body
    (supplying ``data`` makes ``urlopen`` issue a POST).

    Returns:
        The object produced by ``json.loads`` on the response body.

    Raises:
        urllib.error.URLError: If the request itself fails.
        ValueError: If the response body is not valid JSON.
    """
    encoded_params = urllib.parse.urlencode(params).encode("utf-8")
    # Use the response as a context manager so the underlying connection is
    # closed even when reading or JSON decoding raises.
    with urllib.request.urlopen("https://api.wikitree.com/api.php",
                                data=encoded_params) as resp:
        return json.loads(resp.read())
|
||
def is_redirect(profile_num_or_id):
    """Look up a profile by number or id and report whether it is a redirect.

    Issues a ``getBio`` request and inspects the first element of the reply.
    When that element's status is 0 and its bio consists solely of a
    ``#REDIRECT [[...]]`` marker, the text inside the brackets (the id of the
    profile now redirected to) is returned. In every other case the result
    is ``None``.
    """
    first = api_req(action="getBio", key=profile_num_or_id)[0]
    # A status of 0 means success; failures carry a message instead,
    # e.g. "Invalid page id".
    if first["status"] != 0:
        return None
    redirect = re.fullmatch(r"#REDIRECT \[\[(.*)\]\]", first["bio"])
    if redirect is None:
        # An ordinary biography, not a redirect.
        return None
    # The captured group is the wikitree_id of the redirect target.
    return redirect.group(1)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
""" | ||
Compare two dumps to see details about how profiles were added/deleted. | ||
""" | ||
|
||
import argparse | ||
import csv | ||
import partition_tools | ||
from pathlib import Path | ||
import random | ||
|
||
import api_tools | ||
|
||
|
||
def load_all_profiles(version, debug_limit_read=None):
    """Collect the "User ID" of every row in a version's people dump.

    Reads ``data/version/<version>/dump_people_users.csv`` (tab-delimited,
    no quoting) relative to the current working directory.

    Args:
        version: Name of the dump version directory to read.
        debug_limit_read: If not None, stop after this many data rows so a
            quick debugging run does not have to read the whole dump.

    Returns:
        A set of the "User ID" column values, as strings.
    """
    all_profiles = set()
    dump_path = Path("data", "version", version, "dump_people_users.csv")
    # newline="" is what the csv module asks for; an explicit encoding keeps
    # the result independent of the platform's locale default.
    # NOTE(review): assumes the dump is UTF-8 -- confirm against the data.
    with open(dump_path, "r", encoding="utf-8", newline="") as f:
        csv_reader = csv.DictReader(f, delimiter='\t', quoting=csv.QUOTE_NONE)
        for rows_read, row in enumerate(csv_reader):
            if debug_limit_read is not None and rows_read >= debug_limit_read:
                break
            all_profiles.add(row["User ID"])
    return all_profiles
|
||
def main():
    """Compare two dump versions and estimate how many deletions were merges.

    Parses the old and new version names from the command line, loads the
    profile ids of both dumps, prints the sizes of each set and of the
    added/deleted differences, then looks up a random sample of the deleted
    profiles through the API to see what share of them are redirects.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("old_version")
    parser.add_argument("new_version")
    parser.add_argument("--sample-api", type=int, default=1000,
                        help="Number of profiles to try looking up via API.")
    args = parser.parse_args()

    old_profiles = load_all_profiles(args.old_version)
    new_profiles = load_all_profiles(args.new_version)

    added_profiles = new_profiles - old_profiles
    deleted_profiles = old_profiles - new_profiles

    print(f"Version {args.new_version} vs. {args.old_version}")
    print(f" * {len(old_profiles)=:_}")
    print(f" * {len(new_profiles)=:_}")
    print(f" * {len(added_profiles)=:_}")
    print(f" * {len(deleted_profiles)=:_}")

    # random.sample raises ValueError when asked for more items than exist,
    # so never request more than the number of deleted profiles.
    sample_size = min(args.sample_api, len(deleted_profiles))
    if sample_size <= 0:
        # Nothing to look up; also avoids dividing by zero below.
        print('No "deleted" profiles to sample; skipping API lookups')
        return

    # random.sample needs a sequence, hence the list() around the set.
    sample_deleted = random.sample(list(deleted_profiles), sample_size)
    num_redirects = sum(
        1 for profile_num in sample_deleted
        if api_tools.is_redirect(profile_num)
    )
    print(f'Of "deleted" profiles, {num_redirects / len(sample_deleted):.0%} were actually merges')
|
||
# Run only when executed as a script, so importing this module does not
# trigger argument parsing and API lookups.
if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters