|
'''
Usage: `python3 rename_nll_facts.py src ref dest`

Renames atoms in `src/*.facts` to match the names used in `ref/*.facts`, then
writes the renamed facts to `dest/`.  Atoms for which no matching `ref` name
could be found are reported and written with an `OLD:` prefix.
'''
| 7 | + |
| 8 | +import ast |
| 9 | +from collections import defaultdict |
| 10 | +import os |
| 11 | +import sys |
| 12 | + |
# The three positional arguments are the directory of facts to rename, the
# directory of reference facts, and the output directory.  Check the count up
# front so a bad invocation prints the usage line instead of an unpacking
# traceback.
if len(sys.argv) != 4:
    sys.exit('usage: python3 rename_nll_facts.py src ref dest')
src_dir, ref_dir, dest_dir = sys.argv[1:]

# Map `src` loan/origin/path names to `ref` loan/origin/path names.  We don't
# break this down by type because the names for each type don't collide anyway.
name_map = {}
# Set of `ref` names that appear as values in `name_map`.
ref_names_seen = set()
| 20 | + |
def match_name(src_name, ref_name):
    '''
    Record that `src_name` (a name from `src`) stands for `ref_name` in `ref`.

    The mapping is kept one-to-one.  A `src` name that is already mapped to a
    different `ref` name, or a `ref` name that is already the target of some
    other `src` name, is reported on stdout and the new pairing is ignored.
    Re-recording an existing pairing is a silent no-op.
    '''
    if src_name in name_map:
        # Already mapped: only complain if the new match disagrees.
        previous = name_map[src_name]
        if ref_name != previous:
            print('error: %r matches both %r and %r' % (
                src_name, previous, ref_name))
        return

    if ref_name in ref_names_seen:
        # Some other `src` name already claimed this `ref` name.
        print('error: %r matches %r, but %r is already used' % (
            src_name, ref_name, ref_name))
        return

    ref_names_seen.add(ref_name)
    name_map[src_name] = ref_name
| 35 | + |
def match_loan(src_name, ref_name):
    '''Record that loan `src_name` in `src` corresponds to `ref_name` in `ref`.'''
    # All name kinds share one table, so this simply defers to `match_name`.
    match_name(src_name, ref_name)
| 38 | + |
def match_origin(src_name, ref_name):
    '''Record that origin `src_name` in `src` corresponds to `ref_name` in `ref`.'''
    # All name kinds share one table, so this simply defers to `match_name`.
    match_name(src_name, ref_name)
| 41 | + |
def match_path(src_name, ref_name):
    '''Record that path `src_name` in `src` corresponds to `ref_name` in `ref`.'''
    # All name kinds share one table, so this simply defers to `match_name`.
    match_name(src_name, ref_name)
| 44 | + |
| 45 | + |
def load(name):
    '''
    Load the relation `name` from both the `src` and `ref` directories.

    Each `<name>.facts` file holds one fact per line, written as tab-separated
    Python literals.  Blank lines carry no fact and are skipped; without that
    check `ast.literal_eval('')` would abort the whole run.

    Returns `(src_rows, ref_rows)`; each is a list of rows, and each row is
    the list of parsed fields of one line.

    Raises `OSError` if either file cannot be opened, and `ValueError` or
    `SyntaxError` (from `ast.literal_eval`) if a field is not a valid literal.
    '''
    def read_rows(dir_path):
        # Shared reader so `src` and `ref` are always parsed the same way.
        with open(os.path.join(dir_path, name + '.facts')) as f:
            return [[ast.literal_eval(s) for s in line.strip().split('\t')]
                    for line in f if line.strip()]

    src_rows = read_rows(src_dir)
    ref_rows = read_rows(ref_dir)
    return src_rows, ref_rows
| 54 | + |
| 55 | + |
# Match up paths using `path_is_var` and `path_assigned_at_base`.

def match_path_is_var():
    '''
    Match paths that `path_is_var` ties to the same variable.

    Rows are `(path, var)` pairs.  For every `src` row whose variable also
    occurs in `ref`, the `src` path is matched with the `ref` path recorded
    for that variable; variables absent from `ref` are skipped.
    '''
    src_rows, ref_rows = load('path_is_var')
    # If `ref` lists a variable more than once, the last row wins.
    ref_path_by_var = {var: path for path, var in ref_rows}
    for src_path, var in src_rows:
        if var in ref_path_by_var:
            match_path(src_path, ref_path_by_var[var])

match_path_is_var()
| 67 | + |
def match_path_assigned_at_base():
    '''
    Match paths that `path_assigned_at_base` assigns at the same point.

    Rows are `(path, point)` pairs.  For every `src` row whose point also
    occurs in `ref`, the `src` path is matched with the `ref` path recorded
    for that point; points absent from `ref` are skipped.
    '''
    src_rows, ref_rows = load('path_assigned_at_base')
    # If `ref` lists a point more than once, the last row wins.
    ref_path_by_point = {point: path for path, point in ref_rows}
    for src_path, point in src_rows:
        if point in ref_path_by_point:
            match_path(src_path, ref_path_by_point[point])

match_path_assigned_at_base()
| 77 | + |
# Match up origins and loans using `loan_issued_at`

def match_loan_issued_at():
    '''
    Match origins and loans that `loan_issued_at` issues at the same point.

    Rows are `(origin, loan, point)` triples.  For every `src` row whose
    point also occurs in `ref`, the `src` origin and loan are matched with
    the `ref` origin and loan recorded for that point; points absent from
    `ref` are skipped.
    '''
    src, ref = load('loan_issued_at')
    # If `ref` lists a point more than once, the last row wins.
    ref_dct = {point: (origin, loan) for origin, loan, point in ref}
    for origin, loan, point in src:
        if point not in ref_dct:
            continue
        ref_origin, ref_loan = ref_dct[point]
        match_origin(origin, ref_origin)
        # Loans are registered through `match_loan`, not `match_origin`, so
        # each kind of name goes through its own helper.
        match_loan(loan, ref_loan)

match_loan_issued_at()
| 90 | + |
# Match up origins using `use_of_var_derefs_origin`

def match_use_of_var_derefs_origin():
    '''
    Match origins that `use_of_var_derefs_origin` lists for the same variable.

    Rows are `(var, origin)` pairs.  The origins of each variable are
    collected in file order for `src` and for `ref`; for every variable
    present in both, the two lists are matched position by position.  A
    variable whose two lists differ in length is reported on stdout and
    skipped.
    '''
    src, ref = load('use_of_var_derefs_origin')
    src_dct = defaultdict(list)
    for var, origin in src:
        src_dct[var].append(origin)
    ref_dct = defaultdict(list)
    for var, origin in ref:
        ref_dct[var].append(origin)
    # Visit the shared variables in sorted order.  Iterating the bare set
    # would follow hash order, which changes from run to run, so both the
    # error output and the winner of any conflicting match would vary.
    # Sorting assumes the variable names are mutually comparable (they are
    # strings in practice, as the rewrite step below also requires).
    for var in sorted(src_dct.keys() & ref_dct.keys()):
        src_origins = src_dct[var]
        ref_origins = ref_dct[var]
        if len(src_origins) != len(ref_origins):
            print('error: var %r has %d origins in src but %d in ref' % (
                var, len(src_origins), len(ref_origins)))
            continue
        for src_origin, ref_origin in zip(src_origins, ref_origins):
            match_origin(src_origin, ref_origin)

match_use_of_var_derefs_origin()
| 112 | + |
| 113 | + |
# Rewrite `src` using the collected name mappings.

os.makedirs(dest_dir, exist_ok=True)
# Sorted so files are processed, and errors reported, in a stable order.
for name in sorted(os.listdir(src_dir)):
    # Only visible `*.facts` files are rewritten.
    if name.startswith('.') or not name.endswith('.facts'):
        continue

    with open(os.path.join(src_dir, name)) as src, \
            open(os.path.join(dest_dir, name), 'w') as dest:
        for line in src:
            fields = line.strip()
            if not fields:
                # A blank line holds no fact; parsing it would raise.
                continue

            src_parts = [ast.literal_eval(s) for s in fields.split('\t')]
            dest_parts = []
            for part in src_parts:
                # Atoms with these prefixes are copied through unchanged
                # (presumably variables and program points; `name_map` only
                # ever holds loan, origin and path names).
                if part.startswith(('_', 'Start', 'Mid')):
                    dest_parts.append(part)
                    continue

                dest_part = name_map.get(part)
                if dest_part is None:
                    # No `ref` counterpart was found: report it and keep the
                    # old name, tagged so it stands out in the output.
                    print('error: no mapping for %r (used in %s: %r)' % (
                        part, name, src_parts))
                    dest_part = 'OLD:' + part
                dest_parts.append(dest_part)

            # Facts are written back as tab-separated double-quoted atoms.
            dest.write('\t'.join('"%s"' % part for part in dest_parts) + '\n')
| 139 | + |
0 commit comments