Skip to content

Commit

Permalink
Update imgdiag scripts
Browse files Browse the repository at this point in the history
run_imgdiag:
* Add PID to output filenames.
* Add option to specify host output directory.

create_dirty_image_objects:
* Add an option to merge imgdiag files with the same process names.
* Improve object sorting.
* Print stats in csv format.

Test: ./run_imgdiag.py --host-out-dir out
Test: ./create_dirty_image_objects.py ./out/imgdiag_* --merge-same-procnames --print-stats
Change-Id: I94aba7b241e0a0c2ebfb33ffcc333640c04f2d01
  • Loading branch information
Dmitrii Ishcheikin authored and Treehugger Robot committed Sep 26, 2023
1 parent b320a4d commit e606762
Show file tree
Hide file tree
Showing 2 changed files with 129 additions and 51 deletions.
149 changes: 103 additions & 46 deletions imgdiag/create_dirty_image_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,37 +16,90 @@

import argparse
from collections import defaultdict
from enum import Enum
import os
import re


def process_dirty_entries(entries, with_sort):
mark_counts = defaultdict(int)
class SortType(Enum):
    """Object sorting strategies selectable on the command line.

    The member values are the literal strings accepted as choices for the
    sort-type option, so a member can be rebuilt with `SortType(value)`.
    """

    # Emit objects without any bin index.
    NONE = 'none'
    # Group objects that share the same usage pattern into one bin.
    SIMPLE = 'simple'
    # Like SIMPLE, but additionally order bins so similar ones are adjacent.
    OPT_NEIGHBOURS = 'opt_neighbours'


def merge_same_procnames(entries):
    """Merge dirty-object sets of files that belong to the same process name.

    Args:
      entries: iterable of `(path, objs)` pairs, where `objs` is an iterable
        of dirty object names collected from the file at `path`.

    Returns:
      A list of `(procname, objs)` pairs, where `objs` is the set union of
      all objects from files sharing `procname`. The list is sorted by the
      number of objects, ascending.

    Only files whose basename has the form `<procname>_<pid>.txt` take part
    in the result; any other file is left out.
    """
    # The dot is escaped and the whole basename must match, so names such as
    # "foo_12xtxt" or "foo_12.txt.bak" are not mistaken for "<name>_<pid>.txt".
    prog = re.compile(r'(.+)_(\d+)\.txt')

    merged_entries = defaultdict(set)
    for path, objs in entries:
        m = prog.fullmatch(os.path.basename(path))
        if m:
            merged_entries[m.group(1)].update(objs)

    return sorted(merged_entries.items(), key=lambda x: len(x[1]))


def opt_neighbours(sort_keys):
sort_keys = dict(sort_keys)
res = list()

# Start with a bin with the lowest process and objects count.
cur_key = min(
sort_keys.items(), key=lambda item: (item[0].bit_count(), len(item[1]))
)[0]
res.append((cur_key, sort_keys[cur_key]))
del sort_keys[cur_key]

# Find next most similar sort key and update the result.
while sort_keys:

def jaccard_index(x):
return (x & cur_key).bit_count() / (x | cur_key).bit_count()

next_key = max(sort_keys.keys(), key=jaccard_index)
res.append((next_key, sort_keys[next_key]))
del sort_keys[next_key]
cur_key = next_key
return res


def process_dirty_entries(entries, sort_type):
    """Build the dirty-image-objects lines for the given per-process entries.

    Args:
      entries: sequence of `(name, objs)` pairs, where `objs` is the set of
        dirty object names of one process. It is iterated several times, so
        it must be re-iterable (e.g. a list).
      sort_type: a `SortType` member selecting how objects are binned.

    Returns:
      A `(dirty_obj_lines, sort_keys)` tuple.
      * With `SortType.NONE`, `dirty_obj_lines` holds every object once, in
        sorted order, each followed by a newline, and `sort_keys` is an empty
        dict.
      * Otherwise `dirty_obj_lines` holds `"<obj> <bin index>\n"` lines and
        `sort_keys` is the ordered list of `(sort_key, objs)` bins that the
        bin indices refer to.
    """
    union = set()
    for _, objs in entries:
        union.update(objs)

    if sort_type == SortType.NONE:
        dirty_obj_lines = [obj + '\n' for obj in sorted(union)]
        return (dirty_obj_lines, dict())

    # sort_key -> [objs]
    sort_keys = defaultdict(list)
    # Iterate in sorted order: set iteration order of strings is not stable
    # between runs, which would make the order inside each bin vary.
    for obj in sorted(union):
        sort_key = 0
        # One bit per entry, set if this object is dirty in that process.
        # The first entry ends up in the most significant of the
        # len(entries) bits.
        for _, objs in entries:
            sort_key = (sort_key << 1) | (1 if obj in objs else 0)

        sort_keys[sort_key].append(obj)

    sort_keys = sorted(sort_keys.items())

    if sort_type == SortType.OPT_NEIGHBOURS:
        sort_keys = opt_neighbours(sort_keys)

    # The bin index written next to each object is the bin's position in the
    # final ordering.
    dirty_obj_lines = [
        obj + ' ' + str(idx) + '\n'
        for idx, (_, objs) in enumerate(sort_keys)
        for obj in objs
    ]

    return (dirty_obj_lines, sort_keys)


def main():
Expand All @@ -62,10 +115,23 @@ def main():
help='imgdiag files to use.',
)
parser.add_argument(
'--sort-objects',
'--sort-type',
choices=[e.value for e in SortType],
default=SortType.OPT_NEIGHBOURS.value,
help=(
'Object sorting type. "simple" puts objects with the same usage'
' pattern in the same bins. "opt_neighbours" also tries to put bins'
' with similar usage patterns close to each other.'
),
)
parser.add_argument(
'--merge-same-procnames',
action=argparse.BooleanOptionalAction,
default=True,
help='Use object sorting.',
default=False,
help=(
'Merge dirty objects from files with the same process name (different'
' pid). Files are expected to end with "_{pid}.txt"'
),
)
parser.add_argument(
'--output-filename',
Expand All @@ -81,49 +147,40 @@ def main():

args = parser.parse_args()

entries = dict()
entries = list()
for path in args.imgdiag_files:
with open(path) as f:
lines = f.readlines()
prefix = 'dirty_obj: '
lines = [l.strip().removeprefix(prefix) for l in lines if prefix in l]
entries[path] = set(lines)

if args.sort_objects and len(entries) > 32:
print(
'WARNING: too many processes for sorting, using top 32 by number of'
' dirty objects.'
)
entries_list = sorted(
list(entries.items()), reverse=True, key=lambda x: len(x[1])
)
entries_list = entries_list[0:32]
entries = {k: v for (k, v) in entries_list}
entries.append((path, set(lines)))

entries = sorted(entries, key=lambda x: len(x[1]))

if args.merge_same_procnames:
entries = merge_same_procnames(entries)

print('Using processes:')
for k, v in sorted(entries.items(), key=lambda x: len(x[1])):
for k, v in entries:
print(f'{k}: {len(v)}')
print()

dirty_image_objects, mark_counts = process_dirty_entries(
entries=entries, with_sort=args.sort_objects
dirty_image_objects, sort_keys = process_dirty_entries(
entries=entries, sort_type=SortType(args.sort_type)
)

with open(args.output_filename, 'w') as f:
f.writelines(dirty_image_objects)

if args.print_stats:
mark_counts = sorted(
list(mark_counts.items()), key=lambda x: x[1], reverse=True
)

for i, path in enumerate(entries.keys()):
print(path, chr(ord('A') + i))

print(','.join(k for k, v in entries), ',obj_count')
total_count = 0
for marker, count in mark_counts:
print(marker, count)
total_count += count
for sort_key, objs in sort_keys:
bits_csv = ','.join(
'{sort_key:0{width}b}'.format(sort_key=sort_key, width=len(entries))
)
print(bits_csv, ',', len(objs))
total_count += len(objs)
print('total: ', total_count)


Expand Down
31 changes: 26 additions & 5 deletions imgdiag/run_imgdiag.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,26 @@
try:
    from tqdm import tqdm
except ImportError:
    # tqdm is only used for progress display; when it is not installed fall
    # back to a pass-through so that `for x in tqdm(items)` keeps working.
    # Only ImportError is caught so unrelated failures are not hidden.

    def tqdm(x):
        """Return the iterable unchanged (no-op stand-in for tqdm.tqdm)."""
        return x


# Immutable record for one process entry, with the fields pid, ppid, cmd,
# name and etc_args.
ProcEntry = namedtuple('ProcEntry', 'pid, ppid, cmd, name, etc_args')

def get_mem_stats(zygote_pid, target_pid, target_name, imgdiag_path, boot_image, device_out_dir):
imgdiag_output_path = f'{device_out_dir}/imgdiag_{target_name}.txt'

def get_mem_stats(
zygote_pid,
target_pid,
target_name,
imgdiag_path,
boot_image,
device_out_dir,
host_out_dir,
):
imgdiag_output_path = (
f'{device_out_dir}/imgdiag_{target_name}_{target_pid}.txt'
)
cmd_collect = (
'adb shell '
f'"{imgdiag_path} --zygote-diff-pid={zygote_pid} --image-diff-pid={target_pid} '
Expand All @@ -41,13 +53,15 @@ def get_mem_stats(zygote_pid, target_pid, target_name, imgdiag_path, boot_image,
print('imgdiag call failed on:', target_pid, target_name)
return

cmd_pull = f'adb pull {imgdiag_output_path} ./'
cmd_pull = f'adb pull {imgdiag_output_path} {host_out_dir}'
subprocess.run(cmd_pull, shell=True, check=True, capture_output=True)


def main():
parser = argparse.ArgumentParser(
description='Run imgdiag on selected processes and pull results from the device.',
description=(
'Run imgdiag on selected processes and pull results from the device.'
),
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument(
Expand Down Expand Up @@ -76,6 +90,11 @@ def main():
default='/data/local/tmp/imgdiag_out',
help='Directory for imgdiag output files on the device.',
)
parser.add_argument(
'--host-out-dir',
default='./',
help='Directory for imgdiag output files on the host.',
)

args = parser.parse_args()

Expand Down Expand Up @@ -104,6 +123,7 @@ def main():
subprocess.run(
args=f'adb shell "mkdir -p {args.device_out_dir}"', check=True, shell=True
)
subprocess.run(args=f'mkdir -p {args.host_out_dir}', check=True, shell=True)

for entry in tqdm(zygote_children):
get_mem_stats(
Expand All @@ -113,8 +133,9 @@ def main():
imgdiag_path=args.imgdiag,
boot_image=args.boot_image,
device_out_dir=args.device_out_dir,
host_out_dir=args.host_out_dir,
)


# Run the tool only when executed as a script, and only once.
if __name__ == '__main__':
    main()

0 comments on commit e606762

Please sign in to comment.