Tracking the performance of dynesty #432

segasai · 2023-03-22T11:54:28Z

This is an issue to track/discuss ways of tracking dynesty performance vs time to
avoid issues.

I've wrote a quick script to produce timings of test vs time.
And here's the output show relative timing for different tests. Each line is a different test averaged to have a median of 1.

It shows the impact of the recent dynamic sampling fix.

The script is below.
The question is whether there is a way to track this automatically somehow.

import os
import numpy as np
import math
import xml
import glob
import re
import astropy.table as atpy


def fetch_commits():
    cmd = '( echo hash,date ; git log --pretty=format:%H,%aI |grep -e  b6c9d7cf488939e3a4bdab8c405ce298c8843482 -B 10000 ) > commit.list'
    os.system(cmd)


def get_commits():
    # commits = []
    T = atpy.Table().read('commit.list', format='ascii')
    # for commit in open('commit.list', 'r'):
    #    commit = commit.rstrip()
    #    commits.append(commit)
    # commits = np.array(commits)
    return T['hash'], T['date']


def extractor(fout):
    commits, dates = get_commits()
    ids = np.arange(len(commits))
    CD = {}
    for c, i, d in zip(commits, ids, dates):
        CD[c] = (i, d)

    with open(fout, 'w') as fp:
        print('commitid,commdate,commhash,testname1,testname2,duration',
              file=fp)
        for f in glob.glob('xmls/*xml'):
            D = open(f, 'r').read()

            M = re.match('.*failures="([0-9]+)".*', D)
            if M is not None:
                M = M.group(1)
            else:
                continue
            fail = int(M)

            if fail > 0:
                continue
            commit = f.split('.')[-2].split('_')[-1]
            if commit not in CD:
                continue
            D = xml.etree.ElementTree.parse(f)
            for el in D.findall('./testsuite/testcase'):
                el = dict(el.items())
                if 'classname' not in el:
                    continue
                print('%d,%s,%s,%s,%s,%s' %
                      (CD[commit][0], CD[commit][1], commit, el['classname'],
                       el['name'], el['time']),
                      file=fp)


def plotter():
    extractor('output.dat')
    res = {}
    T = atpy.Table().read('output.dat', format='csv')
    T = T[~T['testname1'].mask]

    import matplotlib.pyplot as plt
    plt.clf()
    for tdate, tcid, a, b, cdt in zip(T['commdate'], T['commitid'],
                                      T['testname1'].value,
                                      T['testname2'].value, T['duration']):
        if (a, b) not in res:
            res[a, b] = []
        res[a, b].append((tcid, cdt, tdate))
    # plt.plot([0])
    # plt.xlim(0, 900)
    plt.ylim(.2, 4)
    for k, v in res.items():
        X, Y, Z = np.array(v).T
        Y = Y.astype(np.float32)
        Z = np.array(Z, dtype=np.datetime64)
        if len(X) > 20 and np.median(Y) > 1:
            plt.semilogy(Z[np.argsort(Z)], (Y / np.median(Y))[np.argsort(Z)],
                         alpha=.2)
    plt.ylabel('Relative Time')


def generator():
    commits = get_commits()
    ids = np.arange(len(commits))
    maxpow = int(math.ceil(np.log(len(ids)) / np.log(2)))
    for pows in range(maxpow + 1, -1, -1):
        curN = 1 << pows
        print(curN)
        for curid in ids[::curN]:
            commit = commits[curid]
            print(curid)
            ofname = f'xmls/dat_{commit}.xml'
            if os.path.exists(ofname):
                continue
            commit = commits[curid]
            cmd = f'git checkout {commit}'
            os.system(cmd)
            cmd1 = f'''pytest  --junit-xml={ofname} -m 'not slow' --timeout 300 --dist loadgroup --durations 0 -n 36 tests/test*y'''
            os.system(cmd1)


if __name__ == '__main__':
    generator()

The text was updated successfully, but these errors were encountered:

segasai added question questions about stuff enhancement upgrades and improvements help wanted help! and removed question questions about stuff labels Mar 22, 2023

segasai mentioned this issue Mar 31, 2023

Fixes to bounding updates #428

Merged

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Tracking the performance of dynesty #432

Tracking the performance of dynesty #432

segasai commented Mar 22, 2023 •

edited

Loading

Tracking the performance of dynesty #432

Tracking the performance of dynesty #432

Comments

segasai commented Mar 22, 2023 • edited Loading

segasai commented Mar 22, 2023 •

edited

Loading