diff --git a/devel/prof.txt b/devel/prof.txt new file mode 100644 index 0000000..6f8c936 --- /dev/null +++ b/devel/prof.txt @@ -0,0 +1,148 @@ +Wrote profile results to test_arg_recorder_with_wf.py.lprof +Timer unit: 1e-06 s + +Total time: 0.565759 s +File: /home/jaime/lib/ftprime/ftprime/argrecorder.py +Function: add_individual at line 26 + +Line # Hits Time Per Hit % Time Line Contents +============================================================== + 26 @profile + 27 def add_individual(self, name, time, population=msprime.NULL_POPULATION, + 28 is_sample=False): + 29 '''Add a new individual. + 30 We need to add individuals when they are *born*, + 31 rather than the first time they reproduce, to ensure + 32 that records are output in order by birth time of the parent. + 33 ''' + 34 101240 72447 0.7 12.8 if name not in self: + 35 101240 58303 0.6 10.3 self[name] = (msprime.Node(time=time, population=population, + 36 101240 297613 2.9 52.6 name=name, is_sample=is_sample), []) + 37 101240 137396 1.4 24.3 self.num_nodes = max(self.num_nodes, 1+int(name)) + +Total time: 7.5774 s +File: /home/jaime/lib/ftprime/ftprime/argrecorder.py +Function: add_record at line 39 + +Line # Hits Time Per Hit % Time Line Contents +============================================================== + 39 @profile + 40 def add_record(self, left, right, parent, children): + 41 ''' + 42 Add records corresponding to a reproduction event in which children (a + 43 tuple of IDs) inherit from parent (a single ID) on the interval + 44 [left,right). + 45 ''' + 46 # unneeded but helpful for debugging + 47 150314 120881 0.8 1.6 if parent not in self.keys(): + 48 raise ValueError("Parent " + str(parent) + + 49 "'s birth time has not been recorded with " + + 50 ".add_individual().") + 51 # time = self[parent][0] + 52 150314 79151 0.5 1.0 new_rec = msprime.Edgeset( + 53 150314 71103 0.5 0.9 parent=parent, + 54 150314 66660 0.4 0.9 children=children, + 55 150314 65861 0.4 0.9 left=left, + 56 150314 325604 2.2 4.3 right=right) + 57 150314 6848145 45.6 90.4 merge_records(new_rec, self[parent][1]) + +Total time: 4.08047 s +File: /home/jaime/lib/ftprime/ftprime/argrecorder.py +Function: merge_records at line 142 + +Line # Hits Time Per Hit % Time Line Contents +============================================================== + 142 @profile + 143 def merge_records(new, existing): + 144 ''' + 145 Incorporate a new record (l,r,x,c,t[x]) + 146 into a list of existing ones (a,b,x,C,t[x]) sorted on left endpoint. + 147 Keeping them in sorted order simplifies the procedure + 148 (makes it so we don't have to split the new record). + 149 ''' + 150 150314 101369 0.7 2.5 k = 0 + 151 150314 87001 0.6 2.1 cur_left = new.left + 152 # print("MR: -----") + 153 # print("adding", new) + 154 # print(" to", existing) + 155 308472 241009 0.8 5.9 while (k < len(existing)) and (cur_left < new.right): + 156 158158 124833 0.8 3.1 left = existing[k].left + 157 158158 93386 0.6 2.3 right = existing[k].right + 158 158158 90150 0.6 2.2 parent = existing[k].parent + 159 158158 96347 0.6 2.4 children = existing[k].children + 160 # print("k:",k) + 161 # print("existing:",existing[k]) + 162 # print("cur_left:",cur_left) + 163 158158 92653 0.6 2.3 if new.parent != parent: + 164 raise ValueError("Trying to merge records with different parents.") + 165 158158 85414 0.5 2.1 if right <= cur_left: + 166 # no overlap + 167 # print("no overlap") + 168 15534 8903 0.6 0.2 k += 1 + 169 15534 7404 0.5 0.2 continue + 170 142624 77301 0.5 1.9 if cur_left < left: + 171 # print("dangling left") + 172 15409 10416 0.7 0.3 existing.insert(k, msprime.Edgeset( + 173 15409 7887 0.5 0.2 left=cur_left, + 174 15409 16021 1.0 0.4 right=min(new.right, left), + 175 15409 7897 0.5 0.2 parent=parent, + 176 15409 43180 2.8 1.1 children=new.children)) + 177 15409 15864 1.0 0.4 cur_left = min(new.right, left) + 178 15409 9449 0.6 0.2 k += 1 + 179 15409 7490 0.5 0.2 continue + 180 127215 393801 3.1 9.7 combined_children = tuple(sorted(children+new.children)) + 181 127215 87066 0.7 2.1 combined_rec = msprime.Edgeset( + 182 127215 67410 0.5 1.7 left=cur_left, + 183 127215 234108 1.8 5.7 right=min(new.right, right), + 184 127215 73773 0.6 1.8 parent=new.parent, + 185 127215 312244 2.5 7.7 children=combined_children) + 186 127215 78337 0.6 1.9 if cur_left == left: + 187 # print("equal left") + 188 105571 67860 0.6 1.7 if new.right < right: + 189 # print("overlap right") + 190 21590 13336 0.6 0.3 mod_rec = msprime.Edgeset( + 191 21590 12284 0.6 0.3 left=new.right, + 192 21590 11592 0.5 0.3 right=right, + 193 21590 11575 0.5 0.3 parent=parent, + 194 21590 149418 6.9 3.7 children=children) + 195 21590 19858 0.9 0.5 existing[k] = combined_rec + 196 21590 13557 0.6 0.3 k += 1 + 197 21590 20224 0.9 0.5 existing.insert(k, mod_rec) + 198 21590 13962 0.6 0.3 k += 1 + 199 else: + 200 # print("dangling right") + 201 83981 71541 0.9 1.8 existing[k] = combined_rec + 202 83981 55980 0.7 1.4 k += 1 + 203 else: + 204 # here we know that left < cur_left < right + 205 # print("overlap left") + 206 21644 13649 0.6 0.3 mod_rec = msprime.Edgeset( + 207 21644 11864 0.5 0.3 left=left, + 208 21644 11487 0.5 0.3 right=cur_left, + 209 21644 11471 0.5 0.3 parent=parent, + 210 21644 93849 4.3 2.3 children=children) + 211 21644 20146 0.9 0.5 existing[k] = mod_rec + 212 21644 13783 0.6 0.3 k += 1 + 213 21644 20156 0.9 0.5 existing.insert(k, combined_rec) + 214 21644 13301 0.6 0.3 k += 1 + 215 21644 14807 0.7 0.4 if new.right < right: + 216 # print("overlap right") + 217 existing.insert(k, msprime.Edgeset( + 218 left=new.right, + 219 right=right, + 220 parent=parent, + 221 children=children)) + 222 k += 1 + 223 127215 110940 0.9 2.7 cur_left = min(new.right, right) + 224 # add whatever's left at the end + 225 150314 101572 0.7 2.5 if cur_left < new.right: + 226 82579 60579 0.7 1.5 existing.insert(k, msprime.Edgeset( + 227 82579 47194 0.6 1.2 left=cur_left, + 228 82579 50544 0.6 1.2 right=new.right, + 229 82579 47932 0.6 1.2 parent=new.parent, + 230 82579 428151 5.2 10.5 children=new.children)) + 231 # print("getting") + 232 # for x in existing: + 233 # print(" ", x) + 234 150314 77143 0.5 1.9 return None + diff --git a/devel/prof_rc.txt b/devel/prof_rc.txt new file mode 100644 index 0000000..75ffdad --- /dev/null +++ b/devel/prof_rc.txt @@ -0,0 +1,209 @@ +Wrote profile results to test_recomb_collector.py.lprof +Timer unit: 1e-06 s + +Total time: 2.28796 s +File: /home/jaime/lib/ftprime/ftprime/argrecorder.py +Function: add_individual at line 26 + +Line # Hits Time Per Hit % Time Line Contents +============================================================== + 26 @profile + 27 def add_individual(self, name, time, population=msprime.NULL_POPULATION, + 28 is_sample=False): + 29 '''Add a new individual. + 30 We need to add individuals when they are *born*, + 31 rather than the first time they reproduce, to ensure + 32 that records are output in order by birth time of the parent. + 33 ''' + 34 402005 320650 0.8 14.0 if name not in self: + 35 402005 238573 0.6 10.4 self[name] = (msprime.Node(time=time, population=population, + 36 402005 1231482 3.1 53.8 name=name, is_sample=is_sample), []) + 37 402005 497256 1.2 21.7 self.num_nodes = max(self.num_nodes, 1+int(name)) + +Total time: 29.878 s +File: /home/jaime/lib/ftprime/ftprime/argrecorder.py +Function: add_record at line 39 + +Line # Hits Time Per Hit % Time Line Contents +============================================================== + 39 @profile + 40 def add_record(self, left, right, parent, children): + 41 ''' + 42 Add records corresponding to a reproduction event in which children (a + 43 tuple of IDs) inherit from parent (a single ID) on the interval + 44 [left,right). + 45 ''' + 46 # unneeded but helpful for debugging + 47 559608 799704 1.4 2.7 if parent not in self.keys(): + 48 raise ValueError("Parent " + str(parent) + + 49 "'s birth time has not been recorded with " + + 50 ".add_individual().") + 51 # time = self[parent][0] + 52 559608 339309 0.6 1.1 new_rec = msprime.Edgeset( + 53 559608 276814 0.5 0.9 parent=parent, + 54 559608 256989 0.5 0.9 children=children, + 55 559608 254082 0.5 0.9 left=left, + 56 559608 1489470 2.7 5.0 right=right) + 57 559608 26461636 47.3 88.6 merge_records(new_rec, self[parent][1]) + +Total time: 16.3576 s +File: /home/jaime/lib/ftprime/ftprime/argrecorder.py +Function: merge_records at line 142 + +Line # Hits Time Per Hit % Time Line Contents +============================================================== + 142 @profile + 143 def merge_records(new, existing): + 144 ''' + 145 Incorporate a new record (l,r,x,c,t[x]) + 146 into a list of existing ones (a,b,x,C,t[x]) sorted on left endpoint. + 147 Keeping them in sorted order simplifies the procedure + 148 (makes it so we don't have to split the new record). + 149 ''' + 150 559608 1563370 2.8 9.6 k = 0 + 151 559608 343556 0.6 2.1 cur_left = new.left + 152 # print("MR: -----") + 153 # print("adding", new) + 154 # print(" to", existing) + 155 1115266 963964 0.9 5.9 while (k < len(existing)) and (cur_left < new.right): + 156 555658 508057 0.9 3.1 left = existing[k].left + 157 555658 355931 0.6 2.2 right = existing[k].right + 158 555658 350275 0.6 2.1 parent = existing[k].parent + 159 555658 366891 0.7 2.2 children = existing[k].children + 160 # print("k:",k) + 161 # print("existing:",existing[k]) + 162 # print("cur_left:",cur_left) + 163 555658 336903 0.6 2.1 if new.parent != parent: + 164 raise ValueError("Trying to merge records with different parents.") + 165 555658 330761 0.6 2.0 if right <= cur_left: + 166 # no overlap + 167 # print("no overlap") + 168 64560 38082 0.6 0.2 k += 1 + 169 64560 32076 0.5 0.2 continue + 170 491098 271582 0.6 1.7 if cur_left < left: + 171 # print("dangling left") + 172 46958 33681 0.7 0.2 existing.insert(k, msprime.Edgeset( + 173 46958 24061 0.5 0.1 left=cur_left, + 174 46958 53415 1.1 0.3 right=min(new.right, left), + 175 46958 24232 0.5 0.1 parent=parent, + 176 46958 143949 3.1 0.9 children=new.children)) + 177 46958 51934 1.1 0.3 cur_left = min(new.right, left) + 178 46958 29963 0.6 0.2 k += 1 + 179 46958 23954 0.5 0.1 continue + 180 444140 1513825 3.4 9.3 combined_children = tuple(sorted(children+new.children)) + 181 444140 601062 1.4 3.7 combined_rec = msprime.Edgeset( + 182 444140 237600 0.5 1.5 left=cur_left, + 183 444140 894437 2.0 5.5 right=min(new.right, right), + 184 444140 262270 0.6 1.6 parent=new.parent, + 185 444140 1173999 2.6 7.2 children=combined_children) + 186 444140 290592 0.7 1.8 if cur_left == left: + 187 # print("equal left") + 188 374591 267743 0.7 1.6 if new.right < right: + 189 # print("overlap right") + 190 62946 39608 0.6 0.2 mod_rec = msprime.Edgeset( + 191 62946 36558 0.6 0.2 left=new.right, + 192 62946 36056 0.6 0.2 right=right, + 193 62946 34178 0.5 0.2 parent=parent, + 194 62946 191912 3.0 1.2 children=children) + 195 62946 65663 1.0 0.4 existing[k] = combined_rec + 196 62946 41032 0.7 0.3 k += 1 + 197 62946 61760 1.0 0.4 existing.insert(k, mod_rec) + 198 62946 42943 0.7 0.3 k += 1 + 199 else: + 200 # print("dangling right") + 201 311645 291891 0.9 1.8 existing[k] = combined_rec + 202 311645 216618 0.7 1.3 k += 1 + 203 else: + 204 # here we know that left < cur_left < right + 205 # print("overlap left") + 206 69549 45708 0.7 0.3 mod_rec = msprime.Edgeset( + 207 69549 38658 0.6 0.2 left=left, + 208 69549 37008 0.5 0.2 right=cur_left, + 209 69549 37283 0.5 0.2 parent=parent, + 210 69549 193916 2.8 1.2 children=children) + 211 69549 69996 1.0 0.4 existing[k] = mod_rec + 212 69549 45173 0.6 0.3 k += 1 + 213 69549 65443 0.9 0.4 existing.insert(k, combined_rec) + 214 69549 44130 0.6 0.3 k += 1 + 215 69549 52116 0.7 0.3 if new.right < right: + 216 # print("overlap right") + 217 6539 4796 0.7 0.0 existing.insert(k, msprime.Edgeset( + 218 6539 3996 0.6 0.0 left=new.right, + 219 6539 3774 0.6 0.0 right=right, + 220 6539 3700 0.6 0.0 parent=parent, + 221 6539 16880 2.6 0.1 children=children)) + 222 6539 4174 0.6 0.0 k += 1 + 223 444140 421187 0.9 2.6 cur_left = min(new.right, right) + 224 # add whatever's left at the end + 225 559608 412823 0.7 2.5 if cur_left < new.right: + 226 309093 239751 0.8 1.5 existing.insert(k, msprime.Edgeset( + 227 309093 178473 0.6 1.1 left=cur_left, + 228 309093 192582 0.6 1.2 right=new.right, + 229 309093 182228 0.6 1.1 parent=new.parent, + 230 309093 1630994 5.3 10.0 children=new.children)) + 231 # print("getting") + 232 # for x in existing: + 233 # print(" ", x) + 234 559608 286449 0.5 1.8 return None + +Total time: 45.6963 s +File: tests/test_recomb_collector.py +Function: _make_pop at line 53 + +Line # Hits Time Per Hit % Time Line Contents +============================================================== + 53 @profile + 54 def _make_pop(popsize, nloci, locus_position, id_tagger, init_geno, + 55 recomb_rate, rc, generations): + 56 1 5 5.0 0.0 sim.setOptions(seed=111) + 57 1 1 1.0 0.0 recombinator = sim.Recombinator(intensity=recomb_rate, + 58 1 1 1.0 0.0 output=rc.collect_recombs, + 59 1 54 54.0 0.0 infoFields="ind_id") + 60 1 1 1.0 0.0 pop = sim.Population( + 61 1 1 1.0 0.0 size=[popsize], + 62 1 0 0.0 0.0 loci=[nloci], + 63 1 1 1.0 0.0 lociPos=locus_position, + 64 1 72 72.0 0.0 infoFields=['ind_id']) + 65 1 2 2.0 0.0 pop.evolve( + 66 initOps=[ + 67 1 6 6.0 0.0 sim.InitSex(), + 68 1 1 1.0 0.0 id_tagger + 69 1 1 1.0 0.0 ]+init_geno, + 70 preOps=[ + 71 1 13 13.0 0.0 sim.PyOperator(lambda pop: rc.increment_time() or True), + 72 # Must return true or false. True keeps whole population (?) + 73 ], + 74 1 146 146.0 0.0 matingScheme=mating_scheme_factory(recombinator, popsize), + 75 1 45696018 45696018.0 100.0 gen=generations + 76 ) + 77 1 1 1.0 0.0 return pop + +Total time: 45.7214 s +File: tests/test_recomb_collector.py +Function: test_simupop at line 94 + +Line # Hits Time Per Hit % Time Line Contents +============================================================== + 94 @profile + 95 def test_simupop(make_pop, generations, popsize): + 96 1 7 7.0 0.0 print("Popsize: ", popsize) + 97 # replications = 1 + 98 1 1 1.0 0.0 nsamples = 2 + 99 1 1 1.0 0.0 length = 10 + 100 1 1 1.0 0.0 nloci = 5 + 101 1 5 5.0 0.0 locus_position = list(range(0, length, int(length/nloci))) + 102 1 1 1.0 0.0 recomb_rate = 0.05 + 103 + 104 1 1 1.0 0.0 rc = RecombCollector( + 105 1 1 1.0 0.0 nsamples=nsamples, generations=generations, N=popsize, + 106 1 23683 23683.0 0.1 ancestor_age=10, length=length, locus_position=locus_position) + 107 + 108 1 59 59.0 0.0 init_geno = [sim.InitGenotype(freq=[0.9, 0.1], loci=sim.ALL_AVAIL)] + 109 + 110 1 12 12.0 0.0 id_tagger = sim.IdTagger(begin=0) + 111 1 5 5.0 0.0 id_tagger.reset(startID=1) # must reset - creating a new one doesn't + 112 1 1 1.0 0.0 pop = make_pop(popsize, nloci, locus_position, id_tagger, init_geno, + 113 1 45696352 45696352.0 99.9 recomb_rate, rc, generations) + 114 1 702 702.0 0.0 locations = [pop.subPopIndPair(x)[0] for x in range(pop.popSize())] + 115 1 581 581.0 0.0 rc.add_diploid_samples(pop.indInfo("ind_id"), locations) + diff --git a/ftprime/argrecorder.py b/ftprime/argrecorder.py index b0c768f..553dccd 100644 --- a/ftprime/argrecorder.py +++ b/ftprime/argrecorder.py @@ -1,7 +1,6 @@ import msprime from collections import OrderedDict - class ARGrecorder(OrderedDict): ''' Keys are individual IDs, and values are tuples, whose first entry is a Node, @@ -24,6 +23,7 @@ def __str__(self): ret += self.edgeset_table().__str__() return ret + @profile def add_individual(self, name, time, population=msprime.NULL_POPULATION, is_sample=False): '''Add a new individual. @@ -36,6 +36,7 @@ def add_individual(self, name, time, population=msprime.NULL_POPULATION, name=name, is_sample=is_sample), []) self.num_nodes = max(self.num_nodes, 1+int(name)) + @profile def add_record(self, left, right, parent, children): ''' Add records corresponding to a reproduction event in which children (a @@ -138,6 +139,7 @@ def add_samples(self, samples, length, populations=None): children=(k,)) +@profile def merge_records(new, existing): ''' Incorporate a new record (l,r,x,c,t[x]) diff --git a/tests/test_arg_recorder_with_wf.py b/tests/test_arg_recorder_with_wf.py index cea72ba..27ecc50 100644 --- a/tests/test_arg_recorder_with_wf.py +++ b/tests/test_arg_recorder_with_wf.py @@ -28,19 +28,22 @@ def test_simulation_runs(N, gen, samples): random.seed(123) records = wf(N=N, ngens=gen, nsamples=samples, survival=0.5) - check_tables(records) + # check_tables(records) - for x in records: - print(x, records[x]) + # for x in records: + # print(x, records[x]) - print(records.edgeset_table()) - print(records.node_table()) + # print(records.edgeset_table()) + # print(records.node_table()) - ts = records.tree_sequence() + # ts = records.tree_sequence() - for t in ts.trees(): - print(t) + # for t in ts.trees(): + # print(t) - print("Mean pairwise diversity:",ts.get_pairwise_diversity()) - print("(should be zero)") - assert ts.get_pairwise_diversity() == 0.0 + # print("Mean pairwise diversity:",ts.get_pairwise_diversity()) + # print("(should be zero)") + # assert ts.get_pairwise_diversity() == 0.0 + +if __name__ == '__main__': + test_simulation_runs(1000, 200, 100) diff --git a/tests/test_recomb_collector.py b/tests/test_recomb_collector.py index 93e06e2..9f9439d 100644 --- a/tests/test_recomb_collector.py +++ b/tests/test_recomb_collector.py @@ -30,26 +30,27 @@ def check_tables(args): assert(ch < args.num_nodes) -@pytest.fixture(scope="function", params=[ - lambda recombinator, popsize: sim.RandomMating( - ops=[ - sim.IdTagger(), - recombinator - ]), - # Overlapping generations mating system - lambda recombinator, popsize: sim.HeteroMating([sim.RandomMating( - ops=[ - sim.IdTagger(), - recombinator - ]), - sim.CloneMating()], - subPopSize=popsize * 2) - ]) +# @pytest.fixture(scope="function", params=[ +# lambda recombinator, popsize: sim.RandomMating( +# ops=[ +# sim.IdTagger(), +# recombinator +# ]), +# # Overlapping generations mating system +# lambda recombinator, popsize: sim.HeteroMating([sim.RandomMating( +# ops=[ +# sim.IdTagger(), +# recombinator +# ]), +# sim.CloneMating()], +# subPopSize=popsize * 2) +# ]) def make_pop(request): # request.param stores a lambda function to make mating scheme # each test that uses this fixture will be run for both entries in 'params' mating_scheme_factory = request.param + @profile def _make_pop(popsize, nloci, locus_position, id_tagger, init_geno, recomb_rate, rc, generations): sim.setOptions(seed=111) @@ -71,27 +72,26 @@ def _make_pop(popsize, nloci, locus_position, id_tagger, init_geno, # Must return true or false. True keeps whole population (?) ], matingScheme=mating_scheme_factory(recombinator, popsize), - postOps=[ - sim.PyEval(r"'Gen: %2d\n' % (gen, )", step=1) - ], gen=generations ) return pop return _make_pop -# occasional failures marked below -# ValueError: Parent 3's birth time has not been recorded with .add_individual() -@pytest.mark.parametrize(('generations', 'popsize'), [ - (3, 5), # stochastic fail - (3, 10), # stochastic fail - (3, 20), - (5, 5), - (5, 10), - (5, 20), - (10, 5), - (10, 10), - (10, 20), -]) +# # occasional failures marked below +# # ValueError: Parent 3's birth time has not been recorded with .add_individual() +# @pytest.mark.parametrize(('generations', 'popsize'), [ +# (3, 5), # stochastic fail +# (3, 10), # stochastic fail +# (3, 20), +# (5, 5), +# (5, 10), +# (5, 20), +# (10, 5), +# (10, 10), +# (10, 20), +# ]) + +@profile def test_simupop(make_pop, generations, popsize): print("Popsize: ", popsize) # replications = 1 @@ -114,22 +114,38 @@ def test_simupop(make_pop, generations, popsize): locations = [pop.subPopIndPair(x)[0] for x in range(pop.popSize())] rc.add_diploid_samples(pop.indInfo("ind_id"), locations) - check_record_order(rc.args) - check_tables(rc.args) + # check_record_order(rc.args) + # check_tables(rc.args) + + # for x in rc.args: + # print(rc.args[x]) + # print(rc.args.node_table()) + # print(rc.args.edgeset_table()) + + # ts = rc.args.tree_sequence() - for x in rc.args: - print(rc.args[x]) - print(rc.args.node_table()) - print(rc.args.edgeset_table()) + # print("coalescence records:") + # for x in ts.records(): + # print(x) - ts = rc.args.tree_sequence() + # ts.simplify(samples=list(range(nsamples))) - print("coalescence records:") - for x in ts.records(): - print(x) + # print("trees:") + # for x in ts.trees(): + # print(x) - ts.simplify(samples=list(range(nsamples))) - print("trees:") - for x in ts.trees(): - print(x) +if __name__ == '__main__': + class MockRequest(object): + def __init__(self): + pass + mr = MockRequest() + mr.param = lambda recombinator, popsize: sim.HeteroMating([sim.RandomMating( + ops=[ + sim.IdTagger(), + recombinator + ]), + sim.CloneMating()], + subPopSize=popsize * 2) + + test_simupop(make_pop(mr), generations=200, popsize=1000)