forked from tskit-dev/msprime
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathspecies_tree_parsing_examples.py
123 lines (114 loc) · 4.79 KB
/
species_tree_parsing_examples.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
"""
Examples illustrating the parsing of species trees from files in Newick
or Nexus format, and the definition of tuples combining population
configurations and demographic events.
"""
import msprime
# Example: a species tree given as a plain Newick string whose branch
# lengths are in millions of years ("myr"). The parsed result is passed to
# a DemographyDebugger, which prints the resulting demographic history.
print("Simple newick tree with branch lengths in units of millions of years:")
newick_myr = "(((human:5.6,chimpanzee:5.6):3.0,gorilla:8.6):9.4,orangutan:18.0)"
parsed_tuple = msprime.parse_species_tree(
    tree=newick_myr, Ne=10000, branch_length_units="myr", generation_time=28
)
# Element 0 is used as the population configurations and element 1 as the
# demographic events.
debugger_inputs = {
    "population_configurations": parsed_tuple[0],
    "demographic_events": parsed_tuple[1],
}
dd = msprime.DemographyDebugger(**debugger_inputs)
dd.print_history()
# Same topology as a plain Newick string, but with branch lengths written
# in years ("yr") instead of millions of years.
print("\n\nSimple newick tree with branch lengths in units of years:")
# The literal is split with implicit string concatenation rather than a
# backslash continuation inside the quotes, so re-indenting these lines can
# never inject whitespace into the Newick string.
species_tree = (
    "(((human:5600000,chimpanzee:5600000):3000000,"
    "gorilla:8600000):9400000,orangutan:18000000)"
)
parsed_tuple = msprime.parse_species_tree(
    tree=species_tree,
    Ne=10000,
    branch_length_units="yr",
    generation_time=28,
)
# Element 0 is used as the population configurations and element 1 as the
# demographic events.
dd = msprime.DemographyDebugger(
    population_configurations=parsed_tuple[0],
    demographic_events=parsed_tuple[1],
)
dd.print_history()
# Example: a Newick tree in which two divergence events share the same
# time (human/chimpanzee and gorilla/orangutan both split at 5 myr).
print("\n\nSimple newick tree with two simultaneous divergence events:")
parse_options = dict(Ne=10000, branch_length_units="myr", generation_time=28)
parsed_tuple = msprime.parse_species_tree(
    tree="((human:5,chimpanzee:5):3,(gorilla:5,orangutan:5):3)",
    **parse_options,
)
population_configurations, demographic_events = parsed_tuple[0], parsed_tuple[1]
dd = msprime.DemographyDebugger(
    population_configurations=population_configurations,
    demographic_events=demographic_events,
)
# Print the demographic history implied by the parsed tree.
dd.print_history()
# Example: a Newick tree containing a polytomy -- human, chimpanzee and
# gorilla all descend directly from the same internal node.
print("\n\nSimple newick tree with polytomy:")
polytomy_newick = "((human:8.6,chimpanzee:8.6,gorilla:8.6):9.4,orangutan:18.0)"
parsed_tuple = msprime.parse_species_tree(
    tree=polytomy_newick,
    Ne=10000,
    branch_length_units="myr",
    generation_time=28,
)
# Index 0: population configurations; index 1: demographic events.
configurations = parsed_tuple[0]
events = parsed_tuple[1]
dd = msprime.DemographyDebugger(
    population_configurations=configurations, demographic_events=events
)
dd.print_history()
# Example: a non-ultrametric Newick tree. The gorilla branch is 7.6
# instead of 8.6, so the tips are not all at the same distance from the root.
# NOTE(review): how non-ultrametric input is handled may depend on the
# msprime version -- confirm it is accepted by the version in use.
print("\n\nNon-ultrametric newick tree:")
non_ultrametric_newick = (
    "(((human:5.6,chimpanzee:5.6):3.0,gorilla:7.6):9.4,orangutan:18.0)"
)
parsed_tuple = msprime.parse_species_tree(
    tree=non_ultrametric_newick,
    Ne=10000,
    branch_length_units="myr",
    generation_time=28,
)
# Feed the two parsed components to the debugger and print the history.
debugger_kwargs = dict(
    population_configurations=parsed_tuple[0],
    demographic_events=parsed_tuple[1],
)
dd = msprime.DemographyDebugger(**debugger_kwargs)
dd.print_history()
# Example: the StarBEAST-format tree in 91genes_species_rev.tre, which
# includes a translation block and a population size for each branch.
# Because the population sizes are encoded in the tree itself, no Ne is
# passed to the parser.
# The parsed demography is then used to simulate a tree sequence, and the
# first tree of that sequence is drawn as text.
print("\n\nStarBEAST species tree with annotation for population sizes:")
with open("tests/data/species_trees/91genes_species_rev.tre", "r") as tree_file:
    # Only the read needs the open handle; parsing happens afterwards.
    starbeast_text = tree_file.read()
parsed_tuple = msprime.parse_starbeast(
    tree=starbeast_text, branch_length_units="myr", generation_time=5
)
population_configurations, demographic_events = parsed_tuple[0], parsed_tuple[1]
# Set a sample size of two on every population.
for population_configuration in population_configurations:
    population_configuration.sample_size = 2
simulation_options = dict(
    population_configurations=population_configurations,
    demographic_events=demographic_events,
    recombination_rate=1e-7,
    length=100,
)
tree_sequence = msprime.simulate(**simulation_options)
first_tree = tree_sequence.first()
print(first_tree.draw(format="unicode"))
print("number of trees: ", tree_sequence.num_trees)
# Example: the large species tree with over 100 species stored in
# 101g_nucl_conc_unconst.combined.nwk.tre. The tree carries no population
# sizes per branch, so a single Ne is passed to the parser.
# This tree is from Musilova et al. (2019), available from
# http://evoinformatics.eu/opsin_evolution.htm.
print("\n\nLarge newick tree with over 100 species:")
with open("tests/data/species_trees/101g_nucl_conc_unconst.combined.nwk.tre", "r") as f:
    parsed_tuple = msprime.parse_species_tree(
        tree=f.read(),
        Ne=1000,
        branch_length_units="myr",
        generation_time=5,
    )
# Element 0 is used as the population configurations and element 1 as the
# demographic events.
population_configurations = parsed_tuple[0]
demographic_events = parsed_tuple[1]
# Set a sample size of two on every population.
for n in population_configurations:
    n.sample_size = 2
# Pass the named components (the same objects as parsed_tuple[0] and
# parsed_tuple[1]) so this section matches the StarBEAST example and the
# bound names are actually used.
tree_sequence = msprime.simulate(
    population_configurations=population_configurations,
    demographic_events=demographic_events,
    recombination_rate=1e-8,
    length=400,
)
print("number of trees: ", tree_sequence.num_trees)
# Report the interval covered by each tree in the sequence.
for tree in tree_sequence.trees():
    print("interval: ", tree.interval)