You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
import numpy as np
from numpy import random
import os
import pandas as pd
from tqdm import tqdm
#from src.config import ROOT_DIR
from sklearn.metrics import roc_curve, average_precision_score, confusion_matrix
import matplotlib.pyplot as plt
import pickle
import seaborn as sns
import glob
from sklearn.cluster import KMeans
from sklearn import metrics
from scipy.spatial.distance import cdist
from pandarallel import pandarallel
from mplh.color_utils import get_colors
from mplh.fig_utils import legend_from_color
from mplh import cluster_help as ch
from src.simulations.utils.config import read_config_file, write_config_file
from dynamicTreeCut import cutreeHybrid
from scipy.spatial.distance import pdist
from scipy.cluster.hierarchy import linkage
from sklearn.model_selection import ParameterGrid
from src.simulations.utils.config import check_required
from .simulation import Simulation
# Does this ruin running the MCMC? I don't think so, b/c that format is
# going to be put in after anyway


class FullSimulation:
    """
    Class that simulates cell growth for lineage tracing. Reads in a
    parameter file and runs a certain number of iterations based on the
    num_iterations parameter.

    :ivar n_iter: Number of iterations
    :type n_iter: int
    :ivar num_cells: Number of cells to sequence
    :type num_cells: int
    :ivar params: The parameter dictionary read from the parameter file
    :type params: dict
    :ivar f_save: Default pickle path, built from the 'local_outdir' and
        'prefix' parameters
    :type f_save: str
    :ivar sim: Each index is a different iteration of the simulation.
        Only set after :meth:`run` has been called.
    :type sim: pandas Series
    """

    def __init__(self, params_f):
        """
        Read the parameter file and cache the values used by this class.

        :param params_f: Parameter file, passed to ``read_config_file``
        """
        params = read_config_file(params_f)
        self.n_iter = params['num_iterations']
        self.num_cells = params['num_cells']
        self.params = params
        self.f_save = os.path.join(self.params['local_outdir'],
                                   self.params['prefix'] + '.p')

    def run(self):
        """
        Runs the simulation and stores it in sim attr. This uses
        Pandarallel to parallelize the runs. Afterwards the dominant-clone
        metrics and the before/after statistics are computed.

        :return:
        """
        # One placeholder entry per iteration. The values are ignored by
        # run_sim; the Series only exists so the runs can be spread across
        # workers. dtype is explicit because each entry ends up holding a
        # Simulation object (and a dtype-less empty Series warns on some
        # pandas versions).
        df = pd.Series(index=range(self.n_iter), dtype=object)
        #df = df.apply(self.run_sim, args=(self.params,))
        pandarallel.initialize(nb_workers=self.params['cpus'])
        df = df.parallel_apply(self.run_sim, args=(self.params,))
        self.sim = df

        #self.cluster_before_after()
        self.sim_performance_dominant(group='both')
        self.stats_before_after()
        return

    @staticmethod
    def run_sim(x, params):
        """
        For a simulation, it will initialize, grow, subsample, and merge
        the before stimulus and after stimulus variables.

        :param x: Placeholder variable (unused)
        :param params: The parameter dictionary to use
        :return: The Simulation object after all steps have been run
        """
        s = Simulation(params)
        s.initialize()
        s.grow()
        s.subsample_new(to_delete=True)
        s.combine_init_growth()
        return s

    def flatten_sim(self):
        """Not implemented yet; currently does nothing."""
        ## TODO
        # This will extract out the classes of df
        return

    def sim_performance_dominant(self, group='both'):
        """
        Will collect metrics over the simulations, one entry per
        iteration. These are specifically for looking at the main,
        dominant clone, and what the allele-frequency of that clone
        variant is for each cell.

        :param group: {'init', 'growth', 'both'} Which set of cells to
            evaluate
        :raises ValueError: If group is not one of the accepted values

        :ivar dropout: For each iteration, the count of dominant clone
            cells whose allele frequency at the lineage variant position
            is 0, divided by the total number of cells in the group.
        :type dropout: list
        :ivar prec_scores: sklearn average precision score based on the
            allele frequencies seen in the dominant clone cells versus the
            non-clone cells.
        :type prec_scores: list
        :ivar rocs: ROC curves for each iteration based on allele
            frequencies.
        :type rocs: list
        :return:
        """
        dropout = []
        rocs = []
        prec_scores = []

        for s in self.sim.values:
            # First get the dominant clone, which is indexed as 1
            mt_pos = s.clone_mt_dict[1]
            # TODO account for mt_pos being a list not an int
            if group == 'init':
                clones = s.clone_cell
                cell_af = s.cell_af.loc[:, mt_pos]
            elif group == 'growth':
                clones = s.new_clone_cell
                cell_af = s.new_cell_af.loc[:, mt_pos]
            elif group == 'both':
                #clones = pd.concat((s.clone_cell, s.subsample_new_clone_cell)).reset_index(drop=True)
                #cell_af = pd.concat((s.cell_af.loc[:,mt_pos], s.subsample_new_cell_af.loc[:,mt_pos])).reset_index(drop=True)
                clones = s.combined_clones
                cell_af = s.combined_cell_af.loc[:, mt_pos]
            else:
                raise ValueError('group variable not properly set.')

            # Work on a copy so the simulation's clone labels stay intact.
            y_true = clones.values.copy()
            y_true[y_true != 1] = 0  # Set nondominant clones to 0

            rocs.append(roc_curve(y_true, cell_af))
            prec_scores.append(average_precision_score(y_true, cell_af))
            dropout.append(
                (cell_af[clones == 1] == 0).sum() / cell_af.shape[0])

        self.dropout = dropout
        self.prec_scores = prec_scores
        self.rocs = rocs
        return

    def reduce_cells(self, cell_af):
        """Not implemented yet; currently does nothing."""
        #self.sim
        return

    def stats_before_after(self, clone_id=1):
        """
        Count the cells of one clone before and after growth in every
        iteration.

        :param clone_id: Which clone to count
        :ivar b_a_df: One row per iteration with columns "Before" (count
            in ``clone_cell``), "After" (count in
            ``subsample_new_clone_cell``) and "A/B" (After divided by
            Before).
        :type b_a_df: pandas DataFrame
        :return:
        """
        # Object columns: the table holds integer counts and a float
        # ratio, not strings.
        b_a_df = pd.DataFrame(index=np.arange(0, len(self.sim)),
                              columns=["Before", "After", "A/B"],
                              dtype=object)
        for ind, s in enumerate(self.sim.values):
            n_before = (s.clone_cell == clone_id).sum()
            n_after = (s.subsample_new_clone_cell == clone_id).sum()
            b_a_df.at[ind, "Before"] = n_before
            b_a_df.at[ind, "After"] = n_after
            b_a_df.at[ind, "A/B"] = n_after / n_before

        self.b_a_df = b_a_df
        return

    def cluster_before_after(self):
        """
        Loops through the simulations and for each, it clusters the cells
        by calling the simulation's ``cluster`` method on its combined
        allele frequencies.

        :ivar cluster_results: One entry per iteration holding whatever
            ``Simulation.cluster`` returned (presumably a cluster label
            per cell; verify against Simulation).
        :type cluster_results: list
        """
        cluster_results = []
        print('clustering')
        for s in tqdm(self.sim.values):
            cluster_results.append(s.cluster(s.combined_cell_af))
            print(len(cluster_results[-1]))
        self.cluster_results = cluster_results

    def stats_cluster_before_after(self, clone_id=1):
        """
        Confusion matrix for clustering the proper clone cells together.
        Requires :meth:`cluster_before_after` to have been run first.

        Note that this stores its result in ``b_a_df``, the same
        attribute that :meth:`stats_before_after` writes to.

        :param clone_id: Which clone to get metrics for
        :ivar b_a_df: One row per iteration with columns "TN", "FP",
            "FN", "TP".
        :type b_a_df: pandas DataFrame
        :return:
        """
        rows = []
        for ind, s in enumerate(self.sim.values):
            # Build a fresh 0/1 indicator instead of zeroing
            # s.combined_clones in place, which would destroy the
            # simulation's clone labels for every later analysis.
            y_true = (np.asarray(s.combined_clones) == clone_id).astype(int)
            y_pred = self.cluster_results[ind]
            # NOTE(review): the 4-way unpack only works when the labels in
            # y_true and y_pred together are exactly {0, 1} -- confirm
            # that Simulation.cluster returns binary labels.
            tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
            rows.append([tn, fp, fn, tp])

        self.b_a_df = pd.DataFrame(rows, columns=["TN", "FP", "FN", "TP"],
                                   dtype=int)
        return

    def save(self, f_save=None):
        """
        Pickle this object's attribute dictionary to disk.

        :param f_save: Output path. Defaults to ``self.f_save``.
        """
        if f_save is None:
            f_save = self.f_save
        # Context manager closes the file even if pickling fails.
        with open(f_save, 'wb') as f:
            pickle.dump(self.__dict__, f, 2)

    def load(self, f_save=None):
        """
        Restore attributes from a pickle written by :meth:`save`.

        :param f_save: Input path. Defaults to ``self.f_save``.
        """
        #filename = self.params['filename']
        if f_save is None:
            f_save = self.f_save
        # NOTE: unpickling can execute arbitrary code; only load files
        # that come from a trusted source.
        with open(f_save, 'rb') as f:
            tmp_dict = pickle.load(f)
        self.__dict__.update(tmp_dict)
def main():
    """Placeholder script entry point; currently does nothing."""
    return


# Compare the module's __name__ variable, not the string literal
# "__name__", which can never equal "__main__".
if __name__ == "__main__":
    main()
No newline at end of file
new file mode 100644
index 0000000..a9e1360
+++ b/src/simulations/parametersweep.py
67225f62aefbb2d8d2777307132224bda6f57ada
The text was updated successfully, but these errors were encountered:
account for mt_pos being a list not an int
Mito_Trace/src/simulations/fullsimulation.py
Line 125 in 1b49ccc
67225f62aefbb2d8d2777307132224bda6f57ada
The text was updated successfully, but these errors were encountered: