Skip to content

Commit

Permalink
add DataSetFDR
Browse files Browse the repository at this point in the history
not tested, don't use it yet
  • Loading branch information
cctsou committed Mar 14, 2016
1 parent 62eeb34 commit 3d5472f
Show file tree
Hide file tree
Showing 8 changed files with 144 additions and 62 deletions.
57 changes: 57 additions & 0 deletions DIA-Umpire/src/FDREstimator/FDR_DataSetLevel.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package FDREstimator;

import MSUmpire.BaseDataStructure.DBSearchParam;
import MSUmpire.DIA.DIAPack;
import MSUmpire.PSMDataStructure.LCMSID;
import MSUmpire.PSMDataStructure.PepIonID;
import java.io.IOException;
import java.util.ArrayList;
import javax.xml.parsers.ParserConfigurationException;
import org.xml.sax.SAXException;
import org.xmlpull.v1.XmlPullParserException;

/**
 * Merges the peptide ion identifications of several DIA files into one
 * combined {@link LCMSID} and filters that combined list with a single
 * peptide-level FDR setting taken from the search parameters.
 *
 * @author Chih-Chiang Tsou <[email protected]>
 */
public class FDR_DataSetLevel {

    /** Combined identification list; remains {@code null} until {@link #GeneratePepIonList} has run. */
    public LCMSID combineID = null;

    /**
     * Parses the pepXML results of every file in {@code DIAFileList}, merges all
     * peptide ions into {@link #combineID} (one entry per peptide ion key, keeping
     * the highest {@code MaxProbability} seen across files) and then applies the
     * FDR based filtering calls of {@link LCMSID} to the merged list.
     *
     * @param DIAFileList   DIA files whose identifications are combined; each one is
     *                      (re)parsed via {@code ParsePepXML(param, null)}
     * @param param         search parameters supplying the decoy prefix, FASTA path
     *                      and peptide FDR
     * @param combineIDPath path handed to the constructor of the combined {@link LCMSID}
     */
    public void GeneratePepIonList(ArrayList<DIAPack> DIAFileList, DBSearchParam param, String combineIDPath) throws IOException, ParserConfigurationException, SAXException, XmlPullParserException, ClassNotFoundException, InterruptedException {

        // First pass: load the identifications of every file (no reference ID list).
        for (DIAPack pack : DIAFileList) {
            pack.ParsePepXML(param, null);
        }

        // Second pass: estimate the peptide level FDR on the whole data set,
        // so collect every peptide ion of every file into one list.
        combineID = new LCMSID(combineIDPath, param.DecoyPrefix, param.FastaPath);
        for (DIAPack pack : DIAFileList) {
            LCMSID fileIDs = pack.IDsummary;
            for (PepIonID candidate : fileIDs.GetPepIonList().values()) {
                mergeIntoCombined(candidate, param);
            }
        }

        // Configure the combined list and run the threshold search / removal steps
        // in the same order as before: threshold first, then decoys, then low
        // probability entries.
        combineID.DecoyTag = param.DecoyPrefix;
        combineID.FDR = param.PepFDR;
        combineID.FindPepProbThresholdByFDR();
        combineID.RemoveDecoyPep();
        combineID.RemoveLowProbPep();
    }

    /**
     * Adds a clone of {@code candidate} to {@link #combineID} when its key is not
     * present yet (tagging the clone as decoy = 1 / target = 0), then raises the
     * stored {@code MaxProbability} if {@code candidate} carries a higher value.
     */
    private void mergeIntoCombined(PepIonID candidate, DBSearchParam param) {
        boolean alreadyKnown = combineID.GetPepIonList().containsKey(candidate.GetKey());
        if (!alreadyKnown) {
            PepIonID copy = candidate.ClonePepIonID();
            boolean decoy = candidate.IsDecoy(param.DecoyPrefix);
            if (decoy) {
                copy.IsDecoy = 1;
            } else {
                copy.IsDecoy = 0;
            }
            combineID.AddPeptideID(copy);
        }

        // Looked up after the possible insert above, exactly like the original code.
        PepIonID merged = combineID.GetPepIonList().get(candidate.GetKey());
        if (merged.MaxProbability < candidate.MaxProbability) {
            merged.MaxProbability = candidate.MaxProbability;
        }
    }
}
14 changes: 11 additions & 3 deletions DIA-Umpire/src/MSUmpire/DIA/DIAPack.java
Original file line number Diff line number Diff line change
Expand Up @@ -638,17 +638,25 @@ public void SetPepXMLPath(){
iProphPepXMLs.add(PepXMLPath3);
}

public void ParsePepXML(DBSearchParam searchPara) throws ParserConfigurationException, SAXException, IOException, XmlPullParserException, ClassNotFoundException, InterruptedException {
public void ParsePepXML(DBSearchParam searchPara, LCMSID refID) throws ParserConfigurationException, SAXException, IOException, XmlPullParserException, ClassNotFoundException, InterruptedException {

SetPepXMLPath();
IDsummary = new LCMSID(FilenameUtils.getFullPath(Filename) + FilenameUtils.getBaseName(Filename),searchPara.DecoyPrefix,searchPara.FastaPath);
for (String pepxml : iProphPepXMLs) {
LCMSID pepxmlid = new LCMSID(FilenameUtils.getFullPath(Filename) + FilenameUtils.getBaseName(Filename),searchPara.DecoyPrefix,searchPara.FastaPath);
PepXMLParser pepxmlparser = new PepXMLParser(pepxmlid, pepxml, 0f);
pepxmlid.FilterByPepDecoyFDR(searchPara.DecoyPrefix, searchPara.PepFDR);
if (refID == null) {
pepxmlid.FilterByPepDecoyFDR(searchPara.DecoyPrefix, searchPara.PepFDR);
}
Logger.getRootLogger().info("No. of peptide ions:" + pepxmlid.GetPepIonList().size() + "; Peptide level threshold: " + pepxmlid.PepProbThreshold);
for (PepIonID pepID : pepxmlid.GetPepIonList().values()) {
IDsummary.AddPeptideID(pepID);
if (refID != null) {
if(refID.GetPepIonList().containsKey(pepID.GetKey())){
IDsummary.AddPeptideID(pepID);
}
} else {
IDsummary.AddPeptideID(pepID);
}
}
}
IDsummary.ReMapProPep();
Expand Down
14 changes: 7 additions & 7 deletions DIA-Umpire/src/MSUmpire/MathPackage/ChiSquareGOF.java
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,14 @@ public class ChiSquareGOF {
public static ReadWriteLock lock = new ReentrantReadWriteLock();

private ChiSquareGOF(int maxpeak) {
chimodels = new ChiSquared[maxpeak];
for (int i = 2; i <= maxpeak; i++) {
chimodels[i - 1] = new ChiSquared(i - 1);
chimodels = new ChiSquared[maxpeak-1];
for (int i = 1; i <= maxpeak; i++) {
chimodels[i - 1] = new ChiSquared(i);
}
}

public static ChiSquareGOF GetInstance(int maxpeak) {
if (models == null || maxpeak > chimodels.length) {
if (models == null || (maxpeak>1 && maxpeak >= chimodels.length)) {
lock.writeLock().lock();
try {
if (models == null) {
Expand All @@ -57,17 +57,17 @@ public static ChiSquareGOF GetInstance(int maxpeak) {
public float GetGoodNessOfFitProb(float[] expected, float[] observed) {
float gof = 0f;
int nopeaks = 0;
for (int i = 0; i < expected.length; i++) {
for (int i = 0; i < Math.min(observed.length,expected.length); i++) {
if (observed[i] > 0) {
float error = expected[i] - observed[i];
gof += (error * error) / (expected[i] * expected[i]);
nopeaks++;
}
}
if (Float.isNaN(gof)) {
if (Float.isNaN(gof) || nopeaks<2){
return 0f;
}
float prob = 1 - (float) chimodels[nopeaks].cdf(gof);
float prob = 1 - (float) chimodels[nopeaks-2].cdf(gof);
return prob;
}
}
2 changes: 1 addition & 1 deletion DIA-Umpire/src/MSUmpire/PeakDataStructure/PeakCluster.java
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ public void CalcPeakArea_V2() {
startRT = MonoIsotopePeak.StartRT();
endRT = MonoIsotopePeak.EndRT();

if (IsoPeaksCurves[1]!=null) {
if (IsoPeaksCurves.length>1 && IsoPeaksCurves[1]!=null) {
startRT = Math.min(MonoIsotopePeak.StartRT(), IsoPeaksCurves[1].StartRT());
endRT = Math.max(MonoIsotopePeak.EndRT(), IsoPeaksCurves[1].EndRT());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -370,15 +370,15 @@ protected void ReadPepIsoMS1PatternMap() throws FileNotFoundException, IOExcepti

InputStream is = this.getClass().getClassLoader().getResourceAsStream("resource/IsotopicPatternRange.csv");
BufferedReader reader = new BufferedReader(new InputStreamReader(is));
IsotopePatternMap = new TreeMap[LCMSPeakBase.MaxNoPeakCluster - 1];
IsotopePatternMap = new TreeMap[Math.max(2,LCMSPeakBase.MaxNoPeakCluster - 1)];
for (int i = 0; i < IsotopePatternMap.length; i++) {
IsotopePatternMap[i] = new TreeMap<>();
}
String line = "";
while ((line = reader.readLine()) != null) {
float MW = Float.parseFloat(line.split(",")[0]);

for (int i = 0; i < LCMSPeakBase.MaxNoPeakCluster - 1; i++) {
for (int i = 0; i < IsotopePatternMap.length; i++) {
float Mean = Float.parseFloat(line.split(",")[1 + (i * 2)]);
float SD = Float.parseFloat(line.split(",")[2 + (i * 2)]);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ public static void main(String[] args) throws FileNotFoundException, IOException
}
Logger.getRootLogger().info("Loading identification results " + mzXMLFile + "....");

DiaFile.ParsePepXML(tandemPara);
DiaFile.ParsePepXML(tandemPara,null);
DiaFile.BuildStructure();
if (!DiaFile.MS1FeatureMap.ReadPeakCluster()) {
Logger.getRootLogger().info("Loading peak and structure failed, job is incomplete");
Expand Down
112 changes: 64 additions & 48 deletions DIA_Umpire_Quant/src/dia_umpire_quant/DIA_Umpire_Quant.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
import FDREstimator.FDR_DataSetLevel;
import MSUmpire.BaseDataStructure.UmpireInfo;
import MSUmpire.DIA.DIAPack;
import MSUmpire.DIA.RTAlignedPepIonMapping;
Expand Down Expand Up @@ -94,7 +95,7 @@ public static void main(String[] args) throws FileNotFoundException, IOException
String ExternalLibPath = "";
String ExternalLibDecoyTag = "DECOY";
boolean DefaultProtFiltering=true;

boolean DataSetLevelPepFDR=false;
float ProbThreshold = 0.99f;
float ExtProbThreshold =0.99f;
float Freq = 0f;
Expand Down Expand Up @@ -202,6 +203,10 @@ public static void main(String[] args) throws FileNotFoundException, IOException
tandemPara.PepFDR = Float.parseFloat(value);
break;
}
case "DataSetLevelPepFDR": {
DataSetLevelPepFDR = Boolean.parseBoolean(value);
break;
}
case "InternalLibID": {
InternalLibID = value;
break;
Expand Down Expand Up @@ -332,6 +337,8 @@ public static void main(String[] args) throws FileNotFoundException, IOException
if (!new File(Combined_Prot).exists()) {
Logger.getRootLogger().info("ProtXML file: " +Combined_Prot + " cannot be found, the export protein summary table will be empty.");
}


LCMSID protID = null;

//Parse prot.xml and generate protein master list given an FDR
Expand Down Expand Up @@ -388,10 +395,65 @@ public static void main(String[] args) throws FileNotFoundException, IOException
for (File fileEntry : AssignFiles.values()) {
Logger.getRootLogger().info(fileEntry.getAbsolutePath());
}


LCMSID combinePepID = null;
if (DataSetLevelPepFDR) {
combinePepID = LCMSID.ReadLCMSIDSerialization(WorkFolder + "combinePepID.SerFS");
if (combinePepID == null) {
FDR_DataSetLevel fdr = new FDR_DataSetLevel();
fdr.GeneratePepIonList(FileList, tandemPara, WorkFolder + "combinePepID.SerFS");
combinePepID = fdr.combineID;
combinePepID.WriteLCMSIDSerialization(WorkFolder + "combinePepID.SerFS");
}
}

//process each DIA file for quantification based on untargeted identifications
for (File fileEntry : AssignFiles.values()) {
ProcessDIA(fileEntry, NoCPUs, tandemPara, FileList, IDSummaryFragments);
String mzXMLFile = fileEntry.getAbsolutePath();
if (mzXMLFile.toLowerCase().endsWith(".mzxml") | mzXMLFile.toLowerCase().endsWith(".mzml")) {
long time = System.currentTimeMillis();

DIAPack DiaFile = new DIAPack(mzXMLFile, NoCPUs);
FileList.add(DiaFile);
HashMap<String, FragmentPeak> FragMap = new HashMap<>();
IDSummaryFragments.put(FilenameUtils.getBaseName(mzXMLFile), FragMap);
if (!new File(FilenameUtils.getFullPath(DiaFile.Filename) + DiaFile.GetQ1Name() + ".mzXML").exists()
| !new File(FilenameUtils.getFullPath(DiaFile.Filename) + DiaFile.GetQ2Name() + ".mzXML").exists()
| !new File(FilenameUtils.getFullPath(DiaFile.Filename) + DiaFile.GetQ3Name() + ".mzXML").exists()) {
return;
}
Logger.getRootLogger().info("=================================================================================================");
Logger.getRootLogger().info("Processing " + mzXMLFile);
if (!DiaFile.LoadDIASetting()) {
Logger.getRootLogger().info("Loading DIA setting failed, job is incomplete");
System.exit(1);
}
if (!DiaFile.LoadParams()) {
Logger.getRootLogger().info("Loading parameters failed, job is incomplete");
System.exit(1);
}
Logger.getRootLogger().info("Loading identification results " + mzXMLFile + "....");

//If no serialized LCMSID could be read, parse the pepXML results and redo the quantification
if (!DiaFile.ReadSerializedLCMSID()) {
DiaFile.ParsePepXML(tandemPara, combinePepID);
DiaFile.BuildStructure();
if (!DiaFile.MS1FeatureMap.ReadPeakCluster()) {
Logger.getRootLogger().info("Loading peak and structure failed, job is incomplete");
System.exit(1);
}
DiaFile.MS1FeatureMap.ClearMonoisotopicPeakOfCluster();
//Generate mapping between index of precursor feature and pseudo MS/MS scan index
DiaFile.GenerateClusterScanNomapping();
//Doing quantification
DiaFile.AssignQuant();
DiaFile.ClearStructure();
}
DiaFile.IDsummary.ReduceMemoryUsage();
time = System.currentTimeMillis() - time;
Logger.getRootLogger().info(mzXMLFile + " processed time:" + String.format("%d hour, %d min, %d sec", TimeUnit.MILLISECONDS.toHours(time), TimeUnit.MILLISECONDS.toMinutes(time) - TimeUnit.HOURS.toMinutes(TimeUnit.MILLISECONDS.toHours(time)), TimeUnit.MILLISECONDS.toSeconds(time) - TimeUnit.MINUTES.toSeconds(TimeUnit.MILLISECONDS.toMinutes(time))));
}
}

//<editor-fold defaultstate="collapsed" desc="Targeted re-extraction using internal library">
Expand Down Expand Up @@ -622,51 +684,5 @@ private static void SaintOutput(LCMSID protID, LCMSID IDsummary, FragmentSelecti
}
}

private static void ProcessDIA(final File fileEntry, int NoCPUs, TandemParam tandemPara, ArrayList<DIAPack> FileList, HashMap<String, HashMap<String, FragmentPeak>> IDSummaryFragments) throws IOException, FileNotFoundException, DataFormatException, InterruptedException, ExecutionException, ParserConfigurationException, SAXException, Exception {
String mzXMLFile = fileEntry.getAbsolutePath();
if (mzXMLFile.toLowerCase().endsWith(".mzxml") | mzXMLFile.toLowerCase().endsWith(".mzml")) {
long time = System.currentTimeMillis();

DIAPack DiaFile = new DIAPack(mzXMLFile, NoCPUs);
FileList.add(DiaFile);
HashMap<String, FragmentPeak> FragMap = new HashMap<>();
IDSummaryFragments.put(FilenameUtils.getBaseName(mzXMLFile), FragMap);
if (!new File(FilenameUtils.getFullPath(DiaFile.Filename) + DiaFile.GetQ1Name() + ".mzXML").exists()
| !new File(FilenameUtils.getFullPath(DiaFile.Filename) + DiaFile.GetQ2Name() + ".mzXML").exists()
| !new File(FilenameUtils.getFullPath(DiaFile.Filename) + DiaFile.GetQ3Name() + ".mzXML").exists()) {
return;
}
Logger.getRootLogger().info("=================================================================================================");
Logger.getRootLogger().info("Processing " + mzXMLFile);
if (!DiaFile.LoadDIASetting()) {
Logger.getRootLogger().info("Loading DIA setting failed, job is incomplete");
System.exit(1);
}
if (!DiaFile.LoadParams()) {
Logger.getRootLogger().info("Loading parameters failed, job is incomplete");
System.exit(1);
}
Logger.getRootLogger().info("Loading identification results " + mzXMLFile + "....");

//If the serialization file for ID file existed
if (!DiaFile.ReadSerializedLCMSID()) {
DiaFile.ParsePepXML(tandemPara);
DiaFile.BuildStructure();
if (!DiaFile.MS1FeatureMap.ReadPeakCluster()) {
Logger.getRootLogger().info("Loading peak and structure failed, job is incomplete");
System.exit(1);
}
DiaFile.MS1FeatureMap.ClearMonoisotopicPeakOfCluster();
//Generate mapping between index of precursor feature and pseudo MS/MS scan index
DiaFile.GenerateClusterScanNomapping();
//Doing quantification
DiaFile.AssignQuant();
DiaFile.ClearStructure();
}
DiaFile.IDsummary.ReduceMemoryUsage();
time = System.currentTimeMillis() - time;
Logger.getRootLogger().info(mzXMLFile + " processed time:" + String.format("%d hour, %d min, %d sec", TimeUnit.MILLISECONDS.toHours(time), TimeUnit.MILLISECONDS.toMinutes(time) - TimeUnit.HOURS.toMinutes(TimeUnit.MILLISECONDS.toHours(time)), TimeUnit.MILLISECONDS.toSeconds(time) - TimeUnit.MINUTES.toSeconds(TimeUnit.MILLISECONDS.toMinutes(time))));
}
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ DecoyPrefix=
#FDR threshold
PeptideFDR = 0.01
ProteinFDR = 0.01
DataSetLevelPepFDR = false

#Use default protein FDR filtering.
# Set to true if you wish to filter protein using DIA-Umpire's filtering method based on maximum peptide ion probability values.
Expand Down

0 comments on commit 3d5472f

Please sign in to comment.