From 55d75c6c72ed5cc567c1837a588834adbc8155a4 Mon Sep 17 00:00:00 2001 From: Jimmy Eng Date: Thu, 26 Sep 2024 13:00:15 -0700 Subject: [PATCH] Add variable mod protein filter support to FI. Use bits in siVarModProteinFilter to track which mods are allowed for each peptide in the plain peptide index. There should be an opportunity in getModifiableSequences to also apply some (but not all) variable mod protein filtering but that will come later and will not save much in the grand scheme of things. --- Comet.cpp | 9 +- CometSearch/CombinatoricsUtils.cpp | 36 ++++-- CometSearch/CometData.h | 15 +-- CometSearch/CometDataInternal.h | 10 +- CometSearch/CometFragmentIndex.cpp | 172 +++++++++++++++---------- CometSearch/CometPreprocess.cpp | 27 ---- CometSearch/CometSearch.cpp | 56 +++++--- CometSearch/CometSearchManager.cpp | 179 +++++++++++++++++--------- CometSearch/CometSearchManager.h | 1 + CometSearch/Common.h | 8 +- CometSearch/ModificationsPermuter.cpp | 4 +- RealtimeSearch/Search.cs | 26 ++-- 12 files changed, 328 insertions(+), 215 deletions(-) diff --git a/Comet.cpp b/Comet.cpp index 80d21097..1b769b9d 100644 --- a/Comet.cpp +++ b/Comet.cpp @@ -703,6 +703,12 @@ void LoadParameters(char *pszParamsFile, sprintf(szParamStringVal, "%lf %lf", doubleRangeParam.dStart, doubleRangeParam.dEnd); pSearchMgr->SetParam("clear_mz_range", szParamStringVal, doubleRangeParam); } + else if (!strcmp(szParamName, "percentage_base_peak")) + { + sscanf(szParamVal, "%lf", &dDoubleParam); + sprintf(szParamStringVal, "%lf", dDoubleParam); + pSearchMgr->SetParam("percentage_base_peak", szParamStringVal, dDoubleParam); + } else if (!strcmp(szParamName, "export_additional_pepxml_scores")) { sscanf(szParamVal, "%d", &iIntParam); @@ -1853,10 +1859,11 @@ mass_offsets = # one or more mass offsets to search (val minimum_peaks = 10 # required minimum number of peaks in spectrum to search (default 10)\n"); fprintf(fp, -"minimum_intensity = 0 # minimum intensity value to read in\n\ 
+"minimum_intensity = 0 # minimum intensity value to read in\n\ remove_precursor_peak = 0 # 0=no, 1=yes, 2=all charge reduced precursor peaks (for ETD), 3=phosphate neutral loss peaks\n\ remove_precursor_tolerance = 1.5 # +- Da tolerance for precursor removal\n\ clear_mz_range = 0.0 0.0 # clear out all peaks in the specified m/z range e.g. remove reporter ion region of TMT spectra\n\ +percentage_base_peak = 0.0 # specify a percentage (e.g. \"0.05\" for 5%%) of the base peak intensity as a minimum intensity threshold\n\ \n\ #\n\ # static modifications\n\ diff --git a/CometSearch/CombinatoricsUtils.cpp b/CometSearch/CombinatoricsUtils.cpp index c5a1caf0..f24f1aea 100644 --- a/CometSearch/CombinatoricsUtils.cpp +++ b/CometSearch/CombinatoricsUtils.cpp @@ -92,7 +92,8 @@ int** CombinatoricsUtils::makeCombinations(int n, int r, int count) int *c = new int[k + 3]; c[0] = 0; - for (int i = 1; i <= k; ++i) { + for (int i = 1; i <= k; ++i) + { c[i] = i - 1; } // Initialize sentinels @@ -116,7 +117,8 @@ int** CombinatoricsUtils::makeCombinations(int n, int r, int count) // Prepare next iteration // T2 and T6 loop int x = 0; - if (j > 0) { + if (j > 0) + { x = j; c[j] = x; j--; @@ -126,30 +128,36 @@ int** CombinatoricsUtils::makeCombinations(int n, int r, int count) // return ret; } // T3 - if (c[1] + 1 < c[2]) { + if (c[1] + 1 < c[2]) + { c[1]++; combinations[idx] = ret; idx++; continue; // return ret; } - else { + else + { j = 2; } // T4 bool stepDone = false; - while (!stepDone) { + while (!stepDone) + { c[j - 1] = j - 2; x = c[j] + 1; - if (x == c[j + 1]) { + if (x == c[j + 1]) + { j++; } - else { + else + { stepDone = true; } } // T5 - if (j > k) { + if (j > k) + { more = false; combinations[idx] = ret; idx++; @@ -169,11 +177,14 @@ int** CombinatoricsUtils::makeCombinations(int n, int r, int count) int CombinatoricsUtils::nChooseK(const int n, const int k) { - if (n == k || k == 0) return 1; + if (n == k || k == 0) + return 1; - if (k == 1 || k == n - 1) return n; + if (k 
== 1 || k == n - 1) + return n; - if (k > n / 2) return nChooseK(n, n - k); + if (k > n / 2) + return nChooseK(n, n - k); if (n <= N && k <= K) { @@ -196,7 +207,8 @@ int CombinatoricsUtils::nChooseK(const int n, const int k) int CombinatoricsUtils::getCombinationCount(int n, int k) { int total = 0; - if (k > n) k = n; + if (k > n) + k = n; for (; k >= 1; k--) { total += nChooseK(n, k); diff --git a/CometSearch/CometData.h b/CometSearch/CometData.h index 9aad6cd1..823a54c1 100644 --- a/CometSearch/CometData.h +++ b/CometSearch/CometData.h @@ -338,16 +338,14 @@ struct EnzymeInfo iSearchEnzyme2OffSet = a.iSearchEnzyme2OffSet; iSampleEnzymeOffSet = a.iSampleEnzymeOffSet; - int i; - - for (i = 0; i < ENZYME_NAME_LEN; i++) + for (int i = 0; i < ENZYME_NAME_LEN; ++i) { szSearchEnzymeName[i] = a.szSearchEnzymeName[i]; szSearchEnzyme2Name[i] = a.szSearchEnzyme2Name[i]; szSampleEnzymeName[i] = a.szSampleEnzymeName[i]; } - for (i = 0; i < MAX_ENZYME_AA; i++) + for (int i = 0; i < MAX_ENZYME_AA; ++i) { szSearchEnzymeBreakAA[i] = a.szSearchEnzymeBreakAA[i]; szSearchEnzymeNoBreakAA[i] = a.szSearchEnzymeNoBreakAA[i]; @@ -367,16 +365,14 @@ struct EnzymeInfo iSearchEnzyme2OffSet = a.iSearchEnzyme2OffSet; iSampleEnzymeOffSet = a.iSampleEnzymeOffSet; - int i; - - for (i = 0; i < ENZYME_NAME_LEN; i++) + for (int i = 0; i < ENZYME_NAME_LEN; ++i) { szSearchEnzymeName[i] = a.szSearchEnzymeName[i]; szSearchEnzyme2Name[i] = a.szSearchEnzyme2Name[i]; szSampleEnzymeName[i] = a.szSampleEnzymeName[i]; } - for (i = 0; i < MAX_ENZYME_AA; i++) + for (int i = 0; i < MAX_ENZYME_AA; ++i) { szSearchEnzymeBreakAA[i] = a.szSearchEnzymeBreakAA[i]; szSearchEnzymeNoBreakAA[i] = a.szSearchEnzymeNoBreakAA[i]; @@ -479,11 +475,8 @@ struct InputFileInfo struct SingleSpectrumStruct { - double dMass; - double dInt; - }; enum CometParamType diff --git a/CometSearch/CometDataInternal.h b/CometSearch/CometDataInternal.h index 5cb22c53..0d7e75bc 100644 --- a/CometSearch/CometDataInternal.h +++ 
b/CometSearch/CometDataInternal.h @@ -59,6 +59,8 @@ class CometSearchManager; #define NO_PEFF_VARIANT -127 #define FRAGINDEX_VMODS 5 // only parse first five variable mods for fragment ion index searches + // if this is ever larger than 16, need to extend range of siVarModProteinFilter + #define VMODS 15 // also "VMODS+1" is 4th dimension of uiBinnedIonMasses to cover unmodified ions (0), mod NL (1-15) #define VMOD_1_INDEX 0 #define VMOD_2_INDEX 1 @@ -424,6 +426,7 @@ struct DBIndex char pcVarModSites[MAX_PEPTIDE_LEN_P2]; // encodes 0-9 indicating which var mod at which position comet_fileoffset_t lIndexProteinFilePosition; // points to entry in g_pvProteinsList double dPepMass; // MH+ pep mass + unsigned short siVarModProteinFilter; // bitwise representation of mmapProtein bool operator==(const DBIndex &rhs) const { @@ -462,6 +465,7 @@ struct PlainPeptideIndex string sPeptide; comet_fileoffset_t lIndexProteinFilePosition; // points to entry in g_pvProteinsList double dPepMass; // MH+ pep mass, unmodified mass; modified mass in FragmentPeptidesStruct + unsigned short siVarModProteinFilter; // bitwise representation of mmapProtein bool operator==(const PlainPeptideIndex &rhs) const { @@ -559,13 +563,14 @@ struct VarModParams bool bBinaryModSearch; // set to true if any of the variable mods are of binary mod variety bool bUseFragmentNeutralLoss; // set to true if any custom NL is set; applied only to 1+ and 2+ fragments bool bRareVarModPresent; // set to true if any of iRequireThisMod == -1 + bool bVarModProteinFilter; // set to true if protein mods list is applied int iRequireVarMod; // 0=no; else use bits to determine which varmods are required int iMaxVarModPerPeptide; int iMaxPermutations; VarMods varModList[VMODS]; char cModCode[VMODS]; // mod characters string sProteinLModsListFile; // file containing list of proteins to restrict application of varmods to - multimap mmapProteinLModsList; // vector read from sProteinModsListFile if present + multimap
mmapProteinModsList; // vector read from sProteinModsListFile if present VarModParams& operator=(VarModParams& a) { @@ -576,6 +581,7 @@ struct VarModParams bBinaryModSearch = a.bBinaryModSearch; bUseFragmentNeutralLoss = a.bUseFragmentNeutralLoss; bRareVarModPresent = a.bRareVarModPresent; + bVarModProteinFilter = a.bVarModProteinFilter; iRequireVarMod = a.iRequireVarMod; iMaxVarModPerPeptide = a.iMaxVarModPerPeptide; iMaxPermutations = a.iMaxPermutations; @@ -943,8 +949,6 @@ extern StaticParams g_staticParams; extern string g_psGITHUB_SHA; // grab the GITHUB_SHA environment variable and trim to 7 chars; null if environment variable not present -extern map g_pvProteinNames; - extern vector g_pvDBIndex; extern vector> g_pvProteinsList; diff --git a/CometSearch/CometFragmentIndex.cpp b/CometSearch/CometFragmentIndex.cpp index 9793c810..51001b16 100644 --- a/CometSearch/CometFragmentIndex.cpp +++ b/CometSearch/CometFragmentIndex.cpp @@ -23,6 +23,7 @@ #include "ModificationsPermuter.h" #include +#include #include #include @@ -170,8 +171,8 @@ void CometFragmentIndex::GenerateFragmentIndex(ThreadPool *tp) // a subset of peptides to calculate their fragment ions cout << " - count fragment index vector sizes ... "; fflush(stdout); - // stupid workaround for Visual Studio ... first calculate all fragments to find size - // of each fragment index vector + // stupid workaround for Windows/Visual Studio performance ... 
first calculate all + // fragments to find size of each fragment ion index vector for (int iWhichThread = 0; iWhichThread < iNumIndexingThreads; ++iWhichThread) { for (int iPrecursorBin = 0; iPrecursorBin < FRAGINDEX_PRECURSORBINS; ++iPrecursorBin) @@ -253,7 +254,7 @@ void CometFragmentIndex::GenerateFragmentIndex(ThreadPool *tp) } } } - printf(" - total # of entries in the fragment index: %0.2E (max %lld)\n", (double)ullCount, ullMax); + printf(" - total # of entries in the fragment index: %llu (max %llu)\n", ullCount, ullMax); } @@ -296,15 +297,21 @@ void CometFragmentIndex::AddFragmentsThreadProc(int iWhichThread, // Add any n-term variable mods for (short ctNtermMod=0; ctNtermMod= 0) { - // Add any n-term variable mods - for (short ctNtermMod=0; ctNtermMod< FRAGINDEX_VMODS; ++ctNtermMod) - { - if (g_staticParams.variableModParameters.varModList[ctNtermMod].bNtermMod) - AddFragments(g_vRawPeptides, iWhichThread, iWhichPeptide, modNumIdx, ctNtermMod, -1, bCountOnly); - } + bool bPass = true; - // Add any c-term variable mods - for (short ctCtermMod=0; ctCtermMod< FRAGINDEX_VMODS; ++ctCtermMod) + // if protein variable mod filter is applied, check mods[] against the peptide's siVarModProteinFilter + if (g_staticParams.variableModParameters.bVarModProteinFilter) { - if (g_staticParams.variableModParameters.varModList[ctCtermMod].bCtermMod) - AddFragments(g_vRawPeptides, iWhichThread, iWhichPeptide, modNumIdx, -1, ctCtermMod, bCountOnly); + char* mods = MOD_NUMBERS.at(modNumIdx).modifications; + + for (int i = 0; i < MOD_NUMBERS.at(modNumIdx).modStringLen; ++i) + { + // if mods[i] is not set to 1 in siVarModProteinFilter, do not apply this mod + if (!cometbitcheck(g_vRawPeptides.at(iWhichPeptide).siVarModProteinFilter, mods[i])) + { + bPass = false; + break; + } + } } - // Now consider combinations of n-term and c-term variable mods - for (short ctNtermMod=0; ctNtermMod< FRAGINDEX_VMODS; ++ctNtermMod) + if (bPass) { - for (short ctCtermMod=0; ctCtermMod<
FRAGINDEX_VMODS; ++ctCtermMod) + AddFragments(g_vRawPeptides, iWhichThread, iWhichPeptide, modNumIdx, -1, -1, bCountOnly); + + if (g_staticParams.variableModParameters.bVarTermModSearch) { - if (g_staticParams.variableModParameters.varModList[ctNtermMod].bNtermMod - && g_staticParams.variableModParameters.varModList[ctCtermMod].bCtermMod) + // Add any n-term variable mods + for (short ctNtermMod = 0; ctNtermMod < FRAGINDEX_VMODS; ++ctNtermMod) { - AddFragments(g_vRawPeptides, iWhichThread, iWhichPeptide, modNumIdx, ctNtermMod, ctCtermMod, bCountOnly); + if (g_staticParams.variableModParameters.varModList[ctNtermMod].bNtermMod + && (!g_staticParams.variableModParameters.bVarModProteinFilter || cometbitcheck(g_vRawPeptides.at(iWhichPeptide).siVarModProteinFilter, ctNtermMod))) + { + AddFragments(g_vRawPeptides, iWhichThread, iWhichPeptide, modNumIdx, ctNtermMod, -1, bCountOnly); + } + } + + // Add any c-term variable mods + for (short ctCtermMod = 0; ctCtermMod < FRAGINDEX_VMODS; ++ctCtermMod) + { + if (g_staticParams.variableModParameters.varModList[ctCtermMod].bCtermMod + && (!g_staticParams.variableModParameters.bVarModProteinFilter || cometbitcheck(g_vRawPeptides.at(iWhichPeptide).siVarModProteinFilter, ctCtermMod))) + { + AddFragments(g_vRawPeptides, iWhichThread, iWhichPeptide, modNumIdx, -1, ctCtermMod, bCountOnly); + } + } + + // Now consider combinations of n-term and c-term variable mods + for (short ctNtermMod = 0; ctNtermMod < FRAGINDEX_VMODS; ++ctNtermMod) + { + for (short ctCtermMod = 0; ctCtermMod < FRAGINDEX_VMODS; ++ctCtermMod) + { + if (g_staticParams.variableModParameters.varModList[ctNtermMod].bNtermMod + && g_staticParams.variableModParameters.varModList[ctCtermMod].bCtermMod + && (!g_staticParams.variableModParameters.bVarModProteinFilter || + (cometbitcheck(g_vRawPeptides.at(iWhichPeptide).siVarModProteinFilter, ctNtermMod) + && cometbitcheck(g_vRawPeptides.at(iWhichPeptide).siVarModProteinFilter, ctCtermMod)))) + { + 
AddFragments(g_vRawPeptides, iWhichThread, iWhichPeptide, modNumIdx, ctNtermMod, ctCtermMod, bCountOnly); + } + } } } } @@ -740,10 +783,8 @@ bool CometFragmentIndex::WritePlainPeptideIndex(ThreadPool *tp) g_staticParams.variableModParameters.varModList[x].szVarModChar); fprintf(fp, "\n"); - size_t tTmp = (int)g_pvProteinNames.size(); - comet_fileoffset_t *lProteinIndex = new comet_fileoffset_t[tTmp]; - for (size_t i = 0; i < tTmp; ++i) - lProteinIndex[i] = -1; + // Variable mod protein filter: + fprintf(fp, "ProteinModList: %d\n", g_staticParams.variableModParameters.bVarModProteinFilter?1:0); comet_fileoffset_t clPeptidesFilePos = comet_ftell(fp); size_t tNumPeptides = g_pvDBIndex.size(); @@ -757,17 +798,19 @@ bool CometFragmentIndex::WritePlainPeptideIndex(ThreadPool *tp) fwrite(&iLen, sizeof(int), 1, fp); fwrite((*it).szPeptide, sizeof(char), iLen, fp); fwrite(&((*it).dPepMass), sizeof(double), 1, fp); + fwrite(&((*it).siVarModProteinFilter), sizeof(unsigned short), 1, fp); fwrite(&((*it).lIndexProteinFilePosition), clSizeCometFileOffset, 1, fp); sTmp.sPeptide = (*it).szPeptide; sTmp.lIndexProteinFilePosition = clSizeCometFileOffset; sTmp.dPepMass = (*it).dPepMass; + sTmp.siVarModProteinFilter = (*it).siVarModProteinFilter; g_vRawPeptides.push_back(sTmp); } // Now write out: vector> g_pvProteinsList comet_fileoffset_t clProteinsFilePos = comet_ftell(fp); - tTmp = g_pvProteinsList.size(); + size_t tTmp = g_pvProteinsList.size(); fwrite(&tTmp, clSizeCometFileOffset, 1, fp); for (auto it = g_pvProteinsList.begin(); it != g_pvProteinsList.end(); ++it) { @@ -812,8 +855,6 @@ bool CometFragmentIndex::WritePlainPeptideIndex(ThreadPool *tp) fwrite(&clPermutationsFilePos, clSizeCometFileOffset, 1, fp); g_pvDBIndex.clear(); - g_pvProteinNames.clear(); - delete[] lProteinIndex; fclose(fp); @@ -923,7 +964,7 @@ bool CometFragmentIndex::ReadPlainPeptideIndex(void) tok=strtok(szBuf+11, delims); while (tok != NULL) { - sscanf(tok, "%lf", 
&(g_staticParams.staticModifications.pdStaticMods[x])); + int iRet = sscanf(tok, "%lf", &(g_staticParams.staticModifications.pdStaticMods[x])); g_staticParams.massUtility.pdAAMassFragment[x] += g_staticParams.staticModifications.pdStaticMods[x]; tok = strtok(NULL, delims); x++; @@ -953,54 +994,51 @@ bool CometFragmentIndex::ReadPlainPeptideIndex(void) } else if (!strncmp(szBuf, "VariableMod:", 12)) // read in variable mods { - char szMod1[512]; - char szMod2[512]; - char szMod3[512]; - char szMod4[512]; - char szMod5[512]; + string strMods = szBuf + 12; + + istringstream iss(strMods); - sscanf(szBuf + 12, "%s %s %s %s %s", - szMod1, szMod2, szMod3, szMod4, szMod5); + int iNumMods = 0; - // parse FRAGINDEX_VMODS number of mods; FIX: make this into a loop over FRAGINDEX_VMODS - sscanf(szMod1, "%lf:%lf:%s", &(g_staticParams.variableModParameters.varModList[0].dVarModMass), - &(g_staticParams.variableModParameters.varModList[0].dNeutralLoss), - g_staticParams.variableModParameters.varModList[0].szVarModChar); + do + { + string subStr; - sscanf(szMod2, "%lf:%lf:%s", &(g_staticParams.variableModParameters.varModList[1].dVarModMass), - &(g_staticParams.variableModParameters.varModList[1].dNeutralLoss), - g_staticParams.variableModParameters.varModList[1].szVarModChar); + iss >> subStr; // parse each word which is a colon delimited triplet pair for modmass:neutralloss:modchars - sscanf(szMod3, "%lf:%lf:%s", &(g_staticParams.variableModParameters.varModList[2].dVarModMass), - &(g_staticParams.variableModParameters.varModList[2].dNeutralLoss), - g_staticParams.variableModParameters.varModList[2].szVarModChar); + int iRet = sscanf(subStr.c_str(), "%lf:%lf:%s", &(g_staticParams.variableModParameters.varModList[iNumMods].dVarModMass), + &(g_staticParams.variableModParameters.varModList[iNumMods].dNeutralLoss), + g_staticParams.variableModParameters.varModList[iNumMods].szVarModChar); - sscanf(szMod4, "%lf:%lf:%s", 
&(g_staticParams.variableModParameters.varModList[3].dVarModMass), - &(g_staticParams.variableModParameters.varModList[3].dNeutralLoss), - g_staticParams.variableModParameters.varModList[3].szVarModChar); + iNumMods++; + + if (iNumMods == FRAGINDEX_VMODS) + break; - sscanf(szMod5, "%lf:%lf:%s", &(g_staticParams.variableModParameters.varModList[4].dVarModMass), - &(g_staticParams.variableModParameters.varModList[4].dNeutralLoss), - g_staticParams.variableModParameters.varModList[4].szVarModChar); + } while (iss); for (int x = 0; x < FRAGINDEX_VMODS; ++x) { if (g_staticParams.variableModParameters.varModList[x].dVarModMass != 0.0) - { g_staticParams.variableModParameters.bVarModSearch = true; - break; - } + + if (g_staticParams.variableModParameters.varModList[x].dNeutralLoss != 0.0) + g_staticParams.variableModParameters.bUseFragmentNeutralLoss = true; } bFoundVariable = true; - break; } - } + else if (!strncmp(szBuf, "ProteinModList:", 15)) + { + int iTmp; - for (int i = 0; i < 9 ; ++i) - { - if (g_staticParams.variableModParameters.varModList[i].dNeutralLoss != 0.0) - g_staticParams.variableModParameters.bUseFragmentNeutralLoss = true; + int iRet = sscanf(szBuf + 16, "%d", &iTmp); + + if (iTmp) + g_staticParams.variableModParameters.bVarModProteinFilter = true; + + break; + } } if (!bFoundStatic || !bFoundVariable) @@ -1030,6 +1068,7 @@ bool CometFragmentIndex::ReadPlainPeptideIndex(void) struct PlainPeptideIndex sTmp; int iLen; char szPeptide[MAX_PEPTIDE_LEN]; + for (size_t it = 0; it < tNumPeptides; ++it) { tTmp = fread(&iLen, sizeof(int), 1, fp); @@ -1037,6 +1076,7 @@ bool CometFragmentIndex::ReadPlainPeptideIndex(void) szPeptide[iLen] = '\0'; sTmp.sPeptide = szPeptide; tTmp = fread(&(sTmp.dPepMass), sizeof(double), 1, fp); + tTmp = fread(&(sTmp.siVarModProteinFilter), sizeof(unsigned short), 1, fp); tTmp = fread(&(sTmp.lIndexProteinFilePosition), clSizeCometFileOffset, 1, fp); g_vRawPeptides.push_back(sTmp); @@ -1082,7 +1122,7 @@ bool 
CometFragmentIndex::ReadPlainPeptideIndex(void) tTmp = fread(MOD_SEQ_MOD_NUM_START, sizeof(int), ulSizeModSeqs, fp); tTmp = fread(MOD_SEQ_MOD_NUM_CNT, sizeof(int), ulSizeModSeqs, fp); - tTmp = fread(PEPTIDE_MOD_SEQ_IDXS, sizeof(int), ulSizevRawPeptides, fp); //FIX + tTmp = fread(PEPTIDE_MOD_SEQ_IDXS, sizeof(int), ulSizevRawPeptides, fp); //FIX, why?? int iTmp; char szTmp[MAX_PEPTIDE_LEN]; diff --git a/CometSearch/CometPreprocess.cpp b/CometSearch/CometPreprocess.cpp index 95775c3e..4fc4b314 100644 --- a/CometSearch/CometPreprocess.cpp +++ b/CometSearch/CometPreprocess.cpp @@ -125,33 +125,6 @@ bool CometPreprocess::ReadPrecursors(MSReader &mstReader) g_staticParams.bSkipToStartScan = false; iTmpCount = iScanNumber; -/* - // Thermo's monoisotopic m/z determine can fail sometimes. Assume that when - // the mono m/z value is less than selection window, it is wrong and use the - // selection m/z as the precursor m/z. This should - // be invoked when searching Thermo raw files and mzML converted from those. - // Only applied when single precursor present. - for (int i = 0 ; i < mstSpectrum.sizeMZ(); ++i) // walk through all precursor m/z's; usually just one - { - double dMZ = mstSpectrum.getMonoMZ(i); - - if (g_staticParams.options.bCorrectMass && mstSpectrum.sizeMZ() == 1) - { - double dSelectionLower = mstSpectrum.getSelWindowLower(); - double dSelectedMZ = mstSpectrum.getMZ(i); - - if (dMZ > 0.1 && dSelectionLower > 0.1 && dMZ+0.1 < dSelectionLower) - dMZ = dSelectedMZ; - } - - if (dMZ == 0) - dMZ = mstSpectrum.getMZ(i); - - if (!(iScanNumber % 5000)) - printf("OK %s dMZ %f %d/%d\n", g_staticParams.inputFile.szFileName, dMZ, iScanNumber, iFileLastScan); - } -*/ - // To run a search, all that's needed is MH+ and Z. So need to generate // all combinations of these for each spectrum, whether there's a known // Z for each precursor or if Comet has to guess the 1+ or 2+/3+ charges. 
diff --git a/CometSearch/CometSearch.cpp b/CometSearch/CometSearch.cpp index 02503aaf..1b1637cd 100644 --- a/CometSearch/CometSearch.cpp +++ b/CometSearch/CometSearch.cpp @@ -23,7 +23,7 @@ #include "ModificationsPermuter.h" #include -#include +#include #include #include @@ -754,16 +754,6 @@ bool CometSearch::RunSearch(int iPercentStart, return false; } - if (g_staticParams.options.bCreateIndex) - { - struct IndexProteinStruct sEntry; - - // store protein name - strcpy(sEntry.szProt, dbe.strName.c_str()); - sEntry.lProteinFilePosition = dbe.lProteinFilePosition; - g_pvProteinNames.insert({ sEntry.lProteinFilePosition, sEntry }); - } - // Load sequence while (((iTmpCh=getc(fp)) != '>') && (iTmpCh != EOF)) { @@ -1614,10 +1604,18 @@ bool CometSearch::SearchForPeptides(struct sDBEntry dbe, if (g_staticParams.options.bClipNtermAA) // skip the N-term residue of every peptide iStartPos = 1; + unsigned short siVarModProteinFilter = 0; // bitwise representation of mmapProtein, all bits set to "0" initially + for (int i = 0; i < VMODS; ++i) { piVarModCounts[i] = 0; - pbVarModProteinFilter[i] = true; + if (g_staticParams.variableModParameters.varModList[i].dVarModMass != 0.0) + { + pbVarModProteinFilter[i] = true; + cometbitset(siVarModProteinFilter, i); // set the "i"th bit to 1 + } + else + pbVarModProteinFilter[i] = false; } // If variable modifications protein filter is applied, check if current sequence @@ -1625,30 +1623,47 @@ bool CometSearch::SearchForPeptides(struct sDBEntry dbe, // current protein is not on the list, do not apply that particular variable mod. // Any variable mod on the list will have pbVarModProteinFilter[?] = false unless // the current protein matches a protein on the list. 
- if (g_staticParams.variableModParameters.mmapProteinLModsList.size() > 0) + if (g_staticParams.variableModParameters.bVarModProteinFilter) { char szProteinAccession[256]; sscanf(dbe.strName.c_str(), "%255s", szProteinAccession); szProteinAccession[255] = '\0'; - auto it = g_staticParams.variableModParameters.mmapProteinLModsList.begin(); - while (it != g_staticParams.variableModParameters.mmapProteinLModsList.end()) + auto it = g_staticParams.variableModParameters.mmapProteinModsList.begin(); + while (it != g_staticParams.variableModParameters.mmapProteinModsList.end()) { int iWhichMod = it->first; pbVarModProteinFilter[iWhichMod - 1] = false; // do not apply this mod to this protein unless it's on the mmapProteinModsList - while (it != g_staticParams.variableModParameters.mmapProteinLModsList.end() && it->first == iWhichMod) + cometbitclear(siVarModProteinFilter, iWhichMod - 1); + + while (it != g_staticParams.variableModParameters.mmapProteinModsList.end() && it->first == iWhichMod) { if (strstr(szProteinAccession, it->second.c_str())) { pbVarModProteinFilter[iWhichMod - 1] = true; + cometbitset(siVarModProteinFilter, iWhichMod - 1); // set "iWhichMod - 1" bit to 1 } it++; } } } +/* + printf("\nOK prot %s, ", dbe.strName.c_str()); + for (int i = 0; i < FRAGINDEX_VMODS; ++i) + { + printf("%d", pbVarModProteinFilter[i]); + } + printf(" "); + for (int i = 0; i < FRAGINDEX_VMODS; ++i) + { + printf("%d", cometbitcheck(siVarModProteinFilter, i)==0?0:1); + } + printf("\n"); +*/ + // Quick clip n-term & PEFF variant check. Start summing amino acid mass at // the start variant position and work backwards. 
If the mass is larger than // the max mass or the peptide length is longer than the max length before we @@ -1733,6 +1748,7 @@ bool CometSearch::SearchForPeptides(struct sDBEntry dbe, strncpy(sEntry.szPeptide, szProteinSeq + iStartPos, iLenPeptide); sEntry.szPeptide[iLenPeptide]='\0'; + sEntry.siVarModProteinFilter = siVarModProteinFilter; // little sanity check here to not include peptides with '*' in them // although mass check above should've caught these before @@ -1741,7 +1757,7 @@ bool CometSearch::SearchForPeptides(struct sDBEntry dbe, sEntry.lIndexProteinFilePosition = _proteinInfo.lProteinFilePosition; memset(sEntry.pcVarModSites, 0, sizeof(char) * (iLenPeptide + 2)); - g_pvDBIndex.push_back(sEntry); + g_pvDBIndex.push_back(sEntry); // can save a few transient bytes by going with g_vRawPeptides here } Threading::UnlockMutex(g_pvDBIndexMutex); @@ -2163,7 +2179,7 @@ bool CometSearch::SearchForPeptides(struct sDBEntry dbe, { // if variable mod protein filter applied, set residue mod count to 0 for the // particular variable mod if current protein not on the protein filter list - if (g_staticParams.variableModParameters.mmapProteinLModsList.size() > 0) + if (g_staticParams.variableModParameters.bVarModProteinFilter) { for (int i = 0; i < VMODS; ++i) { @@ -3227,7 +3243,7 @@ void CometSearch::XcorrScore(char *szProteinSeq, dXcorr *= 0.005; // Scale intensities to 50 and divide score by 1E4. - dXcorr= std::round(dXcorr* 10000.0) / 10000.0; // round to 4 decimal points + dXcorr= std::round(dXcorr* 1000.0) / 1000.0; // round to 3 decimal points Threading::LockMutex(pQuery->accessMutex); @@ -3383,7 +3399,7 @@ void CometSearch::XcorrScoreI(char *szProteinSeq, dXcorr *= 0.005; // Scale intensities to 50 and divide score by 1E4. 
- dXcorr= std::round(dXcorr* 10000.0) / 10000.0; // round to 4 decimal points + dXcorr= std::round(dXcorr* 1000.0) / 1000.0; // round to 3 decimal points Threading::LockMutex(pQuery->accessMutex); diff --git a/CometSearch/CometSearchManager.cpp b/CometSearch/CometSearchManager.cpp index 7ce93d1a..702a9d59 100644 --- a/CometSearch/CometSearchManager.cpp +++ b/CometSearch/CometSearchManager.cpp @@ -40,7 +40,6 @@ std::vector g_pvInputFiles; StaticParams g_staticParams; vector g_pvDBIndex; MassRange g_massRange; -map g_pvProteinNames; // for db index Mutex g_pvQueryMutex; Mutex g_pvDBIndexMutex; Mutex g_preprocessMemoryPoolMutex; @@ -1376,15 +1375,15 @@ bool CometSearchManager::InitializeStaticParams() g_staticParams.variableModParameters.bVarModSearch = false; g_staticParams.variableModParameters.bVarTermModSearch = false; g_staticParams.variableModParameters.bBinaryModSearch = false; + g_staticParams.variableModParameters.bUseFragmentNeutralLoss = false; g_staticParams.variableModParameters.bVarProteinNTermMod = false; g_staticParams.variableModParameters.bVarProteinCTermMod = false; - + g_staticParams.variableModParameters.bVarModProteinFilter = false; + g_staticParams.variableModParameters.bRareVarModPresent = false; if (g_staticParams.peffInfo.iPeffSearch) g_staticParams.variableModParameters.bVarModSearch = true; - g_staticParams.variableModParameters.bRareVarModPresent = false; - for (int i=0; i> vpTmp; - - printf(" Protein variable modifications filter:\n"); - - while (fgets(szBuf, 512, fp)) + // If specified, read in the protein variable mod filter file content. + // Do this here only for classic search or if creating the plain peptide index. 
+ if (g_staticParams.variableModParameters.sProteinLModsListFile.length() > 0) { - if (strlen(szBuf) > 3) - { - char szProtein[512]; - int iWhichMod; + bool bVarModUsed = false; - if (sscanf(szBuf, "%d %s", &iWhichMod, szProtein) == 2) + // Do a quick check to confirm there's a variable mod specified, + // otherwise there's no point in parsing the file. + for (int iMod = 0; iMod < VMODS; ++iMod) + { + if (g_staticParams.variableModParameters.varModList[iMod].dVarModMass != 0.0) { - if (iWhichMod > 0 && iWhichMod <= VMODS) - { - g_staticParams.variableModParameters.mmapProteinLModsList.insert({ iWhichMod, szProtein }); - } + bVarModUsed = true; + break; } } - } - fclose(fp); - auto it = g_staticParams.variableModParameters.mmapProteinLModsList.begin(); - while (it != g_staticParams.variableModParameters.mmapProteinLModsList.end()) - { - int iWhichMod = it->first; - bool bFirst = true; - - printf(" - variable_mod%02d: ", iWhichMod); - while (it != g_staticParams.variableModParameters.mmapProteinLModsList.end() && it->first == iWhichMod) + if (bVarModUsed) { - if (!bFirst) - printf(", "); - printf("%s", it->second.c_str()); - it++; - bFirst = false; + bSucceeded = ReadProteinVarModFilterFile(); + if (!bSucceeded) + return bSucceeded; } - printf("\n"); } - printf("\n"); } g_staticParams.precalcMasses.iMinus17 = BIN(g_staticParams.massUtility.dH2O); @@ -3174,9 +3154,7 @@ bool CometSearchManager::DoSingleSpectrumSearch(int iPrecursorCharge, // spectra, we MUST "goto cleanup_results" before exiting the loop, // or we will create a memory leak! 
- int iArraySize = (int)((g_staticParams.options.dPeptideMassHigh + g_staticParams.tolerances.dInputTolerancePlus + 2.0) * g_staticParams.dInverseBinWidth); - - double *pdTmpSpectrum = new double[iArraySize]; // use this to determine most intense b/y-ions masses to report back + double *pdTmpSpectrum = new double[g_staticParams.iArraySizeGlobal]; // use this to determine most intense b/y-ions masses to report back bool bSucceeded = CometPreprocess::PreprocessSingleSpectrum(iPrecursorCharge, dMZ, pdMass, pdInten, iNumPeaks, pdTmpSpectrum); int iSize; ThreadPool* tp = _tp; // filled in InitializeSingleSpectrumSearch @@ -3387,7 +3365,7 @@ bool CometSearchManager::DoSingleSpectrumSearch(int iPrecursorCharge, double mz = (mass + (ctCharge - 1)*PROTON_MASS) / ctCharge; iTmp = BIN(mz); - if (iTmp 0.0) + if (iTmp < g_staticParams.iArraySizeGlobal && pdTmpSpectrum[iTmp] > 0.0) { Fragment frag; frag.intensity = pdTmpSpectrum[iTmp]; @@ -3435,7 +3413,7 @@ bool CometSearchManager::DoSingleSpectrumSearch(int iPrecursorCharge, double dNLfragMz = mz - (dNLmass / ctCharge); iTmp = BIN(dNLfragMz); - if (iTmp < iArraySize && iTmp >= 0 && pdTmpSpectrum[iTmp] > 0.0) + if (iTmp < g_staticParams.iArraySizeGlobal && iTmp >= 0 && pdTmpSpectrum[iTmp] > 0.0) { Fragment frag; frag.intensity = pdTmpSpectrum[iTmp]; @@ -3511,9 +3489,7 @@ bool CometSearchManager::DoSingleSpectrumSearchMultiResults(const int topN, // spectra, we MUST "goto cleanup_results" before exiting the loop, // or we will create a memory leak! 
- int iArraySize = (int)((g_staticParams.options.dPeptideMassHigh + g_staticParams.tolerances.dInputTolerancePlus + 2.0) * g_staticParams.dInverseBinWidth); - - double* pdTmpSpectrum = new double[iArraySize]; // use this to determine most intense b/y-ions masses to report back + double* pdTmpSpectrum = new double[g_staticParams.iArraySizeGlobal]; // use this to determine most intense b/y-ions masses to report back bool bSucceeded = CometPreprocess::PreprocessSingleSpectrum(iPrecursorCharge, dMZ, pdMass, pdInten, iNumPeaks, pdTmpSpectrum); @@ -3552,20 +3528,18 @@ bool CometSearchManager::DoSingleSpectrumSearchMultiResults(const int topN, if (iSize > g_staticParams.options.iNumStored) iSize = g_staticParams.options.iNumStored; - // simply take top xcorr peptide as E-value calculation too expensive if (iSize > 1) { std::sort(g_pvQuery.at(0)->_pResults, g_pvQuery.at(0)->_pResults + iSize, CometPostAnalysis::SortFnXcorr); } + takeSearchResultsN = topN; // return up to the top N results, or iSize + if (takeSearchResultsN > iSize) + takeSearchResultsN = iSize; + if (bSucceeded && g_pvQuery.at(0)->iMatchPeptideCount > 0) { - int iSize = g_pvQuery.at(0)->iMatchPeptideCount; - - if (iSize > g_staticParams.options.iNumStored) - iSize = g_staticParams.options.iNumStored; - - CometPostAnalysis::CalculateSP(g_pvQuery.at(0)->_pResults, 0, 1); // only do for top entry + CometPostAnalysis::CalculateSP(g_pvQuery.at(0)->_pResults, 0, takeSearchResultsN); CometPostAnalysis::CalculateEValue(0, 0); CometPostAnalysis::CalculateDeltaCn(0); } @@ -3580,10 +3554,6 @@ bool CometSearchManager::DoSingleSpectrumSearchMultiResults(const int topN, Query* pQuery; pQuery = g_pvQuery.at(0); // return info for top hit only - takeSearchResultsN = topN; // return up to the top N results, or iSize - if (takeSearchResultsN > iSize) - takeSearchResultsN = iSize; - for (int idx = 0; idx < takeSearchResultsN; ++idx) { Scores score; @@ -3595,7 +3565,7 @@ bool 
CometSearchManager::DoSingleSpectrumSearchMultiResults(const int topN, std::string eachStrReturnProtein; vector<Fragment> eachMatchedFragments; - if (iSize > 0 && pQuery->_pResults[idx].fXcorr > 0.0 && pQuery->_pResults[idx].iLenPeptide > 0) + if (iSize > 0 && pQuery->_pResults[idx].fXcorr > XCORR_CUTOFF && pQuery->_pResults[idx].iLenPeptide > 0) { Results* pOutput = pQuery->_pResults; @@ -3750,7 +3720,7 @@ bool CometSearchManager::DoSingleSpectrumSearchMultiResults(const int topN, double mz = (mass + (ctCharge - 1) * PROTON_MASS) / ctCharge; iTmp = BIN(mz); - if (iTmp < iArraySize && pdTmpSpectrum[iTmp] > 0.0) + if (iTmp < g_staticParams.iArraySizeGlobal && pdTmpSpectrum[iTmp] > 0.0) { Fragment frag; frag.intensity = pdTmpSpectrum[iTmp]; @@ -3798,7 +3768,7 @@ bool CometSearchManager::DoSingleSpectrumSearchMultiResults(const int topN, double dNLfragMz = mz - (dNLmass / ctCharge); iTmp = BIN(dNLfragMz); - if (iTmp < iArraySize && iTmp >= 0 && pdTmpSpectrum[iTmp] > 0.0) + if (iTmp < g_staticParams.iArraySizeGlobal && iTmp >= 0 && pdTmpSpectrum[iTmp] > 0.0) { Fragment frag; frag.intensity = pdTmpSpectrum[iTmp]; @@ -3851,3 +3821,86 @@ bool CometSearchManager::DoSingleSpectrumSearchMultiResults(const int topN, return bSucceeded; } + + +// Restrict variable mods to a list of proteins that are read here +// File format is an "int string" on each line where "int" is the +// variable modification number and "string" is a single protein accession word +bool CometSearchManager::ReadProteinVarModFilterFile() +{ + FILE* fp; + char szBuf[512]; + + if ((fp = fopen(g_staticParams.variableModParameters.sProteinLModsListFile.c_str(), "r")) != NULL) + { + printf(" Protein variable modifications filter:\n"); + + while (fgets(szBuf, 512, fp)) + { + if (strlen(szBuf) > 3) + { + char szProtein[512]; + int iWhichMod; + + if (sscanf(szBuf, "%d %s", &iWhichMod, szProtein) == 2) + { + if (iWhichMod > 0 && iWhichMod <= VMODS) + { + // check if specified iWhichMod actually corresponds to a non-zero 
variable mod + if (!isEqual(g_staticParams.variableModParameters.varModList[iWhichMod -1].dVarModMass, 0.0)) + g_staticParams.variableModParameters.mmapProteinModsList.insert({ iWhichMod, szProtein }); + } + } + } + } + fclose(fp); + + if (g_staticParams.variableModParameters.mmapProteinModsList.size() > 0) + { + g_staticParams.variableModParameters.bVarModProteinFilter = true; + + // print out the parsed proteins + auto it = g_staticParams.variableModParameters.mmapProteinModsList.begin(); + while (it != g_staticParams.variableModParameters.mmapProteinModsList.end()) + { + int iWhichMod = it->first; + int iCount = 0; + bool bFirst = true; + + printf(" - variable_mod%02d: ", iWhichMod); + while (it != g_staticParams.variableModParameters.mmapProteinModsList.end() && it->first == iWhichMod) + { + if (iCount < 3) + { + if (!bFirst) + printf(", "); + printf("%s", it->second.c_str()); + } + else if (iCount == 3) + { + printf(", ..."); + } + it++; + iCount++; + bFirst = false; + } + printf("\n"); + } + printf("\n"); + } + else + g_staticParams.variableModParameters.bVarModProteinFilter = false; + + return true; + } + else + { + char szErrorMsg[SIZE_ERROR]; + sprintf(szErrorMsg, " Error - cannot read protein variable mod filter file \"%s\".\n", + g_staticParams.variableModParameters.sProteinLModsListFile.c_str()); + string strErrorMsg(szErrorMsg); + g_cometStatus.SetStatus(CometResult_Failed, strErrorMsg); + logerr(szErrorMsg); + return false; + } +} diff --git a/CometSearch/CometSearchManager.h b/CometSearch/CometSearchManager.h index e9fc518d..e3889759 100644 --- a/CometSearch/CometSearchManager.h +++ b/CometSearch/CometSearchManager.h @@ -106,6 +106,7 @@ class CometSearchManager : public ICometSearchManager private: bool InitializeStaticParams(); + bool ReadProteinVarModFilterFile(); bool singleSearchInitializationComplete; int singleSearchThreadCount; std::map _mapStaticParams; diff --git a/CometSearch/Common.h b/CometSearch/Common.h index e193f0b3..2792d0f0 100644 
--- a/CometSearch/Common.h +++ b/CometSearch/Common.h @@ -69,7 +69,7 @@ using namespace std; #define GITHUBSHA "" #endif -#define comet_version "2024.01 rev. 2" +#define comet_version "2024.02 rev. 0" #define copyright "(c) University of Washington" extern string g_sCometVersion; // version string including git hash @@ -79,6 +79,12 @@ extern string g_sCometVersion; // version string including git hash #define isEqual(x, y) (std::abs(x-y) <= ( (std::abs(x) > std::abs(y) ? std::abs(y) : std::abs(x)) * FLT_EPSILON)) +#define cometbitset(byte, nbit) ((byte) |= (1<<(nbit))) // https://www.codementor.io/@hbendali/c-c-macro-bit-operations-ztrat0et6 +#define cometbitclear(byte, nbit) ((byte) &= ~(1<<(nbit))) +#define cometbitflip(byte, nbit) ((byte) ^= (1<<(nbit))) +#define cometbitcheck(byte, nbit) ((byte) & (1<<(nbit))) + + using namespace MSToolkit; #ifdef CRUX #define logout(szString) cerr << szString diff --git a/CometSearch/ModificationsPermuter.cpp b/CometSearch/ModificationsPermuter.cpp index b7b4cd88..76ad9b30 100644 --- a/CometSearch/ModificationsPermuter.cpp +++ b/CometSearch/ModificationsPermuter.cpp @@ -276,8 +276,10 @@ vector ModificationsPermuter::getModifiableSequences(vector ModificationsPermuter::getModifiableSequences(vector 01000010 (for modChar = 'M') diff --git a/RealtimeSearch/Search.cs b/RealtimeSearch/Search.cs index 140f6ba0..f6c8f0cd 100644 --- a/RealtimeSearch/Search.cs +++ b/RealtimeSearch/Search.cs @@ -32,7 +32,7 @@ static void Main(string[] args) CometSearchManagerWrapper SearchMgr = new CometSearchManagerWrapper(); SearchSettings searchParams = new SearchSettings(); - + string rawFileName = args[0]; string sDB = args[1]; double dPeptideMassLow = 0; @@ -155,7 +155,7 @@ static void Main(string[] args) int iProteinLengthCutoff = 30; - if (vPeptide.Count > 0 && (iScanNumber % 10) == 0) + if (vPeptide.Count > 0 && (iScanNumber % 1) == 0) { for (int x = 0; x < vPeptide.Count; ++x) { @@ -250,18 +250,22 @@ public bool 
ConfigureInputSettings(CometSearchManagerWrapper SearchMgr, sTmp = iTmp.ToString(); SearchMgr.SetParam("decoy_search", sTmp, iTmp); */ - dTmp = 20.0; // peptide mass tolerance plus + dTmp = 3.0; // peptide mass tolerance plus sTmp = dTmp.ToString(); SearchMgr.SetParam("peptide_mass_tolerance_upper", sTmp, dTmp); - dTmp = -20.0; // peptide mass tolerance minus ; if this is not set, will use -1*peptide_mass_tolerance_plus + dTmp = -3.0; // peptide mass tolerance minus ; if this is not set, will use -1*peptide_mass_tolerance_plus sTmp = dTmp.ToString(); SearchMgr.SetParam("peptide_mass_tolerance_lower", sTmp, dTmp); - iTmp = 2; // 0=Da, 2=ppm + iTmp = 0; // 0=Da, 2=ppm sTmp = iTmp.ToString(); SearchMgr.SetParam("peptide_mass_units", sTmp, iTmp); + iTmp = 0; + sTmp = iTmp.ToString(); + SearchMgr.SetParam("isotope_error", sTmp, iTmp); + iTmp = 3; sTmp = iTmp.ToString(); SearchMgr.SetParam("fragindex_min_ions_score", sTmp, iTmp); @@ -286,23 +290,23 @@ public bool ConfigureInputSettings(CometSearchManagerWrapper SearchMgr, sTmp = iTmp.ToString(); SearchMgr.SetParam("max_index_runtime", sTmp, iTmp); - iTmp = 1; // 1 = m/z tolerance + iTmp = 0; // 1 = m/z tolerance sTmp = iTmp.ToString(); SearchMgr.SetParam("precursor_tolerance_type", sTmp, iTmp); - iTmp = 2; // 0=off, 1=0/1 (C13 error), 2=0/1/2, 3=0/1/2/3, 4=-8/-4/0/4/8 (for +4/+8 labeling) + iTmp = 0; // 0=off, 1=0/1 (C13 error), 2=0/1/2, 3=0/1/2/3, 4=-8/-4/0/4/8 (for +4/+8 labeling) sTmp = iTmp.ToString(); SearchMgr.SetParam("isotope_error", sTmp, iTmp); - dTmp = 0.02; // fragment bin width + dTmp = 1.0005; // fragment bin width sTmp = dTmp.ToString(); SearchMgr.SetParam("fragment_bin_tol", sTmp, dTmp); - dTmp = 0.0; // fragment bin offset + dTmp = 0.4; // fragment bin offset sTmp = dTmp.ToString(); SearchMgr.SetParam("fragment_bin_offset", sTmp, dTmp); - iTmp = 0; // 0=use flanking peaks, 1=M peak only + iTmp = 1; // 0=use flanking peaks, 1=M peak only sTmp = iTmp.ToString(); 
SearchMgr.SetParam("theoretical_fragment_ions", sTmp, iTmp); @@ -322,6 +326,7 @@ public bool ConfigureInputSettings(CometSearchManagerWrapper SearchMgr, sTmp = dTmp.ToString(); SearchMgr.SetParam("percentage_base_peak", sTmp, dTmp); +/* unused for the search as these are applied during the plain peptide .idx index creation VarModsWrapper varMods = new VarModsWrapper(); sTmp = "15.9949 M 0 2 -1 0 0"; varMods.set_VarModMass(15.9949); @@ -336,6 +341,7 @@ public bool ConfigureInputSettings(CometSearchManagerWrapper SearchMgr, iTmp = 4; sTmp = iTmp.ToString(); SearchMgr.SetParam("max_variable_mods_in_peptide", sTmp, iTmp); +*/ // Now actually open the .idx database to read mass range from it int iLineCount = 0;