Skip to content

Commit

Permalink
Add variable mod protein filter support to FI. Use bits in siVarModPr…
Browse files Browse the repository at this point in the history
…oteinFilter to track which mods are allowed for each peptide in the plain peptide index. There should be an opportunity in getModifiableSequences to also apply some (but not all) variable mod protein filtering but that will come later and will not save much in the grand scheme of things.
  • Loading branch information
jke000 committed Sep 26, 2024
1 parent fd4d7fd commit 55d75c6
Show file tree
Hide file tree
Showing 12 changed files with 328 additions and 215 deletions.
9 changes: 8 additions & 1 deletion Comet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -703,6 +703,12 @@ void LoadParameters(char *pszParamsFile,
sprintf(szParamStringVal, "%lf %lf", doubleRangeParam.dStart, doubleRangeParam.dEnd);
pSearchMgr->SetParam("clear_mz_range", szParamStringVal, doubleRangeParam);
}
else if (!strcmp(szParamName, "percentage_base_peak"))
{
sscanf(szParamVal, "%lf", &dDoubleParam);
sprintf(szParamStringVal, "%lf", dDoubleParam);
pSearchMgr->SetParam("percentage_base_peak", szParamStringVal, dDoubleParam);
}
else if (!strcmp(szParamName, "export_additional_pepxml_scores"))
{
sscanf(szParamVal, "%d", &iIntParam);
Expand Down Expand Up @@ -1853,10 +1859,11 @@ mass_offsets = # one or more mass offsets to search (val
minimum_peaks = 10 # required minimum number of peaks in spectrum to search (default 10)\n");

fprintf(fp,
"minimum_intensity = 0 # minimum intensity value to read in\n\
"minimum_intensity = 0 # minimum intensity value to read in\n\
remove_precursor_peak = 0 # 0=no, 1=yes, 2=all charge reduced precursor peaks (for ETD), 3=phosphate neutral loss peaks\n\
remove_precursor_tolerance = 1.5 # +- Da tolerance for precursor removal\n\
clear_mz_range = 0.0 0.0 # clear out all peaks in the specified m/z range e.g. remove reporter ion region of TMT spectra\n\
percentage_base_peak = 0.0 # specify a percentage (e.g. \"0.05\" for 5%%) of the base peak intensity as a minimum intensity threshold\n\
\n\
#\n\
# static modifications\n\
Expand Down
36 changes: 24 additions & 12 deletions CometSearch/CombinatoricsUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,8 @@ int** CombinatoricsUtils::makeCombinations(int n, int r, int count)
int *c = new int[k + 3];

c[0] = 0;
for (int i = 1; i <= k; ++i) {
for (int i = 1; i <= k; ++i)
{
c[i] = i - 1;
}
// Initialize sentinels
Expand All @@ -116,7 +117,8 @@ int** CombinatoricsUtils::makeCombinations(int n, int r, int count)
// Prepare next iteration
// T2 and T6 loop
int x = 0;
if (j > 0) {
if (j > 0)
{
x = j;
c[j] = x;
j--;
Expand All @@ -126,30 +128,36 @@ int** CombinatoricsUtils::makeCombinations(int n, int r, int count)
// return ret;
}
// T3
if (c[1] + 1 < c[2]) {
if (c[1] + 1 < c[2])
{
c[1]++;
combinations[idx] = ret;
idx++;
continue;
// return ret;
}
else {
else
{
j = 2;
}
// T4
bool stepDone = false;
while (!stepDone) {
while (!stepDone)
{
c[j - 1] = j - 2;
x = c[j] + 1;
if (x == c[j + 1]) {
if (x == c[j + 1])
{
j++;
}
else {
else
{
stepDone = true;
}
}
// T5
if (j > k) {
if (j > k)
{
more = false;
combinations[idx] = ret;
idx++;
Expand All @@ -169,11 +177,14 @@ int** CombinatoricsUtils::makeCombinations(int n, int r, int count)

int CombinatoricsUtils::nChooseK(const int n, const int k)
{
if (n == k || k == 0) return 1;
if (n == k || k == 0)
return 1;

if (k == 1 || k == n - 1) return n;
if (k == 1 || k == n - 1)
return n;

if (k > n / 2) return nChooseK(n, n - k);
if (k > n / 2)
return nChooseK(n, n - k);

if (n <= N && k <= K)
{
Expand All @@ -196,7 +207,8 @@ int CombinatoricsUtils::nChooseK(const int n, const int k)
int CombinatoricsUtils::getCombinationCount(int n, int k)
{
int total = 0;
if (k > n) k = n;
if (k > n)
k = n;
for (; k >= 1; k--)
{
total += nChooseK(n, k);
Expand Down
15 changes: 4 additions & 11 deletions CometSearch/CometData.h
Original file line number Diff line number Diff line change
Expand Up @@ -338,16 +338,14 @@ struct EnzymeInfo
iSearchEnzyme2OffSet = a.iSearchEnzyme2OffSet;
iSampleEnzymeOffSet = a.iSampleEnzymeOffSet;

int i;

for (i = 0; i < ENZYME_NAME_LEN; i++)
for (int i = 0; i < ENZYME_NAME_LEN; ++i)
{
szSearchEnzymeName[i] = a.szSearchEnzymeName[i];
szSearchEnzyme2Name[i] = a.szSearchEnzyme2Name[i];
szSampleEnzymeName[i] = a.szSampleEnzymeName[i];
}

for (i = 0; i < MAX_ENZYME_AA; i++)
for (int i = 0; i < MAX_ENZYME_AA; ++i)
{
szSearchEnzymeBreakAA[i] = a.szSearchEnzymeBreakAA[i];
szSearchEnzymeNoBreakAA[i] = a.szSearchEnzymeNoBreakAA[i];
Expand All @@ -367,16 +365,14 @@ struct EnzymeInfo
iSearchEnzyme2OffSet = a.iSearchEnzyme2OffSet;
iSampleEnzymeOffSet = a.iSampleEnzymeOffSet;

int i;

for (i = 0; i < ENZYME_NAME_LEN; i++)
for (int i = 0; i < ENZYME_NAME_LEN; ++i)
{
szSearchEnzymeName[i] = a.szSearchEnzymeName[i];
szSearchEnzyme2Name[i] = a.szSearchEnzyme2Name[i];
szSampleEnzymeName[i] = a.szSampleEnzymeName[i];
}

for (i = 0; i < MAX_ENZYME_AA; i++)
for (int i = 0; i < MAX_ENZYME_AA; ++i)
{
szSearchEnzymeBreakAA[i] = a.szSearchEnzymeBreakAA[i];
szSearchEnzymeNoBreakAA[i] = a.szSearchEnzymeNoBreakAA[i];
Expand Down Expand Up @@ -479,11 +475,8 @@ struct InputFileInfo
// Plain aggregate of two doubles. Judging by the type and member names this
// presumably describes one peak of a spectrum -- TODO confirm against callers.
struct SingleSpectrumStruct
{
   double dMass;   // mass value of the entry (presumably m/z -- confirm)
   double dInt;    // presumably the intensity paired with dMass -- confirm
};

enum CometParamType
Expand Down
10 changes: 7 additions & 3 deletions CometSearch/CometDataInternal.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ class CometSearchManager;
#define NO_PEFF_VARIANT -127

#define FRAGINDEX_VMODS 5 // only parse first five variable mods for fragment ion index searches
// if this is ever larger than 16, need to extend range of siVarModProteinFilter

#define VMODS 15 // also "VMODS+1" is 4th dimension of uiBinnedIonMasses to cover unmodified ions (0), mod NL (1-15)
#define VMOD_1_INDEX 0
#define VMOD_2_INDEX 1
Expand Down Expand Up @@ -424,6 +426,7 @@ struct DBIndex
char pcVarModSites[MAX_PEPTIDE_LEN_P2]; // encodes 0-9 indicating which var mod at which position
comet_fileoffset_t lIndexProteinFilePosition; // points to entry in g_pvProteinsList
double dPepMass; // MH+ pep mass
unsigned short siVarModProteinFilter; // bitwise representation of mmapProteinModsList

bool operator==(const DBIndex &rhs) const
{
Expand Down Expand Up @@ -462,6 +465,7 @@ struct PlainPeptideIndex
string sPeptide;
comet_fileoffset_t lIndexProteinFilePosition; // points to entry in g_pvProteinsList
double dPepMass; // MH+ pep mass, unmodified mass; modified mass in FragmentPeptidesStruct
unsigned short siVarModProteinFilter; // bitwise representation of mmapProteinModsList

bool operator==(const PlainPeptideIndex &rhs) const
{
Expand Down Expand Up @@ -559,13 +563,14 @@ struct VarModParams
bool bBinaryModSearch; // set to true if any of the variable mods are of binary mod variety
bool bUseFragmentNeutralLoss; // set to true if any custom NL is set; applied only to 1+ and 2+ fragments
bool bRareVarModPresent; // set to true if any of iRequireThisMod == -1
bool bVarModProteinFilter; // set to true if protein mods list is applied
int iRequireVarMod; // 0=no; else use bits to determine which varmods are required
int iMaxVarModPerPeptide;
int iMaxPermutations;
VarMods varModList[VMODS];
char cModCode[VMODS]; // mod characters
string sProteinLModsListFile; // file containing list of proteins to restrict application of varmods to
multimap<int, string> mmapProteinLModsList; // <varmod#, protein name> vector read from sProteinModsListFile if present
multimap<int, string> mmapProteinModsList; // <varmod#, protein name> multimap read from sProteinLModsListFile if present

VarModParams& operator=(VarModParams& a)
{
Expand All @@ -576,6 +581,7 @@ struct VarModParams
bBinaryModSearch = a.bBinaryModSearch;
bUseFragmentNeutralLoss = a.bUseFragmentNeutralLoss;
bRareVarModPresent = a.bRareVarModPresent;
bVarModProteinFilter = a.bVarModProteinFilter;
iRequireVarMod = a.iRequireVarMod;
iMaxVarModPerPeptide = a.iMaxVarModPerPeptide;
iMaxPermutations = a.iMaxPermutations;
Expand Down Expand Up @@ -943,8 +949,6 @@ extern StaticParams g_staticParams;

extern string g_psGITHUB_SHA; // grab the GITHUB_SHA environment variable and trim to 7 chars; null if environment variable not present

extern map<long long, IndexProteinStruct> g_pvProteinNames;

extern vector<DBIndex> g_pvDBIndex;

extern vector<vector<comet_fileoffset_t>> g_pvProteinsList;
Expand Down
Loading

0 comments on commit 55d75c6

Please sign in to comment.