Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

restore peptide index support #71

Merged
merged 18 commits into from
Jan 2, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions Comet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,8 @@ void Usage(char *pszCmd)
logout(" -F<num> to specify the first/start scan to search, overriding entry in parameters file\n");
logout(" -L<num> to specify the last/end scan to search, overriding entry in parameters file\n");
logout(" (-L option is required if -F option is used)\n");
logout(" -i create peptide index file only (specify .idx file as database for index search)\n");
logout(" -i create .idx file for fragment ion indexing\n");
logout(" -j create .idx file for peptide indexing\n");
logout("\n");
sprintf(szTmp, " example: %s file1.mzXML file2.mzXML\n", pszCmd);
logout(szTmp);
Expand Down Expand Up @@ -197,7 +198,15 @@ void SetOptions(char *arg,
break;
case 'i':
sprintf(szParamStringVal, "1");
pSearchMgr->SetParam("create_index", szParamStringVal, 1);
pSearchMgr->SetParam("create_fragment_index", szParamStringVal, 1);
sprintf(szParamStringVal, "0");
pSearchMgr->SetParam("create_peptide_index", szParamStringVal, 0);
break;
case 'j':
sprintf(szParamStringVal, "0");
pSearchMgr->SetParam("create_fragment_index", szParamStringVal, 0);
sprintf(szParamStringVal, "1");
pSearchMgr->SetParam("create_peptide_index", szParamStringVal, 1);
break;
default:
break;
Expand Down
2 changes: 1 addition & 1 deletion CometSearch/CometCheckForUpdates.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
#define _COMETCHECKFORUPDATES_H_

#include "Common.h"
#include "CometDataInternal.h"
//#include "CometDataInternal.h"

#include <errno.h>
#include <string.h>
Expand Down
18 changes: 12 additions & 6 deletions CometSearch/CometDataInternal.h
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,8 @@ struct Options
int bSkipAlreadyDone; // 0=search everything; 1=don't re-search if .out exists
int bMango; // 0=normal; 1=Mango x-link ms2 input
int bScaleFragmentNL; // 0=no; 1=scale fragment NL for each modified residue contained in fragment
int bCreateIndex; // 0=normal search; 1=create peptide index file
int bCreateFragmentIndex; // 0=normal search; 1=create fragment ion index file
int bCreatePeptideIndex; // 0=normal search; 1=create peptide index file; only one of bCreateFragmentIndex and bCreatePeptideIndex can be 1
int bVerboseOutput;
int bShowFragmentIons;
int bExplicitDeltaCn; // if set to 1, do not use sequence similarity logic
Expand Down Expand Up @@ -194,7 +195,8 @@ struct Options
bSkipAlreadyDone = a.bSkipAlreadyDone;
bMango = a.bMango;
bScaleFragmentNL = a.bScaleFragmentNL;
bCreateIndex = a.bCreateIndex;
bCreatePeptideIndex = a.bCreatePeptideIndex;
bCreateFragmentIndex = a.bCreateFragmentIndex;
bVerboseOutput = a.bVerboseOutput;
bShowFragmentIons = a.bShowFragmentIons;
bExplicitDeltaCn = a.bExplicitDeltaCn;
Expand Down Expand Up @@ -712,12 +714,13 @@ struct StaticParams
double dOneMinusBinOffset; // this is used in BIN() many times so calculate once
IonInfo ionInformation;
int iXcorrProcessingOffset;
int bIndexDb; // 0 = normal fasta; 1 = indexed database
int iIndexDb; // 0 = normal fasta; 1 = fragment ion indexed; 2 = peptide index
vector<double> vectorMassOffsets;
vector<double> precursorNLIons;
int iPrecursorNLSize;
int iOldModsEncoding;
bool bSkipToStartScan;
std::chrono::high_resolution_clock::time_point tRealTimeStart; // track run time of real-time index search

StaticParams()
{
Expand Down Expand Up @@ -767,7 +770,7 @@ struct StaticParams
szMod[0] = '\0';

iXcorrProcessingOffset = 75;
bIndexDb = 0;
iIndexDb = 0;

databaseInfo.szDatabase[0] = '\0';

Expand Down Expand Up @@ -882,7 +885,8 @@ struct StaticParams
options.bSkipAlreadyDone = 1;
options.bMango = 0;
options.bScaleFragmentNL = 0;
options.bCreateIndex = 0;
options.bCreatePeptideIndex = 0;
options.bCreateFragmentIndex = 0;
options.bVerboseOutput = 0;
options.iDecoySearch = 0;
options.iNumThreads = 4;
Expand Down Expand Up @@ -949,7 +953,7 @@ extern StaticParams g_staticParams;

extern string g_psGITHUB_SHA; // grab the GITHUB_SHA environment variable and trim to 7 chars; null if environment variable not present

extern vector<DBIndex> g_pvDBIndex;
extern vector<DBIndex> g_pvDBIndex; // used in both peptide index and fragment ion index; latter to store plain peptides

extern vector<vector<comet_fileoffset_t>> g_pvProteinsList;

Expand All @@ -972,6 +976,8 @@ extern int* PEPTIDE_MOD_SEQ_IDXS;

extern int MOD_NUM;
extern bool g_bPlainPeptideIndexRead; // set to true if plain peptide index file is read (and fragment index generated)
// ("plain peptide index" is a poor choice of name for the fragment ion index .idx now that the peptide index is back)
extern bool g_bPeptideIndexRead; // set to true if peptide index file is read

// Query stores information for peptide scoring and results
// This struct is allocated for each spectrum/charge combination
Expand Down
19 changes: 9 additions & 10 deletions CometSearch/CometFragmentIndex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,10 @@
// limitations under the License.


#include "Common.h"
#include "CometFragmentIndex.h"
#include "CometSearch.h"
#include "ThreadPool.h"
#include "CometStatus.h"
//#include "CometPostAnalysis.h"
#include "CometMassSpecUtils.h"
#include "ModificationsPermuter.h"

Expand All @@ -37,7 +35,7 @@ int MOD_NUM = 0;

Mutex CometFragmentIndex::_vFragmentPeptidesMutex;

//comet_fileoffset_t clSizeCometFileOffset;

#ifdef _WIN32
#ifdef _WIN64
comet_fileoffset_t clSizeCometFileOffset = sizeof(comet_fileoffset_t); //win64
Expand All @@ -48,6 +46,7 @@ comet_fileoffset_t clSizeCometFileOffset = (long long)sizeof(comet_fileoffset_t)
comet_fileoffset_t clSizeCometFileOffset = sizeof(comet_fileoffset_t); //linux
#endif


CometFragmentIndex::CometFragmentIndex()
{
}
Expand Down Expand Up @@ -652,7 +651,7 @@ bool CometFragmentIndex::WritePlainPeptideIndex(ThreadPool *tp)
exit(1);
}

strOut = " Creating plain peptide/protein index file:\n";
strOut = " Creating plain peptide/protein index file for fragment ion indexing:\n";
logout(strOut.c_str());
fflush(stdout);
strOut = " - parse peptides from database ... ";
Expand All @@ -671,15 +670,15 @@ bool CometFragmentIndex::WritePlainPeptideIndex(ThreadPool *tp)

if (bSucceeded)
{
g_staticParams.options.bCreateIndex = true;
g_staticParams.bIndexDb = false;
g_staticParams.options.bCreateFragmentIndex = true;
g_staticParams.iIndexDb = 0;

// this step calls RunSearch just to pull out all peptides
// to write into the .idx peptides/proteins file
bSucceeded = CometSearch::RunSearch(0, 0, tp);

g_staticParams.options.bCreateIndex = false;
g_staticParams.bIndexDb = true;
g_staticParams.options.bCreateFragmentIndex = false;
g_staticParams.iIndexDb = 1;
}

if (bSwapIdxExtension)
Expand Down Expand Up @@ -767,7 +766,7 @@ bool CometFragmentIndex::WritePlainPeptideIndex(ThreadPool *tp)
cout << " - write peptides/proteins to file" << endl;

// write out index header
fprintf(fp, "Comet peptide index. Comet version %s\n", g_sCometVersion.c_str());
fprintf(fp, "Comet fragment ion index plain peptides. Comet version %s\n", g_sCometVersion.c_str());
fprintf(fp, "InputDB: %s\n", g_staticParams.databaseInfo.szDatabase);
fprintf(fp, "MassRange: %lf %lf\n", g_staticParams.options.dPeptideMassLow, g_staticParams.options.dPeptideMassHigh);
fprintf(fp, "LengthRange: %d %d\n", g_staticParams.options.peptideLengthRange.iStart, g_staticParams.options.peptideLengthRange.iEnd);
Expand Down Expand Up @@ -893,7 +892,7 @@ bool CometFragmentIndex::ReadPlainPeptideIndex(void)
if (g_bPlainPeptideIndexRead)
return 1;

if (g_staticParams.options.bCreateIndex && !strstr(g_staticParams.databaseInfo.szDatabase + strlen(g_staticParams.databaseInfo.szDatabase) - 4, ".idx"))
if (g_staticParams.options.bCreateFragmentIndex && !strstr(g_staticParams.databaseInfo.szDatabase + strlen(g_staticParams.databaseInfo.szDatabase) - 4, ".idx"))
strIndexFile = g_staticParams.databaseInfo.szDatabase + string(".idx");
else // database already is .idx
strIndexFile = g_staticParams.databaseInfo.szDatabase;
Expand Down
13 changes: 6 additions & 7 deletions CometSearch/CometFragmentIndex.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
#define _COMETFRAGMENTINDEX_H_

#include "Common.h"
#include "CometDataInternal.h"
#include "CometSearch.h"
#include <functional>

Expand All @@ -32,6 +31,10 @@ class CometFragmentIndex
static bool CreateFragmentIndex(ThreadPool *tp);
static string ElapsedTime(std::chrono::time_point<std::chrono::steady_clock> tStartTime);
static int WhichPrecursorBin(double dMass);
static bool CompareByPeptide(const DBIndex &lhs,
const DBIndex &rhs);
static bool CompareByMass(const DBIndex &lhs,
const DBIndex &rhs);

private:

Expand All @@ -52,16 +55,12 @@ class CometFragmentIndex
unsigned int y);
static void SortFragmentThreadProc(int iWhichThread,
ThreadPool* tp);
static bool CompareByPeptide(const DBIndex &lhs,
const DBIndex &rhs);
static bool CompareByMass(const DBIndex &lhs,
const DBIndex &rhs);

/*
unsigned int _uiBinnedIonMasses[MAX_FRAGMENT_CHARGE + 1][NUM_ION_SERIES][MAX_PEPTIDE_LEN][VMODS + 1];
unsigned int _uiBinnedIonMassesDecoy[MAX_FRAGMENT_CHARGE + 1][NUM_ION_SERIES][MAX_PEPTIDE_LEN][VMODS + 1];
unsigned int _uiBinnedPrecursorNL[MAX_PRECURSOR_NL_SIZE][MAX_PRECURSOR_CHARGE];
unsigned int _uiBinnedPrecursorNLDecoy[MAX_PRECURSOR_NL_SIZE][MAX_PRECURSOR_CHARGE];

*/
static bool *_pbSearchMemoryPool; // Pool of memory to be shared by search threads
static bool **_ppbDuplFragmentArr; // Number of arrays equals number of threads

Expand Down
21 changes: 11 additions & 10 deletions CometSearch/CometInterfaces.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ namespace CometInterfaces
{
public:
virtual ~ICometSearchManager() {}
virtual bool CreateIndex() = 0;
virtual bool CreateFragmentIndex() = 0;
virtual bool CreatePeptideIndex() = 0;
virtual bool DoSearch() = 0;
virtual bool InitializeSingleSpectrumSearch() = 0;
virtual void FinalizeSingleSpectrumSearch() = 0;
Expand All @@ -43,15 +44,15 @@ namespace CometInterfaces
vector<Fragment> & matchedFragments,
Scores & scores) = 0;
virtual bool DoSingleSpectrumSearchMultiResults(const int topN,
const int iPrecursorCharge,
const double dMZ,
double* dMass,
double* dInten,
const int iNumPeaks,
vector<string>& strReturnPeptide,
vector<string>& strReturnProtein,
vector<vector<Fragment>>& matchedFragments,
vector<Scores>& scores) = 0;
const int iPrecursorCharge,
const double dMZ,
double* dMass,
double* dInten,
const int iNumPeaks,
vector<string>& strReturnPeptide,
vector<string>& strReturnProtein,
vector<vector<Fragment>>& matchedFragments,
vector<Scores>& scores) = 0;
virtual void AddInputFiles(vector<InputFileInfo*> &pvInputFiles) = 0;
virtual void SetOutputFileBaseName(const char *pszBaseName) = 0;
virtual void SetParam(const string &name, const string &strValue, const string &value) = 0;
Expand Down
Loading
Loading