-
Notifications
You must be signed in to change notification settings - Fork 14
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #587 from hechth/target_screen
added target screening tool
- Loading branch information
Showing
6 changed files
with
155 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
import argparse | ||
|
||
import numpy as np | ||
import pandas as pd | ||
|
||
|
||
def mz_match(marker, peak, ppm):
    """Test whether marker and peak m/z values agree within a ppm tolerance.

    The allowed absolute deviation is ``ppm`` parts-per-million of the mean
    of the two m/z values being compared. Inputs may be scalars or NumPy
    arrays; arrays are combined using NumPy broadcasting.

    Args:
        marker: m/z value(s) of the markers.
        peak: m/z value(s) of the peaks.
        ppm: Tolerance in parts per million.

    Returns:
        Boolean (or boolean array) that is True where the absolute
        difference is less than or equal to the tolerance.
    """
    mean_mz = (peak + marker) / 2
    tolerance = mean_mz * ppm * 1e-06
    deviation = np.abs(marker - peak)
    return deviation <= tolerance
|
||
|
||
def rt_match(marker, peak, tol):
    """Test whether marker and peak retention times agree within a tolerance.

    Inputs may be scalars or NumPy arrays; arrays are combined using NumPy
    broadcasting.

    Args:
        marker: Retention time value(s) of the markers.
        peak: Retention time value(s) of the peaks.
        tol: Maximum allowed absolute difference (inclusive).

    Returns:
        Boolean (or boolean array) that is True where the absolute
        difference does not exceed ``tol``.
    """
    deviation = np.abs(marker - peak)
    return deviation <= tol
|
||
|
||
def find_matches(peaks, markers, ppm, rt_tol):
    """Pair every marker with every peak that matches it in both m/z and rt.

    Args:
        peaks: DataFrame with at least the columns ``mz`` and ``rt``.
        markers: DataFrame with at least the columns ``formula``, ``mz``
            and ``rt``.
        ppm: m/z tolerance in parts per million, forwarded to ``mz_match``.
        rt_tol: Retention time tolerance, forwarded to ``rt_match``.

    Returns:
        DataFrame with one row per (marker, peak) hit: the marker's
        ``formula`` followed by all columns of the matched peak, plus
        ``mz_diff`` and ``rt_diff`` holding the absolute marker-vs-peak
        differences.
    """
    # Marker values become column vectors so that comparing them against the
    # flat peak vectors broadcasts to a (n_markers, n_peaks) boolean grid.
    marker_mz_col = markers['mz'].values[:, np.newaxis]
    marker_rt_col = markers['rt'].values[:, np.newaxis]
    within_mz = mz_match(marker_mz_col, peaks['mz'].values, ppm)
    within_rt = rt_match(marker_rt_col, peaks['rt'].values, rt_tol)

    # Row index -> marker position, column index -> peak position.
    marker_idx, peak_idx = np.where(within_mz & within_rt)

    # Both selections get a fresh 0..n-1 index so they align row by row.
    hit_markers = markers.iloc[marker_idx].reset_index(drop=True)
    hit_peaks = peaks.iloc[peak_idx].reset_index(drop=True)

    hits = pd.concat([hit_markers[['formula']], hit_peaks], axis=1)
    hits['mz_diff'] = np.abs(hit_markers['mz'].values - hit_peaks['mz'].values)
    hits['rt_diff'] = np.abs(hit_markers['rt'].values - hit_peaks['rt'].values)

    return hits
|
||
|
||
def main(argv=None):
    """Command-line entry point: screen a peak table against target markers.

    Reads the peaks from a parquet file and the markers from a tab-separated
    file, matches them with ``find_matches`` and writes the hits as a
    tab-separated file without the index column.

    Args:
        argv: Optional list of command-line arguments. When ``None``
            (the default) argparse reads ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description='Find matches between peaks and markers.')
    parser.add_argument('--peaks', required=True, help='Path to the peaks parquet file.')
    # The markers file is parsed with sep='\t' below, so it is tab-separated.
    parser.add_argument('--markers', required=True, help='Path to the tab-separated markers file.')
    parser.add_argument('--output', required=True, help='Path to the output TSV file.')
    # Tolerances are parsed as floats so fractional values (e.g. 2.5 ppm) are
    # accepted; integer input keeps working exactly as before.
    parser.add_argument('--ppm', type=float, default=5, help='PPM tolerance for mz matching.')
    parser.add_argument('--rt_tol', type=float, default=10, help='RT tolerance for rt matching.')
    args = parser.parse_args(argv)

    peaks = pd.read_parquet(args.peaks)
    markers = pd.read_csv(args.markers, sep='\t')

    hits = find_matches(peaks, markers, args.ppm, args.rt_tol)

    hits.to_csv(args.output, sep='\t', index=False)
|
||
|
||
# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
<tool id="target_screen" name="MS target screening" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01" license="MIT"> | ||
<description>Extract peaks from recetox-aplcms tables using a list of target ions</description> | ||
<macros> | ||
<token name="@TOOL_VERSION@">0.1.0</token> | ||
<token name="@VERSION_SUFFIX@">0</token> | ||
</macros> | ||
|
||
<requirements> | ||
<requirement type="package" version="2.2.3">pandas</requirement> | ||
<requirement type="package" version="17.0.0">pyarrow</requirement> | ||
</requirements> | ||
|
||
<command detect_errors="exit_code"><![CDATA[ | ||
python3 '${__tool_directory__}/target_screen.py' --peaks '$peaks' --markers '$markers' --output '$hits' --ppm $ppm --rt_tol $rt | ||
]]></command> | ||
|
||
<inputs> | ||
<param name="peaks" type="data" format="parquet"/> | ||
<param name="markers" type="data" format="tabular"/> | ||
<param name="ppm" type="integer" min="0" max="1000" value="10" label="ppm" help="Tolerance for peak filtering in ppm." /> | ||
<param name="rt" type="integer" min="0" max="100" value="10" label="rt tolerance" help="Tolerance regarding retention time to filter out peaks" /> | ||
</inputs> | ||
|
||
<outputs> | ||
<data name="hits" format="tabular" label="${tool.name} on ${on_string}" /> | ||
</outputs> | ||
|
||
<tests> | ||
<test> | ||
<param name="peaks" value="target_screen/peaks.parquet"/> | ||
<param name="markers" value="target_screen/markers.tsv"/> | ||
<output name="hits" value="target_screen/out.tsv"/> | ||
</test> | ||
</tests> | ||
<help><![CDATA[ | ||
.. class:: infomark | ||
**What it does** | ||
This tool pulls out peaks from a table given a list of markers. | ||
The markers are matched based on m/z values with a specified ppm tolerance and matched based on retention time with a tolerance in units of retention time. | ||
]]></help> | ||
<citations> | ||
<citation type="doi">10.25080/Majora-92bf1922-00a</citation> | ||
</citations> | ||
</tool> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
formula mz rt | ||
C8H6Cl2O3 218.9621 474.6 | ||
C9H15N3O1 180.1142 458.23 | ||
C5H2Cl3N1O1 195.9129 488.1 | ||
C13H10O3 213.0557 508.40 | ||
C13H9FO3 231.0463 521.48 | ||
C6H5NO3 138.0197 166.56 | ||
C6H4Cl1N1O2 155.9858 176.62 | ||
C19H28N2O5S 395.1646 598.96 | ||
C10H12N2O3S1 239.0496 312.55 | ||
C4H11O3P1S1 169.0094 168.08 | ||
C14H17Cl2NO2 300.0564 689.79 | ||
C11H13ClO2 167.0633 572.93 | ||
C12H4Cl2F6N4OS 434.9314 767.86 | ||
C12H4Cl2F6N4O2S 450.9263 791.29 | ||
C16H22ClN3O2 322.1328 706.5 | ||
C16H11ClF6N2O 395.0391 741.93 | ||
C10H11Cl1O3 213.0324 533.9 | ||
C7H9NO2S 170.0281 363.59 | ||
C12H7Cl3O2 286.9439 830.97 | ||
C18H15Cl3O8 462.976 662.99 | ||
C12H7Cl3O5S 366.9007 700.52 | ||
C9H9N4Cl 207.0443 403.37 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
formula mz rt sd1 sd2 area mz_diff rt_diff | ||
C8H6Cl2O3 218.9619738108278 473.4840709352675 0.6057217022739683 2.7706017478506073 1239147.63695882 0.00012618917219242576 1.1159290647325406 | ||
C9H15N3O1 180.11422341297595 450.9460162486645 0.4692965104502825 4.727634916193644 1100073.3285644436 2.341297593488889e-05 7.2839837513355405 | ||
C5H2Cl3N1O1 195.91267599889463 487.37949630118806 0.8695685392506757 2.8811688054510127 734461.0596300099 0.00022400110538001172 0.7205036988119673 | ||
C13H10O3 213.05556658306853 508.4123751384482 2.9585968043814983 3.226731392934289 1787580.264815322 0.00013341693147594924 0.01237513844824889 | ||
C13H9FO3 231.04576243564085 521.2436784813573 0.9930695671903609 2.469013097815558 1316270.081622402 0.0005375643591492008 0.23632151864273965 | ||
C10H12N2O3S1 239.04945126090132 311.8317362000094 0.5578277726641567 3.57063615115722 3042462.634739455 0.0001487390986767423 0.7182637999906092 | ||
C14H17Cl2NO2 300.0561299922103 685.3731548839577 0.8491884774374224 2.8491999009146074 1021277.4141378121 0.0002700077897088704 4.416845116042282 | ||
C12H4Cl2F6N4OS 434.93037227267905 766.6610671335172 0.6265405149641161 3.55175113250731 43923382.478327975 0.0010277273209453597 1.198932866482778 | ||
C12H4Cl2F6N4O2S 450.9259113906124 789.7479646306683 0.5765707513162325 3.4834377486718897 35843894.74749327 0.00038860938764173625 1.5420353693316429 | ||
C16H22ClN3O2 322.13274143359513 705.9176130811956 0.765497607933695 2.9798451004946203 7686414.229962895 5.8566404845805664e-05 0.5823869188044455 | ||
C16H11ClF6N2O 395.0387483584033 741.1840034426168 0.9150873601266857 2.396923077539685 692605.613740076 0.0003516415966942077 0.7459965573831369 | ||
C10H11Cl1O3 213.03219616261535 532.8368925687558 0.8335128693984499 2.548404631638127 1231177.7029795102 0.00020383738464602175 1.0631074312441342 | ||
C7H9NO2S 170.0280487596005 363.28514725405876 0.8844811055327363 2.7876246329523737 915161.3987675996 5.124039950032966e-05 0.3048527459412185 | ||
C12H7Cl3O2 286.9434413572324 831.0018611928409 0.32058179843066653 1.7667251294853705 19934.364712896095 0.00045864276756901745 0.03186119284089273 | ||
C18H15Cl3O8 462.97625391610677 662.6552310211961 0.9093786171678189 2.128435471267278 1209160.0005544876 0.00025391610677161225 0.3347689788039361 | ||
C12H7Cl3O5S 366.90097256680355 699.9403505546061 0.8393755187990459 2.354260942300286 9578789.63215569 0.0002725668035736817 0.5796494453938976 | ||
C9H9N4Cl 207.04420254367005 402.95120970553893 1.2647033563807812 2.594410018631832 40475158.16355405 9.74563299394049e-05 0.4187902944610755 |
Binary file not shown.