-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathsra2runacc.py
executable file
·37 lines (26 loc) · 1019 Bytes
/
sra2runacc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#!/usr/bin/env python3
"""
Use Entrez utilities to get all the run accession numbers for a SRA study.
Such run accession numbers can be passed, for example, to a NCBI SRA Toolkit
utility like fastq-dump or fasterq-dump to retrieve all the FASTQ files of
the different samples in the SRA study.
"""
# Useful references:
# * NCBI SRA Toolkit docs: https://www.ncbi.nlm.nih.gov/sra/docs/
# * NCBI SRA Toolkit repo: https://github.com/ncbi/sra-tools
# * NCBI SRA Toolkit wiki: https://github.com/ncbi/sra-tools/wiki
import sys
import argparse
import re
sys.path += ['.', '..']
import entrez as ez
def main():
    """Print the run accessions found in an SRA search summary.

    Parses one positional command-line argument (the SRA identifier), asks
    ``ez.on_search`` for the ``summary`` of that term in the ``sra``
    database, and writes every ``acc="..."`` value found on lines that
    contain ``Name="Runs"`` to standard output, one accession per line.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('sra', metavar='SRAid', help='SRA identifier')
    args = parser.parse_args()

    # Raw string: '\w' inside a plain literal is an invalid escape sequence
    # and triggers a warning on recent Python versions. Compiled once here
    # so the pattern is not rebuilt for every line.
    run_acc = re.compile(r'acc="(?P<acc>\w+[0-9]+)"')

    for line in ez.on_search(db='sra', term=args.sra, tool='summary'):
        if 'Name="Runs"' not in line:
            continue
        # A "Runs" line may carry several accessions, or none at all, so
        # walk every match instead of dereferencing a single re.search()
        # result (which is None, and would crash, when nothing matches).
        for match in run_acc.finditer(line):
            print(match.group('acc'))
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()