-
Notifications
You must be signed in to change notification settings - Fork 0
/
parse_tf_to_mongo.py
45 lines (39 loc) · 1.19 KB
/
parse_tf_to_mongo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#!/usr/bin/python
# -*- coding: utf-8 -*-
# __author__: shicaiping
from bson.objectid import ObjectId
from pymongo import MongoClient
import argparse
import pandas as pd
import re
import os
from biocluster.config import Config
def insert_db(file, database):
    """
    Load transcription-factor records from a tab-separated file into the
    ``known_tf`` collection of the ``ref_rna`` reference database.

    The first line of the file is treated as a header and skipped. Every
    following non-blank line is split on tabs and its first five columns are
    stored as ``gene_id``, ``transcript_id``, ``tf_id``, ``family`` and
    ``specie``; ``database`` is stored unchanged under the ``db`` key.

    :param file: path of the tab-separated input file.
    :param database: value written to the ``db`` field of every document.
    :raises IndexError: if a non-blank data line has fewer than five columns.
    """
    config = Config()
    db = config.get_mongo_client(mtype="ref_rna", ref=True)[config.get_mongo_dbname("ref_rna", ref=True)]
    collection = db['known_tf']
    data_list = []
    with open(file, "r") as f:
        # Discard the header row; the default keeps an empty file from
        # raising StopIteration.
        next(f, None)
        for line in f:
            stripped = line.strip()
            if not stripped:
                # Blank lines (e.g. a trailing newline at end of file) carry
                # no record and would otherwise fail on the column lookups.
                continue
            items = stripped.split("\t")
            data_list.append({
                'gene_id': items[0],
                'transcript_id': items[1],
                'tf_id': items[2],
                'family': items[3],
                'specie': items[4],
                'db': database,
            })
    # insert_many() rejects an empty document list, so a header-only file
    # simply results in no write.
    if data_list:
        collection.insert_many(data_list)
def _main():
parser = argparse.ArgumentParser(description='insert')
parser.add_argument('-i', '--file')
parser.add_argument('-db', '--database')
args = parser.parse_args()
insert_db(args.file, args.database)
# Run the import only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    _main()