#!/usr/bin/python
# Provenance: tree/GetAllTilesCoord.eukonly.py, added by patch
# 85bf2e2e4419c50e59e6136e04a921b0ded89a8c ("forEukOnly",
# Damien de Vienne, 2021-03-08).
"""List the XYZ map tiles that contain the nodes of a tree-features file.

The input is scanned line by line for ``"zoom"``, ``"lat"`` and ``"lon"``
entries.  Each time a ``lon`` entry completes a record, the tile holding
that point is written to the output as ``x y zoom``, followed by the
tiles of the three coarser zoom levels when the zoom is high enough.
"""
import math

# Records whose zoom is above this level are ignored.
MAX_ZOOM = 20
# From this zoom level upward, the coarser "parent" tiles are listed too.
MIN_ZOOM_WITH_PARENTS = 5
# Number of coarser zoom levels listed for such records.
PARENT_LEVELS = 3
# File written when the module is run as a script.
OUTPUT_FILE = "XYZcoordinates"

# Output handle used by getXYZ() when no explicit handle is passed.
coo = None


def deg2num(lat_deg, lon_deg, zoom):
    """Return ``[xtile, ytile]`` of the tile containing a point.

    lat_deg, lon_deg -- coordinates of the point, in degrees.
    zoom             -- zoom level; the grid is ``2 ** zoom`` tiles wide.
    """
    lat_rad = math.radians(lat_deg)
    n = 2.0 ** zoom
    xtile = int((lon_deg + 180.0) / 360.0 * n)
    ytile = int((1.0 - math.log(math.tan(lat_rad) + (1 / math.cos(lat_rad))) / math.pi) / 2.0 * n)
    return [xtile, ytile]


def _write_tiles(sink, lat, lon, zoom):
    """Write the tile rows for one complete (lat, lon, zoom) record.

    All three values arrive as the strings read from the file; they are
    only converted once the zoom is known to be within range.
    """
    level = int(zoom)
    if level > MAX_ZOOM:
        return
    lat_deg = float(lat)
    lon_deg = float(lon)
    xy = deg2num(lat_deg, lon_deg, float(zoom))
    # The record's own zoom is echoed exactly as it was read (hence %s).
    sink.write("%d %d %s\n" % (xy[0], xy[1], zoom))
    if level >= MIN_ZOOM_WITH_PARENTS:
        for parent in range(level - 1, level - 1 - PARENT_LEVELS, -1):
            xy = deg2num(lat_deg, lon_deg, float(parent))
            sink.write("%d %d %d\n" % (xy[0], xy[1], parent))


def getXYZ(fi, out=None):
    """Scan the file ``fi`` and write one row per tile to ``out``.

    fi  -- path of the features file to read.
    out -- writable text handle; defaults to the module-level ``coo``.

    A ``lon`` line closes a record.  Records that reach ``lon`` without
    both a ``zoom`` and a ``lat`` are skipped: previously they either
    raised UnboundLocalError (first record) or wrote a meaningless
    ``0 0 False`` row (later records).

    Raises ValueError when no output handle is available.
    """
    sink = coo if out is None else out
    if sink is None:
        raise ValueError("no output handle: pass `out` or set the module-level `coo`")
    zoom = None
    lat = None
    with open(fi) as f:
        for line in f:
            tmp = line.split(":")
            if len(tmp) < 2:
                continue
            key = tmp[0].replace("\"", "").replace(" ", "")
            val = tmp[1].replace("\"", "").replace(" ", "").replace(",", "").rstrip()
            if key == 'zoom':
                zoom = val
            elif key == 'lat':
                lat = val
            elif key == 'lon':
                if zoom is not None and lat is not None:
                    _write_tiles(sink, lat, val, zoom)
                # Start a fresh record whatever happened to this one.
                zoom = None
                lat = None


if __name__ == "__main__":
    # Only TreeFeatures2.json is processed; the calls for TreeFeatures1.json
    # and TreeFeatures3.json were commented out in the original script.
    with open(OUTPUT_FILE, "w") as coo:
        getXYZ('TreeFeatures2.json')
#!/usr/bin/python
# Provenance: tree/Traverse_To_Pgsql_eukonly.py, added by patch
# 85bf2e2e4419c50e59e6136e04a921b0ded89a8c ("forEukOnly",
# Damien de Vienne, 2021-03-08).
#
# Author's history notes (Feb 5, 2016):
# - Writing of the JSON and tree files was removed; another script does it.
# - This code is more complete than the previous one.
# - Switched to ete3.
# - The tree is read from an external file (trees are retrieved with the
#   code called "gettrees.py").
# - Groups containing a single descendant can now be visible, which adds a
#   few zoom levels.

import sys
import os
from argparse import ArgumentParser, FileType  # option handling
import numpy as np
import math

from ete3 import Tree
import psycopg2  # PostgreSQL connection
from getTrees_fun import getTheTrees

parser = ArgumentParser(description='Open taxonomic tree and recode it into PostGRES/PostGIS database for visualisation in Lifemap.')
parser.add_argument('group', help='Group to look at. Can be 1,2 or 3 for Archaea, Eukaryotes and Bacteria respectively', choices=['1','2','3'])
parser.add_argument('start', help='index of the first node met in the tree', type=int)
parser.add_argument('--lang', nargs='?', const='EN', default='EN', help='Language chosen. FR for french, EN (default) for english', choices=['EN','FR'])
parser.add_argument('--updatedb', nargs='?', const='True', default='True', help='Should the NCBI taxonomy db be updated ?', choices=['True','False'])
parser.add_argument('--simplify', nargs='?', const='True', default='False', help='Should the tree be simplified by removing environmental and unindentified species?', choices=['True','False'])

args = parser.parse_args()


def midpoint(x1, y1, x2, y2):
    """Return ``[lat, lon]`` of the great-circle midpoint of two points.

    All inputs and outputs are in degrees.  ``x1``/``x2`` are used as
    latitudes and ``y1``/``y2`` as longitudes.

    NOTE(review): writeosmWays() passes a node's ``x`` as the first
    argument, while the WKT it builds puts ``x`` first in every coordinate
    pair -- confirm which axis is really meant to be the latitude here.
    """
    lat1 = math.radians(x1)
    lon1 = math.radians(y1)
    lat2 = math.radians(x2)
    lon2 = math.radians(y2)
    bx = math.cos(lat2) * math.cos(lon2 - lon1)
    by = math.cos(lat2) * math.sin(lon2 - lon1)
    lat3 = math.atan2(math.sin(lat1) + math.sin(lat2), math.sqrt((math.cos(lat1) + bx) * (math.cos(lat1) + bx) + by**2))
    lon3 = lon1 + math.atan2(by, math.cos(lat1) + bx)
    return [math.degrees(lat3), math.degrees(lon3)]


def updateDB():
    """Download the NCBI taxonomy dump and unpack the archives into taxo/."""
    print('Updating databases...')
    os.system("wget ftp://ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdump.tar.gz -N")
    os.system("tar xvzf taxdump.tar.gz -C taxo/")
    # unzip taxref
    os.system("unzip -o taxo/TAXREF_INPN_v11.zip -d taxo/")


# Substrings of a scientific name that cause a node to be pruned.
_PRUNED_NAME_MARKERS = (
    'Unclassified', 'unclassified',
    'uncultured', 'Uncultured',
    'unidentified', 'Unidentified',
    'environmental', 'sp.',
)


def simplify(arbre):
    """Prune unranked leaves and unclassified/environmental nodes in place.

    A node is detached when it is a leaf whose rank is 'no rank', or
    otherwise when its scientific name contains one of the markers above.
    Prints how many tips were removed and returns the same tree object.
    """
    initialSize = len(arbre)
    for n in arbre.traverse():
        if n.is_leaf() and n.rank == 'no rank':
            n.detach()
        elif any(marker in n.sci_name for marker in _PRUNED_NAME_MARKERS):
            n.detach()
    print("Tree HAS BEEN simplified")
    finalSize = len(arbre)
    diffInSize = (initialSize - finalSize)
    print(str(diffInSize) + " tips have been removed (" + str(round(float(diffInSize)/float(initialSize)*100, 2)) + '%)')
    print("FINAL TREE SIZE: " + str(finalSize))
    return arbre


if args.updatedb == 'True':
    updateDB()

groupnb = args.group  # also written to the database as the 'ref' column

T = getTheTrees()

starti = args.start

# Per-group settings: key of the subtree in T, message printed once loaded,
# file the subtree is written to, and the root's position (x, y),
# orientation (alpha, degrees) and radius (ray).
GROUP_LAYOUT = {
    "1": {"taxid": '2157', "loaded": "Archaeal tree loaded...", "outfile": "ARCHAEA",
          "x": 6.0, "y": 9.660254-10.0, "alpha": 30.0, "ray": 10.0},
    "2": {"taxid": '2759', "loaded": "Eukaryotic tree loaded", "outfile": "EUKARYOTES",
          "x": 0, "y": 0, "alpha": 90.0, "ray": 30.0},
    "3": {"taxid": '2', "loaded": "Bacterial tree loaded", "outfile": "BACTERIA",
          "x": 0.0, "y": -11.0, "alpha": 270.0, "ray": 10.0},
}

print("Downloading tree...")
# argparse restricts 'group' to '1', '2' or '3', so the lookup cannot fail.
layout = GROUP_LAYOUT[groupnb]
t = T[layout["taxid"]].detach()
if args.simplify == "True":
    t = simplify(t)
print(layout["loaded"])
# Save the (possibly simplified) subtree.
t.write(outfile=layout["outfile"], features=["name", "taxid"], format_root_node=True)
t.x = layout["x"]
t.y = layout["y"]
t.alpha = layout["alpha"]
t.ray = layout["ray"]

t.zoomview = np.ceil(np.log2(30/t.ray))

# Species (leaf) ids start at starti; internal-node ids start after all leaves.
nbsp = len(t)
spid = starti
ndid = starti + nbsp
rootnb = ndid+1
maxZoomView = 0


## FUNCTIONS
def rad(deg):
    """Convert degrees to radians."""
    return((deg*np.pi)/180)


def halfCircle(x, y, r, start, end, nsteps):
    """Return (xs, ys) of nsteps points on the circle of centre (x, y) and
    radius r, for angles evenly spaced from start to end (radians)."""
    rs = np.linspace(start, end, num=nsteps)
    xc = x+r*np.cos(rs)
    yc = y+r*np.sin(rs)
    return(xc, yc)


def ellipse(x, y, r, alpha, nsteps):
    """Return (xs, ys) of nsteps points on a half ellipse centred on (x, y).

    The semi-axes are r and r/6; the curve is rotated by alpha (radians).
    """
    start = 0
    end = np.pi+start
    rs = np.linspace(start, end, num=nsteps)
    a = r
    # Change this value to change the shape of the polygons: it controls how
    # flat the elliptic side is.  The other side is always a half circle.
    b = float(r)/6
    xs = a*np.cos(rs)
    ys = b*np.sin(rs)
    # rotation
    xs2 = x+(xs*np.cos(alpha)-ys*np.sin(alpha))
    ys2 = y+(xs*np.sin(alpha)+ys*np.cos(alpha))
    return(xs2, ys2)


def HalfCircPlusEllips(x, y, r, alpha, start, end, nsteps):
    """Return (xs, ys): the half-circle points followed by the half-ellipse
    points, i.e. 2*nsteps points in total."""
    circ = halfCircle(x, y, r, start, end, nsteps)
    elli = ellipse(x, y, r, alpha, nsteps)
    return (np.concatenate((circ[0], elli[0])), np.concatenate((circ[1], elli[1])))


## CONNECT TO POSTGRESQL/POSTGIS DATABASE
try:
    # No password is given here; the original note says it is retrieved
    # directly from ~/.pgpassconn.
    conn = psycopg2.connect("dbname='tree' user='lm' host='localhost'")
except psycopg2.Error:
    # Only database errors are handled, so Ctrl-C and sys.exit() still
    # propagate (the original bare 'except:' swallowed them too).
    print("I am unable to connect to the database")
    sys.exit(1)

cur = conn.cursor()
## INITIALIZE DATABASE
if groupnb == "2":
    # Delete the current tables.
    cur.execute("select exists(select * from information_schema.tables where table_name='points')")
    if cur.fetchone()[0]:
        print('REMOVING OLD TABLES')
    # IF EXISTS makes the reset work on a partially initialised database
    # (the original dropped all three tables whenever 'points' existed and
    # dropped none otherwise).
    for table in ("points", "lines", "polygons"):
        cur.execute("DROP TABLE IF EXISTS %s;" % table)
    conn.commit()
    # Create the database structure.
    # NOTE(review): 'points' and 'polygons' declare a column 'rankame' while
    # 'lines' declares 'rankname' -- looks like a typo, left unchanged because
    # other consumers of the schema are not visible here.
    cur.execute("CREATE TABLE points(id bigint,ref smallint,z_order smallint,branch boolean,tip boolean,zoomview integer,clade boolean,cladecenter boolean,rankame boolean,sci_name text,common_name text,full_name text,rank text, name text, nbdesc integer,taxid text,way geometry(POINT,900913));")
    cur.execute("CREATE TABLE lines(id bigint,ref smallint,z_order smallint,branch boolean,tip boolean,zoomview integer,clade boolean,cladecenter boolean,rankname boolean,sci_name text,common_name text,full_name text,rank text,name text, nbdesc integer,taxid text,way geometry(LINESTRING,900913));")
    cur.execute("CREATE TABLE polygons(id bigint,ref smallint,z_order smallint,branch boolean,tip boolean,zoomview integer,clade boolean,cladecenter boolean,rankame boolean,sci_name text,common_name text,full_name text,rank text, name text, nbdesc integer,taxid text,way geometry(POLYGON,900913));")
    conn.commit()
    print("\nTABLES HAVE BEEN CREATED. Done.\n")
    # Include the root node.
    cur.execute("INSERT INTO points (id, sci_name, common_name,rank,nbdesc,tip, zoomview,taxid,way) VALUES(1000000000, 'Root','Root','Root',1000000, FALSE, 1,1,ST_Transform(ST_GeomFromText('POINT(0 -4.226497)', 4326), 900913));")
    conn.commit()


def writeosmNode(node):
    """Insert one row for ``node`` into the 'points' table.

    Clades are dealt with separately (see writeosmpolyg).  The statement is
    built by string formatting, so the text attributes of ``node`` must
    already have their single quotes escaped by the caller.  Nothing is
    committed here.
    """
    command = "INSERT INTO points (id, taxid, sci_name, common_name,rank,nbdesc,zoomview, tip,way) VALUES(%d,%s,'%s','%s','%s',%d,%d,'%s',ST_Transform(ST_GeomFromText('POINT(%.20f %.20f)', 4326), 900913));" % (node.id, node.taxid, node.sci_name, node.common_name, node.rank, node.nbdesc, node.zoomview, node.is_leaf(), node.x, node.y)
    cur.execute(command)


def writeosmWays(node, id):
    """Insert the branch joining ``node`` to its parent into 'lines'.

    node -- tree node; ``node.up`` must exist.
    id   -- id of the new row.

    The branch is a three-point line (parent, midpoint, node) and its name
    lists the two clades so that the left-hand one comes first.  Nothing is
    committed here.
    """
    # Create branch names
    left = node.up.sci_name + " " + node.up.common_name
    right = node.sci_name + " " + node.common_name
    # NOTE: in this (Python 2) script "\u2190" in a plain string literal is
    # a literal backslash sequence; the name is inserted as an E'...' SQL
    # string, which presumably lets PostgreSQL decode it into the arrow.
    if node.x >= node.up.x:  # we are on the right
        wayName = "\u2190 " + left + " - " + right + " \u2192"
    else:  # we are on the left
        wayName = "\u2190 " + right + " - " + left + " \u2192"

    midlatlon = midpoint(node.up.x, node.up.y, node.x, node.y)

    command = "INSERT INTO lines (id, branch, zoomview, ref, name, way) VALUES(%d,'TRUE',%d,'%s',E'%s',ST_Transform(ST_GeomFromText('LINESTRING(%.20f %.20f, %.20f %.20f,%.20f %.20f)', 4326), 900913));" % (id, node.zoomview, groupnb, wayName, node.up.x, node.up.y, midlatlon[0], midlatlon[1], node.x, node.y)
    cur.execute(command)


def writeosmpolyg(node, ids):
    """Insert the clade polygon of ``node``, its centre point and the line
    that carries its rank label.

    node -- tree node with x, y, ray, alpha and the naming attributes set.
    ids  -- indexable sequence; elements 60, 61 and 62 are used as the ids
            of the polygon, the centre point and the rank line respectively.

    Nothing is committed here.
    """
    polyg = HalfCircPlusEllips(node.x, node.y, node.ray, rad(node.alpha) + np.pi/2, rad(node.alpha) - np.pi/2, rad(node.alpha) + np.pi/2, 30)
    polygcenter = (np.mean(polyg[0]), np.mean(polyg[1]))
    # The outline has 60 points.  Points 0..58 are written and the ring is
    # closed by repeating point 0; point 59 is left out.
    ring = ['POLYGON((%.20f %.20f ' % (polyg[0][0], polyg[1][0])]
    ring.extend(',%.20f %.20f' % (polyg[0][i], polyg[1][i]) for i in range(1, 59))
    ring.append(',%.20f %.20f' % (polyg[0][0], polyg[1][0]))  # to close the ring
    ring.append('))')
    cooPolyg = ''.join(ring)
    command = "INSERT INTO polygons (id, ref, clade, taxid, sci_name, common_name, rank, nbdesc,zoomview, way) VALUES(%d,'%s','TRUE', %s,'%s','%s','%s',%d,%d, ST_Transform(ST_GeomFromText('%s', 4326), 900913));" % (ids[60], groupnb, node.taxid, node.sci_name, node.common_name, node.rank, node.nbdesc, node.zoomview, cooPolyg)
    cur.execute(command)
    # Add the clade centre.
    command = "INSERT INTO points (id, cladecenter, taxid, sci_name, common_name,rank,nbdesc,zoomview, way) VALUES('%d','TRUE', %s,'%s','%s','%s',%d,%d,ST_Transform(ST_GeomFromText('POINT(%.20f %.20f)', 4326), 900913));" % (ids[61], node.taxid, node.sci_name, node.common_name, node.rank, node.nbdesc, node.zoomview, polygcenter[0], polygcenter[1])
    cur.execute(command)
    # Add a way on which the rank will be written: outline points 35..44.
    path = ['LINESTRING(%.20f %.20f' % (polyg[0][35], polyg[1][35])]
    path.extend(',%.20f %.20f' % (polyg[0][i], polyg[1][i]) for i in range(36, 45))
    path.append(')')
    cooLine = ''.join(path)
    command = "INSERT INTO lines (id, ref, rankname, sci_name, zoomview, rank, nbdesc, way) VALUES(%d,%s,'TRUE','%s',%d,'%s',%d, ST_Transform(ST_GeomFromText('%s', 4326), 900913));" % (ids[62], groupnb, node.sci_name, node.zoomview, node.rank, node.nbdesc, cooLine)
    cur.execute(command)


print("Tree traversal...")
+for n in t.traverse(): + special = 0 + n.dist=1.0 + tot = 0.0 + if n.is_leaf(): + spid = spid +1 + n.id = spid + else: + ndid = ndid+1 + n.id = ndid + child = n.children + ##NEW -->| + if ((len(child)==1)&(len(n)>1)): + special=1 + if ((len(child)==1)&(len(n)==1)): + special=2 + ## |<-- NEW + for i in child: + tot = tot + np.sqrt(len(i)); + nbdesc = len(n); + ##remove special chars in names + ####IF --LANG IS SET TO FR, WE CHGANGE HERE THE RANK AND COMMON NAMES + if (args.lang=='FR'): + n.common_name = n.common_name_FR + n.rank = n.rank_FR + #####OK + n.common_name_long = ', '.join(n.common_name) + n.common_name = n.common_name[0] if len(n.common_name)>0 else "" + ##we create a 'long' common name. the common name going to db is only the first of the list + n.common_name = n.common_name.replace("'","''"); + n.rank = n.rank.replace("'","''"); + n.sci_name = n.sci_name.replace("'","''") + #add parenthesis to the common name + if n.common_name!='': + n.common_name = "(" + n.common_name + ")" + if n.common_name_long!='': + n.common_name_long = "(" + n.common_name_long + ")" + n.nbdesc = nbdesc; + nbsons = len(child); + angles = []; + ray = n.ray; + for i in child: + #i.ang = 180*(len(i)/float(nbdesc))/2; + i.ang = 180*(np.sqrt(len(i))/tot)/2; #using sqrt we decrease difference between large and small groups + angles.append(i.ang); + if (special==1): + i.ray = ray-(ray*20)/100 + else: + if (special==2): + i.ray = ray-(ray*50)/100 + else: + i.ray = (ray*np.sin(rad(i.ang))/np.cos(rad(i.ang)))/(1+(np.sin(rad(i.ang))/np.cos(rad(i.ang)))); + i.dist = ray - i.ray; + ang = np.repeat(angles, 2); + ang = np.cumsum(ang); + ang = ang[0::2]; + ang = [i-(90-n.alpha) for i in ang]; + cpt = 0 + for i in child: + i.alpha = ang[cpt]; + i.x = n.x + i.dist*np.cos(rad(i.alpha)); + i.y = n.y + i.dist*np.sin(rad(i.alpha)); + i.zoomview = np.ceil(np.log2(30/i.ray)) + if i.zoomview <= 0: + i.zoomview = 0 + if maxZoomView temp.txt ; mv temp.txt '+ jsonfile; +os.system(consoleexex); +json = 
open(jsonfile, "a"); +json.write("\t}\n]\n") +json.close()