diff --git a/metanetx_uniprot/README.md b/metanetx_uniprot/README.md
new file mode 100644
index 00000000..5b76a0d0
--- /dev/null
+++ b/metanetx_uniprot/README.md
@@ -0,0 +1,31 @@
+# MetaNetX and UniProt Content
+
+Code is reused from Biochem4j: https://github.com/neilswainston/biochem4j/tree/master/sbcdb
+
+Access chemical, reaction, enzyme, and organism information from the following sources:
+- libchebipy (note: the `_parsers.py` file in this repo must be used to update the libchebipy library's copy at `~/libchebipy/_parsers.py`)
+- NCBITaxonomy
+- MetaNetX
+- Rhea
+- UniProt
+
+To run the full pipeline to get all relationships: 
+
+```
+python build.py ~/biochem4j ',' 1
+```
+
+To run and only get reference proteome taxa that also exist in kg-microbe:
+```
+python build_taxa_ids.py ~/biochem4j 1
+```
+Note: this uses `ncbitaxon.json` (built from kg-microbe), which is expected to be in the Files directory.
+
+To build the entire graph by combining all of the separate triples files and creating a KGX-format nodes file:
+```
+python combine_rels.py --directory ~/biochem4j/rels
+python create_labels_file.py --directory ~/biochem4j/rels
+```
+This will output the following files:
+- ~/biochem4j/rels/combined_kg.csv
+- ~/biochem4j/combined_kgx_merged-kg_nodes.csv
diff --git a/metanetx_uniprot/TestingFiles/chem_prop.tsv b/metanetx_uniprot/TestingFiles/chem_prop.tsv
new file mode 100644
index 00000000..d4f28677
--- /dev/null
+++ b/metanetx_uniprot/TestingFiles/chem_prop.tsv
@@ -0,0 +1,358 @@
+### MetaNetX/MNXref reconciliation ###
+#Based on the following resources:
+#
+#RESOURCE:  MetaNetX/MNXref
+#VERSION:   4.4
+#DATE:      2022/03/16
+#URL:       https://www.metanetx.org
+#LICENSE:	
+#	MetaNetX copyright 2011 SystemsX, SIB Swiss Institute of Bioinformatics
+#	Except where otherwise noted, the data  available  from  this  site  are
+#	licensed under a Creative Commons Attribution 4.0 International License.
+#	MNXref  uses  information  on  cellular  compartments,  reactions,   and
+#	metabolites that is sourced from a number  of  external  resources.  The
+#	licensing agreements of those resources are specified  in  each  of  the
+#	downloadable  files  listed  below.  For  each  compound,  reaction  and
+#	cellular compartment in the MNXref namespace we indicate which  external
+#	resource  provided  the  information  used  in  MNXref.  Compounds   and
+#	reactions in the MNXref namespace may be identical to, or  differ  from,
+#	those in the external resource. In either case the data from MNXref  may
+#	be considered to be subject to the original  licensing  restrictions  of
+#	the external resource.
+#	(https://www.metanetx.org/mnxdoc/mnxref.html)
+#
+#RESOURCE:  BiGG
+#VERSION:   1.6.0, last updated: 2019/10/31 (downloaded on 2021/07/23)
+#URL:       http://bigg.ucsd.edu
+#LICENSE:	
+#	Copyright 2015 The Regents of the University of California
+#	
+#	All Rights Reserved
+#	
+#	Permission to use, copy, modify and distribute any part of  BiGG  Models
+#	for educational, research and  non-profit  purposes,  without  fee,  and
+#	without a written agreement is hereby granted, provided that  the  above
+#	copyright notice, this paragraph  and  the  following  three  paragraphs
+#	appear in all copies.
+#	
+#	Those desiring to incorporate BiGG Models into  commercial  products  or
+#	use for commercial purposes should contact  the  Technology  Transfer  &
+#	Intellectual Property Services, University  of  California,  San  Diego,
+#	9500 Gilman Drive, Mail Code 0910, La Jolla, CA  92093-0910,  Ph:  (858)
+#	534-5815, FAX: (858) 534-7345, e-mail: invent@ucsd.edu.
+#	
+#	In no event shall the University of California be liable  to  any  party
+#	for direct, indirect, special,  incidental,  or  consequential  damages,
+#	including lost profits, arising out of the use of  this  bigg  database,
+#	even if the University of California has been advised of the possibility
+#	of such damage.
+#	
+#	The BiGG Models provided  herein  is  on  an  "as  is"  basis,  and  the
+#	University of California  has  no  obligation  to  provide  maintenance,
+#	support, updates, enhancements,  or  modifications.  The  University  of
+#	California makes no representations and extends  no  warranties  of  any
+#	kind, either implied or express, including,  but  not  limited  to,  the
+#	implied warranties  of  merchantability  or  fitness  for  a  particular
+#	purpose, or that the use of  the  BiGG  Models  will  not  infringe  any
+#	patent, trademark or other rights.
+#	(http://bigg.ucsd.edu/)
+#
+#RESOURCE:  The Cell Component Ontology
+#VERSION:   25.0 (downloaded on 2021/06/03)
+#URL:       https://bioinformatics.ai.sri.com/CCO/
+#LICENSE:	
+#	"Open  Databases"  means  the  EcoCyc   and   MetaCyc   Pathway/genome
+#	databases.
+#	
+#	2.1 Open Databases. SRI  hereby  grants  to  LICENSEE  a  non-exclusive,
+#	royalty-free license to use, modify and redistribute the Open  Databases
+#	(as such term  is  defined  in  Exhibit  B)  and  LICENSEE's  modified
+#	versions thereof on a royalty-free basis, worldwide and for any purpose;
+#	provided, in each case, that if LICENSEE modifies any Open Database (the
+#	modified  version  being  a  "Modified  Open  Database"),  then  (i)
+#	LICENSEE must provide a copy of the Modified Open Database to  SRI  (and
+#	hereby grants to  SRI  a  nonexclusive,  royalty-free  license  to  use,
+#	modify, and redistribute the Modified Open Database  worldwide  and  for
+#	any purpose and to authorize others to do so);  and  (ii)  any  Modified
+#	Open Databases, or websites from which such Modified Open Databases  may
+#	be obtained, must clearly and prominently:
+#	
+#	(a) identify the Open Databases from which they were derived:
+#	
+#	(b) include all applicable copyright notices and author lists  from  the
+#	Open Databases from which they were derived; and
+#	
+#	(c) identify or summarize all modifications that were made.
+#	
+#	Any distribution of such Modified Open Databases  without  the  required
+#	notices is a violation of SRI's and its licensors' copyright  and  other
+#	proprietary rights. All trademarks, service marks, and trade  names  are
+#	proprietary to SRI and its licensors. The Open Databases, including  any
+#	files incorporated in or generated from  the  Open  Databases  and  data
+#	accompanying the Open Databases, are licensed to LICENSEE by SRI and its
+#	licensors, and SRI and its licensors do not transfer title or any  other
+#	rights in the Open Databases to LICENSEE. LICENSEE may not use the  Open
+#	Databases except as otherwise specified herein.
+#	
+#	2.1.1 If SRI,  in  its  sole  discretion,  determines  that  a  Modified
+#	Database is of sufficient quality and interest to the  community  to  be
+#	hosted on biocyc.org, then SRI may (if the  Modified  Database  includes
+#	significant curation over the original Open Database it is derived from,
+#	or the last version of the Modified Database provided to SRI) provide to
+#	LICENSEE a  personal,  one-year  subscription  to  biocyc  at  no  cost;
+#	provided, however, that if LICENSEE edits the Modified  Database  via  a
+#	MySQL server operated by SRI or  its  contractors,  such  free  one-year
+#	subscription will be forfeited.
+#	(https://biocyc.org/ptools-academic-license.shtml)
+#
+#RESOURCE:  ChEBI
+#VERSION:   203 (downloaded on 2021/09/30)
+#URL:       https://www.ebi.ac.uk/chebi/
+#LICENSE:	
+#	All data in the  database  is  non-proprietary  or  is  derived  from  a
+#	non-proprietary source. It is thus freely accessible  and  available  to
+#	anyone. In addition, each data item is fully  traceable  and  explicitly
+#	referenced to the original source.
+#	(https://www.ebi.ac.uk/chebi/aboutChebiForward.do)
+#
+#RESOURCE:  enviPath
+#VERSION:   (downloaded on 2021/11/24)
+#URL:       https://envipath.org
+#LICENSE:	
+#	The core data sets of enviPath are licensed under the Creative Commons
+#	Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA 4.0)
+#	license. This allows you to use them in a non-commercial context, for
+#	example if you work at a University or for a public research institute.
+#	You can even redistribute and modify the data using the same license. If
+#	you want to use the data commercially, contact us, we offer commercial
+#	license agreements.
+#	We summarized how you can use the data on our license page.
+#	(https://envipath.com/license/)
+#
+#RESOURCE:  HMDB
+#VERSION:   4.0 (downloaded on 2021/06/18)
+#URL:       https://hmdb.ca
+#LICENSE:	
+#	HMDB is offered to the public as a freely available  resource.  Use  and
+#	re-distribution of the  data,  in  whole  or  in  part,  for  commercial
+#	purposes requires  explicit  permission  of  the  authors  and  explicit
+#	acknowledgment  of  the  source  material  (HMDB)   and   the   original
+#	publication.
+#	(https://hmdb.ca/about)
+#
+#RESOURCE:  KEGG
+#VERSION:   98.0+/06-11, Jun 21 (downloaded on 2021/06/11)
+#URL:       https://www.kegg.jp
+#LICENSE:	
+#	Academic users may freely use the KEGG website and may also freely  link
+#	to the KEGG website.
+#	Non-academic  users  may  use  the  KEGG  website  as  end   users   for
+#	non-commercial purposes, but any other use requires a license agreement.
+#	Academic users who utilize KEGG  for  providing  academic  services  are
+#	requested to obtain a KEGG  FTP  subscription  for  organizational  use,
+#	which includes a proper license agreement.
+#	Non-academic  users  and  Academic  users  intending  to  use  KEGG  for
+#	commercial purposes are requested to obtain a license agreement  through
+#	KEGG's exclusive licensing agent, Pathway Solutions.
+#	(https://www.kegg.jp/kegg/legal.html)
+#
+#RESOURCE:  LipidMaps
+#VERSION:   2021-05-28 (downloaded on 2021/06/11)
+#URL:       https://www.lipidmaps.org
+#LICENSE:	
+#	The Lipidomics Gateway is provided on an "as is" basis, without warranty
+#	or representation of any kind, express or implied. The  content  of  the
+#	Lipidomics Gateway website  is  protected  by  international  copyright,
+#	trademark and other laws. You may download articles and web  pages  from
+#	this site for your personal, non-commercial use only, provided that  you
+#	keep intact all authorship, copyright and other proprietary notices. The
+#	Featured Lipid can also be used for educational purposes, provided  that
+#	credit is given to the Lipidomics Gateway. If  you  use  the  Lipidomics
+#	Gateway, you accept these terms. The LIPID MAPS Consortium reserves  the
+#	right to modify these terms at any time.
+#	(https://www.lipidmaps.org/about/)
+#
+#RESOURCE:  MetaCyc
+#VERSION:   25.0 (downloaded on 2021/06/03)
+#URL:       https://metacyc.org
+#LICENSE:	
+#	"Open  Databases"  means  the  EcoCyc   and   MetaCyc   Pathway/genome
+#	databases.
+#	
+#	2.1 Open Databases. SRI  hereby  grants  to  LICENSEE  a  non-exclusive,
+#	royalty-free license to use, modify and redistribute the Open  Databases
+#	(as such term  is  defined  in  Exhibit  B)  and  LICENSEE's  modified
+#	versions thereof on a royalty-free basis, worldwide and for any purpose;
+#	provided, in each case, that if LICENSEE modifies any Open Database (the
+#	modified  version  being  a  "Modified  Open  Database"),  then  (i)
+#	LICENSEE must provide a copy of the Modified Open Database to  SRI  (and
+#	hereby grants to  SRI  a  nonexclusive,  royalty-free  license  to  use,
+#	modify, and redistribute the Modified Open Database  worldwide  and  for
+#	any purpose and to authorize others to do so);  and  (ii)  any  Modified
+#	Open Databases, or websites from which such Modified Open Databases  may
+#	be obtained, must clearly and prominently:
+#	
+#	(a) identify the Open Databases from which they were derived:
+#	
+#	(b) include all applicable copyright notices and author lists  from  the
+#	Open Databases from which they were derived; and
+#	
+#	(c) identify or summarize all modifications that were made.
+#	
+#	Any distribution of such Modified Open Databases  without  the  required
+#	notices is a violation of SRI's and its licensors' copyright  and  other
+#	proprietary rights. All trademarks, service marks, and trade  names  are
+#	proprietary to SRI and its licensors. The Open Databases, including  any
+#	files incorporated in or generated from  the  Open  Databases  and  data
+#	accompanying the Open Databases, are licensed to LICENSEE by SRI and its
+#	licensors, and SRI and its licensors do not transfer title or any  other
+#	rights in the Open Databases to LICENSEE. LICENSEE may not use the  Open
+#	Databases except as otherwise specified herein.
+#	
+#	2.1.1 If SRI,  in  its  sole  discretion,  determines  that  a  Modified
+#	Database is of sufficient quality and interest to the  community  to  be
+#	hosted on biocyc.org, then SRI may (if the  Modified  Database  includes
+#	significant curation over the original Open Database it is derived from,
+#	or the last version of the Modified Database provided to SRI) provide to
+#	LICENSEE a  personal,  one-year  subscription  to  biocyc  at  no  cost;
+#	provided, however, that if LICENSEE edits the Modified  Database  via  a
+#	MySQL server operated by SRI or  its  contractors,  such  free  one-year
+#	subscription will be forfeited.
+#	(https://biocyc.org/ptools-academic-license.shtml)
+#
+#RESOURCE:  Reactome
+#VERSION:   77 June 14, 2021 (downloaded on 2021/09/03)
+#URL:       https://reactome.org
+#LICENSE:	
+#	Reactome is an open source and open access resource, available to anyone.
+#	Usage of Reactome material is covered by two Creative Commons licenses:
+#	
+#	The terms of the Creative Commons Public Domain (CC0) License apply to all
+#	Reactome annotation files, e.g. identifier mapping data, specialized data
+#	files, and interaction data derived from Reactome.
+#	(https://reactome.org/license/)
+#
+#RESOURCE:  Rhea
+#VERSION:   119 (downloaded on 2021/11/03)
+#URL:       https://www.rhea-db.org
+#LICENSE:	
+#	All data in Rhea is freely accessible and available for anyone to use under
+#	the Creative Commons Attribution License.
+#	(https://www.rhea-db.org/documentation)
+#
+#RESOURCE:  SABIO-RK
+#VERSION:   Software Update: 2021/05/11 -- Database Release: 2021/05/28 (downloaded on 2021/07/01)
+#URL:       http://sabiork.h-its.org
+#LICENSE:	
+#	HITS, gGmbH HITS own the  SABIO-RK  database,  its  interfaces  and  its
+#	associated  documentation  (all  referred  to  in   the   following   as
+#	"Database").  You  should  carefully  read  the  following   terms   and
+#	conditions before  using  this  Database.  Your  use  of  this  Database
+#	indicates your acceptance of this license agreement and  all  terms  and
+#	conditions.You are hereby granted a non-exclusive  and  non-transferable
+#	license to use  the  Database  according  to  the  following  terms  and
+#	conditions. This license is  to  use  the  Database  for  Non-Commercial
+#	Purpose only. Non-Commercial Purpose  means  the  use  of  the  Database
+#	solely for  internal  non-commercial  research  and  academic  purposes.
+#	Non-Commercial Purpose excludes, without  limitation,  any  use  of  the
+#	Database, as part of, or in any way in  connection  with  a  product  or
+#	service which is sold, offered for sale, licensed,  leased,  loaned,  or
+#	rented. Permission to use this Database for  Non-Commercial  Purpose  is
+#	hereby granted without fee and subject to the following  terms  of  this
+#	license.
+#	
+#	Commercial Use
+#	If you desire to  use  the  Database  for  profit-making  or  commercial
+#	purposes, you agree to negotiate in good faith a license with  the  HITS
+#	prior to such profit-making or commercial use. The HITS  shall  have  no
+#	obligation to grant such license to you,  and  may  grant  exclusive  or
+#	non-exclusive licenses to others. You agree to notify the  HITS  of  any
+#	inquiries you have  for  commercial  use  of  the  Database  and/or  its
+#	modifications. You may contact the following email to discuss commercial
+#	use: sabiork at h-its.org
+#	
+#	Governing Law
+#	This Agreement is governed  by  the  law  of  the  Federal  Republic  of
+#	Germany. The application of the UN Convention on the Sale  of  Goods  is
+#	excluded.
+#	
+#	Disclaimer of Warranty
+#	Because this Database is licensed free of charge, there is  no  warranty
+#	for the data in it contained and the methods used for its querying.  The
+#	HITS makes no warranty or  representation  that  the  operation  of  the
+#	Database in this compilation will be error-free, and the HITS  is  under
+#	no obligation to provide any services, by way of maintenance, update, or
+#	otherwise.
+#	
+#	THIS DATABASE AND THE  ACCOMPANYING  FILES  ARE  LICENSED  "AS  IS"  AND
+#	WITHOUT WARRANTIES AS TO PERFORMANCE OR  MERCHANTABILITY  OR  ANY  OTHER
+#	WARRANTIES WHETHER EXPRESSED OR IMPLIED. NO WARRANTY OF  FITNESS  FOR  A
+#	PARTICULAR PURPOSE IS OFFERED. THE ENTIRE RISK AS  TO  THE  QUALITY  AND
+#	PERFORMANCE OF THE  PROGRAM  IS  WITH  YOU.  SHOULD  THE  PROGRAM  PROVE
+#	DEFECTIVE, YOU ASSUME THE COST OF ALL  NECESSARY  SERVICING,  REPAIR  OR
+#	CORRECTION.
+#	
+#	Limitation of Liability
+#	IN NO EVENT WILL  HITS,  OR  ANY  OTHER  PARTY  WHO  MAY  MODIFY  AND/OR
+#	REDISTRIBUTE THE DATABASE AS PERMITTED  ABOVE,  BE  LIABLE  TO  YOU  FOR
+#	DAMAGES, INCLUDING ANY GENERAL,  SPECIAL,  INCIDENTAL  OR  CONSEQUENTIAL
+#	DAMAGES ARISING  OUT  OF  THE  USE  OR  INABILITY  TO  USE  THE  PROGRAM
+#	(INCLUDING BUT NOT LIMITED TO  LOSS  OF  DATA  OR  DATA  BEING  RENDERED
+#	INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A  FAILURE  OF
+#	THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF VTIP  AND  HITS
+#	OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+#	
+#	Reference to SABIO-RK  Users  will  cite  SABIO-RK  in  publications  or
+#	presentations, whenever the data used was extracted from  the  database.
+#	Termination This  agreement  is  effective  until  terminated.  You  may
+#	terminate this agreement  at  any  time  by  destroying  all  associated
+#	material (e.g., documentation or web service clients) to the database in
+#	your possession and by stopping any access to the database  directly  or
+#	from  software  generated  by  you.  This   agreement   will   terminate
+#	immediately without notice from and HITS if you fail to comply with  any
+#	of the terms and conditions of this license. This  agreement  will  also
+#	terminate immediately without notice from the HITS if  it  is  found  to
+#	implement patented algorithms or contain copyrighted code not  owned  or
+#	licensed the HITS for the purpose  of  its  inclusion  in  the  SABIO-RK
+#	Database. This agreement cannot be terminated by any other mechanism  or
+#	for any other reason than those stated herein.
+#	
+#	Place of Court
+#	The exclusive venue for all disputes arising from or in connection  with
+#	this Agreement is Mannheim, Germany (HRB 337446), when the Licensee is a
+#	business person, a legal entity governed by public  law,  or  a  special
+#	fund governed by public law,  or  does  not  have  a  general  place  of
+#	jurisdiction  within  the  Federal  Republic  of  Germany.  Address  all
+#	correspondence  regarding  this  license  to  electronic  mail  address:
+#	sabiork at h-its.org Any inquiries  and  comments  regarding  bugs,  bug
+#	fixes, enhancements, modifications or any other similar issues should be
+#	directed to: sabiork at h-its.org
+#	
+#	Copyright 2007 by HITS, gGmbH. All rights reserved.
+#	(http://sabiork.h-its.org/layouts/content/termscondition.gsp)
+#
+#RESOURCE:  The SEED
+#VERSION:   2.6.1 (July 31, 2020) (downloaded on 2021/08/09)
+#URL:       https://modelseed.org
+#LICENSE:	
+#	All tools and datasets that make up the SEED are in the public domain.
+#	(https://modelseed.org)
+#
+#RESOURCE:  SwissLipids
+#VERSION:   (downloaded on 2021/07/29)
+#URL:       https://www.swisslipids.org
+#LICENSE:	
+#	SwissLipids  is  licensed  under  a  Creative  Commons   Attribution-Non
+#	Commercial-NoDerivatives 4.0 International License.
+#	
+#	Commercial users and those who wish to  use  this  work  for  commercial
+#	purposes  please  contact  the  SIB  technology  transfer  officer   at:
+#	marc.filliettaz@genebio.com
+#	(https://www.swisslipids.org/#/downloads)
+#ID	name	reference	formula	charge	mass	InChI	InChIKey	SMILES					
+MNXM738702	NADPH	chebi:57783	C21H26N7O17P3	-4	741.06200	InChI=1S/C21H30N7O17P3/c22-17-12-19(25-7-24-17)28(8-26-12)21-16(44-46(33,34)35)14(30)11(43-21)6-41-48(38,39)45-47(36,37)40-5-10-13(29)15(31)20(42-10)27-3-1-2-9(4-27)18(23)32/h1,3-4,7-8,10-11,13-16,20-21,29-31H,2,5-6H2,(H2,23,32)(H,36,37)(H,38,39)(H2,22,24,25)(H2,33,34,35)/p-4/t10-,11-,13-,14-,15-,16-,20-,21-/m1/s1	InChIKey=ACFIXJIJDZMPPO-NNYOXOHSSA-J	NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)([O-])[O-])[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1
+MNXM97613	tetracycline	chebi:77932	C22H24N2O8	0	444.15327	InChI=1S/C22H24N2O8/c1-21(31)8-5-4-6-11(25)12(8)16(26)13-9(21)7-10-15(24(2)3)17(27)14(20(23)30)19(29)22(10,32)18(13)28/h4-6,9-10,15,25,27-28,31-32H,7H2,1-3H3,(H2,23,30)/t9-,10-,15-,21+,22-/m0/s1	InChIKey=OFVLGDICTFRJMM-WESIUVDSSA-N	C[NH+](C)[C@@H]1C([O-])=C(C(N)=O)C(=O)[C@@]2(O)C(O)=C3C(=O)c4c(O)cccc4[C@@](C)(O)[C@H]3C[C@@H]12
+MNXM162730	11a-hydroxytetracycline	chebi:132727	C22H24N2O9	0	460.14818	InChI=1S/C22H24N2O9/c1-20(31)8-5-4-6-10(25)12(8)16(27)22(33)11(20)7-9-14(24(2)3)15(26)13(18(23)29)17(28)21(9,32)19(22)30/h4-6,9,11,14,25-26,31-33H,7H2,1-3H3,(H2,23,29)/t9-,11+,14-,20+,21+,22-/m0/s1	InChIKey=FWVRSACGGAUWNP-BWOONYPSSA-N	C[NH+](C)[C@@H]1C([O-])=C(C(N)=O)C(=O)[C@@]2(O)C(=O)[C@@]3(O)C(=O)c4c(O)cccc4[C@@](C)(O)[C@H]3C[C@@H]12
+MNXM5	NADP(+)	chebi:58349	C21H25N7O17P3	-3	740.05362	InChI=1S/C21H28N7O17P3/c22-17-12-19(25-7-24-17)28(8-26-12)21-16(44-46(33,34)35)14(30)11(43-21)6-41-48(38,39)45-47(36,37)40-5-10-13(29)15(31)20(42-10)27-3-1-2-9(4-27)18(23)32/h1-4,7-8,10-11,13-16,20-21,29-31H,5-6H2,(H7-,22,23,24,25,32,33,34,35,36,37,38,39)/p-3/t10-,11-,13-,14-,15-,16-,20-,21-/m1/s1	InChIKey=XJLXINKUBYWONI-NNYOXOHSSA-K	NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)([O-])[O-])[C@@H]3O)[C@@H](O)[C@H]2O)c1
+MNXM737425	1,2-di-(9Z-octadecenoyl)-sn-glycero-3-phosphoethanolamine	chebi:74986	C41H78NO8P	0	743.54651	InChI=1S/C41H78NO8P/c1-3-5-7-9-11-13-15-17-19-21-23-25-27-29-31-33-40(43)47-37-39(38-49-51(45,46)48-36-35-42)50-41(44)34-32-30-28-26-24-22-20-18-16-14-12-10-8-6-4-2/h17-20,39H,3-16,21-38,42H2,1-2H3,(H,45,46)/b19-17-,20-18-/t39-/m1/s1	InChIKey=MWRBNPKJOOWZPW-NYVOMTAGSA-N	CCCCCCCC/C=C\CCCCCCCC(=O)OC[C@H](COP(=O)([O-])OCC[NH3+])OC(=O)CCCCCCC/C=C\CCCCCCCC
+MNXM1107708	(9Z)-octadecenoate	chebi:30823	C18H33O2	-1	281.24860	InChI=1S/C18H34O2/c1-2-3-4-5-6-7-8-9-10-11-12-13-14-15-16-17-18(19)20/h9-10H,2-8,11-17H2,1H3,(H,19,20)/p-1/b10-9-	InChIKey=ZQPPMHVWECSIRJ-KTKRTIGZSA-M	CCCCCCCC/C=C\CCCCCCCC(=O)[O-]
\ No newline at end of file
diff --git a/metanetx_uniprot/TestingFiles/chem_xref.tsv b/metanetx_uniprot/TestingFiles/chem_xref.tsv
new file mode 100644
index 00000000..9ce7e27d
--- /dev/null
+++ b/metanetx_uniprot/TestingFiles/chem_xref.tsv
@@ -0,0 +1,362 @@
+### MetaNetX/MNXref reconciliation ###
+#Based on the following resources:
+#
+#RESOURCE:  MetaNetX/MNXref
+#VERSION:   4.4
+#DATE:      2022/03/16
+#URL:       https://www.metanetx.org
+#LICENSE:	
+#	MetaNetX copyright 2011 SystemsX, SIB Swiss Institute of Bioinformatics
+#	Except where otherwise noted, the data  available  from  this  site  are
+#	licensed under a Creative Commons Attribution 4.0 International License.
+#	MNXref  uses  information  on  cellular  compartments,  reactions,   and
+#	metabolites that is sourced from a number  of  external  resources.  The
+#	licensing agreements of those resources are specified  in  each  of  the
+#	downloadable  files  listed  below.  For  each  compound,  reaction  and
+#	cellular compartment in the MNXref namespace we indicate which  external
+#	resource  provided  the  information  used  in  MNXref.  Compounds   and
+#	reactions in the MNXref namespace may be identical to, or  differ  from,
+#	those in the external resource. In either case the data from MNXref  may
+#	be considered to be subject to the original  licensing  restrictions  of
+#	the external resource.
+#	(https://www.metanetx.org/mnxdoc/mnxref.html)
+#
+#RESOURCE:  BiGG
+#VERSION:   1.6.0, last updated: 2019/10/31 (downloaded on 2021/07/23)
+#URL:       http://bigg.ucsd.edu
+#LICENSE:	
+#	Copyright 2015 The Regents of the University of California
+#	
+#	All Rights Reserved
+#	
+#	Permission to use, copy, modify and distribute any part of  BiGG  Models
+#	for educational, research and  non-profit  purposes,  without  fee,  and
+#	without a written agreement is hereby granted, provided that  the  above
+#	copyright notice, this paragraph  and  the  following  three  paragraphs
+#	appear in all copies.
+#	
+#	Those desiring to incorporate BiGG Models into  commercial  products  or
+#	use for commercial purposes should contact  the  Technology  Transfer  &
+#	Intellectual Property Services, University  of  California,  San  Diego,
+#	9500 Gilman Drive, Mail Code 0910, La Jolla, CA  92093-0910,  Ph:  (858)
+#	534-5815, FAX: (858) 534-7345, e-mail: invent@ucsd.edu.
+#	
+#	In no event shall the University of California be liable  to  any  party
+#	for direct, indirect, special,  incidental,  or  consequential  damages,
+#	including lost profits, arising out of the use of  this  bigg  database,
+#	even if the University of California has been advised of the possibility
+#	of such damage.
+#	
+#	The BiGG Models provided  herein  is  on  an  "as  is"  basis,  and  the
+#	University of California  has  no  obligation  to  provide  maintenance,
+#	support, updates, enhancements,  or  modifications.  The  University  of
+#	California makes no representations and extends  no  warranties  of  any
+#	kind, either implied or express, including,  but  not  limited  to,  the
+#	implied warranties  of  merchantability  or  fitness  for  a  particular
+#	purpose, or that the use of  the  BiGG  Models  will  not  infringe  any
+#	patent, trademark or other rights.
+#	(http://bigg.ucsd.edu/)
+#
+#RESOURCE:  The Cell Component Ontology
+#VERSION:   25.0 (downloaded on 2021/06/03)
+#URL:       https://bioinformatics.ai.sri.com/CCO/
+#LICENSE:	
+#	"Open  Databases"  means  the  EcoCyc   and   MetaCyc   Pathway/genome
+#	databases.
+#	
+#	2.1 Open Databases. SRI  hereby  grants  to  LICENSEE  a  non-exclusive,
+#	royalty-free license to use, modify and redistribute the Open  Databases
+#	(as such term  is  defined  in  Exhibit  B)  and  LICENSEE's  modified
+#	versions thereof on a royalty-free basis, worldwide and for any purpose;
+#	provided, in each case, that if LICENSEE modifies any Open Database (the
+#	modified  version  being  a  "Modified  Open  Database"),  then  (i)
+#	LICENSEE must provide a copy of the Modified Open Database to  SRI  (and
+#	hereby grants to  SRI  a  nonexclusive,  royalty-free  license  to  use,
+#	modify, and redistribute the Modified Open Database  worldwide  and  for
+#	any purpose and to authorize others to do so);  and  (ii)  any  Modified
+#	Open Databases, or websites from which such Modified Open Databases  may
+#	be obtained, must clearly and prominently:
+#	
+#	(a) identify the Open Databases from which they were derived:
+#	
+#	(b) include all applicable copyright notices and author lists  from  the
+#	Open Databases from which they were derived; and
+#	
+#	(c) identify or summarize all modifications that were made.
+#	
+#	Any distribution of such Modified Open Databases  without  the  required
+#	notices is a violation of SRI's and its licensors' copyright  and  other
+#	proprietary rights. All trademarks, service marks, and trade  names  are
+#	proprietary to SRI and its licensors. The Open Databases, including  any
+#	files incorporated in or generated from  the  Open  Databases  and  data
+#	accompanying the Open Databases, are licensed to LICENSEE by SRI and its
+#	licensors, and SRI and its licensors do not transfer title or any  other
+#	rights in the Open Databases to LICENSEE. LICENSEE may not use the  Open
+#	Databases except as otherwise specified herein.
+#	
+#	2.1.1 If SRI,  in  its  sole  discretion,  determines  that  a  Modified
+#	Database is of sufficient quality and interest to the  community  to  be
+#	hosted on biocyc.org, then SRI may (if the  Modified  Database  includes
+#	significant curation over the original Open Database it is derived from,
+#	or the last version of the Modified Database provided to SRI) provide to
+#	LICENSEE a  personal,  one-year  subscription  to  biocyc  at  no  cost;
+#	provided, however, that if LICENSEE edits the Modified  Database  via  a
+#	MySQL server operated by SRI or  its  contractors,  such  free  one-year
+#	subscription will be forfeited.
+#	(https://biocyc.org/ptools-academic-license.shtml)
+#
+#RESOURCE:  ChEBI
+#VERSION:   203 (downloaded on 2021/09/30)
+#URL:       https://www.ebi.ac.uk/chebi/
+#LICENSE:	
+#	All data in the  database  is  non-proprietary  or  is  derived  from  a
+#	non-proprietary source. It is thus freely accessible  and  available  to
+#	anyone. In addition, each data item is fully  traceable  and  explicitly
+#	referenced to the original source.
+#	(https://www.ebi.ac.uk/chebi/aboutChebiForward.do)
+#
+#RESOURCE:  enviPath
+#VERSION:   (downloaded on 2021/11/24)
+#URL:       https://envipath.org
+#LICENSE:	
+#	The core data sets of enviPath are licensed under the Creative Commons
+#	Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA 4.0)
+#	license. This allows you to use them in a non-commercial context, for
+#	example if you work at a University or for a public research institute.
+#	You can even redistribute and modify the data using the same license. If
+#	you want to use the data commercially, contact us, we offer commercial
+#	license agreements.
+#	We summarized how you can use the data on our license page.
+#	(https://envipath.com/license/)
+#
+#RESOURCE:  HMDB
+#VERSION:   4.0 (downloaded on 2021/06/18)
+#URL:       https://hmdb.ca
+#LICENSE:	
+#	HMDB is offered to the public as a freely available  resource.  Use  and
+#	re-distribution of the  data,  in  whole  or  in  part,  for  commercial
+#	purposes requires  explicit  permission  of  the  authors  and  explicit
+#	acknowledgment  of  the  source  material  (HMDB)   and   the   original
+#	publication.
+#	(https://hmdb.ca/about)
+#
+#RESOURCE:  KEGG
+#VERSION:   98.0+/06-11, Jun 21 (downloaded on 2021/06/11)
+#URL:       https://www.kegg.jp
+#LICENSE:	
+#	Academic users may freely use the KEGG website and may also freely  link
+#	to the KEGG website.
+#	Non-academic  users  may  use  the  KEGG  website  as  end   users   for
+#	non-commercial purposes, but any other use requires a license agreement.
+#	Academic users who utilize KEGG  for  providing  academic  services  are
+#	requested to obtain a KEGG  FTP  subscription  for  organizational  use,
+#	which includes a proper license agreement.
+#	Non-academic  users  and  Academic  users  intending  to  use  KEGG  for
+#	commercial purposes are requested to obtain a license agreement  through
+#	KEGG's exclusive licensing agent, Pathway Solutions.
+#	(https://www.kegg.jp/kegg/legal.html)
+#
+#RESOURCE:  LipidMaps
+#VERSION:   2021-05-28 (downloaded on 2021/06/11)
+#URL:       https://www.lipidmaps.org
+#LICENSE:	
+#	The Lipidomics Gateway is provided on an "as is" basis, without warranty
+#	or representation of any kind, express or implied. The  content  of  the
+#	Lipidomics Gateway website  is  protected  by  international  copyright,
+#	trademark and other laws. You may download articles and web  pages  from
+#	this site for your personal, non-commercial use only, provided that  you
+#	keep intact all authorship, copyright and other proprietary notices. The
+#	Featured Lipid can also be used for educational purposes, provided  that
+#	credit is given to the Lipidomics Gateway. If  you  use  the  Lipidomics
+#	Gateway, you accept these terms. The LIPID MAPS Consortium reserves  the
+#	right to modify these terms at any time.
+#	(https://www.lipidmaps.org/about/)
+#
+#RESOURCE:  MetaCyc
+#VERSION:   25.0 (downloaded on 2021/06/03)
+#URL:       https://metacyc.org
+#LICENSE:	
+#	"Open  Databases"  means  the  EcoCyc   and   MetaCyc   Pathway/genome
+#	databases.
+#	
+#	2.1 Open Databases. SRI  hereby  grants  to  LICENSEE  a  non-exclusive,
+#	royalty-free license to use, modify and redistribute the Open  Databases
+#	(as such term  is  defined  in  Exhibit  B)  and  LICENSEE's  modified
+#	versions thereof on a royalty-free basis, worldwide and for any purpose;
+#	provided, in each case, that if LICENSEE modifies any Open Database (the
+#	modified  version  being  a  "Modified  Open  Database"),  then  (i)
+#	LICENSEE must provide a copy of the Modified Open Database to  SRI  (and
+#	hereby grants to  SRI  a  nonexclusive,  royalty-free  license  to  use,
+#	modify, and redistribute the Modified Open Database  worldwide  and  for
+#	any purpose and to authorize others to do so);  and  (ii)  any  Modified
+#	Open Databases, or websites from which such Modified Open Databases  may
+#	be obtained, must clearly and prominently:
+#	
+#	(a) identify the Open Databases from which they were derived:
+#	
+#	(b) include all applicable copyright notices and author lists  from  the
+#	Open Databases from which they were derived; and
+#	
+#	(c) identify or summarize all modifications that were made.
+#	
+#	Any distribution of such Modified Open Databases  without  the  required
+#	notices is a violation of SRI's and its licensors' copyright  and  other
+#	proprietary rights. All trademarks, service marks, and trade  names  are
+#	proprietary to SRI and its licensors. The Open Databases, including  any
+#	files incorporated in or generated from  the  Open  Databases  and  data
+#	accompanying the Open Databases, are licensed to LICENSEE by SRI and its
+#	licensors, and SRI and its licensors do not transfer title or any  other
+#	rights in the Open Databases to LICENSEE. LICENSEE may not use the  Open
+#	Databases except as otherwise specified herein.
+#	
+#	2.1.1 If SRI,  in  its  sole  discretion,  determines  that  a  Modified
+#	Database is of sufficient quality and interest to the  community  to  be
+#	hosted on biocyc.org, then SRI may (if the  Modified  Database  includes
+#	significant curation over the original Open Database it is derived from,
+#	or the last version of the Modified Database provided to SRI) provide to
+#	LICENSEE a  personal,  one-year  subscription  to  biocyc  at  no  cost;
+#	provided, however, that if LICENSEE edits the Modified  Database  via  a
+#	MySQL server operated by SRI or  its  contractors,  such  free  one-year
+#	subscription will be forfeited.
+#	(https://biocyc.org/ptools-academic-license.shtml)
+#
+#RESOURCE:  Reactome
+#VERSION:   77 June 14, 2021 (downloaded on 2021/09/03)
+#URL:       https://reactome.org
+#LICENSE:	
+#	Reactome is an open source and open access resource, available to anyone.
+#	Usage of Reactome material is covered by two Creative Commons licenses:
+#	
+#	The terms of the Creative Commons Public Domain (CC0) License apply to all
+#	Reactome annotation files, e.g. identifier mapping data, specialized data
+#	files, and interaction data derived from Reactome.
+#	(https://reactome.org/license/)
+#
+#RESOURCE:  Rhea
+#VERSION:   119 (downloaded on 2021/11/03)
+#URL:       https://www.rhea-db.org
+#LICENSE:	
+#	All data in Rhea is freely accessible and available for anyone to use under
+#	the Creative Commons Attribution License.
+#	(https://www.rhea-db.org/documentation)
+#
+#RESOURCE:  SABIO-RK
+#VERSION:   Software Update: 2021/05/11 -- Database Release: 2021/05/28 (downloaded on 2021/07/01)
+#URL:       http://sabiork.h-its.org
+#LICENSE:	
+#	HITS, gGmbH HITS own the  SABIO-RK  database,  its  interfaces  and  its
+#	associated  documentation  (all  referred  to  in   the   following   as
+#	"Database").  You  should  carefully  read  the  following   terms   and
+#	conditions before  using  this  Database.  Your  use  of  this  Database
+#	indicates your acceptance of this license agreement and  all  terms  and
+#	conditions.You are hereby granted a non-exclusive  and  non-transferable
+#	license to use  the  Database  according  to  the  following  terms  and
+#	conditions. This license is  to  use  the  Database  for  Non-Commercial
+#	Purpose only. Non-Commercial Purpose  means  the  use  of  the  Database
+#	solely for  internal  non-commercial  research  and  academic  purposes.
+#	Non-Commercial Purpose excludes, without  limitation,  any  use  of  the
+#	Database, as part of, or in any way in  connection  with  a  product  or
+#	service which is sold, offered for sale, licensed,  leased,  loaned,  or
+#	rented. Permission to use this Database for  Non-Commercial  Purpose  is
+#	hereby granted without fee and subject to the following  terms  of  this
+#	license.
+#	
+#	Commercial Use
+#	If you desire to  use  the  Database  for  profit-making  or  commercial
+#	purposes, you agree to negotiate in good faith a license with  the  HITS
+#	prior to such profit-making or commercial use. The HITS  shall  have  no
+#	obligation to grant such license to you,  and  may  grant  exclusive  or
+#	non-exclusive licenses to others. You agree to notify the  HITS  of  any
+#	inquiries you have  for  commercial  use  of  the  Database  and/or  its
+#	modifications. You may contact the following email to discuss commercial
+#	use: sabiork at h-its.org
+#	
+#	Governing Law
+#	This Agreement is governed  by  the  law  of  the  Federal  Republic  of
+#	Germany. The application of the UN Convention on the Sale  of  Goods  is
+#	excluded.
+#	
+#	Disclaimer of Warranty
+#	Because this Database is licensed free of charge, there is  no  warranty
+#	for the data in it contained and the methods used for its querying.  The
+#	HITS makes no warranty or  representation  that  the  operation  of  the
+#	Database in this compilation will be error-free, and the HITS  is  under
+#	no obligation to provide any services, by way of maintenance, update, or
+#	otherwise.
+#	
+#	THIS DATABASE AND THE  ACCOMPANYING  FILES  ARE  LICENSED  "AS  IS"  AND
+#	WITHOUT WARRANTIES AS TO PERFORMANCE OR  MERCHANTABILITY  OR  ANY  OTHER
+#	WARRANTIES WHETHER EXPRESSED OR IMPLIED. NO WARRANTY OF  FITNESS  FOR  A
+#	PARTICULAR PURPOSE IS OFFERED. THE ENTIRE RISK AS  TO  THE  QUALITY  AND
+#	PERFORMANCE OF THE  PROGRAM  IS  WITH  YOU.  SHOULD  THE  PROGRAM  PROVE
+#	DEFECTIVE, YOU ASSUME THE COST OF ALL  NECESSARY  SERVICING,  REPAIR  OR
+#	CORRECTION.
+#	
+#	Limitation of Liability
+#	IN NO EVENT WILL  HITS,  OR  ANY  OTHER  PARTY  WHO  MAY  MODIFY  AND/OR
+#	REDISTRIBUTE THE DATABASE AS PERMITTED  ABOVE,  BE  LIABLE  TO  YOU  FOR
+#	DAMAGES, INCLUDING ANY GENERAL,  SPECIAL,  INCIDENTAL  OR  CONSEQUENTIAL
+#	DAMAGES ARISING  OUT  OF  THE  USE  OR  INABILITY  TO  USE  THE  PROGRAM
+#	(INCLUDING BUT NOT LIMITED TO  LOSS  OF  DATA  OR  DATA  BEING  RENDERED
+#	INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A  FAILURE  OF
+#	THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF VTIP  AND  HITS
+#	OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+#	
+#	Reference to SABIO-RK  Users  will  cite  SABIO-RK  in  publications  or
+#	presentations, whenever the data used was extracted from  the  database.
+#	Termination This  agreement  is  effective  until  terminated.  You  may
+#	terminate this agreement  at  any  time  by  destroying  all  associated
+#	material (e.g., documentation or web service clients) to the database in
+#	your possession and by stopping any access to the database  directly  or
+#	from  software  generated  by  you.  This   agreement   will   terminate
+#	immediately without notice from and HITS if you fail to comply with  any
+#	of the terms and conditions of this license. This  agreement  will  also
+#	terminate immediately without notice from the HITS if  it  is  found  to
+#	implement patented algorithms or contain copyrighted code not  owned  or
+#	licensed the HITS for the purpose  of  its  inclusion  in  the  SABIO-RK
+#	Database. This agreement cannot be terminated by any other mechanism  or
+#	for any other reason than those stated herein.
+#	
+#	Place of Court
+#	The exclusive venue for all disputes arising from or in connection  with
+#	this Agreement is Mannheim, Germany (HRB 337446), when the Licensee is a
+#	business person, a legal entity governed by public  law,  or  a  special
+#	fund governed by public law,  or  does  not  have  a  general  place  of
+#	jurisdiction  within  the  Federal  Republic  of  Germany.  Address  all
+#	correspondence  regarding  this  license  to  electronic  mail  address:
+#	sabiork at h-its.org Any inquiries  and  comments  regarding  bugs,  bug
+#	fixes, enhancements, modifications or any other similar issues should be
+#	directed to: sabiork at h-its.org
+#	
+#	Copyright 2007 by HITS, gGmbH. All rights reserved.
+#	(http://sabiork.h-its.org/layouts/content/termscondition.gsp)
+#
+#RESOURCE:  The SEED
+#VERSION:   2.6.1 (July 31, 2020) (downloaded on 2021/08/09)
+#URL:       https://modelseed.org
+#LICENSE:	
+#	All tools and datasets that make up the SEED are in the public domain.
+#	(https://modelseed.org)
+#
+#RESOURCE:  SwissLipids
+#VERSION:   (downloaded on 2021/07/29)
+#URL:       https://www.swisslipids.org
+#LICENSE:	
+#	SwissLipids  is  licensed  under  a  Creative  Commons   Attribution-Non
+#	Commercial-NoDerivatives 4.0 International License.
+#	
+#	Commercial users and those who wish to  use  this  work  for  commercial
+#	purposes  please  contact  the  SIB  technology  transfer  officer   at:
+#	marc.filliettaz@genebio.com
+#	(https://www.swisslipids.org/#/downloads)
+#source	ID	description
+BIOMASS	BIOMASS	BIOMASS
+CHEBI:57783	MNXM738702	NADPH||2'-O-phosphonatoadenosine 5'-{3-[1-(3-carbamoyl-1,4-dihydropyridin-1-yl)-1,4-anhydro-D-ribitol-5-yl] diphosphate}||NADPH tetraanion||NADPH(4-)
+CHEBI:77932	MNXM97613	tetracycline||(1S,4aS,11S,11aS,12aS)-3-carbamoyl-1-(dimethylazaniumyl)-4a,5,7,11-tetrahydroxy-11-methyl-4,6-dioxo-1,4,4a,6,11,11a,12,12a-octahydrotetracen-2-olate||tetracycline zwitterion
+CHEBI:132727	MNXM162730	11a-hydroxytetracycline||(1S,4aR5aS,11S,11aR,12aS)-3-carbamoyl-1-(dimethylazaniumyl)-4a,5a,7,11-tetrahydroxy-11-methyl-4,5,6-trioxo-1,4,4a,5,5a,6,11,11a,12,12a-decahydrotetracen-2-olate||11a-hydroxytetracycline zwitterion
+chebi:15377	WATER	H2O||BOUND WATER||HOH||WATER||Wasser||Water||[OH2]||acqua||agua||aqua||dihydridooxygen||dihydrogen oxide||eau||hydrogen hydroxide||oxidane||water
+CHEBI:58349	MNXM5	NADP(+)||2'-O-phosphonatoadenosine 5'-{3-[1-(3-carbamoylpyridinio)-1,4-anhydro-D-ribitol-5-yl] diphosphate}||NADP trianion||NADP(3-)
+CHEBI:74986	MNXM737425	1,2-di-(9Z-octadecenoyl)-sn-glycero-3-phosphoethanolamine||1,2-dioleoyl-sn-glycero-3-phosphoethanolamine zwitterion||1-(9Z)-octadecenoyl-2-(9Z)-octadecenoyl-sn-glycero-3-phosphoethanolamine zwitterion||1-C18:1(omega-9)-2-C18:1(omega-9)-phosphatidylethanolamine zwitterion||2-azaniumylethyl (2R)-2,3-bis[(9Z)-octadec-9-enoyloxy]propyl phosphate
+chebi:14389	MNXM738220	secondary/obsolete/fantasy identifier
+CHEBI:15378	MNXM1	H(+)||H+||Hydron||hydrogen(1+)||hydron
+CHEBI:30823	MNXM1107708	(9Z)-octadecenoate||(9Z)-octadec-9-enoate||(Z)-9-octadecenoic acid, ion(1-)||Oleat||cis-9-octadecenoate||oleate||oleic acid anion
\ No newline at end of file
diff --git a/metanetx_uniprot/TestingFiles/ncbitaxon.json b/metanetx_uniprot/TestingFiles/ncbitaxon.json
new file mode 100644
index 00000000..2324a45b
--- /dev/null
+++ b/metanetx_uniprot/TestingFiles/ncbitaxon.json
@@ -0,0 +1,188 @@
+{
+  "graphs" : [ {
+    "nodes" : [ {
+      "id" : "http://purl.obolibrary.org/obo/NCBITaxon_817",
+      "meta" : {
+        "xrefs" : [ {
+          "val" : "PMID:16559622"
+        }, {
+          "val" : "GC_ID:11"
+        }, {
+          "val" : "PMID:28066339"
+        } ],
+        "synonyms" : [ {
+          "pred" : "hasRelatedSynonym",
+          "val" : "Bacteroides incommunis",
+          "xrefs" : [ ],
+          "synonymType" : "http://purl.obolibrary.org/obo/ncbitaxon#synonym"
+        }, {
+          "pred" : "hasRelatedSynonym",
+          "val" : "Pseudobacterium fragilis",
+          "xrefs" : [ ],
+          "synonymType" : "http://purl.obolibrary.org/obo/ncbitaxon#synonym"
+        }, {
+          "pred" : "hasRelatedSynonym",
+          "val" : "Ristella uncata",
+          "xrefs" : [ ],
+          "synonymType" : "http://purl.obolibrary.org/obo/ncbitaxon#synonym"
+        }, {
+          "pred" : "hasRelatedSynonym",
+          "val" : "Ristella incommunis",
+          "xrefs" : [ ],
+          "synonymType" : "http://purl.obolibrary.org/obo/ncbitaxon#synonym"
+        }, {
+          "pred" : "hasRelatedSynonym",
+          "val" : "Bacteroides inaequalis",
+          "xrefs" : [ ],
+          "synonymType" : "http://purl.obolibrary.org/obo/ncbitaxon#synonym"
+        }, {
+          "pred" : "hasRelatedSynonym",
+          "val" : "Pseudobacterium incommunis",
+          "xrefs" : [ ],
+          "synonymType" : "http://purl.obolibrary.org/obo/ncbitaxon#synonym"
+        }, {
+          "pred" : "hasRelatedSynonym",
+          "val" : "Bacteroides uncatus",
+          "xrefs" : [ ],
+          "synonymType" : "http://purl.obolibrary.org/obo/ncbitaxon#synonym"
+        }, {
+          "pred" : "hasRelatedSynonym",
+          "val" : "Sphaerophorus inaequalis",
+          "xrefs" : [ ],
+          "synonymType" : "http://purl.obolibrary.org/obo/ncbitaxon#synonym"
+        }, {
+          "pred" : "hasRelatedSynonym",
+          "val" : "Fusiformis fragilis",
+          "xrefs" : [ ],
+          "synonymType" : "http://purl.obolibrary.org/obo/ncbitaxon#synonym"
+        }, {
+          "pred" : "hasRelatedSynonym",
+          "val" : "Sphaerophorus intermedius",
+          "xrefs" : [ ],
+          "synonymType" : "http://purl.obolibrary.org/obo/ncbitaxon#synonym"
+        }, {
+          "pred" : "hasRelatedSynonym",
+          "val" : "Pseudobacterium inaequalis",
+          "xrefs" : [ ],
+          "synonymType" : "http://purl.obolibrary.org/obo/ncbitaxon#synonym"
+        }, {
+          "pred" : "hasRelatedSynonym",
+          "val" : "Bacillus fragilis",
+          "xrefs" : [ ],
+          "synonymType" : "http://purl.obolibrary.org/obo/ncbitaxon#synonym"
+        }, {
+          "pred" : "hasRelatedSynonym",
+          "val" : "Ristella fragilis",
+          "xrefs" : [ ],
+          "synonymType" : "http://purl.obolibrary.org/obo/ncbitaxon#synonym"
+        }, {
+          "pred" : "hasRelatedSynonym",
+          "val" : "Pseudobacterium uncatum",
+          "xrefs" : [ ],
+          "synonymType" : "http://purl.obolibrary.org/obo/ncbitaxon#synonym"
+        } ],
+        "basicPropertyValues" : [ {
+          "pred" : "http://www.geneontology.org/formats/oboInOwl#hasAlternativeId",
+          "val" : "NCBITaxon:665938"
+        }, {
+          "pred" : "http://www.geneontology.org/formats/oboInOwl#hasAlternativeId",
+          "val" : "NCBITaxon:33929"
+        }, {
+          "pred" : "http://purl.obolibrary.org/obo/ncbitaxon#has_rank",
+          "val" : "http://purl.obolibrary.org/obo/NCBITaxon_species"
+        }, {
+          "pred" : "http://www.geneontology.org/formats/oboInOwl#hasOBONamespace",
+          "val" : "ncbi_taxonomy"
+        }, {
+          "pred" : "http://www.geneontology.org/formats/oboInOwl#hasAlternativeId",
+          "val" : "NCBITaxon:469587"
+        } ]
+      },
+      "type" : "CLASS",
+      "lbl" : "Bacteroides fragilis"
+    },  {
+      "id" : "http://purl.obolibrary.org/obo/NCBITaxon_562",
+      "meta" : {
+        "xrefs" : [ {
+          "val" : "GC_ID:11"
+        }, {
+          "val" : "PMID:10319482"
+        } ],
+        "synonyms" : [ {
+          "pred" : "hasRelatedSynonym",
+          "val" : "Enterococcus coli",
+          "xrefs" : [ ],
+          "synonymType" : "http://purl.obolibrary.org/obo/ncbitaxon#synonym"
+        }, {
+          "pred" : "hasExactSynonym",
+          "val" : "Escherichia/Shigella coli",
+          "xrefs" : [ ],
+          "synonymType" : "http://purl.obolibrary.org/obo/ncbitaxon#equivalent_name"
+        }, {
+          "pred" : "hasRelatedSynonym",
+          "val" : "Bacillus coli",
+          "xrefs" : [ ],
+          "synonymType" : "http://purl.obolibrary.org/obo/ncbitaxon#synonym"
+        }, {
+          "pred" : "hasRelatedSynonym",
+          "val" : "Bacterium coli",
+          "xrefs" : [ ],
+          "synonymType" : "http://purl.obolibrary.org/obo/ncbitaxon#synonym"
+        }, {
+          "pred" : "hasRelatedSynonym",
+          "val" : "Bacterium coli commune",
+          "xrefs" : [ ],
+          "synonymType" : "http://purl.obolibrary.org/obo/ncbitaxon#synonym"
+        }, {
+          "pred" : "hasExactSynonym",
+          "val" : "E. coli",
+          "xrefs" : [ ],
+          "synonymType" : "http://purl.obolibrary.org/obo/ncbitaxon#common_name"
+        } ],
+        "basicPropertyValues" : [ {
+          "pred" : "http://www.geneontology.org/formats/oboInOwl#hasAlternativeId",
+          "val" : "NCBITaxon:1806490"
+        }, {
+          "pred" : "http://purl.obolibrary.org/obo/ncbitaxon#has_rank",
+          "val" : "http://purl.obolibrary.org/obo/NCBITaxon_species"
+        }, {
+          "pred" : "http://www.geneontology.org/formats/oboInOwl#hasAlternativeId",
+          "val" : "NCBITaxon:469598"
+        }, {
+          "pred" : "http://www.geneontology.org/formats/oboInOwl#hasAlternativeId",
+          "val" : "NCBITaxon:1637691"
+        }, {
+          "pred" : "http://www.geneontology.org/formats/oboInOwl#hasOBONamespace",
+          "val" : "ncbi_taxonomy"
+        }, {
+          "pred" : "http://www.geneontology.org/formats/oboInOwl#hasAlternativeId",
+          "val" : "NCBITaxon:662104"
+        }, {
+          "pred" : "http://www.geneontology.org/formats/oboInOwl#hasAlternativeId",
+          "val" : "NCBITaxon:662101"
+        } ]
+      },
+      "type" : "CLASS",
+      "lbl" : "Escherichia coli"
+    } ],
+    "edges" : [ {
+      "sub" : "http://purl.obolibrary.org/obo/NCBITaxon_295405",
+      "pred" : "is_a",
+      "obj" : "http://purl.obolibrary.org/obo/NCBITaxon_817"
+    }, {
+      "sub" : "http://purl.obolibrary.org/obo/NCBITaxon_1389418",
+      "pred" : "is_a",
+      "obj" : "http://purl.obolibrary.org/obo/NCBITaxon_562"
+    } ],
+    "id" : "http://purl.obolibrary.org/obo/ncbitaxon.owl",
+    "meta" : {
+      "subsets" : [ ],
+      "xrefs" : [ ],
+      "basicPropertyValues" : [ ]
+    },
+    "equivalentNodesSets" : [ ],
+    "logicalDefinitionAxioms" : [ ],
+    "domainRangeAxioms" : [ ],
+    "propertyChainAxioms" : [ ]
+  } ]
+}
\ No newline at end of file
diff --git a/metanetx_uniprot/TestingFiles/reac_prop.tsv b/metanetx_uniprot/TestingFiles/reac_prop.tsv
new file mode 100644
index 00000000..75826bdb
--- /dev/null
+++ b/metanetx_uniprot/TestingFiles/reac_prop.tsv
@@ -0,0 +1,359 @@
+### MetaNetX/MNXref reconciliation ###
+#Based on the following resources:
+#
+#RESOURCE:  MetaNetX/MNXref
+#VERSION:   4.4
+#DATE:      2022/03/16
+#URL:       https://www.metanetx.org
+#LICENSE:	
+#	MetaNetX copyright 2011 SystemsX, SIB Swiss Institute of Bioinformatics
+#	Except where otherwise noted, the data  available  from  this  site  are
+#	licensed under a Creative Commons Attribution 4.0 International License.
+#	MNXref  uses  information  on  cellular  compartments,  reactions,   and
+#	metabolites that is sourced from a number  of  external  resources.  The
+#	licensing agreements of those resources are specified  in  each  of  the
+#	downloadable  files  listed  below.  For  each  compound,  reaction  and
+#	cellular compartment in the MNXref namespace we indicate which  external
+#	resource  provided  the  information  used  in  MNXref.  Compounds   and
+#	reactions in the MNXref namespace may be identical to, or  differ  from,
+#	those in the external resource. In either case the data from MNXref  may
+#	be considered to be subject to the original  licensing  restrictions  of
+#	the external resource.
+#	(https://www.metanetx.org/mnxdoc/mnxref.html)
+#
+#RESOURCE:  BiGG
+#VERSION:   1.6.0, last updated: 2019/10/31 (downloaded on 2021/07/23)
+#URL:       http://bigg.ucsd.edu
+#LICENSE:	
+#	Copyright 2015 The Regents of the University of California
+#	
+#	All Rights Reserved
+#	
+#	Permission to use, copy, modify and distribute any part of  BiGG  Models
+#	for educational, research and  non-profit  purposes,  without  fee,  and
+#	without a written agreement is hereby granted, provided that  the  above
+#	copyright notice, this paragraph  and  the  following  three  paragraphs
+#	appear in all copies.
+#	
+#	Those desiring to incorporate BiGG Models into  commercial  products  or
+#	use for commercial purposes should contact  the  Technology  Transfer  &
+#	Intellectual Property Services, University  of  California,  San  Diego,
+#	9500 Gilman Drive, Mail Code 0910, La Jolla, CA  92093-0910,  Ph:  (858)
+#	534-5815, FAX: (858) 534-7345, e-mail: invent@ucsd.edu.
+#	
+#	In no event shall the University of California be liable  to  any  party
+#	for direct, indirect, special,  incidental,  or  consequential  damages,
+#	including lost profits, arising out of the use of  this  bigg  database,
+#	even if the University of California has been advised of the possibility
+#	of such damage.
+#	
+#	The BiGG Models provided  herein  is  on  an  "as  is"  basis,  and  the
+#	University of California  has  no  obligation  to  provide  maintenance,
+#	support, updates, enhancements,  or  modifications.  The  University  of
+#	California makes no representations and extends  no  warranties  of  any
+#	kind, either implied or express, including,  but  not  limited  to,  the
+#	implied warranties  of  merchantability  or  fitness  for  a  particular
+#	purpose, or that the use of  the  BiGG  Models  will  not  infringe  any
+#	patent, trademark or other rights.
+#	(http://bigg.ucsd.edu/)
+#
+#RESOURCE:  The Cell Component Ontology
+#VERSION:   25.0 (downloaded on 2021/06/03)
+#URL:       https://bioinformatics.ai.sri.com/CCO/
+#LICENSE:	
+#	"Open  Databases"  means  the  EcoCyc   and   MetaCyc   Pathway/genome
+#	databases.
+#	
+#	2.1 Open Databases. SRI  hereby  grants  to  LICENSEE  a  non-exclusive,
+#	royalty-free license to use, modify and redistribute the Open  Databases
+#	(as such term  is  defined  in  Exhibit  B)  and  LICENSEE's  modified
+#	versions thereof on a royalty-free basis, worldwide and for any purpose;
+#	provided, in each case, that if LICENSEE modifies any Open Database (the
+#	modified  version  being  a  "Modified  Open  Database"),  then  (i)
+#	LICENSEE must provide a copy of the Modified Open Database to  SRI  (and
+#	hereby grants to  SRI  a  nonexclusive,  royalty-free  license  to  use,
+#	modify, and redistribute the Modified Open Database  worldwide  and  for
+#	any purpose and to authorize others to do so);  and  (ii)  any  Modified
+#	Open Databases, or websites from which such Modified Open Databases  may
+#	be obtained, must clearly and prominently:
+#	
+#	(a) identify the Open Databases from which they were derived:
+#	
+#	(b) include all applicable copyright notices and author lists  from  the
+#	Open Databases from which they were derived; and
+#	
+#	(c) identify or summarize all modifications that were made.
+#	
+#	Any distribution of such Modified Open Databases  without  the  required
+#	notices is a violation of SRI's and its licensors' copyright  and  other
+#	proprietary rights. All trademarks, service marks, and trade  names  are
+#	proprietary to SRI and its licensors. The Open Databases, including  any
+#	files incorporated in or generated from  the  Open  Databases  and  data
+#	accompanying the Open Databases, are licensed to LICENSEE by SRI and its
+#	licensors, and SRI and its licensors do not transfer title or any  other
+#	rights in the Open Databases to LICENSEE. LICENSEE may not use the  Open
+#	Databases except as otherwise specified herein.
+#	
+#	2.1.1 If SRI,  in  its  sole  discretion,  determines  that  a  Modified
+#	Database is of sufficient quality and interest to the  community  to  be
+#	hosted on biocyc.org, then SRI may (if the  Modified  Database  includes
+#	significant curation over the original Open Database it is derived from,
+#	or the last version of the Modified Database provided to SRI) provide to
+#	LICENSEE a  personal,  one-year  subscription  to  biocyc  at  no  cost;
+#	provided, however, that if LICENSEE edits the Modified  Database  via  a
+#	MySQL server operated by SRI or  its  contractors,  such  free  one-year
+#	subscription will be forfeited.
+#	(https://biocyc.org/ptools-academic-license.shtml)
+#
+#RESOURCE:  ChEBI
+#VERSION:   203 (downloaded on 2021/09/30)
+#URL:       https://www.ebi.ac.uk/chebi/
+#LICENSE:	
+#	All data in the  database  is  non-proprietary  or  is  derived  from  a
+#	non-proprietary source. It is thus freely accessible  and  available  to
+#	anyone. In addition, each data item is fully  traceable  and  explicitly
+#	referenced to the original source.
+#	(https://www.ebi.ac.uk/chebi/aboutChebiForward.do)
+#
+#RESOURCE:  enviPath
+#VERSION:   (downloaded on 2021/11/24)
+#URL:       https://envipath.org
+#LICENSE:	
+#	The core data sets of enviPath are licensed under the Creative Commons
+#	Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA 4.0)
+#	license. This allows you to use them in a non-commercial context, for
+#	example if you work at a University or for a public research institute.
+#	You can even redistribute and modify the data using the same license. If
+#	you want to use the data commercially, contact us, we offer commercial
+#	license agreements.
+#	We summarized how you can use the data on our license page.
+#	(https://envipath.com/license/)
+#
+#RESOURCE:  HMDB
+#VERSION:   4.0 (downloaded on 2021/06/18)
+#URL:       https://hmdb.ca
+#LICENSE:	
+#	HMDB is offered to the public as a freely available  resource.  Use  and
+#	re-distribution of the  data,  in  whole  or  in  part,  for  commercial
+#	purposes requires  explicit  permission  of  the  authors  and  explicit
+#	acknowledgment  of  the  source  material  (HMDB)   and   the   original
+#	publication.
+#	(https://hmdb.ca/about)
+#
+#RESOURCE:  KEGG
+#VERSION:   98.0+/06-11, Jun 21 (downloaded on 2021/06/11)
+#URL:       https://www.kegg.jp
+#LICENSE:	
+#	Academic users may freely use the KEGG website and may also freely  link
+#	to the KEGG website.
+#	Non-academic  users  may  use  the  KEGG  website  as  end   users   for
+#	non-commercial purposes, but any other use requires a license agreement.
+#	Academic users who utilize KEGG  for  providing  academic  services  are
+#	requested to obtain a KEGG  FTP  subscription  for  organizational  use,
+#	which includes a proper license agreement.
+#	Non-academic  users  and  Academic  users  intending  to  use  KEGG  for
+#	commercial purposes are requested to obtain a license agreement  through
+#	KEGG's exclusive licensing agent, Pathway Solutions.
+#	(https://www.kegg.jp/kegg/legal.html)
+#
+#RESOURCE:  LipidMaps
+#VERSION:   2021-05-28 (downloaded on 2021/06/11)
+#URL:       https://www.lipidmaps.org
+#LICENSE:	
+#	The Lipidomics Gateway is provided on an "as is" basis, without warranty
+#	or representation of any kind, express or implied. The  content  of  the
+#	Lipidomics Gateway website  is  protected  by  international  copyright,
+#	trademark and other laws. You may download articles and web  pages  from
+#	this site for your personal, non-commercial use only, provided that  you
+#	keep intact all authorship, copyright and other proprietary notices. The
+#	Featured Lipid can also be used for educational purposes, provided  that
+#	credit is given to the Lipidomics Gateway. If  you  use  the  Lipidomics
+#	Gateway, you accept these terms. The LIPID MAPS Consortium reserves  the
+#	right to modify these terms at any time.
+#	(https://www.lipidmaps.org/about/)
+#
+#RESOURCE:  MetaCyc
+#VERSION:   25.0 (downloaded on 2021/06/03)
+#URL:       https://metacyc.org
+#LICENSE:	
+#	"Open  Databases"  means  the  EcoCyc   and   MetaCyc   Pathway/genome
+#	databases.
+#	
+#	2.1 Open Databases. SRI  hereby  grants  to  LICENSEE  a  non-exclusive,
+#	royalty-free license to use, modify and redistribute the Open  Databases
+#	(as such term  is  defined  in  Exhibit  B)  and  LICENSEE's  modified
+#	versions thereof on a royalty-free basis, worldwide and for any purpose;
+#	provided, in each case, that if LICENSEE modifies any Open Database (the
+#	modified  version  being  a  "Modified  Open  Database"),  then  (i)
+#	LICENSEE must provide a copy of the Modified Open Database to  SRI  (and
+#	hereby grants to  SRI  a  nonexclusive,  royalty-free  license  to  use,
+#	modify, and redistribute the Modified Open Database  worldwide  and  for
+#	any purpose and to authorize others to do so);  and  (ii)  any  Modified
+#	Open Databases, or websites from which such Modified Open Databases  may
+#	be obtained, must clearly and prominently:
+#	
+#	(a) identify the Open Databases from which they were derived:
+#	
+#	(b) include all applicable copyright notices and author lists  from  the
+#	Open Databases from which they were derived; and
+#	
+#	(c) identify or summarize all modifications that were made.
+#	
+#	Any distribution of such Modified Open Databases  without  the  required
+#	notices is a violation of SRI's and its licensors' copyright  and  other
+#	proprietary rights. All trademarks, service marks, and trade  names  are
+#	proprietary to SRI and its licensors. The Open Databases, including  any
+#	files incorporated in or generated from  the  Open  Databases  and  data
+#	accompanying the Open Databases, are licensed to LICENSEE by SRI and its
+#	licensors, and SRI and its licensors do not transfer title or any  other
+#	rights in the Open Databases to LICENSEE. LICENSEE may not use the  Open
+#	Databases except as otherwise specified herein.
+#	
+#	2.1.1 If SRI,  in  its  sole  discretion,  determines  that  a  Modified
+#	Database is of sufficient quality and interest to the  community  to  be
+#	hosted on biocyc.org, then SRI may (if the  Modified  Database  includes
+#	significant curation over the original Open Database it is derived from,
+#	or the last version of the Modified Database provided to SRI) provide to
+#	LICENSEE a  personal,  one-year  subscription  to  biocyc  at  no  cost;
+#	provided, however, that if LICENSEE edits the Modified  Database  via  a
+#	MySQL server operated by SRI or  its  contractors,  such  free  one-year
+#	subscription will be forfeited.
+#	(https://biocyc.org/ptools-academic-license.shtml)
+#
+#RESOURCE:  Reactome
+#VERSION:   77 June 14, 2021 (downloaded on 2021/09/03)
+#URL:       https://reactome.org
+#LICENSE:	
+#	Reactome is an open source and open access resource, available to anyone.
+#	Usage of Reactome material is covered by two Creative Commons licenses:
+#	
+#	The terms of the Creative Commons Public Domain (CC0) License apply to all
+#	Reactome annotation files, e.g. identifier mapping data, specialized data
+#	files, and interaction data derived from Reactome.
+#	(https://reactome.org/license/)
+#
+#RESOURCE:  Rhea
+#VERSION:   119 (downloaded on 2021/11/03)
+#URL:       https://www.rhea-db.org
+#LICENSE:	
+#	All data in Rhea is freely accessible and available for anyone to use under
+#	the Creative Commons Attribution License.
+#	(https://www.rhea-db.org/documentation)
+#
+#RESOURCE:  SABIO-RK
+#VERSION:   Software Update: 2021/05/11 -- Database Release: 2021/05/28 (downloaded on 2021/07/01)
+#URL:       http://sabiork.h-its.org
+#LICENSE:	
+#	HITS, gGmbH HITS own the  SABIO-RK  database,  its  interfaces  and  its
+#	associated  documentation  (all  referred  to  in   the   following   as
+#	"Database").  You  should  carefully  read  the  following   terms   and
+#	conditions before  using  this  Database.  Your  use  of  this  Database
+#	indicates your acceptance of this license agreement and  all  terms  and
+#	conditions.You are hereby granted a non-exclusive  and  non-transferable
+#	license to use  the  Database  according  to  the  following  terms  and
+#	conditions. This license is  to  use  the  Database  for  Non-Commercial
+#	Purpose only. Non-Commercial Purpose  means  the  use  of  the  Database
+#	solely for  internal  non-commercial  research  and  academic  purposes.
+#	Non-Commercial Purpose excludes, without  limitation,  any  use  of  the
+#	Database, as part of, or in any way in  connection  with  a  product  or
+#	service which is sold, offered for sale, licensed,  leased,  loaned,  or
+#	rented. Permission to use this Database for  Non-Commercial  Purpose  is
+#	hereby granted without fee and subject to the following  terms  of  this
+#	license.
+#	
+#	Commercial Use
+#	If you desire to  use  the  Database  for  profit-making  or  commercial
+#	purposes, you agree to negotiate in good faith a license with  the  HITS
+#	prior to such profit-making or commercial use. The HITS  shall  have  no
+#	obligation to grant such license to you,  and  may  grant  exclusive  or
+#	non-exclusive licenses to others. You agree to notify the  HITS  of  any
+#	inquiries you have  for  commercial  use  of  the  Database  and/or  its
+#	modifications. You may contact the following email to discuss commercial
+#	use: sabiork at h-its.org
+#	
+#	Governing Law
+#	This Agreement is governed  by  the  law  of  the  Federal  Republic  of
+#	Germany. The application of the UN Convention on the Sale  of  Goods  is
+#	excluded.
+#	
+#	Disclaimer of Warranty
+#	Because this Database is licensed free of charge, there is  no  warranty
+#	for the data in it contained and the methods used for its querying.  The
+#	HITS makes no warranty or  representation  that  the  operation  of  the
+#	Database in this compilation will be error-free, and the HITS  is  under
+#	no obligation to provide any services, by way of maintenance, update, or
+#	otherwise.
+#	
+#	THIS DATABASE AND THE  ACCOMPANYING  FILES  ARE  LICENSED  "AS  IS"  AND
+#	WITHOUT WARRANTIES AS TO PERFORMANCE OR  MERCHANTABILITY  OR  ANY  OTHER
+#	WARRANTIES WHETHER EXPRESSED OR IMPLIED. NO WARRANTY OF  FITNESS  FOR  A
+#	PARTICULAR PURPOSE IS OFFERED. THE ENTIRE RISK AS  TO  THE  QUALITY  AND
+#	PERFORMANCE OF THE  PROGRAM  IS  WITH  YOU.  SHOULD  THE  PROGRAM  PROVE
+#	DEFECTIVE, YOU ASSUME THE COST OF ALL  NECESSARY  SERVICING,  REPAIR  OR
+#	CORRECTION.
+#	
+#	Limitation of Liability
+#	IN NO EVENT WILL  HITS,  OR  ANY  OTHER  PARTY  WHO  MAY  MODIFY  AND/OR
+#	REDISTRIBUTE THE DATABASE AS PERMITTED  ABOVE,  BE  LIABLE  TO  YOU  FOR
+#	DAMAGES, INCLUDING ANY GENERAL,  SPECIAL,  INCIDENTAL  OR  CONSEQUENTIAL
+#	DAMAGES ARISING  OUT  OF  THE  USE  OR  INABILITY  TO  USE  THE  PROGRAM
+#	(INCLUDING BUT NOT LIMITED TO  LOSS  OF  DATA  OR  DATA  BEING  RENDERED
+#	INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A  FAILURE  OF
+#	THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF VTIP  AND  HITS
+#	OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+#	
+#	Reference to SABIO-RK  Users  will  cite  SABIO-RK  in  publications  or
+#	presentations, whenever the data used was extracted from  the  database.
+#	Termination This  agreement  is  effective  until  terminated.  You  may
+#	terminate this agreement  at  any  time  by  destroying  all  associated
+#	material (e.g., documentation or web service clients) to the database in
+#	your possession and by stopping any access to the database  directly  or
+#	from  software  generated  by  you.  This   agreement   will   terminate
+#	immediately without notice from and HITS if you fail to comply with  any
+#	of the terms and conditions of this license. This  agreement  will  also
+#	terminate immediately without notice from the HITS if  it  is  found  to
+#	implement patented algorithms or contain copyrighted code not  owned  or
+#	licensed the HITS for the purpose  of  its  inclusion  in  the  SABIO-RK
+#	Database. This agreement cannot be terminated by any other mechanism  or
+#	for any other reason than those stated herein.
+#	
+#	Place of Court
+#	The exclusive venue for all disputes arising from or in connection  with
+#	this Agreement is Mannheim, Germany (HRB 337446), when the Licensee is a
+#	business person, a legal entity governed by public  law,  or  a  special
+#	fund governed by public law,  or  does  not  have  a  general  place  of
+#	jurisdiction  within  the  Federal  Republic  of  Germany.  Address  all
+#	correspondence  regarding  this  license  to  electronic  mail  address:
+#	sabiork at h-its.org Any inquiries  and  comments  regarding  bugs,  bug
+#	fixes, enhancements, modifications or any other similar issues should be
+#	directed to: sabiork at h-its.org
+#	
+#	Copyright 2007 by HITS, gGmbH. All rights reserved.
+#	(http://sabiork.h-its.org/layouts/content/termscondition.gsp)
+#
+#RESOURCE:  The SEED
+#VERSION:   2.6.1 (July 31, 2020) (downloaded on 2021/08/09)
+#URL:       https://modelseed.org
+#LICENSE:	
+#	All tools and datasets that make up the SEED are in the public domain.
+#	(https://modelseed.org)
+#
+#RESOURCE:  SwissLipids
+#VERSION:   (downloaded on 2021/07/29)
+#URL:       https://www.swisslipids.org
+#LICENSE:	
+#	SwissLipids  is  licensed  under  a  Creative  Commons   Attribution-Non
+#	Commercial-NoDerivatives 4.0 International License.
+#	
+#	Commercial users and those who wish to  use  this  work  for  commercial
+#	purposes  please  contact  the  SIB  technology  transfer  officer   at:
+#	marc.filliettaz@genebio.com
+#	(https://www.swisslipids.org/#/downloads)
+#ID	mnx_equation	reference	classifs	is_balanced	is_transport
+EMPTY	 = 	mnx:EMPTY		B	
+MNXR114744	1 MNXM162730@MNXD1 + 1 MNXM5@MNXD1 + 1 WATER@MNXD1 = 1 MNXM1@MNXD1 + 1 MNXM735438@MNXD1 + 1 MNXM738702@MNXD1 + 1 MNXM97613@MNXD1	rheaR:50004	1.14.13.231	B	
+MNXR171656	1 MNXM5@MNXD1 + 1 MNXM743287@MNXD1 + 1 WATER@MNXD1 = 1 MNXM735438@MNXD1 + 1 MNXM738702@MNXD1 + 1 MNXM743286@MNXD1	rheaR:61444			
+MNXR168222	1 MNXM1089988@MNXD1 + 1 MNXM1102167@MNXD1 = 1 MNXM1089989@MNXD1 + 1 MNXM1102072@MNXD1	rheaR:42776	2.1.1.180		
+MNXR165961	1 MNXM1107698@MNXD1 + 1 WATER@MNXD1 = 1 MNXM1108087@MNXD1 + 1 MNXM728579@MNXD1	rheaR:18689	3.1.1.32		
+MNXR171532	2 MNXM1107708@MNXD1 + 2 MNXM1@MNXD1 + 1 MNXM734941@MNXD1 = 1 MNXM737425@MNXD1 + 2 WATER@MNXD1	rheaR:60624		B	
+MNXR171532	2 MNXM1107708@MNXD1 + 2 MNXM1@MNXD1 + 1 MNXM734941@MNXD1 = 1 MNXM737425@MNXD1 + 2 WATER@MNXD1	rheaR:60624		B	
\ No newline at end of file
diff --git a/metanetx_uniprot/TestingFiles/reac_xref.tsv b/metanetx_uniprot/TestingFiles/reac_xref.tsv
new file mode 100644
index 00000000..d03bb0c0
--- /dev/null
+++ b/metanetx_uniprot/TestingFiles/reac_xref.tsv
@@ -0,0 +1,365 @@
+### MetaNetX/MNXref reconciliation ###
+#Based on the following resources:
+#
+#RESOURCE:  MetaNetX/MNXref
+#VERSION:   4.4
+#DATE:      2022/03/16
+#URL:       https://www.metanetx.org
+#LICENSE:	
+#	MetaNetX copyright 2011 SystemsX, SIB Swiss Institute of Bioinformatics
+#	Except where otherwise noted, the data  available  from  this  site  are
+#	licensed under a Creative Commons Attribution 4.0 International License.
+#	MNXref  uses  information  on  cellular  compartments,  reactions,   and
+#	metabolites that is sourced from a number  of  external  resources.  The
+#	licensing agreements of those resources are specified  in  each  of  the
+#	downloadable  files  listed  below.  For  each  compound,  reaction  and
+#	cellular compartment in the MNXref namespace we indicate which  external
+#	resource  provided  the  information  used  in  MNXref.  Compounds   and
+#	reactions in the MNXref namespace may be identical to, or  differ  from,
+#	those in the external resource. In either case the data from MNXref  may
+#	be considered to be subject to the original  licensing  restrictions  of
+#	the external resource.
+#	(https://www.metanetx.org/mnxdoc/mnxref.html)
+#
+#RESOURCE:  BiGG
+#VERSION:   1.6.0, last updated: 2019/10/31 (downloaded on 2021/07/23)
+#URL:       http://bigg.ucsd.edu
+#LICENSE:	
+#	Copyright 2015 The Regents of the University of California
+#	
+#	All Rights Reserved
+#	
+#	Permission to use, copy, modify and distribute any part of  BiGG  Models
+#	for educational, research and  non-profit  purposes,  without  fee,  and
+#	without a written agreement is hereby granted, provided that  the  above
+#	copyright notice, this paragraph  and  the  following  three  paragraphs
+#	appear in all copies.
+#	
+#	Those desiring to incorporate BiGG Models into  commercial  products  or
+#	use for commercial purposes should contact  the  Technology  Transfer  &
+#	Intellectual Property Services, University  of  California,  San  Diego,
+#	9500 Gilman Drive, Mail Code 0910, La Jolla, CA  92093-0910,  Ph:  (858)
+#	534-5815, FAX: (858) 534-7345, e-mail: invent@ucsd.edu.
+#	
+#	In no event shall the University of California be liable  to  any  party
+#	for direct, indirect, special,  incidental,  or  consequential  damages,
+#	including lost profits, arising out of the use of  this  bigg  database,
+#	even if the University of California has been advised of the possibility
+#	of such damage.
+#	
+#	The BiGG Models provided  herein  is  on  an  "as  is"  basis,  and  the
+#	University of California  has  no  obligation  to  provide  maintenance,
+#	support, updates, enhancements,  or  modifications.  The  University  of
+#	California makes no representations and extends  no  warranties  of  any
+#	kind, either implied or express, including,  but  not  limited  to,  the
+#	implied warranties  of  merchantability  or  fitness  for  a  particular
+#	purpose, or that the use of  the  BiGG  Models  will  not  infringe  any
+#	patent, trademark or other rights.
+#	(http://bigg.ucsd.edu/)
+#
+#RESOURCE:  The Cell Component Ontology
+#VERSION:   25.0 (downloaded on 2021/06/03)
+#URL:       https://bioinformatics.ai.sri.com/CCO/
+#LICENSE:	
+#	"Open  Databases"  means  the  EcoCyc   and   MetaCyc   Pathway/genome
+#	databases.
+#	
+#	2.1 Open Databases. SRI  hereby  grants  to  LICENSEE  a  non-exclusive,
+#	royalty-free license to use, modify and redistribute the Open  Databases
+#	(as such term  is  defined  in  Exhibit  B)  and  LICENSEE's  modified
+#	versions thereof on a royalty-free basis, worldwide and for any purpose;
+#	provided, in each case, that if LICENSEE modifies any Open Database (the
+#	modified  version  being  a  "Modified  Open  Database"),  then  (i)
+#	LICENSEE must provide a copy of the Modified Open Database to  SRI  (and
+#	hereby grants to  SRI  a  nonexclusive,  royalty-free  license  to  use,
+#	modify, and redistribute the Modified Open Database  worldwide  and  for
+#	any purpose and to authorize others to do so);  and  (ii)  any  Modified
+#	Open Databases, or websites from which such Modified Open Databases  may
+#	be obtained, must clearly and prominently:
+#	
+#	(a) identify the Open Databases from which they were derived:
+#	
+#	(b) include all applicable copyright notices and author lists  from  the
+#	Open Databases from which they were derived; and
+#	
+#	(c) identify or summarize all modifications that were made.
+#	
+#	Any distribution of such Modified Open Databases  without  the  required
+#	notices is a violation of SRI's and its licensors' copyright  and  other
+#	proprietary rights. All trademarks, service marks, and trade  names  are
+#	proprietary to SRI and its licensors. The Open Databases, including  any
+#	files incorporated in or generated from  the  Open  Databases  and  data
+#	accompanying the Open Databases, are licensed to LICENSEE by SRI and its
+#	licensors, and SRI and its licensors do not transfer title or any  other
+#	rights in the Open Databases to LICENSEE. LICENSEE may not use the  Open
+#	Databases except as otherwise specified herein.
+#	
+#	2.1.1 If SRI,  in  its  sole  discretion,  determines  that  a  Modified
+#	Database is of sufficient quality and interest to the  community  to  be
+#	hosted on biocyc.org, then SRI may (if the  Modified  Database  includes
+#	significant curation over the original Open Database it is derived from,
+#	or the last version of the Modified Database provided to SRI) provide to
+#	LICENSEE a  personal,  one-year  subscription  to  biocyc  at  no  cost;
+#	provided, however, that if LICENSEE edits the Modified  Database  via  a
+#	MySQL server operated by SRI or  its  contractors,  such  free  one-year
+#	subscription will be forfeited.
+#	(https://biocyc.org/ptools-academic-license.shtml)
+#
+#RESOURCE:  ChEBI
+#VERSION:   203 (downloaded on 2021/09/30)
+#URL:       https://www.ebi.ac.uk/chebi/
+#LICENSE:	
+#	All data in the  database  is  non-proprietary  or  is  derived  from  a
+#	non-proprietary source. It is thus freely accessible  and  available  to
+#	anyone. In addition, each data item is fully  traceable  and  explicitly
+#	referenced to the original source.
+#	(https://www.ebi.ac.uk/chebi/aboutChebiForward.do)
+#
+#RESOURCE:  enviPath
+#VERSION:   (downloaded on 2021/11/24)
+#URL:       https://envipath.org
+#LICENSE:	
+#	The core data sets of enviPath are licensed under the Creative Commons
+#	Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA 4.0)
+#	license. This allows you to use them in a non-commercial context, for
+#	example if you work at a University or for a public research institute.
+#	You can even redistribute and modify the data using the same license. If
+#	you want to use the data commercially, contact us, we offer commercial
+#	license agreements.
+#	We summarized how you can use the data on our license page.
+#	(https://envipath.com/license/)
+#
+#RESOURCE:  HMDB
+#VERSION:   4.0 (downloaded on 2021/06/18)
+#URL:       https://hmdb.ca
+#LICENSE:	
+#	HMDB is offered to the public as a freely available  resource.  Use  and
+#	re-distribution of the  data,  in  whole  or  in  part,  for  commercial
+#	purposes requires  explicit  permission  of  the  authors  and  explicit
+#	acknowledgment  of  the  source  material  (HMDB)   and   the   original
+#	publication.
+#	(https://hmdb.ca/about)
+#
+#RESOURCE:  KEGG
+#VERSION:   98.0+/06-11, Jun 21 (downloaded on 2021/06/11)
+#URL:       https://www.kegg.jp
+#LICENSE:	
+#	Academic users may freely use the KEGG website and may also freely  link
+#	to the KEGG website.
+#	Non-academic  users  may  use  the  KEGG  website  as  end   users   for
+#	non-commercial purposes, but any other use requires a license agreement.
+#	Academic users who utilize KEGG  for  providing  academic  services  are
+#	requested to obtain a KEGG  FTP  subscription  for  organizational  use,
+#	which includes a proper license agreement.
+#	Non-academic  users  and  Academic  users  intending  to  use  KEGG  for
+#	commercial purposes are requested to obtain a license agreement  through
+#	KEGG's exclusive licensing agent, Pathway Solutions.
+#	(https://www.kegg.jp/kegg/legal.html)
+#
+#RESOURCE:  LipidMaps
+#VERSION:   2021-05-28 (downloaded on 2021/06/11)
+#URL:       https://www.lipidmaps.org
+#LICENSE:	
+#	The Lipidomics Gateway is provided on an "as is" basis, without warranty
+#	or representation of any kind, express or implied. The  content  of  the
+#	Lipidomics Gateway website  is  protected  by  international  copyright,
+#	trademark and other laws. You may download articles and web  pages  from
+#	this site for your personal, non-commercial use only, provided that  you
+#	keep intact all authorship, copyright and other proprietary notices. The
+#	Featured Lipid can also be used for educational purposes, provided  that
+#	credit is given to the Lipidomics Gateway. If  you  use  the  Lipidomics
+#	Gateway, you accept these terms. The LIPID MAPS Consortium reserves  the
+#	right to modify these terms at any time.
+#	(https://www.lipidmaps.org/about/)
+#
+#RESOURCE:  MetaCyc
+#VERSION:   25.0 (downloaded on 2021/06/03)
+#URL:       https://metacyc.org
+#LICENSE:	
+#	"Open  Databases"  means  the  EcoCyc   and   MetaCyc   Pathway/genome
+#	databases.
+#	
+#	2.1 Open Databases. SRI  hereby  grants  to  LICENSEE  a  non-exclusive,
+#	royalty-free license to use, modify and redistribute the Open  Databases
+#	(as such term  is  defined  in  Exhibit  B)  and  LICENSEE's  modified
+#	versions thereof on a royalty-free basis, worldwide and for any purpose;
+#	provided, in each case, that if LICENSEE modifies any Open Database (the
+#	modified  version  being  a  "Modified  Open  Database"),  then  (i)
+#	LICENSEE must provide a copy of the Modified Open Database to  SRI  (and
+#	hereby grants to  SRI  a  nonexclusive,  royalty-free  license  to  use,
+#	modify, and redistribute the Modified Open Database  worldwide  and  for
+#	any purpose and to authorize others to do so);  and  (ii)  any  Modified
+#	Open Databases, or websites from which such Modified Open Databases  may
+#	be obtained, must clearly and prominently:
+#	
+#	(a) identify the Open Databases from which they were derived:
+#	
+#	(b) include all applicable copyright notices and author lists  from  the
+#	Open Databases from which they were derived; and
+#	
+#	(c) identify or summarize all modifications that were made.
+#	
+#	Any distribution of such Modified Open Databases  without  the  required
+#	notices is a violation of SRI's and its licensors' copyright  and  other
+#	proprietary rights. All trademarks, service marks, and trade  names  are
+#	proprietary to SRI and its licensors. The Open Databases, including  any
+#	files incorporated in or generated from  the  Open  Databases  and  data
+#	accompanying the Open Databases, are licensed to LICENSEE by SRI and its
+#	licensors, and SRI and its licensors do not transfer title or any  other
+#	rights in the Open Databases to LICENSEE. LICENSEE may not use the  Open
+#	Databases except as otherwise specified herein.
+#	
+#	2.1.1 If SRI,  in  its  sole  discretion,  determines  that  a  Modified
+#	Database is of sufficient quality and interest to the  community  to  be
+#	hosted on biocyc.org, then SRI may (if the  Modified  Database  includes
+#	significant curation over the original Open Database it is derived from,
+#	or the last version of the Modified Database provided to SRI) provide to
+#	LICENSEE a  personal,  one-year  subscription  to  biocyc  at  no  cost;
+#	provided, however, that if LICENSEE edits the Modified  Database  via  a
+#	MySQL server operated by SRI or  its  contractors,  such  free  one-year
+#	subscription will be forfeited.
+#	(https://biocyc.org/ptools-academic-license.shtml)
+#
+#RESOURCE:  Reactome
+#VERSION:   77 June 14, 2021 (downloaded on 2021/09/03)
+#URL:       https://reactome.org
+#LICENSE:	
+#	Reactome is an open source and open access resource, available to anyone.
+#	Usage of Reactome material is covered by two Creative Commons licenses:
+#	
+#	The terms of the Creative Commons Public Domain (CC0) License apply to all
+#	Reactome annotation files, e.g. identifier mapping data, specialized data
+#	files, and interaction data derived from Reactome.
+#	(https://reactome.org/license/)
+#
+#RESOURCE:  Rhea
+#VERSION:   119 (downloaded on 2021/11/03)
+#URL:       https://www.rhea-db.org
+#LICENSE:	
+#	All data in Rhea is freely accessible and available for anyone to use under
+#	the Creative Commons Attribution License.
+#	(https://www.rhea-db.org/documentation)
+#
+#RESOURCE:  SABIO-RK
+#VERSION:   Software Update: 2021/05/11 -- Database Release: 2021/05/28 (downloaded on 2021/07/01)
+#URL:       http://sabiork.h-its.org
+#LICENSE:	
+#	HITS, gGmbH HITS own the  SABIO-RK  database,  its  interfaces  and  its
+#	associated  documentation  (all  referred  to  in   the   following   as
+#	"Database").  You  should  carefully  read  the  following   terms   and
+#	conditions before  using  this  Database.  Your  use  of  this  Database
+#	indicates your acceptance of this license agreement and  all  terms  and
+#	conditions.You are hereby granted a non-exclusive  and  non-transferable
+#	license to use  the  Database  according  to  the  following  terms  and
+#	conditions. This license is  to  use  the  Database  for  Non-Commercial
+#	Purpose only. Non-Commercial Purpose  means  the  use  of  the  Database
+#	solely for  internal  non-commercial  research  and  academic  purposes.
+#	Non-Commercial Purpose excludes, without  limitation,  any  use  of  the
+#	Database, as part of, or in any way in  connection  with  a  product  or
+#	service which is sold, offered for sale, licensed,  leased,  loaned,  or
+#	rented. Permission to use this Database for  Non-Commercial  Purpose  is
+#	hereby granted without fee and subject to the following  terms  of  this
+#	license.
+#	
+#	Commercial Use
+#	If you desire to  use  the  Database  for  profit-making  or  commercial
+#	purposes, you agree to negotiate in good faith a license with  the  HITS
+#	prior to such profit-making or commercial use. The HITS  shall  have  no
+#	obligation to grant such license to you,  and  may  grant  exclusive  or
+#	non-exclusive licenses to others. You agree to notify the  HITS  of  any
+#	inquiries you have  for  commercial  use  of  the  Database  and/or  its
+#	modifications. You may contact the following email to discuss commercial
+#	use: sabiork at h-its.org
+#	
+#	Governing Law
+#	This Agreement is governed  by  the  law  of  the  Federal  Republic  of
+#	Germany. The application of the UN Convention on the Sale  of  Goods  is
+#	excluded.
+#	
+#	Disclaimer of Warranty
+#	Because this Database is licensed free of charge, there is  no  warranty
+#	for the data in it contained and the methods used for its querying.  The
+#	HITS makes no warranty or  representation  that  the  operation  of  the
+#	Database in this compilation will be error-free, and the HITS  is  under
+#	no obligation to provide any services, by way of maintenance, update, or
+#	otherwise.
+#	
+#	THIS DATABASE AND THE  ACCOMPANYING  FILES  ARE  LICENSED  "AS  IS"  AND
+#	WITHOUT WARRANTIES AS TO PERFORMANCE OR  MERCHANTABILITY  OR  ANY  OTHER
+#	WARRANTIES WHETHER EXPRESSED OR IMPLIED. NO WARRANTY OF  FITNESS  FOR  A
+#	PARTICULAR PURPOSE IS OFFERED. THE ENTIRE RISK AS  TO  THE  QUALITY  AND
+#	PERFORMANCE OF THE  PROGRAM  IS  WITH  YOU.  SHOULD  THE  PROGRAM  PROVE
+#	DEFECTIVE, YOU ASSUME THE COST OF ALL  NECESSARY  SERVICING,  REPAIR  OR
+#	CORRECTION.
+#	
+#	Limitation of Liability
+#	IN NO EVENT WILL  HITS,  OR  ANY  OTHER  PARTY  WHO  MAY  MODIFY  AND/OR
+#	REDISTRIBUTE THE DATABASE AS PERMITTED  ABOVE,  BE  LIABLE  TO  YOU  FOR
+#	DAMAGES, INCLUDING ANY GENERAL,  SPECIAL,  INCIDENTAL  OR  CONSEQUENTIAL
+#	DAMAGES ARISING  OUT  OF  THE  USE  OR  INABILITY  TO  USE  THE  PROGRAM
+#	(INCLUDING BUT NOT LIMITED TO  LOSS  OF  DATA  OR  DATA  BEING  RENDERED
+#	INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A  FAILURE  OF
+#	THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF VTIP  AND  HITS
+#	OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+#	
+#	Reference to SABIO-RK  Users  will  cite  SABIO-RK  in  publications  or
+#	presentations, whenever the data used was extracted from  the  database.
+#	Termination This  agreement  is  effective  until  terminated.  You  may
+#	terminate this agreement  at  any  time  by  destroying  all  associated
+#	material (e.g., documentation or web service clients) to the database in
+#	your possession and by stopping any access to the database  directly  or
+#	from  software  generated  by  you.  This   agreement   will   terminate
+#	immediately without notice from and HITS if you fail to comply with  any
+#	of the terms and conditions of this license. This  agreement  will  also
+#	terminate immediately without notice from the HITS if  it  is  found  to
+#	implement patented algorithms or contain copyrighted code not  owned  or
+#	licensed the HITS for the purpose  of  its  inclusion  in  the  SABIO-RK
+#	Database. This agreement cannot be terminated by any other mechanism  or
+#	for any other reason than those stated herein.
+#	
+#	Place of Court
+#	The exclusive venue for all disputes arising from or in connection  with
+#	this Agreement is Mannheim, Germany (HRB 337446), when the Licensee is a
+#	business person, a legal entity governed by public  law,  or  a  special
+#	fund governed by public law,  or  does  not  have  a  general  place  of
+#	jurisdiction  within  the  Federal  Republic  of  Germany.  Address  all
+#	correspondence  regarding  this  license  to  electronic  mail  address:
+#	sabiork at h-its.org Any inquiries  and  comments  regarding  bugs,  bug
+#	fixes, enhancements, modifications or any other similar issues should be
+#	directed to: sabiork at h-its.org
+#	
+#	Copyright 2007 by HITS, gGmbH. All rights reserved.
+#	(http://sabiork.h-its.org/layouts/content/termscondition.gsp)
+#
+#RESOURCE:  The SEED
+#VERSION:   2.6.1 (July 31, 2020) (downloaded on 2021/08/09)
+#URL:       https://modelseed.org
+#LICENSE:	
+#	All tools and datasets that make up the SEED are in the public domain.
+#	(https://modelseed.org)
+#
+#RESOURCE:  SwissLipids
+#VERSION:   (downloaded on 2021/07/29)
+#URL:       https://www.swisslipids.org
+#LICENSE:	
+#	SwissLipids  is  licensed  under  a  Creative  Commons   Attribution-Non
+#	Commercial-NoDerivatives 4.0 International License.
+#	
+#	Commercial users and those who wish to  use  this  work  for  commercial
+#	purposes  please  contact  the  SIB  technology  transfer  officer   at:
+#	marc.filliettaz@genebio.com
+#	(https://www.swisslipids.org/#/downloads)
+#source	ID	description
+EMPTY	EMPTY	Empty equation
+rhea:50004	MNXR114744	1 chebi:15378@rheaC:comp + 1 chebi:15379@rheaC:comp + 1 chebi:57783@rheaC:comp + 1 chebi:77932@rheaC:comp <?> 1 chebi:132727@rheaC:comp + 1 chebi:15377@rheaC:comp + 1 chebi:58349@rheaC:comp
+rheaR:50004	MNXR114744	1 chebi:15378@rheaC:comp + 1 chebi:15379@rheaC:comp + 1 chebi:57783@rheaC:comp + 1 chebi:77932@rheaC:comp <?> 1 chebi:132727@rheaC:comp + 1 chebi:15377@rheaC:comp + 1 chebi:58349@rheaC:comp
+rhea:61444	MNXR171656	1 chebi:144644@rheaC:comp + 1 chebi:15378@rheaC:comp + 1 chebi:15379@rheaC:comp + 1 chebi:57783@rheaC:comp <?> 1 chebi:144645@rheaC:comp + 1 chebi:15377@rheaC:comp + 1 chebi:58349@rheaC:comp
+rheaR:61444	MNXR171656	1 chebi:144644@rheaC:comp + 1 chebi:15378@rheaC:comp + 1 chebi:15379@rheaC:comp + 1 chebi:57783@rheaC:comp <?> 1 chebi:144645@rheaC:comp + 1 chebi:15377@rheaC:comp + 1 chebi:58349@rheaC:comp
+rhea:42776	MNXR168222	1 chebi:59789@rheaC:comp + 1 rheaG:10228@rheaC:comp <?> 1 chebi:15378@rheaC:comp + 1 chebi:57856@rheaC:comp + 1 rheaG:10227@rheaC:comp
+rheaR:42776	MNXR168222	1 chebi:59789@rheaC:comp + 1 rheaG:10228@rheaC:comp <?> 1 chebi:15378@rheaC:comp + 1 chebi:57856@rheaC:comp + 1 rheaG:10227@rheaC:comp
+rhea:18690	MNXR165961	1 chebi:15377@rheaC:comp + 1 chebi:57643@rheaC:comp --> 1 chebi:15378@rheaC:comp + 1 chebi:28868@rheaC:comp + 1 chebi:57875@rheaC:comp
+rheaR:18690	MNXR165961	1 chebi:15377@rheaC:comp + 1 chebi:57643@rheaC:comp --> 1 chebi:15378@rheaC:comp + 1 chebi:28868@rheaC:comp + 1 chebi:57875@rheaC:comp
+rhea:60624	MNXR171532	1 chebi:74986@rheaC:comp + 2 chebi:15377@rheaC:comp <?> 1 chebi:143890@rheaC:comp + 2 chebi:15378@rheaC:comp + 2 chebi:30823@rheaC:comp
+rheaR:60624	MNXR171532	1 chebi:74986@rheaC:comp + 2 chebi:15377@rheaC:comp <?> 1 chebi:143890@rheaC:comp + 2 chebi:15378@rheaC:comp + 2 chebi:30823@rheaC:comp
+rhea:60625	MNXR171532	1 chebi:74986@rheaC:comp + 2 chebi:15377@rheaC:comp --> 1 chebi:143890@rheaC:comp + 2 chebi:15378@rheaC:comp + 2 chebi:30823@rheaC:comp
+rheaR:60625	MNXR171532	1 chebi:74986@rheaC:comp + 2 chebi:15377@rheaC:comp --> 1 chebi:143890@rheaC:comp + 2 chebi:15378@rheaC:comp + 2 chebi:30823@rheaC:comp
\ No newline at end of file
diff --git a/metanetx_uniprot/TestingFiles/rhea2uniprot_sprot.txt b/metanetx_uniprot/TestingFiles/rhea2uniprot_sprot.txt
new file mode 100644
index 00000000..05b819cc
--- /dev/null
+++ b/metanetx_uniprot/TestingFiles/rhea2uniprot_sprot.txt
@@ -0,0 +1,6 @@
+50004	UN	50004	Q01911
+61444	UN	61444	Q01911
+42776	UN	42776	A8C927
+18690	LR	18689	P0DTE9
+60624	UN	60624	P0DTE9
+60625	LR	60624	P0DTE9
diff --git a/metanetx_uniprot/_parsers.py b/metanetx_uniprot/_parsers.py
new file mode 100644
index 00000000..b646bf16
--- /dev/null
+++ b/metanetx_uniprot/_parsers.py
@@ -0,0 +1,686 @@
+'''
+libChEBIpy (c) University of Manchester 2015
+
+libChEBIpy is licensed under the MIT License.
+
+To view a copy of this license, visit <http://opensource.org/licenses/MIT/>.
+
+@author:  neilswainston
+'''
+import calendar
+import datetime
+import gzip
+import io
+import os.path
+import re
+import tempfile
+import zipfile
+
+import six.moves.urllib.parse as urlparse
+from six.moves.urllib.request import urlretrieve, urlcleanup
+
+from ._comment import Comment
+from ._compound_origin import CompoundOrigin
+from ._database_accession import DatabaseAccession
+from ._formula import Formula
+from ._name import Name
+from ._reference import Reference
+from ._relation import Relation
+from ._structure import Structure
+import wget
+
+
+__ALL_IDS = {}
+__ALL_NAMES = {}
+__COMMENTS = {}
+__COMPOUND_ORIGINS = {}
+__CHARGES = {}
+__CREATED_BYS = {}
+__DATABASE_ACCESSIONS = {}
+__DEFAULT_STRUCTURE_IDS = []
+__DEFINITIONS = {}
+__FORMULAE = {}
+__INCHIS = {}
+__INCHI_KEYS = {}
+__INCOMINGS = {}
+__MASSES = {}
+__MODIFIED_ONS = {}
+__NAMES = {}
+__OUTGOINGS = {}
+__PARENT_IDS = {}
+__SMILES = {}
+__SOURCES = {}
+__STARS = {}
+__STATUSES = {}
+
+__DOWNLOAD_PARAMS = {'path': os.path.join(os.path.expanduser('~'), 'libChEBI'),
+                     'auto_update': True}
+
+
+def set_download_cache_path(path):
+    '''Stores path as the directory that get_file downloads into.'''
+    __DOWNLOAD_PARAMS.update(path=path)
+
+
+def set_auto_update(auto_update):
+    '''Stores the flag that __is_current consults before any check.'''
+    __DOWNLOAD_PARAMS.update(auto_update=auto_update)
+
+
+def get_formulae(chebi_id):
+    '''Looks up the Formula objects parsed for chebi_id ([] if none).'''
+    if len(__FORMULAE) == 0:
+        # An empty cache triggers parsing of chemical_data.tsv.
+        __parse_chemical_data()
+    return __FORMULAE.get(chebi_id, [])
+
+
+def get_all_formulae(chebi_ids):
+    '''Concatenates the formulae of every id in chebi_ids into one list.'''
+    return [formula for chebi_id in chebi_ids
+            for formula in get_formulae(chebi_id)]
+
+
+def get_mass(chebi_id):
+    '''Looks up the mass parsed for chebi_id (NaN if none is recorded).'''
+    if len(__MASSES) == 0:
+        # An empty cache triggers parsing of chemical_data.tsv.
+        __parse_chemical_data()
+    return __MASSES.get(chebi_id, float('NaN'))
+
+
+def get_charge(chebi_id):
+    '''Looks up the charge parsed for chebi_id (NaN if none is recorded).'''
+    if len(__CHARGES) == 0:
+        # An empty cache triggers parsing of chemical_data.tsv.
+        __parse_chemical_data()
+    return __CHARGES.get(chebi_id, float('NaN'))
+
+
+def __parse_chemical_data():
+    '''Fills __FORMULAE, __MASSES and __CHARGES from chemical_data.tsv.'''
+    filename = get_file('chemical_data.tsv')
+
+    with io.open(filename, 'r', encoding='cp1252') as textfile:
+        # The first line is skipped rather than parsed.
+        next(textfile)
+
+        for line in textfile:
+            tokens = line.strip().split('\t')
+            data_type = tokens[3]
+
+            if data_type == 'FORMULA':
+                # Contradictory formulae exist depending upon the source
+                # database, so every formula is kept in a per-id list.
+                chebi_id = int(tokens[1])
+                formulae = __FORMULAE.setdefault(chebi_id, [])
+                formulae.append(Formula(tokens[4], tokens[2]))
+
+            elif data_type == 'MASS':
+                __MASSES[int(tokens[1])] = float(tokens[4])
+
+            elif data_type == 'CHARGE':
+                # A trailing '-' (e.g. '2-') is moved to the front so that
+                # int() can parse the value.
+                raw = tokens[4]
+                if raw[-1] == '-':
+                    raw = '-' + raw[:-1]
+                __CHARGES[int(tokens[1])] = int(raw)
+
+
+def get_comments(chebi_id):
+    '''Looks up the Comment objects parsed for chebi_id ([] if none).'''
+    if len(__COMMENTS) == 0:
+        # An empty cache triggers parsing of comments.tsv.
+        __parse_comments()
+    return __COMMENTS.get(chebi_id, [])
+
+
+def get_all_comments(chebi_ids):
+    '''Concatenates the comments of every id in chebi_ids into one list.'''
+    return [comment for chebi_id in chebi_ids
+            for comment in get_comments(chebi_id)]
+
+
+def __parse_comments():
+    '''Parses comments.tsv into __COMMENTS, keyed by integer ChEBI id.
+
+    Each data row appends one Comment to the list stored for its id; the
+    first line of the file is skipped.
+    '''
+    filename = get_file('comments.tsv')
+
+    with io.open(filename, 'r', encoding='cp1252') as textfile:
+        next(textfile)
+
+        for line in textfile:
+            tokens = line.strip().split('\t')
+            chebi_id = int(tokens[1])
+
+            # '%m' is the month directive; the previous '%M' means minutes,
+            # which silently left every parsed date in January.
+            comment_date = datetime.datetime.strptime(tokens[2], '%Y-%m-%d')
+            com = Comment(tokens[3], tokens[4], tokens[5], comment_date)
+
+            __COMMENTS.setdefault(chebi_id, []).append(com)
+
+
+def get_compound_origins(chebi_id):
+    '''Looks up the CompoundOrigin objects for chebi_id ([] if none).'''
+    if len(__COMPOUND_ORIGINS) == 0:
+        # An empty cache triggers parsing of compound_origins.tsv.
+        __parse_compound_origins()
+    return __COMPOUND_ORIGINS.get(chebi_id, [])
+
+
+def get_all_compound_origins(chebi_ids):
+    '''Concatenates the compound origins of every id in chebi_ids.'''
+    # Flatten the per-id lists, preserving the order of chebi_ids.
+    return [origin for chebi_id in chebi_ids
+            for origin in get_compound_origins(chebi_id)]
+
+
+def __parse_compound_origins():
+    '''Fills __COMPOUND_ORIGINS from the rows of compound_origins.tsv.
+
+    Each accepted row appends one CompoundOrigin to its id's list.
+    '''
+    filename = get_file('compound_origins.tsv')
+
+    with io.open(filename, 'r', encoding='cp1252') as textfile:
+        # The first line is skipped rather than parsed.
+        next(textfile)
+
+        for line in textfile:
+            tokens = line.strip().split('\t')
+
+            # Rows with fewer than eleven columns are ignored.
+            if len(tokens) <= 10:
+                continue
+
+            chebi_id = int(tokens[1])
+            origins = __COMPOUND_ORIGINS.setdefault(chebi_id, [])
+
+            # Columns 2-10 are passed, in order, to CompoundOrigin.
+            origins.append(CompoundOrigin(*tokens[2:11]))
+
+
+def get_status(chebi_id):
+    '''Looks up the status parsed for chebi_id (None if unknown).'''
+    if len(__STATUSES) == 0:
+        # An empty cache triggers parsing of compounds.tsv.gz.
+        __parse_compounds()
+    return __STATUSES.get(chebi_id)
+
+
+def get_source(chebi_id):
+    '''Looks up the source parsed for chebi_id (None if unknown).'''
+    if len(__SOURCES) == 0:
+        # An empty cache triggers parsing of compounds.tsv.gz.
+        __parse_compounds()
+    return __SOURCES.get(chebi_id)
+
+
+def get_parent_id(chebi_id):
+    '''Looks up the parent id parsed for chebi_id (NaN if unknown).'''
+    if len(__PARENT_IDS) == 0:
+        # An empty cache triggers parsing of compounds.tsv.gz.
+        __parse_compounds()
+    return __PARENT_IDS.get(chebi_id, float('NaN'))
+
+
+def get_all_ids(chebi_id):
+    '''Looks up the ids grouped under chebi_id ([] if unknown).'''
+    if len(__ALL_IDS) == 0:
+        # An empty cache triggers parsing of compounds.tsv.gz.
+        __parse_compounds()
+    return __ALL_IDS.get(chebi_id, [])
+
+
+def get_name(chebi_id):
+    '''Looks up the name parsed for chebi_id (None if unknown).'''
+    if len(__NAMES) == 0:
+        # An empty cache triggers parsing of compounds.tsv.gz.
+        __parse_compounds()
+    return __NAMES.get(chebi_id)
+
+
+def get_definition(chebi_id):
+    '''Looks up the definition parsed for chebi_id (None if unknown).'''
+    if len(__DEFINITIONS) == 0:
+        # An empty cache triggers parsing of compounds.tsv.gz.
+        __parse_compounds()
+    return __DEFINITIONS.get(chebi_id)
+
+
+def get_modified_on(chebi_id):
+    '''Looks up the modified-on datetime for chebi_id (None if unknown).'''
+    if len(__MODIFIED_ONS) == 0:
+        # An empty cache triggers parsing of compounds.tsv.gz.
+        __parse_compounds()
+    return __MODIFIED_ONS.get(chebi_id)
+
+
+def get_all_modified_on(chebi_ids):
+    '''Returns the latest modified-on value among chebi_ids, else None.'''
+    known = [stamp for stamp in map(get_modified_on, chebi_ids)
+             if stamp is not None]
+    # After sorting ascending, the final element is the most recent.
+    return sorted(known)[-1] if known else None
+
+
+def get_created_by(chebi_id):
+    '''Returns the created-by value for chebi_id, or None if unknown.'''
+    if not __CREATED_BYS:
+        __parse_compounds()
+    # Membership is tested on __CREATED_BYS itself, not __MODIFIED_ONS.
+    return __CREATED_BYS.get(chebi_id)
+
+
+def get_star(chebi_id):
+    '''Looks up the star value parsed for chebi_id (NaN if unknown).'''
+    if len(__STARS) == 0:
+        # An empty cache triggers parsing of compounds.tsv.gz.
+        __parse_compounds()
+    return __STARS.get(chebi_id, float('NaN'))
+
+
+def __parse_compounds():
+    '''Fills the per-compound caches from the rows of compounds.tsv:
+    __STATUSES, __SOURCES, __PARENT_IDS, __ALL_IDS, __NAMES,
+    __DEFINITIONS, __MODIFIED_ONS, __CREATED_BYS and __STARS.
+    '''
+    filename = get_file('compounds.tsv.gz')
+
+    with io.open(filename, 'r', encoding='cp1252') as textfile:
+        next(textfile)
+
+        for line in textfile:
+            tokens = line.strip().split('\t')
+            chebi_id = int(tokens[0])
+            __STATUSES[chebi_id] = tokens[1]
+            __SOURCES[chebi_id] = tokens[3]
+            # Each compound is filed under itself and under its parent, if any.
+            has_parent = tokens[4] != 'null'
+            parent_id = int(tokens[4]) if has_parent else float('NaN')
+            __PARENT_IDS[chebi_id] = parent_id
+            __put_all_ids(chebi_id, chebi_id)
+            if has_parent:
+                __put_all_ids(parent_id, chebi_id)
+            __NAMES[chebi_id] = tokens[5] if tokens[5] != 'null' else None
+            __DEFINITIONS[chebi_id] = \
+                tokens[6] if tokens[6] != 'null' else None
+            __MODIFIED_ONS[chebi_id] = \
+                datetime.datetime.strptime(tokens[7], '%Y-%m-%d') \
+                if tokens[7] != 'null' else None
+            # Nine-column rows: column 8 is the star value, created-by is None.
+            star_index = 9 if len(tokens) > 9 else 8
+            no_creator = tokens[8] == 'null' or len(tokens) == 9
+            __CREATED_BYS[chebi_id] = None if no_creator else tokens[8]
+            __STARS[chebi_id] = float('NaN') \
+                if tokens[star_index] == 'null' else int(tokens[star_index])
+
+
+def __put_all_ids(parent_id, child_id):
+    '''Appends child_id to the id list kept for parent_id in __ALL_IDS.'''
+    # setdefault creates the list the first time parent_id is seen, so
+    # the append below covers both the new-key and existing-key cases.
+    children = __ALL_IDS.setdefault(parent_id, [])
+    children.append(child_id)
+
+
+def get_database_accessions(chebi_id):
+    '''Looks up the DatabaseAccession objects for chebi_id ([] if none).'''
+    if len(__DATABASE_ACCESSIONS) == 0:
+        # An empty cache triggers parsing of database_accession.tsv.
+        __parse_database_accessions()
+
+    return __DATABASE_ACCESSIONS.get(chebi_id, [])
+
+
+def get_all_database_accessions(chebi_ids):
+    '''Concatenates the database accessions of every id in chebi_ids.'''
+    # Flatten the per-id lists, preserving the order of chebi_ids.
+    return [accession for chebi_id in chebi_ids
+            for accession in get_database_accessions(chebi_id)]
+
+
+def __parse_database_accessions():
+    '''Fills __DATABASE_ACCESSIONS from database_accession.tsv.'''
+    filename = get_file('database_accession.tsv')
+
+    with io.open(filename, 'r', encoding='cp1252') as textfile:
+        # The first line is skipped rather than parsed.
+        next(textfile)
+
+        for line in textfile:
+            tokens = line.strip().split('\t')
+            chebi_id = int(tokens[1])
+
+            # Fetch (or create) this id's list before building the entry.
+            accessions = __DATABASE_ACCESSIONS.setdefault(chebi_id, [])
+
+            # Note the argument order: columns 3 and 4 precede column 2.
+            dat_acc = DatabaseAccession(tokens[3], tokens[4], tokens[2])
+            accessions.append(dat_acc)
+
+
+def get_inchi(chebi_id):
+    '''Looks up the InChI string parsed for chebi_id (None if unknown).'''
+    if len(__INCHIS) == 0:
+        # An empty cache triggers parsing of chebiId_inchi.tsv.
+        __parse_inchi()
+    return __INCHIS.get(chebi_id)
+
+
+def __parse_inchi():
+    '''Fills __INCHIS (int id -> second column) from chebiId_inchi.tsv.'''
+    filename = get_file('chebiId_inchi.tsv')
+
+    with io.open(filename, 'r', encoding='cp1252') as textfile:
+        # The first line is skipped rather than parsed.
+        next(textfile)
+
+        for row in (line.strip().split('\t') for line in textfile):
+            __INCHIS[int(row[0])] = row[1]
+
+
+def get_names(chebi_id):
+    '''Looks up the Name objects parsed for chebi_id ([] if none).'''
+    if len(__ALL_NAMES) == 0:
+        # An empty cache triggers parsing of names.tsv.gz.
+        __parse_names()
+    return __ALL_NAMES.get(chebi_id, [])
+
+
+def get_all_names(chebi_ids):
+    '''Concatenates the names of every id in chebi_ids into one list.'''
+    return [name for chebi_id in chebi_ids
+            for name in get_names(chebi_id)]
+
+
+def __parse_names():
+    '''Fills __ALL_NAMES with the Name entries read from names.tsv.
+
+    Keys are integer ChEBI ids; values are lists of Name objects.
+    '''
+    filename = get_file('names.tsv.gz')
+
+    with io.open(filename, 'r', encoding='cp1252') as textfile:
+        # The first line is skipped rather than parsed.
+        next(textfile)
+
+        for line in textfile:
+            tokens = line.strip().split('\t')
+            chebi_id = int(tokens[1])
+
+            # Fetch (or create) this id's list before building the entry.
+            names = __ALL_NAMES.setdefault(chebi_id, [])
+
+            # Column 5 holds a 'T' flag, handed to Name as a boolean.
+            flag = tokens[5] == 'T'
+            nme = Name(tokens[4], tokens[2], tokens[3], flag, tokens[6])
+            names.append(nme)
+
+
+def get_references(chebi_ids):
+    '''Returns the Reference objects listed for any id in chebi_ids.
+
+    reference.tsv is scanned on every call; rows whose first column
+    matches one of chebi_ids (compared as strings) are converted.
+    '''
+    references = []
+    wanted = {str(chebi_id) for chebi_id in chebi_ids}  # O(1) membership
+    filename = get_file('reference.tsv.gz')
+
+    with io.open(filename, 'r', encoding='cp1252') as textfile:
+        next(textfile)
+
+        for line in textfile:
+            tokens = line.strip().split('\t')
+
+            if tokens[0] in wanted:
+                # strip() drops empty trailing columns, so a row may carry
+                # 3, 4 or 5 tokens; pass only the optional fields present
+                # (the old '> 3' test indexed tokens[4] on 4-token rows).
+                optional = tokens[3:5]
+                references.append(Reference(tokens[1], tokens[2], *optional))
+    return references
+
+
+def get_outgoings(chebi_id):
+    '''Looks up the outgoing Relation objects of chebi_id ([] if none).'''
+    if len(__OUTGOINGS) == 0:
+        # An empty cache triggers parsing of relation.tsv.
+        __parse_relation()
+    return __OUTGOINGS.get(chebi_id, [])
+
+
+def get_all_outgoings(chebi_ids):
+    '''Concatenates the outgoings of every id in chebi_ids into one list.'''
+    return [relation for chebi_id in chebi_ids
+            for relation in get_outgoings(chebi_id)]
+
+
+def get_incomings(chebi_id):
+    '''Looks up the incoming Relation objects of chebi_id ([] if none).'''
+    if len(__INCOMINGS) == 0:
+        # An empty cache triggers parsing of relation.tsv.
+        __parse_relation()
+    return __INCOMINGS.get(chebi_id, [])
+
+
+def get_all_incomings(chebi_ids):
+    '''Concatenates the incomings of every id in chebi_ids into one list.'''
+    return [relation for chebi_id in chebi_ids
+            for relation in get_incomings(chebi_id)]
+
+
+def __parse_relation():
+    '''Parses relation.tsv into __OUTGOINGS and __INCOMINGS.
+
+    For each row a Relation pointing at the target is appended to the
+    source id's outgoing list, and a Relation pointing at the source is
+    appended to the target id's incoming list.
+    '''
+    relation_filename = get_file('relation.tsv')
+
+    # A with-block (as in the other parsers) guarantees the handle is
+    # closed; it was previously left open for the life of the process.
+    with io.open(relation_filename, 'r', encoding='cp1252') as textfile:
+        next(textfile)
+
+        for line in textfile:
+            tokens = line.strip().split('\t')
+
+            # Column 3 is read as the source id and column 2 as the target.
+            source_chebi_id = int(tokens[3])
+            target_chebi_id = int(tokens[2])
+            typ = tokens[1]
+            __OUTGOINGS.setdefault(source_chebi_id, []).append(
+                Relation(typ, str(target_chebi_id), tokens[4]))
+            __INCOMINGS.setdefault(target_chebi_id, []).append(
+                Relation(typ, str(source_chebi_id), tokens[4]))
+
+
+def get_inchi_key(chebi_id):
+    '''Looks up the InChIKey Structure for chebi_id (None if unknown).'''
+    if len(__INCHI_KEYS) == 0:
+        # An empty cache triggers parsing of structures.csv.gz.
+        __parse_structures()
+    return __INCHI_KEYS.get(chebi_id)
+
+
+def get_smiles(chebi_id):
+    '''Looks up the SMILES Structure for chebi_id (None if unknown).'''
+    if len(__SMILES) == 0:
+        # An empty cache triggers parsing of structures.csv.gz.
+        __parse_structures()
+    return __SMILES.get(chebi_id)
+
+
+def get_mol(chebi_id):
+    '''Returns the default mol Structure for chebi_id, or None if absent.'''
+    chebi_id_regexp = '^\\d+\\,' + str(chebi_id) + '\\,.*'
+    mol_file_end_regexp = '\",mol,\\dD,[Y\\|N],[Y\\|N]$'
+    this_structure = []
+    # The structures file is scanned line by line on every call.
+    filename = get_file('structures.csv.gz')
+
+    with io.open(filename, 'r', encoding='cp1252') as textfile:
+        in_chebi_id = False
+        # The first line is skipped rather than parsed.
+        next(textfile)
+
+        for line in textfile:
+            if in_chebi_id or line[0].isdigit():
+                if re.match(chebi_id_regexp, line):
+                    tokens = line.strip().split(',')
+                    in_chebi_id = True
+                    this_structure = []
+                    this_structure.append(','.join(tokens[2:])
+                                          .replace('\"', ''))
+                    this_structure.append('\n')
+                elif in_chebi_id:
+                    # Inside a record for chebi_id: look for its last line.
+                    if re.match(mol_file_end_regexp, line):
+                        tokens = line.strip().split(',')
+                        # Only a record flagged as default is returned.
+                        if _is_default_structure(tokens[3]):
+                            tokens = line.strip().split(',')
+                            this_structure.append(tokens[0].replace('\"', ''))
+                            return Structure(''.join(this_structure),
+                                             Structure.mol,
+                                             int(tokens[2][0]))
+                        # Not the default: discard it and keep scanning.
+                        # else:
+                        this_structure = []
+                        in_chebi_id = False
+                        continue
+                    # Any other line is accumulated as structure text.
+                    this_structure.append(line)
+
+    return None
+
+
+def get_mol_filename(chebi_id):
+    '''Writes the mol structure of chebi_id to a temporary .mol file.
+
+    Returns the file's path, or None when get_mol finds no structure.
+    '''
+    mol = get_mol(chebi_id)
+    if mol is None:
+        return None
+    # mkstemp's positional order is (suffix, prefix); the old positional
+    # call swapped them, producing names like '.molXXXXXX<id>_'.
+    file_descriptor, mol_filename = tempfile.mkstemp(
+        suffix='.mol', prefix=str(chebi_id) + '_')
+    with os.fdopen(file_descriptor, 'w') as mol_file:
+        mol_file.write(mol.get_structure())
+    return mol_filename
+
+
+def __parse_structures():
+    '''Fills __INCHI_KEYS and __SMILES from structures.csv.
+
+    Only lines that split into exactly seven comma-separated fields
+    are considered; everything else is ignored.
+    '''
+    filename = get_file('structures.csv.gz')
+
+    # Map the value of field 3 to its cache and Structure type constant.
+    targets = {'InChIKey': (__INCHI_KEYS, Structure.InChIKey),
+               'SMILES': (__SMILES, Structure.SMILES)}
+
+    with io.open(filename, 'r', encoding='cp1252') as textfile:
+        next(textfile)
+
+        for line in textfile:
+            tokens = line.strip().split(',')
+            if len(tokens) == 7 and tokens[3] in targets:
+                cache, struct_type = targets[tokens[3]]
+                cache[int(tokens[1])] = \
+                    Structure(tokens[2], struct_type, int(tokens[4][0]))
+
+
+def get_file(filename):
+    '''Returns a local path to filename from the ChEBI FTP site.
+
+    The file is downloaded into the configured cache directory unless a
+    copy judged current by __is_current is already there. '.zip'
+    archives are extracted (the path of their first member is
+    returned) and '.gz' files are decompressed next to the download.
+    '''
+    destination = __DOWNLOAD_PARAMS['path']
+    filepath = os.path.join(destination, filename)
+
+    if not __is_current(filepath):
+        if not os.path.exists(destination):
+            os.makedirs(destination)
+
+        url = 'ftp://ftp.ebi.ac.uk/pub/databases/chebi/' + \
+            'Flat_file_tab_delimited/'
+        wget.download(url + filename, out=filepath)
+
+    if filepath.endswith('.zip'):
+        # The context manager closes the archive, which was leaked before.
+        with zipfile.ZipFile(filepath, 'r') as zfile:
+            filepath = os.path.join(destination, zfile.namelist()[0])
+            zfile.extractall(destination)
+    elif filepath.endswith('.gz'):
+        unzipped_filepath = filepath[:-len('.gz')]
+
+        if not (os.path.exists(unzipped_filepath)
+                and __is_current(unzipped_filepath)):
+            # Write to unzipped_filepath directly: re-joining the gzip
+            # handle's name onto destination duplicated the directory
+            # whenever the cache path was relative.
+            with gzip.open(filepath, 'rb') as input_file, \
+                    open(unzipped_filepath, 'wb') as output_file:
+                for line in input_file:
+                    output_file.write(line)
+
+        filepath = unzipped_filepath
+
+    return filepath
+
+
+def __is_current(filepath):
+    '''Reports whether the cached file at filepath can be reused.'''
+    # With auto-update off, True is returned without looking at the file.
+    if not __DOWNLOAD_PARAMS['auto_update']:
+        return True
+    if not os.path.isfile(filepath):
+        return False
+    # Compare the file's modification time (UTC) with the last update.
+    modified = datetime.datetime.utcfromtimestamp(os.path.getmtime(filepath))
+    return modified > __get_last_update_time()
+
+
+def __get_last_update_time():
+    '''Returns the latest first-Tuesday-of-a-month strictly before now (UTC).
+
+    The freshness check treats this instant as the last site update.
+    '''
+    now = datetime.datetime.utcnow()
+    this_months = __get_first_tuesday(now)
+    if this_months < now:
+        return this_months
+    # Not reached yet this month: fall back to the previous month, found
+    # by stepping one day back from the first of the current month.
+    month_start = datetime.datetime(now.year, now.month, 1)
+    return __get_first_tuesday(month_start - datetime.timedelta(days=1))
+
+
+def __get_first_tuesday(this_date):
+    '''Returns midnight on the first Tuesday of this_date's month.'''
+    year, month = this_date.year, this_date.month
+    # monthrange()[0] is the weekday of the 1st; the modulo gives the
+    # number of days from the 1st forward to the first Tuesday.
+    offset = (calendar.TUESDAY - calendar.monthrange(year, month)[0]) % 7
+    return datetime.datetime(year, month, 1) + datetime.timedelta(days=offset)
+
+
+def _is_default_structure(def_struct):
+    '''True when def_struct upper-cases to 'Y' (so 'Y' or 'y').'''
+    return 'Y' == def_struct.upper()
diff --git a/metanetx_uniprot/build.py b/metanetx_uniprot/build.py
new file mode 100644
index 00000000..edf45b74
--- /dev/null
+++ b/metanetx_uniprot/build.py
@@ -0,0 +1,78 @@
+'''
+SYNBIOCHEM-DB (c) University of Manchester 2015
+
+SYNBIOCHEM-DB is licensed under the MIT License.
+
+To view a copy of this license, visit <http://opensource.org/licenses/MIT/>.
+
+@author:  neilswainston
+'''
+import multiprocessing
+import sys
+
+import chebi_utils, chemical_utils, mnxref_utils, ncbi_taxonomy_utils, reaction_utils, rhea_utils, spectra_utils, utils, seq_utils #, kegg_utils
+
+
+def build_csv(dest_dir, array_delimiter, num_threads):
+    '''Builds the database CSV files in dest_dir.
+
+    Runs the NCBI Taxonomy, Rhea and MNXref steps in that order; the
+    ChEBI, spectrum and KEGG steps are commented out (reasons inline).
+    '''
+    writer = utils.Writer(dest_dir)
+    reac_man = reaction_utils.ReactionManager()
+
+    # Get Organism data:
+    print('Parsing NCBI Taxonomy')
+    ncbi_taxonomy_utils.load(reac_man, writer, array_delimiter) #--> writes Organism_Enzyme.tsv
+
+    # Get Chemical and Reaction data; write chemistry csv files:
+    chem_man = chemical_utils.ChemicalManager(array_delimiter=array_delimiter)
+
+    # Disabled, getting error: urllib.error.URLError: <urlopen error ftp error: error_temp('425 Failed to establish connection.')>
+    #print('Parsing ChEBI')
+    #chebi_utils.load(chem_man, writer)
+
+    # Spectrum data disabled: uses all memory (120+ GB) and is eventually killed.
+    #print('Parsing spectrum data')
+    #spectra_utils.load(writer, chem_man, array_delimiter=array_delimiter)
+
+    # Disabled: KEGG is not included for now.
+    # Get Reaction / Enzyme / Organism data:
+    #print('Parsing KEGG')
+    #kegg_utils.load(reac_man, num_threads=num_threads)
+
+    print('Parsing Rhea')
+    # rhea_utils.load returns the Rhea reaction ids.
+    reaction_ids = rhea_utils.load(reac_man, num_threads=num_threads)
+    reac_man.write_files(writer) #--> writes Enzyme_Reaction.tsv
+
+    print('Parsing MNXref')
+    # NOTE(review): process_ids is never assigned in this function or at
+    # module level, so the next line raises NameError - needs a real value.
+    mnx_loader = mnxref_utils.MnxRefLoader(chem_man, reac_man, writer, reaction_ids, process_ids,ncbi_taxonomy_utils,array_delimiter)
+    print('mxn loading')
+    mnx_loader.load() #--> writes Reaction_Chemical.tsv, Chemical_Process.tsv, ##NOT WORKING: Process_Disease.tsv, Process_Phenotype.tsv
+    chem_man.write_files(writer) #--> writes Chemicals.tsv
+    
+
+def main(args):
+    '''Runs build_csv with args = [dest_dir, array_delimiter, threads].'''
+    num_threads = 0
+    # threads is optional: an integer, or 'True' for one per CPU; else 0.
+    thread_arg = args[2] if len(args) > 2 else None
+    if thread_arg == 'True':
+        num_threads = multiprocessing.cpu_count()
+    elif thread_arg is not None:
+        try:
+            num_threads = int(thread_arg)
+        except ValueError:
+            pass
+    print('Running build with ' + str(num_threads) + ' threads')
+    build_csv(args[0], args[1], num_threads)
+
+
+
+
+if __name__ == '__main__':
+    main(sys.argv[1:])
diff --git a/metanetx_uniprot/build_taxa_ids.py b/metanetx_uniprot/build_taxa_ids.py
new file mode 100644
index 00000000..ab076d8f
--- /dev/null
+++ b/metanetx_uniprot/build_taxa_ids.py
@@ -0,0 +1,190 @@
+
+## Output all taxa IDs that exist in kg-microbe and as reference proteomes in UniProt.
+
+
+import os
+import sys
+import tarfile
+import tempfile
+import urllib
+from urllib.request import urlretrieve
+
+from kgx.cli.cli_utils import transform
+import pandas as pd
+from seq_utils import _get_uniprot_batch_reference_proteome
+
+import utils, seq_utils
+
+
+__NCBITAXONOMY_URL = 'ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz'
+
+__UNIPROT_REFERENCE_PROTEOMES_URL = 'https://rest.uniprot.org/proteomes/search?&format=tsv&query=%28%28taxonomy_id%3A2%29%20OR%20%28taxonomy_id%3A2157%29%29%20AND%20%28proteome_type%3A1%29&size=500'
+
+def build_csv(dest_dir, num_threads):
+    '''Builds the organism file: load() writes dest_dir/Organism.tsv.
+
+    num_threads is accepted but not used by this function.
+    '''
+    # Conflict resolved in favour of HEAD, whose side had dropped the
+    # commented-out utils.Writer line; the leftover merge markers made
+    # this module a syntax error.
+    print('Parsing NCBI Taxonomy')
+    load(dest_dir)  # --> writes Organism.tsv
+
+
+
+def load(output_dir, source=__NCBITAXONOMY_URL, ref_source=__UNIPROT_REFERENCE_PROTEOMES_URL):
+    '''Writes Organism.tsv: kg-microbe taxa that have a reference proteome.
+
+    Reads Files/ncbitaxon_nodes.tsv (relative to the current working
+    directory), keeps the rows whose NCBITaxon id is among the organism
+    ids returned by _get_uniprot_batch_reference_proteome(ref_source),
+    and writes them to output_dir/Organism.tsv.
+
+    Args:
+        output_dir: directory that receives Organism.tsv.
+        source: NCBI taxdump URL; unused on this code path (only the
+            disabled direct-from-NCBI route below needs it).
+        ref_source: UniProt reference-proteome query URL.
+
+    The merge conflicts left in this function are resolved in favour of
+    HEAD (TSV input); the other side parsed Files/ncbitaxon.json through
+    _parse_nodes_kgmicrobe first.
+    '''
+    # To get data directly from NCBI Taxon instead:
+    # nodes_filename, names_filename = _get_ncbi_taxonomy_files(source)
+    # nodes, rels = _parse_nodes(nodes_filename)
+    # _parse_names(nodes, names_filename)
+
+    # To get data from kg-microbe:
+    nodes_filename = os.getcwd()+'/Files/ncbitaxon_nodes.tsv'
+    # For testing
+    # nodes_filename = os.getcwd()+'/TestingFiles/ncbitaxon.json'
+    print('parsing ncbi taxon tsv file')
+
+    # Parse once and reuse the frame for the row count, instead of
+    # reading the file a second time just to measure it.
+    nodes, nodes_df = transform_kgx_output_format(nodes_filename)
+    print('length of ncbitaxon_nodes.tsv: ', len(nodes_df))
+
+    # Constrain by those that have reference proteomes, don't use if testing
+    ref_organisms = _get_uniprot_batch_reference_proteome(ref_source)
+    ref_organism_ids = {str(k['Organism Id']) for k in ref_organisms}
+    node_vals = [i for i in nodes if i in ref_organism_ids]
+
+    # Despite the label, these are reference-proteome ids absent from nodes.
+    nodes_not_in_refProteome = list(ref_organism_ids - set(nodes))
+    print('nodes_not_in_refProteome: ', nodes_not_in_refProteome)
+
+    node_vals = ['NCBITaxon:' + i for i in node_vals]
+    kgx_nodes_subset = nodes_df[nodes_df['id'].isin(node_vals)]
+    kgx_nodes_subset.to_csv(output_dir+'/Organism.tsv', index=False, sep='\t')
+    print('Wrote file: ', output_dir+'/Organism.tsv')
+
+def _get_ncbi_taxonomy_files(source):
+    '''Downloads the taxonomy archive at source and unpacks it.
+
+    Returns the nodes.dmp and names.dmp paths in the system temp dir.
+    '''
+    temp_dir = tempfile.gettempdir()
+    # Context managers close both handles even if the download or the
+    # extraction raises; the explicit close() calls were skipped then.
+    with tempfile.NamedTemporaryFile() as temp_gzipfile:
+        urlretrieve(source, temp_gzipfile.name)
+        with tarfile.open(temp_gzipfile.name, 'r:gz') as temp_tarfile:
+            temp_tarfile.extractall(temp_dir)
+    return os.path.join(temp_dir, 'nodes.dmp'), \
+        os.path.join(temp_dir, 'names.dmp')
+
+def _parse_nodes_kgmicrobe(filename, output_name, output_dir, input_format='tsv'):
+    '''Runs kgx transform on filename and returns the nodes TSV path.
+
+    input_format defaults to 'tsv' (the HEAD side of the old merge
+    conflict); pass 'obojson' for ncbitaxon.json, as the other side did.
+    The returned path is output_dir/<output_name>_nodes.tsv.
+    '''
+    output_base = os.path.join(output_dir, output_name)
+    transform(inputs=[filename], input_format=input_format, output=output_base, output_format='tsv')
+    return output_dir+'/'+output_name+'_nodes.tsv'
+    
+def transform_kgx_output_format(transformed_nodes_tsv):
+    '''Reads a KGX nodes TSV and extracts its NCBITaxon numeric ids.
+
+    Returns (nodes, labels): nodes lists, in file order, the text that
+    follows 'NCBITaxon:' in each id; labels is the DataFrame holding the
+    'id' and 'name' columns of the file. Ids that do not contain
+    'NCBITaxon:' are printed and left out of nodes.
+    '''
+    labels = pd.read_csv(transformed_nodes_tsv, sep='\t', usecols=['id', 'name'])
+
+    nodes = []
+    # Iterate the column directly; per-row .iloc lookups are far slower.
+    for node_id in labels['id']:
+        pieces = node_id.split('NCBITaxon:')
+        if len(pieces) > 1:
+            nodes.append(pieces[1])
+        else:
+            print(node_id)
+    return nodes, labels
+
+
+def _parse_nodes(filename):
+    '''Reads a '|'-delimited nodes file into node dicts and is_a links.
+
+    Returns (nodes, rels): nodes maps each id (field 0) to its property
+    dict; rels holds [id, 'is_a', field 1] for every id except '1'.
+    '''
+    nodes, rels = {}, []
+
+    with open(filename, 'r') as textfile:
+        for line in textfile:
+            fields = [part.strip() for part in line.split('|')]
+            tax_id, parent_id, label_extra = fields[0], fields[1], fields[2]
+            if tax_id != '1':
+                # Id '1' gets no is_a link.
+                rels.append([tax_id, 'is_a', parent_id])
+            nodes[tax_id] = {'taxonomy:ID(Organism)': tax_id,
+                             ':LABEL': 'Organism,' + label_extra}
+    return nodes, rels
+
+
+def _parse_names(nodes, filename):
+    '''Adds name data from a '|'-delimited names file to nodes, in place.
+
+    Sets node['name'] (first name seen) and a comma-joined
+    'names:string[]' of all names; unknown ids raise KeyError.
+    '''
+    with open(filename, 'r') as textfile:
+        for line in textfile:
+            fields = [part.strip() for part in line.split('|')]
+            node = nodes[fields[0]]
+            if 'name' in node:
+                node['names:string[]'].add(fields[1])
+            else:
+                node['name'] = fields[1]
+                node['names:string[]'] = {fields[1]}
+    for node in nodes.values():
+        if 'names:string[]' in node:
+            node['names:string[]'] = ','.join(node['names:string[]'])
+
+
+def main(args):
+    '''Runs build_csv with args = [dest_dir, num_threads (optional)].'''
+    # Imported here because the module's import block does not provide it.
+    import multiprocessing
+    num_threads = 0
+    # The thread count is args[1] here (args[2] never matched this CLI).
+    if len(args) > 1:
+        try:
+            num_threads = int(args[1])
+        except ValueError:
+            if args[1] == 'True':
+                num_threads = multiprocessing.cpu_count()
+    print('Running build with ' + str(num_threads) + ' threads')
+    build_csv(args[0], num_threads)
+
+
+
+
+if __name__ == '__main__':
+    main(sys.argv[1:])
\ No newline at end of file
diff --git a/metanetx_uniprot/chebi_utils.py b/metanetx_uniprot/chebi_utils.py
new file mode 100644
index 00000000..284a687d
--- /dev/null
+++ b/metanetx_uniprot/chebi_utils.py
@@ -0,0 +1,39 @@
+'''
+SYNBIOCHEM-DB (c) University of Manchester 2015
+
+SYNBIOCHEM-DB is licensed under the MIT License.
+
+To view a copy of this license, visit <http://opensource.org/licenses/MIT/>.
+
+@author:  neilswainston
+'''
+from libchebipy._chebi_entity import ChebiEntity
+
+
+def load(chem_manager, writer):
+    '''Walks ChEBI from CHEBI:24431 and writes the collected relations.'''
+    # Both lists are filled in place by the recursive _add_node walk.
+    visited_ids, relations = [], []
+    _add_node('CHEBI:24431', visited_ids, relations, chem_manager)
+
+    # The relations are written as Chemical-to-Chemical links.
+    writer.write_rels(relations, 'Chemical', 'Chemical')
+
+
+def _add_node(chebi_id, chebi_ids, rels, chem_manager):
+    '''Adds chebi_id, then recurses through its incoming relations.
+
+    chebi_ids records the ids already visited and rels collects
+    [linked id, relation type, chem_id] triples; both grow in place.
+    '''
+    if chebi_id in chebi_ids:
+        return  # already visited
+    chebi_ids.append(chebi_id)
+    chem_id, entity = chem_manager.add_chemical({'chebi': chebi_id})
+
+    for incoming in entity.get_incomings():
+        # Prefer the parent id whenever the linked entity reports one.
+        linked_id = incoming.get_target_chebi_id()
+        target_id = ChebiEntity(linked_id).get_parent_id() or linked_id
+        _add_node(target_id, chebi_ids, rels, chem_manager)
+        rels.append([target_id, incoming.get_type(), chem_id])
diff --git a/metanetx_uniprot/chemical_utils.py b/metanetx_uniprot/chemical_utils.py
new file mode 100644
index 00000000..dc3fb310
--- /dev/null
+++ b/metanetx_uniprot/chemical_utils.py
@@ -0,0 +1,175 @@
+'''
+SYNBIOCHEM-DB (c) University of Manchester 2015
+
+SYNBIOCHEM-DB is licensed under the MIT License.
+
+To view a copy of this license, visit <http://opensource.org/licenses/MIT/>.
+
+@author:  neilswainston
+'''
+import math
+import uuid
+
+from libchebipy._chebi_entity import ChebiEntity, ChebiException
+
+import namespace_utils as ns_utils
+from synbiochem.utils import chem_utils
+
+
+class ChemicalManager(object):
+    '''Class to implement a manager of Chemical data.'''
+
+    def __init__(self, array_delimiter):
+        '''Constructor.'''
+        self.__array_delimiter = array_delimiter
+        self.__nodes = {}
+        self.__chem_ids = {}
+
+    def write_files(self, writer):
+        '''Write neo4j import files.'''
+        return writer.write_nodes(self.__nodes.values(), 'Chemical')
+
+    def write_rels(self, writer, rels):
+        return writer.write_rels(rels, 'Chemical', 'Process')
+
+    def add_chemical(self, properties):
+        '''Adds a chemical to the collection of nodes, ensuring uniqueness.'''
+        chem_id, chebi_ent = self.__get_chem_id(properties)
+
+        if 'charge:float' in properties:
+            charge = properties.pop('charge:float')
+
+            if not math.isnan(charge):
+                properties['charge:float'] = int(charge)
+
+        if chem_id not in self.__nodes:
+            properties[':LABEL'] = 'Chemical'
+            properties['id:ID(Chemical)'] = chem_id
+            properties['source'] = 'chebi' if 'chebi' in properties else 'mnx'
+
+            _normalise_mass(properties)
+            self.__nodes[chem_id] = properties
+        else:
+            self.__nodes[chem_id].update(properties)
+
+        return chem_id, chebi_ent
+
+    def get_props(self, prop, default=None):
+        '''Gets all chem_ids to property as a dict.'''
+        return {key: self.__nodes[chem_id].get(prop, default)
+                for key, chem_id in self.__chem_ids.items()}
+
+    def get_prop(self, chem_id, prop, default=None):
+        '''Gets a property.'''
+        return self.__nodes[self.__chem_ids[chem_id]].get(prop, default)
+
+    def __get_chem_id(self, properties):
+        '''Manages chemical id mapping.'''
+        chebi_id = properties.get('chebi', None)
+        chebi_ent = None
+
+        if chebi_id:
+            try:
+                chebi_id, chebi_ent = _get_chebi_data(chebi_id, properties,
+                                                      self.__array_delimiter)
+            except ChebiException as exception:
+                properties.pop('chebi')
+                chebi_id = None
+                print(exception)
+            except ValueError as exception:
+                properties.pop('chebi')
+                chebi_id = None
+                print(exception)
+
+        mnx_id = properties.get('mnx', None)
+        inchi_id = properties.get('inchi', None)
+
+        if chebi_id:
+            self.__chem_ids[chebi_id] = chebi_id
+
+            if inchi_id:
+                self.__chem_ids[inchi_id] = chebi_id
+
+            if mnx_id:
+                self.__chem_ids[mnx_id] = chebi_id
+
+            return chebi_id, chebi_ent
+
+        if inchi_id:
+            chem_id = self.__chem_ids.get(inchi_id, None)
+
+            if chem_id:
+                return chem_id, None
+
+        if mnx_id:
+            chem_id = self.__chem_ids.get(mnx_id, None)
+
+            if chem_id:
+                return chem_id, None
+
+            if inchi_id:
+                self.__chem_ids[inchi_id] = mnx_id
+
+            self.__chem_ids[mnx_id] = mnx_id
+            return mnx_id, None
+
+        new_id = str(uuid.uuid4())
+        self.__chem_ids[inchi_id] = new_id
+
+        return new_id, None
+
+
+def _get_chebi_data(chebi_id, properties, array_delimiter):
+    '''Gets ChEBI data.'''
+    chebi_ent = ChebiEntity(str(chebi_id))
+
+    if chebi_ent.get_parent_id():
+        chebi_id = chebi_ent.get_parent_id()
+    else:
+        chebi_id = chebi_ent.get_id()
+
+    properties['chebi'] = chebi_id
+
+    formula = chebi_ent.get_formula()
+    charge = chebi_ent.get_charge()
+    inchi = chebi_ent.get_inchi()
+    smiles = chebi_ent.get_smiles()
+
+    if formula:
+        properties['formula'] = formula
+
+    if not math.isnan(charge):
+        properties['charge:float'] = charge
+
+    if inchi:
+        properties['inchi'] = inchi
+
+    if smiles:
+        properties['smiles'] = smiles
+
+    properties['name'] = chebi_ent.get_name()
+    properties['names:string[]'] = \
+        array_delimiter.join([name.get_name()
+                              for name in chebi_ent.get_names()] +
+                             [chebi_ent.get_name()])
+
+    for db_acc in chebi_ent.get_database_accessions():
+        namespace = ns_utils.resolve_namespace(
+            db_acc.get_type(), True)
+
+        if namespace is not None:
+            properties[namespace] = db_acc.get_accession_number()
+
+    return chebi_id, chebi_ent
+
+
+def _normalise_mass(properties):
+    '''Removes ambiguity in mass values by recalculating according to chemical
+    formula.'''
+    properties.pop('mass:float', None)
+
+    if 'formula' in properties and properties['formula'] is not None:
+        mono_mass = chem_utils.get_molecular_mass(properties['formula'])
+
+        if not math.isnan(mono_mass):
+            properties['monoisotopic_mass:float'] = mono_mass
diff --git a/metanetx_uniprot/combine_rels.py b/metanetx_uniprot/combine_rels.py
new file mode 100644
index 00000000..0446c8c1
--- /dev/null
+++ b/metanetx_uniprot/combine_rels.py
@@ -0,0 +1,64 @@
+import os
+import pandas as pd
+import argparse
+
+
+def parse_kg_file(kg_filename):
+
+    kg = pd.read_csv(kg_filename,delimiter=';')
+
+    if len(kg.columns) == 3: kg.columns = [['subject','predicate','object']]
+    if len(kg.columns) == 4:
+        kg.columns = [['subject','predicate','object','source']]
+        kg = kg[['subject','predicate','object']]
+
+    return kg
+
+def concat_kgs(kg1,kg2):
+
+    combined_kg = pd.concat([kg1, kg2], axis=0)
+    combined_kg = combined_kg.drop_duplicates().reset_index(drop=True)
+
+    return combined_kg
+
+#Define arguments for each required and optional input
+def defineArguments():
+    parser=argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+
+    parser.add_argument("--directory",dest="Directory",required=True,help="Directory")
+
+    return parser
+
+def main():
+
+    #rels_files_dir = '/Users/brooksantangelo/Documents/HunterLab/Exploration/biochem4j/kg-microbe/metanetx_uniprot/refProteome/LocalRun_0915
+
+    #Generate argument parser and define arguments
+    parser = defineArguments()
+    args = parser.parse_args()
+    
+    directory = args.Directory
+
+    rels_files_dir = directory+'/rels/'
+    rels_files = os.listdir(rels_files_dir)
+
+    rels_files = [i for i in rels_files if 'combined_kg' not in i]
+
+    kg_0 = parse_kg_file(rels_files_dir+rels_files[0])
+
+    for fname in rels_files[1:]:
+
+        if fname.endswith('.csv'):
+
+            kg = parse_kg_file(rels_files_dir+fname)
+            kg_0 = concat_kgs(kg_0,kg)
+
+    kg_0.to_csv(rels_files_dir + 'combined_kg.csv', sep = "\t", index = False)
+
+
+if __name__ == '__main__':
+    main()
+
+
+    
+
diff --git a/metanetx_uniprot/create_labels_file.py b/metanetx_uniprot/create_labels_file.py
new file mode 100644
index 00000000..4d25eab4
--- /dev/null
+++ b/metanetx_uniprot/create_labels_file.py
@@ -0,0 +1,151 @@
+
+
+
+from tqdm import tqdm
+import pandas as pd
+import argparse
+from collections import defaultdict 
+
+
+
+def process_kg_covid19_files(triples_file,labels_file):
+    triples_df = pd.read_csv(triples_file,sep = '\t', usecols = ['subject', 'object', 'predicate'])
+    triples_df.columns.str.lower()
+
+    labels = pd.read_csv(labels_file, sep = '\t', usecols = ['id','category', 'name','description'])
+
+    triples_df_relevant = triples_df.loc[((triples_df['subject'].str.contains('MONDO:')) & (triples_df['object'].str.contains('GO:'))) | ((triples_df['object'].str.contains('MONDO:')) & (triples_df['subject'].str.contains('GO:')))]
+
+    labels_relevant = labels.loc[(labels['id'].str.contains('MONDO:')) | (labels['id'].str.contains('GO:')) | (labels['id'].str.contains('CHEBI:')) | (labels['id'].str.contains('NCBITaxon:'))]
+    
+    #1785727 total, 435 total MONDO/GO or GO/MONDO relationships
+    print(len(labels_relevant),len(labels))
+    
+    return triples_df_relevant,labels_relevant
+
+def get_process_disease_phenio_data(triples_file,labels_file,process_ids):
+
+    print('Extracting kg-phenio relationships')
+    triples_df, labels_dict = process_kg_covid19_files(triples_file,labels_file)
+
+    #triples_df = triples_df.replace(regex=['http://purl.obolibrary.org/obo/'],value='').replace(regex=['_'],value=':')
+    
+    rels = []
+
+    for i in tqdm(range(len(triples_df))):
+        if triples_df.iloc[i].loc['object'] in process_ids and 'MONDO:' in triples_df.iloc[i].loc['subject']:
+        #if ('GO_' in triples_df.iloc[i].loc['subject'] and 'MONDO_' in triples_df.iloc[i].loc['object']) or ('GO_' in triples_df.iloc[i].loc['object'] and 'MONDO_' in triples_df.iloc[i].loc['subject']):
+            print(triples_df.iloc[i])
+            rels.append([triples_df.iloc[i].loc['subject'], triples_df.iloc[i].loc['predicate'],
+                                    triples_df.iloc[i].loc['object'],
+                                    {'source': 'kg-phenio'}])
+
+    return rels
+
+#Define arguments for each required and optional input
+def defineArguments():
+    parser=argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+
+    parser.add_argument("--directory",dest="Directory",required=True,help="Directory")
+
+    return parser
+
+def main():
+
+    #directory = '/Users/brooksantangelo/Documents/HunterLab/Exploration/biochem4j/kg-microbe/metanetx_uniprot/refProteome/LocalRun_0915'
+
+    #Generate argument parser and define arguments
+    parser = defineArguments()
+    args = parser.parse_args()
+    
+    directory = args.Directory
+
+    phenio_labels_file = '/Users/brooksantangelo/Documents/HunterLab/Exploration/kg-phenio/phenio_merged-kg_nodes.tsv'
+    phenio_triples_file = '/Users/brooksantangelo/Documents/HunterLab/Exploration/kg-phenio/phenio_merged-kg_edges.tsv'
+
+    #Updated 6/19 based on file location
+    kg_covid19_triples_file = '/Users/brooksantangelo/Documents/HunterLab/Cartoomics/PostRevisionUpdates/Inputs/kg-covid19/merged-kg_edges.tsv'
+    kg_covid19_labels_file = '/Users/brooksantangelo/Documents/HunterLab/Cartoomics/PostRevisionUpdates/Inputs/kg-covid19/merged-kg_nodes.tsv'
+
+    enzyme_file = directory + '/nodes' + '/Enzyme.csv' 
+
+    kg_filename = directory + '/rels' + '/combined_kg.csv' 
+
+    kg = pd.read_csv(kg_filename,delimiter='\t')
+    kg = kg[['subject','object']]
+    kg_vals = pd.unique(kg[['subject', 'object']].values.ravel()).tolist()
+    kg_vals = [str(x) for x in kg_vals]
+
+    kg_labels = {}
+
+    phenio_triples,phenio_labels = process_kg_covid19_files(phenio_triples_file,phenio_labels_file)
+    covid19_triples,covid19_labels = process_kg_covid19_files(kg_covid19_triples_file,kg_covid19_labels_file)
+
+    enzyme_df = pd.read_csv(enzyme_file,delimiter=';')
+    enz_list = []
+
+    #Get uri (ex: O88037) and labels (ex: Probable SapB synthase) for all enzymes 
+    print('extracting enzyme labels')
+    for i in range(len(enzyme_df)):
+        enz_list.append({'id': 'Uniprot:'+enzyme_df.iloc[i].loc['uniprot:ID(Enzyme)'] ,
+                   'category': 'biolink:Protein' ,
+                   'name': enzyme_df.iloc[i].loc['names'],
+                   'description': ''})
+        
+    enzyme_new_df = pd.DataFrame(enz_list)
+    
+    kg_list = []
+    #Convert all uris that exist in phenio or kg-covid19 to labels
+    for i in tqdm(kg_vals):
+        #Determine category of node. What if GO term is not biological process?
+        if 'NCBITaxon:' in i: cat = 'biolink:OrganismalEntity'
+        if 'MONDO:' in i: cat = 'biolink:Disease'
+        if 'CHEBI:' in i: cat = 'biolink:ChemicalSubstance'
+        if 'GO:' in i: cat = 'biolink:BiologicalProcess'
+        try:
+            kg_list.append({'id': i ,
+                   'category':  cat ,
+                   'name': phenio_labels.loc[phenio_labels['id'] == i,'name'].values[0],
+                   'description': ''})
+        except (KeyError,IndexError):
+            #print('val doesnt exist in phenio: ',i)
+            pass
+        try:
+            kg_list.append({'id': i ,
+                   'category':  cat ,
+                   'name': covid19_labels.loc[covid19_labels['id'] == i,'name'].values[0],
+                   'description': ''})
+        except (KeyError,IndexError):
+            #print('val doesnt exist in kg-covid19: ',i)
+            pass
+
+    kg_new_df = pd.DataFrame(kg_list)
+
+    #Combine enzymes df with other labels from phenio and kg-covid19
+    combined_nodes = pd.concat([kg_new_df, enzyme_new_df], axis=0)
+
+    #Add Rhea labels:
+    rhea_vals = [i for i in kg_vals if 'rhea' in i.lower()]
+    rhea_list = []
+    #Dictionary to output Rhea nodes in current kg form, not kgx
+    rhea_labels = {}
+    for i in rhea_vals:
+        rhea_list.append({'id': i ,
+                   'category':  'biolink:Reaction' ,
+                   'name': i,
+                   'description': ''})
+        rhea_labels[i] = {'id':i, 'label':i}
+
+    #Output Rhea_nodes file
+    rhea_kg_df = pd.DataFrame(rhea_labels.values())
+    rhea_kg_df.to_csv(directory + '/nodes' + '/Rhea_nodes.csv', index=False, encoding='utf-8', sep=';')
+
+    rhea_new_df = pd.DataFrame(rhea_list)
+
+    #Combine all df label types and output
+    combined_nodes = pd.concat([combined_nodes, rhea_new_df], axis=0)
+    combined_nodes.to_csv(directory + '/combined_kgx_merged-kg_nodes.csv',sep='\t',index=False)
+    
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
diff --git a/metanetx_uniprot/enzyme_utils.py b/metanetx_uniprot/enzyme_utils.py
new file mode 100644
index 00000000..95de560e
--- /dev/null
+++ b/metanetx_uniprot/enzyme_utils.py
@@ -0,0 +1,114 @@
+'''
+SYNBIOCHEM-DB (c) University of Manchester 2015
+
+SYNBIOCHEM-DB is licensed under the MIT License.
+
+To view a copy of this license, visit <http://opensource.org/licenses/MIT/>.
+
+@author:  neilswainston
+'''
+#from synbiochem.utils import seq_utils
+import queue
+from seq_utils import *
+
+
+class EnzymeManager(object):
+    '''Class to implement a manager of Enzyme data.'''
+
+    def __init__(self):
+        '''Constructor.'''
+        self.__nodes = {}
+        self.__node_enzymes = {}
+        self.__org_enz_rels = []
+
+    def get_nodes(self):
+        '''Gets enzyme nodes.'''
+        return self.__nodes.values()
+
+    def get_enz_nodes(self):
+        #nodes_enzymes_df = pd.DataFrame(self.__node_enzymes.items(), columns=['entity_uri', 'label'])
+        return self.__node_enzymes.values()
+
+
+    def get_org_enz_rels(self):
+        '''Gets organism-to-enzyme relationships.'''
+        return self.__org_enz_rels
+
+    def add_uniprot_data(self, enzyme_ids, source, num_threads=0):
+        '''Gets Uniprot data.'''
+
+        #fields = ['entry name', 'protein names', 'organism-id', 'ec']
+        fields = ['id', 'protein_name', 'organism_id', 'ec']
+        enzyme_ids = [enzyme_id for enzyme_id in enzyme_ids
+                      if enzyme_id not in self.__nodes]
+        uniprot_values = get_uniprot_values(enzyme_ids, fields,
+                                                      batch_size=128,  # changed to 128 from 512
+                                                      verbose=False,  #Changed to False
+                                                      num_threads=num_threads)
+
+        for uniprot_id, uniprot_value in uniprot_values.items():
+            enzyme_node = {':LABEL': 'Enzyme',
+                           'uniprot:ID(Enzyme)': uniprot_id}
+            self.__nodes[uniprot_id] = enzyme_node
+
+            organism_id = uniprot_value.pop('Organism (ID)') \
+                if 'Organism (ID)' in uniprot_value else None
+
+            if 'Entry name' in uniprot_value:
+                enzyme_node['entry'] = 'Uniprot:'+uniprot_value['Entry name']
+
+            if 'Protein names' in uniprot_value:
+                enzyme_node['names'] = 'Uniprot:'+uniprot_value['Protein names']
+
+                if enzyme_node['names']:
+                    enzyme_node['name'] = enzyme_node['names'][0]
+
+            if 'EC number' in uniprot_value:
+                enzyme_node['ec-code'] = uniprot_value['EC number']
+
+            if organism_id:
+                self.__org_enz_rels.append([organism_id, 'expresses',
+                                            uniprot_id, {'source': source}])
+
+    #Builds into reactionManager
+    def add_uniprot_data_organism(self, organism_ids, source, num_threads=0):
+        '''Gets Uniprot data.'''
+
+        fields = ['id', 'accession','protein_name', 'organism_id', 'ec']
+        print('querying uniprot for enzymes per organism')
+        ##Uniprot returns list of dicts for each entry
+        uniprot_values = get_uniprot_values_organism(organism_ids, fields, 
+                                                                   batch_size=128,
+                                                                   verbose=False,
+                                                                   num_threads=num_threads)
+
+        print('adding uniprot data to graph')
+        
+        ##To return all organism-enzyme entries
+        for entry in tqdm(uniprot_values):
+            enzyme_node = {':LABEL': 'Enzyme',
+                        'uniprot:ID(Enzyme)': entry['Entry']}
+            self.__nodes[entry['Entry']] = enzyme_node
+
+            organism_id = entry['Organism (ID)'] \
+                if 'Organism (ID)' in entry.keys() else None
+
+            if 'Entry' in entry.keys():
+                enzyme_node['entry'] = entry['Entry']
+
+            if 'Protein names' in entry:
+                enzyme_node['names'] = entry['Protein names'][0]
+
+                if 'names' in entry.keys():
+                    enzyme_node['name'] = entry['names'][0]
+
+            if 'EC number' in entry:
+                enzyme_node['ec-code'] = entry['EC number']
+
+            if organism_id:
+                self.__org_enz_rels.append(['NCBITaxon:'+organism_id, 'expresses','Uniprot:'+entry['Entry'], {'source': source}])
+
+            self.__node_enzymes['Uniprot:'+entry['Entry']] = {'entity_uri':'Uniprot:'+entry['Entry'], 'label':enzyme_node['names']}
+
+        return uniprot_values
+        
\ No newline at end of file
diff --git a/metanetx_uniprot/go_utils.py b/metanetx_uniprot/go_utils.py
new file mode 100644
index 00000000..8d8fab92
--- /dev/null
+++ b/metanetx_uniprot/go_utils.py
@@ -0,0 +1,10 @@
+from kgx.cli.cli_utils import transform
+import os
+
+# Script body: runs at import time and converts the GO-plus OWL file with
+# kgx's transform. Both paths below are absolute paths on the original
+# author's machine and must be edited before running anywhere else.
+go_plus_file = '/Users/brooksantangelo/Documents/HunterLab/Exploration/biochem4j/kg-microbe/metanetx_uniprot/Files/go-plus.owl'
+
+
+# Output location; 'name' is joined onto output_dir and passed as the
+# transform output.
+output_dir = '/Users/brooksantangelo/Documents/HunterLab/biochem4j/biochem4j/'
+name = 'go_plus_transformed'
+
+# The input is read with input_format='xml' and written with
+# output_format='tsv'.
+transform(inputs=[go_plus_file], input_format='xml', output= os.path.join(output_dir, name), output_format='tsv')
diff --git a/metanetx_uniprot/index.py b/metanetx_uniprot/index.py
new file mode 100644
index 00000000..3adb2499
--- /dev/null
+++ b/metanetx_uniprot/index.py
@@ -0,0 +1,32 @@
+'''
+SYNBIOCHEM-DB (c) University of Manchester 2015
+
+SYNBIOCHEM-DB is licensed under the MIT License.
+
+To view a copy of this license, visit <http://opensource.org/licenses/MIT/>.
+
+@author:  neilswainston
+'''
+import os
+import subprocess
+import sys
+
+
+def index_db(db_loc):
+    '''Index database.'''
+    directory = os.path.dirname(os.path.realpath(__file__))
+    filename = os.path.join(directory, 'init.cql')
+
+    with open(filename, 'rU') as init_file:
+        for line in init_file:
+            params = ['neo4j-shell', '-path', db_loc, '-c', line.strip()]
+            subprocess.call(params)
+
+
+def main(argv):
+    '''main method'''
+    index_db(argv[0])
+
+
+if __name__ == '__main__':
+    main(sys.argv[1:])
diff --git a/metanetx_uniprot/init.cql b/metanetx_uniprot/init.cql
new file mode 100644
index 00000000..7e7216e9
--- /dev/null
+++ b/metanetx_uniprot/init.cql
@@ -0,0 +1,35 @@
+CREATE CONSTRAINT ON (n:Organism) ASSERT n.taxonomy IS UNIQUE;
+CREATE CONSTRAINT ON (n:Enzyme) ASSERT n.entry IS UNIQUE;
+CREATE CONSTRAINT ON (n:Enzyme) ASSERT n.uniprot IS UNIQUE;
+CREATE CONSTRAINT ON (n:Reaction) ASSERT n.`bigg.reaction` IS UNIQUE;
+CREATE CONSTRAINT ON (n:Reaction) ASSERT n.id IS UNIQUE;
+CREATE CONSTRAINT ON (n:Reaction) ASSERT n.`kegg.reaction` IS UNIQUE;
+CREATE CONSTRAINT ON (n:Reaction) ASSERT n.metacyc IS UNIQUE;
+CREATE CONSTRAINT ON (n:Reaction) ASSERT n.mnx IS UNIQUE;
+CREATE CONSTRAINT ON (n:Reaction) ASSERT n.reactome IS UNIQUE;
+CREATE CONSTRAINT ON (n:Reaction) ASSERT n.rhea IS UNIQUE;
+CREATE CONSTRAINT ON (n:Reaction) ASSERT n.seed IS UNIQUE;
+CREATE CONSTRAINT ON (n:Chemical) ASSERT n.`bigg.metabolite` IS UNIQUE;
+CREATE CONSTRAINT ON (n:Chemical) ASSERT n.cas IS UNIQUE;
+CREATE CONSTRAINT ON (n:Chemical) ASSERT n.chebi IS UNIQUE;
+CREATE CONSTRAINT ON (n:Chemical) ASSERT n.chemidplus IS UNIQUE;
+CREATE CONSTRAINT ON (n:Chemical) ASSERT n.chemspider IS UNIQUE;
+CREATE CONSTRAINT ON (n:Chemical) ASSERT n.drugbank IS UNIQUE;
+CREATE CONSTRAINT ON (n:Chemical) ASSERT n.hmdb IS UNIQUE;
+CREATE CONSTRAINT ON (n:Chemical) ASSERT n.id IS UNIQUE;
+CREATE CONSTRAINT ON (n:Chemical) ASSERT n.`kegg.compound` IS UNIQUE;
+CREATE CONSTRAINT ON (n:Chemical) ASSERT n.`kegg.drug` IS UNIQUE;
+CREATE CONSTRAINT ON (n:Chemical) ASSERT n.`kegg.glycan` IS UNIQUE;
+CREATE CONSTRAINT ON (n:Chemical) ASSERT n.knapsack IS UNIQUE;
+CREATE CONSTRAINT ON (n:Chemical) ASSERT n.lipidmaps IS UNIQUE;
+CREATE CONSTRAINT ON (n:Chemical) ASSERT n.metacyc IS UNIQUE;
+CREATE CONSTRAINT ON (n:Chemical) ASSERT n.mnx IS UNIQUE;
+CREATE CONSTRAINT ON (n:Chemical) ASSERT n.molbase IS UNIQUE;
+CREATE CONSTRAINT ON (n:Chemical) ASSERT n.pdb IS UNIQUE;
+CREATE CONSTRAINT ON (n:Chemical) ASSERT n.pubmed IS UNIQUE;
+CREATE CONSTRAINT ON (n:Chemical) ASSERT n.reactome IS UNIQUE;
+CREATE CONSTRAINT ON (n:Chemical) ASSERT n.resid IS UNIQUE;
+CREATE CONSTRAINT ON (n:Chemical) ASSERT n.`seed.compound` IS UNIQUE;
+CREATE CONSTRAINT ON (n:Chemical) ASSERT n.`umbbd.compound` IS UNIQUE;
+CREATE CONSTRAINT ON (n:Chemical) ASSERT n.unipathway IS UNIQUE;
+CREATE CONSTRAINT ON (n:Chemical) ASSERT n.`wikipedia.en` IS UNIQUE;
\ No newline at end of file
diff --git a/metanetx_uniprot/kegg_utils.py b/metanetx_uniprot/kegg_utils.py
new file mode 100644
index 00000000..95c8d65b
--- /dev/null
+++ b/metanetx_uniprot/kegg_utils.py
@@ -0,0 +1,93 @@
+'''
+SYNBIOCHEM-DB (c) University of Manchester 2015
+
+SYNBIOCHEM-DB is licensed under the MIT License.
+
+To view a copy of this license, visit <http://opensource.org/licenses/MIT/>.
+
+@author:  neilswainston
+'''
+from collections import defaultdict
+import urllib
+from urllib.request import urlopen
+
+from synbiochem.utils import thread_utils
+
+
+def load(reaction_manager, organisms=None, num_threads=0):
+    '''Loads KEGG data.'''
+
+    if organisms is None:
+        organisms = \
+            sorted([line.split()[1] for line in
+                    urllib.urlopen('http://rest.kegg.jp/list/organism')])
+
+    # EC to gene, gene to Uniprot:
+    ec_genes, gene_uniprots = _get_gene_data(organisms, num_threads)
+
+    data = defaultdict(list)
+
+    # KEGG Reaction to EC:
+    kegg_reac_ec = _parse_url('http://rest.kegg.jp/link/ec/reaction')
+
+    for kegg_reac, ec_terms in kegg_reac_ec.items():
+        for ec_term in ec_terms:
+            if ec_term in ec_genes:
+                for gene in ec_genes[ec_term]:
+                    if gene in gene_uniprots:
+                        uniprots = [val[3:] for val in gene_uniprots[gene]]
+                        data[kegg_reac[3:]].extend(uniprots)
+
+    reaction_manager.add_react_to_enz(data, 'kegg.reaction', num_threads)
+
+
+def _get_gene_data(organisms, num_threads):
+    '''Gets gene data.'''
+    ec_genes = defaultdict(list)
+    gene_uniprots = defaultdict(list)
+
+    if num_threads:
+        thread_pool = thread_utils.ThreadPool(num_threads)
+
+        for org in organisms:
+            thread_pool.add_task(_parse_organism, org, ec_genes, gene_uniprots)
+
+        thread_pool.wait_completion()
+    else:
+        for org in organisms:
+            _parse_organism(org, ec_genes, gene_uniprots)
+
+    return ec_genes, gene_uniprots
+
+
+def _parse_organism(org, ec_genes, gene_uniprots):
+    '''Parse organism.'''
+    print 'KEGG: loading ' + org
+
+    for key, value in _parse_url('http://rest.kegg.jp/link/' + org.lower() +
+                                 '/enzyme').items():
+        ec_genes[key].extend(value)
+
+    for key, value in _parse_url('http://rest.kegg.jp/conv/uniprot/' +
+                                 org.lower()).items():
+        gene_uniprots[key].extend(value)
+
+
+def _parse_url(url, attempts=16):
+    '''Parses url to form key to list of values dictionary.'''
+    data = defaultdict(list)
+
+    for _ in range(attempts):
+        try:
+            for line in urllib.urlopen(url):
+                tokens = line.split()
+
+                if len(tokens) > 1:
+                    data[tokens[0]].append(tokens[1])
+
+            return data
+        except urllib.URLError as err:
+            # Take no action, but try again...
+            print '\t'.join([url, str(err)])
+
+    return data
diff --git a/metanetx_uniprot/mnxref_utils.py b/metanetx_uniprot/mnxref_utils.py
new file mode 100644
index 00000000..7eb3fafb
--- /dev/null
+++ b/metanetx_uniprot/mnxref_utils.py
@@ -0,0 +1,407 @@
+'''
+SYNBIOCHEM-DB (c) University of Manchester 2015
+
+SYNBIOCHEM-DB is licensed under the MIT License.
+
+To view a copy of this license, visit <http://opensource.org/licenses/MIT/>.
+
+@author:  neilswainston
+'''
+# pylint: disable=no-member
+# pylint: disable=too-few-public-methods
+# pylint: disable=too-many-locals
+from collections import Counter
+import csv
+import itertools
+import math
+import re
+import urllib
+from urllib.request import urlopen
+import requests
+
+import numpy
+#from subliminal import balance
+
+import namespace_utils
+from synbiochem.utils import chem_utils
+import os
+
+
+_METANETX_URL = 'http://metanetx.org/cgi-bin/mnxget/mnxref/'
+#For test, also update __read_data function
+#_METANETX_URL = os.getcwd()+'/TestingFiles/'
+
+class MnxRefReader(object):
+    '''Class to read MnxRef data from the chem_prop.tsv, the chem_xref.tsv and
+    reac_prop.tsv files.'''
+
+    def __init__(self, source=_METANETX_URL):
+        self.__source = source
+        self.__mnx_id_patt = re.compile(r'(MNX[MR])(\d+)')
+        self.__chem_data = {}
+        self.__reac_data = {}
+
+    def get_chem_data(self):
+        '''Gets chemical data.'''
+        if not self.__chem_data:
+            mxn_chebi_mapping = self.__read_chem_prop()
+            self.__read_xref('chem_xref.tsv', self.__chem_data, True)
+
+        return self.__chem_data,mxn_chebi_mapping
+
+    def get_reac_data(self,reaction_ids):
+        '''Gets reaction data.'''
+        if not self.__reac_data:
+            mxn_reaction_ids,mxn_rhea_mapping = self.__read_reac_prop(reaction_ids)
+            self.__read_xref('reac_xref.tsv', self.__reac_data, False)
+
+        #Only include reaction data for reactions in reaction_ids
+        self.__reac_data = {key:val for key,val in self.__reac_data.items() if key in mxn_reaction_ids}
+
+        return self.__reac_data,mxn_rhea_mapping
+
+    def __read_chem_prop(self):
+        '''Read chemical properties and create Nodes.'''
+        chem_prop_keys = ['id', 'name', 'reference','formula', 'charge:float',
+                          'mass:float', 'inchi', 'inchikey', 'smiles']
+
+        mxn_chebi_mapping = {}
+
+        for values in self.__read_data('chem_prop.tsv'):
+            if not values[0].startswith('#'):
+                values[0] = self.__parse_id(values[0])
+                values[2] = self.__parse_id(values[2])
+                props = dict(zip(chem_prop_keys, values))
+
+                #For mapping mxn IDs to Chebi Ids
+                mxn_chebi_mapping[values[0]] = values[2]
+
+                props.pop('reference')
+                _convert_to_float(props, 'charge:float')
+                _convert_to_float(props, 'mass:float')
+                props = {key: value for key, value in props.items()
+                         if value != ''}
+                self.__chem_data[values[0]] = props
+
+        return mxn_chebi_mapping
+
+    def __read_xref(self, filename, data, chemical):
+        '''Read xrefs and update Nodes.'''
+        xref_keys = ['XREF', 'MNX_ID', 'Description']
+
+        for values in self.__read_data(filename):
+            if not values[0].startswith('#'):
+                xrefs = dict(zip(xref_keys[:len(values)], values))
+                evidence = 'none'
+
+                if evidence == 'identity' or evidence == 'structural':
+                    xrefs['MNX_ID'] = self.__parse_id(xrefs['MNX_ID'])
+                    xref = xrefs['XREF'].split(':')
+
+                    if xrefs['MNX_ID'] in data:
+                        entry = data[xrefs['MNX_ID']]
+                        self.__add_xref(xref, entry, chemical)
+
+    def __add_xref(self, xref, entry, chemical):
+        '''Adds an xref.'''
+        namespace = namespace_utils.resolve_namespace(xref[0],
+                                                      chemical)
+
+        if namespace is not None:
+            xref[1] = self.__parse_id(xref[1])
+
+            entry[namespace] = xref[1] \
+                if namespace != 'chebi' \
+                else 'CHEBI:' + xref[1]
+
+    def __read_reac_prop(self,reaction_ids):
+        '''Read reaction properties and create Nodes.'''
+        reac_prop_keys = ['id', 'equation', 'reference', 'ec', 'balance', 'transport']
+
+        ##Relabel reaction ids by MXN id rather than rhea id
+        mxn_reaction_ids = []
+
+        mxn_rhea_mapping = {}
+
+        for values in self.__read_data('reac_prop.tsv'):
+            if not values[0].startswith('#'): 
+                if values[0] == 'EMPTY': continue
+                values[0] = self.__parse_id(values[0])
+                values[2] = self.__parse_id(values[2])
+                #Grab MXN id if in reaction IDs from filtering by organisms/enzymes
+                try:
+                    if 'rhea' in values[2].split(':')[0].lower() and values[2].split(':')[1] in reaction_ids:
+                        mxn_reaction_ids.append(values[0])
+                        mxn_rhea_mapping[values[0]] = values[2].split(':')[1]
+                except IndexError: continue
+
+                props = dict(zip(reac_prop_keys, values))
+                props.pop('reference')
+
+                try:
+                    participants = chem_utils.parse_equation(
+                        props.pop('equation'))
+
+                    for participant in participants:
+                        participant[0] = self.__parse_id(participant[0])
+
+                        if participant[0] not in self.__chem_data:
+                            self.__add_chem(participant[0])
+
+                    props['reac_defn'] = participants
+                    self.__reac_data[values[0]] = props
+                except ValueError:
+                    print('WARNING: Suspected polymerisation reaction: ' + \
+                        values[0] + '\t' + str(props))
+
+        return mxn_reaction_ids,mxn_rhea_mapping
+
+    def __add_chem(self, chem_id):
+        '''Registers a minimal chemical record (id only) and returns it.'''
+        record = dict(id=chem_id)
+        self.__chem_data[chem_id] = record
+        return record
+
+    def __read_data(self, filename):
+        '''Downloads a tab-delimited file and returns it as a list of rows.
+
+        filename is appended to self.__source to form the URL. Every row is a
+        list of strings; comment lines are not filtered out here.
+
+        Raises requests.HTTPError when the server answers with an error
+        status, so that an error page is never parsed as if it were data.
+        '''
+        # Offline testing alternative (kept for reference): read a local copy
+        # instead of downloading, e.g.
+        #   import pandas as pd
+        #   cr = pd.read_csv(self.__source + filename, delimiter='\t',
+        #                    comment='#', header=None)
+        #   and convert each row of cr into a list.
+        with requests.Session() as session:
+            response = session.get(self.__source + filename)
+            # Fail loudly on 4xx/5xx instead of parsing the error body.
+            response.raise_for_status()
+            decoded_content = response.content.decode('utf-8')
+
+        return list(csv.reader(decoded_content.splitlines(), delimiter='\t'))
+        
+
+    def __parse_id(self, item_id):
+        '''Parses mnx ids; ids that do not match the pattern are returned unchanged.'''
+        found = self.__mnx_id_patt.findall(item_id)
+
+        if not found:
+            return item_id
+
+        # Only the first match is used: its first element followed by the
+        # second element passed through int (which drops leading zeros).
+        first = found[0]
+        return first[0] + str(int(first[1]))
+
+
+class MnxRefLoader(object):
+    '''Loads MNXref data into neo4j format.
+
+    Chemicals and reactions read through MnxRefReader are registered with the
+    chemical and reaction managers; relationship files for reactions, GO-PLUS,
+    HPO and kg-phenio are written through the supplied writer.
+    '''
+
+    def __init__(self, chem_man, reac_man, writer,reaction_ids,process_ids,ncbi_taxonomy_utils,array_delimiter):
+        '''Stores the collaborators and the pre-filtered reaction/process ids.'''
+        self.__chem_man = chem_man
+        self.__reac_man = reac_man
+        self.__writer = writer
+        self.__reactions = reaction_ids
+        self.__processes = process_ids
+        self.__ncbi_tax = ncbi_taxonomy_utils
+        self.__array_delimiter = array_delimiter
+
+    def load(self):
+        '''Loads MnxRef data from chem_prop.tsv, chem_xref.tsv,
+        reac_prop.tsv and reac_xref.tsv files.
+
+        Also reads GO-PLUS, HPO (kgx tsv) and kg-phenio files from the
+        'Files' directory below the current working directory and writes the
+        resulting nodes/relationships through the writer.
+
+        Returns an empty list; all output goes through the writer.
+        '''
+        reader = MnxRefReader()
+
+        # First register every chemical (chem_xref and chem_prop) under its
+        # MNX id.
+        c_vals, mxn_chebi_mapping = reader.get_chem_data()
+        for properties in c_vals.values():
+            properties['mnx'] = properties.pop('id')
+            self.__chem_man.add_chemical(properties)
+
+        # Then get reaction data, restricted to the reaction ids found linked
+        # to organisms.
+        reac_data, mxn_rhea_mapping = reader.get_reac_data(self.__reactions)
+        chem_rels = self.__add_reac_nodes(reac_data)
+
+        # Fixed mappings that are always available for the conversion below.
+        mxn_chebi_mapping['MNXM1'] = 'chebi:24636'
+        mxn_chebi_mapping['WATER'] = 'chebi:15377'
+
+        # Convert reaction ids to Rhea ids and chemical ids to ChEBI ids, in
+        # place; each rel is [reaction, predicate, chemical, properties].
+        chemical_ids = []
+
+        for rel in chem_rels:
+            # NOTE(review): raises KeyError if a reaction id has no entry in
+            # mxn_rhea_mapping - confirm get_reac_data only returns mapped
+            # reactions.
+            rel[0] = 'Rhea:' + mxn_rhea_mapping[rel[0]]
+            chem = rel[2]
+            try:
+                rel[2] = mxn_chebi_mapping[chem]
+            except KeyError:
+                if 'WATER' in chem:
+                    # Compartment-specific water ids share the water ChEBI id.
+                    mxn_chebi_mapping[chem] = 'chebi:15377'
+                    rel[2] = mxn_chebi_mapping[chem]
+                else:
+                    # The unmapped id is kept as is.
+                    print('could not map chemical to chebi ID: ',chem)
+            chemical_ids.append(rel[2])
+
+        files_dir = os.getcwd() + '/Files/'
+
+        print('self.__processes in mxnref load: ',self.__processes)
+        print('length of self.__processes in mxnref load: ',len(self.__processes))
+
+        # GO-PLUS: chemical <-> process relationships.
+        go_plus_filename = files_dir + 'GO-PLUS.csv'
+        go_plus_rels,process_ids = self.__reac_man.read_go_plus(go_plus_filename,self.__processes,chemical_ids)
+
+        print('go_plus_rels: ',go_plus_rels[0:5])
+
+        # HPO: phenotype <-> process relationships, constrained to the
+        # processes collected above.
+        hpo_kgx_nodes_tsv = files_dir + 'hp_kgx_tsv_nodes.tsv'
+        hpo_kgx_edges_tsv = files_dir + 'hp_kgx_tsv_edges.tsv'
+        nodes,rels = self.__reac_man.transform_kgx_output_format_hp(hpo_kgx_nodes_tsv,hpo_kgx_edges_tsv)
+        hpo_rels = [rel for rel in rels
+                    if rel[0] in process_ids or rel[2] in process_ids]
+
+        # nodes is a dict keyed by id; the property dicts are what is
+        # written, as for the other node types in this pipeline.
+        self.__writer.write_nodes(nodes.values(), 'Phenotype')
+        self.__writer.write_rels(hpo_rels, 'Process', 'Phenotype')
+
+        # kg-phenio: GO <-> MONDO relationships.
+        phenio_rels = self.__reac_man.get_process_disease_phenio_data(files_dir + 'phenio_merged-kg_edges.tsv',files_dir + 'phenio_merged-kg_nodes.tsv',process_ids)
+
+        self.__writer.write_rels(go_plus_rels, 'GoPlus_Chemical', 'Process')
+        self.__writer.write_rels(chem_rels, 'Reaction', 'Chemical')
+        print('phenio_rels: ',phenio_rels[0:5])
+        self.__writer.write_rels(phenio_rels, 'Phenio_Process', 'Disease')
+
+        return []
+
+    def __add_reac_nodes(self, reac_data):
+        '''Get reaction nodes from data.
+
+        Registers every reaction with the reaction manager and every
+        participant with the chemical manager, then returns a list of
+        [reaction id, 'has_cofactor' | 'has_reactant', chemical id,
+        {'stoichiometry:float': value}] relationships.
+        '''
+        reac_id_def = {}
+
+        for properties in reac_data.values():
+            reac_def = []
+            mnx_id = properties.pop('id')
+
+            # Remove description (may be inconsistent with the reaction):
+            properties.pop('description', None)
+
+            for prt in properties.pop('reac_defn'):
+                chem_id, _ = self.__chem_man.add_chemical({'mnx': prt[0]})
+
+                # Each term is [formula, charge, stoichiometry, chemical id].
+                reac_def.append([self.__chem_man.get_prop(prt[0], 'formula'),
+                                 self.__chem_man.get_prop(prt[0],
+                                                          'charge:float', 0),
+                                 prt[1],
+                                 chem_id])
+
+            # Reactions are NOT balanced (the balancing library is no longer
+            # available), so the definition is used as read.
+            properties['balance'] = 'unknown'
+
+            reac_id = self.__reac_man.add_reaction('mnx', mnx_id,
+                                                   properties)
+            reac_id_def[reac_id] = reac_def
+
+        chem_id_mass = self.__chem_man.get_props('monoisotopic_mass:float',
+                                                 float('NaN'))
+        cofactors = [chem_id
+                     for chem_id, mass in chem_id_mass.items()
+                     if 0 < mass < 44]  # Assume mass < CO2 = cofactor
+
+        cofactor_pairs = _calc_cofactors(reac_id_def.values(), cofactors)
+        rels = []
+
+        for reac_id, defn in reac_id_def.items():
+            reactants = [term[3] for term in defn if term[2] < 0]
+            products = [term[3] for term in defn if term[2] > 0]
+
+            # Set metabolites as cofactors:
+            reac_cofactors = [term[3] for term in defn
+                              if term[3] in cofactors]
+
+            # Set pairs as cofactors:
+            for pair in itertools.product(reactants, products):
+                if tuple(sorted(pair)) in cofactor_pairs:
+                    reac_cofactors.extend(pair)
+
+            for term in defn:
+                rels.append([reac_id,
+                             'has_cofactor' if term[3] in reac_cofactors
+                             else 'has_reactant',
+                             term[3],
+                             {'stoichiometry:float': term[2]}])
+
+        return rels
+
+
+def _calc_cofactors(reaction_defs, cofactors, cutoff=0.8):
+    '''Counts reactant/product pairs over all reactions and filters them.
+
+    Each reaction definition is a sequence of terms whose third element is
+    the stoichiometry (negative: reactant, positive: product) and whose
+    fourth element is the chemical id. Chemicals listed in cofactors are
+    left out of the pairing. The pair counts are passed to _filter together
+    with cutoff, and its result is returned.
+    '''
+    pair_counts = Counter()
+
+    for terms in reaction_defs:
+        substrates = []
+        outputs = []
+
+        for term in terms:
+            if term[2] < 0:
+                if term[3] not in cofactors:
+                    substrates.append(term[3])
+            elif term[2] > 0:
+                if term[3] not in cofactors:
+                    outputs.append(term[3])
+
+        # Pairs are stored order-independently.
+        for pair in itertools.product(substrates, outputs):
+            pair_counts[tuple(sorted(pair))] += 1
+
+    return _filter(pair_counts, cutoff)
+
+
+def _filter(counter, cutoff):
+    '''Filter counter items according to cutoff.
+
+    counter maps items to occurrence counts. The counts are binned into a
+    histogram, a straight line is fitted in log-log space and the items
+    whose count exceeds the resulting threshold are returned as a list.
+    An empty counter yields an empty list.
+    '''
+    if not counter:
+        # Nothing to bin: unpacking an empty histogram would raise.
+        return []
+
+    # Count occurences of pairs, then bin into a histogram...
+    hist_counter = Counter(counter.values())
+
+    # Fit straight-line to histogram log-log plot and filter...
+    # NOTE(review): only the first histogram bin is used for the fit, so the
+    # line is under-determined - confirm this is intended.
+    x_val, y_val = zip(*hist_counter.items())
+    l_x_val = numpy.log(x_val)[0]
+    l_y_val = numpy.log(y_val)[0]
+    if l_x_val == 0.0:
+        # Avoid a zero abscissa in the fit.
+        l_x_val += 0.01
+    m_val, b_val = numpy.polyfit([l_x_val], [l_y_val], 1)
+
+    # The threshold does not depend on the item, so compute it once.
+    threshold = math.exp(cutoff * -b_val / m_val)
+    return [item for item, count in counter.items() if count > threshold]
+
+
+def _convert_to_float(dictionary, key):
+    '''Replaces dictionary[key] by its float value, in place.
+
+    The string 'NA' becomes NaN. When the key is missing or its value is
+    falsy, the key is removed from the dictionary instead.
+    '''
+    raw = dictionary.get(key, None)
+
+    if not raw:
+        # Remove key:
+        dictionary.pop(key, None)
+        return
+
+    dictionary[key] = float('NaN' if raw == 'NA' else raw)
diff --git a/metanetx_uniprot/namespace_utils.py b/metanetx_uniprot/namespace_utils.py
new file mode 100644
index 00000000..bb6bd665
--- /dev/null
+++ b/metanetx_uniprot/namespace_utils.py
@@ -0,0 +1,61 @@
+'''
+synbiochem (c) University of Manchester 2015
+
+synbiochem is licensed under the MIT License.
+
+To view a copy of this license, visit <http://opensource.org/licenses/MIT/>.
+
+@author:  neilswainston
+'''
+# Lookup table from the source names used for chemical cross-references to
+# the namespace they are stored under; read by resolve_namespace when its
+# `chemical` argument is truthy.
+__CHEMICAL_NAMESPACE = {
+    # value (namespace) corresponds to identifiers.org:
+    'bigg': 'bigg.metabolite',
+    'CAS Registry Number': 'cas',
+    'chebi': 'chebi',
+    'ChemIDplus accession': 'chemidplus',
+    'Chemspider accession': 'chemspider',
+    'DrugBank accession': 'drugbank',
+    'hmdb': 'hmdb',
+    'HMDB accession': 'hmdb',
+    'kegg': 'kegg.compound',
+    'KEGG COMPOUND accession': 'kegg.compound',
+    'KEGG DRUG accession': 'kegg.drug',
+    'KEGG GLYCAN accession': 'kegg.glycan',
+    'KNApSAcK accession': 'knapsack',
+    'lipidmaps': 'lipidmaps',
+    'LIPID MAPS instance accession': 'lipidmaps',
+    'MolBase accession': 'molbase',
+    'PDB accession': 'pdb',
+    'PubMed citation': 'pubmed',
+    'reactome': 'reactome',
+    'RESID accession': 'resid',
+    'seed': 'seed.compound',
+    'umbbd': 'umbbd.compound',
+    'UM-BBD compID': 'umbbd.compound',
+    'upa': 'unipathway',
+    'Wikipedia accession': 'wikipedia.en',
+
+    # Not in identifiers.org:
+    'metacyc': 'metacyc',
+    'MetaCyc accession': 'metacyc',
+    'mnx': 'mnx'
+}
+
+# Lookup table from the source names used for reaction cross-references to
+# the namespace they are stored under; read by resolve_namespace when its
+# `chemical` argument is falsy.
+__REACTION_NAMESPACE = {
+    # value (namespace) corresponds to identifiers.org:
+    'bigg': 'bigg.reaction',
+    'kegg': 'kegg.reaction',
+    'reactome': 'reactome',
+    'rhea': 'rhea',
+    'seed': 'seed',
+
+    # Not in identifiers.org:
+    'metacyc': 'metacyc',
+    'mnx': 'mnx',
+}
+
+
+def resolve_namespace(name, chemical):
+    '''Returns the namespace registered for name, or None if it is unknown.
+
+    The chemical table is consulted when chemical is truthy, the reaction
+    table otherwise.
+    '''
+    if chemical:
+        lookup = __CHEMICAL_NAMESPACE
+    else:
+        lookup = __REACTION_NAMESPACE
+
+    return lookup.get(name)
diff --git a/metanetx_uniprot/ncbi_taxonomy_utils.py b/metanetx_uniprot/ncbi_taxonomy_utils.py
new file mode 100644
index 00000000..e563331d
--- /dev/null
+++ b/metanetx_uniprot/ncbi_taxonomy_utils.py
@@ -0,0 +1,140 @@
+'''
+SYNBIOCHEM-DB (c) University of Manchester 2015
+
+SYNBIOCHEM-DB is licensed under the MIT License.
+
+To view a copy of this license, visit <http://opensource.org/licenses/MIT/>.
+
+@author:  neilswainston
+'''
+import os
+import sys
+import tarfile
+import tempfile
+import urllib
+from urllib.request import urlretrieve
+
+from kgx.cli.cli_utils import transform
+import pandas as pd
+
+
+# Default location of the NCBI Taxonomy dump archive (taxdump.tar.gz); used
+# as the default `source` of load.
+__NCBITAXONOMY_URL = 'ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz'
+
+
+def load(reaction_manager, writer, array_delimiter, source=__NCBITAXONOMY_URL):
+    '''Loads NCBI Taxonomy data.
+
+    Transforms Files/ncbitaxon.json (relative to the current working
+    directory) into kgx tsv files, writes the Organism nodes and the
+    Organism-Organism relationships through writer, and then asks the
+    reaction manager to add the organism-enzyme relationships.
+    '''
+    # The taxdump-based path (_get_ncbi_taxonomy_files, _parse_nodes and
+    # _parse_names) is not used currently, so `source` is not read here.
+    # For testing, point this at 'TestingFiles/ncbitaxon.json' instead.
+    taxon_json = '/'.join([os.getcwd(), 'Files/ncbitaxon.json'])
+
+    print('parsing ncbi taxon json file')
+    nodes_tsv, edges_tsv = _parse_nodes_kgmicrobe(taxon_json, array_delimiter)
+    organisms, organism_rels = transform_kgx_output_format(nodes_tsv, edges_tsv)
+
+    writer.write_nodes(organisms.values(), 'Organism')
+    writer.write_rels(organism_rels, 'Organism', 'Organism')
+
+    print('adding organism-enzyme relationships')
+    reaction_manager.add_org_to_enz(organisms, 'uniprot')
+
+
+def _get_ncbi_taxonomy_files(source):
+    '''Downloads and extracts NCBI Taxonomy files.
+
+    The archive at source is downloaded to a temporary file and extracted
+    into the system temporary directory. Returns the paths of nodes.dmp and
+    names.dmp inside that directory.
+    '''
+    temp_dir = tempfile.gettempdir()
+
+    # Context managers make sure the temporary download and the archive
+    # handle are released even if the download or extraction fails.
+    with tempfile.NamedTemporaryFile() as temp_gzipfile:
+        urlretrieve(source, temp_gzipfile.name)
+
+        with tarfile.open(temp_gzipfile.name, 'r:gz') as temp_tarfile:
+            # NOTE(review): extractall trusts the member paths stored in the
+            # archive; only use with a trusted source.
+            temp_tarfile.extractall(temp_dir)
+
+    return os.path.join(temp_dir, 'nodes.dmp'), \
+        os.path.join(temp_dir, 'names.dmp')
+
+def _parse_nodes_kgmicrobe(filename, array_delimiter, output_dir=None):
+    '''Transforms an obojson file into kgx tsv node and edge files.
+
+    filename is the obojson input. array_delimiter is accepted for interface
+    compatibility and is not used. output_dir is the directory the tsv files
+    are written to; it defaults to the current working directory rather than
+    a machine-specific absolute path.
+
+    Returns the paths of the generated nodes and edges tsv files.
+    '''
+    if output_dir is None:
+        output_dir = os.getcwd()
+
+    name = 'ncbitaxon_transformed'
+    output_base = os.path.join(output_dir, name)
+
+    transform(inputs=[filename], input_format='obojson', output=output_base, output_format='tsv')
+
+    return output_base + '_nodes.tsv', output_base + '_edges.tsv'
+    
+def transform_kgx_output_format(transformed_nodes_tsv,transformed_edges_tsv):
+    '''Converts kgx tsv node/edge files into Organism nodes and relationships.
+
+    Returns (nodes, rels): nodes maps each taxonomy id (the part of the node
+    id after 'NCBITaxon:') to its node properties, and rels is a list of
+    [subject, predicate, object] lists taken from the edges file. Node ids
+    that do not contain the 'NCBITaxon:' prefix are skipped.
+    '''
+    labels = pd.read_csv(transformed_nodes_tsv, sep = '\t', usecols = ['id','name'])
+    triples_df = pd.read_csv(transformed_edges_tsv,sep = '\t', usecols = ['subject', 'object', 'predicate'])
+
+    nodes = {}
+
+    # Iterate the column directly; building a row object per index is far
+    # slower on large taxonomy files.
+    for node_id in labels['id']:
+        if not isinstance(node_id, str):
+            continue
+
+        parts = node_id.split('NCBITaxon:')
+        if len(parts) < 2:
+            # Not a taxon id: nothing to register.
+            continue
+
+        tax_id = parts[1]
+        nodes[tax_id] = {'taxonomy:ID(Organism)': tax_id,
+                         ':LABEL':
+                         'Organism,unknown'}
+
+    rels = [[s, p, o]
+            for s, p, o in zip(triples_df['subject'],
+                               triples_df['predicate'],
+                               triples_df['object'])]
+
+    return nodes,rels
+
+
+def _parse_nodes(filename, array_delimiter):
+    '''Reads a '|'-separated nodes file into Organism nodes and is_a links.
+
+    The first three fields of each line are used as taxonomy id, parent id
+    and a label suffix. Returns (nodes, rels): nodes maps each taxonomy id to
+    its properties and rels holds one [id, 'is_a', parent id] entry for
+    every id other than '1'.
+    '''
+    organisms = {}
+    parent_links = []
+
+    with open(filename, 'r') as dump:
+        for record in dump:
+            fields = [field.strip() for field in record.split('|')]
+            tax_id, parent_id, suffix = fields[0], fields[1], fields[2]
+
+            organisms[tax_id] = {'taxonomy:ID(Organism)': tax_id,
+                                 ':LABEL':
+                                 'Organism' + array_delimiter + suffix}
+
+            # Id '1' gets no is_a relationship.
+            if tax_id != '1':
+                parent_links.append([tax_id, 'is_a', parent_id])
+
+    return organisms, parent_links
+
+
+def _parse_names(nodes, filename, array_delimiter):
+    '''Adds names from a '|'-separated names file to nodes, in place.
+
+    The first field of each line selects the node and the second field is a
+    name. The first name seen becomes the node's 'name'; all names of a node
+    are joined with array_delimiter into 'names:string[]'. Nodes without any
+    name are left untouched.
+    '''
+    with open(filename, 'r') as dump:
+        for record in dump:
+            fields = [field.strip() for field in record.split('|')]
+            node = nodes[fields[0]]
+
+            if 'name' in node:
+                node['names:string[]'].add(fields[1])
+            else:
+                node['name'] = fields[1]
+                node['names:string[]'] = {fields[1]}
+
+    # Collapse the collected name sets into delimited strings.
+    for node in nodes.values():
+        if 'names:string[]' in node:
+            node['names:string[]'] = \
+                array_delimiter.join(node['names:string[]'])
+
+
+
+def main(argv):
+    '''Command-line entry point: passes each argument positionally to load.'''
+    positional_args = list(argv)
+    load(*positional_args)
+
+
+if __name__ == "__main__":
+    main(sys.argv[1:])
diff --git a/metanetx_uniprot/reaction_utils.py b/metanetx_uniprot/reaction_utils.py
new file mode 100644
index 00000000..f6a5f390
--- /dev/null
+++ b/metanetx_uniprot/reaction_utils.py
@@ -0,0 +1,312 @@
+'''
+SYNBIOCHEM-DB (c) University of Manchester 2015
+
+SYNBIOCHEM-DB is licensed under the MIT License.
+
+To view a copy of this license, visit <http://opensource.org/licenses/MIT/>.
+
+@author:  neilswainston
+'''
+from enzyme_utils import EnzymeManager
+
+from numpy import *
+import pandas as pd
+from tqdm import tqdm
+import csv
+
+
+class ReactionManager(object):
+    '''Class to implement a manager of Reaction data.
+
+    Collects Reaction nodes together with reaction-enzyme, reaction-process
+    and organism-enzyme relationships, and offers readers for the GO-PLUS,
+    HPO (kgx tsv), PheKnowLator and kg-phenio files.
+    '''
+
+    def __init__(self):
+        '''Constructor.'''
+        self.__nodes = {}  # reaction id -> node properties
+        self.__reac_ids = {}  # source + source-specific id -> reaction id
+        self.__reac_enz_rels = []
+        self.__enz_reac_rels = []
+        self.__go_reac_rels = []
+        self.__org_enz_rels = []
+        self.__enz_man = EnzymeManager()
+
+    def write_files(self, writer):
+        '''Write neo4j import files.
+
+        Returns a tuple of two lists holding what the writer returned for
+        the node files and for the relationship files.
+        '''
+        return ([writer.write_nodes(self.__nodes.values(),
+                                    'Reaction'),
+                 writer.write_nodes(self.__enz_man.get_nodes(),
+                                    'Enzyme'),
+                 writer.write_nodes(self.__enz_man.get_enz_nodes(),
+                                    'Enzyme_nodes')],
+                [writer.write_rels(self.__reac_enz_rels,
+                                   'Reaction', 'Enzyme'),
+                #Gets reactions connected to all enzymes
+                writer.write_rels(self.__enz_reac_rels,
+                                   'Reaction', 'Enzyme'),
+                #Gets reactions connected to all go processes
+                writer.write_rels(self.__go_reac_rels,
+                                   'Reaction', 'Process'),
+                writer.write_rels(self.__enz_man.get_org_enz_rels(),
+                                   'Organism', 'Enzyme')])
+
+    def add_reaction(self, source, reac_id, properties):
+        '''Adds a reaction to the collection of nodes, ensuring uniqueness.
+
+        Returns the id the reaction is stored under, which is an already
+        registered id when source + reac_id is known.
+        '''
+        reac_id = self.__reac_ids.get(source + reac_id, reac_id)
+
+        if reac_id not in self.__nodes:
+            properties[':LABEL'] = 'Reaction'
+            properties['id:ID(Reaction)'] = reac_id
+            properties['source'] = source
+            properties[source] = reac_id
+            self.__nodes[reac_id] = properties
+
+            # Index the node under every known cross-reference.
+            if 'mnx' in properties:
+                self.__reac_ids['mnx' + properties['mnx']] = reac_id
+
+            if 'kegg.reaction' in properties:
+                self.__reac_ids[
+                    'kegg.reaction' + properties['kegg.reaction']] = reac_id
+
+            if 'rhea' in properties:
+                self.__reac_ids['rhea' + properties['rhea']] = reac_id
+        else:
+            self.__nodes[reac_id].update(properties)
+
+        # No per-call dump of all nodes here: printing the whole collection
+        # for every added reaction grows quadratically with the input.
+        return reac_id
+
+    def add_react_to_enz(self, data, source, num_threads=0):
+        '''Submit data to the graph.'''
+        # Create Reaction and Enzyme nodes:
+        enzyme_ids = self.__create_react_enz(data, source)
+
+        # Create Enzyme nodes:
+        self.__enz_man.add_uniprot_data(enzyme_ids, source, num_threads)
+
+    def add_react_to_enz_organism(self, data, source, go_data, num_threads=0):
+        '''Creates reaction-enzyme and reaction-process relationships.
+
+        data is the parsed rhea-enzyme file and go_data the parsed rhea-go
+        file. num_threads is accepted but not used. Returns
+        (reaction_ids, process_ids).
+        '''
+        reaction_ids,process_ids = self.__create_enz_react(data, go_data, source)
+
+        return reaction_ids,process_ids
+
+    def __create_react_enz(self, data, source):
+        '''Creates Reaction and Enzyme nodes and their Relationships.
+
+        Returns the distinct enzyme ids found in data.
+        '''
+        enzyme_ids = []
+
+        for reac_id, uniprot_ids in data.items():
+            reac_id = self.add_reaction(source, reac_id, {})
+
+            for uniprot_id in uniprot_ids:
+                enzyme_ids.append(uniprot_id)
+                self.__reac_enz_rels.append([reac_id, 'catalysed_by',
+                                             uniprot_id,
+                                             {'source': source}])
+
+        return list(set(enzyme_ids))
+
+    def __create_enz_react(self, data, go_data, source):
+        '''Creates reaction-enzyme and reaction-process relationships.
+
+        Only reactions catalysed by an enzyme already held by the enzyme
+        manager are used. Returns (reaction_ids, process_ids), each without
+        duplicates.
+        '''
+        print('adding reaction to enzyme relationships')
+        reaction_ids = []
+        process_ids = []
+
+        for enzyme in self.__enz_man.get_nodes():
+            entry = enzyme['entry']
+            #Gets relationships between reactions and enzymes from Rhea only if they exist in the enzymes pulled from organism filtering step
+            reac_ids = [key for key, value in data.items() if entry in value]
+            reaction_ids.extend(reac_ids)
+
+            for reac_id in reac_ids:
+                #reac_ids should have rhea to help identify and protein should have UniProt
+                self.__enz_reac_rels.append(['Rhea:'+reac_id, 'catalysed_by',
+                                                'Uniprot:'+entry,
+                                                {'source': source}])
+
+        print('adding reaction to process relationships')
+        #Gets relationships between reactions and Go processes from Rhea only if they exist in above reaction ids
+        wanted = set(reaction_ids)  # set: constant-time membership test
+        go_reac_ids = [key for key in go_data if key in wanted]
+
+        for reac_id in go_reac_ids:
+            for process_id in go_data[reac_id]:
+                process_ids.append(process_id)
+                #reac_ids should have rhea to help identify
+                self.__go_reac_rels.append(['Rhea:'+reac_id, 'affects',
+                                                process_id,
+                                                {'source': source}])
+
+        return list(wanted),list(set(process_ids))
+
+    def add_org_to_enz(self, nodes, source, num_threads=0):
+        '''Submit data to the graph.'''
+        # Create Organism nodes:
+        organism_ids = self.__create_organism_ids(nodes, source)
+
+        print('number of orgs for just reference proteomes')
+        print(len(organism_ids))
+
+        ## For testing
+        #organism_ids = organism_ids[0:10]
+
+        # Create Organism and Enzyme nodes:
+        self.__enz_man.add_uniprot_data_organism(organism_ids, source, num_threads)
+
+    def __create_organism_ids(self, data, source):
+        '''Returns the distinct keys of data (via numpy unique).'''
+        ids = unique(list(data.keys()))
+
+        return ids
+
+    def read_go_plus(self,go_plus_file,process_ids,chemical_ids):
+        '''Reads the GO-PLUS csv export and builds relationships from it.
+
+        A relationship [subject, column label, object, {'source': 'go-plus'}]
+        is created for every non-empty cell of a non-deprecated row when the
+        subject id or the column label occurs in process_ids or
+        chemical_ids; cells holding several objects are split on '|'.
+
+        Returns (rels, process_ids) where process_ids is the input list
+        extended by every relationship element containing 'GO:', without
+        duplicates.
+        '''
+        go_keys = ['Class ID', 'Preferred Label', 'Synonyms','Definitions','Obsolete','CUI','Semantic Types','Parents']
+
+        rels = []
+
+        d = pd.read_csv(go_plus_file, delimiter=',',keep_default_na=False)
+        go_data = d[go_keys]
+        go_data = go_data.replace(regex=['http://purl.obolibrary.org/obo/'],value='').replace(regex=['_'],value=':')
+
+        d = d.drop(go_keys,axis=1)
+        #Ensure subject is not deprecated
+        d = d[d['http://www.w3.org/2002/07/owl#deprecated'] != 'TRUE']
+        # Keep the subject table aligned with the filtered rows; otherwise
+        # positional access pairs subjects with the wrong row as soon as one
+        # deprecated row has been dropped.
+        go_data = go_data.loc[d.index]
+        d = d.replace(regex=['http://purl.obolibrary.org/obo/'],value='').replace(regex=['_'],value=':')
+        d = d.replace(regex=['go#'],value='')
+
+        #Columns to ignore
+        cols_to_drop = ['http://data.bioontology.org/metadata/prefixIRI','http://data.bioontology.org/metadata/treeView','go#','http://purl.obolibrary.org/obo/IAO_','http://www.w3.org/2000/01/rdf-schema#','http://www.w3.org/2004/02/skos/core#','http://www.w3.org/2002/07/owl#deprecated','http://www.w3.org/2000/01/rdf-schema#label','http://purl.org/dc/terms/','obsolete ','has_narrow_synonym','has_obo_format_version','has_obo_namespace','has_related_synonym','has_scope','has_synonym_type','definition','http://www.geneontology.org/formats/oboInOwl#id','has_alternative_id','http://purl.obolibrary.org/obo/go#creation_date','http://www.geneontology.org/formats/oboInOwl#creation_date','synonym_type_property','Systematic synonym','temporally related to','term replaced by','term tracker item','title','http://www.geneontology.org/formats/oboInOwl#created_by','has_exact_synonym']
+        cols_to_drop = d.columns[d.columns.str.contains('|'.join(cols_to_drop))]
+        d = d.drop(cols_to_drop, axis=1)
+        #There are 2 contains relationships, develops_from
+        d.columns = d.columns.str.replace('http://data.bioontology.org/metadata/obo/contains','biontology_contains', regex=False)
+        d.columns = d.columns.str.replace('http://data.bioontology.org/metadata/obo/develops_from','biontology_develops_from', regex=False)
+        d.columns = d.columns.str.replace('http://data.bioontology.org/metadata/obo/','', regex=False)
+        d.columns = d.columns.str.replace('http://purl.obolibrary.org/obo/', '', regex=False)
+        d.columns = d.columns.str.replace('http://www.geneontology.org/formats/oboInOwl#', '', regex=False)
+
+        # Sets give constant-time membership tests inside the cell loop.
+        chemical_lookup = set(chemical_ids)
+        process_lookup = set(process_ids)
+
+        for i in tqdm(range(len(d))):
+            s_id = go_data.iloc[i].loc['Class ID']
+            row = d.iloc[i]
+            for p_label in d.columns:
+                cell = row.loc[p_label]
+                if cell != '':
+                    if (s_id in chemical_lookup or p_label in process_lookup) or (s_id in process_lookup or p_label in chemical_lookup):
+                        for j in cell.split('|'):
+                            rels.append([s_id, p_label,
+                                                j,
+                                                {'source': 'go-plus'}])
+
+        go_process_ids = []
+        for rel in rels:
+            for x in rel:
+                if "GO:" in x:
+                    go_process_ids.append(x)
+
+        go_process_ids = list(set(go_process_ids))
+
+        print('len process_ids before adding go plus terms: ',len(process_ids))
+        process_ids = process_ids+go_process_ids
+        process_ids = list(set(process_ids))
+        print('len process_ids after adding go plus terms: ',len(process_ids))
+
+        return rels,process_ids
+
+    def transform_kgx_output_format_hp(self,transformed_nodes_tsv,transformed_edges_tsv):
+        '''Extracts GO-HP relationships and their nodes from kgx tsv files.
+
+        Returns (nodes, rels): rels lists the [subject, predicate, object]
+        triples that link a 'GO:' id with an 'HP:' id in either direction,
+        and nodes maps every node id used as subject or object of one of
+        those triples to its properties (the label is the id prefix).
+        '''
+        labels = pd.read_csv(transformed_nodes_tsv, sep = '\t', usecols = ['id','name'])
+        triples_df = pd.read_csv(transformed_edges_tsv,sep = '\t', usecols = ['subject', 'object', 'predicate'])
+
+        #Constrain rels to only GO process: HP relationships
+        rels = []
+        for s, p, o in zip(triples_df['subject'], triples_df['predicate'], triples_df['object']):
+            if ('GO:' in s and 'HP:' in o) or ('GO:' in o and 'HP:' in s):
+                rels.append([s, p, o])
+
+        #Constrain nodes to those used in the rels above. The check has to
+        #run against the relationships; iterating the labels table itself
+        #only yields its column names.
+        used_ids = set()
+        for rel in rels:
+            used_ids.add(rel[0])
+            used_ids.add(rel[2])
+
+        nodes = {}
+        for node_id in labels['id']:
+            if node_id in used_ids:
+                nodes[node_id] = {'class:ID': node_id,
+                                  ':LABEL':
+                                  node_id.split(':')[0]}
+
+        return nodes,rels
+
+    def process_pkl_files(self,triples_file,labels_file):
+        '''Reads the PheKnowLator triples and node-label files.
+
+        Column names are lower-cased and the angle brackets around URIs are
+        removed. Returns (triples_df, labels) as DataFrames.
+        '''
+        triples_df = pd.read_csv(triples_file,sep = '	', quoting=csv.QUOTE_NONE)
+        # Assign the result: str.lower() does not modify the columns in place.
+        triples_df.columns = triples_df.columns.str.lower()
+
+        triples_df.replace({'<': ''}, regex=True, inplace=True)
+        triples_df.replace({'>': ''}, regex=True, inplace=True)
+
+        labels = pd.read_csv(labels_file, sep = '	', quoting=csv.QUOTE_NONE)
+        labels.columns = labels.columns.str.lower()
+
+        #Remove brackets from URI
+        labels['entity_uri'] = labels['entity_uri'].str.replace("<","")
+        labels['entity_uri'] = labels['entity_uri'].str.replace(">","")
+
+        return triples_df,labels
+
+    def get_process_disease_pkl_data(self,triples_file,labels_file,process_ids):
+        '''Builds MONDO-process relationships from PheKnowLator files.
+
+        A triple is kept when its object is in process_ids and its subject
+        contains 'MONDO_'; the predicate is replaced by its label. Returns a
+        list of [subject, predicate label, object, {'source':
+        'pheknowlator'}] entries with OBO URIs shortened to CURIEs.
+        '''
+        print('Extracting PKL relationships')
+        triples_df, labels_df = self.process_pkl_files(triples_file,labels_file)
+
+        obo_prefix = 'http://purl.obolibrary.org/obo/'
+        rels = []
+
+        for i in tqdm(range(len(triples_df))):
+            triple = triples_df.iloc[i]
+            if triple.loc['object'] in process_ids and 'MONDO_' in triple.loc['subject']:
+                predicate_label = labels_df.loc[labels_df['entity_uri'] == triple.loc['predicate'],'label'].values[0]
+                rels.append([triple.loc['subject'].replace(obo_prefix,'').replace('_',':'), predicate_label,
+                                        triple.loc['object'].replace(obo_prefix,'').replace('_',':'),
+                                        {'source': 'pheknowlator'}])
+
+        return rels
+
+
+    def process_kg_phenio_files(self,triples_file,labels_file):
+        '''Reads the kg-phenio edge and node files.
+
+        Returns (triples, labels): triples only holds the edges linking a
+        'MONDO:' id with a 'GO:' id in either direction, and labels is the
+        node table with its columns renamed to entity_uri, category, label
+        and description/definition.
+        '''
+        triples_df = pd.read_csv(triples_file,sep = '\t', usecols = ['subject', 'object', 'predicate'])
+
+        labels = pd.read_csv(labels_file, sep = '\t', usecols = ['id','category', 'name','description'])
+        labels.columns = ['entity_uri','category', 'label','description/definition']
+
+        triples_df_relevant = triples_df.loc[((triples_df['subject'].str.contains('MONDO:')) & (triples_df['object'].str.contains('GO:'))) | ((triples_df['object'].str.contains('MONDO:')) & (triples_df['subject'].str.contains('GO:')))]
+
+        #1785727 total, 435 total MONDO/GO or GO/MONDO relationships
+        print(len(triples_df),len(triples_df_relevant))
+
+        return triples_df_relevant,labels
+
+    def get_process_disease_phenio_data(self,triples_file,labels_file,process_ids):
+        '''Builds MONDO-process relationships from kg-phenio files.
+
+        A triple is kept when its object is in process_ids and its subject
+        contains 'MONDO:'. Returns a list of [subject, predicate, object,
+        {'source': 'kg-phenio'}] entries.
+        '''
+        print('Extracting kg-phenio relationships')
+        triples_df, _ = self.process_kg_phenio_files(triples_file,labels_file)
+
+        wanted_processes = set(process_ids)  # constant-time membership test
+        rels = []
+
+        for i in tqdm(range(len(triples_df))):
+            triple = triples_df.iloc[i]
+            if triple.loc['object'] in wanted_processes and 'MONDO:' in triple.loc['subject']:
+                rels.append([triple.loc['subject'], triple.loc['predicate'],
+                                        triple.loc['object'],
+                                        {'source': 'kg-phenio'}])
+
+        return rels
\ No newline at end of file
diff --git a/metanetx_uniprot/rhea_utils.py b/metanetx_uniprot/rhea_utils.py
new file mode 100644
index 00000000..a9a9ff6a
--- /dev/null
+++ b/metanetx_uniprot/rhea_utils.py
@@ -0,0 +1,74 @@
+'''
+SYNBIOCHEM-DB (c) University of Manchester 2015
+
+SYNBIOCHEM-DB is licensed under the MIT License.
+
+To view a copy of this license, visit <http://opensource.org/licenses/MIT/>.
+
+@author:  neilswainston
+'''
+import tempfile
+import urllib
+from urllib.request import urlretrieve
+import os
+
+
+__RHEA_URL = 'ftp://ftp.expasy.org/databases/rhea/tsv/rhea2uniprot%5Fsprot.tsv'
+#For test, also update load function
+#__RHEA_URL = os.getcwd()+'/TestingFiles/rhea2uniprot_sprot.txt'
+
+__RHEA_GO_URL = 'ftp://ftp.expasy.org/databases/rhea/tsv/rhea2go.tsv'
+#__RHEA_GO_URL = os.getcwd()+'/TestingFiles/rhea2go_NOTREAL.txt'
+
+def load(reaction_manager, source=__RHEA_URL, go_source = __RHEA_GO_URL, num_threads=0):
+    '''Loads Rhea data.
+
+    Downloads source and go_source, parses each with _parse, and passes both
+    results to reaction_manager.add_react_to_enz_organism.
+
+    Returns the (reaction_ids, process_ids) pair returned by that call.
+    '''
+    # Parse data. Each download goes to a temporary file that is closed (and
+    # therefore removed) as soon as it has been parsed.
+    with tempfile.NamedTemporaryFile() as temp_file:
+        urlretrieve(source, temp_file.name)
+        data = _parse(temp_file.name)
+
+    with tempfile.NamedTemporaryFile() as temp_file:
+        urlretrieve(go_source, temp_file.name)
+        go_data = _parse(temp_file.name)
+
+    ##If using test data
+    #data = _parse(source)
+    #go_data = _parse(go_source)
+    ######Not sure why source is Rhea here, calls to UniProt
+    #Remove, since this goes from rhea2uniprot to uniprot enzymes. use add_org_to_enz function in ncbi_taxonomy_utils instead
+    #reaction_manager.add_react_to_enz(data, 'rhea', num_threads)
+    reaction_ids,process_ids = reaction_manager.add_react_to_enz_organism(data, 'rhea', go_data, num_threads)
+
+    return reaction_ids,process_ids
+
+
+def _parse(filename):
+    '''Parses a tab-separated mapping file.
+
+    The first line is skipped as a header. For every following line that
+    splits into exactly four tab-separated tokens, the stripped fourth token
+    is added under both the first and the third token. Lines with any other
+    number of tokens are ignored.
+
+    Returns a dict mapping each key to the list of values collected for it.
+    '''
+    data = {}
+
+    with open(filename, 'r') as textfile:
+        # Skip the header; the default keeps an empty file from raising
+        # StopIteration.
+        next(textfile, None)
+
+        for line in textfile:
+            tokens = line.split('\t')
+
+            if len(tokens) == 4:
+                uniprot_id = tokens[3].strip()
+
+                # Report rows with an empty key column; they are still added
+                # below.
+                if not tokens[0] or not tokens[2]:
+                    print(','.join(tokens))
+
+                _add(data, tokens[0], uniprot_id)
+                _add(data, tokens[2], uniprot_id)
+
+    return data
+
+
+def _add(data, rhea_id, uniprot_id):
+    '''Appends uniprot_id to the list stored under rhea_id, creating it if absent.'''
+    data.setdefault(rhea_id, []).append(uniprot_id)
diff --git a/metanetx_uniprot/seq_utils.py b/metanetx_uniprot/seq_utils.py
new file mode 100644
index 00000000..7131d200
--- /dev/null
+++ b/metanetx_uniprot/seq_utils.py
@@ -0,0 +1,200 @@
+'''
+synbiochem (c) University of Manchester 2015
+
+synbiochem is licensed under the MIT License.
+
+To view a copy of this license, visit <http://opensource.org/licenses/MIT/>.
+
+@author:  neilswainston
+'''
+from collections import defaultdict
+import itertools
+import operator
+import os
+import random
+import re
+import ssl
+from subprocess import call
+import tempfile
+from urllib import parse
+
+from Bio import Seq, SeqIO, SeqRecord
+from Bio.Blast import NCBIXML
+from Bio.Data import CodonTable
+from Bio.Restriction import Restriction, Restriction_Dictionary
+from Bio.SeqUtils.MeltingTemp import Tm_NN
+import requests
+from synbiochem.biochem4j import taxonomy
+from synbiochem.utils import thread_utils
+import queue
+
+import numpy as np
+from tqdm import tqdm
+import sys
+
+def get_uniprot_values(uniprot_ids, fields, batch_size, verbose=False,
+                       num_threads=0):
+    '''Fetches Uniprot rows in batches and indexes them by their 'Entry' value.
+
+    Batches of batch_size ids are fetched with _get_uniprot_batch, through a
+    thread pool when num_threads is non-zero and sequentially otherwise.
+
+    Returns a dict mapping each row's 'Entry' value to the row.
+    '''
+    rows = []
+
+    if not num_threads:
+        for start in range(0, len(uniprot_ids), batch_size):
+            _get_uniprot_batch(uniprot_ids, start, batch_size, fields, rows,
+                               verbose)
+    else:
+        pool = thread_utils.ThreadPool(num_threads)
+
+        for start in range(0, len(uniprot_ids), batch_size):
+            pool.add_task(_get_uniprot_batch, uniprot_ids, start,
+                          batch_size, fields, rows, verbose)
+
+        pool.wait_completion()
+
+    by_entry = {}
+
+    for row in rows:
+        by_entry[row['Entry']] = row
+
+    return by_entry
+
+
+def search_uniprot(query, fields, limit=128):
+    '''Runs a Uniprot search and returns the parsed result rows as a list.
+
+    query and each entry of fields are URL-quoted; limit is sent as the
+    'limit' parameter.
+    '''
+    # NOTE(review): this uses the www.uniprot.org/uniprot endpoint, while
+    # _get_uniprot_batch uses rest.uniprot.org - confirm it is still served.
+    columns = ','.join([parse.quote(field) for field in fields])
+    url = ('http://www.uniprot.org/uniprot/?query=' + parse.quote(query) +
+           '&sort=score&limit=' + str(limit) +
+           '&format=tab&columns=id,' + columns)
+
+    hits = []
+    _parse_uniprot_data(url, hits)
+
+    return hits
+
+
+def _get_uniprot_batch(uniprot_ids, i, batch_size, fields, values, verbose):
+    '''Fetches one batch of Uniprot rows, queried by accession, into values.
+
+    The batch is uniprot_ids[i:i + batch_size], clipped to the list length.
+    Parsed rows are appended to values by _parse_uniprot_data.
+    '''
+    end = min(i + batch_size, len(uniprot_ids))
+
+    if verbose:
+        print('seq_utils: getting Uniprot values ' + str(i) + ' - ' +
+              str(end) + ' / ' +
+              str(len(uniprot_ids)))
+
+    #If getting values in batch Remove 'accession:' +  from start of join([HERE .....]) and accession: from query=HERE
+    accession_terms = ['accession:' + uniprot_id
+                       for uniprot_id in uniprot_ids[i:end]]
+    quoted_fields = [parse.quote(field) for field in fields]
+
+    url = 'https://rest.uniprot.org/uniprotkb/search?query=' + \
+        '%20OR%20'.join(accession_terms) + \
+        '&format=tsv&fields=accession%2C' + '%2C'.join(quoted_fields)
+
+    _parse_uniprot_data(url, values)
+
+
+def _parse_uniprot_data(url, values):
+    '''Fetches url and appends one dict per tab-separated data row to values.
+
+    The first line of the response supplies the column headers. In each row
+    'Protein names' is split into a list of names, and every column whose
+    header starts with 'Cross-reference' is split on ';'. A row containing
+    an 'Error messages' column is printed and the process exits.
+
+    Any exception (including an HTTP error status) is printed and swallowed,
+    leaving values with whatever rows were parsed up to that point.
+    '''
+    # Matches each parenthesised name, or the leading unparenthesised name.
+    name_regexp = re.compile(r'(?<=\()[^)]*(?=\))|^[^(][^()]*')
+    headers = None
+
+    try:
+        resp = requests.get(url, allow_redirects=True)
+        # Report HTTP failures instead of parsing an error body as TSV.
+        resp.raise_for_status()
+
+        for line in resp.iter_lines():
+            tokens = line.decode('utf-8').strip().split('\t')
+
+            if headers is None:
+                headers = tokens
+                continue
+
+            # Kept separate from resp so the response object is not shadowed.
+            entry = dict(zip(headers, tokens))
+
+            if 'Protein names' in entry:
+                names = name_regexp.findall(entry.pop('Protein names'))
+                entry['Protein names'] = [nme.strip() for nme in names]
+
+            for key in entry:
+                if key.startswith('Cross-reference'):
+                    entry[key] = entry[key].split(';')
+
+            if 'Error messages' in entry:
+                print(entry); sys.exit()
+            values.append(entry)
+    except Exception as err:
+        print(err)
+
+
+def get_uniprot_values_organism(organism_ids, fields, batch_size, verbose=False, num_threads=0):
+    '''Fetches Uniprot rows for organism ids in batches of batch_size.
+
+    Returns the list of row dicts, one per organism/enzyme entry. num_threads
+    is accepted but not used.
+    '''
+    rows = []
+    batch_starts = range(0, len(organism_ids), batch_size)
+
+    for start in tqdm(batch_starts):
+        rows = _get_uniprot_batch_organism(organism_ids, start, batch_size, fields, rows, verbose)
+
+    ##Issue: Only returns one enzyme per organism
+    #return {value['Organism (ID)']: value for value in values}
+    ##Returns list of dicts for each organism-id enzyme entry
+    return rows
+
+def _get_uniprot_batch_organism(organism_ids, i, batch_size, fields, values, verbose):
+    '''Fetches one batch of Uniprot rows, queried by organism id, into values.
+
+    The batch is organism_ids[i:i + batch_size], clipped to the list length.
+    Returns the same values list after _parse_uniprot_data has appended to it.
+    '''
+    end = min(i + batch_size, len(organism_ids))
+
+    if verbose:
+        print('seq_utils: getting Uniprot values ' + str(i) + ' - ' +
+              str(end) + ' / ' +
+              str(len(organism_ids)))
+
+    #If getting values in batch Remove 'accession:' +  from start of join([HERE .....]) and accession: from query=HERE
+    organism_terms = ['organism_id:' + organism_id
+                      for organism_id in organism_ids[i:end]]
+    quoted_fields = [parse.quote(field) for field in fields]
+
+    # Alternative kept from the original for single-row debugging:
+    #    '&format=tsv&size=1&fields=organism_id%2C'
+    url = 'https://rest.uniprot.org/uniprotkb/search?query=' + \
+        '%20OR%20'.join(organism_terms) + \
+        '&format=tsv&size=500&keywords=Reference+proteome&fields=organism_id%2C' + \
+        '%2C'.join(quoted_fields)
+
+    _parse_uniprot_data(url, values)
+    return values
+
+def parse_response(res,values):
+    '''Appends one dict per tab-separated data line of res to values.
+
+    The first line yielded by res.iter_lines() supplies the headers; every
+    later line is zipped with them into a dict. Returns values.
+    '''
+    lines = res.iter_lines()
+    headers = None
+
+    for raw_line in lines:
+        tokens = raw_line.decode('utf-8').strip().split('\t')
+
+        if headers is None:
+            headers = tokens
+            continue
+
+        values.append(dict(zip(headers, tokens)))
+
+    return values
+
+
+def get_jobs(url,values):
+    '''Fetches url and every following page, appending parsed rows to values.
+
+    Each response is parsed with parse_response. Paging follows the 'next'
+    entry of the response's links until a response has none.
+    '''
+    # The context manager closes the session once paging is done.
+    with requests.Session() as session:
+        page = session.get(url)
+        parse_response(page,values)
+
+        while 'next' in page.links:
+            page = session.get(page.links['next']['url'])
+            parse_response(page,values)
+
+def _get_uniprot_batch_reference_proteome(url):
+    '''Returns the list of rows collected by get_jobs for url.'''
+
+    values = []
+
+    get_jobs(url,values)
+
+    return values
diff --git a/metanetx_uniprot/spectra_utils.py b/metanetx_uniprot/spectra_utils.py
new file mode 100644
index 00000000..1efce1bb
--- /dev/null
+++ b/metanetx_uniprot/spectra_utils.py
@@ -0,0 +1,122 @@
+'''
+SYNBIOCHEM-DB (c) University of Manchester 2015
+
+SYNBIOCHEM-DB is licensed under the MIT License.
+
+To view a copy of this license, visit <http://opensource.org/licenses/MIT/>.
+
+@author:  neilswainston
+'''
+import os
+import tempfile
+import urllib
+import zipfile
+from urllib.request import urlretrieve
+
+import ijson
+
+
+__MONA_URL = 'http://mona.fiehnlab.ucdavis.edu/rest/downloads/retrieve/' + \
+    'd2eb33f0-b22e-49a7-bc31-eb951f8347b2'
+
+__MONA_FILENAME = 'MoNA-export-All_Spectra.json'
+
+_NAME_MAP = {'kegg': 'kegg.compound',
+             'molecular formula': 'formula',
+             'total exact mass': 'monoisotopic_mass:float'}
+
+
+def load(writer, chem_manager,
+         array_delimiter='|', url=__MONA_URL, filename=__MONA_FILENAME):
+    '''Builds Spectrum nodes and Chemical-to-Spectrum relationships.
+
+    Every parsed record has its chemical registered with chem_manager, its
+    spectrum kept as a node, and a 'has' relationship recorded between them.
+
+    Returns two single-element lists: the result of writing the nodes and
+    the result of writing the relationships.
+    '''
+    spectrum_nodes = []
+    chem_spectrum_rels = []
+
+    source_path = _get_file(url, filename)
+
+    for record in _parse(source_path, array_delimiter):
+        spectrum = record['spectrum']
+        chem_id, _ = chem_manager.add_chemical(record['chemical'])
+        spectrum_nodes.append(spectrum)
+        chem_spectrum_rels.append([chem_id, 'has',
+                                   spectrum['id:ID(Spectrum)']])
+
+    node_files = [writer.write_nodes(spectrum_nodes, 'Spectrum')]
+    rel_files = [writer.write_rels(chem_spectrum_rels, 'Chemical', 'Spectrum')]
+
+    return node_files, rel_files
+
+
+def _parse(filename, array_delimiter):
+    '''Parses MoNA json file.
+
+    Streams the file with ijson and builds one record per top-level item,
+    each holding a 'chemical' dict and a 'spectrum' dict. Spectrum peaks are
+    split into 'm/z:float[]' and 'I:float[]' strings joined with
+    array_delimiter.
+
+    Returns the list of records.
+    '''
+    records = []
+    record = {'chemical': {'names:string[]': []},
+              'spectrum': {':LABEL': 'Spectrum', 'tags:string[]': []}}
+    name = None
+
+    # Open in a with-block so the handle is closed when parsing ends or fails.
+    with open(filename) as json_file:
+        for prefix, typ, value in ijson.parse(json_file):
+            if prefix == 'item' and typ == 'start_map':
+                # A new top-level item starts: begin a fresh record.
+                record = {'chemical': {'names:string[]': []},
+                          'spectrum': {':LABEL': 'Spectrum',
+                                       'tags:string[]': []}}
+            elif prefix == 'item.compound.item.inchi':
+                record['chemical']['inchi'] = value
+            elif prefix == 'item.compound.item.names.item.name':
+                # The first name seen becomes the primary name.
+                if 'name' not in record['chemical']:
+                    record['chemical']['name'] = value
+                record['chemical']['names:string[]'].append(value)
+            elif prefix == 'item.compound.item.metaData.item.name' or \
+                    prefix == 'item.metaData.item.name':
+                # Remember the metadata key until its value event arrives.
+                name = _normalise_name(value.lower())
+            elif prefix == 'item.compound.item.metaData.item.value':
+                _parse_compound_metadata(name, value, record)
+                name = None
+            elif prefix == 'item.id':
+                record['spectrum']['id:ID(Spectrum)'] = value
+            elif prefix == 'item.metaData.item.value':
+                record['spectrum'][name] = value
+                name = None
+            elif prefix == 'item.spectrum':
+                # 'a:b c:d ...' flattens to [a, b, c, d, ...]; even positions
+                # go to m/z and odd positions to I.
+                values = [float(val) for term in value.split()
+                          for val in term.split(':')]
+                record['spectrum']['m/z:float[]'] = \
+                    array_delimiter.join(map(str, values[0::2]))
+                record['spectrum']['I:float[]'] = \
+                    array_delimiter.join(map(str, values[1::2]))
+            elif prefix == 'item.tags.item.text':
+                record['spectrum']['tags:string[]'].append(value)
+            elif prefix == 'item' and typ == 'end_map':
+                records.append(record)
+
+    return records
+
+
+def _get_file(url, filename):
+    '''Gets file from url.
+
+    Looks for filename in the 'MoNA' directory under the user's home
+    directory, creating the directory if needed. If the file is not there,
+    the zip archive at url is downloaded and extracted into that directory.
+
+    Returns the path of the existing file, or of the archive's first member
+    after extraction.
+    '''
+    destination = os.path.join(os.path.expanduser('~'), 'MoNA')
+    os.makedirs(destination, exist_ok=True)
+
+    filepath = os.path.join(destination, filename)
+
+    if not os.path.exists(filepath):
+        # Only the temporary name is needed; close the handle right away so
+        # urlretrieve can write to the path.
+        tmp_file = tempfile.NamedTemporaryFile(delete=False)
+        tmp_file.close()
+
+        try:
+            urlretrieve(url, tmp_file.name)
+
+            with zipfile.ZipFile(tmp_file.name, 'r') as zfile:
+                filepath = os.path.join(destination, zfile.namelist()[0])
+                zfile.extractall(destination)
+        finally:
+            # The downloaded archive is not needed after extraction.
+            os.remove(tmp_file.name)
+
+    return filepath
+
+
+def _parse_compound_metadata(name, value, record):
+    '''Stores one compound metadata value under its normalised name.
+
+    A string 'chebi' value has 'CHEBI:' removed and only its first
+    whitespace-separated token kept.
+    '''
+    is_chebi_text = name == 'chebi' and isinstance(value, str)
+
+    if is_chebi_text:
+        without_prefix = value.replace('CHEBI:', '')
+        value = without_prefix.split()[0]
+
+    chemical = record['chemical']
+    chemical[_normalise_name(name)] = value
+
+
+def _normalise_name(name):
+    '''Maps name through _NAME_MAP, else replaces ':' with '_'.'''
+    mapped = _NAME_MAP.get(name)
+
+    return name.replace(':', '_') if mapped is None else mapped
diff --git a/metanetx_uniprot/test/__init__.py b/metanetx_uniprot/test/__init__.py
new file mode 100644
index 00000000..e0aa1f5e
--- /dev/null
+++ b/metanetx_uniprot/test/__init__.py
@@ -0,0 +1,9 @@
+'''
+synbiochem (c) University of Manchester 2015
+
+synbiochem is licensed under the MIT License.
+
+To view a copy of this license, visit <http://opensource.org/licenses/MIT/>.
+
+@author:  neilswainston
+'''
diff --git a/metanetx_uniprot/test/test_enzyme_utils.py b/metanetx_uniprot/test/test_enzyme_utils.py
new file mode 100644
index 00000000..c0318f65
--- /dev/null
+++ b/metanetx_uniprot/test/test_enzyme_utils.py
@@ -0,0 +1,39 @@
+'''
+synbiochem (c) University of Manchester 2015
+
+synbiochem is licensed under the MIT License.
+
+To view a copy of this license, visit <http://opensource.org/licenses/MIT/>.
+
+@author:  neilswainston
+'''
+# pylint: disable=too-many-public-methods
+import unittest
+
+from sbcdb.enzyme_utils import EnzymeManager
+
+
+class TestEnzymeManager(unittest.TestCase):
+    '''Exercises EnzymeManager.add_uniprot_data.'''
+
+    def setUp(self):
+        super().setUp()
+        self.__manager = EnzymeManager()
+
+    def test_add_uniprot_data(self):
+        '''Adds the same ids unthreaded, then threaded, checking the node count.'''
+        enzyme_ids = ['P19367', 'Q2KNB7']
+        expected_count = len(enzyme_ids)
+
+        # Unthreaded run.
+        self.__manager.add_uniprot_data(enzyme_ids, source='test')
+        self.assertEqual(expected_count, len(self.__manager.get_nodes()))
+
+        # Threaded run.
+        self.__manager.add_uniprot_data(enzyme_ids, num_threads=24,
+                                        source='test')
+        self.assertEqual(expected_count, len(self.__manager.get_nodes()))
+
+
+if __name__ == "__main__":
+    # import sys;sys.argv = ['', 'Test.testName']
+    unittest.main()
diff --git a/metanetx_uniprot/test/test_mnxref_utils.py b/metanetx_uniprot/test/test_mnxref_utils.py
new file mode 100644
index 00000000..88a3da23
--- /dev/null
+++ b/metanetx_uniprot/test/test_mnxref_utils.py
@@ -0,0 +1,37 @@
+'''
+synbiochem (c) University of Manchester 2015
+
+synbiochem is licensed under the MIT License.
+
+To view a copy of this license, visit <http://opensource.org/licenses/MIT/>.
+
+@author:  neilswainston
+'''
+# pylint: disable=too-many-public-methods
+import unittest
+
+from sbcdb.mnxref_utils import MnxRefReader
+
+
+class TestMnxRefReader(unittest.TestCase):
+    '''Test class for MnxRefReader.'''
+
+    def setUp(self):
+        unittest.TestCase.setUp(self)
+        reader = MnxRefReader()
+        self.__chem_data = reader.get_chem_data()
+        self.__reac_data = reader.get_reac_data()
+
+    def test_get_chem_data(self):
+        '''Tests get_chem_data method.'''
+        # assertEqual replaces the assertEquals alias, removed in Python 3.12.
+        self.assertEqual(self.__chem_data['MNXM1354']['chebi'], 'CHEBI:58282')
+
+    def test_get_reac_data(self):
+        '''Tests get_reac_data method.'''
+        eqn = '1 MNXM1 + 1 MNXM6 + 1 MNXM97401 = 1 MNXM5 + 1 MNXM97393'
+        self.assertEqual(self.__reac_data['MNXR62989']['equation'], eqn)
+
+
+if __name__ == "__main__":
+    # import sys;sys.argv = ['', 'Test.testName']
+    unittest.main()
diff --git a/metanetx_uniprot/utils.py b/metanetx_uniprot/utils.py
new file mode 100644
index 00000000..67639e71
--- /dev/null
+++ b/metanetx_uniprot/utils.py
@@ -0,0 +1,73 @@
+'''
+synbiochem (c) University of Manchester 2016
+
+synbiochem is licensed under the MIT License.
+
+To view a copy of this license, visit <http://opensource.org/licenses/MIT/>.
+
+@author:  neilswainston
+'''
+# pylint: disable=invalid-name
+# pylint: disable=too-many-arguments
+import os
+from shutil import rmtree
+
+import pandas as pd
+
+
+class Writer(object):
+    '''Writes node and relationship tables as csv files under dest_dir.'''
+
+    def __init__(self, dest_dir):
+        root = os.path.abspath(dest_dir)
+        self.__nodes_dir = os.path.join(root, 'nodes')
+        self.__rels_dir = os.path.join(root, 'rels')
+
+        # Start from empty 'nodes' and 'rels' directories.
+        for directory in (self.__nodes_dir, self.__rels_dir):
+            if os.path.exists(directory):
+                rmtree(directory)
+
+            os.makedirs(directory)
+
+    def write_nodes(self, nodes, group, separator=';'):
+        '''Writes nodes to <group>.csv in the nodes directory.
+
+        Columns that are empty for every node are dropped. Returns the file
+        name, or None when nodes is empty.
+        '''
+        if not nodes:
+            return None
+
+        frame = pd.DataFrame(nodes).dropna(axis=1, how='all')
+
+        filename = os.path.join(self.__nodes_dir, group + '.csv')
+        frame.to_csv(filename, index=False, encoding='utf-8', sep=separator)
+        print('just wrote: ',filename)
+
+        return filename
+
+    def write_rels(self, rels, group_start, group_end, separator=';'):
+        '''Writes rels to <group_start>_<group_end>.csv in the rels directory.
+
+        When the first relationship has more than three elements, the fourth
+        is treated as a properties dict and expanded into one column per key.
+        Returns the file name, or None when rels is empty.
+        '''
+        if not rels:
+            return None
+
+        has_properties = len(rels[0]) > 3
+
+        columns = [':START_ID(' + group_start + ')',
+                   ':TYPE',
+                   ':END_ID(' + group_end + ')']
+
+        if has_properties:
+            columns.append('PROPERTIES')
+
+        frame = pd.DataFrame(rels, columns=columns)
+
+        if has_properties:
+            properties = pd.DataFrame(list(frame['PROPERTIES']))
+            frame = frame.drop('PROPERTIES', axis=1).join(properties)
+
+        filename = os.path.join(self.__rels_dir,
+                                group_start + '_' + group_end + '.csv')
+        frame.to_csv(filename, index=False, encoding='utf-8', sep=separator)
+        print('just wrote: ',filename)
+
+        return filename
diff --git a/notebooks/Uniprot_API_test.ipynb b/notebooks/Uniprot_API_test.ipynb
new file mode 100644
index 00000000..c5eec384
--- /dev/null
+++ b/notebooks/Uniprot_API_test.ipynb
@@ -0,0 +1,303 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 94,
+   "id": "underlying-necessity",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "'''\n",
+    "class ReactionManager(object):\n",
+    "    #Class to implement a manager of Reaction data.\n",
+    "\n",
+    "    def __init__(self):\n",
+    "        #Constructor.\n",
+    "        self.__nodes = {}\n",
+    "        self.__reac_ids = {}\n",
+    "        self.__reac_enz_rels = []\n",
+    "        self.__org_enz_rels = []\n",
+    "        self.__enz_man = EnzymeManager()\n",
+    "'''\n",
+    "\n",
+    "\n",
+    "def add_uniprot_data(enzyme_ids, source, num_threads=0):\n",
+    "    print(enzyme_ids)\n",
+    "    '''Gets Uniprot data.'''\n",
+    "\n",
+    "    #fields = ['entry name', 'protein names', 'organism-id', 'ec']\n",
+    "    fields = ['id', 'protein_name', 'organism_id', 'ec']\n",
+    "    #enzyme_ids = [enzyme_id for enzyme_id in enzyme_ids if enzyme_id not in self.__nodes]\n",
+    "    uniprot_values = get_uniprot_values(enzyme_ids, fields,batch_size=128,verbose=False,num_threads=num_threads)\n",
+    "\n",
+    "    print('add_uniprot_data function: added uniprot values: ',len(uniprot_values))\n",
+    "\n",
+    "\n",
+    "\n",
+    "    for uniprot_id, uniprot_value in uniprot_values.items():\n",
+    "        enzyme_node = {':LABEL': 'Enzyme',\n",
+    "                       'uniprot:ID(Enzyme)': uniprot_id}\n",
+    "        #self.__nodes[uniprot_id] = enzyme_node\n",
+    "\n",
+    "        organism_id = uniprot_value.pop('Organism (ID)') \\\n",
+    "            if 'Organism (ID)' in uniprot_value else None\n",
+    "\n",
+    "        if 'Entry name' in uniprot_value:\n",
+    "            enzyme_node['entry'] = uniprot_value['Entry name']\n",
+    "\n",
+    "        if 'Protein names' in uniprot_value:\n",
+    "            enzyme_node['names'] = uniprot_value['Protein names']\n",
+    "\n",
+    "            if enzyme_node['names']:\n",
+    "                enzyme_node['name'] = enzyme_node['names'][0]\n",
+    "\n",
+    "        if 'EC number' in uniprot_value:\n",
+    "            enzyme_node['ec-code'] = uniprot_value['EC number']\n",
+    "\n",
+    "        #if organism_id:\n",
+    "            #self.__org_enz_rels.append([organism_id, 'expresses',uniprot_id, {'source': source}])\n",
+    "            \n",
+    "\n",
+    "def get_uniprot_values(uniprot_ids, fields, batch_size, verbose=False, num_threads=0):\n",
+    "    values = []\n",
+    "\n",
+    "    if num_threads:\n",
+    "        thread_pool = thread_utils.ThreadPool(num_threads)\n",
+    "\n",
+    "        for i in range(0, len(uniprot_ids), batch_size):\n",
+    "            thread_pool.add_task(_get_uniprot_batch, uniprot_ids, i,batch_size, fields, values, verbose)\n",
+    "\n",
+    "        thread_pool.wait_completion()\n",
+    "    else:\n",
+    "        for i in range(0, len(uniprot_ids), batch_size):\n",
+    "            _get_uniprot_batch(uniprot_ids, i, batch_size, fields, values,verbose)\n",
+    "\n",
+    "    return {value['Entry']: value for value in values}\n",
+    "\n",
+    "\n",
+    "\n",
+    "def _get_uniprot_batch(uniprot_ids, i, batch_size, fields, values, verbose):\n",
+    "    '''Get batch of Uniprot data.'''\n",
+    "    if verbose:\n",
+    "        print('seq_utils: getting Uniprot values ' + str(i) + ' - ' +\n",
+    "              str(min(i + batch_size, len(uniprot_ids))) + ' / ' +\n",
+    "              str(len(uniprot_ids)))\n",
+    "\n",
+    "    #If getting values in batch Remove 'accession:' +  from start of join([HERE .....]) and accession: from query=HERE\n",
+    "    batch = uniprot_ids[i:min(i + batch_size, len(uniprot_ids))]\n",
+    "    query = '%20OR%20'.join(['accession:' + uniprot_id for uniprot_id in batch])\n",
+    "    url = 'https://rest.uniprot.org/uniprotkb/search?query=' + query + \\\n",
+    "        '&format=tsv&fields=accession%2C' + '%2C'.join([parse.quote(field)\n",
+    "                                              for field in fields])\n",
+    "\n",
+    "    print(url)\n",
+    "\n",
+    "    _parse_uniprot_data(url, values)\n",
+    "    \n",
+    "    \n",
+    "def _parse_uniprot_data(url, values):\n",
+    "    '''Parses Uniprot data.'''\n",
+    "    headers = None\n",
+    "\n",
+    "    try:\n",
+    "        resp = requests.get(url, allow_redirects=True)\n",
+    "\n",
+    "        for line in resp.iter_lines():\n",
+    "            line = line.decode('utf-8')\n",
+    "            tokens = line.strip().split('\\t')\n",
+    "\n",
+    "            if headers is None:\n",
+    "                headers = tokens\n",
+    "            else:\n",
+    "                resp = dict(zip(headers, tokens))\n",
+    "\n",
+    "                if 'Protein names' in resp:\n",
+    "                    regexp = re.compile(r'(?<=\\()[^)]*(?=\\))|^[^(][^()]*')\n",
+    "                    names = regexp.findall(resp.pop('Protein names'))\n",
+    "                    resp['Protein names'] = [nme.strip() for nme in names]\n",
+    "\n",
+    "                for key in resp:\n",
+    "                    if key.startswith('Cross-reference'):\n",
+    "                        resp[key] = resp[key].split(';')\n",
+    "                values.append(resp)\n",
+    "        print('values from parse_uniprot_data: ',type(values))\n",
+    "        return values\n",
+    "    except Exception as err:\n",
+    "        print(err)\n",
+    "    \n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 45,
+   "id": "russian-dispatch",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['B4RBW1', 'A9BIS7', 'B5Z3E3']\n",
+      "https://rest.uniprot.org/uniprotkb/search?query=accession:B4RBW1%20OR%20accession:A9BIS7%20OR%20accession:B5Z3E3&format=tsv&fields=accession%2Cid%2Cprotein_name%2Corganism_id%2Cec\n",
+      "add_uniprot_data function: added uniprot values:  3\n"
+     ]
+    }
+   ],
+   "source": [
+    "### Query by protein ID\n",
+    "\n",
+    "\n",
+    "from urllib import parse\n",
+    "import requests\n",
+    "import re\n",
+    "\n",
+    "\n",
+    "num_threads = 1\n",
+    "source = 'rhea'\n",
+    "enzyme_ids = ['B4RBW1', 'A9BIS7', 'B5Z3E3']\n",
+    "\n",
+    "add_uniprot_data(enzyme_ids, source)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 102,
+   "id": "removable-gibraltar",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "#Download then work with it\n",
+    "\n",
+    "def add_uniprot_data_organism(organism_ids, source, num_threads=0):\n",
+    "    print(organism_ids)\n",
+    "    '''Gets Uniprot data.'''\n",
+    "\n",
+    "    #fields = ['entry name', 'protein names', 'organism-id', 'ec']\n",
+    "    fields = ['id', 'protein_name', 'organism_id', 'ec']\n",
+    "    #enzyme_ids = [enzyme_id for enzyme_id in enzyme_ids if enzyme_id not in self.__nodes]\n",
+    "    organism_values = get_uniprot_values_organism(organism_ids, fields,batch_size=128,verbose=False,num_threads=num_threads)\n",
+    "\n",
+    "    print('add_uniprot_data function: added uniprot values: ',len(organism_values))\n",
+    "\n",
+    "\n",
+    "\n",
+    "    for uniprot_id, uniprot_value in organism_values.items():\n",
+    "        enzyme_node = {':LABEL': 'Enzyme',\n",
+    "                       'uniprot:ID(Enzyme)': uniprot_id}\n",
+    "        #self.__nodes[uniprot_id] = enzyme_node\n",
+    "\n",
+    "        organism_id = uniprot_value.pop('Organism (ID)') \\\n",
+    "            if 'Organism (ID)' in uniprot_value else None\n",
+    "\n",
+    "        if 'Entry name' in uniprot_value:\n",
+    "            enzyme_node['entry'] = uniprot_value['Entry name']\n",
+    "\n",
+    "        if 'Protein names' in uniprot_value:\n",
+    "            enzyme_node['names'] = uniprot_value['Protein names']\n",
+    "\n",
+    "            if enzyme_node['names']:\n",
+    "                enzyme_node['name'] = enzyme_node['names'][0]\n",
+    "\n",
+    "        if 'EC number' in uniprot_value:\n",
+    "            enzyme_node['ec-code'] = uniprot_value['EC number']\n",
+    "\n",
+    "        #if organism_id:\n",
+    "            #self.__org_enz_rels.append([organism_id, 'expresses',uniprot_id, {'source': source}])\n",
+    "            \n",
+    "    return organism_values\n",
+    "\n",
+    "def get_uniprot_values_organism(organism_ids, fields, batch_size, verbose=False, num_threads=0):\n",
+    "    values = []\n",
+    "\n",
+    "    for i in range(0, len(organism_ids), batch_size):\n",
+    "        values = _get_uniprot_batch_organism(organism_ids, i, batch_size, fields, values,verbose)\n",
+    "\n",
+    "    return {value['Organism (ID)']: value for value in values}\n",
+    "\n",
+    "\n",
+    "def _get_uniprot_batch_organism(uniprot_ids, i, batch_size, fields, values, verbose):\n",
+    "    '''Get batch of Uniprot data.'''\n",
+    "    if verbose:\n",
+    "        print('seq_utils: getting Uniprot values ' + str(i) + ' - ' +\n",
+    "              str(min(i + batch_size, len(uniprot_ids))) + ' / ' +\n",
+    "              str(len(uniprot_ids)))\n",
+    "\n",
+    "    #If getting values in batch Remove 'accession:' +  from start of join([HERE .....]) and accession: from query=HERE\n",
+    "    batch = uniprot_ids[i:min(i + batch_size, len(uniprot_ids))]\n",
+    "    query = '%20OR%20'.join(['organism_id:' + uniprot_id for uniprot_id in batch])\n",
+    "    url = 'https://rest.uniprot.org/uniprotkb/search?query=' + query + \\\n",
+    "        '&format=tsv&fields=organism_id%2C' + '%2C'.join([parse.quote(field)\n",
+    "                                              for field in fields])\n",
+    "\n",
+    "    print('_get_uniprot_batch_organism url: ',url)\n",
+    "\n",
+    "    values = _parse_uniprot_data(url, values)\n",
+    "    return values\n",
+    "    \n",
+    "    \n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 103,
+   "id": "removed-unemployment",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['226900', '296591']\n",
+      "_get_uniprot_batch_organism url:  https://rest.uniprot.org/uniprotkb/search?query=organism_id:226900%20OR%20organism_id:296591&format=tsv&fields=organism_id%2Cid%2Cprotein_name%2Corganism_id%2Cec\n",
+      "values from parse_uniprot_data:  <class 'list'>\n",
+      "add_uniprot_data function: added uniprot values:  2\n",
+      "{'226900': {'Entry Name': 'GLMU_BACCR', 'EC number': '2.3.1.157; 2.7.7.23', 'Protein names': ['Bifunctional protein GlmU [Includes: UDP-N-acetylglucosamine pyrophosphorylase', 'EC 2.7.7.23', 'N-acetylglucosamine-1-phosphate uridyltransferase', 'EC 2.3.1.157']}, '296591': {'Entry Name': 'RLMN_POLSJ', 'EC number': '2.1.1.192', 'Protein names': ['Dual-specificity RNA methyltransferase RlmN', 'EC 2.1.1.192', '23S rRNA (adenine(2503', '2', '23S rRNA m2A2503 methyltransferase', 'Ribosomal RNA large subunit methyltransferase N', 'tRNA (adenine(37', '2', 'tRNA m2A37 methyltransferase']}}\n"
+     ]
+    }
+   ],
+   "source": [
+    "### Query by organism ID\n",
+    "\n",
+    "#query = 'https://rest.uniprot.org/uniprotkb/search?query=organism_id:226900'\n",
+    "\n",
+    "\n",
+    "source = 'rhea'\n",
+    "organism_ids = ['226900','296591']\n",
+    "\n",
+    "organism_values = add_uniprot_data_organism(organism_ids, source)\n",
+    "\n",
+    "print(organism_values)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "pleased-coaching",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}