From 5039cd78c36a0a3c103b8ba409319a0754d733cd Mon Sep 17 00:00:00 2001
From: bill-baumgartner <bill.baumgartner@gmail.com>
Date: Mon, 25 Jan 2016 15:57:44 -0700
Subject: [PATCH 01/36] updating poms for 0.7-SNAPSHOT development

---
 datasource-fileparsers/pom.xml | 5 ++---
 datasource-identifiers/pom.xml | 5 ++---
 datasource-rdfizer/pom.xml     | 5 ++---
 pom.xml                        | 7 +++----
 4 files changed, 9 insertions(+), 13 deletions(-)
diff --git a/datasource-fileparsers/pom.xml b/datasource-fileparsers/pom.xml
index 67f1dc4..a1eee00 100644
--- a/datasource-fileparsers/pom.xml
+++ b/datasource-fileparsers/pom.xml
@@ -1,10 +1,9 @@
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
 	<modelVersion>4.0.0</modelVersion>
 	<parent>
 		<artifactId>datasource</artifactId>
 		<groupId>edu.ucdenver.ccp</groupId>
-		<version>0.6-SNAPSHOT</version>
+		<version>0.7-SNAPSHOT</version>
 	</parent>
 	<artifactId>datasource-fileparsers</artifactId>
 
diff --git a/datasource-identifiers/pom.xml b/datasource-identifiers/pom.xml
index 3a3aad5..ef3917f 100644
--- a/datasource-identifiers/pom.xml
+++ b/datasource-identifiers/pom.xml
@@ -1,10 +1,9 @@
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
 	<modelVersion>4.0.0</modelVersion>
 	<parent>
 		<artifactId>datasource</artifactId>
 		<groupId>edu.ucdenver.ccp</groupId>
-		<version>0.6-SNAPSHOT</version>
+		<version>0.7-SNAPSHOT</version>
 	</parent>
 	<artifactId>datasource-identifiers</artifactId>
 
diff --git a/datasource-rdfizer/pom.xml b/datasource-rdfizer/pom.xml
index 9206d25..1e4b68f 100644
--- a/datasource-rdfizer/pom.xml
+++ b/datasource-rdfizer/pom.xml
@@ -1,10 +1,9 @@
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
 	<modelVersion>4.0.0</modelVersion>
 	<parent>
 		<groupId>edu.ucdenver.ccp</groupId>
 		<artifactId>datasource</artifactId>
-		<version>0.6-SNAPSHOT</version>
+		<version>0.7-SNAPSHOT</version>
 	</parent>
 	<artifactId>datasource-rdfizer</artifactId>
 
diff --git a/pom.xml b/pom.xml
index 47e458e..3e6cc6f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1,9 +1,8 @@
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
 	<modelVersion>4.0.0</modelVersion>
 	<groupId>edu.ucdenver.ccp</groupId>
 	<artifactId>datasource</artifactId>
-	<version>0.6-SNAPSHOT</version>
+	<version>0.7-SNAPSHOT</version>
 	<packaging>pom</packaging>
 
 	<properties>
@@ -210,7 +209,7 @@
 										</goals>
 									</pluginExecutionFilter>
 									<action>
-										<ignore></ignore>
+										<ignore />
 									</action>
 								</pluginExecution>
 							</pluginExecutions>

From c4fef4e27ce9d28a9d52b4f518cdaa9f66defb1b Mon Sep 17 00:00:00 2001
From: bill-baumgartner <bill.baumgartner@gmail.com>
Date: Mon, 25 Jan 2016 16:20:41 -0700
Subject: [PATCH 02/36] updating develop poms to master versions to avoid merge
 conflicts

---
 datasource-fileparsers/pom.xml | 2 +-
 datasource-identifiers/pom.xml | 2 +-
 datasource-rdfizer/pom.xml     | 2 +-
 pom.xml                        | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/datasource-fileparsers/pom.xml b/datasource-fileparsers/pom.xml
index a1eee00..49ec869 100644
--- a/datasource-fileparsers/pom.xml
+++ b/datasource-fileparsers/pom.xml
@@ -3,7 +3,7 @@
 	<parent>
 		<artifactId>datasource</artifactId>
 		<groupId>edu.ucdenver.ccp</groupId>
-		<version>0.7-SNAPSHOT</version>
+		<version>0.6</version>
 	</parent>
 	<artifactId>datasource-fileparsers</artifactId>
 
diff --git a/datasource-identifiers/pom.xml b/datasource-identifiers/pom.xml
index ef3917f..a6ea21d 100644
--- a/datasource-identifiers/pom.xml
+++ b/datasource-identifiers/pom.xml
@@ -3,7 +3,7 @@
 	<parent>
 		<artifactId>datasource</artifactId>
 		<groupId>edu.ucdenver.ccp</groupId>
-		<version>0.7-SNAPSHOT</version>
+		<version>0.6</version>
 	</parent>
 	<artifactId>datasource-identifiers</artifactId>
 
diff --git a/datasource-rdfizer/pom.xml b/datasource-rdfizer/pom.xml
index 1e4b68f..a57828b 100644
--- a/datasource-rdfizer/pom.xml
+++ b/datasource-rdfizer/pom.xml
@@ -3,7 +3,7 @@
 	<parent>
 		<groupId>edu.ucdenver.ccp</groupId>
 		<artifactId>datasource</artifactId>
-		<version>0.7-SNAPSHOT</version>
+		<version>0.6</version>
 	</parent>
 	<artifactId>datasource-rdfizer</artifactId>
 
diff --git a/pom.xml b/pom.xml
index 3e6cc6f..68b8d29 100644
--- a/pom.xml
+++ b/pom.xml
@@ -2,7 +2,7 @@
 	<modelVersion>4.0.0</modelVersion>
 	<groupId>edu.ucdenver.ccp</groupId>
 	<artifactId>datasource</artifactId>
-	<version>0.7-SNAPSHOT</version>
+	<version>0.6</version>
 	<packaging>pom</packaging>
 
 	<properties>

From 48974a2c8655f76fb4d67c53873b12a537e8ad13 Mon Sep 17 00:00:00 2001
From: bill-baumgartner <bill.baumgartner@gmail.com>
Date: Mon, 25 Jan 2016 16:20:43 -0700
Subject: [PATCH 03/36] Updating develop poms back to pre merge state

---
 datasource-fileparsers/pom.xml | 2 +-
 datasource-identifiers/pom.xml | 2 +-
 datasource-rdfizer/pom.xml     | 2 +-
 pom.xml                        | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/datasource-fileparsers/pom.xml b/datasource-fileparsers/pom.xml
index 7c1441d..55a633d 100644
--- a/datasource-fileparsers/pom.xml
+++ b/datasource-fileparsers/pom.xml
@@ -3,7 +3,7 @@
 	<parent>
 		<artifactId>datasource</artifactId>
 		<groupId>edu.ucdenver.ccp</groupId>
-		<version>0.6</version>
+		<version>0.7-SNAPSHOT</version>
 	</parent>
 	<artifactId>datasource-fileparsers</artifactId>
 
diff --git a/datasource-identifiers/pom.xml b/datasource-identifiers/pom.xml
index a6ea21d..ef3917f 100644
--- a/datasource-identifiers/pom.xml
+++ b/datasource-identifiers/pom.xml
@@ -3,7 +3,7 @@
 	<parent>
 		<artifactId>datasource</artifactId>
 		<groupId>edu.ucdenver.ccp</groupId>
-		<version>0.6</version>
+		<version>0.7-SNAPSHOT</version>
 	</parent>
 	<artifactId>datasource-identifiers</artifactId>
 
diff --git a/datasource-rdfizer/pom.xml b/datasource-rdfizer/pom.xml
index a57828b..1e4b68f 100644
--- a/datasource-rdfizer/pom.xml
+++ b/datasource-rdfizer/pom.xml
@@ -3,7 +3,7 @@
 	<parent>
 		<groupId>edu.ucdenver.ccp</groupId>
 		<artifactId>datasource</artifactId>
-		<version>0.6</version>
+		<version>0.7-SNAPSHOT</version>
 	</parent>
 	<artifactId>datasource-rdfizer</artifactId>
 
diff --git a/pom.xml b/pom.xml
index 68b8d29..3e6cc6f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -2,7 +2,7 @@
 	<modelVersion>4.0.0</modelVersion>
 	<groupId>edu.ucdenver.ccp</groupId>
 	<artifactId>datasource</artifactId>
-	<version>0.6</version>
+	<version>0.7-SNAPSHOT</version>
 	<packaging>pom</packaging>
 
 	<properties>

From 2af943d40d9909f8a79eec9f9754e8ba51ddb020 Mon Sep 17 00:00:00 2001
From: bill-baumgartner <bill.baumgartner@gmail.com>
Date: Mon, 25 Jan 2016 16:40:58 -0700
Subject: [PATCH 04/36] Removed project.version property

This was previously used to set the submodule versions, but it is no
longer necessary due to the adoption of the jgitflow-maven-plugin
---
 pom.xml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index 3e6cc6f..dd3e590 100644
--- a/pom.xml
+++ b/pom.xml
@@ -9,7 +9,6 @@
 		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
 		<!-- This property is inherited by all sub-modules and used to define their 
 			version -->
-		<project.version>0.6-SNAPSHOT</project.version>
 	</properties>
 
 	<dependencyManagement>

From ea51a62a9edbf3cb25fc40cb7aade5d7df3608fa Mon Sep 17 00:00:00 2001
From: bill-baumgartner <bill.baumgartner@gmail.com>
Date: Thu, 28 Jan 2016 13:16:42 -0700
Subject: [PATCH 05/36] updated pom versions in scripts

---
 datasource-rdfizer/scripts/pom-rdf-gen-9606.xml      | 4 ++--
 datasource-rdfizer/scripts/pom-rdf-gen-ids.xml       | 4 ++--
 datasource-rdfizer/scripts/pom-rdf-gen-modelorgs.xml | 4 ++--
 datasource-rdfizer/scripts/pom-rdf-gen.xml           | 4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/datasource-rdfizer/scripts/pom-rdf-gen-9606.xml b/datasource-rdfizer/scripts/pom-rdf-gen-9606.xml
index 92156f6..da7eabd 100644
--- a/datasource-rdfizer/scripts/pom-rdf-gen-9606.xml
+++ b/datasource-rdfizer/scripts/pom-rdf-gen-9606.xml
@@ -4,7 +4,7 @@
 	<groupId>edu.ucdenver.ccp</groupId>
 	<artifactId>datasource-rdfizer-rdf-gen</artifactId>
 	<packaging>pom</packaging>
-	<version>0.6-SNAPSHOT</version>
+	<version>0.7-SNAPSHOT</version>
 
 	<properties>
 		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
@@ -14,7 +14,7 @@
 		<dependency>
 			<groupId>edu.ucdenver.ccp</groupId>
 			<artifactId>datasource-rdfizer</artifactId>
-			<version>0.6-SNAPSHOT</version>
+			<version>0.7-SNAPSHOT</version>
 			<type>jar</type>
 			<scope>compile</scope>
 		</dependency>
diff --git a/datasource-rdfizer/scripts/pom-rdf-gen-ids.xml b/datasource-rdfizer/scripts/pom-rdf-gen-ids.xml
index 06aff1f..532f61d 100644
--- a/datasource-rdfizer/scripts/pom-rdf-gen-ids.xml
+++ b/datasource-rdfizer/scripts/pom-rdf-gen-ids.xml
@@ -6,7 +6,7 @@
   <groupId>edu.ucdenver.ccp</groupId>
   <artifactId>datasource-rdfizer-rdf-gen-ids</artifactId>
   <packaging>pom</packaging>
-  <version>0.6-SNAPSHOT</version>
+  <version>0.7-SNAPSHOT</version>
 
   <properties>
     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
@@ -16,7 +16,7 @@
     <dependency>
       <groupId>edu.ucdenver.ccp</groupId>
       <artifactId>datasource-rdfizer</artifactId>
-      <version>0.6-SNAPSHOT</version>
+      <version>0.7-SNAPSHOT</version>
       <type>jar</type>
       <scope>compile</scope>
     </dependency>
diff --git a/datasource-rdfizer/scripts/pom-rdf-gen-modelorgs.xml b/datasource-rdfizer/scripts/pom-rdf-gen-modelorgs.xml
index 009bdda..bd4c1bf 100644
--- a/datasource-rdfizer/scripts/pom-rdf-gen-modelorgs.xml
+++ b/datasource-rdfizer/scripts/pom-rdf-gen-modelorgs.xml
@@ -4,7 +4,7 @@
 	<groupId>edu.ucdenver.ccp</groupId>
 	<artifactId>datasource-rdfizer-rdf-gen</artifactId>
 	<packaging>pom</packaging>
-	<version>0.6-SNAPSHOT</version>
+	<version>0.7-SNAPSHOT</version>
 
 	<properties>
 		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
@@ -14,7 +14,7 @@
 		<dependency>
 			<groupId>edu.ucdenver.ccp</groupId>
 			<artifactId>datasource-rdfizer</artifactId>
-			<version>0.6-SNAPSHOT</version>
+			<version>0.7-SNAPSHOT</version>
 			<type>jar</type>
 			<scope>compile</scope>
 		</dependency>
diff --git a/datasource-rdfizer/scripts/pom-rdf-gen.xml b/datasource-rdfizer/scripts/pom-rdf-gen.xml
index 29cacb9..0ab015d 100644
--- a/datasource-rdfizer/scripts/pom-rdf-gen.xml
+++ b/datasource-rdfizer/scripts/pom-rdf-gen.xml
@@ -4,7 +4,7 @@
 	<groupId>edu.ucdenver.ccp</groupId>
 	<artifactId>datasource-rdfizer-rdf-gen</artifactId>
 	<packaging>pom</packaging>
-	<version>0.6-SNAPSHOT</version>
+	<version>0.7-SNAPSHOT</version>
 
 	<properties>
 		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
@@ -14,7 +14,7 @@
 		<dependency>
 			<groupId>edu.ucdenver.ccp</groupId>
 			<artifactId>datasource-rdfizer</artifactId>
-			<version>0.6-SNAPSHOT</version>
+			<version>0.7-SNAPSHOT</version>
 			<type>jar</type>
 			<scope>compile</scope>
 		</dependency>

From af7973f5fac4c521900644bf4975ef2b7f48b030 Mon Sep 17 00:00:00 2001
From: bill-baumgartner <bill.baumgartner@gmail.com>
Date: Mon, 1 Feb 2016 09:42:14 -0700
Subject: [PATCH 06/36] Now uses proper BFO/RO identifiers

Swapped out a few human-readable property names for their appropriate
IAO/BFO/RO identifiers
---
 .../ccp/datasource/rdfizer/rdf/vocabulary/IAO.java        | 3 +--
 .../ccp/datasource/rdfizer/rdf/vocabulary/RO.java         | 8 ++++----
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/vocabulary/IAO.java b/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/vocabulary/IAO.java
index 2abf3a2..60b7bb2 100644
--- a/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/vocabulary/IAO.java
+++ b/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/vocabulary/IAO.java
@@ -45,8 +45,7 @@
  */
 public enum IAO {
 
-	MENTIONS("mentions"),
-	//DENOTES("denotes");
+	MENTIONS("IAO_0000142"),
 	DENOTES("IAO_0000219"),
 	INFORMATION_CONTENT_ENITITY("IAO_0000030");
 
diff --git a/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/vocabulary/RO.java b/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/vocabulary/RO.java
index 7b0ed6c..57ec4fa 100644
--- a/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/vocabulary/RO.java
+++ b/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/vocabulary/RO.java
@@ -45,10 +45,10 @@
  */
 public enum RO {
 
-	LOCATED_IN("located_in"),
-	PART_OF("part_of"),
-	HAS_PART("has_part"),
-	HAS_PARTICIPANT("has_participant");
+	LOCATED_IN("RO_0001025"),
+	PART_OF("BFO_0000050"),
+	HAS_PART("BFO_0000051"),
+	HAS_PARTICIPANT("RO_0000057");
 
 	private final String termName;
 

From 82f660e845857d1b7d9800f14139f4620d3325df Mon Sep 17 00:00:00 2001
From: bill-baumgartner <bill.baumgartner@gmail.com>
Date: Mon, 1 Feb 2016 09:47:44 -0700
Subject: [PATCH 07/36] Test cases now reflect use of RO/BFO identifiers

---
 .../rdfizer/rdf/ice/RdfRecordUtilTest.java    | 36 +++++------
 .../rdf/ice/RdfRecordWriterImplTest.java      | 36 +++++------
 .../rdfizer/rdf/ice/RecordUtilTest.java       | 60 +++++++++----------
 .../rdfizer/rdf/ice/SubRecordUtilTest.java    | 50 ++++++++--------
 4 files changed, 91 insertions(+), 91 deletions(-)

diff --git a/datasource-rdfizer/src/test/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RdfRecordUtilTest.java b/datasource-rdfizer/src/test/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RdfRecordUtilTest.java
index c424404..16c1061 100644
--- a/datasource-rdfizer/src/test/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RdfRecordUtilTest.java
+++ b/datasource-rdfizer/src/test/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RdfRecordUtilTest.java
@@ -148,8 +148,8 @@ private static class TestDataRecordWithNestedSubRecordCollection extends TestDat
 	public void testGetRecordSchemaStatements_WithSubRecordField() {
 		Set<String> expectedStatements = CollectionsUtil
 				.createSet(
-						"(http://kabob.ucdenver.edu/iao/Schema, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/Schema)",
-						"(http://kabob.ucdenver.edu/iao/Schema, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/Field)",
+						"(http://kabob.ucdenver.edu/iao/Schema, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/Schema)",
+						"(http://kabob.ucdenver.edu/iao/Schema, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/Field)",
 						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecord, http://www.w3.org/2000/01/rdf-schema#subClassOf, http://purl.obolibrary.org/obo/IAO_0000030)",
 						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordSchema1, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordSchema)",
 						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordSchema1, http://purl.org/dc/terms/hasVersion, \"1\"@en)",
@@ -157,33 +157,33 @@ public void testGetRecordSchemaStatements_WithSubRecordField() {
 						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordSchema, http://www.w3.org/2000/01/rdf-schema#subClassOf, http://kabob.ucdenver.edu/iao/Schema)",
 						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecord_collectionFieldDataField1, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecord_collectionFieldDataField)",
 						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecord_collectionFieldDataField, http://www.w3.org/2000/01/rdf-schema#subClassOf, http://kabob.ucdenver.edu/iao/Field)",
-						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordSchema1, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecord_collectionFieldDataField1)",
+						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordSchema1, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecord_collectionFieldDataField1)",
 						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecord_collectionFieldDataField1, http://purl.org/dc/terms/hasVersion, \"1\"@en)",
 						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecord_collectionFieldDataField1, http://www.w3.org/2000/01/rdf-schema#label, \"collection field\"@en)",
 						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecord_primitiveIntFieldDataField1, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecord_primitiveIntFieldDataField)",
 						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecord_primitiveIntFieldDataField, http://www.w3.org/2000/01/rdf-schema#subClassOf, http://kabob.ucdenver.edu/iao/Field)",
-						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordSchema1, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecord_primitiveIntFieldDataField1)",
+						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordSchema1, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecord_primitiveIntFieldDataField1)",
 						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecord_primitiveIntFieldDataField1, http://purl.org/dc/terms/hasVersion, \"1\"@en)",
 						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecord_primitiveIntFieldDataField1, http://www.w3.org/2000/01/rdf-schema#label, \"primitive int field\"@en)",
 						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordSchema1, http://kabob.ucdenver.edu/iao/hasKeyPart, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecord_primitiveIntFieldDataField1)",
 						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecord_stringFieldDataField1, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecord_stringFieldDataField)",
 						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecord_stringFieldDataField, http://www.w3.org/2000/01/rdf-schema#subClassOf, http://kabob.ucdenver.edu/iao/Field)",
-						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordSchema1, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecord_stringFieldDataField1)",
+						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordSchema1, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecord_stringFieldDataField1)",
 						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecord_stringFieldDataField1, http://www.w3.org/2000/01/rdf-schema#comment, \"test comment for TestDataRecord.stringField\"@en)",
 						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecord_stringFieldDataField1, http://purl.org/dc/terms/hasVersion, \"1\"@en)",
 						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecord_stringFieldDataField1, http://www.w3.org/2000/01/rdf-schema#label, \"string field\"@en)",
-						"(http://kabob.ucdenver.edu/iao/Schema, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/Schema)",
-						"(http://kabob.ucdenver.edu/iao/Schema, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/Field)",
+						"(http://kabob.ucdenver.edu/iao/Schema, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/Schema)",
+						"(http://kabob.ucdenver.edu/iao/Schema, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/Field)",
 						"(http://kabob.ucdenver.edu/iao/kegg/SubRecord, http://www.w3.org/2000/01/rdf-schema#subClassOf, http://purl.obolibrary.org/obo/IAO_0000030)",
 						"(http://kabob.ucdenver.edu/iao/kegg/SubRecordSchema1, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/kegg/SubRecordSchema)",
 						"(http://kabob.ucdenver.edu/iao/kegg/SubRecordSchema1, http://www.w3.org/2000/01/rdf-schema#comment, \"This is a sub-record class\"@en)",
 						"(http://kabob.ucdenver.edu/iao/kegg/SubRecordSchema1, http://purl.org/dc/terms/hasVersion, \"1\"@en)",
 						"(http://kabob.ucdenver.edu/iao/kegg/SubRecordSchema1, http://www.w3.org/2000/01/rdf-schema#label, \"sub record\"@en)",
 						"(http://kabob.ucdenver.edu/iao/kegg/SubRecordSchema, http://www.w3.org/2000/01/rdf-schema#subClassOf, http://kabob.ucdenver.edu/iao/Schema)",
-						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordSchema1, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/SubRecordSchema1)",
+						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordSchema1, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/SubRecordSchema1)",
 						"(http://kabob.ucdenver.edu/iao/kegg/SubRecord_subStringFieldDataField1, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/kegg/SubRecord_subStringFieldDataField)",
 						"(http://kabob.ucdenver.edu/iao/kegg/SubRecord_subStringFieldDataField, http://www.w3.org/2000/01/rdf-schema#subClassOf, http://kabob.ucdenver.edu/iao/Field)",
-						"(http://kabob.ucdenver.edu/iao/kegg/SubRecordSchema1, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/SubRecord_subStringFieldDataField1)",
+						"(http://kabob.ucdenver.edu/iao/kegg/SubRecordSchema1, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/SubRecord_subStringFieldDataField1)",
 						"(http://kabob.ucdenver.edu/iao/kegg/SubRecord_subStringFieldDataField1, http://purl.org/dc/terms/hasVersion, \"1\"@en)",
 						"(http://kabob.ucdenver.edu/iao/kegg/SubRecord_subStringFieldDataField1, http://www.w3.org/2000/01/rdf-schema#label, \"sub string field\"@en)");
 
@@ -207,8 +207,8 @@ public void testGetRecordSchemaStatements_WithSubRecordField() {
 	public void testGetRecordSchemaStatements_WithSubRecordCollectionField() {
 		Set<String> expectedStatements = CollectionsUtil
 				.createSet(
-						"(http://kabob.ucdenver.edu/iao/Schema, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/Schema)",
-						"(http://kabob.ucdenver.edu/iao/Schema, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/Field)",
+						"(http://kabob.ucdenver.edu/iao/Schema, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/Schema)",
+						"(http://kabob.ucdenver.edu/iao/Schema, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/Field)",
 						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordCollection, http://www.w3.org/2000/01/rdf-schema#subClassOf, http://purl.obolibrary.org/obo/IAO_0000030)",
 						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordCollectionSchema1, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordCollectionSchema)",
 						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordCollectionSchema1, http://purl.org/dc/terms/hasVersion, \"1\"@en)",
@@ -216,33 +216,33 @@ public void testGetRecordSchemaStatements_WithSubRecordCollectionField() {
 						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordCollectionSchema, http://www.w3.org/2000/01/rdf-schema#subClassOf, http://kabob.ucdenver.edu/iao/Schema)",
 						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordCollection_collectionFieldDataField1, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordCollection_collectionFieldDataField)",
 						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordCollection_collectionFieldDataField, http://www.w3.org/2000/01/rdf-schema#subClassOf, http://kabob.ucdenver.edu/iao/Field)",
-						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordCollectionSchema1, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordCollection_collectionFieldDataField1)",
+						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordCollectionSchema1, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordCollection_collectionFieldDataField1)",
 						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordCollection_collectionFieldDataField1, http://purl.org/dc/terms/hasVersion, \"1\"@en)",
 						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordCollection_collectionFieldDataField1, http://www.w3.org/2000/01/rdf-schema#label, \"collection field\"@en)",
 						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordCollection_primitiveIntFieldDataField1, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordCollection_primitiveIntFieldDataField)",
 						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordCollection_primitiveIntFieldDataField, http://www.w3.org/2000/01/rdf-schema#subClassOf, http://kabob.ucdenver.edu/iao/Field)",
-						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordCollectionSchema1, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordCollection_primitiveIntFieldDataField1)",
+						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordCollectionSchema1, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordCollection_primitiveIntFieldDataField1)",
 						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordCollection_primitiveIntFieldDataField1, http://purl.org/dc/terms/hasVersion, \"1\"@en)",
 						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordCollection_primitiveIntFieldDataField1, http://www.w3.org/2000/01/rdf-schema#label, \"primitive int field\"@en)",
 						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordCollectionSchema1, http://kabob.ucdenver.edu/iao/hasKeyPart, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordCollection_primitiveIntFieldDataField1)",
 						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordCollection_stringFieldDataField1, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordCollection_stringFieldDataField)",
 						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordCollection_stringFieldDataField, http://www.w3.org/2000/01/rdf-schema#subClassOf, http://kabob.ucdenver.edu/iao/Field)",
-						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordCollectionSchema1, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordCollection_stringFieldDataField1)",
+						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordCollectionSchema1, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordCollection_stringFieldDataField1)",
 						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordCollection_stringFieldDataField1, http://www.w3.org/2000/01/rdf-schema#comment, \"test comment for TestDataRecord.stringField\"@en)",
 						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordCollection_stringFieldDataField1, http://purl.org/dc/terms/hasVersion, \"1\"@en)",
 						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordCollection_stringFieldDataField1, http://www.w3.org/2000/01/rdf-schema#label, \"string field\"@en)",
-						"(http://kabob.ucdenver.edu/iao/Schema, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/Schema)",
-						"(http://kabob.ucdenver.edu/iao/Schema, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/Field)",
+						"(http://kabob.ucdenver.edu/iao/Schema, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/Schema)",
+						"(http://kabob.ucdenver.edu/iao/Schema, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/Field)",
 						"(http://kabob.ucdenver.edu/iao/kegg/SubRecord, http://www.w3.org/2000/01/rdf-schema#subClassOf, http://purl.obolibrary.org/obo/IAO_0000030)",
 						"(http://kabob.ucdenver.edu/iao/kegg/SubRecordSchema1, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/kegg/SubRecordSchema)",
 						"(http://kabob.ucdenver.edu/iao/kegg/SubRecordSchema1, http://www.w3.org/2000/01/rdf-schema#comment, \"This is a sub-record class\"@en)",
 						"(http://kabob.ucdenver.edu/iao/kegg/SubRecordSchema1, http://purl.org/dc/terms/hasVersion, \"1\"@en)",
 						"(http://kabob.ucdenver.edu/iao/kegg/SubRecordSchema1, http://www.w3.org/2000/01/rdf-schema#label, \"sub record\"@en)",
 						"(http://kabob.ucdenver.edu/iao/kegg/SubRecordSchema, http://www.w3.org/2000/01/rdf-schema#subClassOf, http://kabob.ucdenver.edu/iao/Schema)",
-						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordCollectionSchema1, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/SubRecordSchema1)",
+						"(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordCollectionSchema1, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/SubRecordSchema1)",
 						"(http://kabob.ucdenver.edu/iao/kegg/SubRecord_subStringFieldDataField1, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/kegg/SubRecord_subStringFieldDataField)",
 						"(http://kabob.ucdenver.edu/iao/kegg/SubRecord_subStringFieldDataField, http://www.w3.org/2000/01/rdf-schema#subClassOf, http://kabob.ucdenver.edu/iao/Field)",
-						"(http://kabob.ucdenver.edu/iao/kegg/SubRecordSchema1, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/SubRecord_subStringFieldDataField1)",
+						"(http://kabob.ucdenver.edu/iao/kegg/SubRecordSchema1, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/SubRecord_subStringFieldDataField1)",
 						"(http://kabob.ucdenver.edu/iao/kegg/SubRecord_subStringFieldDataField1, http://purl.org/dc/terms/hasVersion, \"1\"@en)",
 						"(http://kabob.ucdenver.edu/iao/kegg/SubRecord_subStringFieldDataField1, http://www.w3.org/2000/01/rdf-schema#label, \"sub string field\"@en)");
 
diff --git a/datasource-rdfizer/src/test/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RdfRecordWriterImplTest.java b/datasource-rdfizer/src/test/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RdfRecordWriterImplTest.java
index ecc9c82..266fca9 100644
--- a/datasource-rdfizer/src/test/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RdfRecordWriterImplTest.java
+++ b/datasource-rdfizer/src/test/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RdfRecordWriterImplTest.java
@@ -180,68 +180,68 @@ private List<String> getExpectedLines() {
 
 						"<http://kabob.ucdenver.edu/iao/eg/egDataSource20101217> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://kabob.ucdenver.edu/iao/eg/egDataSource> .",
 						"<http://kabob.ucdenver.edu/iao/eg/egGeneId2NameDatFileDataDataSet20101217> <http://kabob.ucdenver.edu/iao/hasTemplate> <http://kabob.ucdenver.edu/iao/eg/egGeneId2NameDatFileDataSchema1> .",
-						"<http://kabob.ucdenver.edu/iao/eg/egDataSource20101217> <http://purl.obolibrary.org/obo/has_part> <http://kabob.ucdenver.edu/iao/eg/egGeneId2NameDatFileDataDataSet20101217> .",
+						"<http://kabob.ucdenver.edu/iao/eg/egDataSource20101217> <http://purl.obolibrary.org/obo/BFO_0000051> <http://kabob.ucdenver.edu/iao/eg/egGeneId2NameDatFileDataDataSet20101217> .",
 						"<http://kabob.ucdenver.edu/iao/eg/egGeneId2NameDatFileDataDataSet20101217> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://kabob.ucdenver.edu/iao/DataSet> .",
 						"<http://kabob.ucdenver.edu/iao/eg/egGeneId2NameDatFileDataDataSet20101217> <http://kabob.ucdenver.edu/iao/hasCreationDate> \"2010-12-17T00:00:00.000-07:00\"^^<http://www.w3.org/2001/XMLSchema#dateTime> .",
-						"<http://kabob.ucdenver.edu/iao/eg/egGeneId2NameDatFileDataDataSet20101217> <http://purl.obolibrary.org/obo/has_part> <http://kabob.ucdenver.edu/iao/eg/R_GeneId2NameDatFileData_SLEJJYDvYbjNjgDeZpwYtYyDzDE> .",
+						"<http://kabob.ucdenver.edu/iao/eg/egGeneId2NameDatFileDataDataSet20101217> <http://purl.obolibrary.org/obo/BFO_0000051> <http://kabob.ucdenver.edu/iao/eg/R_GeneId2NameDatFileData_SLEJJYDvYbjNjgDeZpwYtYyDzDE> .",
 						"<http://kabob.ucdenver.edu/iao/eg/R_GeneId2NameDatFileData_SLEJJYDvYbjNjgDeZpwYtYyDzDE> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://kabob.ucdenver.edu/iao/eg/GeneId2NameDatFileData> .",
 						"<http://kabob.ucdenver.edu/iao/eg/R_GeneId2NameDatFileData_SLEJJYDvYbjNjgDeZpwYtYyDzDE> <http://kabob.ucdenver.edu/iao/hasTemplate> <http://kabob.ucdenver.edu/iao/eg/GeneId2NameDatFileDataSchema1> .",
-						"<http://kabob.ucdenver.edu/iao/eg/R_GeneId2NameDatFileData_SLEJJYDvYbjNjgDeZpwYtYyDzDE> <http://purl.obolibrary.org/obo/has_part> <http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_chromosome_uMTGX3DgyrCZiUl_TdbT5kknbDc> .",
+						"<http://kabob.ucdenver.edu/iao/eg/R_GeneId2NameDatFileData_SLEJJYDvYbjNjgDeZpwYtYyDzDE> <http://purl.obolibrary.org/obo/BFO_0000051> <http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_chromosome_uMTGX3DgyrCZiUl_TdbT5kknbDc> .",
 						"<http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_chromosome_uMTGX3DgyrCZiUl_TdbT5kknbDc> <http://kabob.ucdenver.edu/iao/hasTemplate> <http://kabob.ucdenver.edu/iao/eg/GeneId2NameDatFileData_chromosomeDataField1> .",
 						"<http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_chromosome_uMTGX3DgyrCZiUl_TdbT5kknbDc> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://kabob.ucdenver.edu/iao/FieldValue> .",
 						"<http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_chromosome_uMTGX3DgyrCZiUl_TdbT5kknbDc> <http://purl.obolibrary.org/obo/IAO_0000219> \"1\"^^<http://www.w3.org/2001/XMLSchema#integer> .",
-						"<http://kabob.ucdenver.edu/iao/eg/R_GeneId2NameDatFileData_SLEJJYDvYbjNjgDeZpwYtYyDzDE> <http://purl.obolibrary.org/obo/has_part> <http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_geneID_-CCwiNPQGZXqxzHwnRQPLcufczE> .",
+						"<http://kabob.ucdenver.edu/iao/eg/R_GeneId2NameDatFileData_SLEJJYDvYbjNjgDeZpwYtYyDzDE> <http://purl.obolibrary.org/obo/BFO_0000051> <http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_geneID_-CCwiNPQGZXqxzHwnRQPLcufczE> .",
 						"<http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_geneID_-CCwiNPQGZXqxzHwnRQPLcufczE> <http://kabob.ucdenver.edu/iao/hasTemplate> <http://kabob.ucdenver.edu/iao/eg/GeneId2NameDatFileData_geneIDDataField1> .",
 						"<http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_geneID_-CCwiNPQGZXqxzHwnRQPLcufczE> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://kabob.ucdenver.edu/iao/FieldValue> .",
 						"<http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_geneID_-CCwiNPQGZXqxzHwnRQPLcufczE> <http://purl.obolibrary.org/obo/IAO_0000219> <http://kabob.ucdenver.edu/iao/eg/EG_111_ICE> .",
-						"<http://kabob.ucdenver.edu/iao/eg/R_GeneId2NameDatFileData_SLEJJYDvYbjNjgDeZpwYtYyDzDE> <http://purl.obolibrary.org/obo/has_part> <http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_geneName_0Bh5BTPeMidQDIVhcwyulDl43_w> .",
+						"<http://kabob.ucdenver.edu/iao/eg/R_GeneId2NameDatFileData_SLEJJYDvYbjNjgDeZpwYtYyDzDE> <http://purl.obolibrary.org/obo/BFO_0000051> <http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_geneName_0Bh5BTPeMidQDIVhcwyulDl43_w> .",
 						"<http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_geneName_0Bh5BTPeMidQDIVhcwyulDl43_w> <http://kabob.ucdenver.edu/iao/hasTemplate> <http://kabob.ucdenver.edu/iao/eg/GeneId2NameDatFileData_geneNameDataField1> .",
 						"<http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_geneName_0Bh5BTPeMidQDIVhcwyulDl43_w> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://kabob.ucdenver.edu/iao/FieldValue> .",
 						"<http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_geneName_0Bh5BTPeMidQDIVhcwyulDl43_w> <http://purl.obolibrary.org/obo/IAO_0000219> \"ABC-1\"@en .",
-						"<http://kabob.ucdenver.edu/iao/eg/R_GeneId2NameDatFileData_SLEJJYDvYbjNjgDeZpwYtYyDzDE> <http://purl.obolibrary.org/obo/has_part> <http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_homologousGeneIDs_icg7DNHgWpSrcO1RMs6jGswLysE> .",
+						"<http://kabob.ucdenver.edu/iao/eg/R_GeneId2NameDatFileData_SLEJJYDvYbjNjgDeZpwYtYyDzDE> <http://purl.obolibrary.org/obo/BFO_0000051> <http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_homologousGeneIDs_icg7DNHgWpSrcO1RMs6jGswLysE> .",
 						"<http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_homologousGeneIDs_icg7DNHgWpSrcO1RMs6jGswLysE> <http://purl.obolibrary.org/obo/IAO_0000219> <http://kabob.ucdenver.edu/iao/eg/EG_567_ICE> .",
 						"<http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_homologousGeneIDs_icg7DNHgWpSrcO1RMs6jGswLysE> <http://kabob.ucdenver.edu/iao/hasTemplate> <http://kabob.ucdenver.edu/iao/eg/GeneId2NameDatFileData_homologousGeneIDsDataField1> .",
 						"<http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_homologousGeneIDs_icg7DNHgWpSrcO1RMs6jGswLysE> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://kabob.ucdenver.edu/iao/FieldValue> .",
-						"<http://kabob.ucdenver.edu/iao/eg/R_GeneId2NameDatFileData_SLEJJYDvYbjNjgDeZpwYtYyDzDE> <http://purl.obolibrary.org/obo/has_part> <http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_homologousGeneIDs_Huvuy3PBpaT_1bkqnPJrUm56plE> .",
+						"<http://kabob.ucdenver.edu/iao/eg/R_GeneId2NameDatFileData_SLEJJYDvYbjNjgDeZpwYtYyDzDE> <http://purl.obolibrary.org/obo/BFO_0000051> <http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_homologousGeneIDs_Huvuy3PBpaT_1bkqnPJrUm56plE> .",
 						"<http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_homologousGeneIDs_Huvuy3PBpaT_1bkqnPJrUm56plE> <http://purl.obolibrary.org/obo/IAO_0000219> <http://kabob.ucdenver.edu/iao/eg/EG_456_ICE> .",
 						"<http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_homologousGeneIDs_Huvuy3PBpaT_1bkqnPJrUm56plE> <http://kabob.ucdenver.edu/iao/hasTemplate> <http://kabob.ucdenver.edu/iao/eg/GeneId2NameDatFileData_homologousGeneIDsDataField1> .",
 						"<http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_homologousGeneIDs_Huvuy3PBpaT_1bkqnPJrUm56plE> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://kabob.ucdenver.edu/iao/FieldValue> .",
-						"<http://kabob.ucdenver.edu/iao/eg/R_GeneId2NameDatFileData_SLEJJYDvYbjNjgDeZpwYtYyDzDE> <http://purl.obolibrary.org/obo/has_part> <http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_homologousGeneIDs_qfYm9hdvq_aJMz72kAWH5fEPvb4> .",
+						"<http://kabob.ucdenver.edu/iao/eg/R_GeneId2NameDatFileData_SLEJJYDvYbjNjgDeZpwYtYyDzDE> <http://purl.obolibrary.org/obo/BFO_0000051> <http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_homologousGeneIDs_qfYm9hdvq_aJMz72kAWH5fEPvb4> .",
 						"<http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_homologousGeneIDs_qfYm9hdvq_aJMz72kAWH5fEPvb4> <http://purl.obolibrary.org/obo/IAO_0000219> <http://kabob.ucdenver.edu/iao/eg/EG_678_ICE> .",
 						"<http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_homologousGeneIDs_qfYm9hdvq_aJMz72kAWH5fEPvb4> <http://kabob.ucdenver.edu/iao/hasTemplate> <http://kabob.ucdenver.edu/iao/eg/GeneId2NameDatFileData_homologousGeneIDsDataField1> .",
 						"<http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_homologousGeneIDs_qfYm9hdvq_aJMz72kAWH5fEPvb4> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://kabob.ucdenver.edu/iao/FieldValue> .",
-						"<http://kabob.ucdenver.edu/iao/eg/egGeneId2NameDatFileDataDataSet20101217> <http://purl.obolibrary.org/obo/has_part> <http://kabob.ucdenver.edu/iao/eg/R_GeneId2NameDatFileData_URI5a-9i5Ti9J5Qeet0-_rcjEvc> .",
+						"<http://kabob.ucdenver.edu/iao/eg/egGeneId2NameDatFileDataDataSet20101217> <http://purl.obolibrary.org/obo/BFO_0000051> <http://kabob.ucdenver.edu/iao/eg/R_GeneId2NameDatFileData_URI5a-9i5Ti9J5Qeet0-_rcjEvc> .",
 						"<http://kabob.ucdenver.edu/iao/eg/R_GeneId2NameDatFileData_URI5a-9i5Ti9J5Qeet0-_rcjEvc> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://kabob.ucdenver.edu/iao/eg/GeneId2NameDatFileData> .",
 						"<http://kabob.ucdenver.edu/iao/eg/R_GeneId2NameDatFileData_URI5a-9i5Ti9J5Qeet0-_rcjEvc> <http://kabob.ucdenver.edu/iao/hasTemplate> <http://kabob.ucdenver.edu/iao/eg/GeneId2NameDatFileDataSchema1> .",
-						"<http://kabob.ucdenver.edu/iao/eg/R_GeneId2NameDatFileData_URI5a-9i5Ti9J5Qeet0-_rcjEvc> <http://purl.obolibrary.org/obo/has_part> <http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_chromosome_Sq8QGIW4_EY29zLv9ndSuo0FXdM> .",
+						"<http://kabob.ucdenver.edu/iao/eg/R_GeneId2NameDatFileData_URI5a-9i5Ti9J5Qeet0-_rcjEvc> <http://purl.obolibrary.org/obo/BFO_0000051> <http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_chromosome_Sq8QGIW4_EY29zLv9ndSuo0FXdM> .",
 						"<http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_chromosome_Sq8QGIW4_EY29zLv9ndSuo0FXdM> <http://kabob.ucdenver.edu/iao/hasTemplate> <http://kabob.ucdenver.edu/iao/eg/GeneId2NameDatFileData_chromosomeDataField1> .",
 						"<http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_chromosome_Sq8QGIW4_EY29zLv9ndSuo0FXdM> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://kabob.ucdenver.edu/iao/FieldValue> .",
 						"<http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_chromosome_Sq8QGIW4_EY29zLv9ndSuo0FXdM> <http://purl.obolibrary.org/obo/IAO_0000219> \"2\"^^<http://www.w3.org/2001/XMLSchema#integer> .",
-						"<http://kabob.ucdenver.edu/iao/eg/R_GeneId2NameDatFileData_URI5a-9i5Ti9J5Qeet0-_rcjEvc> <http://purl.obolibrary.org/obo/has_part> <http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_geneID_t7j3WFg6gDoRywM0JQ9U211X5OY> .",
+						"<http://kabob.ucdenver.edu/iao/eg/R_GeneId2NameDatFileData_URI5a-9i5Ti9J5Qeet0-_rcjEvc> <http://purl.obolibrary.org/obo/BFO_0000051> <http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_geneID_t7j3WFg6gDoRywM0JQ9U211X5OY> .",
 						"<http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_geneID_t7j3WFg6gDoRywM0JQ9U211X5OY> <http://kabob.ucdenver.edu/iao/hasTemplate> <http://kabob.ucdenver.edu/iao/eg/GeneId2NameDatFileData_geneIDDataField1> .",
 						"<http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_geneID_t7j3WFg6gDoRywM0JQ9U211X5OY> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://kabob.ucdenver.edu/iao/FieldValue> .",
 						"<http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_geneID_t7j3WFg6gDoRywM0JQ9U211X5OY> <http://purl.obolibrary.org/obo/IAO_0000219> <http://kabob.ucdenver.edu/iao/eg/EG_222_ICE> .",
-						"<http://kabob.ucdenver.edu/iao/eg/R_GeneId2NameDatFileData_URI5a-9i5Ti9J5Qeet0-_rcjEvc> <http://purl.obolibrary.org/obo/has_part> <http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_geneName_zh3IaDv1xyvAghT9MjJxG41szZ4> .",
+						"<http://kabob.ucdenver.edu/iao/eg/R_GeneId2NameDatFileData_URI5a-9i5Ti9J5Qeet0-_rcjEvc> <http://purl.obolibrary.org/obo/BFO_0000051> <http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_geneName_zh3IaDv1xyvAghT9MjJxG41szZ4> .",
 						"<http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_geneName_zh3IaDv1xyvAghT9MjJxG41szZ4> <http://kabob.ucdenver.edu/iao/hasTemplate> <http://kabob.ucdenver.edu/iao/eg/GeneId2NameDatFileData_geneNameDataField1> .",
 						"<http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_geneName_zh3IaDv1xyvAghT9MjJxG41szZ4> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://kabob.ucdenver.edu/iao/FieldValue> .",
 						"<http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_geneName_zh3IaDv1xyvAghT9MjJxG41szZ4> <http://purl.obolibrary.org/obo/IAO_0000219> \"DEF-2\"@en .",
-						"<http://kabob.ucdenver.edu/iao/eg/R_GeneId2NameDatFileData_URI5a-9i5Ti9J5Qeet0-_rcjEvc> <http://purl.obolibrary.org/obo/has_part> <http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_homologousGeneIDs_icg7DNHgWpSrcO1RMs6jGswLysE> .",
-						"<http://kabob.ucdenver.edu/iao/eg/R_GeneId2NameDatFileData_URI5a-9i5Ti9J5Qeet0-_rcjEvc> <http://purl.obolibrary.org/obo/has_part> <http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_homologousGeneIDs_139oy219DqSRmT4r65iQ4QFbaMc> .",
+						"<http://kabob.ucdenver.edu/iao/eg/R_GeneId2NameDatFileData_URI5a-9i5Ti9J5Qeet0-_rcjEvc> <http://purl.obolibrary.org/obo/BFO_0000051> <http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_homologousGeneIDs_icg7DNHgWpSrcO1RMs6jGswLysE> .",
+						"<http://kabob.ucdenver.edu/iao/eg/R_GeneId2NameDatFileData_URI5a-9i5Ti9J5Qeet0-_rcjEvc> <http://purl.obolibrary.org/obo/BFO_0000051> <http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_homologousGeneIDs_139oy219DqSRmT4r65iQ4QFbaMc> .",
 						"<http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_homologousGeneIDs_139oy219DqSRmT4r65iQ4QFbaMc> <http://purl.obolibrary.org/obo/IAO_0000219> <http://kabob.ucdenver.edu/iao/eg/EG_555_ICE> .",
 						"<http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_homologousGeneIDs_139oy219DqSRmT4r65iQ4QFbaMc> <http://kabob.ucdenver.edu/iao/hasTemplate> <http://kabob.ucdenver.edu/iao/eg/GeneId2NameDatFileData_homologousGeneIDsDataField1> .",
 						"<http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_homologousGeneIDs_139oy219DqSRmT4r65iQ4QFbaMc> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://kabob.ucdenver.edu/iao/FieldValue> .",
-						"<http://kabob.ucdenver.edu/iao/eg/egGeneId2NameDatFileDataDataSet20101217> <http://purl.obolibrary.org/obo/has_part> <http://kabob.ucdenver.edu/iao/eg/R_GeneId2NameDatFileData_FtYHBoyFbhuQktYWySdRhIQf6YQ> .",
+						"<http://kabob.ucdenver.edu/iao/eg/egGeneId2NameDatFileDataDataSet20101217> <http://purl.obolibrary.org/obo/BFO_0000051> <http://kabob.ucdenver.edu/iao/eg/R_GeneId2NameDatFileData_FtYHBoyFbhuQktYWySdRhIQf6YQ> .",
 						"<http://kabob.ucdenver.edu/iao/eg/R_GeneId2NameDatFileData_FtYHBoyFbhuQktYWySdRhIQf6YQ> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://kabob.ucdenver.edu/iao/eg/GeneId2NameDatFileData> .",
 						"<http://kabob.ucdenver.edu/iao/eg/R_GeneId2NameDatFileData_FtYHBoyFbhuQktYWySdRhIQf6YQ> <http://kabob.ucdenver.edu/iao/hasTemplate> <http://kabob.ucdenver.edu/iao/eg/GeneId2NameDatFileDataSchema1> .",
-						"<http://kabob.ucdenver.edu/iao/eg/R_GeneId2NameDatFileData_FtYHBoyFbhuQktYWySdRhIQf6YQ> <http://purl.obolibrary.org/obo/has_part> <http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_chromosome_73htV8MhttNyomSqgl9a8326Trw> .",
+						"<http://kabob.ucdenver.edu/iao/eg/R_GeneId2NameDatFileData_FtYHBoyFbhuQktYWySdRhIQf6YQ> <http://purl.obolibrary.org/obo/BFO_0000051> <http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_chromosome_73htV8MhttNyomSqgl9a8326Trw> .",
 						"<http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_chromosome_73htV8MhttNyomSqgl9a8326Trw> <http://kabob.ucdenver.edu/iao/hasTemplate> <http://kabob.ucdenver.edu/iao/eg/GeneId2NameDatFileData_chromosomeDataField1> .",
 						"<http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_chromosome_73htV8MhttNyomSqgl9a8326Trw> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://kabob.ucdenver.edu/iao/FieldValue> .",
 						"<http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_chromosome_73htV8MhttNyomSqgl9a8326Trw> <http://purl.obolibrary.org/obo/IAO_0000219> \"3\"^^<http://www.w3.org/2001/XMLSchema#integer> .",
-						"<http://kabob.ucdenver.edu/iao/eg/R_GeneId2NameDatFileData_FtYHBoyFbhuQktYWySdRhIQf6YQ> <http://purl.obolibrary.org/obo/has_part> <http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_geneID_xNM_WvK_FybEYs5f8Xi23ySoEOA> .",
+						"<http://kabob.ucdenver.edu/iao/eg/R_GeneId2NameDatFileData_FtYHBoyFbhuQktYWySdRhIQf6YQ> <http://purl.obolibrary.org/obo/BFO_0000051> <http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_geneID_xNM_WvK_FybEYs5f8Xi23ySoEOA> .",
 						"<http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_geneID_xNM_WvK_FybEYs5f8Xi23ySoEOA> <http://kabob.ucdenver.edu/iao/hasTemplate> <http://kabob.ucdenver.edu/iao/eg/GeneId2NameDatFileData_geneIDDataField1> .",
 						"<http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_geneID_xNM_WvK_FybEYs5f8Xi23ySoEOA> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://kabob.ucdenver.edu/iao/FieldValue> .",
 						"<http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_geneID_xNM_WvK_FybEYs5f8Xi23ySoEOA> <http://purl.obolibrary.org/obo/IAO_0000219> <http://kabob.ucdenver.edu/iao/eg/EG_333_ICE> .",
-						"<http://kabob.ucdenver.edu/iao/eg/R_GeneId2NameDatFileData_FtYHBoyFbhuQktYWySdRhIQf6YQ> <http://purl.obolibrary.org/obo/has_part> <http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_geneName_8L2bT7V4aFD6tDh9E-14Yja2Cmc> .",
+						"<http://kabob.ucdenver.edu/iao/eg/R_GeneId2NameDatFileData_FtYHBoyFbhuQktYWySdRhIQf6YQ> <http://purl.obolibrary.org/obo/BFO_0000051> <http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_geneName_8L2bT7V4aFD6tDh9E-14Yja2Cmc> .",
 						"<http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_geneName_8L2bT7V4aFD6tDh9E-14Yja2Cmc> <http://kabob.ucdenver.edu/iao/hasTemplate> <http://kabob.ucdenver.edu/iao/eg/GeneId2NameDatFileData_geneNameDataField1> .",
 						"<http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_geneName_8L2bT7V4aFD6tDh9E-14Yja2Cmc> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://kabob.ucdenver.edu/iao/FieldValue> .",
 						"<http://kabob.ucdenver.edu/iao/eg/F_GeneId2NameDatFileData_geneName_8L2bT7V4aFD6tDh9E-14Yja2Cmc> <http://purl.obolibrary.org/obo/IAO_0000219> \"XYZ-9\"@en .");
diff --git a/datasource-rdfizer/src/test/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RecordUtilTest.java b/datasource-rdfizer/src/test/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RecordUtilTest.java
index 49fd076..2f737c7 100644
--- a/datasource-rdfizer/src/test/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RecordUtilTest.java
+++ b/datasource-rdfizer/src/test/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RecordUtilTest.java
@@ -173,11 +173,11 @@ public int hashCode() {
 	// "(http://kabob.ucdenver.edu/iao/kegg/keggDataField, http://www.w3.org/2000/01/rdf-schema#subClassOf, http://kabob.ucdenver.edu/iao/DataField)",
 	// "(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordSchema1, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordSchema)",
 	// "(http://kabob.ucdenver.edu/iao/kegg/TestDataRecord_collectionFieldDataField1, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/kegg/TestDataRecord_collectionFieldDataField)",
-	// "(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordSchema1, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/TestDataRecord_collectionFieldDataField1)",
+	// "(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordSchema1, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/TestDataRecord_collectionFieldDataField1)",
 	// "(http://kabob.ucdenver.edu/iao/kegg/TestDataRecord_primitiveIntFieldDataField1, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/kegg/TestDataRecord_primitiveIntFieldDataField)",
-	// "(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordSchema1, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/TestDataRecord_primitiveIntFieldDataField1)",
+	// "(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordSchema1, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/TestDataRecord_primitiveIntFieldDataField1)",
 	// "(http://kabob.ucdenver.edu/iao/kegg/TestDataRecord_stringFieldDataField1, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/kegg/TestDataRecord_stringFieldDataField)",
-	// "(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordSchema1, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/TestDataRecord_stringFieldDataField1)");
+	// "(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordSchema1, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/TestDataRecord_stringFieldDataField1)");
 	//
 	// List<? extends Statement> statements =
 	// RdfRecordUtil.getRecordSchemaDefinitionStatements(TestDataRecord.class);
@@ -201,17 +201,17 @@ public int hashCode() {
 	// "(http://kabob.ucdenver.edu/iao/kegg/keggDataField, http://www.w3.org/2000/01/rdf-schema#subClassOf, http://kabob.ucdenver.edu/iao/DataField)",
 	// "(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubrecordSchema1, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubrecordSchema)",
 	// "(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubrecord_collectionFieldDataField1, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubrecord_collectionFieldDataField)",
-	// "(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubrecordSchema1, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubrecord_collectionFieldDataField1)",
+	// "(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubrecordSchema1, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubrecord_collectionFieldDataField1)",
 	// "(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubrecord_primitiveIntFieldDataField1, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubrecord_primitiveIntFieldDataField)",
-	// "(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubrecordSchema1, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubrecord_primitiveIntFieldDataField1)",
+	// "(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubrecordSchema1, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubrecord_primitiveIntFieldDataField1)",
 	// "(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubrecord_stringFieldDataField1, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubrecord_stringFieldDataField)",
-	// "(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubrecordSchema1, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubrecord_stringFieldDataField1)",
+	// "(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubrecordSchema1, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubrecord_stringFieldDataField1)",
 	// "(http://kabob.ucdenver.edu/iao/kegg/TestDataSubRecordSchema1, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/kegg/TestDataSubRecordSchema)",
-	// "(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubrecordSchema1, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/TestDataSubRecordSchema1)",
+	// "(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubrecordSchema1, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/TestDataSubRecordSchema1)",
 	// "(http://kabob.ucdenver.edu/iao/kegg/TestDataSubRecord_subrecordStringFieldDataField1, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/kegg/TestDataSubRecord_subrecordStringFieldDataField)",
-	// "(http://kabob.ucdenver.edu/iao/kegg/TestDataSubRecordSchema1, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/TestDataSubRecord_subrecordStringFieldDataField1)",
+	// "(http://kabob.ucdenver.edu/iao/kegg/TestDataSubRecordSchema1, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/TestDataSubRecord_subrecordStringFieldDataField1)",
 	// "(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubrecord_subRecordDataField1, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubrecord_subRecordDataField)",
-	// "(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubrecordSchema1, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubrecord_subRecordDataField1)");
+	// "(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubrecordSchema1, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubrecord_subRecordDataField1)");
 	//
 	// Collection<? extends Statement> statements = RdfRecordUtil
 	// .getRecordSchemaStatements(TestDataRecordWithSubrecord.class, null, null, false);
@@ -232,11 +232,11 @@ public int hashCode() {
 	// .createSet(
 	// "(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordSchema1, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordSchema)",
 	// "(http://kabob.ucdenver.edu/iao/kegg/TestDataRecord_collectionFieldDataField1, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/kegg/TestDataRecord_collectionFieldDataField)",
-	// "(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordSchema1, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/TestDataRecord_collectionFieldDataField1)",
+	// "(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordSchema1, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/TestDataRecord_collectionFieldDataField1)",
 	// "(http://kabob.ucdenver.edu/iao/kegg/TestDataRecord_primitiveIntFieldDataField1, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/kegg/TestDataRecord_primitiveIntFieldDataField)",
-	// "(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordSchema1, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/TestDataRecord_primitiveIntFieldDataField1)",
+	// "(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordSchema1, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/TestDataRecord_primitiveIntFieldDataField1)",
 	// "(http://kabob.ucdenver.edu/iao/kegg/TestDataRecord_stringFieldDataField1, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/kegg/TestDataRecord_stringFieldDataField)",
-	// "(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordSchema1, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/TestDataRecord_stringFieldDataField1)");
+	// "(http://kabob.ucdenver.edu/iao/kegg/TestDataRecordSchema1, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/TestDataRecord_stringFieldDataField1)");
 	//
 	// Collection<? extends Statement> statements =
 	// RdfRecordUtil.getRecordFieldDeclarationStatements(
@@ -265,7 +265,7 @@ public final void testGetDataSourceInstanceStatements() {
 				"(http://kabob.ucdenver.edu/iao/kegg/keggTestDataRecordDataSet20101221, http://kabob.ucdenver.edu/iao/hasTemplate, http://kabob.ucdenver.edu/iao/kegg/keggTestDataRecordSchema1)",
 				it.next().toString());
 		assertEquals(
-				"(http://kabob.ucdenver.edu/iao/kegg/keggDataSource20101221, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/keggTestDataRecordDataSet20101221)",
+				"(http://kabob.ucdenver.edu/iao/kegg/keggDataSource20101221, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/keggTestDataRecordDataSet20101221)",
 				it.next().toString());
 		assertEquals(
 				"(http://kabob.ucdenver.edu/iao/kegg/keggTestDataRecordDataSet20101221, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/DataSet)",
@@ -283,22 +283,22 @@ public final void testGetRecordInstanceStatements() throws URISyntaxException {
 
 		Set<String> expectedStatements = CollectionsUtil
 				.createSet(
-						"(http://kabob.ucdenver.edu/iao/kegg/keggTestExcludeFieldDataRecordDataSet20101221, http://purl.obolibrary.org/obo/has_part, http://record.uri)",
+						"(http://kabob.ucdenver.edu/iao/kegg/keggTestExcludeFieldDataRecordDataSet20101221, http://purl.obolibrary.org/obo/BFO_0000051, http://record.uri)",
 						"(http://record.uri, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/kegg/TestExcludeFieldDataRecord)",
 						"(http://record.uri, http://kabob.ucdenver.edu/iao/hasTemplate, http://kabob.ucdenver.edu/iao/kegg/TestExcludeFieldDataRecordSchema1)",
-						"(http://record.uri, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/F_TestExcludeFieldDataRecord_collectionField_O0waxDNfQT_lFeO4grbgwHJ_bxs)",
+						"(http://record.uri, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/F_TestExcludeFieldDataRecord_collectionField_O0waxDNfQT_lFeO4grbgwHJ_bxs)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestExcludeFieldDataRecord_collectionField_O0waxDNfQT_lFeO4grbgwHJ_bxs, http://purl.obolibrary.org/obo/IAO_0000219, \"1\"@en)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestExcludeFieldDataRecord_collectionField_O0waxDNfQT_lFeO4grbgwHJ_bxs, http://kabob.ucdenver.edu/iao/hasTemplate, http://kabob.ucdenver.edu/iao/kegg/TestExcludeFieldDataRecord_collectionFieldDataField1)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestExcludeFieldDataRecord_collectionField_O0waxDNfQT_lFeO4grbgwHJ_bxs, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/FieldValue)",
-						"(http://record.uri, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/F_TestExcludeFieldDataRecord_collectionField_oHpC_sn17AbL7y86SQEkK6oZqgA)",
+						"(http://record.uri, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/F_TestExcludeFieldDataRecord_collectionField_oHpC_sn17AbL7y86SQEkK6oZqgA)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestExcludeFieldDataRecord_collectionField_oHpC_sn17AbL7y86SQEkK6oZqgA, http://purl.obolibrary.org/obo/IAO_0000219, \"2\"@en)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestExcludeFieldDataRecord_collectionField_oHpC_sn17AbL7y86SQEkK6oZqgA, http://kabob.ucdenver.edu/iao/hasTemplate, http://kabob.ucdenver.edu/iao/kegg/TestExcludeFieldDataRecord_collectionFieldDataField1)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestExcludeFieldDataRecord_collectionField_oHpC_sn17AbL7y86SQEkK6oZqgA, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/FieldValue)",
-						"(http://record.uri, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/F_TestExcludeFieldDataRecord_primitiveIntField_K1E3g8ozXlcQiV3vtNaH7ikWY5I)",
+						"(http://record.uri, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/F_TestExcludeFieldDataRecord_primitiveIntField_K1E3g8ozXlcQiV3vtNaH7ikWY5I)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestExcludeFieldDataRecord_primitiveIntField_K1E3g8ozXlcQiV3vtNaH7ikWY5I, http://kabob.ucdenver.edu/iao/hasTemplate, http://kabob.ucdenver.edu/iao/kegg/TestExcludeFieldDataRecord_primitiveIntFieldDataField1)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestExcludeFieldDataRecord_primitiveIntField_K1E3g8ozXlcQiV3vtNaH7ikWY5I, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/FieldValue)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestExcludeFieldDataRecord_primitiveIntField_K1E3g8ozXlcQiV3vtNaH7ikWY5I, http://purl.obolibrary.org/obo/IAO_0000219, \"2\"^^<http://www.w3.org/2001/XMLSchema#integer>)",
-						"(http://record.uri, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/F_TestExcludeFieldDataRecord_stringField_wM4OI6HAehQ_w0UDN6cjfEXbpXg)",
+						"(http://record.uri, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/F_TestExcludeFieldDataRecord_stringField_wM4OI6HAehQ_w0UDN6cjfEXbpXg)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestExcludeFieldDataRecord_stringField_wM4OI6HAehQ_w0UDN6cjfEXbpXg, http://kabob.ucdenver.edu/iao/hasTemplate, http://kabob.ucdenver.edu/iao/kegg/TestExcludeFieldDataRecord_stringFieldDataField1)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestExcludeFieldDataRecord_stringField_wM4OI6HAehQ_w0UDN6cjfEXbpXg, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/FieldValue)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestExcludeFieldDataRecord_stringField_wM4OI6HAehQ_w0UDN6cjfEXbpXg, http://purl.obolibrary.org/obo/IAO_0000219, \"1\"@en)");
@@ -323,29 +323,29 @@ public final void testGetRecordInstanceStatements_WithSubRecord() throws URISynt
 
 		Set<String> expectedStatements = CollectionsUtil
 				.createSet(
-						"(http://kabob.ucdenver.edu/iao/kegg/keggTestDataRecordWithSubrecordDataSet20101221, http://purl.obolibrary.org/obo/has_part, http://record.uri)",
+						"(http://kabob.ucdenver.edu/iao/kegg/keggTestDataRecordWithSubrecordDataSet20101221, http://purl.obolibrary.org/obo/BFO_0000051, http://record.uri)",
 						"(http://record.uri, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubrecord)",
 						"(http://record.uri, http://kabob.ucdenver.edu/iao/hasTemplate, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubrecordSchema1)",
-						"(http://record.uri, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubrecord_collectionField_DnIjgkTYdCorX3kHXYaWJhzcHJY)",
+						"(http://record.uri, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubrecord_collectionField_DnIjgkTYdCorX3kHXYaWJhzcHJY)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubrecord_collectionField_DnIjgkTYdCorX3kHXYaWJhzcHJY, http://purl.obolibrary.org/obo/IAO_0000219, \"1\"@en)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubrecord_collectionField_DnIjgkTYdCorX3kHXYaWJhzcHJY, http://kabob.ucdenver.edu/iao/hasTemplate, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubrecord_collectionFieldDataField1)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubrecord_collectionField_DnIjgkTYdCorX3kHXYaWJhzcHJY, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/FieldValue)",
-						"(http://record.uri, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubrecord_collectionField_L5wUWixEQDDNsXhJedMS4OBYRm0)",
+						"(http://record.uri, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubrecord_collectionField_L5wUWixEQDDNsXhJedMS4OBYRm0)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubrecord_collectionField_L5wUWixEQDDNsXhJedMS4OBYRm0, http://purl.obolibrary.org/obo/IAO_0000219, \"2\"@en)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubrecord_collectionField_L5wUWixEQDDNsXhJedMS4OBYRm0, http://kabob.ucdenver.edu/iao/hasTemplate, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubrecord_collectionFieldDataField1)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubrecord_collectionField_L5wUWixEQDDNsXhJedMS4OBYRm0, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/FieldValue)",
-						"(http://record.uri, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubrecord_primitiveIntField_6OTcnDKpkfNEWVrrykNM-emmlhk)",
+						"(http://record.uri, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubrecord_primitiveIntField_6OTcnDKpkfNEWVrrykNM-emmlhk)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubrecord_primitiveIntField_6OTcnDKpkfNEWVrrykNM-emmlhk, http://kabob.ucdenver.edu/iao/hasTemplate, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubrecord_primitiveIntFieldDataField1)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubrecord_primitiveIntField_6OTcnDKpkfNEWVrrykNM-emmlhk, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/FieldValue)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubrecord_primitiveIntField_6OTcnDKpkfNEWVrrykNM-emmlhk, http://purl.obolibrary.org/obo/IAO_0000219, \"2\"^^<http://www.w3.org/2001/XMLSchema#integer>)",
-						"(http://record.uri, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubrecord_stringField_jw2tCkX01xySTT2rYV5FCpiqRw0)",
+						"(http://record.uri, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubrecord_stringField_jw2tCkX01xySTT2rYV5FCpiqRw0)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubrecord_stringField_jw2tCkX01xySTT2rYV5FCpiqRw0, http://kabob.ucdenver.edu/iao/hasTemplate, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubrecord_stringFieldDataField1)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubrecord_stringField_jw2tCkX01xySTT2rYV5FCpiqRw0, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/FieldValue)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubrecord_stringField_jw2tCkX01xySTT2rYV5FCpiqRw0, http://purl.obolibrary.org/obo/IAO_0000219, \"1\"@en)",
-						"(http://record.uri, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/R_TestDataSubRecord_7VVwQkWKCGaD8uZQ2SMe2RFUNeU)",
+						"(http://record.uri, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/R_TestDataSubRecord_7VVwQkWKCGaD8uZQ2SMe2RFUNeU)",
 						"(http://kabob.ucdenver.edu/iao/kegg/R_TestDataSubRecord_7VVwQkWKCGaD8uZQ2SMe2RFUNeU, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/kegg/TestDataSubRecord)",
 						"(http://kabob.ucdenver.edu/iao/kegg/R_TestDataSubRecord_7VVwQkWKCGaD8uZQ2SMe2RFUNeU, http://kabob.ucdenver.edu/iao/hasTemplate, http://kabob.ucdenver.edu/iao/kegg/TestDataSubRecordSchema1)",
-						"(http://kabob.ucdenver.edu/iao/kegg/R_TestDataSubRecord_7VVwQkWKCGaD8uZQ2SMe2RFUNeU, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/F_TestDataSubRecord_subrecordStringField_7VVwQkWKCGaD8uZQ2SMe2RFUNeU)",
+						"(http://kabob.ucdenver.edu/iao/kegg/R_TestDataSubRecord_7VVwQkWKCGaD8uZQ2SMe2RFUNeU, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/F_TestDataSubRecord_subrecordStringField_7VVwQkWKCGaD8uZQ2SMe2RFUNeU)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataSubRecord_subrecordStringField_7VVwQkWKCGaD8uZQ2SMe2RFUNeU, http://kabob.ucdenver.edu/iao/hasTemplate, http://kabob.ucdenver.edu/iao/kegg/TestDataSubRecord_subrecordStringFieldDataField1)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataSubRecord_subrecordStringField_7VVwQkWKCGaD8uZQ2SMe2RFUNeU, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/FieldValue)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataSubRecord_subrecordStringField_7VVwQkWKCGaD8uZQ2SMe2RFUNeU, http://purl.obolibrary.org/obo/IAO_0000219, \"sub\"@en)");
@@ -370,22 +370,22 @@ public final void testGetRecordInstanceStatementsWithRecordKey() throws URISynta
 
 		Set<String> expectedStatements = CollectionsUtil
 				.createSet(
-						"(http://kabob.ucdenver.edu/iao/kegg/keggTestExcludeFieldDataRecordKeyDataSet20101221, http://purl.obolibrary.org/obo/has_part, http://record.uri)",
+						"(http://kabob.ucdenver.edu/iao/kegg/keggTestExcludeFieldDataRecordKeyDataSet20101221, http://purl.obolibrary.org/obo/BFO_0000051, http://record.uri)",
 						"(http://record.uri, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/kegg/TestExcludeFieldDataRecord)",
 						"(http://record.uri, http://kabob.ucdenver.edu/iao/hasTemplate, http://kabob.ucdenver.edu/iao/kegg/TestExcludeFieldDataRecordSchema1)",
-						"(http://record.uri, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/F_TestExcludeFieldDataRecord_collectionField_O0waxDNfQT_lFeO4grbgwHJ_bxs)",
+						"(http://record.uri, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/F_TestExcludeFieldDataRecord_collectionField_O0waxDNfQT_lFeO4grbgwHJ_bxs)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestExcludeFieldDataRecord_collectionField_O0waxDNfQT_lFeO4grbgwHJ_bxs, http://purl.obolibrary.org/obo/IAO_0000219, \"1\"@en)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestExcludeFieldDataRecord_collectionField_O0waxDNfQT_lFeO4grbgwHJ_bxs, http://kabob.ucdenver.edu/iao/hasTemplate, http://kabob.ucdenver.edu/iao/kegg/TestExcludeFieldDataRecord_collectionFieldDataField1)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestExcludeFieldDataRecord_collectionField_O0waxDNfQT_lFeO4grbgwHJ_bxs, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/FieldValue)",
-						"(http://record.uri, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/F_TestExcludeFieldDataRecord_collectionField_oHpC_sn17AbL7y86SQEkK6oZqgA)",
+						"(http://record.uri, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/F_TestExcludeFieldDataRecord_collectionField_oHpC_sn17AbL7y86SQEkK6oZqgA)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestExcludeFieldDataRecord_collectionField_oHpC_sn17AbL7y86SQEkK6oZqgA, http://purl.obolibrary.org/obo/IAO_0000219, \"2\"@en)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestExcludeFieldDataRecord_collectionField_oHpC_sn17AbL7y86SQEkK6oZqgA, http://kabob.ucdenver.edu/iao/hasTemplate, http://kabob.ucdenver.edu/iao/kegg/TestExcludeFieldDataRecord_collectionFieldDataField1)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestExcludeFieldDataRecord_collectionField_oHpC_sn17AbL7y86SQEkK6oZqgA, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/FieldValue)",
-						"(http://record.uri, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/F_TestExcludeFieldDataRecord_primitiveIntField_K1E3g8ozXlcQiV3vtNaH7ikWY5I)",
+						"(http://record.uri, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/F_TestExcludeFieldDataRecord_primitiveIntField_K1E3g8ozXlcQiV3vtNaH7ikWY5I)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestExcludeFieldDataRecord_primitiveIntField_K1E3g8ozXlcQiV3vtNaH7ikWY5I, http://kabob.ucdenver.edu/iao/hasTemplate, http://kabob.ucdenver.edu/iao/kegg/TestExcludeFieldDataRecord_primitiveIntFieldDataField1)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestExcludeFieldDataRecord_primitiveIntField_K1E3g8ozXlcQiV3vtNaH7ikWY5I, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/FieldValue)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestExcludeFieldDataRecord_primitiveIntField_K1E3g8ozXlcQiV3vtNaH7ikWY5I, http://purl.obolibrary.org/obo/IAO_0000219, \"2\"^^<http://www.w3.org/2001/XMLSchema#integer>)",
-						"(http://record.uri, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/F_TestExcludeFieldDataRecord_stringField_wM4OI6HAehQ_w0UDN6cjfEXbpXg)",
+						"(http://record.uri, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/F_TestExcludeFieldDataRecord_stringField_wM4OI6HAehQ_w0UDN6cjfEXbpXg)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestExcludeFieldDataRecord_stringField_wM4OI6HAehQ_w0UDN6cjfEXbpXg, http://kabob.ucdenver.edu/iao/hasTemplate, http://kabob.ucdenver.edu/iao/kegg/TestExcludeFieldDataRecord_stringFieldDataField1)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestExcludeFieldDataRecord_stringField_wM4OI6HAehQ_w0UDN6cjfEXbpXg, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/FieldValue)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestExcludeFieldDataRecord_stringField_wM4OI6HAehQ_w0UDN6cjfEXbpXg, http://purl.obolibrary.org/obo/IAO_0000219, \"1\"@en)");
diff --git a/datasource-rdfizer/src/test/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/SubRecordUtilTest.java b/datasource-rdfizer/src/test/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/SubRecordUtilTest.java
index dfd1f21..acf12d2 100644
--- a/datasource-rdfizer/src/test/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/SubRecordUtilTest.java
+++ b/datasource-rdfizer/src/test/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/SubRecordUtilTest.java
@@ -163,29 +163,29 @@ public final void testGetRecordInstanceStatementsWithSubRecord() throws URISynta
 
 		Set<String> expectedStatements = CollectionsUtil
 				.createSet(
-						"(http://kabob.ucdenver.edu/iao/kegg/keggTestDataRecordWithSubRecordDataSet20101221, http://purl.obolibrary.org/obo/has_part, http://record.uri)",
+						"(http://kabob.ucdenver.edu/iao/kegg/keggTestDataRecordWithSubRecordDataSet20101221, http://purl.obolibrary.org/obo/BFO_0000051, http://record.uri)",
 						"(http://record.uri, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecord)",
 						"(http://record.uri, http://kabob.ucdenver.edu/iao/hasTemplate, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordSchema1)",
-						"(http://record.uri, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubRecord_collectionField_bxwod_CldpkRuVLuKbP0T5IQ6JQ)",
+						"(http://record.uri, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubRecord_collectionField_bxwod_CldpkRuVLuKbP0T5IQ6JQ)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubRecord_collectionField_bxwod_CldpkRuVLuKbP0T5IQ6JQ, http://purl.obolibrary.org/obo/IAO_0000219, \"1\"@en)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubRecord_collectionField_bxwod_CldpkRuVLuKbP0T5IQ6JQ, http://kabob.ucdenver.edu/iao/hasTemplate, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecord_collectionFieldDataField1)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubRecord_collectionField_bxwod_CldpkRuVLuKbP0T5IQ6JQ, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/FieldValue)",
-						"(http://record.uri, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubRecord_collectionField_Rft4iXphN4pTKZIY-174Yxb3mcA)",
+						"(http://record.uri, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubRecord_collectionField_Rft4iXphN4pTKZIY-174Yxb3mcA)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubRecord_collectionField_Rft4iXphN4pTKZIY-174Yxb3mcA, http://purl.obolibrary.org/obo/IAO_0000219, \"2\"@en)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubRecord_collectionField_Rft4iXphN4pTKZIY-174Yxb3mcA, http://kabob.ucdenver.edu/iao/hasTemplate, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecord_collectionFieldDataField1)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubRecord_collectionField_Rft4iXphN4pTKZIY-174Yxb3mcA, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/FieldValue)",
-						"(http://record.uri, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubRecord_primitiveIntField_hmNPO2pDKLqbj8jYanuGe3fDEro)",
+						"(http://record.uri, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubRecord_primitiveIntField_hmNPO2pDKLqbj8jYanuGe3fDEro)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubRecord_primitiveIntField_hmNPO2pDKLqbj8jYanuGe3fDEro, http://kabob.ucdenver.edu/iao/hasTemplate, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecord_primitiveIntFieldDataField1)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubRecord_primitiveIntField_hmNPO2pDKLqbj8jYanuGe3fDEro, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/FieldValue)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubRecord_primitiveIntField_hmNPO2pDKLqbj8jYanuGe3fDEro, http://purl.obolibrary.org/obo/IAO_0000219, \"2\"^^<http://www.w3.org/2001/XMLSchema#integer>)",
-						"(http://record.uri, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubRecord_stringField_cK1-ZKY-VbQR72YjJQpeLtNdm34)",
+						"(http://record.uri, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubRecord_stringField_cK1-ZKY-VbQR72YjJQpeLtNdm34)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubRecord_stringField_cK1-ZKY-VbQR72YjJQpeLtNdm34, http://kabob.ucdenver.edu/iao/hasTemplate, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecord_stringFieldDataField1)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubRecord_stringField_cK1-ZKY-VbQR72YjJQpeLtNdm34, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/FieldValue)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubRecord_stringField_cK1-ZKY-VbQR72YjJQpeLtNdm34, http://purl.obolibrary.org/obo/IAO_0000219, \"1\"@en)",
-						"(http://record.uri, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/R_SubRecord_h8DLlrIUz8gORKdbauuMlahhVws)",
+						"(http://record.uri, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/R_SubRecord_h8DLlrIUz8gORKdbauuMlahhVws)",
 						"(http://kabob.ucdenver.edu/iao/kegg/R_SubRecord_h8DLlrIUz8gORKdbauuMlahhVws, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/kegg/SubRecord)",
 						"(http://kabob.ucdenver.edu/iao/kegg/R_SubRecord_h8DLlrIUz8gORKdbauuMlahhVws, http://kabob.ucdenver.edu/iao/hasTemplate, http://kabob.ucdenver.edu/iao/kegg/SubRecordSchema1)",
-						"(http://kabob.ucdenver.edu/iao/kegg/R_SubRecord_h8DLlrIUz8gORKdbauuMlahhVws, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/F_SubRecord_subStringField_h8DLlrIUz8gORKdbauuMlahhVws)",
+						"(http://kabob.ucdenver.edu/iao/kegg/R_SubRecord_h8DLlrIUz8gORKdbauuMlahhVws, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/F_SubRecord_subStringField_h8DLlrIUz8gORKdbauuMlahhVws)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_SubRecord_subStringField_h8DLlrIUz8gORKdbauuMlahhVws, http://kabob.ucdenver.edu/iao/hasTemplate, http://kabob.ucdenver.edu/iao/kegg/SubRecord_subStringFieldDataField1)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_SubRecord_subStringField_h8DLlrIUz8gORKdbauuMlahhVws, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/FieldValue)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_SubRecord_subStringField_h8DLlrIUz8gORKdbauuMlahhVws, http://purl.obolibrary.org/obo/IAO_0000219, \"XYZZZZZZZZ\"@en)");
@@ -213,36 +213,36 @@ public final void testGetRecordInstanceStatementsWithCollectionSubRecord() throw
 
 		Set<String> expectedStatements = CollectionsUtil
 				.createSet(
-						"(http://kabob.ucdenver.edu/iao/kegg/keggTestDataRecordWithSubRecordCollectionDataSet20101221, http://purl.obolibrary.org/obo/has_part, http://record.uri)",
+						"(http://kabob.ucdenver.edu/iao/kegg/keggTestDataRecordWithSubRecordCollectionDataSet20101221, http://purl.obolibrary.org/obo/BFO_0000051, http://record.uri)",
 						"(http://record.uri, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordCollection)",
 						"(http://record.uri, http://kabob.ucdenver.edu/iao/hasTemplate, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordCollectionSchema1)",
-						"(http://record.uri, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubRecordCollection_collectionField_ActAF6W_MAU_W5CKGjJQDpVjRYs)",
+						"(http://record.uri, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubRecordCollection_collectionField_ActAF6W_MAU_W5CKGjJQDpVjRYs)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubRecordCollection_collectionField_ActAF6W_MAU_W5CKGjJQDpVjRYs, http://purl.obolibrary.org/obo/IAO_0000219, \"1\"@en)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubRecordCollection_collectionField_ActAF6W_MAU_W5CKGjJQDpVjRYs, http://kabob.ucdenver.edu/iao/hasTemplate, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordCollection_collectionFieldDataField1)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubRecordCollection_collectionField_ActAF6W_MAU_W5CKGjJQDpVjRYs, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/FieldValue)",
-						"(http://record.uri, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubRecordCollection_collectionField_h0X8Bargx3OrweU69XvjQp6FneE)",
+						"(http://record.uri, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubRecordCollection_collectionField_h0X8Bargx3OrweU69XvjQp6FneE)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubRecordCollection_collectionField_h0X8Bargx3OrweU69XvjQp6FneE, http://purl.obolibrary.org/obo/IAO_0000219, \"2\"@en)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubRecordCollection_collectionField_h0X8Bargx3OrweU69XvjQp6FneE, http://kabob.ucdenver.edu/iao/hasTemplate, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordCollection_collectionFieldDataField1)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubRecordCollection_collectionField_h0X8Bargx3OrweU69XvjQp6FneE, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/FieldValue)",
-						"(http://record.uri, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubRecordCollection_primitiveIntField_ASovVMTKCova71bDzai_hBIv1Ek)",
+						"(http://record.uri, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubRecordCollection_primitiveIntField_ASovVMTKCova71bDzai_hBIv1Ek)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubRecordCollection_primitiveIntField_ASovVMTKCova71bDzai_hBIv1Ek, http://kabob.ucdenver.edu/iao/hasTemplate, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordCollection_primitiveIntFieldDataField1)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubRecordCollection_primitiveIntField_ASovVMTKCova71bDzai_hBIv1Ek, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/FieldValue)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubRecordCollection_primitiveIntField_ASovVMTKCova71bDzai_hBIv1Ek, http://purl.obolibrary.org/obo/IAO_0000219, \"2\"^^<http://www.w3.org/2001/XMLSchema#integer>)",
-						"(http://record.uri, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubRecordCollection_stringField_fuBM1_QOxFmmz2fqYeEdeqb9eoo)",
+						"(http://record.uri, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubRecordCollection_stringField_fuBM1_QOxFmmz2fqYeEdeqb9eoo)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubRecordCollection_stringField_fuBM1_QOxFmmz2fqYeEdeqb9eoo, http://kabob.ucdenver.edu/iao/hasTemplate, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithSubRecordCollection_stringFieldDataField1)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubRecordCollection_stringField_fuBM1_QOxFmmz2fqYeEdeqb9eoo, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/FieldValue)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithSubRecordCollection_stringField_fuBM1_QOxFmmz2fqYeEdeqb9eoo, http://purl.obolibrary.org/obo/IAO_0000219, \"1\"@en)",
-						"(http://record.uri, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/R_SubRecord_h8DLlrIUz8gORKdbauuMlahhVws)",
+						"(http://record.uri, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/R_SubRecord_h8DLlrIUz8gORKdbauuMlahhVws)",
 						"(http://kabob.ucdenver.edu/iao/kegg/R_SubRecord_h8DLlrIUz8gORKdbauuMlahhVws, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/kegg/SubRecord)",
 						"(http://kabob.ucdenver.edu/iao/kegg/R_SubRecord_h8DLlrIUz8gORKdbauuMlahhVws, http://kabob.ucdenver.edu/iao/hasTemplate, http://kabob.ucdenver.edu/iao/kegg/SubRecordSchema1)",
-						"(http://kabob.ucdenver.edu/iao/kegg/R_SubRecord_h8DLlrIUz8gORKdbauuMlahhVws, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/F_SubRecord_subStringField_h8DLlrIUz8gORKdbauuMlahhVws)",
+						"(http://kabob.ucdenver.edu/iao/kegg/R_SubRecord_h8DLlrIUz8gORKdbauuMlahhVws, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/F_SubRecord_subStringField_h8DLlrIUz8gORKdbauuMlahhVws)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_SubRecord_subStringField_h8DLlrIUz8gORKdbauuMlahhVws, http://kabob.ucdenver.edu/iao/hasTemplate, http://kabob.ucdenver.edu/iao/kegg/SubRecord_subStringFieldDataField1)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_SubRecord_subStringField_h8DLlrIUz8gORKdbauuMlahhVws, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/FieldValue)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_SubRecord_subStringField_h8DLlrIUz8gORKdbauuMlahhVws, http://purl.obolibrary.org/obo/IAO_0000219, \"XYZZZZZZZZ\"@en)",
-						"(http://record.uri, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/R_SubRecord_YrTvhKIp4LTfUjgPWrmt54QkIPk)",
+						"(http://record.uri, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/R_SubRecord_YrTvhKIp4LTfUjgPWrmt54QkIPk)",
 						"(http://kabob.ucdenver.edu/iao/kegg/R_SubRecord_YrTvhKIp4LTfUjgPWrmt54QkIPk, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/kegg/SubRecord)",
 						"(http://kabob.ucdenver.edu/iao/kegg/R_SubRecord_YrTvhKIp4LTfUjgPWrmt54QkIPk, http://kabob.ucdenver.edu/iao/hasTemplate, http://kabob.ucdenver.edu/iao/kegg/SubRecordSchema1)",
-						"(http://kabob.ucdenver.edu/iao/kegg/R_SubRecord_YrTvhKIp4LTfUjgPWrmt54QkIPk, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/F_SubRecord_subStringField_YrTvhKIp4LTfUjgPWrmt54QkIPk)",
+						"(http://kabob.ucdenver.edu/iao/kegg/R_SubRecord_YrTvhKIp4LTfUjgPWrmt54QkIPk, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/F_SubRecord_subStringField_YrTvhKIp4LTfUjgPWrmt54QkIPk)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_SubRecord_subStringField_YrTvhKIp4LTfUjgPWrmt54QkIPk, http://kabob.ucdenver.edu/iao/hasTemplate, http://kabob.ucdenver.edu/iao/kegg/SubRecord_subStringFieldDataField1)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_SubRecord_subStringField_YrTvhKIp4LTfUjgPWrmt54QkIPk, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/FieldValue)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_SubRecord_subStringField_YrTvhKIp4LTfUjgPWrmt54QkIPk, http://purl.obolibrary.org/obo/IAO_0000219, \"ABABABABA\"@en)");
@@ -273,36 +273,36 @@ public final void testGetRecordInstanceStatementsWithNestedSubRecord() throws UR
 
 		Set<String> expectedStatements = CollectionsUtil
 				.createSet(
-						"(http://kabob.ucdenver.edu/iao/kegg/keggTestDataRecordWithNestedSubRecordDataSet20101221, http://purl.obolibrary.org/obo/has_part, http://record.uri)",
+						"(http://kabob.ucdenver.edu/iao/kegg/keggTestDataRecordWithNestedSubRecordDataSet20101221, http://purl.obolibrary.org/obo/BFO_0000051, http://record.uri)",
 						"(http://record.uri, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithNestedSubRecord)",
 						"(http://record.uri, http://kabob.ucdenver.edu/iao/hasTemplate, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithNestedSubRecordSchema1)",
-						"(http://record.uri, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithNestedSubRecord_collectionField_SG1QsGZwhJNaTdMnzgma3v5AB24)",
+						"(http://record.uri, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithNestedSubRecord_collectionField_SG1QsGZwhJNaTdMnzgma3v5AB24)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithNestedSubRecord_collectionField_SG1QsGZwhJNaTdMnzgma3v5AB24, http://purl.obolibrary.org/obo/IAO_0000219, \"1\"@en)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithNestedSubRecord_collectionField_SG1QsGZwhJNaTdMnzgma3v5AB24, http://kabob.ucdenver.edu/iao/hasTemplate, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithNestedSubRecord_collectionFieldDataField1)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithNestedSubRecord_collectionField_SG1QsGZwhJNaTdMnzgma3v5AB24, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/FieldValue)",
-						"(http://record.uri, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithNestedSubRecord_collectionField_wICWjHJMs-mggQ_vE6Jc0mnb2As)",
+						"(http://record.uri, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithNestedSubRecord_collectionField_wICWjHJMs-mggQ_vE6Jc0mnb2As)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithNestedSubRecord_collectionField_wICWjHJMs-mggQ_vE6Jc0mnb2As, http://purl.obolibrary.org/obo/IAO_0000219, \"2\"@en)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithNestedSubRecord_collectionField_wICWjHJMs-mggQ_vE6Jc0mnb2As, http://kabob.ucdenver.edu/iao/hasTemplate, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithNestedSubRecord_collectionFieldDataField1)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithNestedSubRecord_collectionField_wICWjHJMs-mggQ_vE6Jc0mnb2As, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/FieldValue)",
-						"(http://record.uri, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/R_NestedSubRecord_QrM2DZEWgi_Lp-In-_vZnXBaPWU)",
+						"(http://record.uri, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/R_NestedSubRecord_QrM2DZEWgi_Lp-In-_vZnXBaPWU)",
 						"(http://kabob.ucdenver.edu/iao/kegg/R_NestedSubRecord_QrM2DZEWgi_Lp-In-_vZnXBaPWU, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/kegg/NestedSubRecord)",
 						"(http://kabob.ucdenver.edu/iao/kegg/R_NestedSubRecord_QrM2DZEWgi_Lp-In-_vZnXBaPWU, http://kabob.ucdenver.edu/iao/hasTemplate, http://kabob.ucdenver.edu/iao/kegg/NestedSubRecordSchema1)",
-						"(http://kabob.ucdenver.edu/iao/kegg/R_NestedSubRecord_QrM2DZEWgi_Lp-In-_vZnXBaPWU, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/irefweb/R_IRefWebInteractionSourceDatabase_IPNOgWa085q7R1Ww21fz-xD4MV0)",
+						"(http://kabob.ucdenver.edu/iao/kegg/R_NestedSubRecord_QrM2DZEWgi_Lp-In-_vZnXBaPWU, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/irefweb/R_IRefWebInteractionSourceDatabase_IPNOgWa085q7R1Ww21fz-xD4MV0)",
 						"(http://kabob.ucdenver.edu/iao/irefweb/R_IRefWebInteractionSourceDatabase_IPNOgWa085q7R1Ww21fz-xD4MV0, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/irefweb/IRefWebInteractionSourceDatabase)",
 						"(http://kabob.ucdenver.edu/iao/irefweb/R_IRefWebInteractionSourceDatabase_IPNOgWa085q7R1Ww21fz-xD4MV0, http://kabob.ucdenver.edu/iao/hasTemplate, http://kabob.ucdenver.edu/iao/irefweb/IRefWebInteractionSourceDatabaseSchema1)",
-						"(http://kabob.ucdenver.edu/iao/irefweb/R_IRefWebInteractionSourceDatabase_IPNOgWa085q7R1Ww21fz-xD4MV0, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/irefweb/F_IRefWebInteractionSourceDatabase_sourceDatabaseId_hxLOHMRgT97VZ1vytl4H1dILsuc)",
+						"(http://kabob.ucdenver.edu/iao/irefweb/R_IRefWebInteractionSourceDatabase_IPNOgWa085q7R1Ww21fz-xD4MV0, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/irefweb/F_IRefWebInteractionSourceDatabase_sourceDatabaseId_hxLOHMRgT97VZ1vytl4H1dILsuc)",
 						"(http://kabob.ucdenver.edu/iao/irefweb/F_IRefWebInteractionSourceDatabase_sourceDatabaseId_hxLOHMRgT97VZ1vytl4H1dILsuc, http://kabob.ucdenver.edu/iao/hasTemplate, http://kabob.ucdenver.edu/iao/irefweb/IRefWebInteractionSourceDatabase_sourceDatabaseIdDataField1)",
 						"(http://kabob.ucdenver.edu/iao/irefweb/F_IRefWebInteractionSourceDatabase_sourceDatabaseId_hxLOHMRgT97VZ1vytl4H1dILsuc, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/FieldValue)",
 						"(http://kabob.ucdenver.edu/iao/irefweb/F_IRefWebInteractionSourceDatabase_sourceDatabaseId_hxLOHMRgT97VZ1vytl4H1dILsuc, http://purl.obolibrary.org/obo/IAO_0000219, http://kabob.ucdenver.edu/iao/mi_ontology/MI_ONTOLOGY_MI_0123_ICE)",
-						"(http://kabob.ucdenver.edu/iao/irefweb/R_IRefWebInteractionSourceDatabase_IPNOgWa085q7R1Ww21fz-xD4MV0, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/irefweb/F_IRefWebInteractionSourceDatabase_sourceDatabaseName_8f_AI3EeN_eQ7M4Y6Ds14YrZmcU)",
+						"(http://kabob.ucdenver.edu/iao/irefweb/R_IRefWebInteractionSourceDatabase_IPNOgWa085q7R1Ww21fz-xD4MV0, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/irefweb/F_IRefWebInteractionSourceDatabase_sourceDatabaseName_8f_AI3EeN_eQ7M4Y6Ds14YrZmcU)",
 						"(http://kabob.ucdenver.edu/iao/irefweb/F_IRefWebInteractionSourceDatabase_sourceDatabaseName_8f_AI3EeN_eQ7M4Y6Ds14YrZmcU, http://kabob.ucdenver.edu/iao/hasTemplate, http://kabob.ucdenver.edu/iao/irefweb/IRefWebInteractionSourceDatabase_sourceDatabaseNameDataField1)",
 						"(http://kabob.ucdenver.edu/iao/irefweb/F_IRefWebInteractionSourceDatabase_sourceDatabaseName_8f_AI3EeN_eQ7M4Y6Ds14YrZmcU, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/FieldValue)",
 						"(http://kabob.ucdenver.edu/iao/irefweb/F_IRefWebInteractionSourceDatabase_sourceDatabaseName_8f_AI3EeN_eQ7M4Y6Ds14YrZmcU, http://purl.obolibrary.org/obo/IAO_0000219, \"miTerm123\"@en)",
-						"(http://record.uri, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithNestedSubRecord_primitiveIntField_hzUY3pYkyLsIgSjagFn909vXijg)",
+						"(http://record.uri, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithNestedSubRecord_primitiveIntField_hzUY3pYkyLsIgSjagFn909vXijg)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithNestedSubRecord_primitiveIntField_hzUY3pYkyLsIgSjagFn909vXijg, http://kabob.ucdenver.edu/iao/hasTemplate, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithNestedSubRecord_primitiveIntFieldDataField1)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithNestedSubRecord_primitiveIntField_hzUY3pYkyLsIgSjagFn909vXijg, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/FieldValue)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithNestedSubRecord_primitiveIntField_hzUY3pYkyLsIgSjagFn909vXijg, http://purl.obolibrary.org/obo/IAO_0000219, \"2\"^^<http://www.w3.org/2001/XMLSchema#integer>)",
-						"(http://record.uri, http://purl.obolibrary.org/obo/has_part, http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithNestedSubRecord_stringField_QEauZ8dFR2e9ZMcNoVMBLPRhFb8)",
+						"(http://record.uri, http://purl.obolibrary.org/obo/BFO_0000051, http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithNestedSubRecord_stringField_QEauZ8dFR2e9ZMcNoVMBLPRhFb8)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithNestedSubRecord_stringField_QEauZ8dFR2e9ZMcNoVMBLPRhFb8, http://kabob.ucdenver.edu/iao/hasTemplate, http://kabob.ucdenver.edu/iao/kegg/TestDataRecordWithNestedSubRecord_stringFieldDataField1)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithNestedSubRecord_stringField_QEauZ8dFR2e9ZMcNoVMBLPRhFb8, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://kabob.ucdenver.edu/iao/FieldValue)",
 						"(http://kabob.ucdenver.edu/iao/kegg/F_TestDataRecordWithNestedSubRecord_stringField_QEauZ8dFR2e9ZMcNoVMBLPRhFb8, http://purl.obolibrary.org/obo/IAO_0000219, \"1\"@en)");

From 6732dcf968052909132c03bc83e06b6826b21b06 Mon Sep 17 00:00:00 2001
From: bill-baumgartner <bill.baumgartner@gmail.com>
Date: Fri, 5 Feb 2016 16:07:36 -0700
Subject: [PATCH 08/36] Commented out DIP as it is part of IRefWeb

---
 .../rdfizer/rdf/ice/FileDataSource.java       | 53 ++++++++++---------
 1 file changed, 29 insertions(+), 24 deletions(-)

diff --git a/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/FileDataSource.java b/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/FileDataSource.java
index b7356aa..33b43ec 100644
--- a/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/FileDataSource.java
+++ b/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/FileDataSource.java
@@ -102,30 +102,35 @@
  */
 public enum FileDataSource {
 
-	/**
-	 * The DIP data file must be obtained manually. It is assumed to already be
-	 * in place when RDF generation commences. It must be the only file in the
-	 * DIP data source directory.
-	 * 
-	 */
-	DIP(DataSource.DIP) {
-
-		@Override
-		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
-				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
-			logger.info("sourceFileDirectory (exists): (" + sourceFileDirectory.exists() + ")" + sourceFileDirectory);
-			logger.info("file listing: " + Arrays.toString(sourceFileDirectory.listFiles()));
-			File dipDataFile = sourceFileDirectory.listFiles()[0];
-			logger.info("File exists: " + dipDataFile.exists() + " -- " + dipDataFile.getAbsolutePath());
-			FileUtil.validateFile(dipDataFile);
-			return new DipYYYYMMDDFileParser(dipDataFile, CharacterEncoding.US_ASCII, taxonIds);
-		}
-
-		@Override
-		protected boolean isTaxonAware() {
-			return true;
-		}
-	},
+	
+	/*
+	 * DIP is now part of IRefWeb, so it has been commented out since it requires the extra manual step
+	 * of logging in to the DIP website and downloading the file (and IRefWeb does not).
+	 */
+//	/**
+//	 * The DIP data file must be obtained manually. It is assumed to already be
+//	 * in place when RDF generation commences. It must be the only file in the
+//	 * DIP data source directory.
+//	 * 
+//	 */
+//	DIP(DataSource.DIP) {
+//
+//		@Override
+//		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
+//				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
+//			logger.info("sourceFileDirectory (exists): (" + sourceFileDirectory.exists() + ")" + sourceFileDirectory);
+//			logger.info("file listing: " + Arrays.toString(sourceFileDirectory.listFiles()));
+//			File dipDataFile = sourceFileDirectory.listFiles()[0];
+//			logger.info("File exists: " + dipDataFile.exists() + " -- " + dipDataFile.getAbsolutePath());
+//			FileUtil.validateFile(dipDataFile);
+//			return new DipYYYYMMDDFileParser(dipDataFile, CharacterEncoding.US_ASCII, taxonIds);
+//		}
+//
+//		@Override
+//		protected boolean isTaxonAware() {
+//			return true;
+//		}
+//	},
 	/**
 	 * The HPRD HPRD_ID_MAPPINGS.txt file must be obtained manually. It is
 	 * assumed to already be in place when RDF generation commences.

From d115440aa5521fe4f3c5960be6ae33f704b97318 Mon Sep 17 00:00:00 2001
From: bill-baumgartner <bill.baumgartner@gmail.com>
Date: Fri, 5 Feb 2016 16:11:48 -0700
Subject: [PATCH 09/36] Moved manually obtained datasources to the end

This will simplify the RDF generation procedure
---
 .../rdfizer/rdf/ice/FileDataSource.java       | 204 +++++++++---------
 1 file changed, 105 insertions(+), 99 deletions(-)

diff --git a/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/FileDataSource.java b/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/FileDataSource.java
index 33b43ec..e50597d 100644
--- a/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/FileDataSource.java
+++ b/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/FileDataSource.java
@@ -131,78 +131,7 @@ public enum FileDataSource {
 //			return true;
 //		}
 //	},
-	/**
-	 * The HPRD HPRD_ID_MAPPINGS.txt file must be obtained manually. It is
-	 * assumed to already be in place when RDF generation commences.
-	 */
-	HPRD_ID_MAPPINGS(DataSource.HPRD) {
-
-		@Override
-		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
-				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
-			File hprdIdMappingFile = new File(sourceFileDirectory,
-					HprdIdMappingsTxtFileParser.HPRD_ID_MAPPINGS_TXT_FILE_NAME);
-			FileUtil.validateFile(hprdIdMappingFile);
-			return new HprdIdMappingsTxtFileParser(hprdIdMappingFile, CharacterEncoding.US_ASCII);
-		}
 
-		@Override
-		protected boolean isTaxonAware() {
-			return false;
-		}
-	},
-	/**
-	 * The TRANSFAC gene.dat and matrix.dat files must be obtained manually.
-	 * They are assumed to already be in place when RDF generation commences.
-	 */
-	TRANSFAC_GENE(DataSource.TRANSFAC) {
-		@Override
-		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
-				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
-			File transfacGeneDatFile = new File(sourceFileDirectory, TransfacGeneDatFileParser.GENE_DAT_FILE_NAME);
-			FileUtil.validateFile(transfacGeneDatFile);
-			return new TransfacGeneDatFileParser(transfacGeneDatFile, CharacterEncoding.ISO_8859_1);
-		}
-
-		@Override
-		protected boolean isTaxonAware() {
-			return false;
-		}
-	},
-
-	TRANSFAC_MATRIX(DataSource.TRANSFAC) {
-		@Override
-		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
-				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
-			File transfacMatrixDatFile = new File(sourceFileDirectory, TransfacMatrixDatFileParser.MATRIX_DAT_FILE_NAME);
-			FileUtil.validateFile(transfacMatrixDatFile);
-			return new TransfacMatrixDatFileParser(transfacMatrixDatFile, CharacterEncoding.ISO_8859_1);
-		}
-
-		@Override
-		protected boolean isTaxonAware() {
-			return false;
-		}
-	},
-	/**
-	 * The GAD all.txt data file must be obtained manually. It is assumed to
-	 * already be in place when RDF generation commences.
-	 */
-	GAD(DataSource.GAD) {
-		@Override
-		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
-				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
-			File gadAllTxtFile = new File(sourceFileDirectory,
-					GeneticAssociationDbAllTxtFileParser.GAD_ALL_TXT_FILE_NAME);
-			FileUtil.validateFile(gadAllTxtFile);
-			return new GeneticAssociationDbAllTxtFileParser(gadAllTxtFile, CharacterEncoding.US_ASCII);
-		}
-
-		@Override
-		protected boolean isTaxonAware() {
-			return false;
-		}
-	},
 	/**
 	 *
 	 */
@@ -232,19 +161,7 @@ protected boolean isTaxonAware() {
 		}
 	},
 
-	PHARMGKB_RELATION(DataSource.PHARMGKB) {
-		@Override
-		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
-				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
-			File pharmgkbRelationshipsDataFile = new File(sourceFileDirectory, "relationships.tsv");
-			return new PharmGkbRelationFileParser(pharmgkbRelationshipsDataFile, CharacterEncoding.UTF_8);
-		}
 
-		@Override
-		protected boolean isTaxonAware() {
-			return false;
-		}
-	},
 
 	PHARMGKB_DRUG(DataSource.PHARMGKB) {
 		@Override
@@ -409,21 +326,7 @@ protected boolean isTaxonAware() {
 			return false;
 		}
 	},
-	/**
-	 *
-	 */
-	OMIM(DataSource.OMIM) {
-		@Override
-		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
-				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
-			return new OmimTxtFileParser(sourceFileDirectory, cleanSourceFiles);
-		}
-
-		@Override
-		protected boolean isTaxonAware() {
-			return false;
-		}
-	},
+	
 	/**
 	 *
 	 */
@@ -693,6 +596,7 @@ protected boolean isTaxonAware() {
 			return true;
 		}
 	},
+	
 
 	/**
 	 * 
@@ -732,7 +636,109 @@ protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boo
 		protected boolean isTaxonAware() {
 			return true;
 		}
-	};
+	},
+	
+	/**
+	 * The HPRD HPRD_ID_MAPPINGS.txt file must be obtained manually. It is
+	 * assumed to already be in place when RDF generation commences.
+	 */
+	HPRD_ID_MAPPINGS(DataSource.HPRD) {
+
+		@Override
+		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
+				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
+			File hprdIdMappingFile = new File(sourceFileDirectory,
+					HprdIdMappingsTxtFileParser.HPRD_ID_MAPPINGS_TXT_FILE_NAME);
+			FileUtil.validateFile(hprdIdMappingFile);
+			return new HprdIdMappingsTxtFileParser(hprdIdMappingFile, CharacterEncoding.US_ASCII);
+		}
+
+		@Override
+		protected boolean isTaxonAware() {
+			return false;
+		}
+	},
+	/**
+	 * The TRANSFAC gene.dat and matrix.dat files must be obtained manually.
+	 * They are assumed to already be in place when RDF generation commences.
+	 */
+	TRANSFAC_GENE(DataSource.TRANSFAC) {
+		@Override
+		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
+				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
+			File transfacGeneDatFile = new File(sourceFileDirectory, TransfacGeneDatFileParser.GENE_DAT_FILE_NAME);
+			FileUtil.validateFile(transfacGeneDatFile);
+			return new TransfacGeneDatFileParser(transfacGeneDatFile, CharacterEncoding.ISO_8859_1);
+		}
+
+		@Override
+		protected boolean isTaxonAware() {
+			return false;
+		}
+	},
+
+	TRANSFAC_MATRIX(DataSource.TRANSFAC) {
+		@Override
+		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
+				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
+			File transfacMatrixDatFile = new File(sourceFileDirectory, TransfacMatrixDatFileParser.MATRIX_DAT_FILE_NAME);
+			FileUtil.validateFile(transfacMatrixDatFile);
+			return new TransfacMatrixDatFileParser(transfacMatrixDatFile, CharacterEncoding.ISO_8859_1);
+		}
+
+		@Override
+		protected boolean isTaxonAware() {
+			return false;
+		}
+	},
+	/**
+	 * The GAD all.txt data file must be obtained manually. It is assumed to
+	 * already be in place when RDF generation commences.
+	 */
+	GAD(DataSource.GAD) {
+		@Override
+		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
+				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
+			File gadAllTxtFile = new File(sourceFileDirectory,
+					GeneticAssociationDbAllTxtFileParser.GAD_ALL_TXT_FILE_NAME);
+			FileUtil.validateFile(gadAllTxtFile);
+			return new GeneticAssociationDbAllTxtFileParser(gadAllTxtFile, CharacterEncoding.US_ASCII);
+		}
+
+		@Override
+		protected boolean isTaxonAware() {
+			return false;
+		}
+	},
+	/**
+	 *
+	 */
+	OMIM(DataSource.OMIM) {
+		@Override
+		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
+				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
+			return new OmimTxtFileParser(sourceFileDirectory, cleanSourceFiles);
+		}
+
+		@Override
+		protected boolean isTaxonAware() {
+			return false;
+		}
+	},
+	PHARMGKB_RELATION(DataSource.PHARMGKB) {
+		@Override
+		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
+				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
+			File pharmgkbRelationshipsDataFile = new File(sourceFileDirectory, "relationships.tsv");
+			return new PharmGkbRelationFileParser(pharmgkbRelationshipsDataFile, CharacterEncoding.UTF_8);
+		}
+
+		@Override
+		protected boolean isTaxonAware() {
+			return false;
+		}
+	}
+	;
 
 	public enum Split {
 		BY_STAGES, NONE;

From 1087aa07fa8bf84795adcabfef608a32e618aba5 Mon Sep 17 00:00:00 2001
From: bill-baumgartner <bill.baumgartner@gmail.com>
Date: Fri, 5 Feb 2016 17:26:13 -0700
Subject: [PATCH 10/36] Update to RGD FTP server URL

---
 .../datasource/fileparsers/rgd/RgdGeneFileRecordReaderBase.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/rgd/RgdGeneFileRecordReaderBase.java b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/rgd/RgdGeneFileRecordReaderBase.java
index 02a4bb9..472fd95 100755
--- a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/rgd/RgdGeneFileRecordReaderBase.java
+++ b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/rgd/RgdGeneFileRecordReaderBase.java
@@ -55,7 +55,7 @@
  */
 public class RgdGeneFileRecordReaderBase extends SingleLineFileRecordReader<RgdGeneFileRecord> {
 
-	public static final String FTP_SERVER = "rgd.mcw.edu";
+	public static final String FTP_SERVER = "ftp.rgd.mcw.edu";
 	public static final String FTP_PATH = "pub/data_release";
 
 	public static final CharacterEncoding ENCODING = CharacterEncoding.UTF_8;

From 0869261802f922113d148e92cd12f8cc321754a1 Mon Sep 17 00:00:00 2001
From: bill-baumgartner <bill.baumgartner@gmail.com>
Date: Fri, 5 Feb 2016 17:31:30 -0700
Subject: [PATCH 11/36] Update RGD FTP server URL

---
 .../fileparsers/rgd/RgdRatGeneMpAnnotationFileRecordReader.java | 2 +-
 .../rgd/RgdRatGeneNboAnnotationFileRecordReader.java            | 2 +-
 .../fileparsers/rgd/RgdRatGenePwAnnotationFileRecordReader.java | 2 +-
 .../rgd/RgdRatGeneRdoAnnotationFileRecordReader.java            | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/rgd/RgdRatGeneMpAnnotationFileRecordReader.java b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/rgd/RgdRatGeneMpAnnotationFileRecordReader.java
index a22575a..38b8bc9 100644
--- a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/rgd/RgdRatGeneMpAnnotationFileRecordReader.java
+++ b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/rgd/RgdRatGeneMpAnnotationFileRecordReader.java
@@ -51,7 +51,7 @@
  */
 public class RgdRatGeneMpAnnotationFileRecordReader extends Gaf2FileRecordReader<RgdAnnotationGaf2FileRecord> {
 
-	@FtpDownload(server = "rgd.mcw.edu", path = "pub/data_release/annotated_rgd_objects_by_ontology/", filename = "rattus_genes_mp", filetype = FileType.ASCII)
+	@FtpDownload(server = "ftp.rgd.mcw.edu", path = "pub/data_release/annotated_rgd_objects_by_ontology/", filename = "rattus_genes_mp", filetype = FileType.ASCII)
 	private File annotationFile;
 
 	/**
diff --git a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/rgd/RgdRatGeneNboAnnotationFileRecordReader.java b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/rgd/RgdRatGeneNboAnnotationFileRecordReader.java
index 430c59e..e8b7215 100644
--- a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/rgd/RgdRatGeneNboAnnotationFileRecordReader.java
+++ b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/rgd/RgdRatGeneNboAnnotationFileRecordReader.java
@@ -49,7 +49,7 @@
  */
 public class RgdRatGeneNboAnnotationFileRecordReader extends Gaf2FileRecordReader<RgdAnnotationGaf2FileRecord> {
 
-	@FtpDownload(server = "rgd.mcw.edu", path = "pub/data_release/annotated_rgd_objects_by_ontology/", filename = "rattus_genes_nbo", filetype = FileType.ASCII)
+	@FtpDownload(server = "ftp.rgd.mcw.edu", path = "pub/data_release/annotated_rgd_objects_by_ontology/", filename = "rattus_genes_nbo", filetype = FileType.ASCII)
 	private File annotationFile;
 
 	/**
diff --git a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/rgd/RgdRatGenePwAnnotationFileRecordReader.java b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/rgd/RgdRatGenePwAnnotationFileRecordReader.java
index 13a84fd..13ce188 100644
--- a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/rgd/RgdRatGenePwAnnotationFileRecordReader.java
+++ b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/rgd/RgdRatGenePwAnnotationFileRecordReader.java
@@ -49,7 +49,7 @@
  */
 public class RgdRatGenePwAnnotationFileRecordReader extends Gaf2FileRecordReader<RgdAnnotationGaf2FileRecord> {
 
-	@FtpDownload(server = "rgd.mcw.edu", path = "pub/data_release/annotated_rgd_objects_by_ontology/", filename = "rattus_genes_pw", filetype = FileType.ASCII)
+	@FtpDownload(server = "ftp.rgd.mcw.edu", path = "pub/data_release/annotated_rgd_objects_by_ontology/", filename = "rattus_genes_pw", filetype = FileType.ASCII)
 	private File annotationFile;
 
 	/**
diff --git a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/rgd/RgdRatGeneRdoAnnotationFileRecordReader.java b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/rgd/RgdRatGeneRdoAnnotationFileRecordReader.java
index 0220dd7..234c636 100644
--- a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/rgd/RgdRatGeneRdoAnnotationFileRecordReader.java
+++ b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/rgd/RgdRatGeneRdoAnnotationFileRecordReader.java
@@ -49,7 +49,7 @@
  */
 public class RgdRatGeneRdoAnnotationFileRecordReader extends Gaf2FileRecordReader<RgdAnnotationGaf2FileRecord> {
 
-	@FtpDownload(server = "rgd.mcw.edu", path = "pub/data_release/annotated_rgd_objects_by_ontology/", filename = "rattus_genes_rdo", filetype = FileType.ASCII)
+	@FtpDownload(server = "ftp.rgd.mcw.edu", path = "pub/data_release/annotated_rgd_objects_by_ontology/", filename = "rattus_genes_rdo", filetype = FileType.ASCII)
 	private File annotationFile;
 
 	/**

From eca2ca98a3ac610d674e2f4d4e4fc5e3655887ba Mon Sep 17 00:00:00 2001
From: bill-baumgartner <bill.baumgartner@gmail.com>
Date: Mon, 8 Feb 2016 11:35:33 -0700
Subject: [PATCH 12/36] Parsers no longer return null for unknown ids

They use the UnknownDataSourceIdentifier or
ProbableErrorDataSourceIdentifier classes to return IDs that either
don't have a mapping to a proper URI or appear to be errors, e.g. a
UniProt ID that doesn't match the UniProt ID regular expression
---
 .../drugbank/DrugBankDrugRecord.java          |  16 +-
 .../ebi/uniprot/UniProtFileRecord.java        |  10 +-
 .../GeneticAssociationDbAllTxtFileData.java   |  41 +-
 .../hgnc/HgncDownloadFileParser.java          |  20 +-
 .../hprd/HprdIdMappingsTxtFileParser.java     |   7 +-
 .../irefweb/IRefWebPsiMitab2_6FileParser.java | 195 ++++----
 .../kegg/KeggGeneIdListFileData.java          |  10 +-
 .../fileparsers/kegg/KeggGenesFileData.java   |   3 +-
 .../mgi/MRKSequenceFileParser.java            |  10 +-
 .../ncbi/gene/EntrezGeneInfoFileParser.java   |   8 +-
 .../pharmgkb/PharmGkbGeneFileParser.java      | 462 +++++++++---------
 .../pharmgkb/PharmGkbRelationFileParser.java  |   7 +-
 .../fileparsers/pro/ProMappingFileParser.java |  39 +-
 .../rgd/RgdAnnotationFileIdResolver.java      |  22 +-
 .../IRefWebPsiMitab2_6FileParserTest.java     | 137 +++---
 .../pro/ProMappingFileParserTest.java         |  66 ++-
 .../fileparsers/pro/PRO_promapping.txt        |   2 +-
 .../datasource/identifiers/DataSource.java    |  14 +-
 .../identifiers/DataSourceElement.java        |   2 +-
 .../identifiers/DataSourceIdResolver.java     |  79 +--
 .../identifiers/DataSourceIdentifier.java     |   2 +-
 .../NucleotideAccessionResolver.java          |  10 +-
 .../ProbableErrorDataSourceIdentifier.java    |  65 +++
 .../identifiers/ProteinAccessionResolver.java |  11 +-
 .../UnknownDataSourceIdentifier.java          |  21 +
 25 files changed, 721 insertions(+), 538 deletions(-)
 create mode 100644 datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/ProbableErrorDataSourceIdentifier.java
 create mode 100644 datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/UnknownDataSourceIdentifier.java

diff --git a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/drugbank/DrugBankDrugRecord.java b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/drugbank/DrugBankDrugRecord.java
index bdf8e53..e98a736 100644
--- a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/drugbank/DrugBankDrugRecord.java
+++ b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/drugbank/DrugBankDrugRecord.java
@@ -147,7 +147,9 @@
 import edu.ucdenver.ccp.datasource.identifiers.DataSource;
 import edu.ucdenver.ccp.datasource.identifiers.DataSourceIdentifier;
 import edu.ucdenver.ccp.datasource.identifiers.NucleotideAccessionResolver;
+import edu.ucdenver.ccp.datasource.identifiers.ProbableErrorDataSourceIdentifier;
 import edu.ucdenver.ccp.datasource.identifiers.ProteinAccessionResolver;
+import edu.ucdenver.ccp.datasource.identifiers.UnknownDataSourceIdentifier;
 import edu.ucdenver.ccp.datasource.identifiers.drugbank.DrugBankID;
 import edu.ucdenver.ccp.datasource.identifiers.drugbank.DrugsProductDatabaseID;
 import edu.ucdenver.ccp.datasource.identifiers.ebi.interpro.PfamID;
@@ -1304,14 +1306,14 @@ private static DataSourceIdentifier<?> resolveIdentifier(String resource, String
 			} catch (IllegalArgumentException e) {
 				if (identifier.matches("\\d+")) {
 					return new GiNumberID(identifier);
+				} else {
+					return new ProbableErrorDataSourceIdentifier(identifier, "GenBank",
+							"Observed invalid GenBank protein identifier: " + identifier);
 				}
-				logger.warn("Observed invalid GenBank protein identifier: " + identifier);
-				return null;
 			}
 		} else if (resource.equals("GenBank")) {
-			try {
-				return NucleotideAccessionResolver.resolveNucleotideAccession(identifier);
-			} catch (IllegalArgumentException e) {
-				return ProteinAccessionResolver.resolveProteinAccession(identifier);
-			}
+			// try nucleotide first; fall back to a protein accession only if it is flagged as a probable error
+			DataSourceIdentifier<String> nucAccId = NucleotideAccessionResolver.resolveNucleotideAccession(identifier);
+			return ProbableErrorDataSourceIdentifier.class.isInstance(nucAccId)
+					? ProteinAccessionResolver.resolveProteinAccession(identifier) : nucAccId;
 		} else if (resource.equals("UniProtKB")) {
@@ -1354,7 +1356,7 @@ private static DataSourceIdentifier<?> resolveIdentifier(String resource, String
 				id = new UniProtID(identifier);
 			} catch (IllegalArgumentException e) {
 				logger.warn("Unhandled identifier type: " + resource + " (identifier=" + identifier + ")");
-				return null;
+				return new UnknownDataSourceIdentifier(identifier, resource);
 			}
 			if (id != null) {
 				return id;
@@ -1362,7 +1364,7 @@ private static DataSourceIdentifier<?> resolveIdentifier(String resource, String
 		}
 
 		System.out.println("Unhandled identifier type: " + resource + " (identifier=" + identifier + ")");
-		return null;
+		return new UnknownDataSourceIdentifier(identifier, resource);
 		// throw new IllegalArgumentException("Unhandled identifier type: " +
 		// resource +
 		// " (identifier=" + identifier
diff --git a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ebi/uniprot/UniProtFileRecord.java b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ebi/uniprot/UniProtFileRecord.java
index 87491f9..eeaccde 100644
--- a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ebi/uniprot/UniProtFileRecord.java
+++ b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ebi/uniprot/UniProtFileRecord.java
@@ -80,6 +80,8 @@
 import edu.ucdenver.ccp.datasource.fileparsers.RecordField;
 import edu.ucdenver.ccp.datasource.identifiers.DataSource;
 import edu.ucdenver.ccp.datasource.identifiers.DataSourceIdentifier;
+import edu.ucdenver.ccp.datasource.identifiers.ProbableErrorDataSourceIdentifier;
+import edu.ucdenver.ccp.datasource.identifiers.UnknownDataSourceIdentifier;
 import edu.ucdenver.ccp.datasource.identifiers.dip.DipInteractorID;
 import edu.ucdenver.ccp.datasource.identifiers.drugbank.DrugBankID;
 import edu.ucdenver.ccp.datasource.identifiers.ebi.embl.EmblID;
@@ -899,14 +901,10 @@ private DataSourceIdentifier<?> resolveDatabaseIdentifer(String type, String idS
 					return new PirnrId(idStr);
 				}
 			} catch (IllegalArgumentException e) {
-				logger.warn("Invalid identifier detected: " + e.getMessage());
-				return null;
+				return new ProbableErrorDataSourceIdentifier(idStr, type, e.getMessage());
 			}
 
-			// throw new IllegalArgumentException("Unhandled identifier type: "
-			// + type + " :: " + idStr);
-			logger.warn("Unhandled identifier type: " + type + " :: " + idStr);
-			return null;
+			return new UnknownDataSourceIdentifier(idStr, type);
 		}
 	}
 
diff --git a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/gad/GeneticAssociationDbAllTxtFileData.java b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/gad/GeneticAssociationDbAllTxtFileData.java
index 1fb4bc8..33ff25a 100755
--- a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/gad/GeneticAssociationDbAllTxtFileData.java
+++ b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/gad/GeneticAssociationDbAllTxtFileData.java
@@ -73,22 +73,24 @@
  * 
  * @author Bill Baumgartner
  * 
- *         ID _________ Association(Y/N) _________ Broad Phenotype Disease Class _________ Disease
- *         Class Code _________ MeSH Disease Terms _________ Chromosom _________ Chr-Band _________
- *         _________ Gene _________ DNA Start _________ DNA End P Value Reference _________ Pubmed
- *         ID _________ Allele Author Description _________ Allele Functional Effects _________
- *         Polymophism Class _________ Gene Name _________ RefSeq _________ Population _________
- *         MeSH Geolocation _________ Submitter _________ Locus Number _________ Unigene _________
- *         Narrow Phenotype _________ Mole. Phenotype Journal Title _________ rs Number _________
- *         OMIM ID Year _________ Conclusion _________ Study Info _________ Env. Factor _________ GI
- *         Gene A _________ GI Allele of Gene A _________ GI Gene B _________ GI Allele of Gene B
- *         _________ GI Gene C _________ GI Allele of Gene C _________ GI Association? GI combine
+ *         ID _________ Association(Y/N) _________ Broad Phenotype Disease Class
+ *         _________ Disease Class Code _________ MeSH Disease Terms _________
+ *         Chromosom _________ Chr-Band _________ _________ Gene _________ DNA
+ *         Start _________ DNA End P Value Reference _________ Pubmed ID
+ *         _________ Allele Author Description _________ Allele Functional
+ *         Effects _________ Polymophism Class _________ Gene Name _________
+ *         RefSeq _________ Population _________ MeSH Geolocation _________
+ *         Submitter _________ Locus Number _________ Unigene _________ Narrow
+ *         Phenotype _________ Mole. Phenotype Journal Title _________ rs Number
+ *         _________ OMIM ID Year _________ Conclusion _________ Study Info
+ *         _________ Env. Factor _________ GI Gene A _________ GI Allele of Gene
+ *         A _________ GI Gene B _________ GI Allele of Gene B _________ GI Gene
+ *         C _________ GI Allele of Gene C _________ GI Association? GI combine
  *         Env. Factor _________ GI relevant to Disease
  */
 
-@Record(dataSource = DataSource.GAD, schemaVersion="2", comment="Schema version is 2 b/c one field was dropped: GAD/CDC", label="GAD record")
+@Record(dataSource = DataSource.GAD, schemaVersion = "2", comment = "Schema version is 2 b/c one field was dropped: GAD/CDC", label = "GAD record")
 public class GeneticAssociationDbAllTxtFileData extends SingleLineFileRecord {
-	
 
 	private static final Logger logger = Logger.getLogger(GeneticAssociationDbAllTxtFileData.class);
 
@@ -422,7 +424,7 @@ public boolean hasAssociation() {
 	}
 
 	public static GeneticAssociationDbAllTxtFileData parseGeneticAssociationDbAllTxtLine(Line line) {
-		String[] toks = line.getText().split("\\t",-1);
+		String[] toks = line.getText().split("\\t", -1);
 		if (toks.length < 23) {
 			logger.warn("Invalid line detected (" + line.getLineNumber() + "): " + line.getText());
 		}
@@ -454,9 +456,10 @@ public static GeneticAssociationDbAllTxtFileData parseGeneticAssociationDbAllTxt
 		String geneName = toks[17];
 		String refseqURL = null;
 		try {
-		 refseqURL = toks[18];
+			refseqURL = toks[18];
 		} catch (ArrayIndexOutOfBoundsException e) {
-			logger.error("Caught exception. Line: (" + line.getLineNumber() + ") #toks: " + toks.length+" Message: " + e.getMessage() + " LINE: " + line.getText());
+			logger.error("Caught exception. Line: (" + line.getLineNumber() + ") #toks: " + toks.length + " Message: "
+					+ e.getMessage() + " LINE: " + line.getText());
 		}
 
 		DataSourceIdentifier<?> nucleotideId = null;
@@ -470,13 +473,7 @@ public static GeneticAssociationDbAllTxtFileData parseGeneticAssociationDbAllTxt
 			if (acc.matches("\\d+")) {
 				nucleotideId = new GiNumberID(acc);
 			} else {
-				try {
-					nucleotideId = NucleotideAccessionResolver.resolveNucleotideAccession(acc);
-				} catch (IllegalArgumentException e) {
-					logger.info("tok: " + refseqURL + ";");
-					logger.warn(e.getMessage());
-					nucleotideId = null;
-				}
+				nucleotideId = NucleotideAccessionResolver.resolveNucleotideAccession(acc);
 			}
 		}
 
diff --git a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/hgnc/HgncDownloadFileParser.java b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/hgnc/HgncDownloadFileParser.java
index c17a690..7497744 100644
--- a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/hgnc/HgncDownloadFileParser.java
+++ b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/hgnc/HgncDownloadFileParser.java
@@ -59,7 +59,9 @@
 import edu.ucdenver.ccp.datasource.fileparsers.hgnc.HgncDownloadFileData.SpecialistDbIdLinkPair;
 import edu.ucdenver.ccp.datasource.identifiers.DataSourceIdentifier;
 import edu.ucdenver.ccp.datasource.identifiers.NucleotideAccessionResolver;
+import edu.ucdenver.ccp.datasource.identifiers.ProbableErrorDataSourceIdentifier;
 import edu.ucdenver.ccp.datasource.identifiers.ProteinAccessionResolver;
+import edu.ucdenver.ccp.datasource.identifiers.UnknownDataSourceIdentifier;
 import edu.ucdenver.ccp.datasource.identifiers.ebi.uniprot.UniProtID;
 import edu.ucdenver.ccp.datasource.identifiers.ec.EnzymeCommissionID;
 import edu.ucdenver.ccp.datasource.identifiers.ensembl.EnsemblGeneID;
@@ -551,9 +553,7 @@ private DataSourceIdentifier<?> resolveSpecialistId(String idStr, String link) {
 			return new SlcId(idStr);
 		}
 
-		logger.warn("Unable to resolve id from: " + link);
-		return null;
-		// throw new IllegalArgumentException("Unknown link type: " + link);
+		return new UnknownDataSourceIdentifier(idStr, null);
 	}
 
 	/**
@@ -564,14 +564,12 @@ private Set<DataSourceIdentifier<?>> resolveAccessionNumbers(String accListStr)
 		Set<DataSourceIdentifier<?>> accNumbers = new HashSet<DataSourceIdentifier<?>>();
 		if (!accListStr.isEmpty()) {
 			for (String acc : accListStr.split(",")) {
-				try {
-					accNumbers.add(NucleotideAccessionResolver.resolveNucleotideAccession(acc));
-				} catch (IllegalArgumentException e) {
-					try {
-						accNumbers.add(ProteinAccessionResolver.resolveProteinAccession(acc));
-					} catch (IllegalArgumentException e2) {
-						logger.warn("Cannot resolve: " + acc + " -- " + e.getMessage());
-					}
+				DataSourceIdentifier<String> nucAccId = NucleotideAccessionResolver.resolveNucleotideAccession(acc);
+				if (ProbableErrorDataSourceIdentifier.class.isInstance(nucAccId)) {
+					DataSourceIdentifier<String> proAccId = ProteinAccessionResolver.resolveProteinAccession(acc);
+					accNumbers.add(proAccId);
+				} else {
+					accNumbers.add(nucAccId);
 				}
 			}
 		}
diff --git a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/hprd/HprdIdMappingsTxtFileParser.java b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/hprd/HprdIdMappingsTxtFileParser.java
index bb48ecc..a5417ea 100755
--- a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/hprd/HprdIdMappingsTxtFileParser.java
+++ b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/hprd/HprdIdMappingsTxtFileParser.java
@@ -61,6 +61,7 @@
 import edu.ucdenver.ccp.datasource.fileparsers.SingleLineFileRecordReader;
 import edu.ucdenver.ccp.datasource.identifiers.DataSourceIdentifier;
 import edu.ucdenver.ccp.datasource.identifiers.NucleotideAccessionResolver;
+import edu.ucdenver.ccp.datasource.identifiers.ProbableErrorDataSourceIdentifier;
 import edu.ucdenver.ccp.datasource.identifiers.ProteinAccessionResolver;
 import edu.ucdenver.ccp.datasource.identifiers.ebi.uniprot.UniProtID;
 import edu.ucdenver.ccp.datasource.identifiers.hprd.HprdID;
@@ -114,11 +115,11 @@ protected HprdIdMappingsTxtFileData parseRecordFromLine(Line line) {
 	}
 
 	private DataSourceIdentifier<?> resolveAccession(String acc) {
-		try {
-			return NucleotideAccessionResolver.resolveNucleotideAccession(acc);
-		} catch (IllegalArgumentException e) {
+		DataSourceIdentifier<String> nucAccId = NucleotideAccessionResolver.resolveNucleotideAccession(acc);
+		if (ProbableErrorDataSourceIdentifier.class.isInstance(nucAccId)) {
 			return ProteinAccessionResolver.resolveProteinAccession(acc);
 		}
+		return nucAccId;
 	}
 
 }
diff --git a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/irefweb/IRefWebPsiMitab2_6FileParser.java b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/irefweb/IRefWebPsiMitab2_6FileParser.java
index 88b70b2..2c3a615 100644
--- a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/irefweb/IRefWebPsiMitab2_6FileParser.java
+++ b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/irefweb/IRefWebPsiMitab2_6FileParser.java
@@ -73,7 +73,9 @@
 import edu.ucdenver.ccp.datasource.fileparsers.taxonaware.TaxonAwareSingleLineFileRecordReader;
 import edu.ucdenver.ccp.datasource.identifiers.DataSourceIdentifier;
 import edu.ucdenver.ccp.datasource.identifiers.NucleotideAccessionResolver;
+import edu.ucdenver.ccp.datasource.identifiers.ProbableErrorDataSourceIdentifier;
 import edu.ucdenver.ccp.datasource.identifiers.ProteinAccessionResolver;
+import edu.ucdenver.ccp.datasource.identifiers.UnknownDataSourceIdentifier;
 import edu.ucdenver.ccp.datasource.identifiers.bind.BindInteractionID;
 import edu.ucdenver.ccp.datasource.identifiers.dip.DipInteractionID;
 import edu.ucdenver.ccp.datasource.identifiers.dip.DipInteractorID;
@@ -121,8 +123,10 @@
 import edu.ucdenver.ccp.identifier.publication.PubMedID;
 
 /**
- * This class is used to parse DIPYYYMMDD files which can be downloaded from the DIP website
- * ftp://ftp.no.embnet.org/irefindex/data/archive/release_4.0/psimi_tab/All.mitab.06042009.txt.zip
+ * This class is used to parse DIPYYYMMDD files which can be downloaded from the
+ * DIP website. The download URL is kept on a single line so that it stays
+ * intact when copied:
+ * ftp://ftp.no.embnet.org/irefindex/data/archive/release_4.0/psimi_tab/All.mitab.06042009.txt.zip
  * 
  * @author Bill Baumgartner
  * @see IRefWebMitab4_0FileData for file format and version specifications
@@ -133,12 +137,12 @@ public class IRefWebPsiMitab2_6FileParser extends TaxonAwareSingleLineFileRecord
 
 	private static final String HEADER = "#uidA\tuidB\taltA\taltB\taliasA\taliasB\tmethod\tauthor\tpmids\ttaxa\ttaxb\tinteractionType\tsourcedb\tinteractionIdentifier\tconfidence\texpansion\tbiological_role_A\tbiological_role_B\texperimental_role_A\texperimental_role_B\tinteractor_type_A\tinteractor_type_B\txrefs_A\txrefs_B\txrefs_Interaction\tAnnotations_A\tAnnotations_B\tAnnotations_Interaction\tHost_organism_taxid\tparameters_Interaction\tCreation_date\tUpdate_date\tChecksum_A\tChecksum_B\tChecksum_Interaction\tNegative\tOriginalReferenceA\tOriginalReferenceB\tFinalReferenceA\tFinalReferenceB\tMappingScoreA\tMappingScoreB\tirogida\tirogidb\tirigid\tcrogida\tcrogidb\tcrigid\ticrogida\ticrogidb\ticrigid\timex_id\tedgetype\tnumParticipants";
 
-//	public static final String FTP_FILE_NAME = "All.mitab.03022013.txt.zip";
+	// public static final String FTP_FILE_NAME = "All.mitab.03022013.txt.zip";
 	public static final String FTP_FILE_NAME = "All.mitab.07042015.txt.zip";
 	public static final CharacterEncoding ENCODING = CharacterEncoding.US_ASCII;
 	public static final String FTP_USER_NAME = "ftp";
 
-	@FtpDownload(server = FtpHost.IREFWEB_HOST, path = "irefindex/data/archive/release_10.0/psi_mitab/MITAB2.6/", filename = FTP_FILE_NAME, filetype = FileType.BINARY, username = FTP_USER_NAME, decompress = true, targetFileName="All.mitab.04072015.txt")
+	@FtpDownload(server = FtpHost.IREFWEB_HOST, path = "irefindex/data/archive/release_10.0/psi_mitab/MITAB2.6/", filename = FTP_FILE_NAME, filetype = FileType.BINARY, username = FTP_USER_NAME, decompress = true, targetFileName = "All.mitab.04072015.txt")
 	private File allMitabTxtFile;
 
 	public IRefWebPsiMitab2_6FileParser(File file, CharacterEncoding encoding) throws IOException,
@@ -192,7 +196,8 @@ protected String getExpectedFileHeader() throws IOException {
 	}
 
 	/**
-	 * Extracts information from a line from a file and returns a IRefWebPsiMitab2_5FileData object.
+	 * Extracts information from a line from a file and returns a
+	 * IRefWebPsiMitab2_5FileData object.
 	 * 
 	 * @param miOntologyTermResolver
 	 * @param line
@@ -284,62 +289,61 @@ private IRefWebInteraction getInteraction(String detectionMethodStr, String auth
 	private Set<DataSourceIdentifier<?>> resolveInteractionDbIds(String interactionIdStr) {
 		Set<DataSourceIdentifier<?>> ids = new HashSet<DataSourceIdentifier<?>>();
 		for (String id : interactionIdStr.split(RegExPatterns.PIPE)) {
-			if (id.startsWith("edgetype:") || id.endsWith(":-")) {
-				// do nothing - this is a redundant storage of edge type or a null identifier
-			} else if (id.startsWith("BIND_Translation:")) {
-				ids.add(new BindTranslationId(StringUtil.removePrefix(id, "BIND_Translation:")));
-			} else if (id.startsWith("irigid:")) {
-				ids.add(new IrigId(StringUtil.removePrefix(id, "irigid:")));
-			} else if (id.startsWith("rigid:")) {
-				ids.add(new RigId(StringUtil.removePrefix(id, "rigid:")));
-			} else if (id.startsWith("grid:")) {
-				ids.add(new BioGridID(StringUtil.removePrefix(id, "grid:")));
-			} else if (id.startsWith("bind:")) {
-				ids.add(new BindInteractionID(StringUtil.removePrefix(id, "bind:")));
-			} else if (id.startsWith("MPACT:")) {
-				ids.add(new MpactId(StringUtil.removePrefix(id, "MPACT:")));
-			} else if (id.startsWith("mint:")) {
-				ids.add(new MintID(StringUtil.removePrefix(id, "mint:")));
-			} else if (id.startsWith("intact:")) {
-				ids.add(new IntActID(StringUtil.removePrefix(id, "intact:")));
-			} else if (id.startsWith("dip:")) {
-				ids.add(new DipInteractionID(StringUtil.removePrefix(id, "dip:")));
-			} else if (id.startsWith("ophid:")) {
-				ids.add(new OphidId(StringUtil.removePrefix(id, "ophid:")));
-			} else if (id.startsWith("InnateDB:")) {
-				String idbId = StringUtil.removePrefix(id, "InnateDB:");
-				if (idbId.startsWith("IDB-")) {
-					idbId = StringUtil.removePrefix(idbId, "IDB-");
-				}
-				ids.add(new InnateDbId(idbId));
-			} else if (id.startsWith("innatedb:")) {
-				String idbId = StringUtil.removePrefix(id, "innatedb:");
-				if (idbId.startsWith("IDB-")) {
-					idbId = StringUtil.removePrefix(idbId, "IDB-");
-				}
-				ids.add(new InnateDbId(idbId));
-			} else if (id.startsWith("CORUM:")) {
-				ids.add(new CorumId(StringUtil.removePrefix(id, "CORUM:")));
-			} else if (id.startsWith("mpilit:")) {
-				ids.add(new MpiDbId(StringUtil.removePrefix(id, "mpilit:")));
-			} else if (id.startsWith("mpiimex:")) {
-				ids.add(new MpiDbId(StringUtil.removePrefix(id, "mpiimex:")));
-			} else if (id.startsWith("MatrixDB:")) {
-				ids.add(new MatrixDbId(StringUtil.removePrefix(id, "MatrixDB:")));
-			} else if (id.startsWith("biogrid:")) {
-				ids.add(new BioGridID(StringUtil.removePrefix(id, "biogrid:")));
-			} else if (id.startsWith("pubmed:")) {
-				ids.add(new PubMedID(StringUtil.removePrefix(id, "pubmed:")));
-			} else if (id.startsWith("HPRD")) {
-				try {
+			try {
+				if (id.startsWith("edgetype:") || id.endsWith(":-")) {
+					// do nothing - this is a redundant storage of edge type or
+					// a null identifier
+				} else if (id.startsWith("BIND_Translation:")) {
+					ids.add(new BindTranslationId(StringUtil.removePrefix(id, "BIND_Translation:")));
+				} else if (id.startsWith("irigid:")) {
+					ids.add(new IrigId(StringUtil.removePrefix(id, "irigid:")));
+				} else if (id.startsWith("rigid:")) {
+					ids.add(new RigId(StringUtil.removePrefix(id, "rigid:")));
+				} else if (id.startsWith("grid:")) {
+					ids.add(new BioGridID(StringUtil.removePrefix(id, "grid:")));
+				} else if (id.startsWith("bind:")) {
+					ids.add(new BindInteractionID(StringUtil.removePrefix(id, "bind:")));
+				} else if (id.startsWith("MPACT:")) {
+					ids.add(new MpactId(StringUtil.removePrefix(id, "MPACT:")));
+				} else if (id.startsWith("mint:")) {
+					ids.add(new MintID(StringUtil.removePrefix(id, "mint:")));
+				} else if (id.startsWith("intact:")) {
+					ids.add(new IntActID(StringUtil.removePrefix(id, "intact:")));
+				} else if (id.startsWith("dip:")) {
+					ids.add(new DipInteractionID(StringUtil.removePrefix(id, "dip:")));
+				} else if (id.startsWith("ophid:")) {
+					ids.add(new OphidId(StringUtil.removePrefix(id, "ophid:")));
+				} else if (id.startsWith("InnateDB:")) {
+					String idbId = StringUtil.removePrefix(id, "InnateDB:");
+					if (idbId.startsWith("IDB-")) {
+						idbId = StringUtil.removePrefix(idbId, "IDB-");
+					}
+					ids.add(new InnateDbId(idbId));
+				} else if (id.startsWith("innatedb:")) {
+					String idbId = StringUtil.removePrefix(id, "innatedb:");
+					if (idbId.startsWith("IDB-")) {
+						idbId = StringUtil.removePrefix(idbId, "IDB-");
+					}
+					ids.add(new InnateDbId(idbId));
+				} else if (id.startsWith("CORUM:")) {
+					ids.add(new CorumId(StringUtil.removePrefix(id, "CORUM:")));
+				} else if (id.startsWith("mpilit:")) {
+					ids.add(new MpiDbId(StringUtil.removePrefix(id, "mpilit:")));
+				} else if (id.startsWith("mpiimex:")) {
+					ids.add(new MpiDbId(StringUtil.removePrefix(id, "mpiimex:")));
+				} else if (id.startsWith("MatrixDB:")) {
+					ids.add(new MatrixDbId(StringUtil.removePrefix(id, "MatrixDB:")));
+				} else if (id.startsWith("biogrid:")) {
+					ids.add(new BioGridID(StringUtil.removePrefix(id, "biogrid:")));
+				} else if (id.startsWith("pubmed:")) {
+					ids.add(new PubMedID(StringUtil.removePrefix(id, "pubmed:")));
+				} else if (id.startsWith("HPRD")) {
 					ids.add(new HprdID(StringUtil.removePrefix(id, "HPRD:")));
-				} catch (IllegalArgumentException e) {
-					logger.warn(e.getMessage());
+				} else {
+					ids.add(new UnknownDataSourceIdentifier(id, null));
 				}
-			} else {
-				// throw new IllegalArgumentException("Unknown id prefix: " + id);
-				logger.warn("Unknown id prefix: " + id);
-				// return null;
+			} catch (IllegalArgumentException e) {
+				ids.add(new ProbableErrorDataSourceIdentifier(id, null, e.getMessage()));
 			}
 		}
 		return ids;
@@ -362,10 +366,10 @@ private DataSourceIdentifier<?> resolveInteractorId(String idStr) {
 			return null;
 		}
 		if (idStr.startsWith("xx:")) {
-			return null;
+			return new UnknownDataSourceIdentifier(idStr, null);
 		}
 		if (idStr.startsWith("other:")) {
-			return null;
+			return new UnknownDataSourceIdentifier(idStr, null);
 		}
 		if (idStr.equals("null")) {
 			return null;
@@ -485,14 +489,10 @@ private DataSourceIdentifier<?> resolveInteractorId(String idStr) {
 				return new EntrezGeneID(StringUtil.removePrefix(idStr, "entrezgene:"));
 			}
 		} catch (IllegalArgumentException e) {
-			logger.warn("Invalid identifier due to " + e.getMessage());
-			logger.warn("Trying identifier as GenBank ID...");
-			return getGenbankAccession(idStr);
+			return new ProbableErrorDataSourceIdentifier(idStr, null, e.getMessage());
 		}
 
-		// throw new IllegalArgumentException("Unknown id prefix: " + idStr);
-		logger.warn("Unknown id prefix: " + idStr);
-		return null;
+		return new UnknownDataSourceIdentifier(idStr, null);
 	}
 
 	/**
@@ -526,15 +526,11 @@ private DataSourceIdentifier<?> getRefseqAccession(String acc) {
 	 * @return
 	 */
 	private DataSourceIdentifier<?> getGenbankAccession(String acc) {
-		try {
-			return NucleotideAccessionResolver.resolveNucleotideAccession(acc);
-		} catch (IllegalArgumentException e) {
-			try {
-				return ProteinAccessionResolver.resolveProteinAccession(acc);
-			} catch (IllegalArgumentException e2) {
-				logger.warn("Detected invalid GenBank accession: " + acc);
-				return null;
-			}
+		DataSourceIdentifier<String> nucAccId = NucleotideAccessionResolver.resolveNucleotideAccession(acc);
+		if (ProbableErrorDataSourceIdentifier.class.isInstance(nucAccId)) {
+			return ProteinAccessionResolver.resolveProteinAccession(acc);
+		} else {
+			return nucAccId;
 		}
 	}
 
@@ -634,24 +630,24 @@ private IRefWebInteractor getInteractor(String uniqueIdStr, String altIdStr, Str
 	private Set<String> resolveAliasSymbols(String aliasStr) {
 		Set<String> aliases = new HashSet<String>();
 		for (String alias : aliasStr.split(RegExPatterns.PIPE)) {
-			String aliasSymbol = resolveAliasSymbol(alias);
-			if (aliasSymbol != null) {
+			String aliasSymbol = alias;//resolveAliasSymbol(alias);
+			if (aliasSymbol != null && !aliasSymbol.equals("-")) {
 				aliases.add(aliasSymbol);
 			}
 		}
 		return aliases;
 	}
 
-	/**
-	 * @param alias
-	 * @return
-	 */
-	private String resolveAliasSymbol(String aliasStr) {
-		if (aliasStr.startsWith("entrezgene/locuslink:")) {
-			return new String(StringUtil.removePrefix(aliasStr, "entrezgene/locuslink:"));
-		}
-		return null;
-	}
+//	/**
+//	 * @param alias
+//	 * @return
+//	 */
+//	private String resolveAliasSymbol(String aliasStr) {
+//		if (aliasStr.startsWith("entrezgene/locuslink:")) {
+//			return new String(StringUtil.removePrefix(aliasStr, "entrezgene/locuslink:"));
+//		}
+//		return aliasStr;
+//	}
 
 	/**
 	 * @param aliasStr
@@ -678,7 +674,8 @@ private DataSourceIdentifier<?> resolveAliasId(String aliasStr) {
 		if (aliasStr.startsWith("uniprotkb:")) {
 			return new UniProtEntryName(StringUtil.removePrefix(aliasStr, "uniprotkb:"));
 		} else if (aliasStr.startsWith("entrezgene/locuslink:")) {
-			// ignore, it is a gene symbol and is handled by resolveAliasSymbols()
+			// ignore, it is a gene symbol and is handled by
+			// resolveAliasSymbols()
 			return null;
 		} else if (aliasStr.startsWith("crogid:")) {
 			return new CrogId(StringUtil.removePrefix(aliasStr, "crogid:"));
@@ -691,25 +688,7 @@ private DataSourceIdentifier<?> resolveAliasId(String aliasStr) {
 		} else if (aliasStr.startsWith("hgnc:")) {
 			return new HgncGeneSymbolID(StringUtil.removePrefix(aliasStr, "hgnc:"));
 		}
-		throw new IllegalArgumentException("Unknown id prefix: " + aliasStr);
-	}
-
-	public static void main(String[] args) {
-		BasicConfigurator.configure();
-		File irefwebFile = new File("/tmp/irefweb.sample");
-		try {
-			IRefWebPsiMitab2_6FileParser parser = new IRefWebPsiMitab2_6FileParser(irefwebFile,
-					CharacterEncoding.US_ASCII);
-			while (parser.hasNext()) {
-				parser.next();
-			}
-		} catch (IllegalArgumentException e) {
-			// TODO Auto-generated catch block
-			e.printStackTrace();
-		} catch (IOException e) {
-			// TODO Auto-generated catch block
-			e.printStackTrace();
-		}
+		return new UnknownDataSourceIdentifier(aliasStr, null);
 	}
 
 }
diff --git a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/kegg/KeggGeneIdListFileData.java b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/kegg/KeggGeneIdListFileData.java
index 2cdbc52..07ce879 100755
--- a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/kegg/KeggGeneIdListFileData.java
+++ b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/kegg/KeggGeneIdListFileData.java
@@ -1,4 +1,3 @@
-
 package edu.ucdenver.ccp.datasource.fileparsers.kegg;
 
 /*
@@ -76,13 +75,8 @@ public static KeggGeneIdListFileData parseKeggGeneIDListLine(Line line) {
 			String keggGeneIDStr = toks[0].substring(toks[0].indexOf(":") + 1);
 			KeggGeneID keggInternalGeneID = new KeggGeneID(keggGeneIDStr);
 			DataSourceIdentifier<?> externalGeneID = DataSourceIdResolver.resolveId(toks[1]);
-			if (externalGeneID != null)
-				return new KeggGeneIdListFileData(keggInternalGeneID, externalGeneID, line.getByteOffset(),
-						line.getLineNumber());
-
-			logger.error("External gene id was not resolved from " + toks[1]);
-			return null;
-
+			return new KeggGeneIdListFileData(keggInternalGeneID, externalGeneID, line.getByteOffset(),
+					line.getLineNumber());
 		}
 
 		logger.error("Unexpected number of tokens (" + toks.length + ") on line: " + line.toString());
diff --git a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/kegg/KeggGenesFileData.java b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/kegg/KeggGenesFileData.java
index daf9cde..b53d36e 100644
--- a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/kegg/KeggGenesFileData.java
+++ b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/kegg/KeggGenesFileData.java
@@ -144,8 +144,9 @@ private static Set<DataSourceIdentifier<?>> getDbLink(String line) {
 		String databaseName = toks[0].replaceAll(":", "");
 		for (int i = 1; i < toks.length; i++) {
 			DataSourceIdentifier<?> id = DataSourceIdResolver.resolveId(databaseName, toks[i]);
-			if (id != null)
+			if (id != null) {
 				ids.add(id);
+			}
 		}
 		ids.remove(null);
 		return ids;
diff --git a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/mgi/MRKSequenceFileParser.java b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/mgi/MRKSequenceFileParser.java
index e2d7e1d..56e2ac3 100755
--- a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/mgi/MRKSequenceFileParser.java
+++ b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/mgi/MRKSequenceFileParser.java
@@ -132,13 +132,9 @@ protected MRKSequenceFileData parseRecordFromLine(Line line) {
 			String[] genBankIDs = toks[10].split(RegExPatterns.PIPE);
 			for (String genBankID : genBankIDs) {
 				if (genBankID.trim().length() > 0) {
-					try {
-						DataSourceIdentifier<String> resolveNucleotideAccession = NucleotideAccessionResolver
-								.resolveNucleotideAccession(genBankID);
-						genBankAccessionIDs.add(resolveNucleotideAccession);
-					} catch (IllegalArgumentException e) {
-						logger.warn("Unable to resolve supposed GenBank id: " + genBankID);
-					}
+					DataSourceIdentifier<String> resolveNucleotideAccession = NucleotideAccessionResolver
+							.resolveNucleotideAccession(genBankID);
+					genBankAccessionIDs.add(resolveNucleotideAccession);
 				}
 			}
 		}
diff --git a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ncbi/gene/EntrezGeneInfoFileParser.java b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ncbi/gene/EntrezGeneInfoFileParser.java
index e69f03f..3761a99 100755
--- a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ncbi/gene/EntrezGeneInfoFileParser.java
+++ b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ncbi/gene/EntrezGeneInfoFileParser.java
@@ -161,13 +161,7 @@ public static EntrezGeneInfoFileData parseGeneInfoLine(Line line) {
 		Set<DataSourceIdentifier<?>> dbXrefs = new HashSet<DataSourceIdentifier<?>>();
 		if (!toks[5].equals("-")) {
 			for (String id : toks[5].split("\\|")) {
-				DataSourceIdentifier<?> resolveGeneID = null;
-				try {
-					resolveGeneID = DataSourceIdResolver.resolveId(id);
-				} catch (IllegalArgumentException e) {
-					logger.warn("Exception during ID resolution for id: " + id);
-					resolveGeneID = null;
-				}
+				DataSourceIdentifier<?> resolveGeneID = DataSourceIdResolver.resolveId(id);
 				if (resolveGeneID != null) {
 					dbXrefs.add(resolveGeneID);
 				}
diff --git a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/pharmgkb/PharmGkbGeneFileParser.java b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/pharmgkb/PharmGkbGeneFileParser.java
index 50acece..98d0afb 100644
--- a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/pharmgkb/PharmGkbGeneFileParser.java
+++ b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/pharmgkb/PharmGkbGeneFileParser.java
@@ -33,233 +33,235 @@
  * #L%
  */
 
-
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.List;
-
-import org.apache.commons.lang.StringUtils;
-import org.apache.log4j.Logger;
-
-import edu.ucdenver.ccp.common.download.HttpDownload;
-import edu.ucdenver.ccp.common.file.CharacterEncoding;
-import edu.ucdenver.ccp.common.file.reader.Line;
-import edu.ucdenver.ccp.common.file.reader.StreamLineReader;
-import edu.ucdenver.ccp.common.string.RegExPatterns;
-import edu.ucdenver.ccp.common.string.StringConstants;
-import edu.ucdenver.ccp.common.string.StringUtil;
-import edu.ucdenver.ccp.common.string.StringUtil.RemoveFieldEnclosures;
-import edu.ucdenver.ccp.datasource.fileparsers.SingleLineFileRecordReader;
-import edu.ucdenver.ccp.datasource.identifiers.DataSourceIdentifier;
-import edu.ucdenver.ccp.datasource.identifiers.NucleotideAccessionResolver;
-import edu.ucdenver.ccp.datasource.identifiers.ProteinAccessionResolver;
-import edu.ucdenver.ccp.datasource.identifiers.ebi.uniprot.UniProtID;
-import edu.ucdenver.ccp.datasource.identifiers.ensembl.EnsemblGeneID;
-import edu.ucdenver.ccp.datasource.identifiers.hgnc.HgncID;
-import edu.ucdenver.ccp.datasource.identifiers.ncbi.gene.EntrezGeneID;
-import edu.ucdenver.ccp.datasource.identifiers.ncbi.omim.OmimID;
-import edu.ucdenver.ccp.datasource.identifiers.ncbi.refseq.RefSeqID;
-import edu.ucdenver.ccp.datasource.identifiers.obo.GeneOntologyID;
-import edu.ucdenver.ccp.datasource.identifiers.other.AlfredId;
-import edu.ucdenver.ccp.datasource.identifiers.other.CrossReferenceUrl;
-import edu.ucdenver.ccp.datasource.identifiers.other.CtdId;
-import edu.ucdenver.ccp.datasource.identifiers.other.GenAtlasId;
-import edu.ucdenver.ccp.datasource.identifiers.other.GeneCardId;
-import edu.ucdenver.ccp.datasource.identifiers.other.HugeId;
-import edu.ucdenver.ccp.datasource.identifiers.other.HumanCycGeneId;
-import edu.ucdenver.ccp.datasource.identifiers.other.IupharReceptorId;
-import edu.ucdenver.ccp.datasource.identifiers.other.ModBaseId;
-import edu.ucdenver.ccp.datasource.identifiers.other.MutDbId;
-import edu.ucdenver.ccp.datasource.identifiers.other.UcscGenomeBrowserId;
-import edu.ucdenver.ccp.datasource.identifiers.pharmgkb.PharmGkbID;
-
-/**
- * The file format for the genes.tsv file has changed. This parser should be updated. New header:
- * PharmGKB Accession Id Entrez Id Ensembl Id Name Symbol Alternate Names Alternate Symbols Is VIP
- * Has Variant Annotation Cross-references
- * 
- * @author Colorado Computational Pharmacology, UC Denver; ccpsupport@ucdenver.edu
- * 
- */
-public class PharmGkbGeneFileParser extends SingleLineFileRecordReader<PharmGkbGeneFileRecord> {
-
-	private static final Logger logger = Logger.getLogger(PharmGkbGeneFileParser.class);
-
-	private static final String HEADER = "PharmGKB Accession Id\tEntrez Id\tEnsembl Id\tName\tSymbol\tAlternate Names\tAlternate Symbols\tIs VIP\tHas Variant Annotation\tCross-references\tHas CPIC Dosing Guideline\tChromosome\tChromosomal Start\tChromosomal Stop";
-
-	private static final CharacterEncoding ENCODING = CharacterEncoding.US_ASCII;
-
-	private static final String HUMANCYCGENE_PREFIX = "HumanCycGene:";
-
-	private static final String ALFRED_PREFIX = "alfred:";
-
-	private static final String CTD_PREFIX = "ctd:";
-
-	private static final String ENSEMBL_PREFIX = "ensembl:";
-
-	private static final String ENTREZGENE_PREFIX = "entrezGene:";
-
-	private static final String GENEATLAS_PREFIX = "genAtlas:";
-
-	private static final String GENECARD_PREFIX = "geneCard:";
-
-	private static final String GO_PREFIX = "go:";
-
-	private static final String HGNC_PREFIX = "hgnc:";
-
-	private static final String HUGE_PREFIX = "huge:";
-
-	private static final String IUPHAR_RECEPTOR_PREFIX = "iupharReceptor:";
-
-	private static final String MODBASE_PREFIX = "modBase:";
-
-	private static final String MUTDB_PREFIX = "mutDb:";
-
-	private static final String OMIM_PREFIX = "omim:";
-
-	private static final String REFSEQDNA_PREFIX = "refSeqDna:";
-
-	private static final String REFSEQPROTEIN_PREFIX = "refSeqProtein:";
-
-	private static final String REFSEQRNA_PREFIX = "refSeqRna:";
-
-	private static final String UCSCGENOMEBROWSER_PREFIX = "ucscGenomeBrowser:";
-
-	private static final String UNIPROT_PREFIX = "uniProtKb:";
-
-	private static final String URL_PREFIX = "url:";
-
-	@HttpDownload(url = "https://www.pharmgkb.org/download.do?objId=genes.zip&dlCls=common", fileName = "genes.zip", targetFileName = "genes.tsv", decompress = true)
-	private File pharmGkbGenesFile;
-
-	public PharmGkbGeneFileParser(File dataFile, CharacterEncoding encoding) throws IOException {
-		super(dataFile, encoding, null);
-	}
-
-	public PharmGkbGeneFileParser(File workDirectory, boolean clean) throws IOException {
-		super(workDirectory, ENCODING, null, null, null, clean);
-	}
-
-	@Override
-	protected StreamLineReader initializeLineReaderFromDownload(CharacterEncoding encoding, String skipLinePrefix)
-			throws IOException {
-		return new StreamLineReader(pharmGkbGenesFile, encoding, skipLinePrefix);
-	}
-
-	@Override
-	protected String getFileHeader() throws IOException {
-		return readLine().getText();
-	}
-
-	@Override
-	protected String getExpectedFileHeader() throws IOException {
-		return HEADER;
-	}
-
-	@Override
-	protected PharmGkbGeneFileRecord parseRecordFromLine(Line line) {
-		String[] toks = line.getText().split(RegExPatterns.TAB, -1);
-		PharmGkbID pharmGkbAccessionId = new PharmGkbID(toks[0]);
-		EntrezGeneID entrezGeneId = StringUtils.isNotBlank(toks[1]) ? new EntrezGeneID(toks[1]) : null;
-		EnsemblGeneID ensemblGeneId = StringUtils.isNotBlank(toks[2]) ? new EnsemblGeneID(toks[2]) : null;
-		String name = StringUtils.isNotBlank(toks[3]) ? new String(toks[3]) : null;
-		String symbol = StringUtils.isNotBlank(toks[4]) ? new String(toks[4]) : null;
-		Collection<String> alternativeNames = new ArrayList<String>();
-		if (!toks[5].isEmpty()) {
-			List<String> alternativeNameStrs = StringUtil.delimitAndTrim(toks[5], StringConstants.COMMA,
-					StringConstants.QUOTATION_MARK, RemoveFieldEnclosures.TRUE);
-			for (String altNameStr : alternativeNameStrs) {
-				alternativeNames.add(new String(altNameStr));
-			}
-		}
-		Collection<String> alternativeSymbols = new ArrayList<String>();
-		if (!toks[6].isEmpty()) {
-			List<String> alternativeSymbolStrs = StringUtil.delimitAndTrim(toks[6], StringConstants.COMMA,
-					StringConstants.QUOTATION_MARK, RemoveFieldEnclosures.TRUE);
-			for (String altSymbolStr : alternativeSymbolStrs) {
-				alternativeSymbols.add(new String(altSymbolStr));
-			}
-		}
-		boolean isVip = Boolean.parseBoolean(toks[7]);
-		boolean hasVariantAnnotation = Boolean.parseBoolean(toks[8]);
-		Collection<DataSourceIdentifier<?>> crossReferences = new ArrayList<DataSourceIdentifier<?>>();
-		if (!toks[9].isEmpty()) {
-			for (String refStr : toks[9].split(",")) {
-				DataSourceIdentifier<?> id = null;
-				try {
-					id = resolveCrossRefId(refStr);
-				} catch (IllegalArgumentException e) {
-					logger.warn("Unable to resolve cross-reference: " + refStr + " due to: " + e.getMessage());
-				}
-				if (id != null) {
-					crossReferences.add(id);
-				}
-			}
-		}
-		boolean hasCpicDosingGuideline = Boolean.parseBoolean(toks[10]);
-
-		String chromosome = (toks[11].equalsIgnoreCase("null")) ? null : toks[11];
-		Integer chromosomeStart = (toks[12].equalsIgnoreCase("null")) ? null : Integer.parseInt(toks[12]);
-		Integer chromosomeEnd = (toks[13].equalsIgnoreCase("null")) ? null : Integer.parseInt(toks[13]);
-
-		return new PharmGkbGeneFileRecord(pharmGkbAccessionId, entrezGeneId, ensemblGeneId, name, symbol,
-				alternativeNames, alternativeSymbols, isVip, hasVariantAnnotation, crossReferences,
-				hasCpicDosingGuideline, chromosome, chromosomeStart, chromosomeEnd, line.getByteOffset(),
-				line.getLineNumber());
-	}
-
-	/**
-	 * @param refStr
-	 * @return
-	 */
-	private DataSourceIdentifier<?> resolveCrossRefId(String refStr) {
-		if (refStr.startsWith(HUMANCYCGENE_PREFIX)) {
-			return new HumanCycGeneId(StringUtil.removePrefix(refStr, HUMANCYCGENE_PREFIX));
-		} else if (refStr.startsWith(ALFRED_PREFIX)) {
-			return new AlfredId(StringUtil.removePrefix(refStr, ALFRED_PREFIX));
-		} else if (refStr.startsWith(CTD_PREFIX)) {
-			return new CtdId(StringUtil.removePrefix(refStr, CTD_PREFIX));
-		} else if (refStr.startsWith(ENSEMBL_PREFIX)) {
-			return new EnsemblGeneID(StringUtil.removePrefix(refStr, ENSEMBL_PREFIX));
-		} else if (refStr.startsWith(ENTREZGENE_PREFIX)) {
-			return new EntrezGeneID(StringUtil.removePrefix(refStr, ENTREZGENE_PREFIX));
-		} else if (refStr.startsWith(GENEATLAS_PREFIX)) {
-			return new GenAtlasId(StringUtil.removePrefix(refStr, GENEATLAS_PREFIX));
-		} else if (refStr.startsWith(GENECARD_PREFIX)) {
-			return new GeneCardId(StringUtil.removePrefix(refStr, GENECARD_PREFIX));
-		} else if (refStr.startsWith(GO_PREFIX)) {
-			return new GeneOntologyID(StringUtil.removePrefix(refStr, GO_PREFIX));
-		} else if (refStr.startsWith(HGNC_PREFIX)) {
-			return new HgncID(StringUtil.removePrefix(refStr, HGNC_PREFIX));
-		} else if (refStr.startsWith(HUGE_PREFIX)) {
-			return new HugeId(StringUtil.removePrefix(refStr, HUGE_PREFIX));
-		} else if (refStr.startsWith(IUPHAR_RECEPTOR_PREFIX)) {
-			return new IupharReceptorId(StringUtil.removePrefix(refStr, IUPHAR_RECEPTOR_PREFIX));
-		} else if (refStr.startsWith(MODBASE_PREFIX)) {
-			return new ModBaseId(StringUtil.removePrefix(refStr, MODBASE_PREFIX));
-		} else if (refStr.startsWith(MUTDB_PREFIX)) {
-			return new MutDbId(StringUtil.removePrefix(refStr, MUTDB_PREFIX));
-		} else if (refStr.startsWith(OMIM_PREFIX)) {
-			return new OmimID(StringUtil.removePrefix(refStr, OMIM_PREFIX));
-		} else if (refStr.startsWith(REFSEQDNA_PREFIX)) {
-			return NucleotideAccessionResolver.resolveNucleotideAccession(StringUtil.removePrefix(refStr,
-					REFSEQDNA_PREFIX));
-		} else if (refStr.startsWith(REFSEQRNA_PREFIX)) {
-			return new RefSeqID(StringUtil.removePrefix(refStr, REFSEQRNA_PREFIX));
-		} else if (refStr.startsWith(REFSEQPROTEIN_PREFIX)) {
-			return ProteinAccessionResolver.resolveProteinAccession(StringUtil.removePrefix(refStr,
-					REFSEQPROTEIN_PREFIX));
-		} else if (refStr.startsWith(UCSCGENOMEBROWSER_PREFIX)) {
-			return new UcscGenomeBrowserId(StringUtil.removePrefix(refStr, UCSCGENOMEBROWSER_PREFIX));
-		} else if (refStr.startsWith(UNIPROT_PREFIX)) {
-			return new UniProtID(StringUtil.removePrefix(refStr, UNIPROT_PREFIX));
-		} else if (refStr.startsWith(URL_PREFIX)) {
-			return new CrossReferenceUrl(StringUtil.removePrefix(refStr, URL_PREFIX));
-		} else {
-			throw new IllegalArgumentException("Unknown cross-reference prefix: " + refStr);
-		}
-	}
-
-}
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.log4j.Logger;
+
+import edu.ucdenver.ccp.common.download.HttpDownload;
+import edu.ucdenver.ccp.common.file.CharacterEncoding;
+import edu.ucdenver.ccp.common.file.reader.Line;
+import edu.ucdenver.ccp.common.file.reader.StreamLineReader;
+import edu.ucdenver.ccp.common.string.RegExPatterns;
+import edu.ucdenver.ccp.common.string.StringConstants;
+import edu.ucdenver.ccp.common.string.StringUtil;
+import edu.ucdenver.ccp.common.string.StringUtil.RemoveFieldEnclosures;
+import edu.ucdenver.ccp.datasource.fileparsers.SingleLineFileRecordReader;
+import edu.ucdenver.ccp.datasource.identifiers.DataSourceIdentifier;
+import edu.ucdenver.ccp.datasource.identifiers.NucleotideAccessionResolver;
+import edu.ucdenver.ccp.datasource.identifiers.ProbableErrorDataSourceIdentifier;
+import edu.ucdenver.ccp.datasource.identifiers.ProteinAccessionResolver;
+import edu.ucdenver.ccp.datasource.identifiers.UnknownDataSourceIdentifier;
+import edu.ucdenver.ccp.datasource.identifiers.ebi.uniprot.UniProtID;
+import edu.ucdenver.ccp.datasource.identifiers.ensembl.EnsemblGeneID;
+import edu.ucdenver.ccp.datasource.identifiers.hgnc.HgncID;
+import edu.ucdenver.ccp.datasource.identifiers.ncbi.gene.EntrezGeneID;
+import edu.ucdenver.ccp.datasource.identifiers.ncbi.omim.OmimID;
+import edu.ucdenver.ccp.datasource.identifiers.ncbi.refseq.RefSeqID;
+import edu.ucdenver.ccp.datasource.identifiers.obo.GeneOntologyID;
+import edu.ucdenver.ccp.datasource.identifiers.other.AlfredId;
+import edu.ucdenver.ccp.datasource.identifiers.other.CrossReferenceUrl;
+import edu.ucdenver.ccp.datasource.identifiers.other.CtdId;
+import edu.ucdenver.ccp.datasource.identifiers.other.GenAtlasId;
+import edu.ucdenver.ccp.datasource.identifiers.other.GeneCardId;
+import edu.ucdenver.ccp.datasource.identifiers.other.HugeId;
+import edu.ucdenver.ccp.datasource.identifiers.other.HumanCycGeneId;
+import edu.ucdenver.ccp.datasource.identifiers.other.IupharReceptorId;
+import edu.ucdenver.ccp.datasource.identifiers.other.ModBaseId;
+import edu.ucdenver.ccp.datasource.identifiers.other.MutDbId;
+import edu.ucdenver.ccp.datasource.identifiers.other.UcscGenomeBrowserId;
+import edu.ucdenver.ccp.datasource.identifiers.pharmgkb.PharmGkbID;
+
+/**
+ * Parser for the PharmGKB genes.tsv file. The file format has changed over
+ * time; the header this parser expects is the 14-column HEADER constant
+ * below ("PharmGKB Accession Id" through "Chromosomal Stop"). Keep HEADER and
+ * parseRecordFromLine in step if the format changes again.
+ * 
+ * @author Colorado Computational Pharmacology, UC Denver;
+ *         ccpsupport@ucdenver.edu
+ * 
+ */
+public class PharmGkbGeneFileParser extends SingleLineFileRecordReader<PharmGkbGeneFileRecord> {
+
+	private static final Logger logger = Logger.getLogger(PharmGkbGeneFileParser.class);
+
+	private static final String HEADER = "PharmGKB Accession Id\tEntrez Id\tEnsembl Id\tName\tSymbol\tAlternate Names\tAlternate Symbols\tIs VIP\tHas Variant Annotation\tCross-references\tHas CPIC Dosing Guideline\tChromosome\tChromosomal Start\tChromosomal Stop";
+
+	private static final CharacterEncoding ENCODING = CharacterEncoding.US_ASCII;
+
+	private static final String HUMANCYCGENE_PREFIX = "HumanCycGene:";
+
+	private static final String ALFRED_PREFIX = "alfred:";
+
+	private static final String CTD_PREFIX = "ctd:";
+
+	private static final String ENSEMBL_PREFIX = "ensembl:";
+
+	private static final String ENTREZGENE_PREFIX = "entrezGene:";
+
+	private static final String GENEATLAS_PREFIX = "genAtlas:";
+
+	private static final String GENECARD_PREFIX = "geneCard:";
+
+	private static final String GO_PREFIX = "go:";
+
+	private static final String HGNC_PREFIX = "hgnc:";
+
+	private static final String HUGE_PREFIX = "huge:";
+
+	private static final String IUPHAR_RECEPTOR_PREFIX = "iupharReceptor:";
+
+	private static final String MODBASE_PREFIX = "modBase:";
+
+	private static final String MUTDB_PREFIX = "mutDb:";
+
+	private static final String OMIM_PREFIX = "omim:";
+
+	private static final String REFSEQDNA_PREFIX = "refSeqDna:";
+
+	private static final String REFSEQPROTEIN_PREFIX = "refSeqProtein:";
+
+	private static final String REFSEQRNA_PREFIX = "refSeqRna:";
+
+	private static final String UCSCGENOMEBROWSER_PREFIX = "ucscGenomeBrowser:";
+
+	private static final String UNIPROT_PREFIX = "uniProtKb:";
+
+	private static final String URL_PREFIX = "url:";
+
+	@HttpDownload(url = "https://www.pharmgkb.org/download.do?objId=genes.zip&dlCls=common", fileName = "genes.zip", targetFileName = "genes.tsv", decompress = true)
+	private File pharmGkbGenesFile;
+
+	public PharmGkbGeneFileParser(File dataFile, CharacterEncoding encoding) throws IOException {
+		super(dataFile, encoding, null);
+	}
+
+	public PharmGkbGeneFileParser(File workDirectory, boolean clean) throws IOException {
+		super(workDirectory, ENCODING, null, null, null, clean);
+	}
+
+	@Override
+	protected StreamLineReader initializeLineReaderFromDownload(CharacterEncoding encoding, String skipLinePrefix)
+			throws IOException {
+		return new StreamLineReader(pharmGkbGenesFile, encoding, skipLinePrefix);
+	}
+
+	@Override
+	protected String getFileHeader() throws IOException {
+		return readLine().getText();
+	}
+
+	@Override
+	protected String getExpectedFileHeader() throws IOException {
+		return HEADER;
+	}
+
+	@Override
+	protected PharmGkbGeneFileRecord parseRecordFromLine(Line line) {
+		// limit -1 keeps trailing empty columns in the split result
+		String[] cols = line.getText().split(RegExPatterns.TAB, -1);
+		PharmGkbID accessionId = new PharmGkbID(cols[0]);
+		EntrezGeneID entrezId = StringUtils.isNotBlank(cols[1]) ? new EntrezGeneID(cols[1]) : null;
+		EnsemblGeneID ensemblId = StringUtils.isNotBlank(cols[2]) ? new EnsemblGeneID(cols[2]) : null;
+		String geneName = StringUtils.isNotBlank(cols[3]) ? new String(cols[3]) : null;
+		String geneSymbol = StringUtils.isNotBlank(cols[4]) ? new String(cols[4]) : null;
+		Collection<String> altNames = new ArrayList<String>();
+		if (!cols[5].isEmpty()) {
+			for (String altName : StringUtil.delimitAndTrim(cols[5], StringConstants.COMMA,
+					StringConstants.QUOTATION_MARK, RemoveFieldEnclosures.TRUE)) {
+				altNames.add(new String(altName));
+			}
+		}
+		Collection<String> altSymbols = new ArrayList<String>();
+		if (!cols[6].isEmpty()) {
+			for (String altSymbol : StringUtil.delimitAndTrim(cols[6], StringConstants.COMMA,
+					StringConstants.QUOTATION_MARK, RemoveFieldEnclosures.TRUE)) {
+				altSymbols.add(new String(altSymbol));
+			}
+		}
+		boolean vip = Boolean.parseBoolean(cols[7]);
+		boolean variantAnnotated = Boolean.parseBoolean(cols[8]);
+		// each comma-separated cross-reference is resolved by its prefix
+		Collection<DataSourceIdentifier<?>> xrefs = new ArrayList<DataSourceIdentifier<?>>();
+		if (!cols[9].isEmpty()) {
+			for (String xref : cols[9].split(",")) {
+				DataSourceIdentifier<?> resolved = resolveCrossRefId(xref);
+				if (resolved != null) {
+					xrefs.add(resolved);
+				}
+			}
+		}
+		boolean cpicGuideline = Boolean.parseBoolean(cols[10]);
+
+		// the literal text "null" (any case) stands for a missing chromosome or coordinate
+		String chromosome = "null".equalsIgnoreCase(cols[11]) ? null : cols[11];
+		Integer chromStart = "null".equalsIgnoreCase(cols[12]) ? null : Integer.parseInt(cols[12]);
+		Integer chromEnd = "null".equalsIgnoreCase(cols[13]) ? null : Integer.parseInt(cols[13]);
+
+		return new PharmGkbGeneFileRecord(accessionId, entrezId, ensemblId, geneName, geneSymbol, altNames,
+				altSymbols, vip, variantAnnotated, xrefs, cpicGuideline, chromosome, chromStart, chromEnd,
+				line.getByteOffset(), line.getLineNumber());
+	}
+
+	/**
+	 * @param refStr one prefixed entry of the Cross-references column, e.g. a string starting with "hgnc:"
+	 * @return identifier for the matched prefix; an UnknownDataSourceIdentifier when no prefix matches, or a ProbableErrorDataSourceIdentifier when an IllegalArgumentException is caught
+	 */
+	private DataSourceIdentifier<?> resolveCrossRefId(String refStr) {
+		try {
+			if (refStr.startsWith(HUMANCYCGENE_PREFIX)) {
+				return new HumanCycGeneId(StringUtil.removePrefix(refStr, HUMANCYCGENE_PREFIX));
+			} else if (refStr.startsWith(ALFRED_PREFIX)) {
+				return new AlfredId(StringUtil.removePrefix(refStr, ALFRED_PREFIX));
+			} else if (refStr.startsWith(CTD_PREFIX)) {
+				return new CtdId(StringUtil.removePrefix(refStr, CTD_PREFIX));
+			} else if (refStr.startsWith(ENSEMBL_PREFIX)) {
+				return new EnsemblGeneID(StringUtil.removePrefix(refStr, ENSEMBL_PREFIX));
+			} else if (refStr.startsWith(ENTREZGENE_PREFIX)) {
+				return new EntrezGeneID(StringUtil.removePrefix(refStr, ENTREZGENE_PREFIX));
+			} else if (refStr.startsWith(GENEATLAS_PREFIX)) {
+				return new GenAtlasId(StringUtil.removePrefix(refStr, GENEATLAS_PREFIX));
+			} else if (refStr.startsWith(GENECARD_PREFIX)) {
+				return new GeneCardId(StringUtil.removePrefix(refStr, GENECARD_PREFIX));
+			} else if (refStr.startsWith(GO_PREFIX)) {
+				return new GeneOntologyID(StringUtil.removePrefix(refStr, GO_PREFIX));
+			} else if (refStr.startsWith(HGNC_PREFIX)) {
+				return new HgncID(StringUtil.removePrefix(refStr, HGNC_PREFIX));
+			} else if (refStr.startsWith(HUGE_PREFIX)) {
+				return new HugeId(StringUtil.removePrefix(refStr, HUGE_PREFIX));
+			} else if (refStr.startsWith(IUPHAR_RECEPTOR_PREFIX)) {
+				return new IupharReceptorId(StringUtil.removePrefix(refStr, IUPHAR_RECEPTOR_PREFIX));
+			} else if (refStr.startsWith(MODBASE_PREFIX)) {
+				return new ModBaseId(StringUtil.removePrefix(refStr, MODBASE_PREFIX));
+			} else if (refStr.startsWith(MUTDB_PREFIX)) {
+				return new MutDbId(StringUtil.removePrefix(refStr, MUTDB_PREFIX));
+			} else if (refStr.startsWith(OMIM_PREFIX)) {
+				return new OmimID(StringUtil.removePrefix(refStr, OMIM_PREFIX));
+			} else if (refStr.startsWith(REFSEQDNA_PREFIX)) {
+				return NucleotideAccessionResolver.resolveNucleotideAccession(StringUtil.removePrefix(refStr,
+						REFSEQDNA_PREFIX));
+			} else if (refStr.startsWith(REFSEQRNA_PREFIX)) {
+				return new RefSeqID(StringUtil.removePrefix(refStr, REFSEQRNA_PREFIX));
+			} else if (refStr.startsWith(REFSEQPROTEIN_PREFIX)) {
+				return ProteinAccessionResolver.resolveProteinAccession(StringUtil.removePrefix(refStr,
+						REFSEQPROTEIN_PREFIX));
+			} else if (refStr.startsWith(UCSCGENOMEBROWSER_PREFIX)) {
+				return new UcscGenomeBrowserId(StringUtil.removePrefix(refStr, UCSCGENOMEBROWSER_PREFIX));
+			} else if (refStr.startsWith(UNIPROT_PREFIX)) {
+				return new UniProtID(StringUtil.removePrefix(refStr, UNIPROT_PREFIX));
+			} else if (refStr.startsWith(URL_PREFIX)) {
+				return new CrossReferenceUrl(StringUtil.removePrefix(refStr, URL_PREFIX));
+			} else {
+				return new UnknownDataSourceIdentifier(refStr, null);
+			}
+		} catch (IllegalArgumentException e) {
+			return new ProbableErrorDataSourceIdentifier(refStr, null, e.getMessage());
+		}
+	}
+
+}
diff --git a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/pharmgkb/PharmGkbRelationFileParser.java b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/pharmgkb/PharmGkbRelationFileParser.java
index 146c61c..7159666 100644
--- a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/pharmgkb/PharmGkbRelationFileParser.java
+++ b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/pharmgkb/PharmGkbRelationFileParser.java
@@ -40,9 +40,9 @@
 import java.util.Collection;
 import java.util.HashSet;
 import java.util.Set;
-
+
 import org.apache.log4j.Logger;
-
+
 import edu.ucdenver.ccp.common.file.CharacterEncoding;
 import edu.ucdenver.ccp.common.file.reader.Line;
 import edu.ucdenver.ccp.common.string.RegExPatterns;
@@ -51,6 +51,7 @@
 import edu.ucdenver.ccp.common.string.StringUtil.RemoveFieldEnclosures;
 import edu.ucdenver.ccp.datasource.fileparsers.SingleLineFileRecordReader;
 import edu.ucdenver.ccp.datasource.identifiers.DataSourceIdentifier;
+import edu.ucdenver.ccp.datasource.identifiers.UnknownDataSourceIdentifier;
 import edu.ucdenver.ccp.datasource.identifiers.ncbi.RefSnpID;
 import edu.ucdenver.ccp.datasource.identifiers.pharmgkb.PharmGkbHaplotypeId;
 import edu.ucdenver.ccp.datasource.identifiers.pharmgkb.PharmGkbID;
@@ -127,7 +128,7 @@ private Set<DataSourceIdentifier<?>> resolveEntityId(String idStr, String entity
 			} else if (entityType.equals(ENTITY_TYPE_VARIANT_LOCATION)) {
 				ids.add(new PharmGkbVariantLocationId(id));
 			} else {
-				logger.warn("Unhandled PharmGkb entity type detected: " + idStr + " type = " + entityType);
+				ids.add(new UnknownDataSourceIdentifier(id, null));
 			}
 		}
 		return ids;
diff --git a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/pro/ProMappingFileParser.java b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/pro/ProMappingFileParser.java
index 0bd49a0..94e8bb2 100644
--- a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/pro/ProMappingFileParser.java
+++ b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/pro/ProMappingFileParser.java
@@ -41,10 +41,16 @@
 import edu.ucdenver.ccp.common.file.reader.Line;
 import edu.ucdenver.ccp.common.file.reader.StreamLineReader;
 import edu.ucdenver.ccp.common.ftp.FTPUtil.FileType;
+import edu.ucdenver.ccp.common.string.StringUtil;
 import edu.ucdenver.ccp.datasource.fileparsers.SingleLineFileRecordReader;
-import edu.ucdenver.ccp.datasource.identifiers.DataSourceIdResolver;
 import edu.ucdenver.ccp.datasource.identifiers.DataSourceIdentifier;
+import edu.ucdenver.ccp.datasource.identifiers.ProbableErrorDataSourceIdentifier;
+import edu.ucdenver.ccp.datasource.identifiers.UnknownDataSourceIdentifier;
+import edu.ucdenver.ccp.datasource.identifiers.ebi.uniprot.UniProtID;
+import edu.ucdenver.ccp.datasource.identifiers.hgnc.HgncID;
+import edu.ucdenver.ccp.datasource.identifiers.mgi.MgiGeneID;
 import edu.ucdenver.ccp.datasource.identifiers.obo.ProteinOntologyId;
+import edu.ucdenver.ccp.datasource.identifiers.rgd.RgdID;
 
 /**
  * File parser for Protein Ongology promapping.txt file.
@@ -95,8 +101,8 @@ protected StreamLineReader initializeLineReaderFromDownload(CharacterEncoding en
 	 * (non-Javadoc)
 	 * 
 	 * @see
-	 * edu.ucdenver.ccp.fileparsers.SingleLineFileRecordReader#parseRecordFromLine(edu.ucdenver.
-	 * ccp.common.file.reader.LineReader.Line)
+	 * edu.ucdenver.ccp.datasource.fileparsers.SingleLineFileRecordReader#parseRecordFromLine(
+	 * edu.ucdenver.ccp.common.file.reader.Line)
 	 */
 	@Override
 	protected ProMappingRecord parseRecordFromLine(Line line) {
@@ -106,11 +112,8 @@ protected ProMappingRecord parseRecordFromLine(Line line) {
 		if (text.startsWith("PR:")) {
 
 			String[] tokens = text.split("\t");
-			ProteinOntologyId fromId = (ProteinOntologyId) DataSourceIdResolver.resolveId(tokens[0].trim());
-			if (tokens[1].trim().startsWith("UniProtKB_VAR"))
-				return null;
-
-			DataSourceIdentifier targetId = DataSourceIdResolver.resolveId(tokens[1].trim());
+			ProteinOntologyId fromId = new ProteinOntologyId(tokens[0].trim());
+			DataSourceIdentifier<?> targetId = resolveId(tokens[1].trim());
 			String mappingType = tokens[2].trim();
 
 			r = new ProMappingRecord(fromId, targetId, mappingType, line.getByteOffset(), line.getLineNumber());
@@ -118,4 +121,24 @@ protected ProMappingRecord parseRecordFromLine(Line line) {
 
 		return r;
 	}
+
+	/** Resolves a mapping target id by its prefix (MGI:, RGD:, HGNC:, UniProtKB:). */
+	private DataSourceIdentifier<?> resolveId(String idStr) {
+		try {
+			if (idStr.startsWith("MGI:")) {
+				return new MgiGeneID(idStr);
+			} else if (idStr.startsWith("RGD:")) {
+				return new RgdID(StringUtil.removePrefix(idStr, "RGD:"));
+			} else if (idStr.startsWith("HGNC:")) {
+				return new HgncID(idStr);
+			} else if (idStr.startsWith("UniProtKB:")) {
+				return new UniProtID(StringUtil.removePrefix(idStr, "UniProtKB:"));
+			}
+		} catch (IllegalArgumentException e) {
+			// thrown while building an identifier for a recognized prefix
+			return new ProbableErrorDataSourceIdentifier(idStr, null, e.getMessage());
+		}
+		// no recognized prefix
+		return new UnknownDataSourceIdentifier(idStr, null);
+	}
 }
diff --git a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/rgd/RgdAnnotationFileIdResolver.java b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/rgd/RgdAnnotationFileIdResolver.java
index 69b5c08..2cd990f 100644
--- a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/rgd/RgdAnnotationFileIdResolver.java
+++ b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/rgd/RgdAnnotationFileIdResolver.java
@@ -37,6 +37,7 @@
 
 import edu.ucdenver.ccp.datasource.identifiers.DataSourceIdentifier;
 import edu.ucdenver.ccp.datasource.identifiers.IdResolver;
+import edu.ucdenver.ccp.datasource.identifiers.UnknownDataSourceIdentifier;
 import edu.ucdenver.ccp.datasource.identifiers.ebi.uniprot.UniProtID;
 import edu.ucdenver.ccp.datasource.identifiers.ncbi.gene.EntrezGeneID;
 import edu.ucdenver.ccp.datasource.identifiers.ncbi.omim.OmimID;
@@ -48,7 +49,8 @@
 import edu.ucdenver.ccp.identifier.publication.PubMedID;
 
 /**
- * @author Center for Computational Pharmacology, UC Denver; ccpsupport@ucdenver.edu
+ * @author Center for Computational Pharmacology, UC Denver;
+ *         ccpsupport@ucdenver.edu
  * 
  */
 public class RgdAnnotationFileIdResolver implements IdResolver {
@@ -62,7 +64,8 @@ public DataSourceIdentifier<?> resolveId(String idStr) {
 			return null;
 		}
 		if (idStr.matches("[Rr][Gg][Dd][:;]\\d+")) {
-			// there are instances with mixed case, e.g. RGd: and with semi-colons instead of colons
+			// there are instances with mixed case, e.g. RGd: and with
+			// semi-colons instead of colons
 			return new RgdID(idStr.substring(4));
 		}
 		if (idStr.matches("[Rr][Gg][Dd]\\d+")) {
@@ -82,7 +85,8 @@ public DataSourceIdentifier<?> resolveId(String idStr) {
 			return new RgdID(idStr.substring(4));
 		}
 		if (idStr.matches("\\d+")) {
-			// there are a few typos where the "RGD:" prefix is missing, e.g. 1550157
+			// there are a few typos where the "RGD:" prefix is missing, e.g.
+			// 1550157
 			return new RgdID(idStr);
 		}
 		if (idStr.matches("MP:\\d+")) {
@@ -108,27 +112,29 @@ public DataSourceIdentifier<?> resolveId(String idStr) {
 		}
 		if (idStr.matches("rno:\\d+")) {
 			logger.warn("Ignoring RNO identifier: " + idStr + ". Not sure what this references...");
-			// not sure what this is.. could be a kegg gene? it's used in the withOrFrom column
+			// not sure what this is.. could be a kegg gene? it's used in the
+			// withOrFrom column
 			return null;
 		}
 		if (idStr.startsWith("UniProtKB:")) {
 			return new UniProtID(idStr.substring(10));
 		}
-		throw new IllegalArgumentException("Unhandled ID type: " + idStr);
+		return new UnknownDataSourceIdentifier(idStr, null);
 	}
 
 	/*
 	 * (non-Javadoc)
 	 * 
-	 * @see edu.ucdenver.ccp.datasource.identifiers.IdResolver#resolveId(java.lang.String,
-	 * java.lang.String)
+	 * @see
+	 * edu.ucdenver.ccp.datasource.identifiers.IdResolver#resolveId(
+	 * java.lang.String, java.lang.String)
 	 */
 	@Override
 	public DataSourceIdentifier<?> resolveId(String db, String id) {
 		if (db.equals("RGD") && id.matches("\\d+")) {
 			return new RgdID(id);
 		}
-		throw new IllegalArgumentException("Unhandled ID type -- db:" + db + "  id: " + id);
+		return new UnknownDataSourceIdentifier(id, db);
 	}
 
 }
diff --git a/datasource-fileparsers/src/test/java/edu/ucdenver/ccp/datasource/fileparsers/irefweb/IRefWebPsiMitab2_6FileParserTest.java b/datasource-fileparsers/src/test/java/edu/ucdenver/ccp/datasource/fileparsers/irefweb/IRefWebPsiMitab2_6FileParserTest.java
index 79a4824..2994431 100644
--- a/datasource-fileparsers/src/test/java/edu/ucdenver/ccp/datasource/fileparsers/irefweb/IRefWebPsiMitab2_6FileParserTest.java
+++ b/datasource-fileparsers/src/test/java/edu/ucdenver/ccp/datasource/fileparsers/irefweb/IRefWebPsiMitab2_6FileParserTest.java
@@ -48,6 +48,7 @@
 import edu.ucdenver.ccp.common.file.CharacterEncoding;
 import edu.ucdenver.ccp.datasource.fileparsers.test.RecordReaderTester;
 import edu.ucdenver.ccp.datasource.identifiers.DataSourceIdentifier;
+import edu.ucdenver.ccp.datasource.identifiers.ProbableErrorDataSourceIdentifier;
 import edu.ucdenver.ccp.datasource.identifiers.bind.BindInteractionID;
 import edu.ucdenver.ccp.datasource.identifiers.ebi.uniprot.UniProtEntryName;
 import edu.ucdenver.ccp.datasource.identifiers.ebi.uniprot.UniProtID;
@@ -94,22 +95,28 @@ public void testParser() throws IOException {
 			 * uniprotkb:P38276|refseq:NP_009695|entrezgene/locuslink
 			 * :852434|rogid:UsO9ZYVJXLI50JBd/g0C1NtSeXI559292|irogid:16835195
 			 * uniprotkb:P38276|refseq
-			 * :NP_009695|entrezgene/locuslink:852434|rogid:UsO9ZYVJXLI50JBd/g0C1NtSeXI559292
-			 * |irogid:16835195
-			 * uniprotkb:YBY7_YEAST|entrezgene/locuslink:YBR137W|crogid:UsO9ZYVJXLI50JBd
+			 * :NP_009695|entrezgene/locuslink:852434|rogid
+			 * :UsO9ZYVJXLI50JBd/g0C1NtSeXI559292 |irogid:16835195
+			 * uniprotkb:YBY7_YEAST
+			 * |entrezgene/locuslink:YBR137W|crogid:UsO9ZYVJXLI50JBd
 			 * /g0C1NtSeXI559292|icrogid:16835195
 			 * uniprotkb:YBY7_YEAST|entrezgene/locuslink:YBR137W|crogid
-			 * :UsO9ZYVJXLI50JBd/g0C1NtSeXI559292|icrogid:16835195 MI:0018(2 hybrid) -
-			 * pubmed:10655498 taxid:559292(Saccharomyces cerevisiae S288c)
-			 * taxid:559292(Saccharomyces cerevisiae S288c) - MI:0000(BIND_Translation)
-			 * BIND_Translation:1261|rigid:+++94o2VtVJcuk6jD3H2JZXaVYc|irigid:617101|edgetype:X
-			 * lpr:4518|hpr:5191|np:2 none MI:0000(unspecified) MI:0000(unspecified)
-			 * MI:0000(unspecified) MI:0000(unspecified) MI:0326(protein) MI:0326(protein) - - - - -
-			 * - - - 2010/05/18 2010/05/18 rogid:UsO9ZYVJXLI50JBd/g0C1NtSeXI559292
-			 * rogid:UsO9ZYVJXLI50JBd/g0C1NtSeXI559292 rigid:+++94o2VtVJcuk6jD3H2JZXaVYc false
-			 * refseq:NP_009695 refseq:NP_009695 refseq:NP_009695 refseq:NP_009695 P P 16835195
-			 * 16835195 617101 UsO9ZYVJXLI50JBd/g0C1NtSeXI559292 UsO9ZYVJXLI50JBd/g0C1NtSeXI559292
-			 * +++94o2VtVJcuk6jD3H2JZXaVYc 16835195 16835195 617101 - X 2
+			 * :UsO9ZYVJXLI50JBd/g0C1NtSeXI559292|icrogid:16835195 MI:0018(2
+			 * hybrid) - pubmed:10655498 taxid:559292(Saccharomyces cerevisiae
+			 * S288c) taxid:559292(Saccharomyces cerevisiae S288c) -
+			 * MI:0000(BIND_Translation)
+			 * BIND_Translation:1261|rigid:+++94o2VtVJcuk6jD3H2JZXaVYc
+			 * |irigid:617101|edgetype:X lpr:4518|hpr:5191|np:2 none
+			 * MI:0000(unspecified) MI:0000(unspecified) MI:0000(unspecified)
+			 * MI:0000(unspecified) MI:0326(protein) MI:0326(protein) - - - - -
+			 * - - - 2010/05/18 2010/05/18
+			 * rogid:UsO9ZYVJXLI50JBd/g0C1NtSeXI559292
+			 * rogid:UsO9ZYVJXLI50JBd/g0C1NtSeXI559292
+			 * rigid:+++94o2VtVJcuk6jD3H2JZXaVYc false refseq:NP_009695
+			 * refseq:NP_009695 refseq:NP_009695 refseq:NP_009695 P P 16835195
+			 * 16835195 617101 UsO9ZYVJXLI50JBd/g0C1NtSeXI559292
+			 * UsO9ZYVJXLI50JBd/g0C1NtSeXI559292 +++94o2VtVJcuk6jD3H2JZXaVYc
+			 * 16835195 16835195 617101 - X 2
 			 */
 			parser.next();
 
@@ -122,23 +129,29 @@ public void testParser() throws IOException {
 			/*
 			 * rogid:Ivetsb7L/rt8ds+TyhtJZKxTtVE9796 uniprotkb:P05132
 			 * PDB:1YDT_I|PDB:1YDR_I|PDB:1YDS_I
-			 * |PDB:1FMO_I|PDB:1STC_I|rogid:Ivetsb7L/rt8ds+TyhtJZKxTtVE9796|irogid:9981084
-			 * uniprotkb:
-			 * P05132|refseq:NP_032880|entrezgene/locuslink:18747|rogid:HdW51RuiujpUxo0Fu8TbWz3Yk8c10090
+			 * |PDB:1FMO_I|PDB:1STC_I|rogid:Ivetsb7L
+			 * /rt8ds+TyhtJZKxTtVE9796|irogid:9981084 uniprotkb:
+			 * P05132|refseq:NP_032880
+			 * |entrezgene/locuslink:18747|rogid:HdW51RuiujpUxo0Fu8TbWz3Yk8c10090
 			 * |irogid:2201887
-			 * rogid:Ivetsb7L/rt8ds+TyhtJZKxTtVE9796|crogid:Ivetsb7L/rt8ds+TyhtJZKxTtVE9796
-			 * |icrogid:9981084|-
-			 * uniprotkb:KAPCA_MOUSE|entrezgene/locuslink:Prkaca|crogid:HdW51RuiujpUxo0Fu8TbWz3Yk8c10090
-			 * |icrogid:2201887 MI:0114(three-dimensional-structure) - pubmed:1862342
-			 * taxid:9796(Equus caballus) taxid:10090(Mus musculus) - MI:0462(bind)
-			 * bind:76262|rigid:++f9f/9TQhDLvdrGu56SalIhHSA|irigid:617146|edgetype:X
-			 * lpr:1|hpr:6|np:6 none MI:0000(unspecified) MI:0000(unspecified) MI:0000(unspecified)
-			 * MI:0000(unspecified) MI:0326(protein) MI:0326(protein) - - - - - - - - 2010/05/18
-			 * 2010/05/18 rogid:Ivetsb7L/rt8ds+TyhtJZKxTtVE9796
-			 * rogid:HdW51RuiujpUxo0Fu8TbWz3Yk8c10090 rigid:++f9f/9TQhDLvdrGu56SalIhHSA false
-			 * GenBank:"1FMO_I" GenBank:NP_032880 PDB:1FMO_I refseq:NP_032880 PT P 9981084 2201887
-			 * 617146 Ivetsb7L/rt8ds+TyhtJZKxTtVE9796 HdW51RuiujpUxo0Fu8TbWz3Yk8c10090
-			 * ++f9f/9TQhDLvdrGu56SalIhHSA 9981084 2201887 617146 - X 2
+			 * rogid:Ivetsb7L/rt8ds+TyhtJZKxTtVE9796|crogid:Ivetsb7L
+			 * /rt8ds+TyhtJZKxTtVE9796 |icrogid:9981084|-
+			 * uniprotkb:KAPCA_MOUSE|entrezgene
+			 * /locuslink:Prkaca|crogid:HdW51RuiujpUxo0Fu8TbWz3Yk8c10090
+			 * |icrogid:2201887 MI:0114(three-dimensional-structure) -
+			 * pubmed:1862342 taxid:9796(Equus caballus) taxid:10090(Mus
+			 * musculus) - MI:0462(bind)
+			 * bind:76262|rigid:++f9f/9TQhDLvdrGu56SalIhHSA
+			 * |irigid:617146|edgetype:X lpr:1|hpr:6|np:6 none
+			 * MI:0000(unspecified) MI:0000(unspecified) MI:0000(unspecified)
+			 * MI:0000(unspecified) MI:0326(protein) MI:0326(protein) - - - - -
+			 * - - - 2010/05/18 2010/05/18 rogid:Ivetsb7L/rt8ds+TyhtJZKxTtVE9796
+			 * rogid:HdW51RuiujpUxo0Fu8TbWz3Yk8c10090
+			 * rigid:++f9f/9TQhDLvdrGu56SalIhHSA false GenBank:"1FMO_I"
+			 * GenBank:NP_032880 PDB:1FMO_I refseq:NP_032880 PT P 9981084
+			 * 2201887 617146 Ivetsb7L/rt8ds+TyhtJZKxTtVE9796
+			 * HdW51RuiujpUxo0Fu8TbWz3Yk8c10090 ++f9f/9TQhDLvdrGu56SalIhHSA
+			 * 9981084 2201887 617146 - X 2
 			 */
 
 			IRefWebPsiMitab2_6FileData record = parser.next();
@@ -175,9 +188,12 @@ public void testParser() throws IOException {
 			expectedAliasesB.add(new IcrogId("2201887"));
 			assertEquals(expectedAliasesB, record.getInteractorB().getAliasIds());
 
-			assertEmpty(record.getInteractorA().getAliasSymbols());
+			assertEquals(CollectionsUtil.createSet("rogid:Ivetsb7L/rt8ds+TyhtJZKxTtVE9796",
+					"crogid:Ivetsb7L/rt8ds+TyhtJZKxTtVE9796", "icrogid:9981084"), record.getInteractorA()
+					.getAliasSymbols());
 
-			Set<String> expectedAliasBSymbols = CollectionsUtil.createSet(new String("Prkaca"));
+			Set<String> expectedAliasBSymbols = CollectionsUtil.createSet("crogid:HdW51RuiujpUxo0Fu8TbWz3Yk8c10090",
+					"entrezgene/locuslink:Prkaca", "icrogid:2201887", "uniprotkb:KAPCA_MOUSE");
 			assertEquals(expectedAliasBSymbols, record.getInteractorB().getAliasSymbols());
 
 			assertEquals(new IRefWebInteractionDetectionMethod(new MolecularInteractionOntologyTermID("MI:0114"),
@@ -189,12 +205,13 @@ public void testParser() throws IOException {
 
 			assertEquals(new IRefWebInteractorOrganism(new NcbiTaxonomyID(9796), "Equus caballus"), record
 					.getInteractorA().getNcbiTaxonomyId());
-			assertEquals(new IRefWebInteractorOrganism(new NcbiTaxonomyID(10090), "Mus musculus"), record.getInteractorB()
-					.getNcbiTaxonomyId());
+			assertEquals(new IRefWebInteractorOrganism(new NcbiTaxonomyID(10090), "Mus musculus"), record
+					.getInteractorB().getNcbiTaxonomyId());
 
 			assertNull(record.getInteraction().getInteractionType());
 
-			assertEquals(new IRefWebInteractionSourceDatabase(new MolecularInteractionOntologyTermID("MI:0462"), "bind"),
+			assertEquals(
+					new IRefWebInteractionSourceDatabase(new MolecularInteractionOntologyTermID("MI:0462"), "bind"),
 					record.getSourceDb());
 
 			Set<DataSourceIdentifier<?>> expectedInteractionDbIds = new HashSet<DataSourceIdentifier<?>>();
@@ -208,55 +225,57 @@ public void testParser() throws IOException {
 
 			assertEquals("none", record.getInteraction().getExpansion());
 
-			assertEquals(new IRefWebInteractorBiologicalRole(new MolecularInteractionOntologyTermID("MI:0000"), "unspecified"),
-					record.getInteractorA().getBiologicalRole());
-			assertEquals(new IRefWebInteractorBiologicalRole(new MolecularInteractionOntologyTermID("MI:0000"), "unspecified"),
-					record.getInteractorB().getBiologicalRole());
-			assertEquals(new IRefWebInteractorExperimentalRole(new MolecularInteractionOntologyTermID("MI:0000"), "unspecified"),
-					record.getInteractorA().getExperimentalRole());
-			assertEquals(new IRefWebInteractorExperimentalRole(new MolecularInteractionOntologyTermID("MI:0000"), "unspecified"),
-					record.getInteractorB().getExperimentalRole());
+			assertEquals(new IRefWebInteractorBiologicalRole(new MolecularInteractionOntologyTermID("MI:0000"),
+					"unspecified"), record.getInteractorA().getBiologicalRole());
+			assertEquals(new IRefWebInteractorBiologicalRole(new MolecularInteractionOntologyTermID("MI:0000"),
+					"unspecified"), record.getInteractorB().getBiologicalRole());
+			assertEquals(new IRefWebInteractorExperimentalRole(new MolecularInteractionOntologyTermID("MI:0000"),
+					"unspecified"), record.getInteractorA().getExperimentalRole());
+			assertEquals(new IRefWebInteractorExperimentalRole(new MolecularInteractionOntologyTermID("MI:0000"),
+					"unspecified"), record.getInteractorB().getExperimentalRole());
 
-			assertEquals(new IRefWebInteractorType(new MolecularInteractionOntologyTermID("MI:0326"), "protein"), record
-					.getInteractorA().getInteractorType());
-			assertEquals(new IRefWebInteractorType(new MolecularInteractionOntologyTermID("MI:0326"), "protein"), record
-					.getInteractorB().getInteractorType());
+			assertEquals(new IRefWebInteractorType(new MolecularInteractionOntologyTermID("MI:0326"), "protein"),
+					record.getInteractorA().getInteractorType());
+			assertEquals(new IRefWebInteractorType(new MolecularInteractionOntologyTermID("MI:0326"), "protein"),
+					record.getInteractorB().getInteractorType());
 
 			assertNull(record.getInteraction().getHostOrgTaxonomyId());
-			
+
 			assertEquals("2010/05/18", record.getCreationDate());
 			assertEquals("2010/05/18", record.getUpdateDate());
-			
+
 			assertEquals(new RogId("Ivetsb7L/rt8ds+TyhtJZKxTtVE9796"), record.getInteractorA().getChecksum());
 			assertEquals(new RogId("HdW51RuiujpUxo0Fu8TbWz3Yk8c10090"), record.getInteractorB().getChecksum());
 			assertEquals(new RigId("++f9f/9TQhDLvdrGu56SalIhHSA"), record.getInteraction().getChecksumInteraction());
-			
+
 			assertFalse(record.getInteraction().isNegative());
-			
-			assertNull(record.getInteractorA().getOriginalReference());
+
+			assertEquals(new ProbableErrorDataSourceIdentifier("\"1FMO_I\"", null,
+					"Input is not a known protein accession pattern: \"1FMO_I\""), record.getInteractorA()
+					.getOriginalReference());
 			assertEquals(new RefSeqID("NP_032880"), record.getInteractorB().getOriginalReference());
 			assertEquals(new PdbID("1FMO_I"), record.getInteractorA().getFinalReference());
 			assertEquals(new RefSeqID("NP_032880"), record.getInteractorB().getFinalReference());
-			
+
 			assertEquals("PT", record.getInteractorA().getMappingScore());
 			assertEquals("P", record.getInteractorB().getMappingScore());
-			
+
 			assertEquals(new IrogId("9981084"), record.getInteractorA().getIrogid());
 			assertEquals(new IrogId("2201887"), record.getInteractorB().getIrogid());
 			assertEquals(new IrigId("617146"), record.getInteraction().getIrigid());
-			
+
 			assertEquals(new CrogId("Ivetsb7L/rt8ds+TyhtJZKxTtVE9796"), record.getInteractorA().getCrogid());
 			assertEquals(new CrogId("HdW51RuiujpUxo0Fu8TbWz3Yk8c10090"), record.getInteractorB().getCrogid());
 			assertEquals(new CrigId("++f9f/9TQhDLvdrGu56SalIhHSA"), record.getInteraction().getCrigid());
-			
+
 			assertEquals(new IcrogId("9981084"), record.getInteractorA().getIcrogid());
 			assertEquals(new IcrogId("2201887"), record.getInteractorB().getIcrogid());
 			assertEquals(new IcrigId("617146"), record.getInteraction().getIcrigid());
-			
+
 			assertNull(record.getInteraction().getImexId());
-			
+
 			assertEquals("X", record.getInteraction().getEdgeType());
-			
+
 			assertEquals(2, record.getInteraction().getNumParticipants());
 
 		} else {
diff --git a/datasource-fileparsers/src/test/java/edu/ucdenver/ccp/datasource/fileparsers/pro/ProMappingFileParserTest.java b/datasource-fileparsers/src/test/java/edu/ucdenver/ccp/datasource/fileparsers/pro/ProMappingFileParserTest.java
index 8083235..8c9f3f9 100644
--- a/datasource-fileparsers/src/test/java/edu/ucdenver/ccp/datasource/fileparsers/pro/ProMappingFileParserTest.java
+++ b/datasource-fileparsers/src/test/java/edu/ucdenver/ccp/datasource/fileparsers/pro/ProMappingFileParserTest.java
@@ -33,14 +33,22 @@
  * #L%
  */
 
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.fail;
+
 import java.io.IOException;
+import java.util.NoSuchElementException;
 
-import org.junit.Ignore;
 import org.junit.Test;
 
 import edu.ucdenver.ccp.common.file.CharacterEncoding;
-import edu.ucdenver.ccp.datasource.fileparsers.RecordReader;
 import edu.ucdenver.ccp.datasource.fileparsers.test.RecordReaderTester;
+import edu.ucdenver.ccp.datasource.identifiers.DataSourceIdentifier;
+import edu.ucdenver.ccp.datasource.identifiers.UnknownDataSourceIdentifier;
+import edu.ucdenver.ccp.datasource.identifiers.ebi.uniprot.UniProtID;
+import edu.ucdenver.ccp.datasource.identifiers.hgnc.HgncID;
+import edu.ucdenver.ccp.datasource.identifiers.obo.ProteinOntologyId;
 
 /**
  * 
@@ -57,16 +65,60 @@ protected String getSampleFileName() {
 	}
 
 	@Override
-	protected RecordReader<?> initSampleRecordReader() throws IOException {
+	protected ProMappingFileParser initSampleRecordReader() throws IOException {
 		return new ProMappingFileParser(sampleInputFile, CharacterEncoding.US_ASCII);
 	}
 
-	@Ignore("Test not yet implemented.. ")
 	@Test
-	public void testParser() {
-		
+	public void testParser() throws IOException {
+		ProMappingFileParser parser = initSampleRecordReader();
+
+		if (parser.hasNext()) {
+			validateRecord1(parser.next());
+		} else {
+			fail("Parser should have returned a record here.");
+		}
+
+		if (parser.hasNext()) {
+			validateRecord2(parser.next());
+		} else {
+			fail("Parser should have returned a record here.");
+		}
+
+		if (parser.hasNext()) {
+			validateRecord3(parser.next());
+		} else {
+			fail("Parser should have returned a record here.");
+		}
+		assertFalse(parser.hasNext());
+
+		try {
+			parser.next();
+			fail("Should have thrown a NoSuchElementException.");
+		} catch (NoSuchElementException nsee) {
+			// do nothing, exception expected
+		}
+
+	}
+
+	private void validateRecord(ProMappingRecord record, ProteinOntologyId expectedPrId, String expectedMappingType,
+			DataSourceIdentifier<?> expectedTargetId) {
+		assertEquals(expectedPrId, record.getProteinOntologyId());
+		assertEquals(expectedMappingType, record.getMappingType());
+		assertEquals(expectedTargetId, record.getTargetRecordId());
+	}
+
+	private void validateRecord1(ProMappingRecord record) {
+		validateRecord(record, new ProteinOntologyId("PR:000000005"), "is_a", new HgncID("HGNC:11773"));
+	}
+
+	private void validateRecord2(ProMappingRecord record) {
+		validateRecord(record, new ProteinOntologyId("PR:000000005"), "is_a", new UnknownDataSourceIdentifier(
+				"UniProtKB_VAR:VAR_022359", null));
+	}
 
+	private void validateRecord3(ProMappingRecord record) {
+		validateRecord(record, new ProteinOntologyId("PR:000000006"), "exact", new UniProtID("P37173"));
 	}
 
-	
 }
diff --git a/datasource-fileparsers/src/test/resources/edu/ucdenver/ccp/datasource/fileparsers/pro/PRO_promapping.txt b/datasource-fileparsers/src/test/resources/edu/ucdenver/ccp/datasource/fileparsers/pro/PRO_promapping.txt
index 8f1dbf7..dab78df 100644
--- a/datasource-fileparsers/src/test/resources/edu/ucdenver/ccp/datasource/fileparsers/pro/PRO_promapping.txt
+++ b/datasource-fileparsers/src/test/resources/edu/ucdenver/ccp/datasource/fileparsers/pro/PRO_promapping.txt
@@ -1,3 +1,3 @@
 PR:000000005	HGNC:11773	is_a
-PR:000000005	MGI:98729	is_a
+PR:000000005	UniProtKB_VAR:VAR_022359	is_a
 PR:000000006	UniProtKB:P37173	exact
\ No newline at end of file
diff --git a/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/DataSource.java b/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/DataSource.java
index e13efa2..a214cd7 100644
--- a/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/DataSource.java
+++ b/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/DataSource.java
@@ -372,7 +372,19 @@ public String getLocalName() {
 	KIAODIP("http://kabob.ucdenver.edu/iao/dip/"),
 	KIAOIREFWEB("http://kabob.ucdenver.edu/iao/irefweb/"),
 	KIAOEMBL("http://kabob.ucdenver.edu/iao/embl/"),
-	KRO("http://kabob.ucdenver.edu/ro/");
+	KRO("http://kabob.ucdenver.edu/ro/"),
+	
+	/**
+	 * to be used for data source identifiers whose source is unknown or not yet modeled.
+	 */
+	UNKNOWN(null),
+	/**
+	 * to be used for data source identifiers that are thought to be incorrect, e.g. 
+	 * a UniProt ID that doesn't match the expected regular expression or an NCBI Gene 
+	 * ID that is not an integer.
+	 */
+	PROBABLE_ERROR(null);
+		
 	public final String longName;
 
 	DataSource(String longName) {
diff --git a/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/DataSourceElement.java b/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/DataSourceElement.java
index 00d1229..3c370b6 100644
--- a/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/DataSourceElement.java
+++ b/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/DataSourceElement.java
@@ -45,7 +45,7 @@ public abstract class DataSourceElement<T> {
 	/**
 	 * raw data element
 	 */
-	private T dataElement;
+	protected T dataElement;
 	
 	/**
 	 * Default constructor. 
diff --git a/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/DataSourceIdResolver.java b/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/DataSourceIdResolver.java
index fdbc8c6..08de587 100644
--- a/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/DataSourceIdResolver.java
+++ b/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/DataSourceIdResolver.java
@@ -132,16 +132,18 @@
 import edu.ucdenver.ccp.identifier.publication.PubMedID;
 
 /**
- * provides various methods to map from an ID in database or ontology files to instances of
- * identifier classes under edu.ucdenver.ccp.datasource.identifiers.
+ * provides various methods to map from an ID in database or ontology files to
+ * instances of identifier classes under
+ * edu.ucdenver.ccp.datasource.identifiers.
  * 
- * These are basically factory methods. Given some information about where the ID came from and an
- * ID string, it creates an instance of an identifier class related to the source. This is done for
- * DataSourceIdentifiers, PMID identifiers and others.
+ * These are basically factory methods. Given some information about where the
+ * ID came from and an ID string, it creates an instance of an identifier class
+ * related to the source. This is done for DataSourceIdentifiers, PMID
+ * identifiers and others.
  * 
- * Three functions named resolveId(): - a value of the DataSource enum and an ID string. - a name of
- * a data source and and ID string. - an ID string that is parsed to discover the data source it
- * came from.
+ * Three functions named resolveId(): - a value of the DataSource enum and an ID
+ * string. - a name of a data source and an ID string. - an ID string that is
+ * parsed to discover the data source it came from.
  **/
 public class DataSourceIdResolver {
 
@@ -210,7 +212,8 @@ public static DataSourceIdentifier<?> resolveId(DataSource dataSource, String da
 
 	}
 
-	// TODO: remove this method and replace its use with resolveId(DataSource, String)
+	// TODO: remove this method and replace its use with resolveId(DataSource,
+	// String)
 	public static DataSourceIdentifier<?> resolveId(String databaseName, String databaseObjectID) {
 		if (databaseName.equalsIgnoreCase("MGI"))
 			return new MgiGeneID(databaseObjectID);
@@ -290,8 +293,9 @@ else if (databaseName.equalsIgnoreCase("url")) {
 				|| databaseName.equalsIgnoreCase("GenBank Protein Database"))
 			return new GenBankID(databaseObjectID);
 
-		logger.warn("Unable to resolve data source identifier: datasource=" + databaseName + " id=" + databaseObjectID);
-		return null;
+		logger.warn("Unable to resolve data source identifier: datasource=" + databaseName + " id=" + databaseObjectID
+				+ ". Using UnknownDataSourceIdentifier.");
+		return new UnknownDataSourceIdentifier(databaseObjectID, databaseName);
 	}
 
 	/**
@@ -445,14 +449,13 @@ else if (geneIDStr.startsWith("CL:"))
 			else if (geneIDStr.startsWith("NCBITaxon:"))
 				return new NcbiTaxonomyID(StringUtil.removePrefix(geneIDStr, "NCBITaxon:"));
 
-			logger.error(String
-					.format("Unknown gene ID format: %s. Cannot create DataElementIdentifier<?>.", geneIDStr));
-
+			logger.warn(String.format("Unhandled gene ID format: %s. Creating UnknownDataSourceIdentifier.", geneIDStr));
+			return new UnknownDataSourceIdentifier(geneIDStr, null);
 		} catch (IllegalArgumentException e) {
-			logger.warn("Invalid ID detected... " +  e.getMessage());
+			logger.warn("Invalid ID detected... " + e.getMessage());
+			return new ProbableErrorDataSourceIdentifier(geneIDStr, null, e.getMessage());
 		}
 
-		return null;
 	}
 
 	/**
@@ -460,21 +463,23 @@ else if (geneIDStr.startsWith("NCBITaxon:"))
 	 * 
 	 * @param interactionIDStr
 	 *            id to resolve
-	 * @return identifier if argument is resolvable and supported; otherwise, return null.
+	 * @return identifier if argument is resolvable and supported; otherwise,
+	 *         an UnknownDataSourceIdentifier wrapping the raw input.
 	 */
 	private static DataSourceIdentifier<?> resolveInteractionID(String interactionIDStr) {
-		if (interactionIDStr.startsWith("intact:"))
+		if (interactionIDStr.startsWith("intact:")) {
 			return new IntActID(StringUtil.removePrefix(interactionIDStr, "intact:"));
-		else if (interactionIDStr.startsWith("bind:"))
+		} else if (interactionIDStr.startsWith("bind:")) {
 			return new BindInteractionID(StringUtil.removePrefix(interactionIDStr, "bind:"));
-		else if (interactionIDStr.startsWith("grid:"))
+		} else if (interactionIDStr.startsWith("grid:")) {
 			return new BioGridID(StringUtil.removePrefix(interactionIDStr, "grid:"));
-		else if (interactionIDStr.startsWith("mint:"))
+		} else if (interactionIDStr.startsWith("mint:")) {
 			return new MintID(StringUtil.removePrefix(interactionIDStr, "mint:"));
+		}
 
-		logger.error(String.format("Unknown interaction ID format: %s. Cannot create DataElementIdentifier<?>.",
+		logger.warn(String.format("Unknown interaction ID format: %s. Cannot create DataElementIdentifier<?>.",
 				interactionIDStr));
-		return null;
+		return new UnknownDataSourceIdentifier(interactionIDStr, null);
 	}
 
 	/**
@@ -482,8 +487,8 @@ else if (interactionIDStr.startsWith("mint:"))
 	 * 
 	 * @param interactionIDStrs
 	 *            ids to resolve
-	 * @return identifier if all members of <code>interactionIDStrs</code> are resolvable and
-	 *         supported; otherwise, return null.
+	 * @return identifier if all members of <code>interactionIDStrs</code> are
+	 *         resolvable and supported; otherwise, return null.
 	 */
 	public static Set<DataSourceIdentifier<?>> resolveInteractionIDs(Set<String> interactionIDStrs) {
 		Set<DataSourceIdentifier<?>> interactionIDs = new HashSet<DataSourceIdentifier<?>>();
@@ -500,26 +505,29 @@ public static Set<DataSourceIdentifier<?>> resolveInteractionIDs(Set<String> int
 	 * Resolve Pubmed ID from value that starts with prefix 'pubmed:'.
 	 * 
 	 * @param pmidStr
-	 * @return id if value following prefix is a positive integer; otherwise, null
+	 * @return id if value following prefix is a positive integer; otherwise,
+	 *         a ProbableErrorDataSourceIdentifier wrapping the raw input
 	 */
-	public static PubMedID resolvePubMedID(String pmidStr) {
+	public static DataSourceIdentifier<?> resolvePubMedID(String pmidStr) {
 		String prefix = "pubmed:";
 		if (pmidStr.startsWith(prefix)) {
 			String id = StringUtil.removePrefix(pmidStr, prefix);
-			if (StringUtil.isIntegerGreaterThanZero(id))
+			if (StringUtil.isIntegerGreaterThanZero(id)) {
 				return new PubMedID(id);
+			}
 		}
 
-		logger.error(String.format("Unknown PubMed ID format: %s. Cannot create PubMedID.", pmidStr));
-		return null;
+		logger.warn(String.format("Unknown PubMed ID format: %s. Cannot create PubMedID.", pmidStr));
+		return new ProbableErrorDataSourceIdentifier(pmidStr, null, "Invalid PubMedID, must be an integer.");
 	}
 
-	public static Set<PubMedID> resolvePubMedIDs(Set<String> pmidStrs) {
-		Set<PubMedID> pmids = new HashSet<PubMedID>();
+	public static Set<DataSourceIdentifier<?>> resolvePubMedIDs(Set<String> pmidStrs) {
+		Set<DataSourceIdentifier<?>> pmids = new HashSet<DataSourceIdentifier<?>>();
 		for (String pmidStr : pmidStrs) {
-			PubMedID id = resolvePubMedID(pmidStr);
-			if (id == null)
+			DataSourceIdentifier<?> id = resolvePubMedID(pmidStr);
+			if (id == null) {
 				return null;
+			}
 
 			pmids.add(id);
 		}
@@ -530,8 +538,9 @@ public static Set<DataSourceIdentifier<?>> resolveIds(Set<String> databaseObject
 		Set<DataSourceIdentifier<?>> databaseObjectIDs = new HashSet<DataSourceIdentifier<?>>();
 		for (String databaseObjectIDStr : databaseObjectIDStrs) {
 			DataSourceIdentifier<?> id = resolveId(databaseObjectIDStr);
-			if (id != null)
+			if (id != null) {
 				databaseObjectIDs.add(id);
+			}
 		}
 		return databaseObjectIDs;
 	}
diff --git a/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/DataSourceIdentifier.java b/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/DataSourceIdentifier.java
index b760dcf..a9cecd0 100644
--- a/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/DataSourceIdentifier.java
+++ b/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/DataSourceIdentifier.java
@@ -43,7 +43,7 @@
  */
 public abstract class DataSourceIdentifier<T> extends DataSourceElement<T> {
 
-	private final DataSource dataSource;
+	protected final DataSource dataSource;
 	
 	/**
 	 * Default constructor.
diff --git a/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/NucleotideAccessionResolver.java b/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/NucleotideAccessionResolver.java
index 8c7e9dc..f192f76 100644
--- a/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/NucleotideAccessionResolver.java
+++ b/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/NucleotideAccessionResolver.java
@@ -43,6 +43,8 @@
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import org.apache.log4j.Logger;
+
 import edu.ucdenver.ccp.common.collections.CollectionsUtil;
 import edu.ucdenver.ccp.datasource.identifiers.ebi.embl.EmblID;
 import edu.ucdenver.ccp.datasource.identifiers.ncbi.GenBankID;
@@ -54,11 +56,14 @@
  * Resolution of accession identifiers based on prefixes available here:
  * http://www.ncbi.nlm.nih.gov/Sequin/acc.html
  * 
- * @author Colorado Computational Pharmacology, UC Denver; ccpsupport@ucdenver.edu
+ * @author Colorado Computational Pharmacology, UC Denver;
+ *         ccpsupport@ucdenver.edu
  * 
  */
 public class NucleotideAccessionResolver {
 
+	private static final Logger logger = Logger.getLogger(NucleotideAccessionResolver.class);
+
 	private static final Pattern ACC_PATTERN = Pattern.compile("([A-Z]+)\\d+\\.?\\d*");
 
 	private static final Set<String> GENBANK_ID_PREFIXES = CollectionsUtil.createSet("CH", "CM", "DS", "EM", "EN",
@@ -136,7 +141,8 @@ public static DataSourceIdentifier<String> resolveNucleotideAccession(String acc
 				}
 			}
 		}
-		throw new IllegalArgumentException("Input is not a known nucleotide accession: " + acc);
+		logger.warn("Input is not a known nucleotide accession: " + acc);
+		return new ProbableErrorDataSourceIdentifier(acc, null, "Input is not a known nucleotide accession: " + acc);
 	}
 
 }
diff --git a/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/ProbableErrorDataSourceIdentifier.java b/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/ProbableErrorDataSourceIdentifier.java
new file mode 100644
index 0000000..f481ad8
--- /dev/null
+++ b/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/ProbableErrorDataSourceIdentifier.java
@@ -0,0 +1,65 @@
+package edu.ucdenver.ccp.datasource.identifiers;
+
+public class ProbableErrorDataSourceIdentifier extends DataSourceIdentifier<String> {
+
+	private final String dataSourceStr;
+	private final String errorMessage;
+
+	public ProbableErrorDataSourceIdentifier(String resourceID, String dataSourceStr, String errorMessage) {
+		super(resourceID, DataSource.PROBABLE_ERROR);
+		this.dataSourceStr = dataSourceStr;
+		this.errorMessage = errorMessage;
+	}
+
+	@Override
+	public String validate(String resourceID) throws IllegalArgumentException {
+		return resourceID;
+	}
+
+	public String getDataSourceStr() {
+		return dataSourceStr;
+	}
+
+	public String getErrorMessage() {
+		return errorMessage;
+	}
+
+	@Override
+	public String toString() {
+		return "ProbableErrorDataSourceIdentifier [dataSourceStr=" + dataSourceStr + ", errorMessage=" + errorMessage
+				+ ", getDataElement()=" + getDataElement() + "]";
+	}
+
+	@Override
+	public int hashCode() {
+		final int prime = 31;
+		int result = super.hashCode();
+		result = prime * result + ((dataSourceStr == null) ? 0 : dataSourceStr.hashCode());
+		return result;
+	}
+
+	@Override
+	public boolean equals(Object obj) {
+		if (this == obj)
+			return true;
+		if (!super.equals(obj))
+			return false;
+		if (getClass() != obj.getClass())
+			return false;
+		ProbableErrorDataSourceIdentifier other = (ProbableErrorDataSourceIdentifier) obj;
+		if (dataSourceStr == null) {
+			if (other.dataSourceStr != null)
+				return false;
+		} else if (!dataSourceStr.equals(other.dataSourceStr))
+			return false;
+		return true;
+	}
+	
+	
+
+	
+	
+	
+	
+
+}
diff --git a/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/ProteinAccessionResolver.java b/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/ProteinAccessionResolver.java
index 33862f2..3827dc0 100644
--- a/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/ProteinAccessionResolver.java
+++ b/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/ProteinAccessionResolver.java
@@ -39,6 +39,8 @@
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import org.apache.log4j.Logger;
+
 import edu.ucdenver.ccp.datasource.identifiers.ebi.embl.EmblID;
 import edu.ucdenver.ccp.datasource.identifiers.ebi.uniprot.UniProtID;
 import edu.ucdenver.ccp.datasource.identifiers.ncbi.GenBankID;
@@ -49,10 +51,13 @@
  * Resolution of accession identifiers based on prefixes available here:
  * http://www.ncbi.nlm.nih.gov/Sequin/acc.html
  * 
- * @author Colorado Computational Pharmacology, UC Denver; ccpsupport@ucdenver.edu
+ * @author Colorado Computational Pharmacology, UC Denver;
+ *         ccpsupport@ucdenver.edu
  * 
  */
 public class ProteinAccessionResolver {
+	
+	private static final Logger logger = Logger.getLogger(ProteinAccessionResolver.class);
 
 	private static final Pattern ACC_PATTERN = Pattern.compile("([A-Z]{3})\\d+\\.?\\d*");
 	private static final String VALID_UNIPROT_PATTERN_1 = "[A-NR-Z][0-9][A-Z][A-Z0-9][A-Z0-9][0-9]";
@@ -100,7 +105,9 @@ public static DataSourceIdentifier<String> resolveProteinAccession(String acc) {
 				return new GenBankID(acc);
 			}
 		}
-		throw new IllegalArgumentException("Input is not a known protein accession pattern: " + acc);
+		logger.warn("Input is not a known protein accession pattern: " + acc);
+		return new ProbableErrorDataSourceIdentifier(acc, null, "Input is not a known protein accession pattern: "
+				+ acc);
 	}
 
 }
diff --git a/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/UnknownDataSourceIdentifier.java b/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/UnknownDataSourceIdentifier.java
new file mode 100644
index 0000000..65eeb4c
--- /dev/null
+++ b/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/UnknownDataSourceIdentifier.java
@@ -0,0 +1,21 @@
+package edu.ucdenver.ccp.datasource.identifiers;
+
+public class UnknownDataSourceIdentifier extends DataSourceIdentifier<String> {
+
+	private final String dataSourceStr;
+
+	public UnknownDataSourceIdentifier(String resourceID,  String dataSourceStr) {
+		super(resourceID, DataSource.UNKNOWN);
+		this.dataSourceStr = dataSourceStr;
+	}
+
+	@Override
+	public String validate(String resourceID) throws IllegalArgumentException {
+		return resourceID;
+	}
+
+	public String getDataSourceStr() {
+		return dataSourceStr;
+	}
+
+}

From f85466a2b162f7fc55a95f89327186527f8d4a78 Mon Sep 17 00:00:00 2001
From: bill-baumgartner <bill.baumgartner@gmail.com>
Date: Mon, 8 Feb 2016 15:40:05 -0700
Subject: [PATCH 13/36] Added handling for non-normalized (no URI) and
 erroneous identifiers

Instead of being excluded from the output RDF, non-normalized and
erroneous identifiers are now cataloged as either
NonNormalizedIdentifierRecords or ErroneousIdentifierRecords.
---
 .../ProbableErrorDataSourceIdentifier.java    |  34 ++
 .../UnknownDataSourceIdentifier.java          |  34 ++
 .../rdf/ice/ErroneousIdentifierRecord.java    |  72 ++++
 .../ice/NonNormalizedIdentifierRecord.java    |  64 +++
 .../rdfizer/rdf/ice/RdfRecordUriFactory.java  |  29 +-
 .../rdfizer/rdf/ice/RdfRecordUtil.java        | 250 ++++++++----
 .../rdfizer/rdf/ice/RdfRecordWriterImpl.java  | 378 +++++++++---------
 ...ImplErroneousAndUnknownIdentifierTest.java | 180 +++++++++
 8 files changed, 771 insertions(+), 270 deletions(-)
 create mode 100644 datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/ErroneousIdentifierRecord.java
 create mode 100644 datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/NonNormalizedIdentifierRecord.java
 create mode 100644 datasource-rdfizer/src/test/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RdfRecordWriterImplErroneousAndUnknownIdentifierTest.java

diff --git a/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/ProbableErrorDataSourceIdentifier.java b/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/ProbableErrorDataSourceIdentifier.java
index f481ad8..f0394aa 100644
--- a/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/ProbableErrorDataSourceIdentifier.java
+++ b/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/ProbableErrorDataSourceIdentifier.java
@@ -1,5 +1,39 @@
 package edu.ucdenver.ccp.datasource.identifiers;
 
+/*
+ * #%L
+ * Colorado Computational Pharmacology's datasource
+ * 							project
+ * %%
+ * Copyright (C) 2012 - 2016 Regents of the University of Colorado
+ * %%
+ * Redistribution and use in source and binary forms, with or without modification,
+ * are permitted provided that the following conditions are met:
+ * 
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ * 
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ * 
+ * 3. Neither the name of the Regents of the University of Colorado nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software without
+ *    specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * #L%
+ */
+
 public class ProbableErrorDataSourceIdentifier extends DataSourceIdentifier<String> {
 
 	private final String dataSourceStr;
diff --git a/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/UnknownDataSourceIdentifier.java b/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/UnknownDataSourceIdentifier.java
index 65eeb4c..14a91db 100644
--- a/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/UnknownDataSourceIdentifier.java
+++ b/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/UnknownDataSourceIdentifier.java
@@ -1,5 +1,39 @@
 package edu.ucdenver.ccp.datasource.identifiers;
 
+/*
+ * #%L
+ * Colorado Computational Pharmacology's datasource
+ * 							project
+ * %%
+ * Copyright (C) 2012 - 2016 Regents of the University of Colorado
+ * %%
+ * Redistribution and use in source and binary forms, with or without modification,
+ * are permitted provided that the following conditions are met:
+ * 
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ * 
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ * 
+ * 3. Neither the name of the Regents of the University of Colorado nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software without
+ *    specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * #L%
+ */
+
 public class UnknownDataSourceIdentifier extends DataSourceIdentifier<String> {
 
 	private final String dataSourceStr;
diff --git a/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/ErroneousIdentifierRecord.java b/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/ErroneousIdentifierRecord.java
new file mode 100644
index 0000000..e18baea
--- /dev/null
+++ b/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/ErroneousIdentifierRecord.java
@@ -0,0 +1,72 @@
+package edu.ucdenver.ccp.datasource.rdfizer.rdf.ice;
+
+/*
+ * #%L
+ * Colorado Computational Pharmacology's datasource
+ * 							project
+ * %%
+ * Copyright (C) 2012 - 2016 Regents of the University of Colorado
+ * %%
+ * Redistribution and use in source and binary forms, with or without modification,
+ * are permitted provided that the following conditions are met:
+ * 
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ * 
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ * 
+ * 3. Neither the name of the Regents of the University of Colorado nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software without
+ *    specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * #L%
+ */
+
+import edu.ucdenver.ccp.datasource.fileparsers.Record;
+import edu.ucdenver.ccp.datasource.fileparsers.RecordField;
+import edu.ucdenver.ccp.datasource.identifiers.DataSource;
+
+@Record(dataSource = DataSource.KABOB)
+public class ErroneousIdentifierRecord {
+
+	@RecordField
+	private final String identifier;
+
+	@RecordField
+	private final String datasource;
+
+	@RecordField
+	private final String comment;
+
+	public ErroneousIdentifierRecord(String identifier, String datasource, String comment) {
+		super();
+		this.identifier = identifier;
+		this.datasource = datasource;
+		this.comment = comment;
+	}
+
+	public String getIdentifier() {
+		return identifier;
+	}
+
+	public String getDatasource() {
+		return datasource;
+	}
+
+	public String getComment() {
+		return comment;
+	}
+
+}
diff --git a/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/NonNormalizedIdentifierRecord.java b/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/NonNormalizedIdentifierRecord.java
new file mode 100644
index 0000000..24cb1c5
--- /dev/null
+++ b/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/NonNormalizedIdentifierRecord.java
@@ -0,0 +1,64 @@
+package edu.ucdenver.ccp.datasource.rdfizer.rdf.ice;
+
+/*
+ * #%L
+ * Colorado Computational Pharmacology's datasource
+ * 							project
+ * %%
+ * Copyright (C) 2012 - 2016 Regents of the University of Colorado
+ * %%
+ * Redistribution and use in source and binary forms, with or without modification,
+ * are permitted provided that the following conditions are met:
+ * 
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ * 
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ * 
+ * 3. Neither the name of the Regents of the University of Colorado nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software without
+ *    specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * #L%
+ */
+
+import edu.ucdenver.ccp.datasource.fileparsers.Record;
+import edu.ucdenver.ccp.datasource.fileparsers.RecordField;
+import edu.ucdenver.ccp.datasource.identifiers.DataSource;
+
+@Record(dataSource = DataSource.KABOB)
+public class NonNormalizedIdentifierRecord {
+
+	@RecordField
+	private final String identifier;
+
+	@RecordField
+	private final String datasource;
+
+	public NonNormalizedIdentifierRecord(String identifier, String datasource) {
+		super();
+		this.identifier = identifier;
+		this.datasource = datasource;
+	}
+
+	public String getIdentifier() {
+		return identifier;
+	}
+
+	public String getDatasource() {
+		return datasource;
+	}
+
+}
diff --git a/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RdfRecordUriFactory.java b/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RdfRecordUriFactory.java
index 2a5b87c..ff8d7a4 100644
--- a/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RdfRecordUriFactory.java
+++ b/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RdfRecordUriFactory.java
@@ -44,15 +44,19 @@
 import java.util.Map.Entry;
 import java.util.Set;
 
+import org.openrdf.model.Statement;
 import org.openrdf.model.Value;
 import org.openrdf.model.impl.URIImpl;
 import org.openrdf.rio.ntriples.NTriplesUtil;
 
+import edu.ucdenver.ccp.common.collections.CollectionsUtil;
 import edu.ucdenver.ccp.common.digest.DigestUtil;
 import edu.ucdenver.ccp.common.reflection.PrivateAccessor;
 import edu.ucdenver.ccp.datasource.fileparsers.RecordField;
 import edu.ucdenver.ccp.datasource.fileparsers.RecordUtil;
 import edu.ucdenver.ccp.datasource.identifiers.DataSource;
+import edu.ucdenver.ccp.datasource.identifiers.ProbableErrorDataSourceIdentifier;
+import edu.ucdenver.ccp.datasource.identifiers.UnknownDataSourceIdentifier;
 import edu.ucdenver.ccp.datasource.rdfizer.rdf.vocabulary.KIAO;
 
 /**
@@ -178,6 +182,30 @@ private static List<String> getSortedFieldValueUriStrs(Collection<Object> fieldV
 	 *            could be a collection, if so we return one string per value
 	 */
 	private static String getFieldValueUri(Object fieldValue) {
+		/* unknown and probable-error identifiers are represented by the statements of a stand-in record rather than by a URI */
+		if (fieldValue instanceof UnknownDataSourceIdentifier) {
+			UnknownDataSourceIdentifier id = (UnknownDataSourceIdentifier) fieldValue;
+			NonNormalizedIdentifierRecord record = new NonNormalizedIdentifierRecord(id.getDataElement(), id.getDataSourceStr());
+			URIImpl recordUri = RdfRecordUriFactory.createRecordUri(record);
+			List<Statement> recordInstanceStatements = RdfRecordUtil.getRecordInstanceStatements(record, System.currentTimeMillis(),
+					recordUri, null, null, null);
+			recordInstanceStatements.remove(0);
+			/* this is used to generate sha1 hashes, so it doesn't need to be a true uri */
+			return CollectionsUtil.createDelimitedString(recordInstanceStatements, " ");
+		} else if (fieldValue instanceof ProbableErrorDataSourceIdentifier) {
+			ProbableErrorDataSourceIdentifier id = (ProbableErrorDataSourceIdentifier) fieldValue;
+			ErroneousIdentifierRecord record = new ErroneousIdentifierRecord(id.getDataElement(),
+					id.getDataSourceStr(), id.getErrorMessage());
+			URIImpl recordUri = RdfRecordUriFactory.createRecordUri(record);
+			List<Statement> recordInstanceStatements = RdfRecordUtil.getRecordInstanceStatements(record, System.currentTimeMillis(),
+					recordUri, null, null, null);
+			/*
+			 * the first statement returned is a dataset has_part record triple
+			 * which we do not need
+			 */
+			recordInstanceStatements.remove(0);
+			return CollectionsUtil.createDelimitedString(recordInstanceStatements, " ");
+		}
 		Value value = RdfUtil.getValue(fieldValue);
 		return NTriplesUtil.toNTriplesString(value);
 	}
@@ -224,7 +252,6 @@ private static Collection<Object> getFieldValues(Object record, Field field) {
 			return null;
 		}
 
-		int fieldCount = 0;
 		Collection<Object> fieldValues = new ArrayList<Object>();
 
 		if (!(fieldValue instanceof Collection)) {
diff --git a/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RdfRecordUtil.java b/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RdfRecordUtil.java
index 1e2b49b..d7e0fe6 100644
--- a/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RdfRecordUtil.java
+++ b/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RdfRecordUtil.java
@@ -53,6 +53,7 @@
 import org.openrdf.model.impl.StatementImpl;
 import org.openrdf.model.impl.URIImpl;
 
+import edu.ucdenver.ccp.common.collections.CollectionsUtil;
 import edu.ucdenver.ccp.common.reflection.PrivateAccessor;
 import edu.ucdenver.ccp.common.string.StringConstants;
 import edu.ucdenver.ccp.datasource.fileparsers.DataRecord;
@@ -60,6 +61,8 @@
 import edu.ucdenver.ccp.datasource.fileparsers.RecordField;
 import edu.ucdenver.ccp.datasource.fileparsers.RecordUtil;
 import edu.ucdenver.ccp.datasource.identifiers.DataSource;
+import edu.ucdenver.ccp.datasource.identifiers.ProbableErrorDataSourceIdentifier;
+import edu.ucdenver.ccp.datasource.identifiers.UnknownDataSourceIdentifier;
 import edu.ucdenver.ccp.datasource.rdfizer.rdf.filter.DuplicateStatementFilter;
 import edu.ucdenver.ccp.datasource.rdfizer.rdf.ice.RdfRecordUriFactory.IncludeVersion;
 import edu.ucdenver.ccp.datasource.rdfizer.rdf.vocabulary.DC;
@@ -72,26 +75,31 @@
 /**
  * Static utility functions for creating RDF
  * 
- * @author Colorado Computational Pharmacology, UC Denver; ccpsupport@ucdenver.edu
+ * @author Colorado Computational Pharmacology, UC Denver;
+ *         ccpsupport@ucdenver.edu
  */
 public class RdfRecordUtil {
 
-	private static final Logger logger = Logger.getLogger(RdfRecordUtil.class);
-	
+	// private static final Logger logger =
+	// Logger.getLogger(RdfRecordUtil.class);
+
 	// /**
 	// *
 	// *
 	// * @param recordClass
 	// * @return Collection of created statements
 	// */
-	// public static Collection<? extends Statement> getRecordSchemaStatements(Class<?> recordClass)
+	// public static Collection<? extends Statement>
+	// getRecordSchemaStatements(Class<?> recordClass)
 	// {
 	// Collection<Statement> stmts = new LinkedHashSet<Statement>();
-	// RdfNamespace ns = RdfNamespace.getNamespace(RecordUtil.getRecordDataSource(recordClass));
+	// RdfNamespace ns =
+	// RdfNamespace.getNamespace(RecordUtil.getRecordDataSource(recordClass));
 	// String recordComment = RecordUtil.getRecordComment(recordClass);
 	// String recordVersion = RecordUtil.getRecordSchemaVersion(recordClass);
 	//
-	// URIImpl recordClsUri = RdfUtil.createKiaoUri(ns, recordClass.getSimpleName());
+	// URIImpl recordClsUri = RdfUtil.createKiaoUri(ns,
+	// recordClass.getSimpleName());
 	// stmts.add(new StatementImpl(recordClsUri, RDFS.SUBCLASS_OF.uri(),
 	// IAO.INFORMATION_CONTENT_ENITITY.uri()));
 	// if (recordComment != null && !recordComment.isEmpty()) {
@@ -103,7 +111,8 @@ public class RdfRecordUtil {
 	//
 	// Map<Field, RecordField> fieldToRecordFieldAnnotationMap = RecordUtil
 	// .getFieldToRecordFieldAnnotationsMap(recordClass);
-	// for (Entry<Field, RecordField> entry : fieldToRecordFieldAnnotationMap.entrySet()) {
+	// for (Entry<Field, RecordField> entry :
+	// fieldToRecordFieldAnnotationMap.entrySet()) {
 	// if (isFieldSubRecord(entry.getKey())) {
 	// Field f = entry.getKey();
 	// if (Collection.class.isAssignableFrom(f.getType())) {
@@ -114,7 +123,8 @@ public class RdfRecordUtil {
 	// stmts.addAll(getRecordSchemaStatements((Class<?>) genericTypes[0]));
 	// }
 	// } else {
-	// throw new IllegalStateException("Non-parameterized collection detected in record class: "
+	// throw new
+	// IllegalStateException("Non-parameterized collection detected in record class: "
 	// + recordClass.getName() + " Please parameterize.");
 	// }
 	// } else {
@@ -122,14 +132,18 @@ public class RdfRecordUtil {
 	// }
 	// }
 	// String fieldName = entry.getKey().getName();
-	// String fieldComment = RecordUtil.getRecordFieldComment(recordClass, fieldName);
-	// String fieldVersion = RecordUtil.getRecordFieldVersion(recordClass, fieldName);
+	// String fieldComment = RecordUtil.getRecordFieldComment(recordClass,
+	// fieldName);
+	// String fieldVersion = RecordUtil.getRecordFieldVersion(recordClass,
+	// fieldName);
 	// boolean isKeyField = RecordUtil.isKeyRecordField(recordClass, fieldName);
 	//
-	// URIImpl fieldTemplateUri = RdfRecordUriFactory.createDataFieldTemplateUri(recordClass,
+	// URIImpl fieldTemplateUri =
+	// RdfRecordUriFactory.createDataFieldTemplateUri(recordClass,
 	// fieldName,
 	// IncludeVersion.YES);
-	// stmts.add(new StatementImpl(recordClsUri, RO.HAS_PART.uri(), fieldTemplateUri));
+	// stmts.add(new StatementImpl(recordClsUri, RO.HAS_PART.uri(),
+	// fieldTemplateUri));
 	// if (fieldComment != null && !fieldComment.isEmpty()) {
 	// stmts.add(new StatementImpl(fieldTemplateUri, RDFS.COMMENT.uri(),
 	// RdfUtil.createLiteral(fieldComment)));
@@ -140,7 +154,8 @@ public class RdfRecordUtil {
 	// if (isKeyField) {
 	// //stmts.add(new StatementImpl(fieldTemplateUri, DC.IDENTIFIER.uri(),
 	// RdfUtil.createLiteral(isKeyField)));
-	// stmts.add(new StatementImpl(recordClsUri, KIAO.HAS_KEY_FIELD.uri(), fieldTemplateUri));
+	// stmts.add(new StatementImpl(recordClsUri, KIAO.HAS_KEY_FIELD.uri(),
+	// fieldTemplateUri));
 	// }
 	// }
 	//
@@ -148,8 +163,10 @@ public class RdfRecordUtil {
 	// }
 
 	// /**
-	// * Generate statements about datasets, records and their types for specified namespace within
-	// * KABOB namespace. Each class represents a dataset made up of its class of records and their
+	// * Generate statements about datasets, records and their types for
+	// specified namespace within
+	// * KABOB namespace. Each class represents a dataset made up of its class
+	// of records and their
 	// * fields.
 	// *
 	// * @param recordTypes
@@ -157,11 +174,13 @@ public class RdfRecordUtil {
 	// * target namespace
 	// * @return statements
 	// */
-	// public static List<? extends Statement> getRecordSchemaDefinitionStatements(Class<? extends
+	// public static List<? extends Statement>
+	// getRecordSchemaDefinitionStatements(Class<? extends
 	// DataRecord> recordClass) {
 	// List<Statement> statements = new ArrayList<Statement>();
 	//
-	// RdfNamespace ns = RdfNamespace.getNamespace(RecordUtil.getRecordDataSource(recordClass));
+	// RdfNamespace ns =
+	// RdfNamespace.getNamespace(RecordUtil.getRecordDataSource(recordClass));
 	//
 	// URIImpl dataSourceUri = RdfUtil.createKiaoUri(ns, ns.lowerName() +
 	// KIAO.KABOB_DATASOURCE.termName());
@@ -188,14 +207,16 @@ public class RdfRecordUtil {
 	// statements.add(new StatementImpl(fieldUri, RDFS.SUBCLASS_OF.uri(),
 	// KIAO.KABOB_DATAFIELD.uri()));
 	//
-	// statements.addAll(getRecordFieldDeclarationStatements(recordClass, null));
+	// statements.addAll(getRecordFieldDeclarationStatements(recordClass,
+	// null));
 	//
 	// return statements;
 	// }
 
 	/**
-	 * Generate statements about class' fields specified namespace within KABOB namespace. Each
-	 * field is a subclass of generic field in namespace and part of dataset.
+	 * Generate statements about class' fields specified namespace within KABOB
+	 * namespace. Each field is a subclass of generic field in namespace and
+	 * part of dataset.
 	 * 
 	 * 
 	 * @param recordClass
@@ -205,13 +226,14 @@ public class RdfRecordUtil {
 	 * @param version
 	 *            structural version label
 	 * @param parentSchemaUri
-	 *            if not null, record schema is asserted to be {@link RdfPredicate#RO_PARTOF} parent
-	 *            schema.
+	 *            if not null, record schema is asserted to be
+	 *            {@link RdfPredicate#RO_PARTOF} parent schema.
 	 * @param fieldComment
-	 *            the field comment is used to capture @RecordField comments on fields that are
-	 *            subrecords
+	 *            the field comment is used to capture @RecordField comments on
+	 *            fields that are subrecords
 	 * @param isKeyField
-	 * @return statements about fields; empty result is returned for anonymous classes.
+	 * @return statements about fields; empty result is returned for anonymous
+	 *         classes.
 	 */
 	public static Collection<? extends Statement> getRecordSchemaStatements(Class<?> recordClass,
 			URIImpl parentSchemaUri, String fieldComment, boolean isKeyField) {
@@ -219,16 +241,18 @@ public static Collection<? extends Statement> getRecordSchemaStatements(Class<?>
 		Collection<Statement> statements = new ArrayList<Statement>();
 
 		/*
-		 * The following two statements are meta statements that will be redundant if multiple
-		 * record schemas are combined. Note that the first statement is supposed to be a self-loop.
+		 * The following two statements are meta statements that will be
+		 * redundant if multiple record schemas are combined. Note that the
+		 * first statement is supposed to be a self-loop.
 		 */
 		statements.add(new StatementImpl(KIAO.SCHEMA.uri(), RO.HAS_PART.uri(), KIAO.SCHEMA.uri()));
 		statements.add(new StatementImpl(KIAO.SCHEMA.uri(), RO.HAS_PART.uri(), KIAO.FIELD.uri()));
 
 		/*
-		 * The following adds the kiaosource:Record rdfs:subClassOf iao:IAO_0000030 (information
-		 * content entity) triple. This triple is not really part of the schema, however it only
-		 * needs to be added one time so this seems like a good place to put it.
+		 * The following adds the kiaosource:Record rdfs:subClassOf
+		 * iao:IAO_0000030 (information content entity) triple. This triple is
+		 * not really part of the schema, however it only needs to be added one
+		 * time so this seems like a good place to put it.
 		 */
 		URIImpl recordClsUri = RdfUtil.createKiaoUri(ns, recordClass.getSimpleName());
 		statements.add(new StatementImpl(recordClsUri, RDFS.SUBCLASS_OF.uri(), IAO.INFORMATION_CONTENT_ENITITY.uri()));
@@ -272,8 +296,8 @@ public static Collection<? extends Statement> getRecordSchemaStatements(Class<?>
 
 		for (Field field : sortedFields) {
 			/*
-			 * If the RecordField annotation is not present, then this field does not get serialized
-			 * in the RDF, e.g. the logger field
+			 * If the RecordField annotation is not present, then this field
+			 * does not get serialized in the RDF, e.g. the logger field
 			 */
 			if (field.isAnnotationPresent(RecordField.class)) {
 				String fComment = RecordUtil.getRecordFieldComment(recordClass, field.getName());
@@ -330,9 +354,10 @@ private static String getFieldLabel(Class<?> recordClass, String fieldName) {
 
 	/**
 	 * @param recordClass
-	 * @return a label for the record by first looking for an explicitly defined label in the @Record
-	 *         annotation. If not present, a label is generated by adding spaces to replace
-	 *         camel-case in the Record name
+	 * @return a label for the record by first looking for an explicitly defined
+	 *         label in the @Record annotation. If not present, a label is
+	 *         generated by adding spaces to replace camel-case in the Record
+	 *         name
 	 */
 	private static String getRecordLabel(Class<?> recordClass) {
 		String label = RecordUtil.getRecordLabel(recordClass);
@@ -351,7 +376,8 @@ private static String getRecordLabel(Class<?> recordClass) {
 	 * 
 	 * @param field
 	 *            to check
-	 * @return field type, or generic type if field's type is a {@link Collection}
+	 * @return field type, or generic type if field's type is a
+	 *         {@link Collection}
 	 */
 	private static Class<?> getFieldType(Field field) {
 		Class<?> klass = field.getType();
@@ -370,8 +396,8 @@ private static Class<?> getFieldType(Field field) {
 	}
 
 	/**
-	 * Determine whether class should be treated as sub-record definition. If field type is
-	 * collection, it's generic type is used.
+	 * Determine whether class should be treated as sub-record definition. If
+	 * field type is collection, its generic type is used.
 	 * 
 	 * @param field
 	 *            to check
@@ -403,12 +429,16 @@ private static boolean isFieldSubRecord(Field field) {
 	private static boolean isFieldSubRecord(Class<?> klass) {
 		return klass.isAnnotationPresent(Record.class);
 		// return DataRecord.class.isAssignableFrom(klass);
-		// if (!(DataSourceElement.class.isAssignableFrom(klass) || klass.isPrimitive() ||
+		// if (!(DataSourceElement.class.isAssignableFrom(klass) ||
+		// klass.isPrimitive() ||
 		// klass.isArray()
 		// || klass.isEnum() || klass.isSynthetic() || klass.isAnnotation()
-		// || Collection.class.isAssignableFrom(klass) || String.class.isAssignableFrom(klass)
-		// || Number.class.isAssignableFrom(klass) || Boolean.class.isAssignableFrom(klass)
-		// || java.util.Date.class.isAssignableFrom(klass) || URI.class.isAssignableFrom(klass) ||
+		// || Collection.class.isAssignableFrom(klass) ||
+		// String.class.isAssignableFrom(klass)
+		// || Number.class.isAssignableFrom(klass) ||
+		// Boolean.class.isAssignableFrom(klass)
+		// || java.util.Date.class.isAssignableFrom(klass) ||
+		// URI.class.isAssignableFrom(klass) ||
 		// URL.class
 		// .isAssignableFrom(klass))) {
 		// return true;
@@ -418,7 +448,8 @@ private static boolean isFieldSubRecord(Class<?> klass) {
 	}
 
 	/**
-	 * Get collection of statements that instance datasource, records and fields for given record.
+	 * Get collection of statements that instance datasource, records and fields
+	 * for given record.
 	 * 
 	 * @param record
 	 * @param src
@@ -459,13 +490,13 @@ public static List<? extends Statement> getDataSourceInstanceStatements(DataReco
 	}
 
 	/**
-	 * Generate instance statements about this particular instance of {@link DataRecord}. Statements
-	 * include assertions about record and it's fields types and values. All record fields are
-	 * included.
+	 * Generate instance statements about this particular instance of
+	 * {@link DataRecord}. Statements include assertions about record and its
+	 * fields' types and values. All record fields are included.
 	 * 
 	 * @param record
 	 *            instance
-	 * @param filter 
+	 * @param filter
 	 * @param src
 	 *            record source
 	 * @param alreadyObservedFieldUris
@@ -473,14 +504,16 @@ public static List<? extends Statement> getDataSourceInstanceStatements(DataReco
 	 *            record instance index
 	 * @return statements
 	 */
-	public static List<Statement> getRecordInstanceStatements(DataRecord record, long createdTime, URIImpl recordUri, DuplicateStatementFilter filter) {
+	public static List<Statement> getRecordInstanceStatements(DataRecord record, long createdTime, URIImpl recordUri,
+			DuplicateStatementFilter filter) {
 		return getRecordInstanceStatements(record, createdTime, recordUri, null, StringConstants.BLANK, filter);
 	}
 
 	/**
-	 * Generate instance statements about this particular instance of {@link DataRecord}. Statements
-	 * include assertions about record and it's fields types and values. {@code rdfFields} will be
-	 * used to determine record exclusion rules and output format.
+	 * Generate instance statements about this particular instance of
+	 * {@link DataRecord}. Statements include assertions about record and its
+	 * fields' types and values. {@code rdfFields} will be used to determine
+	 * record exclusion rules and output format.
 	 * 
 	 * @param record
 	 *            instance
@@ -491,13 +524,14 @@ public static List<Statement> getRecordInstanceStatements(DataRecord record, lon
 	 * @param rdfFields
 	 *            configuration info for field export
 	 * @param parentRecordUri
-	 *            if not null, used to indicate that record is a subrecord within record described
-	 *            by this value
+	 *            if not null, used to indicate that record is a subrecord
+	 *            within record described by this value
 	 * @param readerKey
-	 *            label used in generating dataset instance URI; if null, converted to
-	 *            {@link StringConstants#BLANK}
+	 *            label used in generating dataset instance URI; if null,
+	 *            converted to {@link StringConstants#BLANK}
 	 * @param alreadyObservedFieldUris
-	 * @return statements ; empty result is returned for anonymous {@code record} class.
+	 * @return statements; an empty result is returned for an anonymous
+	 *         {@code record} class.
 	 */
 	public static List<Statement> getRecordInstanceStatements(Object record, long createdTime, URIImpl recordUri,
 			URIImpl parentRecordUri, String readerKey, DuplicateStatementFilter filter) {
@@ -538,14 +572,17 @@ public static List<Statement> getRecordInstanceStatements(Object record, long cr
 		// record instance has template record schema
 		// URIImpl recordSchemaUri = RdfUtil.createKiaoUri(
 		// targetNs,
-		// targetNs.lowerName() + record.getClass().getSimpleName() + KIAO.KABOB_SCHEMA.termName()
+		// targetNs.lowerName() + record.getClass().getSimpleName() +
+		// KIAO.KABOB_SCHEMA.termName()
 		// + RecordUtil.getRecordSchemaVersion(record.getClass()));
 		URIImpl recordSchemaUri = RdfRecordUriFactory.createRecordSchemaUri(record.getClass(), IncludeVersion.YES);
 		statements.add(new StatementImpl(recordUri, KIAO.HAS_TEMPLATE.uri(), recordSchemaUri));
 
 		Set<Field> fields = RecordUtil.getFieldToRecordFieldAnnotationsMap(record.getClass()).keySet();
 		List<Field> sortedFields = new ArrayList<Field>(fields);
-		Collections.sort(sortedFields, new FieldNameComparator()); // sorted to ease unit testing
+		Collections.sort(sortedFields, new FieldNameComparator()); // sorted to
+																	// ease unit
+																	// testing
 
 		for (Field field : sortedFields) {
 			if (isFieldSubRecord(field)) {
@@ -568,13 +605,16 @@ public static List<Statement> getRecordInstanceStatements(Object record, long cr
 					}
 				} else {
 					statements.addAll(getSubrecordStatements(createdTime, recordUri, readerKey, filter, subRecord));
-					// URIImpl subRecordUri = RdfRecordUriFactory.createRecordUri(subRecord);
-					// statements.addAll(getRecordInstanceStatements(subRecord, createdTime,
+					// URIImpl subRecordUri =
+					// RdfRecordUriFactory.createRecordUri(subRecord);
+					// statements.addAll(getRecordInstanceStatements(subRecord,
+					// createdTime,
 					// subRecordUri, recordUri,
 					// readerKey));
 				}
 			} else {
-				Collection<Statement> fieldValueStmts = getRdfFieldValueStatements(recordUri, record, field);
+				Collection<Statement> fieldValueStmts = getRdfFieldValueStatements(recordUri, record, field,
+						createdTime, filter);
 				if (fieldValueStmts.isEmpty()) {
 					continue;
 				}
@@ -599,13 +639,13 @@ private static List<Statement> getSubrecordStatements(long createdTime, URIImpl
 			DuplicateStatementFilter filter, Object r) {
 		List<Statement> statements = new ArrayList<Statement>();
 		URIImpl subRecordUri = RdfRecordUriFactory.createRecordUri(r);
-		List<Statement> subRecordStmts = getRecordInstanceStatements(r, createdTime, subRecordUri,
-				recordUri, readerKey, filter);
+		List<Statement> subRecordStmts = getRecordInstanceStatements(r, createdTime, subRecordUri, recordUri,
+				readerKey, filter);
 		if (!filter.alreadyObservedRecordUri(subRecordUri)) {
 			statements.addAll(subRecordStmts);
 			filter.logRecordUri(subRecordUri);
 		} else {
-//			logger.info("already seen subrecord");
+			// logger.info("already seen subrecord");
 			statements.add(subRecordStmts.get(0));
 		}
 		return statements;
@@ -658,41 +698,46 @@ private static Collection<Statement> linkFieldToRecord(URIImpl recordUri, URIImp
 	 * Generate statements about record's field.
 	 * 
 	 * @param fieldInstanceUri
-	 *            initial field instance URI; template re-used if field type is a {@link Collection}
+	 *            initial field instance URI; template re-used if field type is
+	 *            a {@link Collection}
 	 * @param record
 	 *            instance with specified field
+	 * @param filter duplicate-statement filter forwarded to {@link #getFieldDenotesValueStatement}
 	 * @param fieldName
 	 *            field name
 	 * @param commonFieldStatements
 	 *            shared template statements to be asserted about every field
 	 * @return statements
 	 */
-	private static Collection<Statement> getRdfFieldValueStatements(URIImpl recordUri, Object record, Field field) {
-		Object fieldValue = PrivateAccessor.getFieldValue(record, field.getName()); 
+	private static Collection<Statement> getRdfFieldValueStatements(URIImpl recordUri, Object record, Field field,
+			long createdTime, DuplicateStatementFilter filter) {
+		Object fieldValue = PrivateAccessor.getFieldValue(record, field.getName());
 		if (fieldValue == null) {
 			return new ArrayList<Statement>();
 		}
-		int fieldCount = 0;
 		Collection<Statement> statements = new ArrayList<Statement>();
 
 		if (!(fieldValue instanceof Collection)) {
-			fieldCount = 1;
 			URIImpl fieldUri = RdfRecordUriFactory.createFieldUri(record, field, fieldValue);
 			statements.addAll(linkFieldToRecord(recordUri, fieldUri));
 			statements.addAll(createCommonFieldStatements(record, recordUri, fieldUri, field.getName()));
-			statements.add(getFieldDenotesValueStatement(fieldUri, fieldValue));
+			statements.addAll(getFieldDenotesValueStatement(fieldUri, fieldValue, createdTime, filter));
 		} else {
-			/* for each element in the collection a new fieldInstanceUri is generated */
+			/*
+			 * for each element in the collection a new fieldInstanceUri is
+			 * generated
+			 */
 			Collection<?> coll = (Collection<?>) fieldValue;
 			for (Object object : coll) {
 				URIImpl fieldUri = RdfRecordUriFactory.createFieldUri(record, field, object);
 				if (fieldUri != null) {
 					statements.addAll(linkFieldToRecord(recordUri, fieldUri));
-					statements.add(getFieldDenotesValueStatement(fieldUri, object));
+					statements.addAll(getFieldDenotesValueStatement(fieldUri, object, createdTime, filter));
 					statements.addAll(createCommonFieldStatements(record, recordUri, fieldUri, field.getName()));
 				}
 			}
-			// int startingFieldCount = Integer.valueOf(fieldInstanceUri.substring(fieldInstanceUri
+			// int startingFieldCount =
+			// Integer.valueOf(fieldInstanceUri.substring(fieldInstanceUri
 			// .lastIndexOf(FIELD_VALUE) + 1)) - 1;
 			//
 			// Collection<?> coll = (Collection<?>) fieldValue;
@@ -713,8 +758,9 @@ private static Collection<Statement> getRdfFieldValueStatements(URIImpl recordUr
 	}
 
 	/**
-	 * Generate statements about field (represented by {@code fieldInstanceUri}, and also a Subject
-	 * in RDF statement) and field's value. Statements generated:<br>
+	 * Generate statements about field (represented by {@code fieldInstanceUri},
+	 * and also a Subject in RDF statement) and field's value. Statements
+	 * generated:<br>
 	 * 
 	 * <pre>
 	 * <http://www.ncbi.nlm.nih.gov/gene/F_RdfRecordWriterImplTest%24GeneId2NameDatFileData_geneID_bZWMJYAy_y1wpq1BHpoB2OFoLlc> <http://purl.obolibrary.org/obo/IAO_0000219> <http://kabob.ucdenver.edu/iao/eg/EG_111_ICE> .
@@ -724,16 +770,66 @@ private static Collection<Statement> getRdfFieldValueStatements(URIImpl recordUr
 	 *            rdf field instance URI (subject)
 	 * @param fieldValue
 	 *            value
+	 * @param filter passed on when record statements are generated for unknown or erroneous identifiers
 	 * @throws IllegalArgumentException
 	 *             if fieldValue's type is {@link Collection}
 	 * @return statements
 	 */
-	public static Statement getFieldDenotesValueStatement(URIImpl fieldInstanceUri, Object fieldValue) {
+	public static List<Statement> getFieldDenotesValueStatement(URIImpl fieldInstanceUri, Object fieldValue,
+			long createdTime, DuplicateStatementFilter filter) {
 		if (fieldValue instanceof Collection) {
 			throw new IllegalArgumentException("Collection fieldValue is not supported");
 		}
 		Value value = RdfUtil.getValue(fieldValue);
-		return new StatementImpl(fieldInstanceUri, IAO.DENOTES.uri(), value);
+
+		List<Statement> stmts = new ArrayList<Statement>();
+		/*
+		 * if we encounter a data source identifier that is declared either
+		 * unknown or a probable error, we create a record to hold the
+		 * identifier and optional data source string. The field then denotes
+		 * this new record. Unknown data source identifiers occur when the file
+		 * parsing code comes across an identifier for which it does not know
+		 * how to generate an appropriate URI. Perhaps "unknown" is not the
+		 * prefix to use here. Probably erroneous identifiers are identifiers
+		 * that the parsing code has detected to be incorrect, e.g. a UniProt
+		 * identifier that does not follow the regular expression pattern
+		 * stipulated by UniProt.
+		 */
+		if (fieldValue instanceof UnknownDataSourceIdentifier) {
+			UnknownDataSourceIdentifier id = (UnknownDataSourceIdentifier) fieldValue;
+			NonNormalizedIdentifierRecord record = new NonNormalizedIdentifierRecord(id.getDataElement(), id.getDataSourceStr());
+			URIImpl recordUri = RdfRecordUriFactory.createRecordUri(record);
+			URIImpl parentRecordUri = null;
+			String readerKey = null;
+			List<Statement> recordInstanceStatements = RdfRecordUtil.getRecordInstanceStatements(record, createdTime,
+					recordUri, parentRecordUri, readerKey, filter);
+			/*
+			 * the first statement returned is a dataset has_part record triple
+			 * which we do not need
+			 */
+			recordInstanceStatements.remove(0);
+			stmts.add(new StatementImpl(fieldInstanceUri, IAO.DENOTES.uri(), recordUri));
+			stmts.addAll(recordInstanceStatements);
+		} else if (fieldValue instanceof ProbableErrorDataSourceIdentifier) {
+			ProbableErrorDataSourceIdentifier id = (ProbableErrorDataSourceIdentifier) fieldValue;
+			ErroneousIdentifierRecord record = new ErroneousIdentifierRecord(id.getDataElement(),
+					id.getDataSourceStr(), id.getErrorMessage());
+			URIImpl recordUri = RdfRecordUriFactory.createRecordUri(record);
+			URIImpl parentRecordUri = null;
+			String readerKey = null;
+			List<Statement> recordInstanceStatements = RdfRecordUtil.getRecordInstanceStatements(record, createdTime,
+					recordUri, parentRecordUri, readerKey, filter);
+			/*
+			 * the first statement returned is a dataset has_part record triple
+			 * which we do not need
+			 */
+			recordInstanceStatements.remove(0);
+			stmts.add(new StatementImpl(fieldInstanceUri, IAO.DENOTES.uri(), recordUri));
+			stmts.addAll(recordInstanceStatements);
+		} else {
+			stmts.add(new StatementImpl(fieldInstanceUri, IAO.DENOTES.uri(), value));
+		}
+		return stmts;
 
 	}
 
diff --git a/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RdfRecordWriterImpl.java b/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RdfRecordWriterImpl.java
index 451db76..2d6c6e5 100644
--- a/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RdfRecordWriterImpl.java
+++ b/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RdfRecordWriterImpl.java
@@ -54,23 +54,17 @@
 import org.apache.log4j.Logger;
 import org.openrdf.model.Resource;
 import org.openrdf.model.Statement;
-import org.openrdf.model.Value;
 import org.openrdf.model.impl.URIImpl;
 import org.openrdf.rio.RDFHandlerException;
 import org.openrdf.rio.RDFWriter;
 
-import edu.ucdenver.ccp.common.collections.CollectionsUtil;
 import edu.ucdenver.ccp.common.file.CharacterEncoding;
 import edu.ucdenver.ccp.common.file.FileUtil;
-import edu.ucdenver.ccp.common.reflection.PrivateAccessor;
 import edu.ucdenver.ccp.common.string.StringConstants;
 import edu.ucdenver.ccp.datasource.fileparsers.DataRecord;
 import edu.ucdenver.ccp.datasource.fileparsers.RecordReader;
 import edu.ucdenver.ccp.datasource.fileparsers.RecordUtil;
-import edu.ucdenver.ccp.datasource.identifiers.DataSourceElement;
-import edu.ucdenver.ccp.datasource.identifiers.DataSourceIdentifier;
 import edu.ucdenver.ccp.datasource.identifiers.DataSource;
-import edu.ucdenver.ccp.datasource.rdfizer.rdf.RdfId;
 import edu.ucdenver.ccp.datasource.rdfizer.rdf.filter.DuplicateStatementFilter;
 import edu.ucdenver.ccp.datasource.rdfizer.rdf.filter.InMemoryDuplicateStatementFilter;
 import edu.ucdenver.ccp.datasource.rdfizer.rdf.ice.RdfUtil.RdfFormat;
@@ -459,192 +453,192 @@ private void processRecord(DataRecord record, String readerKey, URIImpl recordUr
 		}
 	}
 
-	/**
-	 * Constant is assumed to be a valid URI String
-	 * 
-	 * @param tripleObj
-	 * @param <E>
-	 * @return
-	 */
-	private <E extends DataRecord> Map<Class<?>, Collection<Value>> getConstantValues(String value) {
-		Map<Class<?>, Collection<Value>> type2valuesMap = new HashMap<Class<?>, Collection<Value>>();
-		Value constantValue = new URIImpl(value);
-		CollectionsUtil.addToOne2ManyMap(String.class, constantValue, type2valuesMap);
-		return type2valuesMap;
-	}
-
-	/**
-	 * 
-	 * @param <E>
-	 * @param record
-	 * @param tripleObj
-	 * @return
-	 */
-	private <E extends DataRecord> Map<Class<?>, Collection<Value>> getLiteralValues(E record, String fieldName) {
-		Map<Class<?>, Collection<Value>> type2valuesMap = new HashMap<Class<?>, Collection<Value>>();
-		Object fieldValue = PrivateAccessor.getFieldValue(record, fieldName);
-		if (fieldValue == null)
-			return type2valuesMap;
-		if (fieldValue instanceof DataSourceElement<?>) {
-			DataSourceElement<?> element = (DataSourceElement<?>) fieldValue;
-			Value literalValue = RdfUtil.createLiteral(element.getDataElement());
-			CollectionsUtil.addToOne2ManyMap(fieldValue.getClass(), literalValue, type2valuesMap);
-			return type2valuesMap;
-		}
-		if (fieldValue instanceof Collection<?>) {
-			for (Object value : ((Collection<?>) fieldValue))
-				if (value instanceof DataSourceElement<?>) {
-					DataSourceElement<?> element = (DataSourceElement<?>) fieldValue;
-					Value literalValue = RdfUtil.createLiteral(element.getDataElement());
-					CollectionsUtil.addToOne2ManyMap(value.getClass(), literalValue, type2valuesMap);
-				} else
-					throw new RuntimeException(String.format("Unable to extract RDF object from field: %s. "
-							+ "Expected Collection<ResourceComponent> but instead observed Collection<%s>.", fieldName,
-							value.getClass().getName()));
-			return type2valuesMap;
-		}
-		throw new RuntimeException(String.format("Unable to extract RDF object from field: %s (observedValue=%s)",
-				fieldName, fieldValue.toString()));
-	}
-
-	/**
-	 * Get values for triple definition where value is specified to use ICE formatting (ex:
-	 * {@code <object use-ice-id="true">ensemblGeneId</object>})
-	 * 
-	 * @param record
-	 * @param tripleObj
-	 * @return values
-	 */
-	private Map<Class<?>, Collection<Value>> getInformationContentEntityIDValues(DataRecord record, String fieldName) {
-		Map<Class<?>, Collection<Value>> type2valuesMap = new HashMap<Class<?>, Collection<Value>>();
-		Object fieldValue = PrivateAccessor.getFieldValue(record, fieldName);
-		if (fieldValue == null)
-			return type2valuesMap;
-
-		if (fieldValue instanceof DataSourceIdentifier<?>) {
-			DataSourceIdentifier<?> id = (DataSourceIdentifier<?>) fieldValue;
-			RdfId rdfId = new RdfId(id);
-			Value iceIdValue = new URIImpl(RdfUtil.createKiaoUri(rdfId.getNamespace(), rdfId.getICE_ID()).toString());
-			CollectionsUtil.addToOne2ManyMap(fieldValue.getClass(), iceIdValue, type2valuesMap);
-			return type2valuesMap;
-		}
-
-		if (fieldValue instanceof Collection<?>) {
-			for (Object value : ((Collection<?>) fieldValue))
-				if (value instanceof DataSourceElement<?>) {
-					DataSourceIdentifier<?> id = (DataSourceIdentifier<?>) value;
-					RdfId rdfId = new RdfId(id);
-					Value iceIdValue = new URIImpl(RdfUtil.createKiaoUri(rdfId.getNamespace(), rdfId.getICE_ID())
-							.toString());
-					CollectionsUtil.addToOne2ManyMap(value.getClass(), iceIdValue, type2valuesMap);
-				} else
-					throw new RuntimeException(String.format("Unable to extract RDF object from field: %s. "
-							+ "Expected Collection<DataElementIdentifier<?>> but instead observed Collection<%s>.",
-							fieldName, value.getClass().getName()));
-			return type2valuesMap;
-		}
-
-		throw new RuntimeException(String.format("Unable to extract RDF object from field: %s (observedValue=%s)",
-				fieldName, fieldValue.toString()));
-	}
-
-	/**
-	 * Parser {@link DataRecord} from field of record.
-	 * 
-	 * @param <E>
-	 *            record type
-	 * @param record
-	 *            instance
-	 * @param fieldName
-	 *            field in record
-	 * @return record
-	 */
-	private <E extends DataRecord> Map<Class<?>, Collection<Value>> getValues(E record, String fieldName) {
-		Map<Class<?>, Collection<Value>> type2valuesMap = new HashMap<Class<?>, Collection<Value>>();
-		Object fieldValue = PrivateAccessor.getFieldValue(record, fieldName);
-		if (fieldValue == null)
-			return type2valuesMap;
-
-		if (fieldValue instanceof DataSourceElement<?>) {
-			DataSourceElement<?> id = (DataSourceElement<?>) fieldValue;
-			Value rdfValue = null;
-
-			if (id instanceof DataSourceIdentifier<?>) {
-				RdfId rdfId = new RdfId((DataSourceIdentifier<?>) id);
-				rdfValue = rdfId.getRdfValue();
-			} else
-				rdfValue = RdfUtil.createLiteral(id.getDataElement());
-
-			CollectionsUtil.addToOne2ManyMap(fieldValue.getClass(), rdfValue, type2valuesMap);
-			return type2valuesMap;
-		}
-
-		if (fieldValue instanceof Collection<?>) {
-			for (Object value : ((Collection<?>) fieldValue)) {
-				if (value instanceof DataSourceElement<?>) {
-					DataSourceElement<?> id = (DataSourceElement<?>) value;
-					Value rdfValue = null;
-
-					if (id instanceof DataSourceIdentifier<?>) {
-						RdfId rdfId = new RdfId((DataSourceIdentifier<?>) id);
-						rdfValue = rdfId.getRdfValue();
-					} else
-						rdfValue = RdfUtil.createLiteral(id.getDataElement());
-
-					CollectionsUtil.addToOne2ManyMap(value.getClass(), rdfValue, type2valuesMap);
-				} else {
-					throw new RuntimeException(String.format("Unable to extract RDF object from field: %s. "
-							+ "Expected Collection<ResourceComponent> but instead observed Collection<%s>.", fieldName,
-							value.getClass().getName()));
-				}
-			}
-
-			return type2valuesMap;
-		}
-
-		throw new RuntimeException(String.format("Unable to extract RDF object from field: %s (observedValue=%s)",
-				fieldName, fieldValue.toString()));
-	}
-
-	/**
-	 * Returns the subject Resource representation of the value of the field with the given name
-	 * contained in the input DataRecord. The field must be of type ResourceIdentifier.
-	 * 
-	 * @param record
-	 * @param fieldName
-	 * @return
-	 * 
-	 */
-	private Collection<Resource> getSubjectResources(DataRecord record, String fieldName) {
-		Collection<Resource> resources = new ArrayList<Resource>();
-		Object fieldValue = PrivateAccessor.getFieldValue(record, fieldName);
-
-		if (fieldValue instanceof DataSourceIdentifier<?>) {
-			DataSourceIdentifier<?> id = (DataSourceIdentifier<?>) fieldValue;
-			RdfId rdfId = new RdfId(id);
-			resources.add(new URIImpl(RdfUtil.createKiaoUri(rdfId.getNamespace(), id.toString()).toString()));
-			return resources;
-		}
-
-		if (fieldValue instanceof Collection<?>) {
-			for (Object resource : ((Collection<?>) fieldValue))
-				if (resource instanceof DataSourceIdentifier<?>) {
-					DataSourceIdentifier<?> id = (DataSourceIdentifier<?>) resource;
-					RdfId rdfId = new RdfId(id);
-					resources.add(new URIImpl(RdfUtil.createKiaoUri(rdfId.getNamespace(), id.toString()).toString()));
-				} else {
-					String message = String.format("Unable to extract RDF subject from field: %s. "
-							+ "Expected Collection<ResourceIdentifier> but instead observed Collection<%s>.",
-							fieldName, resource.getClass().getName());
-					throw new RuntimeException(message);
-				}
-
-			return resources;
-		}
-
-		throw new RuntimeException(String.format("Unable to extract RDF subject from field: %s (observedValue=%s)",
-				fieldName, fieldValue.toString()));
-	}
+//	/**
+//	 * Constant is assumed to be a valid URI String
+//	 * 
+//	 * @param tripleObj
+//	 * @param <E>
+//	 * @return
+//	 */
+//	private <E extends DataRecord> Map<Class<?>, Collection<Value>> getConstantValues(String value) {
+//		Map<Class<?>, Collection<Value>> type2valuesMap = new HashMap<Class<?>, Collection<Value>>();
+//		Value constantValue = new URIImpl(value);
+//		CollectionsUtil.addToOne2ManyMap(String.class, constantValue, type2valuesMap);
+//		return type2valuesMap;
+//	}
+
+//	/**
+//	 * 
+//	 * @param <E>
+//	 * @param record
+//	 * @param tripleObj
+//	 * @return
+//	 */
+//	private <E extends DataRecord> Map<Class<?>, Collection<Value>> getLiteralValues(E record, String fieldName) {
+//		Map<Class<?>, Collection<Value>> type2valuesMap = new HashMap<Class<?>, Collection<Value>>();
+//		Object fieldValue = PrivateAccessor.getFieldValue(record, fieldName);
+//		if (fieldValue == null)
+//			return type2valuesMap;
+//		if (fieldValue instanceof DataSourceElement<?>) {
+//			DataSourceElement<?> element = (DataSourceElement<?>) fieldValue;
+//			Value literalValue = RdfUtil.createLiteral(element.getDataElement());
+//			CollectionsUtil.addToOne2ManyMap(fieldValue.getClass(), literalValue, type2valuesMap);
+//			return type2valuesMap;
+//		}
+//		if (fieldValue instanceof Collection<?>) {
+//			for (Object value : ((Collection<?>) fieldValue))
+//				if (value instanceof DataSourceElement<?>) {
+//					DataSourceElement<?> element = (DataSourceElement<?>) fieldValue;
+//					Value literalValue = RdfUtil.createLiteral(element.getDataElement());
+//					CollectionsUtil.addToOne2ManyMap(value.getClass(), literalValue, type2valuesMap);
+//				} else
+//					throw new RuntimeException(String.format("Unable to extract RDF object from field: %s. "
+//							+ "Expected Collection<ResourceComponent> but instead observed Collection<%s>.", fieldName,
+//							value.getClass().getName()));
+//			return type2valuesMap;
+//		}
+//		throw new RuntimeException(String.format("Unable to extract RDF object from field: %s (observedValue=%s)",
+//				fieldName, fieldValue.toString()));
+//	}
+
+//	/**
+//	 * Get values for triple definition where value is specified to use ICE formatting (ex:
+//	 * {@code <object use-ice-id="true">ensemblGeneId</object>})
+//	 * 
+//	 * @param record
+//	 * @param tripleObj
+//	 * @return values
+//	 */
+//	private Map<Class<?>, Collection<Value>> getInformationContentEntityIDValues(DataRecord record, String fieldName) {
+//		Map<Class<?>, Collection<Value>> type2valuesMap = new HashMap<Class<?>, Collection<Value>>();
+//		Object fieldValue = PrivateAccessor.getFieldValue(record, fieldName);
+//		if (fieldValue == null)
+//			return type2valuesMap;
+//
+//		if (fieldValue instanceof DataSourceIdentifier<?>) {
+//			DataSourceIdentifier<?> id = (DataSourceIdentifier<?>) fieldValue;
+//			RdfId rdfId = new RdfId(id);
+//			Value iceIdValue = new URIImpl(RdfUtil.createKiaoUri(rdfId.getNamespace(), rdfId.getICE_ID()).toString());
+//			CollectionsUtil.addToOne2ManyMap(fieldValue.getClass(), iceIdValue, type2valuesMap);
+//			return type2valuesMap;
+//		}
+//
+//		if (fieldValue instanceof Collection<?>) {
+//			for (Object value : ((Collection<?>) fieldValue))
+//				if (value instanceof DataSourceElement<?>) {
+//					DataSourceIdentifier<?> id = (DataSourceIdentifier<?>) value;
+//					RdfId rdfId = new RdfId(id);
+//					Value iceIdValue = new URIImpl(RdfUtil.createKiaoUri(rdfId.getNamespace(), rdfId.getICE_ID())
+//							.toString());
+//					CollectionsUtil.addToOne2ManyMap(value.getClass(), iceIdValue, type2valuesMap);
+//				} else
+//					throw new RuntimeException(String.format("Unable to extract RDF object from field: %s. "
+//							+ "Expected Collection<DataElementIdentifier<?>> but instead observed Collection<%s>.",
+//							fieldName, value.getClass().getName()));
+//			return type2valuesMap;
+//		}
+//
+//		throw new RuntimeException(String.format("Unable to extract RDF object from field: %s (observedValue=%s)",
+//				fieldName, fieldValue.toString()));
+//	}
+
+//	/**
+//	 * Parser {@link DataRecord} from field of record.
+//	 * 
+//	 * @param <E>
+//	 *            record type
+//	 * @param record
+//	 *            instance
+//	 * @param fieldName
+//	 *            field in record
+//	 * @return record
+//	 */
+//	private <E extends DataRecord> Map<Class<?>, Collection<Value>> getValues(E record, String fieldName) {
+//		Map<Class<?>, Collection<Value>> type2valuesMap = new HashMap<Class<?>, Collection<Value>>();
+//		Object fieldValue = PrivateAccessor.getFieldValue(record, fieldName);
+//		if (fieldValue == null)
+//			return type2valuesMap;
+//
+//		if (fieldValue instanceof DataSourceElement<?>) {
+//			DataSourceElement<?> id = (DataSourceElement<?>) fieldValue;
+//			Value rdfValue = null;
+//
+//			if (id instanceof DataSourceIdentifier<?>) {
+//				RdfId rdfId = new RdfId((DataSourceIdentifier<?>) id);
+//				rdfValue = rdfId.getRdfValue();
+//			} else
+//				rdfValue = RdfUtil.createLiteral(id.getDataElement());
+//
+//			CollectionsUtil.addToOne2ManyMap(fieldValue.getClass(), rdfValue, type2valuesMap);
+//			return type2valuesMap;
+//		}
+//
+//		if (fieldValue instanceof Collection<?>) {
+//			for (Object value : ((Collection<?>) fieldValue)) {
+//				if (value instanceof DataSourceElement<?>) {
+//					DataSourceElement<?> id = (DataSourceElement<?>) value;
+//					Value rdfValue = null;
+//
+//					if (id instanceof DataSourceIdentifier<?>) {
+//						RdfId rdfId = new RdfId((DataSourceIdentifier<?>) id);
+//						rdfValue = rdfId.getRdfValue();
+//					} else
+//						rdfValue = RdfUtil.createLiteral(id.getDataElement());
+//
+//					CollectionsUtil.addToOne2ManyMap(value.getClass(), rdfValue, type2valuesMap);
+//				} else {
+//					throw new RuntimeException(String.format("Unable to extract RDF object from field: %s. "
+//							+ "Expected Collection<ResourceComponent> but instead observed Collection<%s>.", fieldName,
+//							value.getClass().getName()));
+//				}
+//			}
+//
+//			return type2valuesMap;
+//		}
+//
+//		throw new RuntimeException(String.format("Unable to extract RDF object from field: %s (observedValue=%s)",
+//				fieldName, fieldValue.toString()));
+//	}
+//
+//	/**
+//	 * Returns the subject Resource representation of the value of the field with the given name
+//	 * contained in the input DataRecord. The field must be of type ResourceIdentifier.
+//	 * 
+//	 * @param record
+//	 * @param fieldName
+//	 * @return
+//	 * 
+//	 */
+//	private Collection<Resource> getSubjectResources(DataRecord record, String fieldName) {
+//		Collection<Resource> resources = new ArrayList<Resource>();
+//		Object fieldValue = PrivateAccessor.getFieldValue(record, fieldName);
+//
+//		if (fieldValue instanceof DataSourceIdentifier<?>) {
+//			DataSourceIdentifier<?> id = (DataSourceIdentifier<?>) fieldValue;
+//			RdfId rdfId = new RdfId(id);
+//			resources.add(new URIImpl(RdfUtil.createKiaoUri(rdfId.getNamespace(), id.toString()).toString()));
+//			return resources;
+//		}
+//
+//		if (fieldValue instanceof Collection<?>) {
+//			for (Object resource : ((Collection<?>) fieldValue))
+//				if (resource instanceof DataSourceIdentifier<?>) {
+//					DataSourceIdentifier<?> id = (DataSourceIdentifier<?>) resource;
+//					RdfId rdfId = new RdfId(id);
+//					resources.add(new URIImpl(RdfUtil.createKiaoUri(rdfId.getNamespace(), id.toString()).toString()));
+//				} else {
+//					String message = String.format("Unable to extract RDF subject from field: %s. "
+//							+ "Expected Collection<ResourceIdentifier> but instead observed Collection<%s>.",
+//							fieldName, resource.getClass().getName());
+//					throw new RuntimeException(message);
+//				}
+//
+//			return resources;
+//		}
+//
+//		throw new RuntimeException(String.format("Unable to extract RDF subject from field: %s (observedValue=%s)",
+//				fieldName, fieldValue.toString()));
+//	}
 
 	/**
 	 * Output RDF record to a file based on record's file key.
diff --git a/datasource-rdfizer/src/test/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RdfRecordWriterImplErroneousAndUnknownIdentifierTest.java b/datasource-rdfizer/src/test/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RdfRecordWriterImplErroneousAndUnknownIdentifierTest.java
new file mode 100644
index 0000000..5edb920
--- /dev/null
+++ b/datasource-rdfizer/src/test/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RdfRecordWriterImplErroneousAndUnknownIdentifierTest.java
@@ -0,0 +1,180 @@
+package edu.ucdenver.ccp.datasource.rdfizer.rdf.ice;
+
+/*
+ * #%L
+ * Colorado Computational Pharmacology's common module
+ * %%
+ * Copyright (C) 2012 - 2015 Regents of the University of Colorado
+ * %%
+ * Redistribution and use in source and binary forms, with or without modification,
+ * are permitted provided that the following conditions are met:
+ * 
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ * 
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ * 
+ * 3. Neither the name of the Regents of the University of Colorado nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software without
+ *    specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * #L%
+ */
+
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.GregorianCalendar;
+import java.util.List;
+
+import org.junit.Before;
+import org.junit.Test;
+
+import edu.ucdenver.ccp.common.collections.CollectionsUtil;
+import edu.ucdenver.ccp.common.file.CharacterEncoding;
+import edu.ucdenver.ccp.common.file.FileComparisonUtil;
+import edu.ucdenver.ccp.common.file.FileComparisonUtil.ColumnOrder;
+import edu.ucdenver.ccp.common.file.FileComparisonUtil.LineOrder;
+import edu.ucdenver.ccp.common.file.FileReaderUtil;
+import edu.ucdenver.ccp.common.file.FileUtil;
+import edu.ucdenver.ccp.common.file.FileWriterUtil;
+import edu.ucdenver.ccp.common.file.FileWriterUtil.FileSuffixEnforcement;
+import edu.ucdenver.ccp.common.file.FileWriterUtil.WriteMode;
+import edu.ucdenver.ccp.common.test.DefaultTestCase;
+import edu.ucdenver.ccp.datasource.fileparsers.pro.ProMappingFileParser;
+import edu.ucdenver.ccp.datasource.rdfizer.rdf.ice.RdfUtil.RdfFormat;
+
+/**
+ * Testing using the protein ontology mapping file b/c it's a simple format and
+ * it has unknown and potentially erroneous data source identifiers.
+ */
+public class RdfRecordWriterImplErroneousAndUnknownIdentifierTest extends DefaultTestCase {
+
+	private File proMappingTxtFile_unknownIdentifier;
+	private File outputDirectory;
+	private final String expectedOutputFileName = "pr-ProMappingFileParser.0-0.nt";
+
+	@Before
+	public void setUp() throws Exception {
+		outputDirectory = folder.newFolder("output");
+		proMappingTxtFile_unknownIdentifier = folder.newFile("promapping.txt");
+		populateProMappingTxtFile_unknownIdentifier();
+	}
+
+	/**
+	 * PR:000000005 HGNC:11773 is_a <br>
+	 * PR:000000005 UniProtKB_VAR:VAR_022359 is_a // unknown identifier type<br>
+	 * PR:000000006 UniProtKB:PABCDE exact // invalid UniProt ID<br>
+	 */
+	private void populateProMappingTxtFile_unknownIdentifier() throws IOException {
+		List<String> lines = CollectionsUtil.createList("PR:000000005\tHGNC:11773\tis_a",
+				"PR:000000005\tUniProtKB_VAR:VAR_022359\tis_a", "PR:000000006\tUniProtKB:PABCDE\texact");
+		FileWriterUtil.printLines(lines, proMappingTxtFile_unknownIdentifier, CharacterEncoding.US_ASCII,
+				WriteMode.OVERWRITE, FileSuffixEnforcement.OFF);
+	}
+
+	@Test
+	public void testWriteRdf_unknown_and_erroneous_identifiers() throws IOException {
+		ProMappingFileParser parser = new ProMappingFileParser(proMappingTxtFile_unknownIdentifier,
+				CharacterEncoding.US_ASCII);
+		RdfRecordWriterImpl<ProMappingFileParser> recordWriter = new RdfRecordWriterImpl<ProMappingFileParser>(
+				outputDirectory, RdfFormat.NTRIPLES);
+		// Month is zero-based (11 = December) and the JVM default time zone is used, yet getExpectedLines() hard-codes -07:00. NOTE(review): looks time-zone dependent - confirm.
+		long createdTimeInMillis20101217 = new GregorianCalendar(2010, 11, 17).getTimeInMillis();
+		recordWriter.processRecordReader(parser, createdTimeInMillis20101217);
+		File outputFile = FileUtil.appendPathElementsToDirectory(outputDirectory, expectedOutputFileName);
+		System.err.println("dir contents: " + Arrays.toString(outputDirectory.list()));
+		assertTrue("Output file should have been created.", outputFile.exists());
+		// Echo the generated lines to stderr; this is debug output only and is not asserted on.
+		List<String> linesFromFile = FileReaderUtil.loadLinesFromFile(outputFile, CharacterEncoding.UTF_8);
+		for (String l : linesFromFile) {
+			System.err.println(l);
+		}
+
+		List<String> expectedLines = getExpectedLines();
+		assertTrue("N-Triple Lines should be as expected.", FileComparisonUtil.hasExpectedLines(outputFile,
+				CharacterEncoding.UTF_8, expectedLines, null, LineOrder.ANY_ORDER, ColumnOrder.AS_IN_FILE));
+	}
+
+	private List<String> getExpectedLines() {
+
+		return CollectionsUtil
+				.createList(
+
+						"<http://kabob.ucdenver.edu/iao/pr/prDataSource20101217> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://kabob.ucdenver.edu/iao/pr/prDataSource> .",
+						"<http://kabob.ucdenver.edu/iao/pr/prProMappingRecordDataSet20101217> <http://kabob.ucdenver.edu/iao/hasTemplate> <http://kabob.ucdenver.edu/iao/pr/prProMappingRecordSchema1> .",
+						"<http://kabob.ucdenver.edu/iao/pr/prDataSource20101217> <http://purl.obolibrary.org/obo/BFO_0000051> <http://kabob.ucdenver.edu/iao/pr/prProMappingRecordDataSet20101217> .",
+						"<http://kabob.ucdenver.edu/iao/pr/prProMappingRecordDataSet20101217> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://kabob.ucdenver.edu/iao/DataSet> .",
+						"<http://kabob.ucdenver.edu/iao/pr/prProMappingRecordDataSet20101217> <http://kabob.ucdenver.edu/iao/hasCreationDate> \"2010-12-17T00:00:00.000-07:00\"^^<http://www.w3.org/2001/XMLSchema#dateTime> .",
+						"<http://kabob.ucdenver.edu/iao/pr/prProMappingRecordDataSet20101217> <http://purl.obolibrary.org/obo/BFO_0000051> <http://kabob.ucdenver.edu/iao/pr/R_ProMappingRecord_WZhKO4jkiAbN2hT_3flYHMxyvEc> .",
+						"<http://kabob.ucdenver.edu/iao/pr/R_ProMappingRecord_WZhKO4jkiAbN2hT_3flYHMxyvEc> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://kabob.ucdenver.edu/iao/pr/ProMappingRecord> .",
+						"<http://kabob.ucdenver.edu/iao/pr/R_ProMappingRecord_WZhKO4jkiAbN2hT_3flYHMxyvEc> <http://kabob.ucdenver.edu/iao/hasTemplate> <http://kabob.ucdenver.edu/iao/pr/ProMappingRecordSchema1> .",
+						"<http://kabob.ucdenver.edu/iao/pr/R_ProMappingRecord_WZhKO4jkiAbN2hT_3flYHMxyvEc> <http://purl.obolibrary.org/obo/BFO_0000051> <http://kabob.ucdenver.edu/iao/pr/F_ProMappingRecord_mappingType_2897ALlg_3c5NaoTPgKcA2iK3LE> .",
+						"<http://kabob.ucdenver.edu/iao/pr/F_ProMappingRecord_mappingType_2897ALlg_3c5NaoTPgKcA2iK3LE> <http://kabob.ucdenver.edu/iao/hasTemplate> <http://kabob.ucdenver.edu/iao/pr/ProMappingRecord_mappingTypeDataField1> .",
+						"<http://kabob.ucdenver.edu/iao/pr/F_ProMappingRecord_mappingType_2897ALlg_3c5NaoTPgKcA2iK3LE> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://kabob.ucdenver.edu/iao/FieldValue> .",
+						"<http://kabob.ucdenver.edu/iao/pr/F_ProMappingRecord_mappingType_2897ALlg_3c5NaoTPgKcA2iK3LE> <http://purl.obolibrary.org/obo/IAO_0000219> \"is_a\"@en .",
+						"<http://kabob.ucdenver.edu/iao/pr/R_ProMappingRecord_WZhKO4jkiAbN2hT_3flYHMxyvEc> <http://purl.obolibrary.org/obo/BFO_0000051> <http://kabob.ucdenver.edu/iao/pr/F_ProMappingRecord_proteinOntologyId_GWKfNoArBFkAsDcNi36qnBAsgzQ> .",
+						"<http://kabob.ucdenver.edu/iao/pr/F_ProMappingRecord_proteinOntologyId_GWKfNoArBFkAsDcNi36qnBAsgzQ> <http://kabob.ucdenver.edu/iao/hasTemplate> <http://kabob.ucdenver.edu/iao/pr/ProMappingRecord_proteinOntologyIdDataField1> .",
+						"<http://kabob.ucdenver.edu/iao/pr/F_ProMappingRecord_proteinOntologyId_GWKfNoArBFkAsDcNi36qnBAsgzQ> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://kabob.ucdenver.edu/iao/FieldValue> .",
+						"<http://kabob.ucdenver.edu/iao/pr/F_ProMappingRecord_proteinOntologyId_GWKfNoArBFkAsDcNi36qnBAsgzQ> <http://purl.obolibrary.org/obo/IAO_0000219> <http://kabob.ucdenver.edu/iao/pr/PR_000000005_ICE> .",
+						"<http://kabob.ucdenver.edu/iao/pr/R_ProMappingRecord_WZhKO4jkiAbN2hT_3flYHMxyvEc> <http://purl.obolibrary.org/obo/BFO_0000051> <http://kabob.ucdenver.edu/iao/pr/F_ProMappingRecord_targetRecordId_kULVK1FZ1tqZ3xW2uncIWRC_QMY> .",
+						"<http://kabob.ucdenver.edu/iao/pr/F_ProMappingRecord_targetRecordId_kULVK1FZ1tqZ3xW2uncIWRC_QMY> <http://kabob.ucdenver.edu/iao/hasTemplate> <http://kabob.ucdenver.edu/iao/pr/ProMappingRecord_targetRecordIdDataField1> .",
+						"<http://kabob.ucdenver.edu/iao/pr/F_ProMappingRecord_targetRecordId_kULVK1FZ1tqZ3xW2uncIWRC_QMY> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://kabob.ucdenver.edu/iao/FieldValue> .",
+						"<http://kabob.ucdenver.edu/iao/pr/F_ProMappingRecord_targetRecordId_kULVK1FZ1tqZ3xW2uncIWRC_QMY> <http://purl.obolibrary.org/obo/IAO_0000219> <http://kabob.ucdenver.edu/iao/hgnc/HGNC_11773_ICE> .",
+						"<http://kabob.ucdenver.edu/iao/pr/prProMappingRecordDataSet20101217> <http://purl.obolibrary.org/obo/BFO_0000051> <http://kabob.ucdenver.edu/iao/pr/R_ProMappingRecord_X2n4OD0lGbx7Kz9nOlpuf-0b9x8> .",
+						"<http://kabob.ucdenver.edu/iao/pr/R_ProMappingRecord_X2n4OD0lGbx7Kz9nOlpuf-0b9x8> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://kabob.ucdenver.edu/iao/pr/ProMappingRecord> .",
+						"<http://kabob.ucdenver.edu/iao/pr/R_ProMappingRecord_X2n4OD0lGbx7Kz9nOlpuf-0b9x8> <http://kabob.ucdenver.edu/iao/hasTemplate> <http://kabob.ucdenver.edu/iao/pr/ProMappingRecordSchema1> .",
+						"<http://kabob.ucdenver.edu/iao/pr/R_ProMappingRecord_X2n4OD0lGbx7Kz9nOlpuf-0b9x8> <http://purl.obolibrary.org/obo/BFO_0000051> <http://kabob.ucdenver.edu/iao/pr/F_ProMappingRecord_mappingType_2897ALlg_3c5NaoTPgKcA2iK3LE> .",
+						"<http://kabob.ucdenver.edu/iao/pr/R_ProMappingRecord_X2n4OD0lGbx7Kz9nOlpuf-0b9x8> <http://purl.obolibrary.org/obo/BFO_0000051> <http://kabob.ucdenver.edu/iao/pr/F_ProMappingRecord_proteinOntologyId_GWKfNoArBFkAsDcNi36qnBAsgzQ> .",
+						"<http://kabob.ucdenver.edu/iao/pr/R_ProMappingRecord_X2n4OD0lGbx7Kz9nOlpuf-0b9x8> <http://purl.obolibrary.org/obo/BFO_0000051> <http://kabob.ucdenver.edu/iao/pr/F_ProMappingRecord_targetRecordId_PgXUJOe0DcW7eLkxbVbZS3Jy2OQ> .",
+						"<http://kabob.ucdenver.edu/iao/pr/F_ProMappingRecord_targetRecordId_PgXUJOe0DcW7eLkxbVbZS3Jy2OQ> <http://kabob.ucdenver.edu/iao/hasTemplate> <http://kabob.ucdenver.edu/iao/pr/ProMappingRecord_targetRecordIdDataField1> .",
+						"<http://kabob.ucdenver.edu/iao/pr/F_ProMappingRecord_targetRecordId_PgXUJOe0DcW7eLkxbVbZS3Jy2OQ> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://kabob.ucdenver.edu/iao/FieldValue> .",
+						"<http://kabob.ucdenver.edu/iao/pr/F_ProMappingRecord_targetRecordId_PgXUJOe0DcW7eLkxbVbZS3Jy2OQ> <http://purl.obolibrary.org/obo/IAO_0000219> <http://kabob.ucdenver.edu/iao/kabob/R_NonNormalizedIdentifierRecord_H0tEwGtJ3UasTlh7kGGk6r42NMo> .",
+						"<http://kabob.ucdenver.edu/iao/kabob/R_NonNormalizedIdentifierRecord_H0tEwGtJ3UasTlh7kGGk6r42NMo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://kabob.ucdenver.edu/iao/kabob/NonNormalizedIdentifierRecord> .",
+						"<http://kabob.ucdenver.edu/iao/kabob/R_NonNormalizedIdentifierRecord_H0tEwGtJ3UasTlh7kGGk6r42NMo> <http://kabob.ucdenver.edu/iao/hasTemplate> <http://kabob.ucdenver.edu/iao/kabob/NonNormalizedIdentifierRecordSchema1> .",
+						"<http://kabob.ucdenver.edu/iao/kabob/R_NonNormalizedIdentifierRecord_H0tEwGtJ3UasTlh7kGGk6r42NMo> <http://purl.obolibrary.org/obo/BFO_0000051> <http://kabob.ucdenver.edu/iao/kabob/F_NonNormalizedIdentifierRecord_identifier_H0tEwGtJ3UasTlh7kGGk6r42NMo> .",
+						"<http://kabob.ucdenver.edu/iao/kabob/F_NonNormalizedIdentifierRecord_identifier_H0tEwGtJ3UasTlh7kGGk6r42NMo> <http://kabob.ucdenver.edu/iao/hasTemplate> <http://kabob.ucdenver.edu/iao/kabob/NonNormalizedIdentifierRecord_identifierDataField1> .",
+						"<http://kabob.ucdenver.edu/iao/kabob/F_NonNormalizedIdentifierRecord_identifier_H0tEwGtJ3UasTlh7kGGk6r42NMo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://kabob.ucdenver.edu/iao/FieldValue> .",
+						"<http://kabob.ucdenver.edu/iao/kabob/F_NonNormalizedIdentifierRecord_identifier_H0tEwGtJ3UasTlh7kGGk6r42NMo> <http://purl.obolibrary.org/obo/IAO_0000219> \"UniProtKB_VAR:VAR_022359\"@en .",
+						"<http://kabob.ucdenver.edu/iao/pr/prProMappingRecordDataSet20101217> <http://purl.obolibrary.org/obo/BFO_0000051> <http://kabob.ucdenver.edu/iao/pr/R_ProMappingRecord_7sl9N3o2lQUc26DjJRSaO8R2Y7s> .",
+						"<http://kabob.ucdenver.edu/iao/pr/R_ProMappingRecord_7sl9N3o2lQUc26DjJRSaO8R2Y7s> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://kabob.ucdenver.edu/iao/pr/ProMappingRecord> .",
+						"<http://kabob.ucdenver.edu/iao/pr/R_ProMappingRecord_7sl9N3o2lQUc26DjJRSaO8R2Y7s> <http://kabob.ucdenver.edu/iao/hasTemplate> <http://kabob.ucdenver.edu/iao/pr/ProMappingRecordSchema1> .",
+						"<http://kabob.ucdenver.edu/iao/pr/R_ProMappingRecord_7sl9N3o2lQUc26DjJRSaO8R2Y7s> <http://purl.obolibrary.org/obo/BFO_0000051> <http://kabob.ucdenver.edu/iao/pr/F_ProMappingRecord_mappingType_H1hCp5e8_r2TTrw-uWcH1m5JKXM> .",
+						"<http://kabob.ucdenver.edu/iao/pr/F_ProMappingRecord_mappingType_H1hCp5e8_r2TTrw-uWcH1m5JKXM> <http://kabob.ucdenver.edu/iao/hasTemplate> <http://kabob.ucdenver.edu/iao/pr/ProMappingRecord_mappingTypeDataField1> .",
+						"<http://kabob.ucdenver.edu/iao/pr/F_ProMappingRecord_mappingType_H1hCp5e8_r2TTrw-uWcH1m5JKXM> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://kabob.ucdenver.edu/iao/FieldValue> .",
+						"<http://kabob.ucdenver.edu/iao/pr/F_ProMappingRecord_mappingType_H1hCp5e8_r2TTrw-uWcH1m5JKXM> <http://purl.obolibrary.org/obo/IAO_0000219> \"exact\"@en .",
+						"<http://kabob.ucdenver.edu/iao/pr/R_ProMappingRecord_7sl9N3o2lQUc26DjJRSaO8R2Y7s> <http://purl.obolibrary.org/obo/BFO_0000051> <http://kabob.ucdenver.edu/iao/pr/F_ProMappingRecord_proteinOntologyId_S1fonzU5XtSRL8_MlMV4e_N20QM> .",
+						"<http://kabob.ucdenver.edu/iao/pr/F_ProMappingRecord_proteinOntologyId_S1fonzU5XtSRL8_MlMV4e_N20QM> <http://kabob.ucdenver.edu/iao/hasTemplate> <http://kabob.ucdenver.edu/iao/pr/ProMappingRecord_proteinOntologyIdDataField1> .",
+						"<http://kabob.ucdenver.edu/iao/pr/F_ProMappingRecord_proteinOntologyId_S1fonzU5XtSRL8_MlMV4e_N20QM> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://kabob.ucdenver.edu/iao/FieldValue> .",
+						"<http://kabob.ucdenver.edu/iao/pr/F_ProMappingRecord_proteinOntologyId_S1fonzU5XtSRL8_MlMV4e_N20QM> <http://purl.obolibrary.org/obo/IAO_0000219> <http://kabob.ucdenver.edu/iao/pr/PR_000000006_ICE> .",
+						"<http://kabob.ucdenver.edu/iao/pr/R_ProMappingRecord_7sl9N3o2lQUc26DjJRSaO8R2Y7s> <http://purl.obolibrary.org/obo/BFO_0000051> <http://kabob.ucdenver.edu/iao/pr/F_ProMappingRecord_targetRecordId_NqfHfOrmMhiavPrX11WCiU0Kg5I> .",
+						"<http://kabob.ucdenver.edu/iao/pr/F_ProMappingRecord_targetRecordId_NqfHfOrmMhiavPrX11WCiU0Kg5I> <http://kabob.ucdenver.edu/iao/hasTemplate> <http://kabob.ucdenver.edu/iao/pr/ProMappingRecord_targetRecordIdDataField1> .",
+						"<http://kabob.ucdenver.edu/iao/pr/F_ProMappingRecord_targetRecordId_NqfHfOrmMhiavPrX11WCiU0Kg5I> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://kabob.ucdenver.edu/iao/FieldValue> .",
+						"<http://kabob.ucdenver.edu/iao/pr/F_ProMappingRecord_targetRecordId_NqfHfOrmMhiavPrX11WCiU0Kg5I> <http://purl.obolibrary.org/obo/IAO_0000219> <http://kabob.ucdenver.edu/iao/kabob/R_ErroneousIdentifierRecord_E34PZ6CBFbVnAiHThhjCwDlF_BM> .",
+						"<http://kabob.ucdenver.edu/iao/kabob/R_ErroneousIdentifierRecord_E34PZ6CBFbVnAiHThhjCwDlF_BM> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://kabob.ucdenver.edu/iao/kabob/ErroneousIdentifierRecord> .",
+						"<http://kabob.ucdenver.edu/iao/kabob/R_ErroneousIdentifierRecord_E34PZ6CBFbVnAiHThhjCwDlF_BM> <http://kabob.ucdenver.edu/iao/hasTemplate> <http://kabob.ucdenver.edu/iao/kabob/ErroneousIdentifierRecordSchema1> .",
+						"<http://kabob.ucdenver.edu/iao/kabob/R_ErroneousIdentifierRecord_E34PZ6CBFbVnAiHThhjCwDlF_BM> <http://purl.obolibrary.org/obo/BFO_0000051> <http://kabob.ucdenver.edu/iao/kabob/F_ErroneousIdentifierRecord_comment_ofxXBdY4IPpk1fxDieB7Gu34gAY> .",
+						"<http://kabob.ucdenver.edu/iao/kabob/F_ErroneousIdentifierRecord_comment_ofxXBdY4IPpk1fxDieB7Gu34gAY> <http://kabob.ucdenver.edu/iao/hasTemplate> <http://kabob.ucdenver.edu/iao/kabob/ErroneousIdentifierRecord_commentDataField1> .",
+						"<http://kabob.ucdenver.edu/iao/kabob/F_ErroneousIdentifierRecord_comment_ofxXBdY4IPpk1fxDieB7Gu34gAY> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://kabob.ucdenver.edu/iao/FieldValue> .",
+						"<http://kabob.ucdenver.edu/iao/kabob/F_ErroneousIdentifierRecord_comment_ofxXBdY4IPpk1fxDieB7Gu34gAY> <http://purl.obolibrary.org/obo/IAO_0000219> \"Invalid UniProt ID: PABCDE. This ID does not comply with the specifications for UniProt accession numbers as defined here: http://www.uniprot.org/manual/accession_numbers\"@en .",
+						"<http://kabob.ucdenver.edu/iao/kabob/R_ErroneousIdentifierRecord_E34PZ6CBFbVnAiHThhjCwDlF_BM> <http://purl.obolibrary.org/obo/BFO_0000051> <http://kabob.ucdenver.edu/iao/kabob/F_ErroneousIdentifierRecord_identifier_eH5KzArIhTUYaJC-91OzTVN2OdU> .",
+						"<http://kabob.ucdenver.edu/iao/kabob/F_ErroneousIdentifierRecord_identifier_eH5KzArIhTUYaJC-91OzTVN2OdU> <http://kabob.ucdenver.edu/iao/hasTemplate> <http://kabob.ucdenver.edu/iao/kabob/ErroneousIdentifierRecord_identifierDataField1> .",
+						"<http://kabob.ucdenver.edu/iao/kabob/F_ErroneousIdentifierRecord_identifier_eH5KzArIhTUYaJC-91OzTVN2OdU> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://kabob.ucdenver.edu/iao/FieldValue> .",
+						"<http://kabob.ucdenver.edu/iao/kabob/F_ErroneousIdentifierRecord_identifier_eH5KzArIhTUYaJC-91OzTVN2OdU> <http://purl.obolibrary.org/obo/IAO_0000219> \"UniProtKB:PABCDE\"@en .");
+	}
+
+}

From 35d9d009b90cc5357304377c54dab0b765813fd4 Mon Sep 17 00:00:00 2001
From: bill-baumgartner <bill.baumgartner@gmail.com>
Date: Mon, 8 Feb 2016 18:02:04 -0700
Subject: [PATCH 14/36] Modified to accept a list of datasource names as input

---
 .../rdfizer/rdf/ice/IceRdfGenerator.java      | 32 ++++++++++++-------
 1 file changed, 20 insertions(+), 12 deletions(-)

diff --git a/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/IceRdfGenerator.java b/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/IceRdfGenerator.java
index 988de27..40f668c 100644
--- a/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/IceRdfGenerator.java
+++ b/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/IceRdfGenerator.java
@@ -366,18 +366,23 @@ public enum RunBy {
 	 *            gzipped<br>
 	 *            args[4]: output record limit: can be used to produce a "light"
 	 *            set of RDF. -1 to output all records, i.e. no limit<br>
-	 * <br>
+	 *            args[5]: comma-delimited list of taxonomy identifiers (from
+	 *            NCBI Taxonomy) that will be used to limit RDF generation where
+	 *            applicable, e.g. 9606 to convert only human-related database
+	 *            records to RDF <br>
+	 * 
 	 *            The remaining input arguments depend on args[0]:<br>
 	 *            if NAME:<br>
-	 *            args[5]: name of the FileDataSource to process <br>
-	 *            args[6]: [OPTIONAL] date to use in the form yyyy-mm-dd. If not
+	 *            args[6]: comma-delimited list of FileDataSource names to
+	 *            process <br>
+	 *            args[7]: [OPTIONAL] date to use in the form yyyy-mm-dd. If not
 	 *            included or if "null" then the current date will be used<br>
 	 * <br>
 	 *            if INDEX: <br>
-	 *            args[5]: start stage args<br>
-	 *            [6]: the number of stages to process<br>
-	 *            args[7]: the Split type: either BY_STAGES or NONE<br>
-	 *            if BY_STAGES, then the index in args[5] corresponds to a
+	 *            args[6]: start stage<br>
+	 *            args[7]: the number of stages to process<br>
+	 *            args[8]: the Split type: either BY_STAGES or NONE<br>
+	 *            if BY_STAGES, then the index in args[6] corresponds to a
 	 *            particular stage of a FileDataSource. Many of the
 	 *            FileDataSources are processed in a single stage, however some
 	 *            of the larger files are split into multiple stages to speed up
@@ -387,7 +392,7 @@ public enum RunBy {
 	 *            stage. This will result in longer execution times for the
 	 *            larger files, however duplicate triple removal can be done
 	 *            concurrently.<br>
-	 *            args[8]: [OPTIONAL] date to use in the form yyyy-mm-dd. If not
+	 *            args[9]: [OPTIONAL] date to use in the form yyyy-mm-dd. If not
 	 *            included or if "null" then the current date will be used
 	 * 
 	 */
@@ -436,10 +441,13 @@ public static void main(String[] args) {
 				break;
 
 			case NAME:
-				FileDataSource source = FileDataSource.valueOf(args[index++].toUpperCase());
-				time = getTime(args, index);
-				generateIceRdf(source, time, baseSourceFileDirectory, baseRdfOutputDirectory, cleanSourceFiles,
-						compress, outputRecordLimit, taxonIds);
+				String datasourceStr = args[index++].toUpperCase(java.util.Locale.ROOT); // locale-independent upper-casing
+				for (String ds : datasourceStr.split(",")) {
+					FileDataSource source = FileDataSource.valueOf(ds.trim()); // tolerate "A, B" style lists
+					time = getTime(args, index);
+					generateIceRdf(source, time, baseSourceFileDirectory, baseRdfOutputDirectory, cleanSourceFiles,
+							compress, outputRecordLimit, taxonIds);
+				}
 				break;
 			default:
 				throw new IllegalArgumentException("Unhandled RunBy option: " + runBy.name());

From 5e596946b3219aa38ec146a2495debef3986712f Mon Sep 17 00:00:00 2001
From: bill-baumgartner <bill.baumgartner@gmail.com>
Date: Mon, 8 Feb 2016 18:03:19 -0700
Subject: [PATCH 15/36] Added requiresManualDownload flag

Also reformulated the output produced by main. It now prints a listing
of available datasources indicating which require manual download.
---
 .../rdfizer/rdf/ice/FileDataSource.java       | 451 ++++++------------
 .../rdfizer/rdf/ice/FileDataSourceParams.java |  49 ++
 2 files changed, 195 insertions(+), 305 deletions(-)
 create mode 100644 datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/FileDataSourceParams.java

diff --git a/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/FileDataSource.java b/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/FileDataSource.java
index e50597d..b2158bf 100644
--- a/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/FileDataSource.java
+++ b/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/FileDataSource.java
@@ -35,7 +35,9 @@
 
 import java.io.File;
 import java.io.IOException;
-import java.util.Arrays;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
 import java.util.Set;
 
 import org.apache.log4j.Logger;
@@ -43,7 +45,6 @@
 import edu.ucdenver.ccp.common.file.CharacterEncoding;
 import edu.ucdenver.ccp.common.file.FileUtil;
 import edu.ucdenver.ccp.datasource.fileparsers.FileRecordReader;
-import edu.ucdenver.ccp.datasource.fileparsers.dip.DipYYYYMMDDFileParser;
 import edu.ucdenver.ccp.datasource.fileparsers.drugbank.DrugbankXmlFileRecordReader;
 import edu.ucdenver.ccp.datasource.fileparsers.ebi.goa.GpAssociationGoaUniprotFileParser;
 import edu.ucdenver.ccp.datasource.fileparsers.ebi.interpro.InterPro2GoFileParser;
@@ -84,11 +85,12 @@
 import edu.ucdenver.ccp.datasource.fileparsers.rgd.RgdRatGeneNboAnnotationFileRecordReader;
 import edu.ucdenver.ccp.datasource.fileparsers.rgd.RgdRatGenePwAnnotationFileRecordReader;
 import edu.ucdenver.ccp.datasource.fileparsers.rgd.RgdRatGeneRdoAnnotationFileRecordReader;
-import edu.ucdenver.ccp.datasource.fileparsers.taxonaware.TaxonAwareSingleLineFileRecordReader;
 import edu.ucdenver.ccp.datasource.fileparsers.transfac.TransfacGeneDatFileParser;
 import edu.ucdenver.ccp.datasource.fileparsers.transfac.TransfacMatrixDatFileParser;
 import edu.ucdenver.ccp.datasource.identifiers.DataSource;
 import edu.ucdenver.ccp.datasource.identifiers.ncbi.taxonomy.NcbiTaxonomyID;
+import edu.ucdenver.ccp.datasource.rdfizer.rdf.ice.FileDataSourceParams.IsTaxonAware;
+import edu.ucdenver.ccp.datasource.rdfizer.rdf.ice.FileDataSourceParams.RequiresManualDownload;
 
 /**
  * This enum separates RDF generation by data source file. It is intended to
@@ -100,479 +102,328 @@
  *         ccpsupport@ucdenver.edu
  * 
  */
+
 public enum FileDataSource {
 
-	
 	/*
-	 * DIP is now part of IRefWeb, so it has been commented out since it requires the extra manual step
-	 * of logging in to the DIP website and downloading the file (and IRefWeb does not).
-	 */
-//	/**
-//	 * The DIP data file must be obtained manually. It is assumed to already be
-//	 * in place when RDF generation commences. It must be the only file in the
-//	 * DIP data source directory.
-//	 * 
-//	 */
-//	DIP(DataSource.DIP) {
-//
-//		@Override
-//		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
-//				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
-//			logger.info("sourceFileDirectory (exists): (" + sourceFileDirectory.exists() + ")" + sourceFileDirectory);
-//			logger.info("file listing: " + Arrays.toString(sourceFileDirectory.listFiles()));
-//			File dipDataFile = sourceFileDirectory.listFiles()[0];
-//			logger.info("File exists: " + dipDataFile.exists() + " -- " + dipDataFile.getAbsolutePath());
-//			FileUtil.validateFile(dipDataFile);
-//			return new DipYYYYMMDDFileParser(dipDataFile, CharacterEncoding.US_ASCII, taxonIds);
-//		}
-//
-//		@Override
-//		protected boolean isTaxonAware() {
-//			return true;
-//		}
-//	},
+	 * DIP is now part of IRefWeb, so it has been commented out since it
+	 * requires the extra manual step of logging in to the DIP website and
+	 * downloading the file (and IRefWeb does not).
+	 */
+	// /**
+	// * The DIP data file must be obtained manually. It is assumed to already
+	// be
+	// * in place when RDF generation commences. It must be the only file in the
+	// * DIP data source directory.
+	// *
+	// */
+	// DIP(DataSource.DIP) {
+	//
+	// @Override
+	// protected FileRecordReader<?> initFileRecordReader(File
+	// sourceFileDirectory, boolean cleanSourceFiles,
+	// File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
+	// logger.info("sourceFileDirectory (exists): (" +
+	// sourceFileDirectory.exists() + ")" + sourceFileDirectory);
+	// logger.info("file listing: " +
+	// Arrays.toString(sourceFileDirectory.listFiles()));
+	// File dipDataFile = sourceFileDirectory.listFiles()[0];
+	// logger.info("File exists: " + dipDataFile.exists() + " -- " +
+	// dipDataFile.getAbsolutePath());
+	// FileUtil.validateFile(dipDataFile);
+	// return new DipYYYYMMDDFileParser(dipDataFile, CharacterEncoding.US_ASCII,
+	// taxonIds);
+	// }
+	//
+	// @Override
+	// protected boolean isTaxonAware() {
+	// return true;
+	// }
+	// },
 
 	/**
-	 *
+	 * 
 	 */
-	PHARMGKB_DISEASE(DataSource.PHARMGKB) {
+	PHARMGKB_DISEASE(DataSource.PHARMGKB, IsTaxonAware.NO, RequiresManualDownload.NO) {
 		@Override
 		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
 				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
 			return new PharmGkbDiseaseFileParser(sourceFileDirectory, cleanSourceFiles);
 		}
-
-		@Override
-		protected boolean isTaxonAware() {
-			return false;
-		}
 	},
 
-	PHARMGKB_GENE(DataSource.PHARMGKB) {
+	PHARMGKB_GENE(DataSource.PHARMGKB, IsTaxonAware.NO, RequiresManualDownload.NO) {
 		@Override
 		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
 				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
 			return new PharmGkbGeneFileParser(sourceFileDirectory, cleanSourceFiles);
 		}
-
-		@Override
-		protected boolean isTaxonAware() {
-			return false;
-		}
 	},
 
-
-
-	PHARMGKB_DRUG(DataSource.PHARMGKB) {
+	PHARMGKB_DRUG(DataSource.PHARMGKB, IsTaxonAware.NO, RequiresManualDownload.NO) {
 		@Override
 		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
 				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
 			return new PharmGkbDrugFileParser(sourceFileDirectory, cleanSourceFiles);
 		}
-
-		@Override
-		protected boolean isTaxonAware() {
-			return false;
-		}
 	},
 	/**
 	 * 
 	 *
 	 */
-	DRUGBANK(DataSource.DRUGBANK) {
+	DRUGBANK(DataSource.DRUGBANK, IsTaxonAware.NO, RequiresManualDownload.NO) {
 		@Override
 		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
 				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
 			return new DrugbankXmlFileRecordReader(sourceFileDirectory, cleanSourceFiles);
 		}
-
-		@Override
-		protected boolean isTaxonAware() {
-			return false;
-		}
 	},
 
 	/**
 	 * 
 	 */
-	HGNC(DataSource.HGNC) {
+	HGNC(DataSource.HGNC, IsTaxonAware.NO, RequiresManualDownload.NO) {
 		@Override
 		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
 				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
 			return new HgncDownloadFileParser(sourceFileDirectory, cleanSourceFiles, WithdrawnRecordTreatment.IGNORE);
 		}
-
-		@Override
-		protected boolean isTaxonAware() {
-			return false;
-		}
 	},
 	/**
 	 * 
 	 */
-	HOMOLOGENE(DataSource.HOMOLOGENE) {
+	HOMOLOGENE(DataSource.HOMOLOGENE, IsTaxonAware.YES, RequiresManualDownload.NO) {
 		@Override
 		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
 				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
 			return new HomoloGeneDataFileParser(sourceFileDirectory, cleanSourceFiles, taxonIds);
 		}
-
-		@Override
-		protected boolean isTaxonAware() {
-			return true;
-		}
 	},
 
 	/**
 	 * 
 	 */
-	IREFWEB(DataSource.IREFWEB) {
+	IREFWEB(DataSource.IREFWEB, IsTaxonAware.YES, RequiresManualDownload.NO) {
 		@Override
 		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
 				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
 			return new IRefWebPsiMitab2_6FileParser(sourceFileDirectory, cleanSourceFiles, taxonIds);
 		}
-
-		@Override
-		protected boolean isTaxonAware() {
-			return true;
-		}
 	},
 	/**
 	 * 
 	 * 
 	 */
-	MGI_ENTREZGENE(DataSource.MGI) {
+	MGI_ENTREZGENE(DataSource.MGI, IsTaxonAware.NO, RequiresManualDownload.NO) {
 		@Override
 		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
 				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
 			return new MGIEntrezGeneFileParser(sourceFileDirectory, cleanSourceFiles);
 		}
-
-		@Override
-		protected boolean isTaxonAware() {
-			return false;
-		}
 	},
-	MGI_MGIPHENOGENO(DataSource.MGI) {
+	MGI_MGIPHENOGENO(DataSource.MGI, IsTaxonAware.NO, RequiresManualDownload.NO) {
 		@Override
 		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
 				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
 			return new MGIPhenoGenoMPFileParser(sourceFileDirectory, cleanSourceFiles);
 		}
-
-		@Override
-		protected boolean isTaxonAware() {
-			return false;
-		}
 	},
-	MGI_MRKLIST(DataSource.MGI) {
+	MGI_MRKLIST(DataSource.MGI, IsTaxonAware.NO, RequiresManualDownload.NO) {
 		@Override
 		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
 				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
 			return new MRKListFileParser(sourceFileDirectory, cleanSourceFiles);
 		}
-
-		@Override
-		protected boolean isTaxonAware() {
-			return false;
-		}
 	},
-	MGI_MRKREFERENCE(DataSource.MGI) {
+	MGI_MRKREFERENCE(DataSource.MGI, IsTaxonAware.NO, RequiresManualDownload.NO) {
 		@Override
 		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
 				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
 			return new MRKReferenceFileParser(sourceFileDirectory, cleanSourceFiles);
 		}
-
-		@Override
-		protected boolean isTaxonAware() {
-			return false;
-		}
 	},
-	MGI_MRKSEQUENCE(DataSource.MGI) {
+	MGI_MRKSEQUENCE(DataSource.MGI, IsTaxonAware.NO, RequiresManualDownload.NO) {
 		@Override
 		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
 				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
 			return new MRKSequenceFileParser(sourceFileDirectory, cleanSourceFiles);
 		}
-
-		@Override
-		protected boolean isTaxonAware() {
-			return false;
-		}
 	},
-	MGI_MRKSWISSPROT(DataSource.MGI) {
+	MGI_MRKSWISSPROT(DataSource.MGI, IsTaxonAware.NO, RequiresManualDownload.NO) {
 		@Override
 		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
 				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
 			return new MRKSwissProtFileParser(sourceFileDirectory, cleanSourceFiles);
 		}
-
-		@Override
-		protected boolean isTaxonAware() {
-			return false;
-		}
 	},
-	MIRBASE(DataSource.MIRBASE) {
+	MIRBASE(DataSource.MIRBASE, IsTaxonAware.NO, RequiresManualDownload.NO) {
 		@Override
 		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
 				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
 			return new MirBaseMiRnaDatFileParser(sourceFileDirectory, cleanSourceFiles);
 		}
-
-		@Override
-		protected boolean isTaxonAware() {
-			return false;
-		}
 	},
-	
+
 	/**
 	 *
 	 */
-	RGD_GENES(DataSource.RGD) {
+	RGD_GENES(DataSource.RGD, IsTaxonAware.NO, RequiresManualDownload.NO) {
 		@Override
 		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
 				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
 			return new RgdRatGeneFileRecordReader(sourceFileDirectory, cleanSourceFiles);
 		}
-
-		@Override
-		protected boolean isTaxonAware() {
-			return false;
-		}
 	},
 
 	/**
 	 *
 	 */
-	RGD_GENE_MP(DataSource.RGD) {
+	RGD_GENE_MP(DataSource.RGD, IsTaxonAware.NO, RequiresManualDownload.NO) {
 		@Override
 		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
 				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
 			return new RgdRatGeneMpAnnotationFileRecordReader(sourceFileDirectory, cleanSourceFiles);
 		}
-
-		@Override
-		protected boolean isTaxonAware() {
-			return false;
-		}
 	},
 
 	/**
 	 *
 	 */
-	RGD_GENE_RDO(DataSource.RGD) {
+	RGD_GENE_RDO(DataSource.RGD, IsTaxonAware.NO, RequiresManualDownload.NO) {
 		@Override
 		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
 				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
 			return new RgdRatGeneRdoAnnotationFileRecordReader(sourceFileDirectory, cleanSourceFiles);
 		}
-
-		@Override
-		protected boolean isTaxonAware() {
-			return false;
-		}
 	},
 
 	/**
 	 *
 	 */
-	RGD_GENE_NBO(DataSource.RGD) {
+	RGD_GENE_NBO(DataSource.RGD, IsTaxonAware.NO, RequiresManualDownload.NO) {
 		@Override
 		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
 				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
 			return new RgdRatGeneNboAnnotationFileRecordReader(sourceFileDirectory, cleanSourceFiles);
 		}
-
-		@Override
-		protected boolean isTaxonAware() {
-			return false;
-		}
 	},
 
 	/**
 	 *
 	 */
-	RGD_GENE_PW(DataSource.RGD) {
+	RGD_GENE_PW(DataSource.RGD, IsTaxonAware.NO, RequiresManualDownload.NO) {
 		@Override
 		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
 				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
 			return new RgdRatGenePwAnnotationFileRecordReader(sourceFileDirectory, cleanSourceFiles);
 		}
-
-		@Override
-		protected boolean isTaxonAware() {
-			return false;
-		}
 	},
 
 	/**
 	 *
 	 */
-	PREMOD_HUMAN(DataSource.PREMOD) {
+	PREMOD_HUMAN(DataSource.PREMOD, IsTaxonAware.NO, RequiresManualDownload.NO) {
 		@Override
 		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
 				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
 			return new HumanPReModModuleTabFileParser(sourceFileDirectory, cleanSourceFiles);
 		}
-
-		@Override
-		protected boolean isTaxonAware() {
-			return false;
-		}
 	},
-	PREMOD_MOUSE(DataSource.PREMOD) {
+	PREMOD_MOUSE(DataSource.PREMOD, IsTaxonAware.NO, RequiresManualDownload.NO) {
 		@Override
 		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
 				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
 			return new MousePReModModuleTabFileParser(sourceFileDirectory, cleanSourceFiles);
 		}
-
-		@Override
-		protected boolean isTaxonAware() {
-			return false;
-		}
 	},
 
 	/**
 	 *
 	 */
-	PR_MAPPINGFILE(DataSource.PR) {
+	PR_MAPPINGFILE(DataSource.PR, IsTaxonAware.NO, RequiresManualDownload.NO) {
 		@Override
 		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
 				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
 			return new ProMappingFileParser(sourceFileDirectory, cleanSourceFiles);
 		}
-
-		@Override
-		protected boolean isTaxonAware() {
-			return false;
-		}
 	},
 	/**
 	 *
 	 */
-	REACTOME_UNIPROT2PATHWAYSTID(DataSource.REACTOME) {
+	REACTOME_UNIPROT2PATHWAYSTID(DataSource.REACTOME, IsTaxonAware.YES, RequiresManualDownload.NO) {
 		@Override
 		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
 				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
 			return new ReactomeUniprot2PathwayStidTxtFileParser(sourceFileDirectory, cleanSourceFiles, idListDir,
 					taxonIds);
 		}
-
-		@Override
-		protected boolean isTaxonAware() {
-			return true;
-		}
 	},
 	/**
 	 *
 	 */
-	REFSEQ_RELEASECATALOG(DataSource.REFSEQ, 3) {
+	REFSEQ_RELEASECATALOG(DataSource.REFSEQ, 3, IsTaxonAware.YES, RequiresManualDownload.NO) {
 		@Override
 		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
 				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
 			return new RefSeqReleaseCatalogFileParser(sourceFileDirectory, cleanSourceFiles, taxonIds);
 		}
-
-		@Override
-		protected boolean isTaxonAware() {
-			return true;
-		}
 	},
 	/**
 	 *
 	 */
-	NCBIGENE_GENE2REFSEQ(DataSource.EG) {
+	NCBIGENE_GENE2REFSEQ(DataSource.EG, IsTaxonAware.YES, RequiresManualDownload.NO) {
 		@Override
 		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
 				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
 			return new EntrezGene2RefseqFileParser(sourceFileDirectory, cleanSourceFiles, taxonIds);
 		}
-
-		@Override
-		protected boolean isTaxonAware() {
-			return true;
-		}
 	},
-	NCBIGENE_GENEINFO(DataSource.EG) {
+	NCBIGENE_GENEINFO(DataSource.EG, IsTaxonAware.YES, RequiresManualDownload.NO) {
 		@Override
 		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
 				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
 			return new EntrezGeneInfoFileParser(sourceFileDirectory, cleanSourceFiles, taxonIds);
 		}
-
-		@Override
-		protected boolean isTaxonAware() {
-			return true;
-		}
-
 	},
-	NCBIGENE_MIM2GENE(DataSource.EG) {
+	NCBIGENE_MIM2GENE(DataSource.EG, IsTaxonAware.NO, RequiresManualDownload.NO) {
 		@Override
 		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
 				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
 			return new EntrezGeneMim2GeneFileParser(sourceFileDirectory, cleanSourceFiles);
 		}
-
-		@Override
-		protected boolean isTaxonAware() {
-			return false;
-		}
 	},
-	NCBIGENE_REFSEQUNIPROTCOLLAB(DataSource.EG) {
+	NCBIGENE_REFSEQUNIPROTCOLLAB(DataSource.EG, IsTaxonAware.YES, RequiresManualDownload.NO) {
 		@Override
 		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
 				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
 			return new EntrezGeneRefSeqUniprotKbCollabFileParser(sourceFileDirectory, cleanSourceFiles, idListDir,
 					taxonIds);
 		}
-
-		@Override
-		protected boolean isTaxonAware() {
-			return true;
-		}
 	},
 	/**
 	 */
-	GOA(DataSource.GOA, 13) {
+	GOA(DataSource.GOA, 13, IsTaxonAware.YES, RequiresManualDownload.NO) {
 		@Override
 		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
 				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
 			return new GpAssociationGoaUniprotFileParser(sourceFileDirectory, cleanSourceFiles, idListDir, taxonIds);
 		}
-
-		@Override
-		protected boolean isTaxonAware() {
-			return true;
-		}
 	},
 	/**
 	 */
-	UNIPROT_SWISSPROT(DataSource.UNIPROT) {
+	UNIPROT_SWISSPROT(DataSource.UNIPROT, IsTaxonAware.YES, RequiresManualDownload.NO) {
 		@Override
 		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
 				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
 			return new SwissProtXmlFileRecordReader(sourceFileDirectory, cleanSourceFiles, taxonIds);
 		}
-
-		@Override
-		protected boolean isTaxonAware() {
-			return true;
-		}
 	},
-	UNIPROT_IDMAPPING(DataSource.UNIPROT, 3) {
+	UNIPROT_IDMAPPING(DataSource.UNIPROT, 3, IsTaxonAware.YES, RequiresManualDownload.NO) {
 		@Override
 		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
 				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
 			return new UniProtIDMappingFileRecordReader(sourceFileDirectory, cleanSourceFiles, taxonIds);
 		}
-
-		@Override
-		protected boolean isTaxonAware() {
-			return true;
-		}
 	},
 	// UNIPROT_TREMBL(DataSource.UNIPROT, 33, 1000000) {
 	// @Override
@@ -584,65 +435,44 @@ protected boolean isTaxonAware() {
 	// cleanSourceFiles, taxonIds);
 	// }
 	// },
-	UNIPROT_TREMBL_SPARSE(DataSource.UNIPROT, 33, 1000000) {
+	UNIPROT_TREMBL_SPARSE(DataSource.UNIPROT, 33, 1000000, IsTaxonAware.YES, RequiresManualDownload.NO) {
 		@Override
 		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
 				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
 			return new SparseTremblXmlFileRecordReader(sourceFileDirectory, cleanSourceFiles, taxonIds);
 		}
-
-		@Override
-		protected boolean isTaxonAware() {
-			return true;
-		}
 	},
-	
 
 	/**
 	 * 
 	 */
-	INTERPRO_NAMESDAT(DataSource.INTERPRO) {
+	INTERPRO_NAMESDAT(DataSource.INTERPRO, IsTaxonAware.NO, RequiresManualDownload.NO) {
 		@Override
 		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
 				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
 			return new InterProNamesDatFileParser(sourceFileDirectory, cleanSourceFiles);
 		}
-
-		@Override
-		protected boolean isTaxonAware() {
-			return false;
-		}
 	},
-	INTERPRO_INTERPRO2GO(DataSource.INTERPRO) {
+	INTERPRO_INTERPRO2GO(DataSource.INTERPRO, IsTaxonAware.NO, RequiresManualDownload.NO) {
 		@Override
 		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
 				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
 			return new InterPro2GoFileParser(sourceFileDirectory, cleanSourceFiles);
 		}
-
-		@Override
-		protected boolean isTaxonAware() {
-			return false;
-		}
 	},
-	INTERPRO_PROTEIN2IPR(DataSource.INTERPRO, 13) {
+	INTERPRO_PROTEIN2IPR(DataSource.INTERPRO, 13, IsTaxonAware.YES, RequiresManualDownload.NO) {
 		@Override
 		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
 				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
 			return new InterProProtein2IprDatFileParser(sourceFileDirectory, cleanSourceFiles, idListDir, taxonIds);
 		}
-
-		@Override
-		protected boolean isTaxonAware() {
-			return true;
-		}
 	},
-	
+
 	/**
 	 * The HPRD HPRD_ID_MAPPINGS.txt file must be obtained manually. It is
 	 * assumed to already be in place when RDF generation commences.
 	 */
-	HPRD_ID_MAPPINGS(DataSource.HPRD) {
+	HPRD_ID_MAPPINGS(DataSource.HPRD, IsTaxonAware.NO, RequiresManualDownload.YES) {
 
 		@Override
 		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
@@ -652,17 +482,12 @@ protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boo
 			FileUtil.validateFile(hprdIdMappingFile);
 			return new HprdIdMappingsTxtFileParser(hprdIdMappingFile, CharacterEncoding.US_ASCII);
 		}
-
-		@Override
-		protected boolean isTaxonAware() {
-			return false;
-		}
 	},
 	/**
 	 * The TRANSFAC gene.dat and matrix.dat files must be obtained manually.
 	 * They are assumed to already be in place when RDF generation commences.
 	 */
-	TRANSFAC_GENE(DataSource.TRANSFAC) {
+	TRANSFAC_GENE(DataSource.TRANSFAC, IsTaxonAware.NO, RequiresManualDownload.YES) {
 		@Override
 		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
 				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
@@ -670,14 +495,9 @@ protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boo
 			FileUtil.validateFile(transfacGeneDatFile);
 			return new TransfacGeneDatFileParser(transfacGeneDatFile, CharacterEncoding.ISO_8859_1);
 		}
-
-		@Override
-		protected boolean isTaxonAware() {
-			return false;
-		}
 	},
 
-	TRANSFAC_MATRIX(DataSource.TRANSFAC) {
+	TRANSFAC_MATRIX(DataSource.TRANSFAC, IsTaxonAware.NO, RequiresManualDownload.YES) {
 		@Override
 		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
 				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
@@ -685,17 +505,12 @@ protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boo
 			FileUtil.validateFile(transfacMatrixDatFile);
 			return new TransfacMatrixDatFileParser(transfacMatrixDatFile, CharacterEncoding.ISO_8859_1);
 		}
-
-		@Override
-		protected boolean isTaxonAware() {
-			return false;
-		}
 	},
 	/**
 	 * The GAD all.txt data file must be obtained manually. It is assumed to
 	 * already be in place when RDF generation commences.
 	 */
-	GAD(DataSource.GAD) {
+	GAD(DataSource.GAD, IsTaxonAware.NO, RequiresManualDownload.YES) {
 		@Override
 		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
 				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
@@ -704,41 +519,25 @@ protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boo
 			FileUtil.validateFile(gadAllTxtFile);
 			return new GeneticAssociationDbAllTxtFileParser(gadAllTxtFile, CharacterEncoding.US_ASCII);
 		}
-
-		@Override
-		protected boolean isTaxonAware() {
-			return false;
-		}
 	},
 	/**
 	 *
 	 */
-	OMIM(DataSource.OMIM) {
+	OMIM(DataSource.OMIM, IsTaxonAware.NO, RequiresManualDownload.YES) {
 		@Override
 		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
 				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
 			return new OmimTxtFileParser(sourceFileDirectory, cleanSourceFiles);
 		}
-
-		@Override
-		protected boolean isTaxonAware() {
-			return false;
-		}
 	},
-	PHARMGKB_RELATION(DataSource.PHARMGKB) {
+	PHARMGKB_RELATION(DataSource.PHARMGKB, IsTaxonAware.NO, RequiresManualDownload.YES) {
 		@Override
 		protected FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
 				File idListDir, Set<NcbiTaxonomyID> taxonIds) throws IOException {
 			File pharmgkbRelationshipsDataFile = new File(sourceFileDirectory, "relationships.tsv");
 			return new PharmGkbRelationFileParser(pharmgkbRelationshipsDataFile, CharacterEncoding.UTF_8);
 		}
-
-		@Override
-		protected boolean isTaxonAware() {
-			return false;
-		}
-	}
-	;
+	};
 
 	public enum Split {
 		BY_STAGES, NONE;
@@ -774,22 +573,35 @@ public enum Split {
 
 	private final Long blockRecordCount;
 
-	private FileDataSource(DataSource dataSource, int numberOfStages, long blockRecordCount) {
+	private final IsTaxonAware isTaxonAware;
+
+	private final RequiresManualDownload requiresManualDownload;
+
+	private FileDataSource(DataSource dataSource, int numberOfStages, long blockRecordCount, IsTaxonAware isTaxonAware,
+			RequiresManualDownload requiresManualDownload) {
 		this.dataSource = dataSource;
 		this.numberOfStages = numberOfStages;
 		this.blockRecordCount = blockRecordCount;
+		this.isTaxonAware = isTaxonAware;
+		this.requiresManualDownload = requiresManualDownload;
 	}
 
-	private FileDataSource(DataSource dataSource, int numberOfStages) {
+	private FileDataSource(DataSource dataSource, int numberOfStages, IsTaxonAware isTaxonAware,
+			RequiresManualDownload requiresManualDownload) {
 		this.dataSource = dataSource;
 		this.numberOfStages = numberOfStages;
 		this.blockRecordCount = null;
+		this.isTaxonAware = isTaxonAware;
+		this.requiresManualDownload = requiresManualDownload;
 	}
 
-	private FileDataSource(DataSource dataSource) {
+	private FileDataSource(DataSource dataSource, IsTaxonAware isTaxonAware,
+			RequiresManualDownload requiresManualDownload) {
 		this.dataSource = dataSource;
 		this.numberOfStages = 1;
 		this.blockRecordCount = null;
+		this.isTaxonAware = isTaxonAware;
+		this.requiresManualDownload = requiresManualDownload;
 	}
 
 	public DataSource dataSource() {
@@ -800,6 +612,14 @@ public Long blockRecordCount() {
 		return blockRecordCount;
 	}
 
+	public boolean isTaxonAware() {
+		return isTaxonAware == IsTaxonAware.YES;
+	}
+
+	public boolean requiresManualDownload() {
+		return requiresManualDownload == RequiresManualDownload.YES;
+	}
+
 	// /**
 	// * @param stageNum
 	// * @param baseSourceFileDirectory
@@ -862,8 +682,6 @@ public Long blockRecordCount() {
 	protected abstract FileRecordReader<?> initFileRecordReader(File sourceFileDirectory, boolean cleanSourceFiles,
 			File idListFileDirectory, Set<NcbiTaxonomyID> taxonIds) throws IOException;
 
-	protected abstract boolean isTaxonAware();
-
 	// /**
 	// * To be implemented by each DataSourceRdfGenerator instance.
 	// *
@@ -986,23 +804,46 @@ public int getNumberOfStages() {
 	 * @param args
 	 */
 	public static void main(String[] args) {
-		int stageCount = 0;
-		System.out.println("BY STAGES: ");
-		for (FileDataSource source : FileDataSource.values()) {
-			for (int i = 0; i < source.getNumberOfStages(); i++) {
-				System.out.println("Global Stage: " + (i + 1 + stageCount) + " ==> " + source.name() + " Local Stage: "
-						+ (i + 1));
+		// int stageCount = 0;
+		// System.out.println("BY STAGES: ");
+		// for (FileDataSource source : FileDataSource.values()) {
+		// for (int i = 0; i < source.getNumberOfStages(); i++) {
+		// System.out.println("Global Stage: " + (i + 1 + stageCount) + " ==> "
+		// + source.name() + " Local Stage: "
+		// + (i + 1));
+		// }
+		// stageCount += source.getNumberOfStages();
+		// }
+		// System.out.println("Total # of stages: " + stageCount + "\n\n");
+		// stageCount = 0;
+		// System.out.println("SINGLE STAGE PER SOURCE:");
+		// for (FileDataSource source : FileDataSource.values()) {
+		// System.out.println("SGE index: " + (stageCount + 1) + " ==> " +
+		// source.name());
+		// stageCount++;
+		// }
+
+		List<String> autoDownloadSources = new ArrayList<String>();
+		List<String> manualDownloadSources = new ArrayList<String>();
+
+		for (FileDataSource fds : values()) {
+			if (fds.requiresManualDownload()) {
+				manualDownloadSources.add(fds.name());
+			} else {
+				autoDownloadSources.add(fds.name());
 			}
-			stageCount += source.getNumberOfStages();
 		}
-		System.out.println("Total # of stages: " + stageCount + "\n\n");
-		stageCount = 0;
-		System.out.println("SINGLE STAGE PER SOURCE:");
-		for (FileDataSource source : FileDataSource.values()) {
-			System.out.println("SGE index: " + (stageCount + 1) + " ==> " + source.name());
-			stageCount++;
+		
+		Collections.sort(autoDownloadSources);
+		Collections.sort(manualDownloadSources);
+		
+		for (String name : autoDownloadSources) {
+			System.out.println("DS: " + name);
+		}
+		System.out.println("DS: ==== BELOW REQUIRE MANUAL DOWNLOAD OF DATA SOURCE FILE ====");
+		for (String name : manualDownloadSources) {
+			System.out.println("DS: ==== " + name);
 		}
-
 	}
 
 }
diff --git a/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/FileDataSourceParams.java b/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/FileDataSourceParams.java
new file mode 100644
index 0000000..283070f
--- /dev/null
+++ b/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/FileDataSourceParams.java
@@ -0,0 +1,49 @@
+package edu.ucdenver.ccp.datasource.rdfizer.rdf.ice;
+
+/*
+ * #%L
+ * Colorado Computational Pharmacology's datasource
+ * 							project
+ * %%
+ * Copyright (C) 2012 - 2016 Regents of the University of Colorado
+ * %%
+ * Redistribution and use in source and binary forms, with or without modification,
+ * are permitted provided that the following conditions are met:
+ * 
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ * 
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ * 
+ * 3. Neither the name of the Regents of the University of Colorado nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software without
+ *    specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * #L%
+ */
+
+public class FileDataSourceParams {
+
+	public static enum RequiresManualDownload {
+		YES,
+		NO
+	}
+	
+	public static enum IsTaxonAware {
+		YES,
+		NO
+	}
+	
+}

From ca9d9ddf2dde74dc6a9de193187cdbaaf73f5b1e Mon Sep 17 00:00:00 2001
From: bill-baumgartner <bill.baumgartner@gmail.com>
Date: Mon, 8 Feb 2016 18:04:04 -0700
Subject: [PATCH 16/36] scripts reformulated to work with datasource names

dependence on the integer mapping to datasources has been removed
---
 ...wnload-datasources-and-generate-triples.sh | 23 ++++++++++---------
 ...le-indices.sh => list-datasource-names.sh} |  7 +++---
 .../scripts/pom-rdf-gen-9606.xml              |  6 ++---
 .../scripts/pom-rdf-gen-modelorgs.xml         |  6 ++---
 datasource-rdfizer/scripts/pom-rdf-gen.xml    | 10 +++-----
 ....xml => pom-rdf-list-datasource-names.xml} |  0
 6 files changed, 23 insertions(+), 29 deletions(-)
 rename datasource-rdfizer/scripts/{list-download-file-indices.sh => list-datasource-names.sh} (76%)
 rename datasource-rdfizer/scripts/{pom-rdf-gen-ids.xml => pom-rdf-list-datasource-names.xml} (100%)

diff --git a/datasource-rdfizer/scripts/download-datasources-and-generate-triples.sh b/datasource-rdfizer/scripts/download-datasources-and-generate-triples.sh
index 260a5ca..70f5035 100755
--- a/datasource-rdfizer/scripts/download-datasources-and-generate-triples.sh
+++ b/datasource-rdfizer/scripts/download-datasources-and-generate-triples.sh
@@ -9,8 +9,8 @@ function print_usage {
     echo "$(basename $0) [OPTIONS]"
     echo "  <-d <download-directory>>: The directory into which to place the downloaded datasource files."
     echo "  <-r <rdf-output-directory>>: The directory into which to place the RDF triples parsed from the datasource files."
-    echo "  [-i <datasource-indices>]: The indices of the datasources to download; if not specified, all available datasources will be downloaded."
-    echo "  [-t <NCBI taxonomy IDS]: A comma-separated list of taxonomy IDs.  Only records for these IDs will be included in the RDF triple output.  If neither -t nor -m is specified, all records will be included."
+    echo "  [-i <datasource-names>]: The names of the datasources to download; if not specified, all available datasources will be downloaded."
+    echo "  [-t <NCBI taxonomy IDS]: A comma-separated list of taxonomy IDs.  Only records for these IDs will be included in the RDF triple output where applicable.  If neither -t nor -m is specified, all records will be included."
     echo "  [-m]: Include only human and the 7 model organisms in the generated RDF. If neither -t nor -m is specified, all records will be included."
 }
 
@@ -34,9 +34,9 @@ while getopts "d:r:i:t:mh" OPTION; do
         # parsing the downloaded datasource files should be placed.
         r) RDF_OUTPUT_DIR=$OPTARG
            ;;
-        # A comma-separated list of the IDs of the files to be downloaded (as
-        # shown by `list-download-file-indices.sh`
-        i) FILE_INDICES=$OPTARG
+        # A comma-separated list of the names of the datasources to be downloaded (as
+        # shown by `list-datasource-names.sh`).
+        i) DS_NAMES=$OPTARG
            ;;
         # Include only data for a user-specified taxonomy ID in the RDF output.
         t) set_taxon_ids $OPTARG
@@ -61,17 +61,18 @@ if ! [[ -e README.md ]]; then
     exit 1
 fi
 
-if [[ -z $FILE_INDICES ]]; then
-    FILE_INDICES=$(datasource-rdfizer/scripts/list-download-file-indices.sh \
-        | cut -d " " -f 2 \
+if [[ -z $DS_NAMES ]]; then
+    DS_NAMES=$(datasource-rdfizer/scripts/list-datasource-names.sh \
+        | grep -v "====" \
         | xargs \
         | tr " " ",")
 fi
 
-for INDEX in $(echo $FILE_INDICES | tr -d "[:blank:]" | tr "," " "); do
+echo $DS_NAMES
+
+for INDEX in $(echo $DS_NAMES | tr -d "[:blank:]" | tr "," " "); do  # one mvn run per datasource name
     mvn -f datasource-rdfizer/scripts/pom-rdf-gen.xml exec:exec \
-        -DstartStage=$INDEX \
-        -DnumStages=1 \
+        -DdatasourceNames=$INDEX \
         -DtaxonIDs=$TAXON_IDS \
         -DbaseSourceDir=$DOWNLOAD_DIR \
         -DbaseRdfDir=$RDF_OUTPUT_DIR \
diff --git a/datasource-rdfizer/scripts/list-download-file-indices.sh b/datasource-rdfizer/scripts/list-datasource-names.sh
similarity index 76%
rename from datasource-rdfizer/scripts/list-download-file-indices.sh
rename to datasource-rdfizer/scripts/list-datasource-names.sh
index b208611..e8dde50 100755
--- a/datasource-rdfizer/scripts/list-download-file-indices.sh
+++ b/datasource-rdfizer/scripts/list-datasource-names.sh
@@ -10,10 +10,11 @@ fi
 
 TMPFILE=$(mktemp -t mvn)
 
-mvn -f datasource-rdfizer/scripts/pom-rdf-gen-ids.xml exec:exec \
+mvn -f datasource-rdfizer/scripts/pom-rdf-list-datasource-names.xml exec:exec \
     | tee $TMPFILE \
-    | grep "SGE index:" \
-    | cut -b 18-
+    | grep "DS:" \
+    | cut -c 12-
+
 
 # echo ${PIPESTATUS[*]}
 
diff --git a/datasource-rdfizer/scripts/pom-rdf-gen-9606.xml b/datasource-rdfizer/scripts/pom-rdf-gen-9606.xml
index da7eabd..ded2388 100644
--- a/datasource-rdfizer/scripts/pom-rdf-gen-9606.xml
+++ b/datasource-rdfizer/scripts/pom-rdf-gen-9606.xml
@@ -35,15 +35,13 @@
 						<argument>-classpath</argument>
 						<classpath />
 						<argument>edu.ucdenver.ccp.datasource.rdfizer.rdf.ice.IceRdfGenerator</argument>
-						<argument>INDEX</argument>
+						<argument>NAME</argument>
 						<argument>${baseSourceDir}</argument> <!-- baseSourceDir -->
 						<argument>${baseRdfDir}</argument> <!-- baseRdfDir -->
 						<argument>${compressRdf}</argument> <!-- compressRdf -->
 						<argument>${outputRecordLimit}</argument> <!-- outputRecordLimit -->
 						<argument>9606</argument> <!-- taxon identifiers to limit output to, comma-delimited -->
-						<argument>${startStage}</argument> <!-- startStage -->
-						<argument>${numStages}</argument> <!-- numStages -->
-						<argument>NONE</argument> <!-- Split type -->
+						<argument>${datasourceNames}</argument> <!-- datasource names to process, comma-delimited -->
 						<argument>${date}</argument> <!-- date -->
 					</arguments>
 				</configuration>
diff --git a/datasource-rdfizer/scripts/pom-rdf-gen-modelorgs.xml b/datasource-rdfizer/scripts/pom-rdf-gen-modelorgs.xml
index bd4c1bf..13acd49 100644
--- a/datasource-rdfizer/scripts/pom-rdf-gen-modelorgs.xml
+++ b/datasource-rdfizer/scripts/pom-rdf-gen-modelorgs.xml
@@ -35,15 +35,13 @@
 						<argument>-classpath</argument>
 						<classpath />
 						<argument>edu.ucdenver.ccp.datasource.rdfizer.rdf.ice.IceRdfGenerator</argument>
-						<argument>INDEX</argument>
+						<argument>NAME</argument>
 						<argument>${baseSourceDir}</argument> <!-- baseSourceDir -->
 						<argument>${baseRdfDir}</argument> <!-- baseRdfDir -->
 						<argument>${compressRdf}</argument> <!-- compressRdf -->
 						<argument>${outputRecordLimit}</argument> <!-- outputRecordLimit -->
 						<argument>9606,741158,63221,10090,947985,80274,57486,477816,477815,46456,35531,179238,1266728,116058,10092,10091,39442,10116,947987,7227,4932,947046,947045,947044,947043,947042,947041,947040,947039,947038,947037,947036,947035,929629,929587,929586,929585,927258,927256,889517,765312,764102,764101,764100,764099,764098,764097,721032,717647,658763,643680,614665,614664,580240,580239,574961,545124,538976,538975,502869,471861,471859,471510,468558,466209,464025,462210,462209,41870,307796,285006,1247190,1227742,1220494,1218710,1216859,1216345,1204498,1201112,1196866,1182968,1182967,1182966,1177187,1162674,1162673,1162672,1162671,1158205,1158204,1149757,1144731,1138861,1097555,1095001,1087981,559292,6239,7955,3702</argument> <!-- taxon identifiers to limit output to, comma-delimited -->
-						<argument>${startStage}</argument> <!-- startStage -->
-						<argument>${numStages}</argument> <!-- numStages -->
-						<argument>NONE</argument> <!-- Split type -->
+						<argument>${datasourceNames}</argument> <!-- datasource names to process, comma-delimited -->
 						<argument>${date}</argument> <!-- date -->
 					</arguments>
 				</configuration>
diff --git a/datasource-rdfizer/scripts/pom-rdf-gen.xml b/datasource-rdfizer/scripts/pom-rdf-gen.xml
index 0ab015d..c9f880b 100644
--- a/datasource-rdfizer/scripts/pom-rdf-gen.xml
+++ b/datasource-rdfizer/scripts/pom-rdf-gen.xml
@@ -35,7 +35,7 @@
 						<argument>-classpath</argument>
 						<classpath />
 						<argument>edu.ucdenver.ccp.datasource.rdfizer.rdf.ice.IceRdfGenerator</argument>
-                        <argument>INDEX</argument>
+                        <argument>NAME</argument>
                         <!-- Directory for downloaded datasources -->
                         <argument>${baseSourceDir}</argument>
                         <!-- Directory into which to write RDF triple files
@@ -48,12 +48,8 @@
                         <!-- Taxon identifiers to limit output to,
                              comma-delimited; use "EMPTY" to not limit output -->
                         <argument>${taxonIDs}</argument>
-                        <!-- startStage -->
-                        <argument>${startStage}</argument>
-                        <!-- numStages -->
-                        <argument>${numStages}</argument>
-                        <!-- Split type -->
-                        <argument>NONE</argument>
+                        <!-- comma-delimited list of datasources to process -->
+                        <argument>${datasourceNames}</argument>
                         <!-- date -->
                         <argument>${date}</argument>
 					</arguments>
diff --git a/datasource-rdfizer/scripts/pom-rdf-gen-ids.xml b/datasource-rdfizer/scripts/pom-rdf-list-datasource-names.xml
similarity index 100%
rename from datasource-rdfizer/scripts/pom-rdf-gen-ids.xml
rename to datasource-rdfizer/scripts/pom-rdf-list-datasource-names.xml

From 10962afaaf097bcfd5346ae14590314a939cca00 Mon Sep 17 00:00:00 2001
From: bill-baumgartner <bill.baumgartner@gmail.com>
Date: Mon, 8 Feb 2016 21:30:50 -0700
Subject: [PATCH 17/36] updated expected file header

ID --> IDs
---
 .../ccp/datasource/fileparsers/mgi/MRKSequenceFileParser.java  | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/mgi/MRKSequenceFileParser.java b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/mgi/MRKSequenceFileParser.java
index 56e2ac3..2ab6dbe 100755
--- a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/mgi/MRKSequenceFileParser.java
+++ b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/mgi/MRKSequenceFileParser.java
@@ -66,8 +66,7 @@
  * 
  */
 public class MRKSequenceFileParser extends SingleLineFileRecordReader<MRKSequenceFileData> {
-
-	private static final String HEADER = "MGI Marker Accession ID\tMarker Symbol\tStatus\tMarker Type\tMarker Name\tcM position\tChromosome\tGenome Coordinate Start\tGenome Coordinate End\tStrand\tGenBank ID\tRefSeq transcript ID\tVEGA transcript ID\tEnsembl transcript ID\tUniProt ID\tTrEMBL ID\tVEGA protein ID\tEnsembl protein ID\tRefSeq protein ID\tUniGene ID";
+	private static final String HEADER = "MGI Marker Accession ID\tMarker Symbol\tStatus\tMarker Type\tMarker Name\tcM position\tChromosome\tGenome Coordinate Start\tGenome Coordinate End\tStrand\tGenBank IDs\tRefSeq transcript IDs\tVEGA transcript IDs\tEnsembl transcript IDs\tUniProt IDs\tTrEMBL IDs\tVEGA protein IDs\tEnsembl protein IDs\tRefSeq protein IDs\tUniGene IDs";
 
 	private static final Logger logger = Logger.getLogger(MRKSequenceFileParser.class);
 

From 8388b8586dfa23e0227c5f044479aa1c57950bce Mon Sep 17 00:00:00 2001
From: bill-baumgartner <bill.baumgartner@gmail.com>
Date: Mon, 8 Feb 2016 21:36:41 -0700
Subject: [PATCH 18/36] Added new field: comment

---
 .../ncbi/gene/EntrezGeneMim2GeneFileData.java       | 13 +++++++++----
 .../ncbi/gene/EntrezGeneMim2GeneFileParser.java     |  2 +-
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ncbi/gene/EntrezGeneMim2GeneFileData.java b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ncbi/gene/EntrezGeneMim2GeneFileData.java
index bec2aef..c531dd8 100755
--- a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ncbi/gene/EntrezGeneMim2GeneFileData.java
+++ b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ncbi/gene/EntrezGeneMim2GeneFileData.java
@@ -79,19 +79,23 @@ public class EntrezGeneMim2GeneFileData extends SingleLineFileRecord {
 	@RecordField(comment = "The accession assigned by MedGen to this phenotype.  If the accession starts with a C followed by integers, the identifier is a concept ID (CUI) from UMLS. http://www.nlm.nih.gov/research/umls/ If it starts with a CN, no CUI in UMLS was identified, and NCBI created a placeholder.")
 	private final MedGenId medGenId;
 
+	@RecordField
+	private final String comment;
+
 	public EntrezGeneMim2GeneFileData(OmimID mimNumber, EntrezGeneID entrezGeneID, String associationType,
-			Set<String> sources, MedGenId medGenId, long byteOffset, long lineNumber) {
+			Set<String> sources, MedGenId medGenId, String comment, long byteOffset, long lineNumber) {
 		super(byteOffset, lineNumber);
 		this.mimNumber = mimNumber;
 		this.entrezGeneID = entrezGeneID;
 		this.associationType = associationType;
 		this.sources = sources;
 		this.medGenId = medGenId;
+		this.comment = comment;
 	}
 
 	public static EntrezGeneMim2GeneFileData parseMim2GeneLine(Line line) {
-		String[] toks = line.getText().split("\\t");
-		if (toks.length == 5) {
+		String[] toks = line.getText().split("\\t", -1);
+		if (toks.length == 6) {
 			OmimID mimNumber = new OmimID(toks[0]);
 			EntrezGeneID entrezGeneID = (toks[1].equals("-")) ? null : new EntrezGeneID(toks[1]);
 			String associationType = toks[2];
@@ -103,7 +107,8 @@ public static EntrezGeneMim2GeneFileData parseMim2GeneLine(Line line) {
 				}
 			}
 			MedGenId medGenId = (toks[4].equals("-")) ? null : new MedGenId(toks[4].trim());
-			return new EntrezGeneMim2GeneFileData(mimNumber, entrezGeneID, associationType, sources, medGenId,
+			String comment = (toks[5].equals("-")) ? null : toks[5].trim();
+			return new EntrezGeneMim2GeneFileData(mimNumber, entrezGeneID, associationType, sources, medGenId, comment,
 					line.getByteOffset(), line.getLineNumber());
 		}
 
diff --git a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ncbi/gene/EntrezGeneMim2GeneFileParser.java b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ncbi/gene/EntrezGeneMim2GeneFileParser.java
index 64ed4ac..7042ccc 100755
--- a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ncbi/gene/EntrezGeneMim2GeneFileParser.java
+++ b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ncbi/gene/EntrezGeneMim2GeneFileParser.java
@@ -52,7 +52,7 @@
  */
 public class EntrezGeneMim2GeneFileParser extends SingleLineFileRecordReader<EntrezGeneMim2GeneFileData> {
 
-	private static final String HEADER = "#MIM number\tGeneID\ttype\tSource\tMedGenCUI";
+	private static final String HEADER = "#MIM number\tGeneID\ttype\tSource\tMedGenCUI\tComment";
 	public static final String FTP_FILE_NAME = "mim2gene_medgen";
 	public static final CharacterEncoding ENCODING = CharacterEncoding.US_ASCII;
 

From c139c7875e398072fdbe9e27adbd76654f270219 Mon Sep 17 00:00:00 2001
From: bill-baumgartner <bill.baumgartner@gmail.com>
Date: Tue, 9 Feb 2016 10:00:51 -0700
Subject: [PATCH 19/36] Updated allowed accession prefixes

---
 .../NucleotideAccessionResolver.java          | 49 +++++++++++--------
 .../identifiers/ProteinAccessionResolver.java | 18 ++++++-
 2 files changed, 45 insertions(+), 22 deletions(-)

diff --git a/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/NucleotideAccessionResolver.java b/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/NucleotideAccessionResolver.java
index f192f76..52e4ec5 100644
--- a/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/NucleotideAccessionResolver.java
+++ b/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/NucleotideAccessionResolver.java
@@ -66,23 +66,26 @@ public class NucleotideAccessionResolver {
 
 	private static final Pattern ACC_PATTERN = Pattern.compile("([A-Z]+)\\d+\\.?\\d*");
 
-	private static final Set<String> GENBANK_ID_PREFIXES = CollectionsUtil.createSet("CH", "CM", "DS", "EM", "EN",
-			"EP", "EQ", "FA", "GG", "GL", "JH", "KB", "H", "N", "T", "R", "W", "AA", "AI", "AW", "BE", "BF", "BG",
-			"BI", "BM", "BQ", "BU", "CA", "CB", "CD", "CF", "CK", "CN", "CO", "CV", "CX", "DN", "DR", "DT", "DV", "DW",
-			"DY", "EB", "EC", "EE", "EG", "EH", "EL", "ES", "EV", "EW", "EX", "EY", "FC", "FD", "FE", "FF", "FG", "FK",
-			"FL", "GD", "GE", "GH", "GO", "GR", "GT", "GW", "HO", "HS", "JG", "JK", "JZ", "U", "AF", "AY", "DQ", "EF",
-			"EU", "FJ", "GQ", "GU", "HM", "HQ", "JF", "JN", "JQ", "JX", "KC", "AE", "CP", "CY", "B", "AQ", "AZ", "BH",
-			"BZ", "CC", "CE", "CG", "CL", "CW", "CZ", "DU", "DX", "ED", "EI", "EJ", "EK", "ER", "ET", "FH", "FI", "GS",
-			"HN", "HR", "JJ", "JM", "JS", "JY", "AC", "DP", "I", "AR", "DZ", "EA", "GC", "GP", "GV", "GX", "GY", "GZ",
-			"HJ", "HK", "HL", "G", "BV", "GF", "BK", "BL", "GJ", "GK", "EZ", "HP", "JI", "JL", "JO", "JP", "JR", "JT",
-			"JU", "JV", "JW", "KA", "S", "AD", "AH", "AS", "BC", "BT", "J", "K", "L", "M", "N");
+	private static final Set<String> GENBANK_ID_PREFIXES = CollectionsUtil.createSet("H", "N", "T", "R", "W", "AA",
+			"AI", "AW", "BE", "BF", "BG", "BI", "BM", "BQ", "BU", "CA", "CB", "CD", "CF", "CK", "CN", "CO", "CV", "CX",
+			"DN", "DR", "DT", "DV", "DW", "DY", "EB", "EC", "EE", "EG", "EH", "EL", "ES", "EV", "EW", "EX", "EY", "FC",
+			"FD", "FE", "FF", "FG", "FK", "FL", "GD", "GE", "GH", "GO", "GR", "GT", "GW", "HO", "HS", "JG", "JK", "JZ",
+			"U", "AF", "AY", "DQ", "EF", "EU", "FJ", "GQ", "GU", "HM", "HQ", "JF", "JN", "JQ", "JX", "KC", "KF", "KJ",
+			"KM", "KP", "KR", "KT", "KU", "AE", "CP", "CY", "B", "AQ", "AZ", "BH", "BZ", "CC", "CE", "CG", "CL", "CW",
+			"CZ", "DU", "DX", "ED", "EI", "EJ", "EK", "ER", "ET", "FH", "FI", "GS", "HN", "HR", "JJ", "JM", "JS", "JY",
+			"KG", "KO", "KS", "AC", "DP", "I", "AR", "DZ", "EA", "GC", "GP", "GV", "GX", "GY", "GZ", "HJ", "HK", "HL",
+			"G", "BV", "GF", "BK", "BL", "GJ", "GK", "EZ", "HP", "JI", "JL", "JO", "JP", "JR", "JT", "JU", "JV", "JW",
+			"KA", "S", "AD", "AH", "AS", "BC", "BT", "J", "K", "L", "M", "N", "CH", "CM", "DS", "EM", "EN", "EP", "EQ",
+			"FA", "GG", "GL", "JH", "KB", "KD", "KE", "KI", "KK", "KL", "KN", "KQ", "KV");
 	private static final Set<String> EMBL_ID_PREFIXES = CollectionsUtil.createSet("AN", "F", "V", "X", "Y", "Z", "AJ",
-			"AM", "FM", "FN", "HE", "HF", "HG", "FO", "AL", "BX", "CR", "CT", "CU", "FP", "FQ", "FR", "A", "AX", "CQ",
-			"CS", "FB", "GM", "GN", "HA", "HB", "HC", "HD", "HH", "HI", "JA", "JB", "JC", "JD", "JE", "BN");
-	private static final Set<String> DDBJ_ID_PREFIXES = CollectionsUtil.createSet("BA", "DF", "DG", "C", "AT", "AU",
-			"AV", "BB", "BJ", "BP", "BW", "BY", "CI", "CJ", "DA", "DB", "DC", "DK", "FS", "FY", "HX", "HY", "D", "AB",
-			"AP", "BS", "AG", "DE", "DH", "FT", "GA", "AK", "E", "BD", "DD", "DI", "DJ", "DL", "DM", "FU", "FV", "FW",
-			"FZ", "GB", "HV", "HW", "BR", "HT", "HU", "FX");
+			"AM", "FM", "FN", "HE", "HF", "HG", "FO", "LK", "LL", "LM", "LN", "LO", "LP", "LQ", "LR", "LS", "LT", "AL",
+			"BX", "CR", "CT", "CU", "FP", "FQ", "FR", "A", "AX", "CQ", "CS", "FB", "GM", "GN", "HA", "HB", "HC", "HD",
+			"HH", "HI", "JA", "JB", "JC", "JD", "JE", "BN");
+	private static final Set<String> DDBJ_ID_PREFIXES = CollectionsUtil.createSet("BA", "DF", "DG", "LD", "C", "AT",
+			"AU", "AV", "BB", "BJ", "BP", "BW", "BY", "CI", "CJ", "DA", "DB", "DC", "DK", "FS", "FY", "HX", "HY", "D",
+			"AB", "LC", "AP", "BS", "AG", "DE", "DH", "FT", "GA", "LB", "AK", "E", "BD", "DD", "DI", "DJ", "DL", "DM",
+			"FU", "FV", "FW", "FZ", "GB", "HV", "HW", "HZ", "LF", "LG", "BR", "HT", "HU", "FX", "LA", "LE", "LH", "LI",
+			"LJ");
 
 	private static Map<String, Class<? extends DataSourceIdentifier<String>>> prefixToIdClass;
 
@@ -113,13 +116,19 @@ public static DataSourceIdentifier<String> resolveNucleotideAccession(String acc
 			if (prefix.length() == 5 && prefix.startsWith("A")) {
 				return new DdbjId(acc);
 			}
-			if (prefix.length() == 4 && (prefix.startsWith("A") || prefix.startsWith("D") || prefix.startsWith("G"))) {
+			if (prefix.length() == 4
+					&& (prefix.startsWith("A") || prefix.startsWith("D") || prefix.startsWith("G")
+							|| prefix.startsWith("J") || prefix.startsWith("L") || prefix.startsWith("M")
+							|| prefix.startsWith("N") || prefix.startsWith("K"))) {
 				return new GenBankID(acc);
 			}
-			if (prefix.length() == 4 && (prefix.startsWith("B") || prefix.startsWith("E"))) {
+			if (prefix.length() == 4
+					&& (prefix.startsWith("B") || prefix.startsWith("P") || prefix.startsWith("E") || prefix
+							.startsWith("I"))) {
 				return new DdbjId(acc);
 			}
-			if (prefix.length() == 4 && prefix.startsWith("C")) {
+			if (prefix.length() == 4 && (prefix.startsWith("C") || prefix.startsWith("F") || prefix.startsWith("O")
+					|| prefix.startsWith("H"))) { // parenthesized: the length == 4 test must guard every alternative
 				return new EmblID(acc);
 			}
 			Class<? extends DataSourceIdentifier<String>> idClass = prefixToIdClass.get(prefix);
@@ -141,7 +150,7 @@ public static DataSourceIdentifier<String> resolveNucleotideAccession(String acc
 				}
 			}
 		}
-		logger.warn("Input is not a known nucleotide accession: " + acc);
+		// logger.warn("Input is not a known nucleotide accession: " + acc);
 		return new ProbableErrorDataSourceIdentifier(acc, null, "Input is not a known nucleotide accession: " + acc);
 	}
 
diff --git a/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/ProteinAccessionResolver.java b/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/ProteinAccessionResolver.java
index 3827dc0..0efbf90 100644
--- a/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/ProteinAccessionResolver.java
+++ b/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/ProteinAccessionResolver.java
@@ -56,11 +56,12 @@
  * 
  */
 public class ProteinAccessionResolver {
-	
+
 	private static final Logger logger = Logger.getLogger(ProteinAccessionResolver.class);
 
 	private static final Pattern ACC_PATTERN = Pattern.compile("([A-Z]{3})\\d+\\.?\\d*");
 	private static final String VALID_UNIPROT_PATTERN_1 = "[A-NR-Z][0-9][A-Z][A-Z0-9][A-Z0-9][0-9]";
+	private static final String VALID_UNIPROT_PATTERN_3 = "[A-NR-Z][0-9][A-Z][A-Z0-9][A-Z0-9][0-9][A-Z][A-Z0-9][A-Z0-9][0-9]";
 	private static final String VALID_UNIPROT_PATTERN_2 = "[OPQ][0-9][A-Z0-9][A-Z0-9][A-Z0-9][0-9]";
 
 	public static DataSourceIdentifier<String> resolveProteinAccession(String acc) {
@@ -68,7 +69,8 @@ public static DataSourceIdentifier<String> resolveProteinAccession(String acc) {
 		if (acc.matches("[A-Z][A-Z]_\\d+\\.?\\d*")) {
 			return new RefSeqID(acc);
 		}
-		if (acc.matches(VALID_UNIPROT_PATTERN_1) || acc.matches(VALID_UNIPROT_PATTERN_2)) {
+		if (acc.matches(VALID_UNIPROT_PATTERN_1) || acc.matches(VALID_UNIPROT_PATTERN_2)
+				|| acc.matches(VALID_UNIPROT_PATTERN_3)) {
 			return new UniProtID(acc);
 		}
 		Matcher m = ACC_PATTERN.matcher(acc);
@@ -104,6 +106,18 @@ public static DataSourceIdentifier<String> resolveProteinAccession(String acc) {
 			if (prefix.startsWith("J")) {
 				return new GenBankID(acc);
 			}
+			if (prefix.startsWith("K")) {
+				return new GenBankID(acc);
+			}
+			if (prefix.startsWith("L")) {
+				return new DdbjId(acc);
+			}
+			if (prefix.startsWith("M")) {
+				return new GenBankID(acc);
+			}
+			if (prefix.startsWith("N")) {
+				return new GenBankID(acc);
+			}
 		}
 		logger.warn("Input is not a known protein accession pattern: " + acc);
 		return new ProbableErrorDataSourceIdentifier(acc, null, "Input is not a known protein accession pattern: "

From b98deaa6448e01f6172487c7cb7b22edd11e30a8 Mon Sep 17 00:00:00 2001
From: bill-baumgartner <bill.baumgartner@gmail.com>
Date: Tue, 9 Feb 2016 10:01:28 -0700
Subject: [PATCH 20/36] Removed check made invalid by unknown identifier
 handling

---
 .../rdf/filter/DuplicateFieldValueFilter.java     | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/filter/DuplicateFieldValueFilter.java b/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/filter/DuplicateFieldValueFilter.java
index 9100772..314f2b4 100644
--- a/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/filter/DuplicateFieldValueFilter.java
+++ b/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/filter/DuplicateFieldValueFilter.java
@@ -66,7 +66,7 @@ public abstract class DuplicateFieldValueFilter implements DuplicateStatementFil
 	private final DiskBasedHash hash;
 	private List<File> noDupsFiles;
 	private String previousFieldValueKey = null;
-	private int _3inARowCount = 0;
+//	private int _3inARowCount = 0;
 
 	public DuplicateFieldValueFilter(DiskBasedHash hash) throws IOException {
 		this.hash = hash;
@@ -84,10 +84,11 @@ public boolean alreadyObservedStatement(Statement stmt) {
 		if (isFieldRdfLine(subject)) {
 			String fieldValueKey = getFieldValueKey(subject);
 			if (!fieldValueKey.equals(previousFieldValueKey)) {
-				if (previousFieldValueKey != null && ((_3inARowCount % 3) != 0)) {
-					throw new IllegalStateException("3-in-a-row-count not equal to 3 (" + _3inARowCount + "): "
-							+ previousFieldValueKey);
-				}
+				/* Handling of unknown and probable error identifiers seems to break the 3 in a row count*/
+//				if (previousFieldValueKey != null && ((_3inARowCount % 3) != 0)) {
+//					throw new IllegalStateException("3-in-a-row-count not equal to 3 (" + _3inARowCount + "): "
+//							+ previousFieldValueKey);
+//				}
 				if (previousFieldValueKey != null) {
 					try {
 						hash.add(previousFieldValueKey);
@@ -96,9 +97,9 @@ public boolean alreadyObservedStatement(Statement stmt) {
 					}
 				}
 				previousFieldValueKey = fieldValueKey;
-				_3inARowCount = 1;
+//				_3inARowCount = 1;
 			} else {
-				_3inARowCount++;
+//				_3inARowCount++;
 			}
 			if (!hash.contains(fieldValueKey)) {
 				return false;

From 499f6c5be644890715c2b89f4477fd2bfd983124 Mon Sep 17 00:00:00 2001
From: bill-baumgartner <bill.baumgartner@gmail.com>
Date: Tue, 9 Feb 2016 10:01:57 -0700
Subject: [PATCH 21/36] files are now closed properly when an exception halts
 processing

---
 .../rdfizer/rdf/ice/RdfRecordWriterImpl.java           | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RdfRecordWriterImpl.java b/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RdfRecordWriterImpl.java
index 2d6c6e5..9ad8a56 100644
--- a/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RdfRecordWriterImpl.java
+++ b/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RdfRecordWriterImpl.java
@@ -654,12 +654,22 @@ private void write(Statement stmt, DataSource ns) {
 		// them in the filter. This saves some memory and also the time needed to check for
 		// something that is guaranteed to not be already observed
 		boolean checkFilter = needToCheckFilter(stmt.getSubject());
+		try {
 		if (!checkFilter || (checkFilter && !filter.alreadyObservedStatement(stmt))) {
 			if (!rollingCacheContains(stmt)) {
 				write(stmt, rdfWriter);
 				writtenStatementCount++;
 			}
 		}
+		} catch (IllegalStateException e) {
+			logger.error("Halting RDF Generation due to IllegalStateException.", e);
+			try {
+				closeFiles();
+			} catch (IOException e1) {
+				logger.error("Unable to close RDF output files while halting.", e1);
+			}
+			System.exit(-1); // always halt, even when closing the output files failed
+		}
 	}
 
 	/**

From ee2a4abcc742a77ecb5a2555ccfedeca86c17a26 Mon Sep 17 00:00:00 2001
From: bill-baumgartner <bill.baumgartner@gmail.com>
Date: Tue, 9 Feb 2016 10:02:26 -0700
Subject: [PATCH 22/36] fixes to comply with added column (feature type)

---
 .../fileparsers/mgi/MRKSequenceFileData.java     |  6 +++++-
 .../fileparsers/mgi/MRKSequenceFileParser.java   | 16 ++++++++++++++--
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/mgi/MRKSequenceFileData.java b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/mgi/MRKSequenceFileData.java
index 8290529..45a3e60 100755
--- a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/mgi/MRKSequenceFileData.java
+++ b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/mgi/MRKSequenceFileData.java
@@ -98,6 +98,8 @@ public class MRKSequenceFileData extends SingleLineFileRecord {
 	private final Set<RefSeqID> refseqProteinIds;
 	@RecordField
 	private final Set<UniGeneID> unigeneIds;
+	@RecordField
+	private final String featureType;
 	
 
 	/**
@@ -122,6 +124,7 @@ public class MRKSequenceFileData extends SingleLineFileRecord {
 	 * @param vegaProteinIds
 	 * @param ensemblProteinIds
 	 * @param refseqProteinIds
+	 * @param featureType value of the file's final "Feature Type" column
 	 */
 	public MRKSequenceFileData(MgiGeneID mgiAccessionID, String markerSymbol, String status,
 			MgiGeneType markerType, String markerName, String cM_Position, String chromosome,
@@ -129,7 +132,7 @@ public MRKSequenceFileData(MgiGeneID mgiAccessionID, String markerSymbol, String
 			Set<DataSourceIdentifier<?>> genBankAccessionIDs, Set<RefSeqID> refseqTranscriptIds,
 			Set<VegaID> vegaTranscriptIds, Set<EnsemblGeneID> ensemblTranscriptId, Set<UniProtID> uniprotIds,
 			Set<UniProtID> tremblIds, Set<VegaID> vegaProteinIds, Set<EnsemblGeneID> ensemblProteinIds,
-			Set<RefSeqID> refseqProteinIds, Set<UniGeneID> unigeneIds, long byteOffset, long lineNumber) {
+			Set<RefSeqID> refseqProteinIds, Set<UniGeneID> unigeneIds, String featureType, long byteOffset, long lineNumber) {
 		super(byteOffset, lineNumber);
 		this.mgiAccessionID = mgiAccessionID;
 		this.markerSymbol = markerSymbol;
@@ -151,6 +154,7 @@ public MRKSequenceFileData(MgiGeneID mgiAccessionID, String markerSymbol, String
 		this.ensemblProteinIds = ensemblProteinIds;
 		this.refseqProteinIds = refseqProteinIds;
 		this.unigeneIds = unigeneIds;
+		this.featureType = featureType;
 	}
 
 	
diff --git a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/mgi/MRKSequenceFileParser.java b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/mgi/MRKSequenceFileParser.java
index 2ab6dbe..a095524 100755
--- a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/mgi/MRKSequenceFileParser.java
+++ b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/mgi/MRKSequenceFileParser.java
@@ -66,6 +66,10 @@
  * 
  */
 public class MRKSequenceFileParser extends SingleLineFileRecordReader<MRKSequenceFileData> {
+	/*
+	 * There is a line break in the header. The final column header (Feature
+	 * Type) is on the next line by itself.
+	 */
 	private static final String HEADER = "MGI Marker Accession ID\tMarker Symbol\tStatus\tMarker Type\tMarker Name\tcM position\tChromosome\tGenome Coordinate Start\tGenome Coordinate End\tStrand\tGenBank IDs\tRefSeq transcript IDs\tVEGA transcript IDs\tEnsembl transcript IDs\tUniProt IDs\tTrEMBL IDs\tVEGA protein IDs\tEnsembl protein IDs\tRefSeq protein IDs\tUniGene IDs";
 
 	private static final Logger logger = Logger.getLogger(MRKSequenceFileParser.class);
@@ -92,7 +96,13 @@ protected StreamLineReader initializeLineReaderFromDownload(CharacterEncoding en
 
 	@Override
 	protected String getFileHeader() throws IOException {
-		return readLine().getText();
+		String header = readLine().getText();
+		/*
+		 * There is a line break in the header. The final column header (Feature
+		 * Type) is on the next line by itself so we burn a line here.
+		 */
+		readLine();
+		return header;
 	}
 
 	@Override
@@ -223,11 +233,13 @@ protected MRKSequenceFileData parseRecordFromLine(Line line) {
 				}
 			}
 		}
+		
+		String featureType = toks[20];
 
 		return new MRKSequenceFileData(mgiAccessionID, markerSymbol, status, markerType, markerName, cM_Position,
 				chromosome, genomeCoordinateStart, genomeCoordinateEnd, strand, genBankAccessionIDs,
 				refseqTranscriptIds, vegaTranscriptIds, ensemblTranscriptIds, uniprotIds, tremblIds, vegaProteinIds,
-				ensemblProteinIds, refseqProteinIds, unigeneIds, line.getByteOffset(), line.getLineNumber());
+				ensemblProteinIds, refseqProteinIds, unigeneIds, featureType, line.getByteOffset(), line.getLineNumber());
 
 	}
 

From 4a760add3032a4443045f10c9a7e4184e98d3163 Mon Sep 17 00:00:00 2001
From: bill-baumgartner <bill.baumgartner@gmail.com>
Date: Tue, 9 Feb 2016 10:32:18 -0700
Subject: [PATCH 23/36] Added flag to optionally clean data source files

---
 ...wnload-datasources-and-generate-triples.sh |  9 ++++-
 .../scripts/pom-rdf-gen-9606.xml              |  1 +
 .../scripts/pom-rdf-gen-modelorgs.xml         |  1 +
 datasource-rdfizer/scripts/pom-rdf-gen.xml    | 36 ++++++++++---------
 .../rdfizer/rdf/ice/IceRdfGenerator.java      | 18 +++++-----
 5 files changed, 39 insertions(+), 26 deletions(-)

diff --git a/datasource-rdfizer/scripts/download-datasources-and-generate-triples.sh b/datasource-rdfizer/scripts/download-datasources-and-generate-triples.sh
index 70f5035..3f15a45 100755
--- a/datasource-rdfizer/scripts/download-datasources-and-generate-triples.sh
+++ b/datasource-rdfizer/scripts/download-datasources-and-generate-triples.sh
@@ -12,6 +12,7 @@ function print_usage {
     echo "  [-i <datasource-names>]: The names of the datasources to download; if not specified, all available datasources will be downloaded."
     echo "  [-t <NCBI taxonomy IDS]: A comma-separated list of taxonomy IDs.  Only records for these IDs will be included in the RDF triple output where applicable.  If neither -t nor -m is specified, all records will be included."
     echo "  [-m]: Include only human and the 7 model organisms in the generated RDF. If neither -t nor -m is specified, all records will be included."
+    echo "  [-c]: Clean the data source files. If set, this flag will cause the data source files to be re-downloaded prior to processing."
 }
 
 TAXON_IDS="EMPTY"
@@ -25,7 +26,9 @@ function set_taxon_ids {
     fi
 }
 
-while getopts "d:r:i:t:mh" OPTION; do
+CLEAN_SOURCES="false"
+
+while getopts "d:r:i:t:mhc" OPTION; do
     case $OPTION in
         # The directory into which we should download the datasource files.
         d) DOWNLOAD_DIR=$OPTARG
@@ -45,6 +48,9 @@ while getopts "d:r:i:t:mh" OPTION; do
         # output.
         m) set_taxon_ids "9606,741158,63221,10090,947985,80274,57486,477816,477815,46456,35531,179238,1266728,116058,10092,10091,39442,10116,947987,7227,4932,947046,947045,947044,947043,947042,947041,947040,947039,947038,947037,947036,947035,929629,929587,929586,929585,927258,927256,889517,765312,764102,764101,764100,764099,764098,764097,721032,717647,658763,643680,614665,614664,580240,580239,574961,545124,538976,538975,502869,471861,471859,471510,468558,466209,464025,462210,462209,41870,307796,285006,1247190,1227742,1220494,1218710,1216859,1216345,1204498,1201112,1196866,1182968,1182967,1182966,1177187,1162674,1162673,1162672,1162671,1158205,1158204,1149757,1144731,1138861,1097555,1095001,1087981,559292,6239,7955,3702"
            ;;
+        # Clean the data sources (causes them to be re-downloaded prior to processing).   
+        c) CLEAN_SOURCES="true"
+           ;;
         # HELP!
         h) print_usage; exit 0
            ;;
@@ -74,6 +80,7 @@ for INDEX in $(echo $DS_NAMES | tr -d "[:blank:]" | tr "," " "); do
     mvn -f datasource-rdfizer/scripts/pom-rdf-gen.xml exec:exec \
         -DdatasourceNames=$DS_NAMES \
         -DtaxonIDs=$TAXON_IDS \
+        -DredownloadDataSourceFiles=$CLEAN_SOURCES \
         -DbaseSourceDir=$DOWNLOAD_DIR \
         -DbaseRdfDir=$RDF_OUTPUT_DIR \
         -DcompressRdf=true \
diff --git a/datasource-rdfizer/scripts/pom-rdf-gen-9606.xml b/datasource-rdfizer/scripts/pom-rdf-gen-9606.xml
index ded2388..881a136 100644
--- a/datasource-rdfizer/scripts/pom-rdf-gen-9606.xml
+++ b/datasource-rdfizer/scripts/pom-rdf-gen-9606.xml
@@ -41,6 +41,7 @@
 						<argument>${compressRdf}</argument> <!-- compressRdf -->
 						<argument>${outputRecordLimit}</argument> <!-- outputRecordLimit -->
 						<argument>9606</argument> <!-- taxon identifiers to limit output to, comma-delimited -->
+						<argument>${redownloadDataSourceFiles}</argument> <!-- Re-download the data source files prior to processing? (true/false) -->
 						<argument>${datasourceNames}</argument> <!-- datasource names to process, comma-delimited -->
 						<argument>${date}</argument> <!-- date -->
 					</arguments>
diff --git a/datasource-rdfizer/scripts/pom-rdf-gen-modelorgs.xml b/datasource-rdfizer/scripts/pom-rdf-gen-modelorgs.xml
index 13acd49..c42f271 100644
--- a/datasource-rdfizer/scripts/pom-rdf-gen-modelorgs.xml
+++ b/datasource-rdfizer/scripts/pom-rdf-gen-modelorgs.xml
@@ -41,6 +41,7 @@
 						<argument>${compressRdf}</argument> <!-- compressRdf -->
 						<argument>${outputRecordLimit}</argument> <!-- outputRecordLimit -->
 						<argument>9606,741158,63221,10090,947985,80274,57486,477816,477815,46456,35531,179238,1266728,116058,10092,10091,39442,10116,947987,7227,4932,947046,947045,947044,947043,947042,947041,947040,947039,947038,947037,947036,947035,929629,929587,929586,929585,927258,927256,889517,765312,764102,764101,764100,764099,764098,764097,721032,717647,658763,643680,614665,614664,580240,580239,574961,545124,538976,538975,502869,471861,471859,471510,468558,466209,464025,462210,462209,41870,307796,285006,1247190,1227742,1220494,1218710,1216859,1216345,1204498,1201112,1196866,1182968,1182967,1182966,1177187,1162674,1162673,1162672,1162671,1158205,1158204,1149757,1144731,1138861,1097555,1095001,1087981,559292,6239,7955,3702</argument> <!-- taxon identifiers to limit output to, comma-delimited -->
+						<argument>${redownloadDataSourceFiles}</argument> <!-- Re-download the data source files prior to processing? (true/false) -->
 						<argument>${datasourceNames}</argument> <!-- datasource names to process, comma-delimited -->
 						<argument>${date}</argument> <!-- date -->
 					</arguments>
diff --git a/datasource-rdfizer/scripts/pom-rdf-gen.xml b/datasource-rdfizer/scripts/pom-rdf-gen.xml
index c9f880b..5d275be 100644
--- a/datasource-rdfizer/scripts/pom-rdf-gen.xml
+++ b/datasource-rdfizer/scripts/pom-rdf-gen.xml
@@ -35,23 +35,25 @@
 						<argument>-classpath</argument>
 						<classpath />
 						<argument>edu.ucdenver.ccp.datasource.rdfizer.rdf.ice.IceRdfGenerator</argument>
-                        <argument>NAME</argument>
-                        <!-- Directory for downloaded datasources -->
-                        <argument>${baseSourceDir}</argument>
-                        <!-- Directory into which to write RDF triple files
-                             parsed from downloaded datasources. -->
-                        <argument>${baseRdfDir}</argument>
-                        <!-- Compress the generated triple files? -->
-                        <argument>${compressRdf}</argument>
-                        <!-- outputRecordLimit -->
-                        <argument>${outputRecordLimit}</argument>
-                        <!-- Taxon identifiers to limit output to,
-                             comma-delimited; use "EMPTY" to not limit output -->
-                        <argument>${taxonIDs}</argument>
-                        <!-- comma-delimited list of datasources to process -->
-                        <argument>${datasourceNames}</argument>
-                        <!-- date -->
-                        <argument>${date}</argument>
+						<argument>NAME</argument>
+						<!-- Directory for downloaded datasources -->
+						<argument>${baseSourceDir}</argument>
+						<!-- Directory into which to write RDF triple files parsed from downloaded 
+							datasources. -->
+						<argument>${baseRdfDir}</argument>
+						<!-- Compress the generated triple files? -->
+						<argument>${compressRdf}</argument>
+						<!-- outputRecordLimit -->
+						<argument>${outputRecordLimit}</argument>
+						<!-- Taxon identifiers to limit output to, comma-delimited; use "EMPTY" 
+							to not limit output -->
+						<argument>${taxonIDs}</argument>
+						<!-- Re-download the data source files prior to processing? (true/false) -->
+						<argument>${redownloadDataSourceFiles}</argument>
+						<!-- comma-delimited list of datasources to process -->
+						<argument>${datasourceNames}</argument>
+						<!-- date -->
+						<argument>${date}</argument>
 					</arguments>
 				</configuration>
 			</plugin>
diff --git a/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/IceRdfGenerator.java b/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/IceRdfGenerator.java
index 40f668c..50bdfa4 100644
--- a/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/IceRdfGenerator.java
+++ b/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/IceRdfGenerator.java
@@ -370,19 +370,21 @@ public enum RunBy {
 	 *            NCBI Taxonomy) that will be used to limit RDF generation where
 	 *            applicable, e.g. 9606 to convert only human-related database
 	 *            records to RDF <br>
+	 *            args[6]: Clean data source files (if true, then the data
+	 *            source files will be deleted and re-downloaded) <br>
 	 * 
 	 *            The remaining input arguments depend on args[0]:<br>
 	 *            if NAME:<br>
-	 *            args[6]: comma-delimited list of FileDataSource names to
+	 *            args[7]: comma-delimited list of FileDataSource names to
 	 *            process <br>
-	 *            args[7]: [OPTIONAL] date to use in the form yyyy-mm-dd. If not
+	 *            args[8]: [OPTIONAL] date to use in the form yyyy-mm-dd. If not
 	 *            included or if "null" then the current date will be used<br>
 	 * <br>
 	 *            if INDEX: <br>
-	 *            args[6]: start stage args<br>
-	 *            [7]: the number of stages to process<br>
-	 *            args[8]: the Split type: either BY_STAGES or NONE<br>
-	 *            if BY_STAGES, then the index in args[6] corresponds to a
+	 *            args[7]: start stage args<br>
+	 *            args[8]: the number of stages to process<br>
+	 *            args[9]: the Split type: either BY_STAGES or NONE<br>
+	 *            if BY_STAGES, then the index in args[7] corresponds to a
 	 *            particular stage of a FileDataSource. Many of the
 	 *            FileDataSources are processed in a single stage, however some
 	 *            of the larger files are split into multiple stages to speed up
@@ -392,7 +394,7 @@ public enum RunBy {
 	 *            stage. This will result in longer execution times for the
 	 *            larger files, however duplicate triple removal can be done
 	 *            concurrently.<br>
-	 *            args[9]: [OPTIONAL] date to use in the form yyyy-mm-dd. If not
+	 *            args[10]: [OPTIONAL] date to use in the form yyyy-mm-dd. If not
 	 *            included or if "null" then the current date will be used
 	 * 
 	 */
@@ -410,7 +412,6 @@ public static void main(String[] args) {
 
 		File baseSourceFileDirectory = new File(args[index++]);
 		File baseRdfOutputDirectory = new File(args[index++]);
-		boolean cleanSourceFiles = false;// Boolean.valueOf(args[index++]);
 		boolean compress = Boolean.valueOf(args[index++]);
 		int outputRecordLimit = Integer.valueOf(args[index++]);
 		String taxonIdsStr = args[index++];
@@ -423,6 +424,7 @@ public static void main(String[] args) {
 				taxonIds.add(new NcbiTaxonomyID(id));
 			}
 		}
+		boolean cleanSourceFiles = Boolean.valueOf(args[index++]);
 
 		try {
 

From cfe69cc22091c49e14350a0a0d13a2633fba9dd3 Mon Sep 17 00:00:00 2001
From: bill-baumgartner <bill.baumgartner@gmail.com>
Date: Tue, 9 Feb 2016 11:04:31 -0700
Subject: [PATCH 24/36] Added handling for multiple entries in Entrez Id field

---
 .../pharmgkb/PharmGkbGeneFileParser.java      | 21 ++++++++++-
 .../pharmgkb/PharmGkbGeneFileRecord.java      | 37 ++++++++++---------
 .../pharmgkb/PharmGkbGeneFileParserTest.java  |  4 +-
 3 files changed, 40 insertions(+), 22 deletions(-)

diff --git a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/pharmgkb/PharmGkbGeneFileParser.java b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/pharmgkb/PharmGkbGeneFileParser.java
index 98d0afb..fd351f1 100644
--- a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/pharmgkb/PharmGkbGeneFileParser.java
+++ b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/pharmgkb/PharmGkbGeneFileParser.java
@@ -37,7 +37,9 @@
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Set;
 
 import org.apache.commons.lang.StringUtils;
 import org.apache.log4j.Logger;
@@ -165,7 +167,7 @@ protected String getExpectedFileHeader() throws IOException {
 	protected PharmGkbGeneFileRecord parseRecordFromLine(Line line) {
 		String[] toks = line.getText().split(RegExPatterns.TAB, -1);
 		PharmGkbID pharmGkbAccessionId = new PharmGkbID(toks[0]);
-		EntrezGeneID entrezGeneId = StringUtils.isNotBlank(toks[1]) ? new EntrezGeneID(toks[1]) : null;
+		Set<EntrezGeneID> entrezGeneIds = getEntrezGeneIDs(toks[1]);
 		EnsemblGeneID ensemblGeneId = StringUtils.isNotBlank(toks[2]) ? new EnsemblGeneID(toks[2]) : null;
 		String name = StringUtils.isNotBlank(toks[3]) ? new String(toks[3]) : null;
 		String symbol = StringUtils.isNotBlank(toks[4]) ? new String(toks[4]) : null;
@@ -202,12 +204,27 @@ protected PharmGkbGeneFileRecord parseRecordFromLine(Line line) {
 		Integer chromosomeStart = (toks[12].equalsIgnoreCase("null")) ? null : Integer.parseInt(toks[12]);
 		Integer chromosomeEnd = (toks[13].equalsIgnoreCase("null")) ? null : Integer.parseInt(toks[13]);
 
-		return new PharmGkbGeneFileRecord(pharmGkbAccessionId, entrezGeneId, ensemblGeneId, name, symbol,
+		return new PharmGkbGeneFileRecord(pharmGkbAccessionId, entrezGeneIds, ensemblGeneId, name, symbol,
 				alternativeNames, alternativeSymbols, isVip, hasVariantAnnotation, crossReferences,
 				hasCpicDosingGuideline, chromosome, chromosomeStart, chromosomeEnd, line.getByteOffset(),
 				line.getLineNumber());
 	}
 
+	/**
+	 * Parses the Entrez Gene ID column: one ID or a quoted, comma-delimited list of IDs.
+	 * @param idStr raw column text; may be blank
+	 * @return parsed IDs (quotes stripped, tokens trimmed, blank tokens skipped); never null
+	 */
+	private Set<EntrezGeneID> getEntrezGeneIDs(String idStr) {
+		Set<EntrezGeneID> ids = new HashSet<EntrezGeneID>();
+		for (String tok : StringUtils.defaultString(idStr).replace("\"", "").split(",")) {
+			if (StringUtils.isNotBlank(tok)) {
+				ids.add(new EntrezGeneID(tok.trim()));
+			}
+		}
+		return ids;
+	}
+
 	/**
 	 * @param refStr
 	 * @return
diff --git a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/pharmgkb/PharmGkbGeneFileRecord.java b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/pharmgkb/PharmGkbGeneFileRecord.java
index a647ea9..42ffb26 100644
--- a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/pharmgkb/PharmGkbGeneFileRecord.java
+++ b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/pharmgkb/PharmGkbGeneFileRecord.java
@@ -34,21 +34,22 @@
  */
 
 
-import java.util.Collection;
-
-import lombok.Data;
-
-import org.apache.log4j.Logger;
-
-import edu.ucdenver.ccp.datasource.fileparsers.License;
-import edu.ucdenver.ccp.datasource.fileparsers.Record;
-import edu.ucdenver.ccp.datasource.fileparsers.RecordField;
-import edu.ucdenver.ccp.datasource.fileparsers.SingleLineFileRecord;
-import edu.ucdenver.ccp.datasource.identifiers.DataSource;
-import edu.ucdenver.ccp.datasource.identifiers.DataSourceIdentifier;
-import edu.ucdenver.ccp.datasource.identifiers.ensembl.EnsemblGeneID;
-import edu.ucdenver.ccp.datasource.identifiers.ncbi.gene.EntrezGeneID;
-import edu.ucdenver.ccp.datasource.identifiers.pharmgkb.PharmGkbID;
+import java.util.Collection;
+import java.util.Set;
+
+import lombok.Data;
+
+import org.apache.log4j.Logger;
+
+import edu.ucdenver.ccp.datasource.fileparsers.License;
+import edu.ucdenver.ccp.datasource.fileparsers.Record;
+import edu.ucdenver.ccp.datasource.fileparsers.RecordField;
+import edu.ucdenver.ccp.datasource.fileparsers.SingleLineFileRecord;
+import edu.ucdenver.ccp.datasource.identifiers.DataSource;
+import edu.ucdenver.ccp.datasource.identifiers.DataSourceIdentifier;
+import edu.ucdenver.ccp.datasource.identifiers.ensembl.EnsemblGeneID;
+import edu.ucdenver.ccp.datasource.identifiers.ncbi.gene.EntrezGeneID;
+import edu.ucdenver.ccp.datasource.identifiers.pharmgkb.PharmGkbID;
 
 /**
  * File record capturing single line record from PharmGKB's genes.tsv file.
@@ -65,7 +66,7 @@ public class PharmGkbGeneFileRecord extends SingleLineFileRecord {
 	@RecordField
 	private final PharmGkbID accessionId;
 	@RecordField
-	private final EntrezGeneID entrezGeneId;
+	private final Set<EntrezGeneID> entrezGeneIds;
 	@RecordField
 	private final EnsemblGeneID ensemblGeneId;
 	@RecordField
@@ -105,14 +106,14 @@ public class PharmGkbGeneFileRecord extends SingleLineFileRecord {
 	 * @param hasVariantAnnotation
 	 * @param crossReferences
 	 */
-	public PharmGkbGeneFileRecord(PharmGkbID accessionId, EntrezGeneID entrezGeneId, EnsemblGeneID ensemblGeneId,
+	public PharmGkbGeneFileRecord(PharmGkbID accessionId, Set<EntrezGeneID> entrezGeneIds, EnsemblGeneID ensemblGeneId,
 			String name, String symbol, Collection<String> alternativeNames,
 			Collection<String> alternativeSymbols, boolean isVip, boolean hasVariantAnnotation,
 			Collection<DataSourceIdentifier<?>> crossReferences, boolean hasCpicDosingGuideline, String chromosome,
 			Integer chromosomalStart, Integer chromosomalEnd, long byteOffset, long lineNumber) {
 		super(byteOffset, lineNumber);
 		this.accessionId = accessionId;
-		this.entrezGeneId = entrezGeneId;
+		this.entrezGeneIds = entrezGeneIds;
 		this.ensemblGeneId = ensemblGeneId;
 		this.name = name;
 		this.symbol = symbol;
diff --git a/datasource-fileparsers/src/test/java/edu/ucdenver/ccp/datasource/fileparsers/pharmgkb/PharmGkbGeneFileParserTest.java b/datasource-fileparsers/src/test/java/edu/ucdenver/ccp/datasource/fileparsers/pharmgkb/PharmGkbGeneFileParserTest.java
index f7ed3a8..f8ff912 100644
--- a/datasource-fileparsers/src/test/java/edu/ucdenver/ccp/datasource/fileparsers/pharmgkb/PharmGkbGeneFileParserTest.java
+++ b/datasource-fileparsers/src/test/java/edu/ucdenver/ccp/datasource/fileparsers/pharmgkb/PharmGkbGeneFileParserTest.java
@@ -88,7 +88,7 @@ public void testParser() throws IOException {
 		RecordReader<PharmGkbGeneFileRecord> reader = initSampleRecordReader();
 		PharmGkbGeneFileRecord r = reader.next();
 		assertEquals("PA100", r.getAccessionId().getDataElement());
-		assertEquals(995, r.getEntrezGeneId().getDataElement().intValue());
+		assertEquals(995, r.getEntrezGeneIds().iterator().next().getDataElement().intValue());
 		assertEquals("ENSG00000158402", r.getEnsemblGeneId().getDataElement());
 		assertEquals("cell division cycle 25 homolog C (S. pombe)", r.getName());
 		assertEquals("CDC25C", r.getSymbol());
@@ -159,7 +159,7 @@ public void testParser() throws IOException {
 
 		r = reader.next();
 		assertEquals("PA101", r.getAccessionId().getDataElement());
-		assertEquals(1017, r.getEntrezGeneId().getDataElement().intValue());
+		assertEquals(1017, r.getEntrezGeneIds().iterator().next().getDataElement().intValue());
 
 		assertFalse(reader.hasNext());
 	}

From 27e666023301dcedb6692352f43bb0650dd6ce8a Mon Sep 17 00:00:00 2001
From: bill-baumgartner <bill.baumgartner@gmail.com>
Date: Tue, 9 Feb 2016 11:59:35 -0700
Subject: [PATCH 25/36] Removed the IRefWeb host

The FtpHost class should probably be deprecated.
---
 .../ucdenver/ccp/datasource/fileparsers/download/FtpHost.java   | 2 --
 1 file changed, 2 deletions(-)

diff --git a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/download/FtpHost.java b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/download/FtpHost.java
index 12fa1ee..ee096e0 100644
--- a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/download/FtpHost.java
+++ b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/download/FtpHost.java
@@ -80,7 +80,5 @@ private FtpHost() {
 	public static final String KEGG_GENEMAPTAB_PATH = "pub/kegg/pathway/organisms";
 	
 	public static final String MGI_REPORTS_PATH = "pub/reports";
-	
-	public static final String IREFWEB_HOST = "ftp.no.embnet.org";
 
 }

From 7bf4214993823c287c400ba13a27bb552f583d05 Mon Sep 17 00:00:00 2001
From: bill-baumgartner <bill.baumgartner@gmail.com>
Date: Tue, 9 Feb 2016 12:00:56 -0700
Subject: [PATCH 26/36] Added argument for identifier resolution to improve
 error messages

---
 .../dip/DipYYYYMMDDFileParser.java            | 41 +++++-----
 .../drugbank/DrugBankDrugRecord.java          | 21 ++---
 .../EmblSequenceDatabaseFileParserBase.java   |  2 +-
 .../GeneticAssociationDbAllTxtFileData.java   |  2 +-
 .../hgnc/HgncDownloadFileParser.java          |  4 +-
 .../hprd/HprdIdMappingsTxtFileParser.java     |  4 +-
 .../irefweb/IRefWebInteraction.java           |  3 +-
 .../irefweb/IRefWebPsiMitab2_6FileParser.java | 77 +++++++++----------
 .../mgi/MRKSequenceFileParser.java            |  7 +-
 .../gene/EntrezGene2AccessionFileData.java    | 35 +++++----
 .../pharmgkb/PharmGkbGeneFileParser.java      |  8 +-
 .../rgd/RgdAnnotationFileIdResolver.java      |  4 +
 .../hgnc/HgncDownloadFileParserTest.java      |  4 +-
 .../IRefWebPsiMitab2_6FileParserTest.java     |  4 +-
 .../mgi/MRKSequenceFileParserTest.java        | 16 ++--
 .../NucleotideAccessionResolver.java          | 17 +++-
 .../identifiers/ProteinAccessionResolver.java | 20 ++++-
 .../NucleotideAccessionResolverTest.java      |  6 +-
 .../ProteinAccessionResolverTest.java         | 12 +--
 19 files changed, 153 insertions(+), 134 deletions(-)

diff --git a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/dip/DipYYYYMMDDFileParser.java b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/dip/DipYYYYMMDDFileParser.java
index 96e15e4..16c5a43 100755
--- a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/dip/DipYYYYMMDDFileParser.java
+++ b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/dip/DipYYYYMMDDFileParser.java
@@ -68,6 +68,7 @@
 import edu.ucdenver.ccp.datasource.fileparsers.obo.MiOntologyIdTermPair;
 import edu.ucdenver.ccp.datasource.fileparsers.taxonaware.TaxonAwareSingleLineFileRecordReader;
 import edu.ucdenver.ccp.datasource.identifiers.DataSourceIdentifier;
+import edu.ucdenver.ccp.datasource.identifiers.ProbableErrorDataSourceIdentifier;
 import edu.ucdenver.ccp.datasource.identifiers.ProteinAccessionResolver;
 import edu.ucdenver.ccp.datasource.identifiers.dip.DipInteractionID;
 import edu.ucdenver.ccp.datasource.identifiers.dip.DipInteractorID;
@@ -77,8 +78,8 @@
 import edu.ucdenver.ccp.identifier.publication.PubMedID;
 
 /**
- * This class is used to parse DIPYYYMMDD files which can be downloaded from the DIP website:
- * http://dip.doe-mbi.ucla.edu/dip/Main.cgi
+ * This class is used to parse DIPYYYMMDD files which can be downloaded from the
+ * DIP website: http://dip.doe-mbi.ucla.edu/dip/Main.cgi
  * 
  * @author Bill Baumgartner
  * 
@@ -197,7 +198,8 @@ private Set<DipInteractionExperiment> getInteractionExperiments(String detection
 			DipInteractionType interactionType = MiOntologyIdTermPair.parseString(DipInteractionType.class,
 					interactionTypes[i]);
 			DipProcessingStatus processingStatus = getDipProcessingStatus(processingStatuses[i], line);
-			String firstAuthorName = null; // change if the first author column ever contains names
+			String firstAuthorName = null; // change if the first author column
+											// ever contains names
 			DipPublication publication = getDipPublication(firstAuthorName, pmids[i * 2], pmids[i * 2 + 1]);
 
 			experiments.add(new DipInteractionExperiment(publication, processingStatus, detectionMethod,
@@ -212,9 +214,9 @@ private Set<DipInteractionExperiment> getInteractionExperiments(String detection
 	 * @param string
 	 * @param string2
 	 * @param pmids
-	 * @return {@link DipPublication} from first author name and conversions of strings like
-	 *         "pubmed:9194558" and "pubmed:DIP-209S" into a {@link PubMedID} and a
-	 *         {@link DipPublicationId}
+	 * @return {@link DipPublication} from first author name and conversions of
+	 *         strings like "pubmed:9194558" and "pubmed:DIP-209S" into a
+	 *         {@link PubMedID} and a {@link DipPublicationId}
 	 */
 	private DipPublication getDipPublication(String firstAuthorName, String pmidStr, String dipPubIdStr) {
 		PubMedID pmid;
@@ -230,7 +232,8 @@ private DipPublication getDipPublication(String firstAuthorName, String pmidStr,
 
 	/**
 	 * @param string
-	 * @return {@link DipProcessingStatus} parsed from a string such as: "dip:0002(small scale)"
+	 * @return {@link DipProcessingStatus} parsed from a string such as:
+	 *         "dip:0002(small scale)"
 	 */
 	private DipProcessingStatus getDipProcessingStatus(String statusStr, String line) {
 		Pattern p = Pattern.compile("(dip:\\d+)\\((.*?)\\)");
@@ -258,8 +261,9 @@ private DipInteractor getInteractor(String interactorStr, String alternateIdsStr
 				}
 
 				/*
-				 * The columns for alternate IDs and aliases are always set to "-". If this is no
-				 * longer the case then an exception will be thrown and code changes required.
+				 * The columns for alternate IDs and aliases are always set to
+				 * "-". If this is no longer the case then an exception will be
+				 * thrown and code changes required.
 				 */
 				Set<DipInteractorID> alternateIds = null;
 				if (!alternateIdsStr.trim().equals("-")) {
@@ -318,13 +322,7 @@ private DataSourceIdentifier<?> resolveId(String idStr) {
 			return new DipInteractorID(idStr);
 		}
 		if (idStr.startsWith("refseq:")) {
-			try {
-				return ProteinAccessionResolver.resolveProteinAccession(StringUtil.removePrefix(idStr, "refseq:"));
-				// return new RefSeqID(StringUtil.removePrefix(idStr, "refseq:"));
-			} catch (IllegalArgumentException e) {
-				logger.warn("Invalid RefSeq identifier detected: " + idStr);
-				return null;
-			}
+			return ProteinAccessionResolver.resolveProteinAccession(StringUtil.removePrefix(idStr, "refseq:"), idStr);
 		}
 		if (idStr.startsWith("uniprotkb:")) {
 			if (idStr.contains(StringConstants.HYPHEN_MINUS)) {
@@ -333,8 +331,7 @@ private DataSourceIdentifier<?> resolveId(String idStr) {
 			try {
 				return new UniProtID(StringUtil.removePrefix(idStr, "uniprotkb:"));
 			} catch (IllegalArgumentException e) {
-				logger.warn("Invalid UniProt identifier detected: " + idStr);
-				return null;
+				return new ProbableErrorDataSourceIdentifier(idStr, null, e.getMessage());
 			}
 		}
 		throw new IllegalArgumentException("Unhandled identifier type: " + idStr);
@@ -347,16 +344,16 @@ private DataSourceIdentifier<?> resolveId(String idStr) {
 	// * MI id
 	// * @return id if recognized; otherwise, null
 	// */
-	// private static MolecularInteractionOntologyTermID extractMiId(String inputStr) {
+	// private static MolecularInteractionOntologyTermID extractMiId(String
+	// inputStr) {
 	// Pattern methodIDPattern = Pattern.compile("(MI:\\d+),?\\(");
 	// Matcher m = methodIDPattern.matcher(inputStr);
 	// if (m.find()) {
 	// return new MolecularInteractionOntologyTermID(m.group(1));
 	// }
-	// logger.error("Unable to locate ExperimentalMethod MI ID in String: " + inputStr);
+	// logger.error("Unable to locate ExperimentalMethod MI ID in String: " +
+	// inputStr);
 	// return null;
 	// }
 
-	
-
 }
diff --git a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/drugbank/DrugBankDrugRecord.java b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/drugbank/DrugBankDrugRecord.java
index e98a736..9c0498b 100644
--- a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/drugbank/DrugBankDrugRecord.java
+++ b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/drugbank/DrugBankDrugRecord.java
@@ -160,7 +160,6 @@
 import edu.ucdenver.ccp.datasource.identifiers.kegg.KeggCompoundID;
 import edu.ucdenver.ccp.datasource.identifiers.kegg.KeggDrugID;
 import edu.ucdenver.ccp.datasource.identifiers.ncbi.MeshID;
-import edu.ucdenver.ccp.datasource.identifiers.ncbi.gene.GiNumberID;
 import edu.ucdenver.ccp.datasource.identifiers.ncbi.snp.SnpRsId;
 import edu.ucdenver.ccp.datasource.identifiers.ncbi.taxonomy.NcbiTaxonomyID;
 import edu.ucdenver.ccp.datasource.identifiers.obo.ChebiOntologyID;
@@ -1299,22 +1298,16 @@ private static DataSourceIdentifier<?> resolveIdentifier(String resource, String
 		} else if (resource.equals("GeneCards")) {
 			return new GeneCardId(identifier);
 		} else if (resource.equals("GenBank Gene Database")) {
-			return NucleotideAccessionResolver.resolveNucleotideAccession(identifier);
+			return NucleotideAccessionResolver.resolveNucleotideAccession(identifier, "GenBank Gene Database:"
+					+ identifier);
 		} else if (resource.equals("GenBank Protein Database")) {
-			try {
-				return ProteinAccessionResolver.resolveProteinAccession(identifier);
-			} catch (IllegalArgumentException e) {
-				if (identifier.matches("\\d+")) {
-					return new GiNumberID(identifier);
-				} else {
-					return new ProbableErrorDataSourceIdentifier("identifier", "GenBank",
-							"Observed invalid GenBank protein identifier: " + identifier);
-				}
-			}
+			return ProteinAccessionResolver
+					.resolveProteinAccession(identifier, "GenBank Protein Database:" + identifier);
 		} else if (resource.equals("GenBank")) {
-			DataSourceIdentifier<String> nucAccId = NucleotideAccessionResolver.resolveNucleotideAccession(identifier);
+			DataSourceIdentifier<String> nucAccId = NucleotideAccessionResolver.resolveNucleotideAccession(identifier,
+					"GenBank:" + identifier);
 			if (ProbableErrorDataSourceIdentifier.class.isInstance(nucAccId.getClass())) {
-				return ProteinAccessionResolver.resolveProteinAccession(identifier);
+				return ProteinAccessionResolver.resolveProteinAccession(identifier, "GenBank:" + identifier);
 			}
 		} else if (resource.equals("UniProtKB")) {
 			return new UniProtID(identifier);
diff --git a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ebi/embl/EmblSequenceDatabaseFileParserBase.java b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ebi/embl/EmblSequenceDatabaseFileParserBase.java
index 439c02e..e8d3c21 100755
--- a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ebi/embl/EmblSequenceDatabaseFileParserBase.java
+++ b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ebi/embl/EmblSequenceDatabaseFileParserBase.java
@@ -500,7 +500,7 @@ protected abstract T invokeConstructor(E idLineContents, List<D> accessionNumber
 	private EmblAssemblyInformation parseASLine(String line) {
 		String[] toks = line.split("\\s+");
 		String localSpan = toks[1];
-		DataSourceIdentifier<?> primaryIdentifier = NucleotideAccessionResolver.resolveNucleotideAccession(toks[2]);
+		DataSourceIdentifier<?> primaryIdentifier = NucleotideAccessionResolver.resolveNucleotideAccession(toks[2], toks[2]);
 		String primarySpan = toks[3];
 		boolean originatesFromComplementary = (toks.length == 5 && toks[4].trim().equalsIgnoreCase("c")) ? true : false;
 		return new EmblAssemblyInformation(localSpan, primaryIdentifier, primarySpan, originatesFromComplementary);
diff --git a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/gad/GeneticAssociationDbAllTxtFileData.java b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/gad/GeneticAssociationDbAllTxtFileData.java
index 33ff25a..3b8e4bf 100755
--- a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/gad/GeneticAssociationDbAllTxtFileData.java
+++ b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/gad/GeneticAssociationDbAllTxtFileData.java
@@ -473,7 +473,7 @@ public static GeneticAssociationDbAllTxtFileData parseGeneticAssociationDbAllTxt
 			if (acc.matches("\\d+")) {
 				nucleotideId = new GiNumberID(acc);
 			} else {
-				nucleotideId = NucleotideAccessionResolver.resolveNucleotideAccession(acc);
+				nucleotideId = NucleotideAccessionResolver.resolveNucleotideAccession(acc, refseqURL);
 			}
 		}
 
diff --git a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/hgnc/HgncDownloadFileParser.java b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/hgnc/HgncDownloadFileParser.java
index 7497744..753a6ae 100644
--- a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/hgnc/HgncDownloadFileParser.java
+++ b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/hgnc/HgncDownloadFileParser.java
@@ -564,9 +564,9 @@ private Set<DataSourceIdentifier<?>> resolveAccessionNumbers(String accListStr)
 		Set<DataSourceIdentifier<?>> accNumbers = new HashSet<DataSourceIdentifier<?>>();
 		if (!accListStr.isEmpty()) {
 			for (String acc : accListStr.split(",")) {
-				DataSourceIdentifier<String> nucAccId = NucleotideAccessionResolver.resolveNucleotideAccession(acc);
+				DataSourceIdentifier<String> nucAccId = NucleotideAccessionResolver.resolveNucleotideAccession(acc, acc);
 				if (ProbableErrorDataSourceIdentifier.class.isInstance(nucAccId)) {
-					DataSourceIdentifier<String> proAccId = ProteinAccessionResolver.resolveProteinAccession(acc);
+					DataSourceIdentifier<String> proAccId = ProteinAccessionResolver.resolveProteinAccession(acc, acc);
 					accNumbers.add(proAccId);
 				} else {
 					accNumbers.add(nucAccId);
diff --git a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/hprd/HprdIdMappingsTxtFileParser.java b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/hprd/HprdIdMappingsTxtFileParser.java
index a5417ea..278b79c 100755
--- a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/hprd/HprdIdMappingsTxtFileParser.java
+++ b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/hprd/HprdIdMappingsTxtFileParser.java
@@ -115,9 +115,9 @@ protected HprdIdMappingsTxtFileData parseRecordFromLine(Line line) {
 	}
 
 	private DataSourceIdentifier<?> resolveAccession(String acc) {
-		DataSourceIdentifier<String> nucAccId = NucleotideAccessionResolver.resolveNucleotideAccession(acc);
+		DataSourceIdentifier<String> nucAccId = NucleotideAccessionResolver.resolveNucleotideAccession(acc, acc);
 		if (ProbableErrorDataSourceIdentifier.class.isInstance(nucAccId)) {
-			return ProteinAccessionResolver.resolveProteinAccession(acc);
+			return ProteinAccessionResolver.resolveProteinAccession(acc, acc);
 		}
 		return nucAccId;
 	}
diff --git a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/irefweb/IRefWebInteraction.java b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/irefweb/IRefWebInteraction.java
index c0be435..a7d31ed 100644
--- a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/irefweb/IRefWebInteraction.java
+++ b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/irefweb/IRefWebInteraction.java
@@ -46,7 +46,6 @@
 import edu.ucdenver.ccp.datasource.identifiers.irefweb.IrigId;
 import edu.ucdenver.ccp.datasource.identifiers.irefweb.RigId;
 import edu.ucdenver.ccp.datasource.identifiers.other.ImexId;
-import edu.ucdenver.ccp.identifier.publication.PubMedID;
 
 @Data
 @Record(dataSource = DataSource.IREFWEB, label="interaction")
@@ -57,7 +56,7 @@ public class IRefWebInteraction implements DataRecord {
 	@RecordField(comment = "Notes: According to MITAB2.6 format this column should contain a pipe-delimited list of author surnames in which the interaction has been shown.\nThis column will usually include only one author name reference. However, some experimental evidences have secondary references which could be included here. This filed also includes references which are not author names as in the following examples:\nOPHID Predicted Protein Interaction\nHPRD Text Mining Confirmation\nMINT Text Mining Confirmation")
 	private final String author;
 	@RecordField(comment = "Notes: This is a non-redundant list of PubMed identifiers pointing to literature that supports the interaction. According to MITAB2.6 format, this column should contain a pipe-delimited set of databaseName:identifier pairs such as pubmed:12345. The source database name is always pubmed.")
-	private final Set<PubMedID> pmids;
+	private final Set<DataSourceIdentifier<?>> pmids;
 	@RecordField
 	private final IRefWebInteractionType interactionType;
 	@RecordField(comment = "source interaction-database and accessions.\nExample: intact:EBI-761694|rigid:3ERiFkUFsm7ZUHIRJTx8ZlHILRA|irigid:1234|edgetype:X\nNotes: Each reference is presented as a database name:identifier pair.\nChange: The source database is listed first. Additional information is pipe-delimited and presented here for the convenience of PSICQUIC web-service users (these services presently truncate this file at column 15 as they only support MITAB2.5). See columns 35,45,53.\nThe source database names that appear in this column are taken from the PSI-MI controlled vocabulary at the following location (where possible): http://www.ebi.ac.uk/ontology-lookup/browse.do?ontName=MI\nIf an interaction record identifier is not provided by the source database, this entry will appear as database-name:- with the identifier region replaced with a dash (-).")
diff --git a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/irefweb/IRefWebPsiMitab2_6FileParser.java b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/irefweb/IRefWebPsiMitab2_6FileParser.java
index 2c3a615..4643a13 100644
--- a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/irefweb/IRefWebPsiMitab2_6FileParser.java
+++ b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/irefweb/IRefWebPsiMitab2_6FileParser.java
@@ -56,18 +56,15 @@
 import java.util.HashSet;
 import java.util.Set;
 
-import org.apache.log4j.BasicConfigurator;
 import org.apache.log4j.Logger;
 
-import edu.ucdenver.ccp.common.download.FtpDownload;
+import edu.ucdenver.ccp.common.download.HttpDownload;
 import edu.ucdenver.ccp.common.file.CharacterEncoding;
 import edu.ucdenver.ccp.common.file.reader.Line;
 import edu.ucdenver.ccp.common.file.reader.StreamLineReader;
-import edu.ucdenver.ccp.common.ftp.FTPUtil.FileType;
 import edu.ucdenver.ccp.common.string.RegExPatterns;
 import edu.ucdenver.ccp.common.string.StringConstants;
 import edu.ucdenver.ccp.common.string.StringUtil;
-import edu.ucdenver.ccp.datasource.fileparsers.download.FtpHost;
 import edu.ucdenver.ccp.datasource.fileparsers.obo.MiOntologyIdTermPair;
 import edu.ucdenver.ccp.datasource.fileparsers.obo.NcbiTaxonomyIdTermPair;
 import edu.ucdenver.ccp.datasource.fileparsers.taxonaware.TaxonAwareSingleLineFileRecordReader;
@@ -137,12 +134,9 @@ public class IRefWebPsiMitab2_6FileParser extends TaxonAwareSingleLineFileRecord
 
 	private static final String HEADER = "#uidA\tuidB\taltA\taltB\taliasA\taliasB\tmethod\tauthor\tpmids\ttaxa\ttaxb\tinteractionType\tsourcedb\tinteractionIdentifier\tconfidence\texpansion\tbiological_role_A\tbiological_role_B\texperimental_role_A\texperimental_role_B\tinteractor_type_A\tinteractor_type_B\txrefs_A\txrefs_B\txrefs_Interaction\tAnnotations_A\tAnnotations_B\tAnnotations_Interaction\tHost_organism_taxid\tparameters_Interaction\tCreation_date\tUpdate_date\tChecksum_A\tChecksum_B\tChecksum_Interaction\tNegative\tOriginalReferenceA\tOriginalReferenceB\tFinalReferenceA\tFinalReferenceB\tMappingScoreA\tMappingScoreB\tirogida\tirogidb\tirigid\tcrogida\tcrogidb\tcrigid\ticrogida\ticrogidb\ticrigid\timex_id\tedgetype\tnumParticipants";
 
-	// public static final String FTP_FILE_NAME = "All.mitab.03022013.txt.zip";
-	public static final String FTP_FILE_NAME = "All.mitab.07042015.txt.zip";
 	public static final CharacterEncoding ENCODING = CharacterEncoding.US_ASCII;
-	public static final String FTP_USER_NAME = "ftp";
 
-	@FtpDownload(server = FtpHost.IREFWEB_HOST, path = "irefindex/data/archive/release_10.0/psi_mitab/MITAB2.6/", filename = FTP_FILE_NAME, filetype = FileType.BINARY, username = FTP_USER_NAME, decompress = true, targetFileName = "All.mitab.04072015.txt")
+	@HttpDownload(url = "http://irefindex.org/download/irefindex/data/archive/release_14.0/psi_mitab/MITAB2.6/All.mitab.07042015.txt.zip", decompress = true, targetFileName = "All.mitab.04072015.txt")
 	private File allMitabTxtFile;
 
 	public IRefWebPsiMitab2_6FileParser(File file, CharacterEncoding encoding) throws IOException,
@@ -254,7 +248,7 @@ private IRefWebInteraction getInteraction(String detectionMethodStr, String auth
 					detectionMethodStr);
 		}
 		String author = (authorStr.trim().equals(StringConstants.HYPHEN_MINUS)) ? null : authorStr;
-		Set<PubMedID> pmids = parsePmidsStr(pmidsStr);
+		Set<DataSourceIdentifier<?>> pmids = parsePmidsStr(pmidsStr);
 		IRefWebInteractionType interactionType = null;
 		if (!interactionTypeStr.trim().equals(StringConstants.HYPHEN_MINUS)) {
 			interactionType = MiOntologyIdTermPair.parseString(IRefWebInteractionType.class, interactionTypeStr);
@@ -404,9 +398,9 @@ private DataSourceIdentifier<?> resolveInteractorId(String idStr) {
 			} else if (idStr.startsWith("icrogid:")) {
 				return new IcrogId(StringUtil.removePrefix(idStr, "icrogid:"));
 			} else if (idStr.startsWith("refseq:")) {
-				return getRefseqAccession(StringUtil.removePrefix(idStr, "refseq:").toUpperCase());
+				return getRefseqAccession(StringUtil.removePrefix(idStr, "refseq:").toUpperCase(), idStr);
 			} else if (idStr.startsWith("RefSeq:")) {
-				return getRefseqAccession(StringUtil.removePrefix(idStr, "RefSeq:").toUpperCase());
+				return getRefseqAccession(StringUtil.removePrefix(idStr, "RefSeq:").toUpperCase(), idStr);
 			} else if (idStr.startsWith("rogid:")) {
 				return new RogId(StringUtil.removePrefix(idStr, "rogid:"));
 			} else if (idStr.startsWith("irogid:")) {
@@ -464,21 +458,21 @@ private DataSourceIdentifier<?> resolveInteractorId(String idStr) {
 			} else if (idStr.startsWith("InnateDB:")) {
 				return new InnateDbId(StringUtil.removePrefix(idStr, "InnateDB:"));
 			} else if (idStr.startsWith("emb:")) {
-				return ProteinAccessionResolver.resolveProteinAccession(StringUtil.removePrefix(idStr, "emb:"));
+				return ProteinAccessionResolver.resolveProteinAccession(StringUtil.removePrefix(idStr, "emb:"), idStr);
 			} else if (idStr.startsWith("dbj:")) {
-				return getGenbankAccession(StringUtil.removePrefix(idStr, "dbj:"));
+				return getGenbankAccession(StringUtil.removePrefix(idStr, "dbj:"), idStr);
 			} else if (idStr.startsWith("ddbj/embl/genbank:")) {
-				return getGenbankAccession(StringUtil.removePrefix(idStr, "ddbj/embl/genbank:"));
+				return getGenbankAccession(StringUtil.removePrefix(idStr, "ddbj/embl/genbank:"), idStr);
 			} else if (idStr.startsWith("GenBank:")) {
-				return getGenbankAccession(StringUtil.removePrefix(idStr, "GenBank:"));
+				return getGenbankAccession(StringUtil.removePrefix(idStr, "GenBank:"), idStr);
 			} else if (idStr.startsWith("genbank indentifier:")) {
-				return getGenbankAccession(StringUtil.removePrefix(idStr, "genbank indentifier:"));
+				return getGenbankAccession(StringUtil.removePrefix(idStr, "genbank indentifier:"), idStr);
 			} else if (idStr.startsWith("GB:")) {
-				return getGenbankAccession(StringUtil.removePrefix(idStr, "GB:"));
+				return getGenbankAccession(StringUtil.removePrefix(idStr, "GB:"), idStr);
 			} else if (idStr.startsWith("gb:")) {
-				return getGenbankAccession(StringUtil.removePrefix(idStr, "gb:"));
+				return getGenbankAccession(StringUtil.removePrefix(idStr, "gb:"), idStr);
 			} else if (idStr.startsWith("tpg:")) {
-				return getGenbankAccession(StringUtil.removePrefix(idStr, "tpg:"));
+				return getGenbankAccession(StringUtil.removePrefix(idStr, "tpg:"), idStr);
 			} else if (idStr.startsWith("pdb:")) {
 				return new PdbID(StringUtil.removePrefix(idStr, "pdb:"));
 			} else if (idStr.startsWith("flybase:")) {
@@ -508,16 +502,15 @@ private DataSourceIdentifier<?> getUniprotId(String idStr) {
 			}
 			return new UniProtID(idStr);
 		} catch (IllegalArgumentException e) {
-			logger.warn("Detected invalid UniProt accession: " + idStr);
-			return null;
+			return new ProbableErrorDataSourceIdentifier(idStr, null, e.getMessage());
 		}
 	}
 
-	private DataSourceIdentifier<?> getRefseqAccession(String acc) {
+	private DataSourceIdentifier<?> getRefseqAccession(String acc, String accWithPrefix) {
 		try {
 			return new RefSeqID(acc);
 		} catch (IllegalArgumentException e) {
-			return getGenbankAccession(acc);
+			return getGenbankAccession(acc, accWithPrefix);
 		}
 	}
 
@@ -525,10 +518,11 @@ private DataSourceIdentifier<?> getRefseqAccession(String acc) {
 	 * @param removePrefix
 	 * @return
 	 */
-	private DataSourceIdentifier<?> getGenbankAccession(String acc) {
-		DataSourceIdentifier<String> nucAccId = NucleotideAccessionResolver.resolveNucleotideAccession(acc);
+	private DataSourceIdentifier<?> getGenbankAccession(String acc, String accWithPrefix) {
+		DataSourceIdentifier<String> nucAccId = NucleotideAccessionResolver.resolveNucleotideAccession(acc,
+				accWithPrefix);
 		if (ProbableErrorDataSourceIdentifier.class.isInstance(nucAccId)) {
-			return ProteinAccessionResolver.resolveProteinAccession(acc);
+			return ProteinAccessionResolver.resolveProteinAccession(acc, accWithPrefix);
 		} else {
 			return nucAccId;
 		}
@@ -538,17 +532,17 @@ private DataSourceIdentifier<?> getGenbankAccession(String acc) {
 	 * @param pmidsStr
 	 * @return
 	 */
-	private Set<PubMedID> parsePmidsStr(String pmidsStr) {
+	private Set<DataSourceIdentifier<?>> parsePmidsStr(String pmidsStr) {
 		if (pmidsStr.trim().equals(StringConstants.HYPHEN_MINUS) || pmidsStr.trim().equals("pubmed:0")) {
 			return null;
 		}
 		String[] toks = pmidsStr.split(RegExPatterns.PIPE);
-		Set<PubMedID> pmids = new HashSet<PubMedID>();
+		Set<DataSourceIdentifier<?>> pmids = new HashSet<DataSourceIdentifier<?>>();
 		for (String tok : toks) {
 			try {
 				pmids.add(new PubMedID(StringUtil.removePrefix(tok, "pubmed:")));
 			} catch (IllegalArgumentException e) {
-				logger.warn("Detected invalid pubmed id: " + e.getMessage());
+				pmids.add(new ProbableErrorDataSourceIdentifier(tok, null, e.getMessage()));
 			}
 		}
 		return pmids;
@@ -630,7 +624,7 @@ private IRefWebInteractor getInteractor(String uniqueIdStr, String altIdStr, Str
 	private Set<String> resolveAliasSymbols(String aliasStr) {
 		Set<String> aliases = new HashSet<String>();
 		for (String alias : aliasStr.split(RegExPatterns.PIPE)) {
-			String aliasSymbol = alias;//resolveAliasSymbol(alias);
+			String aliasSymbol = alias;// resolveAliasSymbol(alias);
 			if (aliasSymbol != null && !aliasSymbol.equals("-")) {
 				aliases.add(aliasSymbol);
 			}
@@ -638,16 +632,17 @@ private Set<String> resolveAliasSymbols(String aliasStr) {
 		return aliases;
 	}
 
-//	/**
-//	 * @param alias
-//	 * @return
-//	 */
-//	private String resolveAliasSymbol(String aliasStr) {
-//		if (aliasStr.startsWith("entrezgene/locuslink:")) {
-//			return new String(StringUtil.removePrefix(aliasStr, "entrezgene/locuslink:"));
-//		}
-//		return aliasStr;
-//	}
+	// /**
+	// * @param alias
+	// * @return
+	// */
+	// private String resolveAliasSymbol(String aliasStr) {
+	// if (aliasStr.startsWith("entrezgene/locuslink:")) {
+	// return new String(StringUtil.removePrefix(aliasStr,
+	// "entrezgene/locuslink:"));
+	// }
+	// return aliasStr;
+	// }
 
 	/**
 	 * @param aliasStr
@@ -684,7 +679,7 @@ private DataSourceIdentifier<?> resolveAliasId(String aliasStr) {
 		} else if (aliasStr.startsWith("rogid:")) {
 			return new RogId(StringUtil.removePrefix(aliasStr, "rogid:"));
 		} else if (aliasStr.startsWith("refseq:")) {
-			return getRefseqAccession(StringUtil.removePrefix(aliasStr, "refseq:"));
+			return getRefseqAccession(StringUtil.removePrefix(aliasStr, "refseq:"), aliasStr);
 		} else if (aliasStr.startsWith("hgnc:")) {
 			return new HgncGeneSymbolID(StringUtil.removePrefix(aliasStr, "hgnc:"));
 		}
diff --git a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/mgi/MRKSequenceFileParser.java b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/mgi/MRKSequenceFileParser.java
index a095524..41d49c8 100755
--- a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/mgi/MRKSequenceFileParser.java
+++ b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/mgi/MRKSequenceFileParser.java
@@ -142,7 +142,7 @@ protected MRKSequenceFileData parseRecordFromLine(Line line) {
 			for (String genBankID : genBankIDs) {
 				if (genBankID.trim().length() > 0) {
 					DataSourceIdentifier<String> resolveNucleotideAccession = NucleotideAccessionResolver
-							.resolveNucleotideAccession(genBankID);
+							.resolveNucleotideAccession(genBankID, genBankID);
 					genBankAccessionIDs.add(resolveNucleotideAccession);
 				}
 			}
@@ -233,13 +233,14 @@ protected MRKSequenceFileData parseRecordFromLine(Line line) {
 				}
 			}
 		}
-		
+
 		String featureType = toks[20];
 
 		return new MRKSequenceFileData(mgiAccessionID, markerSymbol, status, markerType, markerName, cM_Position,
 				chromosome, genomeCoordinateStart, genomeCoordinateEnd, strand, genBankAccessionIDs,
 				refseqTranscriptIds, vegaTranscriptIds, ensemblTranscriptIds, uniprotIds, tremblIds, vegaProteinIds,
-				ensemblProteinIds, refseqProteinIds, unigeneIds, featureType, line.getByteOffset(), line.getLineNumber());
+				ensemblProteinIds, refseqProteinIds, unigeneIds, featureType, line.getByteOffset(),
+				line.getLineNumber());
 
 	}
 
diff --git a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ncbi/gene/EntrezGene2AccessionFileData.java b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ncbi/gene/EntrezGene2AccessionFileData.java
index 65d918b..8736a73 100644
--- a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ncbi/gene/EntrezGene2AccessionFileData.java
+++ b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ncbi/gene/EntrezGene2AccessionFileData.java
@@ -60,19 +60,22 @@
 @Record(dataSource = DataSource.EG, comment = "", license = License.NCBI, citation = "The NCBI handbook [Internet]. Bethesda (MD): National Library of Medicine (US), National Center for Biotechnology Information; 2002 Oct. Chapter 19 Gene: A Directory of Genes. Available from http://www.ncbi.nlm.nih.gov/books/NBK21091", label = "gene2accession record")
 public class EntrezGene2AccessionFileData extends SingleLineFileRecord {
 	/*
-	 * #Format: tax_id GeneID status RNA_nucleotide_accession.version RNA_nucleotide_gi
-	 * protein_accession.version protein_gi genomic_nucleotide_accession.version
-	 * genomic_nucleotide_gi start_position_on_the_genomic_accession
-	 * end_position_on_the_genomic_accession orientation assembly (tab is used as a separator, pound
-	 * sign - start of a comment)
+	 * #Format: tax_id GeneID status RNA_nucleotide_accession.version
+	 * RNA_nucleotide_gi protein_accession.version protein_gi
+	 * genomic_nucleotide_accession.version genomic_nucleotide_gi
+	 * start_position_on_the_genomic_accession
+	 * end_position_on_the_genomic_accession orientation assembly (tab is used
+	 * as a separator, pound sign - start of a comment)
 	 */
 
 	/*
-	 * #Format: tax_id GeneID status RNA_nucleotide_accession.version RNA_nucleotide_gi
-	 * protein_accession.version protein_gi genomic_nucleotide_accession.version
-	 * genomic_nucleotide_gi start_position_on_the_genomic_accession
-	 * end_position_on_the_genomic_accession orientation assembly mature_peptide_accession.version
-	 * mature_peptide_gi Symbol (tab is used as a separator, pound sign - start of a comment)
+	 * #Format: tax_id GeneID status RNA_nucleotide_accession.version
+	 * RNA_nucleotide_gi protein_accession.version protein_gi
+	 * genomic_nucleotide_accession.version genomic_nucleotide_gi
+	 * start_position_on_the_genomic_accession
+	 * end_position_on_the_genomic_accession orientation assembly
+	 * mature_peptide_accession.version mature_peptide_gi Symbol (tab is used as
+	 * a separator, pound sign - start of a comment)
 	 */
 
 	@RecordField(comment = "the unique identifier provided by NCBI Taxonomy for the species or strain/isolate")
@@ -196,7 +199,8 @@ public static EntrezGene2AccessionFileData parseGene2AccessionLine(Line line) {
 
 			DataSourceIdentifier<?> RNA_nucleotide_accession_dot_version = null;
 			if (!toks[3].equals("-") && status != null) {
-				RNA_nucleotide_accession_dot_version = NucleotideAccessionResolver.resolveNucleotideAccession(toks[3]);
+				RNA_nucleotide_accession_dot_version = NucleotideAccessionResolver.resolveNucleotideAccession(toks[3],
+						toks[3]);
 			}
 
 			String intStr = toks[4];
@@ -207,7 +211,7 @@ public static EntrezGene2AccessionFileData parseGene2AccessionLine(Line line) {
 
 			DataSourceIdentifier<?> protein_accession_dot_version = null;
 			if (!toks[5].equals("-") && status != null) {
-				protein_accession_dot_version = ProteinAccessionResolver.resolveProteinAccession(toks[5]);
+				protein_accession_dot_version = ProteinAccessionResolver.resolveProteinAccession(toks[5], toks[5]);
 			}
 
 			intStr = toks[6];
@@ -218,8 +222,8 @@ public static EntrezGene2AccessionFileData parseGene2AccessionLine(Line line) {
 
 			DataSourceIdentifier<?> genomic_nucleotide_accession_dot_version = null;
 			if (!toks[7].equals("-") && status != null) {
-				genomic_nucleotide_accession_dot_version = NucleotideAccessionResolver
-						.resolveNucleotideAccession(toks[7]);
+				genomic_nucleotide_accession_dot_version = NucleotideAccessionResolver.resolveNucleotideAccession(
+						toks[7], toks[7]);
 			}
 
 			intStr = toks[8];
@@ -257,7 +261,8 @@ public static EntrezGene2AccessionFileData parseGene2AccessionLine(Line line) {
 
 			DataSourceIdentifier<?> mature_peptide_accession_dot_version = null;
 			if (!toks[13].equals("-")) {
-				mature_peptide_accession_dot_version = ProteinAccessionResolver.resolveProteinAccession(toks[13]);
+				mature_peptide_accession_dot_version = ProteinAccessionResolver.resolveProteinAccession(toks[13],
+						toks[13]);
 			}
 
 			intStr = toks[14];
diff --git a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/pharmgkb/PharmGkbGeneFileParser.java b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/pharmgkb/PharmGkbGeneFileParser.java
index fd351f1..23327d1 100644
--- a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/pharmgkb/PharmGkbGeneFileParser.java
+++ b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/pharmgkb/PharmGkbGeneFileParser.java
@@ -260,13 +260,13 @@ private DataSourceIdentifier<?> resolveCrossRefId(String refStr) {
 			} else if (refStr.startsWith(OMIM_PREFIX)) {
 				return new OmimID(StringUtil.removePrefix(refStr, OMIM_PREFIX));
 			} else if (refStr.startsWith(REFSEQDNA_PREFIX)) {
-				return NucleotideAccessionResolver.resolveNucleotideAccession(StringUtil.removePrefix(refStr,
-						REFSEQDNA_PREFIX));
+				return NucleotideAccessionResolver.resolveNucleotideAccession(
+						StringUtil.removePrefix(refStr, REFSEQDNA_PREFIX), refStr);
 			} else if (refStr.startsWith(REFSEQRNA_PREFIX)) {
 				return new RefSeqID(StringUtil.removePrefix(refStr, REFSEQRNA_PREFIX));
 			} else if (refStr.startsWith(REFSEQPROTEIN_PREFIX)) {
-				return ProteinAccessionResolver.resolveProteinAccession(StringUtil.removePrefix(refStr,
-						REFSEQPROTEIN_PREFIX));
+				return ProteinAccessionResolver.resolveProteinAccession(
+						StringUtil.removePrefix(refStr, REFSEQPROTEIN_PREFIX), refStr);
 			} else if (refStr.startsWith(UCSCGENOMEBROWSER_PREFIX)) {
 				return new UcscGenomeBrowserId(StringUtil.removePrefix(refStr, UCSCGENOMEBROWSER_PREFIX));
 			} else if (refStr.startsWith(UNIPROT_PREFIX)) {
diff --git a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/rgd/RgdAnnotationFileIdResolver.java b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/rgd/RgdAnnotationFileIdResolver.java
index 2cd990f..8ce4c2f 100644
--- a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/rgd/RgdAnnotationFileIdResolver.java
+++ b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/rgd/RgdAnnotationFileIdResolver.java
@@ -80,6 +80,10 @@ public DataSourceIdentifier<?> resolveId(String idStr) {
 			// there is one instance of RGD:[space]737465
 			return new RgdID(idStr.substring(idStr.lastIndexOf(" ")));
 		}
+		if (idStr.matches("RGDG:\\d+")) {
+			// there is one instance of RGDG:
+			return new RgdID(idStr.substring(4));
+		}
 		if (idStr.matches("RDG:\\d+")) {
 			// there are a few typos where RDG appears instead of RGD
 			return new RgdID(idStr.substring(4));
diff --git a/datasource-fileparsers/src/test/java/edu/ucdenver/ccp/datasource/fileparsers/hgnc/HgncDownloadFileParserTest.java b/datasource-fileparsers/src/test/java/edu/ucdenver/ccp/datasource/fileparsers/hgnc/HgncDownloadFileParserTest.java
index f574e71..e96d636 100644
--- a/datasource-fileparsers/src/test/java/edu/ucdenver/ccp/datasource/fileparsers/hgnc/HgncDownloadFileParserTest.java
+++ b/datasource-fileparsers/src/test/java/edu/ucdenver/ccp/datasource/fileparsers/hgnc/HgncDownloadFileParserTest.java
@@ -108,7 +108,7 @@ public void testParser() throws Exception {
 			assertEquals("2012-10-12", dataRecord.getDateModified());
 			assertEquals("2010-11-25", dataRecord.getDateSymbolChanged());
 			assertEquals("2012-08-15", dataRecord.getDateNameChanged());
-			assertEquals(CollectionsUtil.createSet(NucleotideAccessionResolver.resolveNucleotideAccession("BC040926")),
+			assertEquals(CollectionsUtil.createSet(NucleotideAccessionResolver.resolveNucleotideAccession("BC040926", null)),
 					dataRecord.getAccessionNumbers());
 			assertEmpty(dataRecord.getEcNumbers());
 			assertEquals(new EntrezGeneID(503538), dataRecord.getEntrezGeneID());
@@ -158,7 +158,7 @@ public void testParser() throws Exception {
 			assertEquals("2011-07-21", dataRecord.getDateModified());
 			assertNull(dataRecord.getDateSymbolChanged());
 			assertNull(dataRecord.getDateNameChanged());
-			assertEquals(CollectionsUtil.createSet(NucleotideAccessionResolver.resolveNucleotideAccession("AF271790")),
+			assertEquals(CollectionsUtil.createSet(NucleotideAccessionResolver.resolveNucleotideAccession("AF271790", null)),
 					dataRecord.getAccessionNumbers());
 			assertEmpty(dataRecord.getEcNumbers());
 			assertEquals(new EntrezGeneID(29974), dataRecord.getEntrezGeneID());
diff --git a/datasource-fileparsers/src/test/java/edu/ucdenver/ccp/datasource/fileparsers/irefweb/IRefWebPsiMitab2_6FileParserTest.java b/datasource-fileparsers/src/test/java/edu/ucdenver/ccp/datasource/fileparsers/irefweb/IRefWebPsiMitab2_6FileParserTest.java
index 2994431..f856a86 100644
--- a/datasource-fileparsers/src/test/java/edu/ucdenver/ccp/datasource/fileparsers/irefweb/IRefWebPsiMitab2_6FileParserTest.java
+++ b/datasource-fileparsers/src/test/java/edu/ucdenver/ccp/datasource/fileparsers/irefweb/IRefWebPsiMitab2_6FileParserTest.java
@@ -250,8 +250,8 @@ public void testParser() throws IOException {
 
 			assertFalse(record.getInteraction().isNegative());
 
-			assertEquals(new ProbableErrorDataSourceIdentifier("\"1FMO_I\"", null,
-					"Input is not a known protein accession pattern: \"1FMO_I\""), record.getInteractorA()
+			assertEquals(new ProbableErrorDataSourceIdentifier("GenBank:\"1FMO_I\"", null,
+					"Input is not a known accession pattern: GenBank:\"1FMO_I\""), record.getInteractorA()
 					.getOriginalReference());
 			assertEquals(new RefSeqID("NP_032880"), record.getInteractorB().getOriginalReference());
 			assertEquals(new PdbID("1FMO_I"), record.getInteractorA().getFinalReference());
diff --git a/datasource-fileparsers/src/test/java/edu/ucdenver/ccp/datasource/fileparsers/mgi/MRKSequenceFileParserTest.java b/datasource-fileparsers/src/test/java/edu/ucdenver/ccp/datasource/fileparsers/mgi/MRKSequenceFileParserTest.java
index c2783d9..35b4818 100755
--- a/datasource-fileparsers/src/test/java/edu/ucdenver/ccp/datasource/fileparsers/mgi/MRKSequenceFileParserTest.java
+++ b/datasource-fileparsers/src/test/java/edu/ucdenver/ccp/datasource/fileparsers/mgi/MRKSequenceFileParserTest.java
@@ -96,14 +96,14 @@ public void testParser() {
 				assertEquals(MgiGeneType.GENE, record1.getMarkerType());
 				assertEquals(new String("RIKEN cDNA 0610007P14 gene"), record1.getMarkerName());
 				Set<DataSourceIdentifier<?>> expectedGenBankIds = new HashSet<DataSourceIdentifier<?>>();
-				expectedGenBankIds.add(NucleotideAccessionResolver.resolveNucleotideAccession("AF270646"));
-				expectedGenBankIds.add(NucleotideAccessionResolver.resolveNucleotideAccession("AK002308"));
-				expectedGenBankIds.add(NucleotideAccessionResolver.resolveNucleotideAccession("AK004480"));
-				expectedGenBankIds.add(NucleotideAccessionResolver.resolveNucleotideAccession("AK152230"));
-				expectedGenBankIds.add(NucleotideAccessionResolver.resolveNucleotideAccession("AU019315"));
-				expectedGenBankIds.add(NucleotideAccessionResolver.resolveNucleotideAccession("BC004591"));
-				expectedGenBankIds.add(NucleotideAccessionResolver.resolveNucleotideAccession("BG066052"));
-				expectedGenBankIds.add(NucleotideAccessionResolver.resolveNucleotideAccession("C77855"));
+				expectedGenBankIds.add(NucleotideAccessionResolver.resolveNucleotideAccession("AF270646", null));
+				expectedGenBankIds.add(NucleotideAccessionResolver.resolveNucleotideAccession("AK002308", null));
+				expectedGenBankIds.add(NucleotideAccessionResolver.resolveNucleotideAccession("AK004480", null));
+				expectedGenBankIds.add(NucleotideAccessionResolver.resolveNucleotideAccession("AK152230", null));
+				expectedGenBankIds.add(NucleotideAccessionResolver.resolveNucleotideAccession("AU019315", null));
+				expectedGenBankIds.add(NucleotideAccessionResolver.resolveNucleotideAccession("BC004591", null));
+				expectedGenBankIds.add(NucleotideAccessionResolver.resolveNucleotideAccession("BG066052", null));
+				expectedGenBankIds.add(NucleotideAccessionResolver.resolveNucleotideAccession("C77855", null));
 				assertEquals(expectedGenBankIds, record1.getGenBankAccessionIDs());
 				
 				Set<RefSeqID> expectedRefseqTranscriptIds = new HashSet<RefSeqID>();
diff --git a/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/NucleotideAccessionResolver.java b/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/NucleotideAccessionResolver.java
index 52e4ec5..917ba05 100644
--- a/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/NucleotideAccessionResolver.java
+++ b/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/NucleotideAccessionResolver.java
@@ -102,7 +102,16 @@ public class NucleotideAccessionResolver {
 		}
 	}
 
-	public static DataSourceIdentifier<String> resolveNucleotideAccession(String acc) {
+	/**
+	 * @param acc
+	 * @param idWithPrefix
+	 *            - optional, is only used as part of the error message if the
+	 *            acc cannot be resolved. Often the prefix is stripped prior to
+	 *            id resolution, this parameter allows the prefix to be included
+	 *            in the error message.
+	 * @return
+	 */
+	public static DataSourceIdentifier<String> resolveNucleotideAccession(String acc, String idWithPrefix) {
 		acc = acc.toUpperCase().trim();
 		if (acc.matches("[A-Z][A-Z]_\\d+\\.?\\d*")) {
 			return new RefSeqID(acc);
@@ -151,7 +160,11 @@ public static DataSourceIdentifier<String> resolveNucleotideAccession(String acc
 			}
 		}
 		// logger.warn("Input is not a known nucleotide accession: " + acc);
-		return new ProbableErrorDataSourceIdentifier(acc, null, "Input is not a known nucleotide accession: " + acc);
+		if (idWithPrefix == null) {
+			idWithPrefix = acc;
+		}
+		return new ProbableErrorDataSourceIdentifier(idWithPrefix, null, "Input is not a known accession: "
+				+ idWithPrefix);
 	}
 
 }
diff --git a/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/ProteinAccessionResolver.java b/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/ProteinAccessionResolver.java
index 0efbf90..98e3cdd 100644
--- a/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/ProteinAccessionResolver.java
+++ b/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/ProteinAccessionResolver.java
@@ -64,7 +64,16 @@ public class ProteinAccessionResolver {
 	private static final String VALID_UNIPROT_PATTERN_3 = "[A-NR-Z][0-9][A-Z][A-Z0-9][A-Z0-9][0-9][A-Z][A-Z0-9][A-Z0-9][0-9]";
 	private static final String VALID_UNIPROT_PATTERN_2 = "[OPQ][0-9][A-Z0-9][A-Z0-9][A-Z0-9][0-9]";
 
-	public static DataSourceIdentifier<String> resolveProteinAccession(String acc) {
+	/**
+	 * @param acc
+	 * @param idWithPrefix
+	 *            - optional, is only used as part of the error message if the
+	 *            acc cannot be resolved. Often the prefix is stripped prior to
+	 *            id resolution, this parameter allows the prefix to be included
+	 *            in the error message.
+	 * @return
+	 */
+	public static DataSourceIdentifier<String> resolveProteinAccession(String acc, String idWithPrefix) {
 		acc = acc.toUpperCase();
 		if (acc.matches("[A-Z][A-Z]_\\d+\\.?\\d*")) {
 			return new RefSeqID(acc);
@@ -119,9 +128,12 @@ public static DataSourceIdentifier<String> resolveProteinAccession(String acc) {
 				return new GenBankID(acc);
 			}
 		}
-		logger.warn("Input is not a known protein accession pattern: " + acc);
-		return new ProbableErrorDataSourceIdentifier(acc, null, "Input is not a known protein accession pattern: "
-				+ acc);
+//		logger.warn("Input is not a known protein accession pattern: " + acc);
+		if (idWithPrefix == null) {
+			idWithPrefix = acc;
+		}
+		return new ProbableErrorDataSourceIdentifier(idWithPrefix, null, "Input is not a known accession pattern: "
+				+ idWithPrefix);
 	}
 
 }
diff --git a/datasource-identifiers/src/test/java/edu/ucdenver/ccp/datasource/identifiers/NucleotideAccessionResolverTest.java b/datasource-identifiers/src/test/java/edu/ucdenver/ccp/datasource/identifiers/NucleotideAccessionResolverTest.java
index d2d9517..b8021d8 100644
--- a/datasource-identifiers/src/test/java/edu/ucdenver/ccp/datasource/identifiers/NucleotideAccessionResolverTest.java
+++ b/datasource-identifiers/src/test/java/edu/ucdenver/ccp/datasource/identifiers/NucleotideAccessionResolverTest.java
@@ -51,13 +51,13 @@ public class NucleotideAccessionResolverTest {
 
 	@Test
 	public void testRefseqResolution() {
-		assertEquals(new RefSeqID("NM_000518"), NucleotideAccessionResolver.resolveNucleotideAccession("NM_000518"));
-		assertEquals(new RefSeqID("NM_000518"), NucleotideAccessionResolver.resolveNucleotideAccession("NM_000518.2"));
+		assertEquals(new RefSeqID("NM_000518"), NucleotideAccessionResolver.resolveNucleotideAccession("NM_000518", null));
+		assertEquals(new RefSeqID("NM_000518"), NucleotideAccessionResolver.resolveNucleotideAccession("NM_000518.2", null));
 	}
 	
 	@Test
 	public void testGenbankResolution() {
-		assertEquals(new GenBankID("AC004528.1"), NucleotideAccessionResolver.resolveNucleotideAccession("AC004528.1"));
+		assertEquals(new GenBankID("AC004528.1"), NucleotideAccessionResolver.resolveNucleotideAccession("AC004528.1", null));
 	}
 
 }
diff --git a/datasource-identifiers/src/test/java/edu/ucdenver/ccp/datasource/identifiers/ProteinAccessionResolverTest.java b/datasource-identifiers/src/test/java/edu/ucdenver/ccp/datasource/identifiers/ProteinAccessionResolverTest.java
index edc578f..713f9d1 100644
--- a/datasource-identifiers/src/test/java/edu/ucdenver/ccp/datasource/identifiers/ProteinAccessionResolverTest.java
+++ b/datasource-identifiers/src/test/java/edu/ucdenver/ccp/datasource/identifiers/ProteinAccessionResolverTest.java
@@ -54,12 +54,12 @@ public class ProteinAccessionResolverTest {
 
 	@Test
 	public void testProteinAccessionResolution() {
-		assertEquals(new GenBankID("AAI00916"), ProteinAccessionResolver.resolveProteinAccession("AAI00916"));
-		assertEquals(new GenBankID("AAI00916.2"), ProteinAccessionResolver.resolveProteinAccession("AAI00916.2"));
-		assertEquals(new EmblID("CAI00916"), ProteinAccessionResolver.resolveProteinAccession("CAI00916"));
-		assertEquals(new DdbjId("GAI00916"), ProteinAccessionResolver.resolveProteinAccession("GAI00916"));
-		assertEquals(new RefSeqID("NP_795370"), ProteinAccessionResolver.resolveProteinAccession("NP_795370"));
-		assertEquals(new UniProtID("P59543"), ProteinAccessionResolver.resolveProteinAccession("P59543"));
+		assertEquals(new GenBankID("AAI00916"), ProteinAccessionResolver.resolveProteinAccession("AAI00916", null));
+		assertEquals(new GenBankID("AAI00916.2"), ProteinAccessionResolver.resolveProteinAccession("AAI00916.2", null));
+		assertEquals(new EmblID("CAI00916"), ProteinAccessionResolver.resolveProteinAccession("CAI00916", null));
+		assertEquals(new DdbjId("GAI00916"), ProteinAccessionResolver.resolveProteinAccession("GAI00916", null));
+		assertEquals(new RefSeqID("NP_795370"), ProteinAccessionResolver.resolveProteinAccession("NP_795370", null));
+		assertEquals(new UniProtID("P59543"), ProteinAccessionResolver.resolveProteinAccession("P59543", null));
 	}
 
 }

From 53c3ac8efb178ce477c33577cacb8dd8a69e7e0e Mon Sep 17 00:00:00 2001
From: bill-baumgartner <bill.baumgartner@gmail.com>
Date: Tue, 9 Feb 2016 12:10:25 -0700
Subject: [PATCH 27/36] added handling for an RGD id typo

---
 .../fileparsers/rgd/RgdAnnotationFileIdResolver.java   | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/rgd/RgdAnnotationFileIdResolver.java b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/rgd/RgdAnnotationFileIdResolver.java
index 8ce4c2f..f477e67 100644
--- a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/rgd/RgdAnnotationFileIdResolver.java
+++ b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/rgd/RgdAnnotationFileIdResolver.java
@@ -81,8 +81,8 @@ public DataSourceIdentifier<?> resolveId(String idStr) {
 			return new RgdID(idStr.substring(idStr.lastIndexOf(" ")));
 		}
 		if (idStr.matches("RGDG:\\d+")) {
-			// there is one instance of RGDG:
-			return new RgdID(idStr.substring(4));
+			// there is one instance of RGDG:733289
+			return new RgdID(idStr.substring(5));
 		}
 		if (idStr.matches("RDG:\\d+")) {
 			// there are a few typos where RDG appears instead of RGD
@@ -114,12 +114,6 @@ public DataSourceIdentifier<?> resolveId(String idStr) {
 		if (idStr.matches("PW:\\d+")) {
 			return new PwId(idStr);
 		}
-		if (idStr.matches("rno:\\d+")) {
-			logger.warn("Ignoring RNO identifier: " + idStr + ". Not sure what this references...");
-			// not sure what this is.. could be a kegg gene? it's used in the
-			// withOrFrom column
-			return null;
-		}
 		if (idStr.startsWith("UniProtKB:")) {
 			return new UniProtID(idStr.substring(10));
 		}

From 9c7b81f28833498c62935c2946f64cfffc7637bd Mon Sep 17 00:00:00 2001
From: bill-baumgartner <bill.baumgartner@gmail.com>
Date: Tue, 9 Feb 2016 12:48:08 -0700
Subject: [PATCH 28/36] Added handling for new column (supplied Vega ID)

---
 .../hgnc/HgncDownloadFileData.java            | 494 +-----------------
 .../hgnc/HgncDownloadFileParser.java          |  28 +-
 .../hgnc/HgncDownloadFileParserTest.java      |  10 +-
 .../fileparsers/hgnc/hgnc_download.txt        |   6 +-
 4 files changed, 47 insertions(+), 491 deletions(-)

diff --git a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/hgnc/HgncDownloadFileData.java b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/hgnc/HgncDownloadFileData.java
index 8473722..49a216d 100644
--- a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/hgnc/HgncDownloadFileData.java
+++ b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/hgnc/HgncDownloadFileData.java
@@ -35,6 +35,9 @@
 
 import java.util.Set;
 
+import lombok.Data;
+import lombok.EqualsAndHashCode;
+
 import org.apache.log4j.Logger;
 
 import edu.ucdenver.ccp.datasource.fileparsers.Record;
@@ -153,6 +156,8 @@
  * @author Center for Computational Pharmacology; ccpsupport@ucdenver.edu
  * 
  */
+@Data
+@EqualsAndHashCode(callSuper = false)
 @Record(dataSource = DataSource.HGNC, schemaVersion = "2", comment = "Previous version of this record represented only a subset of the data in the HGNC download file. This version represents all data and includes the new \"gene family description\" column.", label = "HGNC record")
 public class HgncDownloadFileData extends SingleLineFileRecord {
 
@@ -262,7 +267,7 @@ public class HgncDownloadFileData extends SingleLineFileRecord {
 	@RecordField
 	private final EntrezGeneID suppliedEntrezGeneId;
 	@RecordField
-	private final OmimID suppliedOmimId;
+	private final Set<OmimID> suppliedOmimIds;
 	@RecordField
 	private final RefSeqID suppliedRefseqId;
 	@RecordField
@@ -270,6 +275,8 @@ public class HgncDownloadFileData extends SingleLineFileRecord {
 	@RecordField
 	private final EnsemblGeneID suppliedEnsemblId;
 	@RecordField
+	private final VegaID suppliedVegaId;
+	@RecordField
 	private final UcscGenomeBrowserId suppliedUcscId;
 	@RecordField
 	private final Set<MgiGeneID> suppliedMgiIds;
@@ -319,10 +326,10 @@ public class HgncDownloadFileData extends SingleLineFileRecord {
 	 * @param suppliedMgiId
 	 * @param suppliedRgdId
 	 */
-	public HgncDownloadFileData(HgncID hgncID, HgncGeneSymbolID hgncGeneSymbol, String hgncGeneName,
-			String status, String locusType, String locusGroup, Set<String> previousSymbols,
-			Set<String> previousNames, Set<String> synonyms, Set<String> nameSynonyms, String chromosome,
-			String dateApproved, String dateModified, String dateSymbolChanged, String dateNameChanged,
+	public HgncDownloadFileData(HgncID hgncID, HgncGeneSymbolID hgncGeneSymbol, String hgncGeneName, String status,
+			String locusType, String locusGroup, Set<String> previousSymbols, Set<String> previousNames,
+			Set<String> synonyms, Set<String> nameSynonyms, String chromosome, String dateApproved,
+			String dateModified, String dateSymbolChanged, String dateNameChanged,
 			Set<DataSourceIdentifier<?>> accessionNumbers, Set<EnzymeCommissionID> ecNumbers,
 			EntrezGeneID entrezGeneID, EnsemblGeneID ensemblGeneID, Set<MgiGeneID> mgiIDs,
 			Set<SpecialistDbIdLinkPair> specialistDatabaseIdLinkPairings, Set<PubMedID> pubmedIDs,
@@ -330,9 +337,10 @@ public HgncDownloadFileData(HgncID hgncID, HgncGeneSymbolID hgncGeneSymbol, Stri
 			String recordType, Set<DataSourceIdentifier<?>> primaryIds, Set<DataSourceIdentifier<?>> secondaryIds,
 			Set<CcdsId> ccdsIDs, Set<VegaID> vegaIDs,
 			Set<LocusSpecificDatabaseNameLinkPair> locusSpecificDatabaseNameLinkPairings,
-			EntrezGeneID suppliedEntrezGeneId, OmimID suppliedOmimId, RefSeqID suppliedRefseqId,
-			UniProtID suppliedUniprotId, EnsemblGeneID suppliedEnsemblId, UcscGenomeBrowserId suppliedUcscId,
-			Set<MgiGeneID> suppliedMgiIds, Set<RgdID> suppliedRgdIds, long byteOffset, long lineNumber) {
+			EntrezGeneID suppliedEntrezGeneId, Set<OmimID> suppliedOmimIds, RefSeqID suppliedRefseqId,
+			UniProtID suppliedUniprotId, EnsemblGeneID suppliedEnsemblId, VegaID suppliedVegaId,
+			UcscGenomeBrowserId suppliedUcscId, Set<MgiGeneID> suppliedMgiIds, Set<RgdID> suppliedRgdIds,
+			long byteOffset, long lineNumber) {
 		super(byteOffset, lineNumber);
 		this.hgncID = hgncID;
 		this.hgncGeneSymbol = hgncGeneSymbol;
@@ -365,281 +373,17 @@ public HgncDownloadFileData(HgncID hgncID, HgncGeneSymbolID hgncGeneSymbol, Stri
 		this.vegaIDs = vegaIDs;
 		this.locusSpecificDatabaseNameLinkPairings = locusSpecificDatabaseNameLinkPairings;
 		this.suppliedEntrezGeneId = suppliedEntrezGeneId;
-		this.suppliedOmimId = suppliedOmimId;
+		this.suppliedOmimIds = suppliedOmimIds;
 		this.suppliedRefseqId = suppliedRefseqId;
 		this.suppliedUniprotId = suppliedUniprotId;
 		this.suppliedEnsemblId = suppliedEnsemblId;
+		this.suppliedVegaId = suppliedVegaId;
 		this.suppliedUcscId = suppliedUcscId;
 		this.suppliedMgiIds = suppliedMgiIds;
 		this.suppliedRgdIds = suppliedRgdIds;
 	}
 
-	/**
-	 * @return the hgncID
-	 */
-	public HgncID getHgncID() {
-		return hgncID;
-	}
-
-	/**
-	 * @return the hgncGeneSymbol
-	 */
-	public HgncGeneSymbolID getHgncGeneSymbol() {
-		return hgncGeneSymbol;
-	}
-
-	/**
-	 * @return the hgncGeneName
-	 */
-	public String getHgncGeneName() {
-		return hgncGeneName;
-	}
-
-	/**
-	 * @return the status
-	 */
-	public String getStatus() {
-		return status;
-	}
-
-	/**
-	 * @return the locusType
-	 */
-	public String getLocusType() {
-		return locusType;
-	}
-
-	/**
-	 * @return the locusGroup
-	 */
-	public String getLocusGroup() {
-		return locusGroup;
-	}
-
-	/**
-	 * @return the previousSymbols
-	 */
-	public Set<String> getPreviousSymbols() {
-		return previousSymbols;
-	}
-
-	/**
-	 * @return the previousNames
-	 */
-	public Set<String> getPreviousNames() {
-		return previousNames;
-	}
-
-	/**
-	 * @return the synonyms
-	 */
-	public Set<String> getSynonyms() {
-		return synonyms;
-	}
-
-	/**
-	 * @return the nameSynonyms
-	 */
-	public Set<String> getNameSynonyms() {
-		return nameSynonyms;
-	}
-
-	/**
-	 * @return the chromosome
-	 */
-	public String getChromosome() {
-		return chromosome;
-	}
-
-	/**
-	 * @return the dateApproved
-	 */
-	public String getDateApproved() {
-		return dateApproved;
-	}
-
-	/**
-	 * @return the dateModified
-	 */
-	public String getDateModified() {
-		return dateModified;
-	}
-
-	/**
-	 * @return the dateSymbolChanged
-	 */
-	public String getDateSymbolChanged() {
-		return dateSymbolChanged;
-	}
-
-	/**
-	 * @return the dateNameChanged
-	 */
-	public String getDateNameChanged() {
-		return dateNameChanged;
-	}
-
-	/**
-	 * @return the accessionNumbers
-	 */
-	public Set<DataSourceIdentifier<?>> getAccessionNumbers() {
-		return accessionNumbers;
-	}
-
-	/**
-	 * @return the ecNumbers
-	 */
-	public Set<EnzymeCommissionID> getEcNumbers() {
-		return ecNumbers;
-	}
-
-	/**
-	 * @return the entrezGeneID
-	 */
-	public EntrezGeneID getEntrezGeneID() {
-		return entrezGeneID;
-	}
-
-	/**
-	 * @return the ensemblGeneID
-	 */
-	public EnsemblGeneID getEnsemblGeneID() {
-		return ensemblGeneID;
-	}
-
-	/**
-	 * @return the mgiID
-	 */
-	public Set<MgiGeneID> getMgiIDs() {
-		return mgiIDs;
-	}
-
-	/**
-	 * @return the specialistDatabaseIdLinkPairings
-	 */
-	public Set<SpecialistDbIdLinkPair> getSpecialistDatabaseIdLinkPairings() {
-		return specialistDatabaseIdLinkPairings;
-	}
-
-	/**
-	 * @return the pubmedIDs
-	 */
-	public Set<PubMedID> getPubmedIDs() {
-		return pubmedIDs;
-	}
-
-	/**
-	 * @return the refseqIDs
-	 */
-	public Set<RefSeqID> getRefseqIDs() {
-		return refseqIDs;
-	}
-
-	/**
-	 * @return the geneFamilyTagDescriptionPairings
-	 */
-	public Set<GeneFamilyTagDescriptionPair> getGeneFamilyTagDescriptionPairings() {
-		return geneFamilyTagDescriptionPairings;
-	}
-
-	/**
-	 * @return the recordType
-	 */
-	public String getRecordType() {
-		return recordType;
-	}
-
-	/**
-	 * @return the primaryIds
-	 */
-	public Set<DataSourceIdentifier<?>> getPrimaryIds() {
-		return primaryIds;
-	}
-
-	/**
-	 * @return the secondaryIds
-	 */
-	public Set<DataSourceIdentifier<?>> getSecondaryIds() {
-		return secondaryIds;
-	}
-
-	/**
-	 * @return the ccdsIDs
-	 */
-	public Set<CcdsId> getCcdsIDs() {
-		return ccdsIDs;
-	}
-
-	/**
-	 * @return the vegaIDs
-	 */
-	public Set<VegaID> getVegaIDs() {
-		return vegaIDs;
-	}
-
-	/**
-	 * @return the locusSpecificDatabaseNameLinkPairings
-	 */
-	public Set<LocusSpecificDatabaseNameLinkPair> getLocusSpecificDatabaseNameLinkPairings() {
-		return locusSpecificDatabaseNameLinkPairings;
-	}
-
-	/**
-	 * @return the suppliedEntrezGeneId
-	 */
-	public EntrezGeneID getSuppliedEntrezGeneId() {
-		return suppliedEntrezGeneId;
-	}
-
-	/**
-	 * @return the suppliedOmimId
-	 */
-	public OmimID getSuppliedOmimId() {
-		return suppliedOmimId;
-	}
-
-	/**
-	 * @return the suppliedRefseqId
-	 */
-	public RefSeqID getSuppliedRefseqId() {
-		return suppliedRefseqId;
-	}
-
-	/**
-	 * @return the suppliedUniprotId
-	 */
-	public UniProtID getSuppliedUniprotId() {
-		return suppliedUniprotId;
-	}
-
-	/**
-	 * @return the suppliedEnsemblId
-	 */
-	public EnsemblGeneID getSuppliedEnsemblId() {
-		return suppliedEnsemblId;
-	}
-
-	/**
-	 * @return the suppliedUcscId
-	 */
-	public UcscGenomeBrowserId getSuppliedUcscId() {
-		return suppliedUcscId;
-	}
-
-	/**
-	 * @return the suppliedMgiId
-	 */
-	public Set<MgiGeneID> getSuppliedMgiIds() {
-		return suppliedMgiIds;
-	}
-
-	/**
-	 * @return the suppliedRgdId
-	 */
-	public Set<RgdID> getSuppliedRgdId() {
-		return suppliedRgdIds;
-	}
-
+	@Data
 	@Record(dataSource = DataSource.HGNC)
 	public static class SpecialistDbIdLinkPair {
 		@RecordField
@@ -656,75 +400,9 @@ public SpecialistDbIdLinkPair(DataSourceIdentifier<?> specialistDbId, String spe
 			this.specialistDbId = specialistDbId;
 			this.specialistDbUrl = specialistDbUrl;
 		}
-
-		/**
-		 * @return the specialistDbId
-		 */
-		public DataSourceIdentifier<?> getSpecialistDbId() {
-			return specialistDbId;
-		}
-
-		/**
-		 * @return the specialistDbUrl
-		 */
-		public String getSpecialistDbUrl() {
-			return specialistDbUrl;
-		}
-
-		/*
-		 * (non-Javadoc)
-		 * 
-		 * @see java.lang.Object#hashCode()
-		 */
-		@Override
-		public int hashCode() {
-			final int prime = 31;
-			int result = 1;
-			result = prime * result + ((specialistDbId == null) ? 0 : specialistDbId.hashCode());
-			result = prime * result + ((specialistDbUrl == null) ? 0 : specialistDbUrl.hashCode());
-			return result;
-		}
-
-		/*
-		 * (non-Javadoc)
-		 * 
-		 * @see java.lang.Object#equals(java.lang.Object)
-		 */
-		@Override
-		public boolean equals(Object obj) {
-			if (this == obj)
-				return true;
-			if (obj == null)
-				return false;
-			if (getClass() != obj.getClass())
-				return false;
-			SpecialistDbIdLinkPair other = (SpecialistDbIdLinkPair) obj;
-			if (specialistDbId == null) {
-				if (other.specialistDbId != null)
-					return false;
-			} else if (!specialistDbId.equals(other.specialistDbId))
-				return false;
-			if (specialistDbUrl == null) {
-				if (other.specialistDbUrl != null)
-					return false;
-			} else if (!specialistDbUrl.equals(other.specialistDbUrl))
-				return false;
-			return true;
-		}
-
-		/*
-		 * (non-Javadoc)
-		 * 
-		 * @see java.lang.Object#toString()
-		 */
-		@Override
-		public String toString() {
-			return "SpecialistDbIdLinkPair [specialistDbId=" + specialistDbId + ", specialistDbUrl=" + specialistDbUrl
-					+ "]";
-		}
-
 	}
 
+	@Data
 	@Record(dataSource = DataSource.HGNC)
 	public static class GeneFamilyTagDescriptionPair {
 		@RecordField
@@ -742,74 +420,9 @@ public GeneFamilyTagDescriptionPair(String geneFamilyTag, String geneFamilyDescr
 			this.geneFamilyDescription = geneFamilyDescription;
 		}
 
-		/**
-		 * @return the geneFamilyTag
-		 */
-		public String getGeneFamilyTag() {
-			return geneFamilyTag;
-		}
-
-		/**
-		 * @return the geneFamilyDescription
-		 */
-		public String getGeneFamilyDescription() {
-			return geneFamilyDescription;
-		}
-
-		/*
-		 * (non-Javadoc)
-		 * 
-		 * @see java.lang.Object#hashCode()
-		 */
-		@Override
-		public int hashCode() {
-			final int prime = 31;
-			int result = 1;
-			result = prime * result + ((geneFamilyDescription == null) ? 0 : geneFamilyDescription.hashCode());
-			result = prime * result + ((geneFamilyTag == null) ? 0 : geneFamilyTag.hashCode());
-			return result;
-		}
-
-		/*
-		 * (non-Javadoc)
-		 * 
-		 * @see java.lang.Object#equals(java.lang.Object)
-		 */
-		@Override
-		public boolean equals(Object obj) {
-			if (this == obj)
-				return true;
-			if (obj == null)
-				return false;
-			if (getClass() != obj.getClass())
-				return false;
-			GeneFamilyTagDescriptionPair other = (GeneFamilyTagDescriptionPair) obj;
-			if (geneFamilyDescription == null) {
-				if (other.geneFamilyDescription != null)
-					return false;
-			} else if (!geneFamilyDescription.equals(other.geneFamilyDescription))
-				return false;
-			if (geneFamilyTag == null) {
-				if (other.geneFamilyTag != null)
-					return false;
-			} else if (!geneFamilyTag.equals(other.geneFamilyTag))
-				return false;
-			return true;
-		}
-
-		/*
-		 * (non-Javadoc)
-		 * 
-		 * @see java.lang.Object#toString()
-		 */
-		@Override
-		public String toString() {
-			return "GeneFamilyTagDescriptionPair [geneFamilyTag=" + geneFamilyTag + ", geneFamilyDescription="
-					+ geneFamilyDescription + "]";
-		}
-
 	}
 
+	@Data
 	@Record(dataSource = DataSource.HGNC)
 	public static class LocusSpecificDatabaseNameLinkPair {
 		@RecordField
@@ -827,71 +440,6 @@ public LocusSpecificDatabaseNameLinkPair(String databaseName, String link) {
 			this.link = link;
 		}
 
-		/**
-		 * @return the databaseName
-		 */
-		public String getDatabaseName() {
-			return databaseName;
-		}
-
-		/**
-		 * @return the link
-		 */
-		public String getLink() {
-			return link;
-		}
-
-		/*
-		 * (non-Javadoc)
-		 * 
-		 * @see java.lang.Object#hashCode()
-		 */
-		@Override
-		public int hashCode() {
-			final int prime = 31;
-			int result = 1;
-			result = prime * result + ((databaseName == null) ? 0 : databaseName.hashCode());
-			result = prime * result + ((link == null) ? 0 : link.hashCode());
-			return result;
-		}
-
-		/*
-		 * (non-Javadoc)
-		 * 
-		 * @see java.lang.Object#equals(java.lang.Object)
-		 */
-		@Override
-		public boolean equals(Object obj) {
-			if (this == obj)
-				return true;
-			if (obj == null)
-				return false;
-			if (getClass() != obj.getClass())
-				return false;
-			LocusSpecificDatabaseNameLinkPair other = (LocusSpecificDatabaseNameLinkPair) obj;
-			if (databaseName == null) {
-				if (other.databaseName != null)
-					return false;
-			} else if (!databaseName.equals(other.databaseName))
-				return false;
-			if (link == null) {
-				if (other.link != null)
-					return false;
-			} else if (!link.equals(other.link))
-				return false;
-			return true;
-		}
-
-		/*
-		 * (non-Javadoc)
-		 * 
-		 * @see java.lang.Object#toString()
-		 */
-		@Override
-		public String toString() {
-			return "LocusSpecificDatabaseNameLinkPair [databaseName=" + databaseName + ", link=" + link + "]";
-		}
-
 	}
 
 }
diff --git a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/hgnc/HgncDownloadFileParser.java b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/hgnc/HgncDownloadFileParser.java
index 753a6ae..690b3af 100644
--- a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/hgnc/HgncDownloadFileParser.java
+++ b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/hgnc/HgncDownloadFileParser.java
@@ -108,7 +108,7 @@
 public class HgncDownloadFileParser extends SingleLineFileRecordReader<HgncDownloadFileData> {
 
 	private static final Logger logger = Logger.getLogger(HgncDownloadFileParser.class);
-	private static final String HEADER = "HGNC ID\tApproved Symbol\tApproved Name\tStatus\tLocus Type\tLocus Group\tPrevious Symbols\tPrevious Names\tSynonyms\tName Synonyms\tChromosome\tDate Approved\tDate Modified\tDate Symbol Changed\tDate Name Changed\tAccession Numbers\tEnzyme IDs\tEntrez Gene ID\tEnsembl Gene ID\tMouse Genome Database ID\tSpecialist Database Links\tSpecialist Database IDs\tPubmed IDs\tRefSeq IDs\tGene Family Tag\tGene family description\tRecord Type\tPrimary IDs\tSecondary IDs\tCCDS IDs\tVEGA IDs\tLocus Specific Databases\tEntrez Gene ID (supplied by NCBI)\tOMIM ID (supplied by NCBI)\tRefSeq (supplied by NCBI)\tUniProt ID (supplied by UniProt)\tEnsembl ID (supplied by Ensembl)\tUCSC ID (supplied by UCSC)\tMouse Genome Database ID (supplied by MGI)\tRat Genome Database ID (supplied by RGD)";
+	private static final String HEADER = "HGNC ID\tApproved Symbol\tApproved Name\tStatus\tLocus Type\tLocus Group\tPrevious Symbols\tPrevious Names\tSynonyms\tName Synonyms\tChromosome\tDate Approved\tDate Modified\tDate Symbol Changed\tDate Name Changed\tAccession Numbers\tEnzyme IDs\tEntrez Gene ID\tEnsembl Gene ID\tMouse Genome Database ID\tSpecialist Database Links\tSpecialist Database IDs\tPubmed IDs\tRefSeq IDs\tGene Family Tag\tGene family description\tRecord Type\tPrimary IDs\tSecondary IDs\tCCDS IDs\tVEGA IDs\tLocus Specific Databases\tEntrez Gene ID (supplied by NCBI)\tOMIM ID (supplied by NCBI)\tRefSeq (supplied by NCBI)\tUniProt ID (supplied by UniProt)\tEnsembl ID (supplied by Ensembl)\tVega ID (supplied by Vega)\tUCSC ID (supplied by UCSC)\tMouse Genome Database ID (supplied by MGI)\tRat Genome Database ID (supplied by RGD)";
 
 	public enum WithdrawnRecordTreatment {
 		IGNORE, INCLUDE
@@ -153,7 +153,7 @@ protected String getExpectedFileHeader() throws IOException {
 	@Override
 	protected HgncDownloadFileData parseRecordFromLine(Line line) {
 		String[] toks = line.getText().split("\\t", -1);
-		if (toks.length == 40) {
+		if (toks.length == 41) {
 			int column = 0;
 			HgncID hgncID = new HgncID(toks[column++]);
 			HgncGeneSymbolID hgncGeneSymbol = new HgncGeneSymbolID(toks[column++]);
@@ -326,13 +326,11 @@ protected HgncDownloadFileData parseRecordFromLine(Line line) {
 				suppliedEntrezGeneId = new EntrezGeneID(columnValue);
 			}
 
-			OmimID suppliedOmimId = null;
+			Set<OmimID> suppliedOmimIds = new HashSet<OmimID>();
 			columnValue = toks[column++];
 			if (!columnValue.isEmpty()) {
-				try {
-					suppliedOmimId = new OmimID(columnValue);
-				} catch (IllegalArgumentException iae) {
-					logger.warn(iae);
+				for (String tok : columnValue.split(",")) {
+					suppliedOmimIds.add(new OmimID(tok.trim()));
 				}
 			}
 
@@ -354,6 +352,12 @@ protected HgncDownloadFileData parseRecordFromLine(Line line) {
 				suppliedEnsemblId = new EnsemblGeneID(columnValue);
 			}
 
+			VegaID suppliedVegaId = null;
+			columnValue = toks[column++];
+			if (!columnValue.isEmpty()) {
+				suppliedVegaId = new VegaID(columnValue);
+			}
+
 			UcscGenomeBrowserId suppliedUcscId = null;
 			columnValue = toks[column++];
 			if (!columnValue.isEmpty()) {
@@ -381,11 +385,12 @@ protected HgncDownloadFileData parseRecordFromLine(Line line) {
 					dateSymbolChanged, dateNameChanged, accessionNumbers, ecNumbers, entrezGeneId, ensemblGeneID,
 					mgiIDs, specialistDatabaseLinks, pubmedIDs, refseqIDs, geneFamilyTagDescriptionPairs, recordType,
 					primaryIds, secondaryIds, ccdsIds, vegaIds, locusSpecificDatabaseNameLinkPairs,
-					suppliedEntrezGeneId, suppliedOmimId, suppliedRefseqId, suppliedUniProtId, suppliedEnsemblId,
-					suppliedUcscId, suppliedMgiIds, suppliedRgdIds, line.getByteOffset(), line.getLineNumber());
+					suppliedEntrezGeneId, suppliedOmimIds, suppliedRefseqId, suppliedUniProtId, suppliedEnsemblId,
+					suppliedVegaId, suppliedUcscId, suppliedMgiIds, suppliedRgdIds, line.getByteOffset(),
+					line.getLineNumber());
 		}
 
-		logger.error("Unexpected number of tokens (" + toks.length + "; expected 40) on line: "
+		logger.error("Unexpected number of tokens (" + toks.length + "; expected 41) on line: "
 				+ line.getText().replaceAll("\\t", " [TAB] "));
 		return null;
 
@@ -564,7 +569,8 @@ private Set<DataSourceIdentifier<?>> resolveAccessionNumbers(String accListStr)
 		Set<DataSourceIdentifier<?>> accNumbers = new HashSet<DataSourceIdentifier<?>>();
 		if (!accListStr.isEmpty()) {
 			for (String acc : accListStr.split(",")) {
-				DataSourceIdentifier<String> nucAccId = NucleotideAccessionResolver.resolveNucleotideAccession(acc, acc);
+				DataSourceIdentifier<String> nucAccId = NucleotideAccessionResolver
+						.resolveNucleotideAccession(acc, acc);
 				if (ProbableErrorDataSourceIdentifier.class.isInstance(nucAccId)) {
 					DataSourceIdentifier<String> proAccId = ProteinAccessionResolver.resolveProteinAccession(acc, acc);
 					accNumbers.add(proAccId);
diff --git a/datasource-fileparsers/src/test/java/edu/ucdenver/ccp/datasource/fileparsers/hgnc/HgncDownloadFileParserTest.java b/datasource-fileparsers/src/test/java/edu/ucdenver/ccp/datasource/fileparsers/hgnc/HgncDownloadFileParserTest.java
index e96d636..ff28862 100644
--- a/datasource-fileparsers/src/test/java/edu/ucdenver/ccp/datasource/fileparsers/hgnc/HgncDownloadFileParserTest.java
+++ b/datasource-fileparsers/src/test/java/edu/ucdenver/ccp/datasource/fileparsers/hgnc/HgncDownloadFileParserTest.java
@@ -41,6 +41,7 @@
 import java.io.File;
 import java.io.IOException;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 
@@ -62,6 +63,7 @@
 import edu.ucdenver.ccp.datasource.identifiers.mgi.MgiGeneID;
 import edu.ucdenver.ccp.datasource.identifiers.ncbi.CcdsId;
 import edu.ucdenver.ccp.datasource.identifiers.ncbi.gene.EntrezGeneID;
+import edu.ucdenver.ccp.datasource.identifiers.ncbi.omim.OmimID;
 import edu.ucdenver.ccp.datasource.identifiers.ncbi.refseq.RefSeqID;
 import edu.ucdenver.ccp.datasource.identifiers.other.CosmicId;
 import edu.ucdenver.ccp.datasource.identifiers.other.UcscGenomeBrowserId;
@@ -127,13 +129,13 @@ public void testParser() throws Exception {
 			assertEmpty(dataRecord.getVegaIDs());
 			assertEmpty(dataRecord.getLocusSpecificDatabaseNameLinkPairings());
 			assertEquals(new EntrezGeneID(503538), dataRecord.getSuppliedEntrezGeneId());
-			assertNull(dataRecord.getSuppliedOmimId());
+			assertEmpty(dataRecord.getSuppliedOmimIds());
 			assertEquals(new RefSeqID("NR_015380"), dataRecord.getSuppliedRefseqId());
 			assertNull(dataRecord.getSuppliedUniprotId());
 			assertNull(dataRecord.getSuppliedEnsemblId());
 			assertEquals(new UcscGenomeBrowserId("uc002qsg.3"), dataRecord.getSuppliedUcscId());
 			assertEmpty(dataRecord.getSuppliedMgiIds());
-			assertEmpty(dataRecord.getSuppliedRgdId());
+			assertEmpty(dataRecord.getSuppliedRgdIds());
 		} else {
 			fail("Parser should have returned the first record.");
 		}
@@ -184,13 +186,13 @@ public void testParser() throws Exception {
 							"ALSOD, the Amyotrophic Lateral Sclerosis Online Genetic Database",
 							"http://alsod.iop.kcl.ac.uk/")), dataRecord.getLocusSpecificDatabaseNameLinkPairings());
 			assertEquals(new EntrezGeneID(29974), dataRecord.getSuppliedEntrezGeneId());
-			assertNull(dataRecord.getSuppliedOmimId());
+			assertEmpty(dataRecord.getSuppliedOmimIds());
 			assertEquals(new RefSeqID("NM_001198818"), dataRecord.getSuppliedRefseqId());
 			assertEquals(new UniProtID("Q9NQ94"), dataRecord.getSuppliedUniprotId());
 			assertEquals(new EnsemblGeneID("ENSG00000148584"), dataRecord.getSuppliedEnsemblId());
 			assertEquals(new UcscGenomeBrowserId("uc001jjj.3"), dataRecord.getSuppliedUcscId());
 			assertEquals(CollectionsUtil.createSet(new MgiGeneID("MGI:1917115")), dataRecord.getSuppliedMgiIds());
-			assertEquals(CollectionsUtil.createSet(new RgdID("619834")), dataRecord.getSuppliedRgdId());
+			assertEquals(CollectionsUtil.createSet(new RgdID("619834")), dataRecord.getSuppliedRgdIds());
 		} else {
 			fail("Parser should have returned the first record.");
 		}
diff --git a/datasource-fileparsers/src/test/resources/edu/ucdenver/ccp/datasource/fileparsers/hgnc/hgnc_download.txt b/datasource-fileparsers/src/test/resources/edu/ucdenver/ccp/datasource/fileparsers/hgnc/hgnc_download.txt
index 4217a78..94b5606 100644
--- a/datasource-fileparsers/src/test/resources/edu/ucdenver/ccp/datasource/fileparsers/hgnc/hgnc_download.txt
+++ b/datasource-fileparsers/src/test/resources/edu/ucdenver/ccp/datasource/fileparsers/hgnc/hgnc_download.txt
@@ -1,3 +1,3 @@
-HGNC ID	Approved Symbol	Approved Name	Status	Locus Type	Locus Group	Previous Symbols	Previous Names	Synonyms	Name Synonyms	Chromosome	Date Approved	Date Modified	Date Symbol Changed	Date Name Changed	Accession Numbers	Enzyme IDs	Entrez Gene ID	Ensembl Gene ID	Mouse Genome Database ID	Specialist Database Links	Specialist Database IDs	Pubmed IDs	RefSeq IDs	Gene Family Tag	Gene family description	Record Type	Primary IDs	Secondary IDs	CCDS IDs	VEGA IDs	Locus Specific Databases	Entrez Gene ID (supplied by NCBI)	OMIM ID (supplied by NCBI)	RefSeq (supplied by NCBI)	UniProt ID (supplied by UniProt)	Ensembl ID (supplied by Ensembl)	UCSC ID (supplied by UCSC)	Mouse Genome Database ID (supplied by MGI)	Rat Genome Database ID (supplied by RGD)
-HGNC:37133	A1BG-AS1	A1BG antisense RNA 1	Approved	RNA, long non-coding	non-coding RNA	NCRNA00181, A1BGAS, A1BG-AS	"non-protein coding RNA 181", "A1BG antisense RNA (non-protein coding)", "A1BG antisense RNA 1 (non-protein coding)"	FLJ23569		19q13.4	2009-07-20	2012-10-12	2010-11-25	2012-08-15	BC040926		503538			<!--,--> <!--,--> <!--,--> <!--,--> <!--,--> <!--,--> <!--,--> <!--,--> <!--,--> <!--,--> <!--,--> <!--,--> <!--,--> <!--,--> <!--,--> <!--,--> 	, , , , , , , , , , , , , , , , 		NR_015380	LNCRNA, ANTISENSE	"-", "ncRNAs / Long non-coding RNAs, antisense"	Standard						503538		NR_015380			uc002qsg.3		
-HGNC:24086	A1CF	APOBEC1 complementation factor	Approved	gene with protein product	protein-coding gene			ACF, ASP, ACF64, ACF65, APOBEC1CF		10q21.1	2007-11-23	2011-07-21			AF271790		29974	ENSG00000148584	MGI:1917115	<!--,--> <!--,--> <!--,--> <!--,--> <!--,--> <!--,--> <!--,--> <!--,--> <!--,--> <!--,--> <a href="http://www.sanger.ac.uk/perl/genetics/CGP/cosmic?action=gene&amp;ln=A1CF">COSMIC</a><!--,--> <!--,--> <!--,--> <!--,--> <!--,--> <!--,--> 	, , , , , , , , , , A1CF, , , , , , 	11815617, 11072063	NM_014576			Standard			CCDS7241.1, CCDS7242.1, CCDS7243.1	OTTHUMG00000018240	Androgen Receptor|http://androgendb.mcgill.ca/,Mental Retardation database|http://grenada.lumc.nl/LOVD2/MR/home.php?select_db=AR,ALSOD, the Amyotrophic Lateral Sclerosis Online Genetic Database|http://alsod.iop.kcl.ac.uk/	29974		NM_001198818	Q9NQ94	ENSG00000148584	uc001jjj.3	MGI:1917115	RGD:619834
+HGNC ID	Approved Symbol	Approved Name	Status	Locus Type	Locus Group	Previous Symbols	Previous Names	Synonyms	Name Synonyms	Chromosome	Date Approved	Date Modified	Date Symbol Changed	Date Name Changed	Accession Numbers	Enzyme IDs	Entrez Gene ID	Ensembl Gene ID	Mouse Genome Database ID	Specialist Database Links	Specialist Database IDs	Pubmed IDs	RefSeq IDs	Gene Family Tag	Gene family description	Record Type	Primary IDs	Secondary IDs	CCDS IDs	VEGA IDs	Locus Specific Databases	Entrez Gene ID (supplied by NCBI)	OMIM ID (supplied by NCBI)	RefSeq (supplied by NCBI)	UniProt ID (supplied by UniProt)	Ensembl ID (supplied by Ensembl)	Vega ID (supplied by Vega)	UCSC ID (supplied by UCSC)	Mouse Genome Database ID (supplied by MGI)	Rat Genome Database ID (supplied by RGD)
+HGNC:37133	A1BG-AS1	A1BG antisense RNA 1	Approved	RNA, long non-coding	non-coding RNA	NCRNA00181, A1BGAS, A1BG-AS	"non-protein coding RNA 181", "A1BG antisense RNA (non-protein coding)", "A1BG antisense RNA 1 (non-protein coding)"	FLJ23569		19q13.4	2009-07-20	2012-10-12	2010-11-25	2012-08-15	BC040926		503538			<!--,--> <!--,--> <!--,--> <!--,--> <!--,--> <!--,--> <!--,--> <!--,--> <!--,--> <!--,--> <!--,--> <!--,--> <!--,--> <!--,--> <!--,--> <!--,--> 	, , , , , , , , , , , , , , , , 		NR_015380	LNCRNA, ANTISENSE	"-", "ncRNAs / Long non-coding RNAs, antisense"	Standard						503538		NR_015380				uc002qsg.3		
+HGNC:24086	A1CF	APOBEC1 complementation factor	Approved	gene with protein product	protein-coding gene			ACF, ASP, ACF64, ACF65, APOBEC1CF		10q21.1	2007-11-23	2011-07-21			AF271790		29974	ENSG00000148584	MGI:1917115	<!--,--> <!--,--> <!--,--> <!--,--> <!--,--> <!--,--> <!--,--> <!--,--> <!--,--> <!--,--> <a href="http://www.sanger.ac.uk/perl/genetics/CGP/cosmic?action=gene&amp;ln=A1CF">COSMIC</a><!--,--> <!--,--> <!--,--> <!--,--> <!--,--> <!--,--> 	, , , , , , , , , , A1CF, , , , , , 	11815617, 11072063	NM_014576			Standard			CCDS7241.1, CCDS7242.1, CCDS7243.1	OTTHUMG00000018240	Androgen Receptor|http://androgendb.mcgill.ca/,Mental Retardation database|http://grenada.lumc.nl/LOVD2/MR/home.php?select_db=AR,ALSOD, the Amyotrophic Lateral Sclerosis Online Genetic Database|http://alsod.iop.kcl.ac.uk/	29974		NM_001198818	Q9NQ94	ENSG00000148584		uc001jjj.3	MGI:1917115	RGD:619834

From 1df77efcfdb493c0153e2a11e6f9c48a67043764 Mon Sep 17 00:00:00 2001
From: bill-baumgartner <bill.baumgartner@gmail.com>
Date: Tue, 9 Feb 2016 12:48:20 -0700
Subject: [PATCH 29/36] Added handling for Vega ID resolution

---
 .../ccp/datasource/identifiers/DataSourceIdResolver.java        | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/DataSourceIdResolver.java b/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/DataSourceIdResolver.java
index 08de587..6b0fddd 100644
--- a/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/DataSourceIdResolver.java
+++ b/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/DataSourceIdResolver.java
@@ -446,6 +446,8 @@ else if (geneIDStr.matches("rs\\d+"))
 				return new SnpRsId(geneIDStr);
 			else if (geneIDStr.startsWith("CL:"))
 				return new CellTypeOntologyID(geneIDStr);
+			else if (geneIDStr.startsWith("Vega:"))
+				return new VegaID(StringUtil.removePrefix(geneIDStr, "Vega:"));
 			else if (geneIDStr.startsWith("NCBITaxon:"))
 				return new NcbiTaxonomyID(StringUtil.removePrefix(geneIDStr, "NCBITaxon:"));
 

From d34c2eebfb4a3c4cd5b23deb4d3b03fd30b6cdd1 Mon Sep 17 00:00:00 2001
From: bill-baumgartner <bill.baumgartner@gmail.com>
Date: Tue, 9 Feb 2016 13:19:27 -0700
Subject: [PATCH 30/36] Added handling for multiple MeSH IDs for a given
 category

---
 .../fileparsers/drugbank/DrugBankDrugRecord.java | 16 ++++++++++++----
 .../ccp/datasource/identifiers/ncbi/MeshID.java  |  6 ++----
 2 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/drugbank/DrugBankDrugRecord.java b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/drugbank/DrugBankDrugRecord.java
index 9c0498b..eb3c385 100644
--- a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/drugbank/DrugBankDrugRecord.java
+++ b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/drugbank/DrugBankDrugRecord.java
@@ -932,11 +932,17 @@ private Set<Category> initCategories(CategoryListType list) {
 		}
 		Set<Category> toReturn = new HashSet<Category>();
 		for (CategoryType p : list.getCategory()) {
-			MeshID meshId = null;
+			Set<MeshID> meshIds = new HashSet<MeshID>();
 			if (!p.getMeshId().trim().isEmpty()) {
-				meshId = new MeshID(p.getMeshId().trim());
+				String meshStr = p.getMeshId().trim();
+				meshStr = meshStr.replaceAll("\"", "");
+				meshStr = meshStr.replace("[", "");
+				meshStr = meshStr.replace("]", "");
+				for (String tok : meshStr.split(",")) {
+					meshIds.add(new MeshID(tok));
+				}
 			}
-			Category c = new Category(meshId, p.getCategory());
+			Category c = new Category(meshIds, p.getCategory());
 			toReturn.add(c);
 		}
 		return toReturn;
@@ -946,7 +952,7 @@ private Set<Category> initCategories(CategoryListType list) {
 	@Record(dataSource = DataSource.DRUGBANK)
 	private static class Category {
 		@RecordField
-		private final MeshID meshId;
+		private final Set<MeshID> meshIds;
 		@RecordField
 		private final String category;
 	}
@@ -1308,6 +1314,8 @@ private static DataSourceIdentifier<?> resolveIdentifier(String resource, String
 					"GenBank:" + identifier);
 			if (ProbableErrorDataSourceIdentifier.class.isInstance(nucAccId.getClass())) {
 				return ProteinAccessionResolver.resolveProteinAccession(identifier, "GenBank:" + identifier);
+			} else {
+				return nucAccId;
 			}
 		} else if (resource.equals("UniProtKB")) {
 			return new UniProtID(identifier);
diff --git a/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/ncbi/MeshID.java b/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/ncbi/MeshID.java
index b6dbb98..9c735b5 100644
--- a/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/ncbi/MeshID.java
+++ b/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/ncbi/MeshID.java
@@ -33,10 +33,8 @@
  * #L%
  */
 
-import org.apache.commons.lang.math.NumberUtils;
-
-import edu.ucdenver.ccp.datasource.identifiers.DataSourceIdentifier;
 import edu.ucdenver.ccp.datasource.identifiers.DataSource;
+import edu.ucdenver.ccp.datasource.identifiers.DataSourceIdentifier;
 
 /**
  * ID for Medical Subject Heading definition as described by www.nlm.nih.gov/mesh
@@ -57,7 +55,7 @@ public MeshID(String resourceID) {
 
 	@Override
 	public String validate(String resourceID) throws IllegalArgumentException {
-		if (resourceID != null && resourceID.matches("[A-Z]\\d+"))
+		if (resourceID != null)
 			return resourceID;
 
 		throw new IllegalArgumentException(String.format("Invalid Mesh ID : %s", resourceID));

From 2a68c66186bd9ae8285bb4bdeb81fcb265aae5f7 Mon Sep 17 00:00:00 2001
From: bill-baumgartner <bill.baumgartner@gmail.com>
Date: Tue, 9 Feb 2016 14:40:14 -0700
Subject: [PATCH 31/36] Added handling for RNACentral identifiers

---
 .../GpAssociationGoaUniprotFileParser.java    |  4 ++
 .../datasource/identifiers/DataSource.java    |  1 +
 .../identifiers/other/RnaCentralId.java       | 51 +++++++++++++++++++
 3 files changed, 56 insertions(+)
 create mode 100644 datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/other/RnaCentralId.java

diff --git a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ebi/goa/GpAssociationGoaUniprotFileParser.java b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ebi/goa/GpAssociationGoaUniprotFileParser.java
index a38fb15..a8687c6 100755
--- a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ebi/goa/GpAssociationGoaUniprotFileParser.java
+++ b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ebi/goa/GpAssociationGoaUniprotFileParser.java
@@ -63,6 +63,7 @@
 import edu.ucdenver.ccp.datasource.identifiers.ebi.uniprot.UniProtIsoformID;
 import edu.ucdenver.ccp.datasource.identifiers.ncbi.taxonomy.NcbiTaxonomyID;
 import edu.ucdenver.ccp.datasource.identifiers.obo.GeneOntologyID;
+import edu.ucdenver.ccp.datasource.identifiers.other.RnaCentralId;
 import edu.ucdenver.ccp.datasource.identifiers.reactome.ReactomeReactionID;
 import edu.ucdenver.ccp.identifier.publication.DOI;
 import edu.ucdenver.ccp.identifier.publication.PubMedID;
@@ -317,6 +318,9 @@ private static DataSourceIdentifier<?> createDatabaseObjectID(String database, S
 			if (database.equals("IntAct")) {
 				return new IntActID(databaseObjectIDStr);
 			}
+			if (database.equals("RNAcentral")) {
+				return new RnaCentralId(databaseObjectIDStr);
+			}
 		} catch (IllegalArgumentException e) {
 			logger.warn(e.getMessage());
 		}
diff --git a/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/DataSource.java b/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/DataSource.java
index a214cd7..c9398ce 100644
--- a/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/DataSource.java
+++ b/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/DataSource.java
@@ -341,6 +341,7 @@ public String getLocalName() {
 	PROTONET("http://www.protonet.cs.huji.ac.il/"),
 	REBASE("http://rebase.neb.com/"),
 	REPRODUCTION_2DPAGE("http://reprod.njmu.edu.cn/"),
+	RNACENTRAL("http://rnacentral.org/rna/"),
 	ROUGE("http://www.kazusa.or.jp/rouge/"),
 	SABIO_RK("http://sabiork.h-its.org/"),
 	SBKB("http://sbkb.org/"),
diff --git a/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/other/RnaCentralId.java b/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/other/RnaCentralId.java
new file mode 100644
index 0000000..8d17c58
--- /dev/null
+++ b/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/other/RnaCentralId.java
@@ -0,0 +1,51 @@
+package edu.ucdenver.ccp.datasource.identifiers.other;
+
+/*
+ * #%L
+ * Colorado Computational Pharmacology's common module
+ * %%
+ * Copyright (C) 2012 - 2014 Regents of the University of Colorado
+ * %%
+ * Redistribution and use in source and binary forms, with or without modification,
+ * are permitted provided that the following conditions are met:
+ * 
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ * 
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ * 
+ * 3. Neither the name of the Regents of the University of Colorado nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software without
+ *    specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * #L%
+ */
+
+import edu.ucdenver.ccp.datasource.identifiers.DataSource;
+import edu.ucdenver.ccp.datasource.identifiers.StringDataSourceIdentifier;
+
+/**
+ * Identifier for an RNAcentral record (http://rnacentral.org/); registered under DataSource.RNACENTRAL.
+ * 
+ * @author Colorado Computational Pharmacology, UC Denver; ccpsupport@ucdenver.edu
+ * 
+ */
+public class RnaCentralId extends StringDataSourceIdentifier {
+
+	public RnaCentralId(String resourceID) {
+		super(resourceID, DataSource.RNACENTRAL);
+}
+
+}

From cb1faa9483e6eb0c4895b07982f8366f40706401 Mon Sep 17 00:00:00 2001
From: bill-baumgartner <bill.baumgartner@gmail.com>
Date: Tue, 9 Feb 2016 14:59:46 -0700
Subject: [PATCH 32/36] removed a print statement

---
 .../ebi/uniprot/SparseUniProtXmlFileRecordReader.java            | 1 -
 1 file changed, 1 deletion(-)

diff --git a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ebi/uniprot/SparseUniProtXmlFileRecordReader.java b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ebi/uniprot/SparseUniProtXmlFileRecordReader.java
index ff3fc07..05b99ab 100644
--- a/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ebi/uniprot/SparseUniProtXmlFileRecordReader.java
+++ b/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ebi/uniprot/SparseUniProtXmlFileRecordReader.java
@@ -97,7 +97,6 @@ protected boolean hasTaxonOfInterest(SparseUniProtFileRecord record) {
 		}
 		for (DbReference dbRef : record.getOrganism().getDbReference()) {
 			if (getTaxonsOfInterest().contains(dbRef.getId())) {
-				System.out.println("has taxon of interest: " + dbRef.getId());
 				return true;
 			}
 		}

From b4404d9b982f7bca74f18df1e767c74167b97e34 Mon Sep 17 00:00:00 2001
From: bill-baumgartner <bill.baumgartner@gmail.com>
Date: Thu, 11 Feb 2016 08:37:43 -0700
Subject: [PATCH 33/36] updating poms for 0.6.1 branch with snapshot versions

---
 datasource-fileparsers/pom.xml | 2 +-
 datasource-identifiers/pom.xml | 2 +-
 datasource-rdfizer/pom.xml     | 2 +-
 pom.xml                        | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/datasource-fileparsers/pom.xml b/datasource-fileparsers/pom.xml
index 55a633d..8cffda9 100644
--- a/datasource-fileparsers/pom.xml
+++ b/datasource-fileparsers/pom.xml
@@ -3,7 +3,7 @@
 	<parent>
 		<artifactId>datasource</artifactId>
 		<groupId>edu.ucdenver.ccp</groupId>
-		<version>0.7-SNAPSHOT</version>
+		<version>0.6.1-SNAPSHOT</version>
 	</parent>
 	<artifactId>datasource-fileparsers</artifactId>
 
diff --git a/datasource-identifiers/pom.xml b/datasource-identifiers/pom.xml
index ef3917f..c12811c 100644
--- a/datasource-identifiers/pom.xml
+++ b/datasource-identifiers/pom.xml
@@ -3,7 +3,7 @@
 	<parent>
 		<artifactId>datasource</artifactId>
 		<groupId>edu.ucdenver.ccp</groupId>
-		<version>0.7-SNAPSHOT</version>
+		<version>0.6.1-SNAPSHOT</version>
 	</parent>
 	<artifactId>datasource-identifiers</artifactId>
 
diff --git a/datasource-rdfizer/pom.xml b/datasource-rdfizer/pom.xml
index 1e4b68f..728b501 100644
--- a/datasource-rdfizer/pom.xml
+++ b/datasource-rdfizer/pom.xml
@@ -3,7 +3,7 @@
 	<parent>
 		<groupId>edu.ucdenver.ccp</groupId>
 		<artifactId>datasource</artifactId>
-		<version>0.7-SNAPSHOT</version>
+		<version>0.6.1-SNAPSHOT</version>
 	</parent>
 	<artifactId>datasource-rdfizer</artifactId>
 
diff --git a/pom.xml b/pom.xml
index dd3e590..76a1578 100644
--- a/pom.xml
+++ b/pom.xml
@@ -2,7 +2,7 @@
 	<modelVersion>4.0.0</modelVersion>
 	<groupId>edu.ucdenver.ccp</groupId>
 	<artifactId>datasource</artifactId>
-	<version>0.7-SNAPSHOT</version>
+	<version>0.6.1-SNAPSHOT</version>
 	<packaging>pom</packaging>
 
 	<properties>

From 1d0533c9a9975224b738d3ffdaf5ba45b1858cbf Mon Sep 17 00:00:00 2001
From: bill-baumgartner <bill.baumgartner@gmail.com>
Date: Thu, 11 Feb 2016 14:28:55 -0700
Subject: [PATCH 34/36] Overhaul of README

now lists most of the available parsers and has improved documentation
for RDF generation
---
 README.md | 226 ++++++++++++++++++++++++++++++++----------------------
 1 file changed, 136 insertions(+), 90 deletions(-)

diff --git a/README.md b/README.md
index 46c6654..4f4607f 100644
--- a/README.md
+++ b/README.md
@@ -1,27 +1,28 @@
-# datasource
-A library of code for parsing (mostly biomedical) data source files and converting their contents to RDF
+A library of code for parsing (mostly biomedical) data source files
 
-This library contains file parsers for files from many different biomedical databases. It also contains
-code that uses a file parser as input and outputs RDF. The structure of the RDF is described in:
-```
-KaBOB: Ontology-Based Semantic Integration of Biomedical Databases
-Kevin M Livingston, Michael Bada, William A Baumgartner, Lawrence E Hunter
-BMC Bioinformatics (accepted)
-``` 
-
-## Development
-This project follows the Git-Flow approach to branching as originally described [here](http://nvie.com/posts/a-successful-git-branching-model/). 
-To facilitate the Git-Flow branching approach, this project makes use of the [jgitflow-maven-plugin](https://bitbucket.org/atlassian/jgit-flow) as described [here](http://george-stathis.com/2013/11/09/painless-maven-project-releases-with-maven-gitflow-plugin/).
-
-Code in the [master branch](https://github.com/UCDenver-ccp/datasource/tree/master) reflects the latest release of this library. Code in the [development](https://github.com/UCDenver-ccp/datasource/tree/development) branch contains the most up-to-date version of this project.
+# Prerequisites
+  * [Java](https://www.oracle.com/java/index.html), at least version 8, is required. 
+  * [Apache Maven](https://maven.apache.org/) is required to build the project. 
+  * If you 
+intend to build this project inside of an IDE, such as Eclipse, please see the instructions 
+for using the [Lombok](https://projectlombok.org/) library with your IDE [here](https://projectlombok.org/features/index.html). 
 
+# Installation
+To use the scripts included in this project, e.g. to generate an RDF representation for a given datasource from the command line, you must download and install the project:
+```
+$ git clone https://github.com/UCDenver-ccp/datasource datasource.git
+$ cd datasource.git
+$ mvn clean install
+```
+Scripts must be run from the project's base directory. 
 
-## Maven signature if only using the file parser API
+If you are interested in programmatic access to the file parsers and related code, the libraries are available as Maven artifacts:
+#### Maven signature if only using the file parser API
 ```xml
 <dependency>
 	<groupId>edu.ucdenver.ccp</groupId>
 	<artifactId>datasource-fileparsers</artifactId>
-	<version>0.6</version>
+	<version>0.6.1</version>
 </dependency>
 
 <repository>
@@ -30,12 +31,12 @@ Code in the [master branch](https://github.com/UCDenver-ccp/datasource/tree/mast
 </repository>
 ```
 
-## Maven signature if interested in generating RDF of parsed file content
+#### Maven signature if interested in generating RDF of parsed file content
 ```xml
 <dependency>
 	<groupId>edu.ucdenver.ccp</groupId>
 	<artifactId>datasource-rdfizer</artifactId>
-	<version>0.6</version>
+	<version>0.6.1</version>
 </dependency>
 
 <repository>
@@ -44,75 +45,116 @@ Code in the [master branch](https://github.com/UCDenver-ccp/datasource/tree/mast
 </repository>
 ```
 
-## Bulk RDF Generation
+# Development
+This project follows the Git-Flow approach to branching as originally described [here](http://nvie.com/posts/a-successful-git-branching-model/). 
+To facilitate the Git-Flow branching approach, this project makes use of the [jgitflow-maven-plugin](https://bitbucket.org/atlassian/jgit-flow) as described [here](http://george-stathis.com/2013/11/09/painless-maven-project-releases-with-maven-gitflow-plugin/).
 
-This library has been built to work easily with distributed resource management
-systems such as Oracle Grid Engine or Torque. This simply means that there is a
-script to download and process (generate RDF triples) the data for a source:
+Code in the [master branch](https://github.com/UCDenver-ccp/datasource/tree/master) reflects the latest release (v0.6.1) of this library. Code in the [development](https://github.com/UCDenver-ccp/datasource/tree/development) branch contains the most up-to-date version of this project.
+
+# Available file parsers
+This library contains file parsers for files from many different biomedical databases. The table below lists the datasources, files, and relevant file parser class.
+Many of the file parsers are capable of automatically downloading the file that they parse. Those files that cannot be downloaded automatically typically require registration, login, or a user-specific license. 
+The "Download" column is used to indicate which files cannot be downloaded automatically. This list is not guaranteed to be exhaustive.
+
+| <sub>Data source</sub> | <sub>File</sub> | <sub>Parser class</sub> | <sub>RDF Generation Key</sub> | <sub>Download</sub> |
+|---|---|---|---|---|
+| <sub>[DIP](http://dip.doe-mbi.ucla.edu/dip/Main.cgi)</sub> | <sub>dip{DATE}.txt.gz</sub> | <sub>[DipYYYYMMDDFileParser](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/dip/DipYYYYMMDDFileParser.java)</sub> | | <sub>MANUAL</sub> |
+| <sub>[DrugBank](http://www.drugbank.ca/)</sub> | <sub>[drugbank.xml](http://www.drugbank.ca/downloads)</sub> | <sub>[DrugbankXmlFileRecordReader](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/drugbank/DrugbankXmlFileRecordReader.java)</sub> | <sub>DRUGBANK</sub> | <sub>AUTO</sub> |
+| <sub>[Gene Ontology](http://geneontology.org/)</sub> | <sub>[annotation files](http://geneontology.org/page/download-annotations)</sub> | <sub>[GeneAssociationFileParser](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/geneontology/GeneAssociationFileParser.java)</sub> | |  <sub>AUTO</sub> |
+| <sub>[GOA](http://www.ebi.ac.uk/GOA)</sub> | <sub>[gp_association.goa_uniprot.gz](http://www.ebi.ac.uk/GOA/downloads)</sub> | <sub>[GpAssociationGoaUniprotFileParser](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ebi/goa/GpAssociationGoaUniprotFileParser.java)</sub> | <sub>GOA</sub> | <sub>AUTO</sub> |
+| <sub>[HGNC](http://www.genenames.org/)</sub> | <sub>[hgnc_complete_set.txt.gz](http://www.genenames.org/cgi-bin/statistics)</sub> | <sub>[HgncDownloadFileParser](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/hgnc/HgncDownloadFileParser.java)</sub> | <sub>HGNC</sub> | <sub>AUTO</sub> |
+| <sub>[InterPro](http://www.ebi.ac.uk/interpro/)</sub> | <sub>[interpro2go](ftp://ftp.ebi.ac.uk/pub/databases/interpro/)</sub> | <sub>[InterPro2GoFileParser](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ebi/interpro/InterPro2GoFileParser.java)</sub> | <sub>INTERPRO_INTERPRO2GO</sub> | <sub>AUTO</sub> |
+| <sub>[InterPro](http://www.ebi.ac.uk/interpro/)</sub> | <sub>[names.dat](ftp://ftp.ebi.ac.uk/pub/databases/interpro/)</sub> | <sub>[InterProNamesDatFileParser](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ebi/interpro/InterProNamesDatFileParser.java)</sub> | <sub>INTERPRO_NAMESDAT</sub> | <sub>AUTO</sub> |
+| <sub>[InterPro](http://www.ebi.ac.uk/interpro/)</sub> | <sub>[protein2ipr.dat.gz](ftp://ftp.ebi.ac.uk/pub/databases/interpro/)</sub> | <sub>[InterProProtein2IprDatFileParser](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ebi/interpro/InterProProtein2IprDatFileParser.java)</sub> | <sub>INTERPRO_PROTEIN2IPR</sub> | <sub>AUTO</sub> |
+| <sub>[IRefWeb](http://wodaklab.org/iRefWeb/)</sub> | <sub>[All.mitab.{DATE}.txt.zip](http://irefindex.org/download/irefindex/data/archive/release_14.0/psi_mitab/MITAB2.6/)</sub> | <sub>[IRefWebPsiMitab2_6FileParser](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/irefweb/IRefWebPsiMitab2_6FileParser.java)</sub> | <sub>IREFWEB</sub> | <sub>AUTO</sub> |
+| <sub>[MGI](http://www.informatics.jax.org/)</sub> | <sub>[MGI_EntrezGene.rpt](ftp://ftp.informatics.jax.org/pub/reports/index.html)</sub> | <sub>[MGIEntrezGeneFileParser](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/mgi/MGIEntrezGeneFileParser.java)</sub> | <sub>MGI_ENTREZGENE</sub> | <sub>AUTO</sub> |
+| <sub>[MGI](http://www.informatics.jax.org/)</sub> | <sub>[MGI_Geno_Disease.rpt](ftp://ftp.informatics.jax.org/pub/reports/index.html)</sub> | <sub>[MGIGenoDiseaseFileRecordReader](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/mgi/MGIGenoDiseaseFileRecordReader.java)</sub> | |<sub>AUTO</sub> |
+| <sub>[MGI](http://www.informatics.jax.org/)</sub> | <sub>[MGI_PhenoGenoMP.rpt](ftp://ftp.informatics.jax.org/pub/reports/index.html)</sub> | <sub>[MGIPhenoGenoMPFileParser](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/mgi/MGIPhenoGenoMPFileParser.java)</sub> | <sub>MGI_MGIPHENOGENO</sub> | <sub>AUTO</sub> |
+| <sub>[MGI](http://www.informatics.jax.org/)</sub> | <sub>[MRK_List2.rpt](ftp://ftp.informatics.jax.org/pub/reports/index.html)</sub> | <sub>[MRKListFileParser](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/mgi/MRKListFileParser.java)</sub> | <sub>MGI_MRKLIST</sub> | <sub>AUTO</sub> |
+| <sub>[MGI](http://www.informatics.jax.org/)</sub> | <sub>[MRK_Reference.rpt](ftp://ftp.informatics.jax.org/pub/reports/index.html)</sub> | <sub>[MRKReferenceFileParser](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/mgi/MRKReferenceFileParser.java)</sub> | <sub>MGI_MRKREFERENCE</sub> | <sub>AUTO</sub> |
+| <sub>[MGI](http://www.informatics.jax.org/)</sub> | <sub>[MRK_Sequence.rpt](ftp://ftp.informatics.jax.org/pub/reports/index.html)</sub> | <sub>[MRKSequenceFileParser](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/mgi/MRKSequenceFileParser.java)</sub> | <sub>MGI_MRKSEQUENCE</sub> | <sub>AUTO</sub> |
+| <sub>[MGI](http://www.informatics.jax.org/)</sub> | <sub>[MRK_SwissProt.rpt](ftp://ftp.informatics.jax.org/pub/reports/index.html)</sub> | <sub>[MRKSwissProtFileParser](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/mgi/MRKSwissProtFileParser.java)</sub> | <sub>MGI_MRKSWISSPROT</sub> | <sub>AUTO</sub> |
+| <sub>[miRBase](http://www.mirbase.org/)</sub> | <sub>[miRNA.dat.gz](http://www.mirbase.org/ftp.shtml)</sub> | <sub>[MirBaseMiRnaDatFileParser](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/mirbase/MirBaseMiRnaDatFileParser.java)</sub> | <sub>MIRBASE</sub> | <sub>AUTO</sub> |
+| <sub>[NCBI Gene](http://www.ncbi.nlm.nih.gov/gene)</sub> | <sub>[gene2accession.gz](ftp://ftp.ncbi.nih.gov/gene/DATA/)</sub> | <sub>[EntrezGene2AccessionFileParser](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ncbi/gene/EntrezGene2AccessionFileParser.java)</sub> | | <sub>AUTO</sub> |
+| <sub>[NCBI Gene](http://www.ncbi.nlm.nih.gov/gene)</sub> | <sub>[gene2pubmed.gz](ftp://ftp.ncbi.nih.gov/gene/DATA/)</sub> | <sub>[EntrezGene2PubmedFileParser](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ncbi/gene/EntrezGene2PubmedFileParser.java)</sub> | | <sub>AUTO</sub> |
+| <sub>[NCBI Gene](http://www.ncbi.nlm.nih.gov/gene)</sub> | <sub>[gene2refseq.gz](ftp://ftp.ncbi.nih.gov/gene/DATA/)</sub> | <sub>[EntrezGene2RefseqFileParser](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ncbi/gene/EntrezGene2RefseqFileParser.java)</sub> | <sub>NCBIGENE_GENE2REFSEQ</sub> | <sub>AUTO</sub> |
+| <sub>[NCBI Gene](http://www.ncbi.nlm.nih.gov/gene)</sub> | <sub>[gene_info.gz](ftp://ftp.ncbi.nih.gov/gene/DATA/)</sub> | <sub>[EntrezGeneInfoFileParser](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ncbi/gene/EntrezGeneInfoFileParser.java)</sub> | <sub>NCBIGENE_GENEINFO</sub> | <sub>AUTO</sub> |
+| <sub>[NCBI Gene](http://www.ncbi.nlm.nih.gov/gene)</sub> | <sub>[mim2gene_medgen](ftp://ftp.ncbi.nih.gov/gene/DATA/)</sub> | <sub>[EntrezGeneMim2GeneFileParser](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ncbi/gene/EntrezGeneMim2GeneFileParser.java)</sub> | <sub>NCBIGENE_MIM2GENE</sub> | <sub>AUTO</sub> |
+| <sub>[NCBI Gene](http://www.ncbi.nlm.nih.gov/gene)</sub> | <sub>[gene_refseq_uniprotkb_collab.gz](ftp://ftp.ncbi.nih.gov/gene/DATA/)</sub> | <sub>[EntrezGeneRefSeqUniprotKbCollabFileParser](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ncbi/gene/EntrezGeneRefSeqUniprotKbCollabFileParser.java)</sub> | <sub>NCBIGENE_REFSEQUNIPROTCOLLAB</sub> | <sub>AUTO</sub> |
+| <sub>[NCBI Homologene](http://www.ncbi.nlm.nih.gov/homologene)</sub> | <sub>[homologene.data](ftp://ftp.ncbi.nih.gov/pub/HomoloGene/current)</sub> | <sub>[HomoloGeneDataFileParser](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ncbi/homologene/HomoloGeneDataFileParser.java)</sub> | <sub>HOMOLOGENE</sub> | <sub>AUTO</sub> |
+| <sub>[NCBI RefSeq](http://www.ncbi.nlm.nih.gov/refseq/)</sub> | <sub>[RefSeq-release{##}.catalog.gz](ftp://ftp.ncbi.nlm.nih.gov/refseq/release/release-catalog/)</sub> | <sub>[RefSeqReleaseCatalogFileParser](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ncbi/refseq/RefSeqReleaseCatalogFileParser.java)</sub> | <sub>REFSEQ_RELEASECATALOG</sub> | <sub>AUTO</sub> |
+| <sub>[PharmGKB](https://www.pharmgkb.org/)</sub> | <sub>[diseases.tsv](https://www.pharmgkb.org/downloads/)</sub> | <sub>[PharmGkbDiseaseFileParser](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/pharmgkb/PharmGkbDiseaseFileParser.java)</sub> | <sub>PHARMGKB_DISEASE</sub> | <sub>AUTO</sub> |
+| <sub>[PharmGKB](https://www.pharmgkb.org/)</sub> | <sub>[drugs.tsv](https://www.pharmgkb.org/downloads/)</sub> | <sub>[PharmGkbDrugFileParser](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/pharmgkb/PharmGkbDrugFileParser.java)</sub> | <sub>PHARMGKB_DRUG</sub> | <sub>AUTO</sub> |
+| <sub>[PharmGKB](https://www.pharmgkb.org/)</sub> | <sub>[genes.tsv](https://www.pharmgkb.org/downloads/)</sub> | <sub>[PharmGkbGeneFileParser](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/pharmgkb/PharmGkbGeneFileParser.java)</sub> | <sub>PHARMGKB_GENE</sub> | <sub>AUTO</sub> |
+| <sub>[PharmGKB](https://www.pharmgkb.org/)</sub> | <sub>relations.tsv</sub> | <sub>[PharmGkbRelationFileParser](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/pharmgkb/PharmGkbRelationFileParser.java)</sub> | <sub>PHARMGKB_RELATION</sub> | <sub>MANUAL</sub> |
+| <sub>[PhosphoSite](http://www.phosphosite.org/homeAction.action)</sub> | <sub>Acetylation_site_dataset.gz</sub> | <sub>[AcetylationPhosphositeFileParser](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/phosphosite/AcetylationPhosphositeFileParser.java)</sub> | | <sub>MANUAL</sub> |
+| <sub>[PhosphoSite](http://www.phosphosite.org/homeAction.action)</sub> | <sub>Disease-associated_sites.gz</sub> | <sub>[DiseasePhosphositeFileParser](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/phosphosite/DiseasePhosphositeFileParser.java)</sub> | | <sub>MANUAL</sub> |
+| <sub>[PhosphoSite](http://www.phosphosite.org/homeAction.action)</sub> | <sub>Kinase_Substrate_Dataset.gz</sub> | <sub>[KinasePhosphositeFileParser](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/phosphosite/KinasePhosphositeFileParser.java)</sub> | |  <sub>MANUAL</sub> |
+| <sub>[PhosphoSite](http://www.phosphosite.org/homeAction.action)</sub> | <sub>Methylation_site_dataset.gz</sub> | <sub>[MethylationPhosphositeFileParser](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/phosphosite/MethylationPhosphositeFileParser.java)</sub> | | <sub>MANUAL</sub> |
+| <sub>[PhosphoSite](http://www.phosphosite.org/homeAction.action)</sub> | <sub>O-GalNAc_site_dataset.gz</sub> | <sub>[OGalNAcPhosphositeFileParser](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/phosphosite/OGalNAcPhosphositeFileParser.java)</sub> | | <sub>MANUAL</sub> |
+| <sub>[PhosphoSite](http://www.phosphosite.org/homeAction.action)</sub> | <sub>O-GlcNAc_site_dataset.gz</sub> | <sub>[OGlcNAcPhosphositeFileParser](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/phosphosite/OGlcNAcPhosphositeFileParser.java)</sub> | | <sub>MANUAL</sub> |
+| <sub>[PhosphoSite](http://www.phosphosite.org/homeAction.action)</sub> | <sub>Phosphorylation_site_dataset.gz</sub> | <sub>[PhosphorylationPhosphositeFileParser](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/phosphosite/PhosphorylationPhosphositeFileParser.java)</sub> | |  <sub>MANUAL</sub> |
+| <sub>[PhosphoSite](http://www.phosphosite.org/homeAction.action)</sub> | <sub>Regulatory_sites.gz</sub> | <sub>[RegulatoryPhosphositeFileParser](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/phosphosite/RegulatoryPhosphositeFileParser.java)</sub> | |  <sub>MANUAL</sub> |
+| <sub>[PhosphoSite](http://www.phosphosite.org/homeAction.action)</sub> | <sub>Sumoylation_site_dataset.gz</sub> | <sub>[SumoylationPhosphositeFileParser](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/phosphosite/SumoylationPhosphositeFileParser.java)</sub> | | <sub>MANUAL</sub> |
+| <sub>[PhosphoSite](http://www.phosphosite.org/homeAction.action)</sub> | <sub>Ubiquitination_site_dataset.gz</sub> | <sub>[UbiquitinationPhosphositeFileParser](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/phosphosite/UbiquitinationPhosphositeFileParser.java)</sub> | | <sub>MANUAL</sub> |
+| <sub>[PreMod](http://genomequebec.mcgill.ca/PReMod/)</sub> | <sub>[human_module_tab.txt.gz](http://genomequebec.mcgill.ca/PReMod/download)</sub> | <sub>[HumanPReModModuleTabFileParser](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/premod/HumanPReModModuleTabFileParser.java)</sub> | <sub>PREMOD_HUMAN</sub> | <sub>AUTO</sub> |
+| <sub>[PreMod](http://genomequebec.mcgill.ca/PReMod/)</sub> | <sub>[mouse_module_tab.txt.gz](http://genomequebec.mcgill.ca/PReMod/download)</sub> | <sub>[MousePReModModuleTabFileParser](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/premod/MousePReModModuleTabFileParser.java)</sub> | <sub>PREMOD_MOUSE</sub> | <sub>AUTO</sub> |
+| <sub>[Protein Ontology](http://pir.georgetown.edu/pro/)</sub> | <sub>[promapping.txt](ftp://ftp.pir.georgetown.edu/databases/ontology/pro_obo/PRO_mappings/)</sub> | <sub>[ProMappingFileParser](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/pro/ProMappingFileParser.java)</sub> | <sub>PR_MAPPINGFILE</sub> | <sub>AUTO</sub> |
+| <sub>[Reactome](http://www.reactome.org/)</sub> | <sub>[UniProt2Reactome.txt](http://www.reactome.org/pages/download-data/)</sub> | <sub>[ReactomeUniprot2PathwayStidTxtFileParser](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/reactome/ReactomeUniprot2PathwayStidTxtFileParser.java)</sub> | <sub>REACTOME_UNIPROT2PATHWAYSTID</sub> | <sub>AUTO</sub> |
+| <sub>[RGD](http://rgd.mcw.edu/)</sub> | <sub>[GENES_RAT.txt](ftp://ftp.rgd.mcw.edu/pub/data_release/)</sub> | <sub>[RgdRatGeneFileRecordReader](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/rgd/RgdRatGeneFileRecordReader.java)</sub> | <sub>RGD_GENES</sub> | <sub>AUTO</sub> |
+| <sub>[UniProt](http://www.uniprot.org/)</sub> | <sub>[uniprot_sprot.xml.gz](http://www.uniprot.org/downloads)</sub> | <sub>[SwissProtXmlFileRecordReader](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ebi/uniprot/SwissProtXmlFileRecordReader.java)</sub> | <sub>UNIPROT_SWISSPROT</sub> | <sub>AUTO</sub> |
+| <sub>[UniProt](http://www.uniprot.org/)</sub> | <sub>[uniprot_trembl.xml.gz](http://www.uniprot.org/downloads)</sub> | <sub>[TremblXmlFileRecordReader](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ebi/uniprot/TremblXmlFileRecordReader.java)</sub> | <sub>UNIPROT_TREMBL_SPARSE</sub> | <sub>AUTO</sub> |
+| <sub>[UniProt](http://www.uniprot.org/)</sub> | <sub>[idmapping_selected.tab.gz](http://www.uniprot.org/downloads)</sub> | <sub>[UniProtIDMappingFileRecordReader](https://github.com/UCDenver-ccp/datasource/blob/master/datasource-fileparsers/src/main/java/edu/ucdenver/ccp/datasource/fileparsers/ebi/uniprot/UniProtIDMappingFileRecordReader.java)</sub> | <sub>UNIPROT_IDMAPPING</sub> | <sub>AUTO</sub> |
+
+
+
+# Generating RDF representations of parsed database files
+This library also contains code that can convert file parser output into a structured database record/field representation using RDF.  
+
+The structure of the RDF is described in:
 ```
-datasource-rdfizer/scripts/download-datasources-and-generate-triples.sh
-```
-
-#### Integer-to-File mappings
+KaBOB: Ontology-Based Semantic Integration of Biomedical Databases
+Kevin M Livingston, Michael Bada, William A Baumgartner, Lawrence E Hunter
+BMC Bioinformatics (accepted)
+``` 
+And the generated RDF serves as a foundation for the [KaBOB Knowledge Base of Biology](https://github.com/UCDenver-ccp/kabob).
+Detailed instructions on how to generate RDF to feed into KaBOB can be found below and [here](https://github.com/UCDenver-ccp/kabob/wiki/Building-a-Knowledgebase-instance).
 
-To see the integer-to-file mappings, run:
-```
-datasource-rdfizer/scripts/list-download-file-indices.sh
+The following script can be used to generate an RDF representation for a given data source file:
 ```
+datasource-rdfizer/scripts/download-datasources-and-generate-triples.sh
 
-Note that due to licensing issues, some files are not available for download
-directly.  The resources denoted in italics below must be manually obtained in
-order to be used.  Those resources not listed in italics are capable of being
-automatically downloaded at RDF generation time.
-
+Parameters:
+  [-d]: The directory into which to place the downloaded datasource files.
+  [-r]: The directory into which to place the RDF triples parsed from the 
+        datasource files.
+  [-i]: The names of the datasources (comma-delimited) to download and process; 
+        if not specified, all available datasources will be downloaded and 
+        processed. These names are listed in the "RDF Generation Key" column in 
+        the table above.
+  [-t]: A comma-separated list of NCBI taxonomy IDs. Only records for these IDs 
+        will be included in the RDF triple output where applicable. If neither 
+        -t nor -m is specified, all records will be included.
+  [-m]: Include only human and the 7 model organisms (fly, rat, mouse, yeast, 
+        worm, arabidopsis, and zebrafish) in the generated RDF. If neither -t 
+        nor -m is specified, all records will be included.
+  [-c]: Clean the data source files. If set, this flag will cause the data 
+        source files to be re-downloaded prior to processing.
 ```
-*1 ==> DIP*
-*2 ==> HPRD_ID_MAPPINGS*
-*3 ==> TRANSFAC_GENE*
-*4 ==> TRANSFAC_MATRIX*
-*5 ==> GAD*
-6 ==> PHARMGKB_DISEASE
-7 ==> PHARMGKB_GENE
-*8 ==> PHARMGKB_RELATION*
-9 ==> PHARMGKB_DRUG
-10 ==> DRUGBANK
-11 ==> HGNC
-12 ==> HOMOLOGENE
-13 ==> IREFWEB
-14 ==> MGI_ENTREZGENE
-15 ==> MGI_MGIPHENOGENO
-16 ==> MGI_MRKLIST
-17 ==> MGI_MRKREFERENCE
-18 ==> MGI_MRKSEQUENCE
-19 ==> MGI_MRKSWISSPROT
-20 ==> MIRBASE
-*21 ==> OMIM*
-22 ==> RGD_GENES
-23 ==> RGD_GENE_MP
-24 ==> RGD_GENE_RDO
-25 ==> RGD_GENE_NBO
-26 ==> RGD_GENE_PW
-27 ==> PREMOD_HUMAN
-28 ==> PREMOD_MOUSE
-29 ==> PR_MAPPINGFILE
-30 ==> REACTOME_UNIPROT2PATHWAYSTID
-31 ==> REFSEQ_RELEASECATALOG
-32 ==> NCBIGENE_GENE2REFSEQ
-33 ==> NCBIGENE_GENEINFO
-34 ==> NCBIGENE_MIM2GENE
-35 ==> NCBIGENE_REFSEQUNIPROTCOLLAB
-36 ==> GOA
-37 ==> UNIPROT_SWISSPROT
-38 ==> UNIPROT_IDMAPPING
-39 ==> UNIPROT_TREMBL_SPARSE
-40 ==> INTERPRO_NAMESDAT
-41 ==> INTERPRO_INTERPRO2GO
-42 ==> INTERPRO_PROTEIN2IPR
+Data source files that are publicly available will be automatically downloaded and saved under 
+the directory specified by the `-d` parameter. Data source files that require manual download 
+must be manually placed under the directory specified by the `-d` parameter prior to RDF generation. 
+Data source names that can be used as input to the `-i` parameter in the `download-datasources-and-generate-triples.sh` 
+script are listed in the above 
+table in the "RDF Generation Key" column. They can also be seen by running the following script:
 ```
+datasource-rdfizer/scripts/list-datasource-names.sh
+```
+
+## Example RDF Generation
 
-While this is very convenient when dealing with some job schedulers, it also
-allows for easy execution of single RDF generation jobs. For example, to
-generate RDF for the MirBase database file (index = 20):
+#### miRBase RDF Generation
+For example, to generate RDF for the miRBase database file:
 
 ```
 $ export DATA_DIR=[BASE_DIRECTORY_WHERE_DATA_FILES_TO_PARSE_LIVE]
@@ -121,10 +163,10 @@ $ mkdir -p $DATA_DIR
 $ mkdir -p $RDF_DIR
 $ export DATE=[TODAYS_DATE_TO_TIMESTAMP_THE_DATA e.g. 2015-04-16]
 $ mvn clean install
-$ ./datasource-rdfizer/scripts/download-ddatasources-and-generate-triples \
+$ ./datasource-rdfizer/scripts/download-datasources-and-generate-triples.sh \
     -d $DATA_DIR \
     -r $RDF_DIR \
-    -i 20
+    -i MIRBASE
 ```
 
 Note: you may need to adjust the Java Heap size in pom-rdf-gen.xml depending on
@@ -137,28 +179,32 @@ group of species.  Doing so can improve RDF generation time as well as limit
 the number of triples produced when parsing a file. Some of the file parsers
 are *species-aware* and the script allows one to specify the NCBI taxonomy ID
 of the species to which triple generation should be constrained.  For example,
-to limit RDF triples only to humans (NCBI taxonomy ID: 9606):
+to constrain output to UniProt ID mapping records that pertain only to human 
+(NCBI taxonomy ID: 9606), run:
 
 ```
-./datasource-rdfizer/scripts/download-ddatasources-and-generate-triples \
+./datasource-rdfizer/scripts/download-datasources-and-generate-triples.sh \
     -d $DATA_DIR \
     -r $RDF_DIR \
-    -i 20
+    -i UNIPROT_IDMAPPING \
     -t 9606
 ```
 
 For human plus seven model organisms (fly, rat, mouse, yeast, worm,
-arabidopsis, and zebrafish), use:
+arabidopsis, and zebrafish), use the `-m` parameter:
 
 ```
-./datasource-rdfizer/scripts/download-ddatasources-and-generate-triples \
+./datasource-rdfizer/scripts/download-datasources-and-generate-triples.sh \
     -d $DATA_DIR \
     -r $RDF_DIR \
-    -i 20
+    -i UNIPROT_IDMAPPING \
     -m
 ```
 
-When a taxon-aware file parser is used, some extra data is downloaded to ensure
+_Note: when a taxon-aware file parser is used, some extra data is downloaded to ensure
 that the mappings from biological concepts to taxon identifiers are
 present. This download can be time consuming due to one of the files being very
-large, but it is a one-time cost.
\ No newline at end of file
+large, but it is a one-time cost._
+
+
+

From eb7074cfee24772272432d9449fe3f1a3b33cd30 Mon Sep 17 00:00:00 2001
From: bill-baumgartner <bill.baumgartner@gmail.com>
Date: Thu, 11 Feb 2016 14:31:13 -0700
Subject: [PATCH 35/36] updated compiler version to 1.8

---
 datasource-rdfizer/scripts/pom-rdf-gen-9606.xml              | 4 ++--
 datasource-rdfizer/scripts/pom-rdf-gen-modelorgs.xml         | 4 ++--
 datasource-rdfizer/scripts/pom-rdf-gen.xml                   | 4 ++--
 datasource-rdfizer/scripts/pom-rdf-list-datasource-names.xml | 4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/datasource-rdfizer/scripts/pom-rdf-gen-9606.xml b/datasource-rdfizer/scripts/pom-rdf-gen-9606.xml
index 915dc97..4be5962 100644
--- a/datasource-rdfizer/scripts/pom-rdf-gen-9606.xml
+++ b/datasource-rdfizer/scripts/pom-rdf-gen-9606.xml
@@ -54,8 +54,8 @@
 				<artifactId>maven-compiler-plugin</artifactId>
 				<version>3.3</version>
 				<configuration>
-					<source>1.7</source>
-					<target>1.7</target>
+					<source>1.8</source>
+					<target>1.8</target>
 				</configuration>
 			</plugin>
 		</plugins>
diff --git a/datasource-rdfizer/scripts/pom-rdf-gen-modelorgs.xml b/datasource-rdfizer/scripts/pom-rdf-gen-modelorgs.xml
index a805709..72b046a 100644
--- a/datasource-rdfizer/scripts/pom-rdf-gen-modelorgs.xml
+++ b/datasource-rdfizer/scripts/pom-rdf-gen-modelorgs.xml
@@ -54,8 +54,8 @@
 				<artifactId>maven-compiler-plugin</artifactId>
 				<version>3.3</version>
 				<configuration>
-					<source>1.7</source>
-					<target>1.7</target>
+					<source>1.8</source>
+					<target>1.8</target>
 				</configuration>
 			</plugin>
 		</plugins>
diff --git a/datasource-rdfizer/scripts/pom-rdf-gen.xml b/datasource-rdfizer/scripts/pom-rdf-gen.xml
index e0985d6..1712ba4 100644
--- a/datasource-rdfizer/scripts/pom-rdf-gen.xml
+++ b/datasource-rdfizer/scripts/pom-rdf-gen.xml
@@ -64,8 +64,8 @@
 				<artifactId>maven-compiler-plugin</artifactId>
 				<version>3.3</version>
 				<configuration>
-					<source>1.7</source>
-					<target>1.7</target>
+					<source>1.8</source>
+					<target>1.8</target>
 				</configuration>
 			</plugin>
 		</plugins>
diff --git a/datasource-rdfizer/scripts/pom-rdf-list-datasource-names.xml b/datasource-rdfizer/scripts/pom-rdf-list-datasource-names.xml
index d4bc319..b485fe3 100644
--- a/datasource-rdfizer/scripts/pom-rdf-list-datasource-names.xml
+++ b/datasource-rdfizer/scripts/pom-rdf-list-datasource-names.xml
@@ -45,8 +45,8 @@
         <artifactId>maven-compiler-plugin</artifactId>
         <version>3.3</version>
         <configuration>
-          <source>1.7</source>
-          <target>1.7</target>
+          <source>1.8</source>
+          <target>1.8</target>
         </configuration>
       </plugin>
     </plugins>

From bd9af86289ebce04dbb4b2ad7b942d82ab15c5df Mon Sep 17 00:00:00 2001
From: bill-baumgartner <bill.baumgartner@gmail.com>
Date: Thu, 11 Feb 2016 14:33:47 -0700
Subject: [PATCH 36/36] updating poms for branch'release/0.6.1' with
 non-snapshot versions

---
 datasource-fileparsers/pom.xml | 2 +-
 datasource-identifiers/pom.xml | 2 +-
 datasource-rdfizer/pom.xml     | 2 +-
 pom.xml                        | 7 ++-----
 4 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/datasource-fileparsers/pom.xml b/datasource-fileparsers/pom.xml
index 8cffda9..90aceac 100644
--- a/datasource-fileparsers/pom.xml
+++ b/datasource-fileparsers/pom.xml
@@ -3,7 +3,7 @@
 	<parent>
 		<artifactId>datasource</artifactId>
 		<groupId>edu.ucdenver.ccp</groupId>
-		<version>0.6.1-SNAPSHOT</version>
+		<version>0.6.1</version>
 	</parent>
 	<artifactId>datasource-fileparsers</artifactId>
 
diff --git a/datasource-identifiers/pom.xml b/datasource-identifiers/pom.xml
index c12811c..52498a6 100644
--- a/datasource-identifiers/pom.xml
+++ b/datasource-identifiers/pom.xml
@@ -3,7 +3,7 @@
 	<parent>
 		<artifactId>datasource</artifactId>
 		<groupId>edu.ucdenver.ccp</groupId>
-		<version>0.6.1-SNAPSHOT</version>
+		<version>0.6.1</version>
 	</parent>
 	<artifactId>datasource-identifiers</artifactId>
 
diff --git a/datasource-rdfizer/pom.xml b/datasource-rdfizer/pom.xml
index 728b501..5430d2b 100644
--- a/datasource-rdfizer/pom.xml
+++ b/datasource-rdfizer/pom.xml
@@ -3,7 +3,7 @@
 	<parent>
 		<groupId>edu.ucdenver.ccp</groupId>
 		<artifactId>datasource</artifactId>
-		<version>0.6.1-SNAPSHOT</version>
+		<version>0.6.1</version>
 	</parent>
 	<artifactId>datasource-rdfizer</artifactId>
 
diff --git a/pom.xml b/pom.xml
index 028577b..3f6f3ec 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1,12 +1,9 @@
-<project xmlns="http://maven.apache.org/POM/4.0.0"
-         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
-                             http://maven.apache.org/xsd/maven-4.0.0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0                              http://maven.apache.org/xsd/maven-4.0.0.xsd">
 
 	<modelVersion>4.0.0</modelVersion>
 	<groupId>edu.ucdenver.ccp</groupId>
 	<artifactId>datasource</artifactId>
-	<version>0.6.1-SNAPSHOT</version>
+	<version>0.6.1</version>
 	<packaging>pom</packaging>
 
 	<properties>