diff --git a/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/ProbableErrorDataSourceIdentifier.java b/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/ProbableErrorDataSourceIdentifier.java index f481ad8..f0394aa 100644 --- a/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/ProbableErrorDataSourceIdentifier.java +++ b/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/ProbableErrorDataSourceIdentifier.java @@ -1,5 +1,39 @@ package edu.ucdenver.ccp.datasource.identifiers; +/* + * #%L + * Colorado Computational Pharmacology's datasource + * project + * %% + * Copyright (C) 2012 - 2016 Regents of the University of Colorado + * %% + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the Regents of the University of Colorado nor the names of its contributors + * may be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * #L% + */ + public class ProbableErrorDataSourceIdentifier extends DataSourceIdentifier { private final String dataSourceStr; diff --git a/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/UnknownDataSourceIdentifier.java b/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/UnknownDataSourceIdentifier.java index 65eeb4c..14a91db 100644 --- a/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/UnknownDataSourceIdentifier.java +++ b/datasource-identifiers/src/main/java/edu/ucdenver/ccp/datasource/identifiers/UnknownDataSourceIdentifier.java @@ -1,5 +1,39 @@ package edu.ucdenver.ccp.datasource.identifiers; +/* + * #%L + * Colorado Computational Pharmacology's datasource + * project + * %% + * Copyright (C) 2012 - 2016 Regents of the University of Colorado + * %% + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the Regents of the University of Colorado nor the names of its contributors + * may be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * #L% + */ + public class UnknownDataSourceIdentifier extends DataSourceIdentifier { private final String dataSourceStr; diff --git a/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/ErroneousIdentifierRecord.java b/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/ErroneousIdentifierRecord.java new file mode 100644 index 0000000..e18baea --- /dev/null +++ b/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/ErroneousIdentifierRecord.java @@ -0,0 +1,72 @@ +package edu.ucdenver.ccp.datasource.rdfizer.rdf.ice; + +/* + * #%L + * Colorado Computational Pharmacology's datasource + * project + * %% + * Copyright (C) 2012 - 2016 Regents of the University of Colorado + * %% + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the Regents of the University of Colorado nor the names of its contributors + * may be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * #L% + */ + +import edu.ucdenver.ccp.datasource.fileparsers.Record; +import edu.ucdenver.ccp.datasource.fileparsers.RecordField; +import edu.ucdenver.ccp.datasource.identifiers.DataSource; + +@Record(dataSource = DataSource.KABOB) +public class ErroneousIdentifierRecord { + + @RecordField + private final String identifier; + + @RecordField + private final String datasource; + + @RecordField + private final String comment; + + public ErroneousIdentifierRecord(String identifier, String datasource, String comment) { + super(); + this.identifier = identifier; + this.datasource = datasource; + this.comment = comment; + } + + public String getIdentifier() { + return identifier; + } + + public String getDatasource() { + return datasource; + } + + public String getComment() { + return comment; + } + +} diff --git a/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/NonNormalizedIdentifierRecord.java b/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/NonNormalizedIdentifierRecord.java new file mode 100644 index 0000000..24cb1c5 --- /dev/null +++ b/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/NonNormalizedIdentifierRecord.java @@ -0,0 +1,64 @@ +package edu.ucdenver.ccp.datasource.rdfizer.rdf.ice; + +/* + * #%L + * Colorado Computational Pharmacology's datasource + * project + * %% + * Copyright (C) 2012 - 2016 Regents of the University of Colorado + * %% + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the Regents of the University of Colorado nor the names of its contributors + * may be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * #L% + */ + +import edu.ucdenver.ccp.datasource.fileparsers.Record; +import edu.ucdenver.ccp.datasource.fileparsers.RecordField; +import edu.ucdenver.ccp.datasource.identifiers.DataSource; + +@Record(dataSource = DataSource.KABOB) +public class NonNormalizedIdentifierRecord { + + @RecordField + private final String identifier; + + @RecordField + private final String datasource; + + public NonNormalizedIdentifierRecord(String identifier, String datasource) { + super(); + this.identifier = identifier; + this.datasource = datasource; + } + + public String getIdentifier() { + return identifier; + } + + public String getDatasource() { + return datasource; + } + +} diff --git a/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RdfRecordUriFactory.java b/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RdfRecordUriFactory.java index 2a5b87c..ff8d7a4 100644 --- a/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RdfRecordUriFactory.java +++ b/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RdfRecordUriFactory.java @@ -44,15 +44,19 @@ import java.util.Map.Entry; import java.util.Set; +import org.openrdf.model.Statement; import org.openrdf.model.Value; import org.openrdf.model.impl.URIImpl; import org.openrdf.rio.ntriples.NTriplesUtil; +import edu.ucdenver.ccp.common.collections.CollectionsUtil; import edu.ucdenver.ccp.common.digest.DigestUtil; import edu.ucdenver.ccp.common.reflection.PrivateAccessor; import edu.ucdenver.ccp.datasource.fileparsers.RecordField; import edu.ucdenver.ccp.datasource.fileparsers.RecordUtil; import edu.ucdenver.ccp.datasource.identifiers.DataSource; +import edu.ucdenver.ccp.datasource.identifiers.ProbableErrorDataSourceIdentifier; +import edu.ucdenver.ccp.datasource.identifiers.UnknownDataSourceIdentifier; import edu.ucdenver.ccp.datasource.rdfizer.rdf.vocabulary.KIAO; /** @@ -178,6 +182,30 @@ private static List getSortedFieldValueUriStrs(Collection fieldV * could be a collection, if so we return one string per value */ private static String getFieldValueUri(Object fieldValue) { + /* address unknown and probable error data source identifiers here? 
*/ + if (fieldValue instanceof UnknownDataSourceIdentifier) { + UnknownDataSourceIdentifier id = (UnknownDataSourceIdentifier) fieldValue; + NonNormalizedIdentifierRecord record = new NonNormalizedIdentifierRecord(id.getDataElement(), id.getDataSourceStr()); + URIImpl recordUri = RdfRecordUriFactory.createRecordUri(record); + List recordInstanceStatements = RdfRecordUtil.getRecordInstanceStatements(record, System.currentTimeMillis(), + recordUri, null, null, null); + recordInstanceStatements.remove(0); + /* this is used to generate sha1 hashes, so it doesn't need to be a true uri */ + return CollectionsUtil.createDelimitedString(recordInstanceStatements, " "); + } else if (fieldValue instanceof ProbableErrorDataSourceIdentifier) { + ProbableErrorDataSourceIdentifier id = (ProbableErrorDataSourceIdentifier) fieldValue; + ErroneousIdentifierRecord record = new ErroneousIdentifierRecord(id.getDataElement(), + id.getDataSourceStr(), id.getErrorMessage()); + URIImpl recordUri = RdfRecordUriFactory.createRecordUri(record); + List recordInstanceStatements = RdfRecordUtil.getRecordInstanceStatements(record, System.currentTimeMillis(), + recordUri, null, null, null); + /* + * the first statement returned is a dataset has_part record triple + * which we do not need + */ + recordInstanceStatements.remove(0); + return CollectionsUtil.createDelimitedString(recordInstanceStatements, " "); + } Value value = RdfUtil.getValue(fieldValue); return NTriplesUtil.toNTriplesString(value); } @@ -224,7 +252,6 @@ private static Collection getFieldValues(Object record, Field field) { return null; } - int fieldCount = 0; Collection fieldValues = new ArrayList(); if (!(fieldValue instanceof Collection)) { diff --git a/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RdfRecordUtil.java b/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RdfRecordUtil.java index 1e2b49b..d7e0fe6 100644 --- a/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RdfRecordUtil.java +++ b/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RdfRecordUtil.java @@ -53,6 +53,7 @@ import org.openrdf.model.impl.StatementImpl; import org.openrdf.model.impl.URIImpl; +import edu.ucdenver.ccp.common.collections.CollectionsUtil; import edu.ucdenver.ccp.common.reflection.PrivateAccessor; import edu.ucdenver.ccp.common.string.StringConstants; import edu.ucdenver.ccp.datasource.fileparsers.DataRecord; @@ -60,6 +61,8 @@ import edu.ucdenver.ccp.datasource.fileparsers.RecordField; import edu.ucdenver.ccp.datasource.fileparsers.RecordUtil; import edu.ucdenver.ccp.datasource.identifiers.DataSource; +import edu.ucdenver.ccp.datasource.identifiers.ProbableErrorDataSourceIdentifier; +import edu.ucdenver.ccp.datasource.identifiers.UnknownDataSourceIdentifier; import edu.ucdenver.ccp.datasource.rdfizer.rdf.filter.DuplicateStatementFilter; import edu.ucdenver.ccp.datasource.rdfizer.rdf.ice.RdfRecordUriFactory.IncludeVersion; import edu.ucdenver.ccp.datasource.rdfizer.rdf.vocabulary.DC; @@ -72,26 +75,31 @@ /** * Static utility functions for creating RDF * - * @author Colorado Computational Pharmacology, UC Denver; ccpsupport@ucdenver.edu + * @author Colorado Computational Pharmacology, UC Denver; + * ccpsupport@ucdenver.edu */ public class RdfRecordUtil { - private static final Logger logger = Logger.getLogger(RdfRecordUtil.class); - + // private static final Logger logger = + // Logger.getLogger(RdfRecordUtil.class); + // /** // * // * // * @param recordClass // 
* @return Collection of created statements // */ - // public static Collection getRecordSchemaStatements(Class recordClass) + // public static Collection + // getRecordSchemaStatements(Class recordClass) // { // Collection stmts = new LinkedHashSet(); - // RdfNamespace ns = RdfNamespace.getNamespace(RecordUtil.getRecordDataSource(recordClass)); + // RdfNamespace ns = + // RdfNamespace.getNamespace(RecordUtil.getRecordDataSource(recordClass)); // String recordComment = RecordUtil.getRecordComment(recordClass); // String recordVersion = RecordUtil.getRecordSchemaVersion(recordClass); // - // URIImpl recordClsUri = RdfUtil.createKiaoUri(ns, recordClass.getSimpleName()); + // URIImpl recordClsUri = RdfUtil.createKiaoUri(ns, + // recordClass.getSimpleName()); // stmts.add(new StatementImpl(recordClsUri, RDFS.SUBCLASS_OF.uri(), // IAO.INFORMATION_CONTENT_ENITITY.uri())); // if (recordComment != null && !recordComment.isEmpty()) { @@ -103,7 +111,8 @@ public class RdfRecordUtil { // // Map fieldToRecordFieldAnnotationMap = RecordUtil // .getFieldToRecordFieldAnnotationsMap(recordClass); - // for (Entry entry : fieldToRecordFieldAnnotationMap.entrySet()) { + // for (Entry entry : + // fieldToRecordFieldAnnotationMap.entrySet()) { // if (isFieldSubRecord(entry.getKey())) { // Field f = entry.getKey(); // if (Collection.class.isAssignableFrom(f.getType())) { @@ -114,7 +123,8 @@ public class RdfRecordUtil { // stmts.addAll(getRecordSchemaStatements((Class) genericTypes[0])); // } // } else { - // throw new IllegalStateException("Non-parameterized collection detected in record class: " + // throw new + // IllegalStateException("Non-parameterized collection detected in record class: " // + recordClass.getName() + " Please parameterize."); // } // } else { @@ -122,14 +132,18 @@ public class RdfRecordUtil { // } // } // String fieldName = entry.getKey().getName(); - // String fieldComment = RecordUtil.getRecordFieldComment(recordClass, fieldName); - // String fieldVersion = RecordUtil.getRecordFieldVersion(recordClass, fieldName); + // String fieldComment = RecordUtil.getRecordFieldComment(recordClass, + // fieldName); + // String fieldVersion = RecordUtil.getRecordFieldVersion(recordClass, + // fieldName); // boolean isKeyField = RecordUtil.isKeyRecordField(recordClass, fieldName); // - // URIImpl fieldTemplateUri = RdfRecordUriFactory.createDataFieldTemplateUri(recordClass, + // URIImpl fieldTemplateUri = + // RdfRecordUriFactory.createDataFieldTemplateUri(recordClass, // fieldName, // IncludeVersion.YES); - // stmts.add(new StatementImpl(recordClsUri, RO.HAS_PART.uri(), fieldTemplateUri)); + // stmts.add(new StatementImpl(recordClsUri, RO.HAS_PART.uri(), + // fieldTemplateUri)); // if (fieldComment != null && !fieldComment.isEmpty()) { // stmts.add(new StatementImpl(fieldTemplateUri, RDFS.COMMENT.uri(), // RdfUtil.createLiteral(fieldComment))); @@ -140,7 +154,8 @@ public class RdfRecordUtil { // if (isKeyField) { // //stmts.add(new StatementImpl(fieldTemplateUri, DC.IDENTIFIER.uri(), // RdfUtil.createLiteral(isKeyField))); - // stmts.add(new StatementImpl(recordClsUri, KIAO.HAS_KEY_FIELD.uri(), fieldTemplateUri)); + // stmts.add(new StatementImpl(recordClsUri, KIAO.HAS_KEY_FIELD.uri(), + // fieldTemplateUri)); // } // } // @@ -148,8 +163,10 @@ public class RdfRecordUtil { // } // /** - // * Generate statements about datasets, records and their types for specified namespace within - // * KABOB namespace. 
Each class represents a dataset made up of its class of records and their + // * Generate statements about datasets, records and their types for + // specified namespace within + // * KABOB namespace. Each class represents a dataset made up of its class + // of records and their // * fields. // * // * @param recordTypes @@ -157,11 +174,13 @@ public class RdfRecordUtil { // * target namespace // * @return statements // */ - // public static List getRecordSchemaDefinitionStatements(Class + // getRecordSchemaDefinitionStatements(Class recordClass) { // List statements = new ArrayList(); // - // RdfNamespace ns = RdfNamespace.getNamespace(RecordUtil.getRecordDataSource(recordClass)); + // RdfNamespace ns = + // RdfNamespace.getNamespace(RecordUtil.getRecordDataSource(recordClass)); // // URIImpl dataSourceUri = RdfUtil.createKiaoUri(ns, ns.lowerName() + // KIAO.KABOB_DATASOURCE.termName()); @@ -188,14 +207,16 @@ public class RdfRecordUtil { // statements.add(new StatementImpl(fieldUri, RDFS.SUBCLASS_OF.uri(), // KIAO.KABOB_DATAFIELD.uri())); // - // statements.addAll(getRecordFieldDeclarationStatements(recordClass, null)); + // statements.addAll(getRecordFieldDeclarationStatements(recordClass, + // null)); // // return statements; // } /** - * Generate statements about class' fields specified namespace within KABOB namespace. Each - * field is a subclass of generic field in namespace and part of dataset. + * Generate statements about class' fields specified namespace within KABOB + * namespace. Each field is a subclass of generic field in namespace and + * part of dataset. * * * @param recordClass @@ -205,13 +226,14 @@ public class RdfRecordUtil { * @param version * structural version label * @param parentSchemaUri - * if not null, record schema is asserted to be {@link RdfPredicate#RO_PARTOF} parent - * schema. + * if not null, record schema is asserted to be + * {@link RdfPredicate#RO_PARTOF} parent schema. * @param fieldComment - * the field comment is used to capture @RecordField comments on fields that are - * subrecords + * the field comment is used to capture @RecordField comments on + * fields that are subrecords * @param isKeyField - * @return statements about fields; empty result is returned for anonymous classes. + * @return statements about fields; empty result is returned for anonymous + * classes. */ public static Collection getRecordSchemaStatements(Class recordClass, URIImpl parentSchemaUri, String fieldComment, boolean isKeyField) { @@ -219,16 +241,18 @@ public static Collection getRecordSchemaStatements(Class Collection statements = new ArrayList(); /* - * The following two statements are meta statements that will be redundant if multiple - * record schemas are combined. Note that the first statement is supposed to be a self-loop. + * The following two statements are meta statements that will be + * redundant if multiple record schemas are combined. Note that the + * first statement is supposed to be a self-loop. */ statements.add(new StatementImpl(KIAO.SCHEMA.uri(), RO.HAS_PART.uri(), KIAO.SCHEMA.uri())); statements.add(new StatementImpl(KIAO.SCHEMA.uri(), RO.HAS_PART.uri(), KIAO.FIELD.uri())); /* - * The following adds the kiaosource:Record rdfs:subClassOf iao:IAO_0000030 (information - * content entity) triple. This triple is not really part of the schema, however it only - * needs to be added one time so this seems like a good place to put it. + * The following adds the kiaosource:Record rdfs:subClassOf + * iao:IAO_0000030 (information content entity) triple. 
This triple is + * not really part of the schema, however it only needs to be added one + * time so this seems like a good place to put it. */ URIImpl recordClsUri = RdfUtil.createKiaoUri(ns, recordClass.getSimpleName()); statements.add(new StatementImpl(recordClsUri, RDFS.SUBCLASS_OF.uri(), IAO.INFORMATION_CONTENT_ENITITY.uri())); @@ -272,8 +296,8 @@ public static Collection getRecordSchemaStatements(Class for (Field field : sortedFields) { /* - * If the RecordField annotation is not present, then this field does not get serialized - * in the RDF, e.g. the logger field + * If the RecordField annotation is not present, then this field + * does not get serialized in the RDF, e.g. the logger field */ if (field.isAnnotationPresent(RecordField.class)) { String fComment = RecordUtil.getRecordFieldComment(recordClass, field.getName()); @@ -330,9 +354,10 @@ private static String getFieldLabel(Class recordClass, String fieldName) { /** * @param recordClass - * @return a label for the record by first looking for an explicitly defined label in the @Record - * annotation. If not present, a label is generated by adding spaces to replace - * camel-case in the Record name + * @return a label for the record by first looking for an explicitly defined + * label in the @Record annotation. If not present, a label is + * generated by adding spaces to replace camel-case in the Record + * name */ private static String getRecordLabel(Class recordClass) { String label = RecordUtil.getRecordLabel(recordClass); @@ -351,7 +376,8 @@ private static String getRecordLabel(Class recordClass) { * * @param field * to check - * @return field type, or generic type if field's type is a {@link Collection} + * @return field type, or generic type if field's type is a + * {@link Collection} */ private static Class getFieldType(Field field) { Class klass = field.getType(); @@ -370,8 +396,8 @@ private static Class getFieldType(Field field) { } /** - * Determine whether class should be treated as sub-record definition. If field type is - * collection, it's generic type is used. + * Determine whether class should be treated as sub-record definition. If + * field type is collection, it's generic type is used. * * @param field * to check @@ -403,12 +429,16 @@ private static boolean isFieldSubRecord(Field field) { private static boolean isFieldSubRecord(Class klass) { return klass.isAnnotationPresent(Record.class); // return DataRecord.class.isAssignableFrom(klass); - // if (!(DataSourceElement.class.isAssignableFrom(klass) || klass.isPrimitive() || + // if (!(DataSourceElement.class.isAssignableFrom(klass) || + // klass.isPrimitive() || // klass.isArray() // || klass.isEnum() || klass.isSynthetic() || klass.isAnnotation() - // || Collection.class.isAssignableFrom(klass) || String.class.isAssignableFrom(klass) - // || Number.class.isAssignableFrom(klass) || Boolean.class.isAssignableFrom(klass) - // || java.util.Date.class.isAssignableFrom(klass) || URI.class.isAssignableFrom(klass) || + // || Collection.class.isAssignableFrom(klass) || + // String.class.isAssignableFrom(klass) + // || Number.class.isAssignableFrom(klass) || + // Boolean.class.isAssignableFrom(klass) + // || java.util.Date.class.isAssignableFrom(klass) || + // URI.class.isAssignableFrom(klass) || // URL.class // .isAssignableFrom(klass))) { // return true; @@ -418,7 +448,8 @@ private static boolean isFieldSubRecord(Class klass) { } /** - * Get collection of statements that instance datasource, records and fields for given record. 
+ * Get collection of statements that instance datasource, records and fields + * for given record. * * @param record * @param src @@ -459,13 +490,13 @@ public static List getDataSourceInstanceStatements(DataReco } /** - * Generate instance statements about this particular instance of {@link DataRecord}. Statements - * include assertions about record and it's fields types and values. All record fields are - * included. + * Generate instance statements about this particular instance of + * {@link DataRecord}. Statements include assertions about record and it's + * fields types and values. All record fields are included. * * @param record * instance - * @param filter + * @param filter * @param src * record source * @param alreadyObservedFieldUris @@ -473,14 +504,16 @@ public static List getDataSourceInstanceStatements(DataReco * record instance index * @return statements */ - public static List getRecordInstanceStatements(DataRecord record, long createdTime, URIImpl recordUri, DuplicateStatementFilter filter) { + public static List getRecordInstanceStatements(DataRecord record, long createdTime, URIImpl recordUri, + DuplicateStatementFilter filter) { return getRecordInstanceStatements(record, createdTime, recordUri, null, StringConstants.BLANK, filter); } /** - * Generate instance statements about this particular instance of {@link DataRecord}. Statements - * include assertions about record and it's fields types and values. {@code rdfFields} will be - * used to determine record exclusion rules and output format. + * Generate instance statements about this particular instance of + * {@link DataRecord}. Statements include assertions about record and it's + * fields types and values. {@code rdfFields} will be used to determine + * record exclusion rules and output format. * * @param record * instance @@ -491,13 +524,14 @@ public static List getRecordInstanceStatements(DataRecord record, lon * @param rdfFields * configuration info for field export * @param parentRecordUri - * if not null, used to indicate that record is a subrecord within record described - * by this value + * if not null, used to indicate that record is a subrecord + * within record described by this value * @param readerKey - * label used in generating dataset instance URI; if null, converted to - * {@link StringConstants#BLANK} + * label used in generating dataset instance URI; if null, + * converted to {@link StringConstants#BLANK} * @param alreadyObservedFieldUris - * @return statements ; empty result is returned for anonymous {@code record} class. + * @return statements ; empty result is returned for anonymous + * {@code record} class. 
*/ public static List getRecordInstanceStatements(Object record, long createdTime, URIImpl recordUri, URIImpl parentRecordUri, String readerKey, DuplicateStatementFilter filter) { @@ -538,14 +572,17 @@ public static List getRecordInstanceStatements(Object record, long cr // record instance has template record schema // URIImpl recordSchemaUri = RdfUtil.createKiaoUri( // targetNs, - // targetNs.lowerName() + record.getClass().getSimpleName() + KIAO.KABOB_SCHEMA.termName() + // targetNs.lowerName() + record.getClass().getSimpleName() + + // KIAO.KABOB_SCHEMA.termName() // + RecordUtil.getRecordSchemaVersion(record.getClass())); URIImpl recordSchemaUri = RdfRecordUriFactory.createRecordSchemaUri(record.getClass(), IncludeVersion.YES); statements.add(new StatementImpl(recordUri, KIAO.HAS_TEMPLATE.uri(), recordSchemaUri)); Set fields = RecordUtil.getFieldToRecordFieldAnnotationsMap(record.getClass()).keySet(); List sortedFields = new ArrayList(fields); - Collections.sort(sortedFields, new FieldNameComparator()); // sorted to ease unit testing + Collections.sort(sortedFields, new FieldNameComparator()); // sorted to + // ease unit + // testing for (Field field : sortedFields) { if (isFieldSubRecord(field)) { @@ -568,13 +605,16 @@ public static List getRecordInstanceStatements(Object record, long cr } } else { statements.addAll(getSubrecordStatements(createdTime, recordUri, readerKey, filter, subRecord)); - // URIImpl subRecordUri = RdfRecordUriFactory.createRecordUri(subRecord); - // statements.addAll(getRecordInstanceStatements(subRecord, createdTime, + // URIImpl subRecordUri = + // RdfRecordUriFactory.createRecordUri(subRecord); + // statements.addAll(getRecordInstanceStatements(subRecord, + // createdTime, // subRecordUri, recordUri, // readerKey)); } } else { - Collection fieldValueStmts = getRdfFieldValueStatements(recordUri, record, field); + Collection fieldValueStmts = getRdfFieldValueStatements(recordUri, record, field, + createdTime, filter); if (fieldValueStmts.isEmpty()) { continue; } @@ -599,13 +639,13 @@ private static List getSubrecordStatements(long createdTime, URIImpl DuplicateStatementFilter filter, Object r) { List statements = new ArrayList(); URIImpl subRecordUri = RdfRecordUriFactory.createRecordUri(r); - List subRecordStmts = getRecordInstanceStatements(r, createdTime, subRecordUri, - recordUri, readerKey, filter); + List subRecordStmts = getRecordInstanceStatements(r, createdTime, subRecordUri, recordUri, + readerKey, filter); if (!filter.alreadyObservedRecordUri(subRecordUri)) { statements.addAll(subRecordStmts); filter.logRecordUri(subRecordUri); } else { -// logger.info("already seen subrecord"); + // logger.info("already seen subrecord"); statements.add(subRecordStmts.get(0)); } return statements; @@ -658,41 +698,46 @@ private static Collection linkFieldToRecord(URIImpl recordUri, URIImp * Generate statements about record's field. 
* * @param fieldInstanceUri - * initial field instance URI; template re-used if field type is a {@link Collection} + * initial field instance URI; template re-used if field type is + * a {@link Collection} * @param record * instance with specified field + * @param filter * @param fieldName * field name * @param commonFieldStatements * shared template statements to be asserted about every field * @return statements */ - private static Collection getRdfFieldValueStatements(URIImpl recordUri, Object record, Field field) { - Object fieldValue = PrivateAccessor.getFieldValue(record, field.getName()); + private static Collection getRdfFieldValueStatements(URIImpl recordUri, Object record, Field field, + long createdTime, DuplicateStatementFilter filter) { + Object fieldValue = PrivateAccessor.getFieldValue(record, field.getName()); if (fieldValue == null) { return new ArrayList(); } - int fieldCount = 0; Collection statements = new ArrayList(); if (!(fieldValue instanceof Collection)) { - fieldCount = 1; URIImpl fieldUri = RdfRecordUriFactory.createFieldUri(record, field, fieldValue); statements.addAll(linkFieldToRecord(recordUri, fieldUri)); statements.addAll(createCommonFieldStatements(record, recordUri, fieldUri, field.getName())); - statements.add(getFieldDenotesValueStatement(fieldUri, fieldValue)); + statements.addAll(getFieldDenotesValueStatement(fieldUri, fieldValue, createdTime, filter)); } else { - /* for each element in the collection a new fieldInstanceUri is generated */ + /* + * for each element in the collection a new fieldInstanceUri is + * generated + */ Collection coll = (Collection) fieldValue; for (Object object : coll) { URIImpl fieldUri = RdfRecordUriFactory.createFieldUri(record, field, object); if (fieldUri != null) { statements.addAll(linkFieldToRecord(recordUri, fieldUri)); - statements.add(getFieldDenotesValueStatement(fieldUri, object)); + statements.addAll(getFieldDenotesValueStatement(fieldUri, object, createdTime, filter)); statements.addAll(createCommonFieldStatements(record, recordUri, fieldUri, field.getName())); } } - // int startingFieldCount = Integer.valueOf(fieldInstanceUri.substring(fieldInstanceUri + // int startingFieldCount = + // Integer.valueOf(fieldInstanceUri.substring(fieldInstanceUri // .lastIndexOf(FIELD_VALUE) + 1)) - 1; // // Collection coll = (Collection) fieldValue; @@ -713,8 +758,9 @@ private static Collection getRdfFieldValueStatements(URIImpl recordUr } /** - * Generate statements about field (represented by {@code fieldInstanceUri}, and also a Subject - * in RDF statement) and field's value. Statements generated:
+ * Generate statements about field (represented by {@code fieldInstanceUri}, + * and also a Subject in RDF statement) and field's value. Statements + * generated:
* *
 	 *    <fieldInstanceUri> <iao:denotes> <fieldValue> .
@@ -724,16 +770,66 @@ private static Collection getRdfFieldValueStatements(URIImpl recordUr
 	 *            rdf field instance URI (subject)
 	 * @param fieldValue
 	 *            value
+	 * @param filter
 	 * @throws IllegalArgumentException
 	 *             if fieldValue's type is {@link Collection}
 	 * @return statements
 	 */
-	public static Statement getFieldDenotesValueStatement(URIImpl fieldInstanceUri, Object fieldValue) {
+	public static List<Statement> getFieldDenotesValueStatement(URIImpl fieldInstanceUri, Object fieldValue,
+			long createdTime, DuplicateStatementFilter filter) {
 		if (fieldValue instanceof Collection) {
 			throw new IllegalArgumentException("Collection fieldValue is not supported");
 		}
 		Value value = RdfUtil.getValue(fieldValue);
-		return new StatementImpl(fieldInstanceUri, IAO.DENOTES.uri(), value);
+
+		List<Statement> stmts = new ArrayList<Statement>();
+		/*
+		 * if we encounter a data source identifier that is declared either
+		 * unknown or a probable error, we create a record to hold the
+		 * identifier and optional data source string. The field then denotes
+		 * this new record. Unknown data source identifiers occur when the file
+		 * parsing code comes across an identifier for which it does not know
+		 * how to generate an appropriate URI. Perhaps "unknown" is not the
+		 * prefix to use here. Probable-error identifiers are identifiers
+		 * that the parsing code has detected to be incorrect, e.g. a UniProt
+		 * identifier that does not follow the regular expression pattern
+		 * stipulated by UniProt.
+		 */
+		if (fieldValue instanceof UnknownDataSourceIdentifier) {
+			UnknownDataSourceIdentifier id = (UnknownDataSourceIdentifier) fieldValue;
+			NonNormalizedIdentifierRecord record = new NonNormalizedIdentifierRecord(id.getDataElement(), id.getDataSourceStr());
+			URIImpl recordUri = RdfRecordUriFactory.createRecordUri(record);
+			URIImpl parentRecordUri = null;
+			String readerKey = null;
+			List<Statement> recordInstanceStatements = RdfRecordUtil.getRecordInstanceStatements(record, createdTime,
+					recordUri, parentRecordUri, readerKey, filter);
+			/*
+			 * the first statement returned is a dataset has_part record triple
+			 * which we do not need
+			 */
+			recordInstanceStatements.remove(0);
+			stmts.add(new StatementImpl(fieldInstanceUri, IAO.DENOTES.uri(), recordUri));
+			stmts.addAll(recordInstanceStatements);
+		} else if (fieldValue instanceof ProbableErrorDataSourceIdentifier) {
+			ProbableErrorDataSourceIdentifier id = (ProbableErrorDataSourceIdentifier) fieldValue;
+			ErroneousIdentifierRecord record = new ErroneousIdentifierRecord(id.getDataElement(),
+					id.getDataSourceStr(), id.getErrorMessage());
+			URIImpl recordUri = RdfRecordUriFactory.createRecordUri(record);
+			URIImpl parentRecordUri = null;
+			String readerKey = null;
+			List<Statement> recordInstanceStatements = RdfRecordUtil.getRecordInstanceStatements(record, createdTime,
+					recordUri, parentRecordUri, readerKey, filter);
+			/*
+			 * the first statement returned is a dataset has_part record triple
+			 * which we do not need
+			 */
+			recordInstanceStatements.remove(0);
+			stmts.add(new StatementImpl(fieldInstanceUri, IAO.DENOTES.uri(), recordUri));
+			stmts.addAll(recordInstanceStatements);
+		} else {
+			stmts.add(new StatementImpl(fieldInstanceUri, IAO.DENOTES.uri(), value));
+		}
+		return stmts;
 
 	}
 
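A minimal, hypothetical driver for the wrapper records introduced above: it shows how an ErroneousIdentifierRecord can be turned into record-instance statements directly. The createRecordUri / getRecordInstanceStatements calls and the null parentRecordUri / readerKey / filter arguments mirror the usage added to RdfRecordUriFactory.getFieldValueUri in this change; the ErroneousIdentifierRdfSketch class itself, the example accession PABCDE, and the error message are illustrative only.

```java
package edu.ucdenver.ccp.datasource.rdfizer.rdf.ice;

import java.util.List;

import org.openrdf.model.Statement;
import org.openrdf.model.impl.URIImpl;

/*
 * Illustrative sketch only; placed in the same package so the helpers used by
 * this diff are accessible.
 */
public class ErroneousIdentifierRdfSketch {

	public static void main(String[] args) {
		/* wrap a malformed identifier the way the updated RDFizer code does */
		ErroneousIdentifierRecord record = new ErroneousIdentifierRecord("PABCDE", "UniProtKB",
				"identifier does not match the expected accession pattern");
		URIImpl recordUri = RdfRecordUriFactory.createRecordUri(record);

		/* null parent URI, reader key, and filter, mirroring RdfRecordUriFactory.getFieldValueUri */
		List<Statement> stmts = RdfRecordUtil.getRecordInstanceStatements(record, System.currentTimeMillis(),
				recordUri, null, null, null);

		/* the first statement is the dataset has_part record triple, which the callers above discard */
		for (Statement stmt : stmts) {
			System.out.println(stmt);
		}
	}
}
```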
diff --git a/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RdfRecordWriterImpl.java b/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RdfRecordWriterImpl.java
index 451db76..2d6c6e5 100644
--- a/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RdfRecordWriterImpl.java
+++ b/datasource-rdfizer/src/main/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RdfRecordWriterImpl.java
@@ -54,23 +54,17 @@
 import org.apache.log4j.Logger;
 import org.openrdf.model.Resource;
 import org.openrdf.model.Statement;
-import org.openrdf.model.Value;
 import org.openrdf.model.impl.URIImpl;
 import org.openrdf.rio.RDFHandlerException;
 import org.openrdf.rio.RDFWriter;
 
-import edu.ucdenver.ccp.common.collections.CollectionsUtil;
 import edu.ucdenver.ccp.common.file.CharacterEncoding;
 import edu.ucdenver.ccp.common.file.FileUtil;
-import edu.ucdenver.ccp.common.reflection.PrivateAccessor;
 import edu.ucdenver.ccp.common.string.StringConstants;
 import edu.ucdenver.ccp.datasource.fileparsers.DataRecord;
 import edu.ucdenver.ccp.datasource.fileparsers.RecordReader;
 import edu.ucdenver.ccp.datasource.fileparsers.RecordUtil;
-import edu.ucdenver.ccp.datasource.identifiers.DataSourceElement;
-import edu.ucdenver.ccp.datasource.identifiers.DataSourceIdentifier;
 import edu.ucdenver.ccp.datasource.identifiers.DataSource;
-import edu.ucdenver.ccp.datasource.rdfizer.rdf.RdfId;
 import edu.ucdenver.ccp.datasource.rdfizer.rdf.filter.DuplicateStatementFilter;
 import edu.ucdenver.ccp.datasource.rdfizer.rdf.filter.InMemoryDuplicateStatementFilter;
 import edu.ucdenver.ccp.datasource.rdfizer.rdf.ice.RdfUtil.RdfFormat;
@@ -459,192 +453,192 @@ private void processRecord(DataRecord record, String readerKey, URIImpl recordUr
 		}
 	}
 
-	/**
-	 * Constant is assumed to be a valid URI String
-	 * 
-	 * @param tripleObj
-	 * @param 
-	 * @return
-	 */
-	private  Map, Collection> getConstantValues(String value) {
-		Map, Collection> type2valuesMap = new HashMap, Collection>();
-		Value constantValue = new URIImpl(value);
-		CollectionsUtil.addToOne2ManyMap(String.class, constantValue, type2valuesMap);
-		return type2valuesMap;
-	}
-
-	/**
-	 * 
-	 * @param 
-	 * @param record
-	 * @param tripleObj
-	 * @return
-	 */
-	private  Map, Collection> getLiteralValues(E record, String fieldName) {
-		Map, Collection> type2valuesMap = new HashMap, Collection>();
-		Object fieldValue = PrivateAccessor.getFieldValue(record, fieldName);
-		if (fieldValue == null)
-			return type2valuesMap;
-		if (fieldValue instanceof DataSourceElement) {
-			DataSourceElement element = (DataSourceElement) fieldValue;
-			Value literalValue = RdfUtil.createLiteral(element.getDataElement());
-			CollectionsUtil.addToOne2ManyMap(fieldValue.getClass(), literalValue, type2valuesMap);
-			return type2valuesMap;
-		}
-		if (fieldValue instanceof Collection) {
-			for (Object value : ((Collection) fieldValue))
-				if (value instanceof DataSourceElement) {
-					DataSourceElement element = (DataSourceElement) fieldValue;
-					Value literalValue = RdfUtil.createLiteral(element.getDataElement());
-					CollectionsUtil.addToOne2ManyMap(value.getClass(), literalValue, type2valuesMap);
-				} else
-					throw new RuntimeException(String.format("Unable to extract RDF object from field: %s. "
-							+ "Expected Collection but instead observed Collection<%s>.", fieldName,
-							value.getClass().getName()));
-			return type2valuesMap;
-		}
-		throw new RuntimeException(String.format("Unable to extract RDF object from field: %s (observedValue=%s)",
-				fieldName, fieldValue.toString()));
-	}
-
-	/**
-	 * Get values for triple definition where value is specified to use ICE formatting (ex:
-	 * {@code ensemblGeneId})
-	 * 
-	 * @param record
-	 * @param tripleObj
-	 * @return values
-	 */
-	private Map, Collection> getInformationContentEntityIDValues(DataRecord record, String fieldName) {
-		Map, Collection> type2valuesMap = new HashMap, Collection>();
-		Object fieldValue = PrivateAccessor.getFieldValue(record, fieldName);
-		if (fieldValue == null)
-			return type2valuesMap;
-
-		if (fieldValue instanceof DataSourceIdentifier) {
-			DataSourceIdentifier id = (DataSourceIdentifier) fieldValue;
-			RdfId rdfId = new RdfId(id);
-			Value iceIdValue = new URIImpl(RdfUtil.createKiaoUri(rdfId.getNamespace(), rdfId.getICE_ID()).toString());
-			CollectionsUtil.addToOne2ManyMap(fieldValue.getClass(), iceIdValue, type2valuesMap);
-			return type2valuesMap;
-		}
-
-		if (fieldValue instanceof Collection) {
-			for (Object value : ((Collection) fieldValue))
-				if (value instanceof DataSourceElement) {
-					DataSourceIdentifier id = (DataSourceIdentifier) value;
-					RdfId rdfId = new RdfId(id);
-					Value iceIdValue = new URIImpl(RdfUtil.createKiaoUri(rdfId.getNamespace(), rdfId.getICE_ID())
-							.toString());
-					CollectionsUtil.addToOne2ManyMap(value.getClass(), iceIdValue, type2valuesMap);
-				} else
-					throw new RuntimeException(String.format("Unable to extract RDF object from field: %s. "
-							+ "Expected Collection> but instead observed Collection<%s>.",
-							fieldName, value.getClass().getName()));
-			return type2valuesMap;
-		}
-
-		throw new RuntimeException(String.format("Unable to extract RDF object from field: %s (observedValue=%s)",
-				fieldName, fieldValue.toString()));
-	}
-
-	/**
-	 * Parser {@link DataRecord} from field of record.
-	 * 
-	 * @param 
-	 *            record type
-	 * @param record
-	 *            instance
-	 * @param fieldName
-	 *            field in record
-	 * @return record
-	 */
-	private  Map, Collection> getValues(E record, String fieldName) {
-		Map, Collection> type2valuesMap = new HashMap, Collection>();
-		Object fieldValue = PrivateAccessor.getFieldValue(record, fieldName);
-		if (fieldValue == null)
-			return type2valuesMap;
-
-		if (fieldValue instanceof DataSourceElement) {
-			DataSourceElement id = (DataSourceElement) fieldValue;
-			Value rdfValue = null;
-
-			if (id instanceof DataSourceIdentifier) {
-				RdfId rdfId = new RdfId((DataSourceIdentifier) id);
-				rdfValue = rdfId.getRdfValue();
-			} else
-				rdfValue = RdfUtil.createLiteral(id.getDataElement());
-
-			CollectionsUtil.addToOne2ManyMap(fieldValue.getClass(), rdfValue, type2valuesMap);
-			return type2valuesMap;
-		}
-
-		if (fieldValue instanceof Collection) {
-			for (Object value : ((Collection) fieldValue)) {
-				if (value instanceof DataSourceElement) {
-					DataSourceElement id = (DataSourceElement) value;
-					Value rdfValue = null;
-
-					if (id instanceof DataSourceIdentifier) {
-						RdfId rdfId = new RdfId((DataSourceIdentifier) id);
-						rdfValue = rdfId.getRdfValue();
-					} else
-						rdfValue = RdfUtil.createLiteral(id.getDataElement());
-
-					CollectionsUtil.addToOne2ManyMap(value.getClass(), rdfValue, type2valuesMap);
-				} else {
-					throw new RuntimeException(String.format("Unable to extract RDF object from field: %s. "
-							+ "Expected Collection but instead observed Collection<%s>.", fieldName,
-							value.getClass().getName()));
-				}
-			}
-
-			return type2valuesMap;
-		}
-
-		throw new RuntimeException(String.format("Unable to extract RDF object from field: %s (observedValue=%s)",
-				fieldName, fieldValue.toString()));
-	}
-
-	/**
-	 * Returns the subject Resource representation of the value of the field with the given name
-	 * contained in the input DataRecord. The field must be of type ResourceIdentifier.
-	 * 
-	 * @param record
-	 * @param fieldName
-	 * @return
-	 * 
-	 */
-	private Collection getSubjectResources(DataRecord record, String fieldName) {
-		Collection resources = new ArrayList();
-		Object fieldValue = PrivateAccessor.getFieldValue(record, fieldName);
-
-		if (fieldValue instanceof DataSourceIdentifier) {
-			DataSourceIdentifier id = (DataSourceIdentifier) fieldValue;
-			RdfId rdfId = new RdfId(id);
-			resources.add(new URIImpl(RdfUtil.createKiaoUri(rdfId.getNamespace(), id.toString()).toString()));
-			return resources;
-		}
-
-		if (fieldValue instanceof Collection) {
-			for (Object resource : ((Collection) fieldValue))
-				if (resource instanceof DataSourceIdentifier) {
-					DataSourceIdentifier id = (DataSourceIdentifier) resource;
-					RdfId rdfId = new RdfId(id);
-					resources.add(new URIImpl(RdfUtil.createKiaoUri(rdfId.getNamespace(), id.toString()).toString()));
-				} else {
-					String message = String.format("Unable to extract RDF subject from field: %s. "
-							+ "Expected Collection but instead observed Collection<%s>.",
-							fieldName, resource.getClass().getName());
-					throw new RuntimeException(message);
-				}
-
-			return resources;
-		}
-
-		throw new RuntimeException(String.format("Unable to extract RDF subject from field: %s (observedValue=%s)",
-				fieldName, fieldValue.toString()));
-	}
+//	/**
+//	 * Constant is assumed to be a valid URI String
+//	 * 
+//	 * @param tripleObj
+//	 * @param 
+//	 * @return
+//	 */
+//	private  Map, Collection> getConstantValues(String value) {
+//		Map, Collection> type2valuesMap = new HashMap, Collection>();
+//		Value constantValue = new URIImpl(value);
+//		CollectionsUtil.addToOne2ManyMap(String.class, constantValue, type2valuesMap);
+//		return type2valuesMap;
+//	}
+
+//	/**
+//	 * 
+//	 * @param 
+//	 * @param record
+//	 * @param tripleObj
+//	 * @return
+//	 */
+//	private  Map, Collection> getLiteralValues(E record, String fieldName) {
+//		Map, Collection> type2valuesMap = new HashMap, Collection>();
+//		Object fieldValue = PrivateAccessor.getFieldValue(record, fieldName);
+//		if (fieldValue == null)
+//			return type2valuesMap;
+//		if (fieldValue instanceof DataSourceElement) {
+//			DataSourceElement element = (DataSourceElement) fieldValue;
+//			Value literalValue = RdfUtil.createLiteral(element.getDataElement());
+//			CollectionsUtil.addToOne2ManyMap(fieldValue.getClass(), literalValue, type2valuesMap);
+//			return type2valuesMap;
+//		}
+//		if (fieldValue instanceof Collection) {
+//			for (Object value : ((Collection) fieldValue))
+//				if (value instanceof DataSourceElement) {
+//					DataSourceElement element = (DataSourceElement) fieldValue;
+//					Value literalValue = RdfUtil.createLiteral(element.getDataElement());
+//					CollectionsUtil.addToOne2ManyMap(value.getClass(), literalValue, type2valuesMap);
+//				} else
+//					throw new RuntimeException(String.format("Unable to extract RDF object from field: %s. "
+//							+ "Expected Collection but instead observed Collection<%s>.", fieldName,
+//							value.getClass().getName()));
+//			return type2valuesMap;
+//		}
+//		throw new RuntimeException(String.format("Unable to extract RDF object from field: %s (observedValue=%s)",
+//				fieldName, fieldValue.toString()));
+//	}
+
+//	/**
+//	 * Get values for triple definition where value is specified to use ICE formatting (ex:
+//	 * {@code ensemblGeneId})
+//	 * 
+//	 * @param record
+//	 * @param tripleObj
+//	 * @return values
+//	 */
+//	private Map, Collection> getInformationContentEntityIDValues(DataRecord record, String fieldName) {
+//		Map, Collection> type2valuesMap = new HashMap, Collection>();
+//		Object fieldValue = PrivateAccessor.getFieldValue(record, fieldName);
+//		if (fieldValue == null)
+//			return type2valuesMap;
+//
+//		if (fieldValue instanceof DataSourceIdentifier) {
+//			DataSourceIdentifier id = (DataSourceIdentifier) fieldValue;
+//			RdfId rdfId = new RdfId(id);
+//			Value iceIdValue = new URIImpl(RdfUtil.createKiaoUri(rdfId.getNamespace(), rdfId.getICE_ID()).toString());
+//			CollectionsUtil.addToOne2ManyMap(fieldValue.getClass(), iceIdValue, type2valuesMap);
+//			return type2valuesMap;
+//		}
+//
+//		if (fieldValue instanceof Collection) {
+//			for (Object value : ((Collection) fieldValue))
+//				if (value instanceof DataSourceElement) {
+//					DataSourceIdentifier id = (DataSourceIdentifier) value;
+//					RdfId rdfId = new RdfId(id);
+//					Value iceIdValue = new URIImpl(RdfUtil.createKiaoUri(rdfId.getNamespace(), rdfId.getICE_ID())
+//							.toString());
+//					CollectionsUtil.addToOne2ManyMap(value.getClass(), iceIdValue, type2valuesMap);
+//				} else
+//					throw new RuntimeException(String.format("Unable to extract RDF object from field: %s. "
+//							+ "Expected Collection> but instead observed Collection<%s>.",
+//							fieldName, value.getClass().getName()));
+//			return type2valuesMap;
+//		}
+//
+//		throw new RuntimeException(String.format("Unable to extract RDF object from field: %s (observedValue=%s)",
+//				fieldName, fieldValue.toString()));
+//	}
+
+//	/**
+//	 * Parser {@link DataRecord} from field of record.
+//	 * 
+//	 * @param 
+//	 *            record type
+//	 * @param record
+//	 *            instance
+//	 * @param fieldName
+//	 *            field in record
+//	 * @return record
+//	 */
+//	private  Map, Collection> getValues(E record, String fieldName) {
+//		Map, Collection> type2valuesMap = new HashMap, Collection>();
+//		Object fieldValue = PrivateAccessor.getFieldValue(record, fieldName);
+//		if (fieldValue == null)
+//			return type2valuesMap;
+//
+//		if (fieldValue instanceof DataSourceElement) {
+//			DataSourceElement id = (DataSourceElement) fieldValue;
+//			Value rdfValue = null;
+//
+//			if (id instanceof DataSourceIdentifier) {
+//				RdfId rdfId = new RdfId((DataSourceIdentifier) id);
+//				rdfValue = rdfId.getRdfValue();
+//			} else
+//				rdfValue = RdfUtil.createLiteral(id.getDataElement());
+//
+//			CollectionsUtil.addToOne2ManyMap(fieldValue.getClass(), rdfValue, type2valuesMap);
+//			return type2valuesMap;
+//		}
+//
+//		if (fieldValue instanceof Collection) {
+//			for (Object value : ((Collection) fieldValue)) {
+//				if (value instanceof DataSourceElement) {
+//					DataSourceElement id = (DataSourceElement) value;
+//					Value rdfValue = null;
+//
+//					if (id instanceof DataSourceIdentifier) {
+//						RdfId rdfId = new RdfId((DataSourceIdentifier) id);
+//						rdfValue = rdfId.getRdfValue();
+//					} else
+//						rdfValue = RdfUtil.createLiteral(id.getDataElement());
+//
+//					CollectionsUtil.addToOne2ManyMap(value.getClass(), rdfValue, type2valuesMap);
+//				} else {
+//					throw new RuntimeException(String.format("Unable to extract RDF object from field: %s. "
+//							+ "Expected Collection but instead observed Collection<%s>.", fieldName,
+//							value.getClass().getName()));
+//				}
+//			}
+//
+//			return type2valuesMap;
+//		}
+//
+//		throw new RuntimeException(String.format("Unable to extract RDF object from field: %s (observedValue=%s)",
+//				fieldName, fieldValue.toString()));
+//	}
+//
+//	/**
+//	 * Returns the subject Resource representation of the value of the field with the given name
+//	 * contained in the input DataRecord. The field must be of type ResourceIdentifier.
+//	 * 
+//	 * @param record
+//	 * @param fieldName
+//	 * @return
+//	 * 
+//	 */
+//	private Collection getSubjectResources(DataRecord record, String fieldName) {
+//		Collection resources = new ArrayList();
+//		Object fieldValue = PrivateAccessor.getFieldValue(record, fieldName);
+//
+//		if (fieldValue instanceof DataSourceIdentifier) {
+//			DataSourceIdentifier id = (DataSourceIdentifier) fieldValue;
+//			RdfId rdfId = new RdfId(id);
+//			resources.add(new URIImpl(RdfUtil.createKiaoUri(rdfId.getNamespace(), id.toString()).toString()));
+//			return resources;
+//		}
+//
+//		if (fieldValue instanceof Collection) {
+//			for (Object resource : ((Collection) fieldValue))
+//				if (resource instanceof DataSourceIdentifier) {
+//					DataSourceIdentifier id = (DataSourceIdentifier) resource;
+//					RdfId rdfId = new RdfId(id);
+//					resources.add(new URIImpl(RdfUtil.createKiaoUri(rdfId.getNamespace(), id.toString()).toString()));
+//				} else {
+//					String message = String.format("Unable to extract RDF subject from field: %s. "
+//							+ "Expected Collection but instead observed Collection<%s>.",
+//							fieldName, resource.getClass().getName());
+//					throw new RuntimeException(message);
+//				}
+//
+//			return resources;
+//		}
+//
+//		throw new RuntimeException(String.format("Unable to extract RDF subject from field: %s (observedValue=%s)",
+//				fieldName, fieldValue.toString()));
+//	}
 
 	/**
 	 * Output RDF record to a file based on record's file key.
diff --git a/datasource-rdfizer/src/test/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RdfRecordWriterImplErroneousAndUnknownIdentifierTest.java b/datasource-rdfizer/src/test/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RdfRecordWriterImplErroneousAndUnknownIdentifierTest.java
new file mode 100644
index 0000000..5edb920
--- /dev/null
+++ b/datasource-rdfizer/src/test/java/edu/ucdenver/ccp/datasource/rdfizer/rdf/ice/RdfRecordWriterImplErroneousAndUnknownIdentifierTest.java
@@ -0,0 +1,180 @@
+package edu.ucdenver.ccp.datasource.rdfizer.rdf.ice;
+
+/*
+ * #%L
+ * Colorado Computational Pharmacology's common module
+ * %%
+ * Copyright (C) 2012 - 2015 Regents of the University of Colorado
+ * %%
+ * Redistribution and use in source and binary forms, with or without modification,
+ * are permitted provided that the following conditions are met:
+ * 
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ * 
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ * 
+ * 3. Neither the name of the Regents of the University of Colorado nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software without
+ *    specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * #L%
+ */
+
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.GregorianCalendar;
+import java.util.List;
+
+import org.junit.Before;
+import org.junit.Test;
+
+import edu.ucdenver.ccp.common.collections.CollectionsUtil;
+import edu.ucdenver.ccp.common.file.CharacterEncoding;
+import edu.ucdenver.ccp.common.file.FileComparisonUtil;
+import edu.ucdenver.ccp.common.file.FileComparisonUtil.ColumnOrder;
+import edu.ucdenver.ccp.common.file.FileComparisonUtil.LineOrder;
+import edu.ucdenver.ccp.common.file.FileReaderUtil;
+import edu.ucdenver.ccp.common.file.FileUtil;
+import edu.ucdenver.ccp.common.file.FileWriterUtil;
+import edu.ucdenver.ccp.common.file.FileWriterUtil.FileSuffixEnforcement;
+import edu.ucdenver.ccp.common.file.FileWriterUtil.WriteMode;
+import edu.ucdenver.ccp.common.test.DefaultTestCase;
+import edu.ucdenver.ccp.datasource.fileparsers.pro.ProMappingFileParser;
+import edu.ucdenver.ccp.datasource.rdfizer.rdf.ice.RdfUtil.RdfFormat;
+
+/**
+ * This test uses the Protein Ontology mapping file because it has a simple
+ * format and contains both unknown and potentially erroneous data source identifiers.
+ */
+public class RdfRecordWriterImplErroneousAndUnknownIdentifierTest extends DefaultTestCase {
+
+	private File proMappingTxtFile_unknownIdentifier;
+	private File outputDirectory;
+	private final String expectedOutputFileName = "pr-ProMappingFileParser.0-0.nt";
+
+	@Before
+	public void setUp() throws Exception {
+		outputDirectory = folder.newFolder("output");
+		proMappingTxtFile_unknownIdentifier = folder.newFile("promapping.txt");
+		populateProMappingTxtFile_unknownIdentifier();
+	}
+
+	/**
+	 * PR:000000005 HGNC:11773 is_a 
+ * PR:000000005 UniProtKB_VAR:VAR_022359 is_a // unknown identifier type
+ * PR:000000006 UniProtKB:PABCDE exact // invalid UniProt ID
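A rough end-to-end sketch of the same parser-to-RDF pipeline the test above drives, but outside of JUnit. The ProMappingFileParser and RdfRecordWriterImpl calls are the ones used in the test; the ProMappingRdfSketch class, the rdf-output directory, and the pre-existing promapping.txt file are assumptions, and RdfRecordWriterImpl is used here as a raw type even though the class may carry type parameters.

```java
package edu.ucdenver.ccp.datasource.rdfizer.rdf.ice;

import java.io.File;

import edu.ucdenver.ccp.common.file.CharacterEncoding;
import edu.ucdenver.ccp.datasource.fileparsers.pro.ProMappingFileParser;
import edu.ucdenver.ccp.datasource.rdfizer.rdf.ice.RdfUtil.RdfFormat;

/*
 * Illustrative sketch only; assumes a promapping.txt file (like the three-line
 * sample built by the test above) already exists in the working directory.
 */
public class ProMappingRdfSketch {

	public static void main(String[] args) throws Exception {
		File outputDirectory = new File("rdf-output");
		outputDirectory.mkdirs();

		/* parse the promapping file and serialize its records as N-Triples */
		ProMappingFileParser parser = new ProMappingFileParser(new File("promapping.txt"),
				CharacterEncoding.US_ASCII);
		RdfRecordWriterImpl recordWriter = new RdfRecordWriterImpl(outputDirectory, RdfFormat.NTRIPLES);

		/* expected to write pr-ProMappingFileParser.0-0.nt (the name the test checks for) into rdf-output/ */
		recordWriter.processRecordReader(parser, System.currentTimeMillis());
	}
}
```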
+ */ + private void populateProMappingTxtFile_unknownIdentifier() throws IOException { + List lines = CollectionsUtil.createList("PR:000000005\tHGNC:11773\tis_a", + "PR:000000005\tUniProtKB_VAR:VAR_022359\tis_a", "PR:000000006\tUniProtKB:PABCDE\texact"); + FileWriterUtil.printLines(lines, proMappingTxtFile_unknownIdentifier, CharacterEncoding.US_ASCII, + WriteMode.OVERWRITE, FileSuffixEnforcement.OFF); + } + + @Test + public void testWriteRdf_unknown_and_erroneous_identifiers() throws IOException { + ProMappingFileParser parser = new ProMappingFileParser(proMappingTxtFile_unknownIdentifier, + CharacterEncoding.US_ASCII); + RdfRecordWriterImpl recordWriter = new RdfRecordWriterImpl( + outputDirectory, RdfFormat.NTRIPLES); + long createdTimeInMillis20101217 = new GregorianCalendar(2010, 11, 17).getTimeInMillis(); + recordWriter.processRecordReader(parser, createdTimeInMillis20101217); + + File outputFile = FileUtil.appendPathElementsToDirectory(outputDirectory, expectedOutputFileName); + System.err.println("dir contents: " + Arrays.toString(outputDirectory.list())); + assertTrue("Output file should have been created.", outputFile.exists()); + + List linesFromFile = FileReaderUtil.loadLinesFromFile(outputFile, CharacterEncoding.UTF_8); + for (String l : linesFromFile) { + System.err.println(l); + } + + List expectedLines = getExpectedLines(); + assertTrue("N-Triple Lines should be as expected.", FileComparisonUtil.hasExpectedLines(outputFile, + CharacterEncoding.UTF_8, expectedLines, null, LineOrder.ANY_ORDER, ColumnOrder.AS_IN_FILE)); + } + + private List getExpectedLines() { + + return CollectionsUtil + .createList( + + " .", + " .", + " .", + " .", + " \"2010-12-17T00:00:00.000-07:00\"^^ .", + " .", + " .", + " .", + " .", + " .", + " .", + " \"is_a\"@en .", + " .", + " .", + " .", + " .", + " .", + " .", + " .", + " .", + " .", + " .", + " .", + " .", + " .", + " .", + " .", + " .", + " .", + " .", + " .", + " .", + " .", + " .", + " \"UniProtKB_VAR:VAR_022359\"@en .", + " .", + " .", + " .", + " .", + " .", + " .", + " \"exact\"@en .", + " .", + " .", + " .", + " .", + " .", + " .", + " .", + " .", + " .", + " .", + " .", + " .", + " .", + " \"Invalid UniProt ID: PABCDE. This ID does not comply with the specifications for UniProt accession numbers as defined here: http://www.uniprot.org/manual/accession_numbers\"@en .", + " .", + " .", + " .", + " \"UniProtKB:PABCDE\"@en ."); + } + +}