Skip to content

Commit

Permalink
added SPLIT method for antibody mapping in ENCODE
Browse files Browse the repository at this point in the history
  • Loading branch information
sunbrn committed Sep 23, 2019
1 parent d5dc7dd commit 3d9a775
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 28 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,15 @@
<flattener_rule_base>/Users/abernasconi/Documents/gitProjects/GMQL-Importer/Example/cleaner_rule_base.txt</flattener_rule_base>
<download_enabled>false</download_enabled>
<transform_enabled>false</transform_enabled>
<cleaner_enabled>false</cleaner_enabled>
<mapper_enabled>true</mapper_enabled>
<cleaner_enabled>true</cleaner_enabled>
<mapper_enabled>false</mapper_enabled>
<enricher_enabled>false</enricher_enabled>
<flattener_enabled>false</flattener_enabled>
<load_enabled>false</load_enabled>
<parallel_execution>false</parallel_execution>
</settings>
<source_list>
<source name="PROVA_ENCODE">
<source name="PROVA_ENCODE1">
<url>https://www.encodeproject.org/</url>
<source_working_directory>encode</source_working_directory>
<downloader>it.polimi.genomics.metadata.downloader_transformer.encode.EncodeDownloader</downloader>
Expand Down Expand Up @@ -178,7 +178,7 @@
<parameter>
<description>The dataset name on the server</description>
<key>loading_name</key>
<value>PROVA_ENCODE</value>
<value>PROVA_ENCODE1_narrow</value>
</parameter>
<parameter>
<description>The dataset description on the server, when clicking Show Info</description>
Expand Down Expand Up @@ -234,7 +234,7 @@
<parameter>
<description>The dataset name on the server</description>
<key>loading_name</key>
<value>PROVA_ENCODE</value>
<value>PROVA_ENCODE1_broad</value>
</parameter>
<parameter>
<description>The dataset description on the server, when clicking Show Info</description>
Expand Down Expand Up @@ -271,7 +271,7 @@
<description>These values restrict the set of files that are downloaded</description>
<type>url_generation</type>
<key>accession</key>
<value>ENCSR635OSG</value>
<value>ENCSR741GJT</value>
<!--<value>ENCSR726ZZX</value>-->
</parameter>
</parameter_list>
Expand Down Expand Up @@ -436,7 +436,7 @@
<parameter>
<description>The dataset name on the server</description>
<key>loading_name</key>
<value>PROVA_ENCODE</value>
<value>PROVA_ENCODE2_narrow</value>
</parameter>
<parameter>
<description>The dataset description on the server, when clicking Show Info</description>
Expand Down Expand Up @@ -491,7 +491,7 @@
<parameter>
<description>The dataset name on the server</description>
<key>loading_name</key>
<value>PROVA_ENCODE</value>
<value>PROVA_ENCODE2_broad</value>
</parameter>
<parameter>
<description>The dataset description on the server, when clicking Show Info</description>
Expand Down Expand Up @@ -528,7 +528,7 @@
<description>These values restrict the set of files that are downloaded</description>
<type>url_generation</type>
<key>accession</key>
<value>ENCSR726ZZX</value>
<value>ENCSR311DQO</value>
</parameter>
</parameter_list>
</dataset>
Expand Down
41 changes: 23 additions & 18 deletions Example/xml/ExampleConfiguration.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,27 @@
xsi:schemaLocation="http://polimi.it/GDMImporter /Example/xml/configurationSchema.xsd">
<settings>
<!--BASE WORKING FOLDER FOR THE IMPORTER-->
<base_working_directory>/Users/canakoglu/GMQL-sources/GMQL-Importer/Example</base_working_directory>
<gcm_config_file>/Users/canakoglu/GMQL-sources/GMQL-Importer/src/main/resources/application.conf</gcm_config_file>
<flattener_rule_base>/Users/canakoglu/GMQL-sources/GMQL-Importer/Example/cleaner_rule_base.txt</flattener_rule_base>
<download_enabled>true</download_enabled>
<transform_enabled>true</transform_enabled>
<cleaner_enabled>true</cleaner_enabled>
<base_working_directory>/Users/abernasconi/Documents/gitProjects//GMQL-Importer/Example</base_working_directory>
<gcm_config_file>/Users/abernasconi/Documents/gitProjects/GMQL-Importer/src/main/resources/application.conf</gcm_config_file>
<mapper_source>ENCODE</mapper_source>
<database_connection_url>jdbc:postgresql://localhost/gmql_metadata_anna</database_connection_url>
<database_connection_user>geco</database_connection_user>
<database_connection_pw>geco78</database_connection_pw>
<database_connection_driver>org.postgresql.Driver</database_connection_driver>
<flattener_rule_base>/Users/abernasconi/Documents/gitProjects/GMQL-Importer/Example/cleaner_rule_base.txt</flattener_rule_base>
<download_enabled>false</download_enabled>
<transform_enabled>false</transform_enabled>
<cleaner_enabled>false</cleaner_enabled>
<mapper_enabled>true</mapper_enabled>
<enricher_enabled>true</enricher_enabled>
<flattener_enabled>true</flattener_enabled>
<load_enabled>true</load_enabled>
<enricher_enabled>false</enricher_enabled>
<flattener_enabled>false</flattener_enabled>
<load_enabled>false</load_enabled>
<parallel_execution>false</parallel_execution>
</settings>
<source_list>
<source name="ENCODE">
<source name="ENCODEPROVAANNA">
<url>https://www.encodeproject.org/</url>
<source_working_directory>ENCODE</source_working_directory>
<source_working_directory>ENCODEPROVAANNA</source_working_directory>
<downloader>it.polimi.genomics.metadata.downloader_transformer.encode.EncodeDownloader</downloader>
<transformer>it.polimi.genomics.metadata.downloader_transformer.encode.EncodeTransformer</transformer>
<loader>it.polimi.genomics.metadata.step.GMQLLoader</loader>
Expand Down Expand Up @@ -134,14 +139,13 @@
<parameter>
<description>Cleaner definition rule base file</description>
<key>rule_base</key>
<value>/Users/canakoglu/GMQL-sources/GMQL-Importer/Example/examples_meta/ENCODE_rules.txt</value>
<value>/Users/abernasconi/Documents/gitProjects/GMQL-Importer/Example/examples_meta/ENCODE_rules.txt</value>
</parameter>
<parameter>
<description>Mappings </description>
<description>Mappings</description>
<key>mappings</key>
<value>/Users/canakoglu/GMQL-sources/GMQL-Importer/Example/hello.xml</value>
<value>/Users/abernasconi/Documents/gitProjects/GMQL-Importer/Example/xml/settingsEncode.xml</value>
</parameter>

</parameter_list>
<dataset_list>
<dataset name="exampleNarrowPeak">
Expand All @@ -159,7 +163,7 @@
<parameter>
<description>The dataset name on the server</description>
<key>loading_name</key>
<value>HG19_ENCODE_BROAD_NOV_2027</value>
<value>ANNAHG19_ENCODE_BROAD_NOV_2027</value>
</parameter>
<parameter>
<description>The dataset description on the server, when clicking Show Info</description>
Expand Down Expand Up @@ -192,9 +196,10 @@
<parameter>
<description>These values restrict the set of files that are downloaded</description>
<type>url_generation</type>
<key>files.accession</key>
<key>accession</key>
<!--<value>ENCFF001SSQ</value>-->
<value>ENCFF429VMY</value>
<value>ENCSR000EOT</value>
<!--<value>ENCSR635OSG</value>-->
</parameter>

</parameter_list>
Expand Down
6 changes: 5 additions & 1 deletion Example/xml/settingsEncode.xml
Original file line number Diff line number Diff line change
Expand Up @@ -113,10 +113,14 @@
<source_key>target__label</source_key>
<global_key>target</global_key>
</mapping>
<mapping>
<mapping method="SPLIT">
<source_key>antibody__dbxrefs</source_key>
<global_key>antibody</global_key>
</mapping>
<mapping method="CHECKPREC">
<source_key>antibody__accession</source_key>
<global_key>antibody</global_key>
</mapping>
<mapping method="ONTOLOGY">
<source_key>experiment__assay_term_id</source_key>
<global_key>ontologicalcode</global_key>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ object InsertMethod {
case "SOURCEPAGEGDC" => acc.split(",").map(x => conf.getString("import.gdc_source_page") + x).mkString(",")
case "REMOVE" => this.remove(remCharacter, newParam) //this is only used in TCGA2BED mapping
case "SUB" => this.replace(subCharacter, newCharacter, acc)
case "SPLIT" => acc.split(",").head
case "UPPERCASE" => acc.toUpperCase()
case "LOWERCASE" => acc.toLowerCase()
case "DATETODAYS" => this.selectDayByParam(newParam)
Expand Down

0 comments on commit 3d9a775

Please sign in to comment.