Skip to content

Commit ebaac3c

Browse files
updated genage parser
1 parent eb73cb8 commit ebaac3c

File tree

1 file changed

+56
-71
lines changed

1 file changed

+56
-71
lines changed

Diff for: genage/genage.php

+56-71
Original file line numberDiff line numberDiff line change
@@ -215,30 +215,48 @@ function human(){
215215
return false;
216216
}
217217

218+
/*
219+
[0] GenAge ID
220+
[1] symbol
221+
[2] aliases
222+
[3] name
223+
[4] why (comma-separated reason codes for inclusion)
224+
[5] entrez gene id
225+
[6] swissprot/uniprot
226+
[7] band
227+
[8] location start
228+
[9] location end
229+
[10] orientation
230+
[11] acc promoter
231+
[12] acc orf
232+
[13] acc cds
233+
[14] references
234+
[15] orthologs
235+
*/
236+
218237
while($l = parent::getReadFile()->read(200000)) {
219238
$data = str_getcsv($l);
239+
220240
$hagr = str_pad($data[0], 4, "0", STR_PAD_LEFT);
221-
$aliases = $data[1];
222-
$hgnc_symbol = $data[2];
223-
$common_name = $data[3];
224-
$ncbi_gene_id = $data[4];
225-
$reasons = $data[5];
226-
$band = $data[6];
227-
$location_start = $data[7];
228-
$location_end = $data[8];
229-
$orientation = $data[9];
230-
$unigene_id = $data[10];
231-
$swissprot = $data[11];
232-
$acc_promoter = $data[12];
233-
$acc_orf = $data[13];
234-
$acc_cds = $data[14];
235-
$references = $data[15];
236-
// $ppis = $data[16];
237-
// $notes = $data[17];
241+
$hgnc_symbol = $data[1];
242+
$aliases = $data[2];
243+
$label = $data[3];
244+
$reasons = $data[4];
245+
$ncbigeneid = $data[5];
246+
$swissprot = $data[6];
247+
$band = $data[7];
248+
$location_start = $data[8];
249+
$location_end = $data[9];
250+
$orientation = $data[10];
251+
$acc_promoter = $data[11];
252+
$acc_orf = $data[12];
253+
$acc_cds = $data[13];
254+
$references = $data[14];
255+
$orthologs = $data[15];
238256

239257
$hagr_id = "hagr:".$hagr;
240258
parent::addRDF(
241-
parent::describeIndividual($hagr_id, $data[3], parent::getVoc()."Human-Aging-Related-Gene").
259+
parent::describeIndividual($hagr_id, $label, parent::getVoc()."Human-Aging-Related-Gene").
242260
parent::describeClass(parent::getVoc()."Human-Aging-Related-Gene","Human Aging Related Gene")
243261
);
244262

@@ -252,17 +270,16 @@ function human(){
252270
}
253271

254272
parent::addRDF(
255-
parent::triplifyString($hagr_id, parent::getVoc()."hgnc-symbol", parent::safeLiteral($hgnc_symbol))
273+
parent::triplify($hagr_id, parent::getVoc()."x-hgnc.symbol", "hgnc.symbol:".parent::safeLiteral($hgnc_symbol))
256274
);
257275

258276
parent::addRDF(
259-
parent::triplify($hagr_id, parent::getVoc()."x-ncbigene", "ncbigene:".$ncbi_gene_id)
277+
parent::triplify($hagr_id, parent::getVoc()."x-ncbigene", "ncbigene:".$ncbigeneid)
260278
);
261279

262280
if($reasons !== ""){
263281
$reasons_split = explode(",", $reasons);
264282
foreach($reasons_split as $reason){
265-
266283
parent::addRDF(
267284
parent::triplify($hagr_id, parent::getVoc()."inclusion-criteria", parent::getVoc().$inclusion_criteria[$reason][0])
268285
);
@@ -292,12 +309,6 @@ function human(){
292309
);
293310
}
294311

295-
if($unigene_id !== ""){
296-
parent::addRDF(
297-
parent::triplify($hagr_id, parent::getVoc()."x-unigene", "unigene:".$unigene_id)
298-
);
299-
}
300-
301312
if($swissprot !== ""){
302313
if(strstr($swissprot, "_")){
303314
parent::addRDF(
@@ -368,32 +379,36 @@ function models(){
368379
);
369380

370381
$h = explode(",", parent::getReadFile()->read());
371-
$expected_columns = 10;
382+
$expected_columns = 8;
372383
if(($n = count($h)) != $expected_columns) {
373384
trigger_error("Found $n columns in gene file - expecting $expected_columns!", E_USER_WARNING);
374385
return false;
375386
}
376387

388+
/*
389+
[0] GenAge ID
390+
[1] symbol
391+
[2] name
392+
[3] organism
393+
[4] entrez gene id
394+
[5] avg lifespan change (max obsv) - maximum observed percent change in average lifespan
395+
[6] lifespan effect
396+
[7] longevity influence
397+
*/
377398
while($l = parent::getReadFile()->read(200000)) {
378399
$data = str_getcsv($l);
379-
400+
380401
$genage = str_pad($data[0], 4, "0", STR_PAD_LEFT);
381-
$name = $data[1];
382-
$gene_symbol = $data[2];
402+
$gene_symbol = $data[1];
403+
$name = $data[2];
383404
$organism = $data[3];
384-
$function = $data[4];
385-
$ncbi_gene_id = $data[5];
386-
// $ensembl_id = $data[6];
387-
// $uniprot_id = $data[7];
388-
// $unigene_id = $data[8];
389-
$max_percent_obsv_avg_lifespan_change = $data[6];
390-
$lifespan_effect = $data[7];
391-
$longevity_influence = $data[8];
392-
$observations = $data[9];
405+
$ncbi_gene_id = $data[4];
406+
$max_percent_obsv_avg_lifespan_change = $data[5];
407+
$lifespan_effect = $data[6];
408+
$longevity_influence = $data[7];
393409

394410
$genage_id = parent::getNamespace().$genage;
395411

396-
397412
parent::addRDF(
398413
parent::describeIndividual($genage_id, $name, parent::getVoc()."Aging-Related-Gene").
399414
parent::describeClass(parent::getVoc()."Aging-Related-Gene","Aging Related Gene")
@@ -407,42 +422,12 @@ function models(){
407422
parent::triplify($genage_id, parent::getVoc()."taxon", "ncbitaxon:".$tax_ids[$organism])
408423
);
409424

410-
if($function !== ""){
411-
parent::addRDF(
412-
parent::triplifyString($genage_id, parent::getVoc()."function", parent::safeLiteral($function))
413-
);
414-
}
415-
416425
if($ncbi_gene_id !== ""){
417426
parent::addRDF(
418427
parent::triplify($genage_id, parent::getVoc()."x-ncbigene", "ncbigene:".$ncbi_gene_id)
419428
);
420429
}
421-
/*
422-
423-
if($ensembl_id !== ""){
424-
parent::addRDF(
425-
parent::triplify($genage_id, parent::getVoc()."x-ensembl", "ensembl:".$ensembl_id)
426-
);
427-
}
428-
if($uniprot_id !== ""){
429-
if(strstr($uniprot_id, "_")){
430-
parent::addRDF(
431-
parent::triplifyString($genage_id, parent::getVoc()."uniprot-entry", parent::safeLiteral($uniprot_id))
432-
);
433-
} else {
434-
parent::addRDF(
435-
parent::triplify($genage_id, parent::getVoc()."x-uniprot", "uniprot:".$uniprot_id)
436-
);
437-
}
438-
}
439430

440-
if($unigene_id !== ""){
441-
parent::addRDF(
442-
parent::triplify($genage_id, parent::getVoc()."x-unigene", "unigene:".$unigene_id)
443-
);
444-
}
445-
*/
446431
if($max_percent_obsv_avg_lifespan_change !== ""){
447432
parent::addRDF(
448433
parent::triplifyString($genage_id, parent::getVoc()."maximum-percent-observed-average-lifespan-change", parent::safeLiteral($max_percent_obsv_avg_lifespan_change))

0 commit comments

Comments
 (0)