BioPHP is an easy to use open source project. BioPHP implements a selection of simple tools for manipulating genomic data. PHP 7 has benchmarked much faster than Python, this class aims to build tools for basic RNA, DNA, and Protein manipulation. Future plans include creating an experimental de novo assembly tool, and online project portal.
$BioPHP = new BioPHP();
$result = $BioPHP->reverseSequence('ATGAAAGCATC');
$result = $BioPHP->complementDnaSequence($result);
//prints TTTCAT
$BioPHP = new BioPHP();
echo $BioPHP->gcContent('ATGAAAGCATC', 4)."\n";
//prints 36.3636
$BioPHP = new BioPHP();
echo $BioPHP->countPointMutations('CTGATGATGGGAGGAAATTTCA','CTGATGATGCGAGGGAATATCG')."\n";
//prints 4
$BioPHP = new BioPHP();
echo $BioPHP->translateDna('CTGATGATGGGAGGAAATTTCAGA')."\n";
//prints LMMGGNFR
$BioPHP = new BioPHP();
$proteinSequence = $BioPHP->translateDna('CTGATGATGGGAGGAAATTTCAGA')."\n";
echo $BioPHP->calcMonoIsotopicMass($proteinSequence)."\n\n";
//prints 906.42041
$BioPHP = new BioPHP();
echo $BioPHP->findMotifDNA('ATAT', 'GTATATCTATATGGCCATAT')."\n";
//prints 3 9 17
$BioPHP = new BioPHP();
print_r( $BioPHP->getReadingFrames('GTATATCTATATGGCCATAT') );
/*
* returns array containing...
Array
(
[0] => GTATATCTATATGGCCATAT
[1] => TATATCTATATGGCCATAT
[2] => ATATCTATATGGCCATAT
)
*/
//Protip: To get all 6 reading frames. Use the reverse and complement methods, then pass the result to getReadingFrames()
$fastaSequence = "
>Sequence 1
ATCCAGCT
>Sequence 2
GGGCAACT
>Sequence 3
ATGGATCT
";
$BioPHP = new BioPHP();
$fastaArray = $BioPHP->readFasta($fastaSequence); //read and parse the sequences
echo $BioPHP->mostLikelyCommonAncestor($fastaArray)."\n";
//prints ATGCAACT
$BioPHP = new BioPHP();
$uniprotFasta = $BioPHP->getUniprotFastaByID("B5ZC00"); //returns the result from Uniprot as a string
$fastaArray = $BioPHP->readFasta($uniprotFasta); //parses the response
echo $BioPHP->calcMonoIsotopicMass($fastaArray[0]['sequence'])."\n";
//prints 55319.0636
$BioPHP = new BioPHP();
$results = $BioPHP->findMotifProtein("N{P}[ST]{P}","B5ZC00");
print_r($results);
/*
* returns array containing...
Array
(
[0] => 85
[1] => 118
[2] => 142
[3] => 306
[4] => 395
)
*/
//Notes: The second parameter expects a protein access ID string used to lookup the full sequence via UniProt.
This task can be very CPU intensive. Using PHP 7, this method benchmarked faster than Python! Runtime results were about 1 second with a collection of 100 DNA strings of length 1 kbp each.
$fasta="
>Sequence 1
GATTACA
>Sequence 2
TAGACCA
>Sequence 3
ATACA";
$BioPHP = new BioPHP();
$fastaArray = $BioPHP->readFasta($fasta);
$result = $BioPHP->findLongestSharedMotif($fastaArray);
echo $result."\n";
//prints TA
$sequence = ">Test DNA Sequence
TCCCCGGACTCCAAACGCTCGGTAGCCGCCCCTGCTCGACATATTTAGCTCCCTGCATTG
ACGCCCTGGCAGCCCCGATCAATTTTCGTGGTTAAACGCGCGCTCGCAAGGGACATCGAC
CGGACCACAGAGCATAGCATGCCTTAGGATCGCCTGTCACTGTTCGTCTCCCTATTTGAG
CACTGTAGCCCCTGGTACCCCCGTCCTGAAGCGTGTGTGATACACGGTCTGCCCAAGATG
";
$BioPHP = new BioPHP();
$results = $BioPHP->printORFProteins($sequence);
print_r($results);
/*
* Returns the following array
Array
(
[0] => MP
[1] => MLCGPVDVPCERAFNHEN
[2] => MLCSVVRSMSLASARLTTKIDRGCQGVNAGS
[3] => MSLASARLTTKIDRGCQGVNAGS
)
*/
$BioPHP = new BioPHP();
$results = $BioPHP->findRestrictionSites("TCAATGCATGCGGGTCTATATGCAT", 4, 12);
//returns an array containing postion and length of restrictions
Inferring mRNA from Protein - calculates total different RNA strings from which the protein can be translated
Note: This method requires the use of the PHP Math Big Integer package which is a composer dependency of this project.
$BioPHP = new BioPHP();
$result = $BioPHP->inferringMRnaFromProteinCount("MTIFMFHNKNICTEYMGYYDQQIMQTEHKWYWDFHTFMIPNVFYEDVIKFKMRMLMIPNCFFGPWLFCKLEKCQYYEKATEPAPIVKDYTLFATGGAGREATFWPWFWTDENRPKDYYFQRDGLHHRNEPRLPHATCRRAYYQCEMIQYAIVTSCVLLAWKMFTDYGHTGVASEPKEPQEDIKCMKFPHMSWQKTLTEAFYELFPCYPEEFPNDRPWLLGHGFGPIVCTITAIDTTDVAKNIWKAVFRPHAGNWDIGFHSPCASEGCPDIMFPYFTCHDYKGMMCCFNLTMEVCCKQPRPTGIYMMVERMRIMNNREFAGFKHYREEHIKHYWRFGIFASPFVICWSPKTKGPPTSDWYMRDSEVVTQESELKESWQDMMEQHSMFGIPHCEKERWMNDNWKCKLFYYEVILWISNCECDQHVNCCVAHDPGTQVDWAWTLDMWWDQKYFGFFVRKKGQKYNMHWGAPYWLTNPTEKKDFIQHEQLGPLQTFRHCSSPAPT");
echo $result."\n";
//prints 884608