From 34e15fdacedea3d4486c6a5e0a1b01321bbfbdc0 Mon Sep 17 00:00:00 2001 From: Joseph Min Date: Thu, 31 Oct 2019 20:05:41 -0700 Subject: [PATCH] Lamanno intron (#19) * update directory structure * update lamanno intron generation * ignore 5' and 3' utrs --- kb_python/fasta.py | 85 ++++++++++++++++++++------- kb_python/gtf.py | 4 +- kb_python/ref.py | 31 +++++++++- tests/fixtures/fasta/cdna_split.fa | 2 +- tests/fixtures/fasta/intron_split.fa | 10 ++-- tests/fixtures/gtf/not_sorted.gtf | 2 +- tests/fixtures/gtf/sorted.gtf | 2 +- tests/fixtures/gtf/t2g_intron.txt | 9 +-- tests/fixtures/gtf/t2g_intron_gtf.txt | 6 ++ tests/mixins.py | 5 +- tests/test_fasta.py | 13 ++-- tests/test_ref.py | 33 +++++++---- 12 files changed, 146 insertions(+), 56 deletions(-) create mode 100644 tests/fixtures/gtf/t2g_intron_gtf.txt diff --git a/kb_python/fasta.py b/kb_python/fasta.py index 6d13099..33c51ff 100644 --- a/kb_python/fasta.py +++ b/kb_python/fasta.py @@ -1,4 +1,5 @@ import logging +import re from .gtf import GTF from .utils import open_as_text @@ -12,6 +13,8 @@ class FASTA: :param fasta_path: path to FASTA file :type fasta_path: str """ + PARSER = re.compile(r'^>(?P<sequence_id>\S+)(?P<group>.*)') + GROUP_PARSER = re.compile(r'(?P<key>\S+?):(?P<value>\S+)') BASEPAIRS = { 'a': 'T', 'A': 'T', @@ -48,15 +51,22 @@ def make_header(seq_id, attributes): @staticmethod def parse_header(line): - """Get the sequence ID from a FASTA header. + """Parse information from a FASTA header. 
:param line: FASTA header line :type line: str - :return: sequence ID - :rtype: str + :return: parsed information + :rtype: dict """ - return line.strip().split(' ', 1)[0][1:] + match = FASTA.PARSER.match(line) + if match: + groupdict = match.groupdict() + groupdict['group'] = dict( + FASTA.GROUP_PARSER.findall(groupdict.get('group', '')) + ) + return groupdict + return None @staticmethod def reverse_complement(sequence): @@ -77,20 +87,20 @@ def entries(self): :rtype: generator """ with open_as_text(self.fasta_path, 'r') as f: - sequence_id = None + info = None sequence = '' for line in f: if line.startswith('>'): - if sequence_id: - yield sequence_id, sequence + if info: + yield info, sequence sequence = '' - sequence_id = FASTA.parse_header(line) + info = FASTA.parse_header(line) else: sequence += line.strip() - if sequence_id: - yield sequence_id, sequence + if info: + yield info, sequence def sort(self, out_path): """Sort the FASTA file by sequence ID. @@ -105,7 +115,9 @@ def sort(self, out_path): while line: if line.startswith('>'): - to_sort.append([FASTA.parse_header(line), position, None]) + to_sort.append([ + FASTA.parse_header(line)['sequence_id'], position, None + ]) position = f.tell() line = f.readline() @@ -153,7 +165,8 @@ def generate_cdna_fasta(fasta_path, gtf_path, out_path): with open_as_text(out_path, 'w') as f: previous_gtf_entry = None - for sequence_id, sequence in fasta.entries(): + for info, sequence in fasta.entries(): + sequence_id = info['sequence_id'] logger.debug( 'Generating cDNA from chromosome {}'.format(sequence_id) ) @@ -235,7 +248,7 @@ def generate_cdna_fasta(fasta_path, gtf_path, out_path): return out_path -def generate_intron_fasta(fasta_path, gtf_path, out_path): +def generate_intron_fasta(fasta_path, gtf_path, out_path, flank=30): """Generate an intron FASTA using the genome and GTF. 
This function assumes the order in which the chromosomes appear in the @@ -245,6 +258,8 @@ def generate_intron_fasta(fasta_path, gtf_path, out_path): 1. transcript - exons 2. 5' UTR 3. 3' UTR + Additionally, append 30-bp (k - 1 where k = 31) flanks to each intron, + combining sections that overlap into a single FASTA entry. :param fasta_path: path to genomic FASTA file :type fasta_path: str @@ -252,6 +267,8 @@ def generate_intron_fasta(fasta_path, gtf_path, out_path): :type gtf_path: str :param out_path: path to intron FASTA to generate :type out_path: str + :param flank: the size of intron flanks, in bases, defaults to `30` + :type flank: int, optional :return: path to generated intron FASTA :rtype: str @@ -262,7 +279,8 @@ def generate_intron_fasta(fasta_path, gtf_path, out_path): with open_as_text(out_path, 'w') as f: previous_gtf_entry = None - for sequence_id, sequence in fasta.entries(): + for info, sequence in fasta.entries(): + sequence_id = info['sequence_id'] logger.debug( 'Generating introns from chromosome {}'.format(sequence_id) ) @@ -294,7 +312,6 @@ def generate_intron_fasta(fasta_path, gtf_path, out_path): transcript = '{}.{}'.format( transcript_id, transcript_version ) if transcript_version else transcript_id - transcript += '-I' transcript_exons.setdefault(transcript, []).append((start, end)) @@ -306,7 +323,6 @@ def generate_intron_fasta(fasta_path, gtf_path, out_path): transcript = '{}.{}'.format( transcript_id, transcript_version ) if transcript_version else transcript_id - transcript += '-I' gene_id = gtf_entry['group']['gene_id'] gene_version = gtf_entry['group'].get('gene_version', None) @@ -354,14 +370,40 @@ def generate_intron_fasta(fasta_path, gtf_path, out_path): else: introns.append(transcript_interval) - intron = '' + index = 1 + flank_start = None + flank_end = None for start, end in introns: - intron += sequence[start - 1:end] - - if intron: + if flank_start is None: + flank_start = max(start - flank, transcript_interval[0]) + if 
flank_end is None or start - flank <= flank_end: + flank_end = min(end + flank, transcript_interval[1]) + else: + intron = sequence[flank_start - 1:flank_end] + f.write( + '{}\n'.format( + FASTA.make_header( + '{}-I.{}'.format(transcript, index), + attributes + ) + ) + ) + f.write( + '{}\n'.format( + intron if dict(attributes)['strand'] == + '+' else FASTA.reverse_complement(intron) + ) + ) + index += 1 + flank_start = max(start - flank, transcript_interval[0]) + flank_end = min(end + flank, transcript_interval[1]) + if flank_start is not None and flank_end is not None: + intron = sequence[flank_start - 1:flank_end] f.write( '{}\n'.format( - FASTA.make_header(transcript, attributes) + FASTA.make_header( + '{}-I.{}'.format(transcript, index), attributes + ) ) ) f.write( @@ -370,6 +412,7 @@ def generate_intron_fasta(fasta_path, gtf_path, out_path): '+' else FASTA.reverse_complement(intron) ) ) + index += 1 return out_path diff --git a/kb_python/gtf.py b/kb_python/gtf.py index f6a6ffa..7994421 100644 --- a/kb_python/gtf.py +++ b/kb_python/gtf.py @@ -79,9 +79,7 @@ def sort(self, out_path): while line: if not line.startswith('#') and not line.isspace(): entry = GTF.parse_entry(line) - if entry['feature'] in ('transcript', 'exon', - 'five_prime_utr', - 'three_prime_utr'): + if entry['feature'] in ('transcript', 'exon'): to_sort.append( (entry['seqname'], entry['start'], position) ) diff --git a/kb_python/ref.py b/kb_python/ref.py index 5487b90..1b6bde4 100644 --- a/kb_python/ref.py +++ b/kb_python/ref.py @@ -54,6 +54,30 @@ def sort_fasta(fasta_path, out_path): return out_path +def create_t2g_from_fasta(fasta_path, t2g_path): + """Parse FASTA headers to get transcripts-to-gene mapping. 
+ + :param fasta_path: path to FASTA file + :type fasta_path: str + :param t2g_path: path to output transcript-to-gene mapping + :type t2g_path: str + + :return: dictionary containing path to generated t2g mapping + :rtype: dict + """ + logger.info('Creating transcript-to-gene mapping at {}'.format(t2g_path)) + with open_as_text(t2g_path, 'w') as f: + fasta = FASTA(fasta_path) + for info, _ in fasta.entries(): + f.write( + '{}\t{}\t{}\n'.format( + info['sequence_id'], info['group']['gene_id'], + info['group'].get('gene_name', '') + ) + ) + return {'t2g': t2g_path} + + def create_t2g_from_gtf(gtf_path, t2g_path, intron=False): """Creates a transcript-to-gene mapping from a GTF file. @@ -114,7 +138,8 @@ def create_t2c(fasta_path, t2c_path): """ fasta = FASTA(fasta_path) with open_as_text(t2c_path, 'w') as f: - for sequence_id, _ in fasta.entries(): + for info, _ in fasta.entries(): + sequence_id = info['sequence_id'] f.write('{}\n'.format(sequence_id)) return {'t2c': t2c_path} @@ -284,8 +309,6 @@ def ref_lamanno( :rtype: dict """ results = {} - t2g_result = create_t2g_from_gtf(gtf_path, t2g_path, intron=True) - results.update(t2g_result) if not os.path.exists(index_path) or overwrite: sorted_fasta_path = sort_fasta( fasta_path, os.path.join(temp_dir, SORTED_FASTA_FILENAME) @@ -321,6 +344,8 @@ def ref_lamanno( out_path=os.path.join(temp_dir, COMBINED_FILENAME), temp_dir=temp_dir ) + t2g_result = create_t2g_from_fasta(combined_path, t2g_path) + results.update(t2g_result) index_result = kallisto_index(combined_path, index_path) results.update(index_result) else: diff --git a/tests/fixtures/fasta/cdna_split.fa b/tests/fixtures/fasta/cdna_split.fa index 4bfd9b1..6d18966 100644 --- a/tests/fixtures/fasta/cdna_split.fa +++ b/tests/fixtures/fasta/cdna_split.fa @@ -1,6 +1,6 @@ >TRANSCRIPT_A.1 gene_id:GENE_A.1 gene_name:GENE_A_NAME chr:1 start:1 end:2 strand:+ CA >TRANSCRIPT_B gene_id:GENE_B.1 gene_name:GENE_B_NAME chr:2 start:1 end:10 strand:+ -TCGATC +TGATC >TRANSCRIPT_C 
gene_id:GENE_C gene_name:GENE_C_NAME chr:2 start:2 end:14 strand:- TAATCGA diff --git a/tests/fixtures/fasta/intron_split.fa b/tests/fixtures/fasta/intron_split.fa index a62b3ce..88a1954 100644 --- a/tests/fixtures/fasta/intron_split.fa +++ b/tests/fixtures/fasta/intron_split.fa @@ -1,4 +1,6 @@ ->TRANSCRIPT_B-I gene_id:GENE_B.1 gene_name:GENE_B_NAME chr:2 start:1 end:10 strand:+ -AGTG ->TRANSCRIPT_C-I gene_id:GENE_C gene_name:GENE_C_NAME chr:2 start:2 end:14 strand:- -AGATCC +>TRANSCRIPT_B-I.1 gene_id:GENE_B.1 gene_name:GENE_B_NAME chr:2 start:1 end:10 strand:+ +ATCGG +>TRANSCRIPT_B-I.2 gene_id:GENE_B.1 gene_name:GENE_B_NAME chr:2 start:1 end:10 strand:+ +CTG +>TRANSCRIPT_C-I.1 gene_id:GENE_C gene_name:GENE_C_NAME chr:2 start:2 end:14 strand:- +CAGATCCG diff --git a/tests/fixtures/gtf/not_sorted.gtf b/tests/fixtures/gtf/not_sorted.gtf index fd8cbc7..74fafef 100644 --- a/tests/fixtures/gtf/not_sorted.gtf +++ b/tests/fixtures/gtf/not_sorted.gtf @@ -2,7 +2,7 @@ 2 havana exon 2 3 . - . gene_id "GENE_C"; gene_name "GENE_C_NAME"; transcript_id "TRANSCRIPT_C", gene_source "havana"; gene_biotype "TEC"; 2 havana exon 10 14 . - . gene_id "GENE_C"; gene_name "GENE_C_NAME"; transcript_id "TRANSCRIPT_C", gene_source "havana"; gene_biotype "TEC"; 2 havana transcript 2 14 . - . gene_id "GENE_C"; gene_name "GENE_C_NAME"; transcript_id "TRANSCRIPT_C"; ; gene_source "havana"; gene_biotype "TEC"; transcript_name "4933401J01Rik-201"; transcript_source "havana"; transcript_biotype "TEC"; tag "basic"; transcript_support_level "NA"; -2 havana exon 2 3 . + . gene_id "GENE_B"; gene_version "1"; gene_name "GENE_B_NAME"; transcript_id "TRANSCRIPT_B", gene_source "havana"; gene_biotype "TEC"; +2 havana exon 2 2 . + . gene_id "GENE_B"; gene_version "1"; gene_name "GENE_B_NAME"; transcript_id "TRANSCRIPT_B", gene_source "havana"; gene_biotype "TEC"; 2 havana exon 5 8 . + . 
gene_id "GENE_B"; gene_version "1"; gene_name "GENE_B_NAME"; transcript_id "TRANSCRIPT_B", gene_source "havana"; gene_biotype "TEC"; 2 havana transcript 1 10 . + . gene_id "GENE_B"; gene_version "1"; transcript_id "TRANSCRIPT_B"; gene_name "GENE_B_NAME"; gene_source "havana"; gene_biotype "TEC"; transcript_name "4933401J01Rik-201"; transcript_source "havana"; transcript_biotype "TEC"; tag "basic"; transcript_support_level "NA"; 1 havana exon 1 2 . + . gene_id "GENE_A"; gene_version "1"; transcript_id "TRANSCRIPT_A"; transcript_version "1"; exon_number "1"; gene_name "GENE_A_NAME"; gene_source "havana"; gene_biotype "TEC"; transcript_name "4933401J01Rik-201"; transcript_source "havana"; transcript_biotype "TEC"; exon_id "ENSMUSE00001343744"; exon_version "1"; tag "basic"; transcript_support_level "NA"; diff --git a/tests/fixtures/gtf/sorted.gtf b/tests/fixtures/gtf/sorted.gtf index 245393b..b7ffa53 100644 --- a/tests/fixtures/gtf/sorted.gtf +++ b/tests/fixtures/gtf/sorted.gtf @@ -3,6 +3,6 @@ 2 havana transcript 1 10 . + . gene_id "GENE_B"; gene_version "1"; transcript_id "TRANSCRIPT_B"; gene_name "GENE_B_NAME"; gene_source "havana"; gene_biotype "TEC"; transcript_name "4933401J01Rik-201"; transcript_source "havana"; transcript_biotype "TEC"; tag "basic"; transcript_support_level "NA"; 2 havana exon 2 3 . - . gene_id "GENE_C"; gene_name "GENE_C_NAME"; transcript_id "TRANSCRIPT_C", gene_source "havana"; gene_biotype "TEC"; 2 havana transcript 2 14 . - . gene_id "GENE_C"; gene_name "GENE_C_NAME"; transcript_id "TRANSCRIPT_C"; ; gene_source "havana"; gene_biotype "TEC"; transcript_name "4933401J01Rik-201"; transcript_source "havana"; transcript_biotype "TEC"; tag "basic"; transcript_support_level "NA"; -2 havana exon 2 3 . + . gene_id "GENE_B"; gene_version "1"; gene_name "GENE_B_NAME"; transcript_id "TRANSCRIPT_B", gene_source "havana"; gene_biotype "TEC"; +2 havana exon 2 2 . + . 
gene_id "GENE_B"; gene_version "1"; gene_name "GENE_B_NAME"; transcript_id "TRANSCRIPT_B", gene_source "havana"; gene_biotype "TEC"; 2 havana exon 5 8 . + . gene_id "GENE_B"; gene_version "1"; gene_name "GENE_B_NAME"; transcript_id "TRANSCRIPT_B", gene_source "havana"; gene_biotype "TEC"; 2 havana exon 10 14 . - . gene_id "GENE_C"; gene_name "GENE_C_NAME"; transcript_id "TRANSCRIPT_C", gene_source "havana"; gene_biotype "TEC"; diff --git a/tests/fixtures/gtf/t2g_intron.txt b/tests/fixtures/gtf/t2g_intron.txt index 0e8fd14..564f681 100644 --- a/tests/fixtures/gtf/t2g_intron.txt +++ b/tests/fixtures/gtf/t2g_intron.txt @@ -1,6 +1,3 @@ -TRANSCRIPT_C GENE_C GENE_C_NAME -TRANSCRIPT_C-I GENE_C GENE_C_NAME -TRANSCRIPT_B GENE_B.1 GENE_B_NAME -TRANSCRIPT_B-I GENE_B.1 GENE_B_NAME -TRANSCRIPT_A.1 GENE_A.1 GENE_A_NAME -TRANSCRIPT_A.1-I GENE_A.1 GENE_A_NAME +TRANSCRIPT_B-I.1 GENE_B.1 GENE_B_NAME +TRANSCRIPT_B-I.2 GENE_B.1 GENE_B_NAME +TRANSCRIPT_C-I.1 GENE_C GENE_C_NAME diff --git a/tests/fixtures/gtf/t2g_intron_gtf.txt b/tests/fixtures/gtf/t2g_intron_gtf.txt new file mode 100644 index 0000000..0e8fd14 --- /dev/null +++ b/tests/fixtures/gtf/t2g_intron_gtf.txt @@ -0,0 +1,6 @@ +TRANSCRIPT_C GENE_C GENE_C_NAME +TRANSCRIPT_C-I GENE_C GENE_C_NAME +TRANSCRIPT_B GENE_B.1 GENE_B_NAME +TRANSCRIPT_B-I GENE_B.1 GENE_B_NAME +TRANSCRIPT_A.1 GENE_A.1 GENE_A_NAME +TRANSCRIPT_A.1-I GENE_A.1 GENE_A_NAME diff --git a/tests/mixins.py b/tests/mixins.py index ed3f092..3604dbe 100644 --- a/tests/mixins.py +++ b/tests/mixins.py @@ -82,7 +82,10 @@ def setUpClass(cls): cls.unsorted_gtf_path = os.path.join(cls.gtf_dir, 'not_sorted.gtf') cls.sorted_gtf_path = os.path.join(cls.gtf_dir, 'sorted.gtf') cls.gtf_t2g_path = os.path.join(cls.gtf_dir, 't2g.txt') - cls.gtf_t2g_intron_path = os.path.join(cls.gtf_dir, 't2g_intron.txt') + cls.fasta_t2g_intron_path = os.path.join(cls.gtf_dir, 't2g_intron.txt') + cls.gtf_t2g_intron_path = os.path.join( + cls.gtf_dir, 't2g_intron_gtf.txt' + ) cls.fasta_dir = 
os.path.join(cls.fixtures_dir, 'fasta') cls.unsorted_fasta_path = os.path.join(cls.fasta_dir, 'not_sorted.fa') diff --git a/tests/test_fasta.py b/tests/test_fasta.py index d6684ad..1ff30be 100644 --- a/tests/test_fasta.py +++ b/tests/test_fasta.py @@ -21,8 +21,13 @@ def test_make_header(self): ) def test_parse_header(self): - header = '>transcript_id TEST' - self.assertEqual('transcript_id', fasta.FASTA.parse_header(header)) + header = '>transcript_id TEST:testing' + self.assertEqual({ + 'sequence_id': 'transcript_id', + 'group': { + 'TEST': 'testing' + } + }, fasta.FASTA.parse_header(header)) def test_reverse_complement(self): sequence = 'ATCG' @@ -34,7 +39,7 @@ def test_entries(self): def test_sort(self): out_path = os.path.join( - tempfile.gettempdir(), '{}.gtf'.format(uuid.uuid4()) + tempfile.gettempdir(), '{}.fa'.format(uuid.uuid4()) ) fa = fasta.FASTA(self.unsorted_fasta_path) fa.sort(out_path) @@ -59,7 +64,7 @@ def test_generate_intron_fasta(self): tempfile.gettempdir(), '{}.fa'.format(uuid.uuid4()) ) fasta.generate_intron_fasta( - self.sorted_fasta_path, self.sorted_gtf_path, out_path + self.sorted_fasta_path, self.sorted_gtf_path, out_path, flank=1 ) with open(out_path, 'r') as f, open(self.split_intron_fasta_path, 'r') as split: diff --git a/tests/test_ref.py b/tests/test_ref.py index 69251e1..7ac6a4f 100644 --- a/tests/test_ref.py +++ b/tests/test_ref.py @@ -37,6 +37,17 @@ def test_kallisto_index(self): for key, path in result.items(): self.assertTrue(os.path.exists(path)) + def test_create_t2g_from_fasta(self): + t2g_path = os.path.join( + tempfile.gettempdir(), '{}.txt'.format(uuid.uuid4()) + ) + result = ref.create_t2g_from_fasta( + self.split_intron_fasta_path, t2g_path + ) + with open(result['t2g'], 'r') as f, open(self.fasta_t2g_intron_path, + 'r') as t2g: + self.assertEqual(f.read(), t2g.read()) + def test_create_t2g_from_gtf(self): t2g_path = os.path.join( tempfile.gettempdir(), '{}.txt'.format(uuid.uuid4()) @@ -282,7 +293,7 @@ def 
test_ref_overwrite(self): kallisto_index.assert_called_once_with(cdna_fasta_path, index_path) def test_ref_lamanno(self): - with mock.patch('kb_python.ref.create_t2g_from_gtf') as create_t2g_from_gtf,\ + with mock.patch('kb_python.ref.create_t2g_from_fasta') as create_t2g_from_fasta,\ mock.patch('kb_python.ref.sort_fasta') as sort_fasta,\ mock.patch('kb_python.ref.sort_gtf') as sort_gtf,\ mock.patch('kb_python.ref.generate_cdna_fasta') as generate_cdna_fasta,\ @@ -307,7 +318,7 @@ def test_ref_lamanno(self): generate_cdna_fasta.return_value = cdna_fasta_path generate_intron_fasta.return_value = intron_fasta_path kallisto_index.return_value = {'index': index_path} - create_t2g_from_gtf.return_value = {'t2g': t2g_path} + create_t2g_from_fasta.return_value = {'t2g': t2g_path} create_t2c.side_effect = [{ 't2c': cdna_t2c_path }, { @@ -333,8 +344,8 @@ def test_ref_lamanno(self): intron_t2c_path, temp_dir=temp_dir )) - create_t2g_from_gtf.assert_called_once_with( - self.gtf_path, t2g_path, intron=True + create_t2g_from_fasta.assert_called_once_with( + combined_path, t2g_path ) sort_fasta.assert_called_once_with( self.fasta_path, os.path.join(temp_dir, SORTED_FASTA_FILENAME) @@ -356,7 +367,7 @@ def test_ref_lamanno(self): kallisto_index.assert_called_once_with(combined_path, index_path) def test_ref_lamanno_exists(self): - with mock.patch('kb_python.ref.create_t2g_from_gtf') as create_t2g_from_gtf,\ + with mock.patch('kb_python.ref.create_t2g_from_fasta') as create_t2g_from_fasta,\ mock.patch('kb_python.ref.sort_fasta') as sort_fasta,\ mock.patch('kb_python.ref.sort_gtf') as sort_gtf,\ mock.patch('kb_python.ref.generate_cdna_fasta') as generate_cdna_fasta,\ @@ -370,13 +381,13 @@ def test_ref_lamanno_exists(self): index_path = mock.MagicMock() t2g_path = mock.MagicMock() kallisto_index.return_value = {'index': index_path} - create_t2g_from_gtf.return_value = {'t2g': t2g_path} + create_t2g_from_fasta.return_value = {'t2g': t2g_path} self.assertEqual({'t2g': t2g_path}, 
ref.ref( self.fasta_path, self.gtf_path, cdna_fasta_path, index_path, t2g_path )) - create_t2g_from_gtf.assert_called_once_with(self.gtf_path, t2g_path) + create_t2g_from_fasta.assert_not_called() sort_fasta.assert_not_called() sort_gtf.assert_not_called() generate_cdna_fasta.assert_not_called() @@ -386,7 +397,7 @@ def test_ref_lamanno_exists(self): kallisto_index.assert_not_called() def test_ref_lamanno_overwrite(self): - with mock.patch('kb_python.ref.create_t2g_from_gtf') as create_t2g_from_gtf,\ + with mock.patch('kb_python.ref.create_t2g_from_fasta') as create_t2g_from_fasta,\ mock.patch('kb_python.ref.sort_fasta') as sort_fasta,\ mock.patch('kb_python.ref.sort_gtf') as sort_gtf,\ mock.patch('kb_python.ref.generate_cdna_fasta') as generate_cdna_fasta,\ @@ -411,7 +422,7 @@ def test_ref_lamanno_overwrite(self): generate_cdna_fasta.return_value = cdna_fasta_path generate_intron_fasta.return_value = intron_fasta_path kallisto_index.return_value = {'index': index_path} - create_t2g_from_gtf.return_value = {'t2g': t2g_path} + create_t2g_from_fasta.return_value = {'t2g': t2g_path} create_t2c.side_effect = [{ 't2c': cdna_t2c_path }, { @@ -438,8 +449,8 @@ def test_ref_lamanno_overwrite(self): temp_dir=temp_dir, overwrite=True )) - create_t2g_from_gtf.assert_called_once_with( - self.gtf_path, t2g_path, intron=True + create_t2g_from_fasta.assert_called_once_with( + combined_path, t2g_path ) sort_fasta.assert_called_once_with( self.fasta_path, os.path.join(temp_dir, SORTED_FASTA_FILENAME)