Skip to content

Commit

Permalink
fix(regenerate-fastq): polars categorical bug
Browse files: browse the repository at this point in the history
  • Loading branch information
alsmith151 committed Nov 12, 2023
1 parent ca31254 commit 7c28fdf
Showing 1 changed file with 31 additions and 26 deletions.
57 changes: 31 additions & 26 deletions capcruncher/cli/cli_utilities.py
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,16 @@
import os
import subprocess
from tempfile import NamedTemporaryFile
from typing import Iterable, List, Literal

import click
import pandas as pd
import os
from loguru import logger
from capcruncher.utils import get_file_type
import ibis
import pandas as pd
from ibis import _
from loguru import logger

from capcruncher.api.statistics import CisOrTransStats
from capcruncher.utils import get_file_type


@click.group()
Expand All @@ -32,6 +34,7 @@ def gtf_to_bed12(gtf: str, output: str):
"""

from pybedtools import BedTool

from capcruncher.utils import gtf_line_to_bed12_line

bt_gtf = BedTool(gtf)
Expand Down Expand Up @@ -194,9 +197,10 @@ def viewpoint_coordinates(
ValueError: If no bowtie2 indices are supplied
"""

from capcruncher.cli import genome_digest
from pybedtools import BedTool

from capcruncher.cli import genome_digest

digested_genome = NamedTemporaryFile("r+")
viewpoints_fasta = NamedTemporaryFile("r+")
viewpoints_aligned_bam = NamedTemporaryFile("r+")
Expand Down Expand Up @@ -350,11 +354,11 @@ def regenerate_fastq(
Returns:
None
"""
import pandas as pd
import pysam
import pathlib

Check warning on line 357 in capcruncher/cli/cli_utilities.py

View check run for this annotation

Codecov / codecov/patch

capcruncher/cli/cli_utilities.py#L357

Added line #L357 was not covered by tests

import polars as pl
import pysam
from xopen import xopen

Check warning on line 361 in capcruncher/cli/cli_utilities.py

View check run for this annotation

Codecov / codecov/patch

capcruncher/cli/cli_utilities.py#L359-L361

Added lines #L359 - L361 were not covered by tests
import pathlib

assert os.path.exists(parquet_file), f"File {parquet_file} does not exist"

Check warning on line 363 in capcruncher/cli/cli_utilities.py

View check run for this annotation

Codecov / codecov/patch

capcruncher/cli/cli_utilities.py#L363

Added line #L363 was not covered by tests

Expand All @@ -366,24 +370,25 @@ def regenerate_fastq(


logger.info(f"Extracting reads info from {parquet_file}")
read_names = set(
pl.scan_parquet(parquet_file)
.select("parent_read")
.unique()
.collect()["parent_read"]
.to_list()
)

logger.info(f"Writing reads to {outpath}")
with pysam.FastxFile(fastq1) as r1:
with pysam.FastxFile(fastq2) as r2:
with xopen(f"{outpath}_1.fastq.gz", "w") as w1:
with xopen(f"{outpath}_2.fastq.gz", "w") as w2:
for read_1, read_2 in zip(r1, r2):
if read_1.name in read_names:
w1.write(str(read_1) + "\n")
w2.write(str(read_2) + "\n")

with pl.StringCache() as cache:
read_names = set(

Check warning on line 374 in capcruncher/cli/cli_utilities.py

View check run for this annotation

Codecov / codecov/patch

capcruncher/cli/cli_utilities.py#L372-L374

Added lines #L372 - L374 were not covered by tests
pl.scan_parquet(parquet_file)
.select("parent_read")
.unique()
.collect()["parent_read"]
.to_list()
)

logger.info(f"Writing reads to {outpath}")
with pysam.FastxFile(fastq1) as r1:
with pysam.FastxFile(fastq2) as r2:
with xopen(f"{outpath}_1.fastq.gz", "w") as w1:
with xopen(f"{outpath}_2.fastq.gz", "w") as w2:
for read_1, read_2 in zip(r1, r2):
if read_1.name in read_names:
w1.write(str(read_1) + "\n")
w2.write(str(read_2) + "\n")

Check warning on line 390 in capcruncher/cli/cli_utilities.py

View check run for this annotation

Codecov / codecov/patch

capcruncher/cli/cli_utilities.py#L382-L390

Added lines #L382 - L390 were not covered by tests

logger.info("Done")

Check warning on line 392 in capcruncher/cli/cli_utilities.py

View check run for this annotation

Codecov / codecov/patch

capcruncher/cli/cli_utilities.py#L392

Added line #L392 was not covered by tests


0 comments on commit 7c28fdf

Please sign in to comment.