Skip to content

Commit

Permalink
Add BUILD rules for GBZ reader. Add to make_examples.
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 679000601
  • Loading branch information
pichuan authored and copybara-github committed Sep 26, 2024
1 parent c5c3ad7 commit be3d2e5
Show file tree
Hide file tree
Showing 6 changed files with 67 additions and 12 deletions.
5 changes: 5 additions & 0 deletions BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,13 @@ filegroup(
"//third_party:boost.LICENSE",
"@com_google_protobuf//:LICENSE",
"@com_googlesource_code_re2//:LICENSE",
"@gbwt//:LICENSE",
"@gbwtgraph//:LICENSE",
"@htslib//:LICENSE",
"@libdivsufsort//:LICENSE",
"@libssw//:README.md", # SSW license embedded in the README.
"@org_tensorflow//:LICENSE",
"@sdsl_lite//:COPYING",
],
)

Expand Down Expand Up @@ -73,6 +77,7 @@ cc_library(
"//third_party/nucleus/io/python:bedgraph_writer_cclib",
"//third_party/nucleus/io/python:fastq_reader_cclib",
"//third_party/nucleus/io/python:fastq_writer_cclib",
"//third_party/nucleus/io/python:gbz_reader_cclib",
"//third_party/nucleus/io/python:gff_reader_cclib",
"//third_party/nucleus/io/python:gff_writer_cclib",
"//third_party/nucleus/io/python:gfile_cclib",
Expand Down
24 changes: 24 additions & 0 deletions third_party/nucleus/io/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ py_library(
":genomics_reader",
":genomics_writer",
"//third_party/nucleus/io:clif_postproc",
"//third_party/nucleus/io/python:gbz_reader",
"//third_party/nucleus/io/python:sam_reader",
"//third_party/nucleus/io/python:sam_writer",
"//third_party/nucleus/protos:reads_py_pb2",
Expand Down Expand Up @@ -656,6 +657,29 @@ cc_library(
],
)

# C++ library target for the GBZ reader: compiles gbz_reader.cc and exposes
# gbz_reader.h. Besides the in-tree nucleus targets (reader base, status
# types, protos, utilities) and Abseil logging/strings, it links against the
# external @gbwt, @gbwtgraph and @libhandlegraph repositories, whose headers
# gbz_reader.cc / gbz_reader.h include.
cc_library(
name = "gbz_reader",
srcs = ["gbz_reader.cc"],
hdrs = ["gbz_reader.h"],
deps = [
":reader_base",
"//third_party/nucleus/core:status",
"//third_party/nucleus/core:statusor",
"//third_party/nucleus/platform:types",
"//third_party/nucleus/protos:cigar_cc_pb2",
"//third_party/nucleus/protos:position_cc_pb2",
"//third_party/nucleus/protos:range_cc_pb2",
"//third_party/nucleus/protos:reads_cc_pb2",
"//third_party/nucleus/util:cpp_utils",
"@com_google_absl//absl/log",
"@com_google_absl//absl/log:check",
"@com_google_absl//absl/strings",
# External graph libraries providing the gbwt/, gbwtgraph/ and
# handlegraph/ headers.
"@gbwt",
"@gbwtgraph",
"@libhandlegraph",
],
)

cc_test(
name = "sam_reader_test",
size = "small",
Expand Down
19 changes: 9 additions & 10 deletions third_party/nucleus/io/gbz_reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,23 +20,21 @@

#include <algorithm>
#include <cstddef>
#include <fstream> // IWYU pragma: keep
#include <iostream>
#include <memory>
#include <regex>
#include <string>
#include <vector>

#include "file/base/file.h"
#include "file/base/options.h"
#include "file/iostream/file_iostream.h"
#include "absl/log/check.h"
#include "absl/log/log.h"
#include "absl/strings/str_cat.h"
#include "third_party/gbwt/include/gbwt/metadata.h"
#include "third_party/gbwt/include/gbwt/support.h"
#include "third_party/gbwt/include/gbwt/utils.h"
#include "third_party/gbwtgraph/include/gbwtgraph/subgraph.h"
#include "third_party/libhandlegraph/src/include/handlegraph/types.hpp"
#include "include/gbwt/metadata.h"
#include "include/gbwt/support.h"
#include "include/gbwt/utils.h"
#include "include/gbwtgraph/subgraph.h"
#include "src/include/handlegraph/types.hpp"
#include "third_party/nucleus/core/status.h"
#include "third_party/nucleus/core/statusor.h"
#include "third_party/nucleus/platform/types.h"
Expand All @@ -57,7 +55,8 @@ GbzReader::GbzReader(const std::string& gbz_path,
double start = gbwt::readTimer();

// Open GBZ file in read mode.
file::FileInStream in(file::OpenOrDie(gbz_path, "r", file::Defaults()));
std::ifstream in(gbz_path);

// Create an empty GBZ object.
this->gbz_ = gbwtgraph::GBZ();
// Load the GBZ file into the GBZ object.
Expand Down Expand Up @@ -114,7 +113,7 @@ nucleus::StatusOr<std::vector<nucleus::genomics::v1::Read>> GbzReader::Query(

updateCache(reads);

return StatusOr<std::vector<nucleus::genomics::v1::Read>>(reads);
return nucleus::StatusOr<std::vector<nucleus::genomics::v1::Read>>(reads);
}

nucleus::StatusOr<std::shared_ptr<SamIterable>> GbzReader::Iterate() const {
Expand Down
4 changes: 2 additions & 2 deletions third_party/nucleus/io/gbz_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@
#include <vector>
#include <string>

#include "third_party/gbwtgraph/include/gbwtgraph/gbz.h"
#include "third_party/gbwtgraph/include/gbwtgraph/subgraph.h"
#include "include/gbwtgraph/gbz.h"
#include "include/gbwtgraph/subgraph.h"
#include "third_party/nucleus/core/statusor.h"
#include "third_party/nucleus/io/reader_base.h"
#include "third_party/nucleus/protos/cigar.pb.h"
Expand Down
24 changes: 24 additions & 0 deletions third_party/nucleus/io/python/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,30 @@ py_test(
],
)

# Python extension module target for the GBZ reader, built from
# gbz_reader_pybind.cc on top of the C++ //third_party/nucleus/io:gbz_reader
# library. The type-caster deps cover nucleus Status/StatusOr values and
# proto pointers; native_proto_caster comes from pybind11_protobuf.
pybind_extension(
name = "gbz_reader",
srcs = ["gbz_reader_pybind.cc"],
deps = [
"//third_party/nucleus/core/python:type_caster_nucleus_status",
"//third_party/nucleus/core/python:type_caster_nucleus_statusor",
"//third_party/nucleus/io:gbz_reader",
"//third_party/nucleus/util/python:type_caster_nucleus_proto_ptr",
"@pybind11_protobuf//pybind11_protobuf:native_proto_caster",
],
)

# Library flavour of the GBZ reader bindings: same source file and deps as the
# `gbz_reader` pybind_extension, but declared as a pybind_library so other
# C++ targets can list it in their deps.
# NOTE(review): srcs and deps are duplicated from the pybind_extension rule —
# keep the two lists in sync when either changes.
pybind_library(
name = "gbz_reader_cclib",
srcs = ["gbz_reader_pybind.cc"],
deps = [
"//third_party/nucleus/core/python:type_caster_nucleus_status",
"//third_party/nucleus/core/python:type_caster_nucleus_statusor",
"//third_party/nucleus/io:gbz_reader",
"//third_party/nucleus/util/python:type_caster_nucleus_proto_ptr",
"@pybind11_protobuf//pybind11_protobuf:native_proto_caster",
],
)

pybind_extension(
name = "sam_reader",
srcs = ["sam_reader_pybind.cc"],
Expand Down
3 changes: 3 additions & 0 deletions third_party/nucleus/io/sam.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@

from third_party.nucleus.io import genomics_reader
from third_party.nucleus.io import genomics_writer
from third_party.nucleus.io.python import gbz_reader
from third_party.nucleus.io.python import sam_reader
from third_party.nucleus.io.python import sam_writer
from third_party.nucleus.protos import reads_pb2
Expand Down Expand Up @@ -257,6 +258,8 @@ class SamReader(genomics_reader.DispatchingGenomicsReader):
"""Class for reading Read protos from SAM/BAM/CRAM or TFRecord files."""

def _native_reader(self, input_path, ref_name='', **kwargs):
  """Picks the native reader implementation for `input_path`.

  Args:
    input_path: str. Path of the file to read. A path ending in '.gbz'
      selects the GBZ reader; anything else selects NativeSamReader.
    ref_name: str. Forwarded to the GBZ reader only; not used for other
      inputs.
    **kwargs: Forwarded to NativeSamReader only; not used for '.gbz' inputs.

  Returns:
    A gbz_reader.GbzReader for '.gbz' paths, otherwise a NativeSamReader.
  """
  is_gbz_input = input_path.endswith('.gbz')
  if not is_gbz_input:
    return NativeSamReader(input_path, **kwargs)
  return gbz_reader.GbzReader(input_path, ref_name)

def _record_proto(self):
Expand Down

0 comments on commit be3d2e5

Please sign in to comment.