Skip to content

Commit

Permalink
Option to write .gg in simple-sds format
Browse files Browse the repository at this point in the history
  • Loading branch information
jltsiren committed Oct 23, 2021
1 parent 6faf66f commit 93b2df0
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 7 deletions.
8 changes: 5 additions & 3 deletions include/gbwtgraph/gbz.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,10 +105,12 @@ class GBZ
// Returns the size of the serialized structure in elements.
size_t simple_sds_size() const;

// Serialize the GBWT (simple-sds format) and the GBWTGraph (SDSL format) to separate files.
void serialize_to_files(const std::string& gbwt_name, const std::string& graph_name) const;
// Serialize the GBWT (simple-sds format) and the GBWTGraph to separate files.
// Default graph format is libhandlegraph / SDSL.
void serialize_to_files(const std::string& gbwt_name, const std::string& graph_name, bool simple_sds_graph = false) const;

// Loads the GBWT (simple-sds format) and the GBWTGraph (SDSL format) from separate.
// Loads the GBWT (simple-sds format) and the GBWTGraph from separate files.
// Graph format is libhandlegraph / SDSL; the simple-sds format cannot be read.
void load_from_files(const std::string& gbwt_name, const std::string& graph_name);

private:
Expand Down
5 changes: 3 additions & 2 deletions src/gbz.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -236,10 +236,11 @@ GBZ::simple_sds_size() const
}

// Writes the two components of the GBZ to separate files.
//
// gbwt_name:        output file for the GBWT index (this->index); always written
//                   with sdsl::simple_sds::serialize_to().
// graph_name:       output file for the graph (this->graph).
// simple_sds_graph: when true, the graph is also written with
//                   sdsl::simple_sds::serialize_to(); otherwise it is written
//                   with this->graph.serialize().
//
// NOTE(review): the header comment on load_from_files() says the simple-sds
// graph format cannot be read back, so files written with simple_sds_graph set
// are presumably not loadable through that function -- confirm before relying on it.
void
GBZ::serialize_to_files(const std::string& gbwt_name, const std::string& graph_name) const
GBZ::serialize_to_files(const std::string& gbwt_name, const std::string& graph_name, bool simple_sds_graph) const
{
sdsl::simple_sds::serialize_to(this->index, gbwt_name);
this->graph.serialize(graph_name);
if(simple_sds_graph) { sdsl::simple_sds::serialize_to(this->graph, graph_name); }
else { this->graph.serialize(graph_name); }
}

void
Expand Down
15 changes: 13 additions & 2 deletions src/gfa2gbwt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ struct Config

bool translation = false;
bool show_progress = false;
bool simple_sds_graph = false;
};

const std::string tool_name = "GFA to GBWTGraph";
Expand Down Expand Up @@ -129,11 +130,16 @@ printUsage(int exit_code)
std::cerr << " -p, --progress show progress information" << std::endl;
std::cerr << " -t, --translation write translation table into a " << SequenceSource::TRANSLATION_EXTENSION << " file" << std::endl;
std::cerr << std::endl;
std::cerr << "Other options:" << std::endl;
std::cerr << " -s, --simple-sds-graph serialize " << GBWTGraph::EXTENSION << " in simple-sds format instead of libhandlegraph format" << std::endl;
std::cerr << " (this tool cannot read simple-sds graphs)" << std::endl;
std::cerr << std::endl;
std::cerr << "GFA parsing parameters:" << std::endl;
std::cerr << " -m, --max-node N break > N bp segments into multiple nodes (default " << MAX_NODE_LENGTH << ")" << std::endl;
std::cerr << " (minimizer index requires nodes of length <= 1024 bp)" << std::endl;
std::cerr << " -r, --path-regex STR parse path names using regex STR (default " << GFAParsingParameters::DEFAULT_REGEX << ")" << std::endl;
std::cerr << " -f, --path-fields STR map the submatches to fields STR (default " << GFAParsingParameters::DEFAULT_FIELDS << ")" << std::endl;
std::cerr << " (the first submatch is the entire path name)" << std::endl;
std::cerr << std::endl;
std::cerr << "Fields (case insensitive):" << std::endl;
std::cerr << " S sample name" << std::endl;
Expand Down Expand Up @@ -164,13 +170,14 @@ Config::Config(int argc, char** argv)
{ "decompress-graph", no_argument, 0, 'D' },
{ "progress", no_argument, 0, 'p' },
{ "translation", no_argument, 0, 't' },
{ "simple-sds-graph", no_argument, 0, 's' },
{ "max-node", required_argument, 0, 'm' },
{ "path-regex", required_argument, 0, 'r' },
{ "path-fields", required_argument, 0, 'f' },
};

// Process options.
while((c = getopt_long(argc, argv, "becdCDptm:r:f:", long_options, &option_index)) != -1)
while((c = getopt_long(argc, argv, "becdCDptsm:r:f:", long_options, &option_index)) != -1)
{
switch(c)
{
Expand Down Expand Up @@ -207,6 +214,10 @@ Config::Config(int argc, char** argv)
this->translation = true;
break;

case 's':
this->simple_sds_graph = true;
break;

case 'm':
try { this->parameters.max_node_length = std::stoul(optarg); }
catch(const std::invalid_argument&)
Expand Down Expand Up @@ -322,7 +333,7 @@ write_graph(const GBZ& gbz, const Config& config)
{
std::cerr << "Writing GBWT and GBWTGraph to " << gbwt_name << " and " << graph_name << std::endl;
}
gbz.serialize_to_files(gbwt_name, graph_name);
gbz.serialize_to_files(gbwt_name, graph_name, config.simple_sds_graph);
}

//------------------------------------------------------------------------------
Expand Down

0 comments on commit 93b2df0

Please sign in to comment.