Skip to content

Commit

Permalink
support gzipped BED and PA5 input
Browse files Browse the repository at this point in the history
  • Loading branch information
c-zhou committed Sep 27, 2024
1 parent 977ff05 commit 510d788
Show file tree
Hide file tree
Showing 9 changed files with 209 additions and 175 deletions.
97 changes: 96 additions & 1 deletion asset.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,19 @@
#include <assert.h>
#include <ctype.h>
#include <unistd.h>
#include <zlib.h>
#include <sys/resource.h>
#include <sys/time.h>

#include "kseq.h"

#include "asset.h"

/* Instantiate the buffered stream reader for zlib gzFile handles, using
 * gzread/gzseek as the I/O callbacks and BUFF_SIZE as the buffer size.
 * NOTE(review): KSTREAM_INIT is presumably supplied by "kseq.h" and BUFF_SIZE
 * by a project header -- neither definition is visible here. */
KSTREAM_INIT(gzFile, gzread, gzseek, BUFF_SIZE)

/* Prototypes only; the definitions of kopen()/kclose() live outside this file. */
void *kopen(const char *fn, int *_fd);
int kclose(void *a);

/* Element count of a true array; gives a wrong answer if `a` is a pointer. */
#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

double cputime(void)
Expand Down Expand Up @@ -77,7 +85,7 @@ double realtime(void)
return tp.tv_sec + tp.tv_usec * 1e-6;
}

#if defined CTL_HW && defined HW_USERMEM
#if (defined __APPLE__ && defined __MACH__) || defined __FreeBSD__ || defined __NetBSD__ || defined __OpenBSD__
#include <sys/sysctl.h>
#endif

Expand Down Expand Up @@ -235,3 +243,90 @@ void positive_or_die(int num)
exit(EXIT_FAILURE);
}
}

/*
 * Report whether a NUL-terminated line contains nothing but whitespace.
 *
 * line: string to inspect; a NULL pointer is treated as an empty line.
 *
 * Returns 1 if every character before the terminator satisfies isspace()
 * (this includes the empty string), 0 otherwise.
 */
int is_empty_line(char *line)
{
    if (line == NULL)
        return 1;
    /* The <ctype.h> classifiers are only defined for values representable as
     * unsigned char (or EOF). Plain char may be signed, so bytes >= 0x80 must
     * be converted before the call to avoid undefined behaviour. */
    while (isspace((unsigned char) *line))
        line++;
    return *line == '\0';
}

/*
 * Allocate a kstring_t together with a character buffer of `size` bytes.
 *
 * The structure is zero-filled by calloc(); `m` is then set to `size` to match
 * the buffer allocated for `s`. The buffer contents are left uninitialized.
 *
 * Returns the new string, or NULL if `size` is negative or either allocation
 * fails. Release with kstring_destroy().
 */
static kstring_t *kstring_init(int size)
{
    kstring_t *str;

    /* A negative size would wrap to a huge size_t in the malloc() below. */
    if (size < 0)
        return NULL;

    str = (kstring_t *) calloc(1, sizeof(kstring_t));
    if (str == NULL)
        return NULL;

    str->s = (char *) malloc(sizeof(char) * size);
    if (str->s == NULL) {
        /* Do not hand back a string that claims capacity it does not have. */
        free(str);
        return NULL;
    }
    str->m = size;
    return str;
}

/*
 * Free a kstring_t and the character buffer it holds.
 * Passing NULL is allowed and does nothing.
 */
static void kstring_destroy(kstring_t *str)
{
    if (str != NULL) {
        free(str->s);
        free(str);
    }
}

/*
 * Open `spath` for line-by-line reading.
 *
 * The path is opened with kopen(), the resulting descriptor is wrapped in a
 * zlib stream with gzdopen(), and a kstream reader plus a line buffer of
 * BUFF_SIZE bytes are attached to it.
 *
 * Returns a handle to be released with iostream_close(), or NULL if `spath`
 * is NULL or any step of the setup fails. On failure everything acquired so
 * far is released before returning.
 */
iostream_t *iostream_open(const char *spath)
{
    iostream_t *iostream;

    if (spath == NULL)
        return NULL;

    iostream = (iostream_t *) calloc(1, sizeof(iostream_t));
    if (iostream == NULL)
        return NULL;

    iostream->koaux = kopen(spath, &iostream->fd);
    if (iostream->koaux == NULL)
        goto fail_free;

    iostream->fp = gzdopen(iostream->fd, "r");
    if (iostream->fp == Z_NULL)
        goto fail_kclose;

    iostream->stream = ks_init(iostream->fp);
    iostream->buffer = kstring_init(BUFF_SIZE);
    /* Never return a handle whose reader or line buffer is missing:
     * iostream_getline() dereferences both unconditionally. */
    if (iostream->stream == NULL || iostream->buffer == NULL)
        goto fail_gzclose;

    return iostream;

fail_gzclose:
    if (iostream->stream != NULL)
        ks_destroy(iostream->stream);
    kstring_destroy(iostream->buffer);
    gzclose(iostream->fp);
fail_kclose:
    kclose(iostream->koaux);
fail_free:
    free(iostream);
    return NULL;
}

/*
 * Tear down a handle obtained from iostream_open().
 *
 * Destroys the kstream reader and the line buffer, closes the zlib stream,
 * closes the kopen() handle and finally frees the structure itself.
 * A NULL handle is ignored.
 */
void iostream_close(iostream_t *iostream)
{
    if (iostream != NULL) {
        /* Reader and buffer go first, then the streams they were built on. */
        ks_destroy(iostream->stream);
        kstring_destroy(iostream->buffer);
        gzclose(iostream->fp);
        kclose(iostream->koaux);
        free(iostream);
    }
}

/*
 * Fetch the next line from the stream.
 *
 * Reads up to the next KS_SEP_LINE separator into the handle's own buffer
 * and, on success, bumps the handle's line counter.
 *
 * Returns a pointer to the buffer's character data, or NULL once
 * ks_getuntil() reports a negative value. The storage belongs to the handle
 * (it is released by iostream_close()), so callers must not free it; the same
 * buffer is handed to every call, so the contents are presumably overwritten
 * by the next read.
 */
char *iostream_getline(iostream_t *iostream)
{
    kstring_t *line_buf = (kstring_t *) (iostream->buffer);
    int n_read = ks_getuntil(iostream->stream, KS_SEP_LINE, line_buf, 0);

    if (n_read < 0)
        return NULL;

    iostream->nline += 1;
    return line_buf->s;
}

#ifdef IOSTREAM_MAIN
/*
 * Stand-alone driver, compiled only when IOSTREAM_MAIN is defined.
 *
 * Opens the file named by the first command-line argument and echoes every
 * line to stdout prefixed with its line number. Exits with status 1 when no
 * file is given or the file cannot be opened.
 */
int main(int argc, char *argv[])
{
    iostream_t *iostream;
    char *line;

    /* argv[1] is only valid when an argument was actually supplied. */
    if (argc < 2) {
        fprintf(stderr, "Usage: %s <file>\n", argc > 0 ? argv[0] : "iostream");
        exit (1);
    }

    iostream = iostream_open(argv[1]);
    if (iostream == NULL) {
        /* Diagnostics belong on stderr so they do not mix with the echoed lines. */
        fprintf(stderr, "[E::%s] cannot open file %s to read\n", __func__, argv[1]);
        exit (1);
    }

    /* nline is int64_t, which is not `long` on every platform; convert
     * explicitly so the format specifier always matches the argument. */
    while ((line = iostream_getline(iostream)) != NULL)
        fprintf(stdout, "[I::%s] Line %9lld: %s\n", __func__, (long long) iostream->nline, line);

    iostream_close(iostream);
    return 0;
}

#endif
14 changes: 14 additions & 0 deletions asset.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@

#include <stdint.h>
#include <stdio.h>
#include <zlib.h>

/* Exchange x and y through a temporary of type T. Expands to a bare brace
 * block whose temporary is named `tmp`, and the arguments are pasted in
 * unparenthesized -- pass simple lvalues that are not themselves called tmp. */
#define SWAP(T, x, y) {T tmp = x; x = y; y = tmp;}
/* Larger of x and y. The selected argument is evaluated twice, so avoid
 * arguments with side effects. */
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
Expand All @@ -42,6 +43,15 @@
#define BIN_V 0x2
/* Route strcasecmp() calls to this project's strcmp_case_insensitive(). */
#define strcasecmp(s1, s2) strcmp_case_insensitive(s1, s2)

/* State of a line-oriented input stream; created by iostream_open() and
 * released by iostream_close(). */
typedef struct {
int fd; /* file descriptor filled in by kopen() */
gzFile fp; /* zlib stream opened on fd with gzdopen() */
void *stream; /* reader returned by ks_init() for fp */
void *buffer; /* kstring_t line buffer that iostream_getline() reads into */
void *koaux; /* handle returned by kopen(); passed to kclose() on close */
int64_t nline; /* number of lines returned so far by iostream_getline() */
} iostream_t;

#ifdef __cplusplus
extern "C" {
#endif
Expand All @@ -59,6 +69,10 @@ void write_bin_header(FILE *fo);
int is_valid_bin_header(int64_t magic_number);
int strcmp_case_insensitive(const char *s1, const char *s2);
void positive_or_die(int num);
/* Returns 1 if line contains only whitespace (or is empty), 0 otherwise. */
int is_empty_line(char *line);
/* Opens spath for line-by-line reading; returns NULL on failure. */
iostream_t *iostream_open(const char *spath);
/* Releases a stream returned by iostream_open(); NULL is ignored. */
void iostream_close(iostream_t *iostream);
/* Returns the next line, or NULL when no further line can be read. The
 * returned storage is owned by the stream and must not be freed. */
char *iostream_getline(iostream_t *iostream);
#ifdef __cplusplus
}
#endif
Expand Down
21 changes: 6 additions & 15 deletions cov.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,6 @@
#include "cov.h"
#include "asset.h"

void *kopen(const char *fn, int *_fd);
int kclose(void *a);

KHASH_SET_INIT_STR(str)

// read unmapped (0x4)
Expand Down Expand Up @@ -238,20 +235,15 @@ cov_t *bed_cstats(const char *bed, sdict_t *sdict)
uint32_t i, s, e;
uint64_t n, m, max_m, n_recs;
char *line, cname[4096];
size_t ln = 0;
ssize_t read;
FILE *fp;
int fd;
void *fh;
iostream_t *fp;
cov_t *covs;

khash_t(str) *hmseq; // for absent sequences
khint_t k;
int absent;
hmseq = kh_init(str);

fh = kopen(bed, &fd);
fp = fdopen(fd, "r");
fp = iostream_open(bed);
if (fp == NULL) {
fprintf(stderr, "[E::%s] cannot open file %s for reading\n", __func__, bed);
exit(EXIT_FAILURE);
Expand All @@ -262,7 +254,9 @@ cov_t *bed_cstats(const char *bed, sdict_t *sdict)
n_recs = 0;
n = 0;
line = 0;
while ((read = getline(&line, &ln, fp)) != -1) {
while ((line = iostream_getline(fp)) != NULL) {
if (is_empty_line(line))
continue;
++n_recs;
if (n_recs % 1000000 == 0)
fprintf(stderr, "[I::%s] %lu million records processed\n", __func__, n_recs / 1000000);
Expand Down Expand Up @@ -300,10 +294,7 @@ cov_t *bed_cstats(const char *bed, sdict_t *sdict)
free((char *) kh_key(hmseq, k));
kh_destroy(str, hmseq);

if (line)
free(line);
fclose(fp);
kclose(fh);
iostream_close(fp);

return covs;
}
Expand Down
14 changes: 5 additions & 9 deletions graph.c
Original file line number Diff line number Diff line change
Expand Up @@ -153,33 +153,29 @@ void graph_arc_index(graph_t *g)

graph_t *read_graph_from_gfa(char *gfa)
{
FILE *fp;
char *line = NULL;
size_t ln = 0;
ssize_t read;
iostream_t *fp;
char *line;
char c0[1024], c1[1024], s0[4], s1[4], wts[1024];
double wt;

graph_t *g;
g = graph_init();
g->sdict = make_asm_dict_from_sdict(make_sdict_from_gfa(gfa, 0));

fp = fopen(gfa, "r");
fp = iostream_open(gfa);
if (fp == NULL) {
fprintf(stderr, "[E::%s] cannot open file %s for reading\n", __func__, gfa);
exit(EXIT_FAILURE);
}

while ((read = getline(&line, &ln, fp)) != -1) {
while ((line = iostream_getline(fp)) != NULL) {
if (line[0] == 'L') {
sscanf(line, "%*s %s %s %s %s %*s %s", c0, s0, c1, s1, wts);
wt = strtof(wts + 5, NULL);
graph_add_arc(g, (asm_sd_get(g->sdict, c0)<<1) | (s0[0]=='-'), (asm_sd_get(g->sdict, c1)<<1) | (s1[0]=='-'), -1, 0, wt);
}
}
if (line)
free(line);
fclose(fp);
iostream_close(fp);

graph_arc_sort(g);
graph_arc_index(g);
Expand Down
Loading

0 comments on commit 510d788

Please sign in to comment.