From 84f383508d4506632dd456893421405ca81a079f Mon Sep 17 00:00:00 2001 From: Kate F Date: Sat, 24 Aug 2024 13:03:00 +0100 Subject: [PATCH 1/8] Wrong exit status for -G. Spotted by @pierreganty (#478), thank you --- src/libfsm/gen.c | 6 +++--- src/re/main.c | 6 +++++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/libfsm/gen.c b/src/libfsm/gen.c index 730b40724..790579dc8 100644 --- a/src/libfsm/gen.c +++ b/src/libfsm/gen.c @@ -193,7 +193,7 @@ gen_init_outer(struct fsm *fsm, size_t max_length, fsm_generate_matches_cb *cb, void *opaque, bool randomized, unsigned seed) { - int res = 0; + int res = false; if (fsm == NULL || cb == NULL || max_length == 0) { return false; } @@ -222,11 +222,11 @@ gen_init_outer(struct fsm *fsm, size_t max_length, goto cleanup; } - res = 1; + res = true; while (!ctx.done) { if (!gen_iter(&ctx)) { - res = 0; + res = false; break; } } diff --git a/src/re/main.c b/src/re/main.c index 78bc77a2f..1a0d5cbe1 100644 --- a/src/re/main.c +++ b/src/re/main.c @@ -1047,7 +1047,11 @@ main(int argc, char *argv[]) } if (generate_bounds > 0) { - return fsm_generate_matches(fsm, generate_bounds, fsm_generate_cb_printf_escaped, &opt); + if (!fsm_generate_matches(fsm, generate_bounds, fsm_generate_cb_printf_escaped, &opt)) { + exit(EXIT_FAILURE); + } + + return 0; } if (fsm_lang != FSM_PRINT_NONE) { From 60baaab8addbd06c65d0a2f8fb53eafd9b32c4bc Mon Sep 17 00:00:00 2001 From: Kate F Date: Sat, 24 Aug 2024 13:03:56 +0100 Subject: [PATCH 2/8] Wrong return type for AMBIG_ERROR/AMBIG_EARLIEST. Spotted by @sw17ch, thank you. --- src/libfsm/print/rust.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/libfsm/print/rust.c b/src/libfsm/print/rust.c index b11fb2c5a..cf855d56d 100644 --- a/src/libfsm/print/rust.c +++ b/src/libfsm/print/rust.c @@ -520,14 +520,18 @@ fsm_print_rust(FILE *f, switch (opt->ambig) { case AMBIG_NONE: + fprintf(f, "Option<()>"); + break; + case AMBIG_ERROR: case AMBIG_EARLIEST: - fprintf(f, "Option<()>"); + fprintf(f, "Option"); break; case AMBIG_MULTIPLE: fprintf(f, "Option<&'static [u32]>"); break; + default: fprintf(stderr, "unsupported ambig mode\n"); exit(EXIT_FAILURE); From e7a21b47f40028d698a21733fccf9ecd61beb3cc Mon Sep 17 00:00:00 2001 From: Kate F Date: Sat, 24 Aug 2024 13:04:40 +0100 Subject: [PATCH 3/8] -C opt-level=3 makes a huge difference here. Spotted by @sw17ch, thank you. --- src/retest/runner.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/retest/runner.c b/src/retest/runner.c index 0dac9ab38..49cd611af 100644 --- a/src/retest/runner.c +++ b/src/retest/runner.c @@ -158,7 +158,7 @@ compile(enum implementation impl, break; case IMPL_RUST: - if (0 != systemf("%s %s --crate-type dylib %s -o %s", + if (0 != systemf("%s %s -C opt-level=3 --crate-type dylib %s -o %s", "rustc", "--edition 2021", tmp_src, tmp_so)) { From edeb29131aca82e830feb55564537d62a30aaf96 Mon Sep 17 00:00:00 2001 From: Kate F Date: Sat, 24 Aug 2024 13:30:47 +0100 Subject: [PATCH 4/8] Missing language. --- src/retest/main.c | 16 ++++++++-------- src/retest/reperf.c | 14 ++++++++++---- src/retest/runner.h | 8 ++++---- 3 files changed, 22 insertions(+), 16 deletions(-) diff --git a/src/retest/main.c b/src/retest/main.c index 163e93bb9..53eba216c 100644 --- a/src/retest/main.c +++ b/src/retest/main.c @@ -1270,16 +1270,10 @@ main(int argc, char *argv[]) break; case 'l': - if (strcmp(optarg, "vm") == 0) { - impl = IMPL_INTERPRET; + if (strcmp(optarg, "asm") == 0) { + impl = IMPL_VMASM; } else if (strcmp(optarg, "c") == 0) { impl = IMPL_C; - } else if (strcmp(optarg, "asm") == 0) { - impl = IMPL_VMASM; - } else if (strcmp(optarg, "vmc") == 0) { - impl = IMPL_VMC; - } else if (strcmp(optarg, "vmops") == 0) { - impl = IMPL_VMOPS; } else if (strcmp(optarg, "go") == 0) { impl = IMPL_GO; } else if (strcmp(optarg, "goasm") == 0) { @@ -1288,6 +1282,12 @@ main(int argc, char *argv[]) impl = IMPL_LLVM; } else if (strcmp(optarg, "rust") == 0) { impl = IMPL_RUST; + } else if (strcmp(optarg, "vm") == 0) { + impl = IMPL_INTERPRET; + } else if (strcmp(optarg, "vmc") == 0) { + impl = IMPL_VMC; + } else if (strcmp(optarg, "vmops") == 0) { + impl = IMPL_VMOPS; } else { fprintf(stderr, "unknown argument to -l: %s\n", optarg); usage(); diff --git a/src/retest/reperf.c b/src/retest/reperf.c index 43c294821..4536bcf5f 100644 --- a/src/retest/reperf.c +++ b/src/retest/reperf.c @@ -1154,18 +1154,24 @@ main(int argc, char *argv[]) break; case 'l': - if (strcmp(optarg, "vm") == 0) { - impl = IMPL_INTERPRET; + if (strcmp(optarg, "asm") == 0) { + impl = IMPL_VMASM; } else if (strcmp(optarg, "c") == 0) { impl = IMPL_C; + } else if (strcmp(optarg, "go") == 0) { + impl = IMPL_GO; + } else if (strcmp(optarg, "goasm") == 0) { + impl = IMPL_GOASM; } else if (strcmp(optarg, "llvm") == 0) { impl = IMPL_LLVM; } else if (strcmp(optarg, "rust") == 0) { impl = IMPL_RUST; - } else if (strcmp(optarg, "asm") == 0) { - impl = IMPL_VMASM; + } else if (strcmp(optarg, "vm") == 0) { + impl = IMPL_INTERPRET; } else if (strcmp(optarg, "vmc") == 0) { impl = IMPL_VMC; + } else if (strcmp(optarg, "vmops") == 0) { + impl = IMPL_VMOPS; } else { fprintf(stderr, "unknown argument to -l: %s\n", optarg); usage(); diff --git a/src/retest/runner.h b/src/retest/runner.h index 3d53bc511..40b269eb0 100644 --- a/src/retest/runner.h +++ b/src/retest/runner.h @@ -27,13 +27,13 @@ enum error_type { enum implementation { IMPL_C, - IMPL_RUST, - IMPL_LLVM, IMPL_GO, IMPL_GOASM, - IMPL_VMC, - IMPL_VMASM, IMPL_INTERPRET, + IMPL_LLVM, + IMPL_RUST, + IMPL_VMASM, + IMPL_VMC, IMPL_VMOPS, }; From 3769f7743c673e623fd6f5b3519d3d250cfce471 Mon Sep 17 00:00:00 2001 From: Kate F Date: Sat, 24 Aug 2024 15:05:32 +0100 Subject: [PATCH 5/8] Off by one. Previously this gave: ``` ; ./build/bin/re -r literal -G 3 abc ; ./build/bin/re -r literal -G 4 abc abc ; ``` and now -G 3 does construct "abc" (not including the newline, which is a property of the printing, not of the constructed string): ``` ; ./build/bin/re -r literal -G 3 abc abc ; ./build/bin/re -r literal -G 4 abc abc ; ``` and: ``` ; ./build/bin/re -r native -G 3 '^x+$' x xx xxx ``` Spotted by @pierreganty (#478), thank you --- src/libfsm/gen.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/libfsm/gen.c b/src/libfsm/gen.c index 790579dc8..aaf095674 100644 --- a/src/libfsm/gen.c +++ b/src/libfsm/gen.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -139,6 +140,11 @@ int fsm_generate_matches(struct fsm *fsm, size_t max_length, fsm_generate_matches_cb *cb, void *opaque) { + if (max_length == 0) { + errno = EINVAL; + return 0; + } + INIT_TIMERS(); TIME(&pre); int res = gen_init_outer(fsm, max_length, cb, opaque, false, 0); @@ -562,8 +568,8 @@ sfs_step_edges(struct gen_ctx *ctx, struct gen_stack_frame *sf) sf->u.step_edges.initialized = true; } - if (ctx->buf_used + ctx->sed[sf->s_id] >= ctx->max_length) { - LOG(2, "PRUNING due to max length: used:%zu + sed[%d]:%u >= max_length:%zu\n", + if (ctx->buf_used + ctx->sed[sf->s_id] > ctx->max_length) { + LOG(2, "PRUNING due to max length: used:%zu + sed[%d]:%u > max_length:%zu\n", ctx->buf_used, sf->s_id, ctx->sed[sf->s_id], ctx->max_length); sf->t = GEN_SFS_LEAVING_STATE; return true; From 00ad53b60731c7464650af80cfd0fffa38dfa0ee Mon Sep 17 00:00:00 2001 From: Kate F Date: Sat, 24 Aug 2024 17:53:40 +0100 Subject: [PATCH 6/8] Bump actions for Node 20. --- .github/workflows/ci.yml | 66 ++++++++++++++++++++-------------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ce24dbcf3..dfb6182f2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -19,7 +19,7 @@ jobs: steps: - name: Cache checkout - uses: actions/cache@v3 + uses: actions/cache@v4 id: cache-checkout with: path: ${{ env.wc }} @@ -27,7 +27,7 @@ jobs: - name: Checkout if: steps.cache-checkout.outputs.cache-hit != 'true' - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: submodules: recursive path: ${{ env.wc }} @@ -49,7 +49,7 @@ jobs: steps: - name: Cache PCRE suite - uses: actions/cache@v3 + uses: actions/cache@v4 id: cache-pcre with: path: pcre-suite/${{ env.pcre2 }} @@ -67,7 +67,7 @@ jobs: chmod -R ug-w pcre-suite - name: Cache converted PCRE tests - uses: actions/cache@v3 + uses: actions/cache@v4 id: cache-cvtpcre with: path: ${{ env.cvtpcre }} @@ -75,7 +75,7 @@ jobs: - name: Fetch build if: steps.cache-cvtpcre.outputs.cache-hit != 'true' - uses: actions/cache@v3 + uses: actions/cache@v4 id: cache-build with: path: ${{ env.build }} @@ -155,14 +155,14 @@ jobs: steps: - name: Fetch checkout - uses: actions/cache@v3 + uses: actions/cache@v4 id: cache-checkout with: path: ${{ env.wc }} key: checkout-${{ github.sha }} - name: Cache build - uses: actions/cache@v3 + uses: actions/cache@v4 id: cache-build with: path: ${{ env.build }} @@ -185,7 +185,7 @@ jobs: - name: Get number of CPU cores if: steps.cache-build.outputs.cache-hit != 'true' - uses: SimenB/github-actions-cpu-cores@v1 + uses: SimenB/github-actions-cpu-cores@v2 id: cpu-cores - name: Make @@ -233,7 +233,7 @@ jobs: steps: - name: Fetch checkout - uses: actions/cache@v3 + uses: actions/cache@v4 id: cache-checkout with: path: ${{ env.wc }} @@ -241,7 +241,7 @@ jobs: # An arbitary build. - name: Fetch build - uses: actions/cache@v3 + uses: actions/cache@v4 id: cache-build with: path: ${{ env.build }} @@ -273,7 +273,7 @@ jobs: ${{ matrix.cc }} --version - name: Get number of CPU cores - uses: SimenB/github-actions-cpu-cores@v1 + uses: SimenB/github-actions-cpu-cores@v2 id: cpu-cores - name: Make @@ -292,7 +292,7 @@ jobs: # kmkf duplicate install targets, it's not interesting for libfsm's CI, # so I'm retrying on error here. # github.com/katef/kmkf/issues/14 - name: Install - uses: nick-fields/retry@v2.8.3 + uses: nick-fields/retry@v3 with: timeout_seconds: 10 # required, but not a problem for the kmkf bug max_attempts: 3 @@ -322,7 +322,7 @@ jobs: steps: - name: Fetch checkout - uses: actions/cache@v3 + uses: actions/cache@v4 id: cache-checkout with: path: ${{ env.wc }} @@ -344,14 +344,14 @@ jobs: ${{ matrix.cc }} --version - name: Fetch build - uses: actions/cache@v3 + uses: actions/cache@v4 id: cache-build with: path: ${{ env.build }} key: build-${{ matrix.make }}-${{ matrix.os }}-${{ matrix.cc }}-${{ matrix.debug }}-${{ matrix.san }}-${{ github.sha }} - name: Get number of CPU cores - uses: SimenB/github-actions-cpu-cores@v1 + uses: SimenB/github-actions-cpu-cores@v2 id: cpu-cores - name: Test @@ -381,7 +381,7 @@ jobs: steps: - name: Fetch checkout - uses: actions/cache@v3 + uses: actions/cache@v4 id: cache-checkout with: path: ${{ env.wc }} @@ -403,7 +403,7 @@ jobs: ${{ matrix.cc }} --version - name: Fetch build - uses: actions/cache@v3 + uses: actions/cache@v4 id: cache-build with: path: ${{ env.build }} @@ -418,7 +418,7 @@ jobs: # still run fuzzing, just from empty, and do not save their seeds. - name: Restore seeds (mode ${{ matrix.mode }}) if: github.repository == 'katef/libfsm' - uses: actions/cache/restore@v3 + uses: actions/cache/restore@v4 id: cache-seeds with: path: ${{ env.seeds }}-${{ matrix.mode }} @@ -429,7 +429,7 @@ jobs: run: mkdir -p ${{ env.seeds }}-${{ matrix.mode }} - name: Get number of CPU cores - uses: SimenB/github-actions-cpu-cores@v1 + uses: SimenB/github-actions-cpu-cores@v2 id: cpu-cores - name: Fuzz @@ -455,7 +455,7 @@ jobs: # the same seeds for a given bug. # The explicit cache/restore and cache/save actions are just for that. - name: Save seeds (mode ${{ matrix.mode }}-${{ matrix.debug }}) - uses: actions/cache/save@v3 + uses: actions/cache/save@v4 if: always() with: path: ${{ env.seeds }}-${{ matrix.mode }} @@ -463,7 +463,7 @@ jobs: # nothing to do with the caching, I'm uploading the seeds so a developer can grab them to fuzz locally - name: Upload seeds (mode ${{ matrix.mode }}-${{ matrix.debug }}) - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: seeds-${{ matrix.mode }}-${{ matrix.debug }} path: ${{ env.seeds }}-${{ matrix.mode }} @@ -513,14 +513,14 @@ jobs: go version - name: Fetch build - uses: actions/cache@v3 + uses: actions/cache@v4 id: cache-build with: path: ${{ env.build }} key: build-${{ matrix.make }}-${{ matrix.os }}-${{ matrix.cc }}-${{ matrix.debug }}-${{ matrix.san }}-${{ github.sha }} - name: Fetch converted PCRE tests - uses: actions/cache@v3 + uses: actions/cache@v4 id: cache-cvtpcre with: path: ${{ env.cvtpcre }} @@ -539,7 +539,7 @@ jobs: steps: - name: Cache docs - uses: actions/cache@v3 + uses: actions/cache@v4 id: cache-docs with: path: ${{ env.build }} @@ -554,7 +554,7 @@ jobs: - name: Fetch checkout if: steps.cache-docs.outputs.cache-hit != 'true' - uses: actions/cache@v3 + uses: actions/cache@v4 id: cache-checkout with: path: ${{ env.wc }} @@ -562,7 +562,7 @@ jobs: - name: Get number of CPU cores if: steps.cache-docs.outputs.cache-hit != 'true' - uses: SimenB/github-actions-cpu-cores@v1 + uses: SimenB/github-actions-cpu-cores@v2 id: cpu-cores - name: Test docs @@ -594,7 +594,7 @@ jobs: steps: - name: Cache prefix - uses: actions/cache@v3 + uses: actions/cache@v4 id: cache-prefix with: path: ${{ env.prefix }} @@ -608,7 +608,7 @@ jobs: - name: Fetch checkout if: steps.cache-prefix.outputs.cache-hit != 'true' - uses: actions/cache@v3 + uses: actions/cache@v4 id: cache-checkout with: path: ${{ env.wc }} @@ -616,7 +616,7 @@ jobs: - name: Fetch build if: steps.cache-prefix.outputs.cache-hit != 'true' - uses: actions/cache@v3 + uses: actions/cache@v4 id: cache-build with: path: ${{ env.build }} @@ -624,7 +624,7 @@ jobs: - name: Fetch docs if: steps.cache-prefix.outputs.cache-hit != 'true' - uses: actions/cache@v3 + uses: actions/cache@v4 id: cache-docs with: path: ${{ env.build }} @@ -632,7 +632,7 @@ jobs: - name: Get number of CPU cores if: steps.cache-prefix.outputs.cache-hit != 'true' - uses: SimenB/github-actions-cpu-cores@v1 + uses: SimenB/github-actions-cpu-cores@v2 id: cpu-cores - name: Install @@ -668,7 +668,7 @@ jobs: fpm -v - name: Fetch prefix - uses: actions/cache@v3 + uses: actions/cache@v4 id: cache-prefix with: path: ${{ env.prefix }} @@ -691,7 +691,7 @@ jobs: printf "package_file=%s\n" $(basename pkg/*) >> $GITHUB_ENV - name: Upload - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: ${{ env.package_file }} path: pkg/${{ env.package_file }} From 5010a4043e199459860cbc6940f54cfc8aaadf9b Mon Sep 17 00:00:00 2001 From: Kate F Date: Sat, 24 Aug 2024 15:23:48 +0100 Subject: [PATCH 7/8] Stray assertion. This doesn't help for #317, but whatever the solution is there, asserting about it is the wrong thing to do. Spotted by @classabbyamp, thank you --- src/libre/print/abnf.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/libre/print/abnf.c b/src/libre/print/abnf.c index e8439aa28..a50771ca2 100644 --- a/src/libre/print/abnf.c +++ b/src/libre/print/abnf.c @@ -212,7 +212,6 @@ pp_iter(FILE *f, const struct fsm_options *opt, enum re_flags *re_flags, struct break; case AST_EXPR_SUBTRACT: - assert(!"unimplemented"); pp_atomic(f, opt, re_flags, n->u.subtract.a, n); fprintf(f, " - "); pp_atomic(f, opt, re_flags, n->u.subtract.b, n); From ebbcb356bf7aec23348417e6cbc76a8ad88c814e Mon Sep 17 00:00:00 2001 From: Kate F Date: Mon, 26 Aug 2024 02:50:00 +0100 Subject: [PATCH 8/8] Update for hooks & options API changes. I have extremely broken the iprange example. I am very unsure what's going on with this program. --- examples/bm/libfsm.c | 4 +- examples/glob/main.c | 2 +- examples/iprange/main.c | 149 +++++++++++++++++++++++++++++++--------- examples/utf8dfa/main.c | 9 ++- examples/words/main.c | 8 +-- 5 files changed, 130 insertions(+), 42 deletions(-) diff --git a/examples/bm/libfsm.c b/examples/bm/libfsm.c index 80b013d63..a7581bd59 100644 --- a/examples/bm/libfsm.c +++ b/examples/bm/libfsm.c @@ -61,7 +61,7 @@ main(int argc, char *argv[]) opt.io = FSM_IO_STR; p = argv[0]; - fsm = re_comp(RE_PCRE, fsm_sgetc, &p, &opt, flags, &e); + fsm = re_comp(RE_PCRE, fsm_sgetc, &p, NULL, flags, &e); if (fsm == NULL) { re_perror(RE_LITERAL, &e, NULL, s); return 1; @@ -80,7 +80,7 @@ main(int argc, char *argv[]) printf("#include \n"); printf("\n"); - fsm_print(stdout, fsm, FSM_PRINT_C); + fsm_print(stdout, fsm, &opt, NULL, FSM_PRINT_C); printf("int\n"); printf("main(void)\n"); diff --git a/examples/glob/main.c b/examples/glob/main.c index a5af0f317..65703352c 100644 --- a/examples/glob/main.c +++ b/examples/glob/main.c @@ -196,7 +196,7 @@ main(int argc, char *argv[]) } if (!quiet) { - fsm_print(stdout, fsm, FSM_PRINT_FSM); + fsm_print(stdout, fsm, NULL, NULL, FSM_PRINT_FSM); } matched = match(fsm, argv[1]); diff --git a/examples/iprange/main.c b/examples/iprange/main.c index 6ce49993b..ba028351b 100644 --- a/examples/iprange/main.c +++ b/examples/iprange/main.c @@ -88,6 +88,24 @@ RB_GENERATE_STATIC(recmap, record, entry, recmap_cmp) static unsigned nrecords; static struct fsm_options opt; +static struct fsm_hooks hooks; + +static struct record * +find_id(unsigned id) +{ + struct record *r; + + /* XXX: this is a crime, we have a tree. + * we should be able to RB_FIND() */ + RB_FOREACH(r, recmap, &recmap) { + if (r->id == id) { + return r; + } + } + + assert(!"unreached"); + abort(); +} static struct record * get_id(char *rec, size_t reclen) @@ -115,7 +133,7 @@ get_id(char *rec, size_t reclen) r->len = reclen; r->id = nrecords++; - r->fsm = fsm_new(&opt); + r->fsm = fsm_new(NULL); if (r->fsm == NULL) { perror("fsm_new"); exit(-1); @@ -133,7 +151,7 @@ get_id(char *rec, size_t reclen) } fsm_setend(r->fsm, r->end, 1); - for (size_t i = 0; i < sizeof r->regs; i++) { + for (size_t i = 0; i < sizeof r->regs / sizeof *r->regs; i++) { r->regs[i].c = '\0'; r->regs[i].s = fsm_none; } @@ -164,7 +182,7 @@ get_id(char *rec, size_t reclen) static void usage(void) { - fprintf(stderr, "ip2fsm -[46] [-f ] -l fmt\n" + fprintf(stderr, "iprange -[46] [-f ] -l fmt\n" "\t-4\t\tIPv4\n" "\t-6\t\tIPv6\n" "\t-f \tuse as input\n" @@ -378,6 +396,9 @@ handle_line(unsigned char *socts, unsigned char *eocts, unsigned noct, } socts[spos] = eocts[spos]; + + // XXX: not sure about this + break; } else { gen_range(r, noct, spos - 1, socts[spos], 255, socts); @@ -422,45 +443,69 @@ important(unsigned n) } static int -leaf(FILE *f, const fsm_end_id_t *ids, size_t count, const void *leaf_opaque) +conflict(FILE *f, const struct fsm_options *opt, + const fsm_end_id_t *ids, size_t count, + const char *example, void *hook_opaque) { - const struct record *r; + size_t i; - (void) leaf_opaque; + (void) f; + (void) hook_opaque; + (void) opt; - if (count != 1) { - fprintf(f, "endid conflict\n"); - exit(EXIT_FAILURE); - } + fprintf(stderr, "ambiguous matches for "); - r = (const void *) (intptr_t) ids[0]; /* XXX */ + for (i = 0; i < count; i++) { + const struct record *r; - if (r == NULL) { - fprintf(f, "return -1;"); - return 0; + r = (const void *) (intptr_t) ids[i]; /* XXX */ + + fprintf(stderr, "%s", r->rec); + + if (i + 1 < count) { + fprintf(stderr, ", "); + } + } + + if (example != NULL) { + fprintf(stderr, "; for example on input '%s'", example); } - fprintf(f, "return 0x%u; /* %s */", r->id, r->rec); + fprintf(stderr, "\n"); return 0; } static int -endleaf_dot(FILE *f, const fsm_end_id_t *ids, size_t count, const void *endleaf_opaque) +accept_dot(FILE *f, const struct fsm_options *opt, + const fsm_end_id_t *ids, size_t count, + void *lang_opaque, void *hook_opaque) { - const struct record *r; + fsm_state_t s; assert(f != NULL); - assert(endleaf_opaque == NULL); - (void) endleaf_opaque; + (void) hook_opaque; + + s = * (fsm_state_t *) lang_opaque; fprintf(f, "label = <"); + if (!opt->anonymous_states) { + fprintf(f, "%u", s); + + if (count > 0) { + fprintf(f, "
"); + } + } + for (size_t i = 0; i < count; i++) { - r = (const void *) (intptr_t) ids[i]; /* XXX */ + const struct record *r; + + r = find_id(ids[i]); fprintf(f, "%s", r->rec); /* XXX: escape */ + if (i + 1 < count) { fprintf(f, ", "); } @@ -471,6 +516,35 @@ endleaf_dot(FILE *f, const fsm_end_id_t *ids, size_t count, const void *endleaf_ return 0; } +static int +comment_c(FILE *f, const struct fsm_options *opt, + const fsm_end_id_t *ids, size_t count, + void *hook_opaque) +{ + assert(f != NULL); + + (void) opt; + (void) hook_opaque; + + fprintf(f, "/* "); + + for (size_t i = 0; i < count; i++) { + const struct record *r; + + r = find_id(ids[i]); + + fprintf(f, "%s", r->rec); /* XXX: escape */ + + if (i + 1 < count) { + fprintf(f, ", "); + } + } + + fprintf(f, " */\n"); + + return 0; +} + int main(int argc, char **argv) { @@ -481,12 +555,15 @@ main(int argc, char **argv) int oc = 0; int c; + opt.ambig = AMBIG_ERROR; opt.prefix = NULL; opt.always_hex = 1; opt.anonymous_states = 1; opt.consolidate_edges = 1; opt.case_ranges = 1; + hooks.conflict = conflict; + while (c = getopt(argc, argv, "46f:l:Q"), c != -1) { switch (c) { case '4': ipv = IPV4; break; @@ -531,7 +608,7 @@ main(int argc, char **argv) memset(ones, 0xff, sizeof ones); - fsm = fsm_new(&opt); + fsm = fsm_new(NULL); if (fsm == NULL) { perror("fsm_new"); return -1; @@ -634,23 +711,31 @@ main(int argc, char **argv) struct record *r; RB_FOREACH(r, recmap, &recmap) { + struct fsm_combine_info ci; fsm_state_t start; - if (fsm_minimise(r->fsm) == 0) { + if (!fsm_determinise(r->fsm)) { + perror("fsm_determinse"); + exit(-1); + } + + if (!fsm_minimise(r->fsm)) { perror("fsm_minimise"); exit(-1); } - fsm_setendid(r->fsm, (intptr_t) r); /* XXX */ + fsm_setendid(r->fsm, r->id); (void) fsm_getstart(r->fsm, &start); - fsm = fsm_merge(fsm, r->fsm, NULL); + fsm = fsm_merge(fsm, r->fsm, &ci); if (fsm == NULL) { perror("fsm_merge"); exit(-1); } + (void) ci; + if (!fsm_addedge_epsilon(fsm, fsm_start, start)) { perror("fsm_addedge_epsilon"); exit(-1); @@ -676,7 +761,7 @@ main(int argc, char **argv) tstart = time(NULL); } - if (!fsm_determinise(fsm) == 0) { + if (!fsm_determinise(fsm)) { perror("fsm_determinise"); exit(-1); } @@ -687,15 +772,13 @@ main(int argc, char **argv) } if (oc) { - opt.fragment = 1; - opt.cp = "c"; - opt.leaf = leaf; - opt.leaf_opaque = NULL; - fsm_print(stdout, fsm, FSM_PRINT_C); + opt.fragment = 1; + opt.comments = 1; + hooks.comment = comment_c; + fsm_print(stdout, fsm, &opt, &hooks, FSM_PRINT_C); } else if (odot) { - opt.endleaf = endleaf_dot; - opt.endleaf_opaque = NULL; - fsm_print(stdout, fsm, FSM_PRINT_DOT); + hooks.accept = accept_dot; + fsm_print(stdout, fsm, &opt, &hooks, FSM_PRINT_DOT); } } diff --git a/examples/utf8dfa/main.c b/examples/utf8dfa/main.c index d987575b3..811cd76c0 100644 --- a/examples/utf8dfa/main.c +++ b/examples/utf8dfa/main.c @@ -164,7 +164,7 @@ main(int argc, char *argv[]) return EXIT_FAILURE; } - fsm = fsm_new(&opt); + fsm = fsm_new(NULL); if (fsm == NULL) { perror("fsm_new"); exit(1); @@ -215,12 +215,17 @@ main(int argc, char *argv[]) } } + if (!fsm_determinise(fsm)) { + perror("fsm_determinise"); + exit(1); + } + if (!fsm_minimise(fsm)) { perror("fsm_minimise"); exit(1); } - fsm_print(stdout, fsm, lang); + fsm_print(stdout, fsm, NULL, NULL, lang); fsm_free(fsm); diff --git a/examples/words/main.c b/examples/words/main.c index 1fe19ddf1..078848b78 100644 --- a/examples/words/main.c +++ b/examples/words/main.c @@ -86,7 +86,7 @@ int main(int argc, char *argv[]) { exit(EXIT_FAILURE); } } else { - fsm = fsm_new(&opt); + fsm = fsm_new(NULL); if (fsm == NULL) { perror("fsm_new"); return 1; @@ -127,7 +127,7 @@ int main(int argc, char *argv[]) { struct fsm *r; struct fsm_combine_info ci; - r = re_comp(native ? RE_NATIVE : RE_LITERAL, fsm_sgetc, &p, &opt, 0, &e); + r = re_comp(native ? RE_NATIVE : RE_LITERAL, fsm_sgetc, &p, NULL, 0, &e); if (r == NULL) { re_perror(native ? RE_NATIVE : RE_LITERAL, &e, NULL, s); return 1; @@ -168,7 +168,7 @@ int main(int argc, char *argv[]) { } fsm = re_strings_build(g, - &opt, unanchored ? 0 : (RE_STRINGS_ANCHOR_LEFT | RE_STRINGS_ANCHOR_RIGHT)); + NULL, unanchored ? 0 : (RE_STRINGS_ANCHOR_LEFT | RE_STRINGS_ANCHOR_RIGHT)); if (fsm == NULL) { perror("re_strings_builder_build"); exit(EXIT_FAILURE); @@ -211,7 +211,7 @@ int main(int argc, char *argv[]) { + ((long) post.tv_nsec - (long) pre.tv_nsec) / 1000000; } - fsm_print(stdout, fsm, lang); + fsm_print(stdout, fsm, &opt, NULL, lang); if (timing) { printf("construction, reduction, total: %lu, %lu, %lu\n", ms, mt, ms + mt);