From b1808c3364b6769ca3982c8788797dda1431e210 Mon Sep 17 00:00:00 2001 From: Daniel Kroening Date: Wed, 9 Apr 2025 10:26:44 -0400 Subject: [PATCH] C23 keywords This adds scanner, parser and typechecker support for the new C23 keywords. --- README.md | 15 ++-- doc/architectural/front-page.md | 15 ++-- doc/man/cbmc.1 | 2 +- doc/man/goto-analyzer.1 | 2 +- regression/cbmc/_BitInt/_BitInt1.c | 13 ++++ regression/cbmc/_BitInt/_BitInt1.desc | 8 +++ regression/cbmc/_BitInt/_BitInt2.c | 18 +++++ regression/cbmc/_BitInt/_BitInt2.desc | 10 +++ regression/cbmc/_BitInt/_BitInt3.c | 11 +++ regression/cbmc/_BitInt/_BitInt3.desc | 10 +++ regression/cbmc/_BitInt/_BitInt4.c | 10 +++ regression/cbmc/_BitInt/_BitInt4.desc | 10 +++ .../cbmc/constants/predefined-constants1.c | 10 +++ .../cbmc/constants/predefined-constants1.desc | 7 ++ .../cbmc/static_assert/static_assert1.c | 17 +++++ .../cbmc/static_assert/static_assert1.desc | 8 +++ regression/cbmc/typeof/typeof2.c | 23 ++++++ regression/cbmc/typeof/typeof2.desc | 8 +++ src/ansi-c/ansi_c_convert_type.cpp | 71 +++++++++++-------- src/ansi-c/ansi_c_convert_type.h | 7 +- src/ansi-c/ansi_c_language.cpp | 5 ++ src/ansi-c/builtin_factory.cpp | 2 + src/ansi-c/c_preprocess.cpp | 4 +- src/ansi-c/c_typecheck_base.h | 1 + src/ansi-c/c_typecheck_code.cpp | 9 ++- src/ansi-c/c_typecheck_type.cpp | 62 +++++++++++++++- src/ansi-c/expr2c.cpp | 33 ++++++++- src/ansi-c/gcc_version.cpp | 4 ++ src/ansi-c/goto-conversion/goto_convert.cpp | 6 +- src/ansi-c/parser.y | 52 +++++++++++++- src/ansi-c/scanner.l | 64 ++++++++--------- src/util/config.cpp | 6 ++ src/util/config.h | 4 +- src/util/irep_ids.def | 5 ++ 34 files changed, 437 insertions(+), 95 deletions(-) create mode 100644 regression/cbmc/_BitInt/_BitInt1.c create mode 100644 regression/cbmc/_BitInt/_BitInt1.desc create mode 100644 regression/cbmc/_BitInt/_BitInt2.c create mode 100644 regression/cbmc/_BitInt/_BitInt2.desc create mode 100644 regression/cbmc/_BitInt/_BitInt3.c create mode 100644 regression/cbmc/_BitInt/_BitInt3.desc create mode 100644 regression/cbmc/_BitInt/_BitInt4.c create mode 100644 regression/cbmc/_BitInt/_BitInt4.desc create mode 100644 regression/cbmc/constants/predefined-constants1.c create mode 100644 regression/cbmc/constants/predefined-constants1.desc create mode 100644 regression/cbmc/static_assert/static_assert1.c create mode 100644 regression/cbmc/static_assert/static_assert1.desc create mode 100644 regression/cbmc/typeof/typeof2.c create mode 100644 regression/cbmc/typeof/typeof2.desc diff --git a/README.md b/README.md index 1542988ca34..b50903f3985 100644 --- a/README.md +++ b/README.md @@ -6,13 +6,14 @@ About ===== -CBMC is a Bounded Model Checker for C and C++ programs. It supports C89, C99, -most of C11 and most compiler extensions provided by gcc and Visual Studio. It -also supports SystemC using Scoot. It allows verifying array bounds (buffer -overflows), pointer safety, exceptions and user-specified assertions. -Furthermore, it can check C and C++ for consistency with other languages, such -as Verilog. The verification is performed by unwinding the loops in the program -and passing the resulting equation to a decision procedure. +CBMC is a Bounded Model Checker for C and C++ programs. It supports C89, +C99, most of C11, C17, C23 and most compiler extensions provided by gcc and +Visual Studio. It also supports SystemC using Scoot. It allows verifying +array bounds (buffer overflows), pointer safety, exceptions and +user-specified assertions. Furthermore, it can check C and C++ for +consistency with other languages, such as Verilog. The verification is +performed by unwinding the loops in the program and passing the resulting +equation to a decision procedure. For full information see [cprover.org](http://www.cprover.org/cbmc). diff --git a/doc/architectural/front-page.md b/doc/architectural/front-page.md index a729da7b769..24c34ad3ee8 100644 --- a/doc/architectural/front-page.md +++ b/doc/architectural/front-page.md @@ -8,13 +8,14 @@ website; contributors should use the repository hosted on GitHub. CBMC is part of CProver. -CBMC is a Bounded Model Checker for C and C++ programs. It supports C89, C99, -most of C11 and most compiler extensions provided by gcc and Visual Studio. It -also supports SystemC using Scoot. It allows verifying array bounds (buffer -overflows), pointer safety, arithmetic exceptions and user-specified assertions. -Furthermore, it can check C and C++ for consistency with other languages, such -as Verilog. The verification is performed by unwinding the loops in the program -and passing the resulting equation to a decision procedure. +CBMC is a Bounded Model Checker for C and C++ programs. It supports C89, +C99, most of C11, C17, C23 and most compiler extensions provided by gcc and +Visual Studio. It also supports SystemC using Scoot. It allows verifying +array bounds (buffer overflows), pointer safety, arithmetic exceptions and +user-specified assertions. Furthermore, it can check C and C++ for +consistency with other languages, such as Verilog. The verification is +performed by unwinding the loops in the program and passing the resulting +equation to a decision procedure. For further information see [cprover.org](http://www.cprover.org/cbmc). diff --git a/doc/man/cbmc.1 b/doc/man/cbmc.1 index f1a295674bf..edf0502fd85 100644 --- a/doc/man/cbmc.1 +++ b/doc/man/cbmc.1 @@ -154,7 +154,7 @@ set include file (C/C++) \fB\-D\fR macro define preprocessor macro (C/C++) .TP -\fB\-\-c89\fR, \fB\-\-c99\fR, \fB\-\-c11\fR +\fB\-\-c89\fR, \fB\-\-c99\fR, \fB\-\-c11\fR, \fB\-\-c17\fR, \fB\-\-c23\fR set C language standard (default: c11) .TP \fB\-\-cpp98\fR, \fB\-\-cpp03\fR, \fB\-\-cpp11\fR diff --git a/doc/man/goto-analyzer.1 b/doc/man/goto-analyzer.1 index 582ff022060..0750c91817c 100644 --- a/doc/man/goto-analyzer.1 +++ b/doc/man/goto-analyzer.1 @@ -425,7 +425,7 @@ set include file (C/C++) \fB\-D\fR macro define preprocessor macro (C/C++) .TP -\fB\-\-c89\fR, \fB\-\-c99\fR, \fB\-\-c11\fR +\fB\-\-c89\fR, \fB\-\-c99\fR, \fB\-\-c11\fR, \fB\-\-c17\fR, \fB\-\-c23\fR set C language standard (default: c11) .TP \fB\-\-cpp98\fR, \fB\-\-cpp03\fR, \fB\-\-cpp11\fR diff --git a/regression/cbmc/_BitInt/_BitInt1.c b/regression/cbmc/_BitInt/_BitInt1.c new file mode 100644 index 00000000000..f824408d779 --- /dev/null +++ b/regression/cbmc/_BitInt/_BitInt1.c @@ -0,0 +1,13 @@ +// _BitInt is a C23 feature + +// sizeof +static_assert(sizeof(unsigned _BitInt(1)) == 1); +static_assert(sizeof(_BitInt(32)) == 4); +static_assert(sizeof(_BitInt(33)) == 8); +static_assert(sizeof(_BitInt(65)) == 16); +static_assert(sizeof(_BitInt(128)) == 16); + +int main() +{ + return 0; +} diff --git a/regression/cbmc/_BitInt/_BitInt1.desc b/regression/cbmc/_BitInt/_BitInt1.desc new file mode 100644 index 00000000000..40042a5cd07 --- /dev/null +++ b/regression/cbmc/_BitInt/_BitInt1.desc @@ -0,0 +1,8 @@ +CORE +_BitInt1.c +--c23 +^EXIT=0$ +^SIGNAL=0$ +-- +^warning: ignoring +^CONVERSION ERROR$ diff --git a/regression/cbmc/_BitInt/_BitInt2.c b/regression/cbmc/_BitInt/_BitInt2.c new file mode 100644 index 00000000000..d79de332dc0 --- /dev/null +++ b/regression/cbmc/_BitInt/_BitInt2.c @@ -0,0 +1,18 @@ +// _BitInt is a C23 feature +#include + +int main() +{ + // casts + assert((_BitInt(4))17 == 1); + assert((_BitInt(4)) - 1 == -1); + assert((unsigned _BitInt(4)) - 1 == 15); + + // promotion (or lack thereof) + assert((unsigned _BitInt(4))15 + (unsigned _BitInt(4))1 == 0); + assert((unsigned _BitInt(4))15 + (unsigned _BitInt(5))1 == 16); + assert((unsigned _BitInt(4))15 + (signed _BitInt(5))1 == -16); + assert((unsigned _BitInt(4))15 + 1 == 16); + + return 0; +} diff --git a/regression/cbmc/_BitInt/_BitInt2.desc b/regression/cbmc/_BitInt/_BitInt2.desc new file mode 100644 index 00000000000..7672040498c --- /dev/null +++ b/regression/cbmc/_BitInt/_BitInt2.desc @@ -0,0 +1,10 @@ +KNOWNBUG +_BitInt2.c +--c23 +^EXIT=0$ +^SIGNAL=0$ +-- +^warning: ignoring +^CONVERSION ERROR$ +-- +_BitInt implementation is missing. diff --git a/regression/cbmc/_BitInt/_BitInt3.c b/regression/cbmc/_BitInt/_BitInt3.c new file mode 100644 index 00000000000..ba0154d0a70 --- /dev/null +++ b/regression/cbmc/_BitInt/_BitInt3.c @@ -0,0 +1,11 @@ +// _BitInt is a C23 feature +#include + +int main() +{ + // pointers + _BitInt(3) x, *p = &x; + *p = 1; + + return 0; +} diff --git a/regression/cbmc/_BitInt/_BitInt3.desc b/regression/cbmc/_BitInt/_BitInt3.desc new file mode 100644 index 00000000000..746a8675e11 --- /dev/null +++ b/regression/cbmc/_BitInt/_BitInt3.desc @@ -0,0 +1,10 @@ +KNOWNBUG +_BitInt3.c +--c23 +^EXIT=0$ +^SIGNAL=0$ +-- +^warning: ignoring +^CONVERSION ERROR$ +-- +_BitInt implementation is missing. diff --git a/regression/cbmc/_BitInt/_BitInt4.c b/regression/cbmc/_BitInt/_BitInt4.c new file mode 100644 index 00000000000..57097633c5f --- /dev/null +++ b/regression/cbmc/_BitInt/_BitInt4.c @@ -0,0 +1,10 @@ +// _BitInt is a C23 feature + +static_assert(6uwb + -6wb == 0); +static_assert(sizeof(255uwb + 1uwb) == 1); +static_assert(sizeof(0b1111'1111uwb) == 1); + +int main() +{ + return 0; +} diff --git a/regression/cbmc/_BitInt/_BitInt4.desc b/regression/cbmc/_BitInt/_BitInt4.desc new file mode 100644 index 00000000000..8e3675bea83 --- /dev/null +++ b/regression/cbmc/_BitInt/_BitInt4.desc @@ -0,0 +1,10 @@ +KNOWNBUG +_BitInt4.c +--c23 +^EXIT=0$ +^SIGNAL=0$ +-- +^warning: ignoring +^CONVERSION ERROR$ +-- +_BitInt implementation is missing. diff --git a/regression/cbmc/constants/predefined-constants1.c b/regression/cbmc/constants/predefined-constants1.c new file mode 100644 index 00000000000..80a0b1c6346 --- /dev/null +++ b/regression/cbmc/constants/predefined-constants1.c @@ -0,0 +1,10 @@ +// C23 predefined constants +static_assert(!false, "false"); +static_assert(true, "true"); +static_assert(sizeof(false) == sizeof(bool), "sizeof false"); +static_assert(sizeof(true) == sizeof(bool), "sizeof true"); +static_assert(nullptr == 0, "nullptr"); + +int main() +{ +} diff --git a/regression/cbmc/constants/predefined-constants1.desc b/regression/cbmc/constants/predefined-constants1.desc new file mode 100644 index 00000000000..f0c31e117fd --- /dev/null +++ b/regression/cbmc/constants/predefined-constants1.desc @@ -0,0 +1,7 @@ +CORE +predefined-constants1.c +--c23 +^EXIT=0$ +^SIGNAL=0$ +-- +^warning: ignoring diff --git a/regression/cbmc/static_assert/static_assert1.c b/regression/cbmc/static_assert/static_assert1.c new file mode 100644 index 00000000000..74d179437e1 --- /dev/null +++ b/regression/cbmc/static_assert/static_assert1.c @@ -0,0 +1,17 @@ +// C23 introduces the "static_assert" keyword. + +struct S +{ + // Visual Studio does not support static_assert in compound bodies. +#ifndef _MSC_VER + static_assert(1, "in struct"); +#endif + int x; +} asd; + +static_assert(1, "global scope"); + +int main() +{ + static_assert(1, "in function"); +} diff --git a/regression/cbmc/static_assert/static_assert1.desc b/regression/cbmc/static_assert/static_assert1.desc new file mode 100644 index 00000000000..6192d2c3123 --- /dev/null +++ b/regression/cbmc/static_assert/static_assert1.desc @@ -0,0 +1,8 @@ +CORE +static_assert1.c +--c23 +^EXIT=0$ +^SIGNAL=0$ +-- +^warning: ignoring +^CONVERSION ERROR$ diff --git a/regression/cbmc/typeof/typeof2.c b/regression/cbmc/typeof/typeof2.c new file mode 100644 index 00000000000..8a313155942 --- /dev/null +++ b/regression/cbmc/typeof/typeof2.c @@ -0,0 +1,23 @@ +typedef int INTTYPE; + +int func1(); + +// C23 typeof +typeof(int) v1; +typeof(INTTYPE) v2; +typeof(v2) v3; +typeof(1 + 1) v4; +typeof(1 + 1 + func1()) v5; +const typeof(int) v6; +typeof(int) const v7; +static typeof(int) const v8; +static typeof(int) const *v9; +static volatile typeof(int) const *v10; + +void func2(typeof(int) *some_arg) +{ +} + +int main() +{ +} diff --git a/regression/cbmc/typeof/typeof2.desc b/regression/cbmc/typeof/typeof2.desc new file mode 100644 index 00000000000..04c91f70d33 --- /dev/null +++ b/regression/cbmc/typeof/typeof2.desc @@ -0,0 +1,8 @@ +CORE +typeof2.c +--c23 +^EXIT=0$ +^SIGNAL=0$ +-- +^warning: ignoring +^CONVERSION ERROR$ diff --git a/src/ansi-c/ansi_c_convert_type.cpp b/src/ansi-c/ansi_c_convert_type.cpp index 51c2d469ee1..c7ed6453c90 100644 --- a/src/ansi-c/ansi_c_convert_type.cpp +++ b/src/ansi-c/ansi_c_convert_type.cpp @@ -77,6 +77,13 @@ void ansi_c_convert_typet::read_rec(const typet &type) int32_cnt++; else if(type.id()==ID_int64) int64_cnt++; + else if(type.id() == ID_c_bitint) + { + bitint_cnt++; + const exprt &size_expr = static_cast(type.find(ID_size)); + + bv_width = size_expr; + } else if(type.id()==ID_gcc_float16) gcc_float16_cnt++; else if(type.id()==ID_gcc_float32) @@ -290,15 +297,13 @@ void ansi_c_convert_typet::write(typet &type) if(!other.empty()) { - if(double_cnt || float_cnt || signed_cnt || - unsigned_cnt || int_cnt || c_bool_cnt || proper_bool_cnt || - short_cnt || char_cnt || complex_cnt || long_cnt || - int8_cnt || int16_cnt || int32_cnt || int64_cnt || - gcc_float16_cnt || - gcc_float32_cnt || gcc_float32x_cnt || - gcc_float64_cnt || gcc_float64x_cnt || - gcc_float128_cnt || gcc_float128x_cnt || - gcc_int128_cnt || bv_cnt) + if( + double_cnt || float_cnt || signed_cnt || unsigned_cnt || int_cnt || + c_bool_cnt || proper_bool_cnt || bitint_cnt || short_cnt || char_cnt || + complex_cnt || long_cnt || int8_cnt || int16_cnt || int32_cnt || + int64_cnt || gcc_float16_cnt || gcc_float32_cnt || gcc_float32x_cnt || + gcc_float64_cnt || gcc_float64x_cnt || gcc_float128_cnt || + gcc_float128x_cnt || gcc_int128_cnt || bv_cnt) { log.error().source_location = source_location; log.error() << "illegal type modifier for defined type" << messaget::eom; @@ -373,10 +378,10 @@ void ansi_c_convert_typet::write(typet &type) gcc_float64_cnt || gcc_float64x_cnt || gcc_float128_cnt || gcc_float128x_cnt) { - if(signed_cnt || unsigned_cnt || int_cnt || c_bool_cnt || proper_bool_cnt || - int8_cnt || int16_cnt || int32_cnt || int64_cnt || - gcc_int128_cnt || bv_cnt || - short_cnt || char_cnt) + if( + signed_cnt || unsigned_cnt || int_cnt || c_bool_cnt || proper_bool_cnt || + bitint_cnt || int8_cnt || int16_cnt || int32_cnt || int64_cnt || + gcc_int128_cnt || bv_cnt || short_cnt || char_cnt) { log.error().source_location = source_location; log.error() << "cannot combine integer type with floating-point type" @@ -415,10 +420,10 @@ void ansi_c_convert_typet::write(typet &type) } else if(double_cnt || float_cnt) { - if(signed_cnt || unsigned_cnt || int_cnt || c_bool_cnt || proper_bool_cnt || - int8_cnt || int16_cnt || int32_cnt || int64_cnt || - gcc_int128_cnt|| bv_cnt || - short_cnt || char_cnt) + if( + signed_cnt || unsigned_cnt || int_cnt || c_bool_cnt || proper_bool_cnt || + bitint_cnt || int8_cnt || int16_cnt || int32_cnt || int64_cnt || + gcc_int128_cnt || bv_cnt || short_cnt || char_cnt) { log.error().source_location = source_location; log.error() << "cannot combine integer type with floating-point type" @@ -460,10 +465,10 @@ void ansi_c_convert_typet::write(typet &type) } else if(c_bool_cnt) { - if(signed_cnt || unsigned_cnt || int_cnt || short_cnt || - int8_cnt || int16_cnt || int32_cnt || int64_cnt || - gcc_float128_cnt || bv_cnt || proper_bool_cnt || - char_cnt || long_cnt) + if( + signed_cnt || unsigned_cnt || int_cnt || short_cnt || bitint_cnt || + int8_cnt || int16_cnt || int32_cnt || int64_cnt || gcc_float128_cnt || + bv_cnt || proper_bool_cnt || char_cnt || long_cnt) { log.error().source_location = source_location; log.error() << "illegal type modifier for C boolean type" @@ -475,10 +480,10 @@ void ansi_c_convert_typet::write(typet &type) } else if(proper_bool_cnt) { - if(signed_cnt || unsigned_cnt || int_cnt || short_cnt || - int8_cnt || int16_cnt || int32_cnt || int64_cnt || - gcc_float128_cnt || bv_cnt || - char_cnt || long_cnt) + if( + signed_cnt || unsigned_cnt || int_cnt || short_cnt || bitint_cnt || + int8_cnt || int16_cnt || int32_cnt || int64_cnt || gcc_float128_cnt || + bv_cnt || char_cnt || long_cnt) { log.error().source_location = source_location; log.error() << "illegal type modifier for proper boolean type" @@ -496,9 +501,9 @@ void ansi_c_convert_typet::write(typet &type) } else if(char_cnt) { - if(int_cnt || short_cnt || long_cnt || - int8_cnt || int16_cnt || int32_cnt || int64_cnt || - gcc_float128_cnt || bv_cnt || proper_bool_cnt) + if( + int_cnt || short_cnt || long_cnt || bitint_cnt || int8_cnt || int16_cnt || + int32_cnt || int64_cnt || gcc_float128_cnt || bv_cnt || proper_bool_cnt) { log.error().source_location = source_location; log.error() << "illegal type modifier for char type" << messaget::eom; @@ -537,7 +542,9 @@ void ansi_c_convert_typet::write(typet &type) if(int8_cnt || int16_cnt || int32_cnt || int64_cnt) { - if(long_cnt || char_cnt || short_cnt || gcc_int128_cnt || bv_cnt) + if( + long_cnt || char_cnt || short_cnt || bitint_cnt || gcc_int128_cnt || + bv_cnt) { log.error().source_location = source_location; log.error() << "conflicting type modifiers" << messaget::eom; @@ -574,6 +581,12 @@ void ansi_c_convert_typet::write(typet &type) else type=gcc_unsigned_int128_type(); } + else if(bitint_cnt) + { + // explicitly-given expression for the number of value bits + type.id(is_signed ? ID_c_signed_bitint : ID_c_unsigned_bitint); + type.set(ID_width, bv_width); + } else if(bv_cnt) { // explicitly-given expression for width diff --git a/src/ansi-c/ansi_c_convert_type.h b/src/ansi-c/ansi_c_convert_type.h index 67aa894efb3..043198d8fb5 100644 --- a/src/ansi-c/ansi_c_convert_type.h +++ b/src/ansi-c/ansi_c_convert_type.h @@ -24,10 +24,8 @@ class message_handlert; class ansi_c_convert_typet { public: - unsigned unsigned_cnt, signed_cnt, char_cnt, - int_cnt, short_cnt, long_cnt, - double_cnt, float_cnt, c_bool_cnt, - proper_bool_cnt, complex_cnt; + unsigned unsigned_cnt, signed_cnt, char_cnt, int_cnt, short_cnt, long_cnt, + double_cnt, float_cnt, c_bool_cnt, proper_bool_cnt, complex_cnt, bitint_cnt; // extensions unsigned int8_cnt, int16_cnt, int32_cnt, int64_cnt, @@ -87,6 +85,7 @@ class ansi_c_convert_typet c_bool_cnt(0), proper_bool_cnt(0), complex_cnt(0), + bitint_cnt(0), int8_cnt(0), int16_cnt(0), int32_cnt(0), diff --git a/src/ansi-c/ansi_c_language.cpp b/src/ansi-c/ansi_c_language.cpp index 87a196298eb..570d62c4db9 100644 --- a/src/ansi-c/ansi_c_language.cpp +++ b/src/ansi-c/ansi_c_language.cpp @@ -80,6 +80,11 @@ bool ansi_c_languaget::parse( ansi_c_parser.fp16_type = config.ansi_c.fp16_type; ansi_c_parser.cpp98=false; // it's not C++ ansi_c_parser.cpp11=false; // it's not C++ + ansi_c_parser.c17 = + config.ansi_c.c_standard == configt::ansi_ct::c_standardt::C17 || + config.ansi_c.c_standard == configt::ansi_ct::c_standardt::C23; + ansi_c_parser.c23 = + config.ansi_c.c_standard == configt::ansi_ct::c_standardt::C23; ansi_c_parser.mode=config.ansi_c.mode; ansi_c_scanner_init(ansi_c_parser); diff --git a/src/ansi-c/builtin_factory.cpp b/src/ansi-c/builtin_factory.cpp index 33e44d24521..172cb96b05a 100644 --- a/src/ansi-c/builtin_factory.cpp +++ b/src/ansi-c/builtin_factory.cpp @@ -58,6 +58,8 @@ static bool convert( ansi_c_parser.fp16_type = config.ansi_c.fp16_type; ansi_c_parser.cpp98=false; // it's not C++ ansi_c_parser.cpp11=false; // it's not C++ + ansi_c_parser.c17 = false; // we do C11 for now + ansi_c_parser.c23 = false; // we do C11 for now ansi_c_parser.mode=config.ansi_c.mode; ansi_c_scanner_init(ansi_c_parser); diff --git a/src/ansi-c/c_preprocess.cpp b/src/ansi-c/c_preprocess.cpp index c6190dd1fd3..f9fc552b7e4 100644 --- a/src/ansi-c/c_preprocess.cpp +++ b/src/ansi-c/c_preprocess.cpp @@ -602,10 +602,10 @@ bool c_preprocess_gcc_clang( case configt::ansi_ct::c_standardt::C23: #if defined(__OpenBSD__) if(preprocessor == configt::ansi_ct::preprocessort::CLANG) - argv.push_back("-std=c23"); + argv.push_back("-std=c2x"); else #endif - argv.push_back("-std=gnu23"); + argv.push_back("-std=gnu2x"); break; } } diff --git a/src/ansi-c/c_typecheck_base.h b/src/ansi-c/c_typecheck_base.h index dc0b11a209a..37316eafa98 100644 --- a/src/ansi-c/c_typecheck_base.h +++ b/src/ansi-c/c_typecheck_base.h @@ -265,6 +265,7 @@ class c_typecheck_baset: virtual void typecheck_array_type(array_typet &type); virtual void typecheck_vector_type(typet &type); virtual void typecheck_custom_type(typet &type); + virtual void typecheck_bitint_type(typet &); virtual void adjust_function_parameter(typet &type) const; virtual bool is_complete_type(const typet &type) const; diff --git a/src/ansi-c/c_typecheck_code.cpp b/src/ansi-c/c_typecheck_code.cpp index 6e348392845..c969bf12575 100644 --- a/src/ansi-c/c_typecheck_code.cpp +++ b/src/ansi-c/c_typecheck_code.cpp @@ -105,10 +105,13 @@ void c_typecheck_baset::typecheck_code(codet &code) } else if(statement==ID_static_assert) { - PRECONDITION(code.operands().size() == 2); + // C23 allows static_assert without message + PRECONDITION(code.operands().size() == 1 || code.operands().size() == 2); typecheck_expr(code.op0()); - typecheck_expr(code.op1()); + + if(code.operands().size() == 2) + typecheck_expr(code.op1()); implicit_typecast_bool(code.op0()); make_constant(code.op0()); @@ -118,7 +121,7 @@ void c_typecheck_baset::typecheck_code(codet &code) // failed error().source_location = code.find_source_location(); error() << "static assertion failed"; - if(code.op1().id() == ID_string_constant) + if(code.operands().size() == 2 && code.op1().id() == ID_string_constant) error() << ": " << to_string_constant(code.op1()).value(); error() << eom; throw 0; diff --git a/src/ansi-c/c_typecheck_type.cpp b/src/ansi-c/c_typecheck_type.cpp index 3e3121b0f40..d80669f5f42 100644 --- a/src/ansi-c/c_typecheck_type.cpp +++ b/src/ansi-c/c_typecheck_type.cpp @@ -94,7 +94,7 @@ void c_typecheck_baset::typecheck_type(typet &type) typecheck_c_enum_tag_type(to_c_enum_tag_type(type)); else if(type.id()==ID_c_bit_field) typecheck_c_bit_field_type(to_c_bit_field_type(type)); - else if(type.id()==ID_typeof) + else if(type.id() == ID_typeof || type.id() == ID_c_typeof_unqual) typecheck_typeof_type(type); else if(type.id() == ID_typedef_type) typecheck_typedef_type(type); @@ -116,6 +116,10 @@ void c_typecheck_baset::typecheck_type(typet &type) type.id()==ID_custom_floatbv || type.id()==ID_custom_fixedbv) typecheck_custom_type(type); + else if(type.id() == ID_c_signed_bitint || type.id() == ID_c_unsigned_bitint) + { + typecheck_bitint_type(type); + } else if(type.id()==ID_gcc_attribute_mode) { // get that mode @@ -417,6 +421,62 @@ void c_typecheck_baset::typecheck_custom_type(typet &type) UNREACHABLE; } +void c_typecheck_baset::typecheck_bitint_type(typet &type) +{ + // These have a given width, which is the sum of the number of + // value bits and, if signed, one for the sign bit (ISO C 2024, 6.2.6.2) + exprt width_expr = static_cast(type.find(ID_width)); + + typecheck_expr(width_expr); + source_locationt source_location = width_expr.source_location(); + make_constant_index(width_expr); + + mp_integer width_int; + if(to_integer(to_constant_expr(width_expr), width_int)) + { + throw errort().with_location(source_location) + << "failed to convert _BitInt width to constant"; + } + + bool is_signed = type.id() == ID_c_signed_bitint; + + // Must have at least one value bit + if(!is_signed) + { + if(width_int < 1) + { + throw errort().with_location(source_location) + << "unsigned _BitInt must have at least one bit"; + } + } + else + { + if(width_int < 2) + { + throw errort().with_location(source_location) + << "signed _BitInt must have at least two bits"; + } + } + + // These get padded up, much like _Bool. + // The padding is implementation-defined, + // and takes unspecified values. + auto bytes = (width_int % 8) == 0 ? width_int / 8 : width_int / 8 + 1; + + // We pad up to until the number of bytes is a power of two. + auto bytes_padded = power(2, bytes == 1 ? 0 : address_bits(bytes)); + + auto width = 8 * bytes_padded; + + type.set(ID_width, integer2string(width)); + type.set(ID_C_c_type, type.id()); + type.id(ID_bv); + + // We remember the original number of bits before padding, + // since these determine semantics + type.set(ID_C_c_bitint_width, integer2string(width_int)); +} + void c_typecheck_baset::typecheck_code_type(code_typet &type) { // the return type is still 'subtype()' diff --git a/src/ansi-c/expr2c.cpp b/src/ansi-c/expr2c.cpp index 6a390a2100f..705936ad81d 100644 --- a/src/ansi-c/expr2c.cpp +++ b/src/ansi-c/expr2c.cpp @@ -617,6 +617,19 @@ std::string expr2ct::convert_rec( { return q+"__attribute__(("+id2string(src.id())+")) void"+d; } + else if(src.id() == ID_bv) + { + // annotated? + irep_idt c_type = src.get(ID_C_c_type); + if(c_type == ID_c_signed_bitint) + { + return "_BitInt(" + src.get_string(ID_C_c_bitint_width) + ")"; + } + else if(c_type == ID_c_unsigned_bitint) + { + return "unsigned _BitInt(" + src.get_string(ID_C_c_bitint_width) + ")"; + } + } { lispexprt lisp; @@ -1822,8 +1835,24 @@ std::string expr2ct::convert_constant( return convert_norep(src, precedence); else if(type.id()==ID_bv) { - // not C - dest=id2string(value); + // used for _BitInt + irep_idt c_type = src.get(ID_C_c_type); + if(c_type == ID_c_signed_bitint) + { + auto as_int = bvrep2integer(value, to_bv_type(type).width(), false); + auto width = src.get_int(ID_C_c_bitint_width); + auto binary = integer2binary(as_int, width); // drops padding + return integer2string(binary2integer(binary, true)); + } + else if(c_type == ID_c_unsigned_bitint) + { + auto as_int = bvrep2integer(value, to_bv_type(type).width(), false); + auto width = src.get_int(ID_C_c_bitint_width); + auto binary = integer2binary(as_int, width); // drops padding + return integer2string(binary2integer(binary, false)); + } + else + return convert_norep(src, precedence); } else if(type.id()==ID_bool) { diff --git a/src/ansi-c/gcc_version.cpp b/src/ansi-c/gcc_version.cpp index 8ce7b950c0e..983aab3fe68 100644 --- a/src/ansi-c/gcc_version.cpp +++ b/src/ansi-c/gcc_version.cpp @@ -77,6 +77,10 @@ void gcc_versiont::get(const std::string &executable) default_c_standard = configt::ansi_ct::c_standardt::C99; else if(split[1] == "201112L") default_c_standard = configt::ansi_ct::c_standardt::C11; + else if(split[1] == "201710L") + default_c_standard = configt::ansi_ct::c_standardt::C17; + else if(split[1] == "202000L" || split[1] == "202311L") + default_c_standard = configt::ansi_ct::c_standardt::C23; } } diff --git a/src/ansi-c/goto-conversion/goto_convert.cpp b/src/ansi-c/goto-conversion/goto_convert.cpp index f07ed0f424c..382c5725af3 100644 --- a/src/ansi-c/goto-conversion/goto_convert.cpp +++ b/src/ansi-c/goto-conversion/goto_convert.cpp @@ -695,13 +695,15 @@ void goto_convertt::convert( convert_asm(to_code_asm(code), dest); else if(statement == ID_static_assert) { - PRECONDITION(code.operands().size() == 2); + // C23 allows static_assert without message + PRECONDITION(code.operands().size() == 1 || code.operands().size() == 2); + // We are double-checking the work of the type checker here. exprt assertion = typecast_exprt::conditional_cast(code.op0(), bool_typet()); simplify(assertion, ns); INVARIANT_WITH_DIAGNOSTICS( !assertion.is_false(), - "static assertion " + id2string(get_string_constant(code.op1())), + "static assertion is false", code.op0().find_source_location()); } else if(statement == ID_dead) diff --git a/src/ansi-c/parser.y b/src/ansi-c/parser.y index 59e320f7112..ac07f8cf029 100644 --- a/src/ansi-c/parser.y +++ b/src/ansi-c/parser.y @@ -62,8 +62,9 @@ int yyansi_cerror(const std::string &error); %token TOK_AUTO "auto" %token TOK_BOOL "bool" -%token TOK_COMPLEX "complex" +%token TOK_BITINT "_BitInt" %token TOK_BREAK "break" +%token TOK_COMPLEX "complex" %token TOK_CASE "case" %token TOK_CHAR "char" %token TOK_CONST "const" @@ -91,6 +92,7 @@ int yyansi_cerror(const std::string &error); %token TOK_STRUCT "struct" %token TOK_SWITCH "switch" %token TOK_TYPEDEF "typedef" +%token TOK_TYPEOF_UNQUAL "typeof_unqual" %token TOK_UNION "union" %token TOK_UNSIGNED "unsigned" %token TOK_VOID "void" @@ -344,10 +346,33 @@ string: /*** Constants **********************************************************/ -constant: integer +constant: + integer | floating | character | string + | predefined_constant + ; + +predefined_constant: + TOK_FALSE + { $$ = $1; + stack_expr($$).id(ID_constant); + stack_expr($$).set(ID_value, ID_0); + stack_expr($$).type() = c_bool_type(); + } + | TOK_TRUE + { $$ = $1; + stack_expr($$).id(ID_constant); + stack_expr($$).set(ID_value, ID_1); + stack_expr($$).type() = c_bool_type(); + } + | TOK_NULLPTR + { $$ = $1; + stack_expr($$).id(ID_constant); + stack_expr($$).set(ID_value, ID_NULL); + stack_expr($$).type() = pointer_type(void_type()); + } ; /*** Expressions ********************************************************/ @@ -963,6 +988,14 @@ static_assert_declaration: mto($$, $3); mto($$, $5); } + | TOK_STATIC_ASSERT '(' assignment_expression ')' + { + // C23 adds static_assert without message + $$=$1; + set($$, ID_declaration); + to_ansi_c_declaration(parser_stack($$)).set_is_static_assert(true); + mto($$, $3); + } ; default_declaring_list: @@ -1338,6 +1371,16 @@ typeof_specifier: parser_stack($$).id(ID_typeof); parser_stack($$).set(ID_type_arg, parser_stack($3)); } + | TOK_TYPEOF_UNQUAL '(' comma_expression ')' + { $$ = $1; + parser_stack($$).id(ID_c_typeof_unqual); + mto($$, $3); + } + | TOK_TYPEOF_UNQUAL '(' type_name ')' + { $$ = $1; + parser_stack($$).id(ID_c_typeof_unqual); + parser_stack($$).set(ID_type_arg, parser_stack($3)); + } ; typeof_type_specifier: @@ -1517,6 +1560,11 @@ basic_type_name: | TOK_DOUBLE { $$=$1; set($$, ID_double); } | TOK_SIGNED { $$=$1; set($$, ID_signed); } | TOK_UNSIGNED { $$=$1; set($$, ID_unsigned); } + | TOK_BITINT '(' constant_expression ')' + { + init($$, ID_c_bitint); + parser_stack($$).add(ID_size).swap(parser_stack($3)); + } | TOK_VOID { $$=$1; set($$, ID_void); } | TOK_BOOL { $$=$1; set($$, ID_c_bool); } | TOK_COMPLEX { $$=$1; set($$, ID_complex); } diff --git a/src/ansi-c/scanner.l b/src/ansi-c/scanner.l index 28669730c38..50c86fb90a7 100644 --- a/src/ansi-c/scanner.l +++ b/src/ansi-c/scanner.l @@ -162,28 +162,6 @@ int conditional_keyword(bool condition, int token) return make_identifier(); } -int c17_keyword(int token) -{ - if(PARSER.c17) - { - loc(); - return token; - } - else - return make_identifier(); -} - -int c23_keyword(int token) -{ - if(PARSER.c23) - { - loc(); - return token; - } - else - return make_identifier(); -} - int MSC_cpp_keyword(int token) { if(PARSER.cpp98 && PARSER.mode==configt::ansi_ct::flavourt::VISUAL_STUDIO) @@ -284,6 +262,7 @@ enable_or_disable ("enable"|"disable") %x ASM_BLOCK %x MSC_ASM %x IGNORE_PARENS +%x STD_ANNOTATION %x MSC_PRAGMA %x MSC_ANNOTATION %x GCC_ATTRIBUTE1 @@ -782,15 +761,22 @@ enable_or_disable ("enable"|"disable") TOK_WCHAR_T); } +%{ +/* C23 Keywords */ +%} + +"_BitInt" { return conditional_keyword(PARSER.c23, TOK_BITINT); } +"_typeof_unqual" { return conditional_keyword(PARSER.c23, TOK_TYPEOF_UNQUAL); } + %{ /* C++ Keywords and Operators */ %} -"alignas" { return conditional_keyword(PARSER.cpp11, TOK_ALIGNAS); } // C++11 -"alignof" { return conditional_keyword(PARSER.cpp11, TOK_ALIGNOF); } // C++11 +"alignas" { return conditional_keyword(PARSER.cpp11 || PARSER.c23, TOK_ALIGNAS); } +"alignof" { return conditional_keyword(PARSER.cpp11 || PARSER.c23, TOK_ALIGNOF); } "and" { return conditional_keyword(PARSER.cpp98, TOK_ANDAND); } "and_eq" { return conditional_keyword(PARSER.cpp98, TOK_ANDASSIGN); } -"bool" { return conditional_keyword(PARSER.cpp98, TOK_BOOL); } +"bool" { return conditional_keyword(PARSER.cpp98 || PARSER.c23, TOK_BOOL); } "catch" { return conditional_keyword(PARSER.cpp98, TOK_CATCH); } "char16_t" { // C++11, but Visual Studio uses typedefs return conditional_keyword( @@ -806,11 +792,11 @@ enable_or_disable ("enable"|"disable") } "class" { return conditional_keyword(PARSER.cpp98, TOK_CLASS); } "compl" { return conditional_keyword(PARSER.cpp98, '~'); } -"constexpr" { return conditional_keyword(PARSER.cpp11, TOK_CONSTEXPR); } // C++11 +"constexpr" { return conditional_keyword(PARSER.cpp11 || PARSER.c23, TOK_CONSTEXPR); } "delete" { return conditional_keyword(PARSER.cpp98, TOK_DELETE); } "decltype" { return conditional_keyword(PARSER.cpp11, TOK_DECLTYPE); } // C++11 "explicit" { return conditional_keyword(PARSER.cpp98, TOK_EXPLICIT); } -"false" { return conditional_keyword(PARSER.cpp98, TOK_FALSE); } +"false" { return conditional_keyword(PARSER.cpp98 || PARSER.c23, TOK_FALSE); } "friend" { return conditional_keyword(PARSER.cpp98, TOK_FRIEND); } "mutable" { return conditional_keyword(PARSER.cpp98, TOK_MUTABLE); } "namespace" { return conditional_keyword(PARSER.cpp98, TOK_NAMESPACE); } @@ -820,7 +806,7 @@ enable_or_disable ("enable"|"disable") "noreturn" { return conditional_keyword(PARSER.cpp11, TOK_NORETURN); } // C++11 "not" { return conditional_keyword(PARSER.cpp98, '!'); } "not_eq" { return conditional_keyword(PARSER.cpp98, TOK_NE); } -"nullptr" { return conditional_keyword(PARSER.cpp11, TOK_NULLPTR); } // C++11 +"nullptr" { return conditional_keyword(PARSER.cpp11 || PARSER.c23, TOK_NULLPTR); } "operator" { return conditional_keyword(PARSER.cpp98, TOK_OPERATOR); } "or" { return conditional_keyword(PARSER.cpp98, TOK_OROR); } "or_eq" { return conditional_keyword(PARSER.cpp98, TOK_ORASSIGN); } @@ -831,15 +817,15 @@ enable_or_disable ("enable"|"disable") // as a keyword, even though the documentation claims // it's a macro. return conditional_keyword( - PARSER.cpp11 || + PARSER.cpp11 || PARSER.c23 || PARSER.mode == configt::ansi_ct::flavourt::VISUAL_STUDIO, TOK_STATIC_ASSERT); } "template" { return conditional_keyword(PARSER.cpp98, TOK_TEMPLATE); } "this" { return conditional_keyword(PARSER.cpp98, TOK_THIS); } -"thread_local" { return conditional_keyword(PARSER.cpp11, TOK_THREAD_LOCAL); } // C++11 +"thread_local" { return conditional_keyword(PARSER.cpp11 || PARSER.c23, TOK_THREAD_LOCAL); } "throw" { return conditional_keyword(PARSER.cpp98, TOK_THROW); } -"true" { return conditional_keyword(PARSER.cpp98, TOK_TRUE); } +"true" { return conditional_keyword(PARSER.cpp98 || PARSER.c23, TOK_TRUE); } "typeid" { return conditional_keyword(PARSER.cpp98, TOK_TYPEID); } "typename" { return conditional_keyword(PARSER.cpp98, TOK_TYPENAME); } "using" { return conditional_keyword(PARSER.cpp98, TOK_USING); } @@ -913,6 +899,17 @@ enable_or_disable ("enable"|"disable") "__if_not_exists" { return MSC_cpp_keyword(TOK_MSC_IF_NOT_EXISTS); } "__underlying_type" { return conditional_keyword(PARSER.cpp98, TOK_UNDERLYING_TYPE); } +"[[" { if(PARSER.c23) + BEGIN(STD_ANNOTATION); + else + { + yyless(1); // puts one [ back into stream + loc(); + PARSER.tag_following=false; + return yytext[0]; // returns one [ + } + } + "["{ws}"repeatable" | "["{ws}"source_annotation_attribute" | "["{ws}"returnvalue" | @@ -1173,7 +1170,7 @@ enable_or_disable ("enable"|"disable") } "typeof" { return conditional_keyword( - PARSER.cpp98 || + PARSER.cpp98 || PARSER.c23 || PARSER.mode==configt::ansi_ct::flavourt::GCC || PARSER.mode==configt::ansi_ct::flavourt::CLANG || PARSER.mode==configt::ansi_ct::flavourt::CODEWARRIOR || @@ -1561,6 +1558,9 @@ enable_or_disable ("enable"|"disable") . { loc(); PARSER.tag_following=false; return yytext[0]; } } +"]]" { BEGIN(GRAMMAR); } +. { /* ignore */ } + "]" { BEGIN(GRAMMAR); } . { /* ignore */ } diff --git a/src/util/config.cpp b/src/util/config.cpp index 49afdc2e3d3..da70a3b9888 100644 --- a/src/util/config.cpp +++ b/src/util/config.cpp @@ -1238,6 +1238,12 @@ bool configt::set(const cmdlinet &cmdline) if(cmdline.isset("c11")) ansi_c.set_c11(); + if(cmdline.isset("c17")) + ansi_c.set_c17(); + + if(cmdline.isset("c23")) + ansi_c.set_c23(); + if(cmdline.isset("cpp98")) cpp.set_cpp98(); diff --git a/src/util/config.h b/src/util/config.h index 7189453663c..e1cdc3eb99c 100644 --- a/src/util/config.h +++ b/src/util/config.h @@ -23,7 +23,7 @@ class symbol_table_baset; #define OPT_CONFIG_C_CPP \ "D:I:(include)(function)" \ - "(c89)(c99)(c11)(cpp98)(cpp03)(cpp11)" \ + "(c89)(c99)(c11)(c17)(c23)(cpp98)(cpp03)(cpp11)" \ "(unsigned-char)" \ "(round-to-even)(round-to-nearest)" \ "(round-to-plus-inf)(round-to-minus-inf)(round-to-zero)" \ @@ -33,7 +33,7 @@ class symbol_table_baset; " {y-I} {upath} \t set include path (C/C++)\n" \ " {y--include} {ufile} \t set include file (C/C++)\n" \ " {y-D} {umacro} \t define preprocessor macro (C/C++)\n" \ - " {y--c89}, {y--c99}, {y--c11} \t " \ + " {y--c89}, {y--c99}, {y--c11},\n {y--c17}, {y--c23} \t " \ "set C language standard (default: " + \ std::string( \ configt::ansi_ct::default_c_standard() == \ diff --git a/src/util/irep_ids.def b/src/util/irep_ids.def index 0fbb2a95263..840741057dd 100644 --- a/src/util/irep_ids.def +++ b/src/util/irep_ids.def @@ -8,7 +8,11 @@ IREP_ID_ONE(let_binding) IREP_ID_ONE(nil) IREP_ID_ONE(type) IREP_ID_ONE(bool) +IREP_ID_ONE(c_bitint) IREP_ID_ONE(c_bool) +IREP_ID_ONE(c_signed_bitint) +IREP_ID_ONE(c_typeof_unqual) +IREP_ID_ONE(c_unsigned_bitint) IREP_ID_ONE(proper_bool) IREP_ID_ONE(signedbv) IREP_ID_ONE(unsignedbv) @@ -501,6 +505,7 @@ IREP_ID_ONE(cpp_declaration) IREP_ID_ONE(cpp_static_assert) IREP_ID_ONE(cpp_member_spec) IREP_ID_TWO(C_c_type, #c_type) +IREP_ID_TWO(C_c_bitint_width, #c_bitint_width) IREP_ID_ONE(namespace) IREP_ID_ONE(linkage) IREP_ID_ONE(decltype)