C23 keywords

kroening · kroening · commit 7c8b66179543 · 2025-04-09T18:20:30.000-04:00
This adds scanner, parser and typechecker support for the new C23 keywords.
diff --git a/regression/cbmc/_BitInt/_BitInt1.c b/regression/cbmc/_BitInt/_BitInt1.c
@@ -0,0 +1,28 @@
+// _BitInt is a C23 feature
+#include <assert.h>
+
+int main()
+{
+  // sizeof reports the padded storage size, not the declared bit count
+  assert(sizeof(_BitInt(32)) == 4);
+  assert(sizeof(_BitInt(33)) == 8);
+  assert(sizeof(_BitInt(65)) == 16);
+  assert(sizeof(_BitInt(128)) == 16);
+
+  // casts (these checks are currently commented out)
+  //assert((_BitInt(4))17 == 1);
+  //assert((_BitInt(4)) - 1 == -1);
+  //assert((unsigned _BitInt(4)) - 1 == 15);
+
+  // promotion (or lack thereof); some checks are currently commented out
+  //assert((unsigned _BitInt(4))15 + (unsigned _BitInt(4))1 == 0);
+  assert((unsigned _BitInt(4))15 + (unsigned _BitInt(5))1 == 16);
+  //assert((unsigned _BitInt(4))15 + (signed _BitInt(5))1 == -16);
+  assert((unsigned _BitInt(4))15 + 1 == 16);
+
+  // pointers to _BitInt objects: take the address and write through it
+  _BitInt(3) x, *p = &x;
+  *p = 1;
+
+  return 0;
+}
diff --git a/regression/cbmc/_BitInt/_BitInt1.desc b/regression/cbmc/_BitInt/_BitInt1.desc
@@ -0,0 +1,8 @@
+KNOWNBUG
+_BitInt1.c
+
+^EXIT=0$
+^SIGNAL=0$
+--
+^warning: ignoring
+^CONVERSION ERROR$
diff --git a/regression/cbmc/constants/predefined-constants1.c b/regression/cbmc/constants/predefined-constants1.c
@@ -0,0 +1,7 @@
+int main()
+{
+  // C23 predefined constants, used here without including any header
+  __CPROVER_assert(!false, "false");
+  __CPROVER_assert(true, "true");
+  __CPROVER_assert(nullptr == 0, "nullptr");
+}
diff --git a/regression/cbmc/constants/predefined-constants1.desc b/regression/cbmc/constants/predefined-constants1.desc
@@ -0,0 +1,7 @@
+CORE
+predefined-constants1.c
+--c23
+^EXIT=0$
+^SIGNAL=0$
+--
+^warning: ignoring
diff --git a/regression/cbmc/static_assert/static_assert1.c b/regression/cbmc/static_assert/static_assert1.c
@@ -0,0 +1,17 @@
+// C23 introduces the "static_assert" keyword.
+
+struct S
+{
+  // Visual Studio does not support static_assert in compound bodies.
+#ifndef _MSC_VER
+  static_assert(1, "in struct");
+#endif
+  int x;
+} asd;
+
+static_assert(1, "global scope");
+
+int main()
+{
+  static_assert(1, "in function");
+}
diff --git a/regression/cbmc/static_assert/static_assert1.desc b/regression/cbmc/static_assert/static_assert1.desc
@@ -0,0 +1,8 @@
+CORE
+static_assert1.c
+--c23
+^EXIT=0$
+^SIGNAL=0$
+--
+^warning: ignoring
+^CONVERSION ERROR$
diff --git a/regression/cbmc/typeof/typeof2.c b/regression/cbmc/typeof/typeof2.c
@@ -0,0 +1,23 @@
+typedef int INTTYPE;
+
+int func1();
+
+// C23 typeof
+typeof(int) v1;
+typeof(INTTYPE) v2;
+typeof(v2) v3;
+typeof(1 + 1) v4;
+typeof(1 + 1 + func1()) v5;
+const typeof(int) v6;
+typeof(int) const v7;
+static typeof(int) const v8;
+static typeof(int) const *v9;
+static volatile typeof(int) const *v10;
+
+void func2(typeof(int) *some_arg)
+{
+}
+
+int main()
+{
+}
diff --git a/regression/cbmc/typeof/typeof2.desc b/regression/cbmc/typeof/typeof2.desc
@@ -0,0 +1,8 @@
+CORE
+typeof2.c
+--c23
+^EXIT=0$
+^SIGNAL=0$
+--
+^warning: ignoring
+^CONVERSION ERROR$
diff --git a/src/ansi-c/ansi_c_convert_type.cpp b/src/ansi-c/ansi_c_convert_type.cpp
@@ -77,6 +77,13 @@ void ansi_c_convert_typet::read_rec(const typet &type)
     int32_cnt++;
   else if(type.id()==ID_int64)
     int64_cnt++;
+  else if(type.id() == ID_c_bitint)
+  {
+    bitint_cnt++;
+    const exprt &size_expr = static_cast<const exprt &>(type.find(ID_size));
+
+    bv_width = size_expr;
+  }
   else if(type.id()==ID_gcc_float16)
     gcc_float16_cnt++;
   else if(type.id()==ID_gcc_float32)
@@ -290,15 +297,13 @@ void ansi_c_convert_typet::write(typet &type)
 
   if(!other.empty())
   {
-    if(double_cnt || float_cnt || signed_cnt ||
-       unsigned_cnt || int_cnt || c_bool_cnt || proper_bool_cnt ||
-       short_cnt || char_cnt || complex_cnt || long_cnt ||
-       int8_cnt || int16_cnt || int32_cnt || int64_cnt ||
-       gcc_float16_cnt ||
-       gcc_float32_cnt || gcc_float32x_cnt ||
-       gcc_float64_cnt || gcc_float64x_cnt ||
-       gcc_float128_cnt || gcc_float128x_cnt ||
-       gcc_int128_cnt || bv_cnt)
+    if(
+      double_cnt || float_cnt || signed_cnt || unsigned_cnt || int_cnt ||
+      c_bool_cnt || proper_bool_cnt || bitint_cnt || short_cnt || char_cnt ||
+      complex_cnt || long_cnt || int8_cnt || int16_cnt || int32_cnt ||
+      int64_cnt || gcc_float16_cnt || gcc_float32_cnt || gcc_float32x_cnt ||
+      gcc_float64_cnt || gcc_float64x_cnt || gcc_float128_cnt ||
+      gcc_float128x_cnt || gcc_int128_cnt || bv_cnt)
     {
       log.error().source_location = source_location;
       log.error() << "illegal type modifier for defined type" << messaget::eom;
@@ -373,10 +378,10 @@ void ansi_c_convert_typet::write(typet &type)
           gcc_float64_cnt || gcc_float64x_cnt ||
           gcc_float128_cnt || gcc_float128x_cnt)
   {
-    if(signed_cnt || unsigned_cnt || int_cnt || c_bool_cnt || proper_bool_cnt ||
-       int8_cnt || int16_cnt || int32_cnt || int64_cnt ||
-       gcc_int128_cnt || bv_cnt ||
-       short_cnt || char_cnt)
+    if(
+      signed_cnt || unsigned_cnt || int_cnt || c_bool_cnt || proper_bool_cnt ||
+      bitint_cnt || int8_cnt || int16_cnt || int32_cnt || int64_cnt ||
+      gcc_int128_cnt || bv_cnt || short_cnt || char_cnt)
     {
       log.error().source_location = source_location;
       log.error() << "cannot combine integer type with floating-point type"
@@ -415,10 +420,10 @@ void ansi_c_convert_typet::write(typet &type)
   }
   else if(double_cnt || float_cnt)
   {
-    if(signed_cnt || unsigned_cnt || int_cnt || c_bool_cnt || proper_bool_cnt ||
-       int8_cnt || int16_cnt || int32_cnt || int64_cnt ||
-       gcc_int128_cnt|| bv_cnt ||
-       short_cnt || char_cnt)
+    if(
+      signed_cnt || unsigned_cnt || int_cnt || c_bool_cnt || proper_bool_cnt ||
+      bitint_cnt || int8_cnt || int16_cnt || int32_cnt || int64_cnt ||
+      gcc_int128_cnt || bv_cnt || short_cnt || char_cnt)
     {
       log.error().source_location = source_location;
       log.error() << "cannot combine integer type with floating-point type"
@@ -460,10 +465,10 @@ void ansi_c_convert_typet::write(typet &type)
   }
   else if(c_bool_cnt)
   {
-    if(signed_cnt || unsigned_cnt || int_cnt || short_cnt ||
-       int8_cnt || int16_cnt || int32_cnt || int64_cnt ||
-       gcc_float128_cnt || bv_cnt || proper_bool_cnt ||
-       char_cnt || long_cnt)
+    if(
+      signed_cnt || unsigned_cnt || int_cnt || short_cnt || bitint_cnt ||
+      int8_cnt || int16_cnt || int32_cnt || int64_cnt || gcc_float128_cnt ||
+      bv_cnt || proper_bool_cnt || char_cnt || long_cnt)
     {
       log.error().source_location = source_location;
       log.error() << "illegal type modifier for C boolean type"
@@ -475,10 +480,10 @@ void ansi_c_convert_typet::write(typet &type)
   }
   else if(proper_bool_cnt)
   {
-    if(signed_cnt || unsigned_cnt || int_cnt || short_cnt ||
-       int8_cnt || int16_cnt || int32_cnt || int64_cnt ||
-       gcc_float128_cnt || bv_cnt ||
-       char_cnt || long_cnt)
+    if(
+      signed_cnt || unsigned_cnt || int_cnt || short_cnt || bitint_cnt ||
+      int8_cnt || int16_cnt || int32_cnt || int64_cnt || gcc_float128_cnt ||
+      bv_cnt || char_cnt || long_cnt)
     {
       log.error().source_location = source_location;
       log.error() << "illegal type modifier for proper boolean type"
@@ -496,9 +501,9 @@ void ansi_c_convert_typet::write(typet &type)
   }
   else if(char_cnt)
   {
-    if(int_cnt || short_cnt || long_cnt ||
-       int8_cnt || int16_cnt || int32_cnt || int64_cnt ||
-       gcc_float128_cnt || bv_cnt || proper_bool_cnt)
+    if(
+      int_cnt || short_cnt || long_cnt || bitint_cnt || int8_cnt || int16_cnt ||
+      int32_cnt || int64_cnt || gcc_float128_cnt || bv_cnt || proper_bool_cnt)
     {
       log.error().source_location = source_location;
       log.error() << "illegal type modifier for char type" << messaget::eom;
@@ -537,7 +542,9 @@ void ansi_c_convert_typet::write(typet &type)
 
     if(int8_cnt || int16_cnt || int32_cnt || int64_cnt)
     {
-      if(long_cnt || char_cnt || short_cnt || gcc_int128_cnt || bv_cnt)
+      if(
+        long_cnt || char_cnt || short_cnt || bitint_cnt || gcc_int128_cnt ||
+        bv_cnt)
       {
         log.error().source_location = source_location;
         log.error() << "conflicting type modifiers" << messaget::eom;
@@ -574,6 +581,12 @@ void ansi_c_convert_typet::write(typet &type)
       else
         type=gcc_unsigned_int128_type();
     }
+    else if(bitint_cnt)
+    {
+      // explicitly-given expression for the number of bits
+      type.id(is_signed ? ID_c_signed_bitint : ID_c_unsigned_bitint);
+      type.set(ID_width, bv_width);
+    }
     else if(bv_cnt)
     {
       // explicitly-given expression for width
diff --git a/src/ansi-c/ansi_c_convert_type.h b/src/ansi-c/ansi_c_convert_type.h
@@ -24,10 +24,8 @@ class message_handlert;
 class ansi_c_convert_typet
 {
 public:
-  unsigned unsigned_cnt, signed_cnt, char_cnt,
-           int_cnt, short_cnt, long_cnt,
-           double_cnt, float_cnt, c_bool_cnt,
-           proper_bool_cnt, complex_cnt;
+  unsigned unsigned_cnt, signed_cnt, char_cnt, int_cnt, short_cnt, long_cnt,
+    double_cnt, float_cnt, c_bool_cnt, proper_bool_cnt, complex_cnt, bitint_cnt;
 
   // extensions
   unsigned int8_cnt, int16_cnt, int32_cnt, int64_cnt,
@@ -87,6 +85,7 @@ class ansi_c_convert_typet
       c_bool_cnt(0),
       proper_bool_cnt(0),
       complex_cnt(0),
+      bitint_cnt(0),
       int8_cnt(0),
       int16_cnt(0),
       int32_cnt(0),
diff --git a/src/ansi-c/ansi_c_language.cpp b/src/ansi-c/ansi_c_language.cpp
@@ -80,6 +80,11 @@ bool ansi_c_languaget::parse(
   ansi_c_parser.fp16_type = config.ansi_c.fp16_type;
   ansi_c_parser.cpp98=false; // it's not C++
   ansi_c_parser.cpp11=false; // it's not C++
+  ansi_c_parser.c17 =
+    config.ansi_c.c_standard == configt::ansi_ct::c_standardt::C17 ||
+    config.ansi_c.c_standard == configt::ansi_ct::c_standardt::C23;
+  ansi_c_parser.c23 =
+    config.ansi_c.c_standard == configt::ansi_ct::c_standardt::C23;
   ansi_c_parser.mode=config.ansi_c.mode;
 
   ansi_c_scanner_init(ansi_c_parser);
diff --git a/src/ansi-c/builtin_factory.cpp b/src/ansi-c/builtin_factory.cpp
@@ -58,6 +58,8 @@ static bool convert(
   ansi_c_parser.fp16_type = config.ansi_c.fp16_type;
   ansi_c_parser.cpp98=false; // it's not C++
   ansi_c_parser.cpp11=false; // it's not C++
+  ansi_c_parser.c17 = false; // we do C11 for now
+  ansi_c_parser.c23 = false; // we do C11 for now
   ansi_c_parser.mode=config.ansi_c.mode;
 
   ansi_c_scanner_init(ansi_c_parser);
diff --git a/src/ansi-c/c_preprocess.cpp b/src/ansi-c/c_preprocess.cpp
@@ -602,10 +602,10 @@ bool c_preprocess_gcc_clang(
     case configt::ansi_ct::c_standardt::C23:
 #if defined(__OpenBSD__)
       if(preprocessor == configt::ansi_ct::preprocessort::CLANG)
-        argv.push_back("-std=c23");
+        argv.push_back("-std=c2x");
       else
 #endif
-        argv.push_back("-std=gnu23");
+        argv.push_back("-std=gnu2x");
       break;
     }
   }
diff --git a/src/ansi-c/c_typecheck_base.h b/src/ansi-c/c_typecheck_base.h
@@ -265,6 +265,7 @@ class c_typecheck_baset:
   virtual void typecheck_array_type(array_typet &type);
   virtual void typecheck_vector_type(typet &type);
   virtual void typecheck_custom_type(typet &type);
+  virtual void typecheck_bitint_type(typet &);
   virtual void adjust_function_parameter(typet &type) const;
   virtual bool is_complete_type(const typet &type) const;
 
diff --git a/src/ansi-c/c_typecheck_type.cpp b/src/ansi-c/c_typecheck_type.cpp
@@ -94,7 +94,7 @@ void c_typecheck_baset::typecheck_type(typet &type)
     typecheck_c_enum_tag_type(to_c_enum_tag_type(type));
   else if(type.id()==ID_c_bit_field)
     typecheck_c_bit_field_type(to_c_bit_field_type(type));
-  else if(type.id()==ID_typeof)
+  else if(type.id() == ID_typeof || type.id() == ID_c_typeof_unqual)
     typecheck_typeof_type(type);
   else if(type.id() == ID_typedef_type)
     typecheck_typedef_type(type);
@@ -116,6 +116,10 @@ void c_typecheck_baset::typecheck_type(typet &type)
           type.id()==ID_custom_floatbv ||
           type.id()==ID_custom_fixedbv)
     typecheck_custom_type(type);
+  else if(type.id() == ID_c_signed_bitint || type.id() == ID_c_unsigned_bitint)
+  {
+    typecheck_bitint_type(type);
+  }
   else if(type.id()==ID_gcc_attribute_mode)
   {
     // get that mode
@@ -417,6 +421,58 @@ void c_typecheck_baset::typecheck_custom_type(typet &type)
     UNREACHABLE;
 }
 
+void c_typecheck_baset::typecheck_bitint_type(typet &type)
+{
+  // the requested bit count is held as an expression under ID_width
+  exprt size_expr = static_cast<const exprt &>(type.find(ID_width));
+
+  typecheck_expr(size_expr);
+  source_locationt source_location = size_expr.source_location();
+  make_constant_index(size_expr);
+
+  mp_integer size_int;
+  if(to_integer(to_constant_expr(size_expr), size_int)) // true means failure
+  {
+    throw errort().with_location(source_location)
+      << "failed to convert _BitInt width to constant";
+  }
+
+  bool is_signed = type.id() == ID_c_signed_bitint;
+
+  // Must have at least one bit if unsigned, and at least two if signed
+  if(!is_signed)
+  {
+    if(size_int < 1)
+    {
+      throw errort().with_location(source_location)
+        << "unsigned _BitInt must have at least one bit";
+    }
+  }
+  else
+  {
+    if(size_int < 2)
+    {
+      throw errort().with_location(source_location)
+        << "signed _BitInt must have at least two bits";
+    }
+  }
+
+  // Pad up to whole bytes (much like _Bool), then to a power-of-two byte count
+  auto bytes = (size_int % 8) == 0 ? size_int / 8 : size_int / 8 + 1;
+
+  auto bytes_padded = power(2, address_bits(bytes));
+
+  auto width = 8 * bytes_padded;
+
+  type.set(ID_width, integer2string(width));
+  type.set(ID_C_c_type, type.id()); // keep the original _BitInt type id
+  type.id(is_signed ? ID_signedbv : ID_unsignedbv);
+
+  // We remember the original number of bits before padding,
+  // since these determine semantics
+  type.set(ID_C_c_bitint_bits, integer2string(size_int));
+}
+
 void c_typecheck_baset::typecheck_code_type(code_typet &type)
 {
   // the return type is still 'subtype()'
diff --git a/src/ansi-c/parser.y b/src/ansi-c/parser.y
diff --git a/src/ansi-c/scanner.l b/src/ansi-c/scanner.l
diff --git a/src/util/config.cpp b/src/util/config.cpp
diff --git a/src/util/config.h b/src/util/config.h
diff --git a/src/util/irep_ids.def b/src/util/irep_ids.def