diff --git a/compiler/rustc_const_eval/src/const_eval/eval_queries.rs b/compiler/rustc_const_eval/src/const_eval/eval_queries.rs index 96b3ec6f18728..7ccebd83f24f7 100644 --- a/compiler/rustc_const_eval/src/const_eval/eval_queries.rs +++ b/compiler/rustc_const_eval/src/const_eval/eval_queries.rs @@ -94,7 +94,7 @@ fn eval_body_using_ecx<'tcx, R: InterpretationResult<'tcx>>( let intern_result = intern_const_alloc_recursive(ecx, intern_kind, &ret); // Since evaluation had no errors, validate the resulting constant. - const_validate_mplace(&ecx, &ret, cid)?; + const_validate_mplace(ecx, &ret, cid)?; // Only report this after validation, as validaiton produces much better diagnostics. // FIXME: ensure validation always reports this and stop making interning care about it. @@ -391,7 +391,7 @@ fn eval_in_interpreter<'tcx, R: InterpretationResult<'tcx>>( #[inline(always)] fn const_validate_mplace<'tcx>( - ecx: &InterpCx<'tcx, CompileTimeMachine<'tcx>>, + ecx: &mut InterpCx<'tcx, CompileTimeMachine<'tcx>>, mplace: &MPlaceTy<'tcx>, cid: GlobalId<'tcx>, ) -> Result<(), ErrorHandled> { diff --git a/compiler/rustc_const_eval/src/const_eval/machine.rs b/compiler/rustc_const_eval/src/const_eval/machine.rs index 9c1fef095f552..7405ca09342da 100644 --- a/compiler/rustc_const_eval/src/const_eval/machine.rs +++ b/compiler/rustc_const_eval/src/const_eval/machine.rs @@ -1,16 +1,16 @@ -use std::borrow::Borrow; +use std::borrow::{Borrow, Cow}; use std::fmt; use std::hash::Hash; use std::ops::ControlFlow; use rustc_ast::Mutability; -use rustc_data_structures::fx::{FxIndexMap, IndexEntry}; +use rustc_data_structures::fx::{FxHashMap, FxIndexMap, IndexEntry}; use rustc_hir::def_id::{DefId, LocalDefId}; use rustc_hir::{self as hir, LangItem, CRATE_HIR_ID}; use rustc_middle::mir::AssertMessage; use rustc_middle::query::TyCtxtAt; use rustc_middle::ty::layout::{FnAbiOf, TyAndLayout}; -use rustc_middle::ty::{self, TyCtxt}; +use rustc_middle::ty::{self, Ty, TyCtxt}; use rustc_middle::{bug, mir}; 
use rustc_span::symbol::{sym, Symbol}; use rustc_span::Span; @@ -24,8 +24,8 @@ use crate::fluent_generated as fluent; use crate::interpret::{ self, compile_time_machine, err_ub, throw_exhaust, throw_inval, throw_ub_custom, throw_unsup, throw_unsup_format, AllocId, AllocRange, ConstAllocation, CtfeProvenance, FnArg, Frame, - GlobalAlloc, ImmTy, InterpCx, InterpResult, MPlaceTy, OpTy, Pointer, PointerArithmetic, Scalar, - StackPopCleanup, + GlobalAlloc, ImmTy, InterpCx, InterpResult, MPlaceTy, OpTy, Pointer, PointerArithmetic, + RangeSet, Scalar, StackPopCleanup, }; /// When hitting this many interpreted terminators we emit a deny by default lint @@ -65,6 +65,9 @@ pub struct CompileTimeMachine<'tcx> { /// storing the result in the given `AllocId`. /// Used to prevent reads from a static's base allocation, as that may allow for self-initialization loops. pub(crate) static_root_ids: Option<(AllocId, LocalDefId)>, + + /// A cache of "data range" computations for unions (i.e., the offsets of non-padding bytes). + union_data_ranges: FxHashMap, RangeSet>, } #[derive(Copy, Clone)] @@ -99,6 +102,7 @@ impl<'tcx> CompileTimeMachine<'tcx> { can_access_mut_global, check_alignment, static_root_ids: None, + union_data_ranges: FxHashMap::default(), } } } @@ -766,6 +770,19 @@ impl<'tcx> interpret::Machine<'tcx> for CompileTimeMachine<'tcx> { } Ok(()) } + + fn cached_union_data_range<'e>( + ecx: &'e mut InterpCx<'tcx, Self>, + ty: Ty<'tcx>, + compute_range: impl FnOnce() -> RangeSet, + ) -> Cow<'e, RangeSet> { + if ecx.tcx.sess.opts.unstable_opts.extra_const_ub_checks { + Cow::Borrowed(ecx.machine.union_data_ranges.entry(ty).or_insert_with(compute_range)) + } else { + // Don't bother caching, we're only doing one validation at the end anyway. + Cow::Owned(compute_range()) + } + } } // Please do not add any code below the above `Machine` trait impl. 
I (oli-obk) plan more cleanups diff --git a/compiler/rustc_const_eval/src/interpret/discriminant.rs b/compiler/rustc_const_eval/src/interpret/discriminant.rs index 0008a15722bde..de93ed85704b5 100644 --- a/compiler/rustc_const_eval/src/interpret/discriminant.rs +++ b/compiler/rustc_const_eval/src/interpret/discriminant.rs @@ -7,7 +7,7 @@ use rustc_target::abi::{self, TagEncoding, VariantIdx, Variants}; use tracing::{instrument, trace}; use super::{ - err_ub, throw_ub, ImmTy, InterpCx, InterpResult, Machine, Readable, Scalar, Writeable, + err_ub, throw_ub, ImmTy, InterpCx, InterpResult, Machine, Projectable, Scalar, Writeable, }; impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { @@ -60,7 +60,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { #[instrument(skip(self), level = "trace")] pub fn read_discriminant( &self, - op: &impl Readable<'tcx, M::Provenance>, + op: &impl Projectable<'tcx, M::Provenance>, ) -> InterpResult<'tcx, VariantIdx> { let ty = op.layout().ty; trace!("read_discriminant_value {:#?}", op.layout()); diff --git a/compiler/rustc_const_eval/src/interpret/machine.rs b/compiler/rustc_const_eval/src/interpret/machine.rs index 6cfd7be48e624..8cab3c34eedfb 100644 --- a/compiler/rustc_const_eval/src/interpret/machine.rs +++ b/compiler/rustc_const_eval/src/interpret/machine.rs @@ -10,6 +10,7 @@ use rustc_apfloat::{Float, FloatConvert}; use rustc_ast::{InlineAsmOptions, InlineAsmTemplatePiece}; use rustc_middle::query::TyCtxtAt; use rustc_middle::ty::layout::TyAndLayout; +use rustc_middle::ty::Ty; use rustc_middle::{mir, ty}; use rustc_span::def_id::DefId; use rustc_span::Span; @@ -19,7 +20,7 @@ use rustc_target::spec::abi::Abi as CallAbi; use super::{ throw_unsup, throw_unsup_format, AllocBytes, AllocId, AllocKind, AllocRange, Allocation, ConstAllocation, CtfeProvenance, FnArg, Frame, ImmTy, InterpCx, InterpResult, MPlaceTy, - MemoryKind, Misalignment, OpTy, PlaceTy, Pointer, Provenance, CTFE_ALLOC_SALT, + MemoryKind, Misalignment, OpTy, PlaceTy, Pointer, 
Provenance, RangeSet, CTFE_ALLOC_SALT, }; /// Data returned by [`Machine::after_stack_pop`], and consumed by @@ -578,6 +579,15 @@ pub trait Machine<'tcx>: Sized { ecx: &InterpCx<'tcx, Self>, instance: Option>, ) -> usize; + + fn cached_union_data_range<'e>( + _ecx: &'e mut InterpCx<'tcx, Self>, + _ty: Ty<'tcx>, + compute_range: impl FnOnce() -> RangeSet, + ) -> Cow<'e, RangeSet> { + // Default to no caching. + Cow::Owned(compute_range()) + } } /// A lot of the flexibility above is just needed for `Miri`, but all "compile-time" machines diff --git a/compiler/rustc_const_eval/src/interpret/memory.rs b/compiler/rustc_const_eval/src/interpret/memory.rs index 45a5eb9bd52fc..d87588496c0bd 100644 --- a/compiler/rustc_const_eval/src/interpret/memory.rs +++ b/compiler/rustc_const_eval/src/interpret/memory.rs @@ -8,9 +8,8 @@ use std::assert_matches::assert_matches; use std::borrow::Cow; -use std::cell::Cell; use std::collections::VecDeque; -use std::{fmt, ptr}; +use std::{fmt, mem, ptr}; use rustc_ast::Mutability; use rustc_data_structures::fx::{FxHashSet, FxIndexMap}; @@ -118,7 +117,7 @@ pub struct Memory<'tcx, M: Machine<'tcx>> { /// This stores whether we are currently doing reads purely for the purpose of validation. /// Those reads do not trigger the machine's hooks for memory reads. /// Needless to say, this must only be set with great care! - validation_in_progress: Cell, + validation_in_progress: bool, } /// A reference to some allocation that was already bounds-checked for the given region @@ -145,7 +144,7 @@ impl<'tcx, M: Machine<'tcx>> Memory<'tcx, M> { alloc_map: M::MemoryMap::default(), extra_fn_ptr_map: FxIndexMap::default(), dead_alloc_map: FxIndexMap::default(), - validation_in_progress: Cell::new(false), + validation_in_progress: false, } } @@ -682,7 +681,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { // We want to call the hook on *all* accesses that involve an AllocId, including zero-sized // accesses. 
That means we cannot rely on the closure above or the `Some` branch below. We // do this after `check_and_deref_ptr` to ensure some basic sanity has already been checked. - if !self.memory.validation_in_progress.get() { + if !self.memory.validation_in_progress { if let Ok((alloc_id, ..)) = self.ptr_try_get_alloc_id(ptr, size_i64) { M::before_alloc_read(self, alloc_id)?; } @@ -690,7 +689,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { if let Some((alloc_id, offset, prov, alloc)) = ptr_and_alloc { let range = alloc_range(offset, size); - if !self.memory.validation_in_progress.get() { + if !self.memory.validation_in_progress { M::before_memory_read( self.tcx, &self.machine, @@ -766,11 +765,14 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { let parts = self.get_ptr_access(ptr, size)?; if let Some((alloc_id, offset, prov)) = parts { let tcx = self.tcx; + let validation_in_progress = self.memory.validation_in_progress; // FIXME: can we somehow avoid looking up the allocation twice here? // We cannot call `get_raw_mut` inside `check_and_deref_ptr` as that would duplicate `&mut self`. let (alloc, machine) = self.get_alloc_raw_mut(alloc_id)?; let range = alloc_range(offset, size); - M::before_memory_write(tcx, machine, &mut alloc.extra, (alloc_id, prov), range)?; + if !validation_in_progress { + M::before_memory_write(tcx, machine, &mut alloc.extra, (alloc_id, prov), range)?; + } Ok(Some(AllocRefMut { alloc, range, tcx: *tcx, alloc_id })) } else { Ok(None) @@ -1014,16 +1016,16 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { /// /// We do this so Miri's allocation access tracking does not show the validation /// reads as spurious accesses. - pub fn run_for_validation(&self, f: impl FnOnce() -> R) -> R { + pub fn run_for_validation(&mut self, f: impl FnOnce(&mut Self) -> R) -> R { // This deliberately uses `==` on `bool` to follow the pattern // `assert!(val.replace(new) == old)`. 
assert!( - self.memory.validation_in_progress.replace(true) == false, + mem::replace(&mut self.memory.validation_in_progress, true) == false, "`validation_in_progress` was already set" ); - let res = f(); + let res = f(self); assert!( - self.memory.validation_in_progress.replace(false) == true, + mem::replace(&mut self.memory.validation_in_progress, false) == true, "`validation_in_progress` was unset by someone else" ); res @@ -1115,6 +1117,10 @@ impl<'a, 'tcx, M: Machine<'tcx>> std::fmt::Debug for DumpAllocs<'a, 'tcx, M> { impl<'tcx, 'a, Prov: Provenance, Extra, Bytes: AllocBytes> AllocRefMut<'a, 'tcx, Prov, Extra, Bytes> { + pub fn as_ref<'b>(&'b self) -> AllocRef<'b, 'tcx, Prov, Extra, Bytes> { + AllocRef { alloc: self.alloc, range: self.range, tcx: self.tcx, alloc_id: self.alloc_id } + } + /// `range` is relative to this allocation reference, not the base of the allocation. pub fn write_scalar(&mut self, range: AllocRange, val: Scalar) -> InterpResult<'tcx> { let range = self.range.subrange(range); @@ -1130,13 +1136,30 @@ impl<'tcx, 'a, Prov: Provenance, Extra, Bytes: AllocBytes> self.write_scalar(alloc_range(offset, self.tcx.data_layout().pointer_size), val) } + /// Mark the given sub-range (relative to this allocation reference) as uninitialized. + pub fn write_uninit(&mut self, range: AllocRange) -> InterpResult<'tcx> { + let range = self.range.subrange(range); + Ok(self + .alloc + .write_uninit(&self.tcx, range) + .map_err(|e| e.to_interp_error(self.alloc_id))?) + } + /// Mark the entire referenced range as uninitialized - pub fn write_uninit(&mut self) -> InterpResult<'tcx> { + pub fn write_uninit_full(&mut self) -> InterpResult<'tcx> { Ok(self .alloc .write_uninit(&self.tcx, self.range) .map_err(|e| e.to_interp_error(self.alloc_id))?) } + + /// Remove all provenance in the reference range. 
+ pub fn clear_provenance(&mut self) -> InterpResult<'tcx> { + Ok(self + .alloc + .clear_provenance(&self.tcx, self.range) + .map_err(|e| e.to_interp_error(self.alloc_id))?) + } } impl<'tcx, 'a, Prov: Provenance, Extra, Bytes: AllocBytes> AllocRef<'a, 'tcx, Prov, Extra, Bytes> { @@ -1278,7 +1301,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { }; let src_alloc = self.get_alloc_raw(src_alloc_id)?; let src_range = alloc_range(src_offset, size); - assert!(!self.memory.validation_in_progress.get(), "we can't be copying during validation"); + assert!(!self.memory.validation_in_progress, "we can't be copying during validation"); M::before_memory_read( tcx, &self.machine, diff --git a/compiler/rustc_const_eval/src/interpret/mod.rs b/compiler/rustc_const_eval/src/interpret/mod.rs index 511756e3f86c9..561d681f804a1 100644 --- a/compiler/rustc_const_eval/src/interpret/mod.rs +++ b/compiler/rustc_const_eval/src/interpret/mod.rs @@ -33,11 +33,11 @@ pub(crate) use self::intrinsics::eval_nullary_intrinsic; pub use self::machine::{compile_time_machine, AllocMap, Machine, MayLeak, ReturnAction}; pub use self::memory::{AllocKind, AllocRef, AllocRefMut, FnVal, Memory, MemoryKind}; use self::operand::Operand; -pub use self::operand::{ImmTy, Immediate, OpTy, Readable}; +pub use self::operand::{ImmTy, Immediate, OpTy}; pub use self::place::{MPlaceTy, MemPlaceMeta, PlaceTy, Writeable}; use self::place::{MemPlace, Place}; pub use self::projection::{OffsetMode, Projectable}; pub use self::stack::{Frame, FrameInfo, LocalState, StackPopCleanup, StackPopInfo}; pub(crate) use self::util::create_static_alloc; -pub use self::validity::{CtfeValidationMode, RefTracking}; +pub use self::validity::{CtfeValidationMode, RangeSet, RefTracking}; pub use self::visitor::ValueVisitor; diff --git a/compiler/rustc_const_eval/src/interpret/operand.rs b/compiler/rustc_const_eval/src/interpret/operand.rs index 9a8ccaa7cc5ca..b906e3422dba5 100644 --- a/compiler/rustc_const_eval/src/interpret/operand.rs +++ 
b/compiler/rustc_const_eval/src/interpret/operand.rs @@ -111,6 +111,46 @@ impl Immediate { Immediate::Uninit => bug!("Got uninit where a scalar or scalar pair was expected"), } } + + /// Assert that this immediate is a valid value for the given ABI. + pub fn assert_matches_abi(self, abi: Abi, cx: &impl HasDataLayout) { + match (self, abi) { + (Immediate::Scalar(scalar), Abi::Scalar(s)) => { + assert_eq!(scalar.size(), s.size(cx)); + if !matches!(s.primitive(), abi::Pointer(..)) { + assert!(matches!(scalar, Scalar::Int(..))); + } + } + (Immediate::ScalarPair(a_val, b_val), Abi::ScalarPair(a, b)) => { + assert_eq!(a_val.size(), a.size(cx)); + if !matches!(a.primitive(), abi::Pointer(..)) { + assert!(matches!(a_val, Scalar::Int(..))); + } + assert_eq!(b_val.size(), b.size(cx)); + if !matches!(b.primitive(), abi::Pointer(..)) { + assert!(matches!(b_val, Scalar::Int(..))); + } + } + (Immediate::Uninit, _) => {} + _ => { + bug!("value {self:?} does not match ABI {abi:?})",) + } + } + } + + pub fn clear_provenance<'tcx>(&mut self) -> InterpResult<'tcx> { + match self { + Immediate::Scalar(s) => { + s.clear_provenance()?; + } + Immediate::ScalarPair(a, b) => { + a.clear_provenance()?; + b.clear_provenance()?; + } + Immediate::Uninit => {} + } + Ok(()) + } } // ScalarPair needs a type to interpret, so we often have an immediate and a type together @@ -490,32 +530,6 @@ impl<'tcx, Prov: Provenance> Projectable<'tcx, Prov> for OpTy<'tcx, Prov> { } } -/// The `Readable` trait describes interpreter values that one can read from. 
-pub trait Readable<'tcx, Prov: Provenance>: Projectable<'tcx, Prov> { - fn as_mplace_or_imm(&self) -> Either, ImmTy<'tcx, Prov>>; -} - -impl<'tcx, Prov: Provenance> Readable<'tcx, Prov> for OpTy<'tcx, Prov> { - #[inline(always)] - fn as_mplace_or_imm(&self) -> Either, ImmTy<'tcx, Prov>> { - self.as_mplace_or_imm() - } -} - -impl<'tcx, Prov: Provenance> Readable<'tcx, Prov> for MPlaceTy<'tcx, Prov> { - #[inline(always)] - fn as_mplace_or_imm(&self) -> Either, ImmTy<'tcx, Prov>> { - Left(self.clone()) - } -} - -impl<'tcx, Prov: Provenance> Readable<'tcx, Prov> for ImmTy<'tcx, Prov> { - #[inline(always)] - fn as_mplace_or_imm(&self) -> Either, ImmTy<'tcx, Prov>> { - Right(self.clone()) - } -} - impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { /// Try reading an immediate in memory; this is interesting particularly for `ScalarPair`. /// Returns `None` if the layout does not permit loading this as a value. @@ -588,9 +602,9 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { /// ConstProp needs it, though. pub fn read_immediate_raw( &self, - src: &impl Readable<'tcx, M::Provenance>, + src: &impl Projectable<'tcx, M::Provenance>, ) -> InterpResult<'tcx, Either, ImmTy<'tcx, M::Provenance>>> { - Ok(match src.as_mplace_or_imm() { + Ok(match src.to_op(self)?.as_mplace_or_imm() { Left(ref mplace) => { if let Some(val) = self.read_immediate_from_mplace_raw(mplace)? 
{ Right(val) @@ -608,7 +622,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { #[inline(always)] pub fn read_immediate( &self, - op: &impl Readable<'tcx, M::Provenance>, + op: &impl Projectable<'tcx, M::Provenance>, ) -> InterpResult<'tcx, ImmTy<'tcx, M::Provenance>> { if !matches!( op.layout().abi, @@ -627,7 +641,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { /// Read a scalar from a place pub fn read_scalar( &self, - op: &impl Readable<'tcx, M::Provenance>, + op: &impl Projectable<'tcx, M::Provenance>, ) -> InterpResult<'tcx, Scalar> { Ok(self.read_immediate(op)?.to_scalar()) } @@ -638,21 +652,21 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { /// Read a pointer from a place. pub fn read_pointer( &self, - op: &impl Readable<'tcx, M::Provenance>, + op: &impl Projectable<'tcx, M::Provenance>, ) -> InterpResult<'tcx, Pointer>> { self.read_scalar(op)?.to_pointer(self) } /// Read a pointer-sized unsigned integer from a place. pub fn read_target_usize( &self, - op: &impl Readable<'tcx, M::Provenance>, + op: &impl Projectable<'tcx, M::Provenance>, ) -> InterpResult<'tcx, u64> { self.read_scalar(op)?.to_target_usize(self) } /// Read a pointer-sized signed integer from a place. pub fn read_target_isize( &self, - op: &impl Readable<'tcx, M::Provenance>, + op: &impl Projectable<'tcx, M::Provenance>, ) -> InterpResult<'tcx, i64> { self.read_scalar(op)?.to_target_isize(self) } @@ -717,7 +731,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { ) -> InterpResult<'tcx, OpTy<'tcx, M::Provenance>> { match place.as_mplace_or_local() { Left(mplace) => Ok(mplace.into()), - Right((local, offset, locals_addr)) => { + Right((local, offset, locals_addr, _)) => { debug_assert!(place.layout.is_sized()); // only sized locals can ever be `Place::Local`. 
debug_assert_eq!(locals_addr, self.frame().locals_addr()); let base = self.local_to_op(local, None)?; diff --git a/compiler/rustc_const_eval/src/interpret/place.rs b/compiler/rustc_const_eval/src/interpret/place.rs index 840f7986c6e0a..3b14142da02ed 100644 --- a/compiler/rustc_const_eval/src/interpret/place.rs +++ b/compiler/rustc_const_eval/src/interpret/place.rs @@ -15,7 +15,7 @@ use tracing::{instrument, trace}; use super::{ alloc_range, mir_assign_valid_types, AllocRef, AllocRefMut, CheckAlignMsg, CtfeProvenance, ImmTy, Immediate, InterpCx, InterpResult, Machine, MemoryKind, Misalignment, OffsetMode, OpTy, - Operand, Pointer, Projectable, Provenance, Readable, Scalar, + Operand, Pointer, Projectable, Provenance, Scalar, }; #[derive(Copy, Clone, Hash, PartialEq, Eq, Debug)] @@ -180,7 +180,8 @@ pub(super) enum Place { Ptr(MemPlace), /// To support alloc-free locals, we are able to write directly to a local. The offset indicates - /// where in the local this place is located; if it is `None`, no projection has been applied. + /// where in the local this place is located; if it is `None`, no projection has been applied + /// and the type of the place is exactly the type of the local. /// Such projections are meaningful even if the offset is 0, since they can change layouts. /// (Without that optimization, we'd just always be a `MemPlace`.) 
/// `Local` places always refer to the current stack frame, so they are unstable under @@ -231,10 +232,12 @@ impl<'tcx, Prov: Provenance> PlaceTy<'tcx, Prov> { #[inline(always)] pub fn as_mplace_or_local( &self, - ) -> Either, (mir::Local, Option, usize)> { + ) -> Either, (mir::Local, Option, usize, TyAndLayout<'tcx>)> { match self.place { Place::Ptr(mplace) => Left(MPlaceTy { mplace, layout: self.layout }), - Place::Local { local, offset, locals_addr } => Right((local, offset, locals_addr)), + Place::Local { local, offset, locals_addr } => { + Right((local, offset, locals_addr, self.layout)) + } } } @@ -277,7 +280,7 @@ impl<'tcx, Prov: Provenance> Projectable<'tcx, Prov> for PlaceTy<'tcx, Prov> { ) -> InterpResult<'tcx, Self> { Ok(match self.as_mplace_or_local() { Left(mplace) => mplace.offset_with_meta(offset, mode, meta, layout, ecx)?.into(), - Right((local, old_offset, locals_addr)) => { + Right((local, old_offset, locals_addr, _)) => { debug_assert!(layout.is_sized(), "unsized locals should live in memory"); assert_matches!(meta, MemPlaceMeta::None); // we couldn't store it anyway... // `Place::Local` are always in-bounds of their surrounding local, so we can just @@ -328,9 +331,7 @@ impl<'tcx, Prov: Provenance> OpTy<'tcx, Prov> { /// The `Weiteable` trait describes interpreter values that can be written to. 
pub trait Writeable<'tcx, Prov: Provenance>: Projectable<'tcx, Prov> { - fn as_mplace_or_local( - &self, - ) -> Either, (mir::Local, Option, usize, TyAndLayout<'tcx>)>; + fn to_place(&self) -> PlaceTy<'tcx, Prov>; fn force_mplace>( &self, @@ -340,11 +341,8 @@ pub trait Writeable<'tcx, Prov: Provenance>: Projectable<'tcx, Prov> { impl<'tcx, Prov: Provenance> Writeable<'tcx, Prov> for PlaceTy<'tcx, Prov> { #[inline(always)] - fn as_mplace_or_local( - &self, - ) -> Either, (mir::Local, Option, usize, TyAndLayout<'tcx>)> { - self.as_mplace_or_local() - .map_right(|(local, offset, locals_addr)| (local, offset, locals_addr, self.layout)) + fn to_place(&self) -> PlaceTy<'tcx, Prov> { + self.clone() } #[inline(always)] @@ -358,10 +356,8 @@ impl<'tcx, Prov: Provenance> Writeable<'tcx, Prov> for PlaceTy<'tcx, Prov> { impl<'tcx, Prov: Provenance> Writeable<'tcx, Prov> for MPlaceTy<'tcx, Prov> { #[inline(always)] - fn as_mplace_or_local( - &self, - ) -> Either, (mir::Local, Option, usize, TyAndLayout<'tcx>)> { - Left(self.clone()) + fn to_place(&self) -> PlaceTy<'tcx, Prov> { + self.clone().into() } #[inline(always)] @@ -436,7 +432,7 @@ where #[instrument(skip(self), level = "trace")] pub fn deref_pointer( &self, - src: &impl Readable<'tcx, M::Provenance>, + src: &impl Projectable<'tcx, M::Provenance>, ) -> InterpResult<'tcx, MPlaceTy<'tcx, M::Provenance>> { if src.layout().ty.is_box() { // Derefer should have removed all Box derefs. @@ -562,6 +558,40 @@ where Ok(place) } + /// Given a place, returns either the underlying mplace or a reference to where the value of + /// this place is stored. 
+ fn as_mplace_or_mutable_local( + &mut self, + place: &PlaceTy<'tcx, M::Provenance>, + ) -> InterpResult< + 'tcx, + Either, (&mut Immediate, TyAndLayout<'tcx>)>, + > { + Ok(match place.to_place().as_mplace_or_local() { + Left(mplace) => Left(mplace), + Right((local, offset, locals_addr, layout)) => { + if offset.is_some() { + // This has been projected to a part of this local, or had the type changed. + // FIXME: there are cases where we could still avoid allocating an mplace. + Left(place.force_mplace(self)?) + } else { + debug_assert_eq!(locals_addr, self.frame().locals_addr()); + debug_assert_eq!(self.layout_of_local(self.frame(), local, None)?, layout); + match self.frame_mut().locals[local].access_mut()? { + Operand::Indirect(mplace) => { + // The local is in memory. + Left(MPlaceTy { mplace: *mplace, layout }) + } + Operand::Immediate(local_val) => { + // The local still has the optimized representation. + Right((local_val, layout)) + } + } + } + } + }) + } + /// Write an immediate to a place #[inline(always)] #[instrument(skip(self), level = "trace")] @@ -574,9 +604,11 @@ where if M::enforce_validity(self, dest.layout()) { // Data got changed, better make sure it matches the type! + // Also needed to reset padding. self.validate_operand( - &dest.to_op(self)?, + &dest.to_place(), M::enforce_validity_recursively(self, dest.layout()), + /*reset_provenance_and_padding*/ true, )?; } @@ -606,67 +638,27 @@ where /// Write an immediate to a place. /// If you use this you are responsible for validating that things got copied at the /// right type. - fn write_immediate_no_validate( + pub(super) fn write_immediate_no_validate( &mut self, src: Immediate, dest: &impl Writeable<'tcx, M::Provenance>, ) -> InterpResult<'tcx> { assert!(dest.layout().is_sized(), "Cannot write unsized immediate data"); - // See if we can avoid an allocation. This is the counterpart to `read_immediate_raw`, - // but not factored as a separate function.
- let mplace = match dest.as_mplace_or_local() { - Right((local, offset, locals_addr, layout)) => { - if offset.is_some() { - // This has been projected to a part of this local. We could have complicated - // logic to still keep this local as an `Operand`... but it's much easier to - // just fall back to the indirect path. - dest.force_mplace(self)? - } else { - debug_assert_eq!(locals_addr, self.frame().locals_addr()); - match self.frame_mut().locals[local].access_mut()? { - Operand::Immediate(local_val) => { - // Local can be updated in-place. - *local_val = src; - // Double-check that the value we are storing and the local fit to each other. - // (*After* doing the update for borrow checker reasons.) - if cfg!(debug_assertions) { - let local_layout = - self.layout_of_local(&self.frame(), local, None)?; - match (src, local_layout.abi) { - (Immediate::Scalar(scalar), Abi::Scalar(s)) => { - assert_eq!(scalar.size(), s.size(self)) - } - ( - Immediate::ScalarPair(a_val, b_val), - Abi::ScalarPair(a, b), - ) => { - assert_eq!(a_val.size(), a.size(self)); - assert_eq!(b_val.size(), b.size(self)); - } - (Immediate::Uninit, _) => {} - (src, abi) => { - bug!( - "value {src:?} cannot be written into local with type {} (ABI {abi:?})", - local_layout.ty - ) - } - }; - } - return Ok(()); - } - Operand::Indirect(mplace) => { - // The local is in memory, go on below. - MPlaceTy { mplace: *mplace, layout } - } - } + match self.as_mplace_or_mutable_local(&dest.to_place())? { + Right((local_val, local_layout)) => { + // Local can be updated in-place. + *local_val = src; + // Double-check that the value we are storing and the local fit to each other. + if cfg!(debug_assertions) { + src.assert_matches_abi(local_layout.abi, self); } } - Left(mplace) => mplace, // already referring to memory - }; - - // This is already in memory, write there. 
- self.write_immediate_to_mplace_no_validate(src, mplace.layout, mplace.mplace) + Left(mplace) => { + self.write_immediate_to_mplace_no_validate(src, mplace.layout, mplace.mplace)?; + } + } + Ok(()) } /// Write an immediate to memory. @@ -678,6 +670,9 @@ where layout: TyAndLayout<'tcx>, dest: MemPlace, ) -> InterpResult<'tcx> { + if cfg!(debug_assertions) { + value.assert_matches_abi(layout.abi, self); + } // Note that it is really important that the type here is the right one, and matches the // type things are read at. In case `value` is a `ScalarPair`, we don't do any magic here // to handle padding properly, which is only correct if we never look at this data with the @@ -691,15 +686,7 @@ where match value { Immediate::Scalar(scalar) => { - let Abi::Scalar(s) = layout.abi else { - span_bug!( - self.cur_span(), - "write_immediate_to_mplace: invalid Scalar layout: {layout:#?}", - ) - }; - let size = s.size(&tcx); - assert_eq!(size, layout.size, "abi::Scalar size does not match layout size"); - alloc.write_scalar(alloc_range(Size::ZERO, size), scalar) + alloc.write_scalar(alloc_range(Size::ZERO, scalar.size()), scalar) } Immediate::ScalarPair(a_val, b_val) => { let Abi::ScalarPair(a, b) = layout.abi else { @@ -709,18 +696,19 @@ where layout ) }; - let (a_size, b_size) = (a.size(&tcx), b.size(&tcx)); - let b_offset = a_size.align_to(b.align(&tcx).abi); + let b_offset = a.size(&tcx).align_to(b.align(&tcx).abi); assert!(b_offset.bytes() > 0); // in `operand_field` we use the offset to tell apart the fields // It is tempting to verify `b_offset` against `layout.fields.offset(1)`, // but that does not work: We could be a newtype around a pair, then the // fields do not match the `ScalarPair` components. 
- alloc.write_scalar(alloc_range(Size::ZERO, a_size), a_val)?; - alloc.write_scalar(alloc_range(b_offset, b_size), b_val) + alloc.write_scalar(alloc_range(Size::ZERO, a_val.size()), a_val)?; + alloc.write_scalar(alloc_range(b_offset, b_val.size()), b_val)?; + // We don't have to reset padding here, `write_immediate` will anyway do a validation run. + Ok(()) } - Immediate::Uninit => alloc.write_uninit(), + Immediate::Uninit => alloc.write_uninit_full(), } } @@ -728,35 +716,38 @@ where &mut self, dest: &impl Writeable<'tcx, M::Provenance>, ) -> InterpResult<'tcx> { - let mplace = match dest.as_mplace_or_local() { - Left(mplace) => mplace, - Right((local, offset, locals_addr, layout)) => { - if offset.is_some() { - // This has been projected to a part of this local. We could have complicated - // logic to still keep this local as an `Operand`... but it's much easier to - // just fall back to the indirect path. - // FIXME: share the logic with `write_immediate_no_validate`. - dest.force_mplace(self)? - } else { - debug_assert_eq!(locals_addr, self.frame().locals_addr()); - match self.frame_mut().locals[local].access_mut()? { - Operand::Immediate(local) => { - *local = Immediate::Uninit; - return Ok(()); - } - Operand::Indirect(mplace) => { - // The local is in memory, go on below. - MPlaceTy { mplace: *mplace, layout } - } - } - } + match self.as_mplace_or_mutable_local(&dest.to_place())? { + Right((local_val, _local_layout)) => { + *local_val = Immediate::Uninit; } - }; - let Some(mut alloc) = self.get_place_alloc_mut(&mplace)? else { - // Zero-sized access - return Ok(()); - }; - alloc.write_uninit()?; + Left(mplace) => { + let Some(mut alloc) = self.get_place_alloc_mut(&mplace)? else { + // Zero-sized access + return Ok(()); + }; + alloc.write_uninit_full()?; + } + } + Ok(()) + } + + /// Remove all provenance in the given place. 
+ pub fn clear_provenance( + &mut self, + dest: &impl Writeable<'tcx, M::Provenance>, + ) -> InterpResult<'tcx> { + match self.as_mplace_or_mutable_local(&dest.to_place())? { + Right((local_val, _local_layout)) => { + local_val.clear_provenance()?; + } + Left(mplace) => { + let Some(mut alloc) = self.get_place_alloc_mut(&mplace)? else { + // Zero-sized access + return Ok(()); + }; + alloc.clear_provenance()?; + } + } Ok(()) } @@ -768,7 +759,7 @@ where #[inline(always)] pub(super) fn copy_op_no_dest_validation( &mut self, - src: &impl Readable<'tcx, M::Provenance>, + src: &impl Projectable<'tcx, M::Provenance>, dest: &impl Writeable<'tcx, M::Provenance>, ) -> InterpResult<'tcx> { self.copy_op_inner( @@ -781,7 +772,7 @@ where #[inline(always)] pub fn copy_op_allow_transmute( &mut self, - src: &impl Readable<'tcx, M::Provenance>, + src: &impl Projectable<'tcx, M::Provenance>, dest: &impl Writeable<'tcx, M::Provenance>, ) -> InterpResult<'tcx> { self.copy_op_inner( @@ -794,7 +785,7 @@ where #[inline(always)] pub fn copy_op( &mut self, - src: &impl Readable<'tcx, M::Provenance>, + src: &impl Projectable<'tcx, M::Provenance>, dest: &impl Writeable<'tcx, M::Provenance>, ) -> InterpResult<'tcx> { self.copy_op_inner( @@ -808,28 +799,35 @@ where #[instrument(skip(self), level = "trace")] fn copy_op_inner( &mut self, - src: &impl Readable<'tcx, M::Provenance>, + src: &impl Projectable<'tcx, M::Provenance>, dest: &impl Writeable<'tcx, M::Provenance>, allow_transmute: bool, validate_dest: bool, ) -> InterpResult<'tcx> { - // Generally for transmutation, data must be valid both at the old and new type. - // But if the types are the same, the 2nd validation below suffices. 
- if src.layout().ty != dest.layout().ty && M::enforce_validity(self, src.layout()) { - self.validate_operand( - &src.to_op(self)?, - M::enforce_validity_recursively(self, src.layout()), - )?; - } + // These are technically *two* typed copies: `src` is a not-yet-loaded value, + // so we're doing a typed copy at `src` type from there to some intermediate storage. + // And then we're doing a second typed copy from that intermediate storage to `dest`. + // But as an optimization, we only make a single direct copy here. // Do the actual copy. self.copy_op_no_validate(src, dest, allow_transmute)?; if validate_dest && M::enforce_validity(self, dest.layout()) { - // Data got changed, better make sure it matches the type! + let dest = dest.to_place(); + // Given that there were two typed copies, we have to ensure this is valid at both types, + // and we have to ensure this loses provenance and padding according to both types. + // But if the types are identical, we only do one pass. + if allow_transmute && src.layout().ty != dest.layout().ty { + self.validate_operand( + &dest.transmute(src.layout(), self)?, + M::enforce_validity_recursively(self, src.layout()), + /*reset_provenance_and_padding*/ true, + )?; + } self.validate_operand( - &dest.to_op(self)?, + &dest, M::enforce_validity_recursively(self, dest.layout()), + /*reset_provenance_and_padding*/ true, )?; } @@ -843,7 +841,7 @@ where #[instrument(skip(self), level = "trace")] fn copy_op_no_validate( &mut self, - src: &impl Readable<'tcx, M::Provenance>, + src: &impl Projectable<'tcx, M::Provenance>, dest: &impl Writeable<'tcx, M::Provenance>, allow_transmute: bool, ) -> InterpResult<'tcx> { diff --git a/compiler/rustc_const_eval/src/interpret/validity.rs b/compiler/rustc_const_eval/src/interpret/validity.rs index 26b7251f6dbc5..fb24f983ca9c3 100644 --- a/compiler/rustc_const_eval/src/interpret/validity.rs +++ b/compiler/rustc_const_eval/src/interpret/validity.rs @@ -4,6 +4,7 @@ //!
That's useful because it means other passes (e.g. promotion) can rely on `const`s //! to be const-safe. +use std::borrow::Cow; use std::fmt::Write; use std::hash::Hash; use std::num::NonZero; @@ -16,22 +17,22 @@ use rustc_hir as hir; use rustc_middle::bug; use rustc_middle::mir::interpret::ValidationErrorKind::{self, *}; use rustc_middle::mir::interpret::{ - ExpectedKind, InterpError, InvalidMetaKind, Misalignment, PointerKind, Provenance, + alloc_range, ExpectedKind, InterpError, InvalidMetaKind, Misalignment, PointerKind, Provenance, UnsupportedOpInfo, ValidationErrorInfo, }; -use rustc_middle::ty::layout::{LayoutOf, TyAndLayout}; -use rustc_middle::ty::{self, Ty}; +use rustc_middle::ty::layout::{LayoutCx, LayoutOf, TyAndLayout}; +use rustc_middle::ty::{self, Ty, TyCtxt}; use rustc_span::symbol::{sym, Symbol}; use rustc_target::abi::{ - Abi, FieldIdx, Scalar as ScalarAbi, Size, VariantIdx, Variants, WrappingRange, + Abi, FieldIdx, FieldsShape, Scalar as ScalarAbi, Size, VariantIdx, Variants, WrappingRange, }; use tracing::trace; use super::machine::AllocMap; use super::{ err_ub, format_interp_error, throw_ub, AllocId, AllocKind, CheckInAllocMsg, GlobalAlloc, ImmTy, - Immediate, InterpCx, InterpResult, MPlaceTy, Machine, MemPlaceMeta, OpTy, Pointer, Projectable, - Scalar, ValueVisitor, + Immediate, InterpCx, InterpResult, MPlaceTy, Machine, MemPlaceMeta, PlaceTy, Pointer, + Projectable, Scalar, ValueVisitor, }; // for the validation errors @@ -125,6 +126,7 @@ pub enum PathElem { EnumTag, CoroutineTag, DynDowncast, + Vtable, } /// Extra things to check for during validation of CTFE results. 
@@ -163,22 +165,22 @@ impl RefTracking pub fn empty() -> Self { RefTracking { seen: FxHashSet::default(), todo: vec![] } } - pub fn new(op: T) -> Self { + pub fn new(val: T) -> Self { let mut ref_tracking_for_consts = - RefTracking { seen: FxHashSet::default(), todo: vec![(op.clone(), PATH::default())] }; - ref_tracking_for_consts.seen.insert(op); + RefTracking { seen: FxHashSet::default(), todo: vec![(val.clone(), PATH::default())] }; + ref_tracking_for_consts.seen.insert(val); ref_tracking_for_consts } pub fn next(&mut self) -> Option<(T, PATH)> { self.todo.pop() } - fn track(&mut self, op: T, path: impl FnOnce() -> PATH) { - if self.seen.insert(op.clone()) { - trace!("Recursing below ptr {:#?}", op); + fn track(&mut self, val: T, path: impl FnOnce() -> PATH) { + if self.seen.insert(val.clone()) { + trace!("Recursing below ptr {:#?}", val); let path = path(); // Remember to come back to this later. - self.todo.push((op, path)); + self.todo.push((val, path)); } } } @@ -204,11 +206,62 @@ fn write_path(out: &mut String, path: &[PathElem]) { // not the root. Deref => write!(out, "."), DynDowncast => write!(out, "."), + Vtable => write!(out, "."), } .unwrap() } } +/// Represents a set of `Size` values as a sorted list of ranges. +// These are (offset, length) pairs, and they are sorted and mutually disjoint, +// and never adjacent (i.e. there's always a gap between two of them). +#[derive(Debug, Clone)] +pub struct RangeSet(Vec<(Size, Size)>); + +impl RangeSet { + fn add_range(&mut self, offset: Size, size: Size) { + if size.bytes() == 0 { + // No need to track empty ranges. + return; + } + let v = &mut self.0; + // We scan for a partition point where the left partition is all the elements that end + // strictly before we start. Those are elements that are too "low" to merge with us. 
+ let idx = + v.partition_point(|&(other_offset, other_size)| other_offset + other_size < offset); + // Now we want to either merge with the first element of the second partition, or insert ourselves before that. + if let Some(&(other_offset, other_size)) = v.get(idx) + && offset + size >= other_offset + { + // Their end is >= our start (otherwise it would not be in the 2nd partition) and + // our end is >= their start. This means we can merge the ranges. + let new_start = other_offset.min(offset); + let mut new_end = (other_offset + other_size).max(offset + size); + // We grew to the right, so merge with overlapping/adjacent elements. + // (We also may have grown to the left, but that can never make us adjacent with + // anything there since we selected the first such candidate via `partition_point`.) + let mut scan_right = 1; + while let Some(&(next_offset, next_size)) = v.get(idx + scan_right) + && new_end >= next_offset + { + // Increase our size to absorb the next element. + new_end = new_end.max(next_offset + next_size); + // Look at the next element. + scan_right += 1; + } + // Update the element we grew. + v[idx] = (new_start, new_end - new_start); + // Remove the elements we absorbed (if any). + if scan_right > 1 { + drop(v.drain((idx + 1)..(idx + scan_right))); + } + } else { + // Insert new element. + v.insert(idx, (offset, size)); + } + } +} + struct ValidityVisitor<'rt, 'tcx, M: Machine<'tcx>> { /// The `path` may be pushed to, but the part that is present when a function /// starts must not be changed! `visit_fields` and `visit_array` rely on @@ -217,7 +270,17 @@ struct ValidityVisitor<'rt, 'tcx, M: Machine<'tcx>> { ref_tracking: Option<&'rt mut RefTracking, Vec>>, /// `None` indicates this is not validating for CTFE (but for runtime). ctfe_mode: Option, - ecx: &'rt InterpCx<'tcx, M>, + ecx: &'rt mut InterpCx<'tcx, M>, + /// Whether provenance should be reset outside of pointers (emulating the effect of a typed + /// copy). 
+ reset_provenance_and_padding: bool, + /// This tracks which byte ranges in this value contain data; the remaining bytes are padding. + /// The ideal representation here would be pointer-length pairs, but to keep things more compact + /// we only store a (range) set of offsets -- the base pointer is the same throughout the entire + /// visit, after all. + /// If this is `Some`, then `reset_provenance_and_padding` must be true (but not vice versa: + /// we might not track data vs padding bytes if the operand isn't stored in memory anyway). + data_bytes: Option, } impl<'rt, 'tcx, M: Machine<'tcx>> ValidityVisitor<'rt, 'tcx, M> { @@ -287,8 +350,14 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValidityVisitor<'rt, 'tcx, M> { // arrays/slices ty::Array(..) | ty::Slice(..) => PathElem::ArrayElem(field), + // dyn* vtables + ty::Dynamic(_, _, ty::DynKind::DynStar) if field == 1 => PathElem::Vtable, + // dyn traits - ty::Dynamic(..) => PathElem::DynDowncast, + ty::Dynamic(..) => { + assert_eq!(field, 0); + PathElem::DynDowncast + } // nothing else has an aggregate layout _ => bug!("aggregate_field_path_elem: got non-aggregate type {:?}", layout.ty), @@ -314,11 +383,11 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValidityVisitor<'rt, 'tcx, M> { fn read_immediate( &self, - op: &OpTy<'tcx, M::Provenance>, + val: &PlaceTy<'tcx, M::Provenance>, expected: ExpectedKind, ) -> InterpResult<'tcx, ImmTy<'tcx, M::Provenance>> { Ok(try_validation!( - self.ecx.read_immediate(op), + self.ecx.read_immediate(val), self.path, Ub(InvalidUninitBytes(None)) => Uninit { expected }, @@ -332,10 +401,40 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValidityVisitor<'rt, 'tcx, M> { fn read_scalar( &self, - op: &OpTy<'tcx, M::Provenance>, + val: &PlaceTy<'tcx, M::Provenance>, expected: ExpectedKind, ) -> InterpResult<'tcx, Scalar> { - Ok(self.read_immediate(op, expected)?.to_scalar()) + Ok(self.read_immediate(val, expected)?.to_scalar()) + } + + fn deref_pointer( + &mut self, + val: &PlaceTy<'tcx, M::Provenance>, + expected: 
ExpectedKind, + ) -> InterpResult<'tcx, MPlaceTy<'tcx, M::Provenance>> { + // Not using `ecx.deref_pointer` since we want to use our `read_immediate` wrapper. + let imm = self.read_immediate(val, expected)?; + // Reset provenance: ensure slice tail metadata does not preserve provenance, + // and ensure all pointers do not preserve partial provenance. + if self.reset_provenance_and_padding { + if matches!(imm.layout.abi, Abi::Scalar(..)) { + // A thin pointer. If it has provenance, we don't have to do anything. + // If it does not, ensure we clear the provenance in memory. + if matches!(imm.to_scalar(), Scalar::Int(..)) { + self.ecx.clear_provenance(val)?; + } + } else { + // A wide pointer. This means we have to worry both about the pointer itself and the + // metadata. We do the lazy thing and just write back the value we got. Just + // clearing provenance in a targeted manner would be more efficient, but unless this + // is a perf hotspot it's just not worth the effort. + self.ecx.write_immediate_no_validate(*imm, val)?; + } + // The entire thing is data, not padding. + self.add_data_range_place(val); + } + // Now turn it into a place. + self.ecx.ref_to_mplace(&imm) } fn check_wide_ptr_meta( @@ -376,11 +475,10 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValidityVisitor<'rt, 'tcx, M> { /// Check a reference or `Box`. fn check_safe_pointer( &mut self, - value: &OpTy<'tcx, M::Provenance>, + value: &PlaceTy<'tcx, M::Provenance>, ptr_kind: PointerKind, ) -> InterpResult<'tcx> { - // Not using `deref_pointer` since we want to use our `read_immediate` wrapper. - let place = self.ecx.ref_to_mplace(&self.read_immediate(value, ptr_kind.into())?)?; + let place = self.deref_pointer(value, ptr_kind.into())?; // Handle wide pointers. // Check metadata early, for better diagnostics if place.layout.is_unsized() { @@ -564,31 +662,39 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValidityVisitor<'rt, 'tcx, M> { /// Note that not all of these have `FieldsShape::Primitive`, e.g. wide references. 
fn try_visit_primitive( &mut self, - value: &OpTy<'tcx, M::Provenance>, + value: &PlaceTy<'tcx, M::Provenance>, ) -> InterpResult<'tcx, bool> { // Go over all the primitive types let ty = value.layout.ty; match ty.kind() { ty::Bool => { - let value = self.read_scalar(value, ExpectedKind::Bool)?; + let scalar = self.read_scalar(value, ExpectedKind::Bool)?; try_validation!( - value.to_bool(), + scalar.to_bool(), self.path, Ub(InvalidBool(..)) => ValidationErrorKind::InvalidBool { - value: format!("{value:x}"), + value: format!("{scalar:x}"), } ); + if self.reset_provenance_and_padding { + self.ecx.clear_provenance(value)?; + self.add_data_range_place(value); + } Ok(true) } ty::Char => { - let value = self.read_scalar(value, ExpectedKind::Char)?; + let scalar = self.read_scalar(value, ExpectedKind::Char)?; try_validation!( - value.to_char(), + scalar.to_char(), self.path, Ub(InvalidChar(..)) => ValidationErrorKind::InvalidChar { - value: format!("{value:x}"), + value: format!("{scalar:x}"), } ); + if self.reset_provenance_and_padding { + self.ecx.clear_provenance(value)?; + self.add_data_range_place(value); + } Ok(true) } ty::Float(_) | ty::Int(_) | ty::Uint(_) => { @@ -602,11 +708,14 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValidityVisitor<'rt, 'tcx, M> { ExpectedKind::Int }, )?; + if self.reset_provenance_and_padding { + self.ecx.clear_provenance(value)?; + self.add_data_range_place(value); + } Ok(true) } ty::RawPtr(..) => { - let place = - self.ecx.ref_to_mplace(&self.read_immediate(value, ExpectedKind::RawPtr)?)?; + let place = self.deref_pointer(value, ExpectedKind::RawPtr)?; if place.layout.is_unsized() { self.check_wide_ptr_meta(place.meta(), place.layout)?; } @@ -617,11 +726,11 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValidityVisitor<'rt, 'tcx, M> { Ok(true) } ty::FnPtr(..) 
=> { - let value = self.read_scalar(value, ExpectedKind::FnPtr)?; + let scalar = self.read_scalar(value, ExpectedKind::FnPtr)?; // If we check references recursively, also check that this points to a function. if let Some(_) = self.ref_tracking { - let ptr = value.to_pointer(self.ecx)?; + let ptr = scalar.to_pointer(self.ecx)?; let _fn = try_validation!( self.ecx.get_ptr_fn(ptr), self.path, @@ -631,10 +740,18 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValidityVisitor<'rt, 'tcx, M> { // FIXME: Check if the signature matches } else { // Otherwise (for standalone Miri), we have to still check it to be non-null. - if self.ecx.scalar_may_be_null(value)? { + if self.ecx.scalar_may_be_null(scalar)? { throw_validation_failure!(self.path, NullFnPtr); } } + if self.reset_provenance_and_padding { + // Make sure we do not preserve partial provenance. This matches the thin + // pointer handling in `deref_pointer`. + if matches!(scalar, Scalar::Int(..)) { + self.ecx.clear_provenance(value)?; + } + self.add_data_range_place(value); + } Ok(true) } ty::Never => throw_validation_failure!(self.path, NeverVal), @@ -716,13 +833,178 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValidityVisitor<'rt, 'tcx, M> { } } - fn in_mutable_memory(&self, op: &OpTy<'tcx, M::Provenance>) -> bool { - if let Some(mplace) = op.as_mplace_or_imm().left() { + fn in_mutable_memory(&self, val: &PlaceTy<'tcx, M::Provenance>) -> bool { + if let Some(mplace) = val.as_mplace_or_local().left() { if let Some(alloc_id) = mplace.ptr().provenance.and_then(|p| p.get_alloc_id()) { - return mutability(self.ecx, alloc_id).is_mut(); + mutability(self.ecx, alloc_id).is_mut() + } else { + // No memory at all. + false + } + } else { + // A local variable -- definitely mutable. + true + } + } + + /// Add the given pointer-length pair to the "data" range of this visit. 
+ fn add_data_range(&mut self, ptr: Pointer>, size: Size) { + if let Some(data_bytes) = self.data_bytes.as_mut() { + // We only have to store the offset, the rest is the same for all pointers here. + let (_prov, offset) = ptr.into_parts(); + // Add this. + data_bytes.add_range(offset, size); + }; + } + + /// Add the entire given place to the "data" range of this visit. + fn add_data_range_place(&mut self, place: &PlaceTy<'tcx, M::Provenance>) { + // Only sized places can be added this way. + debug_assert!(place.layout.abi.is_sized()); + if let Some(data_bytes) = self.data_bytes.as_mut() { + let offset = Self::data_range_offset(self.ecx, place); + data_bytes.add_range(offset, place.layout.size); + } + } + + /// Convert a place into the offset it starts at, for the purpose of data_range tracking. + /// Must only be called if `data_bytes` is `Some(_)`. + fn data_range_offset(ecx: &InterpCx<'tcx, M>, place: &PlaceTy<'tcx, M::Provenance>) -> Size { + // The presence of `data_bytes` implies that our place is in memory. + let ptr = ecx + .place_to_op(place) + .expect("place must be in memory") + .as_mplace_or_imm() + .expect_left("place must be in memory") + .ptr(); + let (_prov, offset) = ptr.into_parts(); + offset + } + + fn reset_padding(&mut self, place: &PlaceTy<'tcx, M::Provenance>) -> InterpResult<'tcx> { + let Some(data_bytes) = self.data_bytes.as_mut() else { return Ok(()) }; + // Our value must be in memory, otherwise we would not have set up `data_bytes`. + let mplace = self.ecx.force_allocation(place)?; + // Determine starting offset and size. + let (_prov, start_offset) = mplace.ptr().into_parts(); + let (size, _align) = self + .ecx + .size_and_align_of_mplace(&mplace)? + .unwrap_or((mplace.layout.size, mplace.layout.align.abi)); + // If there is no padding at all, we can skip the rest: check for + // a single data range covering the entire value. + if data_bytes.0 == &[(start_offset, size)] { + return Ok(()); + } + // Get a handle for the allocation. 
Do this only once, to avoid looking up the same + // allocation over and over again. (Though to be fair, iterating the value already does + // exactly that.) + let Some(mut alloc) = self.ecx.get_ptr_alloc_mut(mplace.ptr(), size)? else { + // A ZST, no padding to clear. + return Ok(()); + }; + // Add a "finalizer" data range at the end, so that the iteration below finds all gaps + // between ranges. + data_bytes.0.push((start_offset + size, Size::ZERO)); + // Iterate, and reset gaps. + let mut padding_cleared_until = start_offset; + for &(offset, size) in data_bytes.0.iter() { + assert!( + offset >= padding_cleared_until, + "reset_padding on {}: previous field ended at offset {}, next field starts at {} (and has a size of {} bytes)", + mplace.layout.ty, + (padding_cleared_until - start_offset).bytes(), + (offset - start_offset).bytes(), + size.bytes(), + ); + if offset > padding_cleared_until { + // We found padding. Adjust the range to be relative to `alloc`, and make it uninit. + let padding_start = padding_cleared_until - start_offset; + let padding_size = offset - padding_cleared_until; + let range = alloc_range(padding_start, padding_size); + trace!("reset_padding on {}: resetting padding range {range:?}", mplace.layout.ty); + alloc.write_uninit(range)?; + } + padding_cleared_until = offset + size; + } + assert!(padding_cleared_until == start_offset + size); + Ok(()) + } + + /// Computes the data range of this union type: + /// which bytes are inside a field (i.e., not padding.) 
+ fn union_data_range<'e>( + ecx: &'e mut InterpCx<'tcx, M>, + layout: TyAndLayout<'tcx>, + ) -> Cow<'e, RangeSet> { + assert!(layout.ty.is_union()); + assert!(layout.abi.is_sized(), "there are no unsized unions"); + let layout_cx = LayoutCx { tcx: *ecx.tcx, param_env: ecx.param_env }; + return M::cached_union_data_range(ecx, layout.ty, || { + let mut out = RangeSet(Vec::new()); + union_data_range_uncached(&layout_cx, layout, Size::ZERO, &mut out); + out + }); + + /// Helper for recursive traversal: add data ranges of the given type to `out`. + fn union_data_range_uncached<'tcx>( + cx: &LayoutCx<'tcx, TyCtxt<'tcx>>, + layout: TyAndLayout<'tcx>, + base_offset: Size, + out: &mut RangeSet, + ) { + // If this is a ZST, we don't contain any data. In particular, this helps us to quickly + // skip over huge arrays of ZST. + if layout.is_zst() { + return; + } + // Just recursively add all the fields of everything to the output. + match &layout.fields { + FieldsShape::Primitive => { + out.add_range(base_offset, layout.size); + } + &FieldsShape::Union(fields) => { + // Currently, all fields start at offset 0 (relative to `base_offset`). + for field in 0..fields.get() { + let field = layout.field(cx, field); + union_data_range_uncached(cx, field, base_offset, out); + } + } + &FieldsShape::Array { stride, count } => { + let elem = layout.field(cx, 0); + + // Fast-path for large arrays of simple types that do not contain any padding. + if elem.abi.is_scalar() { + out.add_range(base_offset, elem.size * count); + } else { + for idx in 0..count { + // This repeats the same computation for every array element... but the alternative + // is to allocate temporary storage for a dedicated `out` set for the array element, + // and replicating that N times. Is that better? + union_data_range_uncached(cx, elem, base_offset + idx * stride, out); + } + } + } + FieldsShape::Arbitrary { offsets, .. 
} => { + for (field, &offset) in offsets.iter_enumerated() { + let field = layout.field(cx, field.as_usize()); + union_data_range_uncached(cx, field, base_offset + offset, out); + } + } + } + // Don't forget potential other variants. + match &layout.variants { + Variants::Single { .. } => { + // Fully handled above. + } + Variants::Multiple { variants, .. } => { + for variant in variants.indices() { + let variant = layout.for_variant(cx, variant); + union_data_range_uncached(cx, variant, base_offset, out); + } + } } } - false } } @@ -774,7 +1056,7 @@ fn mutability<'tcx>(ecx: &InterpCx<'tcx, impl Machine<'tcx>>, alloc_id: AllocId) } impl<'rt, 'tcx, M: Machine<'tcx>> ValueVisitor<'tcx, M> for ValidityVisitor<'rt, 'tcx, M> { - type V = OpTy<'tcx, M::Provenance>; + type V = PlaceTy<'tcx, M::Provenance>; #[inline(always)] fn ecx(&self) -> &InterpCx<'tcx, M> { @@ -783,11 +1065,11 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValueVisitor<'tcx, M> for ValidityVisitor<'rt, fn read_discriminant( &mut self, - op: &OpTy<'tcx, M::Provenance>, + val: &PlaceTy<'tcx, M::Provenance>, ) -> InterpResult<'tcx, VariantIdx> { self.with_elem(PathElem::EnumTag, move |this| { Ok(try_validation!( - this.ecx.read_discriminant(op), + this.ecx.read_discriminant(val), this.path, Ub(InvalidTag(val)) => InvalidEnumTag { value: format!("{val:x}"), @@ -802,44 +1084,54 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValueVisitor<'tcx, M> for ValidityVisitor<'rt, #[inline] fn visit_field( &mut self, - old_op: &OpTy<'tcx, M::Provenance>, + old_val: &PlaceTy<'tcx, M::Provenance>, field: usize, - new_op: &OpTy<'tcx, M::Provenance>, + new_val: &PlaceTy<'tcx, M::Provenance>, ) -> InterpResult<'tcx> { - let elem = self.aggregate_field_path_elem(old_op.layout, field); - self.with_elem(elem, move |this| this.visit_value(new_op)) + let elem = self.aggregate_field_path_elem(old_val.layout, field); + self.with_elem(elem, move |this| this.visit_value(new_val)) } #[inline] fn visit_variant( &mut self, - old_op: &OpTy<'tcx, 
M::Provenance>, + old_val: &PlaceTy<'tcx, M::Provenance>, variant_id: VariantIdx, - new_op: &OpTy<'tcx, M::Provenance>, + new_val: &PlaceTy<'tcx, M::Provenance>, ) -> InterpResult<'tcx> { - let name = match old_op.layout.ty.kind() { + let name = match old_val.layout.ty.kind() { ty::Adt(adt, _) => PathElem::Variant(adt.variant(variant_id).name), // Coroutines also have variants ty::Coroutine(..) => PathElem::CoroutineState(variant_id), - _ => bug!("Unexpected type with variant: {:?}", old_op.layout.ty), + _ => bug!("Unexpected type with variant: {:?}", old_val.layout.ty), }; - self.with_elem(name, move |this| this.visit_value(new_op)) + self.with_elem(name, move |this| this.visit_value(new_val)) } #[inline(always)] fn visit_union( &mut self, - op: &OpTy<'tcx, M::Provenance>, + val: &PlaceTy<'tcx, M::Provenance>, _fields: NonZero, ) -> InterpResult<'tcx> { // Special check for CTFE validation, preventing `UnsafeCell` inside unions in immutable memory. if self.ctfe_mode.is_some_and(|c| !c.allow_immutable_unsafe_cell()) { - if !op.layout.is_zst() && !op.layout.ty.is_freeze(*self.ecx.tcx, self.ecx.param_env) { - if !self.in_mutable_memory(op) { + if !val.layout.is_zst() && !val.layout.ty.is_freeze(*self.ecx.tcx, self.ecx.param_env) { + if !self.in_mutable_memory(val) { throw_validation_failure!(self.path, UnsafeCellInImmutable); } } } + if self.reset_provenance_and_padding + && let Some(data_bytes) = self.data_bytes.as_mut() + { + let base_offset = Self::data_range_offset(self.ecx, val); + // Determine and add data range for this union. 
+ let union_data_range = Self::union_data_range(self.ecx, val.layout); + for &(offset, size) in union_data_range.0.iter() { + data_bytes.add_range(base_offset + offset, size); + } + } Ok(()) } @@ -847,39 +1139,41 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValueVisitor<'tcx, M> for ValidityVisitor<'rt, fn visit_box( &mut self, _box_ty: Ty<'tcx>, - op: &OpTy<'tcx, M::Provenance>, + val: &PlaceTy<'tcx, M::Provenance>, ) -> InterpResult<'tcx> { - self.check_safe_pointer(op, PointerKind::Box)?; + self.check_safe_pointer(val, PointerKind::Box)?; Ok(()) } #[inline] - fn visit_value(&mut self, op: &OpTy<'tcx, M::Provenance>) -> InterpResult<'tcx> { - trace!("visit_value: {:?}, {:?}", *op, op.layout); + fn visit_value(&mut self, val: &PlaceTy<'tcx, M::Provenance>) -> InterpResult<'tcx> { + trace!("visit_value: {:?}, {:?}", *val, val.layout); // Check primitive types -- the leaves of our recursive descent. + // This is called even for enum discriminants (which are "fields" of their enum), + // so for integer-typed discriminants the provenance reset will happen here. // We assume that the Scalar validity range does not restrict these values // any further than `try_visit_primitive` does! - if self.try_visit_primitive(op)? { + if self.try_visit_primitive(val)? { return Ok(()); } // Special check preventing `UnsafeCell` in the inner part of constants if self.ctfe_mode.is_some_and(|c| !c.allow_immutable_unsafe_cell()) { - if !op.layout.is_zst() - && let Some(def) = op.layout.ty.ty_adt_def() + if !val.layout.is_zst() + && let Some(def) = val.layout.ty.ty_adt_def() && def.is_unsafe_cell() { - if !self.in_mutable_memory(op) { + if !self.in_mutable_memory(val) { throw_validation_failure!(self.path, UnsafeCellInImmutable); } } } // Recursively walk the value at its type. Apply optimizations for some large types. 
- match op.layout.ty.kind() { + match val.layout.ty.kind() { ty::Str => { - let mplace = op.assert_mem_place(); // strings are unsized and hence never immediate + let mplace = val.assert_mem_place(); // strings are unsized and hence never immediate let len = mplace.len(self.ecx)?; try_validation!( self.ecx.read_bytes_ptr_strip_provenance(mplace.ptr(), Size::from_bytes(len)), @@ -889,11 +1183,10 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValueVisitor<'tcx, M> for ValidityVisitor<'rt, ); } ty::Array(tys, ..) | ty::Slice(tys) - // This optimization applies for types that can hold arbitrary bytes (such as - // integer and floating point types) or for structs or tuples with no fields. - // FIXME(wesleywiser) This logic could be extended further to arbitrary structs - // or tuples made up of integer/floating point types or inhabited ZSTs with no - // padding. + // This optimization applies for types that can hold arbitrary non-provenance bytes (such as + // integer and floating point types). + // FIXME(wesleywiser) This logic could be extended further to arbitrary structs or + // tuples made up of integer/floating point types or inhabited ZSTs with no padding. if matches!(tys.kind(), ty::Int(..) | ty::Uint(..) | ty::Float(..)) => { @@ -901,18 +1194,19 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValueVisitor<'tcx, M> for ValidityVisitor<'rt, // Optimized handling for arrays of integer/float type. // This is the length of the array/slice. - let len = op.len(self.ecx)?; + let len = val.len(self.ecx)?; // This is the element type size. let layout = self.ecx.layout_of(*tys)?; // This is the size in bytes of the whole array. (This checks for overflow.) let size = layout.size * len; // If the size is 0, there is nothing to check. - // (`size` can only be 0 of `len` is 0, and empty arrays are always valid.) + // (`size` can only be 0 if `len` is 0, and empty arrays are always valid.) 
if size == Size::ZERO { return Ok(()); } - // Now that we definitely have a non-ZST array, we know it lives in memory. - let mplace = match op.as_mplace_or_imm() { + // Now that we definitely have a non-ZST array, we know it lives in memory -- except it may + // be an uninitialized local variable, those are also "immediate". + let mplace = match val.to_op(self.ecx)?.as_mplace_or_imm() { Left(mplace) => mplace, Right(imm) => match *imm { Immediate::Uninit => @@ -958,20 +1252,30 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValueVisitor<'tcx, M> for ValidityVisitor<'rt, } } } + + // Don't forget that these are all non-pointer types, and thus do not preserve + // provenance. + if self.reset_provenance_and_padding { + // We can't share this with the code above because there we might be looking at read-only memory. + let mut alloc = self.ecx.get_ptr_alloc_mut(mplace.ptr(), size)?.expect("we already excluded size 0"); + alloc.clear_provenance()?; + // Also, mark this as containing data, not padding. + self.add_data_range(mplace.ptr(), size); + } } // Fast path for arrays and slices of ZSTs. We only need to check a single ZST element // of an array and not all of them, because there's only a single value of a specific // ZST type, so either validation fails for all elements or none. ty::Array(tys, ..) | ty::Slice(tys) if self.ecx.layout_of(*tys)?.is_zst() => { // Validate just the first element (if any). - if op.len(self.ecx)? > 0 { - self.visit_field(op, 0, &self.ecx.project_index(op, 0)?)?; + if val.len(self.ecx)? > 0 { + self.visit_field(val, 0, &self.ecx.project_index(val, 0)?)?; } } _ => { // default handler try_validation!( - self.walk_value(op), + self.walk_value(val), self.path, // It's not great to catch errors here, since we can't give a very good path, // but it's better than ICEing. @@ -992,15 +1296,15 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValueVisitor<'tcx, M> for ValidityVisitor<'rt, // FIXME: We could avoid some redundant checks here.
For newtypes wrapping // scalars, we do the same check on every "level" (e.g., first we check // MyNewtype and then the scalar in there). - match op.layout.abi { + match val.layout.abi { Abi::Uninhabited => { - let ty = op.layout.ty; + let ty = val.layout.ty; throw_validation_failure!(self.path, UninhabitedVal { ty }); } Abi::Scalar(scalar_layout) => { if !scalar_layout.is_uninit_valid() { // There is something to check here. - let scalar = self.read_scalar(op, ExpectedKind::InitScalar)?; + let scalar = self.read_scalar(val, ExpectedKind::InitScalar)?; self.visit_scalar(scalar, scalar_layout)?; } } @@ -1010,7 +1314,7 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValueVisitor<'tcx, M> for ValidityVisitor<'rt, // the other must be init. if !a_layout.is_uninit_valid() && !b_layout.is_uninit_valid() { let (a, b) = - self.read_immediate(op, ExpectedKind::InitScalar)?.to_scalar_pair(); + self.read_immediate(val, ExpectedKind::InitScalar)?.to_scalar_pair(); self.visit_scalar(a, a_layout)?; self.visit_scalar(b, b_layout)?; } @@ -1031,19 +1335,34 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValueVisitor<'tcx, M> for ValidityVisitor<'rt, impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { fn validate_operand_internal( - &self, - op: &OpTy<'tcx, M::Provenance>, + &mut self, + val: &PlaceTy<'tcx, M::Provenance>, path: Vec, ref_tracking: Option<&mut RefTracking, Vec>>, ctfe_mode: Option, + reset_provenance_and_padding: bool, ) -> InterpResult<'tcx> { - trace!("validate_operand_internal: {:?}, {:?}", *op, op.layout.ty); - - // Construct a visitor - let mut visitor = ValidityVisitor { path, ref_tracking, ctfe_mode, ecx: self }; + trace!("validate_operand_internal: {:?}, {:?}", *val, val.layout.ty); - // Run it. - match self.run_for_validation(|| visitor.visit_value(op)) { + // Run the visitor. + match self.run_for_validation(|ecx| { + let reset_padding = reset_provenance_and_padding && { + // Check if `val` is actually stored in memory. 
If not, padding is not even + // represented and we need not reset it. + ecx.place_to_op(val)?.as_mplace_or_imm().is_left() + }; + let mut v = ValidityVisitor { + path, + ref_tracking, + ctfe_mode, + ecx, + reset_provenance_and_padding, + data_bytes: reset_padding.then_some(RangeSet(Vec::new())), + }; + v.visit_value(val)?; + v.reset_padding(val)?; + InterpResult::Ok(()) + }) { Ok(()) => Ok(()), // Pass through validation failures and "invalid program" issues. Err(err) @@ -1079,13 +1398,19 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { /// - no `UnsafeCell` or non-ZST `&mut`. #[inline(always)] pub(crate) fn const_validate_operand( - &self, - op: &OpTy<'tcx, M::Provenance>, + &mut self, + val: &PlaceTy<'tcx, M::Provenance>, path: Vec, ref_tracking: &mut RefTracking, Vec>, ctfe_mode: CtfeValidationMode, ) -> InterpResult<'tcx> { - self.validate_operand_internal(op, path, Some(ref_tracking), Some(ctfe_mode)) + self.validate_operand_internal( + val, + path, + Some(ref_tracking), + Some(ctfe_mode), + /*reset_provenance_and_padding*/ false, + ) } /// This function checks the data at `op` to be runtime-valid. @@ -1093,21 +1418,41 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { /// It will error if the bits at the destination do not match the ones described by the layout. #[inline(always)] pub fn validate_operand( - &self, - op: &OpTy<'tcx, M::Provenance>, + &mut self, + val: &PlaceTy<'tcx, M::Provenance>, recursive: bool, + reset_provenance_and_padding: bool, ) -> InterpResult<'tcx> { // Note that we *could* actually be in CTFE here with `-Zextra-const-ub-checks`, but it's // still correct to not use `ctfe_mode`: that mode is for validation of the final constant // value, it rules out things like `UnsafeCell` in awkward places. if !recursive { - return self.validate_operand_internal(op, vec![], None, None); + return self.validate_operand_internal( + val, + vec![], + None, + None, + reset_provenance_and_padding, + ); } // Do a recursive check.
let mut ref_tracking = RefTracking::empty(); - self.validate_operand_internal(op, vec![], Some(&mut ref_tracking), None)?; + self.validate_operand_internal( + val, + vec![], + Some(&mut ref_tracking), + None, + reset_provenance_and_padding, + )?; while let Some((mplace, path)) = ref_tracking.todo.pop() { - self.validate_operand_internal(&mplace.into(), path, Some(&mut ref_tracking), None)?; + // Things behind reference do *not* have the provenance reset. + self.validate_operand_internal( + &mplace.into(), + path, + Some(&mut ref_tracking), + None, + /*reset_provenance_and_padding*/ false, + )?; } Ok(()) } diff --git a/compiler/rustc_const_eval/src/interpret/visitor.rs b/compiler/rustc_const_eval/src/interpret/visitor.rs index b02f12e3c7f0b..d8af67bd0e705 100644 --- a/compiler/rustc_const_eval/src/interpret/visitor.rs +++ b/compiler/rustc_const_eval/src/interpret/visitor.rs @@ -5,6 +5,7 @@ use std::num::NonZero; use rustc_index::IndexVec; use rustc_middle::mir::interpret::InterpResult; +use rustc_middle::ty::layout::LayoutOf; use rustc_middle::ty::{self, Ty}; use rustc_target::abi::{FieldIdx, FieldsShape, VariantIdx, Variants}; use tracing::trace; @@ -82,6 +83,7 @@ pub trait ValueVisitor<'tcx, M: Machine<'tcx>>: Sized { self.visit_value(new_val) } + /// Traversal logic; should not be overloaded. fn walk_value(&mut self, v: &Self::V) -> InterpResult<'tcx> { let ty = v.layout().ty; trace!("walk_value: type: {ty}"); @@ -104,6 +106,17 @@ pub trait ValueVisitor<'tcx, M: Machine<'tcx>>: Sized { // DynStar types. Very different from a dyn type (but strangely part of the // same variant in `TyKind`): These are pairs where the 2nd component is the // vtable, and the first component is the data (which must be ptr-sized). + + // First make sure the vtable can be read at its type. + // The type of this vtable is fake, it claims to be a reference to some actual memory but that isn't true. + // So we transmute it to a raw pointer. 
+ let raw_ptr_ty = Ty::new_mut_ptr(*self.ecx().tcx, self.ecx().tcx.types.unit); + let raw_ptr_ty = self.ecx().layout_of(raw_ptr_ty)?; + let vtable_field = + self.ecx().project_field(v, 1)?.transmute(raw_ptr_ty, self.ecx())?; + self.visit_field(v, 1, &vtable_field)?; + + // Then unpack the first field, and continue. let data = self.ecx().unpack_dyn_star(v, data)?; return self.visit_field(v, 0, &data); } diff --git a/compiler/rustc_const_eval/src/util/check_validity_requirement.rs b/compiler/rustc_const_eval/src/util/check_validity_requirement.rs index cbd1fdeea2aa3..611a8e1a88497 100644 --- a/compiler/rustc_const_eval/src/util/check_validity_requirement.rs +++ b/compiler/rustc_const_eval/src/util/check_validity_requirement.rs @@ -4,7 +4,7 @@ use rustc_middle::ty::{ParamEnvAnd, Ty, TyCtxt}; use rustc_target::abi::{Abi, FieldsShape, Scalar, Variants}; use crate::const_eval::{CanAccessMutGlobal, CheckAlignment, CompileTimeMachine}; -use crate::interpret::{InterpCx, MemoryKind, OpTy}; +use crate::interpret::{InterpCx, MemoryKind}; /// Determines if this type permits "raw" initialization by just transmuting some memory into an /// instance of `T`. @@ -32,15 +32,15 @@ pub fn check_validity_requirement<'tcx>( let layout_cx = LayoutCx { tcx, param_env: param_env_and_ty.param_env }; if kind == ValidityRequirement::Uninit || tcx.sess.opts.unstable_opts.strict_init_checks { - might_permit_raw_init_strict(layout, &layout_cx, kind) + check_validity_requirement_strict(layout, &layout_cx, kind) } else { - might_permit_raw_init_lax(layout, &layout_cx, kind) + check_validity_requirement_lax(layout, &layout_cx, kind) } } -/// Implements the 'strict' version of the `might_permit_raw_init` checks; see that function for -/// details. -fn might_permit_raw_init_strict<'tcx>( +/// Implements the 'strict' version of the [`check_validity_requirement`] checks; see that function +/// for details. 
+fn check_validity_requirement_strict<'tcx>( ty: TyAndLayout<'tcx>, cx: &LayoutCx<'tcx, TyCtxt<'tcx>>, kind: ValidityRequirement, @@ -61,18 +61,24 @@ fn might_permit_raw_init_strict<'tcx>( .expect("failed to write bytes for zero valid check"); } - let ot: OpTy<'_, _> = allocated.into(); - // Assume that if it failed, it's a validation failure. // This does *not* actually check that references are dereferenceable, but since all types that // require dereferenceability also require non-null, we don't actually get any false negatives // due to this. - Ok(cx.validate_operand(&ot, /*recursive*/ false).is_ok()) + // The value we are validating is temporary and discarded at the end of this function, so + // there is no point in resetting provenance and padding. + Ok(cx + .validate_operand( + &allocated.into(), + /*recursive*/ false, + /*reset_provenance_and_padding*/ false, + ) + .is_ok()) } -/// Implements the 'lax' (default) version of the `might_permit_raw_init` checks; see that function for -/// details. -fn might_permit_raw_init_lax<'tcx>( +/// Implements the 'lax' (default) version of the [`check_validity_requirement`] checks; see that +/// function for details. +fn check_validity_requirement_lax<'tcx>( this: TyAndLayout<'tcx>, cx: &LayoutCx<'tcx, TyCtxt<'tcx>>, init_kind: ValidityRequirement, @@ -137,7 +143,7 @@ fn might_permit_raw_init_lax<'tcx>( } FieldsShape::Arbitrary { offsets, .. } => { for idx in 0..offsets.len() { - if !might_permit_raw_init_lax(this.field(cx, idx), cx, init_kind)? { + if !check_validity_requirement_lax(this.field(cx, idx), cx, init_kind)? { // We found a field that is unhappy with this kind of initialization. return Ok(false); } diff --git a/compiler/rustc_index/src/interval.rs b/compiler/rustc_index/src/interval.rs index 503470f896d09..34f541a8cc639 100644 --- a/compiler/rustc_index/src/interval.rs +++ b/compiler/rustc_index/src/interval.rs @@ -17,7 +17,7 @@ mod tests; /// first value of the following element.
#[derive(Debug, Clone)] pub struct IntervalSet { - // Start, end + // Start, end (both inclusive) map: SmallVec<[(u32, u32); 2]>, domain: usize, _data: PhantomData, diff --git a/compiler/rustc_middle/src/mir/interpret/allocation.rs b/compiler/rustc_middle/src/mir/interpret/allocation.rs index 5fb8af576ae93..cd56d0edc0585 100644 --- a/compiler/rustc_middle/src/mir/interpret/allocation.rs +++ b/compiler/rustc_middle/src/mir/interpret/allocation.rs @@ -644,6 +644,12 @@ impl Allocation return Ok(()); } + /// Remove all provenance in the given memory range. + pub fn clear_provenance(&mut self, cx: &impl HasDataLayout, range: AllocRange) -> AllocResult { + self.provenance.clear(range, cx)?; + return Ok(()); + } + /// Applies a previously prepared provenance copy. /// The affected range, as defined in the parameters to `provenance().prepare_copy` is expected /// to be clear of provenance. diff --git a/compiler/rustc_middle/src/mir/interpret/value.rs b/compiler/rustc_middle/src/mir/interpret/value.rs index 84c17b39a623e..989f03d3d1399 100644 --- a/compiler/rustc_middle/src/mir/interpret/value.rs +++ b/compiler/rustc_middle/src/mir/interpret/value.rs @@ -307,6 +307,13 @@ impl<'tcx, Prov: Provenance> Scalar { } } + pub fn clear_provenance(&mut self) -> InterpResult<'tcx> { + if matches!(self, Scalar::Ptr(..)) { + *self = self.to_scalar_int()?.into(); + } + Ok(()) + } + #[inline(always)] pub fn to_scalar_int(self) -> InterpResult<'tcx, ScalarInt> { self.try_to_scalar_int().map_err(|_| err_unsup!(ReadPointerAsInt(None)).into()) diff --git a/compiler/rustc_middle/src/ty/sty.rs b/compiler/rustc_middle/src/ty/sty.rs index 1f4f2c62d7084..730ba265b19d6 100644 --- a/compiler/rustc_middle/src/ty/sty.rs +++ b/compiler/rustc_middle/src/ty/sty.rs @@ -1136,6 +1136,7 @@ impl<'tcx> Ty<'tcx> { } /// Tests if this is any kind of primitive pointer type (reference, raw pointer, fn pointer). + /// `Box` is *not* considered a pointer here! 
#[inline] pub fn is_any_ptr(self) -> bool { self.is_ref() || self.is_unsafe_ptr() || self.is_fn_ptr() diff --git a/library/core/tests/mem.rs b/library/core/tests/mem.rs index b7eee10ec3f9c..f3b4387f6a898 100644 --- a/library/core/tests/mem.rs +++ b/library/core/tests/mem.rs @@ -773,15 +773,20 @@ fn offset_of_addr() { #[test] fn const_maybe_uninit_zeroed() { // Sanity check for `MaybeUninit::zeroed` in a realistic const situation (plugin array term) + + // It is crucial that this type has no padding! #[repr(C)] struct Foo { - a: Option<&'static str>, + a: Option<&'static u8>, b: Bar, c: f32, + _pad: u32, d: *const u8, } + #[repr(C)] struct Bar(usize); + struct FooPtr(*const Foo); unsafe impl Sync for FooPtr {} diff --git a/src/tools/miri/src/concurrency/data_race.rs b/src/tools/miri/src/concurrency/data_race.rs index 6fd207c92b937..b604fd868a02a 100644 --- a/src/tools/miri/src/concurrency/data_race.rs +++ b/src/tools/miri/src/concurrency/data_race.rs @@ -637,7 +637,7 @@ pub trait EvalContextExt<'tcx>: MiriInterpCxExt<'tcx> { // The program didn't actually do a read, so suppress the memory access hooks. // This is also a very special exception where we just ignore an error -- if this read // was UB e.g. because the memory is uninitialized, we don't want to know! 
- let old_val = this.run_for_validation(|| this.read_scalar(dest)).ok(); + let old_val = this.run_for_validation(|this| this.read_scalar(dest)).ok(); this.allow_data_races_mut(move |this| this.write_scalar(val, dest))?; this.validate_atomic_store(dest, atomic)?; this.buffered_atomic_write(val, dest, atomic, old_val) diff --git a/src/tools/miri/src/helpers.rs b/src/tools/miri/src/helpers.rs index a546ad20fef98..cba99c0bd7a81 100644 --- a/src/tools/miri/src/helpers.rs +++ b/src/tools/miri/src/helpers.rs @@ -869,7 +869,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { /// Dereference a pointer operand to a place using `layout` instead of the pointer's declared type fn deref_pointer_as( &self, - op: &impl Readable<'tcx, Provenance>, + op: &impl Projectable<'tcx, Provenance>, layout: TyAndLayout<'tcx>, ) -> InterpResult<'tcx, MPlaceTy<'tcx>> { let this = self.eval_context_ref(); @@ -880,7 +880,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { /// Calculates the MPlaceTy given the offset and layout of an access on an operand fn deref_pointer_and_offset( &self, - op: &impl Readable<'tcx, Provenance>, + op: &impl Projectable<'tcx, Provenance>, offset: u64, base_layout: TyAndLayout<'tcx>, value_layout: TyAndLayout<'tcx>, @@ -897,7 +897,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { fn deref_pointer_and_read( &self, - op: &impl Readable<'tcx, Provenance>, + op: &impl Projectable<'tcx, Provenance>, offset: u64, base_layout: TyAndLayout<'tcx>, value_layout: TyAndLayout<'tcx>, @@ -909,7 +909,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { fn deref_pointer_and_write( &mut self, - op: &impl Readable<'tcx, Provenance>, + op: &impl Projectable<'tcx, Provenance>, offset: u64, value: impl Into, base_layout: TyAndLayout<'tcx>, diff --git a/src/tools/miri/src/intrinsics/mod.rs b/src/tools/miri/src/intrinsics/mod.rs index 18b22827bdb15..0ab1b9dfb61e9 100644 --- a/src/tools/miri/src/intrinsics/mod.rs +++ 
b/src/tools/miri/src/intrinsics/mod.rs @@ -152,8 +152,10 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { // ``` // Would not be considered UB, or the other way around (`is_val_statically_known(0)`). "is_val_statically_known" => { - let [arg] = check_arg_count(args)?; - this.validate_operand(arg, /*recursive*/ false)?; + let [_arg] = check_arg_count(args)?; + // FIXME: should we check for validity here? It's tricky because we do not have a + // place. Codegen does not seem to set any attributes like `noundef` for intrinsic + // calls, so we don't *have* to do anything. let branch: bool = this.machine.rng.get_mut().gen(); this.write_scalar(Scalar::from_bool(branch), dest)?; } diff --git a/src/tools/miri/src/machine.rs b/src/tools/miri/src/machine.rs index 66c6966d1a6c3..d1267cd68668a 100644 --- a/src/tools/miri/src/machine.rs +++ b/src/tools/miri/src/machine.rs @@ -572,6 +572,9 @@ pub struct MiriMachine<'tcx> { /// Invariant: the promised alignment will never be less than the native alignment of the /// allocation. pub(crate) symbolic_alignment: RefCell>, + + /// A cache of "data range" computations for unions (i.e., the offsets of non-padding bytes). 
+ union_data_ranges: FxHashMap, RangeSet>, } impl<'tcx> MiriMachine<'tcx> { @@ -714,6 +717,7 @@ impl<'tcx> MiriMachine<'tcx> { allocation_spans: RefCell::new(FxHashMap::default()), const_cache: RefCell::new(FxHashMap::default()), symbolic_alignment: RefCell::new(FxHashMap::default()), + union_data_ranges: FxHashMap::default(), } } @@ -826,6 +830,7 @@ impl VisitProvenance for MiriMachine<'_> { allocation_spans: _, const_cache: _, symbolic_alignment: _, + union_data_ranges: _, } = self; threads.visit_provenance(visit); @@ -1627,4 +1632,12 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> { ecx.machine.rng.borrow_mut().gen::() % ADDRS_PER_ANON_GLOBAL } } + + fn cached_union_data_range<'e>( + ecx: &'e mut InterpCx<'tcx, Self>, + ty: Ty<'tcx>, + compute_range: impl FnOnce() -> RangeSet, + ) -> Cow<'e, RangeSet> { + Cow::Borrowed(ecx.machine.union_data_ranges.entry(ty).or_insert_with(compute_range)) + } } diff --git a/src/tools/miri/tests/fail/provenance/int_copy_looses_provenance0.rs b/src/tools/miri/tests/fail/provenance/int_copy_looses_provenance0.rs new file mode 100644 index 0000000000000..fd0773ed916b7 --- /dev/null +++ b/src/tools/miri/tests/fail/provenance/int_copy_looses_provenance0.rs @@ -0,0 +1,10 @@ +use std::mem; + +// Doing a copy at integer type should lose provenance. +// This tests the unoptimized base case. 
+fn main() { + let ptrs = [(&42, true)]; + let ints: [(usize, bool); 1] = unsafe { mem::transmute(ptrs) }; + let ptr = (&raw const ints[0].0).cast::<&i32>(); + let _val = unsafe { *ptr.read() }; //~ERROR: dangling +} diff --git a/src/tools/miri/tests/fail/provenance/int_copy_looses_provenance0.stderr b/src/tools/miri/tests/fail/provenance/int_copy_looses_provenance0.stderr new file mode 100644 index 0000000000000..fc012af3ad877 --- /dev/null +++ b/src/tools/miri/tests/fail/provenance/int_copy_looses_provenance0.stderr @@ -0,0 +1,15 @@ +error: Undefined Behavior: constructing invalid value: encountered a dangling reference ($HEX[noalloc] has no provenance) + --> $DIR/int_copy_looses_provenance0.rs:LL:CC + | +LL | let _val = unsafe { *ptr.read() }; + | ^^^^^^^^^^ constructing invalid value: encountered a dangling reference ($HEX[noalloc] has no provenance) + | + = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior + = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information + = note: BACKTRACE: + = note: inside `main` at $DIR/int_copy_looses_provenance0.rs:LL:CC + +note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace + +error: aborting due to 1 previous error + diff --git a/src/tools/miri/tests/fail/provenance/int_copy_looses_provenance1.rs b/src/tools/miri/tests/fail/provenance/int_copy_looses_provenance1.rs new file mode 100644 index 0000000000000..ce64dcc1a07c4 --- /dev/null +++ b/src/tools/miri/tests/fail/provenance/int_copy_looses_provenance1.rs @@ -0,0 +1,10 @@ +use std::mem; + +// Doing a copy at integer type should lose provenance. +// This tests the optimized-array case of integer copies. 
+fn main() { + let ptrs = [&42]; + let ints: [usize; 1] = unsafe { mem::transmute(ptrs) }; + let ptr = (&raw const ints[0]).cast::<&i32>(); + let _val = unsafe { *ptr.read() }; //~ERROR: dangling +} diff --git a/src/tools/miri/tests/fail/provenance/int_copy_looses_provenance1.stderr b/src/tools/miri/tests/fail/provenance/int_copy_looses_provenance1.stderr new file mode 100644 index 0000000000000..375262655d0cf --- /dev/null +++ b/src/tools/miri/tests/fail/provenance/int_copy_looses_provenance1.stderr @@ -0,0 +1,15 @@ +error: Undefined Behavior: constructing invalid value: encountered a dangling reference ($HEX[noalloc] has no provenance) + --> $DIR/int_copy_looses_provenance1.rs:LL:CC + | +LL | let _val = unsafe { *ptr.read() }; + | ^^^^^^^^^^ constructing invalid value: encountered a dangling reference ($HEX[noalloc] has no provenance) + | + = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior + = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information + = note: BACKTRACE: + = note: inside `main` at $DIR/int_copy_looses_provenance1.rs:LL:CC + +note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace + +error: aborting due to 1 previous error + diff --git a/src/tools/miri/tests/fail/provenance/int_copy_looses_provenance2.rs b/src/tools/miri/tests/fail/provenance/int_copy_looses_provenance2.rs new file mode 100644 index 0000000000000..e8966c53d7059 --- /dev/null +++ b/src/tools/miri/tests/fail/provenance/int_copy_looses_provenance2.rs @@ -0,0 +1,12 @@ +use std::mem; + +// Doing a copy at integer type should lose provenance. +// This tests the case where provenance is hiding in the metadata of a pointer. +fn main() { + let ptrs = [(&42, &42)]; + // Typed copy at wide pointer type (with integer-typed metadata).
+ let ints: [*const [usize]; 1] = unsafe { mem::transmute(ptrs) }; + // Get a pointer to the metadata field. + let ptr = (&raw const ints[0]).wrapping_byte_add(mem::size_of::<*const ()>()).cast::<&i32>(); + let _val = unsafe { *ptr.read() }; //~ERROR: dangling +} diff --git a/src/tools/miri/tests/fail/provenance/int_copy_looses_provenance2.stderr b/src/tools/miri/tests/fail/provenance/int_copy_looses_provenance2.stderr new file mode 100644 index 0000000000000..8402c7b5e130f --- /dev/null +++ b/src/tools/miri/tests/fail/provenance/int_copy_looses_provenance2.stderr @@ -0,0 +1,15 @@ +error: Undefined Behavior: constructing invalid value: encountered a dangling reference ($HEX[noalloc] has no provenance) + --> $DIR/int_copy_looses_provenance2.rs:LL:CC + | +LL | let _val = unsafe { *ptr.read() }; + | ^^^^^^^^^^ constructing invalid value: encountered a dangling reference ($HEX[noalloc] has no provenance) + | + = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior + = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information + = note: BACKTRACE: + = note: inside `main` at $DIR/int_copy_looses_provenance2.rs:LL:CC + +note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace + +error: aborting due to 1 previous error + diff --git a/src/tools/miri/tests/fail/provenance/int_copy_looses_provenance3.rs b/src/tools/miri/tests/fail/provenance/int_copy_looses_provenance3.rs new file mode 100644 index 0000000000000..48a48ce4587ee --- /dev/null +++ b/src/tools/miri/tests/fail/provenance/int_copy_looses_provenance3.rs @@ -0,0 +1,29 @@ +#![feature(strict_provenance)] +use std::mem; + +#[repr(C, usize)] +#[allow(unused)] +enum E { + Var1(usize), + Var2(usize), +} + +// Doing a copy at integer type should lose provenance. +// This tests the case where provenance is hiding in the discriminant of an enum.
+fn main() { + assert_eq!(mem::size_of::<E>(), 2*mem::size_of::<usize>()); + + // We want to store provenance in the enum discriminant, but the value still needs to + // be valid for the type. So we split provenance and data. + let ptr = &42; + let ptr = ptr as *const i32; + let ptrs = [(ptr.with_addr(0), ptr)]; + // Typed copy at the enum type. + let ints: [E; 1] = unsafe { mem::transmute(ptrs) }; + // Read the discriminant. + let discr = unsafe { (&raw const ints[0]).cast::<*const i32>().read() }; + // Take the provenance from there, together with the original address. + let ptr = discr.with_addr(ptr.addr()); + // There should be no provenance in `discr`, so this should be UB. + let _val = unsafe { *ptr }; //~ERROR: dangling +} diff --git a/src/tools/miri/tests/fail/provenance/int_copy_looses_provenance3.stderr b/src/tools/miri/tests/fail/provenance/int_copy_looses_provenance3.stderr new file mode 100644 index 0000000000000..b50e23da96a7e --- /dev/null +++ b/src/tools/miri/tests/fail/provenance/int_copy_looses_provenance3.stderr @@ -0,0 +1,15 @@ +error: Undefined Behavior: memory access failed: expected a pointer to 4 bytes of memory, but got $HEX[noalloc] which is a dangling pointer (it has no provenance) + --> $DIR/int_copy_looses_provenance3.rs:LL:CC + | +LL | let _val = unsafe { *ptr }; + | ^^^^ memory access failed: expected a pointer to 4 bytes of memory, but got $HEX[noalloc] which is a dangling pointer (it has no provenance) + | + = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior + = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information + = note: BACKTRACE: + = note: inside `main` at $DIR/int_copy_looses_provenance3.rs:LL:CC + +note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace + +error: aborting due to 1 previous error + diff --git
a/src/tools/miri/tests/fail/provenance/ptr_copy_loses_partial_provenance0.rs b/src/tools/miri/tests/fail/provenance/ptr_copy_loses_partial_provenance0.rs new file mode 100644 index 0000000000000..ff94f2263c517 --- /dev/null +++ b/src/tools/miri/tests/fail/provenance/ptr_copy_loses_partial_provenance0.rs @@ -0,0 +1,18 @@ +fn main() { + let half_ptr = std::mem::size_of::<*const ()>() / 2; + let mut bytes = [1u8; 16]; + let bytes = bytes.as_mut_ptr(); + + unsafe { + // Put a pointer in the middle. + bytes.add(half_ptr).cast::<&i32>().write_unaligned(&42); + // Typed copy of the entire thing as two pointers, but not perfectly + // overlapping with the pointer we have in there. + let copy = bytes.cast::<[*const (); 2]>().read_unaligned(); + let copy_bytes = copy.as_ptr().cast::(); + // Now go to the middle of the copy and get the pointer back out. + let ptr = copy_bytes.add(half_ptr).cast::<*const i32>().read_unaligned(); + // Dereferencing this should fail as the copy has removed the provenance. 
+ let _val = *ptr; //~ERROR: dangling + } +} diff --git a/src/tools/miri/tests/fail/provenance/ptr_copy_loses_partial_provenance0.stderr b/src/tools/miri/tests/fail/provenance/ptr_copy_loses_partial_provenance0.stderr new file mode 100644 index 0000000000000..ed38572a5f398 --- /dev/null +++ b/src/tools/miri/tests/fail/provenance/ptr_copy_loses_partial_provenance0.stderr @@ -0,0 +1,15 @@ +error: Undefined Behavior: memory access failed: expected a pointer to 4 bytes of memory, but got $HEX[noalloc] which is a dangling pointer (it has no provenance) + --> $DIR/ptr_copy_loses_partial_provenance0.rs:LL:CC + | +LL | let _val = *ptr; + | ^^^^ memory access failed: expected a pointer to 4 bytes of memory, but got $HEX[noalloc] which is a dangling pointer (it has no provenance) + | + = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior + = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information + = note: BACKTRACE: + = note: inside `main` at $DIR/ptr_copy_loses_partial_provenance0.rs:LL:CC + +note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace + +error: aborting due to 1 previous error + diff --git a/src/tools/miri/tests/fail/provenance/ptr_copy_loses_partial_provenance1.rs b/src/tools/miri/tests/fail/provenance/ptr_copy_loses_partial_provenance1.rs new file mode 100644 index 0000000000000..d0e3dac7792b0 --- /dev/null +++ b/src/tools/miri/tests/fail/provenance/ptr_copy_loses_partial_provenance1.rs @@ -0,0 +1,18 @@ +fn main() { + let half_ptr = std::mem::size_of::<*const ()>() / 2; + let mut bytes = [1u8; 16]; + let bytes = bytes.as_mut_ptr(); + + unsafe { + // Put a pointer in the middle. + bytes.add(half_ptr).cast::<&i32>().write_unaligned(&42); + // Typed copy of the entire thing as two *function* pointers, but not perfectly + // overlapping with the pointer we have in there. 
+ let copy = bytes.cast::<[fn(); 2]>().read_unaligned(); + let copy_bytes = copy.as_ptr().cast::(); + // Now go to the middle of the copy and get the pointer back out. + let ptr = copy_bytes.add(half_ptr).cast::<*const i32>().read_unaligned(); + // Dereferencing this should fail as the copy has removed the provenance. + let _val = *ptr; //~ERROR: dangling + } +} diff --git a/src/tools/miri/tests/fail/provenance/ptr_copy_loses_partial_provenance1.stderr b/src/tools/miri/tests/fail/provenance/ptr_copy_loses_partial_provenance1.stderr new file mode 100644 index 0000000000000..2e11687175afe --- /dev/null +++ b/src/tools/miri/tests/fail/provenance/ptr_copy_loses_partial_provenance1.stderr @@ -0,0 +1,15 @@ +error: Undefined Behavior: memory access failed: expected a pointer to 4 bytes of memory, but got $HEX[noalloc] which is a dangling pointer (it has no provenance) + --> $DIR/ptr_copy_loses_partial_provenance1.rs:LL:CC + | +LL | let _val = *ptr; + | ^^^^ memory access failed: expected a pointer to 4 bytes of memory, but got $HEX[noalloc] which is a dangling pointer (it has no provenance) + | + = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior + = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information + = note: BACKTRACE: + = note: inside `main` at $DIR/ptr_copy_loses_partial_provenance1.rs:LL:CC + +note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace + +error: aborting due to 1 previous error + diff --git a/src/tools/miri/tests/fail/uninit/padding-enum.rs b/src/tools/miri/tests/fail/uninit/padding-enum.rs new file mode 100644 index 0000000000000..3852ac5c477d5 --- /dev/null +++ b/src/tools/miri/tests/fail/uninit/padding-enum.rs @@ -0,0 +1,23 @@ +use std::mem; + +// We have three fields to avoid the ScalarPair optimization. 
+#[allow(unused)] +enum E { + None, + Some(&'static (), &'static (), usize), +} + +fn main() { unsafe { + let mut p: mem::MaybeUninit = mem::MaybeUninit::zeroed(); + // The copy when `E` is returned from `transmute` should destroy padding + // (even when we use `write_unaligned`, which under the hood uses an untyped copy). + p.as_mut_ptr().write_unaligned(mem::transmute((0usize, 0usize, 0usize))); + // This is a `None`, so everything but the discriminant is padding. + assert!(matches!(*p.as_ptr(), E::None)); + + // Turns out the discriminant is (currently) stored + // in the 2nd pointer, so the first half is padding. + let c = &p as *const _ as *const u8; + let _val = *c.add(0); // Get a padding byte. + //~^ERROR: uninitialized +} } diff --git a/src/tools/miri/tests/fail/uninit/padding-enum.stderr b/src/tools/miri/tests/fail/uninit/padding-enum.stderr new file mode 100644 index 0000000000000..c571f18874076 --- /dev/null +++ b/src/tools/miri/tests/fail/uninit/padding-enum.stderr @@ -0,0 +1,15 @@ +error: Undefined Behavior: using uninitialized data, but this operation requires initialized memory + --> $DIR/padding-enum.rs:LL:CC + | +LL | let _val = *c.add(0); // Get a padding byte. 
+ | ^^^^^^^^^ using uninitialized data, but this operation requires initialized memory + | + = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior + = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information + = note: BACKTRACE: + = note: inside `main` at $DIR/padding-enum.rs:LL:CC + +note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace + +error: aborting due to 1 previous error + diff --git a/src/tools/miri/tests/fail/uninit/padding-pair.rs b/src/tools/miri/tests/fail/uninit/padding-pair.rs new file mode 100644 index 0000000000000..c8c00b3c65a06 --- /dev/null +++ b/src/tools/miri/tests/fail/uninit/padding-pair.rs @@ -0,0 +1,25 @@ +#![feature(core_intrinsics)] + +use std::mem::{self, MaybeUninit}; + +fn main() { + // This constructs a `(usize, bool)` pair: 9 bytes initialized, the rest not. + // Ensure that these 9 bytes are indeed initialized, and the rest is indeed not. + // This should be the case even if we write into previously initialized storage. + let mut x: MaybeUninit> = MaybeUninit::zeroed(); + let z = std::intrinsics::add_with_overflow(0usize, 0usize); + unsafe { x.as_mut_ptr().cast::<(usize, bool)>().write(z) }; + // Now read this bytewise. There should be (`ptr_size + 1`) def bytes followed by + // (`ptr_size - 1`) undef bytes (the padding after the bool) in there. 
+ let z: *const u8 = &x as *const _ as *const _; + let first_undef = mem::size_of::() as isize + 1; + for i in 0..first_undef { + let byte = unsafe { *z.offset(i) }; + assert_eq!(byte, 0); + } + let v = unsafe { *z.offset(first_undef) }; + //~^ ERROR: uninitialized + if v == 0 { + println!("it is zero"); + } +} diff --git a/src/tools/miri/tests/fail/uninit/padding-pair.stderr b/src/tools/miri/tests/fail/uninit/padding-pair.stderr new file mode 100644 index 0000000000000..d35934d83d58f --- /dev/null +++ b/src/tools/miri/tests/fail/uninit/padding-pair.stderr @@ -0,0 +1,15 @@ +error: Undefined Behavior: using uninitialized data, but this operation requires initialized memory + --> $DIR/padding-pair.rs:LL:CC + | +LL | let v = unsafe { *z.offset(first_undef) }; + | ^^^^^^^^^^^^^^^^^^^^^^ using uninitialized data, but this operation requires initialized memory + | + = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior + = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information + = note: BACKTRACE: + = note: inside `main` at $DIR/padding-pair.rs:LL:CC + +note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace + +error: aborting due to 1 previous error + diff --git a/src/tools/miri/tests/fail/uninit/padding-struct-in-union.rs b/src/tools/miri/tests/fail/uninit/padding-struct-in-union.rs new file mode 100644 index 0000000000000..132b85828362d --- /dev/null +++ b/src/tools/miri/tests/fail/uninit/padding-struct-in-union.rs @@ -0,0 +1,32 @@ +#[repr(C)] +#[derive(Debug, Copy, Clone)] +struct Foo { + val16: u16, + // Padding bytes go here! + val32: u32, +} + +#[repr(C)] +#[derive(Debug, Copy, Clone)] +struct Bar { + bytes: [u8; 8], +} + +#[repr(C)] +union FooBar { + foo: Foo, + bar: Bar, +} + +pub fn main() { + // Initialize as u8 to ensure padding bytes are zeroed. 
+ let mut foobar = FooBar { bar: Bar { bytes: [0u8; 8] } }; + // Reading either field is ok. + let _val = unsafe { (foobar.foo, foobar.bar) }; + // Does this assignment copy the uninitialized padding bytes + // over the initialized padding bytes? miri doesn't seem to think so. + foobar.foo = Foo { val16: 1, val32: 2 }; + // This resets the padding to uninit. + let _val = unsafe { (foobar.foo, foobar.bar) }; + //~^ ERROR: uninitialized +} diff --git a/src/tools/miri/tests/fail/uninit/padding-struct-in-union.stderr b/src/tools/miri/tests/fail/uninit/padding-struct-in-union.stderr new file mode 100644 index 0000000000000..e122249af16e5 --- /dev/null +++ b/src/tools/miri/tests/fail/uninit/padding-struct-in-union.stderr @@ -0,0 +1,15 @@ +error: Undefined Behavior: constructing invalid value at .bytes[2]: encountered uninitialized memory, but expected an integer + --> $DIR/padding-struct-in-union.rs:LL:CC + | +LL | let _val = unsafe { (foobar.foo, foobar.bar) }; + | ^^^^^^^^^^ constructing invalid value at .bytes[2]: encountered uninitialized memory, but expected an integer + | + = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior + = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information + = note: BACKTRACE: + = note: inside `main` at $DIR/padding-struct-in-union.rs:LL:CC + +note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace + +error: aborting due to 1 previous error + diff --git a/src/tools/miri/tests/fail/uninit/padding-struct.rs b/src/tools/miri/tests/fail/uninit/padding-struct.rs new file mode 100644 index 0000000000000..dd3be50343902 --- /dev/null +++ b/src/tools/miri/tests/fail/uninit/padding-struct.rs @@ -0,0 +1,11 @@ +use std::mem; + +#[repr(C)] +struct Pair(u8, u16); + +fn main() { unsafe { + let p: Pair = mem::transmute(0u32); // The copy when `Pair` is returned from `transmute` should destroy 
padding. + let c = &p as *const _ as *const u8; + let _val = *c.add(1); // Get the padding byte. + //~^ERROR: uninitialized +} } diff --git a/src/tools/miri/tests/fail/uninit/padding-struct.stderr b/src/tools/miri/tests/fail/uninit/padding-struct.stderr new file mode 100644 index 0000000000000..8dc40a482ac52 --- /dev/null +++ b/src/tools/miri/tests/fail/uninit/padding-struct.stderr @@ -0,0 +1,15 @@ +error: Undefined Behavior: using uninitialized data, but this operation requires initialized memory + --> $DIR/padding-struct.rs:LL:CC + | +LL | let _val = *c.add(1); // Get the padding byte. + | ^^^^^^^^^ using uninitialized data, but this operation requires initialized memory + | + = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior + = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information + = note: BACKTRACE: + = note: inside `main` at $DIR/padding-struct.rs:LL:CC + +note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace + +error: aborting due to 1 previous error + diff --git a/src/tools/miri/tests/fail/uninit/padding-union.rs b/src/tools/miri/tests/fail/uninit/padding-union.rs new file mode 100644 index 0000000000000..2e9e0a40d6c68 --- /dev/null +++ b/src/tools/miri/tests/fail/uninit/padding-union.rs @@ -0,0 +1,14 @@ +use std::mem; + +#[allow(unused)] +#[repr(C)] +union U { + field: (u8, u16), +} + +fn main() { unsafe { + let p: U = mem::transmute(0u32); // The copy when `U` is returned from `transmute` should destroy padding. + let c = &p as *const _ as *const [u8; 4]; + let _val = *c; // Read the entire thing, definitely contains the padding byte. 
+ //~^ERROR: uninitialized +} } diff --git a/src/tools/miri/tests/fail/uninit/padding-union.stderr b/src/tools/miri/tests/fail/uninit/padding-union.stderr new file mode 100644 index 0000000000000..04002da4f195c --- /dev/null +++ b/src/tools/miri/tests/fail/uninit/padding-union.stderr @@ -0,0 +1,15 @@ +error: Undefined Behavior: constructing invalid value at [1]: encountered uninitialized memory, but expected an integer + --> $DIR/padding-union.rs:LL:CC + | +LL | let _val = *c; // Read the entire thing, definitely contains the padding byte. + | ^^ constructing invalid value at [1]: encountered uninitialized memory, but expected an integer + | + = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior + = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information + = note: BACKTRACE: + = note: inside `main` at $DIR/padding-union.rs:LL:CC + +note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace + +error: aborting due to 1 previous error + diff --git a/src/tools/miri/tests/fail/uninit/padding-wide-ptr.rs b/src/tools/miri/tests/fail/uninit/padding-wide-ptr.rs new file mode 100644 index 0000000000000..0403a9caba66d --- /dev/null +++ b/src/tools/miri/tests/fail/uninit/padding-wide-ptr.rs @@ -0,0 +1,18 @@ +use std::mem; + +// If this is `None`, the metadata becomes padding. +type T = Option<&'static str>; + +fn main() { unsafe { + let mut p: mem::MaybeUninit<T> = mem::MaybeUninit::zeroed(); + // The copy when `T` is returned from `transmute` should destroy padding + // (even when we use `write_unaligned`, which under the hood uses an untyped copy). + p.as_mut_ptr().write_unaligned(mem::transmute((0usize, 0usize))); + // Null represents `None`. + assert!(matches!(*p.as_ptr(), None)); + + // The second part, with the length, becomes padding.
+ let c = &p as *const _ as *const u8; + let _val = *c.add(mem::size_of::<*const u8>()); // Get a padding byte. + //~^ERROR: uninitialized +} } diff --git a/src/tools/miri/tests/fail/uninit/padding-wide-ptr.stderr b/src/tools/miri/tests/fail/uninit/padding-wide-ptr.stderr new file mode 100644 index 0000000000000..0da72550b2e08 --- /dev/null +++ b/src/tools/miri/tests/fail/uninit/padding-wide-ptr.stderr @@ -0,0 +1,15 @@ +error: Undefined Behavior: using uninitialized data, but this operation requires initialized memory + --> $DIR/padding-wide-ptr.rs:LL:CC + | +LL | let _val = *c.add(mem::size_of::<*const u8>()); // Get a padding byte. + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ using uninitialized data, but this operation requires initialized memory + | + = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior + = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information + = note: BACKTRACE: + = note: inside `main` at $DIR/padding-wide-ptr.rs:LL:CC + +note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace + +error: aborting due to 1 previous error + diff --git a/src/tools/miri/tests/fail/transmute-pair-uninit.rs b/src/tools/miri/tests/fail/uninit/transmute-pair-uninit.rs similarity index 61% rename from src/tools/miri/tests/fail/transmute-pair-uninit.rs rename to src/tools/miri/tests/fail/uninit/transmute-pair-uninit.rs index bc95f3cb7ad3a..0ba5520a54468 100644 --- a/src/tools/miri/tests/fail/transmute-pair-uninit.rs +++ b/src/tools/miri/tests/fail/uninit/transmute-pair-uninit.rs @@ -1,16 +1,17 @@ #![feature(core_intrinsics)] -use std::mem; +use std::mem::{self, MaybeUninit}; fn main() { - let x: Option<Box<[u8]>> = unsafe { + // This constructs a `(usize, bool)` pair: 9 bytes initialized, the rest not. + // Ensure that these 9 bytes are indeed initialized, and the rest is indeed not.
+ let x: MaybeUninit<Box<[u8]>> = unsafe { let z = std::intrinsics::add_with_overflow(0usize, 0usize); - std::mem::transmute::<(usize, bool), Option<Box<[u8]>>>(z) + std::mem::transmute::<(usize, bool), MaybeUninit<Box<[u8]>>>(z) }; - let y = &x; // Now read this bytewise. There should be (`ptr_size + 1`) def bytes followed by // (`ptr_size - 1`) undef bytes (the padding after the bool) in there. - let z: *const u8 = y as *const _ as *const _; + let z: *const u8 = &x as *const _ as *const _; let first_undef = mem::size_of::<usize>() as isize + 1; for i in 0..first_undef { let byte = unsafe { *z.offset(i) }; diff --git a/src/tools/miri/tests/fail/transmute-pair-uninit.stderr b/src/tools/miri/tests/fail/uninit/transmute-pair-uninit.stderr similarity index 100% rename from src/tools/miri/tests/fail/transmute-pair-uninit.stderr rename to src/tools/miri/tests/fail/uninit/transmute-pair-uninit.stderr diff --git a/src/tools/miri/tests/pass/arrays.rs b/src/tools/miri/tests/pass/arrays.rs index 61b44453e9bd9..b0c6f54cab87c 100644 --- a/src/tools/miri/tests/pass/arrays.rs +++ b/src/tools/miri/tests/pass/arrays.rs @@ -61,6 +61,20 @@ fn debug() { println!("{:?}", array); } +fn huge_zst() { + fn id<T>(x: T) -> T { x } + + // A "huge" zero-sized array. Make sure we don't loop over it in any part of Miri. + let val = [(); usize::MAX]; + id(val); // make a copy + + let val = [val; 2]; + id(val); + + // Also wrap it in a union (which, in particular, hits the logic for computing union padding).
+ let _copy = std::mem::MaybeUninit::new(val); +} + fn main() { assert_eq!(empty_array(), []); assert_eq!(index_unsafe(), 20); @@ -73,4 +87,5 @@ fn main() { from(); eq(); debug(); + huge_zst(); } diff --git a/src/tools/miri/tests/pass/enums.rs b/src/tools/miri/tests/pass/enums.rs index 1dafef025e958..9fc61f07c047f 100644 --- a/src/tools/miri/tests/pass/enums.rs +++ b/src/tools/miri/tests/pass/enums.rs @@ -132,6 +132,43 @@ fn overaligned_casts() { assert_eq!(aligned as u8, 0); } +// This hits a corner case in the logic for clearing padding on typed copies. +fn padding_clear_corner_case() { + #[allow(unused)] + #[derive(Copy, Clone)] + #[repr(C)] + pub struct Decoded { + /// The scaled mantissa. + pub mant: u64, + /// The lower error range. + pub minus: u64, + /// The upper error range. + pub plus: u64, + /// The shared exponent in base 2. + pub exp: i16, + /// True when the error range is inclusive. + /// + /// In IEEE 754, this is true when the original mantissa was even. + pub inclusive: bool, + } + + #[allow(unused)] + #[derive(Copy, Clone)] + pub enum FullDecoded { + /// Not-a-number. + Nan, + /// Infinities, either positive or negative. + Infinite, + /// Zero, either positive or negative. + Zero, + /// Finite numbers with further decoded fields. 
+ Finite(Decoded), + } + + let val = FullDecoded::Finite(Decoded { mant: 0, minus: 0, plus: 0, exp: 0, inclusive: false }); + let _val2 = val; // trigger typed copy +} + fn main() { test(MyEnum::MyEmptyVariant); test(MyEnum::MyNewtypeVariant(42)); @@ -141,4 +178,5 @@ fn main() { discriminant_overflow(); more_discriminant_overflow(); overaligned_casts(); + padding_clear_corner_case(); } diff --git a/src/tools/miri/tests/pass/provenance.rs b/src/tools/miri/tests/pass/provenance.rs index 9e8a9651b3d96..2e4d240cc48a1 100644 --- a/src/tools/miri/tests/pass/provenance.rs +++ b/src/tools/miri/tests/pass/provenance.rs @@ -12,6 +12,7 @@ fn main() { bytewise_custom_memcpy(); bytewise_custom_memcpy_chunked(); int_load_strip_provenance(); + maybe_uninit_preserves_partial_provenance(); } /// Some basic smoke tests for provenance. @@ -145,3 +146,24 @@ fn int_load_strip_provenance() { let ints: [usize; 1] = unsafe { mem::transmute(ptrs) }; assert_eq!(ptrs[0] as *const _ as usize, ints[0]); } + +fn maybe_uninit_preserves_partial_provenance() { + // This is the same test as ptr_copy_loses_partial_provenance.rs, but using MaybeUninit and thus + // properly preserving partial provenance. + unsafe { + let mut bytes = [1u8; 16]; + let bytes = bytes.as_mut_ptr(); + + // Put a pointer in the middle. + bytes.add(4).cast::<&i32>().write_unaligned(&42); + // Copy the entire thing as two pointers but not perfectly + // overlapping with the pointer we have in there. + let copy = bytes.cast::<[mem::MaybeUninit<*const ()>; 2]>().read_unaligned(); + let copy_bytes = copy.as_ptr().cast::<u8>(); + // Now go to the middle of the copy and get the pointer back out. + let ptr = copy_bytes.add(4).cast::<*const i32>().read_unaligned(); + // And deref this to ensure we get the right value. + let val = *ptr; + assert_eq!(val, 42); + } +}