From 8c8a8601bf6dfafb02584b88981b68198d05fe09 Mon Sep 17 00:00:00 2001 From: Jannik Vogel Date: Wed, 2 May 2018 17:39:17 +0200 Subject: [PATCH] Dangerous matrix abstraction (restricted pointers) --- swep1rcr.exe/vec.c | 69 ++++++++++++++++------------------------------ 1 file changed, 23 insertions(+), 46 deletions(-) diff --git a/swep1rcr.exe/vec.c b/swep1rcr.exe/vec.c index c8d0258..fbebfc1 100644 --- a/swep1rcr.exe/vec.c +++ b/swep1rcr.exe/vec.c @@ -273,71 +273,48 @@ float *__cdecl sub_432030(float *a1, const float *a2) { } //----- (004323C0) -------------------------------------------------------- -float* __cdecl sub_4323C0(float* a1, const float *a2) { - float v5; // edx - double v6; // st4 - double v7; // rt2 - double v8; // st5 - double v11; // st7 - double v12; // st7 - double v13; // st2 - double v14; // st7 - float v15; // [esp+8h] [ebp+4h] - float v17; // [esp+8h] [ebp+4h] - float v18; // [esp+Ch] [ebp+8h] - float v20; // [esp+Ch] [ebp+8h] - - //FIXME: This is a bit more complicated because a2 is still being read while a1 has been written. - // This means the order of operation suddenly becomes critical if regions in a1 and a2 overlap. +float* __cdecl sub_4323C0(float* restrict a1, const float* restrict a2) { // Get squared length + float v19 = a2[0 * 4 + 0] * a2[0 * 4 + 0] + + a2[0 * 4 + 1] * a2[0 * 4 + 1] + + a2[0 * 4 + 2] * a2[0 * 4 + 2]; double v3 = a2[1 * 4 + 0] * a2[1 * 4 + 0] + a2[1 * 4 +1] * a2[1 * 4 + 1] + a2[1 * 4 + 2] * a2[1 * 4 + 2]; float v16 = a2[2 * 4 + 0] * a2[2 * 4 + 0] + a2[2 * 4 + 1] * a2[2 * 4 + 1] + a2[2 * 4 + 2] * a2[2 * 4 + 2]; - float v19 = a2[0 * 4 + 0] * a2[0 * 4 + 0] + - a2[0 * 4 + 1] * a2[0 * 4 + 1] + - a2[0 * 4 + 2] * a2[0 * 4 + 2]; - // Can't trust a2 after this (might have been written): + //FIXME: This is a bit more complicated because a2 is still being read after a1 has been written. // This means the order of operation suddenly becomes critical if regions in a1 and a2 overlap. 
+ // During this analysis the original order of reads and writes was NOT preserved, so if the game relies on it (i.e. passes overlapping a1 and a2), it will break! + // Hence, the pointers have been marked `restrict`. + a1[0 * 4 + 0] = a2[0 * 4 + 0] / v19; a1[0 * 4 + 1] = a2[1 * 4 + 0] / v3; a1[0 * 4 + 2] = a2[2 * 4 + 0] / v16; + a1[0 * 4 + 3] = 0.0f; + a1[1 * 4 + 0] = a2[0 * 4 + 1] / v19; + a1[1 * 4 + 1] = a2[1 * 4 + 1] / v3; a1[1 * 4 + 2] = a2[2 * 4 + 1] / v16; - - - a1[1 * 4 + 0] = a2[1] / v19; - a1[2 * 4 + 0] = a2[2] / v19; - - a1[2 * 4 + 1] = a2[6] / v3; - - a1[0 * 4 + 0] = a2[0] / v19; - - a1[1 * 4 + 1] = a2[5] / v3; - - v11 = a2[10] / v16; - - a1[0 * 4 + 3] = 0.0f; a1[1 * 4 + 3] = 0.0f; - a1[2 * 4 + 3] = 0.0f; - a1[3 * 4 + 3] = 1.0f; - - a1[10] = v11; - v20 = a2[3 * 4 + 0]; - v17 = a2[3 * 4 + 1]; - v12 = a2[3 * 4 + 2]; + a1[2 * 4 + 0] = a2[0 * 4 + 2] / v19; + a1[2 * 4 + 1] = a2[1 * 4 + 2] / v3; + a1[2 * 4 + 2] = a2[2 * 4 + 2] / v16; + a1[2 * 4 + 3] = 0.0f; - v13 = v12 * a1[2 * 4 + 1]; + float v20 = a2[3 * 4 + 0]; + float v17 = a2[3 * 4 + 1]; + double v12 = a2[3 * 4 + 2]; - a1[12] = -(v17 * a1[4] + v12 * a1[8] + a1[0] * v20); - v14 = -(v17 * a1[6] + v20 * a1[2] + v12 * a1[10]); + a1[3 * 4 + 0] = -(v20 * a1[0 * 4 + 0] + v17 * a1[1 * 4 + 0] + v12 * a1[2 * 4 + 0]); + a1[3 * 4 + 1] = -(v20 * a1[0 * 4 + 1] + v17 * a1[1 * 4 + 1] + v12 * a1[2 * 4 + 1]); + a1[3 * 4 + 2] = -(v20 * a1[0 * 4 + 2] + v17 * a1[1 * 4 + 2] + v12 * a1[2 * 4 + 2]); + a1[3 * 4 + 3] = 1.0f; - a1[13] = -(v13 + v20 * a1[1] + v17 * a1[5]; - a1[14] = v14; return a1; }