forked from perilouswithadollarsign/cstrike15_src
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsimdvectormatrix.cpp
140 lines (128 loc) · 3.81 KB
/
simdvectormatrix.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
//====== Copyright © 1996-2006, Valve Corporation, All rights reserved. =======//
//
// Purpose: Provide a class (SSE/SIMD only) holding a 2d matrix of class FourVectors,
// for high speed processing in tools.
//
// $NoKeywords: $
//
//=============================================================================//
#include "basetypes.h"
#include "mathlib/mathlib.h"
#include "mathlib/simdvectormatrix.h"
#include "mathlib/ssemath.h"
#include "tier0/dbg.h"
// NOTE: This has to be the last file included!
#include "tier0/memdbgon.h"
//-----------------------------------------------------------------------------
// Purpose: Build this matrix from three attributes of an SOA container.
//          The matrix is resized to the container's column/row counts, then,
//          for each row, the quads of the three attributes are interleaved:
//          output slot 3*q+0 <- attr0[q], 3*q+1 <- attr1[q], 3*q+2 <- attr2[q].
// Input  : pSrc      - source container (dereferenced unconditionally)
//          nAttrIdx0 - attribute written to the first fltx4 of each output triple
//          nAttrIdx1 - attribute written to the second fltx4
//          nAttrIdx2 - attribute written to the third fltx4
// NOTE(review): relies on FourVectors being laid out as three consecutive
//          fltx4 values (x, y, z) and on the attributes holding float quads -
//          confirm against ssemath.h / the CSOAContainer declaration.
//-----------------------------------------------------------------------------
void CSIMDVectorMatrix::CreateFromCSOAAttributes( CSOAContainer const *pSrc,
												  int nAttrIdx0, int nAttrIdx1, int nAttrIdx2 )
{
	SetSize( pSrc->NumCols(), pSrc->NumRows() );

	int nQuadsPerRow = pSrc->NumQuadsPerRow();
	FourVectors *pDestRow = m_pData;

	// Destination rows are m_nPaddedWidth FourVectors apart.
	for ( int nRow = 0; nRow < pSrc->NumRows(); nRow++, pDestRow += m_nPaddedWidth )
	{
		fltx4 const *pAttr0 = reinterpret_cast<fltx4 const *>( pSrc->ConstRowPtr( nAttrIdx0, nRow ) );
		fltx4 const *pAttr1 = reinterpret_cast<fltx4 const *>( pSrc->ConstRowPtr( nAttrIdx1, nRow ) );
		fltx4 const *pAttr2 = reinterpret_cast<fltx4 const *>( pSrc->ConstRowPtr( nAttrIdx2, nRow ) );
		fltx4 *pOut = reinterpret_cast<fltx4 *>( pDestRow );

		// One output triple per source quad.
		for ( int nQuad = 0; nQuad < nQuadsPerRow; nQuad++ )
		{
			pOut[0] = pAttr0[nQuad];
			pOut[1] = pAttr1[nQuad];
			pOut[2] = pAttr2[nQuad];
			pOut += 3;
		}
	}
}
void CSIMDVectorMatrix::CreateFromRGBA_FloatImageData( int srcwidth, int srcheight,
float const * srcdata )
{
Assert( srcwidth && srcheight && srcdata );
SetSize( srcwidth, srcheight );
FourVectors * p_write_ptr = m_pData;
int n_vectors_per_source_line = ( srcwidth >> 2 );
int ntrailing_pixels_per_source_line = ( srcwidth & 3 );
for( int y = 0; y < srcheight; y++ )
{
float const * data_in = srcdata;
float * data_out = reinterpret_cast < float *> ( p_write_ptr );
// copy full input blocks
for( int x = 0; x < n_vectors_per_source_line; x++ )
{
for( int c = 0; c < 3; c++ )
{
data_out[0]= data_in[c]; // x0
data_out[1]= data_in[4 + c]; // x1
data_out[2]= data_in[8 + c]; // x2
data_out[3]= data_in[12 + c]; // x3
data_out += 4;
}
data_in += 16;
}
// now, copy trailing data and pad with copies
if ( ntrailing_pixels_per_source_line )
{
for( int c = 0; c < 3; c++ )
{
for( int cp = 0; cp < 4; cp++ )
{
int real_cp = MIN( cp, ntrailing_pixels_per_source_line - 1 );
data_out[4 * c + cp]= data_in[c + 4 * real_cp];
}
}
}
// advance ptrs to next line
p_write_ptr += m_nPaddedWidth;
srcdata += 4 * srcwidth;
}
}
void CSIMDVectorMatrix::RaiseToPower( float power )
{
int nv = NVectors();
if ( nv )
{
int fixed_point_exp = ( int ) ( 4.0 * power );
FourVectors * src = m_pData;
do
{
src->x = Pow_FixedPoint_Exponent_SIMD( src->x, fixed_point_exp );
src->y = Pow_FixedPoint_Exponent_SIMD( src->y, fixed_point_exp );
src->z = Pow_FixedPoint_Exponent_SIMD( src->z, fixed_point_exp );
src++;
} while (-- nv );
}
}
//-----------------------------------------------------------------------------
// Purpose: Element-wise accumulate: applies FourVectors += to each of this
//          matrix's NVectors() elements using the element at the same index
//          in src.
// Input  : src - matrix to add; width and height are asserted to match this one
// Output : reference to this matrix
//-----------------------------------------------------------------------------
CSIMDVectorMatrix & CSIMDVectorMatrix::operator += ( CSIMDVectorMatrix const & src )
{
	Assert( m_nWidth == src.m_nWidth );
	Assert( m_nHeight == src.m_nHeight );

	FourVectors *pAddend = src.m_pData;
	FourVectors *pAccum = m_pData;

	// !! speed !! inline more iters
	for ( int nRemaining = NVectors(); nRemaining != 0; --nRemaining )
	{
		*pAccum += *pAddend;
		++pAccum;
		++pAddend;
	}
	return *this;
}
//-----------------------------------------------------------------------------
// Purpose: Scale the matrix by a single Vector: a FourVectors is built from
//          src via DuplicateVector, and VProduct is applied with it to each of
//          the NVectors() elements.
// Input  : src - per-component scale
// Output : reference to this matrix
// NOTE(review): presumably DuplicateVector broadcasts src to all four lanes
//          and VProduct is a component-wise multiply - confirm in ssemath.h.
//-----------------------------------------------------------------------------
CSIMDVectorMatrix & CSIMDVectorMatrix::operator *= ( Vector const & src )
{
	int nRemaining = NVectors();
	if ( !nRemaining )
	{
		// Nothing to scale; skip building the scale value.
		return *this;
	}

	FourVectors scale;
	scale.DuplicateVector( src );

	// !! speed !! inline more iters
	for ( FourVectors *pCur = m_pData; nRemaining != 0; --nRemaining, ++pCur )
	{
		pCur->VProduct( scale );
	}
	return *this;
}