

在学习3D游戏编程大师技巧的时候,就了解到,可以使用一种称之为“单指令,多数据(SIMD)”的技术来编写3D数学库。通过这样的方法,可以将我们经常使用的诸如向量计算、矩阵变换等操作加快很多倍。这次,在学习3D引擎开发的时候,也用到了这个技术。SIMD是一种技术的名称,而并不是具体的工具。为了实现这种技术,不同的CPU厂商推出了不同的指令集,像MMX、3DNow!、SSE、SSE2、SSE3等。由于我的计算机上使用的是Intel的处理器,它支持MMX、SSE、SSE2,所以在这里我使用SSE的指令来进行。如果你使用的是AMD处理器,也不用担心:虽然3DNow!是AMD自己的指令集,但是较新的AMD处理器同样支持SSE指令集,只要你的处理器支持SSE,就依然能够使用这里的代码。



mov eax , 1

cpuid

mov flag, edx

这样,flag中就保存了我们CPU的特性值了。然后只要解析这个特性值,就能够判断我们的计算机是否支持上面提到的那些指令集了。但是,在这里,我并不使用这种方法。如果读者希望使用这样的方法的话,可以自行上网查询有关资料。




<span style="font-family:Microsoft YaHei;">#include"ZFX3D.h"
using namespace ZFXEngine ;
using namespace std ;

/** Global variables */
bool g_bSSE = false ;							// true when both the CPU and the operating system report SSE support (set by ZFX3DInitCPU)
void expand(int avail, int mask, ofstream* pOut)
	char buffer[64];
    if (mask & _CPU_FEATURE_MMX) {
               avail & _CPU_FEATURE_MMX ? "yes" : "no");
    if (mask & _CPU_FEATURE_SSE) {
               avail & _CPU_FEATURE_SSE ? "yes" : "no");
    if (mask & _CPU_FEATURE_SSE2) {
               avail & _CPU_FEATURE_SSE2 ? "yes" : "no");
    if (mask & _CPU_FEATURE_3DNOW) {
               avail & _CPU_FEATURE_3DNOW ? "yes" : "no");

/**
 * Log the CPU identification data and decide whether the SSE code paths
 * may be used.
 *
 * The global g_bSSE becomes true only when _CPU_FEATURE_SSE is present in
 * both info.feature (processor) and info.os_support (operating system).
 *
 * @return The new value of g_bSSE.
 */
bool ZFX3DInitCPU(void) {
	// NOTE(review): the listing shows neither the call that fills `info`
	// nor the call that opens `out`. Both must be restored here, otherwise
	// the function logs nothing and always reports "no SSE". `info` is
	// value-initialised so that this fallback is at least deterministic.
	 _p_info info = {};

	ofstream out ;

	// Every line is formatted into this buffer and then streamed out. The
	// %s conversions carry a precision so an unexpectedly long name cannot
	// overrun the 64 bytes.
	char buffer[64];
	sprintf(buffer,"v_name:\t\t%.48s\n", info.v_name);
	out << buffer ;

    sprintf(buffer,"model:\t\t%.48s\n", info.model_name);
	out << buffer ;

    sprintf(buffer,"family:\t\t%d\n", info.family);
	out << buffer ;

    sprintf(buffer,"model:\t\t%d\n", info.model);
	out << buffer ;

    sprintf(buffer,"stepping:\t%d\n", info.stepping);
	out << buffer ;

    sprintf(buffer,"feature:\t%08x\n", info.feature);
	out << buffer ;

    expand(info.feature, info.checks, &out);

    sprintf(buffer,"os_support:\t%08x\n", info.os_support);
	out << buffer ;

    expand(info.os_support, info.checks,&out);

	sprintf(buffer,"checks:\t\t%08x\n", info.checks);
	out << buffer ;

	// SSE is usable only if the processor implements it AND the operating
	// system supports it.
	g_bSSE = (info.feature & _CPU_FEATURE_SSE)
		&& (info.os_support & _CPU_FEATURE_SSE) ;

	return g_bSSE ;
}// end for ZFX3DInitCPU</span>


<span style="font-family:Microsoft YaHei;">v_name:        GenuineIntel
model:        INTEL Pentium-III
family:        6
model:        10
stepping:    9
feature:    00000007
    yes    _CPU_FEATURE_MMX
    yes    _CPU_FEATURE_SSE
    yes    _CPU_FEATURE_SSE2
    no    _CPU_FEATURE_3DNOW
os_support:    00000007
    yes    _CPU_FEATURE_MMX
    yes    _CPU_FEATURE_SSE
    yes    _CPU_FEATURE_SSE2
    no    _CPU_FEATURE_3DNOW
checks:        0000000f



<span style="font-family:Microsoft YaHei;">/**
* Define ZFXVector
class _declspec(dllexport) ZFXVector
	float x, y, z, w ;

	ZFXVector(void){ x = 0 , y = 0 , z = 0, w = 1.0f ;}
	ZFXVector(float _x, float _y, float _z)


	inline void set(float _x, float _y, float _z, float _w = 1.0f);
	inline float getLength(void);
	inline float getSqrtLength(void) const ;
	inline void negate(void);
	inline void normalize(void);
	inline float angleWith(ZFXVector& v);
	inline void difference(const ZFXVector& u,
		const ZFXVector&v);
	void operator +=(const ZFXVector &v);
	void operator -=(const ZFXVector &v);
	void operator *=(float f);
	void operator /=(float f);
	float operator *(const ZFXVector &v) const ;
	ZFXVector operator *(float f) const ;
	ZFXVector operator * (const ZFXMatrix &m) const ;
	ZFXVector operator + (const ZFXVector &v) const ;
	ZFXVector operator - (const ZFXVector &v) const ;
	inline void cross(const ZFXVector &u, const ZFXVector& v);
}; // end for ZFXVector</span>


<span style="font-family:Microsoft YaHei;">#include"ZFX3D.h"
using namespace ZFXEngine ;
extern bool g_bSSE ;

/**
 * Return the absolute value of f.
 *
 * @param f  Any float value.
 * @return   -f when f is below zero, otherwise f unchanged.
 */
float _fabs(float f)
{
	if(f < 0.0f)
		return -f ;

	return f ;
}// end for _fabs

inline void ZFXVector::set(float _x, float _y,
	float _z, float _w)
	x = _x ;
	y = _y ;
	z = _z ;
	w = _w ;
}// end for set

void ZFXVector::operator+=(const ZFXVector& v)
	x += v.x ;
	y += v.y ;
	z += v.z ;
}// end for +=

/**
 * Return the component-wise sum of this vector and v.
 *
 * @param v  Right-hand operand.
 * @return   A new vector built from the summed x, y and z.
 */
ZFXVector ZFXVector::operator+(const ZFXVector& v) const
{
	return ZFXVector(x + v.x, y + v.y, z+ v.z);
}// end for +

void ZFXVector::operator -=(const ZFXVector& v)
	x -= v.x ;
	y -= v.y ;
	z -= v.z ;
}// end for -=

/**
 * Return the component-wise difference of this vector and v.
 *
 * @param v  Right-hand operand.
 * @return   A new vector built from (x - v.x, y - v.y, z - v.z).
 */
ZFXVector ZFXVector::operator -(const ZFXVector& v) const
{
	return ZFXVector(x - v.x, y - v.y, z - v.z);
}// end for -

void ZFXVector::operator *=(float f)
	x *= f ;
	y *= f ;
	z *= f ;
}// end for *=

void ZFXVector::operator /= (float f)
	x /= f ;
	y /= f ;
	z /= f ;
}// end for /=

/**
 * Return this vector with x, y and z scaled by f.
 *
 * @param f  Scale factor.
 * @return   A new vector built from (x * f, y * f, z * f).
 */
ZFXVector ZFXVector::operator *(float f) const
{
	return ZFXVector(x * f, y * f, z * f) ;
}// end for *

/**
 * Dot product of the (x, y, z) parts of this vector and v; w is ignored.
 *
 * @param v  Right-hand operand.
 * @return   x * v.x + y * v.y + z * v.z.
 */
float ZFXVector::operator*(const ZFXVector& v) const
{
	return (x * v.x + y * v.y + z * v.z);
}// end for *

inline float ZFXVector::getSqrtLength(void) const
	return (x * x + y * y + z * z) ;
}// end for getSqrLength

inline void ZFXVector::negate(void)
	x = -x ;
	y = -y ;
	z = -z ;
}// end for negate

inline void ZFXVector::difference(const ZFXVector&v1,
	const ZFXVector&v2)
	x = v2.x - v1.x ;
	y = v2.y - v1.y ;
	z = v2.z - v1.z ;
	w = 1.0f ;
}// end for difference

/**
 * Return the angle, in radians, between this vector and v.
 *
 * The angle is acos(dot / (|this| * |v|)). The cosine is clamped to
 * [-1, 1] because rounding can push it slightly outside that range, which
 * would make acos return NaN. If either vector has zero length the angle
 * is undefined and 0 is returned.
 *
 * @param v  The other vector (non-const because getLength() is non-const).
 * @return   Angle in the range [0, pi].
 */
inline float ZFXVector::angleWith(ZFXVector& v)
{
	const float fDot = (*this) * v ;
	const float fLengths = this->getLength() * v.getLength() ;

	if(fLengths == 0.0f)
		return 0.0f ;

	float fCos = fDot / fLengths ;
	if(fCos > 1.0f)
		fCos = 1.0f ;
	else if(fCos < -1.0f)
		fCos = -1.0f ;

	return (float)acos(fCos);
}// end for angleWith

inline float ZFXVector::getLength(void)
	float f = 0.0f ;

		f = (float)sqrt(x*x + y*y + z*z);
		float *pf = &f ;
		w = 0.0f;
			mov ecx , pf		; point to the result
			mov esi , this		; copy the pointer of this to esi
			movups xmm0, [esi]	; copy the this vector to xmm0
			mulps xmm0, xmm0	; multiply all the component
			movaps xmm1, xmm0	; copy result to xmm1
			shufps xmm1, xmm1, 4Eh; shuffle : f1, f0, f3, f2
			addps  xmm0, xmm1	;
			movaps xmm1, xmm0	; copy the xmm0 to xmm1
			shufps xmm1, xmm1, 11h;
			addps xmm0, xmm1
			sqrtss xmm0, xmm0	; sqrt the first element
			movss [ecx], xmm0	; copy the first element to the result
		}// end for _asm

		w = 1.0f ;

	return f ;
}// end for getLength

inline void ZFXVector::normalize(void)
	if(x == 0 && y == 0 && z == 0)
		return ;

		float f = (float)sqrt(x*x + y*y + z*z);

		x /= f;
		y /= f;
		z /= f;
		w = 0.0f ;
			mov esi , this		; copy the pointer of this to esi
			movups xmm0, [esi]	; copy the this vector to xmm0
			movaps xmm2, xmm0
			mulps xmm0, xmm0	; multiply all the component
			movaps xmm1, xmm0	; copy result to xmm1
			shufps xmm1, xmm1, 4Eh; shuffle : f1, f0, f3, f2
			addps  xmm0, xmm1	;
			movaps xmm1, xmm0	; copy the xmm0 to xmm1
			shufps xmm1, xmm1, 11h;
			addps xmm0, xmm1

			rsqrtps xmm0, xmm0 ;
			mulps	xmm2, xmm0 ; multiply the inverse of squre root
			movups [esi], xmm2
		}// end for _asm

		w = 1.0f;
	}// end if...else...
}// end for normalize

inline void ZFXVector::cross(const ZFXVector& v, const ZFXVector& u)
		x = v.y * u.z - v.z * u.y ;
		y = v.z * u.x - v.x * u.z ;
		z = v.x * u.y - v.y * u.x ;
		w = 1.0f;
			mov esi , v
			mov edi , u

			movups xmm0, [esi]
			movups xmm1, [edi]
			movaps xmm2, xmm0
			movaps xmm3, xmm1

			shufps xmm0, xmm0, 0xC9
			shufps xmm1, xmm1, 0xD2
			mulps xmm0, xmm1

			shufps xmm2, xmm2, 0xD2
			shufps xmm3, xmm3, 0xC9
			mulps xmm2, xmm3

			subps xmm0, xmm2

			mov esi, this
			movups [esi], xmm0
		}// end for _asm

		w = 1.0f ;
	}// end if...else...
}// end for cross

/**
 * Multiply this vector, taken as the row vector (x, y, z, w), with the
 * matrix m and return the homogenized result.
 *
 * Each result component is x * row1 + y * row2 + z * row3 + w * row4 of
 * the matching matrix column. When the resulting w is neither 0 nor 1,
 * x, y and z are divided by it and w is set to 1.
 *
 * @param m  Matrix whose elements are named _11 .. _44.
 * @return   The transformed vector.
 */
ZFXVector ZFXVector::operator*(const ZFXMatrix& m) const
{
	ZFXVector vcResult ;

	if(!g_bSSE)
	{
		vcResult.x = x* m._11 + y * m._21 + z * m._31 + w * m._41 ;
		vcResult.y = x* m._12 + y * m._22 + z * m._32 + w * m._42 ;
		vcResult.z = x* m._13 + y * m._23 + z * m._33 + w * m._43 ;
		vcResult.w = x* m._14 + y * m._24 + z * m._34 + w * m._44 ;
	}
	else
	{
		float *ptrRet = (float*)&vcResult ;

		// Copy the four matrix rows into vectors so that each one can be
		// loaded into an SSE register with a single instruction.
		ZFXVector s ; s.set(m._11, m._12, m._13, m._14);
		ZFXVector t ; t.set(m._21, m._22, m._23, m._24);
		ZFXVector u ; u.set(m._31, m._32, m._33, m._34);
		ZFXVector v ; v.set(m._41, m._42, m._43, m._44);
		float* ps = (float*)&s ;
		float* pt = (float*)&t ;
		float* pu = (float*)&u ;
		float* pv = (float*)&v ;

		_asm
		{
			 mov    esi, this

			 movups xmm0, [esi]
			 movaps xmm1, xmm0
			 movaps xmm2, xmm0
			 movaps xmm3, xmm0

			 shufps xmm0, xmm2, 0x00	; broadcast x
			 shufps xmm1, xmm2, 0x55	; broadcast y
			 shufps xmm2, xmm2, 0xAA	; broadcast z
			 shufps xmm3, xmm3, 0xFF	; broadcast w

			 mov    edx,  ps
			 movups xmm4, [edx]
			 mov    edx,  pt
			 movups xmm5, [edx]
			 mov    edx,  pu
			 movups xmm6, [edx]
			 mov    edx,  pv
			 movups xmm7, [edx]

			 mulps xmm0, xmm4
			 mulps xmm1, xmm5
			 mulps xmm2, xmm6
			 mulps xmm3, xmm7

			 addps xmm0, xmm1
			 addps xmm0, xmm2
			 addps xmm0, xmm3

			 mov edx, ptrRet ;
			 movups [edx], xmm0 ;
		}// end for _asm
	}// end if...else...

	//homo it
	// Only divide when w is neither 1 (nothing to do) nor 0 (a division
	// by zero); the braces keep all four statements under the condition.
	 if(vcResult.w != 1.0f
		&& vcResult.w != 0.0f)
	{
		vcResult.x /= vcResult.w ;
		vcResult.y /= vcResult.w ;
		vcResult.z /= vcResult.w ;
		vcResult.w = 1.0f ;
	}

	return vcResult ;
}// end for *</span>






