You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
638 lines
16 KiB
638 lines
16 KiB
/* |
|
Copyright (C) 1997-2001 Id Software, Inc. |
|
|
|
This program is free software; you can redistribute it and/or |
|
modify it under the terms of the GNU General Public License |
|
as published by the Free Software Foundation; either version 2 |
|
of the License, or (at your option) any later version. |
|
|
|
This program is distributed in the hope that it will be useful, |
|
but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
|
|
|
See the GNU General Public License for more details. |
|
|
|
You should have received a copy of the GNU General Public License |
|
along with this program; if not, write to the Free Software |
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
|
|
|
*/ |
|
#include "r_local.h" |
|
|
|
vec3_t r_pright, r_pup, r_ppn; |
|
|
|
#define PARTICLE_33 0 |
|
#define PARTICLE_66 1 |
|
#define PARTICLE_OPAQUE 2 |
|
|
|
typedef struct |
|
{ |
|
particle_t *particle; |
|
int level; |
|
int color; |
|
} partparms_t; |
|
|
|
static partparms_t partparms; |
|
|
|
#if id386 && !defined __linux__ |
|
|
|
static unsigned s_prefetch_address; |
|
|
|
/* |
|
** BlendParticleXX |
|
** |
|
** Inputs: |
|
** EAX = color |
|
** EDI = pdest |
|
** |
|
** Scratch: |
|
** EBX = scratch (dstcolor) |
|
** EBP = scratch |
|
** |
|
** Outputs: |
|
** none |
|
*/ |
|
__declspec(naked) void BlendParticle33( void ) |
|
{ |
|
// return vid.alphamap[color + dstcolor*256]; |
|
__asm mov ebp, vid.alphamap |
|
__asm xor ebx, ebx |
|
|
|
__asm mov bl, byte ptr [edi] |
|
__asm shl ebx, 8 |
|
|
|
__asm add ebp, ebx |
|
__asm add ebp, eax |
|
|
|
__asm mov al, byte ptr [ebp] |
|
|
|
__asm mov byte ptr [edi], al |
|
|
|
__asm ret |
|
} |
|
|
|
__declspec(naked) void BlendParticle66( void ) |
|
{ |
|
// return vid.alphamap[pcolor*256 + dstcolor]; |
|
__asm mov ebp, vid.alphamap |
|
__asm xor ebx, ebx |
|
|
|
__asm shl eax, 8 |
|
__asm mov bl, byte ptr [edi] |
|
|
|
__asm add ebp, ebx |
|
__asm add ebp, eax |
|
|
|
__asm mov al, byte ptr [ebp] |
|
|
|
__asm mov byte ptr [edi], al |
|
|
|
__asm ret |
|
} |
|
|
|
__declspec(naked) void BlendParticle100( void ) |
|
{ |
|
__asm mov byte ptr [edi], al |
|
__asm ret |
|
} |
|
|
|
/* |
|
** R_DrawParticle (asm version) |
|
** |
|
** Since we use __declspec( naked ) we don't have a stack frame |
|
** that we can use. Since I want to reserve EBP anyway, I tossed |
|
** all the important variables into statics. This routine isn't |
|
** meant to be re-entrant, so this shouldn't cause any problems |
|
** other than a slightly higher global memory footprint. |
|
** |
|
*/ |
|
__declspec(naked) void R_DrawParticle( void ) |
|
{ |
|
static vec3_t local, transformed; |
|
static float zi; |
|
static int u, v, tmp; |
|
static short izi; |
|
static int ebpsave; |
|
|
|
static byte (*blendfunc)(void); |
|
|
|
/* |
|
** must be memvars since x86 can't load constants |
|
** directly. I guess I could use fld1, but that |
|
** actually costs one more clock than fld [one]! |
|
*/ |
|
static float particle_z_clip = PARTICLE_Z_CLIP; |
|
static float one = 1.0F; |
|
static float point_five = 0.5F; |
|
static float eight_thousand_hex = 0x8000; |
|
|
|
/* |
|
** save trashed variables |
|
*/ |
|
__asm mov ebpsave, ebp |
|
__asm push esi |
|
__asm push edi |
|
|
|
/* |
|
** transform the particle |
|
*/ |
|
// VectorSubtract (pparticle->origin, r_origin, local); |
|
__asm mov esi, partparms.particle |
|
__asm fld dword ptr [esi+0] ; p_o.x |
|
__asm fsub dword ptr [r_origin+0] ; p_o.x-r_o.x |
|
__asm fld dword ptr [esi+4] ; p_o.y | p_o.x-r_o.x |
|
__asm fsub dword ptr [r_origin+4] ; p_o.y-r_o.y | p_o.x-r_o.x |
|
__asm fld dword ptr [esi+8] ; p_o.z | p_o.y-r_o.y | p_o.x-r_o.x |
|
__asm fsub dword ptr [r_origin+8] ; p_o.z-r_o.z | p_o.y-r_o.y | p_o.x-r_o.x |
|
__asm fxch st(2) ; p_o.x-r_o.x | p_o.y-r_o.y | p_o.z-r_o.z |
|
__asm fstp dword ptr [local+0] ; p_o.y-r_o.y | p_o.z-r_o.z |
|
__asm fstp dword ptr [local+4] ; p_o.z-r_o.z |
|
__asm fstp dword ptr [local+8] ; (empty) |
|
|
|
// transformed[0] = DotProduct(local, r_pright); |
|
// transformed[1] = DotProduct(local, r_pup); |
|
// transformed[2] = DotProduct(local, r_ppn); |
|
__asm fld dword ptr [local+0] ; l.x |
|
__asm fmul dword ptr [r_pright+0] ; l.x*pr.x |
|
__asm fld dword ptr [local+4] ; l.y | l.x*pr.x |
|
__asm fmul dword ptr [r_pright+4] ; l.y*pr.y | l.x*pr.x |
|
__asm fld dword ptr [local+8] ; l.z | l.y*pr.y | l.x*pr.x |
|
__asm fmul dword ptr [r_pright+8] ; l.z*pr.z | l.y*pr.y | l.x*pr.x |
|
__asm fxch st(2) ; l.x*pr.x | l.y*pr.y | l.z*pr.z |
|
__asm faddp st(1), st ; l.x*pr.x + l.y*pr.y | l.z*pr.z |
|
__asm faddp st(1), st ; l.x*pr.x + l.y*pr.y + l.z*pr.z |
|
__asm fstp dword ptr [transformed+0] ; (empty) |
|
|
|
__asm fld dword ptr [local+0] ; l.x |
|
__asm fmul dword ptr [r_pup+0] ; l.x*pr.x |
|
__asm fld dword ptr [local+4] ; l.y | l.x*pr.x |
|
__asm fmul dword ptr [r_pup+4] ; l.y*pr.y | l.x*pr.x |
|
__asm fld dword ptr [local+8] ; l.z | l.y*pr.y | l.x*pr.x |
|
__asm fmul dword ptr [r_pup+8] ; l.z*pr.z | l.y*pr.y | l.x*pr.x |
|
__asm fxch st(2) ; l.x*pr.x | l.y*pr.y | l.z*pr.z |
|
__asm faddp st(1), st ; l.x*pr.x + l.y*pr.y | l.z*pr.z |
|
__asm faddp st(1), st ; l.x*pr.x + l.y*pr.y + l.z*pr.z |
|
__asm fstp dword ptr [transformed+4] ; (empty) |
|
|
|
__asm fld dword ptr [local+0] ; l.x |
|
__asm fmul dword ptr [r_ppn+0] ; l.x*pr.x |
|
__asm fld dword ptr [local+4] ; l.y | l.x*pr.x |
|
__asm fmul dword ptr [r_ppn+4] ; l.y*pr.y | l.x*pr.x |
|
__asm fld dword ptr [local+8] ; l.z | l.y*pr.y | l.x*pr.x |
|
__asm fmul dword ptr [r_ppn+8] ; l.z*pr.z | l.y*pr.y | l.x*pr.x |
|
__asm fxch st(2) ; l.x*pr.x | l.y*pr.y | l.z*pr.z |
|
__asm faddp st(1), st ; l.x*pr.x + l.y*pr.y | l.z*pr.z |
|
__asm faddp st(1), st ; l.x*pr.x + l.y*pr.y + l.z*pr.z |
|
__asm fstp dword ptr [transformed+8] ; (empty) |
|
|
|
/* |
|
** make sure that the transformed particle is not in front of |
|
** the particle Z clip plane. We can do the comparison in |
|
** integer space since we know the sign of one of the inputs |
|
** and can figure out the sign of the other easily enough. |
|
*/ |
|
// if (transformed[2] < PARTICLE_Z_CLIP) |
|
// return; |
|
|
|
__asm mov eax, dword ptr [transformed+8] |
|
__asm and eax, eax |
|
__asm js end |
|
__asm cmp eax, particle_z_clip |
|
__asm jl end |
|
|
|
/* |
|
** project the point by initiating the 1/z calc |
|
*/ |
|
// zi = 1.0 / transformed[2]; |
|
__asm fld one |
|
__asm fdiv dword ptr [transformed+8] |
|
|
|
/* |
|
** bind the blend function pointer to the appropriate blender |
|
** while we're dividing |
|
*/ |
|
//if ( level == PARTICLE_33 ) |
|
// blendparticle = BlendParticle33; |
|
//else if ( level == PARTICLE_66 ) |
|
// blendparticle = BlendParticle66; |
|
//else |
|
// blendparticle = BlendParticle100; |
|
|
|
__asm cmp partparms.level, PARTICLE_66 |
|
__asm je blendfunc_66 |
|
__asm jl blendfunc_33 |
|
__asm lea ebx, BlendParticle100 |
|
__asm jmp done_selecting_blend_func |
|
blendfunc_33: |
|
__asm lea ebx, BlendParticle33 |
|
__asm jmp done_selecting_blend_func |
|
blendfunc_66: |
|
__asm lea ebx, BlendParticle66 |
|
done_selecting_blend_func: |
|
__asm mov blendfunc, ebx |
|
|
|
// prefetch the next particle |
|
__asm mov ebp, s_prefetch_address |
|
__asm mov ebp, [ebp] |
|
|
|
// finish the above divide |
|
__asm fstp zi |
|
|
|
// u = (int)(xcenter + zi * transformed[0] + 0.5); |
|
// v = (int)(ycenter - zi * transformed[1] + 0.5); |
|
__asm fld zi ; zi |
|
__asm fmul dword ptr [transformed+0] ; zi * transformed[0] |
|
__asm fld zi ; zi | zi * transformed[0] |
|
__asm fmul dword ptr [transformed+4] ; zi * transformed[1] | zi * transformed[0] |
|
__asm fxch st(1) ; zi * transformed[0] | zi * transformed[1] |
|
__asm fadd xcenter ; xcenter + zi * transformed[0] | zi * transformed[1] |
|
__asm fxch st(1) ; zi * transformed[1] | xcenter + zi * transformed[0] |
|
__asm fld ycenter ; ycenter | zi * transformed[1] | xcenter + zi * transformed[0] |
|
__asm fsubrp st(1), st(0) ; ycenter - zi * transformed[1] | xcenter + zi * transformed[0] |
|
__asm fxch st(1) ; xcenter + zi * transformed[0] | ycenter + zi * transformed[1] |
|
__asm fadd point_five ; xcenter + zi * transformed[0] + 0.5 | ycenter - zi * transformed[1] |
|
__asm fxch st(1) ; ycenter - zi * transformed[1] | xcenter + zi * transformed[0] + 0.5 |
|
__asm fadd point_five ; ycenter - zi * transformed[1] + 0.5 | xcenter + zi * transformed[0] + 0.5 |
|
__asm fxch st(1) ; u | v |
|
__asm fistp dword ptr [u] ; v |
|
__asm fistp dword ptr [v] ; (empty) |
|
|
|
/* |
|
** clip out the particle |
|
*/ |
|
|
|
// if ((v > d_vrectbottom_particle) || |
|
// (u > d_vrectright_particle) || |
|
// (v < d_vrecty) || |
|
// (u < d_vrectx)) |
|
// { |
|
// return; |
|
// } |
|
|
|
__asm mov ebx, u |
|
__asm mov ecx, v |
|
__asm cmp ecx, d_vrectbottom_particle |
|
__asm jg end |
|
__asm cmp ecx, d_vrecty |
|
__asm jl end |
|
__asm cmp ebx, d_vrectright_particle |
|
__asm jg end |
|
__asm cmp ebx, d_vrectx |
|
__asm jl end |
|
|
|
/* |
|
** compute addresses of zbuffer, framebuffer, and |
|
** compute the Z-buffer reference value. |
|
** |
|
** EBX = U |
|
** ECX = V |
|
** |
|
** Outputs: |
|
** ESI = Z-buffer address |
|
** EDI = framebuffer address |
|
*/ |
|
// ESI = d_pzbuffer + (d_zwidth * v) + u; |
|
__asm mov esi, d_pzbuffer ; esi = d_pzbuffer |
|
__asm mov eax, d_zwidth ; eax = d_zwidth |
|
__asm mul ecx ; eax = d_zwidth*v |
|
__asm add eax, ebx ; eax = d_zwidth*v+u |
|
__asm shl eax, 1 ; eax = 2*(d_zwidth*v+u) |
|
__asm add esi, eax ; esi = ( short * ) ( d_pzbuffer + ( d_zwidth * v ) + u ) |
|
|
|
// initiate |
|
// izi = (int)(zi * 0x8000); |
|
__asm fld zi |
|
__asm fmul eight_thousand_hex |
|
|
|
// EDI = pdest = d_viewbuffer + d_scantable[v] + u; |
|
__asm lea edi, [d_scantable+ecx*4] |
|
__asm mov edi, [edi] |
|
__asm add edi, d_viewbuffer |
|
__asm add edi, ebx |
|
|
|
// complete |
|
// izi = (int)(zi * 0x8000); |
|
__asm fistp tmp |
|
__asm mov eax, tmp |
|
__asm mov izi, ax |
|
|
|
/* |
|
** determine the screen area covered by the particle, |
|
** which also means clamping to a min and max |
|
*/ |
|
// pix = izi >> d_pix_shift; |
|
__asm xor edx, edx |
|
__asm mov dx, izi |
|
__asm mov ecx, d_pix_shift |
|
__asm shr dx, cl |
|
|
|
// if (pix < d_pix_min) |
|
// pix = d_pix_min; |
|
__asm cmp edx, d_pix_min |
|
__asm jge check_pix_max |
|
__asm mov edx, d_pix_min |
|
__asm jmp skip_pix_clamp |
|
|
|
// else if (pix > d_pix_max) |
|
// pix = d_pix_max; |
|
check_pix_max: |
|
__asm cmp edx, d_pix_max |
|
__asm jle skip_pix_clamp |
|
__asm mov edx, d_pix_max |
|
|
|
skip_pix_clamp: |
|
|
|
/* |
|
** render the appropriate pixels |
|
** |
|
** ECX = count (used for inner loop) |
|
** EDX = count (used for outer loop) |
|
** ESI = zbuffer |
|
** EDI = framebuffer |
|
*/ |
|
__asm mov ecx, edx |
|
|
|
__asm cmp ecx, 1 |
|
__asm ja over |
|
|
|
over: |
|
|
|
/* |
|
** at this point: |
|
** |
|
** ECX = count |
|
*/ |
|
__asm push ecx |
|
__asm push edi |
|
__asm push esi |
|
|
|
top_of_pix_vert_loop: |
|
|
|
top_of_pix_horiz_loop: |
|
|
|
// for ( ; count ; count--, pz += d_zwidth, pdest += screenwidth) |
|
// { |
|
// for (i=0 ; i<pix ; i++) |
|
// { |
|
// if (pz[i] <= izi) |
|
// { |
|
// pdest[i] = blendparticle( color, pdest[i] ); |
|
// } |
|
// } |
|
// } |
|
__asm xor eax, eax |
|
|
|
__asm mov ax, word ptr [esi] |
|
|
|
__asm cmp ax, izi |
|
__asm jg end_of_horiz_loop |
|
|
|
#if ENABLE_ZWRITES_FOR_PARTICLES |
|
__asm mov bp, izi |
|
__asm mov word ptr [esi], bp |
|
#endif |
|
|
|
__asm mov eax, partparms.color |
|
|
|
__asm call [blendfunc] |
|
|
|
__asm add edi, 1 |
|
__asm add esi, 2 |
|
|
|
end_of_horiz_loop: |
|
|
|
__asm dec ecx |
|
__asm jnz top_of_pix_horiz_loop |
|
|
|
__asm pop esi |
|
__asm pop edi |
|
|
|
__asm mov ebp, d_zwidth |
|
__asm shl ebp, 1 |
|
|
|
__asm add esi, ebp |
|
__asm add edi, [r_screenwidth] |
|
|
|
__asm pop ecx |
|
__asm push ecx |
|
|
|
__asm push edi |
|
__asm push esi |
|
|
|
__asm dec edx |
|
__asm jnz top_of_pix_vert_loop |
|
|
|
__asm pop ecx |
|
__asm pop ecx |
|
__asm pop ecx |
|
|
|
end: |
|
__asm pop edi |
|
__asm pop esi |
|
__asm mov ebp, ebpsave |
|
__asm ret |
|
} |
|
|
|
#else |
|
|
|
static byte BlendParticle33( int pcolor, int dstcolor ) |
|
{ |
|
return vid.alphamap[pcolor + dstcolor*256]; |
|
} |
|
|
|
static byte BlendParticle66( int pcolor, int dstcolor ) |
|
{ |
|
return vid.alphamap[pcolor*256+dstcolor]; |
|
} |
|
|
|
static byte BlendParticle100( int pcolor, int dstcolor ) |
|
{ |
|
dstcolor = dstcolor; |
|
return pcolor; |
|
} |
|
|
|
/* |
|
** R_DrawParticle |
|
** |
|
** Yes, this is amazingly slow, but it's the C reference |
|
** implementation and should be both robust and vaguely |
|
** understandable. The only time this path should be |
|
** executed is if we're debugging on x86 or if we're |
|
** recompiling and deploying on a non-x86 platform. |
|
** |
|
** To minimize error and improve readability I went the |
|
** function pointer route. This exacts some overhead, but |
|
** it pays off in clean and easy to understand code. |
|
*/ |
|
void R_DrawParticle( void ) |
|
{ |
|
particle_t *pparticle = partparms.particle; |
|
int level = partparms.level; |
|
vec3_t local, transformed; |
|
float zi; |
|
byte *pdest; |
|
short *pz; |
|
int color = pparticle->color; |
|
int i, izi, pix, count, u, v; |
|
byte (*blendparticle)( int, int ); |
|
|
|
/* |
|
** transform the particle |
|
*/ |
|
VectorSubtract (pparticle->origin, r_origin, local); |
|
|
|
transformed[0] = DotProduct(local, r_pright); |
|
transformed[1] = DotProduct(local, r_pup); |
|
transformed[2] = DotProduct(local, r_ppn); |
|
|
|
if (transformed[2] < PARTICLE_Z_CLIP) |
|
return; |
|
|
|
/* |
|
** bind the blend function pointer to the appropriate blender |
|
*/ |
|
if ( level == PARTICLE_33 ) |
|
blendparticle = BlendParticle33; |
|
else if ( level == PARTICLE_66 ) |
|
blendparticle = BlendParticle66; |
|
else |
|
blendparticle = BlendParticle100; |
|
|
|
/* |
|
** project the point |
|
*/ |
|
// FIXME: preadjust xcenter and ycenter |
|
zi = 1.0 / transformed[2]; |
|
u = (int)(xcenter + zi * transformed[0] + 0.5); |
|
v = (int)(ycenter - zi * transformed[1] + 0.5); |
|
|
|
if ((v > d_vrectbottom_particle) || |
|
(u > d_vrectright_particle) || |
|
(v < d_vrecty) || |
|
(u < d_vrectx)) |
|
{ |
|
return; |
|
} |
|
|
|
/* |
|
** compute addresses of zbuffer, framebuffer, and |
|
** compute the Z-buffer reference value. |
|
*/ |
|
pz = d_pzbuffer + (d_zwidth * v) + u; |
|
pdest = d_viewbuffer + d_scantable[v] + u; |
|
izi = (int)(zi * 0x8000); |
|
|
|
/* |
|
** determine the screen area covered by the particle, |
|
** which also means clamping to a min and max |
|
*/ |
|
pix = izi >> d_pix_shift; |
|
if (pix < d_pix_min) |
|
pix = d_pix_min; |
|
else if (pix > d_pix_max) |
|
pix = d_pix_max; |
|
|
|
/* |
|
** render the appropriate pixels |
|
*/ |
|
count = pix; |
|
|
|
switch (level) { |
|
case PARTICLE_33 : |
|
for ( ; count ; count--, pz += d_zwidth, pdest += r_screenwidth) |
|
{ |
|
//FIXME--do it in blocks of 8? |
|
for (i=0 ; i<pix ; i++) |
|
{ |
|
if (pz[i] <= izi) |
|
{ |
|
pz[i] = izi; |
|
pdest[i] = vid.alphamap[color + ((int)pdest[i]<<8)]; |
|
} |
|
} |
|
} |
|
break; |
|
|
|
case PARTICLE_66 : |
|
for ( ; count ; count--, pz += d_zwidth, pdest += r_screenwidth) |
|
{ |
|
for (i=0 ; i<pix ; i++) |
|
{ |
|
if (pz[i] <= izi) |
|
{ |
|
pz[i] = izi; |
|
pdest[i] = vid.alphamap[(color<<8) + (int)pdest[i]]; |
|
} |
|
} |
|
} |
|
break; |
|
|
|
default: //100 |
|
for ( ; count ; count--, pz += d_zwidth, pdest += r_screenwidth) |
|
{ |
|
for (i=0 ; i<pix ; i++) |
|
{ |
|
if (pz[i] <= izi) |
|
{ |
|
pz[i] = izi; |
|
pdest[i] = color; |
|
} |
|
} |
|
} |
|
break; |
|
} |
|
} |
|
|
|
#endif // !id386 |
|
|
|
/* |
|
** R_DrawParticles |
|
** |
|
** Responsible for drawing all of the particles in the particle list |
|
** throughout the world. Doesn't care if we're using the C path or |
|
** if we're using the asm path, it simply assigns a function pointer |
|
** and goes. |
|
*/ |
|
void R_DrawParticles (void) |
|
{ |
|
particle_t *p; |
|
int i; |
|
extern unsigned long fpu_sp24_cw, fpu_chop_cw; |
|
|
|
VectorScale( vright, xscaleshrink, r_pright ); |
|
VectorScale( vup, yscaleshrink, r_pup ); |
|
VectorCopy( vpn, r_ppn ); |
|
|
|
#if id386 && !defined __linux__ |
|
__asm fldcw word ptr [fpu_sp24_cw] |
|
#endif |
|
|
|
for (p=r_newrefdef.particles, i=0 ; i<r_newrefdef.num_particles ; i++,p++) |
|
{ |
|
|
|
if ( p->alpha > 0.66 ) |
|
partparms.level = PARTICLE_OPAQUE; |
|
else if ( p->alpha > 0.33 ) |
|
partparms.level = PARTICLE_66; |
|
else |
|
partparms.level = PARTICLE_33; |
|
|
|
partparms.particle = p; |
|
partparms.color = p->color; |
|
|
|
#if id386 && !defined __linux__ |
|
if ( i < r_newrefdef.num_particles-1 ) |
|
s_prefetch_address = ( unsigned int ) ( p + 1 ); |
|
else |
|
s_prefetch_address = ( unsigned int ) r_newrefdef.particles; |
|
#endif |
|
|
|
R_DrawParticle(); |
|
} |
|
|
|
#if id386 && !defined __linux__ |
|
__asm fldcw word ptr [fpu_chop_cw] |
|
#endif |
|
|
|
} |
|
|
|
|