You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1234 lines
29 KiB
1234 lines
29 KiB
.386P |
|
.model FLAT |
|
; |
|
; d_draw16.s |
|
; x86 assembly-language horizontal 8-bpp span-drawing code, with 16-pixel |
|
; subdivision. |
|
; |
|
|
|
include qasm.inc |
|
include d_if.inc |
|
|
|
if id386 |
|
|
|
;---------------------------------------------------------------------- |
|
; 8-bpp horizontal span drawing code for polygons, with no transparency and |
|
; 16-pixel subdivision. |
|
; |
|
; Assumes there is at least one span in pspans, and that every span |
|
; contains at least one pixel |
|
;---------------------------------------------------------------------- |
|
|
|
_DATA SEGMENT |
|
|
|
_DATA ENDS |
|
_TEXT SEGMENT |
|
|
|
;
; Out-of-line clamp stubs for _D_DrawSpans16.
;
; Each stub is reached by a conditional jump from the main path, loads the
; clamped value into the register that was being range-checked, and jumps
; back to the matching LClampReentryN label. Keeping them here keeps the
; rarely-taken paths out of the main instruction stream.
;

; --- start-of-span s (esi) --------------------------------------------
; Entered with the flags of "cmp esi,[_bbextents]" still live (the caller
; branched on JA). Re-testing the same flags as a signed compare separates
; "above the extent" from "negative".
LClampHighOrLow0:
        jg      LClampHigh0
        xor     esi,esi                         ; negative: clamp to 0
        jmp     LClampReentry0
LClampHigh0:
        mov     esi,ds:dword ptr[_bbextents]    ; too high: clamp to extent
        jmp     LClampReentry0

; --- start-of-span t (edx) --------------------------------------------
; Same scheme as above, against _bbextentt.
LClampHighOrLow1:
        jg      LClampHigh1
        xor     edx,edx                         ; negative: clamp to 0
        jmp     LClampReentry1
LClampHigh1:
        mov     edx,ds:dword ptr[_bbextentt]    ; too high: clamp to extent
        jmp     LClampReentry1

; --- end-of-segment s (ebp), full 16-pixel segment ---------------------
LClampHigh2:
        mov     ebp,ds:dword ptr[_bbextents]
        jmp     LClampReentry2
LClampLow2:
        mov     ebp,4096                        ; lower bound used for snext
        jmp     LClampReentry2

; --- end-of-segment t (ecx), full 16-pixel segment ---------------------
LClampHigh3:
        mov     ecx,ds:dword ptr[_bbextentt]
        jmp     LClampReentry3
LClampLow3:
        mov     ecx,4096                        ; lower bound used for tnext
        jmp     LClampReentry3

; --- end-of-span s (eax), last segment ---------------------------------
LClampHigh4:
        mov     eax,ds:dword ptr[_bbextents]
        jmp     LClampReentry4
LClampLow4:
        mov     eax,4096
        jmp     LClampReentry4

; --- end-of-span t (ebx), last segment ---------------------------------
LClampHigh5:
        mov     ebx,ds:dword ptr[_bbextentt]
        jmp     LClampReentry5
LClampLow5:
        mov     ebx,4096
        jmp     LClampReentry5
|
|
|
|
|
; _D_DrawSpans16 -- entry point and one-time setup.
;
; pspans is the offset from esp to the single stack argument (the first span
; descriptor) once the four register pushes below have been done: 16 bytes of
; pushes plus 4 bytes, presumably the return address.
;
; Setup performed here, once per call:
;   sdivz16stepu = _d_sdivzstepu * fp_16
;   tdivz16stepu = _d_tdivzstepu * fp_16
;   zi16stepu    = _d_zistepu    * fp_16
;   pbase        = _cacheblock
;   ebx          = first span descriptor
; The integer moves are interleaved with the FP loads/multiplies; the three
; products are popped in reverse order of their loads.
pspans equ 4+16 |
|
|
|
align 4 |
|
public _D_DrawSpans16 |
|
_D_DrawSpans16: |
|
push ebp ; preserve caller's stack frame |
|
push edi |
|
push esi ; preserve register variables |
|
push ebx |
|
|
|
; |
|
; set up scaled-by-16 steps, for 16-long segments; also set up cacheblock |
|
; and span list pointers |
|
; |
|
; TODO: any overlap from rearranging? |
|
fld ds:dword ptr[_d_sdivzstepu] |
|
fmul ds:dword ptr[fp_16] |
|
mov edx,ds:dword ptr[_cacheblock] |
|
fld ds:dword ptr[_d_tdivzstepu] |
|
fmul ds:dword ptr[fp_16] |
|
mov ebx,ds:dword ptr[pspans+esp] ; point to the first span descriptor |
|
fld ds:dword ptr[_d_zistepu] |
|
fmul ds:dword ptr[fp_16] |
|
mov ds:dword ptr[pbase],edx ; pbase = cacheblock |
|
fstp ds:dword ptr[zi16stepu] |
|
fstp ds:dword ptr[tdivz16stepu] |
|
fstp ds:dword ptr[sdivz16stepu] |
|
|
|
; LSpanLoop -- per-span setup.
;
; In:   ebx = current span descriptor (fields espan_t_u, espan_t_v,
;       espan_t_count are read here).
; Does: builds s/z, t/z and 1/z for the first pixel on the x87 stack, divides
;       fp_64k by 1/z, converts s and t to integers (stored to [s] and [t]),
;       and computes edi = _d_viewbuffer + _d_scantable[v] + u.
;       It then starts the divide for the END of the first segment so that it
;       can run while the integer code after LFDIVInFlight1 executes:
;         count > 16     -> LSetupNotLast1 (advance by the 16x steps)
;         2..16 pixels   -> advance by (count-1) single steps
;         1 pixel        -> LCleanup1, no divide is started
; Out:  esi = _sadjust, edx = _tadjust, ecx = count (or count-1 when
;       count <= 16), [pspantemp] = span pointer.
;       x87 stack: [z*64k in flight |] 1/z | t/z | s/z
;       (the bracketed entry is absent on the one-pixel path).
; The "finish up the s and t calcs" sequence appears three times on purpose,
; once per path.
LSpanLoop: |
|
; |
|
; set up the initial s/z, t/z, and 1/z on the FP stack, and generate the |
|
; initial s and t values |
|
; |
|
; FIXME: pipeline FILD? |
|
fild ds:dword ptr[espan_t_v+ebx] |
|
fild ds:dword ptr[espan_t_u+ebx] |
|
|
|
fld st(1) ; dv | du | dv |
|
fmul ds:dword ptr[_d_sdivzstepv] ; dv*d_sdivzstepv | du | dv |
|
fld st(1) ; du | dv*d_sdivzstepv | du | dv |
|
fmul ds:dword ptr[_d_sdivzstepu] ; du*d_sdivzstepu | dv*d_sdivzstepv | du | dv |
|
fld st(2) ; du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv |
|
fmul ds:dword ptr[_d_tdivzstepu] ; du*d_tdivzstepu | du*d_sdivzstepu | |
|
; dv*d_sdivzstepv | du | dv |
|
fxch st(1) ; du*d_sdivzstepu | du*d_tdivzstepu | |
|
; dv*d_sdivzstepv | du | dv |
|
faddp st(2),st(0) ; du*d_tdivzstepu | |
|
; du*d_sdivzstepu + dv*d_sdivzstepv | du | dv |
|
fxch st(1) ; du*d_sdivzstepu + dv*d_sdivzstepv | |
|
; du*d_tdivzstepu | du | dv |
|
fld st(3) ; dv | du*d_sdivzstepu + dv*d_sdivzstepv | |
|
; du*d_tdivzstepu | du | dv |
|
fmul ds:dword ptr[_d_tdivzstepv] ; dv*d_tdivzstepv | |
|
; du*d_sdivzstepu + dv*d_sdivzstepv | |
|
; du*d_tdivzstepu | du | dv |
|
fxch st(1) ; du*d_sdivzstepu + dv*d_sdivzstepv | |
|
; dv*d_tdivzstepv | du*d_tdivzstepu | du | dv |
|
fadd ds:dword ptr[_d_sdivzorigin] ; sdivz = d_sdivzorigin + dv*d_sdivzstepv + |
|
; du*d_sdivzstepu; stays in %st(2) at end |
|
fxch st(4) ; dv | dv*d_tdivzstepv | du*d_tdivzstepu | du | |
|
; s/z |
|
fmul ds:dword ptr[_d_zistepv] ; dv*d_zistepv | dv*d_tdivzstepv | |
|
; du*d_tdivzstepu | du | s/z |
|
fxch st(1) ; dv*d_tdivzstepv | dv*d_zistepv | |
|
; du*d_tdivzstepu | du | s/z |
|
faddp st(2),st(0) ; dv*d_zistepv | |
|
; dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z |
|
fxch st(2) ; du | dv*d_tdivzstepv + du*d_tdivzstepu | |
|
; dv*d_zistepv | s/z |
|
fmul ds:dword ptr[_d_zistepu] ; du*d_zistepu | |
|
; dv*d_tdivzstepv + du*d_tdivzstepu | |
|
; dv*d_zistepv | s/z |
|
fxch st(1) ; dv*d_tdivzstepv + du*d_tdivzstepu | |
|
; du*d_zistepu | dv*d_zistepv | s/z |
|
fadd ds:dword ptr[_d_tdivzorigin] ; tdivz = d_tdivzorigin + dv*d_tdivzstepv + |
|
; du*d_tdivzstepu; stays in %st(1) at end |
|
fxch st(2) ; dv*d_zistepv | du*d_zistepu | t/z | s/z |
|
faddp st(1),st(0) ; dv*d_zistepv + du*d_zistepu | t/z | s/z |
|
|
|
fld ds:dword ptr[fp_64k] ; fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z |
|
fxch st(1) ; dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z |
|
fadd ds:dword ptr[_d_ziorigin] ; zi = d_ziorigin + dv*d_zistepv + |
|
; du*d_zistepu; stays in %st(0) at end |
|
; 1/z | fp_64k | t/z | s/z |
|
; |
|
; calculate and clamp s & t |
|
; |
|
fdiv st(1),st(0) ; 1/z | z*64k | t/z | s/z |
|
|
|
; |
|
; point %edi to the first pixel in the span |
|
; |
|
mov ecx,ds:dword ptr[_d_viewbuffer] |
|
mov eax,ds:dword ptr[espan_t_v+ebx] |
|
mov ds:dword ptr[pspantemp],ebx ; preserve spans pointer |
|
|
|
mov edx,ds:dword ptr[_tadjust] |
|
mov esi,ds:dword ptr[_sadjust] |
|
mov edi,ds:dword ptr[_d_scantable+eax*4] ; v * screenwidth |
|
add edi,ecx |
|
mov ecx,ds:dword ptr[espan_t_u+ebx] |
|
add edi,ecx ; pdest = &pdestspan[scans->u]; |
|
mov ecx,ds:dword ptr[espan_t_count+ebx] |
|
|
|
; |
|
; now start the FDIV for the end of the span |
|
; |
|
cmp ecx,16 |
|
ja LSetupNotLast1 |
|
|
|
; 16 pixels or fewer: from here on ecx holds (pixel count - 1)
dec ecx |
|
jz LCleanup1 ; if only one pixel, no need to start an FDIV |
|
mov ds:dword ptr[spancountminus1],ecx |
|
|
|
; finish up the s and t calcs |
|
fxch st(1) ; z*64k | 1/z | t/z | s/z |
|
|
|
fld st(0) ; z*64k | z*64k | 1/z | t/z | s/z |
|
fmul st(0),st(4) ; s | z*64k | 1/z | t/z | s/z |
|
fxch st(1) ; z*64k | s | 1/z | t/z | s/z |
|
fmul st(0),st(3) ; t | s | 1/z | t/z | s/z |
|
fxch st(1) ; s | t | 1/z | t/z | s/z |
|
fistp ds:dword ptr[s] ; 1/z | t | t/z | s/z |
|
fistp ds:dword ptr[t] ; 1/z | t/z | s/z |
|
|
|
; advance 1/z, t/z and s/z by (count-1) single-pixel steps, i.e. to the
; last pixel of the span
fild ds:dword ptr[spancountminus1] |
|
|
|
fld ds:dword ptr[_d_tdivzstepu] ; C(d_tdivzstepu) | spancountminus1 |
|
fld ds:dword ptr[_d_zistepu] ; C(d_zistepu) | C(d_tdivzstepu) | spancountminus1 |
|
fmul st(0),st(2) ; C(d_zistepu)*scm1 | C(d_tdivzstepu) | scm1 |
|
fxch st(1) ; C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1 |
|
fmul st(0),st(2) ; C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1 |
|
fxch st(2) ; scm1 | C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1 |
|
fmul ds:dword ptr[_d_sdivzstepu] ; C(d_sdivzstepu)*scm1 | C(d_zistepu)*scm1 | |
|
; C(d_tdivzstepu)*scm1 |
|
fxch st(1) ; C(d_zistepu)*scm1 | C(d_sdivzstepu)*scm1 | |
|
; C(d_tdivzstepu)*scm1 |
|
faddp st(3),st(0) ; C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1 |
|
fxch st(1) ; C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1 |
|
faddp st(3),st(0) ; C(d_sdivzstepu)*scm1 |
|
faddp st(3),st(0) |
|
|
|
fld ds:dword ptr[fp_64k] |
|
fdiv st(0),st(1) ; this is what we've gone to all this trouble to |
|
; overlap |
|
jmp LFDIVInFlight1 |
|
|
|
LCleanup1: |
|
; finish up the s and t calcs |
|
fxch st(1) ; z*64k | 1/z | t/z | s/z |
|
|
|
fld st(0) ; z*64k | z*64k | 1/z | t/z | s/z |
|
fmul st(0),st(4) ; s | z*64k | 1/z | t/z | s/z |
|
fxch st(1) ; z*64k | s | 1/z | t/z | s/z |
|
fmul st(0),st(3) ; t | s | 1/z | t/z | s/z |
|
fxch st(1) ; s | t | 1/z | t/z | s/z |
|
fistp ds:dword ptr[s] ; 1/z | t | t/z | s/z |
|
fistp ds:dword ptr[t] ; 1/z | t/z | s/z |
|
jmp LFDIVInFlight1 |
|
|
|
align 4 |
|
LSetupNotLast1: |
|
; finish up the s and t calcs |
|
fxch st(1) ; z*64k | 1/z | t/z | s/z |
|
|
|
fld st(0) ; z*64k | z*64k | 1/z | t/z | s/z |
|
fmul st(0),st(4) ; s | z*64k | 1/z | t/z | s/z |
|
fxch st(1) ; z*64k | s | 1/z | t/z | s/z |
|
fmul st(0),st(3) ; t | s | 1/z | t/z | s/z |
|
fxch st(1) ; s | t | 1/z | t/z | s/z |
|
fistp ds:dword ptr[s] ; 1/z | t | t/z | s/z |
|
fistp ds:dword ptr[t] ; 1/z | t/z | s/z |
|
|
|
; advance 1/z, s/z and t/z by the precomputed 16-pixel steps
fadd ds:dword ptr[zi16stepu] |
|
fxch st(2) |
|
fadd ds:dword ptr[sdivz16stepu] |
|
fxch st(2) |
|
fld ds:dword ptr[tdivz16stepu] |
|
faddp st(2),st(0) |
|
fld ds:dword ptr[fp_64k] |
|
fdiv st(0),st(1) ; z = 1/1/z |
|
; this is what we've gone to all this trouble to |
|
; overlap |
|
; LFDIVInFlight1 -- integer work for the first pixel of the span.
;
; In:   esi = _sadjust, edx = _tadjust, [s]/[t] = unadjusted start values,
;       ecx = count (> 16) or count-1 (<= 16), edi = destination pixel.
; Does: adds the adjusts, clamps s to [0, _bbextents] and t to
;       [0, _bbextentt] through the out-of-line LClampHighOrLow0/1 stubs,
;       stores the clamped values back to [s]/[t], stores their low 16 bits
;       left-justified in [sfracf]/[tfracf], and forms the source address.
; Out:  esi = pbase + (s >> 16) + (t >> 16) * _cachewidth.
; The range checks use an unsigned JA, so negative values also leave the
; in-line path. "mov" does not modify flags, which is why a store can sit
; between the second cmp and its ja.
LFDIVInFlight1: |
|
|
|
add esi,ds:dword ptr[s] |
|
add edx,ds:dword ptr[t] |
|
mov ebx,ds:dword ptr[_bbextents] |
|
mov ebp,ds:dword ptr[_bbextentt] |
|
cmp esi,ebx |
|
ja LClampHighOrLow0 |
|
LClampReentry0: |
|
mov ds:dword ptr[s],esi |
|
mov ebx,ds:dword ptr[pbase] |
|
shl esi,16 |
|
cmp edx,ebp |
|
mov ds:dword ptr[sfracf],esi |
|
ja LClampHighOrLow1 |
|
LClampReentry1: |
|
mov ds:dword ptr[t],edx |
|
mov esi,ds:dword ptr[s] ; sfrac = scans->sfrac; |
|
shl edx,16 |
|
mov eax,ds:dword ptr[t] ; tfrac = scans->tfrac; |
|
sar esi,16 |
|
mov ds:dword ptr[tfracf],edx |
|
|
|
; |
|
; calculate the texture starting address |
|
; |
|
sar eax,16 |
|
mov edx,ds:dword ptr[_cachewidth] |
|
imul eax,edx ; (tfrac >> 16) * cachewidth |
|
add esi,ebx |
|
add esi,eax ; psource = pbase + (sfrac >> 16) + |
|
; ((tfrac >> 16) * cachewidth); |
|
; |
|
; determine whether last span or not |
|
; |
|
cmp ecx,16 |
|
jna LLastSegment |
|
|
|
; |
|
; not the last segment; do full 16-wide segment |
|
; |
|
; LNotLastSegment -- draw one full 16-pixel segment.
;
; In:   st(0) = z*64k for the end of this segment (divide started earlier),
;       followed by 1/z | t/z | s/z; esi = source texel address,
;       edi = destination, ecx = pixels remaining (> 16),
;       [s]/[t] = clamped start values, [sfracf]/[tfracf] = their fractions.
; Does: converts the divide result into snext/tnext, adds the adjusts and
;       clamps both to [4096, _bbextents/_bbextentt] (stubs LClampLow2/High2
;       and LClampLow3/High3), then derives the per-pixel step as
;       (next - current) / 16:
;         advancetable+4 = whole-texel advance when the t fraction does not
;                          carry; advancetable = the same plus one extra
;                          _cachewidth row, used when it does carry
;         ebp / [tstep]  = fractional s / t step, left-justified
;       Each pixel then costs: add t fraction, sbb ecx,ecx (ecx = -1 on
;       carry, else 0) to select the advancetable entry, add s fraction, and
;       adc so the s carry adds one more texel.
;       After the first half of the segment the divide for the next segment
;       is started (LSetupNotLast2, or the (count-1)-step variant when 16 or
;       fewer pixels remain) so it overlaps the remaining pixel writes.
; Out:  edi advanced by 16, [s]/[t] = snext/tnext, ecx = [counttemp]
;       (remaining count, or remaining count - 1 once 16 or fewer remain).
LNotLastSegment: |
|
|
|
; |
|
; advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to |
|
; get there |
|
; |
|
|
|
; pick up after the FDIV that was left in flight previously |
|
|
|
fld st(0) ; duplicate it |
|
fmul st(0),st(4) ; s = s/z * z |
|
fxch st(1) |
|
fmul st(0),st(3) ; t = t/z * z |
|
fxch st(1) |
|
fistp ds:dword ptr[snext] |
|
fistp ds:dword ptr[tnext] |
|
mov eax,ds:dword ptr[snext] |
|
mov edx,ds:dword ptr[tnext] |
|
|
|
mov bl,ds:byte ptr[esi] ; get first source texel |
|
sub ecx,16 ; count off this segment's pixels |
|
mov ebp,ds:dword ptr[_sadjust] |
|
mov ds:dword ptr[counttemp],ecx ; remember count of remaining pixels |
|
|
|
mov ecx,ds:dword ptr[_tadjust] |
|
mov ds:byte ptr[edi],bl ; store first dest pixel |
|
|
|
add ebp,eax |
|
add ecx,edx |
|
|
|
mov eax,ds:dword ptr[_bbextents] |
|
mov edx,ds:dword ptr[_bbextentt] |
|
|
|
cmp ebp,4096 |
|
jl LClampLow2 |
|
cmp ebp,eax |
|
ja LClampHigh2 |
|
LClampReentry2: |
|
|
|
cmp ecx,4096 |
|
jl LClampLow3 |
|
cmp ecx,edx |
|
ja LClampHigh3 |
|
LClampReentry3: |
|
|
|
mov ds:dword ptr[snext],ebp |
|
mov ds:dword ptr[tnext],ecx |
|
|
|
sub ebp,ds:dword ptr[s] |
|
sub ecx,ds:dword ptr[t] |
|
|
|
; |
|
; set up advancetable |
|
; |
|
mov eax,ecx |
|
mov edx,ebp |
|
sar eax,20 ; tstep >>= 20: whole-texel part of (16-pixel delta / 16) |
|
jz LZero |
|
sar edx,20 ; sstep >>= 20: whole-texel part of (16-pixel delta / 16) |
|
mov ebx,ds:dword ptr[_cachewidth] |
|
imul eax,ebx |
|
jmp LSetUp1 |
|
|
|
; whole-texel t step is zero: skip the multiply by cachewidth
LZero: |
|
sar edx,20 ; sstep >>= 20: whole-texel part of (16-pixel delta / 16) |
|
mov ebx,ds:dword ptr[_cachewidth] |
|
|
|
LSetUp1: |
|
|
|
add eax,edx ; add in sstep |
|
; (tstep >> 16) * cachewidth + (sstep >> 16); |
|
mov edx,ds:dword ptr[tfracf] |
|
mov ds:dword ptr[advancetable+4],eax ; advance base in t |
|
add eax,ebx ; ((tstep >> 16) + 1) * cachewidth + |
|
; (sstep >> 16); |
|
shl ebp,12 ; left-justify sstep fractional part |
|
mov ebx,ds:dword ptr[sfracf] |
|
shl ecx,12 ; left-justify tstep fractional part |
|
mov ds:dword ptr[advancetable],eax ; advance extra in t |
|
|
|
mov ds:dword ptr[tstep],ecx |
|
add edx,ecx ; advance tfrac fractional part by tstep frac |
|
|
|
sbb ecx,ecx ; turn tstep carry into -1 (0 if none) |
|
add ebx,ebp ; advance sfrac fractional part by sstep frac |
|
adc esi,ds:dword ptr[advancetable+4+ecx*4] ; point to next source texel |
|
|
|
add edx,ds:dword ptr[tstep] |
|
sbb ecx,ecx |
|
mov al,ds:byte ptr[esi] |
|
add ebx,ebp |
|
mov ds:byte ptr[1+edi],al |
|
adc esi,ds:dword ptr[advancetable+4+ecx*4] |
|
|
|
add edx,ds:dword ptr[tstep] |
|
sbb ecx,ecx |
|
add ebx,ebp |
|
mov al,ds:byte ptr[esi] |
|
adc esi,ds:dword ptr[advancetable+4+ecx*4] |
|
|
|
add edx,ds:dword ptr[tstep] |
|
sbb ecx,ecx |
|
mov ds:byte ptr[2+edi],al |
|
add ebx,ebp |
|
mov al,ds:byte ptr[esi] |
|
adc esi,ds:dword ptr[advancetable+4+ecx*4] |
|
|
|
add edx,ds:dword ptr[tstep] |
|
sbb ecx,ecx |
|
mov ds:byte ptr[3+edi],al |
|
add ebx,ebp |
|
mov al,ds:byte ptr[esi] |
|
adc esi,ds:dword ptr[advancetable+4+ecx*4] |
|
|
|
add edx,ds:dword ptr[tstep] |
|
sbb ecx,ecx |
|
mov ds:byte ptr[4+edi],al |
|
add ebx,ebp |
|
mov al,ds:byte ptr[esi] |
|
adc esi,ds:dword ptr[advancetable+4+ecx*4] |
|
|
|
add edx,ds:dword ptr[tstep] |
|
sbb ecx,ecx |
|
mov ds:byte ptr[5+edi],al |
|
add ebx,ebp |
|
mov al,ds:byte ptr[esi] |
|
adc esi,ds:dword ptr[advancetable+4+ecx*4] |
|
|
|
add edx,ds:dword ptr[tstep] |
|
sbb ecx,ecx |
|
mov ds:byte ptr[6+edi],al |
|
add ebx,ebp |
|
mov al,ds:byte ptr[esi] |
|
adc esi,ds:dword ptr[advancetable+4+ecx*4] |
|
|
|
add edx,ds:dword ptr[tstep] |
|
sbb ecx,ecx |
|
mov ds:byte ptr[7+edi],al |
|
add ebx,ebp |
|
mov al,ds:byte ptr[esi] |
|
adc esi,ds:dword ptr[advancetable+4+ecx*4] |
|
|
|
|
|
; |
|
; start FDIV for end of next segment in flight, so it can overlap |
|
; |
|
mov ecx,ds:dword ptr[counttemp] |
|
cmp ecx,16 ; more than one segment after this? |
|
ja LSetupNotLast2 ; yes |
|
|
|
; 16 or fewer pixels remain: ecx becomes (remaining - 1) from here on
dec ecx |
|
jz LFDIVInFlight2 ; if only one pixel, no need to start an FDIV |
|
mov ds:dword ptr[spancountminus1],ecx |
|
fild ds:dword ptr[spancountminus1] |
|
|
|
fld ds:dword ptr[_d_zistepu] ; C(d_zistepu) | spancountminus1 |
|
fmul st(0),st(1) ; C(d_zistepu)*scm1 | scm1 |
|
fld ds:dword ptr[_d_tdivzstepu] ; C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1 |
|
fmul st(0),st(2) ; C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1 |
|
fxch st(1) ; C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1 | scm1 |
|
faddp st(3),st(0) ; C(d_tdivzstepu)*scm1 | scm1 |
|
fxch st(1) ; scm1 | C(d_tdivzstepu)*scm1 |
|
fmul ds:dword ptr[_d_sdivzstepu] ; C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1 |
|
fxch st(1) ; C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1 |
|
faddp st(3),st(0) ; C(d_sdivzstepu)*scm1 |
|
fld ds:dword ptr[fp_64k] ; 64k | C(d_sdivzstepu)*scm1 |
|
fxch st(1) ; C(d_sdivzstepu)*scm1 | 64k |
|
faddp st(4),st(0) ; 64k |
|
|
|
fdiv st(0),st(1) ; this is what we've gone to all this trouble to |
|
; overlap |
|
jmp LFDIVInFlight2 |
|
|
|
align 4 |
|
LSetupNotLast2: |
|
fadd ds:dword ptr[zi16stepu] |
|
fxch st(2) |
|
fadd ds:dword ptr[sdivz16stepu] |
|
fxch st(2) |
|
fld ds:dword ptr[tdivz16stepu] |
|
faddp st(2),st(0) |
|
fld ds:dword ptr[fp_64k] |
|
fdiv st(0),st(1) ; z = 1/1/z |
|
; this is what we've gone to all this trouble to |
|
; overlap |
|
LFDIVInFlight2: |
|
; ecx is about to be reused as the carry mask, so park the count first
mov ds:dword ptr[counttemp],ecx |
|
|
|
add edx,ds:dword ptr[tstep] |
|
sbb ecx,ecx |
|
mov ds:byte ptr[8+edi],al |
|
add ebx,ebp |
|
mov al,ds:byte ptr[esi] |
|
adc esi,ds:dword ptr[advancetable+4+ecx*4] |
|
|
|
add edx,ds:dword ptr[tstep] |
|
sbb ecx,ecx |
|
mov ds:byte ptr[9+edi],al |
|
add ebx,ebp |
|
mov al,ds:byte ptr[esi] |
|
adc esi,ds:dword ptr[advancetable+4+ecx*4] |
|
|
|
add edx,ds:dword ptr[tstep] |
|
sbb ecx,ecx |
|
mov ds:byte ptr[10+edi],al |
|
add ebx,ebp |
|
mov al,ds:byte ptr[esi] |
|
adc esi,ds:dword ptr[advancetable+4+ecx*4] |
|
|
|
add edx,ds:dword ptr[tstep] |
|
sbb ecx,ecx |
|
mov ds:byte ptr[11+edi],al |
|
add ebx,ebp |
|
mov al,ds:byte ptr[esi] |
|
adc esi,ds:dword ptr[advancetable+4+ecx*4] |
|
|
|
add edx,ds:dword ptr[tstep] |
|
sbb ecx,ecx |
|
mov ds:byte ptr[12+edi],al |
|
add ebx,ebp |
|
mov al,ds:byte ptr[esi] |
|
adc esi,ds:dword ptr[advancetable+4+ecx*4] |
|
|
|
add edx,ds:dword ptr[tstep] |
|
sbb ecx,ecx |
|
mov ds:byte ptr[13+edi],al |
|
add ebx,ebp |
|
mov al,ds:byte ptr[esi] |
|
adc esi,ds:dword ptr[advancetable+4+ecx*4] |
|
|
|
add edx,ds:dword ptr[tstep] |
|
sbb ecx,ecx |
|
mov ds:byte ptr[14+edi],al |
|
add ebx,ebp |
|
mov al,ds:byte ptr[esi] |
|
adc esi,ds:dword ptr[advancetable+4+ecx*4] |
|
|
|
add edi,16 |
|
mov ds:dword ptr[tfracf],edx |
|
mov edx,ds:dword ptr[snext] |
|
mov ds:dword ptr[sfracf],ebx |
|
mov ebx,ds:dword ptr[tnext] |
|
mov ds:dword ptr[s],edx |
|
mov ds:dword ptr[t],ebx |
|
|
|
mov ecx,ds:dword ptr[counttemp] ; retrieve count |
|
|
|
; |
|
; determine whether last span or not |
|
; |
|
cmp ecx,16 ; are there multiple segments remaining? |
|
; the 16th pixel of the segment; edi has already been advanced, and the
; store leaves the flags from the cmp intact for the ja
mov ds:byte ptr[-1+edi],al |
|
ja LNotLastSegment ; yes |
|
|
|
; |
|
; last segment of scan |
|
; |
|
; LLastSegment -- set up the final (1..16 pixel) segment of a span.
;
; In:   ecx = pixels in this segment minus one (0..15),
;       esi = source texel address, edi = destination,
;       [s]/[t] = clamped start values, [sfracf]/[tfracf] = their fractions;
;       when ecx != 0, st(0) = z*64k at the last pixel (divide in flight).
; Does: ecx == 0 -> LNoSteps: just fetch the one texel.
;       Otherwise the first pixel is written here, snext/tnext are formed
;       and clamped to [4096, _bbextents/_bbextentt] (stubs LClampLow4/High4,
;       LClampLow5/High5), and the per-pixel step is (next - current)/ecx:
;         ecx == 1 -> LOnlyOneStep, the step is the whole delta
;         ecx >= 2 -> multiply by reciprocal_table_16[ecx-2]; the one-operand
;                     imul leaves the 64-bit product in edx:eax and only the
;                     high half (edx) is kept as the step
;       LSetEntryvec then builds advancetable/tstep exactly as the full
;       segment does (with 16-bit shifts, since the step is already per
;       pixel), advances once, and jumps through entryvec_table_16[ecx].
; Out (at the indirect jump): eax = fractional t step (also in [tstep]),
;       ebp = fractional s step, edx/ebx = t/s fraction after one step,
;       esi = second texel of the segment.
LLastSegment: |
|
|
|
; |
|
; advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to |
|
; get there. The number of pixels left is variable, and we want to land on the |
|
; last pixel, not step one past it, so we can't run into arithmetic problems |
|
; |
|
test ecx,ecx |
|
jz LNoSteps ; just draw the last pixel and we're done |
|
|
|
; pick up after the FDIV that was left in flight previously |
|
|
|
|
|
fld st(0) ; duplicate it |
|
fmul st(0),st(4) ; s = s/z * z |
|
fxch st(1) |
|
fmul st(0),st(3) ; t = t/z * z |
|
fxch st(1) |
|
fistp ds:dword ptr[snext] |
|
fistp ds:dword ptr[tnext] |
|
|
|
mov al,ds:byte ptr[esi] ; load first texel in segment |
|
mov ebx,ds:dword ptr[_tadjust] |
|
mov ds:byte ptr[edi],al ; store first pixel in segment |
|
mov eax,ds:dword ptr[_sadjust] |
|
|
|
add eax,ds:dword ptr[snext] |
|
add ebx,ds:dword ptr[tnext] |
|
|
|
mov ebp,ds:dword ptr[_bbextents] |
|
mov edx,ds:dword ptr[_bbextentt] |
|
|
|
cmp eax,4096 |
|
jl LClampLow4 |
|
cmp eax,ebp |
|
ja LClampHigh4 |
|
LClampReentry4: |
|
mov ds:dword ptr[snext],eax |
|
|
|
cmp ebx,4096 |
|
jl LClampLow5 |
|
cmp ebx,edx |
|
ja LClampHigh5 |
|
LClampReentry5: |
|
|
|
cmp ecx,1 ; don't bother |
|
je LOnlyOneStep ; if two pixels in segment, there's only one step, |
|
; of the segment length |
|
sub eax,ds:dword ptr[s] |
|
sub ebx,ds:dword ptr[t] |
|
|
|
add eax,eax ; convert to 15.17 format so multiply by 1.31 |
|
add ebx,ebx ; reciprocal yields 16.48 |
|
|
|
imul ds:dword ptr[reciprocal_table_16-8+ecx*4] ; sstep = (snext - s) / |
|
; (spancount-1) |
|
mov ebp,edx |
|
|
|
mov eax,ebx |
|
imul ds:dword ptr[reciprocal_table_16-8+ecx*4] ; tstep = (tnext - t) / |
|
; (spancount-1) |
|
; here: ebp = sstep, edx = tstep (both 16.16), ecx = steps in the segment
LSetEntryvec: |
|
; |
|
; set up advancetable |
|
; |
|
mov ebx,ds:dword ptr[entryvec_table_16+ecx*4] |
|
mov eax,edx |
|
mov ds:dword ptr[jumptemp],ebx ; entry point into code for RET later |
|
mov ecx,ebp |
|
sar edx,16 ; tstep >>= 16; |
|
mov ebx,ds:dword ptr[_cachewidth] |
|
sar ecx,16 ; sstep >>= 16; |
|
imul edx,ebx |
|
|
|
add edx,ecx ; add in sstep |
|
; (tstep >> 16) * cachewidth + (sstep >> 16); |
|
mov ecx,ds:dword ptr[tfracf] |
|
mov ds:dword ptr[advancetable+4],edx ; advance base in t |
|
add edx,ebx ; ((tstep >> 16) + 1) * cachewidth + |
|
; (sstep >> 16); |
|
shl ebp,16 ; left-justify sstep fractional part |
|
mov ebx,ds:dword ptr[sfracf] |
|
shl eax,16 ; left-justify tstep fractional part |
|
mov ds:dword ptr[advancetable],edx ; advance extra in t |
|
|
|
mov ds:dword ptr[tstep],eax |
|
mov edx,ecx |
|
; first step: t-fraction carry selects the advancetable entry, s-fraction
; carry adds one more texel through the adc
add edx,eax |
|
sbb ecx,ecx |
|
add ebx,ebp |
|
adc esi,ds:dword ptr[advancetable+4+ecx*4] |
|
|
|
jmp dword ptr[jumptemp] ; jump to the number-of-pixels handler |
|
|
|
;---------------------------------------- |
|
|
|
; single-pixel segment: no divide was started, so the FP stack is not touched
LNoSteps: |
|
mov al,ds:byte ptr[esi] ; load first texel in segment |
|
sub edi,15 ; adjust for hardwired offset |
|
jmp LEndSpan |
|
|
|
|
|
; two-pixel segment: the single step is the whole clamped delta
LOnlyOneStep: |
|
sub eax,ds:dword ptr[s] |
|
sub ebx,ds:dword ptr[t] |
|
mov ebp,eax |
|
mov edx,ebx |
|
jmp LSetEntryvec |
|
|
|
;---------------------------------------- |
|
|
|
; EntryN_16 -- handlers for a final segment of N pixels (N = 2..16).
;
; Reached only through "jmp dword ptr[jumptemp]" in LSetEntryvec, which
; loads the target from entryvec_table_16[pixels-1]; the table itself is
; defined elsewhere (presumably entry k points at Entry(k+1)_16 -- confirm
; against the table). The labels are public so that table can reference them.
;
; On entry: the first pixel of the segment has already been stored at [edi],
;   esi = second texel, eax = fractional t step (same value as [tstep]),
;   ebp = fractional s step, edx/ebx = t/s fractions after the first step.
;
; Each handler backs edi up by (16 - N) so that the hard-wired store offsets
; of the shared unrolled tail (LEntry15_16 ... LEntry3_16, finishing with the
; store to 15+edi in LEndSpan) address this segment's pixels, performs the
; part of one step that the tail expects to have been done already, and
; jumps into the tail at LEntryN_16. Entry16_16 needs no edi adjustment and
; falls straight into the tail.
; Every LEntryN_16 label sits immediately after an "add edx,[tstep]" and
; begins with the matching "sbb ecx,ecx", which is why Entry4_16..Entry15_16
; end with that add before jumping.
public Entry2_16, Entry3_16, Entry4_16, Entry5_16 |
|
public Entry6_16, Entry7_16, Entry8_16, Entry9_16 |
|
public Entry10_16, Entry11_16, Entry12_16, Entry13_16 |
|
public Entry14_16, Entry15_16, Entry16_16 |
|
|
|
Entry2_16: |
|
sub edi,14 ; adjust for hardwired offsets |
|
mov al,ds:byte ptr[esi] |
|
jmp LEntry2_16 |
|
|
|
;---------------------------------------- |
|
|
|
Entry3_16: |
|
sub edi,13 ; adjust for hardwired offsets |
|
add edx,eax |
|
mov al,ds:byte ptr[esi] |
|
sbb ecx,ecx |
|
add ebx,ebp |
|
adc esi,ds:dword ptr[advancetable+4+ecx*4] |
|
jmp LEntry3_16 |
|
|
|
;---------------------------------------- |
|
|
|
Entry4_16: |
|
sub edi,12 ; adjust for hardwired offsets |
|
add edx,eax |
|
mov al,ds:byte ptr[esi] |
|
sbb ecx,ecx |
|
add ebx,ebp |
|
adc esi,ds:dword ptr[advancetable+4+ecx*4] |
|
add edx,ds:dword ptr[tstep] |
|
jmp LEntry4_16 |
|
|
|
;---------------------------------------- |
|
|
|
Entry5_16: |
|
sub edi,11 ; adjust for hardwired offsets |
|
add edx,eax |
|
mov al,ds:byte ptr[esi] |
|
sbb ecx,ecx |
|
add ebx,ebp |
|
adc esi,ds:dword ptr[advancetable+4+ecx*4] |
|
add edx,ds:dword ptr[tstep] |
|
jmp LEntry5_16 |
|
|
|
;---------------------------------------- |
|
|
|
Entry6_16: |
|
sub edi,10 ; adjust for hardwired offsets |
|
add edx,eax |
|
mov al,ds:byte ptr[esi] |
|
sbb ecx,ecx |
|
add ebx,ebp |
|
adc esi,ds:dword ptr[advancetable+4+ecx*4] |
|
add edx,ds:dword ptr[tstep] |
|
jmp LEntry6_16 |
|
|
|
;---------------------------------------- |
|
|
|
Entry7_16: |
|
sub edi,9 ; adjust for hardwired offsets |
|
add edx,eax |
|
mov al,ds:byte ptr[esi] |
|
sbb ecx,ecx |
|
add ebx,ebp |
|
adc esi,ds:dword ptr[advancetable+4+ecx*4] |
|
add edx,ds:dword ptr[tstep] |
|
jmp LEntry7_16 |
|
|
|
;---------------------------------------- |
|
|
|
Entry8_16: |
|
sub edi,8 ; adjust for hardwired offsets |
|
add edx,eax |
|
mov al,ds:byte ptr[esi] |
|
sbb ecx,ecx |
|
add ebx,ebp |
|
adc esi,ds:dword ptr[advancetable+4+ecx*4] |
|
add edx,ds:dword ptr[tstep] |
|
jmp LEntry8_16 |
|
|
|
;---------------------------------------- |
|
|
|
Entry9_16: |
|
sub edi,7 ; adjust for hardwired offsets |
|
add edx,eax |
|
mov al,ds:byte ptr[esi] |
|
sbb ecx,ecx |
|
add ebx,ebp |
|
adc esi,ds:dword ptr[advancetable+4+ecx*4] |
|
add edx,ds:dword ptr[tstep] |
|
jmp LEntry9_16 |
|
|
|
;---------------------------------------- |
|
|
|
Entry10_16: |
|
sub edi,6 ; adjust for hardwired offsets |
|
add edx,eax |
|
mov al,ds:byte ptr[esi] |
|
sbb ecx,ecx |
|
add ebx,ebp |
|
adc esi,ds:dword ptr[advancetable+4+ecx*4] |
|
add edx,ds:dword ptr[tstep] |
|
jmp LEntry10_16 |
|
|
|
;---------------------------------------- |
|
|
|
Entry11_16: |
|
sub edi,5 ; adjust for hardwired offsets |
|
add edx,eax |
|
mov al,ds:byte ptr[esi] |
|
sbb ecx,ecx |
|
add ebx,ebp |
|
adc esi,ds:dword ptr[advancetable+4+ecx*4] |
|
add edx,ds:dword ptr[tstep] |
|
jmp LEntry11_16 |
|
|
|
;---------------------------------------- |
|
|
|
Entry12_16: |
|
sub edi,4 ; adjust for hardwired offsets |
|
add edx,eax |
|
mov al,ds:byte ptr[esi] |
|
sbb ecx,ecx |
|
add ebx,ebp |
|
adc esi,ds:dword ptr[advancetable+4+ecx*4] |
|
add edx,ds:dword ptr[tstep] |
|
jmp LEntry12_16 |
|
|
|
;---------------------------------------- |
|
|
|
Entry13_16: |
|
sub edi,3 ; adjust for hardwired offsets |
|
add edx,eax |
|
mov al,ds:byte ptr[esi] |
|
sbb ecx,ecx |
|
add ebx,ebp |
|
adc esi,ds:dword ptr[advancetable+4+ecx*4] |
|
add edx,ds:dword ptr[tstep] |
|
jmp LEntry13_16 |
|
|
|
;---------------------------------------- |
|
|
|
Entry14_16: |
|
sub edi,2 ; adjust for hardwired offsets |
|
add edx,eax |
|
mov al,ds:byte ptr[esi] |
|
sbb ecx,ecx |
|
add ebx,ebp |
|
adc esi,ds:dword ptr[advancetable+4+ecx*4] |
|
add edx,ds:dword ptr[tstep] |
|
jmp LEntry14_16 |
|
|
|
;---------------------------------------- |
|
|
|
Entry15_16: |
|
dec edi ; adjust for hardwired offsets |
|
add edx,eax |
|
mov al,ds:byte ptr[esi] |
|
sbb ecx,ecx |
|
add ebx,ebp |
|
adc esi,ds:dword ptr[advancetable+4+ecx*4] |
|
add edx,ds:dword ptr[tstep] |
|
jmp LEntry15_16 |
|
|
|
;---------------------------------------- |
|
|
|
Entry16_16: |
|
add edx,eax |
|
mov al,ds:byte ptr[esi] |
|
sbb ecx,ecx |
|
add ebx,ebp |
|
adc esi,ds:dword ptr[advancetable+4+ecx*4] |
|
|
|
add edx,ds:dword ptr[tstep] |
|
sbb ecx,ecx |
|
mov ds:byte ptr[1+edi],al |
|
add ebx,ebp |
|
mov al,ds:byte ptr[esi] |
|
adc esi,ds:dword ptr[advancetable+4+ecx*4] |
|
add edx,ds:dword ptr[tstep] |
|
LEntry15_16: |
|
sbb ecx,ecx |
|
mov ds:byte ptr[2+edi],al |
|
add ebx,ebp |
|
mov al,ds:byte ptr[esi] |
|
adc esi,ds:dword ptr[advancetable+4+ecx*4] |
|
add edx,ds:dword ptr[tstep] |
|
LEntry14_16: |
|
sbb ecx,ecx |
|
mov ds:byte ptr[3+edi],al |
|
add ebx,ebp |
|
mov al,ds:byte ptr[esi] |
|
adc esi,ds:dword ptr[advancetable+4+ecx*4] |
|
add edx,ds:dword ptr[tstep] |
|
LEntry13_16: |
|
sbb ecx,ecx |
|
mov ds:byte ptr[4+edi],al |
|
add ebx,ebp |
|
mov al,ds:byte ptr[esi] |
|
adc esi,ds:dword ptr[advancetable+4+ecx*4] |
|
add edx,ds:dword ptr[tstep] |
|
LEntry12_16: |
|
sbb ecx,ecx |
|
mov ds:byte ptr[5+edi],al |
|
add ebx,ebp |
|
mov al,ds:byte ptr[esi] |
|
adc esi,ds:dword ptr[advancetable+4+ecx*4] |
|
add edx,ds:dword ptr[tstep] |
|
LEntry11_16: |
|
sbb ecx,ecx |
|
mov ds:byte ptr[6+edi],al |
|
add ebx,ebp |
|
mov al,ds:byte ptr[esi] |
|
adc esi,ds:dword ptr[advancetable+4+ecx*4] |
|
add edx,ds:dword ptr[tstep] |
|
LEntry10_16: |
|
sbb ecx,ecx |
|
mov ds:byte ptr[7+edi],al |
|
add ebx,ebp |
|
mov al,ds:byte ptr[esi] |
|
adc esi,ds:dword ptr[advancetable+4+ecx*4] |
|
add edx,ds:dword ptr[tstep] |
|
LEntry9_16: |
|
sbb ecx,ecx |
|
mov ds:byte ptr[8+edi],al |
|
add ebx,ebp |
|
mov al,ds:byte ptr[esi] |
|
adc esi,ds:dword ptr[advancetable+4+ecx*4] |
|
add edx,ds:dword ptr[tstep] |
|
LEntry8_16: |
|
sbb ecx,ecx |
|
mov ds:byte ptr[9+edi],al |
|
add ebx,ebp |
|
mov al,ds:byte ptr[esi] |
|
adc esi,ds:dword ptr[advancetable+4+ecx*4] |
|
add edx,ds:dword ptr[tstep] |
|
LEntry7_16: |
|
sbb ecx,ecx |
|
mov ds:byte ptr[10+edi],al |
|
add ebx,ebp |
|
mov al,ds:byte ptr[esi] |
|
adc esi,ds:dword ptr[advancetable+4+ecx*4] |
|
add edx,ds:dword ptr[tstep] |
|
LEntry6_16: |
|
sbb ecx,ecx |
|
mov ds:byte ptr[11+edi],al |
|
add ebx,ebp |
|
mov al,ds:byte ptr[esi] |
|
adc esi,ds:dword ptr[advancetable+4+ecx*4] |
|
add edx,ds:dword ptr[tstep] |
|
LEntry5_16: |
|
sbb ecx,ecx |
|
mov ds:byte ptr[12+edi],al |
|
add ebx,ebp |
|
mov al,ds:byte ptr[esi] |
|
adc esi,ds:dword ptr[advancetable+4+ecx*4] |
|
add edx,ds:dword ptr[tstep] |
|
LEntry4_16: |
|
sbb ecx,ecx |
|
mov ds:byte ptr[13+edi],al |
|
add ebx,ebp |
|
mov al,ds:byte ptr[esi] |
|
adc esi,ds:dword ptr[advancetable+4+ecx*4] |
|
LEntry3_16: |
|
mov ds:byte ptr[14+edi],al |
|
mov al,ds:byte ptr[esi] |
|
LEntry2_16: |
|
|
|
; LEndSpan -- finish the current span and advance to the next one.
;
; In:   al = texel for the last pixel of the span, and edi positioned so that
;       this pixel lives at 15+edi (every path into here has adjusted edi
;       for that hard-wired offset); x87 stack = 1/z | t/z | s/z.
; Does: pops the three FP values, follows espan_t_pnext from the span pointer
;       saved in [pspantemp], stores the last pixel, and loops while the next
;       pointer is non-null. Otherwise restores the registers pushed at entry
;       and returns.
LEndSpan: |
|
|
|
; |
|
; clear s/z, t/z, 1/z from FP stack |
|
; |
|
fstp st(0) |
|
fstp st(0) |
|
fstp st(0) |
|
|
|
mov ebx,ds:dword ptr[pspantemp] ; restore spans pointer |
|
mov ebx,ds:dword ptr[espan_t_pnext+ebx] ; point to next span |
|
test ebx,ebx ; any more spans? |
|
; store of the final pixel sits between the test and the jnz; mov leaves the
; flags untouched
mov ds:byte ptr[15+edi],al |
|
jnz LSpanLoop ; more spans |
|
|
|
pop ebx ; restore register variables |
|
pop esi |
|
pop edi |
|
pop ebp ; restore the caller's stack frame |
|
ret |
|
|
|
|
|
;----------------------------------------------------------------------
; 8-bpp horizontal span z drawing code for polygons, with no transparency.
;
; Assumes there is at least one span in pzspans, and that every span
; contains at least one pixel
;
; In:    one stack argument, the first span descriptor (read from
;        [pzspans+esp] after the four pushes); spans are chained through
;        espan_t_pnext.
; Does:  for every span, computes zi = d_ziorigin + v*d_zistepv + u*d_zistepu,
;        converts it to a 32-bit integer scaled by 2^31 (izi), and writes the
;        high 16 bits of izi, stepped by izistep per pixel, as words starting
;        at _d_pzbuffer + v*_d_zrowbytes + u*2.
;        A span whose starting 1/z compares greater than float_point5 is
;        written as the constant 040000000h >> 16 instead.
; Saves: ebp, edi, esi, ebx (pushed/popped). Uses eax, ecx, edx.
;        The x87 stack is left as it was found.
; Relies on FP exceptions being masked (see the fistp notes below).
;----------------------------------------------------------------------

; z-clamp on a non-negative gradient span
; Forces a constant depth for this span only: ebx is zeroed here and reloaded
; from izistep at the top of the span loop.
LClamp:
        mov     edx,040000000h          ; izi = 0.5 scaled by 2^31
        xor     ebx,ebx                 ; no z step across the clamped span
        fstp    st(0)                   ; discard the unconverted 1/z
        jmp     LZDraw

; z-clamp on a negative gradient span
LClampNeg:
        mov     edx,040000000h          ; izi = 0.5 scaled by 2^31
        xor     ebx,ebx                 ; no z step across the clamped span
        fstp    st(0)                   ; discard the unconverted 1/z
        jmp     LZDrawNeg


pzspans equ     4+16                    ; 16 bytes of pushes + 4 more bytes
                                        ; (presumably the return address)

        public  _D_DrawZSpans
_D_DrawZSpans:
        push    ebp                     ; preserve caller's stack frame
        push    edi
        push    esi                     ; preserve register variables
        push    ebx

        fld     ds:dword ptr[_d_zistepu]
        mov     eax,ds:dword ptr[_d_zistepu]    ; raw float bits, for the test
        mov     esi,ds:dword ptr[pzspans+esp]
        test    eax,eax
; NOTE(review): only an all-zero bit pattern (+0.0) takes the LFNegSpan path;
; negative steps go through the path below and yield a negative izistep that
; is added. Both paths produce the same z values -- confirm whether JS was
; intended before changing it.
        jz      LFNegSpan

        fmul    ds:dword ptr[Float2ToThe31nd]
        fistp   ds:dword ptr[izistep]   ; note: we are relying on FP exceptions being turned
                                        ; off here to avoid range problems

LFSpanLoop:
; reload the step for every span: LClamp zeroes ebx for a clamped span, and
; without this reload every later span would be drawn with a zero z gradient
        mov     ebx,ds:dword ptr[izistep]

; set up the initial 1/z value
        fild    ds:dword ptr[espan_t_v+esi]
        fild    ds:dword ptr[espan_t_u+esi]
        mov     ecx,ds:dword ptr[espan_t_v+esi]
        mov     edi,ds:dword ptr[_d_pzbuffer]
        fmul    ds:dword ptr[_d_zistepu]
        fxch    st(1)
        fmul    ds:dword ptr[_d_zistepv]
        fxch    st(1)
        fadd    ds:dword ptr[_d_ziorigin]
        imul    ecx,ds:dword ptr[_d_zrowbytes]
        faddp   st(1),st(0)

; clamp if z is nearer than 2 (1/z > 0.5)
        fcom    ds:dword ptr[float_point5]
        add     edi,ecx
        mov     edx,ds:dword ptr[espan_t_u+esi]
        add     edx,edx                 ; word count
        mov     ecx,ds:dword ptr[espan_t_count+esi]
        add     edi,edx                 ; pdest = &pdestspan[scans->u];
        push    esi                     ; preserve spans pointer
        fnstsw  ax
        test    ah,045h                 ; C3|C2|C0 all clear: st(0) > 0.5
        jz      LClamp

        fmul    ds:dword ptr[Float2ToThe31nd]
        fistp   ds:dword ptr[izi]       ; note: we are relying on FP exceptions being turned
                                        ; off here to avoid problems when the span is closer
                                        ; than 1/(2**31)
        mov     edx,ds:dword ptr[izi]

; at this point:
; %ebx = izistep
; %ecx = count
; %edx = izi
; %edi = pdest

LZDraw:

; do a single pixel up front, if necessary to dword align the destination
        test    edi,2
        jz      LFMiddle
        mov     eax,edx
        add     edx,ebx
        shr     eax,16
        dec     ecx
        mov     ds:word ptr[edi],ax
        add     edi,2

; do middle a pair of aligned dwords at a time
LFMiddle:
        push    ecx                     ; keep the count for the odd-pixel test
        shr     ecx,1                   ; count / 2
        jz      LFLast                  ; no aligned dwords to do
        shr     ecx,1                   ; (count / 2) / 2
        jnc     LFMiddleLoop            ; even number of aligned dwords to do

; odd number of dwords: do one on its own, two z values packed low word first
        mov     eax,edx
        add     edx,ebx
        shr     eax,16
        mov     esi,edx
        add     edx,ebx
        and     esi,0FFFF0000h
        or      eax,esi
        mov     ds:dword ptr[edi],eax
        add     edi,4
        and     ecx,ecx
        jz      LFLast

LFMiddleLoop:
        mov     eax,edx
        add     edx,ebx
        shr     eax,16
        mov     esi,edx
        add     edx,ebx
        and     esi,0FFFF0000h
        or      eax,esi
        mov     ebp,edx
        mov     ds:dword ptr[edi],eax
        add     edx,ebx
        shr     ebp,16
        mov     esi,edx
        add     edx,ebx
        and     esi,0FFFF0000h
        or      ebp,esi
        mov     ds:dword ptr[4+edi],ebp ; FIXME: eliminate register contention
        add     edi,8

        dec     ecx
        jnz     LFMiddleLoop

LFLast:
        pop     ecx                     ; retrieve count
        pop     esi                     ; retrieve span pointer

; do the last, unaligned pixel, if there is one
        and     ecx,1                   ; is there an odd pixel left to do?
        jz      LFSpanDone              ; no
        shr     edx,16
        mov     ds:word ptr[edi],dx     ; do the final pixel's z

LFSpanDone:
        mov     esi,ds:dword ptr[espan_t_pnext+esi]
        test    esi,esi
        jnz     LFSpanLoop

        jmp     LFDone

LFNegSpan:
        fmul    ds:dword ptr[FloatMinus2ToThe31nd]
        fistp   ds:dword ptr[izistep]   ; note: we are relying on FP exceptions being turned
                                        ; off here to avoid range problems

LFNegSpanLoop:
; reload the step for every span: LClampNeg zeroes ebx for a clamped span
        mov     ebx,ds:dword ptr[izistep]

; set up the initial 1/z value
        fild    ds:dword ptr[espan_t_v+esi]
        fild    ds:dword ptr[espan_t_u+esi]
        mov     ecx,ds:dword ptr[espan_t_v+esi]
        mov     edi,ds:dword ptr[_d_pzbuffer]
        fmul    ds:dword ptr[_d_zistepu]
        fxch    st(1)
        fmul    ds:dword ptr[_d_zistepv]
        fxch    st(1)
        fadd    ds:dword ptr[_d_ziorigin]
        imul    ecx,ds:dword ptr[_d_zrowbytes]
        faddp   st(1),st(0)

; clamp if z is nearer than 2 (1/z > 0.5)
        fcom    ds:dword ptr[float_point5]
        add     edi,ecx
        mov     edx,ds:dword ptr[espan_t_u+esi]
        add     edx,edx                 ; word count
        mov     ecx,ds:dword ptr[espan_t_count+esi]
        add     edi,edx                 ; pdest = &pdestspan[scans->u];
        push    esi                     ; preserve spans pointer
        fnstsw  ax
        test    ah,045h                 ; C3|C2|C0 all clear: st(0) > 0.5
        jz      LClampNeg

        fmul    ds:dword ptr[Float2ToThe31nd]
        fistp   ds:dword ptr[izi]       ; note: we are relying on FP exceptions being turned
                                        ; off here to avoid problems when the span is closer
                                        ; than 1/(2**31)
        mov     edx,ds:dword ptr[izi]

; at this point:
; %ebx = izistep
; %ecx = count
; %edx = izi
; %edi = pdest

LZDrawNeg:

; do a single pixel up front, if necessary to dword align the destination
        test    edi,2
        jz      LFNegMiddle
        mov     eax,edx
        sub     edx,ebx
        shr     eax,16
        dec     ecx
        mov     ds:word ptr[edi],ax
        add     edi,2

; do middle a pair of aligned dwords at a time
LFNegMiddle:
        push    ecx                     ; keep the count for the odd-pixel test
        shr     ecx,1                   ; count / 2
        jz      LFNegLast               ; no aligned dwords to do
        shr     ecx,1                   ; (count / 2) / 2
        jnc     LFNegMiddleLoop         ; even number of aligned dwords to do

; odd number of dwords: do one on its own, two z values packed low word first
        mov     eax,edx
        sub     edx,ebx
        shr     eax,16
        mov     esi,edx
        sub     edx,ebx
        and     esi,0FFFF0000h
        or      eax,esi
        mov     ds:dword ptr[edi],eax
        add     edi,4
        and     ecx,ecx
        jz      LFNegLast

LFNegMiddleLoop:
        mov     eax,edx
        sub     edx,ebx
        shr     eax,16
        mov     esi,edx
        sub     edx,ebx
        and     esi,0FFFF0000h
        or      eax,esi
        mov     ebp,edx
        mov     ds:dword ptr[edi],eax
        sub     edx,ebx
        shr     ebp,16
        mov     esi,edx
        sub     edx,ebx
        and     esi,0FFFF0000h
        or      ebp,esi
        mov     ds:dword ptr[4+edi],ebp ; FIXME: eliminate register contention
        add     edi,8

        dec     ecx
        jnz     LFNegMiddleLoop

LFNegLast:
        pop     ecx                     ; retrieve count
        pop     esi                     ; retrieve span pointer

; do the last, unaligned pixel, if there is one
        and     ecx,1                   ; is there an odd pixel left to do?
        jz      LFNegSpanDone           ; no
        shr     edx,16
        mov     ds:word ptr[edi],dx     ; do the final pixel's z

LFNegSpanDone:
        mov     esi,ds:dword ptr[espan_t_pnext+esi]
        test    esi,esi
        jnz     LFNegSpanLoop

LFDone:
        pop     ebx                     ; restore register variables
        pop     esi
        pop     edi
        pop     ebp                     ; restore the caller's stack frame
        ret
|
|
|
|
|
|
|
_TEXT ENDS |
|
endif ;id386 |
|
END
|
|
|