| | Scout
Registered: Dec 2002 Posts: 1570 |
3D projection on the C=64 (or...how do I divide?)
Hi!
After 12 years I picked up coding again on the C=64.
What I want to make is a simple 3D starfield.
I know something about 3D programming, so that's no issue.
The only thing I stumbled on is how to implement 3D projection (X1=X/Z) and especially the dividing part of it.
How can I implement this on the C=64?
Any hints, tips?
Thanx!
R.
(Asm on the PC is so much easier ;-) |
|
... 25 posts hidden. Click here to view all posts.... |
| | Graham Account closed
Registered: Dec 2002 Posts: 990 |
1 off is ok, remember that you have an error in that range anyway due to rounding.
Oh and btw, I use X*(1/Y) too when I need a fast div. |
| | John West Account closed
Registered: Aug 2004 Posts: 4 |
... and if you don't mind it being up to two out, you don't need to calculate the low byte of the multiply. That saves a few cycles. |
| | AmiDog
Registered: Mar 2003 Posts: 97 |
Well, you do need an 9-bit reciprocal to get a 100% accurate result, but as the msb of the reciprocal is always one, you don't need to store it anywhere, and a multiply by 256 is just an add. Some C-code:
/* 9-bit reciprocal table, but msb is always one and not stored */
unsigned char r0[128];
void build_table(void)
{
unsigned long i, d, r;
for (i=0; i<128; i++) {
d = i | 0x80; /* 1 < d < 2 */
r = 0x80000000 / (d << 8); /* 1 / d -> 1/2 < r < 1 */
r += 0x80; /* round the reciprocal */
r <<= 1; /* remove constant 1-bit */
r0[i] = (unsigned char)(r >> 8);
}
}
unsigned char div8(unsigned char a, unsigned char b)
{
int log = 7;
/* division by one need to be handled separately */
if (b == 1) {
return a;
}
/* normalize b so that 1 <= b < 2 */
while (!(b & 0x80)) {
b <<= 1;
log--;
}
/* if b is a power of two */
if (b == 0x80) {
return a >> log;
}
/* msb is always one, so don't use it for indexing (save some memory) */
unsigned char r = r0[b & 0x7f];
/* now multiply */
unsigned short ar = (unsigned short)a * (unsigned short)r;
ar >>= 8;
ar += (unsigned short)a; /* ar is now an 9-bit value */
ar >>= (log + 1);
return (unsigned char)ar;
}
This has been tested and is 100% accurate. I don't know how fast/slow it will be when translated to 6502 code. Maybe adding a table or two for the normalizing step could help. |
| | AmiDog
Registered: Mar 2003 Posts: 97 |
Just noticed that a divide by zero will cause an endless loop, so better replace:
if (b == 1) {
return a;
}
with
if (b <= 1) {
return a;
}
to avoid the endless loop. The result of a divide by zero is usually considered undefined anyway, so it doesn't really matter what we return... |
| | AmiDog
Registered: Mar 2003 Posts: 97 |
I just couldn't control myself, here's some 6502 code:
_divu_8
lda div_b
cmp #2
bcs + ; >= 2
lda div_a
rts
+ ldx #8
- dex
asl
bcc -
bne +
lda div_a
- lsr
dex
bne -
rts
+ tay
lda r0_table,y
ldy div_a
sta zp8_1
sta zp8_2
eor #$ff
sta zp8_3
sta zp8_4
sec
lda (zp8_1),y
sbc (zp8_3),y
lda (zp8_2),y
sbc (zp8_4),y
clc
adc div_a
ror
- lsr
dex
bne -
rts
div_a
.byte $0
div_b
.byte $0
r0_table
.byte $01,$00,$fd,$00,$f9,$00,$f5,$00,$f1,$00,$ed,$00,$ea,$00,$e6,$00
.byte $e2,$00,$df,$00,$db,$00,$d8,$00,$d5,$00,$d1,$00,$ce,$00,$cb,$00
.byte $c8,$00,$c4,$00,$c1,$00,$be,$00,$bb,$00,$b8,$00,$b5,$00,$b3,$00
.byte $b0,$00,$ad,$00,$aa,$00,$a7,$00,$a5,$00,$a2,$00,$9f,$00,$9d,$00
.byte $9a,$00,$98,$00,$95,$00,$93,$00,$90,$00,$8e,$00,$8b,$00,$89,$00
.byte $87,$00,$84,$00,$82,$00,$80,$00,$7e,$00,$7b,$00,$79,$00,$77,$00
.byte $75,$00,$73,$00,$71,$00,$6f,$00,$6d,$00,$6b,$00,$69,$00,$67,$00
.byte $65,$00,$63,$00,$61,$00,$5f,$00,$5d,$00,$5b,$00,$59,$00,$58,$00
.byte $56,$00,$54,$00,$52,$00,$51,$00,$4f,$00,$4d,$00,$4b,$00,$4a,$00
.byte $48,$00,$47,$00,$45,$00,$43,$00,$42,$00,$40,$00,$3f,$00,$3d,$00
.byte $3c,$00,$3a,$00,$39,$00,$37,$00,$36,$00,$34,$00,$33,$00,$31,$00
.byte $30,$00,$2f,$00,$2d,$00,$2c,$00,$2a,$00,$29,$00,$28,$00,$26,$00
.byte $25,$00,$24,$00,$22,$00,$21,$00,$20,$00,$1f,$00,$1d,$00,$1c,$00
.byte $1b,$00,$1a,$00,$19,$00,$17,$00,$16,$00,$15,$00,$14,$00,$13,$00
.byte $12,$00,$10,$00,$0f,$00,$0e,$00,$0d,$00,$0c,$00,$0b,$00,$0a,$00
.byte $09,$00,$08,$00,$07,$00,$06,$00,$05,$00,$04,$00,$03,$00,$02,$00
This will divide two 8-bit numbers in some 90-150 cycles. The code can easily be extended to handle larger dividends. |
| | AmiDog
Registered: Mar 2003 Posts: 97 |
Let me bore you with an optimized version:
; divide acc by y, result in acc
_divu_8
ldx t0_table,y
stx b1+1
ldx t1_table,y
beq +
ldy r0_table,x
sta zp8_1
sta zp8_2
eor #$ff
sta zp8_3
sta zp8_4
sec
lda (zp8_1),y
sbc (zp8_3),y
lda (zp8_2),y
sbc (zp8_4),y
clc
adc zp8_1
ror
+ sec
b1 bcs b1
lsr
lsr
lsr
lsr
lsr
lsr
lsr
rts
.align $100
r0_table
.byte $01,$00,$fd,$00,$f9,$00,$f5,$00,$f1,$00,$ed,$00,$ea,$00,$e6,$00
.byte $e2,$00,$df,$00,$db,$00,$d8,$00,$d5,$00,$d1,$00,$ce,$00,$cb,$00
.byte $c8,$00,$c4,$00,$c1,$00,$be,$00,$bb,$00,$b8,$00,$b5,$00,$b3,$00
.byte $b0,$00,$ad,$00,$aa,$00,$a7,$00,$a5,$00,$a2,$00,$9f,$00,$9d,$00
.byte $9a,$00,$98,$00,$95,$00,$93,$00,$90,$00,$8e,$00,$8b,$00,$89,$00
.byte $87,$00,$84,$00,$82,$00,$80,$00,$7e,$00,$7b,$00,$79,$00,$77,$00
.byte $75,$00,$73,$00,$71,$00,$6f,$00,$6d,$00,$6b,$00,$69,$00,$67,$00
.byte $65,$00,$63,$00,$61,$00,$5f,$00,$5d,$00,$5b,$00,$59,$00,$58,$00
.byte $56,$00,$54,$00,$52,$00,$51,$00,$4f,$00,$4d,$00,$4b,$00,$4a,$00
.byte $48,$00,$47,$00,$45,$00,$43,$00,$42,$00,$40,$00,$3f,$00,$3d,$00
.byte $3c,$00,$3a,$00,$39,$00,$37,$00,$36,$00,$34,$00,$33,$00,$31,$00
.byte $30,$00,$2f,$00,$2d,$00,$2c,$00,$2a,$00,$29,$00,$28,$00,$26,$00
.byte $25,$00,$24,$00,$22,$00,$21,$00,$20,$00,$1f,$00,$1d,$00,$1c,$00
.byte $1b,$00,$1a,$00,$19,$00,$17,$00,$16,$00,$15,$00,$14,$00,$13,$00
.byte $12,$00,$10,$00,$0f,$00,$0e,$00,$0d,$00,$0c,$00,$0b,$00,$0a,$00
.byte $09,$00,$08,$00,$07,$00,$06,$00,$05,$00,$04,$00,$03,$00,$02,$00
t0_table
.fill $100,0
t1_table
.fill $100,0
_divu_8_setup
ldy #1
next
tya
ldx #$ff
- inx
asl
bcc -
sta t1_table,y
txa
sta t0_table,y
iny
bne next
rts
This will divide two 8-bit numbers in 42-106 cycles (including the jsr used to call this).
I'll stop now, I promise ;) |
| | Krill
Registered: Apr 2002 Posts: 2980 |
Yes, and now please re-read some former posts by other people for somewhat better approaches :) |
| | AmiDog
Registered: Mar 2003 Posts: 97 |
Well, it depends on how you define "better". If you need accuracy, this method will give you that (all bits are correct) in 40-100 cycles (I've made a few more optimizations). Compare this with the unrolled binary division in a post above which will use 244-274 cycles...
Also, you could easily extend this code to perform a 16bit/8bit division, or even 32bit/8bit division by just extending the multiplication and the bitshifting at the end, the reciprocal and shiftcount stays the same. Somehow I doubt you'll be able to do that using log-tables :) |
| | Style
Registered: Jun 2004 Posts: 498 |
pwned krill
:) |
| | Krill
Registered: Apr 2002 Posts: 2980 |
Ok, i'm sorry, after a closer investigation, the code is much more elegant and better than i suspected after a quick peek. :) Well done. |
Previous - 1 | 2 | 3 | 4 - Next | |