;Stage 1 - the four 8x8 sub-multiplies that make up x*y.
;A destination written as "label+1" is the operand byte of the
; "adc #0" found at that label in stage 2, so the product byte is
; patched directly into the instruction that will later add it
; (self-modifying code).
;Macro usage:
;   +mult8_set_mier_snippet {multiplier}
;   +mult8_snippet {multiplicand}, {low result}, {high result}
        +mult8_set_mier_snippet x1              ;17
        +mult8_snippet y0, x1y0l+1, x1y0h+1     ;35  x1*y0: both bytes patched into stage 2
        +mult8_snippet y1, x1y1l+1, z3          ;32  x1*y1: low patched, high stored to z3
        +mult8_set_mier_snippet x0              ;17
        ;NOTE(review): the "Y" multiplicand presumably relies on the Y
        ; register still holding y1 from the previous snippet - confirm
        ; against mult8_snippet.
        +mult8_snippet "Y", x0y1l+1, "X"        ;28  x0*y1: low patched, high left in X
        +mult8_snippet y0, z0, "A"              ;28  x0*y0: low stored to z0, high left in A
;On exit from stage 1: X = x0y1h, A = x0y0h.
;Stage 1 cost: 149-165 cycles, 157 on average.

;Stage 2 - sum the partial products into z1, z2 and z3.
;Every "adc #0" operand below is a placeholder overwritten by stage 1.
        clc
x0y1l   adc #0          ;A = x0y0h + x0y1l
        tay             ;6   park the running z1 sum in Y
        txa
x1y0h   adc #0          ;A = x0y1h + x1y0h (+ carry from the z1 sum)
        tax             ;    park the running z2 sum in X
        bcc +           ;9
        inc z3          ;carry out of the z2 sum goes into z3
        clc             ;(+6) this pair runs 7% of the time
+       tya
x1y0l   adc #0          ;A = running z1 sum + x1y0l
        sta z1          ;7
        txa
x1y1l   adc #0          ;A = running z2 sum + x1y1l (+ carry from z1)
        ;"done" is defined outside this excerpt; A still holds the z2 sum
        ; at this point (presumably stored there - confirm).
        bcc done        ;7
        inc z3          ;(+4) runs 42% of the time
                   y1    y0
              x    x1    x0
                   --------
                x0y0h x0y0l
          x0y1h x0y1l
          x1y0h x1y0l
    x1y1h x1y1l
    -----------------------
       z3    z2    z1    z0
!macro mult8_set_mier_snippet .mier {
    ;Set the multiplier ("mier") used by the multiplies that follow.
    ;
    ;.mier is either a memory address (m) or "A" to use the value
    ; already in the accumulator; "X" and "Y" are rejected at assembly
    ; time.
    ;
    ;The multiplier is written to mathlib.p_sqr_lo and mathlib.p_sqr_hi,
    ; and its one's complement (eor #$ff) to mathlib.p_neg_sqr_lo and
    ; mathlib.p_neg_sqr_hi.
    ;On exit A holds the multiplier eor #$ff.
    ;
    ;requires the mathlib.p_sqr* and mathlib.p_neg_sqr* pointers
    ;(mathlib.p_sqr_lo, mathlib.p_neg_sqr_lo, mathlib.p_sqr_hi, mathlib.p_neg_sqr_hi)
    ;uses these macros from mathlib_util_macros.asm:
    ;reset_cycles, add_cycles_const
    ;
    ;Cycle accounting: 14 for the four stores plus the eor, plus 3 or 4
    ; for the lda when .mier is a memory operand (17 or 18 in total);
    ; 14 when .mier is "A".
    ;NOTE(review): the 3-cycle store counts assume the mathlib.p_*
    ; locations are in zero page - confirm.
    !if .mier = "X" or .mier = "Y" {
        ;name this macro in the diagnostic: mult8_snippet itself is
        ; called with "X"/"Y" operands elsewhere, so blaming it would
        ; point at the wrong call
        !error "multlib: mult8_set_mier_snippet: mier must be m/A, not ", .mier
    }
    +reset_cycles
    !if .mier != "A" {
        lda .mier
        !if .mier<$100 {
            +add_cycles_const 3     ;lda from an address below $100
        } else {
            +add_cycles_const 4     ;lda from any other address
        }
    }
    sta mathlib.p_sqr_lo            ;3
    sta mathlib.p_sqr_hi            ;3
    eor #$ff                        ;2
    sta mathlib.p_neg_sqr_lo        ;3
    sta mathlib.p_neg_sqr_hi        ;3; 3+3+3+2+3+3 = 17 cycles with a 3-cycle lda
    +add_cycles_const 14            ;3+3+2+3+3; the lda is counted above
}
mier = x1
    (x1) * y0   -> x1y0l, x1y0h
    (x1) * y1   -> x1y1l, z3
mier = x0
    (x0) * (y1) -> x0y1l, X
    (x0) * y0   -> z0, A
The time is 188.1 cycles, averaged over all possible inputs.