I didn't really though much about it and just used the hint from that article. EOR doesn't suffice. EOR+1 though.. And it also depends on if the inputs are both signed (no EOR+1 needed) or if just one of them is.

;continue column 1 .c1_1: lda #0 ;self-modded to contain high(x0*y0) f adc (multlib_p_neg_sqr_lo_x0),y ;high(x0*y0) g sta .c1_2+1 ;save partial ;11.5

umult16: ;unsigned 16x16 mult ;inputs: ; x0 x1 ; y0 y1 ;outputs: ; A z2 z1 z0 * = $c000 !zone multlib ;these are defaults, but should be set from the calling program !addr { multlib_p_sqr_lo_x0 = $8b ;2 bytes multlib_p_sqr_hi_x0 = $8d ;2 bytes multlib_p_neg_sqr_lo_x0 = $a3 ;2 bytes multlib_p_neg_sqr_hi_x0 = $a5 ;2 bytes multlib_p_sqr_lo_x1 = $8b ;2 bytes multlib_p_sqr_hi_x1 = $8d ;2 bytes multlib_p_neg_sqr_lo_x1 = $a3 ;2 bytes multlib_p_neg_sqr_hi_x1 = $a5 ;2 bytes multlib_sqr_lo = $c000 ;511 bytes multlib_sqr_hi = $c200 ;511 bytes multlib_neg_sqr_lo = $c400 ;511 bytes multlib_neg_sqr_hi = $c600 ;511 bytes } x0=$fb x1=$fc y0=$fd y1=$fe z0=$8000 z1=$8001 z2=$8002 z3=$8003 ;set multiplier as x0 lda x0 ;3 sta multlib_p_sqr_lo_x0 ;3 sta multlib_p_sqr_hi_x0 ;3 eor #$ff ;2 sta multlib_p_neg_sqr_lo_x0 ;3 sta multlib_p_neg_sqr_hi_x0 ;3; 3+3+3+2+3+3 = 17 cycles ;set multiplier as x1 lda x1 ;3 sta multlib_p_sqr_lo_x1 ;3 sta multlib_p_sqr_hi_x1 ;3 eor #$ff ;2 sta multlib_p_neg_sqr_lo_x1 ;3 sta multlib_p_neg_sqr_hi_x1 ;3; 3+3+3+2+3+3 = 17 cycles ;set multiplicand as y0 ldy y0 ;3 ;storing z3 in X, clear X ldx #0 ;2 clc ;2 ;x0_y1l = low(x0*y1) ;x0_y1h = high(x0*y1) ;zip add all mult8's directly ;adding requires offset $4000 in multlib_p_neg_sqr table and compensation ;7 ;column 0 lda (multlib_p_sqr_lo_x0),y ;low(x0*y0) f adc (multlib_p_neg_sqr_lo_x0),y ;11 cycles low(x0*y0) g sta z0 ;multiplier * Y, low byte, 3 cycles ;need correction factor for g in offset $400 ;14 ;continue to column 1 adc (multlib_p_sqr_hi_x0),y ;high(x0*y0) f sta .c1_1+1 ;partial addition result of column 1 (self-mod to be continued) ;9.5 ;continue to column 2 adc (multlib_p_sqr_hi_x1),y ;high(x1*y0) f sta .c2_1+1 ;save partial bcc .c1_1 inx ;inc z3 clc ;3/6=4.5 ;14 ;continue column 1 .c1_1: lda #0 ;self-modded to contain high(x0*y0) f adc (multlib_p_neg_sqr_lo_x0),y ;high(x0*y0) g sta .c1_2+1 ;save partial ;11.5 ;continue column 2 .c2_1: lda #0 ;self-modded to contain high(x1*y0) f adc (multlib_p_neg_sqr_hi_x1),y ;high(x1*y0) g sta .c2_2+1 ;save partial bcc .c1_2 inx ;inc z3 clc ;4.5 ;16 ;continue column 1 .c1_2: lda #0 ;self-modded to contain high(x0*y0) g adc (multlib_p_sqr_lo_x1),y ;low(x1*y0) f sta .c1_3 ;save partial ;11.5 ;continue column 2 ;set multiplicand as y1 ldy y1 ;3 .c2_2: lda #0 ;self-modded to contain high(x1*y0) g adc (multlib_p_sqr_hi_x1),y ;high(x0*y1) f sta .c2_3+1 ;save partial bcc .c1_3 inx ;inc z3 clc ;4.5 ;19 ;continue column 1 (extra y0) ldy y0 ;3 .c1_3: lda #0 ;self-modded to contain low(x1*y0) f adc (multlib_p_neg_sqr_lo_x1),y ;low(x1*y0) g sta .c1_4+1 ;save partial ;14.5 ;continue column 2 ldy y1 ;3 .c2_3: lda #0 ;self-modded to contain high(x0*y1) f adc (multlib_p_sqr_hi_x1),y ;high(x0*y1) g sta .c2_4+1 bcc .c1_4 inx ;inc z3 clc ;4.5 ;19 ;continue column 1 .c1_4: lda #0 ;self-modded to contain low(x1*y0) g adc (multlib_p_sqr_lo_x0),y ;low(x0*y1) f sta .c1_5+1 ;save partial ;11.5 ;continue column 2 .c2_4: lda #0 ;self-modded to contain high(x0*y1) g adc (multlib_p_sqr_hi_x1),y ;low(x1*y1) f sta .c2_5+1 ;save partal bcc .c1_5 inx ;inc z3 clc ;4.5 ;16 ;finish column 1 .c1_5: lda #0 ;self-modded to contain low(x0*y1) f adc (multlib_p_neg_sqr_lo_x0),y ;low(x0*y1) g sta z1 ;need correction factor for g in offset $400 ;10.5 ;finish column 2 .c2_5: lda #0 ;self-modded to contain low(x1*y1) f adc (multlib_p_neg_sqr_hi_x1),y ;low(x1*y1) g sta z2 ;need correction factor for g in offset $400 ;10.5 ;column 3 txa ;carries from c2 adc (multlib_p_sqr_hi_x1),y ;high(x1*y1) f adc (multlib_p_neg_sqr_hi_x1),y ;high(x1*y1) g ;need correction factor for g in offset $400 ;A=z3 ;13 ;=231.5 fin: rts

y1 y0 x x1 x0 -------- x0y0h x0y0l + x1y0h x1y0l + x0y1h x0y1l +x1y1h x1y1l ------------------------ z3 z2 z1 z0

# -*- coding: utf-8 -*- """ Created on Fri Jan 28 22:46:53 2022 @author: Repose A module to analyze arithmetic via logs ref. https://www.siranah.de/html/sail042q.htm """ #imports import sys from math import log2 #functions/classes scale=256/log2(256) #8-bit range lo=lambda x:x&255 #util for creating tables (for future development) hi=lambda x:(x>>8) logadd=lambda x:round(log2(1+1/pow(2,x/scale))*scale) #scaled table to add two logs logsub=lambda x:round(log2(1+pow(2,x/scale))*scale) #scaled table to sub two logs exp=lambda x:round(pow(2,x/scale)) #scaled exponential 2^x table lg=lambda x:round(log2(x)*scale) #scaled log2(x) table def mult(x,y): '''Multiply two integers via their logs Keyword arguments: x,y -- integers 0 to 255 Returns: The approximate product ''' return exp(lg(x)+lg(y)) def add(x,y): '''Add two integers via their logs Keyword arguments: x,y -- integers 0 to 255 Returns: The approximate sum ''' return exp(logadd(lg(y)-lg(x))+lg(y)) def output_table(fmt, fname, name, addr, tab, start, end): '''Output a table as an assembly include file Keyword arguments: fmt -- string, "ACME" fname -- string, filename of output file name -- string, table name addr -- integer start address of table tab -- generator function start -- integer start index value end -- integer end index value + 1 Returns: 0 (success) 5 (failed to open file) ''' try: with open(fname, 'w') as f: print(f'writing {fname}') if fmt.lower()=="acme": fmt_data='!by ' elif fmt.lower()=="dasm": fmt_data='dc.b ' else: print(f'unknown assembler format {fmt}') return 1 #traditional DOS incorrect function #header f.write(f'{name}=${addr:04X}\n') step=16 for i in range(start,end,step): #line start f.write(fmt_data) k=i+step if i+step<end else end first_invalid=True first_overflow=True for j in range(i,k): try: val=tab(j) except ValueError: val=0 if first_invalid: print(f'warning: invalid table value replaced with 0 at index {j}') first_invalid=False if val>0xff: val=val & 0xff if first_overflow: print(f'warning: table value overflowed at index {j}') first_overflow=False f.write(f'${val:02X}') if j!=k-1: f.write(',') else: f.write('\n') except: print(f"Couldn't open {fname} for writing") return 5 #traditional DOS error code #main def main(): '''Analyze arithmetic of two integers via their logs Keyword arguments: none Returns: 0 (success) ''' print('Guassian logs analysis\n') print('accuracy test\n') #should write a helper class for this erm=0 #maximum multiply error era=0 #maximum addition error for x in range(1,256): for y in range(x, 256): er=mult(x,y)-x*y if er>erm: erm=er pxm=x pym=y er=add(x,y)-(x+y) if er>era: era=er pxa=x pya=y #requires python 3.6+ due to f strings print('multiply') print(f'max err was {erm} ({erm/(pxm*pym)*100:.2f}%) at x={pxm} and y={pym}') print(f'{pxm}*{pym}={pxm*pym}, mult({pxm},{pym})={mult(pxm,pym)}\n') print('add') print(f'max err was {era} ({era/(pxa+pya)*100:.2f}%) at x={pxa} and y={pya}') print(f'{pxa}+{pya}={pxa+pya}, add({pxa},{pya})={add(pxa,pya)}\n') #write tables print('writing tables') tables=[{"func":lg,"name":"logtab"}, {"func":exp,"name":"exptab"}, {"func":logadd,"name":"logadd"}] addr=0xc000 assembler='acme' for table in tables: #output_table(fmt, fname, name, addr, tab, start, end+1) output_table(assembler, table["name"]+'.asm', table["name"], addr, table["func"], 0, 256) addr+=0x100 if __name__ == '__main__': sys.exit(main())