; ==============================================================================
; num2binary (loop version, ACME syntax)
; Prints every value 0..255 as eight binary digits, one value per line.
; ==============================================================================
!cpu 6510

save_num    = $02               ; zero-page scratch: value being shifted out
CHAROUT     = $f1ca             ; character output routine, character passed in A
BIT_ABS     = $2c               ; opcode byte of BIT abs, used as a 2-byte skip
NUM_BITS    = 8                 ; digits printed per value
NEWLINE     = $0d               ; sent after each value

; ------------------------------------------------------------------------------
; basic TI$ timer wrapper program:
; --------------------------------
; 0 TI$="000000":SYS2092
; 1 PRINT"TIME:"TI/60
; --------------------------------
; RESULT: 8.91666667
; ------------------------------------------------------------------------------
            *= $0801
            !byte $18, $08                                  ; link to line 1 ($0818)
            !byte $00, $00                                  ; line number 0
            !byte $54, $49, $24, $b2                        ; TI$=
            !byte $22, $30, $30, $30, $30, $30, $30, $22    ; "000000"
            !byte $3a                                       ; :
            !byte $9e, $32, $30, $39, $32                   ; SYS2092
            !byte $00                                       ; end of line 0
            !byte $2a, $08                                  ; link past line 1 ($082a)
            !byte $01, $00                                  ; line number 1
            !byte $99                                       ; PRINT
            !byte $22, $54, $49, $4d, $45, $3a, $22         ; "TIME:"
            !byte $54, $49, $ad, $36, $30                   ; TI/60
            !byte $00                                       ; end of line 1
            !byte $00, $00                                  ; end of program

; ------------------------------------------------------------------------------
; Entry point ($082c = 2092, the SYS target of the wrapper above).
; Y counts the value, X counts the digits already printed; the loop relies on
; CHAROUT leaving both registers unchanged.
; ------------------------------------------------------------------------------
            *= $082c
main:
            ldy #0
.next_value:
            sty save_num                ; working copy, destroyed by the shifts
            ldx #0
.next_bit:
            clc
            rol save_num                ; top bit -> carry
            bcc .digit_zero
            lda #'1'
            !byte BIT_ABS               ; BIT abs swallows the following lda #'0'
.digit_zero:
            lda #'0'
            jsr CHAROUT
            inx
            cpx #NUM_BITS
            bne .next_bit
            lda #NEWLINE
            jsr CHAROUT
            iny
            bne .next_value             ; Y wrapped back to 0: all 256 values done
            rts
; ==============================================================================
; num2binary (fully unrolled version, ACME syntax)
; Prints every value 0..255 as eight binary digits, one value per line.
; Both loops are expanded at assembly time, so the code contains 256 x 8
; copies of the per-digit sequence.
; ==============================================================================
!cpu 6510

save_num    = $02               ; zero-page scratch: value being shifted out
CHAROUT     = $f1ca             ; character output routine, character passed in A
BIT_ABS     = $2c               ; opcode byte of BIT abs, used as a 2-byte skip
NEWLINE     = $0d               ; sent after each value

; ------------------------------------------------------------------------------
; basic TI$ timer wrapper program:
; --------------------------------
; 0 TI$="000000":SYS2092
; 1 PRINT"TIME:"TI/60
; --------------------------------
; RESULT: 8.9
; ------------------------------------------------------------------------------
            *= $0801
            !byte $18, $08                                  ; link to line 1 ($0818)
            !byte $00, $00                                  ; line number 0
            !byte $54, $49, $24, $b2                        ; TI$=
            !byte $22, $30, $30, $30, $30, $30, $30, $22    ; "000000"
            !byte $3a                                       ; :
            !byte $9e, $32, $30, $39, $32                   ; SYS2092
            !byte $00                                       ; end of line 0
            !byte $2a, $08                                  ; link past line 1 ($082a)
            !byte $01, $00                                  ; line number 1
            !byte $99                                       ; PRINT
            !byte $22, $54, $49, $4d, $45, $3a, $22         ; "TIME:"
            !byte $54, $49, $ad, $36, $30                   ; TI/60
            !byte $00                                       ; end of line 1
            !byte $00, $00                                  ; end of program

; ------------------------------------------------------------------------------
; Entry point ($082c = 2092, the SYS target of the wrapper above).
; ------------------------------------------------------------------------------
            *= $082c
            !for value, 0, 255 {
                lda #value
                sta save_num            ; working copy, destroyed by the shifts
                !for digit, 0, 7 {
                    clc
                    rol save_num        ; top bit -> carry
                    bcc +
                    lda #'1'
                    !byte BIT_ABS       ; BIT abs swallows the following lda #'0'
+                   lda #'0'
                    jsr CHAROUT
                }
                lda #NEWLINE
                jsr CHAROUT
            }
            rts
; ==============================================================================
; num2binary (fully unrolled version using ASL, ACME syntax)
; Prints every value 0..255 as eight binary digits, one value per line.
; Both loops are expanded at assembly time.
; ==============================================================================
!cpu 6510

save_num    = $02               ; zero-page scratch: value being shifted out
CHAROUT     = $f1ca             ; character output routine, character passed in A
BIT_ABS     = $2c               ; opcode byte of BIT abs, used as a 2-byte skip
NEWLINE     = $0d               ; sent after each value

; ------------------------------------------------------------------------------
; basic TI$ timer wrapper program:
; --------------------------------
; 0 TI$="000000":SYS2092
; 1 PRINT"TIME:"TI/60
; --------------------------------
; RESULT: 8.88333334
; ------------------------------------------------------------------------------
            *= $0801
            !byte $18, $08                                  ; link to line 1 ($0818)
            !byte $00, $00                                  ; line number 0
            !byte $54, $49, $24, $b2                        ; TI$=
            !byte $22, $30, $30, $30, $30, $30, $30, $22    ; "000000"
            !byte $3a                                       ; :
            !byte $9e, $32, $30, $39, $32                   ; SYS2092
            !byte $00                                       ; end of line 0
            !byte $2a, $08                                  ; link past line 1 ($082a)
            !byte $01, $00                                  ; line number 1
            !byte $99                                       ; PRINT
            !byte $22, $54, $49, $4d, $45, $3a, $22         ; "TIME:"
            !byte $54, $49, $ad, $36, $30                   ; TI/60
            !byte $00                                       ; end of line 1
            !byte $00, $00                                  ; end of program

; ------------------------------------------------------------------------------
; Entry point ($082c = 2092, the SYS target of the wrapper above).
; ------------------------------------------------------------------------------
            *= $082c
            !for value, 0, 255 {
                lda #value
                sta save_num            ; working copy, destroyed by the shifts
                !for digit, 0, 7 {
                    asl save_num        ; top bit -> carry, no clc needed
                    bcc +
                    lda #'1'
                    !byte BIT_ABS       ; BIT abs swallows the following lda #'0'
+                   lda #'0'
                    jsr CHAROUT
                }
                lda #NEWLINE
                jsr CHAROUT
            }
            rts
If you're using VICE, use the monitor command "stopwatch" (or "sw") to reset and display the stopwatch. You can time the routine from the start of the run command until it returns by adding a couple of breakpoints. Also remember to SEI early to avoid the regular IRQ.
; Fragment: stores the incoming A to $2000, then writes fixed values to $d011
; and $d018 and returns. Whatever loads A before the first store is not part
; of this snippet.
; NOTE(review): $d011/$d018 are presumably the C64 VIC-II control and memory
; pointer registers ($3b = bitmap mode, $18 = bitmap at $2000) - confirm.
        sta $2000
        lda #$3b
        sta $d011
        lda #$18
        sta $d018
        rts
// Four ways to write the 8 bits of A to screen RAM $0400-$0407 as '0'/'1'
// characters (Kick Assembler syntax). All variants take bit 0 first, so bit 0
// lands at $0400 and bit 7 at $0407.

// Variant #1:
// Pre-fill each cell with $18, then rotate the next bit of A in from carry:
// $18 rotated left once is $30 ('0') or $31 ('1').
// Fix: the read-modify-write must be ROL. ASL always shifts in a 0, which
// ignores the carry produced by LSR and prints '0' for every bit.
            ldx #$18            // 2
            stx $0400           // 4
            stx $0401           // 4
            stx $0402           // 4
            stx $0403           // 4
            stx $0404           // 4
            stx $0405           // 4
            stx $0406           // 4
            stx $0407           // 4
            lsr                 // 2   bit 0 of A -> carry
            rol $0400           // 6   carry -> low bit of the cell
            lsr                 // 2
            rol $0401           // 6
            lsr                 // 2
            rol $0402           // 6
            lsr                 // 2
            rol $0403           // 6
            lsr                 // 2
            rol $0404           // 6
            lsr                 // 2
            rol $0405           // 6
            lsr                 // 2
            rol $0406           // 6
            lsr                 // 2
            rol $0407           // 6
// 8 x 4 + 8 x 6 + 9 x 2 = 98 cycles / 58 bytes
// Probably has some illegal OPC voodoo potential.

// Variant #2
// Two parallel chains: the upper one is entered after storing a '1', the
// lower one after storing a '0'. Each LSR exposes the next bit; the branch
// hops to the other chain whenever the bit differs from the chain's digit.
// Fixes: label case (!next0 -> !Next0), and the lower chain now branches on
// carry SET to the matching !PrevN (it used "bcc !Prev1-" for bits 2..7).
            ldx #'0'            // 2
            ldy #'1'            // 2   -> 4 cycles 'overhead'
            lsr                 // 2
            bcc !Next0+         // 2 / 3
            sty $0400           // 4   -> 8/9 cycles dep. the branch
            lsr
            bcc !Next1+
!Prev0:     sty $0401
            lsr
            bcc !Next2+
!Prev1:     sty $0402
            lsr
            bcc !Next3+
!Prev2:     sty $0403
            lsr
            bcc !Next4+
!Prev3:     sty $0404
            lsr
            bcc !Next5+
!Prev4:     sty $0405
            lsr
            bcc !Next6+
!Prev5:     sty $0406
            lsr
            bcc !Next7+
!Prev6:     sty $0407
            rts

!Next0:     stx $0400
            lsr
            bcs !Prev0-
!Next1:     stx $0401
            lsr
            bcs !Prev1-
!Next2:     stx $0402
            lsr
            bcs !Prev2-
!Next3:     stx $0403
            lsr
            bcs !Prev3-
!Next4:     stx $0404
            lsr
            bcs !Prev4-
!Next5:     stx $0405
            lsr
            bcs !Prev5-
!Next6:     stx $0406
            lsr
            bcs !Prev6-
!Next7:     stx $0407
            rts
// ~8 x 8 (+8) + 2 = ~66/74 cycles / ~100 bytes

// Edit 2: 2 variants with voodoo and shameless charset trickery:

// NOTE(review): SLO abs is normally documented as "ASL memory, then ORA the
// result into A" (a 6-cycle read-modify-write). Under that definition no bit
// of A ever reaches memory, so variants #3 and #4 would not print the value
// and the cycle figures below (kept as posted) are optimistic. Verify on an
// emulator or hardware before using them.

// Variant #3 with voodoo
            ldx #$18            // 2
            stx $0400           // 4
            stx $0401           // 4
            stx $0402           // 4
            stx $0403           // 4
            stx $0404           // 4
            stx $0405           // 4
            stx $0406           // 4
            stx $0407           // 4
            slo $0400           // 4
            slo $0401           // 4
            slo $0402           // 4
            slo $0403           // 4
            slo $0404           // 4
            slo $0405           // 4
            slo $0406           // 4
            slo $0407           // 4
// 16 x 4 + 2 = 66 cycles / 50 bytes

// Variant #4 with voodoo & charset (space = "0", ! = "1")
            slo $0400           // 4
            slo $0401           // 4
            slo $0402           // 4
            slo $0403           // 4
            slo $0404           // 4
            slo $0405           // 4
            slo $0406           // 4
            slo $0407           // 4
// 8 x 4 = 32 cycles / 24 bytes
To be fair I also used the KERNAL output routine for printing the chars. Small solution with loops / 78 Bytes PRG: https://trans.jansalleine.com/c64/num2binary.prg !cpu 6510 ; ============================================================================== save_num = 0x02 CHAROUT = 0xF1CA ; ============================================================================== *= 0x0801 ; basic TI$ timer wrapper program: ; -------------------------------- ; 0 TI$="000000":SYS2092 ; 1 PRINT"TIME:"TI/60 ; -------------------------------- ; RESULT: 8.91666667 !byte 0x18, 0x08, 0x00, 0x00 !byte 0x54, 0x49, 0x24, 0xB2 !byte 0x22, 0x30, 0x30, 0x30 !byte 0x30, 0x30, 0x30, 0x22 !byte 0x3A, 0x9E, 0x32, 0x30 !byte 0x39, 0x32, 0x00, 0x2A !byte 0x08, 0x01, 0x00, 0x99 !byte 0x22, 0x54, 0x49, 0x4D !byte 0x45, 0x3A, 0x22, 0x54 !byte 0x49, 0xAD, 0x36, 0x30 !byte 0x00, 0x00, 0x00 ; ============================================================================== *= 0x082C ldy #0 -- sty save_num ldx #0 - clc rol save_num bcc + lda #'1' !byte 0x2C + lda #'0' jsr CHAROUT inx cpx #8 bne - lda #0x0D jsr CHAROUT iny bne -- rts Slightly faster solution with unrolled loops / 28974 Bytes PRG: https://trans.jansalleine.com/c64/num2binary_unrolled.prg !cpu 6510 ; ============================================================================== save_num = 0x02 CHAROUT = 0xF1CA ; ============================================================================== *= 0x0801 ; basic TI$ timer wrapper program: ; -------------------------------- ; 0 TI$="000000":SYS2092 ; 1 PRINT"TIME:"TI/60 ; -------------------------------- ; RESULT: 8.9 !byte 0x18, 0x08, 0x00, 0x00 !byte 0x54, 0x49, 0x24, 0xB2 !byte 0x22, 0x30, 0x30, 0x30 !byte 0x30, 0x30, 0x30, 0x22 !byte 0x3A, 0x9E, 0x32, 0x30 !byte 0x39, 0x32, 0x00, 0x2A !byte 0x08, 0x01, 0x00, 0x99 !byte 0x22, 0x54, 0x49, 0x4D !byte 0x45, 0x3A, 0x22, 0x54 !byte 0x49, 0xAD, 0x36, 0x30 !byte 0x00, 0x00, 0x00 ; 
============================================================================== *= 0x082C !for i, 0, 255 { lda #i sta save_num !for j, 0, 7 { clc rol save_num bcc + lda #'1' !byte 0x2C + lda #'0' jsr CHAROUT } lda #0x0D jsr CHAROUT } rts EDIT: corrected ror -> rol.
My idea for an optimized assembly solution would be to use 8 page aligned 256 byte tables of 0 and 1 characters, one for each bit. This would allow a shared index for 8 lda's of 4 cycles each without having to branch after each load, just pushing the values loaded to CHAROUT.
; ==============================================================================
; num2binary (lookup-table version, ACME syntax)
; For every value 0..255 prints: a space, the value as a right-aligned
; 3-character decimal number, a gap, the eight binary digits and a newline.
; Every character comes from a 256-entry table indexed by the value itself.
; ==============================================================================
!cpu 6510

CRSRX       = $d3               ; zero-page cell incremented to skip columns
                                ; (presumably the cursor column - confirm)
CHAROUT     = $f1ca             ; character output routine, character passed in A
CLRSCR      = $e544             ; called once before any output
GAP_COLUMNS = 6                 ; columns skipped between decimal and binary
NEWLINE     = $0d               ; sent after each value

; Emits a 256-byte table whose entry n is '0' or '1' according to bit .shift
; of n.
!macro digit_table .shift {
            !for .n, 0, 255 {
                !byte ((.n >> .shift) AND 1) OR $30
            }
}

; ------------------------------------------------------------------------------
; basic TI$ timer wrapper program:
; --------------------------------
; 0 TI$="000000":SYS2092
; 1 PRINT"TIME:"TI/60
; --------------------------------
; RESULT: ~9
; ------------------------------------------------------------------------------
            *= $0801
            !byte $18, $08                                  ; link to line 1 ($0818)
            !byte $00, $00                                  ; line number 0
            !byte $54, $49, $24, $b2                        ; TI$=
            !byte $22, $30, $30, $30, $30, $30, $30, $22    ; "000000"
            !byte $3a                                       ; :
            !byte $9e, $32, $30, $39, $32                   ; SYS2092
            !byte $00                                       ; end of line 0
            !byte $2a, $08                                  ; link past line 1 ($082a)
            !byte $01, $00                                  ; line number 1
            !byte $99                                       ; PRINT
            !byte $22, $54, $49, $4d, $45, $3a, $22         ; "TIME:"
            !byte $54, $49, $ad, $36, $30                   ; TI/60
            !byte $00                                       ; end of line 1
            !byte $00, $00                                  ; end of program

; ------------------------------------------------------------------------------
; Entry point ($082c = 2092, the SYS target of the wrapper above).
; X is the value being printed and the index into every table; the loop
; relies on CHAROUT leaving X unchanged.
; ------------------------------------------------------------------------------
            *= $082c
main:
            jsr CLRSCR
            ldx #0
.next_value:
            lda #' '
            jsr CHAROUT
            lda dec2,x                  ; hundreds (blank below 100)
            jsr CHAROUT
            lda dec1,x                  ; tens (blank below 10)
            jsr CHAROUT
            lda dec0,x                  ; ones
            jsr CHAROUT

            !for step, 1, GAP_COLUMNS {
                inc CRSRX
            }

            lda bit7,x
            jsr CHAROUT
            lda bit6,x
            jsr CHAROUT
            lda bit5,x
            jsr CHAROUT
            lda bit4,x
            jsr CHAROUT
            lda bit3,x
            jsr CHAROUT
            lda bit2,x
            jsr CHAROUT
            lda bit1,x
            jsr CHAROUT
            lda bit0,x
            jsr CHAROUT

            lda #NEWLINE
            jsr CHAROUT
            inx
            bne .next_value             ; X wrapped back to 0: all 256 values done
            rts

; ------------------------------------------------------------------------------
; Binary digit tables, one per bit, starting on a page boundary.
; ------------------------------------------------------------------------------
            !align 255, 0, 0
bit7:       +digit_table 7
bit6:       +digit_table 6
bit5:       +digit_table 5
bit4:       +digit_table 4
bit3:       +digit_table 3
bit2:       +digit_table 2
bit1:       +digit_table 1
bit0:       +digit_table 0

; ------------------------------------------------------------------------------
; Decimal digit tables; leading zeros are replaced by spaces.
; ------------------------------------------------------------------------------
dec2:       !for i, 0, 255 {
                !if i < 100 {
                    !byte $20
                } else if i < 200 {
                    !byte $31
                } else {
                    !byte $32
                }
            }

dec1:       !for i, 0, 255 {
                !if i < 10 {
                    !byte $20
                } else {
                    !byte ((i / 10) - ((i / 100) * 10)) OR $30
                }
            }

dec0:       !for i, 0, 255 {
                !byte (i % 10) OR $30
            }
Quoting Mr SQL: My idea for an optimized assembly solution would be to use 8 page aligned 256 byte tables of 0 and 1 characters, one for each bit. This would allow a shared index for 8 lda's of 4 cycles each without having to branch after each load, just pushing the values loaded to CHAROUT. So I did it like you suggested (plus the decimal number and a clear screen, to get the "exact" output like in the video; I don't know what CHR$(5) does). One could also unroll the main loop, but this won't do much because of the imprecise TI$ measuring. https://trans.jansalleine.com/c64/num2binary_table.prg 3073 Bytes !cpu 6510 ; ============================================================================== CRSRX = 0xD3 CHAROUT = 0xF1CA CLRSCR = 0xE544 ; ============================================================================== *= 0x0801 ; basic TI$ timer wrapper program: ; -------------------------------- ; 0 TI$="000000":SYS2092 ; 1 PRINT"TIME:"TI/60 ; -------------------------------- ; RESULT: ~9 !byte 0x18, 0x08, 0x00, 0x00 !byte 0x54, 0x49, 0x24, 0xB2 !byte 0x22, 0x30, 0x30, 0x30 !byte 0x30, 0x30, 0x30, 0x22 !byte 0x3A, 0x9E, 0x32, 0x30 !byte 0x39, 0x32, 0x00, 0x2A !byte 0x08, 0x01, 0x00, 0x99 !byte 0x22, 0x54, 0x49, 0x4D !byte 0x45, 0x3A, 0x22, 0x54 !byte 0x49, 0xAD, 0x36, 0x30 !byte 0x00, 0x00, 0x00 ; ============================================================================== *= 0x082C jsr CLRSCR ldx #0 - lda #' ' jsr CHAROUT lda dec2,x jsr CHAROUT lda dec1,x jsr CHAROUT lda dec0,x jsr CHAROUT inc CRSRX inc CRSRX inc CRSRX inc CRSRX inc CRSRX inc CRSRX lda bit7,x jsr CHAROUT lda bit6,x jsr CHAROUT lda bit5,x jsr CHAROUT lda bit4,x jsr CHAROUT lda bit3,x jsr CHAROUT lda bit2,x jsr CHAROUT lda bit1,x jsr CHAROUT lda bit0,x jsr CHAROUT lda #0x0D jsr CHAROUT inx bne - rts ; ============================================================================== !align 255, 0, 0 bit7: !for i, 0, 255 { !byte ((i AND %10000000) >> 7) OR 0x30 } bit6: !for i, 0, 255 { !byte ((i AND 
%01000000) >> 6) OR 0x30 } bit5: !for i, 0, 255 { !byte ((i AND %00100000) >> 5) OR 0x30 } bit4: !for i, 0, 255 { !byte ((i AND %00010000) >> 4) OR 0x30 } bit3: !for i, 0, 255 { !byte ((i AND %00001000) >> 3) OR 0x30 } bit2: !for i, 0, 255 { !byte ((i AND %00000100) >> 2) OR 0x30 } bit1: !for i, 0, 255 { !byte ((i AND %00000010) >> 1) OR 0x30 } bit0: !for i, 0, 255 { !byte ((i AND %00000001) >> 0) OR 0x30 } ; ============================================================================== dec2: !for i, 0, 255 { !if i < 100 { !byte 0x20 } else if i < 200 { !byte 0x31 } else { !byte 0x32 } } dec1: !for i, 0, 255 { !if i < 10 { !byte 0x20 } else { !byte ((i / 10) - ((i / 100) * 10)) OR 0x30 } } dec0: !for i, 0, 255 { !byte (i % 10) OR 0x30 } As already stated in this thread: KERNAL char out / print is the most expensive operation in this whole scenario anyway. But without changing the "rules" like others suggested there's no way around that other than implementing your own faster routines for that – which goes a little bit beyond what such small "exercises" usually want to accomplish. That guy making those isn't a scener and has a very "oldskool" approach to everything – working with your C64 as "intended" by the user manual :-) It's sometimes still kind of fun, but I usually also fast forward a lot when watching videos from him.
0 gosub9:ti$="000000":goto2
1 printd$(i/16)d$(iand15):return
2 fori=0to255:gosub1:next
3 print"time:"ti/60:end
4 rem line 9 fills d$(0..15) from the data in 7-8 before ti$ is reset,
5 rem so only the 256 prints done by line 2 are timed. line 1 prints the
6 rem string for the high nibble (i/16) and for the low nibble (i and 15).
7 data"0000","0001","0010","0011","0100","0101","0110","0111"
8 data"1000","1001","1010","1011","1100","1101","1110","1111"
9 dimd$(16):fori=0to15:readd$(i):next:return
I like the way you had the Assembler create the tables! Which Assembler are you using?
But loops and arithmetic for generating or calculating tables etc. are quite a common feature in most (if not all) modern PC cross-assemblers.
The hard part is knowing when to drop that hammer and search for another tool to beat that non-nail. =)
Do you mean one should generate tables elsewhere
And then just incbin that binary, of course, not generate a wall of asm .byte statements.
Well, while these may all be good ideas in theory, I actually don't know how to "incbin" into CSDb forums for posting a quick and dirty code snippet ;-)
Nobody said anything like that. And code-golf or general prototyping is a good reason for loop macros. =)
Yes to generate tables elsewhere most of the time (though for a case as simple as this I'd probably inline a loop plus expression too), but if you mean "do you generate the table once then copy and paste a huge list of .byte statements into your source code" then definitely not!
This one takes 13.18 seconds (as compared to just doing print:return in the printing routine, which takes 8.91 - the binary conversion+print is hence around 4.27). So yeah, ws is correct. It's dominated by screen scroll time. Those times are if you run after loading from reset - if you start at the bottom of the screen everything is slower, because you start scrolling immediately. 0 gosub9:ti$="000000":goto2 1 printd$(i/16)d$(iand15):return 2 fori=0to255:gosub1:next 3 print"time:"ti/60:end 7 data"0000","0001","0010","0011","0100","0101","0110","0111" 8 data"1000","1001","1010","1011","1100","1101","1110","1111" 9 dimd$(16):fori=0to15:readd$(i):next:return
10 TI$="000000":E=15:DIMB$(E):SYS58692:FORA=.TOE:READB$(A):NEXT:D=1/A
20 FORA=.TO255:PRINTB$(A*D)B$(AANDE):NEXT:PRINTTI/60:DATA0000,0001,0010
30 DATA0011,0100,0101,0110,0111,1000,1001,1010,1011,1100,1101,1110,1111
40 REM B$(0..15) HOLDS THE 16 NIBBLE STRINGS READ FROM THE DATA ITEMS.
50 REM A IS 16 WHEN THE READ LOOP ENDS, SO D=1/16 AND B$(A*D) PICKS THE
60 REM HIGH NIBBLE; B$(A AND E) PICKS THE LOW NIBBLE (E=15).
70 REM NOTE(REVIEW): SYS58692 ($E544) PRESUMABLY CLEARS THE SCREEN - CONFIRM.
1 TI$="000000":SYS49152,2:PRINTTI/60:END
2 CLR:PRINT"{CLR}";:C=48:B=49:G=128:H=64:I=32:J=16:K=8:L=4:M=2:N=1:F=256:O=65490
3 A=C:D=VANDG:IFD=GTHENA=B
4 SYSO:A=C:D=VANDH:IFD=HTHENA=B
5 SYSO:A=C:D=VANDI:IFD=ITHENA=B
6 SYSO:A=C:D=VANDJ:IFD=JTHENA=B
7 SYSO:A=C:D=VANDK:IFD=KTHENA=B
8 SYSO:A=C:D=VANDL:IFD=LTHENA=B
9 SYSO:A=C:D=VANDM:IFD=MTHENA=B
10 SYSO:A=C:D=VANDN:IFD=NTHENA=B
11 PRINTCHR$(A)
12 V=V+1:IFV<>FTHENGOTO3
13 END
14 REM LINES 3-10 EACH TEST ONE BIT OF V (MASKS G=128 DOWN TO N=1) AND SET
15 REM A TO C (48) OR B (49); O=65490 IS THE ADDRESS USED BY EACH SYSO.
16 REM NOTE(REVIEW): SYS49152,2 CALLS CODE NOT SHOWN HERE - PRESUMABLY IT
17 REM RUNS LINES 2-13; HOW A REACHES THE SYSO CALL IS NOT VISIBLE. CONFIRM.
1 TI$="000000":SYS49152,2:PRINTTI/60:END
2 CLR:PRINT"{CLR}";:B=16:F=256:E=15
3 A=V/B:A=A+B:GOSUBA:A=VANDE:A=A+B:GOSUBA:PRINT:V=V+1:IFV<>FTHENGOTO3
4 END
16 PRINT"0000";:RETURN
17 PRINT"0001";:RETURN
18 PRINT"0010";:RETURN
19 PRINT"0011";:RETURN
20 PRINT"0100";:RETURN
21 PRINT"0101";:RETURN
22 PRINT"0110";:RETURN
23 PRINT"0111";:RETURN
24 PRINT"1000";:RETURN
25 PRINT"1001";:RETURN
26 PRINT"1010";:RETURN
27 PRINT"1011";:RETURN
28 PRINT"1100";:RETURN
29 PRINT"1101";:RETURN
30 PRINT"1110";:RETURN
31 PRINT"1111";:RETURN
32 REM LINE 3 SPLITS V INTO A HIGH NIBBLE (V/B) AND A LOW NIBBLE (V AND E),
33 REM ADDS B=16 AND USES THE RESULT AS THE GOSUB TARGET (LINES 16-31).
34 REM NOTE(REVIEW): SYS49152,2 CALLS CODE NOT SHOWN HERE, AND GOSUB WITH A
35 REM VARIABLE TARGET PRESUMABLY DEPENDS ON IT - CONFIRM.
10 TI$="000000":E=15:DIMB$(E):SYS58692:FORA=.TOE:READB$(A):NEXT:D=1/A 20 FORA=.TO255:PRINTB$(A*D)B$(AANDE):NEXT:PRINTTI/60:DATA0000,0001,0010 30 DATA0011,0100,0101,0110,0111,1000,1001,1010,1011,1100,1101,1110,1111 -> 11.8166667 sec. Multiplying by 1/16 is a bit faster than dividing. Keeping constants in variables is a bit faster, too. I didn't see any gains from using a subroutine.