| |
Frantic
Registered: Mar 2003 Posts: 1648 |
ACME macro for delaying X cycles
Anybody got a macro handy for the ACME assembler for delaying X number of cycles? It is OK if it kills the A or X register. |
|
| |
ChristopherJam
Registered: Aug 2004 Posts: 1409 |
No, but this might help with writing one
;minimal bytes
; Straight-line delay sequences for 2..14 cycles, chosen for small size.
; Building blocks: nop = 2 cycles/1 byte, bit zp = 3 cycles/2 bytes,
; pha = 3 cycles/1 byte, pla = 4 cycles/1 byte.
; preserves a,x,y,sp
; may clobber stack and flags
; (pha overwrites the byte at the current stack pointer; the matching pla
; reloads the same value into A but sets N/Z; bit 3 sets N/V/Z from
; zero-page location 3)
; 2 cycles (1 byte)
nop
; 3 cycles (2 bytes)
bit 3
; 4 cycles (2 bytes)
nop
nop
; 5 cycles (3 bytes)
nop
bit 3
; 6 cycles (3 bytes)
nop
nop
nop
; 7 cycles (2 bytes)
pha
pla
; 8 cycles (4 bytes)
nop
nop
nop
nop
; 9 cycles (3 bytes)
pha
nop
pla
;10 cycles (4 bytes)
pha
bit 3
pla
;11 cycles (4 bytes)
pha
nop
nop
pla
;12 cycles (5 bytes)
pha
nop
bit 3
pla
;13 cycles (5 bytes)
pha
nop
nop
nop
pla
;14 cycles (4 bytes)
pha
pha
pla
pla
|
| |
Frantic
Registered: Mar 2003 Posts: 1648 |
Thanks! |
| |
ChristopherJam
Registered: Aug 2004 Posts: 1409 |
np.
Also this:
; wait_min24(x): delay exactly x cycles, counting the lda/jsr of the macro
; itself. Valid for x = 24..87.
;
; How it works: A = 88-(x) is stored into the operand byte of the bne below
; (self-modifying code), so the branch lands A bytes into a sled of $a9 bytes.
; Fixed cost: lda # (2) + jsr (6) + sta abs (4) + bne taken (3) + rts (6) = 21.
; Sled cost: entering on the final "lda $ea" costs 3 cycles (x = 24); every
; byte earlier adds exactly one cycle. On an even distance the sled decodes as
; lda #$a9 pairs followed by lda $ea; on an odd distance the last pair is
; $a9 $a5 (lda #$a5) and the leftover $ea byte executes as a nop.
;
; Clobbers: A and flags; reads zero-page location $ea.
; Limits:
; - x = 88 would give A = 0, the bne would fall through (2 cycles instead of
;   3) and the delay would be 87 cycles, so 87 is the largest exact value.
; - The argument is parenthesised so that expression arguments (e.g. a+b)
;   subtract as a whole.
; - NOTE(review): a taken branch that crosses a page boundary costs one extra
;   cycle; keep waitN and its sled within a single page for exact timing.
#define wait_min24(x) lda#88-(x):jsr waitN
waitN
sta *+4
bne *+2
.dsb 64,$a9
lda $ea
rts |
| |
TWW
Registered: Jul 2009 Posts: 545 |
And this (part of my delay pseudo) ;-)
// Fragment of a delay pseudo-command (appears to be KickAssembler script):
// one .if block per requested cycle count, covering 15..36 cycles.
// Every variant brackets its work with pha/pla, so A holds its original value
// on exit. Flags are not preserved (pla sets N/Z, lsr sets C) and one to three
// bytes of stack are used temporarily.
// The trailing comments are cycle counts. In the loop variants each column is
// one pass through the lsr/bcc loop: 3 = branch taken, 2 = final fall-through.
// The loop runs until the single set bit loaded into A is shifted into carry.
// NOTE(review): a taken branch that crosses a page boundary costs one extra
// cycle, so the loop variants are only exact when the loop sits in one page.
.if(cycles.getValue()==15) {
pha // 3
nop // 2
nop // 2
nop // 2
nop // 2
pla // 4 <- 15 cycles | 6 bytes
}
.if(cycles.getValue()==16) {
pha // 3
pha // 3
nop // 2
pla // 4
pla // 4 <- 16 cycles | 5 bytes
}
.if(cycles.getValue()==17) {
pha // 3
pha // 3
bit $00 // 3
pla // 4
pla // 4 <- 17 cycles | 6 bytes
}
.if(cycles.getValue()==18) {
pha // 3
pha // 3
nop // 2
nop // 2
pla // 4
pla // 4 <- 18 cycles | 6 bytes
}
.if(cycles.getValue()==19) {
pha // 3
pha // 3
nop // 2
bit $00 // 3
pla // 4
pla // 4 <- 19 cycles | 7 bytes
}
.if(cycles.getValue()==20) {
pha // 3
pha // 3
nop // 2
nop // 2
nop // 2
pla // 4
pla // 4 <- 20 cycles | 7 bytes
}
.if(cycles.getValue()==21) {
pha // 3
pha // 3
pha // 3
pla // 4
pla // 4
pla // 4 <- 21 cycles | 6 bytes
}
.if(cycles.getValue()==22) {
// Bit 1 set: the second lsr moves it into carry, so the 7-cycle loop body
// runs twice (7 + 6 cycles). Assumes * is the address of the bcc itself,
// making *-2 the lsr.
pha // 3
lda #%00000010 // 2
lsr // 2 2
nop // 2 2
bcc *-2 // 3 2
pla // 4 <- 22 cycles | 8 bytes
}
.if(cycles.getValue()==23) {
pha // 3
lda #%00000100 // 2
lsr // 2 2 2
bcc *-1 // 3 3 2
pla // 4 <- 23 cycles | 7 bytes
}
.if(cycles.getValue()==24) {
pha // 3
pha // 3
pha // 3
bit $00 // 3
pla // 4
pla // 4
pla // 4 <- 24 cycles | 8 bytes
}
.if(cycles.getValue()==25) {
pha // 3
lda #%00000100 // 2
lsr // 2 2 2
bcc *-1 // 3 3 2
nop // 2
pla // 4 <- 25 cycles | 8 bytes
}
.if(cycles.getValue()==26) {
pha // 3
pha // 3
pha // 3
nop // 2
bit $00 // 3
pla // 4
pla // 4
pla // 4 <- 26 cycles | 9 bytes
}
.if(cycles.getValue()==27) {
pha // 3
pha // 3
pha // 3
nop // 2
nop // 2
nop // 2
pla // 4
pla // 4
pla // 4 <- 27 cycles | 9 bytes
}
.if(cycles.getValue()==28) {
pha // 3
lda #%00001000 // 2
lsr // 2 2 2 2
bcc *-1 // 3 3 3 2
pla // 4 <- 28 cycles | 7 bytes
}
.if(cycles.getValue()==29) {
pha // 3
lda #%00000100 // 2
lsr // 2 2 2
nop // 2 2 2
bcc *-2 // 3 3 2
pla // 4 <- 29 cycles | 8 bytes
}
.if(cycles.getValue()==30) {
pha // 3
lda #%00001000 // 2
lsr // 2 2 2 2
bcc *-1 // 3 3 3 2
nop // 2
pla // 4 <- 30 cycles | 8 bytes
}
.if(cycles.getValue()==31) {
pha // 3
lda #%00001000 // 2
lsr // 2 2 2 2
bcc *-1 // 3 3 3 2
bit $00 // 3
pla // 4 <- 31 cycles | 9 bytes
}
.if(cycles.getValue()==32) {
pha // 3
lda #%00001000 // 2
lsr // 2 2 2 2
bcc *-1 // 3 3 3 2
nop // 2
nop // 2
pla // 4 <- 32 cycles | 9 bytes
}
.if(cycles.getValue()==33) {
pha // 3
lda #%00010000 // 2
lsr // 2 2 2 2 2
bcc *-1 // 3 3 3 3 2
pla // 4 <- 33 cycles | 7 bytes
}
.if(cycles.getValue()==34) {
pha // 3
lda #%00001000 // 2
lsr // 2 2 2 2
bcc *-1 // 3 3 3 2
nop // 2
nop // 2
nop // 2
pla // 4 <- 34 cycles | 10 bytes
}
.if(cycles.getValue()==35) {
pha // 3
lda #%00010000 // 2
lsr // 2 2 2 2 2
bcc *-1 // 3 3 3 3 2
nop // 2
pla // 4 <- 35 cycles | 8 bytes
}
.if(cycles.getValue()==36) {
pha // 3
lda #%00001000 // 2
lsr // 2 2 2 2
nop // 2 2 2 2
bcc *-2 // 3 3 3 2
pla // 4 <- 36 cycles | 8 bytes
}
|
| |
lft
Registered: Jul 2007 Posts: 369 |
Quoting ChristopherJam
;minimal bytes
...
; 6 cycles (3 bytes)
nop
nop
nop
The following is more minimal. =)
; 6 cycles (2 bytes)
; Changes only the flags (N/Z/C); A, X, Y and SP are untouched.
; NOTE(review): this performs a real read through the pointer stored at
; zero page 0+x; confirm that pointer cannot land on an address with read
; side effects (e.g. an I/O register) where the snippet is used.
cmp (0,x)
The routines for 8, 12 and 13 cycles can be shortened with the same technique. |
| |
Frantic
Registered: Mar 2003 Posts: 1648 |
Thanks all!
@lft: cool. Wasn't aware of that one. |
| |
chatGPZ
Registered: Dec 2001 Posts: 11386 |
I'd refrain from using branches in those macros... or at least have guards that give warnings (or even adjust the macro accordingly) when the branch crosses a page boundary. Without them you will get nice heisenbugs that appear and disappear randomly when you add/remove code :) |
| |
Kruthers
Registered: Jul 2016 Posts: 21 |
Can't help posting my "stable" delay macro, though it's 64tass not ACME. Maybe somebody will find it useful. It allows you to tweak the delay without causing code to shift around because it always uses 8 bytes. Alas, it can't do 2 or 4 cycles...
Only real regret is that it sometimes needs to trash a ZP location. And branching is not optional of course. Liberal use of ".option allow_branch_across_page" helps. :)
; macro to generate delay in cycles, always using 8 bytes
; requires the x or y register and one zeropage location which may be trashed
; (only the variant that emits "inc zp" writes the location; "bit zp" just
; reads it, but the flags are changed either way)
;
; usage: #delay8b 1282, y, $02
; #delay8b 23, x, $ff
;
; parameters:
; cycles - requested delay; 3 or 5..1282 (1, 2 and 4 are rejected)
; reg - "x" or "y", the index register used as loop counter (11+ cycles)
; zp - zero-page address used by the bit/inc filler instructions
;
; NOTE(review): the 11+ cycle variants contain a backward bne; a taken branch
; that crosses a page boundary costs one extra cycle per pass, so the stated
; timing assumes the 8 bytes do not straddle a page at that branch.
delay8b .macro cycles, reg, zp
; validation
.cerror (\cycles < 3 || \cycles == 4), "8-byte-delay cannot be 1, 2 or 4 cycles"
.cerror (\cycles > 1282), "8-byte-delay must be less than 1283 cycles"
.cerror (\reg != "x" && \reg != "y"), "Unknown register", \reg
.cerror (\zp < $00 || \zp > $ff), "Zeropage argument is required"
; 3 to 10 cycles are hard coded
; Layout of every case: filler that executes (nop = 2 cycles, bit zp = 3),
; then a 3-cycle jmp that skips the remaining padding bytes so the total
; size is always 8 bytes.
.switch \cycles
.case 3
jmp *+8
bit \zp
bit \zp
nop
.case 5
nop
jmp *+7
bit \zp
bit \zp
.case 6
bit \zp
jmp *+6
bit \zp
nop
.case 7
nop
nop
jmp *+6
bit \zp
nop
.case 8
nop
bit \zp
jmp *+5
bit \zp
.case 9
bit \zp
bit \zp
jmp *+4
nop
.case 10
nop
nop
bit \zp
jmp *+4
nop
; 11 or more cycles follows a repeating pattern
; The counting loop (ld? #, in?, bne) is 5 bytes and costs 5*loop+1 cycles:
; 2 for the load, 5 per pass (in? 2 + bne taken 3), minus 1 because the
; last bne falls through. The remaining 3 bytes supply the rest, selected
; by cycles % 5:
; 0: extra ld? (2) + nop (2) -> 5*loop+5
; 1: bit zp (3) + nop (2) -> 5*loop+6
; 2: nop nop nop (6) -> 5*loop+7
; 3: nop (2) + inc zp (5) -> 5*loop+8
; 4: jmp to next instruction (3) -> 5*loop+4
.default
; determine number of each operation
loop := (\cycles - 4) / 5
n_nop := 1
n_bit := 0
n_ldy := 0
n_jmp := 0
n_inc := 0
.if (\cycles % 5) == 0
n_ldy := 1
.elsif ((\cycles - 1) % 5) == 0
n_bit := 1
.elsif ((\cycles - 2) % 5) == 0
n_nop := 3
.elsif ((\cycles - 3) % 5) == 0
n_inc := 1
.elsif ((\cycles - 4) % 5) == 0
n_nop := 0
n_jmp := 1
.endif
; write out the code
; extra ldy (or ldx)
; (a redundant load of the same value, used purely as a 2-cycle, 2-byte filler)
.if n_ldy > 0
.if \reg == "y"
ldy #($100-loop)
.else
ldx #($100-loop)
.endif
.endif
; loop
; the counter starts at $100-loop and is incremented until it wraps to zero,
; giving exactly "loop" passes; bne *-1 branches back to the iny/inx
.if \reg == "y"
ldy #($100-loop)
iny
.else
ldx #($100-loop)
inx
.endif
bne *-1
; bit
.if n_bit
bit \zp
.endif
; nops
.rept n_nop
nop
.next
; inc
; (this is the only instruction in the macro that modifies the zp location)
.if n_inc
inc \zp
.endif
; jmps
; (jmp *+3 targets the byte right after itself: a 3-byte, 3-cycle no-op)
.if n_jmp
jmp *+3
.endif
.endswitch
.endm
|
| |
TWW
Registered: Jul 2009 Posts: 545 |
@ GPZ: That is a good point. Straight fwd. to add an assertion based on .pc.
@lft: Nice one. Will shamelessly update my routines with this one, saving a byte where I can ;-)
Forgot to say: the other criterion for me was that it leaves the registers untouched and can be used simply as :DELAY 5 |
| |
ChristopherJam
Registered: Aug 2004 Posts: 1409 |
Actually, no one should ever use my wait_min24 from above on a 6510, unless they have ideological objections to unintended opcodes.
This is better; covers shorter delays and doesn't clobber any registers or flags. Still requires two bytes of stack.
; wait_min14(x): delay exactly x cycles (including the jsr) by calling into a
; sled of undocumented NOP opcodes that ends at wait14. Valid for x = 14..79
; (65 sled bytes in front of wait14).
; Entry at wait14 costs jsr (6) + nop (2) + rts (6) = 14; every byte earlier
; adds exactly one cycle:
; - even distance: the bytes decode as 2-byte NOP #imm pairs (2 cycles each,
;   the last pair being $80 $04), then the nop at wait14 runs;
; - odd distance: the pairs are followed by $04 taking the nop's opcode byte
;   as its operand (NOP zp, 3 cycles), which replaces the 2-cycle nop.
; Calling wait12 directly gives jsr + rts = 12 cycles.
; Uses two bytes of stack for the return address; the sled relies on
; undocumented opcodes $80 and $04.
#define wait_min14(x) jsr wait14+14-(x)
.dsb 64,$80 ; NOP#nn
.byt $04 ; NOP zp
wait14
nop
wait12
rts
(if you don't like illegals, replace the NOPs with BITs, and you get one that preserves registers but not flags..)
oh, and lft, thanks for the cmp (0,x)! |
... 21 posts hidden. Click here to view all posts.... |
Previous - 1 | 2 | 3 | 4 - Next |