[CSDb] - User Forums - ACME macro for delaying X cycles

You are not logged in - nap

CSDb User Forums

Forums > C64 Coding > ACME macro for delaying X cycles

2017-10-24 13:02

Frantic

Registered: Mar 2003
Posts: 1648

ACME macro for delaying X cycles

Anybody got a macro handy for the ACME assembler for delaying X number of cycles? It is OK if it kills the A or X register.

2017-10-24 13:08

ChristopherJam

Registered: Aug 2004
Posts: 1409

No, but this might help with writing one

; Fixed-delay instruction sequences for 2..14 cycles, picked for small code size
; (minimal bytes).
; preserves a,x,y,sp
; may clobber stack and flags:
;   - "bit 3" reads zero-page location 3 and sets N, V and Z from it
;   - pha stores a copy of A on the stack, pla reloads it and sets N and Z

; 2 cycles (1 byte)
    nop

; 3 cycles (2 bytes): zero-page read
    bit 3

; 4 cycles (2 bytes): 2 + 2
    nop
    nop

; 5 cycles (3 bytes): 2 + 3
    nop
    bit 3

; 6 cycles (3 bytes): 2 + 2 + 2
    nop
    nop
    nop

; 7 cycles (2 bytes): pha 3 + pla 4
    pha
    pla

; 8 cycles (4 bytes): 4 x 2
    nop
    nop
    nop
    nop

; 9 cycles (3 bytes): 3 + 2 + 4
    pha
    nop
    pla

;10 cycles (4 bytes): 3 + 3 + 4
    pha
    bit 3
    pla

;11 cycles (4 bytes): 3 + 2 + 2 + 4
    pha
    nop
    nop
    pla

;12 cycles (5 bytes): 3 + 2 + 3 + 4
    pha
    nop
    bit 3
    pla

;13 cycles (5 bytes): 3 + 2 + 2 + 2 + 4
    pha
    nop
    nop
    nop
    pla

;14 cycles (4 bytes): 3 + 3 + 4 + 4, uses two bytes of stack
    pha
    pha
    pla
    pla

2017-10-24 13:13

Frantic

Registered: Mar 2003
Posts: 1648

Thanks!

2017-10-24 14:35

ChristopherJam

Registered: Aug 2004
Posts: 1409

np.

Also this:

; wait_min24(x): delay for x cycles in total, counting the LDA and the JSR.
; Valid for 24 <= x <= 87. (x = 88 would load 0, the BNE would fall through
; and the delay would come out as 87 cycles.)
; Clobbers A and flags, uses two bytes of stack for the return address.
; The argument is parenthesised so that an expression such as
; wait_min24(20+10) expands to 88-(20+10) rather than 88-20+10.
#define wait_min24(x) lda#88-(x):jsr waitN

; waitN: entered with A = 88-x (non-zero, so the BNE is always taken).
; A is patched into the offset byte of the BNE below, so the branch skips A
; bytes of the slide; every skipped byte shortens the delay by one cycle.
; Self-modifying, so it has to run from RAM. A taken branch costs one extra
; cycle when it crosses a page boundary, so keep the routine inside one page.
waitN
	sta *+4  ; overwrite the BNE offset byte with A
	bne *+2  ; offset 0 is only a placeholder, patched at run time
	.dsb 64,$a9  ; slide of LDA #$a9: 2 bytes, 2 cycles each
	lda $ea  ; even entry: LDA $ea (3 cycles); odd entry decodes as LDA #$a5 / NOP (4 cycles)
	rts

2017-10-24 15:54

TWW

Registered: Jul 2009
Posts: 545

And this (part of my delay pseudo-command) ;-)

  // Straight-line delays for 15..21 cycles, one .if section per cycle count.
  // Each section brackets its padding with pha (3 cycles) / pla (4 cycles)
  // pairs and fills the rest with nop (2) or bit $00 (3). Every pla undoes a
  // pha, so A and the stack pointer end up unchanged; the flags set by pla
  // (N, Z) and by bit (N, V, Z) are not preserved.
  // NOTE(review): `cycles` is defined outside this excerpt - presumably the
  // argument of the enclosing pseudo-command; confirm against the full source.
  .if(cycles.getValue()==15) {
    pha         // 3
    nop         // 2
    nop         // 2
    nop         // 2
    nop         // 2
    pla         // 4 <- 15 cycles | 6 bytes
  }

  .if(cycles.getValue()==16) {
    pha         // 3
    pha         // 3
    nop         // 2
    pla         // 4
    pla         // 4 <- 16 cycles | 5 bytes
  }

  .if(cycles.getValue()==17) {
    pha         // 3
    pha         // 3
    bit $00     // 3
    pla         // 4
    pla         // 4 <- 17 cycles | 6 bytes
  }

  .if(cycles.getValue()==18) {
    pha         // 3
    pha         // 3
    nop         // 2
    nop         // 2
    pla         // 4
    pla         // 4 <- 18 cycles | 6 bytes
  }

  .if(cycles.getValue()==19) {
    pha         // 3
    pha         // 3
    nop         // 2
    bit $00     // 3
    pla         // 4
    pla         // 4 <- 19 cycles | 7 bytes
  }

  .if(cycles.getValue()==20) {
    pha         // 3
    pha         // 3
    nop         // 2
    nop         // 2
    nop         // 2
    pla         // 4
    pla         // 4 <- 20 cycles | 7 bytes
  }

  .if(cycles.getValue()==21) {
    pha         // 3
    pha         // 3
    pha         // 3
    pla         // 4
    pla         // 4
    pla         // 4 <- 21 cycles | 6 bytes
  }

  // Delays for 22..36 cycles, one .if section per cycle count.
  // Most sections use a counted loop: A is saved with pha, loaded with a
  // single set bit and shifted right (lsr) until that bit drops into carry;
  // bcc jumps back to the lsr while carry is still clear. The columns in the
  // trailing comments are the cycle counts of successive loop passes: a taken
  // bcc costs 3 cycles, the final untaken one 2. pla restores A; carry, N and
  // Z are left modified.
  // NOTE: a taken branch costs one extra cycle when it crosses a page
  // boundary, so the loop sections are only cycle-exact while the lsr..bcc
  // bytes stay inside one page.
  .if(cycles.getValue()==22) {
    pha             // 3
    lda #%00000010  // 2
    lsr             // 2 2
    nop             // 2 2
    bcc *-2         // 3 2
    pla             //   4 <- 22 cycles | 8 bytes
  }

  .if(cycles.getValue()==23) {
    pha             // 3
    lda #%00000100  // 2
    lsr             // 2 2 2
    bcc *-1         // 3 3 2
    pla             //     4 <- 23 cycles | 7 bytes
  }

  .if(cycles.getValue()==24) {
    pha         // 3
    pha         // 3
    pha         // 3
    bit $00     // 3
    pla         // 4
    pla         // 4
    pla         // 4 <- 24 cycles | 8 bytes
  }

  .if(cycles.getValue()==25) {
    pha             // 3
    lda #%00000100  // 2
    lsr             // 2 2 2
    bcc *-1         // 3 3 2
    nop             //     2
    pla             //     4 <- 25 cycles | 8 bytes
  }

  .if(cycles.getValue()==26) {
    pha         // 3
    pha         // 3
    pha         // 3
    nop         // 2
    bit $00     // 3
    pla         // 4
    pla         // 4
    pla         // 4 <- 26 cycles | 9 bytes
  }

  .if(cycles.getValue()==27) {
    pha         // 3
    pha         // 3
    pha         // 3
    nop         // 2
    nop         // 2
    nop         // 2
    pla         // 4
    pla         // 4
    pla         // 4 <- 27 cycles | 9 bytes
  }

  .if(cycles.getValue()==28) {
    pha             // 3
    lda #%00001000  // 2
    lsr             // 2 2 2 2
    bcc *-1         // 3 3 3 2
    pla             //       4 <- 28 cycles | 7 bytes
  }

  .if(cycles.getValue()==29) {
    pha             // 3
    lda #%00000100  // 2
    lsr             // 2 2 2
    nop             // 2 2 2
    bcc *-2         // 3 3 2
    pla             //     4 <- 29 cycles | 8 bytes
  }

  .if(cycles.getValue()==30) {
    pha             // 3
    lda #%00001000  // 2
    lsr             // 2 2 2 2
    bcc *-1         // 3 3 3 2
    nop             //       2
    pla             //       4 <- 30 cycles | 8 bytes
  }

  .if(cycles.getValue()==31) {
    pha             // 3
    lda #%00001000  // 2
    lsr             // 2 2 2 2
    bcc *-1         // 3 3 3 2
    bit $00         //       3
    pla             //       4 <- 31 cycles | 9 bytes
  }

  .if(cycles.getValue()==32) {
    pha             // 3
    lda #%00001000  // 2
    lsr             // 2 2 2 2
    bcc *-1         // 3 3 3 2
    nop             //       2
    nop             //       2
    pla             //       4 <- 32 cycles | 9 bytes
  }

  .if(cycles.getValue()==33) {
    pha             // 3
    lda #%00010000  // 2
    lsr             // 2 2 2 2 2
    bcc *-1         // 3 3 3 3 2
    pla             //         4 <- 33 cycles | 7 bytes
  }

  .if(cycles.getValue()==34) {
    pha             // 3
    lda #%00001000  // 2
    lsr             // 2 2 2 2
    bcc *-1         // 3 3 3 2
    nop             //       2
    nop             //       2
    nop             //       2
    pla             //       4 <- 34 cycles | 10 bytes
  }

  .if(cycles.getValue()==35) {
    pha             // 3
    lda #%00010000  // 2
    lsr             // 2 2 2 2 2
    bcc *-1         // 3 3 3 3 2
    nop             //         2
    pla             //         4 <- 35 cycles | 8 bytes
  }

  .if(cycles.getValue()==36) {
    pha             // 3
    lda #%00001000  // 2
    lsr             // 2 2 2 2
    nop             // 2 2 2 2
    bcc *-2         // 3 3 3 2
    pla             //       4 <- 36 cycles | 8 bytes
  }

2017-10-24 17:04

lft

Registered: Jul 2007
Posts: 369

Quoting ChristopherJam

;minimal bytes

...

; 6 cycles (3 bytes) nop nop nop

The following is more minimal. =)

; 6 cycles (2 bytes)
; cmp only compares, so a, x and y are unchanged; N, Z and C are modified.
; The operand is fetched through the pointer stored at zero page X and X+1,
; so avoid this where that pointer could hit a location with read side effects.
    cmp (0,x)

The routines for 8, 12 and 13 cycles can be shortened with the same technique.

2017-10-24 21:22

Frantic

Registered: Mar 2003
Posts: 1648

Thanks all!

@lft: cool. Wasn't aware of that one.

2017-10-25 00:23

chatGPZ

Registered: Dec 2001
Posts: 11386

I'd refrain from using branches in those macros... or at least have guards that give warnings (or even adjust the macro accordingly) when the branch crosses a page boundary. Without them you will get nice heisenbugs that appear and disappear randomly when you add/remove code :)

2017-10-25 01:35

Kruthers

Registered: Jul 2016
Posts: 21

Can't help posting my "stable" delay macro, though it's 64tass not ACME. Maybe somebody will find it useful. It allows you to tweak the delay without causing code to shift around because it always uses 8 bytes. Alas, it can't do 2 or 4 cycles...

Only real regret is that it sometimes needs to trash a ZP location. And branching is not optional of course. Liberal use of ".option allow_branch_across_page" helps. :)

; macro to generate delay in cycles, always using 8 bytes
; requires the x or y register and one zeropage location which may be trashed
;
; parameters:
;   cycles  total delay: 3, or 5 to 1282 (may be an expression such as 10+5)
;   reg     x or y; used as the loop counter when cycles >= 11
;   zp      zeropage address used by the bit/inc padding instructions
;
; clobbers: flags; the chosen index register when cycles >= 11; the zeropage
;           location is written (inc) only when cycles >= 11 and
;           cycles % 5 == 3, otherwise it is at most read (bit)
;
; note: for cycles >= 11 the timing relies on the bne not crossing a page
;       boundary (a crossing taken branch costs one extra cycle per pass)
;
; usage: #delay8b 1282, y, $02
;        #delay8b 23, x, $ff
delay8b     .macro cycles, reg, zp
                ; validation
                .cerror (\cycles < 3 || \cycles == 4), "8-byte-delay cannot be 1, 2 or 4 cycles"
                .cerror (\cycles > 1282), "8-byte-delay must be less than 1283 cycles"
                .cerror (\reg != "x" && \reg != "y"), "Unknown register", \reg
                .cerror (\zp < $00 || \zp > $ff), "Zeropage argument is required"
                ; 3 to 10 cycles are hard coded: the instructions in front of
                ; the jmp burn the cycles, the jmp (3 cycles) skips to the end
                ; of the 8-byte block and the bytes after it are never executed
                .switch \cycles
                .case 3
                    jmp *+8
                    bit \zp
                    bit \zp
                    nop
                .case 5
                    nop
                    jmp *+7
                    bit \zp
                    bit \zp
                .case 6
                    bit \zp
                    jmp *+6
                    bit \zp
                    nop
                .case 7
                    nop
                    nop
                    jmp *+6
                    bit \zp
                    nop
                .case 8
                    nop
                    bit \zp
                    jmp *+5
                    bit \zp
                .case 9
                    bit \zp
                    bit \zp
                    jmp *+4
                    nop
                .case 10
                    nop
                    nop
                    bit \zp
                    jmp *+4
                    nop
                ; 11 or more cycles follows a repeating pattern
                .default
                    ; The counted loop (ldy/iny/bne, 5 bytes) takes 5*loop+1
                    ; cycles. The remaining 3 bytes supply the rest:
                    ;   cycles % 5 == 4: jmp *+3        (3 cycles)
                    ;   cycles % 5 == 0: extra ldy, nop (2+2 cycles)
                    ;   cycles % 5 == 1: bit, nop       (3+2 cycles)
                    ;   cycles % 5 == 2: nop, nop, nop  (2+2+2 cycles)
                    ;   cycles % 5 == 3: inc, nop       (5+2 cycles)
                    ;
                    ; Macro parameters are substituted as text, so \cycles is
                    ; parenthesised wherever it is used in arithmetic:
                    ; without that, an argument like 10+5 turned
                    ; "\cycles % 5" into "10+5 % 5" and no branch matched.
                    ; determine number of each operation
                    loop := ((\cycles) - 4) / 5
                    n_nop := 1
                    n_bit := 0
                    n_ldy := 0
                    n_jmp := 0
                    n_inc := 0
                    .if ((\cycles) % 5) == 0
                        n_ldy := 1
                    .elsif (((\cycles) - 1) % 5) == 0
                        n_bit := 1
                    .elsif (((\cycles) - 2) % 5) == 0
                        n_nop := 3
                    .elsif (((\cycles) - 3) % 5) == 0
                        n_inc := 1
                    .elsif (((\cycles) - 4) % 5) == 0
                        n_nop := 0
                        n_jmp := 1
                    .endif
                    ; write out the code
                    ; extra ldy (or ldx): a 2-byte, 2-cycle filler; the loop
                    ; setup below loads the same value again
                    .if n_ldy > 0
                        .if \reg == "y"
                            ldy #($100-loop)
                        .else
                            ldx #($100-loop)
                        .endif
                    .endif
                    ; loop: counts the register up from $100-loop to zero,
                    ; i.e. `loop` passes of iny/bne (5 cycles, last pass 4)
                    .if \reg == "y"
                        ldy #($100-loop)
                        iny
                    .else
                        ldx #($100-loop)
                        inx
                    .endif
                    bne *-1
                    ; bit
                    .if n_bit
                        bit \zp
                    .endif
                    ; nops
                    .rept n_nop
                        nop
                    .next
                    ; inc (this is the case that modifies the zeropage location)
                    .if n_inc
                        inc \zp
                    .endif
                    ; jmps
                    .if n_jmp
                        jmp *+3
                    .endif
                .endswitch
            .endm

2017-10-25 05:29

TWW

Registered: Jul 2009
Posts: 545

@GPZ: That is a good point. It is straightforward to add an assertion based on .pc.

@lft: Nice one. Will shamelessly update my routines with this one, saving a byte where I can ;-)

Forgot to say that my other criteria were to leave the registers untouched and to have it used simply as :DELAY 5

2017-10-25 07:13

ChristopherJam

Registered: Aug 2004
Posts: 1409

Actually, no one should ever use my wait_min24 from above on a 6510, unless they have ideological objections to unintended opcodes.

This is better; covers shorter delays and doesn't clobber any registers or flags. Still requires two bytes of stack.

; wait_min14(x): delay for x cycles in total (JSR and RTS included) by jumping
; into a slide of NOPs x-14 bytes in front of wait14. With the 65 slide bytes
; laid out below, the reachable range is 14 <= x <= 79.
; No registers or flags are changed; the JSR uses two bytes of stack.
#define wait_min14(x) jsr wait14+14-(x)

    .dsb 64,$80   ; NOP#nn (unintended opcode: 2 bytes, 2 cycles)
    .byt $04      ; NOP zp (unintended opcode: 2 bytes, 3 cycles). On an odd entry
                  ; offset it executes and swallows the nop at wait14 as its operand;
                  ; on an even offset it is the operand of the last $80 and the nop runs.
wait14
    nop
wait12
    rts

(if you don't like illegals, replace the NOPs with BITs, and you get one that preserves registers but not flags..)

oh, and lft, thanks for the cmp (0,x)!

... 21 posts hidden. Click here to view all posts....

Previous - 1 | 2 | 3 | 4 - Next

Refresh

Subscribe to this thread: