Wave order table filling moved and optimized in SongInit.

2023-08-16 20:35:25 +02:00 · 2023-08-16 20:35:25 +02:00 · 36d435f502
commit 36d435f502
parent e0f7537774
3 changed files with 23 additions and 26 deletions
--- a/README.md
+++ b/README.md
@@ -89,14 +89,14 @@ The source needs two common include files to compile (`custom.i` and
 ### Size

 The original C compiled code was... just bad. The new binary is
-about 1/3 of the original one.
+less than 1/3 of the original one.

 The code has been also optimized in a way that it compresses better.
 The original code compressed with *Blueberry's* Shrinkler goes from
 18052 bytes down to 9023 bytes.

 Raspberry Casket, depending on the features compiled in, is about
-5850 bytes and shrinkles down to ~4154 bytes (in isolation).
+5840 bytes and shrinkles down to ~4144 bytes (in isolation).

 So this means that the optimization is not just "on the outside".

@@ -159,8 +159,9 @@ solve this problem.
 - This removes a big source of cpu jitter when track delay is enabled (no longer clearing the track delay buffer).
 - This also fixes usages of illegal period 0 in the lead-in that could cause the replay to miss the first trigger.
 - Moved pattern table init from PlayerInit to SongInit, optimized SongInit a bit.
+- Wave order table filling moved and optimized in SongInit.
 - Added Presto player draft.
-- Drop-in replacement code size: 5850 bytes.
+- Drop-in replacement code size: 5840 bytes.

 ### V1.x (unreleased)
 - Fixed a bug regarding the copper output mode with looping waves having a loop-offset.
--- a/binaries/raspberry_casket.bin
+++ b/binaries/raspberry_casket.bin
--- a/src/raspberry_casket.asm
+++ b/src/raspberry_casket.asm
@@ -80,14 +80,14 @@
 ; Size
 ; ~~~~
 ; The original C compiled code was... just bad. The new binary is
-; about 1/3rd of the original one.
+; less than 1/3rd of the original one.
 ;
 ; The code has been also optimized in a way that it compresses better.
 ; The original code compressed with Blueberry's Shrinkler goes from
 ; 18052 bytes down to 9023 bytes.
 ;
 ; Raspberry Casket, depending on the features compiled in, is about
-; 5850 bytes and shrinkles down to ~4154 bytes (in isolation).
+; 5840 bytes and shrinkles down to ~4144 bytes (in isolation).
 ;
 ; So this means that the optimization is not just "on the outside".
 ;
@@ -394,11 +394,27 @@ pre_SongInit:

        move.b  d0,sv_num_steps_b(a1)

-        mulu    #3,d0                           ; *3 bytes per pattern line
+        lea     sv_wavegen_order_table+MAX_WAVES(a1),a3
+        moveq.l #MAX_WAVES-1,d3                 ; fill 24 bytes with default order of waves?
+.fillcount
+        move.b  d3,-(a3)
+        dbra    d3,.fillcount

+        cmp.b   #$19,d2                         ; check if version is higher than 19
+        bls.s   .hasnowaveordering
+
+        moveq.l #MAX_WAVES-1,d3
+.waveorderloop
+        move.b  (a0)+,(a3)+                     ; $0042 wave generation ordering
+        dbra    d3,.waveorderloop
+
+.hasnowaveordering
+        
        lea     sv_pattern_table(a1),a0
 .pattableloop
        move.l  a4,(a0)+
+        add.w   d0,a4                           ; *3 bytes per pattern line
+        add.w   d0,a4
        add.w   d0,a4
        subq.b  #1,d1
        bne.s   .pattableloop
@@ -494,26 +510,6 @@ pre_SongInit:
 .addressiseven
        move.l  a0,sv_waveinfo_ptr(a1)

-        lea     sv_wavegen_order_table(a1),a0
-        cmpi.b  #$19,d2                         ; check if version is higher than 19
-        bhi.s   .haswaveorderinfo
-
-        moveq.l #0,d0                           ; fill 24 bytes with default order of waves?
-        moveq.l #MAX_WAVES-1,d7
-.fillcount
-        move.b  d0,(a0)+
-        addq.b  #1,d0
-        dbra    d7,.fillcount
-        bra.s   .contafterworkaround
-
-.haswaveorderinfo
-        moveq.l #(MAX_WAVES/4)-1,d7
-        lea     $0042(a2),a2                    ; offset into wave ordering
-.memcpyloop
-        move.l  (a2)+,(a0)+
-        dbra    d7,.memcpyloop
-
-.contafterworkaround
        moveq.l #2,d0                           ; at least empty sample
        moveq.l #0,d7
        move.b  sv_num_waves_b(a1),d7           ; has instruments?