diff --git a/README.md b/README.md index b81c2a8..ee84a8d 100644 --- a/README.md +++ b/README.md @@ -89,14 +89,14 @@ The source needs two common include files to compile (`custom.i` and ### Size The original C compiled code was... just bad. The new binary is -about 1/3 of the original one. +less than 1/3 of the original one. The code has been also optimized in a way that it compresses better. The original code compressed with *Blueberry's* Shrinkler goes from 18052 bytes down to 9023 bytes. Raspberry Casket, depending on the features compiled in, is about -5850 bytes and shrinkles down to ~4154 bytes (in isolation). +5840 bytes and shrinkles down to ~4144 bytes (in isolation). So this means that the optimization is not just "on the outside". @@ -159,8 +159,9 @@ solve this problem. - This removes a big source of cpu jitter when track delay is enabled (no longer clearing the track delay buffer). - This also fixes usages of illegal period 0 in the lead-in that could cause the replay to miss the first trigger. - Moved pattern table init from PlayerInit to SongInit, optimized SongInit a bit. +- Wave order table filling moved and optimized in SongInit. - Added Presto player draft. -- Drop-in replacement code size: 5850 bytes. +- Drop-in replacement code size: 5840 bytes. ### V1.x (unreleased) - Fixed a bug regarding the copper output mode with looping waves having a loop-offset. diff --git a/binaries/raspberry_casket.bin b/binaries/raspberry_casket.bin index 2c4d8bc..a80d9d8 100644 Binary files a/binaries/raspberry_casket.bin and b/binaries/raspberry_casket.bin differ diff --git a/src/raspberry_casket.asm b/src/raspberry_casket.asm index 8918e5e..2a7aae7 100755 --- a/src/raspberry_casket.asm +++ b/src/raspberry_casket.asm @@ -80,14 +80,14 @@ ; Size ; ~~~~ ; The original C compiled code was... just bad. The new binary is -; about 1/3rd of the original one. +; less than 1/3rd of the original one. ; ; The code has been also optimized in a way that it compresses better. ; The original code compressed with Blueberry's Shrinkler goes from ; 18052 bytes down to 9023 bytes. ; ; Raspberry Casket, depending on the features compiled in, is about -; 5850 bytes and shrinkles down to ~4154 bytes (in isolation). +; 5840 bytes and shrinkles down to ~4144 bytes (in isolation). ; ; So this means that the optimization is not just "on the outside". ; @@ -394,11 +394,27 @@ pre_SongInit: move.b d0,sv_num_steps_b(a1) - mulu #3,d0 ; *3 bytes per pattern line + lea sv_wavegen_order_table+MAX_WAVES(a1),a3 + moveq.l #MAX_WAVES-1,d3 ; fill 24 bytes with default order of waves? +.fillcount + move.b d3,-(a3) + dbra d3,.fillcount + cmp.b #$19,d2 ; check if version is higher than 19 + bls.s .hasnowaveordering + + moveq.l #MAX_WAVES-1,d3 +.waveorderloop + move.b (a0)+,(a3)+ ; $0042 wave generation ordering + dbra d3,.waveorderloop + +.hasnowaveordering + lea sv_pattern_table(a1),a0 .pattableloop move.l a4,(a0)+ + add.w d0,a4 ; *3 bytes per pattern line + add.w d0,a4 add.w d0,a4 subq.b #1,d1 bne.s .pattableloop @@ -494,26 +510,6 @@ pre_SongInit: .addressiseven move.l a0,sv_waveinfo_ptr(a1) - lea sv_wavegen_order_table(a1),a0 - cmpi.b #$19,d2 ; check if version is higher than 19 - bhi.s .haswaveorderinfo - - moveq.l #0,d0 ; fill 24 bytes with default order of waves? - moveq.l #MAX_WAVES-1,d7 -.fillcount - move.b d0,(a0)+ - addq.b #1,d0 - dbra d7,.fillcount - bra.s .contafterworkaround - -.haswaveorderinfo - moveq.l #(MAX_WAVES/4)-1,d7 - lea $0042(a2),a2 ; offset into wave ordering -.memcpyloop - move.l (a2)+,(a0)+ - dbra d7,.memcpyloop - -.contafterworkaround moveq.l #2,d0 ; at least empty sample moveq.l #0,d7 move.b sv_num_waves_b(a1),d7 ; has instruments?