From 0d3ad4074cb47d131f2889701b32ea171a8dd22b Mon Sep 17 00:00:00 2001 From: chrisly42 Date: Thu, 10 Aug 2023 22:41:52 +0200 Subject: [PATCH] Branch optimizations, removed two octave tables and replaced them by a tiny bit of code. --- README.md | 6 ++- binaries/raspberry_casket.bin | Bin 5942 -> 5900 bytes src/raspberry_casket.asm | 87 +++++++++++----------------------- 3 files changed, 32 insertions(+), 61 deletions(-) diff --git a/README.md b/README.md index 899b46d..5478f66 100644 --- a/README.md +++ b/README.md @@ -90,7 +90,7 @@ The original code compressed with *Blueberry's* Shrinkler goes from 18052 bytes down to 9023 bytes. Raspberry Casket, depending on the features compiled in, is about -5942 bytes and goes down to ~4202 bytes (in isolation). +5900 bytes and goes down to ~4176 bytes (in isolation). So this means that the optimization is not just "on the outside". @@ -145,8 +145,10 @@ solve this problem. - Optimized some more code paths for Raspberry Casket replayer. - In the wave generator optimized away a table (32 words), replacement code is even smaller! - Replaced the period table by byte-deltas, saved 36 bytes and compression is even better! +- Optimized some code paths for octave selection. +- Removed two 25 bytes tables each, saving another 42 bytes. - Added Presto player draft. -- Drop-in replacement code size: 5942 bytes. +- Drop-in replacement code size: 5900 bytes. ### V1.x (unreleased) - Fixed a bug regarding the copper output mode with looping waves having a loop-offset. diff --git a/binaries/raspberry_casket.bin b/binaries/raspberry_casket.bin index 07155086939fde822628fc2d88732064c6da1bdc..b26bb73d5ceb11344dc8b4459638301e128b5b5d 100644 GIT binary patch delta 168 zcmdm{*P}O~n^9(Ck1`{p#pDf)rx`zNu3?g8W85@(7Kb9^q|JLc92ps_Hvi)+W@c2| zT*Nz-neoMB4naporp;c0=NK)l7-rQlN#-*!OybaHU}6wSXJBY4Vw_OJa9|b?=L!Wm z!r5#Pb_s)tHUrmWH{l4e4K+-L`78!DOokU6a+nGjtV#MJ<6L^Edq delta 212 zcmeCt+om_6n^9?Fk1``;(&P<{ry0L(u3?g8W85)$7Kb9^jLmyE92prKHvi)+W@gmc zT*Nz-nep9Z4napoj?G?z=NR=i)G$frGcZiEv0*ag&}Lv_5J_iXm{7uCqRqf%#V`vf z`^MM#7xTU%1|h=$hRFfK5prPWhf9u^949yewHxNM7yvbUIpi=EFxW6m-Y0CpamUTN rB%z8SYVteb`9PccL?W0}3=9ks5)vlB0V5+L6B83NGGLheOiT>`={r3E diff --git a/src/raspberry_casket.asm b/src/raspberry_casket.asm index bab1e73..18eac5a 100755 --- a/src/raspberry_casket.asm +++ b/src/raspberry_casket.asm @@ -87,7 +87,7 @@ ; 18052 bytes down to 9023 bytes. ; ; Raspberry Casket, depending on the features compiled in, is about -; 5942 bytes and goes down to ~4202 bytes (in isolation). +; 5900 bytes and goes down to ~4176 bytes (in isolation). ; ; So this means that the optimization is not just "on the outside". ; @@ -2110,21 +2110,28 @@ pre_PlayerTick: ; select right sample corresponding to current pitch move.w pcd_out_len_w(a5),d3 - cmp.w #$219,d0 - ble .noclippitchhigh - move.w #$231,d6 ; That's probably B-3+1, mapping to period $71 (although $7c is the last safe value) - btst #2,wi_flags_b(a3) - beq .noclippitchlow2 - - ; select high pitch version of the sample - moveq.l #0,d2 - move.w wi_chipram_w(a3),d2 move.w d0,d5 sub.w #$219,d5 - lsr.w #6,d5 - lea pre_octave_select_table(pc),a1 - moveq.l #0,d1 - move.b (a1,d5.w),d1 ; higher octave 1-3 + ble .is_normal_octave + btst #2,wi_flags_b(a3) + beq .clippitchhigh + + ; select high pitch version of the sample + moveq.l #1,d1 + sub.w #NOTES_IN_OCTAVE*16,d0 + sub.w #3*64,d5 + blt.s .oct1 + addq.w #1,d1 + sub.w #NOTES_IN_OCTAVE*16,d0 + sub.w #3*64,d5 + blt.s .oct2 + addq.w #1,d1 + sub.w #NOTES_IN_OCTAVE*16,d0 +.oct2 +.oct1 + + moveq.l #0,d2 + move.w wi_chipram_w(a3),d2 move.l pcd_out_ptr_l(a5),d4 move.w pcd_out_lof_w(a5),d7 @@ -2177,22 +2184,18 @@ pre_PlayerTick: move.l d4,pcd_out_ptr_l(a5) ; move trigger start pos to right octave wave .no_retrigger_new -.is_normal_octave - moveq.l #0,d1 - move.b pre_octave_note_offset_table-pre_octave_select_table(a1,d5.w),d1 - add.w d1,d1 - add.w d1,d1 - sub.w d1,d0 cmp.w #$231,d0 ble.s .noclippitchhigh - move.w #$231,d0 +.clippitchhigh + move.w #$231,d0 ; That's probably B-3+1, mapping to period $71 (although $7c is the last safe value) .noclippitchhigh - tst.w d0 +.is_normal_octave + add.w d0,d0 bge.s .noclippitchlow moveq.l #0,d0 .noclippitchlow - move.w d0,d6 -.noclippitchlow2 + move.w pv_period_table(a4,d0.w),pcd_out_per_w(a5) + tst.b pcd_out_trg_b(a5) beq.s .wasnottriggered ; this code seems to move the sample start to "loop offset" for first trigger @@ -2220,8 +2223,6 @@ pre_PlayerTick: ENDC move.b d3,pcd_out_trg_b(a5) .hassamesamlen - add.w d6,d6 - move.w pv_period_table(a4,d6.w),pcd_out_per_w(a5) ; ---------------------------------------- ; track delay handling @@ -2495,7 +2496,7 @@ pre_PlayerTick: rts ;-------------------------------------------------------------------- -; table data currently about 496 bytes +; table data currently about 446 bytes ; Tables used by WaveGen pre_roll_off_table: ; used by WaveGen dc.w $400,$200,$180,$140,$100,$C0,$A0,$80,$78,$74,$6E @@ -2535,38 +2536,6 @@ pre_fast_roll_off_16: dc.w $400,$200,$80,$64,$50,$40,$30,$20,$10,14,12,10,8 dc.w 4,2,1 -pre_octave_note_offset_table: - dc.b 1*NOTES_IN_OCTAVE*4,1*NOTES_IN_OCTAVE*4,1*NOTES_IN_OCTAVE*4 - dc.b 2*NOTES_IN_OCTAVE*4,2*NOTES_IN_OCTAVE*4,2*NOTES_IN_OCTAVE*4 - dc.b 3*NOTES_IN_OCTAVE*4,3*NOTES_IN_OCTAVE*4,3*NOTES_IN_OCTAVE*4 - dc.b 3*NOTES_IN_OCTAVE*4,3*NOTES_IN_OCTAVE*4,3*NOTES_IN_OCTAVE*4 - dc.b 3*NOTES_IN_OCTAVE*4,3*NOTES_IN_OCTAVE*4,3*NOTES_IN_OCTAVE*4 - IFNE PRETRACKER_PARANOIA_MODE - dc.b 3*NOTES_IN_OCTAVE*4,3*NOTES_IN_OCTAVE*4,3*NOTES_IN_OCTAVE*4 - dc.b 3*NOTES_IN_OCTAVE*4,3*NOTES_IN_OCTAVE*4,3*NOTES_IN_OCTAVE*4 - dc.b 3*NOTES_IN_OCTAVE*4,3*NOTES_IN_OCTAVE*4,3*NOTES_IN_OCTAVE*4 - ELSE - dc.b 3*NOTES_IN_OCTAVE*4 - ENDC - even - - ; based on pitch $219 (537), covers up to pitch 0x819 (2073). - ; In practice, I was unable to get higher than $5ff -pre_octave_select_table: - dc.b 1,1,1 - dc.b 2,2,2 - dc.b 3,3,3 - dc.b 3,3,3 - dc.b 3,3,3 - dc.b 3,3,3 - IFNE PRETRACKER_PARANOIA_MODE - dc.b 3,3,3 - dc.b 3,3,3 - ELSE - dc.b 3 - ENDC - even - IFNE PRETRACKER_DUBIOUS_PITCH_SHIFT_FOR_DELAYED_TRACK ; -4,-3,-1,1,2,3,4,0 pre_minus4plus4_table: