Branch optimizations; removed two octave tables and replaced them with a tiny bit of code.

This commit is contained in:
Chris Hodges 2023-08-10 22:41:52 +02:00
parent dd8c224199
commit 0d3ad4074c
3 changed files with 32 additions and 61 deletions

View File

@ -90,7 +90,7 @@ The original code compressed with *Blueberry's* Shrinkler goes from
18052 bytes down to 9023 bytes. 18052 bytes down to 9023 bytes.
Raspberry Casket, depending on the features compiled in, is about Raspberry Casket, depending on the features compiled in, is about
5942 bytes and goes down to ~4202 bytes (in isolation). 5900 bytes and goes down to ~4176 bytes (in isolation).
So this means that the optimization is not just "on the outside". So this means that the optimization is not just "on the outside".
@ -145,8 +145,10 @@ solve this problem.
- Optimized some more code paths for Raspberry Casket replayer. - Optimized some more code paths for Raspberry Casket replayer.
- In the wave generator optimized away a table (32 words), replacement code is even smaller! - In the wave generator optimized away a table (32 words), replacement code is even smaller!
- Replaced the period table by byte-deltas, saved 36 bytes and compression is even better! - Replaced the period table by byte-deltas, saved 36 bytes and compression is even better!
- Optimized some code paths for octave selection.
- Removed two tables of 25 bytes each, saving another 42 bytes.
- Added Presto player draft. - Added Presto player draft.
- Drop-in replacement code size: 5942 bytes. - Drop-in replacement code size: 5900 bytes.
### V1.x (unreleased) ### V1.x (unreleased)
- Fixed a bug regarding the copper output mode with looping waves having a loop-offset. - Fixed a bug regarding the copper output mode with looping waves having a loop-offset.

Binary file not shown.

View File

@ -87,7 +87,7 @@
; 18052 bytes down to 9023 bytes. ; 18052 bytes down to 9023 bytes.
; ;
; Raspberry Casket, depending on the features compiled in, is about ; Raspberry Casket, depending on the features compiled in, is about
; 5942 bytes and goes down to ~4202 bytes (in isolation). ; 5900 bytes and goes down to ~4176 bytes (in isolation).
; ;
; So this means that the optimization is not just "on the outside". ; So this means that the optimization is not just "on the outside".
; ;
@ -2110,21 +2110,28 @@ pre_PlayerTick:
; select right sample corresponding to current pitch ; select right sample corresponding to current pitch
move.w pcd_out_len_w(a5),d3 move.w pcd_out_len_w(a5),d3
cmp.w #$219,d0
ble .noclippitchhigh
move.w #$231,d6 ; That's probably B-3+1, mapping to period $71 (although $7c is the last safe value)
btst #2,wi_flags_b(a3)
beq .noclippitchlow2
; select high pitch version of the sample
moveq.l #0,d2
move.w wi_chipram_w(a3),d2
move.w d0,d5 move.w d0,d5
sub.w #$219,d5 sub.w #$219,d5
lsr.w #6,d5 ble .is_normal_octave
lea pre_octave_select_table(pc),a1 btst #2,wi_flags_b(a3)
moveq.l #0,d1 beq .clippitchhigh
move.b (a1,d5.w),d1 ; higher octave 1-3
; select high pitch version of the sample
moveq.l #1,d1
sub.w #NOTES_IN_OCTAVE*16,d0
sub.w #3*64,d5
blt.s .oct1
addq.w #1,d1
sub.w #NOTES_IN_OCTAVE*16,d0
sub.w #3*64,d5
blt.s .oct2
addq.w #1,d1
sub.w #NOTES_IN_OCTAVE*16,d0
.oct2
.oct1
moveq.l #0,d2
move.w wi_chipram_w(a3),d2
move.l pcd_out_ptr_l(a5),d4 move.l pcd_out_ptr_l(a5),d4
move.w pcd_out_lof_w(a5),d7 move.w pcd_out_lof_w(a5),d7
@ -2177,22 +2184,18 @@ pre_PlayerTick:
move.l d4,pcd_out_ptr_l(a5) ; move trigger start pos to right octave wave move.l d4,pcd_out_ptr_l(a5) ; move trigger start pos to right octave wave
.no_retrigger_new .no_retrigger_new
.is_normal_octave
moveq.l #0,d1
move.b pre_octave_note_offset_table-pre_octave_select_table(a1,d5.w),d1
add.w d1,d1
add.w d1,d1
sub.w d1,d0
cmp.w #$231,d0 cmp.w #$231,d0
ble.s .noclippitchhigh ble.s .noclippitchhigh
move.w #$231,d0 .clippitchhigh
move.w #$231,d0 ; That's probably B-3+1, mapping to period $71 (although $7c is the last safe value)
.noclippitchhigh .noclippitchhigh
tst.w d0 .is_normal_octave
add.w d0,d0
bge.s .noclippitchlow bge.s .noclippitchlow
moveq.l #0,d0 moveq.l #0,d0
.noclippitchlow .noclippitchlow
move.w d0,d6 move.w pv_period_table(a4,d0.w),pcd_out_per_w(a5)
.noclippitchlow2
tst.b pcd_out_trg_b(a5) tst.b pcd_out_trg_b(a5)
beq.s .wasnottriggered beq.s .wasnottriggered
; this code seems to move the sample start to "loop offset" for first trigger ; this code seems to move the sample start to "loop offset" for first trigger
@ -2220,8 +2223,6 @@ pre_PlayerTick:
ENDC ENDC
move.b d3,pcd_out_trg_b(a5) move.b d3,pcd_out_trg_b(a5)
.hassamesamlen .hassamesamlen
add.w d6,d6
move.w pv_period_table(a4,d6.w),pcd_out_per_w(a5)
; ---------------------------------------- ; ----------------------------------------
; track delay handling ; track delay handling
@ -2495,7 +2496,7 @@ pre_PlayerTick:
rts rts
;-------------------------------------------------------------------- ;--------------------------------------------------------------------
; table data currently about 496 bytes ; table data currently about 446 bytes
; Tables used by WaveGen ; Tables used by WaveGen
pre_roll_off_table: ; used by WaveGen pre_roll_off_table: ; used by WaveGen
dc.w $400,$200,$180,$140,$100,$C0,$A0,$80,$78,$74,$6E dc.w $400,$200,$180,$140,$100,$C0,$A0,$80,$78,$74,$6E
@ -2535,38 +2536,6 @@ pre_fast_roll_off_16:
dc.w $400,$200,$80,$64,$50,$40,$30,$20,$10,14,12,10,8 dc.w $400,$200,$80,$64,$50,$40,$30,$20,$10,14,12,10,8
dc.w 4,2,1 dc.w 4,2,1
; Pitch correction per pitch bucket, used by pre_PlayerTick when a higher
; octave version of the sample is played.
; The table is read as a byte relative to pre_octave_select_table with index
; d5; the byte is then multiplied by 4 (two "add.w d1,d1") and subtracted
; from the pitch in d0. Each entry is stored as octave*NOTES_IN_OCTAVE*4, so
; the amount actually subtracted is octave*NOTES_IN_OCTAVE*16.
; The index is presumably the same (pitch - $219) >> 6 bucket used for
; pre_octave_select_table -- the code between both lookups is not visible
; here, TODO confirm.
; Layout: three buckets for octave 1, three for octave 2, the remainder for
; octave 3.
; NOTE(review): without PRETRACKER_PARANOIA_MODE this table has 16 entries,
; whereas pre_octave_select_table has 19. An index of 16..18 would read past
; the end of this table into whatever data follows it; confirm that such
; indices cannot occur in that configuration.
pre_octave_note_offset_table:
dc.b 1*NOTES_IN_OCTAVE*4,1*NOTES_IN_OCTAVE*4,1*NOTES_IN_OCTAVE*4
dc.b 2*NOTES_IN_OCTAVE*4,2*NOTES_IN_OCTAVE*4,2*NOTES_IN_OCTAVE*4
dc.b 3*NOTES_IN_OCTAVE*4,3*NOTES_IN_OCTAVE*4,3*NOTES_IN_OCTAVE*4
dc.b 3*NOTES_IN_OCTAVE*4,3*NOTES_IN_OCTAVE*4,3*NOTES_IN_OCTAVE*4
dc.b 3*NOTES_IN_OCTAVE*4,3*NOTES_IN_OCTAVE*4,3*NOTES_IN_OCTAVE*4
; paranoia mode pads the table to 24 entries, otherwise it ends after 16
IFNE PRETRACKER_PARANOIA_MODE
dc.b 3*NOTES_IN_OCTAVE*4,3*NOTES_IN_OCTAVE*4,3*NOTES_IN_OCTAVE*4
dc.b 3*NOTES_IN_OCTAVE*4,3*NOTES_IN_OCTAVE*4,3*NOTES_IN_OCTAVE*4
dc.b 3*NOTES_IN_OCTAVE*4,3*NOTES_IN_OCTAVE*4,3*NOTES_IN_OCTAVE*4
ELSE
dc.b 3*NOTES_IN_OCTAVE*4
ENDC
even
; Octave selection per pitch bucket, used by pre_PlayerTick.
; The lookup index is (pitch - $219) >> 6, i.e. one entry per 64 pitch units
; above $219; the byte read is loaded into d1 as the octave number (1..3).
; Layout: three buckets for octave 1, three for octave 2, everything above
; maps to octave 3.
; based on pitch $219 (537), covers up to pitch 0x819 (2073).
; In practice, I was unable to get higher than $5ff
; The $819 limit corresponds to the 24 entries present in paranoia mode;
; without PRETRACKER_PARANOIA_MODE the table holds 19 entries.
pre_octave_select_table:
dc.b 1,1,1
dc.b 2,2,2
dc.b 3,3,3
dc.b 3,3,3
dc.b 3,3,3
dc.b 3,3,3
; paranoia mode pads the table to 24 entries, otherwise it ends after 19
IFNE PRETRACKER_PARANOIA_MODE
dc.b 3,3,3
dc.b 3,3,3
ELSE
dc.b 3
ENDC
even
IFNE PRETRACKER_DUBIOUS_PITCH_SHIFT_FOR_DELAYED_TRACK IFNE PRETRACKER_DUBIOUS_PITCH_SHIFT_FOR_DELAYED_TRACK
; -4,-3,-1,1,2,3,4,0 ; -4,-3,-1,1,2,3,4,0
pre_minus4plus4_table: pre_minus4plus4_table: