From b5478a162c16af45387a0f6723560930e061c882 Mon Sep 17 00:00:00 2001 From: chrisly42 Date: Sat, 20 May 2023 19:33:53 +0200 Subject: [PATCH] Minor code size optimizations. --- README.md | 4 ++-- src/raspberry_casket.asm | 45 ++++++++++++++++++---------------------- 2 files changed, 22 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index 14c69f2..b19bd4e 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ please let me know under chrisly@platon42.de. Thank you. The new replayer comes as a drop-in binary replacement if you wish. In this case you will get faster sample generation (about 12% -faster on 68000) and about 45% less CPU time spent. However, you +faster on 68000) and about 45% less CPU time spent during playback. However, you won't get stuff as song-end detection and precalc progress this way. This mode uses the old CPU DMA wait that takes away 8 raster lines. @@ -121,7 +121,7 @@ solve this problem. - Optimized base displacement by reordering variables. - Further optimized ADSR code. - Optimized wave loop code. -- Bake in this strange vibrato speed multiplication to precalculated vibrato value (where possible). +- Baked in this strange vibrato speed multiplication to precalculated vibrato value (where possible). - Various small optimizations. - Store instrument number * 4 on loading to avoid using two adds every frame. - Optimized speed/shuffle code. Idea of using xor turned out to make things too complicated for pattern breaks/jumps. 
diff --git a/src/raspberry_casket.asm b/src/raspberry_casket.asm index 4fb8a77..7eaf9ca 100755 --- a/src/raspberry_casket.asm +++ b/src/raspberry_casket.asm @@ -1,5 +1,5 @@ ;-------------------------------------------------------------------- -; Raspberry Casket Player V1.1 (28-Dec-2022) +; Raspberry Casket Player V1.1 (20-May-2023) ; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ; ; Provided by Chris 'platon42' Hodges @@ -97,8 +97,7 @@ ; of the time is spent on muls operations, so this is the limiting ; factor. ; -; Raspberry Casket is about twice as fast as the old replayer for -; playback. +; Raspberry Casket is about twice as fast as the old replayer for playback. ; ; Unfortunately, the replayer is still pretty slow and has high ; jitter compared to other standard music replayers. @@ -814,17 +813,15 @@ pre_SongInit: moveq.l #0,d1 move.b (a0)+,d1 ; ii_adsr_attack add.w d1,d1 - lea pre_fast_roll_off_16(pc),a5 - move.w (a5,d1.w),d1 + move.w pre_fast_roll_off_16-pre_vib_delay_table(a5,d1.w),d1 move.w d1,uii_adsr_attack(a4) moveq.l #0,d1 move.b (a0)+,d1 ; ii_adsr_decay - lea pre_ramp_up_16(pc),a5 - move.b (a5,d1.w),uii_adsr_decay+1(a4) + move.b pre_ramp_up_16-pre_vib_delay_table(a5,d1.w),uii_adsr_decay+1(a4) move.b (a0)+,d1 ; ii_adsr_sustain - ; what is this? a patch? + ; what is this? a patch? cmp.b #15,d1 bne.s .dont_patch_sustain moveq.l #16,d1 @@ -834,8 +831,7 @@ pre_SongInit: moveq.l #0,d1 move.b (a0)+,d1 ; ii_adsr_release - lea (pre_ramp_up_16,pc),a5 - move.b (a5,d1.w),uii_adsr_release(a4) + move.b pre_ramp_up_16-pre_vib_delay_table(a5,d1.w),uii_adsr_release(a4) move.b (a0)+,d1 ; ii_pattern_steps move.b d1,uii_pattern_steps(a4) @@ -1257,7 +1253,6 @@ pre_PlayerInit: moveq.l #0,d3 move.b wi_osc_phase_min_b(a3),d3 - mulu d5,d3 lsl.l #6,d3 @@ -1387,7 +1382,7 @@ pre_PlayerInit: cmpa.l #$8000,a5 ; if symmetrical beq.s .gen_noise_centered - ; what does this do? (a5 - $8000) (a5 +$7fff)&$8000 + ; FIXME what does this do? 
d4 = (a5 - $8000) d1 = (a5 + $7fff)&$ffff8000 -> d4 - d1 == (a5 - $8000) - ((a5 + $7fff)&$ffff8000) move.l a5,d4 addi.l #$FFFF8000,d4 move.l a5,d1 @@ -1593,7 +1588,7 @@ pre_PlayerInit: .entry_to_filter_loop move.l d0,a6 move.l d3,d1 ; flt_speed_b*128 - adda.l d1,a6 ; suppress M68kUnexpectedConditionalInstruction + adda.l d1,a6 ; suppress M68kUnexpectedConditionalInstruction bgt.s .filter_speed_pos .filter_speed_neg @@ -3535,7 +3530,7 @@ pre_PlayerTick: .dont_release_note ; ---------------------------------------- -; calculate final volume output = inst_vol * ADSR volume * pattern volume +; calculate final volume output = inst_vol * ADSR volume * pattern volume IFNE PRETRACKER_VOLUME_TABLE lea pv_volume_table(a4),a1 @@ -3550,7 +3545,7 @@ pre_PlayerTick: lsr.w #4,d2 mulu d2,d1 lsr.w #6,d1 - + moveq.l #0,d2 move.b pcd_pat_vol_b(a5),d2 mulu d1,d2 @@ -3577,7 +3572,7 @@ pre_PlayerTick: clr.b pcd_wave_offset_b(a5) - ; keep current direction of ping-pong unchanged + ; keep current direction of ping-pong unchanged move.b pcd_inst_ping_pong_dir_b(a5),d4 bpl.s .wave_move_one_step_ahead sub.w d2,d1 ; go in reverse direction one step? 
@@ -3851,7 +3846,7 @@ pre_PlayerTick: beq .updatechannels ; no track delay for last channel moveq.l #MAX_TRACK_DELAY-1,d0 ; load from last buffer - + ; handle track delay cmpi.b #$FF,d3 beq.s .clear_track_delay @@ -3871,7 +3866,7 @@ pre_PlayerTick: move.l (a1)+,(a3)+ ; ocd_length/ocd_loop_offset move.l (a1)+,(a3)+ ; ocd_period/ocd_volume/ocd_trigger ;move.l (a1)+,(a3)+ ; this is never used - + move.b -(a3),d2 add.b d2,d2 ; increment channel bne.s .copy_trigger_for_delayed_channel @@ -4129,6 +4124,13 @@ pre_vib_depth_table: pre_vib_delay_table: dc.b 0,4,8,10,12,14,16,18,20,24,32,40,56,96,150,255 +pre_ramp_up_16: + dc.b 0,1,3,6,7,9,10,11,12,13,14,16,19,35,55,143 + +pre_fast_roll_off_16: + dc.w $400,$200,$80,$64,$50,$40,$30,$20,$10,14,12,10,8 + dc.w 4,2,1 + pre_roll_off_table: dc.w $400,$200,$180,$140,$100,$C0,$A0,$80,$78,$74,$6E dc.w $69,$64,$5A,$46,$40,$38,$30,$28,$20,$1F,$1E,$1D @@ -4139,13 +4141,6 @@ pre_roll_off_table: dc.w 2,3,3,2,3,3,2,3,2,3,2,3,2,3,2,3,2,2,2,2,2,2,2,2,1 dc.w 2,1,2,1,2,1,2,1,1,2,1,1,1,2,1 -pre_ramp_up_16: - dc.b 0,1,3,6,7,9,10,11,12,13,14,16,19,35,55,143 - -pre_fast_roll_off_16: - dc.w $400,$200,$80,$64,$50,$40,$30,$20,$10,14,12,10,8 - dc.w 4,2,1 - pre_octave_note_offset_table: dc.b 1*NOTES_IN_OCTAVE*4,1*NOTES_IN_OCTAVE*4,1*NOTES_IN_OCTAVE*4 dc.b 2*NOTES_IN_OCTAVE*4,2*NOTES_IN_OCTAVE*4,2*NOTES_IN_OCTAVE*4