Minor code size optimizations.

This commit is contained in:
Chris Hodges 2023-05-20 19:33:53 +02:00
parent 6a069ac78a
commit b5478a162c
2 changed files with 22 additions and 27 deletions

View File

@ -41,7 +41,7 @@ please let me know under chrisly@platon42.de. Thank you.
The new replayer comes as a drop-in binary replacement if you wish. The new replayer comes as a drop-in binary replacement if you wish.
In this case you will get faster sample generation (about 12% In this case you will get faster sample generation (about 12%
faster on 68000) and about 45% less CPU time spent. However, you faster on 68000) and about 45% less CPU time spent during playback. However, you
won't get stuff such as song-end detection and precalc progress this way. won't get stuff such as song-end detection and precalc progress this way.
This mode uses the old CPU DMA wait that takes away 8 raster lines. This mode uses the old CPU DMA wait that takes away 8 raster lines.
@ -121,7 +121,7 @@ solve this problem.
- Optimized base displacement by reordering variables. - Optimized base displacement by reordering variables.
- Further optimized ADSR code. - Further optimized ADSR code.
- Optimized wave loop code. - Optimized wave loop code.
- Bake in this strange vibrato speed multiplication to precalculated vibrato value (where possible). - Baked in this strange vibrato speed multiplication to precalculated vibrato value (where possible).
- Various small optimizations. - Various small optimizations.
- Store instrument number * 4 on loading to avoid using two adds every frame. - Store instrument number * 4 on loading to avoid using two adds every frame.
- Optimized speed/shuffle code. Idea of using xor turned out to make things too complicated for pattern breaks/jumps. - Optimized speed/shuffle code. Idea of using xor turned out to make things too complicated for pattern breaks/jumps.

View File

@ -1,5 +1,5 @@
;-------------------------------------------------------------------- ;--------------------------------------------------------------------
; Raspberry Casket Player V1.1 (28-Dec-2022) ; Raspberry Casket Player V1.1 (20-May-2023)
; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
; ;
; Provided by Chris 'platon42' Hodges <chrisly@platon42.de> ; Provided by Chris 'platon42' Hodges <chrisly@platon42.de>
@ -97,8 +97,7 @@
; of the time is spent on muls operations, so this is the limiting ; of the time is spent on muls operations, so this is the limiting
; factor. ; factor.
; ;
; Raspberry Casket is about twice as fast as the old replayer for ; Raspberry Casket is about twice as fast as the old replayer for playback.
; playback.
; ;
; Unfortunately, the replayer is still pretty slow and has high ; Unfortunately, the replayer is still pretty slow and has high
; jitter compared to other standard music replayers. ; jitter compared to other standard music replayers.
@ -814,17 +813,15 @@ pre_SongInit:
moveq.l #0,d1 moveq.l #0,d1
move.b (a0)+,d1 ; ii_adsr_attack move.b (a0)+,d1 ; ii_adsr_attack
add.w d1,d1 add.w d1,d1
lea pre_fast_roll_off_16(pc),a5 move.w pre_fast_roll_off_16-pre_vib_delay_table(a5,d1.w),d1
move.w (a5,d1.w),d1
move.w d1,uii_adsr_attack(a4) move.w d1,uii_adsr_attack(a4)
moveq.l #0,d1 moveq.l #0,d1
move.b (a0)+,d1 ; ii_adsr_decay move.b (a0)+,d1 ; ii_adsr_decay
lea pre_ramp_up_16(pc),a5 move.b pre_ramp_up_16-pre_vib_delay_table(a5,d1.w),uii_adsr_decay+1(a4)
move.b (a5,d1.w),uii_adsr_decay+1(a4)
move.b (a0)+,d1 ; ii_adsr_sustain move.b (a0)+,d1 ; ii_adsr_sustain
; what is this? a patch? ; what is this? a patch?
cmp.b #15,d1 cmp.b #15,d1
bne.s .dont_patch_sustain bne.s .dont_patch_sustain
moveq.l #16,d1 moveq.l #16,d1
@ -834,8 +831,7 @@ pre_SongInit:
moveq.l #0,d1 moveq.l #0,d1
move.b (a0)+,d1 ; ii_adsr_release move.b (a0)+,d1 ; ii_adsr_release
lea (pre_ramp_up_16,pc),a5 move.b pre_ramp_up_16-pre_vib_delay_table(a5,d1.w),uii_adsr_release(a4)
move.b (a5,d1.w),uii_adsr_release(a4)
move.b (a0)+,d1 ; ii_pattern_steps move.b (a0)+,d1 ; ii_pattern_steps
move.b d1,uii_pattern_steps(a4) move.b d1,uii_pattern_steps(a4)
@ -1257,7 +1253,6 @@ pre_PlayerInit:
moveq.l #0,d3 moveq.l #0,d3
move.b wi_osc_phase_min_b(a3),d3 move.b wi_osc_phase_min_b(a3),d3
mulu d5,d3 mulu d5,d3
lsl.l #6,d3 lsl.l #6,d3
@ -1387,7 +1382,7 @@ pre_PlayerInit:
cmpa.l #$8000,a5 ; if symmetrical cmpa.l #$8000,a5 ; if symmetrical
beq.s .gen_noise_centered beq.s .gen_noise_centered
; what does this do? (a5 - $8000) (a5 +$7fff)&$8000 ; FIXME what does this do? d4 = (a5 - $8000), d1 = (a5 + $7fff)&$ffff8000 -> d4 - d1 == (a5 - $8000) - ((a5 + $7fff)&$ffff8000)
move.l a5,d4 move.l a5,d4
addi.l #$FFFF8000,d4 addi.l #$FFFF8000,d4
move.l a5,d1 move.l a5,d1
@ -1593,7 +1588,7 @@ pre_PlayerInit:
.entry_to_filter_loop .entry_to_filter_loop
move.l d0,a6 move.l d0,a6
move.l d3,d1 ; flt_speed_b*128 move.l d3,d1 ; flt_speed_b*128
adda.l d1,a6 ; suppress M68kUnexpectedConditionalInstruction adda.l d1,a6 ; suppress M68kUnexpectedConditionalInstruction
bgt.s .filter_speed_pos bgt.s .filter_speed_pos
.filter_speed_neg .filter_speed_neg
@ -4129,6 +4124,13 @@ pre_vib_depth_table:
pre_vib_delay_table: pre_vib_delay_table:
dc.b 0,4,8,10,12,14,16,18,20,24,32,40,56,96,150,255 dc.b 0,4,8,10,12,14,16,18,20,24,32,40,56,96,150,255
pre_ramp_up_16:
dc.b 0,1,3,6,7,9,10,11,12,13,14,16,19,35,55,143
pre_fast_roll_off_16:
dc.w $400,$200,$80,$64,$50,$40,$30,$20,$10,14,12,10,8
dc.w 4,2,1
pre_roll_off_table: pre_roll_off_table:
dc.w $400,$200,$180,$140,$100,$C0,$A0,$80,$78,$74,$6E dc.w $400,$200,$180,$140,$100,$C0,$A0,$80,$78,$74,$6E
dc.w $69,$64,$5A,$46,$40,$38,$30,$28,$20,$1F,$1E,$1D dc.w $69,$64,$5A,$46,$40,$38,$30,$28,$20,$1F,$1E,$1D
@ -4139,13 +4141,6 @@ pre_roll_off_table:
dc.w 2,3,3,2,3,3,2,3,2,3,2,3,2,3,2,3,2,2,2,2,2,2,2,2,1 dc.w 2,3,3,2,3,3,2,3,2,3,2,3,2,3,2,3,2,2,2,2,2,2,2,2,1
dc.w 2,1,2,1,2,1,2,1,1,2,1,1,1,2,1 dc.w 2,1,2,1,2,1,2,1,1,2,1,1,1,2,1
pre_ramp_up_16:
dc.b 0,1,3,6,7,9,10,11,12,13,14,16,19,35,55,143
pre_fast_roll_off_16:
dc.w $400,$200,$80,$64,$50,$40,$30,$20,$10,14,12,10,8
dc.w 4,2,1
pre_octave_note_offset_table: pre_octave_note_offset_table:
dc.b 1*NOTES_IN_OCTAVE*4,1*NOTES_IN_OCTAVE*4,1*NOTES_IN_OCTAVE*4 dc.b 1*NOTES_IN_OCTAVE*4,1*NOTES_IN_OCTAVE*4,1*NOTES_IN_OCTAVE*4
dc.b 2*NOTES_IN_OCTAVE*4,2*NOTES_IN_OCTAVE*4,2*NOTES_IN_OCTAVE*4 dc.b 2*NOTES_IN_OCTAVE*4,2*NOTES_IN_OCTAVE*4,2*NOTES_IN_OCTAVE*4