Minor code size optimizations.

This commit is contained in:
Chris Hodges 2023-05-20 19:33:53 +02:00
parent 6a069ac78a
commit b5478a162c
2 changed files with 22 additions and 27 deletions

View File

@ -41,7 +41,7 @@ please let me know under chrisly@platon42.de. Thank you.
The new replayer comes as a drop-in binary replacement if you wish.
In this case you will get faster sample generation (about 12%
faster on 68000) and about 45% less CPU time spent. However, you
faster on 68000) and about 45% less CPU time spent during playback. However, you
won't get stuff such as song-end detection and precalc progress this way.
This mode uses the old CPU DMA wait that takes away 8 raster lines.
@ -121,7 +121,7 @@ solve this problem.
- Optimized base displacement by reordering variables.
- Further optimized ADSR code.
- Optimized wave loop code.
- Bake in this strange vibrato speed multiplication to precalculated vibrato value (where possible).
- Baked in this strange vibrato speed multiplication to precalculated vibrato value (where possible).
- Various small optimizations.
- Store instrument number * 4 on loading to avoid using two adds every frame.
- Optimized speed/shuffle code. Idea of using xor turned out to make things too complicated for pattern breaks/jumps.

View File

@ -1,5 +1,5 @@
;--------------------------------------------------------------------
; Raspberry Casket Player V1.1 (28-Dec-2022)
; Raspberry Casket Player V1.1 (20-May-2023)
; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
;
; Provided by Chris 'platon42' Hodges <chrisly@platon42.de>
@ -97,8 +97,7 @@
; of the time is spent on muls operations, so this is the limiting
; factor.
;
; Raspberry Casket is about twice as fast as the old replayer for
; playback.
; Raspberry Casket is about twice as fast as the old replayer for playback.
;
; Unfortunately, the replayer is still pretty slow and has high
; jitter compared to other standard music replayers.
@ -814,17 +813,15 @@ pre_SongInit:
moveq.l #0,d1
move.b (a0)+,d1 ; ii_adsr_attack
add.w d1,d1
lea pre_fast_roll_off_16(pc),a5
move.w (a5,d1.w),d1
move.w pre_fast_roll_off_16-pre_vib_delay_table(a5,d1.w),d1
move.w d1,uii_adsr_attack(a4)
moveq.l #0,d1
move.b (a0)+,d1 ; ii_adsr_decay
lea pre_ramp_up_16(pc),a5
move.b (a5,d1.w),uii_adsr_decay+1(a4)
move.b pre_ramp_up_16-pre_vib_delay_table(a5,d1.w),uii_adsr_decay+1(a4)
move.b (a0)+,d1 ; ii_adsr_sustain
; what is this? a patch?
; what is this? a patch?
cmp.b #15,d1
bne.s .dont_patch_sustain
moveq.l #16,d1
@ -834,8 +831,7 @@ pre_SongInit:
moveq.l #0,d1
move.b (a0)+,d1 ; ii_adsr_release
lea (pre_ramp_up_16,pc),a5
move.b (a5,d1.w),uii_adsr_release(a4)
move.b pre_ramp_up_16-pre_vib_delay_table(a5,d1.w),uii_adsr_release(a4)
move.b (a0)+,d1 ; ii_pattern_steps
move.b d1,uii_pattern_steps(a4)
@ -1257,7 +1253,6 @@ pre_PlayerInit:
moveq.l #0,d3
move.b wi_osc_phase_min_b(a3),d3
mulu d5,d3
lsl.l #6,d3
@ -1387,7 +1382,7 @@ pre_PlayerInit:
cmpa.l #$8000,a5 ; if symmetrical
beq.s .gen_noise_centered
; what does this do? (a5 - $8000) (a5 +$7fff)&$8000
; FIXME what does this do? d4 = (a5 - $8000), d1 = (a5 + $7fff)&$ffff8000 -> d4 - d1 == (a5 - $8000) - ((a5 + $7fff)&$ffff8000)
move.l a5,d4
addi.l #$FFFF8000,d4
move.l a5,d1
@ -1593,7 +1588,7 @@ pre_PlayerInit:
.entry_to_filter_loop
move.l d0,a6
move.l d3,d1 ; flt_speed_b*128
adda.l d1,a6 ; suppress M68kUnexpectedConditionalInstruction
adda.l d1,a6 ; suppress M68kUnexpectedConditionalInstruction
bgt.s .filter_speed_pos
.filter_speed_neg
@ -3535,7 +3530,7 @@ pre_PlayerTick:
.dont_release_note
; ----------------------------------------
; calculate final volume output = inst_vol * ADSR volume * pattern volume
; calculate final volume output = inst_vol * ADSR volume * pattern volume
IFNE PRETRACKER_VOLUME_TABLE
lea pv_volume_table(a4),a1
@ -3550,7 +3545,7 @@ pre_PlayerTick:
lsr.w #4,d2
mulu d2,d1
lsr.w #6,d1
moveq.l #0,d2
move.b pcd_pat_vol_b(a5),d2
mulu d1,d2
@ -3577,7 +3572,7 @@ pre_PlayerTick:
clr.b pcd_wave_offset_b(a5)
; keep current direction of ping-pong unchanged
; keep current direction of ping-pong unchanged
move.b pcd_inst_ping_pong_dir_b(a5),d4
bpl.s .wave_move_one_step_ahead
sub.w d2,d1 ; go in reverse direction one step?
@ -3851,7 +3846,7 @@ pre_PlayerTick:
beq .updatechannels ; no track delay for last channel
moveq.l #MAX_TRACK_DELAY-1,d0 ; load from last buffer
; handle track delay
cmpi.b #$FF,d3
beq.s .clear_track_delay
@ -3871,7 +3866,7 @@ pre_PlayerTick:
move.l (a1)+,(a3)+ ; ocd_length/ocd_loop_offset
move.l (a1)+,(a3)+ ; ocd_period/ocd_volume/ocd_trigger
;move.l (a1)+,(a3)+ ; this is never used
move.b -(a3),d2
add.b d2,d2 ; increment channel
bne.s .copy_trigger_for_delayed_channel
@ -4129,6 +4124,13 @@ pre_vib_depth_table:
; 16 byte entries. The indexed loads in pre_SongInit address the two tables
; below as displacements from this label (pre_ramp_up_16-pre_vib_delay_table
; and pre_fast_roll_off_16-pre_vib_delay_table), presumably with the address
; register holding pre_vib_delay_table -- the load of that register is not
; visible here. The three tables must therefore stay in this order and close
; together: the offsets are 16 and 32 bytes, which fits the signed 8-bit
; displacement of the 68000 d8(An,Xn) addressing mode.
pre_vib_delay_table:
dc.b 0,4,8,10,12,14,16,18,20,24,32,40,56,96,150,255
; 16 byte entries; pre_SongInit reads it with a byte index taken from
; ii_adsr_decay and from ii_adsr_release.
pre_ramp_up_16:
dc.b 0,1,3,6,7,9,10,11,12,13,14,16,19,35,55,143
; 16 word entries; pre_SongInit doubles the ii_adsr_attack byte to index it.
; NOTE(review): this word table follows exactly 32 bytes of byte data, so it
; is word-aligned only if pre_vib_delay_table starts on an even address --
; confirm against the preceding table(s), which are not visible here.
pre_fast_roll_off_16:
dc.w $400,$200,$80,$64,$50,$40,$30,$20,$10,14,12,10,8
dc.w 4,2,1
pre_roll_off_table:
dc.w $400,$200,$180,$140,$100,$C0,$A0,$80,$78,$74,$6E
dc.w $69,$64,$5A,$46,$40,$38,$30,$28,$20,$1F,$1E,$1D
@ -4139,13 +4141,6 @@ pre_roll_off_table:
dc.w 2,3,3,2,3,3,2,3,2,3,2,3,2,3,2,3,2,2,2,2,2,2,2,2,1
dc.w 2,1,2,1,2,1,2,1,1,2,1,1,1,2,1
; 16 byte entries; pre_SongInit reads it with a byte index taken from
; ii_adsr_decay and from ii_adsr_release.
pre_ramp_up_16:
dc.b 0,1,3,6,7,9,10,11,12,13,14,16,19,35,55,143
; 16 word entries; pre_SongInit doubles the ii_adsr_attack byte to index it.
pre_fast_roll_off_16:
dc.w $400,$200,$80,$64,$50,$40,$30,$20,$10,14,12,10,8
dc.w 4,2,1
pre_octave_note_offset_table:
dc.b 1*NOTES_IN_OCTAVE*4,1*NOTES_IN_OCTAVE*4,1*NOTES_IN_OCTAVE*4
dc.b 2*NOTES_IN_OCTAVE*4,2*NOTES_IN_OCTAVE*4,2*NOTES_IN_OCTAVE*4