diff --git a/README.md b/README.md index b25a1b5..ab96fc5 100644 --- a/README.md +++ b/README.md @@ -99,8 +99,20 @@ the remaining code for playback. ### Timing -Sample generation is a bit faster (I guess around 10-15%), -but most of the time is spent on `muls` operations, so this is the limiting factor. +#### Sample precalculation + +Sample generation is faster than the original 1.0 player and also +faster than the 1.5 player, which got a slightly better performance +than the 1.0 one (compiler change?). + +According to my measurements on my set of Pretracker tunes, +Raspberry Casket needs between 10% and 20% fewer instructions. +Of these instructions, about 5% are `muls` operations and the new +player is only able to shave off between 3% and 8% of those, +so this is probably the limiting factor. + +#### Playback + Raspberry Casket is about twice as fast as the old replayer for playback. Unfortunately, the replayer is still pretty slow and has high diff --git a/binaries/raspberry_casket.bin b/binaries/raspberry_casket.bin index 3b88e85..04f66cd 100644 Binary files a/binaries/raspberry_casket.bin and b/binaries/raspberry_casket.bin differ diff --git a/src/raspberry_casket.asm b/src/raspberry_casket.asm index 6c03816..963afe1 100755 --- a/src/raspberry_casket.asm +++ b/src/raspberry_casket.asm @@ -346,23 +346,23 @@ pre_PrepareCopperlist: ;******************************************************************** pre_SongInit: IFNE PRETRACKER_DONT_TRASH_REGS - movem.l d2/d7/a2-a5,-(sp) + movem.l d2-d7/a2-a5,-(sp) ENDC moveq.l #0,d0 - move.l $0000(a2),d1 + movem.l (a2),d1/d3-d6 ; song offsets $0000/$0004/$0008/$000c/$0010 move.b d1,d2 move.b d0,d1 cmp.l #$50525400,d1 ; "PRE"-Text bne .error moveq.l #MAX_INSTRUMENTS-1,d7 ; notice there's one extra name (available in 1.5, but not usable)! 
IFNE PRETRACKER_SUPPORT_V1_5 - cmpi.b #$1e,d2 + cmp.b #$1e,d2 bgt .error bne.s .nopatchv15 move.l $005c(a2),d0 ; make song backward compatible ror.w #8,d0 move.l d0,$003c(a2) - addq.l #8,$0004(a2) ; skip over first pattern data + addq.l #8,d3 ; skip over first pattern data (offset $0004) moveq.l #2*MAX_INSTRUMENTS-1,d7 ; v1.5 has 32 slots (the other ones used for sfx) .nopatchv15 ELSE @@ -379,17 +379,13 @@ pre_SongInit: move.b $003f(a2),sv_num_steps_b(a1) ; number of steps! move.b $0041(a2),sv_num_waves_b(a1) ; number of instruments - move.l $0004(a2),d0 - add.l a2,d0 - move.l d0,sv_pos_data_adr(a1) ; address to position data (POSD) + add.l a2,d3 ; add to offset $0004 + move.l d3,sv_pos_data_adr(a1) ; address to position data (POSD) - move.l $0008(a2),d0 - add.l a2,d0 - move.l d0,sv_patterns_ptr(a1) ; address to pattern data (PATT) - - move.l $000c(a2),d0 ; offset into instrument names - lea (a2,d0.l),a0 ; instrument names + add.l a2,d4 ; add to offset $0008 + move.l d4,sv_patterns_ptr(a1) ; address to pattern data (PATT) + lea (a2,d5.l),a0 ; offset (from $000c) into instrument names .instrnamesloop moveq.l #23-1,d0 ; max 23 chars .inststrloop @@ -466,8 +462,7 @@ pre_SongInit: dbra d7,.instinfoloop .noinstsskip - move.l $0010(a2),d0 ; offset into wave names - lea (a2,d0.l),a0 + lea (a2,d6.l),a0 ; offset (from $0010) into wave names moveq.l #MAX_WAVES-1,d7 .wavenamesloop moveq.l #23-1,d0 ; max 23 chars @@ -533,7 +528,7 @@ pre_SongInit: .error .exit IFNE PRETRACKER_DONT_TRASH_REGS - movem.l (sp)+,d2/d7/a2-a5 + movem.l (sp)+,d2-d7/a2-a5 ENDC rts @@ -845,7 +840,7 @@ pre_PlayerTick: bpl.s .noclip_pat_vol_min moveq.l #0,d1 .noclip_pat_vol_min - cmpi.b #MAX_VOLUME,d1 + cmp.b #MAX_VOLUME,d1 ble.s .noclip_pat_vol_max moveq.l #MAX_VOLUME,d1 .noclip_pat_vol_max @@ -1165,7 +1160,7 @@ pre_PlayerTick: ; ---------------------------------------- .pat_set_speed lea pv_pat_speed_even_b(a4),a1 - cmpi.b #MAX_SPEED,d5 + cmp.b #MAX_SPEED,d5 bhs.s .pat_set_speed_shuffle move.b d5,(a1)+ ; 
pv_pat_speed_even_b move.b d5,(a1)+ ; pv_pat_speed_odd_b @@ -1279,7 +1274,7 @@ pre_PlayerTick: ; ---------------------------------------- .pat_set_volume - cmpi.b #MAX_VOLUME,d5 + cmp.b #MAX_VOLUME,d5 bls.s .pat_set_volume_nomax moveq.l #MAX_VOLUME,d5 .pat_set_volume_nomax @@ -1445,7 +1440,7 @@ pre_PlayerTick: ; IFNE PRETRACKER_PARANOIA_MODE ; new step is never written ; move.w pcd_inst_new_step_w(a5),d1 ; blt.s .inst_no_new_step_pos -; cmpi.w #$20,d1 +; cmp.w #$20,d1 ; ble.s .inst_good_new_step_pos ; moveq.l #$20,d1 ;.inst_good_new_step_pos @@ -1792,7 +1787,7 @@ pre_PlayerTick: bsr .inst_select_wave_subroutine .inst_wave_selected - cmpi.b #$FF,d2 + cmp.b #$FF,d2 beq.s .inst_pat_loop_exit3 subq.b #1,d2 move.b d2,pcd_inst_line_ticks_b(a5) @@ -1832,7 +1827,7 @@ pre_PlayerTick: .adsr_decay_and_release moveq.l #0,d4 move.b pcd_adsr_phase_speed_b(a5),d4 - cmpi.b #$8f,d4 + cmp.b #$8f,d4 bhs.s .adsr_absurd_slow_release move.b d4,d5 addq.b #1,d5 @@ -1884,7 +1879,7 @@ pre_PlayerTick: .adsr_attack add.w uii_adsr_attack(a2),d2 - cmpi.w #MAX_VOLUME<<4,d2 + cmp.w #MAX_VOLUME<<4,d2 blt.s .adsr_done .adsr_do_decay @@ -2111,7 +2106,7 @@ pre_PlayerTick: ; select right sample corresponding to current pitch move.w pcd_out_len_w(a5),d3 - cmpi.w #$219,d0 + cmp.w #$219,d0 ble .noclippitchhigh move.w #$231,d6 ; That's probably B-3+1, mapping to period $71 (although $7c is the last safe value) btst #2,wi_flags_b(a3) @@ -2184,7 +2179,7 @@ pre_PlayerTick: add.w d1,d1 add.w d1,d1 sub.w d1,d0 - cmpi.w #$231,d0 + cmp.w #$231,d0 ble.s .noclippitchhigh move.w #$231,d0 .noclippitchhigh @@ -2237,7 +2232,7 @@ pre_PlayerTick: moveq.l #MAX_TRACK_DELAY-1,d0 ; load from last buffer ; handle track delay - cmpi.b #$FF,d3 + cmp.b #$FF,d3 beq.s .clear_track_delay ; advance and wrap offset diff --git a/src/raspberry_casket_wavegen.asm b/src/raspberry_casket_wavegen.asm index b41e2fa..1945751 100644 --- a/src/raspberry_casket_wavegen.asm +++ b/src/raspberry_casket_wavegen.asm @@ -1194,8 +1194,7 @@ 
pre_WaveGen: ;-------------------------------------------------------------------- ; a3: waveinfo ; -; d6: wetness (word) -; uses all data registers and a0-a2 (a3 unchanged) +; uses all data registers and a0-a1 (a2/a3 unchanged) pre_Modulator: tst.b wi_mod_wetness_b(a3) beq.s .earlyexit @@ -1240,21 +1239,21 @@ pre_Modulator: moveq.l #0,d3 .innerloop + moveq.l #0,d1 add.w d7,d3 addq.w #8,d3 smi d1 ext.w d1 - eor.w d3,d1 + eor.w d3,d1 ; flip order if it was negative lsr.w #6,d1 ; 4 bit key is bits 14 to 11, needs to be 8 to 5 and.w #15<<5,d1 - ext.l d1 add.l d5,d1 lsr.l #6,d1 move.w d2,d0 sub.w d1,d0 - bmi.s .isneg + bmi.s .is_outside_sample move.b (a0,d0.w),d1 ext.w d1 @@ -1269,7 +1268,7 @@ pre_Modulator: CLIPTO8BITAFTERADD d1 move.b d1,(a0,d2.w) -.isneg +.is_outside_sample addq.w #1,d2 cmp.w d4,d2 bcs.s .innerloop