From c57f68a5052129f0265415dc44be9e18f06a049c Mon Sep 17 00:00:00 2001 From: chrisly42 Date: Sun, 13 Aug 2023 21:36:49 +0200 Subject: [PATCH] Track delay completely reworked. --- README.md | 23 +++-- binaries/raspberry_casket.bin | Bin 5900 -> 5874 bytes src/raspberry_casket.asm | 182 +++++++++++++++------------------- src/raspberry_casket.i | 7 +- 4 files changed, 100 insertions(+), 112 deletions(-) diff --git a/README.md b/README.md index 5478f66..42f1596 100644 --- a/README.md +++ b/README.md @@ -36,11 +36,17 @@ Productions that I know have been using Raspberry Casket so far: ### Verification -The replayer has been verified on about 60 Pretracker tunes to -create an identical internal state for each tick and identical -samples (if certain optimizations switches are disabled). +The first versions of the replayer had been verified against about +60 Pretracker tunes to create an identical internal state for each +tick and identical samples (if certain optimization switches are disabled). -I might have introduced bugs though. If you find some problems, +During the process, the promise of an identical state and identical samples +had to be dropped due to bugs in the original player and optimizations. +This is especially the case for the track delay feature of Pretracker +that could in some cases cause odd behaviour and unwanted muting that +has been fixed in Raspberry Casket. + +If you find some problems, please let me know under chrisly@platon42.de. Thank you. ### Usage @@ -90,7 +96,7 @@ The original code compressed with *Blueberry's* Shrinkler goes from 18052 bytes down to 9023 bytes. Raspberry Casket, depending on the features compiled in, is about -5900 bytes and goes down to ~4176 bytes (in isolation). +5874 bytes and goes down to ~4164 bytes (in isolation). So this means that the optimization is not just "on the outside". @@ -137,6 +143,8 @@ solve this problem. 
- Pattern break (Dxx) + Song pos (Bxx) on the same line does not work in original Pretracker & Player: New Dxx position is ignored. There is code to enable it in the player, so you could in theory make backwards running tracks like in Protracker. But this doesn't make sense as long as the tracker itself does not support it. +- Setting the same track delay multiple times will no longer mute the delayed channel. +- Clearing the track delay (multiple times) will no longer mute the delayed channel nor cause a delay of one tick to the note played in the no-longer delayed channel. ## Changelog @@ -147,8 +155,11 @@ solve this problem. - Replaced the period table by byte-deltas, saved 36 bytes and compression is even better! - Optimized some code paths for octave selection. - Removed two 25 bytes tables each, saving another 42 bytes. +- Completely reworked track delay handling, fixed oddities and improved output quality. +- This removes a big source of cpu jitter when track delay is enabled (no longer clearing the track delay buffer). +- This also fixes usages of illegal period 0 in the lead-in that could cause the replay to miss the first trigger. - Added Presto player draft. -- Drop-in replacement code size: 5900 bytes. +- Drop-in replacement code size: 5874 bytes. ### V1.x (unreleased) - Fixed a bug regarding the copper output mode with looping waves having a loop-offset. 
diff --git a/binaries/raspberry_casket.bin b/binaries/raspberry_casket.bin index b26bb73d5ceb11344dc8b4459638301e128b5b5d..348ddb5b0ae62426440a584e954b8eafda6b3e14 100644 GIT binary patch delta 460 zcmXYrK}b_^9LK-^-*fhCzNurrbdU5^u$uM4Kq8ul?QPTf(h+*?@Mnj;Llg!Pqojf8 zsU^`15wA&S<$2seMx$7sWQblob(r8{MS9S!dN1S)pTqC>`TqD7ZvWEWy=U*^D?{sgJW92gAOh@WY&KjO< zx1k|yW|bu6l_bLak<;~5Y15sGijJ_N)7zTl?1p&M@R*}kMH-dQ5BMI`DPw4@FWDa) z%>_H;yDE@yVjDecls|WXf@{f+W#V*YcF?auo{p8GN(}M{J2b|R)l9tJYJ~RPyoB@6 z&ByADOL@GG>Ch46P!j|=debw9c5`F!F{&??^ ZZ(jZxMU@sja%u4vB#ysN?WMqL`UmUTk<9=A delta 482 zcmYk2F=!KU6vn^*OPXFyE^Ut^mY$NUf!YL56gm{rA-QW!yxI`s((%ZUTNG>wAxJP# zaY}}ggCedvII1U34k4IDAvgpv;N)}#52{uu)mrbMhU59jg|;&hmCQe2t|ThU#Kk4i$dMgadrQ!1Ds?p8NWUSNIGEUvOYu;m$XG&hPalE#Kd8XJhT#_^dpfqm&zWB zjz_nnr+eUdRKf3>JrlhU@ZMlAPd}w*%ufUD)3dRH){x6 ikg`a6yrws;qD%F)IUk$XexjWFhyB0qQPi%A_vtTFYL @@ -22,7 +22,7 @@ ; ; It took me more than a month and it was not fun. ; -; Also: Open source. It's 2022, keeping the code closed is just not +; Also: Open source. It's 2023, keeping the code closed is just not ; part of the demoscene spirit (anymore?), at least for a replayer. ; ; Also note that this is not the final state of the source code. @@ -87,7 +87,7 @@ ; 18052 bytes down to 9023 bytes. ; ; Raspberry Casket, depending on the features compiled in, is about -; 5900 bytes and goes down to ~4176 bytes (in isolation). +; 5874 bytes and goes down to ~4164 bytes (in isolation). ; ; So this means that the optimization is not just "on the outside". ; @@ -189,7 +189,7 @@ PRETRACKER_SONG_END_DETECTION = 0 ENDC ; Do you want to have information on the sample generation progress -; during the call to pre_PlayerInit? Then enable this and call w +; during the call to pre_PlayerInit? Then enable this and call ; pre_PlayerInit with a pointer to a longword in a3. ; Please make sure yourself that the initial value is zero. 
; It will be incremented by the number of samples (in bytes) @@ -864,11 +864,7 @@ pre_PlayerTick: beq.s .note_delay_end_reached move.b d4,pcd_note_delay_b(a5) ; note still delayed - IFNE PRETRACKER_BUGFIX_CODE bra .pat_play_cont ; I believe that with activated track delay, we must jump here - ELSE - bra .pat_channels_loop_test - ENDC .note_delay_end_reached st pcd_note_delay_b(a5) ; release note delay @@ -933,11 +929,7 @@ pre_PlayerTick: beq.s .pat_exy_cmd_cont ENDC move.b d1,pcd_note_delay_b(a5) - IFNE PRETRACKER_BUGFIX_CODE bra .pat_play_cont ; I believe that with activated track delay, we must jump here - ELSE - bra .pat_channels_loop_test - ENDC .pat_is_not_ed_cmd addq.b #$d-$a,d3 @@ -1208,30 +1200,27 @@ pre_PlayerTick: cmp.b #NUM_CHANNELS-1,pcd_channel_num_b(a5) beq.s .pat_play_cont ; we are at channel 3 -- track delay not available here - lea pcd_SIZEOF+pcd_track_delay_buffer+ocd_volume(a5),a1 - moveq.l #0,d3 - moveq.l #MAX_TRACK_DELAY-1,d2 -.clr_track_delay_buffer_loop - move.b d3,(a1) ; ocd_volume - lea ocd_SIZEOF(a1),a1 - dbra d2,.clr_track_delay_buffer_loop - tst.b d5 bne.s .pat_track_delay_set IFNE PRETRACKER_BUGFIX_CODE ; clearing track delay when it already was cleared will overwrite the note needlessly tst.b pcd_track_delay_steps_b(a5) beq.s .pat_play_cont ENDC - st pcd_track_delay_steps_b(a5) - bra.s .handle_track_delay + move.b d5,pcd_SIZEOF+pcd_pat_vol_b(a5) + move.b d5,pcd_track_delay_steps_b(a5) + bra.s .pat_play_cont .pat_track_delay_set moveq.l #15,d2 and.b d5,d2 add.b d2,d2 + IFNE PRETRACKER_BUGFIX_CODE + cmp.b pcd_track_delay_steps_b(a5),d2 + beq.s .pat_track_set_only_vol + ENDC move.b d2,pcd_track_delay_steps_b(a5) -; subq.b #1,d2 -; move.b d2,pcd_SIZEOF+pcd_track_init_delay_b(a5) + move.b d2,pcd_SIZEOF+pcd_track_init_delay_b(a5) +.pat_track_set_only_vol lsr.b #4,d5 move.b d5,pcd_track_delay_vol16_b(a5) bra.s .pat_play_cont @@ -1288,18 +1277,15 @@ pre_PlayerTick: .pat_play_nop .pat_play_cont - move.b pcd_track_delay_steps_b(a5),d2 ; FIXME 
this is a mess - beq.s .pat_channels_loop_test + cmp.b #NUM_CHANNELS-1,pcd_channel_num_b(a5) + beq .pat_channels_loop_end -.handle_track_delay lea pcd_SIZEOF(a5),a5 - cmp.b #NUM_CHANNELS-2,pcd_channel_num_b-pcd_SIZEOF(a5) ; FIXME find out why we need this - bge.s .pat_channels_loop_end -.pat_channels_loop_test - lea pcd_SIZEOF(a5),a5 - cmp.b #NUM_CHANNELS-1,pcd_channel_num_b-pcd_SIZEOF(a5) - bne .pre_pat_chan_loop + tst.b pcd_track_delay_steps_b-pcd_SIZEOF(a5) ; check if the next channel has track delay + bne.s .pat_play_cont ; skip channel that has track delay enabled + bra .pre_pat_chan_loop + .pat_channels_loop_end ; end of pattern loop @@ -2224,68 +2210,54 @@ pre_PlayerTick: move.b d3,pcd_out_trg_b(a5) .hassamesamlen +.no_inst_selected + ; ---------------------------------------- ; track delay handling +.check_next_channel + cmp.b #NUM_CHANNELS-1,pcd_channel_num_b(a5) + beq .updatechannels -.no_inst_selected - move.b pcd_track_delay_steps_b(a5),d3 - beq .incrementchannel ; no track delay + lea pcd_SIZEOF(a5),a5 - cmp.b #NUM_CHANNELS-1,pcd_channel_num_b(a5) ; last channel processed? 
- beq .updatechannels ; no track delay for last channel + move.b pcd_track_delay_steps_b-pcd_SIZEOF(a5),d3 + beq .inst_chan_loop ; no track delay moveq.l #MAX_TRACK_DELAY-1,d0 ; load from last buffer - ; handle track delay - cmp.b #$FF,d3 - beq.s .clear_track_delay - ; advance and wrap offset - move.b pcd_SIZEOF+pcd_track_delay_offset_b(a5),d1 - addq.b #1,d1 + move.b pcd_track_delay_offset_b(a5),d1 + addq.w #1,d1 and.w d0,d1 - move.b d1,pcd_SIZEOF+pcd_track_delay_offset_b(a5) + move.b d1,pcd_track_delay_offset_b(a5) + ; write previous channel data to this channel's buffer move.w d1,d2 - lea pcd_SIZEOF(a5),a3 lsl.w #4,d2 - lea pcd_track_delay_buffer(a3,d2.w),a3 - lea pcd_out_base(a5),a1 + lea pcd_track_delay_buffer(a5,d2.w),a3 + lea pcd_out_base-pcd_SIZEOF(a5),a1 move.l (a1)+,(a3)+ ; ocd_sam_ptr move.l (a1)+,(a3)+ ; ocd_length/ocd_loop_offset move.l (a1)+,(a3)+ ; ocd_period/ocd_volume/ocd_trigger - ;move.l (a1)+,(a3)+ ; this is never used - move.b -(a3),d2 - add.b d2,d2 ; increment channel - bne.s .copy_trigger_for_delayed_channel - tst.b pcd_SIZEOF+pcd_track_delay_steps_b(a5) - bne.s .dont_trigger_track_delay_first_note - move.b d2,(a3) ; trigger note (ocd_trigger) -.copy_trigger_for_delayed_channel - or.b d2,pv_trigger_mask_w+1(a4) -.dont_trigger_track_delay_first_note + moveq.l #0,d5 + tst.b pcd_track_init_delay_b(a5) + bmi.s .track_delay_ready - IFNE PRETRACKER_VOLUME_TABLE - lea pv_volume_table(a4),a1 - move.b pcd_track_delay_vol16_b(a5),-(sp) - move.w (sp)+,d4 - clr.b d4 - add.w d4,d4 - move.b -(a3),d4 ; ocd_volume - move.b (a1,d4.w),(a3)+ - ELSE - moveq.l #0,d4 - move.b -(a3),d4 ; ocd_volume - move.b pcd_track_delay_vol16_b(a5),d2 - ext.w d2 - mulu d4,d2 ; apply track delay volume - lsr.w #4,d2 - move.b d2,(a3)+ ; fix volume - ENDC + subq.b #1,pcd_track_init_delay_b(a5) + bmi.s .track_delay_trigger_first - move.b d3,pcd_SIZEOF+pcd_track_delay_steps_b(a5) + lea pcd_out_base(a5),a3 + move.l pv_sample_buffer_ptr(a4),(a3)+ ; ocd_sam_ptr + move.l #2<<16,(a3)+ ; 
ocd_length/ocd_loop_offset + move.l #$7b<<16,(a3)+ ; ocd_period/ocd_volume/ocd_trigger + bra.s .check_next_channel + +.track_delay_trigger_first + move.b pcd_channel_mask_b(a5),d5 + +.track_delay_ready sub.b d3,d1 and.w d1,d0 @@ -2296,42 +2268,48 @@ pre_PlayerTick: move.b (a1,d1.w),d1 ext.w d1 ENDC - bra.s .load_track_data_from_buffer -.clear_track_delay - moveq.l #0,d1 - move.b d1,pcd_track_delay_steps_b(a5) - move.b d1,pcd_SIZEOF+pcd_pat_vol_b(a5) - move.b d1,pcd_SIZEOF+pcd_track_delay_steps_b(a5) - st pcd_SIZEOF+pcd_track_delay_offset_b(a5) - -.load_track_data_from_buffer - lea pcd_SIZEOF(a5),a5 ; skip the channel we applied track delay to lsl.w #4,d0 lea pcd_track_delay_buffer(a5,d0.w),a1 lea pcd_out_base(a5),a3 move.l (a1)+,(a3)+ ; ocd_sam_ptr move.l (a1)+,(a3)+ ; ocd_length/ocd_loop_offset - move.l (a1)+,(a3)+ ; ocd_period/ocd_volume/ocd_trigger - ;move.l (a1)+,(a3)+ ; this is never used - - ;clr.b ocd_volume-ocd_unused(a1) ; does not seem to bother IFNE PRETRACKER_DUBIOUS_PITCH_SHIFT_FOR_DELAYED_TRACK ; FIXME this seems odd! Why modulate the period by the distance? 
- move.w pcd_out_base+ocd_period(a5),d2 - move.w d1,d0 - muls d2,d0 - lsl.l #4,d0 - swap d0 - add.w d0,d2 - move.w d2,pcd_out_base+ocd_period(a5) + move.w (a1)+,d0 ; ocd_period + muls d0,d1 + swap d1 + add.w d1,d0 + move.w d0,(a3)+ ; ocd_period + ELSE + move.w (a1)+,(a3)+ ; ocd_period ENDC -.incrementchannel - lea pcd_SIZEOF(a5),a5 - cmp.b #NUM_CHANNELS-1,pcd_channel_num_b-pcd_SIZEOF(a5) - bne .inst_chan_loop + IFNE PRETRACKER_VOLUME_TABLE + move.w pcd_track_delay_vol16_b-pcd_SIZEOF(a5),d4 + clr.b d4 + add.w d4,d4 + move.b (a1)+,d4 ; ocd_volume + move.b (a1)+,d2 ; ocd_trigger + lea pv_volume_table(a4),a1 + move.b (a1,d4.w),(a3)+ ; ocd_volume (this track) + ELSE + moveq.l #0,d4 + move.b (a1)+,d4 ; ocd_volume + move.b pcd_track_delay_vol16_b-pcd_SIZEOF(a5),d2 + ext.w d2 + mulu d4,d2 ; apply track delay volume + lsr.w #4,d2 + move.b d2,(a3)+ ; fix volume + move.b (a1)+,d2 ; ocd_trigger + ENDC + + add.b d2,d2 ; change mask to next channel + or.b d5,d2 + move.b d2,(a3)+ ; ocd_trigger (this track) + or.b d2,pv_trigger_mask_w+1(a4) + bra.s .check_next_channel ; ---------------------------------------- .updatechannels @@ -2539,7 +2517,7 @@ pre_fast_roll_off_16: IFNE PRETRACKER_DUBIOUS_PITCH_SHIFT_FOR_DELAYED_TRACK ; -4,-3,-1,1,2,3,4,0 pre_minus4plus4_table: - dc.b $FC,$FB,$FF,1,2,3,4,0 + dc.b $c0,$b0,$f0,$10,$20,$30,$40,$00 ENDC pre_delta_period_table: diff --git a/src/raspberry_casket.i b/src/raspberry_casket.i index b0e35b1..7c6563b 100644 --- a/src/raspberry_casket.i +++ b/src/raspberry_casket.i @@ -180,7 +180,7 @@ sv_inst_infos_table rs.b MAX_INSTRUMENTS*uii_SIZEOF sv_SIZEOF rs.b 0 ; ---------------------------------------- -; channel output data (part of pcd structure below) +; channel output data (part of pcd structure below) -- FIXED ORDER! 
rsreset ocd_sam_ptr rs.l 1 ; 0 ocd_length rs.w 1 ; 4 @@ -238,12 +238,11 @@ pcd_note_off_delay_b rs.b 1 ; time before note is released ($ff = disa pcd_inst_pattern_steps_b rs.b 1 ; number of steps in instrument pattern pcd_note_delay_b rs.b 1 ; $ff = no note delay -pcd_track_delay_steps_b rs.b 1 ; $00 = no track delay, $ff = stop track delay (this is for the next channel!) -pcd_track_delay_vol16_b rs.b 1 +pcd_track_delay_steps_b rs.b 1 ; $00 = no track delay, $xx = track delay xx (this is for the next channel!) +pcd_track_delay_vol16_b rs.b 1 ; needs to be at even address (using word access to shift << 8) pcd_track_init_delay_b rs.b 1 ; number of frames to ignore the delay pcd_inst_num4_w rs.w 1 ; current instrument number * 4 -;pcd_inst_new_step_w rs.w 1 ; seems to be unused pcd_inst_subloop_wait_w rs.w 1 pcd_inst_loop_offset_w rs.w 1 pcd_inst_info_ptr rs.l 1 ; pointer to currently active instrument