From ef1c519a6c39e451af4220a067a208b0f510d0d3 Mon Sep 17 00:00:00 2001 From: chrisly42 Date: Thu, 24 Aug 2023 22:07:42 +0200 Subject: [PATCH] Wavegen optimizations, cosmetics and another mystery solved. --- README.md | 27 +++++++--- binaries/raspberry_casket.bin | Bin 5732 -> 5716 bytes src/raspberry_casket.asm | 15 ++---- src/raspberry_casket.i | 3 +- src/raspberry_casket_wavegen.asm | 82 ++++++++++++++++--------------- 5 files changed, 69 insertions(+), 58 deletions(-) diff --git a/README.md b/README.md index 17e0716..260baf2 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Raspberry Casket A fast and small open source Pretracker replayer -## Raspberry Casket Player V2.x (22-Aug-2023) +## Raspberry Casket Player V2.x (24-Aug-2023) Provided by Chris 'platon42' Hodges @@ -101,7 +101,7 @@ The original code compressed with *Blueberry's* Shrinkler goes from 18052 bytes down to 9023 bytes. Raspberry Casket, depending on the features compiled in, is about -5732 bytes and shrinkles down to ~4084 bytes (in isolation). +5716 bytes and shrinkles down to ~4071 bytes (in isolation). So this means that the optimization is not just "on the outside". @@ -137,18 +137,33 @@ about 34 on average!). Watch out for *Presto*, the [LightSpeedPlayer](https://github.com/arnaud-carre/LSPlayer) variant that should solve this problem. +### Secrets + +- Pink never actually documented how the 0xy command works (2nd instrument, not an ARP!). + It will play the instrument y for x+1 ticks before going to the actual instrument you wanted to trigger in the first place. + This works well, e.g. for bassdrums and other short percussion samples. Note that because y is a 4 bit nibble, you can only + specify the instruments $1-$f this way and not $10-$1f. + ### Known issues +- Songs saved with earlier versions of Pretracker than 1.0 (internal version lower than $1b) have stored the ADSR values differently in the file. 
+ There is no provision for fixing these values in either the original player or Raspberry Casket. Loading the file in the tracker and saving + it again will cure this. This is more of a hypothetical problem as you are unlikely to use a Pretracker V0.9 beta version, but it took me quite a + while to figure out why Pink's "On and On" has a broken first wave sample in all the replayers but not on the tracker itself. This affects these tunes: + Attack and Release, On and On, Rewind, Cold and Tired, PreFix all by Pink and Cracksteady by Tecon. - Behaviour for undefined volume slides with both up- and down nibble specified is different (e.g. A9A, hi Rapture!). Don't do that. - Don't use loops with odd lengths and offsets (even if Pretracker allows this when dragging the loop points). - Don't stop the music with F00 and use a note delay (EDx) in the same line. - Don't try to play music with no waves, instruments or patterns. -- Pattern breaks with target row >= $7f will be ignored. +- The original player had the internal state machine running for wave 1 even if no note had been triggered yet on the channel. + This could cause the first instrument using a 4xx command (trigger wave without sync) in the instrument pattern to start at + a more or less random first loop offset instead of from the beginning. This is fixed in Raspberry Casket. - Shinobi seemed to have used an early beta version of Pretracker where it was possible to specify a Subloop Wait of 0. That's illegal and unsupported. +- Pattern breaks with target row >= $7f will be ignored. - Pattern break (Dxx) + Song pos (Bxx) on the same line does not work in original Pretracker & Player: New Dxx position is ignored. There is code to enable it in the player, so you could in theory make backwards running tracks like in Protracker. But this doesn't make sense as long as the tracker itself does not support it. -- Setting the same track delay multiple times will no longer mute the delayed channel. 
+- Setting the same track delay multiple times will no longer mute the delayed channel and the new volume will take effect immediately. - Clearing the track delay (multiple times) will no longer mute the delayed channel nor cause a delay of one tick to the note played in the no-longer delayed channel. ## Changelog @@ -168,9 +183,9 @@ solve this problem. - Bugfix: Songend detection for back-jumps was broken since at least V1.1. - Optimized some more wave selection code. - Nosync/sync wave selection optimized. -- Optimized wave generation a bit more (noise generator). +- Optimized wave generation a lot (esp. noise generator). - Added Presto player draft. -- Drop-in replacement code size: 5732 bytes. +- Drop-in replacement code size: 5716 bytes. ### V1.x (unreleased) - Fixed a bug regarding the copper output mode with looping waves having a loop-offset. diff --git a/binaries/raspberry_casket.bin b/binaries/raspberry_casket.bin index 458a838892bd82a14289735c672c3f410aaccbbb..66f9908e71b0f2d8767a8d55742b64f42fad5c2e 100644 GIT binary patch delta 295 zcmaE&b47=Tfq?;t8LJo=xQr(9XfwWaSe7zX9)(9zHj)t-uvD;Dx%Gx=+WeHh(nu!1*k&I%PCLn z@v19L2|^8(McHWyQK11o9?o7)`AkO-J?whg<>iphz;JHvxhlp-v0AQPjyY;YJZfAn z&65u?w=!By_FysD+{7ZtC_AS>0%)aXx`+~Ax`3CCzG51eWeo$PzCr@S--gW_SWhrA qZQC=sid_#xtzvfqQk&ngA7o?{*}Q?XkeTt&W+mPQ9Fxz9P6Gg*^I`n} delta 300 zcmcbj^F)V-fq?;t8LJo=xXdQS{)V5c zj@!e%hQW?aMnc?;kzsNkQ#5O1;Q9a4CZA)9V48h)vKq5|y-g8A36BwjiNmEDh6zoF zJRA)eJsc-F9r(l8==_C&!S^L!R{`?@Z3dBaA>Vh548aWPT)yxAHvnm-x#vW*85BL5 zJPvVaGq3 @@ -96,7 +96,7 @@ ; 18052 bytes down to 9023 bytes. ; ; Raspberry Casket, depending on the features compiled in, is about -; 5732 bytes and shrinkles down to ~4084 bytes (in isolation). +; 5716 bytes and shrinkles down to ~4071 bytes (in isolation). ; ; So this means that the optimization is not just "on the outside". 
; @@ -2478,7 +2478,7 @@ pre_PlayerTick: rts ;-------------------------------------------------------------------- -; table data currently about 446 bytes +; table data currently about 450 bytes ; Tables used by WaveGen pre_roll_off_table: ; used by WaveGen dc.w $400,$200,$180,$140,$100,$C0,$A0,$80,$78,$74,$6E @@ -2490,11 +2490,6 @@ pre_roll_off_table: ; used by WaveGen dc.w 2,3,3,2,3,3,2,3,2,3,2,3,2,3,2,3,2,2,2,2,2,2,2,2,1 dc.w 2,1,2,1,2,1,2,1,1,2,1,1,1,2,1 -; I assume this is a log table for freq distances within an octave ; used by WaveGen -pre_log12_table: - dc.b $400000/$8000,$400000/$871D,$400000/$8F2F,$400000/$97B7,$400000/$9FC4,$400000/$A9DE - dc.b $400000/$B505,$400000/$BF49,$400000/$CB31,$400000/$D645,$400000/$E215,$400000/$F1A0 - pre_modulator_ramp_8: ; used by WaveGen ;dc.w 77,293,539,1079,1337,1877,2431,3031 ; the 1079 value is strange (938 better?) dc.w $4D,$125,$21B,$437,$539,$755,$96D,$BD7 @@ -2515,8 +2510,8 @@ pre_ramp_up_16: dc.b 0,1,3,6,7,9,10,11,12,13,14,16,19,35,55,143 pre_fast_roll_off_16: - dc.w $400,$200,$80,$64,$50,$40,$30,$20,$10,14,12,10,8 - dc.w 4,2,1 + dc.w $400,$200,$80,$64,$50,$40,$30,$20 + dc.w 16,14,12,10,8,4,2,1 IFNE PRETRACKER_DUBIOUS_PITCH_SHIFT_FOR_DELAYED_TRACK ; -4,-3,-1,1,2,3,4,0 diff --git a/src/raspberry_casket.i b/src/raspberry_casket.i index 9fad806..bcf9126 100644 --- a/src/raspberry_casket.i +++ b/src/raspberry_casket.i @@ -1,6 +1,6 @@ ; Pretracker song format description: ; -; $0000 4: PRT ($19 (V0.x), $1b (V1.0), $1e (V1.5)) +; $0000 4: PRT ($19/$1a (V0.x), $1b (V1.0), $1e (V1.5)) ; $0004 4: File offset to position data (POSD) ; $0008 4: File offset to pattern data (PATT) ; $000C 4: File offset to instruments (INST) @@ -281,7 +281,6 @@ pcd_out_unused_l = pcd_out_base+ocd_unused ; copied for track delay, owb_saw_waves rs.b 128 owb_sqr_waves rs.b 128 owb_tri_waves rs.b 128 -owb_wave_length rs.b 1 owb_SIZEOF rs.b 0 ; ---------------------------------------- diff --git a/src/raspberry_casket_wavegen.asm 
b/src/raspberry_casket_wavegen.asm index aadca9c..03a9dff 100644 --- a/src/raspberry_casket_wavegen.asm +++ b/src/raspberry_casket_wavegen.asm @@ -41,7 +41,7 @@ CLIPTO8BITAFTERADD MACRO pre_WaveGen: ; ---------------------------------------- - lea pre_log12_table(pc),a0 ; 128, 121, 114, 107, 102, 96, 90, 85, 80, 76, 72, 67 + lea .pre_log12_table(pc),a0 ; 128, 121, 114, 107, 102, 96, 90, 85, 80, 76, 72, 67 lea pv_osc_buffers+owb_sqr_waves(a4),a3 moveq.l #NOTES_IN_OCTAVE-1,d7 .noteloop @@ -49,7 +49,6 @@ pre_WaveGen: moveq.l #0,d6 move.w d6,d7 ; tabpos move.b (a0)+,d6 ; period - move.b d6,owb_wave_length-owb_sqr_waves(a3) move.l #$ff00,d5 divu d6,d5 ; frac increment @@ -184,7 +183,6 @@ pre_WaveGen: move.b d2,(a1)+ move.b d3,(a1)+ - suba.l a5,a5 clr.w pv_wg_chord_note_num_b(a4) ; and pv_wg_chord_note_num_b .wavegen_chordloop @@ -199,32 +197,32 @@ pre_WaveGen: cmp.b wi_osc_basenote_b(a3),d0 beq .wave_gen_tone_done ; skip chord notes that are same as base note .base_note_is_never_skipped - moveq.l #0,d1 + moveq.l #0,d5 moveq.l #NOTES_IN_OCTAVE,d2 - move.w d0,d1 + move.w d0,d5 move.w d0,a2 ; save base note, used later (much later, noise generator)! 
- add.w #NOTES_IN_OCTAVE*NOTES_IN_OCTAVE,d1 ; make sure we don't run into negative modulo - divu d2,d1 - swap d1 - move.w d1,d0 ; note within octave - swap d1 - sub.w d2,d1 ; restore octave, result may be negative + add.w #NOTES_IN_OCTAVE*NOTES_IN_OCTAVE,d5 ; make sure we don't run into negative modulo + divu d2,d5 + sub.w d2,d5 ; restore octave, result may be negative + move.w d5,d1 ; +-octave + swap d5 ; note within octave - mulu #owb_SIZEOF,d0 - lea (a4,d0.w),a1 + moveq.l #0,d7 + move.b .pre_log12_table(pc,d5.w),d7 ; 128, 121, 114, 107, 102, 96, 90, 85, 80, 76, 72, 67 - lea pv_osc_buffers+owb_saw_waves(a1),a6 moveq.l #3,d0 + mulu d0,d5 and.b wi_flags_b(a3),d0 beq.s .osc_selected - lea owb_tri_waves-owb_saw_waves(a6),a6 - subq.b #1,d0 + addq.w #2,d5 + subq.w #1,d0 beq.s .osc_selected - lea owb_sqr_waves-owb_tri_waves(a6),a6 - subq.b #1,d0 - beq.s .osc_selected - suba.l a6,a6 ; noise selected + subq.w #1,d5 + subq.w #1,d0 .osc_selected + lea pv_osc_buffers+owb_saw_waves(a4),a6 + lsl.w #7,d5 + adda.w d5,a6 ; ---------------------------------------- ; pitch ramp @@ -233,15 +231,20 @@ pre_WaveGen: ext.l d2 btst #4,wi_flags_b(a3) ; pitch linear flag beq.s .pitch_not_linear - tst.b d2 + tst.w d2 bgt.s .pitch_ramp_positive + ; FIXME what happens if d1 is negative? rolls out by 63? 
lsl.l d1,d2 add.l d2,d2 bra.s .pitch_ramp_cont +.pre_log12_table + dc.b $400000/$8000,$400000/$871d,$400000/$8f2f,$400000/$97b7,$400000/$9fc4,$400000/$a9de + dc.b $400000/$b505,$400000/$bf49,$400000/$cb31,$400000/$d645,$400000/$e215,$400000/$f1a0 + .pitch_not_linear - tst.b d2 + tst.w d2 ble.s .pitch_ramp_cont .pitch_ramp_positive muls d2,d2 @@ -250,8 +253,8 @@ pre_WaveGen: lsl.l #2,d2 ; check whether we have a noise oscillator or something else - move.l a6,d4 - bne .no_noise + tst.w d0 + beq .no_noise ; ---------------------------------------- ; d0 = scratch @@ -260,6 +263,7 @@ pre_WaveGen: ; d4 = scratch ; a2 = base note .gen_noise + suba.l a6,a6 IFNE PRETRACKER_PARANOIA_MODE tst.w pv_wg_curr_sample_len_w(a4) beq .wave_gen_tone_done @@ -349,7 +353,7 @@ pre_WaveGen: .gen_noise_innerloop move.b d1,(a0)+ - cmpa.l pv_wg_curr_samend_ptr(a4),a0 + cmp.l pv_wg_curr_samend_ptr(a4),a0 beq .wave_gen_tone_done adda.l a1,a5 @@ -370,14 +374,14 @@ pre_WaveGen: sub.l d4,d2 .noise_nonlinear_pitch - cmpa.w #$1ff,a1 + cmp.w #$1ff,a1 bgt.s .gen_noise_no_end_of_pitch_ramp moveq.l #0,d2 ; stop pitch ramping move.l d2,a5 movea.w #$200,a1 .gen_noise_no_end_of_pitch_ramp .gen_noise_no_pitch_ramping - cmpa.w a5,a5 + cmp.w a5,a5 beq.s .gen_noise_innerloop bra .gen_noise_outerloop @@ -389,8 +393,6 @@ pre_WaveGen: ; a2 = base note .no_noise - moveq.l #0,d7 - move.b pv_osc_buffers+owb_wave_length(a1),d7 ; get period moveq.l #15,d5 lsl.l d5,d7 @@ -535,7 +537,7 @@ pre_WaveGen: moveq.l #0,d2 moveq.l #0,d1 .chordtone_done - cmpa.l pv_wg_curr_samend_ptr(a4),a0 + cmp.l pv_wg_curr_samend_ptr(a4),a0 bne.s .chordtoneloop .wave_gen_tone_done @@ -768,14 +770,14 @@ pre_WaveGen: CLIPTO8BIT d7 .filter_outputbyte move.b d7,(a0)+ - cmpa.l a0,a1 + cmp.l a0,a1 bne.s .filter_innerloop .filterloop_end_test movem.w d3-d6,pv_wg_flt_taps(a4) movem.l (sp)+,d3-d5 - cmpa.l pv_wg_curr_samend_ptr(a4),a0 + cmp.l pv_wg_curr_samend_ptr(a4),a0 bhs.s .filter_done move.l a6,d0 bra .entry_to_filter_loop @@ -809,7 +811,7 @@ 
pre_WaveGen: moveq.l #0,d0 move.b wi_vol_attack_b(a3),d0 bne.s .has_attack_volume - cmpi.b #$FF,wi_vol_sustain_b(a3) + cmp.b #$ff,wi_vol_sustain_b(a3) beq .vol_envelope_finished ; no attack but not full sustain -> go to delay ;move.l #$100<<16,d3 @@ -836,7 +838,7 @@ pre_WaveGen: lsl.l #4,d1 ; multiply speed by 16 .vol_no_fast add.l d1,d3 ; increase volume - cmpi.l #$FFFFFF,d3 + cmp.l #$ffffff,d3 ble.s .vol_do_attack ; first step overshooting? .vol_skip_attack btst #3,wi_flags_b(a3) ; boost flag @@ -862,7 +864,7 @@ pre_WaveGen: subq.w #1,d4 bmi .vol_envelope_finished add.l d1,d3 ; increase volume - cmpi.l #$FFFFFF,d3 + cmp.l #$ffffff,d3 ble.s .vol_attack_normal_loop ; ---------------------------------------- @@ -882,7 +884,7 @@ pre_WaveGen: ELSE lea 2(a0,d0.w),a1 - move.w #$FF,d3 ; FIXME I don't think that this is quite right. Shouldn't the max volume NOT change the value? + move.w #$ff,d3 ; FIXME I don't think that this is quite right. Shouldn't the max volume NOT change the value? .vol_delay_normal_loop move.b (a0),d0 IFNE 1 @@ -896,7 +898,7 @@ pre_WaveGen: ENDC move.b d0,(a0)+ - cmpa.l a1,a0 + cmp.l a1,a0 dbeq d4,.vol_delay_normal_loop bne .vol_envelope_finished ENDC @@ -920,7 +922,7 @@ pre_WaveGen: subq.w #1,d4 bmi .vol_envelope_finished add.l d1,d3 - cmpi.l #$FFFFFF,d3 + cmp.l #$ffffff,d3 ble.s .vol_attack_boosted_loop ; ---------------------------------------- @@ -941,7 +943,7 @@ pre_WaveGen: add.b d0,d0 CLIPTO8BITAFTERADD d0 ELSE - move.w #$FF,d3 ; FIXME I don't think that this is quite right. It should be $100 to boost by full volume + move.w #$ff,d3 ; FIXME I don't think that this is quite right. It should be $100 to boost by full volume .vol_delay_boosted_loop move.b (a0),d0 ext.w d0 @@ -951,7 +953,7 @@ pre_WaveGen: ENDC move.b d0,(a0)+ - cmpa.l a1,a0 + cmp.l a1,a0 dbeq d4,.vol_delay_boosted_loop bne .vol_envelope_finished