From e0f7537774bf57ae1fb7e29517c51ee85d410054 Mon Sep 17 00:00:00 2001 From: chrisly42 Date: Wed, 16 Aug 2023 20:02:41 +0200 Subject: [PATCH] Moved pattern table init from PlayerInit to SongInit, optimized SongInit a bit. --- README.md | 5 +- binaries/raspberry_casket.bin | Bin 5874 -> 5850 bytes src/raspberry_casket.asm | 83 +++++++++++++++++----------------- src/raspberry_casket.i | 7 +-- 4 files changed, 49 insertions(+), 46 deletions(-) diff --git a/README.md b/README.md index 42f1596..b81c2a8 100644 --- a/README.md +++ b/README.md @@ -96,7 +96,7 @@ The original code compressed with *Blueberry's* Shrinkler goes from 18052 bytes down to 9023 bytes. Raspberry Casket, depending on the features compiled in, is about -5874 bytes and goes down to ~4164 bytes (in isolation). +5850 bytes and shrinkles down to ~4154 bytes (in isolation). So this means that the optimization is not just "on the outside". @@ -158,8 +158,9 @@ solve this problem. - Completely reworked track delay handling, fixed oddities and improved output quality. - This removes a big source of cpu jitter when track delay is enabled (no longer clearing the track delay buffer). - This also fixes usages of illegal period 0 in the lead-in that could cause the replay to miss the first trigger. +- Moved pattern table init from PlayerInit to SongInit, optimized SongInit a bit. - Added Presto player draft. -- Drop-in replacement code size: 5874 bytes. +- Drop-in replacement code size: 5850 bytes. ### V1.x (unreleased) - Fixed a bug regarding the copper output mode with looping waves having a loop-offset. diff --git a/binaries/raspberry_casket.bin b/binaries/raspberry_casket.bin index 348ddb5b0ae62426440a584e954b8eafda6b3e14..2c4d8bcefebe0a1ec51410a1b6c6de6aac72ea53 100644 GIT binary patch delta 1058 zcmYjPZAe>Z6n@^Dn-8-XlbhS?O{;B;E;Y;5E3}TH1lSmZ)FK5f9dmQLx zg2SZ&lA;`u6>KDjs}J=+H}7nN(b>#yCGhf(`zy&*onPj>X$byx;tBa}KmGoP(lz7F z@Xvi|oTZbbth;;~*{sGH*Jud9c0}1sT^t{?yWX^#NvL8l$KgS7`!a;ilW_ae?ADQG z1k3K{#Oy%>Sw?*j-eu-prSzh18w9r=5#Ktw+*9UF8%JmtQ>vc#rwjCSkoo8>!5NVG zg*YBPn6U{?k6h&F%i4ON?N$7u?@t&xD{mF7=OB_{W>|InhbFeJNw%9@C|tlI?!uRr z%eW*r!=<>}U;fe6IRrpSIYhHPurs;w`DwoqM>Vr-(JQP&&4H8({ z7RKMvn3#0I8R6~5h$)vD)+<{hN{5WPgh;eVQ>@gf*UTvPj^@OR($R;gy_S~6UX9Jz zjoqJhE~w!Qvprzmwnb&WR+9HmT+~_!`J9gHN=QGQ*4-`?=9NV`YJeO)&qDOElujbD!#dpIt`wmtR&Za!F;jT90Bv`(h)`Cs#%(N zVfEiYfesorh=%s(U(0{V>d}r`J$dU^kD;Ab85>G&e8|>Rl4)_U&IcVQLG@VI%PN|+ zdZBJrzKf0;*N8B2zTh>ImvXhD#vX-0@0s2soxpd51AJV68+;~Esa%H2{hYVne@ z^lM84xky(n)rEUQ6>xoE@~}@wI9zUL1DS~pkKE#24k1? zUHEBr zJw$u&X{uI6BJSL2SnT~JNw_p~f^L0Eduf@;q;L*VcdKs<`Ujptm_5CaGC1^IeGAQZ zL=srpb66D&_n*%ZVUx^@VKKq2T(@@ckdrOS^e@(=5| zO7s5fD3`UKv{p-i+Lff`U{PJls8Xm&BY?W0O-RqBp@P350SUuA7Vrw^S&Y^M&`Fzk zVHm+p;VrKAv;Hv%i+3orO_#@-f<-f(V=oDfn)OJrT3|`xqc@zM9>uhp#+^TwWXTgy zZ0=-P9)s>U&x@hIV754H4#_bNJ532ws-xeA)g58QDSGkrD%P-$P*JJ1!v7c~ccinCCuJJs)yiA}06DPkO$UD(Z* zPvxU1CtTV-pW=vHPTTWdcs2~^c56ZgZLAVnr=x>_~&S>!zlGnwU{r$G!J!mw0?%d*a!qfj`lH zwWx^4wX!*iCo6g{H1NdKV-Sv2JllPWH1~ndN;J);^bIu5X7x9+&H_K4yA7z6=R>E2 zc2%W$BrJ*r$4TvI&In}1+~FU~54fNJTfBC^PqFc_O~IWW+}wq3<09|=<;W+zr{sEb zB%v6Scx)d$)8~adu21PIYs7@XGye~fMlG*o5Gt9XtZ0}K)Fn1$Y!WmJ>`UVn%Cn(8 zPJvcfV$c2mauQ3LcKEWF<}Gs0 zovePD49nhrw7C*+vSsmYdc+RYWchc_*36NU9kX zn&Ev @@ -87,7 +87,7 @@ ; 18052 bytes down to 9023 bytes. ; ; Raspberry Casket, depending on the features compiled in, is about -; 5874 bytes and goes down to ~4164 bytes (in isolation). +; 5850 bytes and shrinkles down to ~4154 bytes (in isolation). ; ; So this means that the optimization is not just "on the outside". ; @@ -378,16 +378,30 @@ pre_SongInit: move.w #sv_SIZEOF,d0 bsr pre_MemClr - move.b $003c(a2),sv_pat_restart_pos_w+1(a1) ; song restart pos - move.b $003e(a2),sv_pat_pos_len_w+1(a1) ; songlength in pattern positions - move.b $003f(a2),sv_num_steps_b(a1) ; number of steps! - move.b $0041(a2),sv_num_waves_b(a1) ; number of instruments - add.l a2,d3 ; add to offset $0004 move.l d3,sv_pos_data_adr(a1) ; address to position data (POSD) + lea (a2,d4.l),a4 ; add to offset $0008 - add.l a2,d4 ; add to offset $0008 - move.l d4,sv_patterns_ptr(a1) ; address to pattern data (PATT) + moveq.l #0,d0 + moveq.l #0,d4 + lea $003c(a2),a0 + move.b (a0)+,sv_pat_restart_pos_w+1(a1) ; $003c song restart pos + move.b (a0)+,d1 ; $003d number of patterns + move.b (a0)+,sv_pat_pos_len_w+1(a1) ; $003e songlength in pattern positions + move.b (a0)+,d0 ; $003f number of steps! + move.b (a0)+,d4 ; $0040 number of instruments + move.b (a0)+,sv_num_waves_b(a1) ; $0041 number of waves + + move.b d0,sv_num_steps_b(a1) + + mulu #3,d0 ; *3 bytes per pattern line + + lea sv_pattern_table(a1),a0 +.pattableloop + move.l a4,(a0)+ + add.w d0,a4 + subq.b #1,d1 + bne.s .pattableloop lea (a2,d5.l),a0 ; offset (from $000c) into instrument names .instrnamesloop @@ -397,23 +411,21 @@ pre_SongInit: dbeq d0,.inststrloop dbra d7,.instrnamesloop - moveq.l #0,d7 - move.b $0040(a2),d7 ; number of instruments + move.l d4,d0 IFNE PRETRACKER_PARANOIA_MODE beq.s .noinstsskip ENDC - move.l d7,d0 lsl.w #3,d0 add.l a0,d0 ; skip 8 bytes of info per instrument (ININ) lea sv_inst_patterns_table(a1),a3 lea sv_inst_infos_table(a1),a4 IFNE PRETRACKER_SUPPORT_V1_5 - cmp.w #MAX_INSTRUMENTS,d7 + cmp.w #MAX_INSTRUMENTS,d4 ble.s .notruncto32 - moveq.l #MAX_INSTRUMENTS,d7 + moveq.l #MAX_INSTRUMENTS,d4 .notruncto32 ENDC - subq.w #1,d7 + subq.w #1,d4 .instinfoloop move.l d0,(a3)+ @@ -463,7 +475,7 @@ pre_SongInit: add.l d1,d0 add.l d1,d0 ; calc next start address lea uii_SIZEOF(a4),a4 - dbra d7,.instinfoloop + dbra d4,.instinfoloop .noinstsskip lea (a2,d6.l),a0 ; offset (from $0010) into wave names @@ -619,28 +631,13 @@ pre_PlayerInit: dbra d6,.perfineipolloop dbra d7,.periodtableloop -; ---------------------------------------- - - moveq.l #0,d0 - move.b sv_num_steps_b(a6),d0 - move.w d0,d1 - add.w d0,d0 - add.w d1,d0 ; *3 bytes per pattern line - - move.l sv_patterns_ptr(a6),a3 - lea sv_pattern_table(a6),a0 - move.w #255-1,d7 ; FIXME we should use the number of patterns instead? -.pattableloop - move.l a3,(a0)+ - add.w d0,a3 - dbra d7,.pattableloop - ; ---------------------------------------- move.l #$00ffff06,pv_pat_curr_row_b(a4) ; pattern frame = 0, line = $ff, pattern pos = $ff, speed_even = 0 move.l #$06060100,pv_pat_speed_odd_b(a4) ; and pv_pat_line_ticks_b, pv_pat_stopped_b, pv_songend_detected_b + addq.w #2,pv_stop_len_lof(a4) + move.w #$007b,pv_stop_per_vol_trg(a4) - move.l sv_waveinfo_ptr(a6),a1 lea pv_channeldata(a4),a0 moveq.l #NUM_CHANNELS-1,d7 moveq.l #0,d0 @@ -648,13 +645,16 @@ pre_PlayerInit: move.b #MAX_VOLUME,pcd_pat_vol_b(a0) st pcd_track_delay_offset_b(a0) IFEQ PRETRACKER_BUGFIX_CODE - move.l a1,pcd_waveinfo_ptr(a0) ; we should actually have no wave selected + move.l sv_waveinfo_ptr(a6),pcd_waveinfo_ptr(a0) ; we should actually have no wave selected ENDC - move.w #3,pcd_adsr_phase_w(a0) + addq.w #3,pcd_adsr_phase_w(a0) + + lea pv_sample_buffer_ptr(a4),a1 + lea pcd_out_base(a0),a2 + move.l (a1)+,(a2)+ ; pv_sample_buffer_ptr -> pcd_out_ptr_l + move.l (a1)+,(a2)+ ; pv_stop_len_lof -> pcd_out_len_w / pcd_out_lof_w + move.l (a1)+,(a2)+ ; pv_stop_per_vol_trg -> pcd_out_per_w / pcd_out_vol_b / pcd_out_trg_b - move.l pv_sample_buffer_ptr(a4),pcd_out_ptr_l(a0) - move.w #2,pcd_out_len_w(a0) - move.w #$7B,pcd_out_per_w(a0) move.b d0,pcd_channel_num_b(a0) bset d0,pcd_channel_mask_b(a0) addq.b #1,d0 @@ -2248,9 +2248,10 @@ pre_PlayerTick: bmi.s .track_delay_trigger_first lea pcd_out_base(a5),a3 - move.l pv_sample_buffer_ptr(a4),(a3)+ ; ocd_sam_ptr - move.l #2<<16,(a3)+ ; ocd_length/ocd_loop_offset - move.l #$7b<<16,(a3)+ ; ocd_period/ocd_volume/ocd_trigger + lea pv_sample_buffer_ptr(a4),a1 + move.l (a1)+,(a3)+ ; ocd_sam_ptr + move.l (a1)+,(a3)+ ; ocd_length/ocd_loop_offset + move.l (a1)+,(a3)+ ; ocd_period/ocd_volume/ocd_trigger bra.s .check_next_channel diff --git a/src/raspberry_casket.i b/src/raspberry_casket.i index 7c6563b..727ba9d 100644 --- a/src/raspberry_casket.i +++ b/src/raspberry_casket.i @@ -170,13 +170,12 @@ sv_wavetotal_table rs.l MAX_WAVES ; 24 longwords to sample lengths for a sv_wavegen_order_table rs.b MAX_WAVES ; 24 bytes sv_num_waves_b rs.b 1 sv_num_steps_b rs.b 1 -sv_patterns_ptr rs.l 1 sv_pat_pos_len_w rs.w 1 ; only byte used sv_pat_restart_pos_w rs.w 1 ; only byte used sv_pos_data_adr rs.l 1 sv_waveinfo_ptr rs.l 1 ; base pointer of wave info -sv_pattern_table rs.l 256 sv_inst_infos_table rs.b MAX_INSTRUMENTS*uii_SIZEOF +sv_pattern_table rs.l 256 sv_SIZEOF rs.b 0 ; ---------------------------------------- @@ -308,10 +307,12 @@ pv_trigger_mask_w rs.w 1 pv_my_song rs.l 1 pv_copperlist_ptr rs.l 1 -pv_sample_buffer_ptr rs.l 1 ; pointer to start of sample buffer pv_wave_sample_table rs.l MAX_WAVES ; 24 pointers to sample starts pv_period_table rs.w 16*NOTES_IN_OCTAVE*3 ; --- 127 byte displacement limit --- +pv_sample_buffer_ptr rs.l 1 ; pointer to start of sample buffer +pv_stop_len_lof rs.l 1 ; $0002 / $0000 +pv_stop_per_vol_trg rs.l 1 ; $007b / $00 / $00 pv_channeldata rs.b NUM_CHANNELS*pcd_SIZEOF IFNE PRETRACKER_VOLUME_TABLE