Wave order table filling moved and optimized in SongInit.

This commit is contained in:
Chris Hodges 2023-08-16 20:35:25 +02:00
parent e0f7537774
commit 36d435f502
3 changed files with 23 additions and 26 deletions

View File

@ -89,14 +89,14 @@ The source needs two common include files to compile (`custom.i` and
### Size
The original C compiled code was... just bad. The new binary is
about 1/3 of the original one.
less than 1/3 of the original one.
The code has also been optimized so that it compresses better.
The original code compressed with *Blueberry's* Shrinkler goes from
18052 bytes down to 9023 bytes.
Raspberry Casket, depending on the features compiled in, is about
5850 bytes and shrinkles down to ~4154 bytes (in isolation).
5840 bytes and shrinkles down to ~4144 bytes (in isolation).
So this means that the optimization is not just "on the outside".
@ -159,8 +159,9 @@ solve this problem.
- This removes a big source of cpu jitter when track delay is enabled (no longer clearing the track delay buffer).
- This also fixes uses of the illegal period 0 in the lead-in that could cause the replay to miss the first trigger.
- Moved pattern table init from PlayerInit to SongInit, optimized SongInit a bit.
- Wave order table filling moved and optimized in SongInit.
- Added Presto player draft.
- Drop-in replacement code size: 5850 bytes.
- Drop-in replacement code size: 5840 bytes.
### V1.x (unreleased)
- Fixed a bug regarding the copper output mode with looping waves having a loop-offset.

Binary file not shown.

View File

@ -80,14 +80,14 @@
; Size
; ~~~~
; The original C compiled code was... just bad. The new binary is
; about 1/3rd of the original one.
; less than 1/3rd of the original one.
;
; The code has also been optimized so that it compresses better.
; The original code compressed with Blueberry's Shrinkler goes from
; 18052 bytes down to 9023 bytes.
;
; Raspberry Casket, depending on the features compiled in, is about
; 5850 bytes and shrinkles down to ~4154 bytes (in isolation).
; 5840 bytes and shrinkles down to ~4144 bytes (in isolation).
;
; So this means that the optimization is not just "on the outside".
;
@ -394,11 +394,27 @@ pre_SongInit:
move.b d0,sv_num_steps_b(a1)
mulu #3,d0 ; *3 bytes per pattern line
lea sv_wavegen_order_table+MAX_WAVES(a1),a3
moveq.l #MAX_WAVES-1,d3 ; fill 24 bytes with default order of waves?
.fillcount
move.b d3,-(a3)
dbra d3,.fillcount
cmp.b #$19,d2 ; check if version is higher than 19
bls.s .hasnowaveordering
moveq.l #MAX_WAVES-1,d3
.waveorderloop
move.b (a0)+,(a3)+ ; $0042 wave generation ordering
dbra d3,.waveorderloop
.hasnowaveordering
lea sv_pattern_table(a1),a0
.pattableloop
move.l a4,(a0)+
add.w d0,a4 ; *3 bytes per pattern line
add.w d0,a4
add.w d0,a4
subq.b #1,d1
bne.s .pattableloop
@ -494,26 +510,6 @@ pre_SongInit:
.addressiseven
move.l a0,sv_waveinfo_ptr(a1)
lea sv_wavegen_order_table(a1),a0
cmpi.b #$19,d2 ; check if version is higher than 19
bhi.s .haswaveorderinfo
moveq.l #0,d0 ; fill 24 bytes with default order of waves?
moveq.l #MAX_WAVES-1,d7
.fillcount
move.b d0,(a0)+
addq.b #1,d0
dbra d7,.fillcount
bra.s .contafterworkaround
.haswaveorderinfo
moveq.l #(MAX_WAVES/4)-1,d7
lea $0042(a2),a2 ; offset into wave ordering
.memcpyloop
move.l (a2)+,(a0)+
dbra d7,.memcpyloop
.contafterworkaround
moveq.l #2,d0 ; at least empty sample
moveq.l #0,d7
move.b sv_num_waves_b(a1),d7 ; has instruments?