diff --git a/KAEZstd/open_source/zstd_decompress.patch b/KAEZstd/open_source/zstd_decompress.patch index 2490baa2d292f690f419eb4e3b2863dfe476ee5b..af6cfaaf311fa895aec29e270553f6aeb725fdfb 100644 --- a/KAEZstd/open_source/zstd_decompress.patch +++ b/KAEZstd/open_source/zstd_decompress.patch @@ -1,15 +1,193 @@ -diff -Nur zstd-1.5.4/lib/decompress/huf_decompress.c zstd/lib/decompress/huf_decompress.c ---- zstd-1.5.4/lib/decompress/huf_decompress.c 2023-02-10 08:41:50.000000000 +0800 -+++ zstd/lib/decompress/huf_decompress.c 2025-06-23 16:52:55.825521370 +0800 -@@ -11,6 +11,7 @@ - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - ****************************************************************** */ -+#define OPTIMIZE_LIT_HUF_DECODE 1 +diff -Naur zstd-1.5.4/lib/compress/clevels.h zstd/lib/compress/clevels.h +--- zstd-1.5.4/lib/compress/clevels.h 2023-02-10 08:41:50.000000000 +0800 ++++ zstd/lib/compress/clevels.h 2025-08-18 21:50:50.105008530 +0800 +@@ -30,7 +30,8 @@ + { 20, 15, 16, 1, 6, 0, ZSTD_fast }, /* level 2 */ + { 21, 16, 17, 1, 5, 0, ZSTD_dfast }, /* level 3 */ + { 21, 18, 18, 1, 5, 0, ZSTD_dfast }, /* level 4 */ +- { 21, 18, 19, 3, 5, 2, ZSTD_greedy }, /* level 5 */ ++ // { 21, 22, 22, 5, 5, 2, ZSTD_greedy }, /* level 5 */ ++ { 21, 22, 22, 3, 5, 2, ZSTD_greedy }, /* level 5 */ + { 21, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6 */ + { 21, 19, 20, 4, 5, 8, ZSTD_lazy }, /* level 7 */ + { 21, 19, 20, 4, 5, 16, ZSTD_lazy2 }, /* level 8 */ +@@ -82,7 +83,8 @@ + { 17, 13, 15, 1, 5, 0, ZSTD_fast }, /* level 2 */ + { 17, 15, 16, 2, 5, 0, ZSTD_dfast }, /* level 3 */ + { 17, 17, 17, 2, 4, 0, ZSTD_dfast }, /* level 4 */ +- { 17, 16, 17, 3, 4, 2, ZSTD_greedy }, /* level 5 */ ++ // { 17, 16, 17, 3, 4, 2, ZSTD_greedy }, /* level 5 */ ++ { 17, 18, 18, 5, 4, 2, ZSTD_greedy }, /* level 5 */ + { 17, 16, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */ + { 17, 16, 17, 3, 4, 8, ZSTD_lazy2 }, /* level 7 */ + { 17, 16, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ +diff -Naur zstd-1.5.4/lib/compress/zstd_lazy.c zstd/lib/compress/zstd_lazy.c +--- zstd-1.5.4/lib/compress/zstd_lazy.c 2023-02-10 08:41:50.000000000 +0800 ++++ zstd/lib/compress/zstd_lazy.c 2025-08-18 21:50:50.105008530 +0800 +@@ -1212,6 +1212,7 @@ + ZSTD_VecMask matches = ZSTD_row_getMatchMask(tagRow, (BYTE)tag, headGrouped, rowEntries); + /* Cycle through the matches and prefetch */ ++ nbAttempts = 16; + for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) { + U32 const matchPos = ((headGrouped + ZSTD_VecMask_next(matches)) / groupWidth) & rowMask; + U32 const matchIndex = row[matchPos]; +diff -Naur zstd-1.5.4/lib/decompress/huf_decompress.c zstd/lib/decompress/huf_decompress.c +--- zstd-1.5.4/lib/decompress/huf_decompress.c 2023-02-10 08:41:50.000000000 +0800 ++++ zstd/lib/decompress/huf_decompress.c 2025-08-18 21:50:50.105008530 +0800 +@@ -15,6 +15,9 @@ /* ************************************************************** * Dependencies -@@ -843,9 +844,110 @@ + ****************************************************************/ ++#define OPTIMIZE_LIT_HUF_DECODE 1 ++#define OPTIMIZE_HUF_TABLE_COPY 1 ++ + #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset */ + #include "../common/compiler.h" + #include "../common/bitstream.h" /* BIT_* */ +@@ -354,6 +357,132 @@ + BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; + } HUF_ReadDTableX1_Workspace; + ++#if OPTIMIZE_HUF_TABLE_COPY ++ ++static U16 HUF_DEltX1_set1 (BYTE symbol, BYTE nbBits) { ++ U16 D = ((U16)(symbol << 8) + nbBits); ++ return D; ++} ++ ++size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags) ++{ ++ U32 tableLog = 0; ++ U32 nbSymbols = 0; ++ size_t iSize; ++ void* const dtPtr = DTable + 1; ++ HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr; ++ HUF_ReadDTableX1_Workspace* wksp = (HUF_ReadDTableX1_Workspace*)workSpace; ++ ++ DEBUG_STATIC_ASSERT(HUF_DECOMPRESS_WORKSPACE_SIZE >= sizeof(*wksp)); ++ if (sizeof(*wksp) > wkspSize) return ERROR(tableLog_tooLarge); ++ ++ DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); ++ /* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */ ++ ++ iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), flags); ++ if (HUF_isError(iSize)) return iSize; ++ ++ ++ /* Table header */ ++ { DTableDesc dtd = HUF_getDTableDesc(DTable); ++ U32 const maxTableLog = dtd.maxTableLog + 1; ++ U32 const targetTableLog = MIN(maxTableLog, HUF_DECODER_FAST_TABLELOG); ++ tableLog = HUF_rescaleStats(wksp->huffWeight, wksp->rankVal, nbSymbols, tableLog, targetTableLog); ++ if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */ ++ dtd.tableType = 0; ++ dtd.tableLog = (BYTE)tableLog; ++ ZSTD_memcpy(DTable, &dtd, sizeof(dtd)); ++ } ++ ++ { ++ int n; ++ int nextRankStart = 0; ++ int const unroll = 4; ++ int const nLimit = (int)nbSymbols - unroll + 1; ++ for (n=0; n<(int)tableLog+1; n++) { ++ U32 const curr = nextRankStart; ++ nextRankStart += wksp->rankVal[n]; ++ wksp->rankStart[n] = curr; ++ } ++ for (n=0; n < nLimit; n += unroll) { ++ int u; ++ for (u=0; u < unroll; ++u) { ++ size_t const w = wksp->huffWeight[n+u]; ++ wksp->symbols[wksp->rankStart[w]++] = (BYTE)(n+u); ++ } ++ } ++ for (; n < (int)nbSymbols; ++n) { ++ size_t const w = wksp->huffWeight[n]; ++ wksp->symbols[wksp->rankStart[w]++] = (BYTE)n; ++ } ++ } ++ ++ { ++ U32 w; ++ int symbol=wksp->rankVal[0]; ++ int rankStart=0; ++ for (w=1; wrankVal[w]; ++ int const length = (1 << w) >> 1; ++ int uStart = rankStart; ++ BYTE const nbBits = (BYTE)(tableLog + 1 - w); ++ int s; ++ switch (length) { ++ case 1: ++ for (s=0; ssymbols[symbol + s]; ++ D.nbBits = nbBits; ++ dt[uStart] = D; ++ uStart += 1; ++ } ++ break; ++ case 2: ++ for (s=0; ssymbols[symbol + s]; ++ D.nbBits = nbBits; ++ dt[uStart+0] = D; ++ dt[uStart+1] = D; ++ uStart += 2; ++ } ++ break; ++ case 4: ++ for (s=0; ssymbols[symbol + s] , nbBits); ++ U16 DH = HUF_DEltX1_set1(wksp->symbols[symbol + s + 1], nbBits); ++ vst1q_u16((U16*)(dt+uStart), vcombine_u16(vdup_n_u16(DL), vdup_n_u16(DH))); ++ uStart += 8; ++ } ++ break; ++ case 8: ++ for (s=0; ssymbols[symbol + s], nbBits); ++ vst1q_u16((U16*)(dt+uStart), vdupq_n_u16(D1)); ++ uStart += 8; ++ } ++ break; ++ default: ++ for (s=0; ssymbols[symbol + s], nbBits); ++ uint16x8_t vecD8 = vdupq_n_u16(D1); ++ for (int u=0; u> 5; /* small advantage to algorithm using less memory, to reduce cache eviction */ +- return DTime1 < DTime0; ++ return DTime1 * 1.12 < DTime0; + } + #endif + } +diff -Naur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/decompress/zstd_decompress_block_aarch64.S --- zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S 1970-01-01 08:00:00.000000000 +0800 -+++ zstd/lib/decompress/zstd_decompress_block_aarch64.S 2025-06-23 16:47:39.873521370 +0800 -@@ -0,0 +1,8736 @@ ++++ zstd/lib/decompress/zstd_decompress_block_aarch64.S 2025-08-18 21:50:50.105008530 +0800 +@@ -0,0 +1,7827 @@ + .arch armv8-a + .file "zstd_decompress_block.c" + .text @@ -131,17 +318,17 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + .p2align 4,,11 + .type ZSTD_safecopy, %function +ZSTD_safecopy: -+.LFB4518: ++.LFB4465: + .cfi_startproc + add x5, x0, x3 + cmp x3, 7 -+ ble .L34 ++ ble .L32 + cmp w4, 1 -+ beq .L35 ++ beq .L33 + cmp x5, x1 + bls .L19 + cmp x1, x0 -+ bcs .L36 ++ bcs .L34 +.L12: + cmp x0, x5 + bcs .L1 @@ -154,29 +341,29 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec +.L1: + ret + .p2align 2,,3 -+.L35: ++.L33: + sub x4, x0, x2 + cmp x4, 7 -+ bls .L37 ++ bls .L35 + ldr d0, [x2] + str d0, [x0] +.L7: -+ add x6, x2, 8 -+ add x4, x0, 8 ++ add x2, x2, 8 ++ add x0, x0, 8 + cmp x5, x1 + bhi .L8 -+ sub x2, x0, x2 -+ cmp x2, 15 ++ sub x1, x0, x2 ++ cmp x1, 15 + bgt .L9 + .p2align 3,,7 +.L10: -+ ldr d0, [x6], 8 -+ str d0, [x4], 8 -+ cmp x5, x4 ++ ldr d0, [x2], 8 ++ str d0, [x0], 8 ++ cmp x5, x0 + bhi .L10 + ret + .p2align 2,,3 -+.L34: ++.L32: + mov x1, 0 + cmp x5, x0 + bls .L1 @@ -189,7 +376,7 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + bne .L3 + ret + .p2align 2,,3 -+.L37: ++.L35: + ldrb w7, [x2] + adrp x6, .LANCHOR0 + strb w7, [x0] @@ -210,22 +397,22 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + b .L7 + .p2align 2,,3 +.L8: -+ cmp x1, x4 -+ bcc .L23 -+ sub x0, x0, x2 -+ sub x2, x1, x4 ++ cmp x1, x0 ++ bcc .L12 ++ sub x4, x1, x0 ++ sub x3, x0, x2 ++ mov x6, x4 ++ cmp x3, 15 ++ bgt .L13 + mov x3, x2 -+ cmp x0, 15 -+ bgt .L24 -+ mov x0, x6 + .p2align 3,,7 +.L14: -+ ldr d0, [x0], 8 -+ str d0, [x4], 8 -+ cmp x1, x4 ++ ldr d0, [x3], 8 ++ str d0, [x0], 8 ++ cmp x1, x0 + bhi .L14 +.L16: -+ add x2, x6, x2 ++ add x2, x2, x4 + mov x0, x1 + ldrb w1, [x2], 1 + strb w1, [x0], 1 @@ -234,8 +421,6 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + b .L1 +.L9: + sub x3, x3, #8 -+ mov x2, x6 -+ mov x0, x4 + .p2align 3,,7 +.L19: + ldr q0, [x2] @@ -256,17 +441,16 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + bhi .L11 + ret + .p2align 2,,3 -+.L36: -+ sub x3, x1, x0 -+ mov x6, x2 -+ mov x2, x3 ++.L34: ++ sub x6, x1, x0 ++ mov x4, x6 +.L13: -+ ldr q0, [x6] ++ ldr q0, [x2] + str q0, [x0] -+ cmp x3, 16 ++ cmp x6, 16 + ble .L16 + add x0, x0, 16 -+ add x3, x6, 16 ++ add x3, x2, 16 + .p2align 3,,7 +.L17: + ldr q0, [x3] @@ -278,21 +462,14 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + cmp x0, x1 + bcc .L17 + b .L16 -+.L24: -+ mov x0, x4 -+ b .L13 -+.L23: -+ mov x2, x6 -+ mov x0, x4 -+ b .L12 + .cfi_endproc -+.LFE4518: ++.LFE4465: + .size ZSTD_safecopy, .-ZSTD_safecopy + .align 2 + .p2align 4,,11 + .type ZSTD_execSequenceEnd, %function +ZSTD_execSequenceEnd: -+.LFB4520: ++.LFB4467: + .cfi_startproc + stp x29, x30, [sp, -64]! + .cfi_def_cfa_offset 64 @@ -303,51 +480,51 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + stp x21, x22, [sp, 32] + .cfi_offset 21, -32 + .cfi_offset 22, -24 -+ ldp x10, x21, [x2] -+ add x22, x10, x21 ++ ldp x13, x21, [x2] ++ add x22, x13, x21 + cmp x8, x22 -+ bcc .L44 -+ ldr x12, [x3] -+ mov x9, x3 -+ ldr x13, [x2, 16] -+ sub x4, x4, x12 -+ cmp x4, x10 + bcc .L42 ++ ldr x11, [x3] ++ mov x9, x3 ++ sub x4, x4, x11 ++ cmp x4, x13 ++ bcc .L40 + stp x19, x20, [sp, 16] + .cfi_offset 20, -40 + .cfi_offset 19, -48 + sub x20, x1, #32 -+ add x19, x0, x10 -+ mov x15, x5 -+ mov x2, x12 -+ mov x11, x6 -+ mov x14, x7 ++ add x19, x0, x13 ++ ldr x15, [x2, 16] ++ mov x14, x5 ++ mov x2, x11 ++ mov x10, x6 ++ mov x12, x7 + mov x1, x20 -+ mov x3, x10 ++ mov x3, x13 + mov w4, 0 + str x23, [sp, 48] + .cfi_offset 23, -16 + bl ZSTD_safecopy -+ add x12, x12, x10 -+ sub x0, x19, x15 -+ str x12, [x9] -+ sub x23, x19, x13 -+ cmp x0, x13 -+ bcs .L41 -+ sub x6, x19, x11 -+ cmp x6, x13 -+ bcc .L46 -+ sub x2, x15, x23 -+ sub x1, x14, x2 ++ add x11, x11, x13 ++ sub x0, x19, x14 ++ str x11, [x9] ++ sub x23, x19, x15 ++ cmp x0, x15 ++ bcs .L39 ++ sub x6, x19, x10 ++ cmp x6, x15 ++ bcc .L44 ++ sub x2, x14, x23 ++ sub x1, x12, x2 + add x0, x1, x21 -+ cmp x14, x0 -+ bcs .L47 ++ cmp x12, x0 ++ bcs .L45 + mov x0, x19 + sub x21, x21, x2 -+ mov x23, x15 ++ mov x23, x14 + add x19, x19, x2 + bl memmove -+.L41: ++.L39: + mov x2, x23 + mov x1, x20 + mov x0, x19 @@ -359,7 +536,7 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + .cfi_restore 19 + ldr x23, [sp, 48] + .cfi_restore 23 -+.L38: ++.L36: + mov x0, x22 + ldp x21, x22, [sp, 32] + ldp x29, x30, [sp], 64 @@ -370,7 +547,7 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + .cfi_def_cfa_offset 0 + ret + .p2align 2,,3 -+.L46: ++.L44: + .cfi_def_cfa_offset 64 + .cfi_offset 19, -48 + .cfi_offset 20, -40 @@ -384,7 +561,7 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + .cfi_restore 19 + ldr x23, [sp, 48] + .cfi_restore 23 -+.L42: ++.L40: + mov x22, -20 + mov x0, x22 + ldp x21, x22, [sp, 32] @@ -397,7 +574,7 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + .cfi_def_cfa_offset 0 + ret + .p2align 2,,3 -+.L44: ++.L42: + .cfi_restore_state + mov x22, -70 + mov x0, x22 @@ -410,7 +587,7 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + .cfi_def_cfa_offset 0 + ret + .p2align 2,,3 -+.L47: ++.L45: + .cfi_def_cfa_offset 64 + .cfi_offset 19, -48 + .cfi_offset 20, -40 @@ -427,63 +604,63 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + .cfi_restore 19 + ldr x23, [sp, 48] + .cfi_restore 23 -+ b .L38 ++ b .L36 + .cfi_endproc -+.LFE4520: ++.LFE4467: + .size ZSTD_execSequenceEnd, .-ZSTD_execSequenceEnd + .align 2 + .p2align 4,,11 + .type ZSTD_safecopyDstBeforeSrc, %function +ZSTD_safecopyDstBeforeSrc: -+.LFB4519: ++.LFB4466: + .cfi_startproc + cmp x2, 7 + sub x3, x0, x1 + ccmn x3, #7, 0, gt + add x4, x0, x2 -+ bge .L62 ++ bge .L60 + sub x5, x4, #32 + cmp x5, x0 + ccmn x3, #16, 0, cs -+ blt .L63 -+.L52: ++ blt .L61 ++.L50: + cmp x4, x0 -+ bls .L48 ++ bls .L46 + sub x4, x4, x0 + mov x2, 0 + .p2align 3,,7 -+.L55: ++.L53: + ldrb w3, [x1, x2] + strb w3, [x0, x2] + add x2, x2, 1 + cmp x4, x2 -+ bne .L55 -+.L48: ++ bne .L53 ++.L46: + ret + .p2align 2,,3 -+.L62: ++.L60: + cmp x0, x4 -+ bcs .L48 ++ bcs .L46 + mov x3, 0 + .p2align 3,,7 -+.L51: ++.L49: + ldrb w4, [x1, x3] + strb w4, [x0, x3] + add x3, x3, 1 + cmp x2, x3 -+ bne .L51 ++ bne .L49 + ret + .p2align 2,,3 -+.L63: ++.L61: + ldr q0, [x1] + sub x3, x5, x0 + str q0, [x0] + cmp x3, 16 -+ ble .L53 ++ ble .L51 + add x0, x0, 16 + add x2, x1, 16 + .p2align 3,,7 -+.L54: ++.L52: + ldr q0, [x2] + add x0, x0, 32 + add x2, x2, 32 @@ -491,19 +668,19 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + ldr q0, [x2, -16] + str q0, [x0, -16] + cmp x5, x0 -+ bhi .L54 -+.L53: ++ bhi .L52 ++.L51: + add x1, x1, x3 + mov x0, x5 -+ b .L52 ++ b .L50 + .cfi_endproc -+.LFE4519: ++.LFE4466: + .size ZSTD_safecopyDstBeforeSrc, .-ZSTD_safecopyDstBeforeSrc + .align 2 + .p2align 4,,11 + .type ZSTD_execSequenceEndSplitLitBuffer, %function +ZSTD_execSequenceEndSplitLitBuffer: -+.LFB4521: ++.LFB4468: + .cfi_startproc + stp x29, x30, [sp, -64]! + .cfi_def_cfa_offset 64 @@ -520,44 +697,44 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + .cfi_offset 20, -40 + add x20, x10, x22 + cmp x1, x20 -+ bcc .L71 ++ bcc .L69 + ldr x1, [x4] + mov x11, x4 -+ ldr x9, [x3, 16] + sub x5, x5, x1 + cmp x5, x10 -+ bcc .L68 ++ bcc .L66 + add x12, x1, x10 + cmp x0, x12 + ccmp x1, x0, 2, cc -+ bcc .L71 ++ bcc .L69 ++ ldr x9, [x3, 16] + add x19, x0, x10 + mov x21, x2 + mov x2, x10 + str x23, [sp, 48] + .cfi_offset 23, -16 + bl ZSTD_safecopyDstBeforeSrc -+ str x12, [x11] + sub x0, x19, x6 ++ str x12, [x11] + sub x23, x19, x9 + cmp x0, x9 -+ bcs .L67 ++ bcs .L65 + sub x7, x19, x7 + cmp x7, x9 -+ bcc .L73 ++ bcc .L71 + ldr x0, [sp, 64] + sub x2, x6, x23 + ldr x3, [sp, 64] + sub x1, x0, x2 + add x0, x1, x22 + cmp x3, x0 -+ bcs .L74 ++ bcs .L72 + mov x0, x19 + sub x22, x22, x2 + mov x23, x6 + add x19, x19, x2 + bl memmove -+.L67: ++.L65: + mov x2, x23 + mov x3, x22 + mov x1, x21 @@ -566,7 +743,7 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + bl ZSTD_safecopy + ldr x23, [sp, 48] + .cfi_restore 23 -+.L64: ++.L62: + mov x0, x20 + ldp x19, x20, [sp, 16] + ldp x21, x22, [sp, 32] @@ -581,7 +758,7 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + .cfi_def_cfa_offset 0 + ret + .p2align 2,,3 -+.L71: ++.L69: + .cfi_restore_state + mov x20, -70 + mov x0, x20 @@ -597,7 +774,7 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + .cfi_def_cfa_offset 0 + ret + .p2align 2,,3 -+.L73: ++.L71: + .cfi_def_cfa_offset 64 + .cfi_offset 19, -48 + .cfi_offset 20, -40 @@ -608,7 +785,7 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + .cfi_offset 30, -56 + ldr x23, [sp, 48] + .cfi_restore 23 -+.L68: ++.L66: + mov x20, -20 + mov x0, x20 + ldp x19, x20, [sp, 16] @@ -623,7 +800,7 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + .cfi_def_cfa_offset 0 + ret + .p2align 2,,3 -+.L74: ++.L72: + .cfi_def_cfa_offset 64 + .cfi_offset 19, -48 + .cfi_offset 20, -40 @@ -637,15 +814,15 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + bl memmove + ldr x23, [sp, 48] + .cfi_restore 23 -+ b .L64 ++ b .L62 + .cfi_endproc -+.LFE4521: ++.LFE4468: + .size ZSTD_execSequenceEndSplitLitBuffer, .-ZSTD_execSequenceEndSplitLitBuffer + .align 2 + .p2align 4,,11 + .type ZSTD_buildFSETable_body_default.constprop.0, %function +ZSTD_buildFSETable_body_default.constprop.0: -+.LFB4547: ++.LFB4502: + .cfi_startproc + mov w7, 1 + stp x29, x30, [sp, -32]! @@ -653,112 +830,111 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + .cfi_offset 29, -32 + .cfi_offset 30, -24 + lsl w14, w7, w5 -+ mov x30, 0 ++ mov x18, 0 + sub w9, w5, #1 + lsr w8, w14, 3 -+ bfi x30, x5, 32, 32 ++ bfi x18, x5, 32, 32 + add w8, w8, w14, lsr 1 + mov x29, sp -+ mov x15, 1 ++ mov x13, 1 + lsl w7, w7, w9 -+ bfi x30, x15, 0, 32 ++ bfi x18, x13, 0, 32 + add w9, w8, 3 + add x11, x0, 8 + sub w8, w14, #1 -+ add x17, x6, 106 ++ add x15, x6, 106 + cmn w2, #1 -+ beq .L76 -+ sxth w18, w7 ++ beq .L74 ++ sxth w17, w7 + uxtw x16, w2 + mov w10, w8 + mov x7, 0 ++ mov w30, 0 + str x19, [sp, 16] + .cfi_offset 19, -16 -+ mov w19, 0 -+ b .L80 ++ mov w19, w13 ++ b .L78 + .p2align 2,,3 -+.L77: -+ cmp w18, w12 -+ strh w13, [x6, x7, lsl 1] -+ csel w15, w15, wzr, gt ++.L75: ++ cmp w17, w12 ++ strh w12, [x6, x7, lsl 1] ++ csel w13, w13, wzr, gt + add x12, x7, 1 -+ mov w19, 1 ++ mov w30, 1 + cmp x16, x7 -+ beq .L116 -+.L99: ++ beq .L114 ++.L97: + mov x7, x12 -+.L80: ++.L78: + ldrsh w12, [x1, x7, lsl 1] -+ and w13, w12, 65535 + cmn w12, #1 -+ bne .L77 -+ add x12, x11, w10, uxtw 3 -+ mov w13, 1 ++ bne .L75 ++ add x12, x11, x10, uxtw 3 + sub w10, w10, #1 + str w7, [x12, 4] + add x12, x7, 1 -+ strh w13, [x6, x7, lsl 1] ++ strh w19, [x6, x7, lsl 1] + cmp x16, x7 -+ bne .L99 -+.L116: -+ lsr w7, w30, 0 -+ cmp w19, 0 -+ csel x15, x15, x7, ne -+ bfi x30, x15, 0, 32 -+ str x30, [x0] ++ bne .L97 ++.L114: ++ lsr w7, w18, 0 ++ cmp w30, 0 ++ csel x13, x13, x7, ne ++ bfi x18, x13, 0, 32 ++ str x18, [x0] + cmp w8, w10 -+ beq .L98 ++ beq .L96 + mov x15, 0 + mov w0, 0 + .p2align 3,,7 -+.L94: ++.L92: + ldrsh w12, [x1, x15, lsl 1] + mov w13, w15 + cmp w12, 0 -+ ble .L90 ++ ble .L88 + mov w2, 0 + .p2align 3,,7 -+.L93: -+ add x7, x11, w0, uxtw 3 ++.L91: ++ add x7, x11, x0, uxtw 3 + add w0, w0, w9 + and w0, w8, w0 + str w13, [x7, 4] + cmp w0, w10 -+ bhi .L92 -+.L91: ++ bhi .L90 ++.L89: + add w2, w2, 1 + cmp w12, w2 -+ bne .L93 -+.L90: ++ bne .L91 ++.L88: + add x2, x15, 1 + cmp x16, x15 -+ beq .L117 ++ beq .L115 + mov x15, x2 -+ b .L94 ++ b .L92 + .p2align 2,,3 -+.L92: ++.L90: + add w0, w0, w9 + and w0, w8, w0 + cmp w0, w10 -+ bls .L91 ++ bls .L89 + add w0, w0, w9 + and w0, w8, w0 + cmp w0, w10 -+ bls .L91 -+ b .L92 ++ bls .L89 ++ b .L90 + .p2align 2,,3 -+.L117: ++.L115: + ldr x19, [sp, 16] + .cfi_restore 19 -+ cbz w14, .L75 -+.L87: ++ cbz w14, .L73 ++.L85: + and w5, w5, 255 + and w12, w14, 65535 + mov x2, x11 + mov w8, 0 + mov w11, 31 + .p2align 3,,7 -+.L96: ++.L94: + ldr w7, [x2, 4] + add w8, w8, 1 + add x2, x2, 8 @@ -779,57 +955,57 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + ldr w0, [x3, w7, uxtw 2] + str w0, [x2, -4] + cmp w14, w8 -+ bhi .L96 -+.L75: ++ bhi .L94 ++.L73: + ldp x29, x30, [sp], 32 + .cfi_restore 30 + .cfi_restore 29 + .cfi_def_cfa_offset 0 + ret -+.L98: ++.L96: + .cfi_def_cfa_offset 32 + .cfi_offset 19, -16 + .cfi_offset 29, -32 + .cfi_offset 30, -24 + add x0, x1, 2 + mov x10, 0 -+ add x2, x0, w2, uxtw 1 ++ add x2, x0, x2, uxtw 1 + mov x13, 0 -+ mov x15, 72340172838076673 ++ mov x16, 72340172838076673 + .p2align 3,,7 -+.L83: ++.L81: + ldrsh w12, [x1] + add x7, x13, 106 -+ str x10, [x17, x13] ++ str x10, [x15, x13] + add x7, x6, x7 + mov x0, 8 + cmp w12, 8 -+ ble .L85 ++ ble .L83 + .p2align 3,,7 -+.L82: ++.L80: + str x10, [x7, x0] + add x0, x0, 8 + cmp w12, w0 -+ bgt .L82 -+.L85: ++ bgt .L80 ++.L83: + add x1, x1, 2 -+ add x13, x13, w12, sxth -+ add x10, x10, x15 ++ add x13, x13, x12, sxth ++ add x10, x10, x16 + cmp x2, x1 -+ bne .L83 ++ bne .L81 + ldr x19, [sp, 16] + .cfi_restore 19 -+.L84: ++.L82: + uxtw x0, w14 -+ cbz w14, .L75 ++ cbz w14, .L73 + sub x12, x0, #1 -+ mov x1, x17 ++ mov x1, x15 + add x0, x6, 108 + and x12, x12, -2 + add x12, x12, x0 + mov x0, 0 + .p2align 3,,7 -+.L89: ++.L87: + and x7, x8, x0 + add x0, x9, x0 + and x2, x8, x0 @@ -843,19 +1019,19 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + ldrb w7, [x1, -1] + str w7, [x2, 4] + cmp x12, x1 -+ bne .L89 -+ b .L87 -+.L76: -+ str x30, [x0] -+ b .L84 ++ bne .L87 ++ b .L85 ++.L74: ++ str x18, [x0] ++ b .L82 + .cfi_endproc -+.LFE4547: ++.LFE4502: + .size ZSTD_buildFSETable_body_default.constprop.0, .-ZSTD_buildFSETable_body_default.constprop.0 + .align 2 + .p2align 4,,11 + .type ZSTD_buildSeqTable.constprop.0, %function +ZSTD_buildSeqTable.constprop.0: -+.LFB4548: ++.LFB4503: + .cfi_startproc + stp x29, x30, [sp, -208]! + .cfi_def_cfa_offset 208 @@ -876,15 +1052,15 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + mov x23, x1 + str w3, [sp, 76] + cmp w2, 2 -+ beq .L119 ++ beq .L117 + cmp w2, 3 -+ beq .L120 ++ beq .L118 + cmp w2, 1 -+ beq .L136 ++ beq .L134 + ldr x0, [sp, 216] + mov x21, 0 + str x0, [x1] -+.L118: ++.L116: + mov x0, x21 + ldp x19, x20, [sp, 16] + ldp x21, x22, [sp, 32] @@ -901,7 +1077,7 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + .cfi_def_cfa_offset 0 + ret + .p2align 2,,3 -+.L119: ++.L117: + .cfi_restore_state + mov x4, x6 + mov x3, x5 @@ -911,10 +1087,10 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + bl FSE_readNCount + mov x21, x0 + cmn x0, #120 -+ bhi .L130 ++ bhi .L128 + ldr w5, [sp, 92] + cmp w19, w5 -+ bcc .L130 ++ bcc .L128 + ldr w2, [sp, 76] + mov x3, x20 + ldr x4, [sp, 208] @@ -923,15 +1099,15 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + mov x0, x22 + bl ZSTD_buildFSETable_body_default.constprop.0 + str x22, [x23] -+ b .L118 ++ b .L116 + .p2align 2,,3 -+.L136: ++.L134: + mov x21, -72 -+ cbz x6, .L118 ++ cbz x6, .L116 + ldrb w0, [x5] + mov x21, -20 + cmp w0, w3 -+ bhi .L118 ++ bhi .L116 + ldr x1, [sp, 208] + uxtw x2, w0 + and x0, x0, 255 @@ -960,17 +1136,17 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + .cfi_def_cfa_offset 0 + ret + .p2align 2,,3 -+.L120: ++.L118: + .cfi_restore_state + ldr w0, [sp, 224] + mov x21, -20 -+ cbz w0, .L118 ++ cbz w0, .L116 + ldr w0, [sp, 232] + mov x21, 0 + cmp w0, 0 + ldr w0, [sp, 240] + ccmp w0, 24, 4, ne -+ ble .L118 ++ ble .L116 + mov w1, 1 + mov x0, 0 + lsl w1, w1, w4 @@ -978,15 +1154,15 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + ldr x2, [x23] + sbfiz x1, x1, 3, 32 + .p2align 3,,7 -+.L123: ++.L121: + prfm PLDL2KEEP, [x2, x0] + add x0, x0, 64 + cmp x1, x0 -+ bhi .L123 ++ bhi .L121 + mov x21, 0 -+ b .L118 ++ b .L116 + .p2align 2,,3 -+.L130: ++.L128: + mov x21, -20 + mov x0, x21 + ldp x19, x20, [sp, 16] @@ -1003,13 +1179,13 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + .cfi_def_cfa_offset 0 + ret + .cfi_endproc -+.LFE4548: ++.LFE4503: + .size ZSTD_buildSeqTable.constprop.0, .-ZSTD_buildSeqTable.constprop.0 + .align 2 + .p2align 4,,11 + .type ZSTD_initFseState, %function +ZSTD_initFseState: -+.LFB4524: ++.LFB4471: + .cfi_startproc + ldr w6, [x2, 4] + adrp x5, .LANCHOR0 @@ -1025,34 +1201,34 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + and x4, x4, x5 + str x4, [x0] + cmp w3, 64 -+ bhi .L138 ++ bhi .L136 + ldr x4, [x1, 16] + ldr x5, [x1, 32] + cmp x4, x5 -+ bcs .L141 ++ bcs .L139 + ldr x6, [x1, 24] + cmp x4, x6 -+ beq .L138 ++ beq .L136 + lsr w5, w3, 3 + lsr w7, w3, 3 + sub x5, x4, x5 + cmp x6, x5 -+ bls .L140 ++ bls .L138 + sub x6, x4, x6 + mov w7, w6 -+ sub x5, x4, w6, uxtw -+.L140: ++ sub x5, x4, x6, uxtw ++.L138: + ldr x4, [x5] + sub w7, w3, w7, lsl 3 + str x4, [x1] + str w7, [x1, 8] + str x5, [x1, 16] -+.L138: ++.L136: + add x2, x2, 8 + str x2, [x0, 8] + ret + .p2align 2,,3 -+.L141: ++.L139: + lsr w5, w3, 3 + and w3, w3, 7 + sub x4, x4, x5 @@ -1064,256 +1240,254 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + str x2, [x0, 8] + ret + .cfi_endproc -+.LFE4524: ++.LFE4471: + .size ZSTD_initFseState, .-ZSTD_initFseState + .align 2 + .p2align 4,,11 + .type ZSTD_decompressSequencesLong_default.constprop.0, %function +ZSTD_decompressSequencesLong_default.constprop.0: -+.LFB4550: ++.LFB4505: + .cfi_startproc -+ sub sp, sp, #800 -+ .cfi_def_cfa_offset 800 ++ sub sp, sp, #768 ++ .cfi_def_cfa_offset 768 + stp x29, x30, [sp, 16] -+ .cfi_offset 29, -784 -+ .cfi_offset 30, -776 ++ .cfi_offset 29, -752 ++ .cfi_offset 30, -744 + add x29, sp, 16 -+ stp x25, x26, [sp, 80] -+ .cfi_offset 25, -720 -+ .cfi_offset 26, -712 -+ add x25, x0, 16384 -+ stp x21, x22, [sp, 48] -+ .cfi_offset 21, -752 -+ .cfi_offset 22, -744 -+ mov x22, x0 ++ stp x27, x28, [sp, 96] ++ .cfi_offset 27, -672 ++ .cfi_offset 28, -664 ++ add x28, x0, 16384 ++ stp x23, x24, [sp, 64] ++ .cfi_offset 23, -704 ++ .cfi_offset 24, -696 ++ mov x24, x0 + add x0, x1, x2 + str x0, [sp, 128] -+ ldr w0, [x25, 13976] -+ stp x23, x24, [sp, 64] -+ .cfi_offset 23, -736 -+ .cfi_offset 24, -728 -+ mov x23, x3 -+ stp x27, x28, [sp, 96] -+ str w5, [sp, 116] ++ ldr w0, [x28, 13976] ++ stp x21, x22, [sp, 48] ++ stp x25, x26, [sp, 80] ++ str w5, [sp, 120] + str x1, [sp, 152] + cmp w0, 1 -+ .cfi_offset 27, -704 -+ .cfi_offset 28, -696 -+ bne .L144 -+ ldr x1, [x22, 30344] ++ .cfi_offset 21, -720 ++ .cfi_offset 22, -712 ++ .cfi_offset 25, -688 ++ .cfi_offset 26, -680 ++ bne .L142 ++ ldr x1, [x24, 30344] + str x1, [sp, 128] -+.L144: -+ ldr x2, [x22, 29904] -+ str x2, [sp, 160] -+ ldr x2, [x22, 30352] ++.L142: ++ ldr x2, [x24, 30352] + str x2, [sp, 136] -+ ldr w2, [sp, 116] -+ ldr x1, [x22, 30120] -+ str x1, [sp, 344] -+ ldr x24, [x22, 29896] -+ ldr x26, [x22, 29912] -+ cbz w2, .L314 -+ ldr w3, [x25, 10300] ++ ldr w2, [sp, 120] ++ ldr x1, [x24, 30120] ++ str x1, [sp, 312] ++ cbz w2, .L308 ++ ldr w1, [x28, 10308] + mov w0, 1 -+ ldr w2, [x25, 10304] -+ ldr w1, [x25, 10308] -+ str x3, [sp, 584] -+ str x2, [sp, 592] -+ str x1, [sp, 600] -+ str w0, [x25, 13620] -+ cbz x4, .L146 -+ add x0, x23, x4 -+ add x1, x23, 8 -+ str x1, [sp, 120] -+ str x23, [sp, 520] -+ str x1, [sp, 528] ++ str x1, [sp, 568] ++ ldr x1, [x24, 29904] ++ str x1, [sp, 160] ++ ldr w5, [x28, 10300] ++ ldr w2, [x28, 10304] ++ ldr x1, [x24, 29912] ++ str x1, [sp, 144] ++ str x5, [sp, 552] ++ str x2, [sp, 560] ++ ldr x27, [x24, 29896] ++ str w0, [x28, 13620] ++ cbz x4, .L144 ++ add x0, x3, x4 ++ add x1, x3, 8 ++ stp x3, x1, [sp, 488] + ldrb w0, [x0, -1] + cmp x4, 7 -+ bls .L147 ++ bls .L145 + sub x1, x4, #8 -+ add x2, x23, x1 -+ str x2, [sp, 512] -+ ldr x1, [x23, x1] -+ str x1, [sp, 496] -+ cbz w0, .L146 ++ add x2, x3, x1 ++ str x2, [sp, 480] ++ ldr x1, [x3, x1] ++ str x1, [sp, 464] ++ cbz w0, .L144 + clz w0, w0 + sub w0, w0, #23 -+ str w0, [sp, 504] ++ str w0, [sp, 472] + cmn x4, #120 -+ bhi .L146 -+ stp x19, x20, [sp, 32] -+ .cfi_offset 20, -760 -+ .cfi_offset 19, -768 -+.L148: -+ ldr w3, [sp, 116] -+ ldr x2, [x22] ++ bhi .L144 ++.L146: ++ ldr w3, [sp, 120] ++ add x1, sp, 464 ++ ldr x2, [x24] + cmp w3, 8 + mov w0, 8 + csel w0, w3, w0, le -+ add x1, sp, 496 -+ str w0, [sp, 144] -+ add x0, sp, 536 ++ str w0, [sp, 176] ++ add x0, x1, 40 + bl ZSTD_initFseState -+ ldr x2, [x22, 16] -+ add x0, sp, 552 ++ add x0, x1, 56 ++ ldr x2, [x24, 16] + bl ZSTD_initFseState -+ add x0, sp, 568 -+ ldr x2, [x22, 8] ++ add x0, x1, 72 ++ ldr x2, [x24, 8] + bl ZSTD_initFseState -+ ldr w20, [sp, 504] -+ cmp w20, 64 -+ bhi .L315 -+ add x10, sp, 536 -+ adrp x16, .LANCHOR0 -+ ldr x0, [sp, 152] -+ add x16, x16, :lo12:.LANCHOR0 -+ ldp x9, x1, [x10] -+ sub x19, x0, x24 -+ ldp x6, x8, [x10, 16] -+ add x3, sp, 608 -+ ldr x10, [sp, 576] -+ mov w2, w20 -+ ldr x0, [sp, 496] -+ add x16, x16, 64 -+ ldr x5, [sp, 568] -+ add x27, sp, 504 -+ stp x8, x22, [sp, 184] -+ mov x22, x10 -+ mov w30, 0 -+ stp x25, x3, [sp, 200] -+ mov x25, x1 -+ mov w4, 0 -+ str w20, [sp, 216] -+ mov x20, x8 -+ ldr x7, [sp, 512] ++ ldr w0, [sp, 472] ++ cmp w0, 64 ++ bhi .L421 ++ ldr x1, [sp, 152] ++ stp x19, x20, [sp, 32] ++ .cfi_offset 20, -728 ++ .cfi_offset 19, -736 ++ add x18, sp, 576 ++ sub x20, x1, x27 ++ adrp x15, .LANCHOR0 ++ ldp x6, x22, [sp, 480] ++ add x15, x15, :lo12:.LANCHOR0 ++ ldr x1, [sp, 544] ++ stp x28, x18, [sp, 184] ++ ldr w28, [sp, 176] ++ ldr x17, [sp, 496] ++ mov x8, x18 ++ ldr x30, [sp, 512] ++ add x15, x15, 64 ++ ldr x25, [sp, 528] ++ add x26, sp, 472 ++ str x24, [sp, 168] ++ mov x24, x1 + mov w21, 0 -+ stp x1, x10, [sp, 168] -+ str x7, [sp, 224] + .p2align 3,,7 ++.L171: ++ cmp x17, x6 ++ bls .L422 ++ cmp x22, x6 ++ beq .L175 ++ lsr w1, w0, 3 ++ lsr w2, w0, 3 ++ sub x1, x6, x1 ++ cmp x22, x1 ++ bls .L176 ++ sub x1, x6, x22 ++ mov w2, w1 ++ sub x1, x6, x1, uxtw ++.L176: ++ mov x6, x1 ++ sub w0, w0, w2, lsl 3 ++ ldr x3, [x1] ++ str x3, [sp, 464] ++ str w0, [sp, 472] ++ str x1, [sp, 480] ++.L175: ++ cmp w28, w21 ++ ble .L423 ++.L177: ++ ldr x3, [sp, 504] ++ ldr x2, [sp, 536] ++ ldr x1, [sp, 520] ++ ldr x5, [x30, x3, lsl 3] ++ ldr x3, [x24, x2, lsl 3] ++ ldr x4, [x25, x1, lsl 3] ++ ubfx w13, w5, 16, 8 ++ ubfx w14, w3, 16, 8 ++ lsr x19, x5, 32 ++ add w7, w13, w14 ++ lsr x11, x3, 32 ++ ubfx w10, w4, 16, 8 ++ mov x12, x19 ++ add w7, w10, w7 ++ lsr x18, x4, 32 ++ and w7, w7, 255 ++ lsr w9, w5, 24 ++ lsr w1, w3, 24 ++ lsr w16, w4, 24 ++ cmp w10, 1 ++ bls .L156 ++ ldr x2, [sp, 464] ++ neg w23, w10 ++ ldr x19, [sp, 552] ++ lsl x2, x2, x0 ++ add w0, w0, w10 ++ ldr x10, [sp, 560] ++ lsr x2, x2, x23 ++ add x2, x18, x2 ++ str w0, [sp, 472] ++ str x2, [sp, 552] ++ str x19, [sp, 560] ++ str x10, [sp, 568] +.L157: -+ ldr x1, [sp, 120] -+ cmp x1, x7 -+ bls .L429 -+ mov w1, w4 -+ cmp x23, x7 -+ beq .L172 -+ lsr w0, w2, 3 -+ lsr w1, w2, 3 -+ sub x0, x7, x0 -+ cmp x23, x0 -+ bls .L173 -+ sub x0, x7, x23 -+ mov w1, w0 -+ sub x0, x7, w0, uxtw -+.L173: -+ mov x7, x0 -+ sub w2, w2, w1, lsl 3 -+ ldr x0, [x0] -+ mov w1, 1 -+ mov w30, w1 -+.L172: -+ ldr w8, [sp, 144] -+ cmp w8, w21 -+ ble .L430 -+.L174: -+ ldr x8, [x25, x9, lsl 3] -+ ldr x5, [x22, x5, lsl 3] -+ ldr x6, [x20, x6, lsl 3] -+ ubfx w14, w8, 16, 8 -+ ubfx w15, w5, 16, 8 -+ lsr x28, x8, 32 -+ add w9, w14, w15 -+ lsr x12, x5, 32 -+ ubfx w11, w6, 16, 8 -+ mov x13, x28 -+ add w9, w11, w9 -+ lsr x18, x6, 32 -+ and w9, w9, 255 -+ lsr w10, w8, 24 -+ lsr w1, w5, 24 -+ lsr w17, w6, 24 -+ cmp w11, 1 -+ bls .L158 -+ neg w28, w11 -+ lsl x4, x0, x2 -+ add w2, w2, w11 -+ ldr x11, [sp, 592] -+ str x11, [sp, 600] -+ ldr x11, [sp, 584] -+ lsr x4, x4, x28 -+ add x4, x18, x4 -+.L159: -+ str x4, [sp, 584] -+ str x11, [sp, 592] -+ cbnz w15, .L431 ++ cbnz w14, .L424 ++.L162: ++ cmp w7, 30 ++ bhi .L425 ++.L412: ++ ldr x10, [sp, 464] +.L164: -+ cmp w9, 30 -+ bhi .L432 -+.L165: -+ cbnz w14, .L433 -+.L168: -+ uxtw x11, w17 -+ add w9, w10, w2 -+ add x19, x13, x19 ++ cbnz w13, .L426 ++.L169: ++ uxtw x13, w16 ++ add w7, w9, w0 + uxtw x14, w1 -+ cmp x19, x4 -+ add w1, w1, w9 -+ add w2, w17, w1 -+ ldr w10, [x16, x10, lsl 2] -+ ldr w17, [x16, x11, lsl 2] -+ csel x15, x26, x24, cc -+ sub x11, x19, x4 -+ neg w9, w9 -+ add x11, x15, x11 -+ ldr w14, [x16, x14, lsl 2] ++ add w1, w1, w7 ++ add w0, w16, w1 ++ add x20, x12, x20 ++ ldr w16, [x15, x13, lsl 2] ++ cmp x20, x2 ++ ldr x13, [sp, 144] ++ neg w7, w7 ++ ldr w9, [x15, x9, lsl 2] + neg w1, w1 -+ lsr x9, x0, x9 -+ and x9, x9, x10 -+ neg w10, w2 -+ prfm PLDL1KEEP, [x11] -+ lsr x1, x0, x1 -+ prfm PLDL1KEEP, [x11, 64] ++ csel x18, x13, x27, cc ++ sub x13, x20, x2 ++ add x13, x18, x13 ++ lsr x7, x10, x7 ++ ldr w14, [x15, x14, lsl 2] ++ and x7, x7, x9 ++ neg w9, w0 ++ lsr x1, x10, x1 + and x1, x1, x14 -+ lsr x10, x0, x10 -+ and x10, x10, x17 -+ stp x13, x12, [x3] ++ prfm PLDL1KEEP, [x13] ++ lsr x10, x10, x9 ++ and x10, x10, x16 ++ prfm PLDL1KEEP, [x13, 64] ++ add x7, x7, x5, uxth ++ add x3, x1, x3, uxth ++ add x4, x10, x4, uxth ++ stp x12, x11, [x8] + add w21, w21, 1 -+ add x9, x9, w8, uxth -+ str x4, [x3, 16] -+ add x5, x1, w5, uxth -+ add x6, x10, w6, uxth -+ add x19, x19, x12 -+ add x3, x3, 24 -+ mov w4, 1 -+ cmp w2, 64 -+ bls .L157 -+ ldp x22, x25, [sp, 192] -+ cbz w30, .L309 -+ str x0, [sp, 496] -+ str w2, [sp, 504] -+ str x7, [sp, 512] -+.L310: -+ str x9, [sp, 536] -+ str x6, [sp, 552] -+ str x5, [sp, 568] -+.L156: -+ ldr w0, [sp, 144] -+ cmp w0, w21 -+ ble .L424 -+.L420: ++ add x20, x20, x11 ++ str x2, [x8, 16] ++ add x8, x8, 24 ++ str w0, [sp, 472] ++ str x7, [sp, 504] ++ str x4, [sp, 520] ++ str x3, [sp, 536] ++ cmp w0, 64 ++ bls .L171 + ldp x19, x20, [sp, 32] + .cfi_restore 20 + .cfi_restore 19 -+.L146: -+ mov x3, -20 -+.L142: -+ mov x0, x3 ++ ldr x24, [sp, 168] ++ ldr x28, [sp, 184] ++.L154: ++ ldr w0, [sp, 176] ++ cmp w0, w21 ++ bgt .L144 ++ ldr x22, [sp, 152] ++ .p2align 3,,7 ++.L172: ++ ldr w0, [sp, 120] ++ cmp w0, w21 ++ ble .L249 ++ .p2align 3,,7 ++.L144: ++ mov x25, -20 ++.L140: ++ mov x0, x25 + ldp x29, x30, [sp, 16] + ldp x21, x22, [sp, 48] + ldp x23, x24, [sp, 64] + ldp x25, x26, [sp, 80] + ldp x27, x28, [sp, 96] -+ add sp, sp, 800 ++ add sp, sp, 768 + .cfi_remember_state + .cfi_restore 29 + .cfi_restore 30 @@ -1328,29 +1502,29 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + .cfi_def_cfa_offset 0 + ret + .p2align 2,,3 -+.L314: ++.L308: + .cfi_restore_state -+ ldr x28, [sp, 152] -+.L145: ++ ldr x22, [sp, 152] ++.L143: + ldp x3, x2, [sp, 128] + sub x2, x2, x1 -+ sub x3, x3, x28 ++ sub x3, x3, x22 + cmp w0, 2 -+ beq .L434 ++ beq .L427 + cmp x2, x3 -+ bhi .L306 -+ cbnz x28, .L308 -+.L307: ++ bhi .L305 ++ cbnz x22, .L307 ++.L306: + ldr x0, [sp, 152] -+ sub x3, x28, x0 -+.L437: -+ mov x0, x3 ++ sub x25, x22, x0 ++.L430: ++ mov x0, x25 + ldp x29, x30, [sp, 16] + ldp x21, x22, [sp, 48] + ldp x23, x24, [sp, 64] + ldp x25, x26, [sp, 80] + ldp x27, x28, [sp, 96] -+ add sp, sp, 800 ++ add sp, sp, 768 + .cfi_remember_state + .cfi_restore 29 + .cfi_restore 30 @@ -1365,1098 +1539,1086 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + .cfi_def_cfa_offset 0 + ret + .p2align 2,,3 -+.L147: ++.L145: + .cfi_restore_state -+ ldrb w1, [x23] -+ str x1, [sp, 496] -+ str x23, [sp, 512] ++ ldrb w1, [x3] ++ str x1, [sp, 464] ++ str x3, [sp, 480] + cmp x4, 5 -+ beq .L149 -+ bhi .L150 ++ beq .L147 ++ bhi .L148 + cmp x4, 3 -+ beq .L151 ++ beq .L149 + cmp x4, 4 -+ bne .L435 -+.L152: -+ ldrb w2, [x23, 3] ++ bne .L428 ++.L150: ++ ldrb w2, [x3, 3] + add x1, x1, x2, lsl 24 -+.L151: -+ ldrb w2, [x23, 2] ++.L149: ++ ldrb w2, [x3, 2] + add x1, x1, x2, lsl 16 -+ ldrb w2, [x23, 1] ++ ldrb w2, [x3, 1] + add x1, x1, x2, lsl 8 -+ str x1, [sp, 496] -+.L154: -+ cbz w0, .L146 ++ str x1, [sp, 464] ++.L152: ++ cbz w0, .L144 + mov w1, 8 + sub w1, w1, w4 + clz w0, w0 -+ stp x19, x20, [sp, 32] -+ .cfi_remember_state -+ .cfi_offset 20, -760 -+ .cfi_offset 19, -768 + add w0, w0, w1, lsl 3 + sub w0, w0, #23 -+ str w0, [sp, 504] -+ b .L148 ++ str w0, [sp, 472] ++ b .L146 + .p2align 2,,3 -+.L150: -+ .cfi_restore_state ++.L148: + cmp x4, 6 -+ beq .L155 -+ ldrb w2, [x23, 6] ++ beq .L153 ++ ldrb w2, [x3, 6] + add x1, x1, x2, lsl 48 -+.L155: -+ ldrb w2, [x23, 5] ++.L153: ++ ldrb w2, [x3, 5] + add x1, x1, x2, lsl 40 -+.L149: -+ ldrb w2, [x23, 4] ++.L147: ++ ldrb w2, [x3, 4] + add x1, x1, x2, lsl 32 -+ b .L152 ++ b .L150 + .p2align 2,,3 -+.L435: ++.L428: + cmp x4, 2 -+ bne .L154 -+ ldrb w2, [x23, 1] ++ bne .L152 ++ ldrb w2, [x3, 1] + add x1, x1, x2, lsl 8 -+ str x1, [sp, 496] -+ b .L154 ++ str x1, [sp, 464] ++ b .L152 + .p2align 2,,3 -+.L434: ++.L427: + cmp x2, x3 -+ bhi .L306 ++ bhi .L305 + stp x19, x20, [sp, 32] -+ .cfi_offset 20, -760 -+ .cfi_offset 19, -768 ++ .cfi_offset 20, -728 ++ .cfi_offset 19, -736 + mov x0, 30364 + mov x19, 65536 -+ add x22, x22, x0 -+ cbz x28, .L436 -+ mov x0, x28 -+ add x28, x28, x2 ++ add x24, x24, x0 ++ cbz x22, .L429 ++ mov x0, x22 ++ add x22, x22, x2 + bl memmove -+ str x22, [sp, 344] ++ str x24, [sp, 312] + ldr x0, [sp, 128] -+ sub x0, x0, x28 ++ sub x0, x0, x22 + cmp x0, x19 -+ bcc .L419 ++ bcc .L410 + mov x2, x19 -+ mov x1, x22 ++ mov x1, x24 + ldp x19, x20, [sp, 32] + .cfi_restore 20 + .cfi_restore 19 -+.L308: -+ mov x0, x28 -+ add x28, x28, x2 ++.L307: ++ mov x0, x22 ++ add x22, x22, x2 + bl memmove + ldr x0, [sp, 152] -+ sub x3, x28, x0 -+ b .L437 ++ sub x25, x22, x0 ++ b .L430 + .p2align 2,,3 -+.L432: -+ .cfi_offset 19, -768 -+ .cfi_offset 20, -760 -+ cmp w2, 64 -+ bhi .L165 -+ ldr x9, [sp, 120] -+ cmp x7, x9 -+ bcs .L438 -+ cmp x23, x7 -+ beq .L165 -+ lsr w9, w2, 3 -+ lsr w11, w2, 3 -+ sub x9, x7, x9 -+ cmp x23, x9 -+ bls .L167 -+ sub x9, x7, x23 -+ mov w11, w9 -+ sub x9, x7, w9, uxtw -+.L167: -+ sub w2, w2, w11, lsl 3 -+ mov x7, x9 -+ mov w30, 1 -+ ldr x0, [x9] -+ cbz w14, .L168 ++.L425: ++ .cfi_offset 19, -736 ++ .cfi_offset 20, -728 ++ cmp w0, 64 ++ bhi .L412 ++ cmp x17, x6 ++ bls .L431 ++ cmp x22, x6 ++ beq .L412 ++ lsr w7, w0, 3 ++ lsr w10, w0, 3 ++ sub x7, x6, x7 ++ cmp x7, x22 ++ bcc .L432 ++ mov x6, x7 ++.L168: ++ sub w0, w0, w10, lsl 3 ++ ldr x10, [x6] ++ str x10, [sp, 464] ++ str x6, [sp, 480] ++ cbz w13, .L169 + .p2align 3,,7 -+.L433: -+ neg w11, w14 -+ lsl x9, x0, x2 -+ add w2, w2, w14 -+ lsr x9, x9, x11 -+ add x13, x13, x9 -+ b .L168 ++.L426: ++ neg w14, w13 ++ lsl x7, x10, x0 ++ add w0, w0, w13 ++ lsr x7, x7, x14 ++ add x12, x12, x7 ++ b .L169 + .p2align 2,,3 -+.L424: -+ ldr x28, [sp, 152] -+ .p2align 3,,7 -+.L178: -+ ldr w0, [sp, 116] -+ cmp w0, w21 -+ bgt .L420 -+.L250: -+ ldr w0, [sp, 144] -+ sub w21, w21, w0 -+ ldr w0, [sp, 116] -+ cmp w0, w21 -+ ble .L251 -+ add x19, x22, 94208 -+ adrp x20, .LANCHOR0 -+ add x0, x20, :lo12:.LANCHOR0 -+ add x19, x19, 1692 -+ stp x19, x0, [sp, 176] -+ add x0, x0, 32 -+ mov w1, w21 -+ str x0, [sp, 192] -+ add x0, sp, 608 -+ ldr x27, [sp, 128] -+ mov x21, x24 -+ ldr x19, [sp, 136] -+ mov x23, x25 -+ mov w24, w1 -+ mov x25, x0 -+ .p2align 3,,7 -+.L301: -+ and x11, x24, 7 -+ ubfiz x6, x24, 1, 3 -+ add x6, x6, x11 -+ ldr w1, [x23, 13976] -+ ldr x13, [sp, 344] -+ lsl x0, x6, 3 -+ ldr x7, [x25, x6, lsl 3] -+ add x14, x13, x7 -+ cmp w1, 2 -+ beq .L439 -+ add x1, sp, 624 -+ add x2, sp, 616 -+ add x4, x25, x0 -+ add x20, x28, x7 -+ sub x15, x27, #32 -+ ldr x11, [x1, x6, lsl 3] -+ ldr x12, [x2, x6, lsl 3] -+ sub x2, x20, x11 -+ ldp x0, x1, [x4] -+ add x3, x12, x7 -+ ldr x4, [x4, 16] -+ add x5, x28, x3 -+ prfm PLDL1KEEP, [x2] -+ cmp x5, x15 -+ stp x0, x1, [sp, 448] -+ ccmp x19, x14, 0, ls -+ str x4, [sp, 464] -+ bcc .L287 -+ ldr q0, [x13] -+ str q0, [x28] -+ cmp x7, 16 -+ bhi .L288 -+.L291: -+ sub x0, x20, x21 -+ str x14, [sp, 344] -+ cmp x11, x0 -+ bls .L290 -+ ldr x0, [sp, 160] -+ sub x0, x20, x0 -+ cmp x11, x0 -+ bhi .L420 -+ sub x1, x2, x21 -+ add x1, x26, x1 -+ add x0, x1, x12 -+ cmp x26, x0 -+ bcs .L440 -+ sub x2, x21, x2 -+ mov x0, x20 -+ sub x12, x12, x2 -+ add x20, x20, x2 -+ str x12, [sp, 120] -+ stp x11, x5, [sp, 136] -+ str x3, [sp, 168] -+ bl memmove -+ ldp x11, x5, [sp, 136] -+ mov x2, x21 -+ ldr x12, [sp, 120] -+ ldr x3, [sp, 168] -+.L290: -+ cmp x11, 15 -+ bls .L294 -+ ldr q0, [x2] -+ str q0, [x20] -+ cmp x12, 16 -+ ble .L276 -+ add x1, x20, 16 -+ add x2, x2, 16 -+ .p2align 3,,7 -+.L295: -+ ldr q0, [x2] -+ add x1, x1, 32 -+ add x2, x2, 32 -+ str q0, [x1, -32] -+ ldr q0, [x2, -16] -+ str q0, [x1, -16] -+ cmp x5, x1 -+ bhi .L295 -+.L276: -+ cmn x3, #120 -+ bhi .L417 -+ ldr w0, [sp, 116] -+ add w24, w24, 1 -+ add x28, x28, x3 -+ cmp w0, w24 -+ bne .L301 -+ mov x25, x23 -+ str x19, [sp, 136] -+.L251: -+ ldr x0, [sp, 584] -+ ldp x19, x20, [sp, 32] -+ .cfi_remember_state -+ .cfi_restore 20 -+ .cfi_restore 19 -+ str w0, [x25, 10300] -+ ldr x0, [sp, 592] -+ str w0, [x25, 10304] -+ ldr x0, [sp, 600] -+ str w0, [x25, 10308] -+ ldr w0, [x25, 13976] -+ ldr x1, [sp, 344] -+ b .L145 -+ .p2align 2,,3 -+.L158: -+ .cfi_restore_state -+ cmp w28, 0 -+ cset w4, eq -+ cmp w11, 1 -+ beq .L160 -+ cmp w28, 0 -+ add x4, x4, 10 -+ cset x11, ne -+ add x11, x11, 10 -+ ldr x4, [x27, x4, lsl 3] -+ ldr x11, [x27, x11, lsl 3] -+ str x4, [sp, 584] -+ str x11, [sp, 592] -+ cbz w15, .L164 -+.L431: -+ neg w18, w15 -+ lsl x11, x0, x2 -+ add w2, w2, w15 -+ lsr x11, x11, x18 -+ add x12, x12, x11 -+ b .L164 ++.L156: ++ cmp w19, 0 ++ cset w2, eq ++ cmp w10, 1 ++ beq .L158 ++ cmp w19, 0 ++ add x2, x2, 10 ++ cset x10, ne ++ add x10, x10, 10 ++ ldr x2, [x26, x2, lsl 3] ++ ldr x10, [x26, x10, lsl 3] ++ str x2, [sp, 552] ++ str x10, [sp, 560] ++ cbz w14, .L162 ++.L424: ++ ldr x10, [sp, 464] ++ neg w18, w14 ++ lsl x10, x10, x0 ++ add w0, w0, w14 ++ lsr x10, x10, x18 ++ add x11, x11, x10 ++ str w0, [sp, 472] ++ b .L162 + .p2align 2,,3 -+.L429: -+ lsr w0, w2, 3 -+ ldr w8, [sp, 144] -+ sub x7, x7, x0 -+ mov w30, 1 -+ and w2, w2, 7 -+ mov w1, w30 -+ ldr x0, [x7] -+ cmp w8, w21 -+ bgt .L174 -+.L430: -+ ldr w20, [sp, 216] -+ ldp x22, x25, [sp, 192] -+ cbz w30, .L175 -+ str x7, [sp, 512] -+ cbz w1, .L313 -+ mov w20, w2 -+ str w2, [sp, 504] -+.L313: -+ str x0, [sp, 496] -+.L312: -+ cbz w4, .L177 -+.L451: -+ str x9, [sp, 536] -+ str x6, [sp, 552] -+ str x5, [sp, 568] -+.L177: -+ cmp w20, 64 -+ bhi .L424 -+ adrp x0, .LANCHOR0 -+ add x0, x0, :lo12:.LANCHOR0 -+ add x12, x0, 64 -+ str x0, [sp, 296] -+ add x0, x22, 94208 -+ mov x11, x26 -+ add x0, x0, 1692 -+ str x0, [sp, 192] -+ ldr x0, [sp, 120] -+ mov x15, x22 -+ mov x10, x24 -+ mov w22, w21 -+ mov x13, x23 -+ mov x14, x25 -+ mov x21, x19 -+ ldr x28, [sp, 152] -+ ldr x26, [sp, 208] -+ cmp x0, x7 -+ bls .L441 ++.L422: ++ lsr w1, w0, 3 ++ and w0, w0, 7 ++ sub x6, x6, x1 ++ ldr x1, [x6] ++ str x1, [sp, 464] ++ str w0, [sp, 472] ++ str x6, [sp, 480] ++ cmp w28, w21 ++ bgt .L177 ++.L423: ++ ldr x24, [sp, 168] ++ ldp x28, x18, [sp, 184] ++ cmp w0, 64 ++ bhi .L312 ++ adrp x1, .LANCHOR0 ++ add x1, x1, :lo12:.LANCHOR0 ++ add x10, x1, 64 ++ str x1, [sp, 264] ++ add x1, x24, 94208 ++ mov x12, x28 ++ add x1, x1, 1692 ++ str x1, [sp, 184] ++ mov x28, x18 ++ mov x11, x27 ++ ldp x18, x22, [sp, 144] ++ cmp x6, x17 ++ bcs .L433 + .p2align 3,,7 -+.L246: -+ mov x5, x13 -+ cmp x13, x7 -+ beq .L247 -+ lsr w5, w20, 3 -+ lsr w0, w20, 3 -+ sub x5, x7, x5 -+ cmp x13, x5 -+ bls .L248 -+ sub x5, x7, x13 -+ mov w0, w5 -+ sub x5, x7, w5, uxtw -+.L248: -+ ldr x1, [x5] -+ sub w0, w20, w0, lsl 3 -+ str x1, [sp, 496] -+ str w0, [sp, 504] -+ str x5, [sp, 512] ++.L245: ++ ldr x1, [sp, 488] ++ mov x8, x6 ++ cmp x1, x6 ++ beq .L246 ++ lsr w3, w0, 3 ++ lsr w2, w0, 3 ++ sub x8, x6, x3 ++ cmp x1, x8 ++ bls .L247 ++ sub x1, x6, x1 ++ mov w2, w1 ++ sub x8, x6, x1, uxtw +.L247: -+ ldr w0, [sp, 116] -+ cmp w0, w22 -+ ble .L442 -+ ldr x3, [sp, 168] -+ ldr x2, [sp, 536] -+ ldr x1, [sp, 568] -+ ldr x3, [x3, x2, lsl 3] -+ ldr x2, [sp, 176] -+ ldr x0, [sp, 552] -+ ubfx w9, w3, 16, 8 -+ ldr x7, [x2, x1, lsl 3] -+ lsr x18, x3, 32 -+ ldr x1, [sp, 184] -+ mov x25, x18 -+ ubfx w16, w7, 16, 8 -+ lsr w6, w3, 24 -+ add w4, w9, w16 -+ lsr x24, x7, 32 -+ ldr x2, [x1, x0, lsl 3] ++ ldr x1, [x8] ++ sub w2, w0, w2, lsl 3 ++ str x1, [sp, 464] ++ str w2, [sp, 472] ++ str x8, [sp, 480] ++.L246: ++ ldr w0, [sp, 120] ++ cmp w0, w21 ++ ble .L434 ++ add x6, sp, 520 ++ ldp x5, x4, [sp, 504] ++ ldp x3, x2, [x6, 16] ++ ldp x1, x0, [x6] ++ ldr x4, [x4, x5, lsl 3] ++ ldr x7, [x2, x3, lsl 3] ++ ldr x2, [x0, x1, lsl 3] ++ ubfx w13, w4, 16, 8 ++ ubfx w14, w7, 16, 8 ++ lsr x16, x4, 32 ++ add w5, w13, w14 ++ ldr w0, [sp, 472] ++ ubfx w9, w2, 16, 8 ++ lsr x26, x7, 32 ++ add w5, w9, w5 ++ mov x27, x16 ++ lsr x24, x2, 32 ++ and w5, w5, 255 ++ lsr w6, w4, 24 + lsr w1, w7, 24 -+ ldr w0, [sp, 504] -+ ubfx w8, w2, 16, 8 -+ lsr x19, x2, 32 -+ add w4, w8, w4 -+ lsr w20, w2, 24 -+ and w4, w4, 255 -+ cmp w8, 1 -+ bls .L180 -+ ldr x17, [sp, 496] -+ neg w18, w8 -+ ldr x23, [sp, 592] -+ str x23, [sp, 600] -+ lsl x17, x17, x0 -+ add w0, w0, w8 -+ ldr x8, [sp, 584] -+ lsr x17, x17, x18 -+ add x19, x19, x17 -+ str w0, [sp, 504] -+.L181: -+ str x19, [sp, 584] -+ str x8, [sp, 592] -+ cbnz w16, .L443 -+.L186: -+ cmp w4, 30 -+ bhi .L187 -+.L421: -+ ldr x4, [sp, 496] -+.L188: -+ cbnz w9, .L444 -+.L193: -+ uxtw x8, w6 -+ uxtw x5, w1 ++ lsr w3, w2, 24 ++ cmp w9, 1 ++ bls .L179 ++ ldr x15, [sp, 464] ++ neg w19, w9 ++ ldr x16, [sp, 552] ++ lsl x15, x15, x0 ++ add w0, w0, w9 ++ ldr x9, [sp, 560] ++ lsr x15, x15, x19 ++ add x24, x24, x15 ++ str w0, [sp, 472] ++ str x24, [sp, 552] ++ str x16, [sp, 560] ++ str x9, [sp, 568] ++.L180: ++ cbnz w14, .L435 ++.L185: ++ cmp w5, 30 ++ bhi .L436 ++.L414: ++ ldr x5, [sp, 464] ++.L187: ++ cbnz w13, .L437 ++.L192: ++ uxtw x9, w6 ++ uxtw x8, w1 + add w0, w6, w0 -+ uxtw x9, w20 ++ uxtw x13, w3 + add w1, w1, w0 -+ and x23, x22, 7 -+ ubfiz x6, x22, 1, 3 -+ add w20, w20, w1 -+ ldr w16, [x12, x8, lsl 2] ++ and x23, x21, 7 ++ ubfiz x6, x21, 1, 3 ++ add w3, w3, w1 ++ ldr w14, [x10, x9, lsl 2] + add x6, x6, x23 -+ ldr w5, [x12, x5, lsl 2] ++ ldr w8, [x10, x8, lsl 2] + neg w0, w0 + neg w1, w1 -+ ldr w8, [x12, x9, lsl 2] -+ lsr x0, x4, x0 -+ neg w9, w20 -+ lsr x1, x4, x1 -+ and x0, x0, x16 -+ and x1, x1, x5 -+ add x0, x0, w3, uxth -+ add x3, x1, w7, uxth -+ lsr x4, x4, x9 -+ ldr x7, [x26, x6, lsl 3] -+ and x4, x4, x8 -+ ldr x1, [sp, 344] -+ add x2, x4, w2, uxth -+ ldr w5, [x14, 13976] -+ lsl x4, x6, 3 -+ str w20, [sp, 504] -+ str x0, [sp, 536] ++ ldr w9, [x10, x13, lsl 2] ++ lsr x0, x5, x0 ++ neg w13, w3 ++ lsr x1, x5, x1 ++ and x0, x0, x14 ++ and x1, x1, x8 ++ add x0, x0, x4, uxth ++ add x4, x1, x7, uxth ++ lsr x5, x5, x13 ++ ldr x7, [x28, x6, lsl 3] ++ and x5, x5, x9 ++ ldr x1, [sp, 312] ++ add x2, x5, x2, uxth ++ ldr w8, [x12, 13976] ++ str w3, [sp, 472] ++ lsl x3, x6, 3 ++ str x0, [sp, 504] + add x0, x1, x7 -+ str x2, [sp, 552] -+ str x3, [sp, 568] -+ cmp w5, 2 -+ beq .L445 -+ add x9, sp, 616 -+ add x8, sp, 624 -+ add x4, x26, x4 -+ add x27, x28, x7 ++ str x2, [sp, 520] ++ str x4, [sp, 536] ++ cmp w8, 2 ++ beq .L438 ++ add x9, sp, 584 ++ add x8, sp, 592 ++ add x3, x28, x3 ++ add x19, x22, x7 + ldr x2, [sp, 128] -+ ldr x5, [x9, x6, lsl 3] ++ ldr x4, [x9, x6, lsl 3] + sub x2, x2, #32 -+ ldr x6, [x8, x6, lsl 3] -+ add x3, x5, x7 -+ ldp x16, x17, [x4] -+ sub x30, x27, x6 -+ ldr x18, [x4, 16] -+ add x4, x28, x3 -+ cmp x4, x2 -+ prfm PLDL1KEEP, [x30] ++ ldr x5, [x8, x6, lsl 3] ++ add x25, x4, x7 ++ ldp x14, x15, [x3] ++ sub x6, x19, x5 ++ ldr x13, [x3, 16] ++ add x3, x22, x25 ++ cmp x3, x2 ++ prfm PLDL1KEEP, [x6] + ldr x2, [sp, 136] -+ stp x16, x17, [sp, 352] -+ str x18, [sp, 368] ++ stp x14, x15, [sp, 320] ++ str x13, [sp, 336] + ccmp x2, x0, 0, ls -+ bcc .L231 ++ bcc .L230 + ldr q0, [x1] -+ str q0, [x28] ++ str q0, [x22] + cmp x7, 16 -+ bhi .L232 -+.L235: -+ str x0, [sp, 344] -+ sub x0, x27, x10 -+ cmp x6, x0 -+ bls .L234 ++ bhi .L231 ++.L234: ++ str x0, [sp, 312] ++ sub x0, x19, x11 ++ cmp x5, x0 ++ bls .L233 + ldr x0, [sp, 160] -+ sub x0, x27, x0 -+ cmp x6, x0 -+ bhi .L420 -+ sub x1, x30, x10 -+ add x1, x11, x1 -+ add x0, x1, x5 -+ cmp x11, x0 -+ bcs .L423 -+ sub x2, x10, x30 -+ mov x0, x27 -+ sub x5, x5, x2 -+ add x27, x27, x2 -+ stp x5, x10, [sp, 200] -+ stp x6, x4, [sp, 216] -+ stp x9, x8, [sp, 232] -+ stp x11, x15, [sp, 248] -+ stp x13, x14, [sp, 264] -+ stp x12, x3, [sp, 280] ++ sub x0, x19, x0 ++ cmp x5, x0 ++ bhi .L408 ++ sub x1, x6, x11 ++ add x1, x18, x1 ++ add x0, x1, x4 ++ cmp x18, x0 ++ bcs .L416 ++ sub x2, x11, x6 ++ mov x0, x19 ++ sub x4, x4, x2 ++ add x19, x19, x2 ++ stp x4, x11, [sp, 192] ++ stp x5, x3, [sp, 208] ++ stp x9, x8, [sp, 224] ++ stp x18, x12, [sp, 240] ++ str x10, [sp, 256] + bl memmove -+ ldp x5, x10, [sp, 200] -+ ldp x6, x4, [sp, 216] -+ ldp x9, x8, [sp, 232] -+ ldp x11, x15, [sp, 248] -+ mov x30, x10 -+ ldp x13, x14, [sp, 264] -+ ldp x12, x3, [sp, 280] -+.L234: -+ cmp x6, 15 -+ bls .L238 -+ ldr q0, [x30] -+ str q0, [x27] -+ cmp x5, 16 -+ ble .L220 -+ add x27, x27, 16 -+ add x30, x30, 16 ++ ldp x4, x11, [sp, 192] ++ ldp x5, x3, [sp, 208] ++ ldp x9, x8, [sp, 224] ++ ldp x18, x12, [sp, 240] ++ mov x6, x11 ++ ldr x10, [sp, 256] ++.L233: ++ cmp x5, 15 ++ bls .L237 ++ ldr q0, [x6] ++ str q0, [x19] ++ cmp x4, 16 ++ ble .L219 ++ add x19, x19, 16 ++ add x6, x6, 16 + .p2align 3,,7 -+.L239: -+ ldr q0, [x30] -+ add x27, x27, 32 -+ add x30, x30, 32 -+ str q0, [x27, -32] -+ ldr q0, [x30, -16] -+ str q0, [x27, -16] -+ cmp x4, x27 -+ bhi .L239 -+.L220: -+ cmn x3, #120 -+ bhi .L417 -+ add x21, x25, x21 ++.L238: ++ ldr q0, [x6] ++ add x19, x19, 32 ++ add x6, x6, 32 ++ str q0, [x19, -32] ++ ldr q0, [x6, -16] ++ str q0, [x19, -16] ++ cmp x3, x19 ++ bhi .L238 ++.L219: ++ cmn x25, #120 ++ bhi .L405 ++ add x20, x27, x20 + add x23, x23, x23, lsl 1 -+ cmp x19, x21 -+ sub x1, x21, x19 -+ csel x0, x11, x10, hi -+ add x28, x28, x3 ++ cmp x24, x20 ++ sub x1, x20, x24 ++ csel x0, x18, x11, hi ++.L417: + add x0, x0, x1 -+ add x21, x24, x21 -+ add w22, w22, 1 ++ add x22, x22, x25 ++ add x20, x26, x20 ++ add w21, w21, 1 + prfm PLDL1KEEP, [x0] + prfm PLDL1KEEP, [x0, 64] -+ str x25, [x26, x23, lsl 3] -+ str x24, [x9, x23, lsl 3] -+ str x19, [x8, x23, lsl 3] -+ cmp w20, 64 -+ bhi .L403 -+.L448: -+ ldr x0, [sp, 120] -+ ldr x7, [sp, 512] -+ cmp x0, x7 -+ bhi .L246 -+.L441: -+ lsr w5, w20, 3 -+ and w20, w20, 7 -+ sub x5, x7, x5 -+ str w20, [sp, 504] -+ str x5, [sp, 512] -+ ldr x0, [x5] -+ str x0, [sp, 496] -+ b .L247 -+ .p2align 2,,3 -+.L187: ++ ldr w0, [sp, 472] ++ str x27, [x28, x23, lsl 3] ++ str x26, [x9, x23, lsl 3] ++ str x24, [x8, x23, lsl 3] + cmp w0, 64 -+ bhi .L421 -+ ldr x4, [sp, 120] -+ cmp x4, x5 -+ bls .L446 -+ cmp x13, x5 -+ beq .L421 -+ lsr w4, w0, 3 ++ bhi .L396 ++ ldr x6, [sp, 480] ++ ldr x17, [sp, 496] ++ cmp x6, x17 ++ bcc .L245 ++.L433: + lsr w8, w0, 3 -+ sub x4, x5, x4 -+ cmp x13, x4 -+ bls .L192 -+ sub x4, x5, x13 -+ mov w8, w4 -+ sub x4, x5, w4, uxtw -+.L192: -+ str x4, [sp, 512] -+ sub w0, w0, w8, lsl 3 -+ ldr x4, [x4] -+ str x4, [sp, 496] -+ cbz w9, .L193 -+ .p2align 3,,7 -+.L444: -+ neg w8, w9 -+ lsl x5, x4, x0 -+ add w0, w0, w9 -+ lsr x5, x5, x8 -+ add x25, x25, x5 -+ b .L193 ++ and w0, w0, 7 ++ sub x8, x6, x8 ++ str w0, [sp, 472] ++ str x8, [sp, 480] ++ ldr x0, [x8] ++ str x0, [sp, 464] ++ b .L246 + .p2align 2,,3 -+.L180: -+ cmp w18, 0 -+ cset w17, eq -+ cmp w8, 1 -+ beq .L182 -+ cmp w18, 0 -+ add x17, x17, 10 -+ cset x8, ne -+ add x18, sp, 504 -+ add x8, x8, 10 -+ ldr x19, [x18, x17, lsl 3] -+ ldr x8, [x18, x8, lsl 3] -+ str x19, [sp, 584] -+ str x8, [sp, 592] -+ cbz w16, .L186 -+.L443: -+ ldr x8, [sp, 496] -+ neg w17, w16 -+ lsl x8, x8, x0 -+ add w0, w0, w16 -+ lsr x8, x8, x17 -+ add x24, x24, x8 -+ str w0, [sp, 504] -+ b .L186 ++.L429: ++ ldr x0, [sp, 128] ++ cmp x0, x19 ++ bcs .L409 ++.L410: ++ ldp x19, x20, [sp, 32] ++ .cfi_restore 20 ++ .cfi_restore 19 ++.L305: ++ mov x25, -70 ++ mov x0, x25 ++ ldp x29, x30, [sp, 16] ++ ldp x21, x22, [sp, 48] ++ ldp x23, x24, [sp, 64] ++ ldp x25, x26, [sp, 80] ++ ldp x27, x28, [sp, 96] ++ add sp, sp, 768 ++ .cfi_restore 29 ++ .cfi_restore 30 ++ .cfi_restore 27 ++ .cfi_restore 28 ++ .cfi_restore 25 ++ .cfi_restore 26 ++ .cfi_restore 23 ++ .cfi_restore 24 ++ .cfi_restore 21 ++ .cfi_restore 22 ++ .cfi_def_cfa_offset 0 ++ ret + .p2align 2,,3 -+.L445: -+ ldr x2, [x15, 30352] ++.L436: ++ .cfi_def_cfa_offset 768 ++ .cfi_offset 19, -736 ++ .cfi_offset 20, -728 ++ .cfi_offset 21, -720 ++ .cfi_offset 22, -712 ++ .cfi_offset 23, -704 ++ .cfi_offset 24, -696 ++ .cfi_offset 25, -688 ++ .cfi_offset 26, -680 ++ .cfi_offset 27, -672 ++ .cfi_offset 28, -664 ++ .cfi_offset 29, -752 ++ .cfi_offset 30, -744 ++ cmp w0, 64 ++ bhi .L414 ++ cmp x17, x8 ++ bls .L439 ++ ldr x9, [sp, 488] ++ cmp x9, x8 ++ beq .L414 ++ lsr w5, w0, 3 ++ lsr w14, w0, 3 ++ sub x5, x8, x5 ++ cmp x9, x5 ++ bls .L191 ++ sub x9, x8, x9 ++ mov w14, w9 ++ sub x5, x8, x9, uxtw ++.L191: ++ str x5, [sp, 480] ++ sub w0, w0, w14, lsl 3 ++ ldr x5, [x5] ++ str x5, [sp, 464] ++ cbz w13, .L192 ++ .p2align 3,,7 ++.L437: ++ neg w9, w13 ++ lsl x8, x5, x0 ++ add w0, w0, w13 ++ lsr x8, x8, x9 ++ add x27, x27, x8 ++ b .L192 ++ .p2align 2,,3 ++.L179: ++ cmp w16, 0 ++ cset w15, eq ++ cmp w9, 1 ++ beq .L181 ++ cmp w16, 0 ++ add x15, x15, 10 ++ cset x9, ne ++ add x16, sp, 472 ++ add x9, x9, 10 ++ ldr x24, [x16, x15, lsl 3] ++ ldr x9, [x16, x9, lsl 3] ++ str x24, [sp, 552] ++ str x9, [sp, 560] ++ cbz w14, .L185 ++.L435: ++ ldr x9, [sp, 464] ++ neg w15, w14 ++ lsl x9, x9, x0 ++ add w0, w0, w14 ++ lsr x9, x9, x15 ++ add x26, x26, x9 ++ str w0, [sp, 472] ++ b .L185 ++ .p2align 2,,3 ++.L438: ++ ldr x2, [sp, 168] ++ ldr x2, [x2, 30352] + cmp x2, x0 -+ bcs .L195 ++ bcs .L194 + subs x2, x2, x1 -+ beq .L196 ++ beq .L195 + ldr x0, [sp, 128] -+ sub x0, x0, x28 ++ sub x0, x0, x22 + cmp x2, x0 -+ bhi .L255 -+ mov x0, x28 ++ bhi .L254 ++ mov x0, x22 + sub x7, x7, x2 -+ add x28, x28, x2 ++ add x22, x22, x2 + bl ZSTD_safecopyDstBeforeSrc -+ str x7, [x26, x6, lsl 3] -+.L196: ++ str x7, [x28, x6, lsl 3] ++.L195: + add x3, x23, x23, lsl 1 -+ add x8, sp, 624 -+ add x9, sp, 616 -+ add x4, x28, x7 -+ add x1, x26, x3, lsl 3 -+ mov x18, 30364 -+ ldr x27, [x8, x3, lsl 3] -+ ldp x16, x17, [x1] -+ stp x16, x17, [sp, 400] -+ ldr x6, [x9, x3, lsl 3] -+ sub x16, x4, x27 ++ add x9, sp, 584 ++ add x8, sp, 592 ++ mov x6, 30364 ++ add x1, x28, x3, lsl 3 ++ ldr x5, [x9, x3, lsl 3] ++ ldp x14, x15, [x1] ++ add x25, x5, x7 ++ ldr x19, [x8, x3, lsl 3] ++ add x3, x22, x7 + ldr x0, [sp, 128] -+ add x3, x6, x7 -+ ldr x30, [x1, 16] ++ stp x14, x15, [sp, 368] ++ sub x14, x3, x19 ++ ldr x16, [sp, 168] + sub x2, x0, #32 -+ ldr x1, [sp, 192] -+ add x0, x15, x18 -+ prfm PLDL1KEEP, [x16] -+ add x5, x28, x3 -+ str x1, [sp, 136] -+ cmp x5, x2 -+ str x0, [sp, 344] -+ add x0, x0, x7 -+ str x30, [sp, 416] -+ ccmp x1, x0, 0, ls -+ str wzr, [x14, 13976] -+ bcc .L199 -+ ldr q0, [x15, x18] -+ str q0, [x28] ++ add x4, x22, x25 ++ prfm PLDL1KEEP, [x14] ++ cmp x4, x2 ++ add x0, x16, x6 ++ ldr x13, [x1, 16] ++ str x0, [sp, 312] ++ ldr x2, [sp, 184] ++ str x2, [sp, 136] ++ str x13, [sp, 384] ++ add x1, x0, x7 ++ str wzr, [x12, 13976] ++ ccmp x2, x1, 0, ls ++ bcc .L198 ++ ldr q0, [x16, x6] ++ str q0, [x22] + cmp x7, 16 -+ bhi .L200 -+.L204: -+ str x0, [sp, 344] -+ sub x0, x4, x10 -+ cmp x27, x0 -+ bls .L202 ++ bhi .L199 ++.L203: ++ sub x0, x3, x11 ++ str x1, [sp, 312] ++ cmp x19, x0 ++ bls .L201 + ldr x0, [sp, 160] -+ sub x0, x4, x0 -+ cmp x27, x0 -+ bhi .L420 -+ sub x1, x16, x10 -+ add x1, x11, x1 -+ add x0, x1, x6 -+ cmp x11, x0 -+ bcs .L447 -+ sub x2, x10, x16 -+ mov x0, x4 -+ sub x6, x6, x2 -+ add x4, x4, x2 -+ stp x4, x6, [sp, 200] -+ stp x10, x5, [sp, 216] -+ stp x9, x8, [sp, 232] -+ stp x11, x15, [sp, 248] -+ stp x13, x14, [sp, 264] -+ stp x12, x3, [sp, 280] ++ sub x0, x3, x0 ++ cmp x19, x0 ++ bhi .L408 ++ sub x1, x14, x11 ++ add x1, x18, x1 ++ add x0, x1, x5 ++ cmp x18, x0 ++ bcs .L440 ++ sub x2, x11, x14 ++ mov x0, x3 ++ sub x5, x5, x2 ++ add x3, x3, x2 ++ stp x3, x5, [sp, 192] ++ stp x11, x4, [sp, 208] ++ stp x9, x8, [sp, 224] ++ stp x18, x12, [sp, 240] ++ str x10, [sp, 256] + bl memmove -+ ldp x10, x5, [sp, 216] -+ ldp x4, x6, [sp, 200] -+ ldp x9, x8, [sp, 232] -+ ldp x11, x15, [sp, 248] -+ mov x16, x10 -+ ldp x13, x14, [sp, 264] -+ ldp x12, x3, [sp, 280] -+.L202: -+ cmp x27, 15 -+ bls .L207 -+ ldr q0, [x16] -+ str q0, [x4] -+ cmp x6, 16 -+ ble .L203 -+ add x0, x4, 16 -+ add x1, x16, 16 ++ ldp x11, x4, [sp, 208] ++ ldp x3, x5, [sp, 192] ++ ldp x9, x8, [sp, 224] ++ ldp x18, x12, [sp, 240] ++ mov x14, x11 ++ ldr x10, [sp, 256] ++.L201: ++ cmp x19, 15 ++ bls .L206 ++ ldr q0, [x14] ++ str q0, [x3] ++ cmp x5, 16 ++ ble .L202 ++ add x0, x3, 16 ++ add x1, x14, 16 + .p2align 3,,7 -+.L208: ++.L207: + ldr q0, [x1] + add x0, x0, 32 + add x1, x1, 32 + str q0, [x0, -32] + ldr q0, [x1, -16] + str q0, [x0, -16] -+ cmp x5, x0 -+ bhi .L208 -+.L203: -+ cmn x3, #120 -+ bhi .L417 -+ add x21, x25, x21 ++ cmp x4, x0 ++ bhi .L207 ++.L202: ++ cmn x25, #120 ++ bhi .L405 ++ add x20, x27, x20 + add x23, x23, x23, lsl 1 -+ subs x1, x21, x19 -+ add x28, x28, x3 -+ csel x0, x11, x10, cc -+ add x21, x21, x24 -+ add x0, x0, x1 -+ add w22, w22, 1 -+ prfm PLDL1KEEP, [x0] -+ prfm PLDL1KEEP, [x0, 64] -+ str x25, [x26, x23, lsl 3] -+ str x24, [x9, x23, lsl 3] -+ str x19, [x8, x23, lsl 3] -+ cmp w20, 64 -+ bls .L448 -+ .p2align 3,,7 -+.L403: -+ mov w21, w22 -+ mov x24, x10 -+ mov x26, x11 -+ mov x22, x15 -+ mov x25, x14 -+ b .L178 ++ subs x1, x20, x24 ++ csel x0, x18, x11, cc ++ b .L417 + .p2align 2,,3 -+.L195: -+ add x9, sp, 616 -+ add x18, x26, x4 -+ add x8, sp, 624 -+ add x4, sp, 512 ++.L194: ++ add x13, x28, x3 ++ add x9, sp, 584 + ldr x3, [sp, 136] -+ sub x2, x7, #32 -+ ldr x5, [x9, x6, lsl 3] ++ add x8, sp, 592 ++ ldr x4, [x9, x6, lsl 3] + cmp x3, x0 -+ ldp x16, x17, [x18] -+ add x3, x5, x7 -+ ldr x6, [x8, x6, lsl 3] -+ stp x16, x17, [x4, -136] ++ add x3, sp, 512 ++ sub x2, x7, #32 ++ ldp x14, x15, [x13] ++ add x25, x4, x7 ++ ldr x5, [x8, x6, lsl 3] ++ stp x14, x15, [x3, -168] + add x2, x1, x2 -+ ldr x18, [x18, 16] -+ str x18, [sp, 392] -+ add x27, x28, x7 -+ add x4, x28, x3 -+ sub x30, x27, x6 -+ ccmp x2, x4, 0, cs -+ bcc .L216 ++ ldr x13, [x13, 16] ++ str x13, [sp, 360] ++ add x19, x22, x7 ++ add x3, x22, x25 ++ sub x6, x19, x5 ++ ccmp x2, x3, 0, cs ++ bcc .L215 + ldr q0, [x1] -+ str q0, [x28] ++ str q0, [x22] + cmp x7, 16 -+ bhi .L217 -+.L221: -+ str x0, [sp, 344] -+ sub x0, x27, x10 -+ cmp x6, x0 -+ bls .L219 ++ bhi .L216 ++.L220: ++ str x0, [sp, 312] ++ sub x0, x19, x11 ++ cmp x5, x0 ++ bls .L218 + ldr x0, [sp, 160] -+ sub x0, x27, x0 -+ cmp x6, x0 -+ bhi .L420 -+ sub x1, x30, x10 -+ add x1, x11, x1 -+ add x0, x1, x5 -+ cmp x11, x0 -+ bcs .L423 -+ sub x2, x10, x30 -+ mov x0, x27 -+ sub x5, x5, x2 -+ add x27, x27, x2 -+ stp x5, x10, [sp, 200] -+ stp x6, x4, [sp, 216] -+ stp x9, x8, [sp, 232] -+ stp x11, x15, [sp, 248] -+ stp x13, x14, [sp, 264] -+ stp x12, x3, [sp, 280] ++ sub x0, x19, x0 ++ cmp x5, x0 ++ bhi .L408 ++ sub x1, x6, x11 ++ add x1, x18, x1 ++ add x0, x1, x4 ++ cmp x18, x0 ++ bcs .L416 ++ sub x2, x11, x6 ++ mov x0, x19 ++ sub x4, x4, x2 ++ add x19, x19, x2 ++ stp x4, x11, [sp, 192] ++ stp x5, x3, [sp, 208] ++ stp x9, x8, [sp, 224] ++ stp x18, x12, [sp, 240] ++ str x10, [sp, 256] + bl memmove -+ ldp x5, x10, [sp, 200] -+ ldp x6, x4, [sp, 216] -+ ldp x9, x8, [sp, 232] -+ ldp x11, x15, [sp, 248] -+ mov x30, x10 -+ ldp x13, x14, [sp, 264] -+ ldp x12, x3, [sp, 280] -+.L219: -+ cmp x6, 15 -+ bls .L224 -+ ldr q0, [x30] -+ str q0, [x27] -+ cmp x5, 16 -+ ble .L220 -+ add x27, x27, 16 -+ add x30, x30, 16 ++ ldp x4, x11, [sp, 192] ++ ldp x5, x3, [sp, 208] ++ ldp x9, x8, [sp, 224] ++ ldp x18, x12, [sp, 240] ++ mov x6, x11 ++ ldr x10, [sp, 256] ++.L218: ++ cmp x5, 15 ++ bls .L223 ++ ldr q0, [x6] ++ str q0, [x19] ++ cmp x4, 16 ++ ble .L219 ++ add x19, x19, 16 ++ add x6, x6, 16 + .p2align 3,,7 -+.L225: -+ ldr q0, [x30] -+ add x27, x27, 32 -+ add x30, x30, 32 -+ str q0, [x27, -32] -+ ldr q0, [x30, -16] -+ str q0, [x27, -16] -+ cmp x4, x27 -+ bhi .L225 -+ b .L220 -+ .p2align 2,,3 -+.L423: -+ mov x2, x5 -+ mov x0, x27 -+ stp x9, x8, [sp, 200] -+ stp x10, x11, [sp, 216] -+ stp x15, x13, [sp, 232] -+ stp x14, x12, [sp, 248] -+ str x3, [sp, 264] -+ bl memmove -+ ldp x9, x8, [sp, 200] -+ ldp x10, x11, [sp, 216] -+ ldp x15, x13, [sp, 232] -+ ldp x14, x12, [sp, 248] -+ ldr x3, [sp, 264] -+ b .L220 ++.L224: ++ ldr q0, [x6] ++ add x19, x19, 32 ++ add x6, x6, 32 ++ str q0, [x19, -32] ++ ldr q0, [x6, -16] ++ str q0, [x19, -16] ++ cmp x3, x19 ++ bhi .L224 ++ b .L219 + .p2align 2,,3 -+.L436: -+ ldr x0, [sp, 128] -+ cmp x0, x19 -+ bcs .L418 -+.L419: ++.L396: ++ mov x27, x11 ++ mov x28, x12 + ldp x19, x20, [sp, 32] ++ .cfi_remember_state + .cfi_restore 20 + .cfi_restore 19 -+.L306: -+ mov x3, -70 -+ mov x0, x3 -+ ldp x29, x30, [sp, 16] -+ ldp x21, x22, [sp, 48] -+ ldp x23, x24, [sp, 64] -+ ldp x25, x26, [sp, 80] -+ ldp x27, x28, [sp, 96] -+ add sp, sp, 800 -+ .cfi_restore 29 -+ .cfi_restore 30 -+ .cfi_restore 27 -+ .cfi_restore 28 -+ .cfi_restore 25 -+ .cfi_restore 26 -+ .cfi_restore 23 -+ .cfi_restore 24 -+ .cfi_restore 21 -+ .cfi_restore 22 -+ .cfi_def_cfa_offset 0 -+ ret ++ ldr x24, [sp, 168] ++ b .L172 + .p2align 2,,3 -+.L309: -+ .cfi_def_cfa_offset 800 -+ .cfi_offset 19, -768 -+ .cfi_offset 20, -760 -+ .cfi_offset 21, -752 -+ .cfi_offset 22, -744 -+ .cfi_offset 23, -736 -+ .cfi_offset 24, -728 -+ .cfi_offset 25, -720 -+ .cfi_offset 26, -712 -+ .cfi_offset 27, -704 -+ .cfi_offset 28, -696 -+ .cfi_offset 29, -784 -+ .cfi_offset 30, -776 -+ str w2, [sp, 504] -+ b .L310 ++.L416: ++ .cfi_restore_state ++ mov x2, x4 ++ mov x0, x19 ++ stp x9, x8, [sp, 192] ++ stp x11, x18, [sp, 208] ++ stp x12, x10, [sp, 224] ++ bl memmove ++ ldp x9, x8, [sp, 192] ++ ldp x11, x18, [sp, 208] ++ ldp x12, x10, [sp, 224] ++ b .L219 + .p2align 2,,3 -+.L160: -+ ldr x11, [sp, 584] -+ add w4, w4, w18 -+ lsl x18, x0, x2 -+ add w2, w2, 1 -+ add x18, x4, x18, lsr 63 -+ subs x4, x11, #1 -+ cinc x4, x4, eq ++.L158: ++ ldr x19, [sp, 464] ++ add w2, w2, w18 ++ ldr x10, [sp, 552] ++ lsl x18, x19, x0 ++ add w0, w0, 1 ++ add x18, x2, x18, lsr 63 ++ str w0, [sp, 472] + cmp x18, 3 -+ beq .L162 -+ add x4, x18, 10 -+ ldr x4, [x27, x4, lsl 3] -+ cmp x4, 0 -+ cinc x4, x4, eq ++ beq .L441 ++ add x2, x18, 10 ++ ldr x2, [x26, x2, lsl 3] ++ cmp x2, 0 ++ cinc x2, x2, eq + cmp x18, 1 -+ beq .L159 -+.L162: -+ ldr x18, [sp, 592] -+ str x18, [sp, 600] -+ b .L159 ++ bne .L160 ++.L161: ++ str x2, [sp, 552] ++ str x10, [sp, 560] ++ b .L157 ++.L441: ++ subs x2, x10, #1 ++ cinc x2, x2, eq ++.L160: ++ ldr x18, [sp, 560] ++ str x18, [sp, 568] ++ b .L161 + .p2align 2,,3 -+.L231: ++.L408: ++ ldp x19, x20, [sp, 32] ++ .cfi_remember_state ++ .cfi_restore 20 ++ .cfi_restore 19 ++ b .L144 ++ .p2align 2,,3 ++.L230: ++ .cfi_restore_state + ldr x1, [sp, 128] + mov x4, x2 + ldr x6, [sp, 160] -+ mov x7, x11 -+ add x2, sp, 304 -+ mov x5, x10 -+ add x3, sp, 344 -+ mov x0, x28 -+ stp x10, x11, [sp, 200] -+ stp x9, x8, [sp, 216] -+ stp x15, x13, [sp, 232] -+ stp x14, x12, [sp, 248] -+ stp x16, x17, [sp, 304] -+ str x18, [sp, 320] ++ mov x7, x18 ++ mov x5, x11 ++ add x3, sp, 312 ++ add x2, sp, 272 ++ mov x0, x22 ++ stp x11, x18, [sp, 192] ++ stp x9, x8, [sp, 208] ++ stp x12, x10, [sp, 224] ++ stp x14, x15, [sp, 272] ++ str x13, [sp, 288] + bl ZSTD_execSequenceEnd -+.L422: -+ mov x3, x0 -+ ldp x10, x11, [sp, 200] -+ ldp x9, x8, [sp, 216] -+ ldp x15, x13, [sp, 232] -+ ldp x14, x12, [sp, 248] -+ b .L220 ++.L415: ++ mov x25, x0 ++ ldp x11, x18, [sp, 192] ++ ldp x9, x8, [sp, 208] ++ ldp x12, x10, [sp, 224] ++ b .L219 + .p2align 2,,3 -+.L232: ++.L231: + ldr q0, [x1, 16] + sub x7, x7, #16 -+ str q0, [x28, 16] ++ str q0, [x22, 16] + cmp x7, 16 -+ ble .L235 ++ ble .L234 + add x1, x1, 32 -+ add x2, x28, 32 ++ add x2, x22, 32 + .p2align 3,,7 -+.L236: ++.L235: + ldr q0, [x1] + add x2, x2, 32 + add x1, x1, 32 + str q0, [x2, -32] + ldr q0, [x1, -16] + str q0, [x2, -16] -+ cmp x27, x2 -+ bhi .L236 -+ b .L235 -+ .p2align 2,,3 -+.L182: -+ ldr x18, [sp, 496] -+ add w17, w17, w19 -+ ldr x8, [sp, 584] -+ lsl x18, x18, x0 ++ cmp x19, x2 ++ bhi .L235 ++ b .L234 ++ .p2align 2,,3 ++.L181: ++ ldr x9, [sp, 464] ++ add w15, w15, w24 ++ ldr x16, [sp, 552] ++ lsl x9, x9, x0 + add w0, w0, 1 -+ subs x19, x8, #1 -+ add x17, x17, x18, lsr 63 -+ str w0, [sp, 504] -+ cinc x19, x19, eq -+ cmp x17, 3 ++ add x9, x15, x9, lsr 63 ++ str w0, [sp, 472] ++ cmp x9, 3 ++ beq .L442 ++ add x15, sp, 768 ++ add x15, x15, x9, lsl 3 ++ ldr x24, [x15, -216] ++ cmp x24, 0 ++ cinc x24, x24, eq ++ cmp x9, 1 + beq .L184 -+ add x18, sp, 800 -+ add x18, x18, x17, lsl 3 -+ ldr x19, [x18, -216] -+ cmp x19, 0 -+ cinc x19, x19, eq -+ cmp x17, 1 -+ beq .L181 ++.L183: ++ ldr x9, [sp, 560] ++ str x9, [sp, 568] +.L184: -+ ldr x17, [sp, 592] -+ str x17, [sp, 600] -+ b .L181 ++ str x24, [sp, 552] ++ str x16, [sp, 560] ++ b .L180 + .p2align 2,,3 -+.L238: -+ cmp x6, 7 -+ bhi .L240 -+ ldr x2, [sp, 296] -+ ldrb w1, [x30] -+ strb w1, [x27] ++.L237: ++ cmp x5, 7 ++ bhi .L239 ++ ldr x2, [sp, 264] ++ ldrb w1, [x6] ++ strb w1, [x19] + add x0, x2, 32 -+ ldrsw x1, [x2, x6, lsl 2] -+ ldrb w2, [x30, 1] -+ strb w2, [x27, 1] -+ ldr w0, [x0, x6, lsl 2] -+ ldrb w2, [x30, 2] -+ strb w2, [x27, 2] -+ add x2, x30, x0 -+ ldrb w6, [x30, 3] -+ strb w6, [x27, 3] -+ ldr w0, [x30, x0] -+ sub x30, x2, x1 -+ str w0, [x27, 4] -+.L241: -+ cmp x5, 8 -+ bls .L220 -+ sub x2, x27, x30 -+ add x1, x30, 8 -+ add x0, x27, 8 ++ ldrsw x1, [x2, x5, lsl 2] ++ ldrb w2, [x6, 1] ++ strb w2, [x19, 1] ++ ldr w0, [x0, x5, lsl 2] ++ ldrb w2, [x6, 2] ++ strb w2, [x19, 2] ++ add x2, x6, x0 ++ ldrb w5, [x6, 3] ++ strb w5, [x19, 3] ++ ldr w0, [x6, x0] ++ sub x6, x2, x1 ++ str w0, [x19, 4] ++.L240: ++ cmp x4, 8 ++ bls .L219 ++ add x1, x6, 8 ++ add x0, x19, 8 ++ sub x2, x0, x1 + cmp x2, 15 -+ bgt .L242 ++ bgt .L241 + .p2align 3,,7 -+.L243: ++.L242: + ldr d0, [x1], 8 + str d0, [x0], 8 -+ cmp x4, x0 -+ bhi .L243 -+ b .L220 ++ cmp x3, x0 ++ bhi .L242 ++ b .L219 + .p2align 2,,3 -+.L438: -+ lsr w0, w2, 3 -+ mov w30, 1 -+ sub x7, x7, x0 -+ and w2, w2, 7 -+ ldr x0, [x7] -+ b .L165 ++.L431: ++ lsr w7, w0, 3 ++ and w0, w0, 7 ++ sub x6, x6, x7 ++ ldr x10, [x6] ++ str x10, [sp, 464] ++ str x6, [sp, 480] ++ b .L164 + .p2align 2,,3 -+.L315: ++.L421: ++ .cfi_restore 19 ++ .cfi_restore 20 + mov w21, 0 -+ b .L156 ++ b .L154 + .p2align 2,,3 -+.L418: ++.L409: ++ .cfi_offset 19, -736 ++ .cfi_offset 20, -728 + ldp x19, x20, [sp, 32] + .cfi_remember_state + .cfi_restore 20 + .cfi_restore 19 -+ b .L307 -+.L240: ++ b .L306 ++ .p2align 2,,3 ++.L432: + .cfi_restore_state -+ ldr d0, [x30] -+ str d0, [x27] -+ b .L241 ++ sub x7, x6, x22 ++ mov w10, w7 ++ sub x6, x6, x7, uxtw ++ b .L168 ++.L239: ++ ldr d0, [x6] ++ str d0, [x19] ++ b .L240 ++.L442: ++ subs x24, x16, #1 ++ cinc x24, x24, eq ++ b .L183 + .p2align 2,,3 -+.L439: -+ ldr x1, [x22, 30352] ++.L434: ++ ldp x19, x20, [sp, 32] ++ .cfi_restore 20 ++ .cfi_restore 19 ++ mov x27, x11 ++ ldr x24, [sp, 168] ++ mov x28, x12 ++.L249: ++ ldr w0, [sp, 176] ++ ldr w23, [sp, 120] ++ sub w21, w21, w0 ++ cmp w23, w21 ++ ble .L250 ++ stp x19, x20, [sp, 32] ++ .cfi_offset 20, -728 ++ .cfi_offset 19, -736 ++ add x19, x24, 94208 ++ adrp x20, .LANCHOR0 ++ add x0, x20, :lo12:.LANCHOR0 ++ add x19, x19, 1692 ++ stp x19, x0, [sp, 184] ++ add x0, x0, 32 ++ add x18, sp, 576 ++ str x0, [sp, 200] ++ mov w0, w21 ++ ldr x26, [sp, 128] ++ mov x25, x28 ++ ldr x19, [sp, 136] ++ mov x21, x27 ++ mov x28, x18 ++ mov w27, w0 ++ .p2align 3,,7 ++.L300: ++ and x11, x27, 7 ++ ubfiz x6, x27, 1, 3 ++ add x6, x6, x11 ++ ldr w1, [x25, 13976] ++ ldr x13, [sp, 312] ++ lsl x0, x6, 3 ++ ldr x7, [x28, x6, lsl 3] ++ add x14, x13, x7 ++ cmp w1, 2 ++ beq .L443 ++ add x1, sp, 592 ++ add x2, sp, 584 ++ add x3, x28, x0 ++ add x20, x22, x7 ++ sub x15, x26, #32 ++ ldr x11, [x1, x6, lsl 3] ++ ldr x12, [x2, x6, lsl 3] ++ sub x2, x20, x11 ++ ldp x0, x1, [x3] ++ add x4, x12, x7 ++ ldr x3, [x3, 16] ++ add x5, x22, x4 ++ prfm PLDL1KEEP, [x2] ++ cmp x5, x15 ++ stp x0, x1, [sp, 416] ++ ccmp x19, x14, 0, ls ++ str x3, [sp, 432] ++ bcc .L286 ++ ldr q0, [x13] ++ str q0, [x22] ++ cmp x7, 16 ++ bhi .L287 ++.L290: ++ sub x0, x20, x21 ++ str x14, [sp, 312] ++ cmp x11, x0 ++ bls .L289 ++ ldr x0, [sp, 160] ++ sub x0, x20, x0 ++ cmp x11, x0 ++ bhi .L408 ++ ldr x3, [sp, 144] ++ sub x1, x2, x21 ++ add x1, x3, x1 ++ add x0, x1, x12 ++ cmp x3, x0 ++ bcs .L444 ++ sub x2, x21, x2 ++ mov x0, x20 ++ sub x12, x12, x2 ++ add x20, x20, x2 ++ str x12, [sp, 120] ++ str x11, [sp, 136] ++ stp x5, x4, [sp, 168] ++ bl memmove ++ mov x2, x21 ++ ldp x5, x4, [sp, 168] ++ ldr x12, [sp, 120] ++ ldr x11, [sp, 136] ++.L289: ++ cmp x11, 15 ++ bls .L293 ++ ldr q0, [x2] ++ str q0, [x20] ++ cmp x12, 16 ++ ble .L275 ++ add x1, x20, 16 ++ add x2, x2, 16 ++ .p2align 3,,7 ++.L294: ++ ldr q0, [x2] ++ add x1, x1, 32 ++ add x2, x2, 32 ++ str q0, [x1, -32] ++ ldr q0, [x2, -16] ++ str q0, [x1, -16] ++ cmp x5, x1 ++ bhi .L294 ++.L275: ++ cmn x4, #120 ++ bhi .L398 ++ add w27, w27, 1 ++ add x22, x22, x4 ++ cmp w23, w27 ++ bne .L300 ++ str x19, [sp, 136] ++ mov x28, x25 ++ ldp x19, x20, [sp, 32] ++ .cfi_restore 20 ++ .cfi_restore 19 ++.L250: ++ ldr x0, [sp, 552] ++ str w0, [x28, 10300] ++ ldr x0, [sp, 560] ++ str w0, [x28, 10304] ++ ldr x0, [sp, 568] ++ str w0, [x28, 10308] ++ ldr w0, [x28, 13976] ++ ldr x1, [sp, 312] ++ b .L143 ++ .p2align 2,,3 ++.L443: ++ .cfi_offset 19, -736 ++ .cfi_offset 20, -728 ++ ldr x1, [x24, 30352] + cmp x1, x14 -+ bcs .L253 ++ bcs .L252 + subs x2, x1, x13 -+ beq .L254 -+ sub x0, x27, x28 ++ beq .L253 ++ sub x0, x26, x22 + cmp x2, x0 -+ bhi .L255 -+ mov x0, x28 ++ bhi .L254 ++ mov x0, x22 + sub x7, x7, x2 -+ add x28, x28, x2 ++ add x22, x22, x2 + mov x1, x13 + bl ZSTD_safecopyDstBeforeSrc -+ str x7, [x25, x6, lsl 3] -+.L254: ++ str x7, [x28, x6, lsl 3] ++.L253: + add x11, x11, x11, lsl 1 -+ add x0, sp, 616 -+ add x3, sp, 624 -+ add x20, x28, x7 -+ add x2, x25, x11, lsl 3 -+ sub x1, x27, #32 ++ add x0, sp, 584 ++ add x2, sp, 592 ++ add x8, sp, 512 ++ add x10, x28, x11, lsl 3 ++ add x20, x22, x7 + ldr x6, [x0, x11, lsl 3] -+ mov x4, 30364 -+ ldp x12, x13, [x2] -+ add x0, x22, x4 -+ ldr x11, [x3, x11, lsl 3] -+ add x3, x6, x7 -+ ldr x10, [x2, 16] -+ add x2, sp, 512 -+ add x5, x28, x3 -+ cmp x5, x1 -+ stp x12, x13, [x2, -88] ++ sub x1, x26, #32 ++ ldr x11, [x2, x11, lsl 3] ++ add x4, x6, x7 ++ ldp x2, x3, [x10] ++ stp x2, x3, [x8, -120] ++ add x5, x22, x4 + sub x2, x20, x11 -+ ldr x1, [sp, 176] ++ mov x12, 30364 ++ ldr x10, [x10, 16] ++ add x0, x24, x12 ++ ldr x3, [sp, 184] ++ cmp x5, x1 + prfm PLDL1KEEP, [x2] -+ str x0, [sp, 344] -+ add x0, x0, x7 -+ str x10, [sp, 440] -+ mov x19, x1 -+ str wzr, [x23, 13976] -+ ccmp x1, x0, 0, ls -+ bcc .L256 -+ ldr q0, [x22, x4] -+ str q0, [x28] ++ add x1, x0, x7 ++ str x0, [sp, 312] ++ ccmp x3, x1, 0, ls ++ str x10, [sp, 408] ++ mov x19, x3 ++ str wzr, [x25, 13976] ++ bcc .L255 ++ ldr q0, [x24, x12] ++ str q0, [x22] + cmp x7, 16 -+ bhi .L257 -+.L261: -+ str x0, [sp, 344] ++ bhi .L256 ++.L260: + sub x0, x20, x21 ++ str x1, [sp, 312] + cmp x11, x0 -+ bls .L259 ++ bls .L258 + ldr x0, [sp, 160] + sub x0, x20, x0 + cmp x11, x0 -+ bhi .L420 ++ bhi .L408 ++ ldr x3, [sp, 144] + sub x1, x2, x21 -+ add x1, x26, x1 ++ add x1, x3, x1 + add x0, x1, x6 -+ cmp x26, x0 -+ bcs .L449 ++ cmp x3, x0 ++ bcs .L445 + sub x2, x21, x2 + mov x0, x20 + sub x6, x6, x2 + add x20, x20, x2 + str x6, [sp, 120] -+ stp x11, x5, [sp, 136] -+ str x3, [sp, 168] ++ str x11, [sp, 136] ++ stp x5, x4, [sp, 168] + bl memmove -+ ldp x11, x5, [sp, 136] + mov x2, x21 ++ ldp x5, x4, [sp, 168] + ldr x6, [sp, 120] -+ ldr x3, [sp, 168] -+.L259: ++ ldr x11, [sp, 136] ++.L258: + cmp x11, 15 -+ bls .L264 ++ bls .L263 + ldr q0, [x2] + str q0, [x20] + cmp x6, 16 -+ ble .L276 ++ ble .L275 + add x0, x20, 16 + add x1, x2, 16 + .p2align 3,,7 -+.L265: ++.L264: + ldr q0, [x1] + add x0, x0, 32 + add x1, x1, 32 @@ -2464,70 +2626,71 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + ldr q0, [x1, -16] + str q0, [x0, -16] + cmp x5, x0 -+ bhi .L265 -+ b .L276 ++ bhi .L264 ++ b .L275 + .p2align 2,,3 -+.L253: -+ add x1, sp, 616 -+ add x11, sp, 624 -+ add x4, x25, x0 ++.L252: ++ add x1, sp, 584 ++ add x11, sp, 592 ++ add x3, x28, x0 + sub x2, x7, #32 + cmp x19, x14 + add x2, x13, x2 + ldr x5, [x1, x6, lsl 3] -+ add x20, x28, x7 ++ add x20, x22, x7 + ldr x11, [x11, x6, lsl 3] -+ add x6, sp, 704 -+ ldp x0, x1, [x4] -+ add x3, x5, x7 ++ add x6, sp, 672 ++ ldp x0, x1, [x3] ++ add x4, x5, x7 + stp x0, x1, [x6, -232] -+ add x6, x28, x3 ++ add x6, x22, x4 + ccmp x2, x6, 0, cs -+ ldr x0, [x4, 16] -+ str x0, [sp, 488] ++ ldr x0, [x3, 16] ++ str x0, [sp, 456] + sub x0, x20, x11 -+ bcc .L272 ++ bcc .L271 + ldr q0, [x13] -+ str q0, [x28] ++ str q0, [x22] + cmp x7, 16 -+ bhi .L273 -+.L277: ++ bhi .L272 ++.L276: + sub x1, x20, x21 -+ str x14, [sp, 344] ++ str x14, [sp, 312] + cmp x11, x1 -+ bls .L275 ++ bls .L274 + ldr x1, [sp, 160] + sub x1, x20, x1 + cmp x11, x1 -+ bhi .L420 ++ bhi .L408 ++ ldr x3, [sp, 144] + sub x1, x0, x21 -+ add x1, x26, x1 ++ add x1, x3, x1 + add x2, x1, x5 -+ cmp x26, x2 -+ bcs .L450 ++ cmp x3, x2 ++ bcs .L446 + sub x2, x21, x0 + mov x0, x20 + sub x5, x5, x2 + add x20, x20, x2 + str x5, [sp, 120] -+ stp x11, x6, [sp, 136] -+ str x3, [sp, 168] ++ str x11, [sp, 136] ++ stp x6, x4, [sp, 168] + bl memmove -+ ldp x11, x6, [sp, 136] + mov x0, x21 ++ ldp x6, x4, [sp, 168] + ldr x5, [sp, 120] -+ ldr x3, [sp, 168] -+.L275: ++ ldr x11, [sp, 136] ++.L274: + cmp x11, 15 -+ bls .L280 ++ bls .L279 + ldr q0, [x0] + str q0, [x20] + cmp x5, 16 -+ ble .L276 ++ ble .L275 + add x1, x20, 16 + add x2, x0, 16 + .p2align 3,,7 -+.L281: ++.L280: + ldr q0, [x2] + add x1, x1, 32 + add x2, x2, 32 @@ -2535,200 +2698,181 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + ldr q0, [x2, -16] + str q0, [x1, -16] + cmp x6, x1 -+ bhi .L281 -+ b .L276 -+ .p2align 2,,3 -+.L417: -+ ldp x19, x20, [sp, 32] -+ .cfi_remember_state -+ .cfi_restore 20 -+ .cfi_restore 19 -+ b .L142 ++ bhi .L280 ++ b .L275 + .p2align 2,,3 -+.L216: -+ .cfi_restore_state ++.L215: + ldp x1, x5, [sp, 128] -+ str x11, [sp] ++ str x18, [sp] + ldr x7, [sp, 160] -+ add x3, sp, 304 -+ mov x6, x10 -+ add x4, sp, 344 -+ mov x0, x28 -+ stp x10, x11, [sp, 200] -+ stp x9, x8, [sp, 216] -+ stp x15, x13, [sp, 232] -+ stp x14, x12, [sp, 248] -+ stp x16, x17, [sp, 304] -+ str x18, [sp, 320] ++ mov x6, x11 ++ add x4, sp, 312 ++ add x3, sp, 272 ++ mov x0, x22 ++ stp x11, x18, [sp, 192] ++ stp x9, x8, [sp, 208] ++ stp x12, x10, [sp, 224] ++ stp x14, x15, [sp, 272] ++ str x13, [sp, 288] + bl ZSTD_execSequenceEndSplitLitBuffer -+ b .L422 ++ b .L415 + .p2align 2,,3 -+.L446: -+ lsr w4, w0, 3 ++.L439: ++ lsr w5, w0, 3 + and w0, w0, 7 -+ sub x4, x5, x4 -+ str x4, [sp, 512] -+ ldr x4, [x4] -+ str x4, [sp, 496] -+ b .L188 ++ sub x5, x8, x5 ++ str x5, [sp, 480] ++ ldr x5, [x5] ++ str x5, [sp, 464] ++ b .L187 + .p2align 2,,3 -+.L217: ++.L216: + ldr q0, [x1, 16] + sub x7, x7, #16 -+ str q0, [x28, 16] ++ str q0, [x22, 16] + cmp x7, 16 -+ ble .L221 ++ ble .L220 + add x1, x1, 32 -+ add x2, x28, 32 ++ add x2, x22, 32 + .p2align 3,,7 -+.L222: ++.L221: + ldr q0, [x1] + add x2, x2, 32 + add x1, x1, 32 + str q0, [x2, -32] + ldr q0, [x1, -16] + str q0, [x2, -16] -+ cmp x27, x2 -+ bhi .L222 -+ b .L221 ++ cmp x19, x2 ++ bhi .L221 ++ b .L220 + .p2align 2,,3 -+.L447: -+ mov x2, x6 -+ mov x0, x4 -+ stp x9, x8, [sp, 200] -+ stp x10, x11, [sp, 216] -+ stp x15, x13, [sp, 232] -+ stp x14, x12, [sp, 248] -+ str x3, [sp, 264] ++.L440: ++ mov x2, x5 ++ mov x0, x3 ++ stp x9, x8, [sp, 192] ++ stp x11, x18, [sp, 208] ++ stp x12, x10, [sp, 224] + bl memmove -+ ldp x9, x8, [sp, 200] -+ ldp x10, x11, [sp, 216] -+ ldp x15, x13, [sp, 232] -+ ldp x14, x12, [sp, 248] -+ ldr x3, [sp, 264] -+ b .L203 -+ .p2align 2,,3 -+.L442: -+ mov w21, w22 -+ mov x24, x10 -+ mov x26, x11 -+ mov x22, x15 -+ mov x25, x14 -+ b .L250 ++ ldp x9, x8, [sp, 192] ++ ldp x11, x18, [sp, 208] ++ ldp x12, x10, [sp, 224] ++ b .L202 + .p2align 2,,3 -+.L440: ++.L444: + mov x2, x12 + mov x0, x20 -+ str x3, [sp, 120] ++ str x4, [sp, 120] + bl memmove -+ ldr x3, [sp, 120] -+ b .L276 -+.L224: -+ cmp x6, 7 -+ bhi .L226 -+ ldr x2, [sp, 296] -+ ldrb w1, [x30] -+ strb w1, [x27] -+ add x0, x2, 32 -+ ldrsw x1, [x2, x6, lsl 2] -+ ldrb w2, [x30, 1] -+ strb w2, [x27, 1] -+ ldr w0, [x0, x6, lsl 2] -+ ldrb w2, [x30, 2] -+ strb w2, [x27, 2] -+ add x2, x30, x0 -+ ldrb w6, [x30, 3] -+ strb w6, [x27, 3] -+ ldr w0, [x30, x0] -+ sub x30, x2, x1 -+ str w0, [x27, 4] -+.L227: -+ cmp x5, 8 -+ bls .L220 -+ sub x2, x27, x30 -+ add x1, x30, 8 -+ add x0, x27, 8 ++ ldr x4, [sp, 120] ++ b .L275 ++.L405: ++ ldp x19, x20, [sp, 32] ++ .cfi_remember_state ++ .cfi_restore 20 ++ .cfi_restore 19 ++ b .L140 ++.L223: ++ .cfi_restore_state ++ cmp x5, 7 ++ bhi .L225 ++ ldr x2, [sp, 264] ++ ldrb w1, [x6] ++ strb w1, [x19] ++ add x0, x2, 32 ++ ldrsw x1, [x2, x5, lsl 2] ++ ldrb w2, [x6, 1] ++ strb w2, [x19, 1] ++ ldr w0, [x0, x5, lsl 2] ++ ldrb w2, [x6, 2] ++ strb w2, [x19, 2] ++ add x2, x6, x0 ++ ldrb w5, [x6, 3] ++ strb w5, [x19, 3] ++ ldr w0, [x6, x0] ++ sub x6, x2, x1 ++ str w0, [x19, 4] ++.L226: ++ cmp x4, 8 ++ bls .L219 ++ add x1, x6, 8 ++ add x0, x19, 8 ++ sub x2, x0, x1 + cmp x2, 15 -+ bgt .L228 ++ bgt .L227 + .p2align 3,,7 -+.L229: ++.L228: + ldr d0, [x1], 8 + str d0, [x0], 8 -+ cmp x4, x0 -+ bhi .L229 -+ b .L220 -+.L199: -+ stp x6, x27, [sp, 408] -+ mov x4, x1 -+ mov x7, x11 -+ ldp x0, x1, [sp, 400] -+ stp x0, x1, [sp, 304] -+ mov x5, x10 ++ cmp x3, x0 ++ bhi .L228 ++ b .L219 ++.L198: ++ stp x5, x19, [sp, 376] ++ mov x4, x2 ++ mov x7, x18 ++ ldp x0, x1, [sp, 368] ++ stp x0, x1, [sp, 272] ++ mov x5, x11 + ldr x1, [sp, 128] -+ add x3, sp, 344 ++ add x3, sp, 312 + ldr x6, [sp, 160] -+ add x2, sp, 304 -+ mov x0, x28 -+ stp x10, x11, [sp, 200] -+ stp x9, x8, [sp, 216] -+ stp x15, x13, [sp, 232] -+ stp x14, x12, [sp, 248] -+ str x27, [sp, 320] ++ add x2, sp, 272 ++ mov x0, x22 ++ stp x11, x18, [sp, 192] ++ stp x9, x8, [sp, 208] ++ stp x12, x10, [sp, 224] ++ str x19, [sp, 288] + bl ZSTD_execSequenceEnd -+ mov x3, x0 -+ ldp x10, x11, [sp, 200] -+ ldp x9, x8, [sp, 216] -+ ldp x15, x13, [sp, 232] -+ ldp x14, x12, [sp, 248] -+ b .L203 -+.L287: -+ stp x12, x11, [sp, 456] -+ add x2, sp, 304 -+ mov x7, x26 -+ ldr x6, [sp, 160] -+ mov x5, x21 ++ mov x25, x0 ++ ldp x11, x18, [sp, 192] ++ ldp x9, x8, [sp, 208] ++ ldp x12, x10, [sp, 224] ++ b .L202 ++.L286: ++ stp x12, x11, [sp, 424] + mov x4, x19 -+ add x3, sp, 344 -+ ldp x0, x1, [sp, 448] -+ stp x0, x1, [sp, 304] -+.L427: -+ mov x1, x27 -+ mov x0, x28 -+ str x11, [sp, 320] ++ mov x5, x21 ++ ldr x7, [sp, 144] ++ add x3, sp, 312 ++ ldr x6, [sp, 160] ++ add x2, sp, 272 ++ ldp x0, x1, [sp, 416] ++ stp x0, x1, [sp, 272] ++ mov x1, x26 ++ mov x0, x22 ++ str x11, [sp, 288] + bl ZSTD_execSequenceEnd -+ mov x3, x0 -+ b .L276 -+.L200: -+ mov x1, 30380 ++ mov x4, x0 ++ b .L275 ++.L199: ++ ldr q0, [x0, 16] + sub x7, x7, #16 -+ ldr q0, [x15, x1] -+ str q0, [x28, 16] ++ str q0, [x22, 16] + cmp x7, 16 -+ ble .L204 -+ mov x7, 30396 -+ add x1, x28, 32 -+ add x2, x15, x7 ++ ble .L203 ++ add x0, x0, 32 ++ add x2, x22, 32 + .p2align 3,,7 -+.L205: -+ ldr q0, [x2] -+ add x1, x1, 32 ++.L204: ++ ldr q0, [x0] + add x2, x2, 32 -+ str q0, [x1, -32] -+ ldr q0, [x2, -16] -+ str q0, [x1, -16] -+ cmp x4, x1 -+ bhi .L205 -+ b .L204 -+.L288: ++ add x0, x0, 32 ++ str q0, [x2, -32] ++ ldr q0, [x0, -16] ++ str q0, [x2, -16] ++ cmp x3, x2 ++ bhi .L204 ++ b .L203 ++.L287: + ldr q0, [x13, 16] -+ sub x4, x7, #16 -+ str q0, [x28, 16] -+ cmp x4, 16 -+ ble .L291 ++ sub x3, x7, #16 ++ str q0, [x22, 16] ++ cmp x3, 16 ++ ble .L290 + add x1, x13, 32 -+ add x0, x28, 32 ++ add x0, x22, 32 + .p2align 3,,7 -+.L292: ++.L291: + ldr q0, [x1] + add x0, x0, 32 + add x1, x1, 32 @@ -2736,113 +2880,122 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + ldr q0, [x1, -16] + str q0, [x0, -16] + cmp x20, x0 -+ bhi .L292 -+ b .L291 -+.L207: -+ cmp x27, 7 -+ bhi .L209 -+ ldr x2, [sp, 296] -+ ldrb w1, [x16] -+ strb w1, [x4] ++ bhi .L291 ++ b .L290 ++.L206: ++ cmp x19, 7 ++ bhi .L208 ++ ldr x2, [sp, 264] ++ ldrb w1, [x14] ++ strb w1, [x3] + add x0, x2, 32 -+ ldrsw x1, [x2, x27, lsl 2] -+ ldrb w2, [x16, 1] -+ strb w2, [x4, 1] -+ ldr w0, [x0, x27, lsl 2] -+ ldrb w2, [x16, 2] -+ strb w2, [x4, 2] -+ add x2, x16, x0 -+ ldrb w7, [x16, 3] -+ strb w7, [x4, 3] -+ ldr w0, [x16, x0] -+ sub x16, x2, x1 -+ str w0, [x4, 4] -+.L210: -+ cmp x6, 8 -+ bls .L203 -+ sub x2, x4, x16 -+ add x1, x16, 8 -+ add x0, x4, 8 ++ ldrsw x1, [x2, x19, lsl 2] ++ ldrb w2, [x14, 1] ++ strb w2, [x3, 1] ++ ldr w0, [x0, x19, lsl 2] ++ ldrb w2, [x14, 2] ++ strb w2, [x3, 2] ++ add x2, x14, x0 ++ ldrb w6, [x14, 3] ++ strb w6, [x3, 3] ++ ldr w0, [x14, x0] ++ sub x14, x2, x1 ++ str w0, [x3, 4] ++.L209: ++ cmp x5, 8 ++ bls .L202 ++ add x1, x14, 8 ++ add x0, x3, 8 ++ sub x2, x0, x1 + cmp x2, 15 -+ bgt .L211 ++ bgt .L210 + .p2align 3,,7 -+.L212: ++.L211: + ldr d0, [x1], 8 + str d0, [x0], 8 -+ cmp x5, x0 -+ bhi .L212 -+ b .L203 -+.L294: ++ cmp x4, x0 ++ bhi .L211 ++ b .L202 ++.L293: + cmp x11, 7 -+ bhi .L296 ++ bhi .L295 + ldrb w0, [x2] + strb w0, [x20] + ldrb w1, [x2, 1] + strb w1, [x20, 1] -+ ldp x1, x0, [sp, 184] ++ ldp x1, x0, [sp, 192] + ldrb w6, [x2, 2] + strb w6, [x20, 2] + ldrb w6, [x2, 3] + ldr w0, [x0, x11, lsl 2] + ldrsw x1, [x1, x11, lsl 2] + strb w6, [x20, 3] -+ add x4, x2, x0 ++ add x3, x2, x0 + ldr w0, [x2, x0] -+ sub x2, x4, x1 ++ sub x2, x3, x1 + str w0, [x20, 4] -+.L297: ++.L296: + cmp x12, 8 -+ bls .L276 -+ sub x4, x20, x2 ++ bls .L275 + add x1, x2, 8 + add x0, x20, 8 -+ cmp x4, 15 -+ bgt .L298 ++ sub x3, x0, x1 ++ cmp x3, 15 ++ bgt .L297 + .p2align 3,,7 -+.L299: ++.L298: + ldr d0, [x1], 8 + str d0, [x0], 8 + cmp x5, x0 -+ bhi .L299 -+ b .L276 -+.L450: ++ bhi .L298 ++ b .L275 ++.L446: + mov x2, x5 + mov x0, x20 -+ str x3, [sp, 120] ++ str x4, [sp, 120] + bl memmove -+ ldr x3, [sp, 120] -+ b .L276 -+.L226: -+ ldr d0, [x30] -+ str d0, [x27] -+ b .L227 -+.L272: -+ add x0, sp, 704 -+ str x26, [sp] -+ stp x5, x11, [sp, 480] -+ add x3, sp, 304 -+ mov x6, x21 ++ ldr x4, [sp, 120] ++ b .L275 ++.L225: ++ ldr d0, [x6] ++ str d0, [x19] ++ b .L226 ++.L271: ++ ldr x0, [sp, 144] ++ str x0, [sp] ++ add x0, sp, 672 ++ stp x5, x11, [sp, 448] ++ add x4, sp, 312 + ldr x7, [sp, 160] -+ add x4, sp, 344 ++ mov x6, x21 + ldp x0, x1, [x0, -232] ++ add x3, sp, 272 + mov x5, x19 -+ stp x0, x1, [sp, 304] -+ mov x1, x27 -+ mov x0, x28 -+ str x11, [sp, 320] ++ stp x0, x1, [sp, 272] ++ mov x1, x26 ++ mov x0, x22 ++ str x11, [sp, 288] + bl ZSTD_execSequenceEndSplitLitBuffer -+ mov x3, x0 -+ b .L276 -+.L273: ++ mov x4, x0 ++ b .L275 ++.L398: ++ mov x25, x4 ++ ldp x19, x20, [sp, 32] ++ .cfi_remember_state ++ .cfi_restore 20 ++ .cfi_restore 19 ++ b .L140 ++.L272: ++ .cfi_restore_state + ldr q0, [x13, 16] -+ sub x4, x7, #16 -+ str q0, [x28, 16] -+ cmp x4, 16 -+ ble .L277 ++ sub x3, x7, #16 ++ str q0, [x22, 16] ++ cmp x3, 16 ++ ble .L276 + add x1, x13, 32 -+ add x2, x28, 32 ++ add x2, x22, 32 + .p2align 3,,7 -+.L278: ++.L277: + ldr q0, [x1] + add x2, x2, 32 + add x1, x1, 32 @@ -2850,175 +3003,195 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + ldr q0, [x1, -16] + str q0, [x2, -16] + cmp x20, x2 -+ bhi .L278 -+ b .L277 -+.L449: ++ bhi .L277 ++ b .L276 ++.L445: + mov x2, x6 + mov x0, x20 -+ str x3, [sp, 120] ++ str x4, [sp, 120] + bl memmove -+ ldr x3, [sp, 120] -+ b .L276 -+.L242: -+ ldr q0, [x30, 8] -+ str q0, [x27, 8] -+ cmp x5, 24 -+ ble .L220 -+ add x27, x27, 24 -+ add x30, x30, 24 ++ ldr x4, [sp, 120] ++ b .L275 ++.L241: ++ ldr q0, [x6, 8] ++ str q0, [x19, 8] ++ cmp x4, 24 ++ ble .L219 ++ add x19, x19, 24 ++ add x6, x6, 24 + .p2align 3,,7 -+.L244: -+ ldr q0, [x30] -+ add x27, x27, 32 -+ add x30, x30, 32 -+ str q0, [x27, -32] -+ ldr q0, [x30, -16] -+ str q0, [x27, -16] -+ cmp x4, x27 -+ bhi .L244 -+ b .L220 -+.L209: -+ ldr d0, [x16] -+ str d0, [x4] -+ b .L210 -+.L280: ++.L243: ++ ldr q0, [x6] ++ add x19, x19, 32 ++ add x6, x6, 32 ++ str q0, [x19, -32] ++ ldr q0, [x6, -16] ++ str q0, [x19, -16] ++ cmp x3, x19 ++ bhi .L243 ++ b .L219 ++.L279: + cmp x11, 7 -+ bhi .L282 ++ bhi .L281 + ldrb w1, [x0] + strb w1, [x20] + ldrb w2, [x0, 1] + strb w2, [x20, 1] -+ ldp x2, x1, [sp, 184] ++ ldp x2, x1, [sp, 192] + ldrb w7, [x0, 2] + strb w7, [x20, 2] + ldrb w7, [x0, 3] + ldr w1, [x1, x11, lsl 2] + ldrsw x2, [x2, x11, lsl 2] + strb w7, [x20, 3] -+ add x4, x0, x1 ++ add x3, x0, x1 + ldr w1, [x0, x1] -+ sub x0, x4, x2 ++ sub x0, x3, x2 + str w1, [x20, 4] -+.L283: ++.L282: + cmp x5, 8 -+ bls .L276 -+ sub x4, x20, x0 ++ bls .L275 + add x2, x0, 8 + add x1, x20, 8 -+ cmp x4, 15 -+ bgt .L284 -+.L285: ++ sub x3, x1, x2 ++ cmp x3, 15 ++ bgt .L283 ++.L284: + ldr d0, [x2], 8 + str d0, [x1], 8 + cmp x6, x1 -+ bhi .L285 -+ b .L276 -+.L296: ++ bhi .L284 ++ b .L275 ++.L208: ++ ldr d0, [x14] ++ str d0, [x3] ++ b .L209 ++.L295: + ldr d0, [x2] + str d0, [x20] -+ b .L297 -+.L256: ++ b .L296 ++.L255: + add x0, sp, 512 -+ stp x6, x11, [sp, 432] -+ mov x4, x1 -+ add x2, sp, 304 -+ mov x7, x26 -+ ldp x0, x1, [x0, -88] ++ stp x6, x11, [sp, 400] ++ mov x4, x3 ++ ldr x7, [sp, 144] + mov x5, x21 -+ add x3, sp, 344 -+ stp x0, x1, [sp, 304] + ldr x6, [sp, 160] -+ b .L427 -+.L257: -+ mov x1, 30380 -+ sub x4, x7, #16 -+ ldr q0, [x22, x1] -+ str q0, [x28, 16] -+ cmp x4, 16 -+ ble .L261 -+ mov x4, 30396 -+ add x1, x28, 32 -+ add x4, x22, x4 ++ add x3, sp, 312 ++ ldp x0, x1, [x0, -120] ++ add x2, sp, 272 ++ stp x0, x1, [sp, 272] ++ mov x1, x26 ++ mov x0, x22 ++ str x11, [sp, 288] ++ bl ZSTD_execSequenceEnd ++ mov x4, x0 ++ b .L275 ++.L256: ++ ldr q0, [x0, 16] ++ sub x3, x7, #16 ++ str q0, [x22, 16] ++ cmp x3, 16 ++ ble .L260 ++ add x0, x0, 32 ++ add x3, x22, 32 + .p2align 3,,7 -+.L262: -+ ldr q0, [x4] -+ add x1, x1, 32 -+ add x4, x4, 32 -+ str q0, [x1, -32] -+ ldr q0, [x4, -16] -+ str q0, [x1, -16] -+ cmp x20, x1 -+ bhi .L262 -+ b .L261 -+.L264: ++.L261: ++ ldr q0, [x0] ++ add x3, x3, 32 ++ add x0, x0, 32 ++ str q0, [x3, -32] ++ ldr q0, [x0, -16] ++ str q0, [x3, -16] ++ cmp x20, x3 ++ bhi .L261 ++ b .L260 ++.L263: + cmp x11, 7 -+ bhi .L266 ++ bhi .L265 + ldrb w0, [x2] + strb w0, [x20] + ldrb w1, [x2, 1] + strb w1, [x20, 1] -+ ldp x1, x0, [sp, 184] ++ ldp x1, x0, [sp, 192] + ldrb w7, [x2, 2] + strb w7, [x20, 2] + ldrb w7, [x2, 3] + ldr w0, [x0, x11, lsl 2] + ldrsw x1, [x1, x11, lsl 2] + strb w7, [x20, 3] -+ add x4, x2, x0 ++ add x3, x2, x0 + ldr w0, [x2, x0] -+ sub x2, x4, x1 ++ sub x2, x3, x1 + str w0, [x20, 4] -+.L267: ++.L266: + cmp x6, 8 -+ bls .L276 -+ sub x4, x20, x2 ++ bls .L275 + add x1, x2, 8 + add x0, x20, 8 -+ cmp x4, 15 -+ bgt .L268 -+.L269: ++ sub x3, x0, x1 ++ cmp x3, 15 ++ bgt .L267 ++.L268: + ldr d0, [x1], 8 + str d0, [x0], 8 + cmp x5, x0 -+ bhi .L269 -+ b .L276 -+.L228: -+ ldr q0, [x30, 8] -+ str q0, [x27, 8] -+ cmp x5, 24 -+ ble .L220 -+ add x27, x27, 24 -+ add x30, x30, 24 -+.L230: -+ ldr q0, [x30] -+ add x27, x27, 32 -+ add x30, x30, 32 -+ str q0, [x27, -32] -+ ldr q0, [x30, -16] -+ str q0, [x27, -16] -+ cmp x4, x27 -+ bhi .L230 -+ b .L220 -+.L282: ++ bhi .L268 ++ b .L275 ++.L227: ++ ldr q0, [x6, 8] ++ str q0, [x19, 8] ++ cmp x4, 24 ++ ble .L219 ++ add x19, x19, 24 ++ add x6, x6, 24 ++.L229: ++ ldr q0, [x6] ++ add x19, x19, 32 ++ add x6, x6, 32 ++ str q0, [x19, -32] ++ ldr q0, [x6, -16] ++ str q0, [x19, -16] ++ cmp x3, x19 ++ bhi .L229 ++ b .L219 ++.L281: + ldr d0, [x0] + str d0, [x20] -+ b .L283 -+.L255: -+ mov x3, -70 ++ b .L282 ++.L254: ++ mov x25, -70 + ldp x19, x20, [sp, 32] + .cfi_remember_state + .cfi_restore 20 + .cfi_restore 19 -+ b .L142 -+.L298: ++ b .L140 ++.L210: + .cfi_restore_state ++ ldr q0, [x14, 8] ++ str q0, [x3, 8] ++ cmp x5, 24 ++ ble .L202 ++ add x0, x3, 24 ++ add x1, x14, 24 ++.L212: ++ ldr q0, [x1] ++ add x0, x0, 32 ++ add x1, x1, 32 ++ str q0, [x0, -32] ++ ldr q0, [x1, -16] ++ str q0, [x0, -16] ++ cmp x4, x0 ++ bhi .L212 ++ b .L202 ++.L297: + ldr q0, [x2, 8] + str q0, [x20, 8] + cmp x12, 24 -+ ble .L276 ++ ble .L275 + add x1, x20, 24 + add x2, x2, 24 -+.L300: ++.L299: + ldr q0, [x2] + add x1, x1, 32 + add x2, x2, 32 @@ -3026,37 +3199,20 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + ldr q0, [x2, -16] + str q0, [x1, -16] + cmp x5, x1 -+ bhi .L300 -+ b .L276 -+.L211: -+ ldr q0, [x16, 8] -+ str q0, [x4, 8] -+ cmp x6, 24 -+ ble .L203 -+ add x0, x4, 24 -+ add x1, x16, 24 -+.L213: -+ ldr q0, [x1] -+ add x0, x0, 32 -+ add x1, x1, 32 -+ str q0, [x0, -32] -+ ldr q0, [x1, -16] -+ str q0, [x0, -16] -+ cmp x5, x0 -+ bhi .L213 -+ b .L203 -+.L266: ++ bhi .L299 ++ b .L275 ++.L265: + ldr d0, [x2] + str d0, [x20] -+ b .L267 -+.L284: ++ b .L266 ++.L283: + ldr q0, [x0, 8] + str q0, [x20, 8] + cmp x5, 24 -+ ble .L276 ++ ble .L275 + add x1, x20, 24 + add x0, x0, 24 -+.L286: ++.L285: + ldr q0, [x0] + add x1, x1, 32 + add x0, x0, 32 @@ -3064,16 +3220,16 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + ldr q0, [x0, -16] + str q0, [x1, -16] + cmp x6, x1 -+ bhi .L286 -+ b .L276 -+.L268: ++ bhi .L285 ++ b .L275 ++.L267: + ldr q0, [x2, 8] + str q0, [x20, 8] + cmp x6, 24 -+ ble .L276 ++ ble .L275 + add x0, x20, 24 + add x1, x2, 24 -+.L270: ++.L269: + ldr q0, [x1] + add x0, x0, 32 + add x1, x1, 32 @@ -3081,490 +3237,450 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + ldr q0, [x1, -16] + str q0, [x0, -16] + cmp x5, x0 -+ bhi .L270 -+ b .L276 -+.L175: -+ ldr x7, [sp, 224] -+ cbz w1, .L312 -+ str w2, [sp, 504] -+ mov w20, w2 -+ cbnz w4, .L451 -+ b .L177 ++ bhi .L269 ++ b .L275 ++.L312: ++ ldp x19, x20, [sp, 32] ++ .cfi_restore 20 ++ .cfi_restore 19 ++ ldr x22, [sp, 152] ++ b .L172 + .cfi_endproc -+.LFE4550: ++.LFE4505: + .size ZSTD_decompressSequencesLong_default.constprop.0, .-ZSTD_decompressSequencesLong_default.constprop.0 + .align 2 + .p2align 4,,11 -+ .type ZSTD_decompressSequences_default.constprop.0, %function -+ZSTD_decompressSequences_default.constprop.0: -+.LFB4551: ++ .type ZSTD_decompressSequencesSplitLitBuffer_default.constprop.0, %function ++ZSTD_decompressSequencesSplitLitBuffer_default.constprop.0: ++.LFB4504: + .cfi_startproc -+ add x10, x0, 16384 -+ stp x29, x30, [sp, -416]! ++ sub sp, sp, #416 + .cfi_def_cfa_offset 416 -+ .cfi_offset 29, -416 -+ .cfi_offset 30, -408 -+ mov x8, x0 -+ mov x29, sp -+ ldr w0, [x10, 13976] -+ stp x19, x20, [sp, 16] -+ stp x23, x24, [sp, 48] -+ .cfi_offset 19, -400 -+ .cfi_offset 20, -392 -+ .cfi_offset 23, -368 -+ .cfi_offset 24, -360 -+ mov x24, x3 -+ stp x25, x26, [sp, 64] -+ .cfi_offset 25, -352 -+ .cfi_offset 26, -344 -+ mov w25, w5 -+ str x1, [sp, 104] -+ cbnz w0, .L453 -+ add x0, x1, x2 -+ str x0, [sp, 112] -+.L454: -+ ldr x20, [x8, 30120] -+ str x20, [sp, 272] -+ ldr x0, [x8, 30152] -+ ldr x5, [x8, 29896] -+ add x0, x20, x0 -+ str x0, [sp, 96] -+ ldr x0, [x8, 29904] -+ str x0, [sp, 120] -+ ldr x26, [x8, 29912] -+ cbz w25, .L600 -+ ldr w3, [x10, 10300] ++ stp x29, x30, [sp, 16] ++ .cfi_offset 29, -400 ++ .cfi_offset 30, -392 ++ add x29, sp, 16 ++ stp x23, x24, [sp, 64] ++ .cfi_offset 23, -352 ++ .cfi_offset 24, -344 ++ add x23, x1, x2 ++ stp x27, x28, [sp, 96] ++ .cfi_offset 27, -320 ++ .cfi_offset 28, -312 ++ mov x28, x1 ++ ldr x1, [x0, 30120] ++ stp x19, x20, [sp, 32] ++ .cfi_offset 19, -384 ++ .cfi_offset 20, -376 ++ mov x20, x0 ++ stp x25, x26, [sp, 80] ++ str x1, [sp, 224] ++ ldr x27, [x0, 30352] ++ .cfi_offset 25, -336 ++ .cfi_offset 26, -328 ++ cbz w5, .L568 ++ add x25, x0, 16384 ++ stp x21, x22, [sp, 48] ++ .cfi_offset 22, -360 ++ .cfi_offset 21, -368 ++ mov w21, w5 + mov w0, 1 -+ ldr w2, [x10, 10304] -+ ldr w1, [x10, 10308] -+ stp x3, x2, [sp, 392] ++ ldr w1, [x25, 10308] + str x1, [sp, 408] -+ str w0, [x10, 13620] -+ cbz x4, .L456 -+ add x0, x24, x4 -+ add x9, x24, 8 -+ stp x24, x9, [sp, 328] -+ ldrb w0, [x0, -1] -+ cmp x4, 7 -+ bls .L457 -+ sub x1, x4, #8 -+ add x2, x24, x1 ++ ldr x1, [x20, 29904] ++ str x1, [sp, 120] ++ ldr w5, [x25, 10300] ++ ldr w2, [x25, 10304] ++ ldr x1, [x20, 29912] ++ str x1, [sp, 128] ++ stp x5, x2, [sp, 392] ++ ldr x24, [x20, 29896] ++ str w0, [x25, 13620] ++ cbz x4, .L449 ++ add x0, x3, x4 ++ add x1, x3, 8 ++ stp x3, x1, [sp, 328] ++ ldrb w0, [x0, -1] ++ cmp x4, 7 ++ bls .L450 ++ sub x1, x4, #8 ++ add x2, x3, x1 + str x2, [sp, 320] -+ ldr x1, [x24, x1] ++ ldr x1, [x3, x1] + str x1, [sp, 304] -+ cbz w0, .L456 ++ cbz w0, .L449 + clz w0, w0 + sub w0, w0, #23 + str w0, [sp, 312] + cmn x4, #120 -+ bhi .L456 -+ stp x21, x22, [sp, 32] -+ .cfi_offset 22, -376 -+ .cfi_offset 21, -384 -+ stp x27, x28, [sp, 80] -+ .cfi_offset 28, -328 -+ .cfi_offset 27, -336 -+.L458: ++ bhi .L449 ++.L451: ++ ldr x2, [x20] + add x1, sp, 304 -+ ldr x2, [x8] -+ add x0, sp, 344 -+ str x5, [sp, 128] -+ str x3, [sp, 144] ++ add x0, x1, 40 + bl ZSTD_initFseState -+ add x0, sp, 360 -+ ldr x2, [x8, 16] ++ add x0, x1, 56 ++ ldr x2, [x20, 16] + bl ZSTD_initFseState -+ add x0, sp, 376 -+ ldr x2, [x8, 8] ++ add x0, x1, 72 ++ ldr x2, [x20, 8] + bl ZSTD_initFseState -+ cmp w25, 12 -+ ldr x5, [sp, 128] -+ ble .L466 -+ ldp x22, x28, [sp, 376] -+ adrp x0, .LANCHOR0 -+ ldp x21, x27, [sp, 344] -+ add x0, x0, :lo12:.LANCHOR0 -+ ldp x23, x8, [sp, 360] -+ add x4, x0, 64 -+ ldr w19, [sp, 312] -+ mov x15, x22 -+ ldr x11, [sp, 104] -+ mov x22, x20 -+ ldr x3, [sp, 144] -+ str x0, [sp, 216] -+ adrp x0, dec64table.1 -+ add x0, x0, :lo12:dec64table.1 -+ str x0, [sp, 128] -+ adrp x0, dec32table.0 -+ add x0, x0, :lo12:dec32table.0 -+ str x0, [sp, 136] -+ .p2align 3,,7 -+.L561: -+ ldr x6, [x27, x21, lsl 3] -+ ldr x7, [x8, x23, lsl 3] -+ ldr x12, [x28, x15, lsl 3] -+ ubfx w15, w6, 16, 8 -+ ldr q0, [x22] -+ ubfx w14, w7, 16, 8 -+ add w0, w14, w15 -+ ubfx w16, w12, 16, 8 ++ ldp x5, x4, [sp, 344] ++ ldp x3, x2, [sp, 376] ++ ldp x1, x0, [sp, 360] ++ ldr x5, [x4, x5, lsl 3] ++ ldr x3, [x2, x3, lsl 3] ++ ldr x4, [x0, x1, lsl 3] ++ ubfx w9, w5, 16, 8 ++ ubfx w14, w3, 16, 8 ++ lsr x15, x5, 32 ++ add w0, w9, w14 ++ lsr x12, x3, 32 ++ ubfx w16, w4, 16, 8 ++ mov x6, x15 + add w0, w16, w0 -+ lsr x18, x6, 32 -+ str q0, [x11] -+ and w1, w0, 255 -+ mov x2, x18 -+ lsr x20, x12, 32 -+ lsr x21, x7, 32 -+ lsr w13, w6, 24 -+ lsr w0, w12, 24 -+ lsr w17, w7, 24 -+ cmp w14, 1 -+ bhi .L691 -+ cbnz w14, .L469 -+ cbz x18, .L692 -+ cbnz w16, .L693 -+.L473: -+ cbnz w15, .L694 -+.L474: -+ cmp w1, 30 -+ bhi .L475 -+.L688: -+ ldr x21, [sp, 304] -+.L476: -+ add w1, w13, w19 -+ uxtw x15, w0 -+ uxtw x14, w17 -+ add w0, w0, w1 -+ ldr w16, [x4, x13, lsl 2] -+ add w19, w17, w0 -+ neg w1, w1 -+ ldr w15, [x4, x15, lsl 2] ++ lsr x22, x4, 32 ++ and w0, w0, 255 ++ lsr w11, w5, 24 ++ lsr w1, w3, 24 ++ lsr w13, w4, 24 ++ cmp w16, 1 ++ bls .L459 ++ ldr x7, [sp, 304] ++ neg w18, w16 ++ ldr w15, [sp, 312] ++ ldr x17, [sp, 392] ++ add w2, w16, w15 ++ lsl x7, x7, x15 ++ str w2, [sp, 312] ++ lsr x7, x7, x18 ++ add x22, x22, x7 ++ ldr x7, [sp, 400] ++ stp x22, x17, [sp, 392] ++ str x7, [sp, 408] ++.L460: ++ cbnz w14, .L668 ++.L465: ++ cmp w0, 30 ++ bhi .L669 ++.L662: ++ ldr x7, [sp, 304] ++.L467: ++ cbnz w9, .L670 ++.L472: ++ adrp x0, .LANCHOR0 ++ add x8, x0, :lo12:.LANCHOR0 ++ add x9, x8, 64 ++ uxtw x16, w1 ++ add w0, w11, w2 ++ uxtw x15, w13 ++ add w1, w1, w0 + neg w0, w0 -+ ldr w14, [x4, x14, lsl 2] -+ neg w13, w19 -+ lsr x1, x21, x1 -+ and x1, x1, x16 -+ lsr x0, x21, x0 -+ and x0, x0, x15 -+ lsr x13, x21, x13 -+ and x13, x13, x14 -+ add x23, x1, w6, uxth -+ add x12, x0, w12, uxth -+ add x7, x13, w7, uxth -+ str w19, [sp, 312] -+ str x23, [sp, 344] -+ add x6, x11, x2 -+ str x7, [sp, 360] -+ str x12, [sp, 376] -+ cmp x2, 16 -+ bhi .L481 -+.L484: -+ add x22, x22, x2 -+ str x22, [sp, 272] -+ add x13, x2, x20 -+ sub x1, x6, x3 -+ cbnz x26, .L695 -+.L483: -+ cmp x3, 15 -+ bls .L488 -+ ldr q0, [x1] -+ str q0, [x6] -+ cmp x20, 16 -+ ble .L487 -+ add x20, x6, x20 -+ add x0, x6, 16 -+ add x1, x1, 16 ++ ldr w14, [x9, x11, lsl 2] ++ add w2, w13, w1 ++ ldr w13, [x9, x16, lsl 2] ++ neg w1, w1 ++ lsr x0, x7, x0 ++ and x0, x0, x14 ++ neg w14, w2 ++ lsr x1, x7, x1 ++ ldr w11, [x9, x15, lsl 2] ++ and x1, x1, x13 ++ ldr x13, [sp, 224] ++ add x5, x0, x5, uxth ++ lsr x0, x7, x14 ++ and x0, x0, x11 ++ ldr x7, [x20, 30352] ++ add x3, x1, x3, uxth ++ add x4, x0, x4, uxth ++ add x14, x13, x6 ++ str x8, [sp, 168] ++ mov x19, x28 ++ str w2, [sp, 312] ++ str x5, [sp, 344] ++ str x4, [sp, 360] ++ str x3, [sp, 376] ++ cmp x7, x14 ++ bcc .L473 ++ add x11, sp, 312 ++ stp x23, x28, [sp, 144] ++ mov x23, x20 ++ mov x28, x22 ++ mov x20, x24 ++ mov x22, x12 ++ mov w24, w21 ++ str x25, [sp, 160] ++ mov x25, x27 ++ mov x27, x9 ++ add x0, x8, 32 ++ str x0, [sp, 176] ++.L508: ++ sub x2, x6, #32 ++ add x21, x6, x22 ++ cmp x25, x14 ++ add x2, x13, x2 ++ add x4, x19, x21 ++ add x26, x19, x6 ++ ccmp x2, x4, 0, cs ++ sub x3, x26, x28 ++ bcc .L474 ++ ldr q0, [x13] ++ str q0, [x19] ++ cmp x6, 16 ++ bhi .L475 ++.L479: ++ sub x0, x26, x20 ++ str x14, [sp, 224] ++ cmp x28, x0 ++ bls .L477 ++ ldr x0, [sp, 120] ++ sub x0, x26, x0 ++ cmp x28, x0 ++ bhi .L449 ++ ldr x2, [sp, 128] ++ sub x1, x3, x20 ++ add x1, x2, x1 ++ add x0, x1, x22 ++ cmp x2, x0 ++ bcs .L671 ++ sub x2, x20, x3 ++ mov x0, x26 ++ sub x22, x22, x2 ++ add x26, x26, x2 ++ str x4, [sp, 136] ++ bl memmove ++ ldr x4, [sp, 136] ++ mov x3, x20 ++ add x11, sp, 312 ++.L477: ++ cmp x28, 15 ++ bls .L482 ++ ldr q0, [x3] ++ str q0, [x26] ++ cmp x22, 16 ++ ble .L478 ++ add x1, x26, 16 ++ add x2, x3, 16 + .p2align 3,,7 -+.L490: -+ ldr q0, [x1] -+ add x0, x0, 32 ++.L483: ++ ldr q0, [x2] + add x1, x1, 32 -+ str q0, [x0, -32] -+ ldr q0, [x1, -16] -+ str q0, [x0, -16] -+ cmp x20, x0 -+ bhi .L490 -+.L487: -+ add x20, x11, x13 -+ cmp w19, 64 -+ bhi .L497 -+ ldr x0, [sp, 320] -+ cmp x9, x0 -+ bls .L696 -+ cmp x24, x0 -+ beq .L497 -+ lsr w1, w19, 3 -+ lsr w2, w19, 3 -+ sub x1, x0, x1 -+ cmp x24, x1 -+ bls .L499 -+ sub x1, x0, x24 -+ mov w2, w1 -+ sub x1, x0, w1, uxtw -+.L499: -+ ldr x21, [x1] -+ sub w19, w19, w2, lsl 3 -+ str x21, [sp, 304] -+ str w19, [sp, 312] -+ str x1, [sp, 320] -+.L497: -+ ldr x6, [x27, x23, lsl 3] -+ ldr x12, [x28, x12, lsl 3] -+ ldr x11, [x8, x7, lsl 3] -+ ubfx w15, w6, 16, 8 -+ ldr q0, [x22] -+ ubfx w16, w12, 16, 8 -+ add w7, w15, w16 -+ ubfx w14, w11, 16, 8 -+ add w7, w14, w7 -+ lsr x18, x6, 32 -+ str q0, [x20] -+ and w7, w7, 255 -+ mov x2, x18 -+ lsr x23, x12, 32 -+ lsr x17, x11, 32 -+ lsr w13, w6, 24 -+ lsr w1, w12, 24 -+ lsr w0, w11, 24 -+ cmp w14, 1 -+ bhi .L697 -+ cbnz w14, .L502 -+ cbz x18, .L698 -+ cbnz w16, .L699 -+.L506: -+ cbnz w15, .L700 ++ add x2, x2, 32 ++ str q0, [x1, -32] ++ ldr q0, [x2, -16] ++ str q0, [x1, -16] ++ cmp x4, x1 ++ bhi .L483 ++.L478: ++ cmn x21, #120 ++ bhi .L654 ++ ldr w0, [sp, 312] ++ add x19, x19, x21 ++ subs w24, w24, #1 ++ mov w2, w0 ++ beq .L655 ++ cmp w0, 64 ++ bhi .L491 ++ ldr x1, [sp, 320] ++ ldr x3, [sp, 336] ++ cmp x1, x3 ++ bcs .L672 ++ ldr x4, [sp, 328] ++ cmp x1, x4 ++ beq .L491 ++ lsr w3, w0, 3 ++ lsr w0, w0, 3 ++ sub x3, x1, x3 ++ cmp x4, x3 ++ bls .L493 ++ sub x4, x1, x4 ++ mov w0, w4 ++ sub x3, x1, x4, uxtw ++.L493: ++ ldr x1, [x3] ++ sub w0, w2, w0, lsl 3 ++ str x1, [sp, 304] ++ str w0, [sp, 312] ++ str x3, [sp, 320] ++.L491: ++ ldp x7, x6, [sp, 344] ++ ldp x4, x3, [sp, 376] ++ ldp x2, x1, [sp, 360] ++ ldr x15, [x6, x7, lsl 3] ++ ldr x14, [x3, x4, lsl 3] ++ ldr x10, [x1, x2, lsl 3] ++ ubfx w13, w15, 16, 8 ++ ubfx w16, w14, 16, 8 ++ lsr x18, x15, 32 ++ add w2, w13, w16 ++ lsr x22, x14, 32 ++ ubfx w7, w10, 16, 8 ++ mov x6, x18 ++ add w2, w7, w2 ++ lsr x9, x10, 32 ++ and w2, w2, 255 ++ lsr w3, w15, 24 ++ lsr w1, w14, 24 ++ lsr w4, w10, 24 ++ cmp w7, 1 ++ bls .L494 ++ ldr x17, [sp, 304] ++ neg w26, w7 ++ ldr x18, [sp, 392] ++ lsl x17, x17, x0 ++ add w0, w0, w7 ++ lsr x7, x17, x26 ++ add x28, x9, x7 ++ ldr x7, [sp, 400] ++ str w0, [sp, 312] ++ stp x28, x18, [sp, 392] ++ str x7, [sp, 408] ++.L495: ++ cbnz w16, .L673 ++.L500: ++ cmp w2, 30 ++ bhi .L674 ++.L664: ++ ldr x2, [sp, 304] ++.L502: ++ cbnz w13, .L675 +.L507: -+ cmp w7, 30 -+ bhi .L701 -+.L508: -+ add w19, w13, w19 -+ uxtw x14, w1 -+ uxtw x16, w0 -+ add w1, w1, w19 -+ add w7, w0, w1 -+ ldr w15, [x4, x13, lsl 2] -+ ldr w14, [x4, x14, lsl 2] -+ neg w19, w19 -+ ldr w13, [x4, x16, lsl 2] ++ uxtw x7, w1 ++ add w0, w3, w0 ++ uxtw x16, w4 ++ ldr w13, [x27, x3, lsl 2] ++ add w1, w1, w0 ++ neg w0, w0 ++ add w4, w4, w1 ++ ldr w7, [x27, x7, lsl 2] + neg w1, w1 -+ neg w0, w7 -+ lsr x19, x21, x19 -+ lsr x1, x21, x1 -+ and x19, x19, x15 -+ and x1, x1, x14 -+ lsr x0, x21, x0 ++ ldr w3, [x27, x16, lsl 2] ++ lsr x0, x2, x0 ++ neg w16, w4 + and x0, x0, x13 -+ add x19, x19, w6, uxth -+ add x11, x0, w11, uxth -+ add x6, x1, w12, uxth -+ str w7, [sp, 312] -+ add x12, x20, x2 -+ str x19, [sp, 344] -+ str x11, [sp, 360] -+ str x6, [sp, 376] -+ cmp x2, 16 ++ lsr x1, x2, x1 ++ ldr x13, [sp, 224] ++ and x1, x1, x7 ++ ldr x7, [x23, 30352] ++ lsr x2, x2, x16 ++ and x2, x2, x3 ++ add x1, x1, x14, uxth ++ add x0, x0, x15, uxth ++ add x2, x2, x10, uxth ++ add x14, x13, x6 ++ str w4, [sp, 312] ++ str x0, [sp, 344] ++ str x2, [sp, 360] ++ str x1, [sp, 376] ++ cmp x14, x7 ++ bls .L508 ++ mov x12, x22 ++ mov w21, w24 ++ mov x22, x28 ++ mov x24, x20 ++ mov x20, x23 ++ ldp x23, x28, [sp, 144] ++ ldr x25, [sp, 160] ++.L473: ++ cmp w21, 0 ++ ble .L449 ++ subs x2, x7, x13 ++ beq .L509 ++ sub x0, x23, x19 ++ mov x26, -70 ++ cmp x2, x0 ++ bhi .L660 ++ mov x0, x19 ++ sub x6, x6, x2 ++ add x19, x19, x2 ++ mov x1, x13 ++ bl ZSTD_safecopyDstBeforeSrc ++.L509: ++ add x3, x19, x6 ++ add x27, x20, 94208 ++ sub x2, x3, x22 ++ mov x7, 30364 ++ add x0, x20, x7 ++ add x27, x27, 1692 ++ add x1, x0, x6 ++ add x26, x12, x6 ++ prfm PLDL1KEEP, [x2] ++ cmp x27, x1 ++ str x0, [sp, 224] ++ add x5, x19, x26 ++ str wzr, [x25, 13976] ++ sub x8, x23, #32 ++ ccmp x5, x8, 2, cs ++ bhi .L510 ++ ldr q0, [x20, x7] ++ str q0, [x19] ++ cmp x6, 16 + bhi .L511 -+.L514: -+ add x22, x22, x2 -+ str x22, [sp, 272] -+ add x18, x2, x23 -+ sub x1, x12, x3 -+ cbnz x26, .L702 ++.L515: ++ sub x0, x3, x24 ++ str x1, [sp, 224] ++ cmp x0, x22 ++ bcc .L512 +.L513: -+ cmp x3, 15 ++ cmp x22, 15 + bls .L518 -+ ldr q0, [x1] -+ str q0, [x12] -+ cmp x23, 16 -+ ble .L517 -+ add x23, x12, x23 -+ add x0, x12, 16 -+ add x1, x1, 16 -+ .p2align 3,,7 -+.L520: -+ ldr q0, [x1] -+ add x0, x0, 32 -+ add x1, x1, 32 -+ str q0, [x0, -32] -+ ldr q0, [x1, -16] -+ str q0, [x0, -16] -+ cmp x23, x0 -+ bhi .L520 -+.L517: -+ add x13, x20, x18 -+ cmp w7, 64 -+ bhi .L527 -+ ldr x0, [sp, 320] -+ cmp x9, x0 -+ bls .L703 -+ cmp x24, x0 -+ beq .L527 -+ lsr w1, w7, 3 -+ lsr w2, w7, 3 -+ sub x1, x0, x1 -+ cmp x24, x1 -+ bls .L529 -+ sub x1, x0, x24 -+ mov w2, w1 -+ sub x1, x0, w1, uxtw -+.L529: -+ ldr x21, [x1] -+ sub w7, w7, w2, lsl 3 -+ str x21, [sp, 304] -+ str w7, [sp, 312] -+ str x1, [sp, 320] -+.L527: -+ ldr x15, [x28, x6, lsl 3] -+ ldr x12, [x27, x19, lsl 3] -+ ldr x14, [x8, x11, lsl 3] -+ ubfx w23, w15, 16, 8 -+ ldr q0, [x22] -+ ubfx w19, w12, 16, 8 -+ add w0, w19, w23 -+ ubfx w17, w14, 16, 8 -+ add w0, w17, w0 -+ lsr x30, x12, 32 -+ str q0, [x20, x18] -+ and w0, w0, 255 -+ mov x6, x30 -+ lsr x11, x15, 32 -+ lsr w16, w12, 24 -+ lsr w2, w15, 24 -+ lsr w1, w14, 24 -+ lsr x18, x14, 32 -+ cmp w17, 1 -+ bhi .L704 -+ cbnz w17, .L532 -+ cbz x30, .L705 -+ cbnz w23, .L706 -+.L536: -+ cbnz w19, .L707 -+.L537: -+ cmp w0, 30 -+ bhi .L708 -+.L538: -+ add w0, w16, w7 -+ uxtw x18, w2 -+ uxtw x7, w1 -+ add w2, w2, w0 -+ add w19, w1, w2 -+ ldr w17, [x4, x16, lsl 2] -+ neg w0, w0 -+ ldr w16, [x4, x18, lsl 2] -+ ldr w1, [x4, x7, lsl 2] -+ neg w2, w2 -+ neg w7, w19 -+ lsr x0, x21, x0 -+ lsr x2, x21, x2 -+ and x0, x0, x17 -+ lsr x23, x21, x7 -+ and x2, x2, x16 -+ and x23, x23, x1 -+ add x21, x0, w12, uxth -+ add x15, x2, w15, uxth -+ add x23, x23, w14, uxth -+ str w19, [sp, 312] -+ add x7, x13, x6 -+ str x21, [sp, 344] -+ str x23, [sp, 360] -+ str x15, [sp, 376] -+ cmp x6, 16 -+ bhi .L541 -+.L544: -+ add x22, x22, x6 -+ str x22, [sp, 272] -+ add x20, x6, x11 -+ sub x1, x7, x3 -+ cbnz x26, .L709 -+.L543: -+ cmp x3, 15 -+ bls .L548 -+ ldr q0, [x1] -+ str q0, [x7] -+ cmp x11, 16 -+ ble .L547 -+ add x2, x7, x11 -+ add x0, x7, 16 -+ add x1, x1, 16 ++ ldr q0, [x2] ++ str q0, [x3] ++ cmp x12, 16 ++ ble .L514 ++ add x0, x3, 16 ++ add x1, x2, 16 + .p2align 3,,7 -+.L550: ++.L519: + ldr q0, [x1] + add x0, x0, 32 + add x1, x1, 32 + str q0, [x0, -32] + ldr q0, [x1, -16] + str q0, [x0, -16] -+ cmp x2, x0 -+ bhi .L550 -+.L547: -+ add x11, x13, x20 -+ cmp w19, 64 -+ bhi .L557 -+ ldr x0, [sp, 320] -+ cmp x9, x0 -+ bls .L710 -+ cmp x24, x0 -+ beq .L557 -+ lsr w1, w19, 3 -+ lsr w2, w19, 3 -+ sub x1, x0, x1 -+ cmp x24, x1 -+ bls .L559 -+ sub x1, x0, x24 -+ mov w2, w1 -+ sub x1, x0, w1, uxtw -+.L559: -+ ldr x0, [x1] -+ sub w19, w19, w2, lsl 3 -+ str x0, [sp, 304] -+ str w19, [sp, 312] -+ str x1, [sp, 320] -+.L557: -+ sub w25, w25, #3 -+ cmp w25, 12 -+ ble .L681 -+.L716: -+ ldr x3, [sp, 392] -+ b .L561 -+ .p2align 2,,3 -+.L453: ++ cmp x5, x0 ++ bhi .L519 ++.L514: ++ cmn x26, #120 ++ bhi .L660 ++ add x19, x19, x26 ++ subs w9, w21, #1 ++ bne .L653 ++ ldr w2, [sp, 312] ++ b .L490 ++ .p2align 2,,3 ++.L568: + .cfi_restore 21 + .cfi_restore 22 -+ .cfi_restore 27 -+ .cfi_restore 28 -+ ldr x0, [x8, 30344] -+ str x0, [sp, 112] -+ b .L454 -+ .p2align 2,,3 -+.L600: -+ ldr x11, [sp, 104] -+.L455: -+ mov x7, -70 -+ ldr x0, [sp, 96] -+ sub x2, x0, x20 -+ ldr x0, [sp, 112] -+ sub x0, x0, x11 ++ mov x19, x28 ++ add x25, x0, 16384 ++.L448: ++ ldr w2, [x25, 13976] ++ sub x0, x23, x19 ++ cmp w2, 2 ++ beq .L676 ++ sub x2, x27, x1 + cmp x2, x0 -+ bhi .L452 -+ cbz x11, .L599 -+ mov x0, x11 -+ add x11, x11, x2 -+ mov x1, x20 -+ str x11, [sp, 96] ++ bhi .L566 ++.L679: ++ cbz x19, .L567 ++ mov x0, x19 ++ add x19, x19, x2 + bl memcpy -+ ldr x11, [sp, 96] -+.L599: -+ ldr x0, [sp, 104] -+ sub x7, x11, x0 -+.L452: -+ mov x0, x7 -+ ldp x19, x20, [sp, 16] -+ ldp x23, x24, [sp, 48] -+ ldp x25, x26, [sp, 64] -+ ldp x29, x30, [sp], 416 -+ .cfi_remember_state -+ .cfi_restore 30 ++.L567: ++ sub x26, x19, x28 ++.L447: ++ mov x0, x26 ++ ldp x29, x30, [sp, 16] ++ ldp x19, x20, [sp, 32] ++ ldp x23, x24, [sp, 64] ++ ldp x25, x26, [sp, 80] ++ ldp x27, x28, [sp, 96] ++ add sp, sp, 416 + .cfi_restore 29 ++ .cfi_restore 30 ++ .cfi_restore 27 ++ .cfi_restore 28 + .cfi_restore 25 + .cfi_restore 26 + .cfi_restore 23 @@ -3574,40 +3690,58 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + .cfi_def_cfa_offset 0 + ret + .p2align 2,,3 -+.L457: -+ .cfi_restore_state -+ ldrb w1, [x24] ++.L450: ++ .cfi_def_cfa_offset 416 ++ .cfi_offset 19, -384 ++ .cfi_offset 20, -376 ++ .cfi_offset 21, -368 ++ .cfi_offset 22, -360 ++ .cfi_offset 23, -352 ++ .cfi_offset 24, -344 ++ .cfi_offset 25, -336 ++ .cfi_offset 26, -328 ++ .cfi_offset 27, -320 ++ .cfi_offset 28, -312 ++ .cfi_offset 29, -400 ++ .cfi_offset 30, -392 ++ ldrb w1, [x3] + str x1, [sp, 304] -+ str x24, [sp, 320] ++ str x3, [sp, 320] + cmp x4, 5 -+ beq .L459 -+ bhi .L460 ++ beq .L452 ++ bhi .L453 + cmp x4, 3 -+ beq .L461 ++ beq .L454 + cmp x4, 4 -+ bne .L711 -+.L462: -+ ldrb w2, [x24, 3] ++ bne .L677 ++.L455: ++ ldrb w2, [x3, 3] + add x1, x1, x2, lsl 24 -+.L461: -+ ldrb w2, [x24, 2] ++.L454: ++ ldrb w2, [x3, 2] + add x1, x1, x2, lsl 16 -+ ldrb w2, [x24, 1] ++ ldrb w2, [x3, 1] + add x1, x1, x2, lsl 8 + str x1, [sp, 304] -+.L464: -+ cbnz w0, .L712 -+.L456: -+ mov x7, -20 -+.L725: -+ mov x0, x7 -+ ldp x19, x20, [sp, 16] -+ ldp x23, x24, [sp, 48] -+ ldp x25, x26, [sp, 64] -+ ldp x29, x30, [sp], 416 ++.L457: ++ cbnz w0, .L678 ++.L449: ++ mov x26, -20 ++ mov x0, x26 ++ ldp x29, x30, [sp, 16] ++ ldp x19, x20, [sp, 32] ++ ldp x21, x22, [sp, 48] + .cfi_remember_state -+ .cfi_restore 30 ++ .cfi_restore 22 ++ .cfi_restore 21 ++ ldp x23, x24, [sp, 64] ++ ldp x25, x26, [sp, 80] ++ ldp x27, x28, [sp, 96] ++ add sp, sp, 416 + .cfi_restore 29 ++ .cfi_restore 30 ++ .cfi_restore 27 ++ .cfi_restore 28 + .cfi_restore 25 + .cfi_restore 26 + .cfi_restore 23 @@ -3616,2733 +3750,988 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + .cfi_restore 20 + .cfi_def_cfa_offset 0 + ret -+.L460: ++ .p2align 2,,3 ++.L453: + .cfi_restore_state + cmp x4, 6 -+ beq .L465 -+ ldrb w2, [x24, 6] ++ beq .L458 ++ ldrb w2, [x3, 6] + add x1, x1, x2, lsl 48 -+.L465: -+ ldrb w2, [x24, 5] ++.L458: ++ ldrb w2, [x3, 5] + add x1, x1, x2, lsl 40 -+.L459: -+ ldrb w2, [x24, 4] ++.L452: ++ ldrb w2, [x3, 4] + add x1, x1, x2, lsl 32 -+ b .L462 -+.L711: ++ b .L455 ++ .p2align 2,,3 ++.L677: + cmp x4, 2 -+ bne .L464 -+ ldrb w2, [x24, 1] ++ bne .L457 ++ ldrb w2, [x3, 1] + add x1, x1, x2, lsl 8 + str x1, [sp, 304] -+ b .L464 -+ .p2align 2,,3 -+.L706: -+ .cfi_offset 21, -384 -+ .cfi_offset 22, -376 -+ .cfi_offset 27, -336 -+ .cfi_offset 28, -328 -+ lsl x17, x21, x7 -+ neg w18, w23 -+ add w7, w7, w23 -+ str w7, [sp, 312] -+ lsr x17, x17, x18 -+ add x11, x11, x17 -+ cbz w19, .L537 -+.L707: -+ neg w18, w19 -+ lsl x17, x21, x7 -+ add w7, w7, w19 -+ str w7, [sp, 312] -+ lsr x17, x17, x18 -+ add x6, x6, x17 -+ cmp w0, 30 -+ bls .L538 -+.L708: -+ cmp w7, 64 -+ bhi .L538 -+ ldr x17, [sp, 320] -+ cmp x9, x17 -+ bls .L713 -+ cmp x24, x17 -+ beq .L538 -+ lsr w0, w7, 3 -+ lsr w18, w7, 3 -+ sub x0, x17, x0 -+ cmp x24, x0 -+ bls .L540 -+ sub x0, x17, x24 -+ mov w18, w0 -+ sub x0, x17, w0, uxtw -+.L540: -+ ldr x21, [x0] -+ sub w7, w7, w18, lsl 3 -+ str x21, [sp, 304] -+ str x0, [sp, 320] -+ b .L538 -+ .p2align 2,,3 -+.L693: -+ ldr x14, [sp, 304] -+ neg w18, w16 -+ lsl x14, x14, x19 -+ add w19, w19, w16 -+ str w19, [sp, 312] -+ lsr x14, x14, x18 -+ add x20, x20, x14 -+ cbz w15, .L474 -+.L694: -+ ldr x14, [sp, 304] -+ neg w16, w15 -+ lsl x14, x14, x19 -+ add w19, w19, w15 -+ str w19, [sp, 312] -+ lsr x14, x14, x16 -+ add x2, x2, x14 -+ cmp w1, 30 -+ bls .L688 -+.L475: -+ cmp w19, 64 -+ bhi .L688 -+ ldr x1, [sp, 320] -+ cmp x9, x1 -+ bls .L714 -+ cmp x24, x1 -+ beq .L688 -+ lsr w14, w19, 3 -+ lsr w15, w19, 3 -+ sub x14, x1, x14 -+ cmp x24, x14 -+ bls .L480 -+ sub x14, x1, x24 -+ mov w15, w14 -+ sub x14, x1, w14, uxtw -+.L480: -+ ldr x21, [x14] -+ sub w19, w19, w15, lsl 3 -+ str x21, [sp, 304] -+ str x14, [sp, 320] -+ b .L476 -+ .p2align 2,,3 -+.L699: -+ lsl x14, x21, x19 -+ neg w17, w16 -+ add w19, w19, w16 -+ str w19, [sp, 312] -+ lsr x14, x14, x17 -+ add x23, x23, x14 -+ cbz w15, .L507 -+.L700: -+ neg w16, w15 -+ lsl x14, x21, x19 -+ add w19, w19, w15 -+ str w19, [sp, 312] -+ lsr x14, x14, x16 -+ add x2, x2, x14 -+ cmp w7, 30 -+ bls .L508 -+.L701: -+ cmp w19, 64 -+ bhi .L508 -+ ldr x14, [sp, 320] -+ cmp x9, x14 -+ bls .L715 -+ cmp x24, x14 -+ beq .L508 -+ lsr w7, w19, 3 -+ lsr w15, w19, 3 -+ sub x7, x14, x7 -+ cmp x24, x7 -+ bls .L510 -+ sub x7, x14, x24 -+ mov w15, w7 -+ sub x7, x14, w7, uxtw -+.L510: -+ ldr x21, [x7] -+ sub w19, w19, w15, lsl 3 -+ str x21, [sp, 304] -+ str x7, [sp, 320] -+ b .L508 -+ .p2align 2,,3 -+.L705: -+ ldr x17, [sp, 400] -+ stp x17, x3, [sp, 392] -+ mov x3, x17 -+ cbz w23, .L536 -+ b .L706 ++ b .L457 + .p2align 2,,3 -+.L698: -+ ldr x14, [sp, 400] -+ stp x14, x3, [sp, 392] -+ mov x3, x14 -+ cbz w16, .L506 -+ b .L699 -+ .p2align 2,,3 -+.L692: -+ ldr x14, [sp, 400] -+ stp x14, x3, [sp, 392] -+ mov x3, x14 -+ cbz w16, .L473 -+ b .L693 -+ .p2align 2,,3 -+.L712: ++.L676: + .cfi_restore 21 + .cfi_restore 22 ++ sub x2, x27, x1 ++ cmp x2, x0 ++ bhi .L566 ++ cbz x19, .L565 ++ mov x0, x19 ++ add x19, x19, x2 ++ bl memmove ++ sub x0, x23, x19 ++.L565: ++ add x27, x20, 94208 ++ mov x1, 30364 ++ add x27, x27, 1692 ++ add x1, x20, x1 ++ sub x2, x27, x1 ++ str x1, [sp, 224] ++ str wzr, [x25, 13976] ++ cmp x2, x0 ++ bls .L679 ++.L566: ++ mov x26, -70 ++ mov x0, x26 ++ ldp x29, x30, [sp, 16] ++ ldp x19, x20, [sp, 32] ++ ldp x23, x24, [sp, 64] ++ ldp x25, x26, [sp, 80] ++ ldp x27, x28, [sp, 96] ++ add sp, sp, 416 ++ .cfi_restore 29 ++ .cfi_restore 30 + .cfi_restore 27 + .cfi_restore 28 ++ .cfi_restore 25 ++ .cfi_restore 26 ++ .cfi_restore 23 ++ .cfi_restore 24 ++ .cfi_restore 19 ++ .cfi_restore 20 ++ .cfi_def_cfa_offset 0 ++ ret ++ .p2align 2,,3 ++.L678: ++ .cfi_def_cfa_offset 416 ++ .cfi_offset 19, -384 ++ .cfi_offset 20, -376 ++ .cfi_offset 21, -368 ++ .cfi_offset 22, -360 ++ .cfi_offset 23, -352 ++ .cfi_offset 24, -344 ++ .cfi_offset 25, -336 ++ .cfi_offset 26, -328 ++ .cfi_offset 27, -320 ++ .cfi_offset 28, -312 ++ .cfi_offset 29, -400 ++ .cfi_offset 30, -392 + mov w1, 8 + sub w1, w1, w4 + clz w0, w0 -+ stp x21, x22, [sp, 32] -+ .cfi_offset 22, -376 -+ .cfi_offset 21, -384 + add w0, w0, w1, lsl 3 + sub w0, w0, #23 -+ stp x27, x28, [sp, 80] -+ .cfi_offset 28, -328 -+ .cfi_offset 27, -336 + str w0, [sp, 312] -+ b .L458 -+ .p2align 2,,3 -+.L703: -+ lsr w1, w7, 3 -+ and w7, w7, 7 -+ sub x0, x0, x1 -+ str w7, [sp, 312] -+ str x0, [sp, 320] -+ ldr x21, [x0] -+ str x21, [sp, 304] -+ b .L527 ++ b .L451 + .p2align 2,,3 -+.L710: -+ lsr w1, w19, 3 -+ sub w25, w25, #3 -+ sub x0, x0, x1 -+ str x0, [sp, 320] -+ and w19, w19, 7 -+ str w19, [sp, 312] -+ ldr x0, [x0] -+ str x0, [sp, 304] -+ cmp w25, 12 -+ bgt .L716 -+.L681: -+ mov x20, x22 -+ mov x22, x15 -+.L560: -+ ldr x0, [sp, 112] -+ mov x12, x28 -+ mov x16, x20 -+ mov x28, x26 -+ sub x15, x0, #32 -+ mov x26, x11 -+ ldr x0, [sp, 216] -+ str x9, [sp, 128] -+ mov x9, x27 -+ add x13, x0, 64 -+ add x0, x0, 32 -+ mov x27, x13 -+ mov x13, x24 -+ mov x24, x5 -+ stp x10, x0, [sp, 224] -+ mov w10, w25 -+ .p2align 3,,7 -+.L562: -+ ldr x21, [x9, x21, lsl 3] -+ ldr x22, [x12, x22, lsl 3] -+ ldr x23, [x8, x23, lsl 3] -+ ubfx w17, w21, 16, 8 -+ ubfx w20, w22, 16, 8 -+ lsr x30, x21, 32 -+ add w2, w17, w20 -+ lsr x3, x22, 32 -+ ubfx w6, w23, 16, 8 -+ mov x1, x30 -+ add w2, w6, w2 -+ lsr x25, x23, 32 -+ and w2, w2, 255 -+ lsr w4, w21, 24 -+ lsr w0, w22, 24 -+ lsr w18, w23, 24 -+ cmp w6, 1 -+ bls .L563 -+ ldr x14, [sp, 304] -+ neg w30, w6 -+ ldr x5, [sp, 400] -+ str x5, [sp, 408] -+ lsl x14, x14, x19 -+ add w19, w19, w6 -+ lsr x14, x14, x30 -+ add x25, x25, x14 -+ ldr x14, [sp, 392] -+ str w19, [sp, 312] -+.L564: -+ stp x25, x14, [sp, 392] -+ cbnz w20, .L717 -+.L569: -+ cmp w2, 30 -+ bhi .L570 -+.L689: -+ ldr x6, [sp, 304] -+.L571: -+ cbnz w17, .L718 -+.L576: -+ uxtw x20, w0 -+ add w2, w4, w19 -+ add w0, w0, w2 -+ uxtw x17, w18 -+ add w19, w18, w0 -+ neg w2, w2 -+ ldr w20, [x27, x20, lsl 2] -+ neg w0, w0 -+ ldr w30, [x27, x4, lsl 2] -+ lsr x2, x6, x2 -+ lsr x0, x6, x0 -+ and x0, x0, x20 -+ add x20, x26, x1 -+ and x4, x2, x30 -+ ldr w17, [x27, x17, lsl 2] -+ neg w18, w19 -+ sub x2, x20, x25 -+ add x21, x4, w21, uxth -+ ldr x5, [sp, 96] -+ lsr x6, x6, x18 -+ and x6, x6, x17 -+ prfm PLDL1KEEP, [x2] -+ add x22, x0, w22, uxth -+ add x23, x6, w23, uxth -+ add x0, x16, x1 -+ add x6, x1, x3 -+ str w19, [sp, 312] -+ add x4, x26, x6 -+ str x21, [sp, 344] -+ cmp x5, x0 -+ str x23, [sp, 360] -+ ccmp x4, x15, 2, cs -+ str x22, [sp, 376] -+ bhi .L577 -+ ldr q0, [x16] -+ str q0, [x26] -+ cmp x1, 16 -+ bhi .L578 -+.L582: -+ str x0, [sp, 272] -+ sub x0, x20, x24 -+ cmp x25, x0 -+ bls .L580 -+ ldr x0, [sp, 120] -+ sub x0, x20, x0 -+ cmp x25, x0 -+ bhi .L687 -+ sub x1, x2, x24 -+ add x1, x28, x1 -+ add x0, x1, x3 -+ cmp x28, x0 -+ bcs .L719 -+ sub x2, x24, x2 -+ mov x0, x20 -+ sub x3, x3, x2 -+ add x20, x20, x2 -+ stp x3, x4, [sp, 136] -+ stp x9, x12, [sp, 152] -+ stp x8, x15, [sp, 168] -+ str x13, [sp, 184] -+ str w10, [sp, 192] -+ stp x14, x6, [sp, 200] -+ bl memmove -+ ldr w10, [sp, 192] -+ ldp x3, x4, [sp, 136] -+ mov x2, x24 -+ ldp x9, x12, [sp, 152] -+ ldp x8, x15, [sp, 168] -+ ldp x14, x6, [sp, 200] -+ ldr x13, [sp, 184] -+.L580: -+ cmp x25, 15 -+ bls .L585 -+ ldr q0, [x2] -+ str q0, [x20] -+ cmp x3, 16 -+ ble .L581 -+ add x20, x20, 16 -+ add x2, x2, 16 ++.L674: ++ cmp w0, 64 ++ bhi .L664 ++ ldr x2, [sp, 320] ++ ldr x7, [sp, 336] ++ cmp x2, x7 ++ bcs .L680 ++ ldr x16, [sp, 328] ++ cmp x2, x16 ++ beq .L664 ++ lsr w7, w0, 3 ++ lsr w17, w0, 3 ++ sub x7, x2, x7 ++ cmp x16, x7 ++ bls .L506 ++ sub x16, x2, x16 ++ mov w17, w16 ++ sub x7, x2, x16, uxtw ++.L506: ++ ldr x2, [x7] ++ str x2, [sp, 304] ++ str x7, [sp, 320] ++ sub w0, w0, w17, lsl 3 ++ cbz w13, .L507 + .p2align 3,,7 -+.L586: -+ ldr q0, [x2] -+ add x20, x20, 32 -+ add x2, x2, 32 -+ str q0, [x20, -32] -+ ldr q0, [x2, -16] -+ str q0, [x20, -16] -+ cmp x4, x20 -+ bhi .L586 -+.L581: -+ cmn x6, #120 -+ bhi .L682 -+ add x26, x26, x6 -+ subs w10, w10, #1 -+ beq .L593 -+ cmp w19, 64 -+ bhi .L594 -+ ldr x1, [sp, 128] -+ ldr x0, [sp, 320] -+ cmp x1, x0 -+ bls .L720 -+ cmp x13, x0 -+ beq .L594 -+ lsr w1, w19, 3 -+ lsr w2, w19, 3 -+ sub x1, x0, x1 -+ cmp x13, x1 -+ bls .L596 -+ sub x1, x0, x13 -+ mov w2, w1 -+ sub x1, x0, w1, uxtw -+.L596: -+ ldr x0, [x1] -+ sub w19, w19, w2, lsl 3 -+ str x0, [sp, 304] -+ str w19, [sp, 312] -+ str x1, [sp, 320] -+.L594: -+ ldr x16, [sp, 272] -+ b .L562 -+ .p2align 2,,3 -+.L696: -+ lsr w1, w19, 3 -+ and w19, w19, 7 -+ sub x0, x0, x1 -+ str w19, [sp, 312] -+ str x0, [sp, 320] -+ ldr x21, [x0] -+ str x21, [sp, 304] -+ b .L497 ++.L675: ++ neg w16, w13 ++ lsl x7, x2, x0 ++ add w0, w0, w13 ++ lsr x7, x7, x16 ++ add x6, x6, x7 ++ b .L507 + .p2align 2,,3 -+.L570: -+ cmp w19, 64 -+ bhi .L689 -+ ldr x2, [sp, 128] -+ ldr x6, [sp, 320] -+ cmp x2, x6 -+ bls .L721 -+ cmp x13, x6 -+ beq .L689 -+ lsr w2, w19, 3 -+ lsr w20, w19, 3 -+ sub x2, x6, x2 -+ cmp x13, x2 -+ bls .L575 -+ sub x2, x6, x13 -+ mov w20, w2 -+ sub x2, x6, w2, uxtw -+.L575: -+ ldr x6, [x2] -+ str x6, [sp, 304] -+ str x2, [sp, 320] -+ sub w19, w19, w20, lsl 3 -+ cbz w17, .L576 -+ .p2align 3,,7 -+.L718: -+ neg w20, w17 -+ lsl x2, x6, x19 -+ add w19, w19, w17 -+ lsr x2, x2, x20 -+ add x1, x1, x2 -+ b .L576 -+ .p2align 2,,3 -+.L563: -+ cmp w30, 0 -+ cset w14, eq -+ cmp w6, 1 -+ beq .L565 -+ cmp w30, 0 -+ add x14, x14, 10 -+ cset x6, ne -+ add x5, sp, 312 -+ add x6, x6, 10 -+ ldr x25, [x5, x14, lsl 3] -+ ldr x14, [x5, x6, lsl 3] -+ stp x25, x14, [sp, 392] -+ cbz w20, .L569 -+.L717: -+ ldr x6, [sp, 304] -+ neg w30, w20 -+ lsl x6, x6, x19 -+ add w19, w19, w20 -+ lsr x6, x6, x30 -+ add x3, x3, x6 -+ str w19, [sp, 312] -+ b .L569 ++.L494: ++ cmp w18, 0 ++ cset w17, eq ++ cmp w7, 1 ++ beq .L496 ++ cmp w18, 0 ++ add x17, x17, 10 ++ cset x7, ne ++ add x7, x7, 10 ++ ldr x28, [x11, x17, lsl 3] ++ ldr x7, [x11, x7, lsl 3] ++ stp x28, x7, [sp, 392] ++ cbz w16, .L500 ++.L673: ++ ldr x7, [sp, 304] ++ neg w17, w16 ++ lsl x7, x7, x0 ++ add w0, w0, w16 ++ lsr x7, x7, x17 ++ add x22, x22, x7 ++ str w0, [sp, 312] ++ b .L500 + .p2align 2,,3 -+.L720: -+ lsr w1, w19, 3 -+ and w19, w19, 7 -+ sub x0, x0, x1 -+ str x0, [sp, 320] -+ str w19, [sp, 312] -+ ldr x16, [sp, 272] -+ ldr x0, [x0] -+ str x0, [sp, 304] -+ b .L562 -+ .p2align 2,,3 -+.L697: -+ ldr x18, [sp, 400] -+ neg w30, w14 -+ stp x3, x18, [sp, 400] -+ lsl x18, x21, x19 -+ lsr x18, x18, x30 -+ add w19, w19, w14 -+ add x17, x17, x18 -+ str w19, [sp, 312] -+ mov x3, x17 -+ str x17, [sp, 392] -+ cbz w16, .L506 -+ b .L699 ++.L672: ++ lsr w2, w0, 3 ++ and w0, w0, 7 ++ sub x1, x1, x2 ++ str x1, [sp, 320] ++ str w0, [sp, 312] ++ ldr x1, [x1] ++ str x1, [sp, 304] ++ b .L491 + .p2align 2,,3 -+.L691: -+ ldr x30, [sp, 304] -+ neg w23, w14 -+ ldr x18, [sp, 400] -+ stp x3, x18, [sp, 400] -+ lsl x3, x30, x19 -+ add w19, w19, w14 -+ lsr x14, x3, x23 -+ add x14, x21, x14 -+ str w19, [sp, 312] -+ str x14, [sp, 392] -+ mov x3, x14 -+ cbz w16, .L473 -+ b .L693 -+ .p2align 2,,3 -+.L709: -+ sub x0, x7, x5 -+ cmp x3, x0 -+ bls .L543 -+ sub x0, x1, x5 -+ add x0, x26, x0 -+ add x2, x0, x11 -+ cmp x26, x2 -+ bcs .L722 -+ sub x2, x5, x1 -+ mov x1, x0 -+ sub x11, x11, x2 -+ mov x0, x7 -+ add x7, x7, x2 -+ stp x7, x11, [sp, 144] -+ stp x5, x13, [sp, 160] -+ stp x3, x15, [sp, 176] -+ stp x9, x8, [sp, 192] -+ str x4, [sp, 208] -+ str x10, [sp, 224] ++.L671: ++ mov x2, x22 ++ mov x0, x26 + bl memmove -+ ldp x5, x13, [sp, 160] -+ ldp x7, x11, [sp, 144] -+ ldp x3, x15, [sp, 176] -+ mov x1, x5 -+ ldp x9, x8, [sp, 192] -+ ldr x4, [sp, 208] -+ ldr x10, [sp, 224] -+ b .L543 -+ .p2align 2,,3 -+.L541: -+ ldr q0, [x22, 16] -+ sub x0, x6, #16 -+ str q0, [x13, 16] -+ cmp x0, 16 -+ ble .L544 -+ add x0, x13, 32 -+ add x1, x22, 32 -+ .p2align 3,,7 -+.L545: -+ ldr q0, [x1] -+ add x0, x0, 32 -+ add x1, x1, 32 -+ str q0, [x0, -32] -+ ldr q0, [x1, -16] -+ str q0, [x0, -16] -+ cmp x0, x7 -+ bcc .L545 -+ b .L544 ++ add x11, sp, 312 ++ b .L478 + .p2align 2,,3 -+.L511: -+ ldr q0, [x22, 16] -+ sub x0, x2, #16 -+ str q0, [x20, 16] -+ cmp x0, 16 -+ ble .L514 -+ add x0, x20, 32 -+ add x1, x22, 32 -+ .p2align 3,,7 -+.L515: -+ ldr q0, [x1] -+ add x0, x0, 32 -+ add x1, x1, 32 -+ str q0, [x0, -32] -+ ldr q0, [x1, -16] -+ str q0, [x0, -16] -+ cmp x0, x12 -+ bcc .L515 -+ add x22, x22, x2 -+ str x22, [sp, 272] -+ add x18, x2, x23 -+ sub x1, x12, x3 -+ cbz x26, .L513 -+ .p2align 3,,7 -+.L702: -+ sub x0, x12, x5 -+ cmp x3, x0 -+ bls .L513 -+ sub x0, x1, x5 -+ add x0, x26, x0 -+ add x2, x0, x23 -+ cmp x26, x2 -+ bcs .L723 -+ sub x2, x5, x1 -+ mov x1, x0 -+ mov x0, x12 -+ add x12, x12, x2 -+ sub x23, x23, x2 -+ stp x12, x5, [sp, 144] -+ stp x6, x11, [sp, 160] -+ str x18, [sp, 176] -+ str w7, [sp, 184] -+ stp x3, x9, [sp, 192] -+ str x8, [sp, 208] -+ stp x4, x10, [sp, 224] -+ bl memmove -+ ldr w7, [sp, 184] -+ ldp x12, x5, [sp, 144] -+ ldp x6, x11, [sp, 160] -+ ldr x18, [sp, 176] -+ mov x1, x5 -+ ldp x3, x9, [sp, 192] -+ ldr x8, [sp, 208] -+ ldp x4, x10, [sp, 224] -+ b .L513 ++.L474: ++ stp x6, x22, [sp, 232] ++ mov x5, x25 ++ add x4, sp, 224 ++ ldp x7, x0, [sp, 120] ++ str x0, [sp] ++ ldp x0, x1, [sp, 232] ++ stp x0, x1, [sp, 192] ++ add x3, sp, 192 ++ ldr x1, [sp, 144] ++ mov x6, x20 ++ mov x0, x19 ++ str x28, [sp, 208] ++ str x28, [sp, 248] ++ bl ZSTD_execSequenceEndSplitLitBuffer ++ mov x21, x0 ++ add x11, sp, 312 ++ b .L478 + .p2align 2,,3 -+.L704: -+ ldr x20, [sp, 400] -+ neg w30, w17 -+ stp x3, x20, [sp, 400] -+ lsl x20, x21, x7 -+ lsr x20, x20, x30 -+ add w7, w7, w17 -+ add x18, x18, x20 -+ str w7, [sp, 312] -+ mov x3, x18 -+ str x18, [sp, 392] -+ cbz w23, .L536 -+ b .L706 ++.L459: ++ cmp w15, 0 ++ ldr w2, [sp, 312] ++ cset w7, eq ++ cmp w16, 1 ++ beq .L461 ++ cmp w15, 0 ++ add x16, sp, 312 ++ cset x15, ne ++ add x7, x7, 10 ++ add x15, x15, 10 ++ ldr x22, [x16, x7, lsl 3] ++ ldr x15, [x16, x15, lsl 3] ++ stp x22, x15, [sp, 392] ++ b .L460 ++ .p2align 2,,3 ++.L655: ++ mov x20, x23 ++ mov x27, x25 ++ ldp x23, x28, [sp, 144] ++ ldr x25, [sp, 160] ++.L490: ++ cmp w2, 64 ++ bhi .L561 ++ ldr x0, [sp, 320] ++ ldr x1, [sp, 336] ++ cmp x0, x1 ++ bcs .L449 ++ ldr x1, [sp, 328] ++ cmp x0, x1 ++ bne .L449 ++ cmp w2, 64 ++ bne .L449 ++.L561: ++ ldr x0, [sp, 392] ++ ldp x21, x22, [sp, 48] ++ .cfi_remember_state ++ .cfi_restore 22 ++ .cfi_restore 21 ++ str w0, [x25, 10300] ++ ldr x0, [sp, 400] ++ str w0, [x25, 10304] ++ ldr x0, [sp, 408] ++ str w0, [x25, 10308] ++ ldr x1, [sp, 224] ++ b .L448 + .p2align 2,,3 -+.L481: -+ ldr q0, [x22, 16] -+ sub x0, x2, #16 -+ str q0, [x11, 16] -+ cmp x0, 16 -+ ble .L484 -+ add x0, x11, 32 -+ add x1, x22, 32 ++.L475: ++ .cfi_restore_state ++ ldr q0, [x13, 16] ++ sub x6, x6, #16 ++ str q0, [x19, 16] ++ cmp x6, 16 ++ ble .L479 ++ add x13, x13, 32 ++ add x0, x19, 32 + .p2align 3,,7 -+.L485: -+ ldr q0, [x1] ++.L480: ++ ldr q0, [x13] + add x0, x0, 32 -+ add x1, x1, 32 ++ add x13, x13, 32 + str q0, [x0, -32] -+ ldr q0, [x1, -16] ++ ldr q0, [x13, -16] + str q0, [x0, -16] -+ cmp x0, x6 -+ bcc .L485 -+ add x22, x22, x2 -+ str x22, [sp, 272] -+ add x13, x2, x20 -+ sub x1, x6, x3 -+ cbz x26, .L483 -+ .p2align 3,,7 -+.L695: -+ sub x0, x6, x5 -+ cmp x3, x0 -+ bls .L483 -+ sub x0, x1, x5 -+ add x0, x26, x0 -+ add x2, x0, x20 -+ cmp x26, x2 -+ bcs .L724 -+ sub x2, x5, x1 -+ mov x1, x0 -+ mov x0, x6 -+ add x6, x6, x2 -+ sub x20, x20, x2 -+ stp x6, x5, [sp, 144] -+ stp x12, x7, [sp, 160] -+ stp x13, x3, [sp, 176] -+ stp x11, x9, [sp, 192] -+ str x8, [sp, 208] -+ stp x4, x10, [sp, 224] -+ bl memmove -+ ldp x6, x5, [sp, 144] -+ ldp x12, x7, [sp, 160] -+ ldp x13, x3, [sp, 176] -+ mov x1, x5 -+ ldp x11, x9, [sp, 192] -+ ldr x8, [sp, 208] -+ ldp x4, x10, [sp, 224] -+ b .L483 -+ .p2align 2,,3 -+.L488: -+ cmp x3, 7 -+ bhi .L492 -+ ldrb w0, [x1] -+ strb w0, [x6] -+ ldrb w2, [x1, 1] -+ strb w2, [x6, 1] -+ ldrb w2, [x1, 2] -+ strb w2, [x6, 2] -+ ldrb w2, [x1, 3] -+ strb w2, [x6, 3] -+ ldp x2, x0, [sp, 128] -+ ldr w0, [x0, x3, lsl 2] -+ ldrsw x2, [x2, x3, lsl 2] -+ add x15, x1, x0 -+ ldr w14, [x1, x0] -+ sub x1, x15, x2 -+ str w14, [x6, 4] -+.L493: -+ cmp x20, 8 -+ bls .L487 -+ sub x15, x6, x1 -+ add x0, x6, 8 -+ add x2, x1, 8 -+ add x14, x6, x20 -+ cmp x15, 15 -+ bgt .L494 ++ cmp x26, x0 ++ bhi .L480 ++ b .L479 ++ .p2align 2,,3 ++.L670: ++ neg w14, w9 ++ lsl x0, x7, x2 ++ add w2, w2, w9 ++ lsr x0, x0, x14 ++ add x6, x6, x0 ++ b .L472 ++ .p2align 2,,3 ++.L668: ++ ldr x7, [sp, 304] ++ neg w15, w14 ++ lsl x7, x7, x2 ++ add w2, w2, w14 ++ lsr x7, x7, x15 ++ add x12, x12, x7 ++ str w2, [sp, 312] ++ b .L465 ++ .p2align 2,,3 ++.L482: ++ cmp x28, 7 ++ bhi .L484 ++ ldrb w0, [x3] ++ strb w0, [x26] ++ ldrb w1, [x3, 1] ++ strb w1, [x26, 1] ++ ldp x1, x0, [sp, 168] ++ ldrb w6, [x3, 2] ++ strb w6, [x26, 2] ++ ldrb w6, [x3, 3] ++ ldr w0, [x0, x28, lsl 2] ++ ldrsw x1, [x1, x28, lsl 2] ++ strb w6, [x26, 3] ++ add x2, x3, x0 ++ ldr w0, [x3, x0] ++ sub x3, x2, x1 ++ str w0, [x26, 4] ++.L485: ++ cmp x22, 8 ++ bls .L478 ++ add x1, x3, 8 ++ add x0, x26, 8 ++ sub x2, x0, x1 ++ cmp x2, 15 ++ bgt .L486 + .p2align 3,,7 -+.L495: -+ ldr d0, [x2], 8 ++.L487: ++ ldr d0, [x1], 8 + str d0, [x0], 8 -+ cmp x14, x0 -+ bhi .L495 -+ b .L487 ++ cmp x4, x0 ++ bhi .L487 ++ b .L478 + .p2align 2,,3 -+.L518: -+ cmp x3, 7 -+ bhi .L522 -+ ldrb w0, [x1] -+ strb w0, [x12] -+ ldrb w2, [x1, 1] -+ strb w2, [x12, 1] -+ ldrb w2, [x1, 2] -+ strb w2, [x12, 2] -+ ldrb w2, [x1, 3] -+ strb w2, [x12, 3] -+ ldp x2, x0, [sp, 128] -+ ldr w0, [x0, x3, lsl 2] -+ ldrsw x2, [x2, x3, lsl 2] -+ add x14, x1, x0 -+ ldr w13, [x1, x0] -+ sub x1, x14, x2 -+ str w13, [x12, 4] -+.L523: -+ cmp x23, 8 -+ bls .L517 -+ sub x14, x12, x1 -+ add x0, x12, 8 -+ add x2, x1, 8 -+ add x13, x12, x23 -+ cmp x14, 15 -+ bgt .L524 -+ .p2align 3,,7 -+.L525: -+ ldr d0, [x2], 8 -+ str d0, [x0], 8 -+ cmp x13, x0 -+ bhi .L525 -+ b .L517 -+ .p2align 2,,3 -+.L548: -+ cmp x3, 7 -+ bhi .L552 -+ ldrb w0, [x1] -+ strb w0, [x7] -+ ldrb w2, [x1, 1] -+ strb w2, [x7, 1] -+ ldp x2, x0, [sp, 128] -+ ldr w0, [x0, x3, lsl 2] -+ ldrsw x2, [x2, x3, lsl 2] -+ ldrb w3, [x1, 2] -+ add x6, x1, x0 -+ strb w3, [x7, 2] -+ ldrb w3, [x1, 3] -+ strb w3, [x7, 3] -+ ldr w0, [x1, x0] -+ sub x1, x6, x2 -+ str w0, [x7, 4] -+.L553: -+ cmp x11, 8 -+ bls .L547 -+ sub x6, x7, x1 -+ add x0, x7, 8 -+ add x2, x1, 8 -+ add x3, x7, x11 -+ cmp x6, 15 -+ bgt .L554 -+ .p2align 3,,7 -+.L555: -+ ldr d0, [x2], 8 -+ str d0, [x0], 8 -+ cmp x3, x0 -+ bhi .L555 -+ b .L547 -+ .p2align 2,,3 -+.L469: -+ ldr x23, [sp, 304] -+ cmp w18, 0 -+ cinc w14, w21, eq -+ uxtw x14, w14 -+ lsl x18, x23, x19 -+ add w19, w19, 1 -+ add x14, x14, x18, lsr 63 -+ str w19, [sp, 312] -+ sub x18, x3, #1 -+ cmp x14, 3 -+ beq .L471 -+ add x18, sp, 416 -+ add x18, x18, x14, lsl 3 -+ ldr x18, [x18, -24] -+ cmp x14, 1 -+ beq .L472 -+.L471: -+ ldr x14, [sp, 400] -+ str x14, [sp, 408] -+.L472: -+ stp x18, x3, [sp, 392] -+ mov x3, x18 -+ cbz w16, .L473 -+ b .L693 -+ .p2align 2,,3 -+.L532: -+ cmp w30, 0 -+ lsl x20, x21, x7 -+ cinc w17, w18, eq -+ add w7, w7, 1 -+ uxtw x17, w17 -+ str w7, [sp, 312] -+ add x17, x17, x20, lsr 63 -+ sub x18, x3, #1 -+ cmp x17, 3 -+ beq .L534 -+ add x18, sp, 416 -+ add x18, x18, x17, lsl 3 -+ ldr x18, [x18, -24] -+ cmp x17, 1 -+ beq .L535 -+.L534: -+ ldr x17, [sp, 400] -+ str x17, [sp, 408] -+.L535: -+ stp x18, x3, [sp, 392] -+ mov x3, x18 -+ cbz w23, .L536 -+ b .L706 -+ .p2align 2,,3 -+.L502: -+ cmp w18, 0 -+ lsl x18, x21, x19 -+ cinc w14, w17, eq -+ add w19, w19, 1 -+ uxtw x14, w14 -+ str w19, [sp, 312] -+ add x14, x14, x18, lsr 63 -+ sub x17, x3, #1 -+ cmp x14, 3 -+ beq .L504 -+ add x17, sp, 416 -+ add x17, x17, x14, lsl 3 -+ ldr x17, [x17, -24] -+ cmp x14, 1 -+ beq .L505 -+.L504: -+ ldr x14, [sp, 400] -+ str x14, [sp, 408] -+.L505: -+ stp x17, x3, [sp, 392] -+ mov x3, x17 -+ cbz w16, .L506 -+ b .L699 -+ .p2align 2,,3 -+.L719: -+ mov x2, x3 -+ mov x0, x20 -+ stp x9, x12, [sp, 136] -+ stp x8, x15, [sp, 152] -+ str x13, [sp, 168] -+ str w10, [sp, 176] -+ stp x14, x6, [sp, 184] -+ bl memmove -+ ldr w10, [sp, 176] -+ ldp x9, x12, [sp, 136] -+ ldp x8, x15, [sp, 152] -+ ldr x13, [sp, 168] -+ ldp x14, x6, [sp, 184] -+ b .L581 -+ .p2align 2,,3 -+.L577: -+ add x0, sp, 512 -+ stp x1, x3, [sp, 280] -+ mov x4, x5 -+ ldr x6, [sp, 120] -+ mov x7, x28 -+ ldp x0, x1, [x0, -232] -+ stp x0, x1, [sp, 240] -+ mov x5, x24 -+ ldr x1, [sp, 112] -+ add x2, sp, 240 -+ add x3, sp, 272 -+ mov x0, x26 -+ stp x9, x12, [sp, 136] -+ stp x8, x15, [sp, 152] -+ str x13, [sp, 168] -+ str w10, [sp, 176] -+ str x14, [sp, 184] -+ str x25, [sp, 256] -+ str x25, [sp, 296] -+ bl ZSTD_execSequenceEnd -+ ldr w10, [sp, 176] -+ mov x6, x0 -+ ldp x9, x12, [sp, 136] -+ ldp x8, x15, [sp, 152] -+ ldr x13, [sp, 168] -+ ldr x14, [sp, 184] -+ b .L581 -+ .p2align 2,,3 -+.L578: -+ ldr q0, [x16, 16] -+ sub x1, x1, #16 -+ str q0, [x26, 16] -+ cmp x1, 16 -+ ble .L582 -+ add x16, x16, 32 -+ add x1, x26, 32 -+ .p2align 3,,7 -+.L583: -+ ldr q0, [x16] -+ add x1, x1, 32 -+ add x16, x16, 32 -+ str q0, [x1, -32] -+ ldr q0, [x16, -16] -+ str q0, [x1, -16] -+ cmp x20, x1 -+ bhi .L583 -+ b .L582 -+.L687: -+ mov x7, -20 -+ ldp x21, x22, [sp, 32] -+ .cfi_remember_state -+ .cfi_restore 22 -+ .cfi_restore 21 -+ ldp x27, x28, [sp, 80] -+ .cfi_restore 28 -+ .cfi_restore 27 -+ b .L725 -+.L492: -+ .cfi_restore_state -+ ldr d0, [x1] -+ str d0, [x6] -+ b .L493 -+.L522: -+ ldr d0, [x1] -+ str d0, [x12] -+ b .L523 -+.L552: -+ ldr d0, [x1] -+ str d0, [x7] -+ b .L553 -+.L585: -+ cmp x25, 7 -+ bls .L726 -+ ldr d0, [x2] -+ str d0, [x20] -+.L588: -+ cmp x3, 8 -+ bls .L581 -+ sub x16, x20, x2 -+ add x1, x2, 8 -+ add x0, x20, 8 -+ cmp x16, 15 -+ bgt .L589 -+ .p2align 3,,7 -+.L590: -+ ldr d0, [x1], 8 -+ str d0, [x0], 8 -+ cmp x4, x0 -+ bhi .L590 -+ b .L581 -+.L682: -+ mov x7, x6 -+ ldp x21, x22, [sp, 32] -+ .cfi_remember_state -+ .cfi_restore 22 -+ .cfi_restore 21 -+ ldp x27, x28, [sp, 80] -+ .cfi_restore 28 -+ .cfi_restore 27 -+ b .L452 -+ .p2align 2,,3 -+.L593: -+ .cfi_restore_state -+ mov x11, x26 -+ mov x12, x25 -+ ldr x9, [sp, 128] -+ ldr x10, [sp, 224] -+ cmp w19, 64 -+ bhi .L597 -+ ldr x0, [sp, 320] -+ cmp x9, x0 -+ bls .L687 -+ cmp x13, x0 -+ bne .L687 -+ cmp w19, 64 -+ bne .L687 -+.L597: -+ ldr x0, [sp, 408] -+ ldp x21, x22, [sp, 32] -+ .cfi_remember_state -+ .cfi_restore 22 -+ .cfi_restore 21 -+ ldp x27, x28, [sp, 80] -+ .cfi_restore 28 -+ .cfi_restore 27 -+ str w12, [x10, 10300] -+ str w14, [x10, 10304] -+ str w0, [x10, 10308] -+ ldr x20, [sp, 272] -+ b .L455 -+.L565: -+ .cfi_restore_state -+ ldr x6, [sp, 304] -+ add w25, w14, w25 -+ ldr x14, [sp, 392] -+ lsl x6, x6, x19 -+ add w19, w19, 1 -+ add x6, x25, x6, lsr 63 -+ subs x25, x14, #1 -+ str w19, [sp, 312] -+ cinc x25, x25, eq -+ cmp x6, 3 -+ beq .L567 -+ add x25, x6, 10 -+ add x5, sp, 312 -+ ldr x25, [x5, x25, lsl 3] -+ cmp x25, 0 -+ cinc x25, x25, eq -+ cmp x6, 1 -+ beq .L564 -+.L567: -+ ldr x6, [sp, 400] -+ str x6, [sp, 408] -+ b .L564 -+.L714: -+ lsr w14, w19, 3 -+ and w19, w19, 7 -+ sub x1, x1, x14 -+ str x1, [sp, 320] -+ ldr x21, [x1] -+ str x21, [sp, 304] -+ b .L476 -+.L715: -+ lsr w7, w19, 3 -+ and w19, w19, 7 -+ sub x7, x14, x7 -+ ldr x21, [x7] -+ str x21, [sp, 304] -+ str x7, [sp, 320] -+ b .L508 -+.L713: -+ lsr w0, w7, 3 -+ and w7, w7, 7 -+ sub x0, x17, x0 -+ ldr x21, [x0] -+ str x21, [sp, 304] -+ str x0, [sp, 320] -+ b .L538 -+.L726: -+ ldrb w0, [x2] -+ strb w0, [x20] -+ ldr x0, [sp, 232] -+ ldrb w1, [x2, 1] -+ strb w1, [x20, 1] -+ ldr w0, [x0, x25, lsl 2] -+ ldrb w1, [x2, 2] -+ strb w1, [x20, 2] -+ add x17, x2, x0 -+ ldrb w1, [x2, 3] -+ strb w1, [x20, 3] -+ ldr x1, [sp, 216] -+ ldr w16, [x2, x0] -+ str w16, [x20, 4] -+ ldrsw x1, [x1, x25, lsl 2] -+ sub x2, x17, x1 -+ b .L588 -+.L721: -+ lsr w2, w19, 3 -+ and w19, w19, 7 -+ sub x2, x6, x2 -+ ldr x6, [x2] -+ str x6, [sp, 304] -+ str x2, [sp, 320] -+ b .L571 -+.L554: -+ ldr q0, [x1, 8] -+ str q0, [x7, 8] -+ cmp x11, 24 -+ ble .L547 -+ add x0, x7, 24 -+ add x1, x1, 24 -+ .p2align 3,,7 -+.L556: -+ ldr q0, [x1] -+ add x0, x0, 32 -+ add x1, x1, 32 -+ str q0, [x0, -32] -+ ldr q0, [x1, -16] -+ str q0, [x0, -16] -+ cmp x3, x0 -+ bhi .L556 -+ b .L547 -+.L524: -+ ldr q0, [x1, 8] -+ str q0, [x12, 8] -+ cmp x23, 24 -+ ble .L517 -+ add x0, x12, 24 -+ add x1, x1, 24 -+ .p2align 3,,7 -+.L526: -+ ldr q0, [x1] -+ add x0, x0, 32 -+ add x1, x1, 32 -+ str q0, [x0, -32] -+ ldr q0, [x1, -16] -+ str q0, [x0, -16] -+ cmp x13, x0 -+ bhi .L526 -+ b .L517 -+.L494: -+ ldr q0, [x1, 8] -+ str q0, [x6, 8] -+ cmp x20, 24 -+ ble .L487 -+ add x0, x6, 24 -+ add x1, x1, 24 -+ .p2align 3,,7 -+.L496: -+ ldr q0, [x1] -+ add x0, x0, 32 -+ add x1, x1, 32 -+ str q0, [x0, -32] -+ ldr q0, [x1, -16] -+ str q0, [x0, -16] -+ cmp x14, x0 -+ bhi .L496 -+ b .L487 -+.L466: -+ adrp x0, .LANCHOR0 -+ ldr w19, [sp, 312] -+ add x0, x0, :lo12:.LANCHOR0 -+ str x0, [sp, 216] -+ ldr x11, [sp, 104] -+ ldp x21, x27, [sp, 344] -+ ldp x23, x8, [sp, 360] -+ ldp x22, x28, [sp, 376] -+ b .L560 -+.L589: -+ ldr q0, [x2, 8] -+ str q0, [x20, 8] -+ cmp x3, 24 -+ ble .L581 -+ add x20, x20, 24 -+ add x2, x2, 24 -+.L591: -+ ldr q0, [x2] -+ add x20, x20, 32 -+ add x2, x2, 32 -+ str q0, [x20, -32] -+ ldr q0, [x2, -16] -+ str q0, [x20, -16] -+ cmp x4, x20 -+ bhi .L591 -+ b .L581 -+.L724: -+ mov x1, x0 -+ mov x2, x20 -+ mov x0, x6 -+ stp x12, x7, [sp, 144] -+ stp x13, x5, [sp, 160] -+ stp x3, x11, [sp, 176] -+ stp x9, x8, [sp, 192] -+ str x4, [sp, 208] -+ str x10, [sp, 224] -+ bl memmove -+ ldp x12, x7, [sp, 144] -+ ldp x13, x5, [sp, 160] -+ ldp x3, x11, [sp, 176] -+ ldp x9, x8, [sp, 192] -+ ldr x4, [sp, 208] -+ ldr x10, [sp, 224] -+ b .L487 -+.L722: -+ mov x1, x0 -+ mov x2, x11 -+ mov x0, x7 -+ stp x13, x5, [sp, 144] -+ stp x15, x9, [sp, 160] -+ stp x8, x4, [sp, 176] -+ str x10, [sp, 192] -+ bl memmove -+ ldp x13, x5, [sp, 144] -+ ldp x15, x9, [sp, 160] -+ ldp x8, x4, [sp, 176] -+ ldr x10, [sp, 192] -+ b .L547 -+.L723: -+ mov x1, x0 -+ mov x2, x23 -+ mov x0, x12 -+ stp x6, x11, [sp, 144] -+ str x18, [sp, 160] -+ str w7, [sp, 168] -+ stp x5, x3, [sp, 176] -+ stp x9, x8, [sp, 192] -+ str x4, [sp, 208] -+ str x10, [sp, 224] -+ bl memmove -+ ldr w7, [sp, 168] -+ ldp x6, x11, [sp, 144] -+ ldr x18, [sp, 160] -+ ldp x5, x3, [sp, 176] -+ ldp x9, x8, [sp, 192] -+ ldr x4, [sp, 208] -+ ldr x10, [sp, 224] -+ b .L517 -+ .cfi_endproc -+.LFE4551: -+ .size ZSTD_decompressSequences_default.constprop.0, .-ZSTD_decompressSequences_default.constprop.0 -+ .align 2 -+ .p2align 4,,11 -+ .type ZSTD_decompressSequencesSplitLitBuffer_default.constprop.0, %function -+ZSTD_decompressSequencesSplitLitBuffer_default.constprop.0: -+.LFB4552: -+ .cfi_startproc -+ sub sp, sp, #480 -+ .cfi_def_cfa_offset 480 -+ stp x29, x30, [sp, 16] -+ .cfi_offset 29, -464 -+ .cfi_offset 30, -456 -+ add x29, sp, 16 -+ ldr x8, [x0, 30120] -+ stp x19, x20, [sp, 32] -+ .cfi_offset 19, -448 -+ .cfi_offset 20, -440 -+ mov x20, x0 -+ add x0, x1, x2 -+ str x0, [sp, 112] -+ stp x21, x22, [sp, 48] -+ ldr x0, [x20, 29904] -+ stp x23, x24, [sp, 64] -+ stp x25, x26, [sp, 80] -+ .cfi_offset 21, -432 -+ .cfi_offset 22, -424 -+ .cfi_offset 23, -416 -+ .cfi_offset 24, -408 -+ .cfi_offset 25, -400 -+ .cfi_offset 26, -392 -+ mov x25, x1 -+ str x0, [sp, 120] -+ str x8, [sp, 288] -+ ldr x11, [x20, 29896] -+ ldr x14, [x20, 29912] -+ ldr x24, [x20, 30352] -+ cbz w5, .L848 -+ add x23, x20, 16384 -+ mov w0, 1 -+ ldr w13, [x23, 10300] -+ ldr w18, [x23, 10304] -+ ldr w1, [x23, 10308] -+ stp x13, x18, [sp, 456] -+ str x1, [sp, 472] -+ str w0, [x23, 13620] -+ cbz x4, .L729 -+ add x0, x3, x4 -+ stp x27, x28, [sp, 96] -+ .cfi_offset 28, -376 -+ .cfi_offset 27, -384 -+ add x1, x3, 8 -+ str x1, [sp, 128] -+ mov x27, x3 -+ stp x3, x1, [sp, 392] -+ mov w28, w5 -+ ldrb w0, [x0, -1] -+ cmp x4, 7 -+ bls .L730 -+ sub x1, x4, #8 -+ add x2, x3, x1 -+ str x2, [sp, 384] -+ ldr x1, [x3, x1] -+ str x1, [sp, 368] -+ cbz w0, .L951 -+ clz w0, w0 -+ sub w0, w0, #23 -+ str w0, [sp, 376] -+ cmn x4, #120 -+ bhi .L951 -+.L731: -+ ldr x2, [x20] -+ add x1, sp, 368 -+ add x0, sp, 408 -+ bl ZSTD_initFseState -+ add x0, sp, 424 -+ ldr x2, [x20, 16] -+ bl ZSTD_initFseState -+ add x0, sp, 440 -+ ldr x2, [x20, 8] -+ bl ZSTD_initFseState -+ ldp x2, x0, [sp, 408] -+ ldp x12, x1, [sp, 432] -+ ldr x10, [sp, 448] -+ mov x3, x0 -+ str x3, [sp, 136] -+ ldr x4, [x0, x2, lsl 3] -+ ldr x0, [sp, 424] -+ ldr x2, [x10, x1, lsl 3] -+ ubfx w17, w4, 16, 8 -+ ldr x3, [x12, x0, lsl 3] -+ lsr x30, x4, 32 -+ ubfx w21, w2, 16, 8 -+ lsr x5, x2, 32 -+ add w7, w17, w21 -+ mov x6, x30 -+ ubfx w16, w3, 16, 8 -+ lsr w15, w4, 24 -+ add w7, w16, w7 -+ lsr w1, w2, 24 -+ lsr x22, x3, 32 -+ and w7, w7, 255 -+ lsr w19, w3, 24 -+ cmp w16, 1 -+ bls .L739 -+ ldr x9, [sp, 368] -+ neg w30, w16 -+ ldr w26, [sp, 376] -+ str x18, [sp, 472] -+ add w0, w16, w26 -+ str w0, [sp, 376] -+ lsl x16, x9, x26 -+ lsr x16, x16, x30 -+ add x22, x22, x16 -+.L740: -+ stp x22, x13, [sp, 456] -+ cbnz w21, .L956 -+.L745: -+ cmp w7, 30 -+ bhi .L746 -+.L952: -+ ldr x7, [sp, 368] -+.L747: -+ cbnz w17, .L957 -+.L752: -+ adrp x13, .LANCHOR0 -+ add x9, x13, :lo12:.LANCHOR0 -+ add x26, x9, 64 -+ add w0, w15, w0 -+ uxtw x17, w1 -+ uxtw x16, w19 -+ add w1, w1, w0 -+ neg w0, w0 -+ ldr w15, [x26, x15, lsl 2] -+ add w19, w19, w1 -+ lsr x0, x7, x0 -+ ldr w17, [x26, x17, lsl 2] -+ and x0, x0, x15 -+ ldr w16, [x26, x16, lsl 2] -+ neg w15, w19 -+ neg w1, w1 -+ add x0, x0, w4, uxth -+ str x9, [sp, 208] -+ lsr x1, x7, x1 -+ and x1, x1, x17 -+ lsr x7, x7, x15 -+ and x7, x7, x16 -+ add x2, x1, w2, uxth -+ add x3, x7, w3, uxth -+ add x15, x8, x6 -+ str w19, [sp, 376] -+ str x0, [sp, 408] -+ str x3, [sp, 424] -+ str x2, [sp, 440] -+ cmp x24, x15 -+ bcc .L849 -+ add x0, x9, 32 -+ mov x21, x25 -+ mov x9, x27 -+ mov x27, x11 -+ mov x11, x20 -+ mov x20, x26 -+ mov x26, x12 -+ stp x25, x23, [sp, 192] -+ mov x23, x24 -+ mov x25, x14 -+ mov w24, w28 -+ mov x28, x10 -+ str x0, [sp, 216] -+.L788: -+ sub x2, x6, #32 -+ add x4, x6, x5 -+ cmp x23, x15 -+ add x2, x8, x2 -+ add x7, x21, x4 -+ add x3, x21, x6 -+ ccmp x2, x7, 0, cs -+ sub x0, x3, x22 -+ bcc .L754 -+ ldr q0, [x8] -+ str q0, [x21] -+ cmp x6, 16 -+ bhi .L755 -+.L759: -+ sub x1, x3, x27 -+ str x15, [sp, 288] -+ cmp x22, x1 -+ bls .L757 -+ ldr x1, [sp, 120] -+ sub x1, x3, x1 -+ cmp x22, x1 -+ bhi .L951 -+ sub x1, x0, x27 -+ add x1, x25, x1 -+ add x2, x1, x5 -+ cmp x25, x2 -+ bcs .L958 -+ sub x2, x27, x0 -+ mov x0, x3 -+ sub x5, x5, x2 -+ add x3, x3, x2 -+ stp x3, x5, [sp, 144] -+ stp x7, x11, [sp, 160] -+ stp x9, x4, [sp, 176] -+ bl memmove -+ mov x0, x27 -+ ldp x3, x5, [sp, 144] -+ ldp x7, x11, [sp, 160] -+ ldp x9, x4, [sp, 176] -+.L757: -+ cmp x22, 15 -+ bls .L762 -+ ldr q0, [x0] -+ str q0, [x3] -+ cmp x5, 16 -+ ble .L758 -+ add x1, x3, 16 -+ add x2, x0, 16 -+ .p2align 3,,7 -+.L763: -+ ldr q0, [x2] -+ add x1, x1, 32 -+ add x2, x2, 32 -+ str q0, [x1, -32] -+ ldr q0, [x2, -16] -+ str q0, [x1, -16] -+ cmp x7, x1 -+ bhi .L763 -+.L758: -+ cmn x4, #120 -+ bhi .L947 -+ add x21, x21, x4 -+ subs w24, w24, #1 -+ beq .L935 -+ cmp w19, 64 -+ bhi .L771 -+ ldr x1, [sp, 128] -+ ldr x0, [sp, 384] -+ cmp x1, x0 -+ bls .L959 -+ cmp x9, x0 -+ beq .L771 -+ lsr w1, w19, 3 -+ lsr w2, w19, 3 -+ sub x1, x0, x1 -+ cmp x9, x1 -+ bls .L773 -+ sub x1, x0, x9 -+ mov w2, w1 -+ sub x1, x0, w1, uxtw -+.L773: -+ ldr x0, [x1] -+ sub w19, w19, w2, lsl 3 -+ str x0, [sp, 368] -+ str w19, [sp, 376] -+ str x1, [sp, 384] -+.L771: -+ ldr x3, [sp, 136] -+ ldr x1, [sp, 408] -+ ldr x2, [sp, 440] -+ ldr x0, [sp, 424] -+ ldr x1, [x3, x1, lsl 3] -+ ldr x2, [x28, x2, lsl 3] -+ ldr x3, [x26, x0, lsl 3] -+ ubfx w15, w1, 16, 8 -+ ubfx w16, w2, 16, 8 -+ lsr x30, x1, 32 -+ add w4, w15, w16 -+ lsr x5, x2, 32 -+ ubfx w8, w3, 16, 8 -+ mov x6, x30 -+ add w4, w8, w4 -+ lsr x22, x3, 32 -+ and w4, w4, 255 -+ lsr w7, w1, 24 -+ lsr w0, w2, 24 -+ lsr w18, w3, 24 -+ cmp w8, 1 -+ bls .L774 -+ ldr x17, [sp, 368] -+ neg w30, w8 -+ ldr x10, [sp, 464] -+ str x10, [sp, 472] -+ lsl x17, x17, x19 -+ add w19, w19, w8 -+ lsr x17, x17, x30 -+ add x22, x22, x17 -+ ldr x30, [sp, 456] -+ str w19, [sp, 376] -+.L775: -+ stp x22, x30, [sp, 456] -+ cbnz w16, .L960 -+.L780: -+ cmp w4, 30 -+ bhi .L781 -+.L953: -+ ldr x4, [sp, 368] -+.L782: -+ cbnz w15, .L961 -+.L787: -+ add w8, w7, w19 -+ uxtw x16, w0 -+ uxtw x15, w18 -+ add w0, w0, w8 -+ neg w8, w8 -+ ldr w7, [x20, x7, lsl 2] -+ add w19, w18, w0 -+ ldr w16, [x20, x16, lsl 2] -+ lsr x8, x4, x8 -+ neg w0, w0 -+ ldr w15, [x20, x15, lsl 2] -+ and x7, x8, x7 -+ neg w17, w19 -+ lsr x0, x4, x0 -+ ldr x8, [sp, 288] -+ and x0, x0, x16 -+ ldr x16, [x11, 30352] -+ lsr x4, x4, x17 -+ and x4, x4, x15 -+ add x1, x7, w1, uxth -+ add x2, x0, w2, uxth -+ add x3, x4, w3, uxth -+ add x15, x8, x6 -+ str w19, [sp, 376] -+ str x1, [sp, 408] -+ str x3, [sp, 424] -+ str x2, [sp, 440] -+ cmp x15, x16 -+ bls .L788 -+ mov x14, x25 -+ mov x10, x28 -+ ldp x25, x23, [sp, 192] -+ mov x20, x11 -+ mov x12, x26 -+ mov x11, x27 -+ mov w28, w24 -+ mov x27, x9 -+.L753: -+ cmp w28, 0 -+ ble .L951 -+ subs x2, x16, x8 -+ beq .L789 -+ ldr x0, [sp, 112] -+ str x5, [sp, 144] -+ mov x4, -70 -+ sub x0, x0, x21 -+ cmp x2, x0 -+ bhi .L947 -+ mov x0, x21 -+ sub x6, x6, x2 -+ add x21, x21, x2 -+ mov x1, x8 -+ bl ZSTD_safecopyDstBeforeSrc -+ ldr x5, [sp, 144] -+.L789: -+ add x26, x21, x6 -+ sub x0, x26, x22 -+ mov x7, 30364 -+ add x1, x20, x7 -+ add x24, x20, 94208 -+ add x2, x1, x6 -+ add x24, x24, 1692 -+ prfm PLDL1KEEP, [x0] -+ add x4, x5, x6 -+ str x1, [sp, 288] -+ cmp x24, x2 -+ ldr x1, [sp, 112] -+ str wzr, [x23, 13976] -+ add x3, x21, x4 -+ sub x1, x1, #32 -+ str x1, [sp, 144] -+ ccmp x3, x1, 2, cs -+ bhi .L790 -+ ldr q0, [x20, x7] -+ str q0, [x21] -+ cmp x6, 16 -+ bhi .L791 -+.L795: -+ sub x1, x26, x11 -+ str x2, [sp, 288] -+ cmp x1, x22 -+ bcs .L793 -+ ldr x1, [sp, 120] -+ sub x1, x26, x1 -+ cmp x1, x22 -+ bcc .L951 -+ sub x1, x0, x11 -+ add x1, x14, x1 -+ add x2, x1, x5 -+ cmp x14, x2 -+ bcs .L962 -+ sub x2, x11, x0 -+ mov x0, x26 -+ sub x5, x5, x2 -+ add x26, x26, x2 -+ stp x5, x11, [sp, 152] -+ stp x14, x10, [sp, 168] -+ stp x12, x3, [sp, 184] -+ str x4, [sp, 200] -+ bl memmove -+ ldp x5, x11, [sp, 152] -+ ldp x14, x10, [sp, 168] -+ ldp x12, x3, [sp, 184] -+ ldr x4, [sp, 200] -+ mov x0, x11 -+.L793: -+ cmp x22, 15 -+ bls .L798 -+ ldr q0, [x0] -+ str q0, [x26] -+ cmp x5, 16 -+ ble .L794 -+ add x26, x26, 16 -+ add x0, x0, 16 -+ .p2align 3,,7 -+.L799: -+ ldr q0, [x0] -+ add x26, x26, 32 -+ add x0, x0, 32 -+ str q0, [x26, -32] -+ ldr q0, [x0, -16] -+ str q0, [x26, -16] -+ cmp x3, x26 -+ bhi .L799 -+.L794: -+ cmn x4, #120 -+ bhi .L947 -+ add x21, x21, x4 -+ subs w9, w28, #1 -+ beq .L770 -+ cmp w19, 64 -+ bhi .L805 -+ ldr x1, [sp, 128] -+ ldr x0, [sp, 384] -+ cmp x1, x0 -+ bls .L963 -+ cmp x27, x0 -+ beq .L805 -+ lsr w1, w19, 3 -+ lsr w2, w19, 3 -+ sub x1, x0, x1 -+ cmp x27, x1 -+ bls .L808 -+ sub x1, x0, x27 -+ mov w2, w1 -+ sub x1, x0, w1, uxtw -+.L808: -+ ldr x0, [x1] -+ sub w19, w19, w2, lsl 3 -+ str x0, [sp, 368] -+ str w19, [sp, 376] -+ str x1, [sp, 384] -+.L805: -+ ldr x0, [sp, 208] -+ mov x8, x27 -+ ldr x22, [sp, 408] -+ add x18, x0, 64 -+ ldr x28, [sp, 424] -+ add x0, x0, 32 -+ ldr x26, [sp, 440] -+ mov x13, x22 -+ mov x27, x18 -+ stp x20, x25, [sp, 224] -+ mov x20, x24 -+ mov x25, x14 -+ mov x24, x21 -+ stp x23, x0, [sp, 240] -+ mov x23, x11 -+ .p2align 3,,7 -+.L807: -+ ldr x0, [sp, 136] -+ ldr x26, [x10, x26, lsl 3] -+ ldr x1, [x0, x13, lsl 3] -+ ldr x28, [x12, x28, lsl 3] -+ ubfx w15, w26, 16, 8 -+ ubfx w13, w1, 16, 8 -+ lsr x16, x1, 32 -+ add w2, w13, w15 -+ lsr x3, x26, 32 -+ ubfx w7, w28, 16, 8 -+ lsr x21, x28, 32 -+ add w2, w7, w2 -+ lsr w17, w28, 24 -+ mov x5, x16 -+ and w2, w2, 255 -+ lsr w6, w1, 24 -+ lsr w0, w26, 24 -+ cmp w7, 1 -+ bls .L809 -+ ldr x16, [sp, 368] -+ neg w18, w7 -+ ldr x22, [sp, 464] -+ str x22, [sp, 472] -+ lsl x16, x16, x19 -+ add w19, w19, w7 -+ ldr x7, [sp, 456] -+ lsr x16, x16, x18 -+ add x21, x21, x16 -+ str w19, [sp, 376] -+.L810: -+ stp x21, x7, [sp, 456] -+ cbnz w15, .L964 -+.L815: -+ cmp w2, 30 -+ bhi .L816 -+.L954: -+ ldr x7, [sp, 368] -+.L817: -+ cbnz w13, .L965 -+.L822: -+ add w16, w6, w19 -+ uxtw x13, w17 -+ add w2, w0, w16 -+ uxtw x15, w0 -+ add w19, w17, w2 -+ neg w16, w16 -+ ldr w0, [x27, x13, lsl 2] -+ neg w2, w2 -+ neg w13, w19 -+ ldr w17, [x27, x15, lsl 2] -+ add x22, x24, x5 -+ ldr w18, [x27, x6, lsl 2] -+ lsr x2, x7, x2 -+ and x2, x2, x17 -+ lsr x6, x7, x16 -+ add x26, x2, w26, uxth -+ lsr x7, x7, x13 -+ and x7, x7, x0 -+ sub x0, x22, x21 -+ and x6, x6, x18 -+ ldr x15, [sp, 288] -+ add x13, x6, w1, uxth -+ ldr x2, [sp, 144] -+ add x28, x7, w28, uxth -+ prfm PLDL1KEEP, [x0] -+ add x7, x5, x3 -+ add x1, x15, x5 -+ str w19, [sp, 376] -+ str x13, [sp, 408] -+ add x6, x24, x7 -+ str x28, [sp, 424] -+ cmp x20, x1 -+ str x26, [sp, 440] -+ ccmp x2, x6, 0, cs -+ bcc .L823 -+ ldr q0, [x15] -+ str q0, [x24] -+ cmp x5, 16 -+ bhi .L824 -+.L828: -+ str x1, [sp, 288] -+ sub x1, x22, x23 -+ cmp x21, x1 -+ bls .L826 -+ ldr x1, [sp, 120] -+ sub x1, x22, x1 -+ cmp x21, x1 -+ bhi .L951 -+ sub x1, x0, x23 -+ add x1, x25, x1 -+ add x2, x1, x3 -+ cmp x25, x2 -+ bcs .L966 -+ sub x2, x23, x0 -+ mov x0, x22 -+ sub x3, x3, x2 -+ add x22, x22, x2 -+ stp x3, x6, [sp, 152] -+ stp x10, x12, [sp, 168] -+ str x13, [sp, 184] -+ str w9, [sp, 192] -+ str x8, [sp, 200] -+ str x7, [sp, 216] -+ bl memmove -+ ldp x3, x6, [sp, 152] -+ mov x0, x23 -+ ldp x10, x12, [sp, 168] -+ ldr w9, [sp, 192] -+ ldr x13, [sp, 184] -+ ldr x8, [sp, 200] -+ ldr x7, [sp, 216] -+.L826: -+ cmp x21, 15 -+ bls .L831 -+ ldr q0, [x0] -+ str q0, [x22] -+ cmp x3, 16 -+ ble .L827 -+ add x22, x22, 16 -+ add x0, x0, 16 -+ .p2align 3,,7 -+.L832: -+ ldr q0, [x0] -+ add x22, x22, 32 -+ add x0, x0, 32 -+ str q0, [x22, -32] -+ ldr q0, [x0, -16] -+ str q0, [x22, -16] -+ cmp x6, x22 -+ bhi .L832 -+.L827: -+ cmn x7, #120 -+ bhi .L936 -+ add x24, x24, x7 -+ subs w9, w9, #1 -+ beq .L937 -+ cmp w19, 64 -+ bhi .L807 -+ ldr x1, [sp, 128] -+ ldr x0, [sp, 384] -+ cmp x1, x0 -+ bls .L967 -+ cmp x8, x0 -+ beq .L807 -+ lsr w1, w19, 3 -+ lsr w2, w19, 3 -+ sub x1, x0, x1 -+ cmp x8, x1 -+ bls .L840 -+ sub x1, x0, x8 -+ mov w2, w1 -+ sub x1, x0, w1, uxtw -+.L840: -+ ldr x0, [x1] -+ sub w19, w19, w2, lsl 3 -+ str x0, [sp, 368] -+ str w19, [sp, 376] -+ str x1, [sp, 384] -+ b .L807 -+ .p2align 2,,3 -+.L848: -+ .cfi_restore 27 -+ .cfi_restore 28 -+ mov x21, x1 -+ add x23, x20, 16384 -+.L728: -+ ldr x0, [sp, 112] -+ ldr w1, [x23, 13976] -+ sub x0, x0, x21 -+ cmp w1, 2 -+ beq .L968 -+ sub x2, x24, x8 -+ cmp x2, x0 -+ bhi .L846 -+.L971: -+ cbz x21, .L847 -+ mov x0, x21 -+ mov x1, x8 -+ add x21, x21, x2 -+ bl memcpy -+.L847: -+ sub x4, x21, x25 -+.L727: -+ mov x0, x4 -+ ldp x29, x30, [sp, 16] -+ ldp x19, x20, [sp, 32] -+ ldp x21, x22, [sp, 48] -+ ldp x23, x24, [sp, 64] -+ ldp x25, x26, [sp, 80] -+ add sp, sp, 480 -+ .cfi_restore 29 -+ .cfi_restore 30 -+ .cfi_restore 25 -+ .cfi_restore 26 -+ .cfi_restore 23 -+ .cfi_restore 24 -+ .cfi_restore 21 -+ .cfi_restore 22 -+ .cfi_restore 19 -+ .cfi_restore 20 -+ .cfi_def_cfa_offset 0 -+ ret -+ .p2align 2,,3 -+.L730: -+ .cfi_def_cfa_offset 480 -+ .cfi_offset 19, -448 -+ .cfi_offset 20, -440 -+ .cfi_offset 21, -432 -+ .cfi_offset 22, -424 -+ .cfi_offset 23, -416 -+ .cfi_offset 24, -408 -+ .cfi_offset 25, -400 -+ .cfi_offset 26, -392 -+ .cfi_offset 27, -384 -+ .cfi_offset 28, -376 -+ .cfi_offset 29, -464 -+ .cfi_offset 30, -456 -+ ldrb w1, [x3] -+ str x1, [sp, 368] -+ str x3, [sp, 384] -+ cmp x4, 5 -+ beq .L732 -+ bhi .L733 -+ cmp x4, 3 -+ beq .L734 -+ cmp x4, 4 -+ bne .L969 -+.L735: -+ ldrb w2, [x27, 3] -+ add x1, x1, x2, lsl 24 -+.L734: -+ ldrb w2, [x27, 2] -+ add x1, x1, x2, lsl 16 -+ ldrb w2, [x27, 1] -+ add x1, x1, x2, lsl 8 -+ str x1, [sp, 368] -+.L737: -+ cbnz w0, .L970 -+.L951: -+ ldp x27, x28, [sp, 96] -+ .cfi_restore 28 -+ .cfi_restore 27 -+.L729: -+ mov x4, -20 -+ mov x0, x4 -+ ldp x29, x30, [sp, 16] -+ ldp x19, x20, [sp, 32] -+ ldp x21, x22, [sp, 48] -+ ldp x23, x24, [sp, 64] -+ ldp x25, x26, [sp, 80] -+ add sp, sp, 480 -+ .cfi_restore 29 -+ .cfi_restore 30 -+ .cfi_restore 25 -+ .cfi_restore 26 -+ .cfi_restore 23 -+ .cfi_restore 24 -+ .cfi_restore 21 -+ .cfi_restore 22 -+ .cfi_restore 19 -+ .cfi_restore 20 -+ .cfi_def_cfa_offset 0 -+ ret -+ .p2align 2,,3 -+.L733: -+ .cfi_def_cfa_offset 480 -+ .cfi_offset 19, -448 -+ .cfi_offset 20, -440 -+ .cfi_offset 21, -432 -+ .cfi_offset 22, -424 -+ .cfi_offset 23, -416 -+ .cfi_offset 24, -408 -+ .cfi_offset 25, -400 -+ .cfi_offset 26, -392 -+ .cfi_offset 27, -384 -+ .cfi_offset 28, -376 -+ .cfi_offset 29, -464 -+ .cfi_offset 30, -456 -+ cmp x4, 6 -+ beq .L738 -+ ldrb w2, [x3, 6] -+ add x1, x1, x2, lsl 48 -+.L738: -+ ldrb w2, [x27, 5] -+ add x1, x1, x2, lsl 40 -+.L732: -+ ldrb w2, [x27, 4] -+ add x1, x1, x2, lsl 32 -+ b .L735 -+ .p2align 2,,3 -+.L969: -+ cmp x4, 2 -+ bne .L737 -+ ldrb w2, [x27, 1] -+ add x1, x1, x2, lsl 8 -+ str x1, [sp, 368] -+ b .L737 -+ .p2align 2,,3 -+.L968: -+ .cfi_restore 27 -+ .cfi_restore 28 -+ sub x2, x24, x8 -+ cmp x2, x0 -+ bhi .L846 -+ cbz x21, .L845 -+ mov x0, x21 -+ mov x1, x8 -+ add x21, x21, x2 -+ bl memmove -+ ldr x0, [sp, 112] -+ sub x0, x0, x21 -+.L845: -+ add x24, x20, 94208 -+ mov x1, 30364 -+ add x24, x24, 1692 -+ add x8, x20, x1 -+ sub x2, x24, x8 -+ str x8, [sp, 288] -+ str wzr, [x23, 13976] -+ cmp x2, x0 -+ bls .L971 -+.L846: -+ mov x4, -70 -+ mov x0, x4 -+ ldp x29, x30, [sp, 16] -+ ldp x19, x20, [sp, 32] ++.L654: ++ mov x26, x21 + ldp x21, x22, [sp, 48] -+ ldp x23, x24, [sp, 64] -+ ldp x25, x26, [sp, 80] -+ add sp, sp, 480 -+ .cfi_restore 29 -+ .cfi_restore 30 -+ .cfi_restore 25 -+ .cfi_restore 26 -+ .cfi_restore 23 -+ .cfi_restore 24 -+ .cfi_restore 21 ++ .cfi_remember_state + .cfi_restore 22 -+ .cfi_restore 19 -+ .cfi_restore 20 -+ .cfi_def_cfa_offset 0 -+ ret -+ .p2align 2,,3 -+.L970: -+ .cfi_def_cfa_offset 480 -+ .cfi_offset 19, -448 -+ .cfi_offset 20, -440 -+ .cfi_offset 21, -432 -+ .cfi_offset 22, -424 -+ .cfi_offset 23, -416 -+ .cfi_offset 24, -408 -+ .cfi_offset 25, -400 -+ .cfi_offset 26, -392 -+ .cfi_offset 27, -384 -+ .cfi_offset 28, -376 -+ .cfi_offset 29, -464 -+ .cfi_offset 30, -456 -+ mov w1, 8 -+ sub w1, w1, w4 -+ clz w0, w0 -+ add w0, w0, w1, lsl 3 -+ sub w0, w0, #23 -+ str w0, [sp, 376] -+ b .L731 -+ .p2align 2,,3 -+.L781: -+ cmp w19, 64 -+ bhi .L953 ++ .cfi_restore 21 ++ b .L447 ++.L484: ++ .cfi_restore_state ++ ldr d0, [x3] ++ str d0, [x26] ++ b .L485 ++.L512: ++ ldr x0, [sp, 120] ++ sub x0, x3, x0 ++ cmp x0, x22 ++ bcc .L449 + ldr x4, [sp, 128] -+ ldr x8, [sp, 384] -+ cmp x4, x8 -+ bls .L972 -+ cmp x9, x8 -+ beq .L953 -+ lsr w4, w19, 3 -+ lsr w16, w19, 3 -+ sub x4, x8, x4 -+ cmp x9, x4 -+ bls .L786 -+ sub x4, x8, x9 -+ mov w16, w4 -+ sub x4, x8, w4, uxtw -+.L786: -+ str x4, [sp, 384] -+ sub w19, w19, w16, lsl 3 -+ ldr x4, [x4] -+ str x4, [sp, 368] -+ cbz w15, .L787 -+ .p2align 3,,7 -+.L961: -+ neg w16, w15 -+ lsl x8, x4, x19 -+ add w19, w19, w15 -+ lsr x8, x8, x16 -+ add x6, x6, x8 -+ b .L787 -+ .p2align 2,,3 -+.L774: -+ cmp w30, 0 -+ cset w17, eq -+ cmp w8, 1 -+ beq .L776 -+ cmp w30, 0 -+ add x10, sp, 376 -+ cset x8, ne -+ add x17, x17, 10 -+ add x8, x8, 10 -+ ldr x22, [x10, x17, lsl 3] -+ ldr x30, [x10, x8, lsl 3] -+ stp x22, x30, [sp, 456] -+ cbz w16, .L780 -+.L960: -+ ldr x8, [sp, 368] -+ neg w17, w16 -+ lsl x8, x8, x19 -+ add w19, w19, w16 -+ lsr x8, x8, x17 -+ add x5, x5, x8 -+ str w19, [sp, 376] -+ b .L780 -+ .p2align 2,,3 -+.L959: -+ lsr w1, w19, 3 -+ and w19, w19, 7 -+ sub x0, x0, x1 -+ str x0, [sp, 384] -+ str w19, [sp, 376] -+ ldr x0, [x0] -+ str x0, [sp, 368] -+ b .L771 -+ .p2align 2,,3 -+.L958: -+ mov x2, x5 ++ sub x1, x2, x24 ++ add x1, x4, x1 ++ add x0, x1, x12 ++ cmp x4, x0 ++ bcs .L681 ++ sub x2, x24, x2 + mov x0, x3 -+ stp x11, x9, [sp, 144] -+ str x4, [sp, 160] ++ sub x12, x12, x2 ++ add x3, x3, x2 ++ stp x3, x12, [sp, 136] ++ stp x5, x8, [sp, 152] + bl memmove -+ ldp x11, x9, [sp, 144] -+ ldr x4, [sp, 160] -+ b .L758 -+ .p2align 2,,3 -+.L754: -+ add x0, sp, 512 -+ str x25, [sp] -+ stp x6, x5, [sp, 296] -+ add x4, sp, 288 -+ add x3, sp, 256 -+ ldp x0, x1, [x0, -216] -+ stp x0, x1, [sp, 256] -+ mov x6, x27 -+ ldr x1, [sp, 112] -+ mov x5, x23 -+ ldr x7, [sp, 120] -+ mov x0, x21 -+ stp x11, x9, [sp, 144] -+ str x22, [sp, 272] -+ str x22, [sp, 312] -+ bl ZSTD_execSequenceEndSplitLitBuffer -+ mov x4, x0 -+ ldp x11, x9, [sp, 144] -+ b .L758 -+ .p2align 2,,3 -+.L739: -+ cmp w30, 0 -+ ldr w0, [sp, 376] -+ cset w26, eq -+ cmp w16, 1 -+ beq .L741 -+ cmp w30, 0 -+ uxtw x16, w26 -+ cset x13, ne -+ add x18, sp, 376 -+ add x16, x16, 10 -+ add x13, x13, 10 -+ ldr x22, [x18, x16, lsl 3] -+ ldr x13, [x18, x13, lsl 3] -+ b .L740 -+ .p2align 2,,3 -+.L755: -+ ldr q0, [x8, 16] ++ mov x2, x24 ++ ldp x3, x12, [sp, 136] ++ ldp x5, x8, [sp, 152] ++ b .L513 ++.L669: ++ cmp w2, 64 ++ bhi .L662 ++ ldr x0, [sp, 320] ++ ldr x7, [sp, 336] ++ cmp x0, x7 ++ bcs .L682 ++ ldr x14, [sp, 328] ++ cmp x0, x14 ++ beq .L662 ++ lsr w7, w2, 3 ++ lsr w15, w2, 3 ++ sub x7, x0, x7 ++ cmp x14, x7 ++ bls .L471 ++ sub x14, x0, x14 ++ mov w15, w14 ++ sub x7, x0, x14, uxtw ++.L471: ++ str x7, [sp, 320] ++ sub w2, w2, w15, lsl 3 ++ ldr x7, [x7] ++ str x7, [sp, 304] ++ b .L467 ++.L680: ++ lsr w7, w0, 3 ++ and w0, w0, 7 ++ sub x2, x2, x7 ++ str x2, [sp, 320] ++ ldr x2, [x2] ++ str x2, [sp, 304] ++ b .L502 ++.L511: ++ ldr q0, [x0, 16] + sub x6, x6, #16 -+ str q0, [x21, 16] ++ str q0, [x19, 16] + cmp x6, 16 -+ ble .L759 -+ add x8, x8, 32 -+ add x1, x21, 32 ++ ble .L515 ++ add x0, x0, 32 ++ add x6, x19, 32 ++.L516: ++ ldr q0, [x0] ++ add x6, x6, 32 ++ add x0, x0, 32 ++ str q0, [x6, -32] ++ ldr q0, [x0, -16] ++ str q0, [x6, -16] ++ cmp x3, x6 ++ bhi .L516 ++ b .L515 ++.L496: ++ ldr x7, [sp, 304] ++ add w17, w17, w9 ++ ldr x18, [sp, 392] ++ lsl x7, x7, x0 ++ add w0, w0, 1 ++ add x7, x17, x7, lsr 63 ++ str w0, [sp, 312] ++ cmp x7, 3 ++ beq .L683 ++ add x9, x7, 10 ++ ldr x9, [x11, x9, lsl 3] ++ cmp x9, 0 ++ cinc x28, x9, eq ++ cmp x7, 1 ++ beq .L499 ++.L498: ++ ldr x7, [sp, 400] ++ str x7, [sp, 408] ++.L499: ++ stp x28, x18, [sp, 392] ++ b .L495 ++.L660: ++ ldp x21, x22, [sp, 48] ++ .cfi_remember_state ++ .cfi_restore 22 ++ .cfi_restore 21 ++ b .L447 ++.L653: ++ .cfi_restore_state ++ ldr w0, [sp, 312] ++ cmp w0, 64 ++ bhi .L525 ++ ldr x1, [sp, 320] ++ ldr x2, [sp, 336] ++ cmp x1, x2 ++ bcs .L684 ++ ldr x3, [sp, 328] ++ cmp x1, x3 ++ beq .L525 ++ lsr w2, w0, 3 ++ lsr w4, w0, 3 ++ sub x2, x1, x2 ++ cmp x3, x2 ++ bls .L528 ++ sub x3, x1, x3 ++ mov w4, w3 ++ sub x2, x1, x3, uxtw ++.L528: ++ ldr x1, [x2] ++ sub w0, w0, w4, lsl 3 ++ str x1, [sp, 304] ++ str w0, [sp, 312] ++ str x2, [sp, 320] ++.L525: ++ ldr x1, [sp, 168] ++ mov x26, x23 ++ add x10, sp, 312 ++ mov x23, x24 ++ add x21, x1, 64 ++ mov w24, w9 ++ add x1, x1, 32 ++ stp x20, x1, [sp, 176] ++ mov x20, x27 ++ mov x27, x8 ++ .p2align 3,,7 ++.L527: ++ ldp x7, x5, [sp, 344] ++ ldp x6, x3, [sp, 376] ++ ldp x2, x1, [sp, 360] ++ ldr x5, [x5, x7, lsl 3] ++ ldr x6, [x3, x6, lsl 3] ++ ldr x7, [x1, x2, lsl 3] ++ ubfx w15, w5, 16, 8 ++ ubfx w16, w6, 16, 8 ++ lsr x18, x5, 32 ++ add w2, w15, w16 ++ lsr x3, x6, 32 ++ ubfx w14, w7, 16, 8 ++ mov x11, x18 ++ add w2, w14, w2 ++ lsr x9, x7, 32 ++ and w2, w2, 255 ++ lsr w13, w5, 24 ++ lsr w1, w6, 24 ++ lsr w12, w7, 24 ++ cmp w14, 1 ++ bls .L529 ++ ldr x17, [sp, 304] ++ neg w22, w14 ++ ldr x18, [sp, 392] ++ lsl x17, x17, x0 ++ add w0, w0, w14 ++ lsr x14, x17, x22 ++ add x9, x9, x14 ++ ldr x14, [sp, 400] ++ str w0, [sp, 312] ++ stp x9, x18, [sp, 392] ++ str x14, [sp, 408] ++.L530: ++ cbnz w16, .L685 ++.L535: ++ cmp w2, 30 ++ bhi .L686 ++.L666: ++ ldr x2, [sp, 304] ++.L537: ++ cbnz w15, .L687 ++.L542: ++ add w0, w13, w0 ++ uxtw x16, w1 ++ add w1, w1, w0 ++ uxtw x15, w12 ++ add w12, w12, w1 ++ ldr w14, [x21, x13, lsl 2] ++ neg w13, w0 ++ neg w0, w1 ++ neg w1, w12 ++ add x22, x19, x11 ++ lsr x13, x2, x13 ++ ldr w16, [x21, x16, lsl 2] ++ and x13, x13, x14 ++ lsr x0, x2, x0 ++ lsr x1, x2, x1 ++ ldr w15, [x21, x15, lsl 2] ++ sub x2, x22, x9 ++ and x0, x0, x16 ++ ldr x14, [sp, 224] ++ and x1, x1, x15 ++ prfm PLDL1KEEP, [x2] ++ add x5, x13, x5, uxth ++ add x6, x0, x6, uxth ++ add x7, x1, x7, uxth ++ add x13, x11, x3 ++ add x0, x14, x11 ++ str w12, [sp, 312] ++ cmp x20, x0 ++ str x5, [sp, 344] ++ add x5, x19, x13 ++ str x7, [sp, 360] ++ ccmp x27, x5, 0, cs ++ str x6, [sp, 376] ++ bcc .L543 ++ ldr q0, [x14] ++ str q0, [x19] ++ cmp x11, 16 ++ bhi .L544 ++.L548: ++ str x0, [sp, 224] ++ sub x0, x22, x23 ++ cmp x9, x0 ++ bls .L546 ++ ldr x0, [sp, 120] ++ sub x0, x22, x0 ++ cmp x9, x0 ++ bhi .L449 ++ ldr x4, [sp, 128] ++ sub x1, x2, x23 ++ add x1, x4, x1 ++ add x0, x1, x3 ++ cmp x4, x0 ++ bcs .L688 ++ sub x2, x23, x2 ++ mov x0, x22 ++ sub x3, x3, x2 ++ add x22, x22, x2 ++ stp x3, x9, [sp, 136] ++ stp x5, x13, [sp, 152] ++ bl memmove ++ mov x2, x23 ++ ldp x3, x9, [sp, 136] ++ add x10, sp, 312 ++ ldp x5, x13, [sp, 152] ++.L546: ++ cmp x9, 15 ++ bls .L551 ++ ldr q0, [x2] ++ str q0, [x22] ++ cmp x3, 16 ++ ble .L547 ++ add x1, x22, 16 ++ add x2, x2, 16 + .p2align 3,,7 -+.L760: -+ ldr q0, [x8] ++.L552: ++ ldr q0, [x2] + add x1, x1, 32 -+ add x8, x8, 32 ++ add x2, x2, 32 + str q0, [x1, -32] -+ ldr q0, [x8, -16] ++ ldr q0, [x2, -16] + str q0, [x1, -16] -+ cmp x3, x1 -+ bhi .L760 -+ b .L759 ++ cmp x5, x1 ++ bhi .L552 ++.L547: ++ cmn x13, #120 ++ bhi .L656 ++ ldr w0, [sp, 312] ++ add x19, x19, x13 ++ subs w24, w24, #1 ++ mov w2, w0 ++ beq .L657 ++ cmp w0, 64 ++ bhi .L527 ++ ldr x1, [sp, 320] ++ ldr x3, [sp, 336] ++ cmp x1, x3 ++ bcs .L689 ++ ldr x5, [sp, 328] ++ cmp x1, x5 ++ beq .L527 ++ lsr w3, w0, 3 ++ lsr w0, w0, 3 ++ sub x3, x1, x3 ++ cmp x5, x3 ++ bls .L560 ++ sub x5, x1, x5 ++ mov w0, w5 ++ sub x3, x1, x5, uxtw ++.L560: ++ ldr x1, [x3] ++ sub w0, w2, w0, lsl 3 ++ str x1, [sp, 304] ++ str w0, [sp, 312] ++ str x3, [sp, 320] ++ b .L527 ++.L686: ++ cmp w0, 64 ++ bhi .L666 ++ ldr x14, [sp, 320] ++ ldr x2, [sp, 336] ++ cmp x14, x2 ++ bcs .L690 ++ ldr x16, [sp, 328] ++ cmp x14, x16 ++ beq .L666 ++ lsr w2, w0, 3 ++ lsr w17, w0, 3 ++ sub x2, x14, x2 ++ cmp x16, x2 ++ bls .L541 ++ sub x16, x14, x16 ++ mov w17, w16 ++ sub x2, x14, x16, uxtw ++.L541: ++ str x2, [sp, 320] ++ sub w0, w0, w17, lsl 3 ++ ldr x2, [x2] ++ str x2, [sp, 304] ++ cbz w15, .L542 ++ .p2align 3,,7 ++.L687: ++ neg w16, w15 ++ lsl x14, x2, x0 ++ add w0, w0, w15 ++ lsr x14, x14, x16 ++ add x11, x11, x14 ++ b .L542 + .p2align 2,,3 -+.L935: -+ mov x24, x23 -+ mov x20, x11 -+ ldp x25, x23, [sp, 192] -+ mov x27, x9 -+.L770: -+ cmp w19, 64 -+ bhi .L841 -+ ldr x1, [sp, 128] -+ ldr x0, [sp, 384] -+ cmp x1, x0 -+ bls .L951 -+ cmp x27, x0 -+ bne .L951 -+ cmp w19, 64 -+ bne .L951 -+.L841: -+ ldr x0, [sp, 456] -+ ldp x27, x28, [sp, 96] -+ .cfi_remember_state -+ .cfi_restore 28 -+ .cfi_restore 27 -+ str w0, [x23, 10300] -+ ldr x0, [sp, 464] -+ str w0, [x23, 10304] -+ ldr x0, [sp, 472] -+ str w0, [x23, 10308] -+ ldr x8, [sp, 288] -+ b .L728 -+ .p2align 2,,3 -+.L956: -+ .cfi_restore_state -+ ldr x13, [sp, 368] -+ neg w16, w21 -+ lsl x13, x13, x0 -+ add w0, w0, w21 -+ lsr x13, x13, x16 -+ add x5, x5, x13 -+ str w0, [sp, 376] -+ b .L745 -+ .p2align 2,,3 -+.L957: -+ neg w16, w17 -+ lsl x13, x7, x0 -+ add w0, w0, w17 -+ lsr x13, x13, x16 -+ add x6, x6, x13 -+ b .L752 -+ .p2align 2,,3 -+.L947: -+ ldp x27, x28, [sp, 96] -+ .cfi_remember_state -+ .cfi_restore 28 -+ .cfi_restore 27 -+ b .L727 ++.L529: ++ cmp w18, 0 ++ cset w17, eq ++ cmp w14, 1 ++ beq .L531 ++ cmp w18, 0 ++ uxtw x9, w17 ++ cset x14, ne ++ add x9, x9, 10 ++ add x14, x14, 10 ++ ldr x9, [x10, x9, lsl 3] ++ ldr x14, [x10, x14, lsl 3] ++ stp x9, x14, [sp, 392] ++ cbz w16, .L535 ++.L685: ++ ldr x14, [sp, 304] ++ neg w17, w16 ++ lsl x14, x14, x0 ++ add w0, w0, w16 ++ lsr x14, x14, x17 ++ add x3, x3, x14 ++ str w0, [sp, 312] ++ b .L535 + .p2align 2,,3 -+.L762: -+ .cfi_restore_state -+ cmp x22, 7 -+ bhi .L764 -+ ldrb w1, [x0] -+ strb w1, [x3] -+ ldrb w2, [x0, 1] -+ strb w2, [x3, 1] -+ ldp x2, x1, [sp, 208] -+ ldrb w8, [x0, 2] -+ strb w8, [x3, 2] -+ ldrb w8, [x0, 3] -+ ldr w1, [x1, x22, lsl 2] -+ ldrsw x2, [x2, x22, lsl 2] -+ strb w8, [x3, 3] -+ add x6, x0, x1 -+ ldr w1, [x0, x1] -+ sub x0, x6, x2 -+ str w1, [x3, 4] -+.L765: -+ cmp x5, 8 -+ bls .L758 -+ sub x6, x3, x0 -+ add x2, x0, 8 -+ add x1, x3, 8 -+ cmp x6, 15 -+ bgt .L766 -+ .p2align 3,,7 -+.L767: -+ ldr d0, [x2], 8 -+ str d0, [x1], 8 -+ cmp x7, x1 -+ bhi .L767 -+ b .L758 -+.L764: -+ ldr d0, [x0] -+ str d0, [x3] -+ b .L765 -+.L816: -+ cmp w19, 64 -+ bhi .L954 -+ ldr x2, [sp, 128] -+ ldr x7, [sp, 384] -+ cmp x2, x7 -+ bls .L973 -+ cmp x8, x7 -+ beq .L954 -+ lsr w2, w19, 3 -+ lsr w15, w19, 3 -+ sub x2, x7, x2 -+ cmp x8, x2 -+ bls .L821 -+ sub x2, x7, x8 -+ mov w15, w2 -+ sub x2, x7, w2, uxtw -+.L821: -+ ldr x7, [x2] -+ str x7, [sp, 368] -+ str x2, [sp, 384] -+ sub w19, w19, w15, lsl 3 -+ cbz w13, .L822 -+ .p2align 3,,7 -+.L965: -+ neg w15, w13 -+ lsl x2, x7, x19 -+ add w19, w19, w13 -+ lsr x2, x2, x15 -+ add x5, x5, x2 -+ b .L822 -+.L746: -+ cmp w0, 64 -+ bhi .L952 -+ ldr x7, [sp, 128] -+ ldr x13, [sp, 384] -+ cmp x7, x13 -+ bls .L974 -+ cmp x27, x13 -+ beq .L952 -+ lsr w7, w0, 3 -+ lsr w16, w0, 3 -+ sub x7, x13, x7 -+ cmp x27, x7 -+ bls .L751 -+ sub x7, x13, x27 -+ mov w16, w7 -+ sub x7, x13, w7, uxtw -+.L751: -+ str x7, [sp, 384] -+ sub w0, w0, w16, lsl 3 -+ ldr x7, [x7] -+ str x7, [sp, 368] -+ b .L747 -+.L972: -+ lsr w4, w19, 3 -+ and w19, w19, 7 -+ sub x4, x8, x4 -+ str x4, [sp, 384] -+ ldr x4, [x4] -+ str x4, [sp, 368] -+ b .L782 ++.L689: ++ lsr w2, w0, 3 ++ and w0, w0, 7 ++ sub x1, x1, x2 ++ str x1, [sp, 320] ++ str w0, [sp, 312] ++ ldr x1, [x1] ++ str x1, [sp, 304] ++ b .L527 + .p2align 2,,3 -+.L809: -+ cmp w16, 0 -+ cset w18, eq -+ cmp w7, 1 -+ beq .L811 -+ cmp w16, 0 -+ uxtw x16, w18 -+ cset x7, ne -+ add x4, sp, 376 -+ add x16, x16, 10 -+ add x7, x7, 10 -+ ldr x21, [x4, x16, lsl 3] -+ ldr x7, [x4, x7, lsl 3] -+ stp x21, x7, [sp, 456] -+ cbz w15, .L815 -+.L964: -+ ldr x7, [sp, 368] -+ neg w16, w15 -+ lsl x7, x7, x19 -+ add w19, w19, w15 -+ lsr x7, x7, x16 -+ add x3, x3, x7 -+ str w19, [sp, 376] -+ b .L815 -+ .p2align 2,,3 -+.L967: -+ lsr w1, w19, 3 -+ and w19, w19, 7 -+ sub x0, x0, x1 -+ str x0, [sp, 384] -+ str w19, [sp, 376] -+ ldr x0, [x0] -+ str x0, [sp, 368] -+ b .L807 -+ .p2align 2,,3 -+.L966: ++.L688: + mov x2, x3 + mov x0, x22 -+ stp x10, x12, [sp, 152] -+ str x13, [sp, 168] -+ str w9, [sp, 176] -+ stp x8, x7, [sp, 184] ++ str x13, [sp, 136] + bl memmove -+ ldr w9, [sp, 176] -+ ldp x10, x12, [sp, 152] -+ ldr x13, [sp, 168] -+ ldp x8, x7, [sp, 184] -+ b .L827 -+.L776: -+ ldr x8, [sp, 368] -+ add w17, w17, w22 -+ ldr x30, [sp, 456] -+ lsl x8, x8, x19 -+ add w19, w19, 1 -+ subs x22, x30, #1 -+ add x8, x17, x8, lsr 63 -+ str w19, [sp, 376] -+ cinc x22, x22, eq -+ cmp x8, 3 -+ beq .L778 -+ add x17, x8, 10 -+ add x10, sp, 376 -+ ldr x22, [x10, x17, lsl 3] -+ cmp x22, 0 -+ cinc x22, x22, eq -+ cmp x8, 1 -+ beq .L775 -+.L778: -+ ldr x8, [sp, 464] -+ str x8, [sp, 472] -+ b .L775 -+.L823: ++ add x10, sp, 312 ++ ldr x13, [sp, 136] ++ b .L547 ++.L543: + add x0, sp, 512 -+ stp x5, x3, [sp, 344] -+ mov x7, x25 -+ ldr x6, [sp, 120] -+ mov x4, x20 -+ ldp x0, x1, [x0, -168] -+ stp x0, x1, [sp, 256] -+ add x2, sp, 256 -+ ldr x1, [sp, 112] ++ stp x11, x3, [sp, 280] + mov x5, x23 -+ add x3, sp, 288 -+ mov x0, x24 -+ stp x10, x12, [sp, 152] -+ str x13, [sp, 168] -+ str w9, [sp, 176] -+ str x8, [sp, 184] -+ str x21, [sp, 272] -+ str x21, [sp, 360] ++ ldp x6, x7, [sp, 120] ++ mov x4, x20 ++ ldp x0, x1, [x0, -232] ++ add x2, sp, 192 ++ add x3, sp, 224 ++ stp x0, x1, [sp, 192] ++ mov x1, x26 ++ mov x0, x19 ++ str x9, [sp, 208] ++ str x9, [sp, 296] + bl ZSTD_execSequenceEnd -+ ldr w9, [sp, 176] -+ mov x7, x0 -+ ldp x10, x12, [sp, 152] -+ ldr x13, [sp, 168] -+ ldr x8, [sp, 184] -+ b .L827 -+.L824: -+ ldr q0, [x15, 16] -+ sub x5, x5, #16 -+ str q0, [x24, 16] -+ cmp x5, 16 -+ ble .L828 -+ add x15, x15, 32 -+ add x2, x24, 32 ++ add x10, sp, 312 ++ mov x13, x0 ++ b .L547 ++.L657: ++ mov x27, x20 ++ mov x23, x26 ++ ldr x20, [sp, 176] ++ b .L490 ++.L544: ++ ldr q0, [x14, 16] ++ sub x11, x11, #16 ++ str q0, [x19, 16] ++ cmp x11, 16 ++ ble .L548 ++ add x14, x14, 32 ++ add x1, x19, 32 + .p2align 3,,7 -+.L829: -+ ldr q0, [x15] -+ add x2, x2, 32 -+ add x15, x15, 32 -+ str q0, [x2, -32] -+ ldr q0, [x15, -16] -+ str q0, [x2, -16] -+ cmp x22, x2 -+ bhi .L829 -+ b .L828 -+.L937: -+ mov x21, x24 -+ mov x27, x8 -+ mov x24, x20 -+ ldp x20, x25, [sp, 224] -+ ldr x23, [sp, 240] -+ b .L770 -+.L831: -+ cmp x21, 7 -+ bhi .L833 -+ ldrb w1, [x0] -+ strb w1, [x22] -+ ldr x1, [sp, 248] -+ ldrb w2, [x0, 1] -+ strb w2, [x22, 1] -+ ldr x2, [sp, 208] -+ ldrb w15, [x0, 2] -+ strb w15, [x22, 2] -+ ldr w1, [x1, x21, lsl 2] -+ ldrb w15, [x0, 3] -+ ldrsw x2, [x2, x21, lsl 2] -+ add x5, x0, x1 -+ strb w15, [x22, 3] -+ ldr w1, [x0, x1] -+ sub x0, x5, x2 -+ str w1, [x22, 4] -+.L834: ++.L549: ++ ldr q0, [x14] ++ add x1, x1, 32 ++ add x14, x14, 32 ++ str q0, [x1, -32] ++ ldr q0, [x14, -16] ++ str q0, [x1, -16] ++ cmp x22, x1 ++ bhi .L549 ++ b .L548 ++.L551: ++ cmp x9, 7 ++ bhi .L553 ++ ldrb w0, [x2] ++ strb w0, [x22] ++ ldr x0, [sp, 184] ++ ldrb w1, [x2, 1] ++ strb w1, [x22, 1] ++ ldr x1, [sp, 168] ++ ldrb w7, [x2, 2] ++ strb w7, [x22, 2] ++ ldr w0, [x0, x9, lsl 2] ++ ldrb w7, [x2, 3] ++ ldrsw x1, [x1, x9, lsl 2] ++ add x6, x2, x0 ++ strb w7, [x22, 3] ++ ldr w0, [x2, x0] ++ sub x2, x6, x1 ++ str w0, [x22, 4] ++.L554: + cmp x3, 8 -+ bls .L827 -+ sub x5, x22, x0 -+ add x2, x0, 8 -+ add x1, x22, 8 -+ cmp x5, 15 -+ bgt .L835 ++ bls .L547 ++ add x1, x2, 8 ++ add x0, x22, 8 ++ sub x6, x0, x1 ++ cmp x6, 15 ++ bgt .L555 + .p2align 3,,7 -+.L836: -+ ldr d0, [x2], 8 -+ str d0, [x1], 8 -+ cmp x6, x1 -+ bhi .L836 -+ b .L827 -+.L936: -+ mov x4, x7 -+ ldp x27, x28, [sp, 96] ++.L556: ++ ldr d0, [x1], 8 ++ str d0, [x0], 8 ++ cmp x5, x0 ++ bhi .L556 ++ b .L547 ++.L656: ++ mov x26, x13 ++ ldp x21, x22, [sp, 48] + .cfi_remember_state -+ .cfi_restore 28 -+ .cfi_restore 27 -+ b .L727 -+.L811: ++ .cfi_restore 22 ++ .cfi_restore 21 ++ b .L447 ++.L531: + .cfi_restore_state -+ ldr x16, [sp, 368] -+ add w18, w18, w21 -+ ldr x7, [sp, 456] -+ lsl x16, x16, x19 -+ add w19, w19, 1 -+ subs x21, x7, #1 -+ add x16, x18, x16, lsr 63 -+ str w19, [sp, 376] -+ cinc x21, x21, eq -+ cmp x16, 3 -+ beq .L813 -+ add x18, x16, 10 -+ add x4, sp, 376 -+ ldr x21, [x4, x18, lsl 3] -+ cmp x21, 0 -+ cinc x21, x21, eq -+ cmp x16, 1 -+ beq .L810 -+.L813: -+ ldr x16, [sp, 464] -+ str x16, [sp, 472] -+ b .L810 -+.L766: -+ ldr q0, [x0, 8] -+ str q0, [x3, 8] -+ cmp x5, 24 -+ ble .L758 -+ add x1, x3, 24 -+ add x0, x0, 24 ++ ldr x14, [sp, 304] ++ add w17, w17, w9 ++ ldr x18, [sp, 392] ++ lsl x14, x14, x0 ++ add w0, w0, 1 ++ add x14, x17, x14, lsr 63 ++ str w0, [sp, 312] ++ cmp x14, 3 ++ beq .L691 ++ add x9, x14, 10 ++ ldr x9, [x10, x9, lsl 3] ++ cmp x9, 0 ++ cinc x9, x9, eq ++ cmp x14, 1 ++ beq .L534 ++.L533: ++ ldr x14, [sp, 400] ++ str x14, [sp, 408] ++.L534: ++ stp x9, x18, [sp, 392] ++ b .L530 ++.L486: ++ ldr q0, [x3, 8] ++ str q0, [x26, 8] ++ cmp x22, 24 ++ ble .L478 ++ add x1, x26, 24 ++ add x2, x3, 24 + .p2align 3,,7 -+.L768: -+ ldr q0, [x0] ++.L488: ++ ldr q0, [x2] + add x1, x1, 32 -+ add x0, x0, 32 ++ add x2, x2, 32 + str q0, [x1, -32] -+ ldr q0, [x0, -16] ++ ldr q0, [x2, -16] + str q0, [x1, -16] -+ cmp x7, x1 -+ bhi .L768 -+ b .L758 -+.L833: -+ ldr d0, [x0] ++ cmp x4, x1 ++ bhi .L488 ++ b .L478 ++.L553: ++ ldr d0, [x2] + str d0, [x22] -+ b .L834 -+.L741: -+ ldr x16, [sp, 368] -+ add w22, w26, w22 -+ lsl x16, x16, x0 -+ add w0, w0, 1 -+ add x26, x22, x16, lsr 63 -+ subs x22, x13, #1 -+ str w0, [sp, 376] -+ cinc x22, x22, eq -+ cmp x26, 3 -+ beq .L743 -+ add x9, sp, 480 -+ add x16, x9, x26, lsl 3 -+ ldr x22, [x16, -24] ++ b .L554 ++.L683: ++ subs x9, x18, #1 ++ cinc x28, x9, eq ++ b .L498 ++.L461: ++ ldr x15, [sp, 304] ++ add w7, w7, w22 ++ ldr x16, [sp, 392] ++ lsl x15, x15, x2 ++ add w2, w2, 1 ++ add x7, x7, x15, lsr 63 ++ str w2, [sp, 312] ++ cmp x7, 3 ++ beq .L692 ++ add x8, sp, 416 ++ add x15, x8, x7, lsl 3 ++ ldr x22, [x15, -24] + cmp x22, 0 + cinc x22, x22, eq -+ cmp x26, 1 -+ beq .L740 -+.L743: -+ str x18, [sp, 472] -+ b .L740 -+.L973: -+ lsr w2, w19, 3 -+ and w19, w19, 7 -+ sub x2, x7, x2 -+ ldr x7, [x2] -+ str x7, [sp, 368] -+ str x2, [sp, 384] -+ b .L817 -+.L849: -+ mov x16, x24 -+ mov x21, x25 -+ b .L753 -+.L962: -+ mov x2, x5 -+ mov x0, x26 -+ stp x11, x14, [sp, 152] -+ stp x10, x12, [sp, 168] -+ str x4, [sp, 184] -+ bl memmove -+ ldp x11, x14, [sp, 152] -+ ldp x10, x12, [sp, 168] -+ ldr x4, [sp, 184] -+ b .L794 -+.L974: -+ lsr w7, w0, 3 ++ cmp x7, 1 ++ beq .L464 ++.L463: ++ ldr x7, [sp, 400] ++ str x7, [sp, 408] ++.L464: ++ stp x22, x16, [sp, 392] ++ b .L460 ++.L690: ++ lsr w2, w0, 3 + and w0, w0, 7 -+ sub x7, x13, x7 -+ str x7, [sp, 384] -+ ldr x7, [x7] -+ str x7, [sp, 368] -+ b .L747 -+.L790: -+ stp x6, x5, [sp, 320] -+ mov x7, x14 -+ mov x4, x24 -+ ldp x0, x1, [sp, 320] -+ stp x0, x1, [sp, 256] -+ mov x5, x11 -+ ldp x1, x6, [sp, 112] -+ add x2, sp, 256 -+ add x3, sp, 288 -+ mov x0, x21 -+ stp x11, x14, [sp, 152] -+ stp x10, x12, [sp, 168] ++ sub x2, x14, x2 ++ str x2, [sp, 320] ++ ldr x2, [x2] ++ str x2, [sp, 304] ++ b .L537 ++.L681: ++ mov x2, x12 ++ mov x0, x3 ++ str x8, [sp, 136] ++ bl memmove ++ ldr x8, [sp, 136] ++ b .L514 ++.L682: ++ lsr w7, w2, 3 ++ and w2, w2, 7 ++ sub x0, x0, x7 ++ ldr x7, [x0] ++ str x7, [sp, 304] ++ str x0, [sp, 320] ++ b .L467 ++.L510: ++ stp x6, x12, [sp, 256] ++ mov x5, x24 ++ mov x4, x27 ++ ldr x6, [sp, 120] ++ add x3, sp, 224 ++ ldr x7, [sp, 128] ++ add x2, sp, 192 ++ ldp x0, x1, [sp, 256] ++ str x8, [sp, 136] ++ stp x0, x1, [sp, 192] ++ mov x1, x23 ++ mov x0, x19 ++ str x22, [sp, 208] + str x22, [sp, 272] -+ str x22, [sp, 336] + bl ZSTD_execSequenceEnd -+ mov x4, x0 -+ ldp x11, x14, [sp, 152] -+ ldp x10, x12, [sp, 168] -+ b .L794 -+.L791: -+ mov x1, 30380 -+ sub x6, x6, #16 -+ ldr q0, [x20, x1] -+ str q0, [x21, 16] -+ cmp x6, 16 -+ ble .L795 -+ mov x6, 30396 -+ add x1, x21, 32 -+ add x6, x20, x6 -+.L796: -+ ldr q0, [x6] -+ add x1, x1, 32 -+ add x6, x6, 32 -+ str q0, [x1, -32] -+ ldr q0, [x6, -16] -+ str q0, [x1, -16] -+ cmp x26, x1 -+ bhi .L796 -+ b .L795 -+.L963: -+ lsr w1, w19, 3 -+ and w19, w19, 7 -+ sub x0, x0, x1 -+ str x0, [sp, 384] -+ str w19, [sp, 376] -+ ldr x0, [x0] -+ str x0, [sp, 368] -+ b .L805 -+.L798: ++ mov x26, x0 ++ ldr x8, [sp, 136] ++ b .L514 ++.L691: ++ subs x9, x18, #1 ++ cinc x9, x9, eq ++ b .L533 ++.L684: ++ lsr w2, w0, 3 ++ and w0, w0, 7 ++ sub x1, x1, x2 ++ str x1, [sp, 320] ++ str w0, [sp, 312] ++ ldr x1, [x1] ++ str x1, [sp, 304] ++ b .L525 ++.L518: + cmp x22, 7 -+ bhi .L800 -+ ldr x6, [sp, 208] -+ ldrb w2, [x0] -+ strb w2, [x26] -+ add x1, x6, 32 -+ ldrsw x2, [x6, x22, lsl 2] -+ ldrb w6, [x0, 1] -+ strb w6, [x26, 1] -+ ldr w1, [x1, x22, lsl 2] -+ ldrb w6, [x0, 2] -+ strb w6, [x26, 2] -+ add x6, x0, x1 -+ ldrb w7, [x0, 3] -+ strb w7, [x26, 3] -+ ldr w1, [x0, x1] -+ sub x0, x6, x2 -+ str w1, [x26, 4] -+.L801: -+ cmp x5, 8 -+ bls .L794 -+ sub x6, x26, x0 -+ add x2, x0, 8 -+ add x1, x26, 8 ++ bhi .L520 ++ ldrb w1, [x2] ++ strb w1, [x3] ++ ldr x4, [sp, 168] ++ ldrb w6, [x2, 1] ++ strb w6, [x3, 1] ++ add x0, x4, 32 ++ ldrsw x1, [x4, x22, lsl 2] ++ ldrb w6, [x2, 2] ++ strb w6, [x3, 2] ++ ldr w0, [x0, x22, lsl 2] ++ ldrb w7, [x2, 3] ++ strb w7, [x3, 3] ++ add x6, x2, x0 ++ ldr w0, [x2, x0] ++ sub x2, x6, x1 ++ str w0, [x3, 4] ++.L521: ++ cmp x12, 8 ++ bls .L514 ++ add x1, x2, 8 ++ add x0, x3, 8 ++ sub x6, x0, x1 + cmp x6, 15 -+ bgt .L802 -+.L803: -+ ldr d0, [x2], 8 -+ str d0, [x1], 8 -+ cmp x3, x1 -+ bhi .L803 -+ b .L794 -+.L835: -+ ldr q0, [x0, 8] ++ bgt .L522 ++.L523: ++ ldr d0, [x1], 8 ++ str d0, [x0], 8 ++ cmp x5, x0 ++ bhi .L523 ++ b .L514 ++.L555: ++ ldr q0, [x2, 8] + str q0, [x22, 8] + cmp x3, 24 -+ ble .L827 -+ add x22, x22, 24 -+ add x0, x0, 24 -+.L837: -+ ldr q0, [x0] -+ add x22, x22, 32 -+ add x0, x0, 32 -+ str q0, [x22, -32] -+ ldr q0, [x0, -16] -+ str q0, [x22, -16] -+ cmp x6, x22 -+ bhi .L837 -+ b .L827 -+.L800: -+ ldr d0, [x0] -+ str d0, [x26] -+ b .L801 -+.L802: -+ ldr q0, [x0, 8] -+ str q0, [x26, 8] -+ cmp x5, 24 -+ ble .L794 -+ add x26, x26, 24 -+ add x0, x0, 24 -+.L804: -+ ldr q0, [x0] -+ add x26, x26, 32 ++ ble .L547 ++ add x1, x22, 24 ++ add x2, x2, 24 ++.L557: ++ ldr q0, [x2] ++ add x1, x1, 32 ++ add x2, x2, 32 ++ str q0, [x1, -32] ++ ldr q0, [x2, -16] ++ str q0, [x1, -16] ++ cmp x5, x1 ++ bhi .L557 ++ b .L547 ++.L692: ++ subs x22, x16, #1 ++ cinc x22, x22, eq ++ b .L463 ++.L520: ++ ldr d0, [x2] ++ str d0, [x3] ++ b .L521 ++.L522: ++ ldr q0, [x2, 8] ++ str q0, [x3, 8] ++ cmp x12, 24 ++ ble .L514 ++ add x0, x3, 24 ++ add x1, x2, 24 ++.L524: ++ ldr q0, [x1] + add x0, x0, 32 -+ str q0, [x26, -32] -+ ldr q0, [x0, -16] -+ str q0, [x26, -16] -+ cmp x3, x26 -+ bhi .L804 -+ b .L794 ++ add x1, x1, 32 ++ str q0, [x0, -32] ++ ldr q0, [x1, -16] ++ str q0, [x0, -16] ++ cmp x5, x0 ++ bhi .L524 ++ b .L514 + .cfi_endproc -+.LFE4552: ++.LFE4504: + .size ZSTD_decompressSequencesSplitLitBuffer_default.constprop.0, .-ZSTD_decompressSequencesSplitLitBuffer_default.constprop.0 + .align 2 + .p2align 4,,11 + .global ZSTD_getcBlockSize + .type ZSTD_getcBlockSize, %function +ZSTD_getcBlockSize: -+.LFB4507: ++.LFB4455: + .cfi_startproc + mov x3, x0 + mov x0, -72 + cmp x1, 2 -+ bls .L975 ++ bls .L693 + ldrb w1, [x3, 2] + mov x0, 1 + ldrh w3, [x3] @@ -6354,21 +4743,21 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + str w1, [x2] + str w3, [x2, 8] + cmp w1, 1 -+ beq .L975 ++ beq .L693 + cmp w1, 3 + mov x0, -20 + csel x0, x3, x0, ne -+.L975: ++.L693: + ret + .cfi_endproc -+.LFE4507: ++.LFE4455: + .size ZSTD_getcBlockSize, .-ZSTD_getcBlockSize + .align 2 + .p2align 4,,11 + .global ZSTD_decodeLiteralsBlock + .type ZSTD_decodeLiteralsBlock, %function +ZSTD_decodeLiteralsBlock: -+.LFB4509: ++.LFB4457: + .cfi_startproc + stp x29, x30, [sp, -80]! + .cfi_def_cfa_offset 80 @@ -6379,7 +4768,7 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + cmp x2, 1 + .cfi_offset 21, -48 + .cfi_offset 22, -40 -+ bls .L988 ++ bls .L706 + stp x19, x20, [sp, 16] + .cfi_offset 20, -56 + .cfi_offset 19, -64 @@ -6388,83 +4777,96 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + ldrb w6, [x1] + and w19, w6, 3 + cmp w19, 2 -+ beq .L984 ++ beq .L702 + cmp w19, 3 -+ beq .L985 ++ beq .L703 + cmp w19, 1 -+ beq .L1061 ++ beq .L781 + cmp x4, 131072 + ubfx x1, x6, 2, 2 + mov x0, 131072 + csel x0, x4, x0, ls + cmp w1, 1 -+ beq .L1002 ++ beq .L721 + cmp w1, 3 -+ bne .L1062 ++ bne .L782 + cmp x2, 2 -+ beq .L1054 ++ beq .L772 + ldrb w6, [x22, 2] + mov x1, 3 + ldrh w7, [x22] + add w6, w7, w6, lsl 16 + ubfx x19, x6, 4, 21 -+.L1004: ++.L723: + cmp x19, 0 + mov x21, -70 + ccmp x3, 0, 0, ne + ccmp x19, x0, 2, ne -+ bhi .L1053 -+ cbnz w5, .L1005 ++ bhi .L773 ++ stp x23, x24, [sp, 48] ++ .cfi_remember_state ++ .cfi_offset 24, -24 ++ .cfi_offset 23, -32 ++ cbnz w5, .L724 + add x5, x19, 131072 + add x5, x5, 64 + cmp x4, x5 -+ bls .L1005 ++ bls .L724 + add x0, x3, 131072 -+ mov w5, 1 ++ add x23, x20, 16384 + add x0, x0, 32 ++ str x0, [x20, 30344] + add x3, x0, x19 -+ b .L1006 ++ str x3, [x20, 30352] ++ mov w3, 1 ++ mov w4, w3 ++ str w3, [x23, 13976] ++ b .L725 + .p2align 2,,3 -+.L985: ++.L703: ++ .cfi_restore_state + add x0, x0, 16384 + mov x21, -30 + ldr w0, [x0, 13616] -+ cbz w0, .L1053 -+.L984: ++ cbz w0, .L773 ++.L702: + cmp x2, 4 -+ bls .L1054 -+ ldr w1, [x22] -+ cmp x3, 0 ++ bls .L772 + stp x23, x24, [sp, 48] + .cfi_offset 24, -24 + .cfi_offset 23, -32 -+ add x24, x20, 16384 ++ add x23, x20, 16384 ++ ldr w1, [x22] ++ mov w8, 16 + ubfx x6, x6, 2, 2 -+ cset w8, eq ++ ldr w7, [x23, 13836] + lsr w0, w1, 4 -+ ldr w10, [x24, 13836] ++ cmp w7, 0 ++ csel w7, w7, w8, eq ++ cmp x3, 0 ++ cset w8, eq + cmp w6, 2 -+ beq .L989 ++ beq .L708 + cmp w6, 3 -+ bne .L1063 -+ ands x23, x0, 262143 ++ bne .L783 ++ ands x24, x0, 262143 + mov x21, -70 + and w0, w0, 262143 + ccmp w8, 0, 4, ne -+ bne .L1059 ++ bne .L779 + ldrb w6, [x22, 4] + cmp w0, 131072 -+ bls .L1064 -+.L1058: ++ bls .L784 ++.L778: + ldp x19, x20, [sp, 16] + .cfi_restore 20 + .cfi_restore 19 + ldp x23, x24, [sp, 48] + .cfi_restore 24 + .cfi_restore 23 -+.L988: ++.L706: + mov x21, -20 -+.L981: ++.L699: + mov x0, x21 + ldp x21, x22, [sp, 32] + ldp x29, x30, [sp], 80 @@ -6475,7 +4877,7 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + .cfi_def_cfa_offset 0 + ret + .p2align 2,,3 -+.L1061: ++.L781: + .cfi_def_cfa_offset 80 + .cfi_offset 19, -64 + .cfi_offset 20, -56 @@ -6488,127 +4890,112 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + .cfi_offset 24, -24 + .cfi_offset 23, -32 + cmp w0, 1 -+ beq .L1011 ++ beq .L730 + cmp w0, 3 -+ beq .L1012 ++ beq .L731 + ubfx x19, x6, 3, 5 + mov x21, 2 -+ mov x24, 1 -+.L1013: ++ mov x23, 1 ++.L732: + cmp x19, 0 + ccmp x3, 0, 0, ne -+ beq .L1028 -+.L1019: ++ beq .L748 ++.L738: + cmp x4, 131072 + mov x0, 131072 + csel x0, x4, x0, ls + cmp x0, x19 -+ bcc .L1028 -+ cbnz w5, .L1014 ++ bcc .L748 ++ cbnz w5, .L733 + add x1, x19, 131072 + add x1, x1, 64 + cmp x4, x1 -+ bls .L1014 -+ add x23, x3, 131072 -+ mov w3, 1 -+ add x23, x23, 32 -+ add x0, x23, x19 -+.L1015: -+ add x1, x20, 16384 -+ str x23, [x20, 30344] -+ str x0, [x20, 30352] ++ bls .L733 ++ add x0, x20, 16384 ++ add x24, x3, 131072 ++ add x24, x24, 32 ++ str x24, [x20, 30344] ++ add x1, x24, x19 ++ str x1, [x20, 30352] ++ mov w1, 1 ++ str w1, [x0, 13976] ++.L734: ++ ldrb w1, [x22, x23] ++ mov x0, x24 + mov x2, x19 -+ mov x0, x23 -+ str w3, [x1, 13976] -+ ldrb w1, [x22, x24] + bl memset -+.L1017: -+ ldr x0, [x20, 30344] -+ ldp x23, x24, [sp, 48] -+ .cfi_remember_state -+ .cfi_restore 24 -+ .cfi_restore 23 -+ str x0, [x20, 30120] -+ str x19, [x20, 30152] -+ ldp x19, x20, [sp, 16] -+ .cfi_restore 20 -+ .cfi_restore 19 -+ b .L981 ++ b .L736 + .p2align 2,,3 -+.L1063: -+ .cfi_restore_state ++.L783: + eor w6, w6, 1 -+ and x23, x0, 1023 -+ ubfx x9, x1, 14, 10 -+ mov x7, 3 -+.L991: -+ cmp x23, 0 ++ and x24, x0, 1023 ++ ubfx x10, x1, 14, 10 ++ mov x9, 3 ++.L710: ++ cmp x24, 0 + mov x21, -70 + ccmp w8, 0, 4, ne -+ bne .L1059 ++ bne .L779 + eor w0, w6, 1 + and w0, w0, 1 -+.L1018: ++.L737: + cmp w0, 0 -+ ccmp x23, 5, 2, ne -+ bls .L1022 -+ add x21, x7, x9 ++ ccmp x24, 5, 2, ne ++ bls .L742 ++ add x21, x9, x10 + cmp x21, x2 -+ bhi .L1058 ++ bhi .L778 + cmp x4, 131072 + mov x0, 131072 + csel x0, x4, x0, ls -+ cmp x0, x23 -+ bcc .L1028 -+ cbnz w5, .L992 -+ add x1, x23, 131072 ++ cmp x0, x24 ++ bcc .L748 ++ cbnz w5, .L711 ++ add x1, x24, 131072 + add x1, x1, 64 + cmp x4, x1 -+ bls .L992 ++ bls .L711 + add x1, x3, 131072 -+ mov w0, 1 + add x1, x1, 32 -+ add x4, x1, x23 -+.L993: + str x1, [x20, 30344] -+ cmp x23, 768 -+ str x4, [x20, 30352] -+ str w0, [x24, 13976] -+ ldr w0, [x24, 13812] ++ add x0, x1, x24 ++ str x0, [x20, 30352] ++ mov w0, 1 ++ str w0, [x23, 13976] ++.L712: ++ ldr w0, [x23, 13812] ++ cmp x24, 768 + ccmp w0, 0, 4, hi -+ bne .L1065 -+.L995: -+ cmp w10, 0 -+ add x2, x22, x7 -+ cset w5, ne -+ lsl w7, w5, 4 ++ bne .L785 ++.L714: ++ add x2, x22, x9 + cmp w19, 3 -+ beq .L1066 ++ beq .L786 + mov x8, 10296 -+ mov x10, 27324 ++ mov x9, 27324 + add x0, x20, x8 -+ add x5, x20, x10 -+ mov x4, x9 ++ add x5, x20, x9 ++ mov x4, x10 + mov x3, x2 -+ cbz w6, .L1000 -+ mov x2, x23 ++ cbz w6, .L719 ++ mov x2, x24 + mov x6, 2560 + bl HUF_decompress1X1_DCtx_wksp + mov x22, x0 -+.L999: -+ ldr w0, [x24, 13976] ++.L718: ++ ldr w0, [x23, 13976] + cmp w0, 2 -+ beq .L1067 -+.L1001: ++ beq .L787 ++.L720: + cmn x22, #120 -+ bhi .L1058 ++ bhi .L778 + ldr x0, [x20, 30344] + str x0, [x20, 30120] -+ str x23, [x20, 30152] ++ str x24, [x20, 30152] + mov w0, 1 -+ str w0, [x24, 13616] ++ str w0, [x23, 13616] + cmp w19, 2 -+ bne .L1059 ++ bne .L779 + mov x6, 10296 + add x0, x20, x6 + ldp x23, x24, [sp, 48] @@ -6618,20 +5005,20 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + ldp x19, x20, [sp, 16] + .cfi_restore 20 + .cfi_restore 19 -+ b .L981 ++ b .L699 + .p2align 2,,3 -+.L1062: ++.L782: + .cfi_offset 19, -64 + .cfi_offset 20, -56 + ubfx x19, x6, 3, 5 + mov x1, 1 -+ b .L1004 ++ b .L723 + .p2align 2,,3 -+.L1012: ++.L731: + .cfi_offset 23, -32 + .cfi_offset 24, -24 + cmp x2, 3 -+ bls .L1058 ++ bls .L778 + ldrh w0, [x1] + mov x21, -70 + ldrb w6, [x1, 2] @@ -6640,38 +5027,38 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + lsr w0, w6, 4 + ccmp x3, 0, 0, ne + ubfx x19, x6, 4, 21 -+ beq .L1059 ++ beq .L779 + cmp w0, 131072 -+ bhi .L1058 ++ bhi .L778 + mov x21, 4 -+ mov x24, 3 -+ b .L1019 ++ mov x23, 3 ++ b .L738 + .p2align 2,,3 -+.L989: -+ and x23, x0, 16383 -+ lsr w9, w1, 18 ++.L708: ++ and x24, x0, 16383 ++ lsr w10, w1, 18 + mov w6, 0 -+ mov x7, 4 -+ b .L991 ++ mov x9, 4 ++ b .L710 + .p2align 2,,3 -+.L1011: ++.L730: + cmp x2, 2 -+ beq .L1058 ++ beq .L778 + ldrh w6, [x1] + mov x21, 3 -+ mov x24, 2 ++ mov x23, 2 + ubfx x19, x6, 4, 12 -+ b .L1013 ++ b .L732 + .p2align 2,,3 -+.L1002: ++.L721: + .cfi_restore 23 + .cfi_restore 24 + ldrh w6, [x22] + mov x1, 2 + ubfx x19, x6, 4, 12 -+ b .L1004 ++ b .L723 + .p2align 2,,3 -+.L1053: ++.L773: + mov x0, x21 + ldp x19, x20, [sp, 16] + .cfi_restore 20 @@ -6685,7 +5072,7 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + .cfi_def_cfa_offset 0 + ret + .p2align 2,,3 -+.L992: ++.L711: + .cfi_def_cfa_offset 80 + .cfi_offset 19, -64 + .cfi_offset 20, -56 @@ -6695,19 +5082,22 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + .cfi_offset 24, -24 + .cfi_offset 29, -80 + .cfi_offset 30, -72 -+ cmp x23, 65536 -+ bls .L994 -+ sub x2, x0, x23 ++ cmp x24, 65536 ++ bls .L713 ++ sub x2, x0, x24 + add x4, x3, x0 + add x1, x3, x2 ++ str x1, [x20, 30344] ++ str x4, [x20, 30352] + mov w0, 2 -+ b .L993 ++ str w0, [x23, 13976] ++ b .L712 + .p2align 2,,3 -+.L1014: ++.L733: + mov x2, 30364 -+ add x23, x20, x2 ++ add x24, x20, x2 + cmp x19, 65536 -+ bls .L1016 ++ bls .L735 + mov x1, 65504 + add x0, x0, x1 + sub x0, x0, x19 @@ -6719,53 +5109,60 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + str x3, [x20, 30352] + mov w3, 2 + str w3, [x1, 13976] -+ ldrb w1, [x22, x24] ++ ldrb w1, [x22, x23] + bl memset -+ ldrb w1, [x22, x24] -+ mov x0, x23 ++ ldrb w1, [x22, x23] ++ mov x0, x24 + mov x2, 65536 + bl memset -+ b .L1017 ++ b .L736 + .p2align 2,,3 -+.L1005: -+ .cfi_restore 23 -+ .cfi_restore 24 ++.L724: + cmp x19, 65536 -+ bhi .L1068 ++ bhi .L788 ++ add x23, x20, 16384 + mov x4, 30364 + add x0, x20, x4 -+ add x3, x0, x19 -+ mov w5, 0 -+.L1006: -+ add x4, x20, 16384 -+ add x21, x19, x1 + str x0, [x20, 30344] -+ add x6, x21, 32 ++ add x3, x0, x19 + str x3, [x20, 30352] -+ str w5, [x4, 13976] -+ cmp x6, x2 -+ bls .L1008 ++ mov w4, 0 ++ str wzr, [x23, 13976] ++.L725: ++ add x21, x19, x1 ++ add x3, x21, 32 ++ cmp x3, x2 ++ bls .L727 + cmp x21, x2 -+ bhi .L1054 ++ bhi .L778 + add x1, x22, x1 -+ cmp w5, 2 -+ beq .L1069 ++ cmp w4, 2 ++ beq .L789 + mov x2, x19 + bl memcpy -+.L1010: ++.L736: + ldr x0, [x20, 30344] ++ ldp x23, x24, [sp, 48] ++ .cfi_remember_state ++ .cfi_restore 24 ++ .cfi_restore 23 + str x0, [x20, 30120] + str x19, [x20, 30152] ++ mov x0, x21 + ldp x19, x20, [sp, 16] + .cfi_restore 20 + .cfi_restore 19 -+ b .L981 ++ ldp x21, x22, [sp, 32] ++ ldp x29, x30, [sp], 80 ++ .cfi_restore 30 ++ .cfi_restore 29 ++ .cfi_restore 21 ++ .cfi_restore 22 ++ .cfi_def_cfa_offset 0 ++ ret + .p2align 2,,3 -+.L1059: -+ .cfi_offset 19, -64 -+ .cfi_offset 20, -56 -+ .cfi_offset 23, -32 -+ .cfi_offset 24, -24 ++.L779: ++ .cfi_restore_state + mov x0, x21 + ldp x19, x20, [sp, 16] + .cfi_remember_state @@ -6783,7 +5180,7 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + .cfi_def_cfa_offset 0 + ret + .p2align 2,,3 -+.L1028: ++.L748: + .cfi_restore_state + mov x21, -70 + mov x0, x21 @@ -6802,7 +5199,7 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + .cfi_def_cfa_offset 0 + ret + .p2align 2,,3 -+.L1054: ++.L772: + .cfi_def_cfa_offset 80 + .cfi_offset 19, -64 + .cfi_offset 20, -56 @@ -6811,92 +5208,101 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + .cfi_offset 29, -80 + .cfi_offset 30, -72 + ldp x19, x20, [sp, 16] -+ .cfi_remember_state + .cfi_restore 20 + .cfi_restore 19 -+ b .L988 ++ b .L706 + .p2align 2,,3 -+.L1068: -+ .cfi_restore_state ++.L788: ++ .cfi_offset 19, -64 ++ .cfi_offset 20, -56 ++ .cfi_offset 23, -32 ++ .cfi_offset 24, -24 + mov x5, 65504 + add x0, x0, x5 ++ add x23, x20, 16384 + sub x0, x0, x19 + sub x4, x19, #65536 + add x0, x3, x0 -+ mov w5, 2 + add x3, x0, x4 -+ b .L1006 ++ str x0, [x20, 30344] ++ str x3, [x20, 30352] ++ mov w3, 2 ++ mov w4, w3 ++ str w3, [x23, 13976] ++ b .L725 + .p2align 2,,3 -+.L994: -+ .cfi_offset 23, -32 -+ .cfi_offset 24, -24 ++.L713: + mov x12, 30364 + add x1, x20, x12 -+ add x4, x1, x23 -+ mov w0, 0 -+ b .L993 ++ add x0, x1, x24 ++ str x1, [x20, 30344] ++ str x0, [x20, 30352] ++ str wzr, [x23, 13976] ++ b .L712 + .p2align 2,,3 -+.L1016: -+ add x0, x23, x19 -+ mov w3, 0 -+ b .L1015 ++.L735: ++ add x0, x20, 16384 ++ add x1, x24, x19 ++ str x24, [x20, 30344] ++ str x1, [x20, 30352] ++ str wzr, [x0, 13976] ++ b .L734 + .p2align 2,,3 -+.L1064: ++.L784: + ubfiz x6, x6, 10, 8 + lsr w1, w1, 22 -+ add x9, x1, x6 ++ add x10, x1, x6 + mov w0, 1 + mov w6, 0 -+ mov x7, 5 -+ b .L1018 ++ mov x9, 5 ++ b .L737 + .p2align 2,,3 -+.L1008: -+ .cfi_restore 23 -+ .cfi_restore 24 ++.L727: + add x1, x22, x1 + str x1, [x20, 30120] + add x1, x1, x19 + str x19, [x20, 30152] + str x1, [x20, 30352] + ldp x19, x20, [sp, 16] ++ .cfi_remember_state + .cfi_restore 20 + .cfi_restore 19 -+ str wzr, [x4, 13976] -+ b .L981 ++ str wzr, [x23, 13976] ++ ldp x23, x24, [sp, 48] ++ .cfi_restore 24 ++ .cfi_restore 23 ++ b .L699 + .p2align 2,,3 -+.L1066: -+ .cfi_offset 19, -64 -+ .cfi_offset 20, -56 -+ .cfi_offset 23, -32 -+ .cfi_offset 24, -24 ++.L786: ++ .cfi_restore_state + mov x0, x1 + mov w5, w7 -+ mov x3, x9 -+ mov x1, x23 ++ mov x3, x10 ++ mov x1, x24 + ldr x4, [x20, 24] -+ cbz w6, .L998 ++ cbz w6, .L717 + bl HUF_decompress1X_usingDTable + mov x22, x0 -+ b .L999 ++ b .L718 + .p2align 2,,3 -+.L1000: -+ mov x2, x23 ++.L719: ++ mov x2, x24 + mov x6, 2560 + bl HUF_decompress4X_hufOnly_wksp + mov x22, x0 -+ b .L999 -+.L1065: ++ b .L718 ++.L785: + ldr x4, [x20, 24] + mov x11, 16448 -+ add x5, x4, x11 ++ add x8, x4, x11 + .p2align 3,,7 -+.L996: ++.L715: + prfm PLDL2KEEP, [x4] + add x4, x4, 64 -+ cmp x4, x5 -+ bne .L996 -+ b .L995 -+.L1022: ++ cmp x8, x4 ++ bne .L715 ++ b .L714 ++.L742: + mov x21, -24 + ldp x19, x20, [sp, 16] + .cfi_remember_state @@ -6905,8 +5311,8 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + ldp x23, x24, [sp, 48] + .cfi_restore 24 + .cfi_restore 23 -+ b .L981 -+.L1067: ++ b .L699 ++.L787: + .cfi_restore_state + ldr x1, [x20, 30352] + mov x7, 30364 @@ -6918,7 +5324,7 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + bl memcpy + mov x25, 65504 + ldr x1, [x20, 30344] -+ sub x2, x23, #65536 ++ sub x2, x24, #65536 + add x0, x1, x25 + bl memmove + ldr x1, [x20, 30344] @@ -6929,14 +5335,12 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + sub x0, x0, #32 + str x1, [x20, 30344] + str x0, [x20, 30352] -+ b .L1001 -+.L998: ++ b .L720 ++.L717: + bl HUF_decompress4X_usingDTable + mov x22, x0 -+ b .L999 -+.L1069: -+ .cfi_restore 23 -+ .cfi_restore 24 ++ b .L718 ++.L789: + sub x2, x19, #65536 + bl memcpy + mov x3, 30364 @@ -6945,29 +5349,29 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + add x0, x20, x3 + mov x2, 65536 + bl memcpy -+ b .L1010 ++ b .L736 + .cfi_endproc -+.LFE4509: ++.LFE4457: + .size ZSTD_decodeLiteralsBlock, .-ZSTD_decodeLiteralsBlock + .align 2 + .p2align 4,,11 + .global ZSTD_buildFSETable + .type ZSTD_buildFSETable, %function +ZSTD_buildFSETable: -+.LFB4513: ++.LFB4461: + .cfi_startproc + b ZSTD_buildFSETable_body_default.constprop.0 + .cfi_endproc -+.LFE4513: ++.LFE4461: + .size ZSTD_buildFSETable, .-ZSTD_buildFSETable + .align 2 + .p2align 4,,11 + .global ZSTD_decodeSeqHeaders + .type ZSTD_decodeSeqHeaders, %function +ZSTD_decodeSeqHeaders: -+.LFB4515: ++.LFB4463: + .cfi_startproc -+ cbz x3, .L1102 ++ cbz x3, .L820 + sub sp, sp, #272 + .cfi_def_cfa_offset 272 + stp x29, x30, [sp, 48] @@ -6982,14 +5386,14 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + .cfi_offset 21, -192 + .cfi_offset 22, -184 + ldrb w22, [x2] -+ cbnz w22, .L1073 ++ cbnz w22, .L793 + str wzr, [x1] + mov x0, 1 + cmp x3, x0 -+ beq .L1071 -+.L1072: ++ beq .L791 ++.L792: + mov x0, -72 -+.L1071: ++.L791: + ldp x29, x30, [sp, 48] + ldp x19, x20, [sp, 64] + ldp x21, x22, [sp, 80] @@ -7004,7 +5408,7 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + .cfi_def_cfa_offset 0 + ret + .p2align 2,,3 -+.L1073: ++.L793: + .cfi_restore_state + stp x23, x24, [sp, 96] + .cfi_offset 24, -168 @@ -7013,20 +5417,20 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + mov x23, x0 + add x0, x2, 1 + cmp w22, 127 -+ ble .L1075 ++ ble .L795 + cmp w22, 255 -+ beq .L1109 ++ beq .L827 + cmp x19, x0 -+ bls .L1107 ++ bls .L825 + ldrb w2, [x2, 1] + sub w22, w22, #128 + add x0, x20, 2 + add w22, w2, w22, lsl 8 -+.L1075: ++.L795: + add x21, x0, 1 + str w22, [x1] + cmp x21, x19 -+ bhi .L1107 ++ bhi .L825 + stp x25, x26, [sp, 112] + .cfi_offset 26, -152 + .cfi_offset 25, -160 @@ -7045,16 +5449,16 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + add x27, x23, x10 + lsr w1, w26, 6 + cmp w1, 2 -+ beq .L1077 ++ beq .L797 + cmp w1, 3 -+ beq .L1078 ++ beq .L798 + cmp w1, 1 -+ beq .L1110 ++ beq .L828 + adrp x24, .LANCHOR0 + add x24, x24, :lo12:.LANCHOR0 + add x0, x24, 376 + str x0, [x23] -+.L1082: ++.L802: + add x0, x24, 1024 + add x1, x24, 1288 + mov x8, 4136 @@ -7072,7 +5476,7 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + mov w3, 31 + bl ZSTD_buildSeqTable.constprop.0 + cmn x0, #120 -+ bhi .L1088 ++ bhi .L809 + ldr w2, [x25, 13620] + add x3, x24, 1536 + ldr w1, [x25, 13812] @@ -7094,7 +5498,7 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + mov w3, 52 + bl ZSTD_buildSeqTable.constprop.0 + cmn x0, #120 -+ bhi .L1088 ++ bhi .L809 + add x21, x21, x0 + sub x0, x21, x20 + ldp x23, x24, [sp, 96] @@ -7106,17 +5510,17 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + ldp x27, x28, [sp, 128] + .cfi_restore 28 + .cfi_restore 27 -+ b .L1071 ++ b .L791 + .p2align 2,,3 -+.L1107: ++.L825: + .cfi_offset 23, -176 + .cfi_offset 24, -168 + ldp x23, x24, [sp, 96] + .cfi_restore 24 + .cfi_restore 23 -+ b .L1072 ++ b .L792 + .p2align 2,,3 -+.L1102: ++.L820: + .cfi_def_cfa_offset 0 + .cfi_restore 19 + .cfi_restore 20 @@ -7127,7 +5531,7 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + mov x0, -72 + ret + .p2align 2,,3 -+.L1109: ++.L827: + .cfi_def_cfa_offset 272 + .cfi_offset 19, -208 + .cfi_offset 20, -200 @@ -7139,121 +5543,1016 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + .cfi_offset 30, -216 + add x0, x2, 3 + cmp x0, x19 -+ bhi .L1107 ++ bhi .L825 + ldrh w22, [x2, 1] + mov w11, 32512 + add w22, w22, w11 -+ b .L1075 ++ b .L795 + .p2align 2,,3 -+.L1078: ++.L798: + .cfi_offset 25, -160 + .cfi_offset 26, -152 + .cfi_offset 27, -144 + .cfi_offset 28, -136 -+ cbz w2, .L1088 ++ cbz w2, .L809 + cmp w3, 0 + ccmp w22, 24, 4, ne -+ ble .L1108 ++ ble .L826 + ldr x0, [x23] + mov x9, 4160 + add x1, x0, x9 + .p2align 3,,7 -+.L1083: ++.L804: + prfm PLDL2KEEP, [x0] + add x0, x0, 64 -+ cmp x0, x1 -+ bne .L1083 -+.L1108: ++ cmp x1, x0 ++ bne .L804 ++.L826: + adrp x24, .LANCHOR0 + add x24, x24, :lo12:.LANCHOR0 -+ b .L1082 ++ b .L802 + .p2align 2,,3 -+.L1110: -+ cbz x6, .L1088 ++.L828: ++ cbz x6, .L809 + ldrb w1, [x0, 1] + cmp w1, 35 -+ bhi .L1088 ++ bhi .L809 + adrp x24, .LANCHOR0 + add x24, x24, :lo12:.LANCHOR0 + uxtw x6, w1 + add x5, x24, 192 + and x1, x1, 255 -+ add x4, x24, 336 -+ add x21, x0, 2 -+ ldr w0, [x5, x6, lsl 2] -+ sub x6, x19, x21 -+ ldrb w1, [x4, x1] -+ str x28, [x23] -+ str xzr, [x23, 32] -+ strh wzr, [x23, 40] -+ strb w1, [x23, 42] -+ strb wzr, [x23, 43] -+ str w0, [x23, 44] -+ b .L1082 ++ add x4, x24, 336 ++ add x21, x0, 2 ++ ldr w0, [x5, x6, lsl 2] ++ sub x6, x19, x21 ++ ldrb w1, [x4, x1] ++ str x28, [x23] ++ str xzr, [x23, 32] ++ strh wzr, [x23, 40] ++ strb w1, [x23, 42] ++ strb wzr, [x23, 43] ++ str w0, [x23, 44] ++ b .L802 ++ .p2align 2,,3 ++.L797: ++ mov x4, x6 ++ add x0, sp, 160 ++ mov x3, x21 ++ add x2, sp, 156 ++ add x1, sp, 152 ++ bl FSE_readNCount ++ cmn x0, #120 ++ bhi .L809 ++ ldr w5, [sp, 156] ++ cmp w5, 9 ++ bhi .L809 ++ ldr w2, [sp, 152] ++ adrp x24, .LANCHOR0 ++ add x24, x24, :lo12:.LANCHOR0 ++ add x21, x21, x0 ++ add x3, x24, 192 ++ mov x6, x27 ++ add x1, sp, 160 ++ add x4, x24, 336 ++ mov x0, x28 ++ bl ZSTD_buildFSETable_body_default.constprop.0 ++ ldr w2, [x25, 13620] ++ sub x6, x19, x21 ++ ldr w3, [x25, 13812] ++ str x28, [x23] ++ b .L802 ++ .p2align 2,,3 ++.L809: ++ mov x0, -20 ++ ldp x29, x30, [sp, 48] ++ ldp x19, x20, [sp, 64] ++ ldp x21, x22, [sp, 80] ++ ldp x23, x24, [sp, 96] ++ .cfi_restore 24 ++ .cfi_restore 23 ++ ldp x25, x26, [sp, 112] ++ .cfi_restore 26 ++ .cfi_restore 25 ++ ldp x27, x28, [sp, 128] ++ .cfi_restore 28 ++ .cfi_restore 27 ++ add sp, sp, 272 ++ .cfi_restore 29 ++ .cfi_restore 30 ++ .cfi_restore 21 ++ .cfi_restore 22 ++ .cfi_restore 19 ++ .cfi_restore 20 ++ .cfi_def_cfa_offset 0 ++ ret ++ .cfi_endproc ++.LFE4463: ++ .size ZSTD_decodeSeqHeaders, .-ZSTD_decodeSeqHeaders ++ .section .rodata.str1.8,"aMS",@progbits,1 ++ .align 3 ++.LC0: ++ .string "do not support long offset" ++ .text ++ .align 2 ++ .p2align 4,,11 ++ .global ZSTD_decompressSequences_body_ver3 ++ .type ZSTD_decompressSequences_body_ver3, %function ++ZSTD_decompressSequences_body_ver3: ++.LFB4483: ++ .cfi_startproc ++ stp x29, x30, [sp, -112]! ++ .cfi_def_cfa_offset 112 ++ .cfi_offset 29, -112 ++ .cfi_offset 30, -104 ++ mov x29, sp ++ stp x19, x20, [sp, 16] ++ .cfi_offset 20, -88 ++ .cfi_offset 19, -96 ++ cmp w6, 1 ++ beq .L993 ++ add x6, x0, 16384 ++ mov x11, x1 ++ ldr x12, [x0, 30120] ++ add x19, x11, x2 ++ ldr w1, [x6, 13976] ++ ldr x20, [x0, 30152] ++ add x20, x12, x20 ++ cbz w1, .L832 ++ ldr x19, [x0, 30344] ++.L832: ++ ldr q0, [x19] ++ str q0, [sp, 96] ++ cbz w5, .L934 ++ ldp x10, x13, [x0] ++ sub x4, x4, #8 ++ ldr x7, [x0, 16] ++ add x15, x3, x4 ++ stp x21, x22, [sp, 32] ++ .cfi_offset 22, -72 ++ .cfi_offset 21, -80 ++ mov w1, 1 ++ add x7, x7, 8 ++ stp x23, x24, [sp, 48] ++ .cfi_offset 24, -56 ++ .cfi_offset 23, -64 ++ add x8, x10, 8 ++ ldr w10, [x10, 4] ++ stp x25, x26, [sp, 64] ++ .cfi_offset 26, -40 ++ .cfi_offset 25, -48 ++ add x9, x13, 8 ++ mov w16, 0 ++ ldr x2, [x3, x4] ++ ldr w3, [x13, 4] ++ ldr w4, [x7, -4] ++ orr x2, x2, 1 ++ str w1, [x6, 13620] ++ and w13, w4, 255 ++ ldrb w1, [x15, 7] ++ clz w1, w1 ++ sub w1, w1, #23 ++ lsl x2, x2, x1 ++ and w1, w3, 255 ++ tst w10, 255 ++ beq .L834 ++ neg w16, w10 ++ lsr x16, x2, x16 ++ lsl x2, x2, x10 ++.L834: ++ mov w18, 0 ++ cbz w13, .L835 ++ neg w18, w4 ++ lsr x18, x2, x18 ++ lsl x2, x2, x4 ++.L835: ++ mov w17, 0 ++ cbz w1, .L836 ++ neg w17, w3 ++ lsr x17, x2, x17 ++ lsl x2, x2, x3 ++.L836: ++ mov x4, 26684 ++ add x10, x0, x4 ++ cmp w5, 0 ++ bgt .L938 ++ adrp x13, .LANCHOR0 ++ add x13, x13, :lo12:.LANCHOR0 ++ mov x3, 26688 ++ add x14, x13, 32 ++ add x0, x0, x3 ++ mov w5, 64 ++ .p2align 3,,7 ++.L894: ++ rbit x2, x2 ++ clz x2, x2 ++ ldr x4, [x7, w18, sxtw 3] ++ sub x15, x15, x2, lsr 3 ++ ldr x16, [x8, w16, sxtw 3] ++ and w2, w2, 7 ++ ldr x1, [x15] ++ ubfx w19, w4, 16, 8 ++ ubfx x18, x4, 16, 8 ++ lsr x20, x16, 32 ++ orr x1, x1, 1 ++ ldr x17, [x9, w17, sxtw 3] ++ lsl x1, x1, x2 ++ cmp w19, 1 ++ bls .L838 ++ sub w3, w5, w19 ++ ldr w22, [x6, 10300] ++ ldr w21, [x6, 10304] ++ lsr x2, x4, 32 ++ lsr x3, x1, x3 ++ add w2, w3, w2 ++ lsl x1, x1, x18 ++ str w2, [x6, 10300] ++ str w22, [x6, 10304] ++ str w21, [x6, 10308] ++.L839: ++ lsr x18, x17, 32 ++ ubfx x21, x17, 16, 8 ++ mov w22, w18 ++ ubfx w3, w17, 16, 8 ++ tst w17, 16711680 ++ beq .L846 ++ sub w22, w5, w3 ++ lsr x22, x1, x22 ++ add w22, w22, w18 ++ lsl x1, x1, x21 ++.L846: ++ ubfx x23, x16, 16, 8 ++ ubfx w21, w16, 16, 8 ++ mov w18, w20 ++ tst w16, 16711680 ++ beq .L848 ++ sub w18, w5, w21 ++ lsr x18, x1, x18 ++ add w18, w18, w20 ++ lsl x1, x1, x23 ++.L848: ++ add w3, w3, w19 ++ add w3, w3, w21 ++ cmp w3, 38 ++ bgt .L994 ++.L849: ++ lsr w19, w16, 24 ++ lsr w3, w16, 24 ++ and w16, w16, 65535 ++ cbz w3, .L851 ++ sub w3, w5, w3 ++ lsr x3, x1, x3 ++ add w16, w16, w3 ++ lsl x1, x1, x19 ++.L851: ++ lsr w19, w17, 24 ++ lsr w3, w17, 24 ++ and w17, w17, 65535 ++ cbz w3, .L853 ++ sub w3, w5, w3 ++ lsr x3, x1, x3 ++ add w17, w17, w3 ++ lsl x1, x1, x19 ++.L853: ++ lsr w19, w4, 24 ++ lsr w3, w4, 24 ++ and w23, w4, 65535 ++ cbz w3, .L855 ++ sub w3, w5, w3 ++ lsr x3, x1, x3 ++ add w23, w23, w3 ++ lsl x1, x1, x19 ++.L855: ++ ldr q0, [x12] ++ str q0, [x11] ++ cmp w18, 16 ++ bgt .L856 ++.L860: ++ sxtw x4, w18 ++ sxtw x24, w2 ++ add x20, x11, x4 ++ sxtw x21, w22 ++ add x4, x12, x4 ++ sub x3, x20, x24 ++ add x19, x20, x21 ++ cmp w2, 15 ++ ble .L995 ++ ldr q0, [x3], 16 ++ add x2, x20, 16 ++ str q0, [x11, w18, sxtw] ++ cmp w22, 16 ++ ble .L865 ++ .p2align 3,,7 ++.L864: ++ ldr q0, [x3] ++ add x2, x2, 32 ++ add x3, x3, 32 ++ str q0, [x2, -32] ++ ldr q0, [x3, -16] ++ str q0, [x2, -16] ++ cmp x2, x19 ++ bcc .L864 ++.L865: ++ rbit x1, x1 ++ clz x1, x1 ++ ldr x18, [x7, w23, sxtw 3] ++ sub x15, x15, x1, lsr 3 ++ ldr x16, [x8, w16, sxtw 3] ++ and w1, w1, 7 ++ ldr x2, [x15] ++ ubfx w11, w18, 16, 8 ++ ubfx x22, x18, 16, 8 ++ lsr x12, x16, 32 ++ orr x2, x2, 1 ++ ldr x17, [x9, w17, sxtw 3] ++ lsl x2, x2, x1 ++ cmp w11, 1 ++ bls .L996 ++ sub w1, w5, w11 ++ ldr w24, [x6, 10300] ++ ldr w23, [x6, 10304] ++ lsr x3, x18, 32 ++ lsr x1, x2, x1 ++ str w24, [x6, 10304] ++ lsl x2, x2, x22 ++ add w22, w1, w3 ++ str w22, [x6, 10300] ++ str w23, [x6, 10308] ++.L871: ++ lsr x3, x17, 32 ++ ubfx x23, x17, 16, 8 ++ mov w30, w3 ++ ubfx w1, w17, 16, 8 ++ tst w17, 16711680 ++ beq .L878 ++ sub w30, w5, w1 ++ lsr x30, x2, x30 ++ add w30, w30, w3 ++ lsl x2, x2, x23 ++.L878: ++ ubfx x24, x16, 16, 8 ++ ubfx w3, w16, 16, 8 ++ mov w23, w12 ++ tst w16, 16711680 ++ beq .L880 ++ sub w23, w5, w3 ++ lsr x23, x2, x23 ++ add w23, w23, w12 ++ lsl x2, x2, x24 ++.L880: ++ add w1, w1, w11 ++ add w1, w1, w3 ++ cmp w1, 38 ++ bgt .L997 ++.L881: ++ lsr w3, w16, 24 ++ lsr w1, w16, 24 ++ and w16, w16, 65535 ++ cbz w1, .L883 ++ sub w1, w5, w1 ++ lsr x1, x2, x1 ++ add w16, w16, w1 ++ lsl x2, x2, x3 ++.L883: ++ lsr w3, w17, 24 ++ lsr w1, w17, 24 ++ and w17, w17, 65535 ++ cbz w1, .L885 ++ sub w1, w5, w1 ++ lsr x1, x2, x1 ++ add w17, w17, w1 ++ lsl x2, x2, x3 ++.L885: ++ lsr w3, w18, 24 ++ lsr w1, w18, 24 ++ and w18, w18, 65535 ++ cbz w1, .L887 ++ sub w1, w5, w1 ++ lsr x1, x2, x1 ++ add w18, w18, w1 ++ lsl x2, x2, x3 ++.L887: ++ ldr q0, [x4] ++ str q0, [x20, x21] ++ cmp w23, 16 ++ bgt .L888 ++.L892: ++ sxtw x1, w23 ++ sxtw x20, w22 ++ add x3, x19, x1 ++ add x12, x4, x1 ++ add x11, x3, x30, sxtw ++ sub x4, x3, x20 ++ cmp w22, 15 ++ ble .L998 ++ ldr q0, [x4], 16 ++ add x3, x3, 16 ++ str q0, [x19, w23, sxtw] ++ cmp w30, 16 ++ ble .L894 ++ .p2align 3,,7 ++.L895: ++ ldr q0, [x4] ++ add x3, x3, 32 ++ add x4, x4, 32 ++ str q0, [x3, -32] ++ ldr q0, [x4, -16] ++ str q0, [x3, -16] ++ cmp x3, x11 ++ bcc .L895 ++ b .L894 ++ .p2align 2,,3 ++.L934: ++ .cfi_restore 21 ++ .cfi_restore 22 ++ .cfi_restore 23 ++ .cfi_restore 24 ++ .cfi_restore 25 ++ .cfi_restore 26 ++ mov x0, x11 ++.L833: ++ sub x20, x20, x12 ++ mov x2, x12 ++ add w4, w12, w20 ++ mov x1, x0 ++ .p2align 3,,7 ++.L933: ++ ldr q0, [x2], 16 ++ sub w3, w4, w2 ++ str q0, [x1], 16 ++ cmp w3, 0 ++ bgt .L933 ++ ldr q0, [sp, 96] ++ add x0, x0, x20 ++ sub x0, x0, x11 ++ str q0, [x19] ++ ldp x19, x20, [sp, 16] ++ .cfi_restore 20 ++ .cfi_restore 19 ++ ldp x29, x30, [sp], 112 ++ .cfi_restore 30 ++ .cfi_restore 29 ++ .cfi_def_cfa_offset 0 ++ ret ++ .p2align 2,,3 ++.L997: ++ .cfi_def_cfa_offset 112 ++ .cfi_offset 19, -96 ++ .cfi_offset 20, -88 ++ .cfi_offset 21, -80 ++ .cfi_offset 22, -72 ++ .cfi_offset 23, -64 ++ .cfi_offset 24, -56 ++ .cfi_offset 25, -48 ++ .cfi_offset 26, -40 ++ .cfi_offset 29, -112 ++ .cfi_offset 30, -104 ++ rbit x2, x2 ++ clz x2, x2 ++ sub x15, x15, x2, lsr 3 ++ and w2, w2, 7 ++ ldr x1, [x15] ++ orr x1, x1, 1 ++ lsl x2, x1, x2 ++ b .L881 ++ .p2align 2,,3 ++.L996: ++ cmp w12, 0 ++ cset w1, eq ++ cbnz w11, .L872 ++ and x1, x1, 255 ++ csel x3, x10, x0, eq ++ ldr w22, [x10, x1, lsl 2] ++ ldr w1, [x3] ++ str w22, [x6, 10300] ++ str w1, [x6, 10304] ++ b .L871 + .p2align 2,,3 -+.L1077: -+ mov x4, x6 -+ add x0, sp, 160 -+ mov x3, x21 -+ add x2, sp, 156 -+ add x1, sp, 152 -+ bl FSE_readNCount -+ cmn x0, #120 -+ bhi .L1088 -+ ldr w5, [sp, 156] -+ cmp w5, 9 -+ bhi .L1088 -+ ldr w2, [sp, 152] -+ adrp x24, .LANCHOR0 -+ add x24, x24, :lo12:.LANCHOR0 -+ add x21, x21, x0 -+ add x3, x24, 192 -+ mov x6, x27 -+ add x1, sp, 160 -+ add x4, x24, 336 -+ mov x0, x28 -+ bl ZSTD_buildFSETable_body_default.constprop.0 -+ ldr w2, [x25, 13620] -+ sub x6, x19, x21 -+ ldr w3, [x25, 13812] -+ str x28, [x23] -+ b .L1082 ++.L995: ++ cmp x24, 7 ++ bhi .L866 ++ ldrb w2, [x3] ++ strb w2, [x20] ++ ldr w2, [x14, x24, lsl 2] ++ ldrb w11, [x3, 1] ++ strb w11, [x20, 1] ++ add x12, x3, x2 ++ ldrsw x11, [x13, x24, lsl 2] ++ ldrb w18, [x3, 2] ++ strb w18, [x20, 2] ++ ldrb w18, [x3, 3] ++ strb w18, [x20, 3] ++ ldr w2, [x3, x2] ++ sub x3, x12, x11 ++ str w2, [x20, 4] ++.L867: ++ cmp w22, 8 ++ ble .L865 ++ add x11, x3, 8 ++ add x2, x20, 8 ++ sub x12, x2, x11 ++ cmp x12, 15 ++ bgt .L868 ++ .p2align 3,,7 ++.L869: ++ ldr d0, [x11], 8 ++ str d0, [x2], 8 ++ cmp x2, x19 ++ bcs .L865 ++ ldr d0, [x11], 8 ++ str d0, [x2], 8 ++ cmp x2, x19 ++ bcc .L869 ++ b .L865 ++ .p2align 2,,3 ++.L856: ++ sub w4, w18, #17 ++ add x19, x11, 16 ++ add x3, x12, 16 ++ lsr w4, w4, 4 ++ add w4, w4, 2 ++ ubfiz x4, x4, 4, 29 ++ add x4, x12, x4 ++ .p2align 3,,7 ++.L859: ++ ldr q0, [x3], 16 ++ str q0, [x19], 16 ++ cmp x3, x4 ++ bne .L859 ++ b .L860 + .p2align 2,,3 -+.L1088: -+ mov x0, -20 -+ ldp x29, x30, [sp, 48] -+ ldp x19, x20, [sp, 64] -+ ldp x21, x22, [sp, 80] -+ ldp x23, x24, [sp, 96] ++.L994: ++ rbit x1, x1 ++ clz x1, x1 ++ sub x15, x15, x1, lsr 3 ++ and w1, w1, 7 ++ ldr x3, [x15] ++ orr x3, x3, 1 ++ lsl x1, x3, x1 ++ b .L849 ++ .p2align 2,,3 ++.L838: ++ cmp w20, 0 ++ cset w2, eq ++ cbnz w19, .L840 ++ and x2, x2, 255 ++ csel x3, x10, x0, eq ++ ldr w2, [x10, x2, lsl 2] ++ ldr w3, [x3] ++ str w2, [x6, 10300] ++ str w3, [x6, 10304] ++ b .L839 ++ .p2align 2,,3 ++.L998: ++ cmp x20, 7 ++ bhi .L897 ++ ldrb w21, [x4] ++ strb w21, [x19, x1] ++ ldr w1, [x14, x20, lsl 2] ++ ldrb w19, [x4, 1] ++ strb w19, [x3, 1] ++ add x21, x4, x1 ++ ldrsw x19, [x13, x20, lsl 2] ++ ldrb w20, [x4, 2] ++ strb w20, [x3, 2] ++ ldrb w20, [x4, 3] ++ strb w20, [x3, 3] ++ ldr w1, [x4, x1] ++ sub x4, x21, x19 ++ str w1, [x3, 4] ++.L898: ++ cmp w30, 8 ++ ble .L894 ++ add x19, x4, 8 ++ add x1, x3, 8 ++ sub x20, x1, x19 ++ cmp x20, 15 ++ bgt .L899 ++ .p2align 3,,7 ++.L900: ++ ldr d0, [x19], 8 ++ str d0, [x1], 8 ++ cmp x1, x11 ++ bcs .L894 ++ ldr d0, [x19], 8 ++ str d0, [x1], 8 ++ cmp x1, x11 ++ bcc .L900 ++ b .L894 ++ .p2align 2,,3 ++.L888: ++ sub w3, w23, #17 ++ add x11, x19, 16 ++ add x1, x4, 16 ++ lsr w3, w3, 4 ++ add w3, w3, 2 ++ ubfiz x3, x3, 4, 29 ++ add x3, x4, x3 ++ .p2align 3,,7 ++.L891: ++ ldr q0, [x1], 16 ++ str q0, [x11], 16 ++ cmp x1, x3 ++ bne .L891 ++ b .L892 ++.L840: ++ lsr x18, x4, 32 ++ lsr x3, x1, 63 ++ add w2, w2, w18 ++ lsl x1, x1, 1 ++ add w2, w2, w3 ++ ldr w18, [x6, 10300] ++ cmp w2, 3 ++ beq .L999 ++ ldrsw x3, [x10, w2, sxtw 2] ++ cmp x3, 0 ++ cset x21, eq ++ sub x3, x3, x21 ++ cmp w2, 1 ++ beq .L844 ++.L843: ++ ldr w2, [x6, 10304] ++ str w2, [x6, 10308] ++.L844: ++ mov w2, w3 ++ str w3, [x6, 10300] ++ str w18, [x6, 10304] ++ b .L839 ++.L897: ++ ldr d0, [x4] ++ str d0, [x19, x1] ++ b .L898 ++.L872: ++ lsr x22, x18, 32 ++ lsr x3, x2, 63 ++ add w1, w1, w22 ++ ldr w23, [x6, 10300] ++ add w1, w1, w3 ++ lsl x2, x2, 1 ++ cmp w1, 3 ++ beq .L1000 ++ ldrsw x3, [x10, w1, sxtw 2] ++ cmp x3, 0 ++ cset x22, eq ++ sub x3, x3, x22 ++ cmp w1, 1 ++ beq .L876 ++.L875: ++ ldr w1, [x6, 10304] ++ str w1, [x6, 10308] ++.L876: ++ mov w22, w3 ++ str w3, [x6, 10300] ++ str w23, [x6, 10304] ++ b .L871 ++.L866: ++ ldr d0, [x3] ++ str d0, [x20] ++ b .L867 ++.L1000: ++ sub w3, w23, #1 ++ sxtw x3, w3 ++ cmp x3, 0 ++ cset x1, eq ++ sub x3, x3, x1 ++ b .L875 ++.L999: ++ sub w3, w18, #1 ++ sxtw x3, w3 ++ cmp x3, 0 ++ cset x2, eq ++ sub x3, x3, x2 ++ b .L843 ++.L899: ++ ldr q0, [x4, 8] ++ str q0, [x3, 8] ++ cmp w30, 24 ++ ble .L894 ++ add x3, x3, 24 ++ add x4, x4, 24 ++ .p2align 3,,7 ++.L901: ++ ldr q0, [x4] ++ add x3, x3, 32 ++ add x4, x4, 32 ++ str q0, [x3, -32] ++ ldr q0, [x4, -16] ++ str q0, [x3, -16] ++ cmp x3, x11 ++ bcc .L901 ++ b .L894 ++.L868: ++ ldr q0, [x3, 8] ++ str q0, [x20, 8] ++ cmp w22, 24 ++ ble .L865 ++ add x3, x3, 24 ++ add x2, x20, 24 ++ .p2align 3,,7 ++.L870: ++ ldr q0, [x3] ++ add x2, x2, 32 ++ add x3, x3, 32 ++ str q0, [x2, -32] ++ ldr q0, [x3, -16] ++ str q0, [x2, -16] ++ cmp x2, x19 ++ bcc .L870 ++ b .L865 ++.L938: ++ adrp x21, .LANCHOR0 ++ add x21, x21, :lo12:.LANCHOR0 ++ mov x1, 26688 ++ add x22, x21, 32 ++ add x30, x0, x1 ++ mov x0, x11 ++ mov w14, 64 ++ .p2align 3,,7 ++.L837: ++ rbit x3, x2 ++ prfm PLDL1KEEP, [x15] ++ clz x3, x3 ++ sub w5, w5, #1 ++ ldr x18, [x7, w18, sxtw 3] ++ sub x15, x15, x3, lsr 3 ++ ldr x16, [x8, w16, sxtw 3] ++ and w3, w3, 7 ++ ldr x1, [x15] ++ ubfx w4, w18, 16, 8 ++ ubfx x23, x18, 16, 8 ++ lsr x24, x16, 32 ++ orr x1, x1, 1 ++ ldr x17, [x9, w17, sxtw 3] ++ lsl x2, x1, x3 ++ cmp w4, 1 ++ bls .L902 ++ sub w13, w14, w4 ++ ldr w25, [x6, 10300] ++ ldr w3, [x6, 10304] ++ lsr x1, x18, 32 ++ lsr x13, x2, x13 ++ add w13, w13, w1 ++ lsl x2, x2, x23 ++ str w13, [x6, 10300] ++ str w25, [x6, 10304] ++ str w3, [x6, 10308] ++.L903: ++ lsr x1, x17, 32 ++ ubfx x25, x17, 16, 8 ++ mov w23, w1 ++ ubfx w3, w17, 16, 8 ++ tst w17, 16711680 ++ beq .L910 ++ sub w23, w14, w3 ++ lsr x23, x2, x23 ++ add w23, w23, w1 ++ lsl x2, x2, x25 ++.L910: ++ ubfx x26, x16, 16, 8 ++ ubfx w25, w16, 16, 8 ++ mov w1, w24 ++ tst w16, 16711680 ++ beq .L912 ++ sub w1, w14, w25 ++ lsr x1, x2, x1 ++ add w1, w1, w24 ++ lsl x2, x2, x26 ++.L912: ++ add w3, w3, w4 ++ add w3, w3, w25 ++ cmp w3, 37 ++ bgt .L1001 ++.L913: ++ lsr w4, w16, 24 ++ lsr w3, w16, 24 ++ and w16, w16, 65535 ++ cbz w3, .L915 ++ sub w3, w14, w3 ++ lsr x3, x2, x3 ++ add w16, w16, w3 ++ lsl x2, x2, x4 ++.L915: ++ lsr w4, w17, 24 ++ lsr w3, w17, 24 ++ and w17, w17, 65535 ++ cbz w3, .L917 ++ sub w3, w14, w3 ++ lsr x3, x2, x3 ++ add w17, w17, w3 ++ lsl x2, x2, x4 ++.L917: ++ lsr w4, w18, 24 ++ lsr w3, w18, 24 ++ and w18, w18, 65535 ++ cbz w3, .L919 ++ sub w3, w14, w3 ++ lsr x3, x2, x3 ++ add w18, w18, w3 ++ lsl x2, x2, x4 ++.L919: ++ ldr q0, [x12] ++ str q0, [x0] ++ cmp w1, 16 ++ bgt .L920 ++.L924: ++ sxtw x1, w1 ++ sxtw x24, w13 ++ add x3, x0, x1 ++ add x12, x12, x1 ++ sub x4, x3, x24 ++ add x0, x3, x23, sxtw ++ cmp w13, 15 ++ ble .L1002 ++ ldr q0, [x4], 16 ++ str q0, [x3], 16 ++ cmp w23, 16 ++ ble .L927 ++ .p2align 3,,7 ++.L926: ++ ldr q0, [x4] ++ add x3, x3, 32 ++ add x4, x4, 32 ++ str q0, [x3, -32] ++ ldr q0, [x4, -16] ++ str q0, [x3, -16] ++ cmp x3, x0 ++ bcc .L926 ++.L927: ++ cbnz w5, .L837 ++ ldp x21, x22, [sp, 32] ++ .cfi_remember_state ++ .cfi_restore 22 ++ .cfi_restore 21 ++ ldp x23, x24, [sp, 48] + .cfi_restore 24 + .cfi_restore 23 -+ ldp x25, x26, [sp, 112] ++ ldp x25, x26, [sp, 64] + .cfi_restore 26 + .cfi_restore 25 -+ ldp x27, x28, [sp, 128] -+ .cfi_restore 28 -+ .cfi_restore 27 -+ add sp, sp, 272 -+ .cfi_restore 29 -+ .cfi_restore 30 ++ b .L833 ++ .p2align 2,,3 ++.L902: ++ .cfi_restore_state ++ cmp w24, 0 ++ cset w1, eq ++ cbnz w4, .L904 ++ and x1, x1, 255 ++ csel x3, x30, x10, ne ++ ldr w13, [x10, x1, lsl 2] ++ ldr w1, [x3] ++ str w13, [x6, 10300] ++ str w1, [x6, 10304] ++ b .L903 ++ .p2align 2,,3 ++.L904: ++ lsr x13, x18, 32 ++ lsr x3, x2, 63 ++ add w1, w1, w13 ++ ldr w23, [x6, 10300] ++ add w1, w1, w3 ++ lsl x2, x2, 1 ++ cmp w1, 3 ++ beq .L1003 ++ ldrsw x3, [x10, w1, sxtw 2] ++ cmp x3, 0 ++ cset x13, eq ++ sub x3, x3, x13 ++ cmp w1, 1 ++ beq .L908 ++.L907: ++ ldr w1, [x6, 10304] ++ str w1, [x6, 10308] ++.L908: ++ mov w13, w3 ++ str w3, [x6, 10300] ++ str w23, [x6, 10304] ++ b .L903 ++ .p2align 2,,3 ++.L1002: ++ cmp x24, 7 ++ bhi .L928 ++ ldrb w1, [x4] ++ strb w1, [x3] ++ ldr w1, [x22, x24, lsl 2] ++ ldrb w13, [x4, 1] ++ strb w13, [x3, 1] ++ add x25, x4, x1 ++ ldrsw x13, [x21, x24, lsl 2] ++ ldrb w24, [x4, 2] ++ strb w24, [x3, 2] ++ ldrb w24, [x4, 3] ++ strb w24, [x3, 3] ++ ldr w1, [x4, x1] ++ sub x4, x25, x13 ++ str w1, [x3, 4] ++.L929: ++ cmp w23, 8 ++ ble .L927 ++ add x13, x4, 8 ++ add x1, x3, 8 ++ sub x24, x1, x13 ++ cmp x24, 15 ++ bgt .L930 ++ .p2align 3,,7 ++.L931: ++ ldr d0, [x13], 8 ++ str d0, [x1], 8 ++ cmp x1, x0 ++ bcc .L931 ++ b .L927 ++ .p2align 2,,3 ++.L920: ++ sub w4, w1, #17 ++ add x24, x0, 16 ++ add x3, x12, 16 ++ lsr w4, w4, 4 ++ add w4, w4, 2 ++ ubfiz x4, x4, 4, 29 ++ add x4, x12, x4 ++ .p2align 3,,7 ++.L923: ++ ldr q0, [x3], 16 ++ str q0, [x24], 16 ++ cmp x3, x4 ++ bne .L923 ++ b .L924 ++ .p2align 2,,3 ++.L1001: ++ rbit x2, x2 ++ clz x2, x2 ++ sub x15, x15, x2, lsr 3 ++ and w2, w2, 7 ++ ldr x3, [x15] ++ orr x3, x3, 1 ++ lsl x2, x3, x2 ++ b .L913 ++ .p2align 2,,3 ++.L1003: ++ sub w3, w23, #1 ++ sxtw x3, w3 ++ cmp x3, 0 ++ cset x1, eq ++ sub x3, x3, x1 ++ b .L907 ++.L928: ++ ldr d0, [x4] ++ str d0, [x3] ++ b .L929 ++.L930: ++ ldr q0, [x4, 8] ++ str q0, [x3, 8] ++ cmp w23, 24 ++ ble .L927 ++ add x3, x3, 24 ++ add x4, x4, 24 ++ .p2align 3,,7 ++.L932: ++ ldr q0, [x4] ++ add x3, x3, 32 ++ add x4, x4, 32 ++ str q0, [x3, -32] ++ ldr q0, [x4, -16] ++ str q0, [x3, -16] ++ cmp x3, x0 ++ bcc .L932 ++ b .L927 ++.L993: + .cfi_restore 21 + .cfi_restore 22 -+ .cfi_restore 19 -+ .cfi_restore 20 -+ .cfi_def_cfa_offset 0 -+ ret ++ .cfi_restore 23 ++ .cfi_restore 24 ++ .cfi_restore 25 ++ .cfi_restore 26 ++ adrp x0, .LC0 ++ add x0, x0, :lo12:.LC0 ++ stp x21, x22, [sp, 32] ++ .cfi_offset 22, -72 ++ .cfi_offset 21, -80 ++ stp x23, x24, [sp, 48] ++ .cfi_offset 24, -56 ++ .cfi_offset 23, -64 ++ stp x25, x26, [sp, 64] ++ .cfi_offset 26, -40 ++ .cfi_offset 25, -48 ++ str w6, [sp, 92] ++ bl puts ++ mov w0, 10 ++ bl putchar ++ ldr w6, [sp, 92] ++ mov w0, w6 ++ bl exit + .cfi_endproc -+.LFE4515: -+ .size ZSTD_decodeSeqHeaders, .-ZSTD_decodeSeqHeaders ++.LFE4483: ++ .size ZSTD_decompressSequences_body_ver3, .-ZSTD_decompressSequences_body_ver3 + .align 2 + .p2align 4,,11 + .global ZSTD_decompressBlock_internal + .type ZSTD_decompressBlock_internal, %function +ZSTD_decompressBlock_internal: -+.LFB4541: ++.LFB4496: + .cfi_startproc + cmp x4, 131072 -+ bhi .L1165 ++ bhi .L1060 + sub sp, sp, #320 + .cfi_def_cfa_offset 320 + stp x29, x30, [sp, 48] @@ -7270,72 +6569,75 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + stp x21, x22, [sp, 80] + .cfi_offset 21, -240 + .cfi_offset 22, -232 -+ mov x22, x2 + mov w21, w5 -+ mov x2, x19 ++ mov x22, x0 + mov w5, w6 + stp x23, x24, [sp, 96] + .cfi_offset 23, -224 + .cfi_offset 24, -216 + mov x24, x1 -+ mov x23, x0 ++ mov x23, x2 + mov x1, x20 ++ mov x2, x19 + bl ZSTD_decodeLiteralsBlock + cmn x0, #120 -+ bhi .L1111 ++ bhi .L1004 + stp x25, x26, [sp, 112] + .cfi_offset 26, -200 + .cfi_offset 25, -208 -+ add x8, x20, x0 -+ add x26, x23, 16384 ++ mov x5, 131072 ++ sub x26, x19, x0 + stp x27, x28, [sp, 128] + .cfi_offset 28, -184 + .cfi_offset 27, -192 -+ mov x5, 131072 -+ sub x27, x19, x0 -+ cbz w21, .L1114 -+ ldr w5, [x26, 13560] -+.L1114: -+ cmp x22, x5 -+ ldr w28, [x26, 13812] -+ ldr x0, [x23, 29904] -+ csel x5, x22, x5, ls ++ add x28, x20, x0 ++ cbz w21, .L1007 ++ add x0, x22, 16384 ++ ldr w5, [x0, 13560] ++.L1007: ++ cmp x23, x5 ++ ldr x0, [x22, 29904] ++ csel x5, x23, x5, ls + add x5, x24, x5 + sub x21, x5, x0 -+ cbz x27, .L1115 -+ mov x3, x8 ++ cbz x26, .L1008 ++ mov x3, x28 ++ add x27, x22, 16384 + ldrb w25, [x3], 1 -+ cbnz w25, .L1116 ++ ldr w8, [x27, 13812] ++ cbnz w25, .L1009 + mov x4, 0 -+ cmp x27, 1 -+ bne .L1115 -+.L1117: -+ cmp w28, 0 ++ cmp x26, 1 ++ bne .L1008 ++.L1010: ++ cmp w8, 0 + mov x0, 16777216 + mov w25, 0 + ccmp x21, x0, 0, eq -+ bhi .L1164 -+.L1133: -+ str wzr, [x26, 13812] -+ cbnz w28, .L1166 -+.L1135: -+ ldr w0, [x26, 13976] ++ bhi .L1059 ++.L1027: ++ str wzr, [x27, 13812] ++ cbnz w8, .L1061 ++.L1029: ++ ldr w0, [x27, 13976] + mov w5, w25 -+ mov x2, x22 ++ mov x2, x23 + mov x1, x24 + cmp w0, 2 -+ mov x0, x23 ++ mov x0, x22 ++ beq .L1062 ++ ldp x29, x30, [sp, 48] ++ mov w6, 0 ++ ldp x19, x20, [sp, 64] ++ ldp x21, x22, [sp, 80] ++ ldp x23, x24, [sp, 96] + ldp x25, x26, [sp, 112] ++ .cfi_remember_state + .cfi_restore 26 + .cfi_restore 25 + ldp x27, x28, [sp, 128] + .cfi_restore 28 + .cfi_restore 27 -+ beq .L1167 -+ ldp x29, x30, [sp, 48] -+ ldp x19, x20, [sp, 64] -+ ldp x21, x22, [sp, 80] -+ ldp x23, x24, [sp, 96] + add sp, sp, 320 + .cfi_restore 29 + .cfi_restore 30 @@ -7346,31 +6648,19 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + .cfi_restore 19 + .cfi_restore 20 + .cfi_def_cfa_offset 0 -+ b ZSTD_decompressSequences_default.constprop.0 ++ b ZSTD_decompressSequences_body_ver3 + .p2align 2,,3 -+.L1116: -+ .cfi_def_cfa_offset 320 -+ .cfi_offset 19, -256 -+ .cfi_offset 20, -248 -+ .cfi_offset 21, -240 -+ .cfi_offset 22, -232 -+ .cfi_offset 23, -224 -+ .cfi_offset 24, -216 -+ .cfi_offset 25, -208 -+ .cfi_offset 26, -200 -+ .cfi_offset 27, -192 -+ .cfi_offset 28, -184 -+ .cfi_offset 29, -272 -+ .cfi_offset 30, -264 ++.L1009: ++ .cfi_restore_state + add x20, x20, x19 + cmp w25, 127 -+ ble .L1118 ++ ble .L1011 + cmp w25, 255 -+ beq .L1168 ++ beq .L1063 + cmp x20, x3 -+ bhi .L1169 ++ bhi .L1064 + .p2align 3,,7 -+.L1115: ++.L1008: + ldp x25, x26, [sp, 112] + .cfi_restore 26 + .cfi_restore 25 @@ -7378,7 +6668,7 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + ldp x27, x28, [sp, 128] + .cfi_restore 28 + .cfi_restore 27 -+.L1111: ++.L1004: + ldp x29, x30, [sp, 48] + ldp x19, x20, [sp, 64] + ldp x21, x22, [sp, 80] @@ -7395,11 +6685,11 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + .cfi_def_cfa_offset 0 + ret + .p2align 2,,3 -+.L1165: ++.L1060: + mov x0, -72 + ret + .p2align 2,,3 -+.L1169: ++.L1064: + .cfi_def_cfa_offset 320 + .cfi_offset 19, -256 + .cfi_offset 20, -248 @@ -7413,35 +6703,35 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + .cfi_offset 28, -184 + .cfi_offset 29, -272 + .cfi_offset 30, -264 -+ ldrb w0, [x8, 1] ++ ldrb w0, [x28, 1] + sub w25, w25, #128 -+ add x3, x8, 2 ++ add x3, x28, 2 + add w25, w0, w25, lsl 8 -+.L1118: ++.L1011: + add x19, x3, 1 + cmp x20, x19 -+ bcc .L1115 ++ bcc .L1008 + ldrb w10, [x3] + mov w0, 35 + str w0, [sp, 200] + mov x11, 27324 -+ ldr w1, [x26, 13620] -+ add x7, x23, 32 ++ ldr w1, [x27, 13620] ++ add x7, x22, 32 + lsr w0, w10, 6 + sub x6, x20, x19 -+ add x11, x23, x11 ++ add x11, x22, x11 + cmp w0, 2 -+ beq .L1120 ++ beq .L1013 + cmp w0, 3 -+ beq .L1121 ++ beq .L1014 + cmp w0, 1 -+ beq .L1170 ++ beq .L1065 + adrp x9, .LANCHOR0 -+ mov w3, w28 ++ mov w3, w8 + add x9, x9, :lo12:.LANCHOR0 + add x0, x9, 376 -+ str x0, [x23] -+.L1125: ++ str x0, [x22] ++.L1018: + add x0, x9, 1024 + add x2, x9, 1288 + stp x2, x0, [sp] @@ -7450,25 +6740,25 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + str w1, [sp, 16] + mov x5, x19 + str w3, [sp, 24] -+ add x1, x23, 16 ++ add x1, x22, 16 + str w25, [sp, 32] + mov w4, 8 + str x11, [sp, 40] + mov w3, 31 -+ str x8, [sp, 176] ++ str w8, [sp, 176] + mov x8, 4136 -+ add x0, x23, x8 -+ str w10, [sp, 152] ++ add x0, x22, x8 ++ str w10, [sp, 156] + stp x11, x9, [sp, 160] + bl ZSTD_buildSeqTable.constprop.0 -+ ldr w10, [sp, 152] ++ ldr w10, [sp, 156] ++ ldr w8, [sp, 176] + cmn x0, #120 + ldp x11, x9, [sp, 160] -+ ldr x8, [sp, 176] -+ bhi .L1131 -+ ldr w2, [x26, 13620] ++ bhi .L1025 ++ ldr w2, [x27, 13620] + add x3, x9, 1536 -+ ldr w1, [x26, 13812] ++ ldr w1, [x27, 13812] + add x4, x9, 2056 + stp x4, x3, [sp] + add x19, x19, x0 @@ -7480,53 +6770,53 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + str w25, [sp, 32] + add x7, x9, 1320 + str x11, [sp, 40] -+ add x0, x23, x0 ++ add x0, x22, x0 + ubfx x2, x10, 2, 2 -+ add x1, x23, 8 ++ add x1, x22, 8 + mov w4, 9 + mov w3, 52 -+ str x8, [sp, 152] ++ str w8, [sp, 156] + bl ZSTD_buildSeqTable.constprop.0 + cmn x0, #120 -+ bhi .L1131 -+ ldr x8, [sp, 152] ++ bhi .L1025 + add x3, x19, x0 -+ sub x0, x3, x8 ++ sub x0, x3, x28 + cmn x0, #120 -+ bhi .L1163 -+ sub x4, x27, x0 -+ cbz x24, .L1171 -+ cmp w28, 0 ++ bhi .L1057 ++ ldr w8, [sp, 156] ++ sub x4, x26, x0 ++ cbz x24, .L1066 ++ cmp w8, 0 + mov x0, 16777216 + ccmp x21, x0, 0, eq -+ bls .L1133 ++ bls .L1027 + cmp w25, 8 -+ ble .L1164 -+ ldr x0, [x23, 16] ++ ble .L1059 ++ ldr x0, [x22, 16] + mov w2, 1 + ldr w6, [x0, 4] + lsl w2, w2, w6 -+ cbz w2, .L1164 ++ cbz w2, .L1059 + sub w2, w2, #1 + add x5, x0, 18 + add x0, x0, 10 + mov w1, 0 -+ add x2, x5, w2, uxtw 3 ++ add x2, x5, x2, uxtw 3 + .p2align 3,,7 -+.L1138: ++.L1032: + ldrb w5, [x0], 8 + cmp w5, 22 + cinc w1, w1, hi + cmp x2, x0 -+ bne .L1138 ++ bne .L1032 + mov w0, 8 + sub w0, w0, w6 + lsl w1, w1, w0 + cmp w1, 6 -+ cset w28, hi -+ b .L1133 ++ cset w8, hi ++ b .L1027 + .p2align 2,,3 -+.L1131: ++.L1025: + mov x0, -20 + ldp x25, x26, [sp, 112] + .cfi_remember_state @@ -7535,22 +6825,25 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + ldp x27, x28, [sp, 128] + .cfi_restore 28 + .cfi_restore 27 -+ b .L1111 ++ b .L1004 + .p2align 2,,3 -+.L1164: ++.L1059: + .cfi_restore_state -+ str wzr, [x26, 13812] -+ b .L1135 ++ str wzr, [x27, 13812] ++ b .L1029 + .p2align 2,,3 -+.L1167: -+ .cfi_restore 25 -+ .cfi_restore 26 -+ .cfi_restore 27 -+ .cfi_restore 28 ++.L1062: + ldp x29, x30, [sp, 48] + ldp x19, x20, [sp, 64] + ldp x21, x22, [sp, 80] + ldp x23, x24, [sp, 96] ++ ldp x25, x26, [sp, 112] ++ .cfi_remember_state ++ .cfi_restore 26 ++ .cfi_restore 25 ++ ldp x27, x28, [sp, 128] ++ .cfi_restore 28 ++ .cfi_restore 27 + add sp, sp, 320 + .cfi_restore 29 + .cfi_restore 30 @@ -7563,24 +6856,12 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + .cfi_def_cfa_offset 0 + b ZSTD_decompressSequencesSplitLitBuffer_default.constprop.0 + .p2align 2,,3 -+.L1166: -+ .cfi_def_cfa_offset 320 -+ .cfi_offset 19, -256 -+ .cfi_offset 20, -248 -+ .cfi_offset 21, -240 -+ .cfi_offset 22, -232 -+ .cfi_offset 23, -224 -+ .cfi_offset 24, -216 -+ .cfi_offset 25, -208 -+ .cfi_offset 26, -200 -+ .cfi_offset 27, -192 -+ .cfi_offset 28, -184 -+ .cfi_offset 29, -272 -+ .cfi_offset 30, -264 ++.L1061: ++ .cfi_restore_state + mov w5, w25 -+ mov x2, x22 ++ mov x2, x23 + mov x1, x24 -+ mov x0, x23 ++ mov x0, x22 + ldp x29, x30, [sp, 48] + ldp x19, x20, [sp, 64] + ldp x21, x22, [sp, 80] @@ -7604,11 +6885,11 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + .cfi_def_cfa_offset 0 + b ZSTD_decompressSequencesLong_default.constprop.0 + .p2align 2,,3 -+.L1171: ++.L1066: + .cfi_restore_state + mov x0, -70 -+ cbz w25, .L1117 -+.L1163: ++ cbz w25, .L1010 ++.L1057: + ldp x25, x26, [sp, 112] + .cfi_remember_state + .cfi_restore 26 @@ -7616,23 +6897,23 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + ldp x27, x28, [sp, 128] + .cfi_restore 28 + .cfi_restore 27 -+ b .L1111 ++ b .L1004 + .p2align 2,,3 -+.L1168: ++.L1063: + .cfi_restore_state -+ add x3, x8, 3 ++ add x3, x28, 3 + cmp x20, x3 -+ bcc .L1115 -+ ldrh w25, [x8, 1] ++ bcc .L1008 ++ ldrh w25, [x28, 1] + mov w12, 32512 + add w25, w25, w12 -+ b .L1118 ++ b .L1011 + .p2align 2,,3 -+.L1170: -+ cbz x6, .L1131 ++.L1065: ++ cbz x6, .L1025 + ldrb w0, [x3, 1] + cmp w0, 35 -+ bhi .L1131 ++ bhi .L1025 + adrp x9, .LANCHOR0 + add x9, x9, :lo12:.LANCHOR0 + uxtw x5, w0 @@ -7640,65 +6921,62 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + and x0, x0, 255 + add x4, x9, 336 + add x19, x3, 2 -+ mov w3, w28 ++ mov w3, w8 + ldr w2, [x2, x5, lsl 2] + sub x6, x20, x19 + ldrb w0, [x4, x0] -+ str x7, [x23] -+ str xzr, [x23, 32] -+ strh wzr, [x23, 40] -+ strb w0, [x23, 42] -+ strb wzr, [x23, 43] -+ str w2, [x23, 44] -+ b .L1125 ++ str x7, [x22] ++ str xzr, [x22, 32] ++ strh wzr, [x22, 40] ++ strb w0, [x22, 42] ++ strb wzr, [x22, 43] ++ str w2, [x22, 44] ++ b .L1018 + .p2align 2,,3 -+.L1121: -+ cbz w1, .L1131 -+ cmp w28, 0 ++.L1014: ++ cbz w1, .L1025 ++ cmp w8, 0 + ccmp w25, 24, 4, ne -+ ble .L1142 -+ ldr x0, [x23] ++ ble .L1058 ++ ldr x0, [x22] + mov x9, 4160 + add x2, x0, x9 + .p2align 3,,7 -+.L1126: ++.L1020: + prfm PLDL2KEEP, [x0] + add x0, x0, 64 + cmp x2, x0 -+ bne .L1126 -+.L1142: ++ bne .L1020 ++.L1058: + adrp x9, .LANCHOR0 -+ mov w3, w28 ++ mov w3, w8 + add x9, x9, :lo12:.LANCHOR0 -+ b .L1125 ++ b .L1018 + .p2align 2,,3 -+.L1120: ++.L1013: + mov x4, x6 + add x0, sp, 208 + mov x3, x19 + add x2, sp, 204 + add x1, sp, 200 -+ str x8, [sp, 152] -+ str w10, [sp, 160] ++ stp w8, w10, [sp, 156] + stp x7, x11, [sp, 168] + bl FSE_readNCount -+ ldr w10, [sp, 160] + cmn x0, #120 -+ ldr x8, [sp, 152] ++ ldp w8, w10, [sp, 156] + ldp x7, x11, [sp, 168] -+ bhi .L1131 ++ bhi .L1025 + ldr w5, [sp, 204] -+ str x8, [sp, 152] -+ str w10, [sp, 160] ++ stp w8, w10, [sp, 156] + cmp w5, 9 -+ bhi .L1131 ++ bhi .L1025 + ldr w2, [sp, 200] + adrp x9, .LANCHOR0 + add x9, x9, :lo12:.LANCHOR0 + mov x6, x11 + add x3, x9, 192 -+ add x19, x19, x0 + add x1, sp, 208 ++ add x19, x19, x0 + add x4, x9, 336 + mov x0, x7 + stp x7, x11, [sp, 168] @@ -7706,30 +6984,29 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + bl ZSTD_buildFSETable_body_default.constprop.0 + ldp x7, x11, [sp, 168] + sub x6, x20, x19 -+ ldr w1, [x26, 13620] -+ ldr w3, [x26, 13812] -+ ldr w10, [sp, 160] -+ str x7, [x23] -+ ldr x8, [sp, 152] ++ ldr w1, [x27, 13620] ++ ldr w3, [x27, 13812] ++ ldp w8, w10, [sp, 156] ++ str x7, [x22] + ldr x9, [sp, 184] -+ b .L1125 ++ b .L1018 + .cfi_endproc -+.LFE4541: ++.LFE4496: + .size ZSTD_decompressBlock_internal, .-ZSTD_decompressBlock_internal + .align 2 + .p2align 4,,11 + .global ZSTD_checkContinuity + .type ZSTD_checkContinuity, %function +ZSTD_checkContinuity: -+.LFB4542: ++.LFB4497: + .cfi_startproc + cmp x2, 0 + ldr x2, [x0, 29888] + ccmp x2, x1, 4, ne -+ bne .L1177 ++ bne .L1072 + ret + .p2align 2,,3 -+.L1177: ++.L1072: + ldr x3, [x0, 29896] + str x1, [x0, 29888] + str x1, [x0, 29896] @@ -7739,14 +7016,14 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + str x1, [x0, 29904] + ret + .cfi_endproc -+.LFE4542: ++.LFE4497: + .size ZSTD_checkContinuity, .-ZSTD_checkContinuity + .align 2 + .p2align 4,,11 + .global ZSTD_decompressBlock + .type ZSTD_decompressBlock, %function +ZSTD_decompressBlock: -+.LFB4543: ++.LFB4498: + .cfi_startproc + stp x29, x30, [sp, -96]! + .cfi_def_cfa_offset 96 @@ -7770,7 +7047,7 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + .cfi_offset 24, -40 + mov x23, x2 + ccmp x1, x0, 4, ne -+ beq .L1179 ++ beq .L1074 + ldr x1, [x20, 29896] + str x19, [x20, 29888] + str x19, [x20, 29896] @@ -7778,10 +7055,10 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + str x0, [x20, 29912] + sub x0, x19, x1 + str x0, [x20, 29904] -+.L1179: ++.L1074: + mov x0, -72 + cmp x21, 131072 -+ bhi .L1180 ++ bhi .L1075 + mov x4, x23 + mov x3, x19 + mov x2, x21 @@ -7790,7 +7067,7 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + mov w5, 0 + bl ZSTD_decodeLiteralsBlock + cmn x0, #120 -+ bhi .L1180 ++ bhi .L1075 + stp x25, x26, [sp, 64] + .cfi_offset 26, -24 + .cfi_offset 25, -32 @@ -7805,12 +7082,12 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + ldr x24, [x20, 29904] + bl ZSTD_decodeSeqHeaders + cmn x0, #120 -+ bhi .L1204 ++ bhi .L1099 + ldr w5, [sp, 92] + add x3, x22, x0 + sub x4, x21, x0 -+ cbz x19, .L1206 -+.L1182: ++ cbz x19, .L1101 ++.L1077: + cmp x23, 131072 + mov x0, 131072 + csel x0, x23, x0, ls @@ -7819,46 +7096,47 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + mov x1, 16777216 + sub x0, x0, x24 + ccmp x0, x1, 0, eq -+ bls .L1183 ++ bls .L1078 + cmp w5, 8 -+ ble .L1205 ++ ble .L1100 + ldr x0, [x20, 16] + mov w2, 1 + ldr w7, [x0, 4] + lsl w2, w2, w7 -+ cbz w2, .L1205 ++ cbz w2, .L1100 + sub w2, w2, #1 + add x6, x0, 18 + add x0, x0, 10 + mov w1, 0 -+ add x2, x6, w2, uxtw 3 ++ add x2, x6, x2, uxtw 3 + .p2align 3,,7 -+.L1188: ++.L1083: + ldrb w6, [x0], 8 + cmp w6, 22 + cinc w1, w1, hi + cmp x2, x0 -+ bne .L1188 ++ bne .L1083 + mov w0, 8 + sub w0, w0, w7 + lsl w1, w1, w0 + cmp w1, 6 + cset w26, hi -+.L1183: ++.L1078: + str wzr, [x25, 13812] -+ cbnz w26, .L1207 -+.L1185: ++ cbnz w26, .L1102 ++.L1080: + ldr w0, [x25, 13976] + mov x2, x23 -+ mov x1, x19 + cmp w0, 2 ++ beq .L1103 ++ mov x1, x19 + mov x0, x20 -+ beq .L1208 -+ bl ZSTD_decompressSequences_default.constprop.0 ++ mov w6, 0 ++ bl ZSTD_decompressSequences_body_ver3 + ldp x25, x26, [sp, 64] + .cfi_restore 26 + .cfi_restore 25 -+.L1180: ++.L1075: + add x19, x19, x0 + ldp x21, x22, [sp, 32] + ldp x23, x24, [sp, 48] @@ -7876,7 +7154,7 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + .cfi_def_cfa_offset 0 + ret + .p2align 2,,3 -+.L1206: ++.L1101: + .cfi_def_cfa_offset 96 + .cfi_offset 19, -80 + .cfi_offset 20, -72 @@ -7890,8 +7168,8 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + .cfi_offset 30, -88 + mov x0, -70 + cmp w5, 0 -+ ble .L1182 -+.L1204: ++ ble .L1077 ++.L1099: + add x19, x19, x0 + ldp x21, x22, [sp, 32] + ldp x23, x24, [sp, 48] @@ -7913,20 +7191,22 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + .cfi_def_cfa_offset 0 + ret + .p2align 2,,3 -+.L1205: ++.L1100: + .cfi_restore_state + str wzr, [x25, 13812] -+ b .L1185 ++ b .L1080 + .p2align 2,,3 -+.L1208: ++.L1103: ++ mov x1, x19 ++ mov x0, x20 + bl ZSTD_decompressSequencesSplitLitBuffer_default.constprop.0 + ldp x25, x26, [sp, 64] + .cfi_remember_state + .cfi_restore 26 + .cfi_restore 25 -+ b .L1180 ++ b .L1075 + .p2align 2,,3 -+.L1207: ++.L1102: + .cfi_restore_state + mov x2, x23 + mov x1, x19 @@ -7935,18 +7215,16 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + ldp x25, x26, [sp, 64] + .cfi_restore 26 + .cfi_restore 25 -+ b .L1180 ++ b .L1075 + .cfi_endproc -+.LFE4543: ++.LFE4498: + .size ZSTD_decompressBlock, .-ZSTD_decompressBlock -+ .set dec32table.0,dec32table.2 -+ .set dec64table.1,dec64table.3 + .section .rodata + .align 3 + .set .LANCHOR0,. + 0 -+ .type dec64table.3, %object -+ .size dec64table.3, 32 -+dec64table.3: ++ .type dec64table.1, %object ++ .size dec64table.1, 32 ++dec64table.1: + .word 8 + .word 8 + .word 8 @@ -7955,9 +7233,9 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + .word 9 + .word 10 + .word 11 -+ .type dec32table.2, %object -+ .size dec32table.2, 32 -+dec32table.2: ++ .type dec32table.0, %object ++ .size dec32table.0, 32 ++dec32table.0: + .word 0 + .word 1 + .word 2 @@ -8858,12 +8136,12 @@ diff -Nur zstd-1.5.4/lib/decompress/zstd_decompress_block_aarch64.S zstd/lib/dec + .string "" + .ascii "\001\001\001\001\002\002\003\003\004\004\005\007\b\t\n\013\f" + .ascii "\r\016\017\020" -+ .ident "GCC: (GNU) 11.4.0" ++ .ident "GCC: (GNU) 10.3.1" + .section .note.GNU-stack,"",@progbits -diff -Nur zstd-1.5.4/lib/libzstd.mk zstd/lib/libzstd.mk +diff -Naur zstd-1.5.4/lib/libzstd.mk zstd/lib/libzstd.mk --- zstd-1.5.4/lib/libzstd.mk 2023-02-10 08:41:50.000000000 +0800 -+++ zstd/lib/libzstd.mk 2025-06-23 16:48:36.885521370 +0800 -@@ -142,12 +142,23 @@ ++++ zstd/lib/libzstd.mk 2025-08-18 21:50:50.105008530 +0800 +@@ -142,6 +142,16 @@ ZSTD_DECOMPRESS_AMD64_ASM_FILES := $(sort $(wildcard $(LIBZSTD)/decompress/*_amd64.S)) @@ -8880,10 +8158,15 @@ diff -Nur zstd-1.5.4/lib/libzstd.mk zstd/lib/libzstd.mk ifneq ($(ZSTD_NO_ASM), 0) CPPFLAGS += -DZSTD_DISABLE_ASM else - # Unconditionally add the ASM files they are disabled by - # macros in the .S file. - ZSTD_DECOMPRESS_FILES += $(ZSTD_DECOMPRESS_AMD64_ASM_FILES) -+ $(info PATH: $(ZSTD_DECOMPRESS_FILES)) - endif +diff -Naur zstd-1.5.4/lib/Makefile zstd/lib/Makefile +--- zstd-1.5.4/lib/Makefile 2023-02-10 08:41:50.000000000 +0800 ++++ zstd/lib/Makefile 2025-08-18 23:33:22.281008530 +0800 +@@ -138,7 +138,7 @@ + LIBZSTD = libzstd.$(SHARED_EXT_VER) + .PHONY: $(LIBZSTD) # must be run every time + $(LIBZSTD): CPPFLAGS += $(CPPFLAGS_DYNLIB) +-$(LIBZSTD): CFLAGS += -fPIC -fvisibility=hidden ++$(LIBZSTD): CFLAGS += -fPIC -fvisibility=hidden + $(LIBZSTD): LDFLAGS += -shared $(LDFLAGS_DYNLIB) - ifneq ($(HUF_FORCE_DECOMPRESS_X1), 0) + ifndef BUILD_DIR