You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
574 lines
14 KiB
574 lines
14 KiB
/*
 * Branch/Call/Jump (BCJ) filter decoders
 *
 * Authors: Lasse Collin <lasse.collin@tukaani.org>
 *          Igor Pavlov <http://7-zip.org/>
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */
|
|
|
|
#include "xz_private.h"
|
|
|
|
/*
 * The rest of the file is inside this ifdef. It makes things a little more
 * convenient when building without support for any BCJ filters.
 */
|
|
#ifdef XZ_DEC_BCJ
|
|
|
|
struct xz_dec_bcj {
|
|
/* Type of the BCJ filter being used */
|
|
enum {
|
|
BCJ_X86 = 4, /* x86 or x86-64 */
|
|
BCJ_POWERPC = 5, /* Big endian only */
|
|
BCJ_IA64 = 6, /* Big or little endian */
|
|
BCJ_ARM = 7, /* Little endian only */
|
|
BCJ_ARMTHUMB = 8, /* Little endian only */
|
|
BCJ_SPARC = 9 /* Big or little endian */
|
|
} type;
|
|
|
|
/*
|
|
* Return value of the next filter in the chain. We need to preserve
|
|
* this information across calls, because we must not call the next
|
|
* filter anymore once it has returned XZ_STREAM_END.
|
|
*/
|
|
enum xz_ret ret;
|
|
|
|
/* True if we are operating in single-call mode. */
|
|
bool single_call;
|
|
|
|
/*
|
|
* Absolute position relative to the beginning of the uncompressed
|
|
* data (in a single .xz Block). We care only about the lowest 32
|
|
* bits so this doesn't need to be uint64_t even with big files.
|
|
*/
|
|
uint32_t pos;
|
|
|
|
/* x86 filter state */
|
|
uint32_t x86_prev_mask;
|
|
|
|
/* Temporary space to hold the variables from struct xz_buf */
|
|
uint8_t *out;
|
|
size_t out_pos;
|
|
size_t out_size;
|
|
|
|
struct {
|
|
/* Amount of already filtered data in the beginning of buf */
|
|
size_t filtered;
|
|
|
|
/* Total amount of data currently stored in buf */
|
|
size_t size;
|
|
|
|
/*
|
|
* Buffer to hold a mix of filtered and unfiltered data. This
|
|
* needs to be big enough to hold Alignment + 2 * Look-ahead:
|
|
*
|
|
* Type Alignment Look-ahead
|
|
* x86 1 4
|
|
* PowerPC 4 0
|
|
* IA-64 16 0
|
|
* ARM 4 0
|
|
* ARM-Thumb 2 2
|
|
* SPARC 4 0
|
|
*/
|
|
uint8_t buf[16];
|
|
} temp;
|
|
};
|
|
|
|
#ifdef XZ_DEC_X86
|
|
/*
 * This is used to test the most significant byte of a memory address
 * in an x86 instruction.
 *
 * Returns non-zero when b is 0x00 or 0xFF, zero otherwise.
 */
static inline int bcj_x86_test_msbyte(uint8_t b)
{
	return b == 0x00 || b == 0xFF;
}
|
|
|
|
static size_t bcj_x86(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
|
|
{
|
|
static const bool mask_to_allowed_status[8]
|
|
= { true, true, true, false, true, false, false, false };
|
|
|
|
static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 };
|
|
|
|
size_t i;
|
|
size_t prev_pos = (size_t)-1;
|
|
uint32_t prev_mask = s->x86_prev_mask;
|
|
uint32_t src;
|
|
uint32_t dest;
|
|
uint32_t j;
|
|
uint8_t b;
|
|
|
|
if (size <= 4)
|
|
return 0;
|
|
|
|
size -= 4;
|
|
for (i = 0; i < size; ++i) {
|
|
if ((buf[i] & 0xFE) != 0xE8)
|
|
continue;
|
|
|
|
prev_pos = i - prev_pos;
|
|
if (prev_pos > 3) {
|
|
prev_mask = 0;
|
|
} else {
|
|
prev_mask = (prev_mask << (prev_pos - 1)) & 7;
|
|
if (prev_mask != 0) {
|
|
b = buf[i + 4 - mask_to_bit_num[prev_mask]];
|
|
if (!mask_to_allowed_status[prev_mask]
|
|
|| bcj_x86_test_msbyte(b)) {
|
|
prev_pos = i;
|
|
prev_mask = (prev_mask << 1) | 1;
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
|
|
prev_pos = i;
|
|
|
|
if (bcj_x86_test_msbyte(buf[i + 4])) {
|
|
src = get_unaligned_le32(buf + i + 1);
|
|
while (true) {
|
|
dest = src - (s->pos + (uint32_t)i + 5);
|
|
if (prev_mask == 0)
|
|
break;
|
|
|
|
j = mask_to_bit_num[prev_mask] * 8;
|
|
b = (uint8_t)(dest >> (24 - j));
|
|
if (!bcj_x86_test_msbyte(b))
|
|
break;
|
|
|
|
src = dest ^ (((uint32_t)1 << (32 - j)) - 1);
|
|
}
|
|
|
|
dest &= 0x01FFFFFF;
|
|
dest |= (uint32_t)0 - (dest & 0x01000000);
|
|
put_unaligned_le32(dest, buf + i + 1);
|
|
i += 4;
|
|
} else {
|
|
prev_mask = (prev_mask << 1) | 1;
|
|
}
|
|
}
|
|
|
|
prev_pos = i - prev_pos;
|
|
s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1);
|
|
return i;
|
|
}
|
|
#endif
|
|
|
|
#ifdef XZ_DEC_POWERPC
|
|
static size_t bcj_powerpc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
|
|
{
|
|
size_t i;
|
|
uint32_t instr;
|
|
|
|
for (i = 0; i + 4 <= size; i += 4) {
|
|
instr = get_unaligned_be32(buf + i);
|
|
if ((instr & 0xFC000003) == 0x48000001) {
|
|
instr &= 0x03FFFFFC;
|
|
instr -= s->pos + (uint32_t)i;
|
|
instr &= 0x03FFFFFC;
|
|
instr |= 0x48000001;
|
|
put_unaligned_be32(instr, buf + i);
|
|
}
|
|
}
|
|
|
|
return i;
|
|
}
|
|
#endif
|
|
|
|
#ifdef XZ_DEC_IA64
|
|
static size_t bcj_ia64(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
|
|
{
|
|
static const uint8_t branch_table[32] = {
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
4, 4, 6, 6, 0, 0, 7, 7,
|
|
4, 4, 0, 0, 4, 4, 0, 0
|
|
};
|
|
|
|
/*
|
|
* The local variables take a little bit stack space, but it's less
|
|
* than what LZMA2 decoder takes, so it doesn't make sense to reduce
|
|
* stack usage here without doing that for the LZMA2 decoder too.
|
|
*/
|
|
|
|
/* Loop counters */
|
|
size_t i;
|
|
size_t j;
|
|
|
|
/* Instruction slot (0, 1, or 2) in the 128-bit instruction word */
|
|
uint32_t slot;
|
|
|
|
/* Bitwise offset of the instruction indicated by slot */
|
|
uint32_t bit_pos;
|
|
|
|
/* bit_pos split into byte and bit parts */
|
|
uint32_t byte_pos;
|
|
uint32_t bit_res;
|
|
|
|
/* Address part of an instruction */
|
|
uint32_t addr;
|
|
|
|
/* Mask used to detect which instructions to convert */
|
|
uint32_t mask;
|
|
|
|
/* 41-bit instruction stored somewhere in the lowest 48 bits */
|
|
uint64_t instr;
|
|
|
|
/* Instruction normalized with bit_res for easier manipulation */
|
|
uint64_t norm;
|
|
|
|
for (i = 0; i + 16 <= size; i += 16) {
|
|
mask = branch_table[buf[i] & 0x1F];
|
|
for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) {
|
|
if (((mask >> slot) & 1) == 0)
|
|
continue;
|
|
|
|
byte_pos = bit_pos >> 3;
|
|
bit_res = bit_pos & 7;
|
|
instr = 0;
|
|
for (j = 0; j < 6; ++j)
|
|
instr |= (uint64_t)(buf[i + j + byte_pos])
|
|
<< (8 * j);
|
|
|
|
norm = instr >> bit_res;
|
|
|
|
if (((norm >> 37) & 0x0F) == 0x05
|
|
&& ((norm >> 9) & 0x07) == 0) {
|
|
addr = (norm >> 13) & 0x0FFFFF;
|
|
addr |= ((uint32_t)(norm >> 36) & 1) << 20;
|
|
addr <<= 4;
|
|
addr -= s->pos + (uint32_t)i;
|
|
addr >>= 4;
|
|
|
|
norm &= ~((uint64_t)0x8FFFFF << 13);
|
|
norm |= (uint64_t)(addr & 0x0FFFFF) << 13;
|
|
norm |= (uint64_t)(addr & 0x100000)
|
|
<< (36 - 20);
|
|
|
|
instr &= (1 << bit_res) - 1;
|
|
instr |= norm << bit_res;
|
|
|
|
for (j = 0; j < 6; j++)
|
|
buf[i + j + byte_pos]
|
|
= (uint8_t)(instr >> (8 * j));
|
|
}
|
|
}
|
|
}
|
|
|
|
return i;
|
|
}
|
|
#endif
|
|
|
|
#ifdef XZ_DEC_ARM
|
|
static size_t bcj_arm(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
|
|
{
|
|
size_t i;
|
|
uint32_t addr;
|
|
|
|
for (i = 0; i + 4 <= size; i += 4) {
|
|
if (buf[i + 3] == 0xEB) {
|
|
addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8)
|
|
| ((uint32_t)buf[i + 2] << 16);
|
|
addr <<= 2;
|
|
addr -= s->pos + (uint32_t)i + 8;
|
|
addr >>= 2;
|
|
buf[i] = (uint8_t)addr;
|
|
buf[i + 1] = (uint8_t)(addr >> 8);
|
|
buf[i + 2] = (uint8_t)(addr >> 16);
|
|
}
|
|
}
|
|
|
|
return i;
|
|
}
|
|
#endif
|
|
|
|
#ifdef XZ_DEC_ARMTHUMB
|
|
static size_t bcj_armthumb(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
|
|
{
|
|
size_t i;
|
|
uint32_t addr;
|
|
|
|
for (i = 0; i + 4 <= size; i += 2) {
|
|
if ((buf[i + 1] & 0xF8) == 0xF0
|
|
&& (buf[i + 3] & 0xF8) == 0xF8) {
|
|
addr = (((uint32_t)buf[i + 1] & 0x07) << 19)
|
|
| ((uint32_t)buf[i] << 11)
|
|
| (((uint32_t)buf[i + 3] & 0x07) << 8)
|
|
| (uint32_t)buf[i + 2];
|
|
addr <<= 1;
|
|
addr -= s->pos + (uint32_t)i + 4;
|
|
addr >>= 1;
|
|
buf[i + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07));
|
|
buf[i] = (uint8_t)(addr >> 11);
|
|
buf[i + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 0x07));
|
|
buf[i + 2] = (uint8_t)addr;
|
|
i += 2;
|
|
}
|
|
}
|
|
|
|
return i;
|
|
}
|
|
#endif
|
|
|
|
#ifdef XZ_DEC_SPARC
|
|
static size_t bcj_sparc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
|
|
{
|
|
size_t i;
|
|
uint32_t instr;
|
|
|
|
for (i = 0; i + 4 <= size; i += 4) {
|
|
instr = get_unaligned_be32(buf + i);
|
|
if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) {
|
|
instr <<= 2;
|
|
instr -= s->pos + (uint32_t)i;
|
|
instr >>= 2;
|
|
instr = ((uint32_t)0x40000000 - (instr & 0x400000))
|
|
| 0x40000000 | (instr & 0x3FFFFF);
|
|
put_unaligned_be32(instr, buf + i);
|
|
}
|
|
}
|
|
|
|
return i;
|
|
}
|
|
#endif
|
|
|
|
/*
|
|
* Apply the selected BCJ filter. Update *pos and s->pos to match the amount
|
|
* of data that got filtered.
|
|
*
|
|
* NOTE: This is implemented as a switch statement to avoid using function
|
|
* pointers, which could be problematic in the kernel boot code, which must
|
|
* avoid pointers to static data (at least on x86).
|
|
*/
|
|
static void bcj_apply(struct xz_dec_bcj *s,
|
|
uint8_t *buf, size_t *pos, size_t size)
|
|
{
|
|
size_t filtered;
|
|
|
|
buf += *pos;
|
|
size -= *pos;
|
|
|
|
switch (s->type) {
|
|
#ifdef XZ_DEC_X86
|
|
case BCJ_X86:
|
|
filtered = bcj_x86(s, buf, size);
|
|
break;
|
|
#endif
|
|
#ifdef XZ_DEC_POWERPC
|
|
case BCJ_POWERPC:
|
|
filtered = bcj_powerpc(s, buf, size);
|
|
break;
|
|
#endif
|
|
#ifdef XZ_DEC_IA64
|
|
case BCJ_IA64:
|
|
filtered = bcj_ia64(s, buf, size);
|
|
break;
|
|
#endif
|
|
#ifdef XZ_DEC_ARM
|
|
case BCJ_ARM:
|
|
filtered = bcj_arm(s, buf, size);
|
|
break;
|
|
#endif
|
|
#ifdef XZ_DEC_ARMTHUMB
|
|
case BCJ_ARMTHUMB:
|
|
filtered = bcj_armthumb(s, buf, size);
|
|
break;
|
|
#endif
|
|
#ifdef XZ_DEC_SPARC
|
|
case BCJ_SPARC:
|
|
filtered = bcj_sparc(s, buf, size);
|
|
break;
|
|
#endif
|
|
default:
|
|
/* Never reached but silence compiler warnings. */
|
|
filtered = 0;
|
|
break;
|
|
}
|
|
|
|
*pos += filtered;
|
|
s->pos += filtered;
|
|
}
|
|
|
|
/*
|
|
* Flush pending filtered data from temp to the output buffer.
|
|
* Move the remaining mixture of possibly filtered and unfiltered
|
|
* data to the beginning of temp.
|
|
*/
|
|
static void bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b)
|
|
{
|
|
size_t copy_size;
|
|
|
|
copy_size = min_t(size_t, s->temp.filtered, b->out_size - b->out_pos);
|
|
memcpy(b->out + b->out_pos, s->temp.buf, copy_size);
|
|
b->out_pos += copy_size;
|
|
|
|
s->temp.filtered -= copy_size;
|
|
s->temp.size -= copy_size;
|
|
memmove(s->temp.buf, s->temp.buf + copy_size, s->temp.size);
|
|
}
|
|
|
|
/*
|
|
* The BCJ filter functions are primitive in sense that they process the
|
|
* data in chunks of 1-16 bytes. To hide this issue, this function does
|
|
* some buffering.
|
|
*/
|
|
XZ_EXTERN enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s,
|
|
struct xz_dec_lzma2 *lzma2,
|
|
struct xz_buf *b)
|
|
{
|
|
size_t out_start;
|
|
|
|
/*
|
|
* Flush pending already filtered data to the output buffer. Return
|
|
* immediatelly if we couldn't flush everything, or if the next
|
|
* filter in the chain had already returned XZ_STREAM_END.
|
|
*/
|
|
if (s->temp.filtered > 0) {
|
|
bcj_flush(s, b);
|
|
if (s->temp.filtered > 0)
|
|
return XZ_OK;
|
|
|
|
if (s->ret == XZ_STREAM_END)
|
|
return XZ_STREAM_END;
|
|
}
|
|
|
|
/*
|
|
* If we have more output space than what is currently pending in
|
|
* temp, copy the unfiltered data from temp to the output buffer
|
|
* and try to fill the output buffer by decoding more data from the
|
|
* next filter in the chain. Apply the BCJ filter on the new data
|
|
* in the output buffer. If everything cannot be filtered, copy it
|
|
* to temp and rewind the output buffer position accordingly.
|
|
*
|
|
* This needs to be always run when temp.size == 0 to handle a special
|
|
* case where the output buffer is full and the next filter has no
|
|
* more output coming but hasn't returned XZ_STREAM_END yet.
|
|
*/
|
|
if (s->temp.size < b->out_size - b->out_pos || s->temp.size == 0) {
|
|
out_start = b->out_pos;
|
|
memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size);
|
|
b->out_pos += s->temp.size;
|
|
|
|
s->ret = xz_dec_lzma2_run(lzma2, b);
|
|
if (s->ret != XZ_STREAM_END
|
|
&& (s->ret != XZ_OK || s->single_call))
|
|
return s->ret;
|
|
|
|
bcj_apply(s, b->out, &out_start, b->out_pos);
|
|
|
|
/*
|
|
* As an exception, if the next filter returned XZ_STREAM_END,
|
|
* we can do that too, since the last few bytes that remain
|
|
* unfiltered are meant to remain unfiltered.
|
|
*/
|
|
if (s->ret == XZ_STREAM_END)
|
|
return XZ_STREAM_END;
|
|
|
|
s->temp.size = b->out_pos - out_start;
|
|
b->out_pos -= s->temp.size;
|
|
memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size);
|
|
|
|
/*
|
|
* If there wasn't enough input to the next filter to fill
|
|
* the output buffer with unfiltered data, there's no point
|
|
* to try decoding more data to temp.
|
|
*/
|
|
if (b->out_pos + s->temp.size < b->out_size)
|
|
return XZ_OK;
|
|
}
|
|
|
|
/*
|
|
* We have unfiltered data in temp. If the output buffer isn't full
|
|
* yet, try to fill the temp buffer by decoding more data from the
|
|
* next filter. Apply the BCJ filter on temp. Then we hopefully can
|
|
* fill the actual output buffer by copying filtered data from temp.
|
|
* A mix of filtered and unfiltered data may be left in temp; it will
|
|
* be taken care on the next call to this function.
|
|
*/
|
|
if (b->out_pos < b->out_size) {
|
|
/* Make b->out{,_pos,_size} temporarily point to s->temp. */
|
|
s->out = b->out;
|
|
s->out_pos = b->out_pos;
|
|
s->out_size = b->out_size;
|
|
b->out = s->temp.buf;
|
|
b->out_pos = s->temp.size;
|
|
b->out_size = sizeof(s->temp.buf);
|
|
|
|
s->ret = xz_dec_lzma2_run(lzma2, b);
|
|
|
|
s->temp.size = b->out_pos;
|
|
b->out = s->out;
|
|
b->out_pos = s->out_pos;
|
|
b->out_size = s->out_size;
|
|
|
|
if (s->ret != XZ_OK && s->ret != XZ_STREAM_END)
|
|
return s->ret;
|
|
|
|
bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size);
|
|
|
|
/*
|
|
* If the next filter returned XZ_STREAM_END, we mark that
|
|
* everything is filtered, since the last unfiltered bytes
|
|
* of the stream are meant to be left as is.
|
|
*/
|
|
if (s->ret == XZ_STREAM_END)
|
|
s->temp.filtered = s->temp.size;
|
|
|
|
bcj_flush(s, b);
|
|
if (s->temp.filtered > 0)
|
|
return XZ_OK;
|
|
}
|
|
|
|
return s->ret;
|
|
}
|
|
|
|
XZ_EXTERN struct xz_dec_bcj *xz_dec_bcj_create(bool single_call)
|
|
{
|
|
struct xz_dec_bcj *s = kmalloc(sizeof(*s), GFP_KERNEL);
|
|
if (s != NULL)
|
|
s->single_call = single_call;
|
|
|
|
return s;
|
|
}
|
|
|
|
XZ_EXTERN enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id)
|
|
{
|
|
switch (id) {
|
|
#ifdef XZ_DEC_X86
|
|
case BCJ_X86:
|
|
#endif
|
|
#ifdef XZ_DEC_POWERPC
|
|
case BCJ_POWERPC:
|
|
#endif
|
|
#ifdef XZ_DEC_IA64
|
|
case BCJ_IA64:
|
|
#endif
|
|
#ifdef XZ_DEC_ARM
|
|
case BCJ_ARM:
|
|
#endif
|
|
#ifdef XZ_DEC_ARMTHUMB
|
|
case BCJ_ARMTHUMB:
|
|
#endif
|
|
#ifdef XZ_DEC_SPARC
|
|
case BCJ_SPARC:
|
|
#endif
|
|
break;
|
|
|
|
default:
|
|
/* Unsupported Filter ID */
|
|
return XZ_OPTIONS_ERROR;
|
|
}
|
|
|
|
s->type = id;
|
|
s->ret = XZ_OK;
|
|
s->pos = 0;
|
|
s->x86_prev_mask = 0;
|
|
s->temp.filtered = 0;
|
|
s->temp.size = 0;
|
|
|
|
return XZ_OK;
|
|
}
|
|
|
|
#endif
|
|
|