mirror of
https://github.com/acidanthera/OpenCorePkg.git
synced 2025-12-08 19:25:01 +00:00
Implement OcCompressionLib
This commit is contained in:
parent
5420e23d98
commit
1a505b7f23
78
Include/Library/OcCompressionLib.h
Normal file
78
Include/Library/OcCompressionLib.h
Normal file
@ -0,0 +1,78 @@
|
||||
/** @file
|
||||
Copyright (C) 2019, vit9696. All rights reserved.
|
||||
|
||||
All rights reserved.
|
||||
|
||||
This program and the accompanying materials
|
||||
are licensed and made available under the terms and conditions of the BSD License
|
||||
which accompanies this distribution. The full text of the license may be found at
|
||||
http://opensource.org/licenses/bsd-license.php
|
||||
|
||||
THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
|
||||
**/
|
||||
|
||||
#ifndef OC_COMPRESSION_LIB_H
|
||||
#define OC_COMPRESSION_LIB_H
|
||||
|
||||
/**
|
||||
Maximum compression and decompression buffer size may vary from
|
||||
0 to OC_COMPRESSION_MAX_LENGTH inclusive.
|
||||
**/
|
||||
#define OC_COMPRESSION_MAX_LENGTH BASE_1GB
|
||||
|
||||
/**
|
||||
Compress buffer with LZSS algorithm.
|
||||
|
||||
@param[out] Dst Destination buffer.
|
||||
@param[in] DstLen Destination buffer size.
|
||||
@param[in] Src Source buffer.
|
||||
@param[in] SrcLen Source buffer size.
|
||||
|
||||
@return Dst + CompressedLen on success otherwise NULL.
|
||||
**/
|
||||
UINT8 *
|
||||
CompressLZSS (
|
||||
OUT UINT8 *Dst,
|
||||
IN UINT32 DstLen,
|
||||
IN UINT8 *Src,
|
||||
IN UINT32 SrcLen
|
||||
);
|
||||
|
||||
/**
|
||||
Decompress buffer with LZSS algorithm.
|
||||
|
||||
@param[out] Dst Destination buffer.
|
||||
@param[in] DstLen Destination buffer size.
|
||||
@param[in] Src Source buffer.
|
||||
@param[in] SrcLen Source buffer size.
|
||||
|
||||
@return DecompressedLen on success otherwise 0.
|
||||
**/
|
||||
UINT32
|
||||
DecompressLZSS (
|
||||
OUT UINT8 *Dst,
|
||||
IN UINT32 DstLen,
|
||||
IN UINT8 *Src,
|
||||
IN UINT32 SrcLen
|
||||
);
|
||||
|
||||
/**
|
||||
Decompress buffer with LZVN algorithm.
|
||||
|
||||
@param[out] Dst Destination buffer.
|
||||
@param[in] DstLen Destination buffer size.
|
||||
@param[in] Src Source buffer.
|
||||
@param[in] SrcLen Source buffer size.
|
||||
|
||||
@return DecompressedLen on success otherwise 0.
|
||||
**/
|
||||
UINTN
|
||||
DecompressLZVN (
|
||||
OUT VOID *Dst,
|
||||
IN UINTN DstLen,
|
||||
IN CONST VOID *Src,
|
||||
IN UINTN SrcLen
|
||||
);
|
||||
|
||||
#endif // OC_COMPRESSION_LIB_H
|
||||
42
Library/OcCompressionLib/OcCompressionLib.inf
Normal file
42
Library/OcCompressionLib/OcCompressionLib.inf
Normal file
@ -0,0 +1,42 @@
|
||||
## @file
|
||||
# OcCompressionLib
|
||||
#
|
||||
# Copyright (c) 2019, vit9696
|
||||
#
|
||||
# All rights reserved.
|
||||
#
|
||||
# This program and the accompanying materials
|
||||
# are licensed and made available under the terms and conditions of the BSD License
|
||||
# which accompanies this distribution. The full text of the license may be found at
|
||||
# http://opensource.org/licenses/bsd-license.php
|
||||
#
|
||||
# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
|
||||
#
|
||||
##
|
||||
|
||||
[Defines]
|
||||
INF_VERSION = 0x00010005
|
||||
BASE_NAME = OcCompressionLib
|
||||
FILE_GUID = C384FA2A-B1AE-44FA-A33C-D59325B637C9
|
||||
MODULE_TYPE = DXE_DRIVER
|
||||
VERSION_STRING = 1.0
|
||||
LIBRARY_CLASS = OcCompressionLib|DXE_CORE DXE_DRIVER DXE_RUNTIME_DRIVER DXE_SAL_DRIVER DXE_SMM_DRIVER SMM_CORE UEFI_APPLICATION UEFI_DRIVER
|
||||
|
||||
|
||||
#
|
||||
# VALID_ARCHITECTURES = X64
|
||||
#
|
||||
|
||||
[Sources]
|
||||
lzss/lzss.c
|
||||
lzss/lzss.h
|
||||
lzvn/lzvn.c
|
||||
lzvn/lzvn.h
|
||||
|
||||
[Packages]
|
||||
MdePkg/MdePkg.dec
|
||||
OcSupportPkg/OcSupportPkg.dec
|
||||
|
||||
#
# lzss.h maps bzero() to ZeroMem() (BaseMemoryLib) and malloc()/free() to
# AllocatePool()/FreePool() (MemoryAllocationLib), so both library classes
# must be declared here for consumers to link correctly.
#
[LibraryClasses]
  BaseLib
  BaseMemoryLib
  MemoryAllocationLib
|
||||
383
Library/OcCompressionLib/lzss/lzss.c
Normal file
383
Library/OcCompressionLib/lzss/lzss.c
Normal file
@ -0,0 +1,383 @@
|
||||
/*
|
||||
* Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
|
||||
*
|
||||
* @APPLE_LICENSE_HEADER_START@
|
||||
*
|
||||
* This file contains Original Code and/or Modifications of Original Code
|
||||
* as defined in and that are subject to the Apple Public Source License
|
||||
* Version 2.0 (the 'License'). You may not use this file except in
|
||||
* compliance with the License. Please obtain a copy of the License at
|
||||
* http://www.opensource.apple.com/apsl/ and read it before using this
|
||||
* file.
|
||||
*
|
||||
* The Original Code and all software distributed under the License are
|
||||
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
|
||||
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
|
||||
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
|
||||
* Please see the License for the specific language governing rights and
|
||||
* limitations under the License.
|
||||
*
|
||||
* @APPLE_LICENSE_HEADER_END@
|
||||
*/
|
||||
#include "lzss.h"
|
||||
|
||||
/*******************************************************************************
|
||||
*******************************************************************************/
|
||||
/*
 * Compute the Adler-32 checksum of buffer[0 .. length-1].
 *
 * buffer  bytes to checksum
 * length  number of bytes; a value <= 0 yields the checksum of empty input (1)
 *
 * Returns (high sum << 16) | low sum, both sums taken modulo 65521.
 * The running sums are reduced after every 5000 input bytes.
 */
u_int32_t local_adler32(u_int8_t * buffer, int32_t length)
{
    const u_int32_t modulus = 65521;
    const int32_t chunk = 5000;
    const u_int8_t *cursor = buffer;
    u_int32_t sum_a = 1;
    u_int32_t sum_b = 0;
    int32_t remaining = length;

    while (remaining > 0) {
        int32_t run = (remaining < chunk) ? remaining : chunk;

        remaining -= run;
        while (run-- > 0) {
            sum_a += *cursor++;
            sum_b += sum_a;
        }

        sum_a %= modulus;
        sum_b %= modulus;
    }

    return (sum_b << 16) | sum_a;
}
|
||||
|
||||
/**************************************************************
|
||||
LZSS.C -- A Data Compression Program
|
||||
***************************************************************
|
||||
4/6/1989 Haruhiko Okumura
|
||||
Use, distribute, and modify this program freely.
|
||||
Please send me your improved versions.
|
||||
PC-VAN SCIENCE
|
||||
NIFTY-Serve PAF01022
|
||||
CompuServe 74050,1022
|
||||
|
||||
**************************************************************/
|
||||
|
||||
#define N 4096 /* size of ring buffer - must be power of 2 */
|
||||
#define F 18 /* upper limit for match_length */
|
||||
#define THRESHOLD 2 /* encode string into position and length
|
||||
if match_length is greater than this */
|
||||
#define NIL N /* index for root of binary search trees */
|
||||
|
||||
struct encode_state {
|
||||
/*
|
||||
* left & right children & parent. These constitute binary search trees.
|
||||
*/
|
||||
int lchild[N + 1], rchild[N + 257], parent[N + 1];
|
||||
|
||||
/* ring buffer of size N, with extra F-1 bytes to aid string comparison */
|
||||
u_int8_t text_buf[N + F - 1];
|
||||
|
||||
/*
|
||||
* match_length of longest match.
|
||||
* These are set by the insert_node() procedure.
|
||||
*/
|
||||
int match_position, match_length;
|
||||
};
|
||||
|
||||
|
||||
/*******************************************************************************
|
||||
*******************************************************************************/
|
||||
u_int32_t decompress_lzss(
|
||||
u_int8_t * dst,
|
||||
u_int32_t dstlen,
|
||||
u_int8_t * src,
|
||||
u_int32_t srclen)
|
||||
{
|
||||
/* ring buffer of size N, with extra F-1 bytes to aid string comparison */
|
||||
u_int8_t text_buf[N + F - 1];
|
||||
u_int8_t * dststart = dst;
|
||||
const u_int8_t * dstend = dst + dstlen;
|
||||
const u_int8_t * srcend = src + srclen;
|
||||
int i, j, k, r, c;
|
||||
unsigned int flags;
|
||||
|
||||
if (dstlen > OC_COMPRESSION_MAX_LENGTH || srclen > OC_COMPRESSION_MAX_LENGTH) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
dst = dststart;
|
||||
for (i = 0; i < N - F; i++)
|
||||
text_buf[i] = ' ';
|
||||
r = N - F;
|
||||
flags = 0;
|
||||
for ( ; ; ) {
|
||||
if (((flags >>= 1) & 0x100) == 0) {
|
||||
if (src < srcend) c = *src++; else break;
|
||||
flags = c | 0xFF00; /* uses higher byte cleverly */
|
||||
} /* to count eight */
|
||||
if (flags & 1) {
|
||||
if (src < srcend) c = *src++; else break;
|
||||
if (dst < dstend) *dst++ = c; else break;
|
||||
text_buf[r++] = c;
|
||||
r &= (N - 1);
|
||||
} else {
|
||||
if (src < srcend) i = *src++; else break;
|
||||
if (src < srcend) j = *src++; else break;
|
||||
i |= ((j & 0xF0) << 4);
|
||||
j = (j & 0x0F) + THRESHOLD;
|
||||
for (k = 0; k <= j; k++) {
|
||||
c = text_buf[(i + k) & (N - 1)];
|
||||
if (dst < dstend) *dst++ = c; else break;
|
||||
text_buf[r++] = c;
|
||||
r &= (N - 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return (u_int32_t)(dst - dststart);
|
||||
}
|
||||
|
||||
/*
|
||||
* initialize state, mostly the trees
|
||||
*
|
||||
* For i = 0 to N - 1, rchild[i] and lchild[i] will be the right and left
|
||||
* children of node i. These nodes need not be initialized. Also, parent[i]
|
||||
* is the parent of node i. These are initialized to NIL (= N), which stands
|
||||
* for 'not used.' For i = 0 to 255, rchild[N + i + 1] is the root of the
|
||||
* tree for strings that begin with character i. These are initialized to NIL.
|
||||
* Note there are 256 trees. */
|
||||
static void init_state(struct encode_state *sp)
|
||||
{
|
||||
int i;
|
||||
|
||||
bzero(sp, sizeof(*sp));
|
||||
|
||||
for (i = 0; i < N - F; i++)
|
||||
sp->text_buf[i] = ' ';
|
||||
for (i = N + 1; i <= N + 256; i++)
|
||||
sp->rchild[i] = NIL;
|
||||
for (i = 0; i < N; i++)
|
||||
sp->parent[i] = NIL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Inserts string of length F, text_buf[r..r+F-1], into one of the trees
|
||||
* (text_buf[r]'th tree) and returns the longest-match position and length
|
||||
* via the global variables match_position and match_length.
|
||||
* If match_length = F, then removes the old node in favor of the new one,
|
||||
* because the old one will be deleted sooner. Note r plays double role,
|
||||
* as tree node and position in buffer.
|
||||
*/
|
||||
static void insert_node(struct encode_state *sp, int r)
|
||||
{
|
||||
int i, p, cmp;
|
||||
u_int8_t *key;
|
||||
|
||||
cmp = 1;
|
||||
key = &sp->text_buf[r];
|
||||
p = N + 1 + key[0];
|
||||
sp->rchild[r] = sp->lchild[r] = NIL;
|
||||
sp->match_length = 0;
|
||||
for ( ; ; ) {
|
||||
if (cmp >= 0) {
|
||||
if (sp->rchild[p] != NIL)
|
||||
p = sp->rchild[p];
|
||||
else {
|
||||
sp->rchild[p] = r;
|
||||
sp->parent[r] = p;
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
if (sp->lchild[p] != NIL)
|
||||
p = sp->lchild[p];
|
||||
else {
|
||||
sp->lchild[p] = r;
|
||||
sp->parent[r] = p;
|
||||
return;
|
||||
}
|
||||
}
|
||||
for (i = 1; i < F; i++) {
|
||||
if ((cmp = key[i] - sp->text_buf[p + i]) != 0)
|
||||
break;
|
||||
}
|
||||
if (i > sp->match_length) {
|
||||
sp->match_position = p;
|
||||
if ((sp->match_length = i) >= F)
|
||||
break;
|
||||
}
|
||||
}
|
||||
sp->parent[r] = sp->parent[p];
|
||||
sp->lchild[r] = sp->lchild[p];
|
||||
sp->rchild[r] = sp->rchild[p];
|
||||
sp->parent[sp->lchild[p]] = r;
|
||||
sp->parent[sp->rchild[p]] = r;
|
||||
if (sp->rchild[sp->parent[p]] == p)
|
||||
sp->rchild[sp->parent[p]] = r;
|
||||
else
|
||||
sp->lchild[sp->parent[p]] = r;
|
||||
sp->parent[p] = NIL; /* remove p */
|
||||
}
|
||||
|
||||
/* deletes node p from tree */
|
||||
static void delete_node(struct encode_state *sp, int p)
|
||||
{
|
||||
int q;
|
||||
|
||||
if (sp->parent[p] == NIL)
|
||||
return; /* not in tree */
|
||||
if (sp->rchild[p] == NIL)
|
||||
q = sp->lchild[p];
|
||||
else if (sp->lchild[p] == NIL)
|
||||
q = sp->rchild[p];
|
||||
else {
|
||||
q = sp->lchild[p];
|
||||
if (sp->rchild[q] != NIL) {
|
||||
do {
|
||||
q = sp->rchild[q];
|
||||
} while (sp->rchild[q] != NIL);
|
||||
sp->rchild[sp->parent[q]] = sp->lchild[q];
|
||||
sp->parent[sp->lchild[q]] = sp->parent[q];
|
||||
sp->lchild[q] = sp->lchild[p];
|
||||
sp->parent[sp->lchild[p]] = q;
|
||||
}
|
||||
sp->rchild[q] = sp->rchild[p];
|
||||
sp->parent[sp->rchild[p]] = q;
|
||||
}
|
||||
sp->parent[q] = sp->parent[p];
|
||||
if (sp->rchild[sp->parent[p]] == p)
|
||||
sp->rchild[sp->parent[p]] = q;
|
||||
else
|
||||
sp->lchild[sp->parent[p]] = q;
|
||||
sp->parent[p] = NIL;
|
||||
}
|
||||
|
||||
/*******************************************************************************
|
||||
*******************************************************************************/
|
||||
u_int8_t * compress_lzss(
|
||||
u_int8_t * dst,
|
||||
u_int32_t dstlen,
|
||||
u_int8_t * src,
|
||||
u_int32_t srclen)
|
||||
{
|
||||
u_int8_t * result = NULL;
|
||||
/* Encoding state, mostly tree but some current match stuff */
|
||||
struct encode_state *sp;
|
||||
|
||||
int i, c, len, r, s, last_match_length, code_buf_ptr;
|
||||
u_int8_t code_buf[17], mask;
|
||||
u_int8_t * srcend = src + srclen;
|
||||
u_int8_t *dstend = dst + dstlen;
|
||||
|
||||
if (dstlen > OC_COMPRESSION_MAX_LENGTH || srclen > OC_COMPRESSION_MAX_LENGTH) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* initialize trees */
|
||||
sp = (struct encode_state *) malloc(sizeof(*sp));
|
||||
if (!sp) goto finish;
|
||||
|
||||
init_state(sp);
|
||||
|
||||
/*
|
||||
* code_buf[1..16] saves eight units of code, and code_buf[0] works
|
||||
* as eight flags, "1" representing that the unit is an unencoded
|
||||
* letter (1 byte), "0" a position-and-length pair (2 bytes).
|
||||
* Thus, eight units require at most 16 bytes of code.
|
||||
*/
|
||||
code_buf[0] = 0;
|
||||
code_buf_ptr = mask = 1;
|
||||
|
||||
/* Clear the buffer with any character that will appear often. */
|
||||
s = 0; r = N - F;
|
||||
|
||||
/* Read F bytes into the last F bytes of the buffer */
|
||||
for (len = 0; len < F && src < srcend; len++)
|
||||
sp->text_buf[r + len] = *src++;
|
||||
if (!len)
|
||||
goto finish;
|
||||
|
||||
/*
|
||||
* Insert the F strings, each of which begins with one or more
|
||||
* 'space' characters. Note the order in which these strings are
|
||||
* inserted. This way, degenerate trees will be less likely to occur.
|
||||
*/
|
||||
for (i = 1; i <= F; i++)
|
||||
insert_node(sp, r - i);
|
||||
|
||||
/*
|
||||
* Finally, insert the whole string just read.
|
||||
* The global variables match_length and match_position are set.
|
||||
*/
|
||||
insert_node(sp, r);
|
||||
do {
|
||||
/* match_length may be spuriously long near the end of text. */
|
||||
if (sp->match_length > len)
|
||||
sp->match_length = len;
|
||||
if (sp->match_length <= THRESHOLD) {
|
||||
sp->match_length = 1; /* Not long enough match. Send one byte. */
|
||||
code_buf[0] |= mask; /* 'send one byte' flag */
|
||||
code_buf[code_buf_ptr++] = sp->text_buf[r]; /* Send uncoded. */
|
||||
} else {
|
||||
/* Send position and length pair. Note match_length > THRESHOLD. */
|
||||
code_buf[code_buf_ptr++] = (u_int8_t) sp->match_position;
|
||||
code_buf[code_buf_ptr++] = (u_int8_t)
|
||||
( ((sp->match_position >> 4) & 0xF0)
|
||||
| (sp->match_length - (THRESHOLD + 1)) );
|
||||
}
|
||||
if ((mask <<= 1) == 0) { /* Shift mask left one bit. */
|
||||
/* Send at most 8 units of code together */
|
||||
for (i = 0; i < code_buf_ptr; i++)
|
||||
if (dst < dstend)
|
||||
*dst++ = code_buf[i];
|
||||
else
|
||||
goto finish;
|
||||
code_buf[0] = 0;
|
||||
code_buf_ptr = mask = 1;
|
||||
}
|
||||
last_match_length = sp->match_length;
|
||||
for (i = 0; i < last_match_length && src < srcend; i++) {
|
||||
delete_node(sp, s); /* Delete old strings and */
|
||||
c = *src++;
|
||||
sp->text_buf[s] = c; /* read new bytes */
|
||||
|
||||
/*
|
||||
* If the position is near the end of buffer, extend the buffer
|
||||
* to make string comparison easier.
|
||||
*/
|
||||
if (s < F - 1)
|
||||
sp->text_buf[s + N] = c;
|
||||
|
||||
/* Since this is a ring buffer, increment the position modulo N. */
|
||||
s = (s + 1) & (N - 1);
|
||||
r = (r + 1) & (N - 1);
|
||||
|
||||
/* Register the string in text_buf[r..r+F-1] */
|
||||
insert_node(sp, r);
|
||||
}
|
||||
while (i++ < last_match_length) {
|
||||
delete_node(sp, s);
|
||||
|
||||
/* After the end of text, no need to read, */
|
||||
s = (s + 1) & (N - 1);
|
||||
r = (r + 1) & (N - 1);
|
||||
/* but buffer may not be empty. */
|
||||
if (--len)
|
||||
insert_node(sp, r);
|
||||
}
|
||||
} while (len > 0); /* until length of string to be processed is zero */
|
||||
|
||||
if (code_buf_ptr > 1) { /* Send remaining code. */
|
||||
for (i = 0; i < code_buf_ptr; i++)
|
||||
if (dst < dstend)
|
||||
*dst++ = code_buf[i];
|
||||
else
|
||||
goto finish;
|
||||
}
|
||||
|
||||
result = dst;
|
||||
|
||||
finish:
|
||||
if (sp) free(sp);
|
||||
|
||||
return result;
|
||||
}
|
||||
37
Library/OcCompressionLib/lzss/lzss.h
Normal file
37
Library/OcCompressionLib/lzss/lzss.h
Normal file
@ -0,0 +1,37 @@
|
||||
/** @file
|
||||
Copyright (C) 2019, vit9696. All rights reserved.
|
||||
|
||||
All rights reserved.
|
||||
|
||||
This program and the accompanying materials
|
||||
are licensed and made available under the terms and conditions of the BSD License
|
||||
which accompanies this distribution. The full text of the license may be found at
|
||||
http://opensource.org/licenses/bsd-license.php
|
||||
|
||||
THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
|
||||
**/
|
||||
|
||||
#ifndef LZSS_H
|
||||
#define LZSS_H
|
||||
|
||||
#include <Library/BaseMemoryLib.h>
|
||||
#include <Library/MemoryAllocationLib.h>
|
||||
#include <Library/OcCompressionLib.h>
|
||||
|
||||
typedef UINT8 u_int8_t;
|
||||
typedef UINT16 u_int16_t;
|
||||
typedef UINT32 u_int32_t;
|
||||
|
||||
typedef INT8 int8_t;
|
||||
typedef INT16 int16_t;
|
||||
typedef INT32 int32_t;
|
||||
|
||||
#define compress_lzss CompressLZSS
|
||||
#define decompress_lzss DecompressLZSS
|
||||
|
||||
#define bzero(Dst, Size) ZeroMem ((Dst), (Size))
|
||||
#define malloc(Size) AllocatePool (Size)
|
||||
#define free(Ptr) FreePool (Ptr)
|
||||
|
||||
#endif // LZSS_H
|
||||
834
Library/OcCompressionLib/lzvn/lzvn.c
Normal file
834
Library/OcCompressionLib/lzvn/lzvn.c
Normal file
@ -0,0 +1,834 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016, Apple Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the distribution.
|
||||
|
||||
3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
// LZVN low-level decoder
|
||||
|
||||
#include "lzvn.h"
|
||||
|
||||
#ifndef assert
|
||||
# define assert(x) do { } while (0)
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
# define LZFSE_INLINE __forceinline
|
||||
# define __builtin_expect(X, Y) (X)
|
||||
# define __attribute__(X)
|
||||
# pragma warning(disable : 4068) // warning C4068: unknown pragma
|
||||
#else
|
||||
# define LZFSE_INLINE static inline __attribute__((__always_inline__))
|
||||
#endif
|
||||
|
||||
/*! @abstract Signed offset in buffers, stored on either 32 or 64 bits. */
|
||||
#if defined(_M_AMD64) || defined(__x86_64__) || defined(__arm64__)
|
||||
typedef int64_t lzvn_offset;
|
||||
#else
|
||||
typedef int32_t lzvn_offset;
|
||||
#endif
|
||||
|
||||
/*! @abstract Base decoder state. */
|
||||
typedef struct {
|
||||
|
||||
// Decoder I/O
|
||||
|
||||
// Next byte to read in source buffer
|
||||
const unsigned char *src;
|
||||
// Next byte after source buffer
|
||||
const unsigned char *src_end;
|
||||
|
||||
// Next byte to write in destination buffer (by decoder)
|
||||
unsigned char *dst;
|
||||
// Valid range for destination buffer is [dst_begin, dst_end - 1]
|
||||
unsigned char *dst_begin;
|
||||
unsigned char *dst_end;
|
||||
// Next byte to read in destination buffer (modified by caller)
|
||||
unsigned char *dst_current;
|
||||
|
||||
// Decoder state
|
||||
|
||||
// Partially expanded match, or 0,0,0.
|
||||
// In that case, src points to the next literal to copy, or the next op-code
|
||||
// if L==0.
|
||||
size_t L, M, D;
|
||||
|
||||
// Distance for last emitted match, or 0
|
||||
lzvn_offset d_prev;
|
||||
|
||||
// Did we decode end-of-stream?
|
||||
int end_of_stream;
|
||||
|
||||
} lzvn_decoder_state;
|
||||
|
||||
/*! @abstract Load bytes from memory location SRC. */
|
||||
LZFSE_INLINE uint16_t load2(const void *ptr) {
|
||||
uint16_t data;
|
||||
memcpy(&data, ptr, sizeof data);
|
||||
return data;
|
||||
}
|
||||
|
||||
/*! @abstract Load 4 bytes from \p ptr into a uint32_t (copied with memcpy,
 *  host byte order). */
LZFSE_INLINE uint32_t load4(const void *ptr) {
  uint32_t value;

  memcpy(&value, ptr, sizeof(value));
  return value;
}
|
||||
|
||||
/*! @abstract Load 8 bytes from \p ptr into a uint64_t (copied with memcpy,
 *  host byte order). */
LZFSE_INLINE uint64_t load8(const void *ptr) {
  uint64_t value;

  memcpy(&value, ptr, sizeof(value));
  return value;
}
|
||||
|
||||
/*! @abstract Store bytes to memory location DST. */
|
||||
LZFSE_INLINE void store4(void *ptr, uint32_t data) {
|
||||
memcpy(ptr, &data, sizeof data);
|
||||
}
|
||||
|
||||
/*! @abstract Store the 8 bytes of \p data to \p ptr (copied with memcpy). */
LZFSE_INLINE void store8(void *ptr, uint64_t data) {
  memcpy(ptr, &data, sizeof(data));
}
|
||||
|
||||
/*! @abstract Extracts \p width bits from \p container, starting with \p lsb; if
|
||||
* we view \p container as a bit array, we extract \c container[lsb:lsb+width]. */
|
||||
LZFSE_INLINE uintmax_t extract(uintmax_t container, unsigned lsb,
|
||||
unsigned width) {
|
||||
static const size_t container_width = sizeof container * 8;
|
||||
assert(lsb < container_width);
|
||||
assert(width > 0 && width <= container_width);
|
||||
assert(lsb + width <= container_width);
|
||||
if (width == container_width)
|
||||
return container;
|
||||
return (container >> lsb) & (((uintmax_t)1 << width) - 1);
|
||||
}
|
||||
|
||||
#if !defined(HAVE_LABELS_AS_VALUES)
|
||||
# if defined(__GNUC__) || defined(__clang__)
|
||||
# define HAVE_LABELS_AS_VALUES 1
|
||||
# else
|
||||
# define HAVE_LABELS_AS_VALUES 0
|
||||
# endif
|
||||
#endif
|
||||
|
||||
// Both the source and destination buffers are represented by a pointer and
|
||||
// a length; they are *always* updated in concert using this macro; however
|
||||
// many bytes the pointer is advanced, the length is decremented by the same
|
||||
// amount. Thus, pointer + length always points to the byte one past the end
|
||||
// of the buffer.
|
||||
#define PTR_LEN_INC(_pointer, _length, _increment) \
|
||||
(_pointer += _increment, _length -= _increment)
|
||||
|
||||
// Update state with current positions and distance, corresponding to the
|
||||
// beginning of an instruction in both streams
|
||||
#define UPDATE_GOOD \
|
||||
(state->src = src_ptr, state->dst = dst_ptr, state->d_prev = D)
|
||||
|
||||
/*! @abstract Decode source to destination.
|
||||
* Updates \p state (src,dst,d_prev). */
|
||||
void lzvn_decode(lzvn_decoder_state *state) {
|
||||
#if HAVE_LABELS_AS_VALUES
|
||||
// Jump table for all instructions
|
||||
static const void *opc_tbl[256] = {
|
||||
&&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&eos, &&lrg_d,
|
||||
&&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&nop, &&lrg_d,
|
||||
&&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&nop, &&lrg_d,
|
||||
&&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d,
|
||||
&&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d,
|
||||
&&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d,
|
||||
&&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d,
|
||||
&&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d,
|
||||
&&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
|
||||
&&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
|
||||
&&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
|
||||
&&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
|
||||
&&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
|
||||
&&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
|
||||
&&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef,
|
||||
&&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef,
|
||||
&&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
|
||||
&&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
|
||||
&&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
|
||||
&&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
|
||||
&&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d,
|
||||
&&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d,
|
||||
&&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d,
|
||||
&&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d,
|
||||
&&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
|
||||
&&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
|
||||
&&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef,
|
||||
&&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef,
|
||||
&&lrg_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l,
|
||||
&&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l,
|
||||
&&lrg_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m,
|
||||
&&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m};
|
||||
#endif
|
||||
size_t src_len = state->src_end - state->src;
|
||||
size_t dst_len = state->dst_end - state->dst;
|
||||
if (src_len == 0 || dst_len == 0)
|
||||
return; // empty buffer
|
||||
|
||||
const unsigned char *src_ptr = state->src;
|
||||
unsigned char *dst_ptr = state->dst;
|
||||
size_t D = state->d_prev;
|
||||
size_t M;
|
||||
size_t L;
|
||||
size_t opc_len;
|
||||
|
||||
// Do we have a partially expanded match saved in state?
|
||||
if (state->L != 0 || state->M != 0) {
|
||||
L = state->L;
|
||||
M = state->M;
|
||||
D = state->D;
|
||||
opc_len = 0; // we already skipped the op
|
||||
state->L = state->M = state->D = 0;
|
||||
if (M == 0)
|
||||
goto copy_literal;
|
||||
if (L == 0)
|
||||
goto copy_match;
|
||||
goto copy_literal_and_match;
|
||||
}
|
||||
|
||||
unsigned char opc = src_ptr[0];
|
||||
|
||||
#if HAVE_LABELS_AS_VALUES
|
||||
goto *opc_tbl[opc];
|
||||
#else
|
||||
for (;;) {
|
||||
switch (opc) {
|
||||
#endif
|
||||
// ===============================================================
|
||||
// These four opcodes (sml_d, med_d, lrg_d, and pre_d) encode both a
|
||||
// literal and a match. The bulk of their implementations are shared;
|
||||
// each label here only does the work of setting the opcode length (not
|
||||
// including any literal bytes), and extracting the literal length, match
|
||||
// length, and match distance (except in pre_d). They then jump into the
|
||||
// shared implementation to actually output the literal and match bytes.
|
||||
//
|
||||
// No error checking happens in the first stage, except for ensuring that
|
||||
// the source has enough length to represent the full opcode before
|
||||
// reading past the first byte.
|
||||
sml_d:
|
||||
#if !HAVE_LABELS_AS_VALUES
|
||||
case 0:
|
||||
case 1:
|
||||
case 2:
|
||||
case 3:
|
||||
case 4:
|
||||
case 5:
|
||||
case 8:
|
||||
case 9:
|
||||
case 10:
|
||||
case 11:
|
||||
case 12:
|
||||
case 13:
|
||||
case 16:
|
||||
case 17:
|
||||
case 18:
|
||||
case 19:
|
||||
case 20:
|
||||
case 21:
|
||||
case 24:
|
||||
case 25:
|
||||
case 26:
|
||||
case 27:
|
||||
case 28:
|
||||
case 29:
|
||||
case 32:
|
||||
case 33:
|
||||
case 34:
|
||||
case 35:
|
||||
case 36:
|
||||
case 37:
|
||||
case 40:
|
||||
case 41:
|
||||
case 42:
|
||||
case 43:
|
||||
case 44:
|
||||
case 45:
|
||||
case 48:
|
||||
case 49:
|
||||
case 50:
|
||||
case 51:
|
||||
case 52:
|
||||
case 53:
|
||||
case 56:
|
||||
case 57:
|
||||
case 58:
|
||||
case 59:
|
||||
case 60:
|
||||
case 61:
|
||||
case 64:
|
||||
case 65:
|
||||
case 66:
|
||||
case 67:
|
||||
case 68:
|
||||
case 69:
|
||||
case 72:
|
||||
case 73:
|
||||
case 74:
|
||||
case 75:
|
||||
case 76:
|
||||
case 77:
|
||||
case 80:
|
||||
case 81:
|
||||
case 82:
|
||||
case 83:
|
||||
case 84:
|
||||
case 85:
|
||||
case 88:
|
||||
case 89:
|
||||
case 90:
|
||||
case 91:
|
||||
case 92:
|
||||
case 93:
|
||||
case 96:
|
||||
case 97:
|
||||
case 98:
|
||||
case 99:
|
||||
case 100:
|
||||
case 101:
|
||||
case 104:
|
||||
case 105:
|
||||
case 106:
|
||||
case 107:
|
||||
case 108:
|
||||
case 109:
|
||||
case 128:
|
||||
case 129:
|
||||
case 130:
|
||||
case 131:
|
||||
case 132:
|
||||
case 133:
|
||||
case 136:
|
||||
case 137:
|
||||
case 138:
|
||||
case 139:
|
||||
case 140:
|
||||
case 141:
|
||||
case 144:
|
||||
case 145:
|
||||
case 146:
|
||||
case 147:
|
||||
case 148:
|
||||
case 149:
|
||||
case 152:
|
||||
case 153:
|
||||
case 154:
|
||||
case 155:
|
||||
case 156:
|
||||
case 157:
|
||||
case 192:
|
||||
case 193:
|
||||
case 194:
|
||||
case 195:
|
||||
case 196:
|
||||
case 197:
|
||||
case 200:
|
||||
case 201:
|
||||
case 202:
|
||||
case 203:
|
||||
case 204:
|
||||
case 205:
|
||||
#endif
|
||||
UPDATE_GOOD;
|
||||
// "small distance": This opcode has the structure LLMMMDDD DDDDDDDD LITERAL
|
||||
// where the length of literal (0-3 bytes) is encoded by the high 2 bits of
|
||||
// the first byte. We first extract the literal length so we know how long
|
||||
// the opcode is, then check that the source can hold both this opcode and
|
||||
// at least one byte of the next (because any valid input stream must be
|
||||
// terminated with an eos token).
|
||||
opc_len = 2;
|
||||
L = (size_t)extract(opc, 6, 2);
|
||||
M = (size_t)extract(opc, 3, 3) + 3;
|
||||
// We need to ensure that the source buffer is long enough that we can
|
||||
// safely read this entire opcode, the literal that follows, and the first
|
||||
// byte of the next opcode. Once we satisfy this requirement, we can
|
||||
// safely unpack the match distance. A check similar to this one is
|
||||
// present in all the opcode implementations.
|
||||
if (src_len <= opc_len + L)
|
||||
return; // source truncated
|
||||
D = (size_t)extract(opc, 0, 3) << 8 | src_ptr[1];
|
||||
goto copy_literal_and_match;
|
||||
|
||||
med_d:
|
||||
#if !HAVE_LABELS_AS_VALUES
|
||||
case 160:
|
||||
case 161:
|
||||
case 162:
|
||||
case 163:
|
||||
case 164:
|
||||
case 165:
|
||||
case 166:
|
||||
case 167:
|
||||
case 168:
|
||||
case 169:
|
||||
case 170:
|
||||
case 171:
|
||||
case 172:
|
||||
case 173:
|
||||
case 174:
|
||||
case 175:
|
||||
case 176:
|
||||
case 177:
|
||||
case 178:
|
||||
case 179:
|
||||
case 180:
|
||||
case 181:
|
||||
case 182:
|
||||
case 183:
|
||||
case 184:
|
||||
case 185:
|
||||
case 186:
|
||||
case 187:
|
||||
case 188:
|
||||
case 189:
|
||||
case 190:
|
||||
case 191:
|
||||
#endif
|
||||
UPDATE_GOOD;
|
||||
// "medium distance": This is a minor variant of the "small distance"
|
||||
// encoding, where we will now use two extra bytes instead of one to encode
|
||||
// the rest of the match length and distance. This allows an extra two bits
|
||||
// for the match length, and an extra three bits for the match distance. The
|
||||
// full structure of the opcode is 101LLMMM DDDDDDMM DDDDDDDD LITERAL.
|
||||
opc_len = 3;
|
||||
L = (size_t)extract(opc, 3, 2);
|
||||
if (src_len <= opc_len + L)
|
||||
return; // source truncated
|
||||
uint16_t opc23 = load2(&src_ptr[1]);
|
||||
M = (size_t)((extract(opc, 0, 3) << 2 | extract(opc23, 0, 2)) + 3);
|
||||
D = (size_t)extract(opc23, 2, 14);
|
||||
goto copy_literal_and_match;
|
||||
|
||||
lrg_d:
|
||||
#if !HAVE_LABELS_AS_VALUES
|
||||
case 7:
|
||||
case 15:
|
||||
case 23:
|
||||
case 31:
|
||||
case 39:
|
||||
case 47:
|
||||
case 55:
|
||||
case 63:
|
||||
case 71:
|
||||
case 79:
|
||||
case 87:
|
||||
case 95:
|
||||
case 103:
|
||||
case 111:
|
||||
case 135:
|
||||
case 143:
|
||||
case 151:
|
||||
case 159:
|
||||
case 199:
|
||||
case 207:
|
||||
#endif
|
||||
UPDATE_GOOD;
|
||||
// "large distance": This is another variant of the "small distance"
|
||||
// encoding, where we will now use two extra bytes to encode the match
|
||||
// distance, which allows distances up to 65535 to be represented. The full
|
||||
// structure of the opcode is LLMMM111 DDDDDDDD DDDDDDDD LITERAL.
|
||||
opc_len = 3;
|
||||
L = (size_t)extract(opc, 6, 2);
|
||||
M = (size_t)extract(opc, 3, 3) + 3;
|
||||
if (src_len <= opc_len + L)
|
||||
return; // source truncated
|
||||
D = load2(&src_ptr[1]);
|
||||
goto copy_literal_and_match;
|
||||
|
||||
pre_d:
|
||||
#if !HAVE_LABELS_AS_VALUES
|
||||
case 70:
|
||||
case 78:
|
||||
case 86:
|
||||
case 94:
|
||||
case 102:
|
||||
case 110:
|
||||
case 134:
|
||||
case 142:
|
||||
case 150:
|
||||
case 158:
|
||||
case 198:
|
||||
case 206:
|
||||
#endif
|
||||
UPDATE_GOOD;
|
||||
// "previous distance": This opcode has the structure LLMMM110, where the
|
||||
// length of the literal (0-3 bytes) is encoded by the high 2 bits of the
|
||||
// first byte. We first extract the literal length so we know how long
|
||||
// the opcode is, then check that the source can hold both this opcode and
|
||||
// at least one byte of the next (because any valid input stream must be
|
||||
// terminated with an eos token).
|
||||
opc_len = 1;
|
||||
L = (size_t)extract(opc, 6, 2);
|
||||
M = (size_t)extract(opc, 3, 3) + 3;
|
||||
if (src_len <= opc_len + L)
|
||||
return; // source truncated
|
||||
goto copy_literal_and_match;
|
||||
|
||||
copy_literal_and_match:
|
||||
// Common implementation of writing data for opcodes that have both a
|
||||
// literal and a match. We begin by advancing the source pointer past
|
||||
// the opcode, so that it points at the first literal byte (if L
|
||||
// is non-zero; otherwise it points at the next opcode).
|
||||
PTR_LEN_INC(src_ptr, src_len, opc_len);
|
||||
// Now we copy the literal from the source pointer to the destination.
|
||||
if (__builtin_expect(dst_len >= 4 && src_len >= 4, 1)) {
|
||||
// The literal is 0-3 bytes; if we are not near the end of the buffer,
|
||||
// we can safely just do a 4 byte copy (which is guaranteed to cover
|
||||
// the complete literal, and may include some other bytes as well).
|
||||
store4(dst_ptr, load4(src_ptr));
|
||||
} else if (L <= dst_len) {
|
||||
// We are too close to the end of either the input or output stream
|
||||
// to be able to safely use a four-byte copy, but we will not exhaust
|
||||
// either stream (we already know that the source will not be
|
||||
// exhausted from checks in the individual opcode implementations,
|
||||
// and we just tested that dst_len >= L). Thus, we need to do a
|
||||
// byte-by-byte copy of the literal. This is slow, but it can only ever
|
||||
// happen near the very end of a buffer, so it is not an important case to
|
||||
// optimize.
|
||||
for (size_t i = 0; i < L; ++i)
|
||||
dst_ptr[i] = src_ptr[i];
|
||||
} else {
|
||||
// Destination truncated: fill DST, and store partial match
|
||||
|
||||
// Copy partial literal
|
||||
for (size_t i = 0; i < dst_len; ++i)
|
||||
dst_ptr[i] = src_ptr[i];
|
||||
// Save state
|
||||
state->src = src_ptr + dst_len;
|
||||
state->dst = dst_ptr + dst_len;
|
||||
state->L = L - dst_len;
|
||||
state->M = M;
|
||||
state->D = D;
|
||||
return; // destination truncated
|
||||
}
|
||||
// Having completed the copy of the literal, we advance both the source
|
||||
// and destination pointers by the number of literal bytes.
|
||||
PTR_LEN_INC(dst_ptr, dst_len, L);
|
||||
PTR_LEN_INC(src_ptr, src_len, L);
|
||||
// Check if the match distance is valid; matches must not reference
|
||||
// bytes that precede the start of the output buffer, nor can the match
|
||||
// distance be zero.
|
||||
if (D > (size_t)(dst_ptr - state->dst_begin) || D == 0)
|
||||
goto invalid_match_distance;
|
||||
copy_match:
|
||||
// Now we copy the match from dst_ptr - D to dst_ptr. It is important to keep
|
||||
// in mind that we may have D < M, in which case the source and destination
|
||||
// windows overlap in the copy. The semantics of the match copy are *not*
|
||||
// those of memmove( ); if the buffers overlap it needs to behave as though
|
||||
// we were copying byte-by-byte in increasing address order. If, for example,
|
||||
// D is 1, the copy operation is equivalent to:
|
||||
//
|
||||
// memset(dst_ptr, dst_ptr[-1], M);
|
||||
//
|
||||
// i.e. it splats the previous byte. This means that we need to be very
|
||||
// careful about using wide loads or stores to perform the copy operation.
|
||||
if (__builtin_expect(dst_len >= M + 7 && D >= 8, 1)) {
|
||||
// We are not near the end of the buffer, and the match distance
|
||||
// is at least eight. Thus, we can safely loop using eight byte
|
||||
// copies. The last of these may slop over the intended end of
|
||||
// the match, but this is OK because we know we have a safety bound
|
||||
// away from the end of the destination buffer.
|
||||
for (size_t i = 0; i < M; i += 8)
|
||||
store8(&dst_ptr[i], load8(&dst_ptr[i - D]));
|
||||
} else if (M <= dst_len) {
|
||||
// Either the match distance is too small, or we are too close to
|
||||
// the end of the buffer to safely use eight byte copies. Fall back
|
||||
// on a simple byte-by-byte implementation.
|
||||
for (size_t i = 0; i < M; ++i)
|
||||
dst_ptr[i] = dst_ptr[i - D];
|
||||
} else {
|
||||
// Destination truncated: fill DST, and store partial match
|
||||
|
||||
// Copy partial match
|
||||
for (size_t i = 0; i < dst_len; ++i)
|
||||
dst_ptr[i] = dst_ptr[i - D];
|
||||
// Save state
|
||||
state->src = src_ptr;
|
||||
state->dst = dst_ptr + dst_len;
|
||||
state->L = 0;
|
||||
state->M = M - dst_len;
|
||||
state->D = D;
|
||||
return; // destination truncated
|
||||
}
|
||||
// Update the destination pointer and length to account for the bytes
|
||||
// written by the match, then load the next opcode byte and branch to
|
||||
// the appropriate implementation.
|
||||
PTR_LEN_INC(dst_ptr, dst_len, M);
|
||||
opc = src_ptr[0];
|
||||
#if HAVE_LABELS_AS_VALUES
|
||||
goto *opc_tbl[opc];
|
||||
#else
|
||||
break;
|
||||
#endif
|
||||
|
||||
// ===============================================================
|
||||
// Opcodes representing only a match (no literal).
|
||||
// These two opcodes (lrg_m and sml_m) encode only a match. The match
|
||||
// distance is carried over from the previous opcode, so all they need
|
||||
// to encode is the match length. We are able to reuse the match copy
|
||||
// sequence from the literal and match opcodes to perform the actual
|
||||
// copy implementation.
|
||||
sml_m:
|
||||
#if !HAVE_LABELS_AS_VALUES
|
||||
case 241:
|
||||
case 242:
|
||||
case 243:
|
||||
case 244:
|
||||
case 245:
|
||||
case 246:
|
||||
case 247:
|
||||
case 248:
|
||||
case 249:
|
||||
case 250:
|
||||
case 251:
|
||||
case 252:
|
||||
case 253:
|
||||
case 254:
|
||||
case 255:
|
||||
#endif
|
||||
UPDATE_GOOD;
|
||||
// "small match": This opcode has no literal, and uses the previous match
|
||||
// distance (i.e. it encodes only the match length), in a single byte as
|
||||
// 1111MMMM.
|
||||
opc_len = 1;
|
||||
if (src_len <= opc_len)
|
||||
return; // source truncated
|
||||
M = (size_t)extract(opc, 0, 4);
|
||||
PTR_LEN_INC(src_ptr, src_len, opc_len);
|
||||
goto copy_match;
|
||||
|
||||
lrg_m:
|
||||
#if !HAVE_LABELS_AS_VALUES
|
||||
case 240:
|
||||
#endif
|
||||
UPDATE_GOOD;
|
||||
// "large match": This opcode has no literal, and uses the previous match
|
||||
// distance (i.e. it encodes only the match length). It is encoded in two
|
||||
// bytes as 11110000 MMMMMMMM. Because matches smaller than 16 bytes can
|
||||
// be represented by sml_m, there is an implicit bias of 16 on the match
|
||||
// length; the representable values are [16,271].
|
||||
opc_len = 2;
|
||||
if (src_len <= opc_len)
|
||||
return; // source truncated
|
||||
M = src_ptr[1] + 16;
|
||||
PTR_LEN_INC(src_ptr, src_len, opc_len);
|
||||
goto copy_match;
|
||||
|
||||
// ===============================================================
|
||||
// Opcodes representing only a literal (no match).
|
||||
// These two opcodes (lrg_l and sml_l) encode only a literal. There is no
|
||||
// match length or match distance to worry about (but we need to *not*
|
||||
// touch D, as it must be preserved between opcodes).
|
||||
sml_l:
|
||||
#if !HAVE_LABELS_AS_VALUES
|
||||
case 225:
|
||||
case 226:
|
||||
case 227:
|
||||
case 228:
|
||||
case 229:
|
||||
case 230:
|
||||
case 231:
|
||||
case 232:
|
||||
case 233:
|
||||
case 234:
|
||||
case 235:
|
||||
case 236:
|
||||
case 237:
|
||||
case 238:
|
||||
case 239:
|
||||
#endif
|
||||
UPDATE_GOOD;
|
||||
// "small literal": This opcode has no match, and encodes only a literal
|
||||
// of length up to 15 bytes. The format is 1110LLLL LITERAL.
|
||||
opc_len = 1;
|
||||
L = (size_t)extract(opc, 0, 4);
|
||||
goto copy_literal;
|
||||
|
||||
lrg_l:
|
||||
#if !HAVE_LABELS_AS_VALUES
|
||||
case 224:
|
||||
#endif
|
||||
UPDATE_GOOD;
|
||||
// "large literal": This opcode has no match and leaves the match distance
|
||||
// untouched (i.e. it encodes only the literal length). It is encoded in two
|
||||
// bytes as 11100000 LLLLLLLL LITERAL. Because literals smaller than 16
|
||||
// bytes can be represented by sml_l, there is an implicit bias of 16 on
|
||||
// the literal length; the representable values are [16,271].
|
||||
opc_len = 2;
|
||||
if (src_len <= 2)
|
||||
return; // source truncated
|
||||
L = src_ptr[1] + 16;
|
||||
goto copy_literal;
|
||||
|
||||
copy_literal:
|
||||
// Check that the source buffer is large enough to hold the complete
|
||||
// literal and at least the first byte of the next opcode. If so, advance
|
||||
// the source pointer to point to the first byte of the literal and adjust
|
||||
// the source length accordingly.
|
||||
if (src_len <= opc_len + L)
|
||||
return; // source truncated
|
||||
PTR_LEN_INC(src_ptr, src_len, opc_len);
|
||||
// Now we copy the literal from the source pointer to the destination.
|
||||
if (dst_len >= L + 7 && src_len >= L + 7) {
|
||||
// We are not near the end of the source or destination buffers; thus
|
||||
// we can safely copy the literal using wide copies, without worrying
|
||||
// about reading or writing past the end of either buffer.
|
||||
for (size_t i = 0; i < L; i += 8)
|
||||
store8(&dst_ptr[i], load8(&src_ptr[i]));
|
||||
} else if (L <= dst_len) {
|
||||
// We are too close to the end of either the input or output stream
|
||||
// to be able to safely use an eight-byte copy. Instead we copy the
|
||||
// literal byte-by-byte.
|
||||
for (size_t i = 0; i < L; ++i)
|
||||
dst_ptr[i] = src_ptr[i];
|
||||
} else {
|
||||
// Destination truncated: fill DST, and store partial literal
|
||||
|
||||
// Copy partial literal
|
||||
for (size_t i = 0; i < dst_len; ++i)
|
||||
dst_ptr[i] = src_ptr[i];
|
||||
// Save state
|
||||
state->src = src_ptr + dst_len;
|
||||
state->dst = dst_ptr + dst_len;
|
||||
state->L = L - dst_len;
|
||||
state->M = 0;
|
||||
state->D = D;
|
||||
return; // destination truncated
|
||||
}
|
||||
// Having completed the copy of the literal, we advance both the source
|
||||
// and destination pointers by the number of literal bytes.
|
||||
PTR_LEN_INC(dst_ptr, dst_len, L);
|
||||
PTR_LEN_INC(src_ptr, src_len, L);
|
||||
// Load the first byte of the next opcode, and jump to its implementation.
|
||||
opc = src_ptr[0];
|
||||
#if HAVE_LABELS_AS_VALUES
|
||||
goto *opc_tbl[opc];
|
||||
#else
|
||||
break;
|
||||
#endif
|
||||
|
||||
// ===============================================================
|
||||
// Other opcodes
|
||||
nop:
|
||||
#if !HAVE_LABELS_AS_VALUES
|
||||
case 14:
|
||||
case 22:
|
||||
#endif
|
||||
UPDATE_GOOD;
|
||||
opc_len = 1;
|
||||
if (src_len <= opc_len)
|
||||
return; // source truncated
|
||||
PTR_LEN_INC(src_ptr, src_len, opc_len);
|
||||
opc = src_ptr[0];
|
||||
#if HAVE_LABELS_AS_VALUES
|
||||
goto *opc_tbl[opc];
|
||||
#else
|
||||
break;
|
||||
#endif
|
||||
|
||||
eos:
|
||||
#if !HAVE_LABELS_AS_VALUES
|
||||
case 6:
|
||||
#endif
|
||||
opc_len = 8;
|
||||
if (src_len < opc_len)
|
||||
return; // source truncated (here we don't need an extra byte for next op
|
||||
// code)
|
||||
PTR_LEN_INC(src_ptr, src_len, opc_len);
|
||||
state->end_of_stream = 1;
|
||||
UPDATE_GOOD;
|
||||
return; // end-of-stream
|
||||
|
||||
// ===============================================================
|
||||
// Return on error
|
||||
udef:
|
||||
#if !HAVE_LABELS_AS_VALUES
|
||||
case 30:
|
||||
case 38:
|
||||
case 46:
|
||||
case 54:
|
||||
case 62:
|
||||
case 112:
|
||||
case 113:
|
||||
case 114:
|
||||
case 115:
|
||||
case 116:
|
||||
case 117:
|
||||
case 118:
|
||||
case 119:
|
||||
case 120:
|
||||
case 121:
|
||||
case 122:
|
||||
case 123:
|
||||
case 124:
|
||||
case 125:
|
||||
case 126:
|
||||
case 127:
|
||||
case 208:
|
||||
case 209:
|
||||
case 210:
|
||||
case 211:
|
||||
case 212:
|
||||
case 213:
|
||||
case 214:
|
||||
case 215:
|
||||
case 216:
|
||||
case 217:
|
||||
case 218:
|
||||
case 219:
|
||||
case 220:
|
||||
case 221:
|
||||
case 222:
|
||||
case 223:
|
||||
#endif
|
||||
invalid_match_distance:
|
||||
|
||||
return; // we already updated state
|
||||
#if !HAVE_LABELS_AS_VALUES
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
size_t lzvn_decode_buffer(void *dst, size_t dst_size,
|
||||
const void *src, size_t src_size) {
|
||||
// Init LZVN decoder state
|
||||
lzvn_decoder_state dstate;
|
||||
|
||||
if (dst_size > OC_COMPRESSION_MAX_LENGTH || src_size > OC_COMPRESSION_MAX_LENGTH) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
memset(&dstate, 0x00, sizeof(dstate));
|
||||
dstate.src = src;
|
||||
dstate.src_end = src + src_size;
|
||||
|
||||
dstate.dst_begin = dst;
|
||||
dstate.dst = dst;
|
||||
dstate.dst_end = dst + dst_size;
|
||||
|
||||
dstate.d_prev = 0;
|
||||
dstate.end_of_stream = 0;
|
||||
|
||||
// Run LZVN decoder
|
||||
lzvn_decode(&dstate);
|
||||
|
||||
// This is how much we decompressed
|
||||
return dstate.dst - (unsigned char*) dst;
|
||||
}
|
||||
37
Library/OcCompressionLib/lzvn/lzvn.h
Normal file
37
Library/OcCompressionLib/lzvn/lzvn.h
Normal file
@ -0,0 +1,37 @@
|
||||
/** @file
|
||||
Copyright (C) 2019, vit9696. All rights reserved.
|
||||
|
||||
All rights reserved.
|
||||
|
||||
This program and the accompanying materials
|
||||
are licensed and made available under the terms and conditions of the BSD License
|
||||
which accompanies this distribution. The full text of the license may be found at
|
||||
http://opensource.org/licenses/bsd-license.php
|
||||
|
||||
THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
|
||||
**/
|
||||
|
||||
#ifndef LZVN_H
|
||||
#define LZVN_H
|
||||
|
||||
#include <Library/BaseMemoryLib.h>
|
||||
#include <Library/OcCompressionLib.h>
|
||||
|
||||
// C99-style fixed-width unsigned names mapped onto the UINT16/UINT32/UINT64 base types.
typedef UINT16 uint16_t;
|
||||
typedef UINT32 uint32_t;
|
||||
typedef UINT64 uint64_t;
|
||||
|
||||
// Signed counterparts, mapped onto INT16/INT32/INT64.
typedef INT16 int16_t;
|
||||
typedef INT32 int32_t;
|
||||
typedef INT64 int64_t;
|
||||
|
||||
// Both size_t and uintmax_t alias UINTN.
// NOTE(review): uintmax_t is therefore only as wide as UINTN, not necessarily
// the widest integer type available - presumably intentional; confirm.
typedef UINTN size_t;
|
||||
typedef UINTN uintmax_t;
|
||||
|
||||
// Renames lzvn_decode_buffer to DecompressLZVN wherever this header is included.
// Presumably DecompressLZVN is the public name from OcCompressionLib.h - TODO confirm.
#define lzvn_decode_buffer DecompressLZVN
|
||||
|
||||
// memset replacement: SetMem receives (buffer, length, value), so the last two
// arguments are swapped relative to memset, and the value is cast to UINT8.
#define memset(Dst, Value, Size) SetMem ((Dst), (Size), (UINT8)(Value))
|
||||
// memcpy replacement: arguments are forwarded to CopyMem in the same order.
#define memcpy(Dst, Src, Size) CopyMem ((Dst), (Src), (Size))
|
||||
|
||||
#endif /* LZVN_H */
|
||||
@ -59,6 +59,9 @@
|
||||
## @libraryclass
|
||||
OcAppleImageVerificationLib|Include/Library/OcAppleImageVerificationLib.h
|
||||
|
||||
## @libraryclass
|
||||
OcCompressionLib|Include/Library/OcCompressionLib.h
|
||||
|
||||
## @libraryclass
|
||||
OcCpuLib|Include/Library/OcCpuLib.h
|
||||
|
||||
|
||||
@ -53,6 +53,7 @@
|
||||
OcAppleImageVerificationLib|OcSupportPkg/Library/OcAppleImageVerificationLib/OcAppleImageVerificationLib.inf
|
||||
OcCpuLib|OcSupportPkg/Library/OcCpuLib/OcCpuLib.inf
|
||||
OcCryptoLib|OcSupportPkg/Library/OcCryptoLib/OcCryptoLib.inf
|
||||
OcCompressionLib|OcSupportPkg/Library/OcCompressionLib/OcCompressionLib.inf
|
||||
OcDataHubLib|OcSupportPkg/Library/OcDataHubLib/OcDataHubLib.inf
|
||||
OcDebugLogLib|OcSupportPkg/Library/OcDebugLogLib/OcDebugLogLib.inf
|
||||
OcDevicePathLib|OcSupportPkg/Library/OcDevicePathLib/OcDevicePathLib.inf
|
||||
@ -79,6 +80,7 @@
|
||||
OcSupportPkg/Library/OcAppleImageVerificationLib/OcAppleImageVerificationLib.inf
|
||||
OcSupportPkg/Library/OcCpuLib/OcCpuLib.inf
|
||||
OcSupportPkg/Library/OcCryptoLib/OcCryptoLib.inf
|
||||
OcSupportPkg/Library/OcCompressionLib/OcCompressionLib.inf
|
||||
OcSupportPkg/Library/OcDataHubLib/OcDataHubLib.inf
|
||||
OcSupportPkg/Library/OcDebugLogLib/OcDebugLogLib.inf
|
||||
OcSupportPkg/Library/OcDevicePathLib/OcDevicePathLib.inf
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user