/* mirror of https://github.com/stenzek/duckstation */
/* Bcj2.h -- BCJ2 converter for x86 code (Branch CALL/JUMP variant2)
 | 
						|
2023-03-02 : Igor Pavlov : Public domain */
 | 
						|
 | 
						|
#ifndef ZIP7_INC_BCJ2_H
 | 
						|
#define ZIP7_INC_BCJ2_H
 | 
						|
 | 
						|
#include "7zTypes.h"
 | 
						|
 | 
						|
EXTERN_C_BEGIN
 | 
						|
 | 
						|
/* Number of byte streams used by the BCJ2 codec.
   It must stay equal to the number of BCJ2_STREAM_* identifiers below:
   the decoder / encoder state enums start their numbering at this value,
   and the bufs[] / lims[] arrays are sized with it. */
#define BCJ2_NUM_STREAMS 4

/* Stream identifiers. They are also used as indexes for bufs[] / lims[]. */
enum
{
  BCJ2_STREAM_MAIN,  /* 0 : main stream */
  BCJ2_STREAM_CALL,  /* 1 : 32-bit stream (see BCJ2_IS_32BIT_STREAM) */
  BCJ2_STREAM_JUMP,  /* 2 : 32-bit stream (see BCJ2_IS_32BIT_STREAM) */
  BCJ2_STREAM_RC     /* 3 : RC (range coder) stream */
};
 | 
						|
 | 
						|
enum
 | 
						|
{
 | 
						|
  BCJ2_DEC_STATE_ORIG_0 = BCJ2_NUM_STREAMS,
 | 
						|
  BCJ2_DEC_STATE_ORIG_1,
 | 
						|
  BCJ2_DEC_STATE_ORIG_2,
 | 
						|
  BCJ2_DEC_STATE_ORIG_3,
 | 
						|
  
 | 
						|
  BCJ2_DEC_STATE_ORIG,
 | 
						|
  BCJ2_DEC_STATE_ERROR     /* after detected data error */
 | 
						|
};
 | 
						|
 | 
						|
enum
 | 
						|
{
 | 
						|
  BCJ2_ENC_STATE_ORIG = BCJ2_NUM_STREAMS,
 | 
						|
  BCJ2_ENC_STATE_FINISHED  /* it's state after fully encoded stream */
 | 
						|
};
 | 
						|
 | 
						|
 | 
						|
/* Nonzero, if (s) is one of the two 32-bit streams. It is equivalent to:
     ((s) == BCJ2_STREAM_CALL || (s) == BCJ2_STREAM_JUMP)
   but it uses a single unsigned subtraction and a single comparison:
   values smaller than BCJ2_STREAM_CALL wrap around to big unsigned values.
   It relies on (BCJ2_STREAM_JUMP == BCJ2_STREAM_CALL + 1).
   (s) is evaluated only once. */
#define BCJ2_IS_32BIT_STREAM(s) ((unsigned)((unsigned)(s) - (unsigned)BCJ2_STREAM_CALL) < 2)
 | 
						|
 | 
						|
/*
CBcj2Dec / CBcj2Enc
bufs sizes:
  BUF_SIZE(n) = lims[n] - bufs[n]
bufs sizes for BCJ2_STREAM_CALL and BCJ2_STREAM_JUMP must be a multiple of 4:
    (BUF_SIZE(BCJ2_STREAM_CALL) & 3) == 0
    (BUF_SIZE(BCJ2_STREAM_JUMP) & 3) == 0
*/
 | 
						|
 | 
						|
// typedef UInt32 CBcj2Prob;
 | 
						|
typedef UInt16 CBcj2Prob;
 | 
						|
 | 
						|
/*
BCJ2 encoder / decoder internal requirements:
  - If the last bytes of the stream contain a marker (e8/e9/0f8x), then
    there is also an encoded symbol (0 : no conversion) in the RC stream.
  - One case of overlapped instructions is supported,
    if the last byte of a converted instruction is (0f) and the next byte is (8x):
      marker [xx xx xx 0f] 8x
    then the pair (0f 8x) is treated as a marker.
*/
 | 
						|
 | 
						|
/* ---------- BCJ2 Decoder ---------- */
 | 
						|
 | 
						|
/*
CBcj2Dec:
(dest) is allowed to overlap with bufs[BCJ2_STREAM_MAIN], with the following conditions:
  bufs[BCJ2_STREAM_MAIN] >= dest &&
  bufs[BCJ2_STREAM_MAIN] - dest >=
        BUF_SIZE(BCJ2_STREAM_CALL) +
        BUF_SIZE(BCJ2_STREAM_JUMP)
  reserve = bufs[BCJ2_STREAM_MAIN] - dest -
      ( BUF_SIZE(BCJ2_STREAM_CALL) +
        BUF_SIZE(BCJ2_STREAM_JUMP) )
  and additional conditions:
  if (it's the first call of Bcj2Dec_Decode() after Bcj2Dec_Init())
  {
    (reserve != 1) : if (ver <  v23.00)
  }
  else // if there is more than one call of Bcj2Dec_Decode() after Bcj2Dec_Init()
  {
    (reserve >= 6) : if (ver <  v23.00)
    (reserve >= 4) : if (ver >= v23.00)
    We need that (reserve) because after the first call of Bcj2Dec_Decode(),
    CBcj2Dec::temp can contain up to 4 bytes for writing to (dest).
  }
  (reserve == 0) is allowed, if we decode the full stream via a single call of Bcj2Dec_Decode().
  (reserve == 0) is also allowed in case of multi-call, if we use fixed buffers,
     and (reserve) is calculated from full (final) sizes of all streams before the first call.
*/
 | 
						|
 | 
						|
typedef struct
 | 
						|
{
 | 
						|
  const Byte *bufs[BCJ2_NUM_STREAMS];
 | 
						|
  const Byte *lims[BCJ2_NUM_STREAMS];
 | 
						|
  Byte *dest;
 | 
						|
  const Byte *destLim;
 | 
						|
 | 
						|
  unsigned state; /* BCJ2_STREAM_MAIN has more priority than BCJ2_STATE_ORIG */
 | 
						|
 | 
						|
  UInt32 ip;      /* property of starting base for decoding */
 | 
						|
  UInt32 temp;    /* Byte temp[4]; */
 | 
						|
  UInt32 range;
 | 
						|
  UInt32 code;
 | 
						|
  CBcj2Prob probs[2 + 256];
 | 
						|
} CBcj2Dec;
 | 
						|
 | 
						|
 | 
						|
/* Note:
 | 
						|
   Bcj2Dec_Init() sets (CBcj2Dec::ip = 0)
 | 
						|
   if (ip != 0) property is required, the caller must set CBcj2Dec::ip after Bcj2Dec_Init()
 | 
						|
*/
 | 
						|
void Bcj2Dec_Init(CBcj2Dec *p);
 | 
						|
 | 
						|
 | 
						|
/* Bcj2Dec_Decode():
 | 
						|
   returns:
 | 
						|
     SZ_OK
 | 
						|
     SZ_ERROR_DATA : if data in 5 starting bytes of BCJ2_STREAM_RC stream are not correct
 | 
						|
*/
 | 
						|
SRes Bcj2Dec_Decode(CBcj2Dec *p);
 | 
						|
 | 
						|
/* To check that decoding was finished you can compare
   sizes of processed streams with sizes known from other sources.
   You must do at least one mandatory check from the two following options:
      - the check for size of processed output (ORIG) stream.
      - the check for size of processed input  (MAIN) stream.
   additional optional checks:
      - the checks for processed sizes of all input streams (MAIN, CALL, JUMP, RC)
      - the checks Bcj2Dec_IsMaybeFinished*()
   Also, before actual decoding, you can check that the
   following condition is met for stream sizes:
     ( size(ORIG) == size(MAIN) + size(CALL) + size(JUMP) )
*/
 | 
						|
 | 
						|
/* (state == BCJ2_STREAM_MAIN) means that the decoder is ready for
      additional input data in BCJ2_STREAM_MAIN stream.
   Note that (state == BCJ2_STREAM_MAIN) is allowed for non-finished decoding.
*/
#define Bcj2Dec_IsMaybeFinished_state_MAIN(_p_) ((_p_)->state == BCJ2_STREAM_MAIN)

/* If the stream decoding was finished correctly, then the range decoder
   part of CBcj2Dec also was finished, and then (CBcj2Dec::code == 0).
   Note that (CBcj2Dec::code == 0) is allowed for non-finished decoding.
*/
#define Bcj2Dec_IsMaybeFinished_code(_p_) ((_p_)->code == 0)

/* Use Bcj2Dec_IsMaybeFinished() only as an additional check
    after at least one mandatory check from the two following options:
      - the check for size of processed output (ORIG) stream.
      - the check for size of processed input  (MAIN) stream.
   Note: (_p_) is evaluated twice, so it must not have side effects.
*/
#define Bcj2Dec_IsMaybeFinished(_p_) ( \
        Bcj2Dec_IsMaybeFinished_state_MAIN(_p_) && \
        Bcj2Dec_IsMaybeFinished_code(_p_))
 | 
						|
 | 
						|
 | 
						|
 | 
						|
/* ---------- BCJ2 Encoder ---------- */
 | 
						|
 | 
						|
/* Finish mode for Bcj2Enc_Encode() (CBcj2Enc::finishMode). */
typedef enum
{
  BCJ2_ENC_FINISH_MODE_CONTINUE,   /* further calls can provide more input data (src) */
  BCJ2_ENC_FINISH_MODE_END_BLOCK,  /* finish current block (ended at srcLim) without RC flushing */
  BCJ2_ENC_FINISH_MODE_END_STREAM  /* finish the stream (ended at srcLim) fully, including RC flushing */
} EBcj2Enc_FinishMode;
 | 
						|
 | 
						|
/*
  BCJ2_ENC_FINISH_MODE_CONTINUE:
     process non finished encoding.
     It notifies the encoder that additional further calls
     can provide more input data (src) than provided by current call.
     In that case the CBcj2Enc encoder still can move (src) pointer
     up to (srcLim), but CBcj2Enc encoder can store some of the last
     processed bytes (up to 4 bytes) from src to internal CBcj2Enc::temp[] buffer.
   at return:
       (CBcj2Enc::src) will point to position that includes
       processed data and data copied to (temp[]) buffer.
       That data from (temp[]) buffer will be used in further calls.

  BCJ2_ENC_FINISH_MODE_END_BLOCK:
     finish encoding of current block (ended at srcLim) without RC flushing.
   at return: if (CBcj2Enc::state == BCJ2_ENC_STATE_ORIG &&
                  CBcj2Enc::src == CBcj2Enc::srcLim)
        :  it shows that block encoding was finished. And the encoder is
           ready for new (src) data or for stream finish operation.
     finished block means
     {
       CBcj2Enc has completed block encoding up to (srcLim).
       (1 + 4 bytes) or (2 + 4 bytes) CALL/JUMP sequences will
       not cross block boundary at (srcLim).
       temporary CBcj2Enc buffer for (ORIG) src data is empty.
       3 output uncompressed streams (MAIN, CALL, JUMP) were flushed.
       RC stream was not flushed. And RC stream will cross block boundary.
     }
     Note: some possible implementation of BCJ2 encoder could
     write branch marker (e8/e9/0f8x) in one call of Bcj2Enc_Encode(),
     and it could calculate symbol for RC in another call of Bcj2Enc_Encode().
     BCJ2 encoder uses ip/fileIp/fileSize/relatLimit values to calculate RC symbol.
     And these CBcj2Enc variables can have different values in different Bcj2Enc_Encode() calls.
     So caller must finish each block with BCJ2_ENC_FINISH_MODE_END_BLOCK
     to ensure that RC symbol is calculated and written in proper block.

  BCJ2_ENC_FINISH_MODE_END_STREAM:
     finish encoding of stream (ended at srcLim) fully including RC flushing.
   at return: if (CBcj2Enc::state == BCJ2_ENC_STATE_FINISHED)
        : it shows that stream encoding was finished fully,
          and all output streams were flushed fully.
     also Bcj2Enc_IsFinished() can be called.
*/
 | 
						|
 | 
						|
 | 
						|
/*
  32-bit relative offset in JUMP/CALL commands is
    - (mod 4 GiB)  for 32-bit x86 code
    - signed Int32 for 64-bit x86-64 code
  BCJ2 encoder also does internal relative to absolute address conversions.
  And there are 2 possible ways to do it:
    before v23: we used 32-bit variables and (mod 4 GiB) conversion
    since  v23: we use  64-bit variables and (signed Int32 offset) conversion.
  The absolute address condition for conversion in v23:
    ((UInt64)((Int64)ip64 - (Int64)fileIp64 + 5 + (Int32)offset) < (UInt64)fileSize64)
  Note that if (fileSize64 > 2 GiB), there is a difference between
  the old (mod 4 GiB) way (v22) and the new (signed Int32 offset) way (v23).
  And the new (v23) way is more suitable to encode 64-bit x86-64 code for (fileSize64 > 2 GiB) cases.
*/
 | 
						|
 | 
						|
/*
 | 
						|
// for old (v22) way for conversion:
 | 
						|
typedef UInt32 CBcj2Enc_ip_unsigned;
 | 
						|
typedef  Int32 CBcj2Enc_ip_signed;
 | 
						|
#define BCJ2_ENC_FileSize_MAX ((UInt32)1 << 31)
 | 
						|
*/
 | 
						|
typedef UInt64 CBcj2Enc_ip_unsigned;
 | 
						|
typedef  Int64 CBcj2Enc_ip_signed;
 | 
						|
 | 
						|
/* maximum size of file that can be used for conversion condition.
   The unsigned subtraction wraps around: the value is (all-ones - 1). */
#define BCJ2_ENC_FileSize_MAX             ((CBcj2Enc_ip_unsigned)0 - 2)
 | 
						|
 | 
						|
/* default value of fileSize64_minus1 variable that means
   that absolute address limitation will not be used.
   The unsigned subtraction wraps around: the value is all-ones. */
#define BCJ2_ENC_FileSizeField_UNLIMITED  ((CBcj2Enc_ip_unsigned)0 - 1)
 | 
						|
 | 
						|
/* calculate value that later can be set to CBcj2Enc::fileSize64_minus1.
   The result is ((fileSize) - 1) in CBcj2Enc_ip_unsigned arithmetic, so
   (fileSize == 0) gives the same value as BCJ2_ENC_FileSizeField_UNLIMITED. */
#define BCJ2_ENC_GET_FileSizeField_VAL_FROM_FileSize(fileSize) \
    ((CBcj2Enc_ip_unsigned)(fileSize) - 1)

/* set CBcj2Enc::fileSize64_minus1 variable from size of file.
   (p) is a pointer to CBcj2Enc.
   NOTE: the expansion is a complete statement that already ends with ';'.
   It is kept that way for compatibility with existing callers, so:
     - don't use this macro as an expression,
     - don't use it as unbraced body of (if) that is followed by (else). */
#define Bcj2Enc_SET_FileSize(p, fileSize) \
    (p)->fileSize64_minus1 = BCJ2_ENC_GET_FileSizeField_VAL_FROM_FileSize(fileSize);
 | 
						|
 | 
						|
 | 
						|
typedef struct
 | 
						|
{
 | 
						|
  Byte *bufs[BCJ2_NUM_STREAMS];
 | 
						|
  const Byte *lims[BCJ2_NUM_STREAMS];
 | 
						|
  const Byte *src;
 | 
						|
  const Byte *srcLim;
 | 
						|
 | 
						|
  unsigned state;
 | 
						|
  EBcj2Enc_FinishMode finishMode;
 | 
						|
 | 
						|
  Byte context;
 | 
						|
  Byte flushRem;
 | 
						|
  Byte isFlushState;
 | 
						|
 | 
						|
  Byte cache;
 | 
						|
  UInt32 range;
 | 
						|
  UInt64 low;
 | 
						|
  UInt64 cacheSize;
 | 
						|
  
 | 
						|
  // UInt32 context;  // for marker version, it can include marker flag.
 | 
						|
 | 
						|
  /* (ip64) and (fileIp64) correspond to virtual source stream position
 | 
						|
     that doesn't include data in temp[] */
 | 
						|
  CBcj2Enc_ip_unsigned ip64;         /* current (ip) position */
 | 
						|
  CBcj2Enc_ip_unsigned fileIp64;     /* start (ip) position of current file */
 | 
						|
  CBcj2Enc_ip_unsigned fileSize64_minus1;   /* size of current file (for conversion limitation) */
 | 
						|
  UInt32 relatLimit;  /* (relatLimit <= ((UInt32)1 << 31)) : 0 means disable_conversion */
 | 
						|
  // UInt32 relatExcludeBits;
 | 
						|
 | 
						|
  UInt32 tempTarget;
 | 
						|
  unsigned tempPos; /* the number of bytes that were copied to temp[] buffer
 | 
						|
                       (tempPos <= 4) outside of Bcj2Enc_Encode() */
 | 
						|
  // Byte temp[4]; // for marker version
 | 
						|
  Byte temp[8];
 | 
						|
  CBcj2Prob probs[2 + 256];
 | 
						|
} CBcj2Enc;
 | 
						|
 | 
						|
/* Bcj2Enc_Init():
   Initialization function for the CBcj2Enc object (p).
   The implementation is not a part of this header.
   NOTE(review): presumably it must be called before the first call of
   Bcj2Enc_Encode(); confirm in the implementation which CBcj2Enc fields
   it sets and which fields the caller must set after it. */
void Bcj2Enc_Init(CBcj2Enc *p);
 | 
						|
 | 
						|
 | 
						|
/*
 | 
						|
Bcj2Enc_Encode(): at exit:
 | 
						|
  p->State <  BCJ2_NUM_STREAMS    : we need more buffer space for output stream
 | 
						|
                                    (bufs[p->State] == lims[p->State])
 | 
						|
  p->State == BCJ2_ENC_STATE_ORIG : we need more data in input src stream
 | 
						|
                                    (src == srcLim)
 | 
						|
  p->State == BCJ2_ENC_STATE_FINISHED : after fully encoded stream
 | 
						|
*/
 | 
						|
void Bcj2Enc_Encode(CBcj2Enc *p);
 | 
						|
 | 
						|
/* Bcj2Enc encoder can look ahead for up to 4 bytes of source stream.
   CBcj2Enc::tempPos : is the number of bytes that were copied from input stream to temp[] buffer.
   (CBcj2Enc::src) after Bcj2Enc_Encode() is starting position after
   fully processed data and after data copied to temp buffer.
   So if the caller needs to get real number of fully processed input
   bytes (without look ahead data in temp buffer),
   the caller must subtract (CBcj2Enc::tempPos) value from processed size
   value that is calculated based on current (CBcj2Enc::src):
     cur_processed_pos = Calc_Big_Processed_Pos(enc.src) -
        Bcj2Enc_Get_AvailInputSize_in_Temp(&enc);
*/
/* get the size of input data that was stored in temp[] buffer: */
#define Bcj2Enc_Get_AvailInputSize_in_Temp(p) ((p)->tempPos)
 | 
						|
 | 
						|
/* Nonzero, if (CBcj2Enc::flushRem) of the encoder (p) is zero.
   It can be called after Bcj2Enc_Encode() in BCJ2_ENC_FINISH_MODE_END_STREAM mode. */
#define Bcj2Enc_IsFinished(p) (!(p)->flushRem)
 | 
						|
 | 
						|
/* Note : the decoder supports overlapping of marker (0f 80).
   But we can eliminate such overlapping cases by setting
   the limit for relative offset conversion as
     CBcj2Enc::relatLimit <= (0x0f << 24) == (240 MiB)
*/
/* default value for CBcj2Enc::relatLimit */
#define BCJ2_ENC_RELAT_LIMIT_DEFAULT  ((UInt32)0x0f << 24)
/* maximum value for CBcj2Enc::relatLimit : (relatLimit <= ((UInt32)1 << 31)) */
#define BCJ2_ENC_RELAT_LIMIT_MAX      ((UInt32)1 << 31)
// #define BCJ2_RELAT_EXCLUDE_NUM_BITS 5
 | 
						|
 | 
						|
EXTERN_C_END
 | 
						|
 | 
						|
#endif
 |