/*  Tarlz - Archiver with multimember lzip compression
    Copyright (C) 2013-2019 Antonio Diaz Diaz.

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

// Size in bytes of one tar header; Tar_header is a raw byte array of
// exactly that size.
enum { header_size = 512 };
typedef uint8_t Tar_header[header_size];

// Byte offset of each field from the start of a tar header.
// The matching field sizes are given by the enum Lengths.
enum Offsets {
  name_o     =   0,
  mode_o     = 100,
  uid_o      = 108,
  gid_o      = 116,
  size_o     = 124,
  mtime_o    = 136,
  chksum_o   = 148,
  typeflag_o = 156,
  linkname_o = 157,
  magic_o    = 257,
  version_o  = 263,
  uname_o    = 265,
  gname_o    = 297,
  devmajor_o = 329,
  devminor_o = 337,
  prefix_o   = 345
  };

// Size in bytes of each tar header field.
// The matching field positions are given by the enum Offsets.
enum Lengths {
  name_l     = 100,
  mode_l     =   8,
  uid_l      =   8,
  gid_l      =   8,
  size_l     =  12,
  mtime_l    =  12,
  chksum_l   =   8,
  typeflag_l =   1,
  linkname_l = 100,
  magic_l    =   6,
  version_l  =   2,
  uname_l    =  32,
  gname_l    =  32,
  devmajor_l =   8,
  devminor_l =   8,
  prefix_l   = 155
  };

// Character codes used as values of the typeflag field of a tar header.
enum Typeflag {
  tf_regular   = '0',
  tf_link      = '1',
  tf_symlink   = '2',
  tf_chardev   = '3',
  tf_blockdev  = '4',
  tf_directory = '5',
  tf_fifo      = '6',
  tf_hiperf    = '7',
  tf_global    = 'g',
  tf_extended  = 'x'
  };

// Expected contents of the magic field of a tar header (magic_l bytes),
// written as byte values instead of as a string literal.
const uint8_t ustar_magic[magic_l] =
  { 0x75, 0x73, 0x74, 0x61, 0x72, 0 };			// "ustar\0"

inline bool verify_ustar_magic( const uint8_t * const header )
  { return std::memcmp( header + magic_o, ustar_magic, magic_l ) == 0; }


// Returns "size" rounded up to the next multiple of header_size (512).
// A size that is already a multiple of header_size is returned unchanged.
//
inline unsigned long long round_up( const unsigned long long size )
  {
  const int excess = size % header_size;	// bytes past the last full block
  if( excess == 0 ) return size;
  const int padding = header_size - excess;
  return size + padding;
  }


class Extended		// stores metadata from/for extended records
  {
  std::string linkpath_;
  std::string path_;
  unsigned long long file_size_;

  // Cached sizes. They are mutable so that const members can fill them in.
  // Every setter and reset() put -1 in the caches it invalidates, and
  // full_size() recomputes full_size_ whenever it is negative.
  mutable long long full_size_;
  mutable int recsize_linkpath_;
  mutable int recsize_path_;
  mutable int recsize_file_size_;

  bool crc_present_;		// true if CRC present in parsed records

public:
  static const std::string crc_record;

  // Creates an empty object with all the cached sizes invalidated.
  Extended()
    : file_size_( 0 ),
      full_size_( -1 ),
      recsize_linkpath_( -1 ),
      recsize_path_( -1 ),
      recsize_file_size_( -1 ),
      crc_present_( false )
    {}

  // Returns the object to the state produced by the default constructor.
  void reset()
    {
    linkpath_.clear();
    path_.clear();
    file_size_ = 0;
    crc_present_ = false;
    full_size_ = -1;
    recsize_linkpath_ = -1;
    recsize_path_ = -1;
    recsize_file_size_ = -1;
    }

  // True if no link path, no path and no (nonzero) file size are stored.
  bool empty() const
    {
    if( !linkpath_.empty() ) return false;
    if( !path_.empty() ) return false;
    return file_size_ == 0;
    }

  // Read accessors.
  const std::string & linkpath() const { return linkpath_; }
  const std::string & path() const { return path_; }
  unsigned long long file_size() const { return file_size_; }

  // Setters. Each one stores the new value and invalidates both the cached
  // total size and the cached record size of the field being set.
  void linkpath( const char * const lp )
    {
    linkpath_ = lp;
    recsize_linkpath_ = -1;
    full_size_ = -1;
    }
  void path( const char * const p )
    {
    path_ = p;
    recsize_path_ = -1;
    full_size_ = -1;
    }
  void file_size( const unsigned long long fs )
    {
    file_size_ = fs;
    recsize_file_size_ = -1;
    full_size_ = -1;
    }

  // Sizes of the individual records (defined outside of this class body).
  int recsize_linkpath() const;
  int recsize_path() const;
  int recsize_file_size() const;

  // Extended data size: 0 if empty(), else the sum of the three record
  // sizes plus the size of crc_record.
  unsigned long long edsize() const
    {
    if( empty() ) return 0;
    return recsize_linkpath() + recsize_path() +
           recsize_file_size() + crc_record.size();
    }

  // edsize() rounded up by round_up.
  unsigned long long edsize_pad() const
    { return round_up( edsize() ); }

  // 0 if empty(), else header_size + edsize_pad(). The result is cached in
  // full_size_ until a setter or reset() invalidates it.
  unsigned long long full_size() const
    {
    if( full_size_ >= 0 ) return full_size_;
    full_size_ = ( empty() ? 0 : header_size + edsize_pad() );
    return full_size_;
    }

  bool crc_present() const { return crc_present_; }
  bool parse( const char * const buf, const unsigned long long edsize,
              const bool permissive );
  };


// Limits on dictionary, member and data sizes.
enum {
  min_dictionary_bits = 12,				// log2 of the minimum
  min_dictionary_size = 1 << min_dictionary_bits,	// 4 KiB
  max_dictionary_bits = 29,				// log2 of the maximum
  max_dictionary_size = 1 << max_dictionary_bits,	// 512 MiB
  min_member_size     = 36,
  min_data_size       = 2 * min_dictionary_size,
  max_data_size       = 2 * max_dictionary_size
  };


// Returns true if "dictionary_size" lies in the closed range
// [min_dictionary_size, max_dictionary_size].
inline bool isvalid_ds( const unsigned dictionary_size )
  {
  if( dictionary_size < min_dictionary_size ) return false;
  return dictionary_size <= max_dictionary_size;
  }


// The 4 magic bytes that Lzip_header compares against the start of a header.
const uint8_t lzip_magic[4] = { 0x4C, 0x5A, 0x49, 0x50 };	// "LZIP"

struct Lzip_header
  {
  uint8_t data[6];			// 0-3 magic bytes
					//   4 version
					//   5 coded_dict_size
  enum { size = 6 };

  // True if the 4 magic bytes are identical to lzip_magic.
  bool verify_magic() const
    { return std::memcmp( data, lzip_magic, 4 ) == 0; }

  // Detects a (truncated) header: true if sz > 0 and the first
  // min( sz, 4 ) bytes match the beginning of lzip_magic.
  bool verify_prefix( const int sz ) const
    {
    const int limit = ( sz < 4 ) ? sz : 4;
    for( int i = 0; i < limit; ++i )
      if( data[i] != lzip_magic[i] ) return false;
    return sz > 0;
    }

  // Detects a corrupt header: true if some (2 or 3) but not all of the
  // 4 magic bytes match lzip_magic.
  bool verify_corrupt() const
    {
    int matches = 0;
    for( int i = 0; i < 4; ++i )
      matches += ( data[i] == lzip_magic[i] );
    return matches == 2 || matches == 3;
    }

  uint8_t version() const { return data[4]; }
  bool verify_version() const { return version() == 1; }

  // Decodes byte 5: the low 5 bits give the base 2 logarithm of the base
  // size, and the high 3 bits give the number of sixteenths of the base
  // size to subtract from it. Nothing is subtracted unless the base size
  // is larger than min_dictionary_size.
  unsigned dictionary_size() const
    {
    const int base_bits = data[5] & 0x1F;
    const unsigned sixteenths = ( data[5] >> 5 ) & 7;
    unsigned sz = ( 1 << base_bits );
    if( sz > min_dictionary_size )
      sz -= ( sz / 16 ) * sixteenths;
    return sz;
    }
  };


struct Lzip_trailer
  {
  uint8_t data[20];	//  0-3  CRC32 of the uncompressed data
			//  4-11 size of the uncompressed data
			// 12-19 member size including header and trailer
  enum { size = 20 };

  // The three fields are decoded least significant byte first.

  unsigned data_crc() const
    {
    unsigned value = 0;
    for( int i = 0; i < 4; ++i )
      value |= static_cast< unsigned >( data[i] ) << ( 8 * i );
    return value;
    }

  unsigned long long data_size() const
    {
    unsigned long long value = 0;
    for( int i = 0; i < 8; ++i )
      value |= static_cast< unsigned long long >( data[4+i] ) << ( 8 * i );
    return value;
    }

  unsigned long long member_size() const
    {
    unsigned long long value = 0;
    for( int i = 0; i < 8; ++i )
      value |= static_cast< unsigned long long >( data[12+i] ) << ( 8 * i );
    return value;
    }

  // Checks the internal consistency of the three fields. Returns false if
  // any of the tests below fails, true otherwise.
  bool verify_consistency() const
    {
    const unsigned long long dsize = data_size();
    const bool zero_crc = ( data_crc() == 0 );
    const bool zero_size = ( dsize == 0 );
    // the CRC and the data size must be both zero or both nonzero
    if( zero_crc != zero_size ) return false;

    const unsigned long long msize = member_size();
    if( msize < min_member_size ) return false;

    // Largest member size accepted for dsize bytes of data.
    // NOTE(review): the "mlimit > dsize" guard presumably skips the test
    // when the unsigned arithmetic wrapped around -- confirm.
    const unsigned long long mlimit = ( 9 * dsize + 7 ) / 8 + min_member_size;
    if( mlimit > dsize && msize > mlimit ) return false;

    // Largest data size accepted for a member of msize bytes, guarded in
    // the same way by "dlimit > msize".
    const unsigned long long dlimit = 7090 * ( msize - 26 ) - 1;
    return !( dlimit > msize && dsize > dlimit );
    }
  };


class CRC32
  {
  uint32_t data[256];		// Table of CRCs of all 8-bit messages.

public:
  // Fills the table using the CRC32-C (Castagnoli) polynomial if
  // "castagnoli" is true, else the IEEE 802.3 (Ethernet) polynomial.
  CRC32( const bool castagnoli = false )
    {
    const unsigned ieee_poly = 0xEDB88320U;
    const unsigned castagnoli_poly = 0x82F63B78U;
    const unsigned poly = castagnoli ? castagnoli_poly : ieee_poly;

    for( unsigned n = 0; n < 256; ++n )
      {
      unsigned c = n;
      for( int bit = 0; bit < 8; ++bit )
        c = ( c >> 1 ) ^ ( ( c & 1 ) ? poly : 0 );
      data[n] = c;
      }
    }

  // Feeds one byte into the running value "crc".
  void update_byte( uint32_t & crc, const uint8_t byte ) const
    { crc = ( crc >> 8 ) ^ data[( crc ^ byte ) & 0xFF]; }

  // Feeds the first "size" bytes of "buffer" into the running value "crc".
  // Does nothing if size <= 0.
  void update_buf( uint32_t & crc, const uint8_t * const buffer,
                   const int size ) const
    {
    uint32_t c = crc;			// work on a local copy
    for( int i = 0; i < size; ++i )
      update_byte( c, buffer[i] );
    crc = c;
    }

  // Calculates the crc of size bytes except a window of 8 bytes at pos
  uint32_t windowed_crc( const uint8_t * const buffer, const int pos,
                         const int size ) const
    {
    uint32_t crc = 0xFFFFFFFFU;
    update_buf( crc, buffer, pos );			// bytes before the window
    const uint8_t * const tail = buffer + pos + 8;	// bytes after the window
    update_buf( crc, tail, size - pos - 8 );
    return crc ^ 0xFFFFFFFFU;
    }
  };

// CRC32 object defined in another file; only declared here.
// NOTE(review): the name suggests it is constructed with castagnoli = true
// -- confirm at its definition.
extern const CRC32 crc32c;


// NOTE(review): presumably the initial size passed to a Resizable_buffer
// that holds one formatted line -- confirm against the callers.
enum { initial_line_length = 1000 };	// must be >= 77

// Owner of a malloc'd char buffer that can grow but never shrinks.
// If the initial allocation fails, the buffer pointer is null and size()
// is 0.
class Resizable_buffer
  {
  char * p;
  unsigned size_;

  // Copying is disabled (declared private and never defined). A copy made
  // by the implicit copy operations would share "p" with the original and
  // free it a second time on destruction.
  Resizable_buffer( const Resizable_buffer & );
  Resizable_buffer & operator=( const Resizable_buffer & );

public:
  // Allocates "initial_size" bytes. size() is 0 if the allocation fails.
  explicit Resizable_buffer( const unsigned initial_size )
    : p( static_cast< char * >( std::malloc( initial_size ) ) ),
      size_( p ? initial_size : 0 ) {}

  // Releases the buffer (std::free accepts a null pointer).
  ~Resizable_buffer() { std::free( p ); p = 0; size_ = 0; }

  // Makes sure the buffer holds at least "new_size" bytes. The contents
  // are preserved when the buffer grows. Returns false if the
  // reallocation fails, in which case the old buffer and size remain
  // valid. Never shrinks the buffer.
  bool resize( const unsigned new_size )
    {
    if( size_ >= new_size ) return true;	// already large enough
    char * const tmp = static_cast< char * >( std::realloc( p, new_size ) );
    if( !tmp ) return false;
    p = tmp;
    size_ = new_size;
    return true;
    }

  // Returns the pointer to the buffer (null if nothing is allocated).
  char * operator()() const { return p; }

  // Returns the current size of the buffer in bytes.
  unsigned size() const { return size_; }
  };

// Texts of diagnostic messages.
const char * const bad_magic_msg = "Bad magic number (file not in lzip format).";
const char * const bad_dict_msg = "Invalid dictionary size in member header.";
const char * const corrupt_mm_msg = "Corrupt header in multimember file.";
const char * const trailing_msg = "Trailing data not allowed.";

// defined in create.cc
// Only declarations appear here; the behavior is not visible in this file.
enum Solidity { no_solid, bsolid, dsolid, asolid, solid };
extern int cl_owner;
extern int cl_group;
extern int cl_data_size;
extern Solidity solidity;
// NOTE(review): presumably these compute and check the checksum field of
// a tar header -- confirm in create.cc.
unsigned ustar_chksum( const uint8_t * const header );
bool verify_ustar_chksum( const uint8_t * const header );
class Arg_parser;	// forward declaration; only used by reference here
int concatenate( const std::string & archive_name, const Arg_parser & parser,
                 const int filenames );
int encode( const std::string & archive_name, const Arg_parser & parser,
            const int filenames, const int level, const bool append );

// defined in extract.cc
// Only declarations appear here; the behavior is not visible in this file.
// NOTE(review): the meaning of the return values (in particular of the
// int results of readblock, writeblock and decode) must be checked in
// extract.cc.
bool block_is_zero( const uint8_t * const buf, const int size );
void format_member_name( const Extended & extended, const Tar_header header,
                         Resizable_buffer & rbuf, const bool long_format );
const char * remove_leading_slash( const char * const filename );
bool compare_prefix_dir( const char * const dir, const char * const name );
bool compare_tslash( const char * const name1, const char * const name2 );
int readblock( const int fd, uint8_t * const buf, const int size );
int writeblock( const int fd, const uint8_t * const buf, const int size );
unsigned long long parse_octal( const uint8_t * const ptr, const int size );
int decode( const std::string & archive_name, const Arg_parser & parser,
            const int filenames, const int num_workers, const int debug_level,
            const bool keep_damaged, const bool listing, const bool missing_crc,
            const bool permissive );

// defined in list_lz.cc
// Only a declaration appears here; the behavior is not visible in this file.
class Lzip_index;	// forward declaration; only used by reference here
int list_lz( const Arg_parser & parser, std::vector< char > & name_pending,
             const Lzip_index & lzip_index, const int filenames,
             const int debug_level, const int infd, const int num_workers,
             const bool missing_crc, const bool permissive );

// defined in main.cc
// Only declarations appear here; the behavior is not visible in this file.
extern int verbosity;
int open_instream( const std::string & name );
// "create" defaults to true when omitted.
int open_outstream( const std::string & name, const bool create = true );
// "errcode" defaults to 0 and "help" to false when omitted.
void show_error( const char * const msg, const int errcode = 0,
                 const bool help = false );
// "errcode" defaults to 0 when omitted.
void show_file_error( const char * const filename, const char * const msg,
                      const int errcode = 0 );
void internal_error( const char * const msg );
