misc/libphysfs/lzma/CPP/7zip/Compress/LZMA_Alone/LzmaBench.cpp
branch ui-scaling
changeset 15283 c4fd2813b127
parent 13390 0135e64c6c66
parent 15279 7ab5cf405686
child 15663 d92eeb468dad
equal deleted inserted replaced
13390:0135e64c6c66 15283:c4fd2813b127
     1 // LzmaBench.cpp
       
     2 
       
     3 #include "StdAfx.h"
       
     4 
       
     5 #include "LzmaBench.h"
       
     6 
       
     7 #ifndef _WIN32
       
     8 #define USE_POSIX_TIME
       
     9 #define USE_POSIX_TIME2
       
    10 #endif
       
    11 
       
    12 #ifdef USE_POSIX_TIME
       
    13 #include <time.h>
       
    14 #ifdef USE_POSIX_TIME2
       
    15 #include <sys/time.h>
       
    16 #endif
       
    17 #endif
       
    18 
       
    19 #ifdef _WIN32
       
    20 #define USE_ALLOCA
       
    21 #endif
       
    22 
       
    23 #ifdef USE_ALLOCA
       
    24 #ifdef _WIN32
       
    25 #include <malloc.h>
       
    26 #else
       
    27 #include <stdlib.h>
       
    28 #endif
       
    29 #endif
       
    30 
       
    31 extern "C" 
       
    32 { 
       
    33 #include "../../../../C/Alloc.h"
       
    34 #include "../../../../C/7zCrc.h"
       
    35 }
       
    36 #include "../../../Common/MyCom.h"
       
    37 #include "../../ICoder.h"
       
    38 
       
    39 #ifdef BENCH_MT
       
    40 #include "../../../Windows/Thread.h"
       
    41 #include "../../../Windows/Synchronization.h"
       
    42 #endif
       
    43 
       
    44 #ifdef EXTERNAL_LZMA
       
    45 #include "../../../Windows/PropVariant.h"
       
    46 #else
       
    47 #include "../LZMA/LZMADecoder.h"
       
    48 #include "../LZMA/LZMAEncoder.h"
       
    49 #endif
       
    50 
       
    51 static const UInt32 kUncompressMinBlockSize = 1 << 26;
       
    52 static const UInt32 kAdditionalSize = (1 << 16);
       
    53 static const UInt32 kCompressedAdditionalSize = (1 << 10);
       
    54 static const UInt32 kMaxLzmaPropSize = 5;
       
    55 
       
    56 class CBaseRandomGenerator
       
    57 {
       
    58   UInt32 A1;
       
    59   UInt32 A2;
       
    60 public:
       
    61   CBaseRandomGenerator() { Init(); }
       
    62   void Init() { A1 = 362436069; A2 = 521288629;}
       
    63   UInt32 GetRnd() 
       
    64   {
       
    65     return 
       
    66       ((A1 = 36969 * (A1 & 0xffff) + (A1 >> 16)) << 16) +
       
    67       ((A2 = 18000 * (A2 & 0xffff) + (A2 >> 16)) );
       
    68   }
       
    69 };
       
    70 
       
    71 class CBenchBuffer
       
    72 {
       
    73 public:
       
    74   size_t BufferSize;
       
    75   Byte *Buffer;
       
    76   CBenchBuffer(): Buffer(0) {} 
       
    77   virtual ~CBenchBuffer() { Free(); }
       
    78   void Free() 
       
    79   { 
       
    80     ::MidFree(Buffer);
       
    81     Buffer = 0;
       
    82   }
       
    83   bool Alloc(size_t bufferSize) 
       
    84   {
       
    85     if (Buffer != 0 && BufferSize == bufferSize)
       
    86       return true;
       
    87     Free();
       
    88     Buffer = (Byte *)::MidAlloc(bufferSize);
       
    89     BufferSize = bufferSize;
       
    90     return (Buffer != 0);
       
    91   }
       
    92 };
       
    93 
       
    94 class CBenchRandomGenerator: public CBenchBuffer
       
    95 {
       
    96   CBaseRandomGenerator *RG;
       
    97 public:
       
    98   void Set(CBaseRandomGenerator *rg) { RG = rg; }
       
    99   UInt32 GetVal(UInt32 &res, int numBits) 
       
   100   {
       
   101     UInt32 val = res & (((UInt32)1 << numBits) - 1);
       
   102     res >>= numBits;
       
   103     return val;
       
   104   }
       
   105   UInt32 GetLen(UInt32 &res) 
       
   106   { 
       
   107     UInt32 len = GetVal(res, 2);
       
   108     return GetVal(res, 1 + len);
       
   109   }
       
   110   void Generate()
       
   111   {
       
   112     UInt32 pos = 0;
       
   113     UInt32 rep0 = 1;
       
   114     while (pos < BufferSize)
       
   115     {
       
   116       UInt32 res = RG->GetRnd();
       
   117       res >>= 1;
       
   118       if (GetVal(res, 1) == 0 || pos < 1024)
       
   119         Buffer[pos++] = (Byte)(res & 0xFF);
       
   120       else
       
   121       {
       
   122         UInt32 len;
       
   123         len = 1 + GetLen(res);
       
   124         if (GetVal(res, 3) != 0)
       
   125         {
       
   126           len += GetLen(res);
       
   127           do
       
   128           {
       
   129             UInt32 ppp = GetVal(res, 5) + 6;
       
   130             res = RG->GetRnd();
       
   131             if (ppp > 30)
       
   132               continue;
       
   133             rep0 = /* (1 << ppp) +*/  GetVal(res, ppp);
       
   134             res = RG->GetRnd();
       
   135           }
       
   136           while (rep0 >= pos);
       
   137           rep0++;
       
   138         }
       
   139 
       
   140         for (UInt32 i = 0; i < len && pos < BufferSize; i++, pos++)
       
   141           Buffer[pos] = Buffer[pos - rep0];
       
   142       }
       
   143     }
       
   144   }
       
   145 };
       
   146 
       
   147 
       
   148 class CBenchmarkInStream: 
       
   149   public ISequentialInStream,
       
   150   public CMyUnknownImp
       
   151 {
       
   152   const Byte *Data;
       
   153   size_t Pos;
       
   154   size_t Size;
       
   155 public:
       
   156   MY_UNKNOWN_IMP
       
   157   void Init(const Byte *data, size_t size)
       
   158   {
       
   159     Data = data;
       
   160     Size = size;
       
   161     Pos = 0;
       
   162   }
       
   163   STDMETHOD(Read)(void *data, UInt32 size, UInt32 *processedSize);
       
   164 };
       
   165 
       
   166 STDMETHODIMP CBenchmarkInStream::Read(void *data, UInt32 size, UInt32 *processedSize)
       
   167 {
       
   168   size_t remain = Size - Pos;
       
   169   UInt32 kMaxBlockSize = (1 << 20);
       
   170   if (size > kMaxBlockSize)
       
   171     size = kMaxBlockSize;
       
   172   if (size > remain)
       
   173     size = (UInt32)remain;
       
   174   for (UInt32 i = 0; i < size; i++)
       
   175     ((Byte *)data)[i] = Data[Pos + i];
       
   176   Pos += size;
       
   177   if(processedSize != NULL)
       
   178     *processedSize = size;
       
   179   return S_OK;
       
   180 }
       
   181   
       
   182 class CBenchmarkOutStream: 
       
   183   public ISequentialOutStream,
       
   184   public CBenchBuffer,
       
   185   public CMyUnknownImp
       
   186 {
       
   187   // bool _overflow;
       
   188 public:
       
   189   UInt32 Pos;
       
   190   // CBenchmarkOutStream(): _overflow(false) {} 
       
   191   void Init() 
       
   192   {
       
   193     // _overflow = false;
       
   194     Pos = 0;
       
   195   }
       
   196   MY_UNKNOWN_IMP
       
   197   STDMETHOD(Write)(const void *data, UInt32 size, UInt32 *processedSize);
       
   198 };
       
   199 
       
   200 STDMETHODIMP CBenchmarkOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize)
       
   201 {
       
   202   size_t curSize = BufferSize - Pos;
       
   203   if (curSize > size)
       
   204     curSize = size;
       
   205   memcpy(Buffer + Pos, data, curSize);
       
   206   Pos += (UInt32)curSize;
       
   207   if(processedSize != NULL)
       
   208     *processedSize = (UInt32)curSize;
       
   209   if (curSize != size)
       
   210   {
       
   211     // _overflow = true;
       
   212     return E_FAIL;
       
   213   }
       
   214   return S_OK;
       
   215 }
       
   216   
       
   217 class CCrcOutStream: 
       
   218   public ISequentialOutStream,
       
   219   public CMyUnknownImp
       
   220 {
       
   221 public:
       
   222   UInt32 Crc;
       
   223   MY_UNKNOWN_IMP
       
   224   void Init() { Crc = CRC_INIT_VAL; }
       
   225   STDMETHOD(Write)(const void *data, UInt32 size, UInt32 *processedSize);
       
   226 };
       
   227 
       
   228 STDMETHODIMP CCrcOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize)
       
   229 {
       
   230   Crc = CrcUpdate(Crc, data, size);
       
   231   if (processedSize != NULL)
       
   232     *processedSize = size;
       
   233   return S_OK;
       
   234 }
       
   235   
       
   236 static UInt64 GetTimeCount()
       
   237 {
       
   238   #ifdef USE_POSIX_TIME
       
   239   #ifdef USE_POSIX_TIME2
       
   240   timeval v;
       
   241   if (gettimeofday(&v, 0) == 0)
       
   242     return (UInt64)(v.tv_sec) * 1000000 + v.tv_usec;
       
   243   return (UInt64)time(NULL) * 1000000;
       
   244   #else
       
   245   return time(NULL);
       
   246   #endif
       
   247   #else
       
   248   /*
       
   249   LARGE_INTEGER value;
       
   250   if (::QueryPerformanceCounter(&value))
       
   251     return value.QuadPart;
       
   252   */
       
   253   return GetTickCount();
       
   254   #endif 
       
   255 }
       
   256 
       
   257 static UInt64 GetFreq()
       
   258 {
       
   259   #ifdef USE_POSIX_TIME
       
   260   #ifdef USE_POSIX_TIME2
       
   261   return 1000000;
       
   262   #else
       
   263   return 1;
       
   264   #endif 
       
   265   #else
       
   266   /*
       
   267   LARGE_INTEGER value;
       
   268   if (::QueryPerformanceFrequency(&value))
       
   269     return value.QuadPart;
       
   270   */
       
   271   return 1000;
       
   272   #endif 
       
   273 }
       
   274 
       
   275 #ifndef USE_POSIX_TIME
       
   276 static inline UInt64 GetTime64(const FILETIME &t) { return ((UInt64)t.dwHighDateTime << 32) | t.dwLowDateTime; }
       
   277 #endif
       
   278 static UInt64 GetUserTime()
       
   279 {
       
   280   #ifdef USE_POSIX_TIME
       
   281   return clock();
       
   282   #else
       
   283   FILETIME creationTime, exitTime, kernelTime, userTime;
       
   284   if (::GetProcessTimes(::GetCurrentProcess(), &creationTime, &exitTime, &kernelTime, &userTime) != 0)
       
   285     return GetTime64(userTime) + GetTime64(kernelTime);
       
   286   return (UInt64)GetTickCount() * 10000;
       
   287   #endif 
       
   288 }
       
   289 
       
   290 static UInt64 GetUserFreq()
       
   291 {
       
   292   #ifdef USE_POSIX_TIME
       
   293   return CLOCKS_PER_SEC;
       
   294   #else
       
   295   return 10000000;
       
   296   #endif 
       
   297 }
       
   298 
       
   299 class CBenchProgressStatus
       
   300 {
       
   301   #ifdef BENCH_MT
       
   302   NWindows::NSynchronization::CCriticalSection CS;  
       
   303   #endif
       
   304 public:
       
   305   HRESULT Res;
       
   306   bool EncodeMode;
       
   307   void SetResult(HRESULT res) 
       
   308   {
       
   309     #ifdef BENCH_MT
       
   310     NWindows::NSynchronization::CCriticalSectionLock lock(CS);
       
   311     #endif
       
   312     Res = res;
       
   313   }
       
   314   HRESULT GetResult()
       
   315   {
       
   316     #ifdef BENCH_MT
       
   317     NWindows::NSynchronization::CCriticalSectionLock lock(CS);
       
   318     #endif
       
   319     return Res;
       
   320   }
       
   321 };
       
   322 
       
   323 class CBenchProgressInfo:
       
   324   public ICompressProgressInfo,
       
   325   public CMyUnknownImp
       
   326 {
       
   327 public:
       
   328   CBenchProgressStatus *Status;
       
   329   CBenchInfo BenchInfo;
       
   330   HRESULT Res;
       
   331   IBenchCallback *callback;
       
   332   CBenchProgressInfo(): callback(0) {}
       
   333   MY_UNKNOWN_IMP
       
   334   STDMETHOD(SetRatioInfo)(const UInt64 *inSize, const UInt64 *outSize);
       
   335 };
       
   336 
       
   337 void SetStartTime(CBenchInfo &bi)
       
   338 {
       
   339   bi.GlobalFreq = GetFreq();
       
   340   bi.UserFreq = GetUserFreq();
       
   341   bi.GlobalTime = ::GetTimeCount();
       
   342   bi.UserTime = ::GetUserTime();
       
   343 }
       
   344 
       
   345 void SetFinishTime(const CBenchInfo &biStart, CBenchInfo &dest)
       
   346 {
       
   347   dest.GlobalFreq = GetFreq();
       
   348   dest.UserFreq = GetUserFreq();
       
   349   dest.GlobalTime = ::GetTimeCount() - biStart.GlobalTime;
       
   350   dest.UserTime = ::GetUserTime() - biStart.UserTime;
       
   351 }
       
   352 
       
   353 STDMETHODIMP CBenchProgressInfo::SetRatioInfo(const UInt64 *inSize, const UInt64 *outSize)
       
   354 {
       
   355   HRESULT res = Status->GetResult();
       
   356   if (res != S_OK)
       
   357     return res;
       
   358   if (!callback)
       
   359     return res;
       
   360   CBenchInfo info = BenchInfo;
       
   361   SetFinishTime(BenchInfo, info);
       
   362   if (Status->EncodeMode)
       
   363   {
       
   364     info.UnpackSize = *inSize;
       
   365     info.PackSize = *outSize;
       
   366     res = callback->SetEncodeResult(info, false);
       
   367   }
       
   368   else
       
   369   {
       
   370     info.PackSize = BenchInfo.PackSize + *inSize;
       
   371     info.UnpackSize = BenchInfo.UnpackSize + *outSize;
       
   372     res = callback->SetDecodeResult(info, false);
       
   373   }
       
   374   if (res != S_OK)
       
   375     Status->SetResult(res);
       
   376   return res;
       
   377 }
       
   378 
       
   379 static const int kSubBits = 8;
       
   380 
       
   381 static UInt32 GetLogSize(UInt32 size)
       
   382 {
       
   383   for (int i = kSubBits; i < 32; i++)
       
   384     for (UInt32 j = 0; j < (1 << kSubBits); j++)
       
   385       if (size <= (((UInt32)1) << i) + (j << (i - kSubBits)))
       
   386         return (i << kSubBits) + j;
       
   387   return (32 << kSubBits);
       
   388 }
       
   389 
       
   390 static void NormalizeVals(UInt64 &v1, UInt64 &v2)
       
   391 {
       
   392   while (v1 > 1000000)
       
   393   {
       
   394     v1 >>= 1;
       
   395     v2 >>= 1;
       
   396   }
       
   397 }
       
   398 
       
   399 UInt64 GetUsage(const CBenchInfo &info)
       
   400 {
       
   401   UInt64 userTime = info.UserTime;
       
   402   UInt64 userFreq = info.UserFreq;
       
   403   UInt64 globalTime = info.GlobalTime;
       
   404   UInt64 globalFreq = info.GlobalFreq;
       
   405   NormalizeVals(userTime, userFreq);
       
   406   NormalizeVals(globalFreq, globalTime);
       
   407   if (userFreq == 0)
       
   408     userFreq = 1;
       
   409   if (globalTime == 0)
       
   410     globalTime = 1;
       
   411   return userTime * globalFreq * 1000000 / userFreq / globalTime;
       
   412 }
       
   413 
       
   414 UInt64 GetRatingPerUsage(const CBenchInfo &info, UInt64 rating)
       
   415 {
       
   416   UInt64 userTime = info.UserTime;
       
   417   UInt64 userFreq = info.UserFreq;
       
   418   UInt64 globalTime = info.GlobalTime;
       
   419   UInt64 globalFreq = info.GlobalFreq;
       
   420   NormalizeVals(userFreq, userTime);
       
   421   NormalizeVals(globalTime, globalFreq);
       
   422   if (globalFreq == 0)
       
   423     globalFreq = 1;
       
   424   if (userTime == 0)
       
   425     userTime = 1;
       
   426   return userFreq * globalTime / globalFreq *  rating / userTime;
       
   427 }
       
   428 
       
   429 static UInt64 MyMultDiv64(UInt64 value, UInt64 elapsedTime, UInt64 freq)
       
   430 {
       
   431   UInt64 elTime = elapsedTime;
       
   432   NormalizeVals(freq, elTime);
       
   433   if (elTime == 0)
       
   434     elTime = 1;
       
   435   return value * freq / elTime;
       
   436 }
       
   437 
       
   438 UInt64 GetCompressRating(UInt32 dictionarySize, UInt64 elapsedTime, UInt64 freq, UInt64 size)
       
   439 {
       
   440   UInt64 t = GetLogSize(dictionarySize) - (kBenchMinDicLogSize << kSubBits);
       
   441   // UInt64 numCommandsForOne = 1000 + ((t * t * 7) >> (2 * kSubBits)); // AMD K8
       
   442   UInt64 numCommandsForOne = 870 + ((t * t * 5) >> (2 * kSubBits)); // Intel Core2
       
   443 
       
   444   UInt64 numCommands = (UInt64)(size) * numCommandsForOne;
       
   445   return MyMultDiv64(numCommands, elapsedTime, freq);
       
   446 }
       
   447 
       
   448 UInt64 GetDecompressRating(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt32 numIterations)
       
   449 {
       
   450   // UInt64 numCommands = (inSize * 216 + outSize * 14) * numIterations; // AMD K8
       
   451   UInt64 numCommands = (inSize * 220 + outSize * 8) * numIterations; // Intel Core2
       
   452   return MyMultDiv64(numCommands, elapsedTime, freq);
       
   453 }
       
   454 
       
   455 #ifdef EXTERNAL_LZMA
       
   456 typedef UInt32 (WINAPI * CreateObjectPointer)(const GUID *clsID, 
       
   457     const GUID *interfaceID, void **outObject);
       
   458 #endif
       
   459 
       
   460 struct CEncoderInfo;
       
   461 
       
   462 struct CEncoderInfo
       
   463 {
       
   464   #ifdef BENCH_MT
       
   465   NWindows::CThread thread[2];
       
   466   #endif
       
   467   CMyComPtr<ICompressCoder> encoder;
       
   468   CBenchProgressInfo *progressInfoSpec[2];
       
   469   CMyComPtr<ICompressProgressInfo> progressInfo[2];
       
   470   UInt32 NumIterations;
       
   471   #ifdef USE_ALLOCA
       
   472   size_t AllocaSize;
       
   473   #endif
       
   474 
       
   475   struct CDecoderInfo
       
   476   {
       
   477     CEncoderInfo *Encoder;
       
   478     UInt32 DecoderIndex;
       
   479     #ifdef USE_ALLOCA
       
   480     size_t AllocaSize;
       
   481     #endif
       
   482     bool CallbackMode;
       
   483   };
       
   484   CDecoderInfo decodersInfo[2];
       
   485 
       
   486   CMyComPtr<ICompressCoder> decoders[2];
       
   487   HRESULT Results[2];
       
   488   CBenchmarkOutStream *outStreamSpec;
       
   489   CMyComPtr<ISequentialOutStream> outStream;
       
   490   IBenchCallback *callback;
       
   491   UInt32 crc;
       
   492   UInt32 kBufferSize;
       
   493   UInt32 compressedSize;
       
   494   CBenchRandomGenerator rg;
       
   495   CBenchmarkOutStream *propStreamSpec;
       
   496   CMyComPtr<ISequentialOutStream> propStream;
       
   497   HRESULT Init(UInt32 dictionarySize, UInt32 numThreads, CBaseRandomGenerator *rg);
       
   498   HRESULT Encode();
       
   499   HRESULT Decode(UInt32 decoderIndex);
       
   500 
       
   501   CEncoderInfo(): outStreamSpec(0), callback(0), propStreamSpec(0) {}
       
   502 
       
   503   #ifdef BENCH_MT
       
   504   static THREAD_FUNC_DECL EncodeThreadFunction(void *param)
       
   505   {
       
   506     CEncoderInfo *encoder = (CEncoderInfo *)param;
       
   507     #ifdef USE_ALLOCA
       
   508     alloca(encoder->AllocaSize);
       
   509     #endif
       
   510     HRESULT res = encoder->Encode();
       
   511     encoder->Results[0] = res;
       
   512     if (res != S_OK)
       
   513       encoder->progressInfoSpec[0]->Status->SetResult(res);
       
   514 
       
   515     return 0;
       
   516   }
       
   517   static THREAD_FUNC_DECL DecodeThreadFunction(void *param)
       
   518   {
       
   519     CDecoderInfo *decoder = (CDecoderInfo *)param;
       
   520     #ifdef USE_ALLOCA
       
   521     alloca(decoder->AllocaSize);
       
   522     #endif
       
   523     CEncoderInfo *encoder = decoder->Encoder;
       
   524     encoder->Results[decoder->DecoderIndex] = encoder->Decode(decoder->DecoderIndex);
       
   525     return 0;
       
   526   }
       
   527 
       
   528   HRESULT CreateEncoderThread()
       
   529   {
       
   530     return thread[0].Create(EncodeThreadFunction, this);
       
   531   }
       
   532 
       
   533   HRESULT CreateDecoderThread(int index, bool callbackMode
       
   534       #ifdef USE_ALLOCA
       
   535       , size_t allocaSize
       
   536       #endif
       
   537       )
       
   538   {
       
   539     CDecoderInfo &decoder = decodersInfo[index];
       
   540     decoder.DecoderIndex = index;
       
   541     decoder.Encoder = this;
       
   542     #ifdef USE_ALLOCA
       
   543     decoder.AllocaSize = allocaSize;
       
   544     #endif
       
   545     decoder.CallbackMode = callbackMode;
       
   546     return thread[index].Create(DecodeThreadFunction, &decoder);
       
   547   }
       
   548   #endif
       
   549 };
       
   550 
       
   551 HRESULT CEncoderInfo::Init(UInt32 dictionarySize, UInt32 numThreads, CBaseRandomGenerator *rgLoc)
       
   552 {
       
   553   rg.Set(rgLoc);
       
   554   kBufferSize = dictionarySize + kAdditionalSize;
       
   555   UInt32 kCompressedBufferSize = (kBufferSize / 2) + kCompressedAdditionalSize;
       
   556   if (!rg.Alloc(kBufferSize))
       
   557     return E_OUTOFMEMORY;
       
   558   rg.Generate();
       
   559   crc = CrcCalc(rg.Buffer, rg.BufferSize);
       
   560 
       
   561   outStreamSpec = new CBenchmarkOutStream;
       
   562   if (!outStreamSpec->Alloc(kCompressedBufferSize))
       
   563     return E_OUTOFMEMORY;
       
   564 
       
   565   outStream = outStreamSpec;
       
   566 
       
   567   propStreamSpec = 0;
       
   568   if (!propStream)
       
   569   {
       
   570     propStreamSpec = new CBenchmarkOutStream;
       
   571     propStream = propStreamSpec;
       
   572   }
       
   573   if (!propStreamSpec->Alloc(kMaxLzmaPropSize))
       
   574     return E_OUTOFMEMORY;
       
   575   propStreamSpec->Init();
       
   576   
       
   577   PROPID propIDs[] = 
       
   578   { 
       
   579     NCoderPropID::kDictionarySize, 
       
   580     NCoderPropID::kMultiThread
       
   581   };
       
   582   const int kNumProps = sizeof(propIDs) / sizeof(propIDs[0]);
       
   583   PROPVARIANT properties[kNumProps];
       
   584   properties[0].vt = VT_UI4;
       
   585   properties[0].ulVal = (UInt32)dictionarySize;
       
   586 
       
   587   properties[1].vt = VT_BOOL;
       
   588   properties[1].boolVal = (numThreads > 1) ? VARIANT_TRUE : VARIANT_FALSE;
       
   589 
       
   590   {
       
   591     CMyComPtr<ICompressSetCoderProperties> setCoderProperties;
       
   592     RINOK(encoder.QueryInterface(IID_ICompressSetCoderProperties, &setCoderProperties));
       
   593     if (!setCoderProperties)
       
   594       return E_FAIL;
       
   595     RINOK(setCoderProperties->SetCoderProperties(propIDs, properties, kNumProps));
       
   596 
       
   597     CMyComPtr<ICompressWriteCoderProperties> writeCoderProperties;
       
   598     encoder.QueryInterface(IID_ICompressWriteCoderProperties, &writeCoderProperties);
       
   599     if (writeCoderProperties)
       
   600     {
       
   601       RINOK(writeCoderProperties->WriteCoderProperties(propStream));
       
   602     }
       
   603   }
       
   604   return S_OK;
       
   605 }
       
   606 
       
   607 HRESULT CEncoderInfo::Encode()
       
   608 {
       
   609   CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
       
   610   CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
       
   611   inStreamSpec->Init(rg.Buffer, rg.BufferSize);
       
   612   outStreamSpec->Init();
       
   613 
       
   614   RINOK(encoder->Code(inStream, outStream, 0, 0, progressInfo[0]));
       
   615   compressedSize = outStreamSpec->Pos;
       
   616   encoder.Release();
       
   617   return S_OK;
       
   618 }
       
   619 
       
   620 HRESULT CEncoderInfo::Decode(UInt32 decoderIndex)
       
   621 {
       
   622   CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
       
   623   CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
       
   624   CMyComPtr<ICompressCoder> &decoder = decoders[decoderIndex];
       
   625 
       
   626   CMyComPtr<ICompressSetDecoderProperties2> compressSetDecoderProperties;
       
   627   decoder.QueryInterface(IID_ICompressSetDecoderProperties2, &compressSetDecoderProperties);
       
   628   if (!compressSetDecoderProperties)
       
   629     return E_FAIL;
       
   630 
       
   631   CCrcOutStream *crcOutStreamSpec = new CCrcOutStream;
       
   632   CMyComPtr<ISequentialOutStream> crcOutStream = crcOutStreamSpec;
       
   633     
       
   634   CBenchProgressInfo *pi = progressInfoSpec[decoderIndex];
       
   635   pi->BenchInfo.UnpackSize = 0;
       
   636   pi->BenchInfo.PackSize = 0;
       
   637 
       
   638   for (UInt32 j = 0; j < NumIterations; j++)
       
   639   {
       
   640     inStreamSpec->Init(outStreamSpec->Buffer, compressedSize);
       
   641     crcOutStreamSpec->Init();
       
   642     
       
   643     RINOK(compressSetDecoderProperties->SetDecoderProperties2(propStreamSpec->Buffer, propStreamSpec->Pos));
       
   644     UInt64 outSize = kBufferSize;
       
   645     RINOK(decoder->Code(inStream, crcOutStream, 0, &outSize, progressInfo[decoderIndex]));
       
   646     if (CRC_GET_DIGEST(crcOutStreamSpec->Crc) != crc)
       
   647       return S_FALSE;
       
   648     pi->BenchInfo.UnpackSize += kBufferSize;
       
   649     pi->BenchInfo.PackSize += compressedSize;
       
   650   }
       
   651   decoder.Release();
       
   652   return S_OK;
       
   653 }
       
   654 
       
   655 static const UInt32 kNumThreadsMax = (1 << 16);
       
   656 
       
   657 struct CBenchEncoders
       
   658 {
       
   659   CEncoderInfo *encoders;
       
   660   CBenchEncoders(UInt32 num): encoders(0) { encoders = new CEncoderInfo[num]; }
       
   661   ~CBenchEncoders() { delete []encoders; }
       
   662 };
       
   663 
       
   664 HRESULT LzmaBench(
       
   665   #ifdef EXTERNAL_LZMA
       
   666   CCodecs *codecs,
       
   667   #endif
       
   668   UInt32 numThreads, UInt32 dictionarySize, IBenchCallback *callback)
       
   669 {
       
   670   UInt32 numEncoderThreads = 
       
   671     #ifdef BENCH_MT
       
   672     (numThreads > 1 ? numThreads / 2 : 1);
       
   673     #else
       
   674     1;
       
   675     #endif
       
   676   UInt32 numSubDecoderThreads = 
       
   677     #ifdef BENCH_MT
       
   678     (numThreads > 1 ? 2 : 1);
       
   679     #else
       
   680     1;
       
   681     #endif
       
   682   if (dictionarySize < (1 << kBenchMinDicLogSize) || numThreads < 1 || numEncoderThreads > kNumThreadsMax)
       
   683   {
       
   684     return E_INVALIDARG;
       
   685   }
       
   686 
       
   687   CBenchEncoders encodersSpec(numEncoderThreads);
       
   688   CEncoderInfo *encoders = encodersSpec.encoders;
       
   689 
       
   690   #ifdef EXTERNAL_LZMA
       
   691   UString name = L"LZMA";
       
   692   #endif
       
   693 
       
   694   UInt32 i;
       
   695   for (i = 0; i < numEncoderThreads; i++)
       
   696   {
       
   697     CEncoderInfo &encoder = encoders[i];
       
   698     encoder.callback = (i == 0) ? callback : 0;
       
   699 
       
   700     #ifdef EXTERNAL_LZMA
       
   701     RINOK(codecs->CreateCoder(name, true, encoder.encoder));
       
   702     #else
       
   703     encoder.encoder = new NCompress::NLZMA::CEncoder;
       
   704     #endif
       
   705     for (UInt32 j = 0; j < numSubDecoderThreads; j++)
       
   706     {
       
   707       #ifdef EXTERNAL_LZMA
       
   708       RINOK(codecs->CreateCoder(name, false, encoder.decoders[j]));
       
   709       #else
       
   710       encoder.decoders[j] = new NCompress::NLZMA::CDecoder;
       
   711       #endif
       
   712     }
       
   713   }
       
   714 
       
   715   CBaseRandomGenerator rg;
       
   716   rg.Init();
       
   717   for (i = 0; i < numEncoderThreads; i++)
       
   718   {
       
   719     RINOK(encoders[i].Init(dictionarySize, numThreads, &rg));
       
   720   }
       
   721 
       
   722   CBenchProgressStatus status;
       
   723   status.Res = S_OK;
       
   724   status.EncodeMode = true;
       
   725 
       
   726   for (i = 0; i < numEncoderThreads; i++)
       
   727   {
       
   728     CEncoderInfo &encoder = encoders[i];
       
   729     for (int j = 0; j < 2; j++)
       
   730     {
       
   731       encoder.progressInfo[j] = encoder.progressInfoSpec[j] = new CBenchProgressInfo;
       
   732       encoder.progressInfoSpec[j]->Status = &status;
       
   733     }
       
   734     if (i == 0)
       
   735     {
       
   736       encoder.progressInfoSpec[0]->callback = callback;
       
   737       encoder.progressInfoSpec[0]->BenchInfo.NumIterations = numEncoderThreads;
       
   738       SetStartTime(encoder.progressInfoSpec[0]->BenchInfo);
       
   739     }
       
   740 
       
   741     #ifdef BENCH_MT
       
   742     if (numEncoderThreads > 1)
       
   743     {
       
   744       #ifdef USE_ALLOCA
       
   745       encoder.AllocaSize = (i * 16 * 21) & 0x7FF;
       
   746       #endif
       
   747       RINOK(encoder.CreateEncoderThread())
       
   748     }
       
   749     else
       
   750     #endif
       
   751     {
       
   752       RINOK(encoder.Encode());
       
   753     }
       
   754   }
       
   755   #ifdef BENCH_MT
       
   756   if (numEncoderThreads > 1)
       
   757     for (i = 0; i < numEncoderThreads; i++)
       
   758       encoders[i].thread[0].Wait();
       
   759   #endif
       
   760 
       
   761   RINOK(status.Res);
       
   762 
       
   763   CBenchInfo info;
       
   764 
       
   765   SetFinishTime(encoders[0].progressInfoSpec[0]->BenchInfo, info);
       
   766   info.UnpackSize = 0;
       
   767   info.PackSize = 0;
       
   768   info.NumIterations = 1; // progressInfoSpec->NumIterations;
       
   769   for (i = 0; i < numEncoderThreads; i++)
       
   770   {
       
   771     CEncoderInfo &encoder = encoders[i];
       
   772     info.UnpackSize += encoder.kBufferSize;
       
   773     info.PackSize += encoder.compressedSize;
       
   774   }
       
   775   RINOK(callback->SetEncodeResult(info, true));
       
   776 
       
   777 
       
   778   status.Res = S_OK;
       
   779   status.EncodeMode = false;
       
   780 
       
   781   UInt32 numDecoderThreads = numEncoderThreads * numSubDecoderThreads;
       
   782   for (i = 0; i < numEncoderThreads; i++)
       
   783   {
       
   784     CEncoderInfo &encoder = encoders[i];
       
   785     encoder.NumIterations = 2 + kUncompressMinBlockSize / encoder.kBufferSize;
       
   786 
       
   787     if (i == 0)
       
   788     {
       
   789       encoder.progressInfoSpec[0]->callback = callback;
       
   790       encoder.progressInfoSpec[0]->BenchInfo.NumIterations = numDecoderThreads;
       
   791       SetStartTime(encoder.progressInfoSpec[0]->BenchInfo);
       
   792     }
       
   793 
       
   794     #ifdef BENCH_MT
       
   795     if (numDecoderThreads > 1)
       
   796     {
       
   797       for (UInt32 j = 0; j < numSubDecoderThreads; j++)
       
   798       {
       
   799         size_t allocaSize = ((i * numSubDecoderThreads + j) * 16 * 21) & 0x7FF;
       
   800         HRESULT res = encoder.CreateDecoderThread(j, (i == 0 && j == 0)
       
   801             #ifdef USE_ALLOCA
       
   802             , allocaSize
       
   803             #endif
       
   804             );
       
   805         RINOK(res);
       
   806       }
       
   807     }
       
   808     else
       
   809     #endif
       
   810     {
       
   811       RINOK(encoder.Decode(0));
       
   812     }
       
   813   }
       
   814   #ifdef BENCH_MT
       
   815   HRESULT res = S_OK;
       
   816   if (numDecoderThreads > 1)
       
   817     for (i = 0; i < numEncoderThreads; i++)
       
   818       for (UInt32 j = 0; j < numSubDecoderThreads; j++)
       
   819       {
       
   820         CEncoderInfo &encoder = encoders[i];
       
   821         encoder.thread[j].Wait();
       
   822         if (encoder.Results[j] != S_OK)
       
   823           res = encoder.Results[j];
       
   824       }
       
   825   RINOK(res);
       
   826   #endif
       
   827   RINOK(status.Res);
       
   828   SetFinishTime(encoders[0].progressInfoSpec[0]->BenchInfo, info);
       
   829   info.UnpackSize = 0;
       
   830   info.PackSize = 0;
       
   831   info.NumIterations = numSubDecoderThreads * encoders[0].NumIterations;
       
   832   for (i = 0; i < numEncoderThreads; i++)
       
   833   {
       
   834     CEncoderInfo &encoder = encoders[i];
       
   835     info.UnpackSize += encoder.kBufferSize;
       
   836     info.PackSize += encoder.compressedSize;
       
   837   }
       
   838   RINOK(callback->SetDecodeResult(info, false));
       
   839   RINOK(callback->SetDecodeResult(info, true));
       
   840   return S_OK;
       
   841 }
       
   842 
       
   843 
       
   844 inline UInt64 GetLZMAUsage(bool multiThread, UInt32 dictionary)
       
   845 { 
       
   846   UInt32 hs = dictionary - 1;
       
   847   hs |= (hs >> 1);
       
   848   hs |= (hs >> 2);
       
   849   hs |= (hs >> 4);
       
   850   hs |= (hs >> 8);
       
   851   hs >>= 1;
       
   852   hs |= 0xFFFF;
       
   853   if (hs > (1 << 24))
       
   854     hs >>= 1;
       
   855   hs++;
       
   856   return ((hs + (1 << 16)) + (UInt64)dictionary * 2) * 4 + (UInt64)dictionary * 3 / 2 + 
       
   857       (1 << 20) + (multiThread ? (6 << 20) : 0);
       
   858 }
       
   859 
       
   860 UInt64 GetBenchMemoryUsage(UInt32 numThreads, UInt32 dictionary)
       
   861 {
       
   862   const UInt32 kBufferSize = dictionary;
       
   863   const UInt32 kCompressedBufferSize = (kBufferSize / 2);
       
   864   UInt32 numSubThreads = (numThreads > 1) ? 2 : 1;
       
   865   UInt32 numBigThreads = numThreads / numSubThreads;
       
   866   return (kBufferSize + kCompressedBufferSize +
       
   867     GetLZMAUsage((numThreads > 1), dictionary) + (2 << 20)) * numBigThreads;
       
   868 }
       
   869 
       
   870 static bool CrcBig(const void *data, UInt32 size, UInt32 numCycles, UInt32 crcBase)
       
   871 {
       
   872   for (UInt32 i = 0; i < numCycles; i++)
       
   873     if (CrcCalc(data, size) != crcBase)
       
   874       return false;
       
   875   return true;
       
   876 }
       
   877 
       
   878 #ifdef BENCH_MT
       
   879 struct CCrcInfo
       
   880 {
       
   881   NWindows::CThread Thread;
       
   882   const Byte *Data;
       
   883   UInt32 Size;
       
   884   UInt32 NumCycles;
       
   885   UInt32 Crc;
       
   886   bool Res;
       
   887   void Wait()
       
   888   {
       
   889     Thread.Wait();
       
   890     Thread.Close();
       
   891   }
       
   892 };
       
   893 
       
   894 static THREAD_FUNC_DECL CrcThreadFunction(void *param)
       
   895 {
       
   896   CCrcInfo *p = (CCrcInfo *)param;
       
   897   p->Res = CrcBig(p->Data, p->Size, p->NumCycles, p->Crc);
       
   898   return 0;
       
   899 }
       
   900 
       
   901 struct CCrcThreads
       
   902 {
       
   903   UInt32 NumThreads;
       
   904   CCrcInfo *Items;
       
   905   CCrcThreads(): Items(0), NumThreads(0) {}
       
   906   void WaitAll()
       
   907   {
       
   908     for (UInt32 i = 0; i < NumThreads; i++)
       
   909       Items[i].Wait();
       
   910     NumThreads = 0;
       
   911   }
       
   912   ~CCrcThreads() 
       
   913   { 
       
   914     WaitAll();
       
   915     delete []Items; 
       
   916   }
       
   917 };
       
   918 #endif
       
   919 
       
   920 static UInt32 CrcCalc1(const Byte *buf, UInt32 size)
       
   921 {
       
   922   UInt32 crc = CRC_INIT_VAL;;
       
   923   for (UInt32 i = 0; i < size; i++)
       
   924     crc = CRC_UPDATE_BYTE(crc, buf[i]);
       
   925   return CRC_GET_DIGEST(crc);
       
   926 }
       
   927 
       
   928 static void RandGen(Byte *buf, UInt32 size, CBaseRandomGenerator &RG)
       
   929 {
       
   930   for (UInt32 i = 0; i < size; i++)
       
   931     buf[i] = (Byte)RG.GetRnd();
       
   932 }
       
   933 
       
   934 static UInt32 RandGenCrc(Byte *buf, UInt32 size, CBaseRandomGenerator &RG)
       
   935 {
       
   936   RandGen(buf, size, RG);
       
   937   return CrcCalc1(buf, size);
       
   938 }
       
   939 
       
   940 bool CrcInternalTest()
       
   941 {
       
   942   CBenchBuffer buffer;
       
   943   const UInt32 kBufferSize0 = (1 << 8);
       
   944   const UInt32 kBufferSize1 = (1 << 10);
       
   945   const UInt32 kCheckSize = (1 << 5);
       
   946   if (!buffer.Alloc(kBufferSize0 + kBufferSize1))
       
   947     return false;
       
   948   Byte *buf = buffer.Buffer;
       
   949   UInt32 i;
       
   950   for (i = 0; i < kBufferSize0; i++)
       
   951     buf[i] = (Byte)i;
       
   952   UInt32 crc1 = CrcCalc1(buf, kBufferSize0);
       
   953   if (crc1 != 0x29058C73)
       
   954     return false;
       
   955   CBaseRandomGenerator RG;
       
   956   RandGen(buf + kBufferSize0, kBufferSize1, RG);
       
   957   for (i = 0; i < kBufferSize0 + kBufferSize1 - kCheckSize; i++)
       
   958     for (UInt32 j = 0; j < kCheckSize; j++)
       
   959       if (CrcCalc1(buf + i, j) != CrcCalc(buf + i, j))
       
   960         return false;
       
   961   return true;
       
   962 }
       
   963 
       
   964 HRESULT CrcBench(UInt32 numThreads, UInt32 bufferSize, UInt64 &speed)
       
   965 {
       
   966   if (numThreads == 0)
       
   967     numThreads = 1;
       
   968 
       
   969   CBenchBuffer buffer;
       
   970   size_t totalSize = (size_t)bufferSize * numThreads;
       
   971   if (totalSize / numThreads != bufferSize)
       
   972     return E_OUTOFMEMORY;
       
   973   if (!buffer.Alloc(totalSize))
       
   974     return E_OUTOFMEMORY;
       
   975 
       
   976   Byte *buf = buffer.Buffer;
       
   977   CBaseRandomGenerator RG;
       
   978   UInt32 numCycles = ((UInt32)1 << 30) / ((bufferSize >> 2) + 1) + 1;
       
   979 
       
   980   UInt64 timeVal;
       
   981   #ifdef BENCH_MT
       
   982   CCrcThreads threads;
       
   983   if (numThreads > 1)
       
   984   {
       
   985     threads.Items = new CCrcInfo[numThreads];
       
   986     UInt32 i;
       
   987     for (i = 0; i < numThreads; i++)
       
   988     {
       
   989       CCrcInfo &info = threads.Items[i];
       
   990       Byte *data = buf + (size_t)bufferSize * i;
       
   991       info.Data = data;
       
   992       info.NumCycles = numCycles;
       
   993       info.Size = bufferSize;
       
   994       info.Crc = RandGenCrc(data, bufferSize, RG);
       
   995     }
       
   996     timeVal = GetTimeCount();
       
   997     for (i = 0; i < numThreads; i++)
       
   998     {
       
   999       CCrcInfo &info = threads.Items[i];
       
  1000       RINOK(info.Thread.Create(CrcThreadFunction, &info));
       
  1001       threads.NumThreads++;
       
  1002     }
       
  1003     threads.WaitAll();
       
  1004     for (i = 0; i < numThreads; i++)
       
  1005       if (!threads.Items[i].Res)
       
  1006         return S_FALSE;
       
  1007   }
       
  1008   else
       
  1009   #endif
       
  1010   {
       
  1011     UInt32 crc = RandGenCrc(buf, bufferSize, RG);
       
  1012     timeVal = GetTimeCount();
       
  1013     if (!CrcBig(buf, bufferSize, numCycles, crc))
       
  1014       return S_FALSE;
       
  1015   }
       
  1016   timeVal = GetTimeCount() - timeVal;
       
  1017   if (timeVal == 0)
       
  1018     timeVal = 1;
       
  1019 
       
  1020   UInt64 size = (UInt64)numCycles * totalSize;
       
  1021   speed = MyMultDiv64(size, timeVal, GetFreq());
       
  1022   return S_OK;
       
  1023 }
       
  1024