misc/libtremor/tremor/mdct.c
branchwebgl
changeset 9521 8054d9d775fd
parent 9282 92af50454cf2
parent 9519 b8b5c82eb61b
child 9950 2759212a27de
equal deleted inserted replaced
9282:92af50454cf2 9521:8054d9d775fd
     1 /********************************************************************
       
     2  *                                                                  *
       
     3  * THIS FILE IS PART OF THE OggVorbis 'TREMOR' CODEC SOURCE CODE.   *
       
     4  *                                                                  *
       
     5  * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
       
     6  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
       
     7  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
       
     8  *                                                                  *
       
     9  * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002    *
       
    10  * BY THE Xiph.Org FOUNDATION http://www.xiph.org/                  *
       
    11  *                                                                  *
       
    12  ********************************************************************
       
    13 
       
    14  function: normalized modified discrete cosine transform
       
    15            power of two length transform only [64 <= n ]
       
    16  last mod: $Id: mdct.c,v 1.9 2002/10/16 09:17:39 xiphmont Exp $
       
    17 
       
    18  Original algorithm adapted long ago from _The use of multirate filter
       
    19  banks for coding of high quality digital audio_, by T. Sporer,
       
    20  K. Brandenburg and B. Edler, collection of the European Signal
       
    21  Processing Conference (EUSIPCO), Amsterdam, June 1992, Vol.1, pp
       
    22  211-214
       
    23 
       
    24  The below code implements an algorithm that no longer looks much like
       
    25  that presented in the paper, but the basic structure remains if you
       
    26  dig deep enough to see it.
       
    27 
       
    28  This module DOES NOT INCLUDE code to generate/apply the window
       
    29  function.  Everybody has their own weird favorite including me... I
       
    30  happen to like the properties of y=sin(.5PI*sin^2(x)), but others may
       
    31  vehemently disagree.
       
    32 
       
    33  ********************************************************************/
       
    34 
       
    35 #include "ivorbiscodec.h"
       
    36 #include "codebook.h"
       
    37 #include "misc.h"
       
    38 #include "mdct.h"
       
    39 #include "mdct_lookup.h"
       
    40 
       
    41 
       
    42 /* 8 point butterfly (in place) */
       
    43 STIN void mdct_butterfly_8(DATA_TYPE *x){
       
    44 
       
    45   REG_TYPE r0   = x[4] + x[0];
       
    46   REG_TYPE r1   = x[4] - x[0];
       
    47   REG_TYPE r2   = x[5] + x[1];
       
    48   REG_TYPE r3   = x[5] - x[1];
       
    49   REG_TYPE r4   = x[6] + x[2];
       
    50   REG_TYPE r5   = x[6] - x[2];
       
    51   REG_TYPE r6   = x[7] + x[3];
       
    52   REG_TYPE r7   = x[7] - x[3];
       
    53 
       
    54 	   x[0] = r5   + r3;
       
    55 	   x[1] = r7   - r1;
       
    56 	   x[2] = r5   - r3;
       
    57 	   x[3] = r7   + r1;
       
    58            x[4] = r4   - r0;
       
    59 	   x[5] = r6   - r2;
       
    60            x[6] = r4   + r0;
       
    61 	   x[7] = r6   + r2;
       
    62 	   MB();
       
    63 }
       
    64 
       
    65 /* 16 point butterfly (in place, 4 register) */
       
    66 STIN void mdct_butterfly_16(DATA_TYPE *x){
       
    67 
       
    68   REG_TYPE r0, r1;
       
    69 
       
    70 	   r0 = x[ 0] - x[ 8]; x[ 8] += x[ 0];
       
    71 	   r1 = x[ 1] - x[ 9]; x[ 9] += x[ 1];
       
    72 	   x[ 0] = MULT31((r0 + r1) , cPI2_8);
       
    73 	   x[ 1] = MULT31((r1 - r0) , cPI2_8);
       
    74 	   MB();
       
    75 
       
    76 	   r0 = x[10] - x[ 2]; x[10] += x[ 2];
       
    77 	   r1 = x[ 3] - x[11]; x[11] += x[ 3];
       
    78 	   x[ 2] = r1; x[ 3] = r0;
       
    79 	   MB();
       
    80 
       
    81 	   r0 = x[12] - x[ 4]; x[12] += x[ 4];
       
    82 	   r1 = x[13] - x[ 5]; x[13] += x[ 5];
       
    83 	   x[ 4] = MULT31((r0 - r1) , cPI2_8);
       
    84 	   x[ 5] = MULT31((r0 + r1) , cPI2_8);
       
    85 	   MB();
       
    86 
       
    87 	   r0 = x[14] - x[ 6]; x[14] += x[ 6];
       
    88 	   r1 = x[15] - x[ 7]; x[15] += x[ 7];
       
    89 	   x[ 6] = r0; x[ 7] = r1;
       
    90 	   MB();
       
    91 
       
    92 	   mdct_butterfly_8(x);
       
    93 	   mdct_butterfly_8(x+8);
       
    94 }
       
    95 
       
    96 /* 32 point butterfly (in place, 4 register) */
       
    97 STIN void mdct_butterfly_32(DATA_TYPE *x){
       
    98 
       
    99   REG_TYPE r0, r1;
       
   100 
       
   101 	   r0 = x[30] - x[14]; x[30] += x[14];           
       
   102 	   r1 = x[31] - x[15]; x[31] += x[15];
       
   103 	   x[14] = r0; x[15] = r1;
       
   104 	   MB();
       
   105 
       
   106 	   r0 = x[28] - x[12]; x[28] += x[12];           
       
   107 	   r1 = x[29] - x[13]; x[29] += x[13];
       
   108 	   XNPROD31( r0, r1, cPI1_8, cPI3_8, &x[12], &x[13] );
       
   109 	   MB();
       
   110 
       
   111 	   r0 = x[26] - x[10]; x[26] += x[10];
       
   112 	   r1 = x[27] - x[11]; x[27] += x[11];
       
   113 	   x[10] = MULT31((r0 - r1) , cPI2_8);
       
   114 	   x[11] = MULT31((r0 + r1) , cPI2_8);
       
   115 	   MB();
       
   116 
       
   117 	   r0 = x[24] - x[ 8]; x[24] += x[ 8];
       
   118 	   r1 = x[25] - x[ 9]; x[25] += x[ 9];
       
   119 	   XNPROD31( r0, r1, cPI3_8, cPI1_8, &x[ 8], &x[ 9] );
       
   120 	   MB();
       
   121 
       
   122 	   r0 = x[22] - x[ 6]; x[22] += x[ 6];
       
   123 	   r1 = x[ 7] - x[23]; x[23] += x[ 7];
       
   124 	   x[ 6] = r1; x[ 7] = r0;
       
   125 	   MB();
       
   126 
       
   127 	   r0 = x[ 4] - x[20]; x[20] += x[ 4];
       
   128 	   r1 = x[ 5] - x[21]; x[21] += x[ 5];
       
   129 	   XPROD31 ( r0, r1, cPI3_8, cPI1_8, &x[ 4], &x[ 5] );
       
   130 	   MB();
       
   131 
       
   132 	   r0 = x[ 2] - x[18]; x[18] += x[ 2];
       
   133 	   r1 = x[ 3] - x[19]; x[19] += x[ 3];
       
   134 	   x[ 2] = MULT31((r1 + r0) , cPI2_8);
       
   135 	   x[ 3] = MULT31((r1 - r0) , cPI2_8);
       
   136 	   MB();
       
   137 
       
   138 	   r0 = x[ 0] - x[16]; x[16] += x[ 0];
       
   139 	   r1 = x[ 1] - x[17]; x[17] += x[ 1];
       
   140 	   XPROD31 ( r0, r1, cPI1_8, cPI3_8, &x[ 0], &x[ 1] );
       
   141 	   MB();
       
   142 
       
   143 	   mdct_butterfly_16(x);
       
   144 	   mdct_butterfly_16(x+16);
       
   145 }
       
   146 
       
   147 /* N/stage point generic N stage butterfly (in place, 2 register) */
       
   148 STIN void mdct_butterfly_generic(DATA_TYPE *x,int points,int step){
       
   149 
       
   150   LOOKUP_T *T   = sincos_lookup0;
       
   151   DATA_TYPE *x1        = x + points      - 8;
       
   152   DATA_TYPE *x2        = x + (points>>1) - 8;
       
   153   REG_TYPE   r0;
       
   154   REG_TYPE   r1;
       
   155 
       
   156   do{
       
   157     r0 = x1[6] - x2[6]; x1[6] += x2[6];
       
   158     r1 = x2[7] - x1[7]; x1[7] += x2[7];
       
   159     XPROD31( r1, r0, T[0], T[1], &x2[6], &x2[7] ); T+=step;
       
   160 
       
   161     r0 = x1[4] - x2[4]; x1[4] += x2[4];
       
   162     r1 = x2[5] - x1[5]; x1[5] += x2[5];
       
   163     XPROD31( r1, r0, T[0], T[1], &x2[4], &x2[5] ); T+=step;
       
   164 
       
   165     r0 = x1[2] - x2[2]; x1[2] += x2[2];
       
   166     r1 = x2[3] - x1[3]; x1[3] += x2[3];
       
   167     XPROD31( r1, r0, T[0], T[1], &x2[2], &x2[3] ); T+=step;
       
   168 
       
   169     r0 = x1[0] - x2[0]; x1[0] += x2[0];
       
   170     r1 = x2[1] - x1[1]; x1[1] += x2[1];
       
   171     XPROD31( r1, r0, T[0], T[1], &x2[0], &x2[1] ); T+=step;
       
   172 
       
   173     x1-=8; x2-=8;
       
   174   }while(T<sincos_lookup0+1024);
       
   175   do{
       
   176     r0 = x1[6] - x2[6]; x1[6] += x2[6];
       
   177     r1 = x1[7] - x2[7]; x1[7] += x2[7];
       
   178     XNPROD31( r0, r1, T[0], T[1], &x2[6], &x2[7] ); T-=step;
       
   179 
       
   180     r0 = x1[4] - x2[4]; x1[4] += x2[4];
       
   181     r1 = x1[5] - x2[5]; x1[5] += x2[5];
       
   182     XNPROD31( r0, r1, T[0], T[1], &x2[4], &x2[5] ); T-=step;
       
   183 
       
   184     r0 = x1[2] - x2[2]; x1[2] += x2[2];
       
   185     r1 = x1[3] - x2[3]; x1[3] += x2[3];
       
   186     XNPROD31( r0, r1, T[0], T[1], &x2[2], &x2[3] ); T-=step;
       
   187 
       
   188     r0 = x1[0] - x2[0]; x1[0] += x2[0];
       
   189     r1 = x1[1] - x2[1]; x1[1] += x2[1];
       
   190     XNPROD31( r0, r1, T[0], T[1], &x2[0], &x2[1] ); T-=step;
       
   191 
       
   192     x1-=8; x2-=8;
       
   193   }while(T>sincos_lookup0);
       
   194   do{
       
   195     r0 = x2[6] - x1[6]; x1[6] += x2[6];
       
   196     r1 = x2[7] - x1[7]; x1[7] += x2[7];
       
   197     XPROD31( r0, r1, T[0], T[1], &x2[6], &x2[7] ); T+=step;
       
   198 
       
   199     r0 = x2[4] - x1[4]; x1[4] += x2[4];
       
   200     r1 = x2[5] - x1[5]; x1[5] += x2[5];
       
   201     XPROD31( r0, r1, T[0], T[1], &x2[4], &x2[5] ); T+=step;
       
   202 
       
   203     r0 = x2[2] - x1[2]; x1[2] += x2[2];
       
   204     r1 = x2[3] - x1[3]; x1[3] += x2[3];
       
   205     XPROD31( r0, r1, T[0], T[1], &x2[2], &x2[3] ); T+=step;
       
   206 
       
   207     r0 = x2[0] - x1[0]; x1[0] += x2[0];
       
   208     r1 = x2[1] - x1[1]; x1[1] += x2[1];
       
   209     XPROD31( r0, r1, T[0], T[1], &x2[0], &x2[1] ); T+=step;
       
   210 
       
   211     x1-=8; x2-=8;
       
   212   }while(T<sincos_lookup0+1024);
       
   213   do{
       
   214     r0 = x1[6] - x2[6]; x1[6] += x2[6];
       
   215     r1 = x2[7] - x1[7]; x1[7] += x2[7];
       
   216     XNPROD31( r1, r0, T[0], T[1], &x2[6], &x2[7] ); T-=step;
       
   217 
       
   218     r0 = x1[4] - x2[4]; x1[4] += x2[4];
       
   219     r1 = x2[5] - x1[5]; x1[5] += x2[5];
       
   220     XNPROD31( r1, r0, T[0], T[1], &x2[4], &x2[5] ); T-=step;
       
   221 
       
   222     r0 = x1[2] - x2[2]; x1[2] += x2[2];
       
   223     r1 = x2[3] - x1[3]; x1[3] += x2[3];
       
   224     XNPROD31( r1, r0, T[0], T[1], &x2[2], &x2[3] ); T-=step;
       
   225 
       
   226     r0 = x1[0] - x2[0]; x1[0] += x2[0];
       
   227     r1 = x2[1] - x1[1]; x1[1] += x2[1];
       
   228     XNPROD31( r1, r0, T[0], T[1], &x2[0], &x2[1] ); T-=step;
       
   229 
       
   230     x1-=8; x2-=8;
       
   231   }while(T>sincos_lookup0);
       
   232 }
       
   233 
       
   234 STIN void mdct_butterflies(DATA_TYPE *x,int points,int shift){
       
   235 
       
   236   int stages=8-shift;
       
   237   int i,j;
       
   238   
       
   239   for(i=0;--stages>0;i++){
       
   240     for(j=0;j<(1<<i);j++)
       
   241       mdct_butterfly_generic(x+(points>>i)*j,points>>i,4<<(i+shift));
       
   242   }
       
   243 
       
   244   for(j=0;j<points;j+=32)
       
   245     mdct_butterfly_32(x+j);
       
   246 
       
   247 }
       
   248 
       
   249 static unsigned char bitrev[16]={0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15};
       
   250 
       
   251 STIN int bitrev12(int x){
       
   252   return bitrev[x>>8]|(bitrev[(x&0x0f0)>>4]<<4)|(((int)bitrev[x&0x00f])<<8);
       
   253 }
       
   254 
       
   255 STIN void mdct_bitreverse(DATA_TYPE *x,int n,int step,int shift){
       
   256 
       
   257   int          bit   = 0;
       
   258   DATA_TYPE   *w0    = x;
       
   259   DATA_TYPE   *w1    = x = w0+(n>>1);
       
   260   LOOKUP_T    *T = (step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1;
       
   261   LOOKUP_T    *Ttop  = T+1024;
       
   262   DATA_TYPE    r2;
       
   263 
       
   264   do{
       
   265     DATA_TYPE r3     = bitrev12(bit++);
       
   266     DATA_TYPE *x0    = x + ((r3 ^ 0xfff)>>shift) -1;
       
   267     DATA_TYPE *x1    = x + (r3>>shift);
       
   268 
       
   269     REG_TYPE  r0     = x0[0]  + x1[0];
       
   270     REG_TYPE  r1     = x1[1]  - x0[1];
       
   271 
       
   272 	      XPROD32( r0, r1, T[1], T[0], &r2, &r3 ); T+=step;
       
   273 
       
   274 	      w1    -= 4;
       
   275 
       
   276 	      r0     = (x0[1] + x1[1])>>1;
       
   277               r1     = (x0[0] - x1[0])>>1;
       
   278 	      w0[0]  = r0     + r2;
       
   279 	      w0[1]  = r1     + r3;
       
   280 	      w1[2]  = r0     - r2;
       
   281 	      w1[3]  = r3     - r1;
       
   282 
       
   283 	      r3     = bitrev12(bit++);
       
   284               x0     = x + ((r3 ^ 0xfff)>>shift) -1;
       
   285               x1     = x + (r3>>shift);
       
   286 
       
   287               r0     = x0[0]  + x1[0];
       
   288               r1     = x1[1]  - x0[1];
       
   289 
       
   290 	      XPROD32( r0, r1, T[1], T[0], &r2, &r3 ); T+=step;
       
   291 
       
   292               r0     = (x0[1] + x1[1])>>1;
       
   293               r1     = (x0[0] - x1[0])>>1;
       
   294 	      w0[2]  = r0     + r2;
       
   295 	      w0[3]  = r1     + r3;
       
   296 	      w1[0]  = r0     - r2;
       
   297 	      w1[1]  = r3     - r1;
       
   298 
       
   299 	      w0    += 4;
       
   300   }while(T<Ttop);
       
   301   do{
       
   302     DATA_TYPE r3     = bitrev12(bit++);
       
   303     DATA_TYPE *x0    = x + ((r3 ^ 0xfff)>>shift) -1;
       
   304     DATA_TYPE *x1    = x + (r3>>shift);
       
   305 
       
   306     REG_TYPE  r0     = x0[0]  + x1[0];
       
   307     REG_TYPE  r1     = x1[1]  - x0[1];
       
   308 
       
   309 	      T-=step; XPROD32( r0, r1, T[0], T[1], &r2, &r3 );
       
   310 
       
   311 	      w1    -= 4;
       
   312 
       
   313 	      r0     = (x0[1] + x1[1])>>1;
       
   314               r1     = (x0[0] - x1[0])>>1;
       
   315 	      w0[0]  = r0     + r2;
       
   316 	      w0[1]  = r1     + r3;
       
   317 	      w1[2]  = r0     - r2;
       
   318 	      w1[3]  = r3     - r1;
       
   319 
       
   320 	      r3     = bitrev12(bit++);
       
   321               x0     = x + ((r3 ^ 0xfff)>>shift) -1;
       
   322               x1     = x + (r3>>shift);
       
   323 
       
   324               r0     = x0[0]  + x1[0];
       
   325               r1     = x1[1]  - x0[1];
       
   326 
       
   327 	      T-=step; XPROD32( r0, r1, T[0], T[1], &r2, &r3 );
       
   328 
       
   329               r0     = (x0[1] + x1[1])>>1;
       
   330               r1     = (x0[0] - x1[0])>>1;
       
   331 	      w0[2]  = r0     + r2;
       
   332 	      w0[3]  = r1     + r3;
       
   333 	      w1[0]  = r0     - r2;
       
   334 	      w1[1]  = r3     - r1;
       
   335 
       
   336 	      w0    += 4;
       
   337   }while(w0<w1);
       
   338 }
       
   339 
       
   340 void mdct_backward(int n, DATA_TYPE *in, DATA_TYPE *out){
       
   341   int n2=n>>1;
       
   342   int n4=n>>2;
       
   343   DATA_TYPE *iX;
       
   344   DATA_TYPE *oX;
       
   345   LOOKUP_T *T;
       
   346   LOOKUP_T *V;
       
   347   int shift;
       
   348   int step;
       
   349 
       
   350   for (shift=6;!(n&(1<<shift));shift++);
       
   351   shift=13-shift;
       
   352   step=2<<shift;
       
   353    
       
   354   /* rotate */
       
   355 
       
   356   iX            = in+n2-7;
       
   357   oX            = out+n2+n4;
       
   358   T             = sincos_lookup0;
       
   359 
       
   360   do{
       
   361     oX-=4;
       
   362     XPROD31( iX[4], iX[6], T[0], T[1], &oX[2], &oX[3] ); T+=step;
       
   363     XPROD31( iX[0], iX[2], T[0], T[1], &oX[0], &oX[1] ); T+=step;
       
   364     iX-=8;
       
   365   }while(iX>=in+n4);
       
   366   do{
       
   367     oX-=4;
       
   368     XPROD31( iX[4], iX[6], T[1], T[0], &oX[2], &oX[3] ); T-=step;
       
   369     XPROD31( iX[0], iX[2], T[1], T[0], &oX[0], &oX[1] ); T-=step;
       
   370     iX-=8;
       
   371   }while(iX>=in);
       
   372 
       
   373   iX            = in+n2-8;
       
   374   oX            = out+n2+n4;
       
   375   T             = sincos_lookup0;
       
   376 
       
   377   do{
       
   378     T+=step; XNPROD31( iX[6], iX[4], T[0], T[1], &oX[0], &oX[1] );
       
   379     T+=step; XNPROD31( iX[2], iX[0], T[0], T[1], &oX[2], &oX[3] );
       
   380     iX-=8;
       
   381     oX+=4;
       
   382   }while(iX>=in+n4);
       
   383   do{
       
   384     T-=step; XNPROD31( iX[6], iX[4], T[1], T[0], &oX[0], &oX[1] );
       
   385     T-=step; XNPROD31( iX[2], iX[0], T[1], T[0], &oX[2], &oX[3] );
       
   386     iX-=8;
       
   387     oX+=4;
       
   388   }while(iX>=in);
       
   389 
       
   390   mdct_butterflies(out+n2,n2,shift);
       
   391   mdct_bitreverse(out,n,step,shift);
       
   392 
       
   393   /* rotate + window */
       
   394 
       
   395   step>>=2;
       
   396   {
       
   397     DATA_TYPE *oX1=out+n2+n4;
       
   398     DATA_TYPE *oX2=out+n2+n4;
       
   399     DATA_TYPE *iX =out;
       
   400 
       
   401     switch(step) {
       
   402       default: {
       
   403         T=(step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1;
       
   404         do{
       
   405           oX1-=4;
       
   406 	  XPROD31( iX[0], -iX[1], T[0], T[1], &oX1[3], &oX2[0] ); T+=step;
       
   407 	  XPROD31( iX[2], -iX[3], T[0], T[1], &oX1[2], &oX2[1] ); T+=step;
       
   408 	  XPROD31( iX[4], -iX[5], T[0], T[1], &oX1[1], &oX2[2] ); T+=step;
       
   409 	  XPROD31( iX[6], -iX[7], T[0], T[1], &oX1[0], &oX2[3] ); T+=step;
       
   410 	  oX2+=4;
       
   411 	  iX+=8;
       
   412 	}while(iX<oX1);
       
   413 	break;
       
   414       }
       
   415 
       
   416       case 1: {
       
   417         /* linear interpolation between table values: offset=0.5, step=1 */
       
   418 	REG_TYPE  t0,t1,v0,v1;
       
   419         T         = sincos_lookup0;
       
   420         V         = sincos_lookup1;
       
   421 	t0        = (*T++)>>1;
       
   422 	t1        = (*T++)>>1;
       
   423         do{
       
   424           oX1-=4;
       
   425 
       
   426 	  t0 += (v0 = (*V++)>>1);
       
   427 	  t1 += (v1 = (*V++)>>1);
       
   428 	  XPROD31( iX[0], -iX[1], t0, t1, &oX1[3], &oX2[0] );
       
   429 	  v0 += (t0 = (*T++)>>1);
       
   430 	  v1 += (t1 = (*T++)>>1);
       
   431 	  XPROD31( iX[2], -iX[3], v0, v1, &oX1[2], &oX2[1] );
       
   432 	  t0 += (v0 = (*V++)>>1);
       
   433 	  t1 += (v1 = (*V++)>>1);
       
   434 	  XPROD31( iX[4], -iX[5], t0, t1, &oX1[1], &oX2[2] );
       
   435 	  v0 += (t0 = (*T++)>>1);
       
   436 	  v1 += (t1 = (*T++)>>1);
       
   437 	  XPROD31( iX[6], -iX[7], v0, v1, &oX1[0], &oX2[3] );
       
   438 
       
   439 	  oX2+=4;
       
   440 	  iX+=8;
       
   441 	}while(iX<oX1);
       
   442 	break;
       
   443       }
       
   444 
       
   445       case 0: {
       
   446         /* linear interpolation between table values: offset=0.25, step=0.5 */
       
   447 	REG_TYPE  t0,t1,v0,v1,q0,q1;
       
   448         T         = sincos_lookup0;
       
   449         V         = sincos_lookup1;
       
   450 	t0        = *T++;
       
   451 	t1        = *T++;
       
   452         do{
       
   453           oX1-=4;
       
   454 
       
   455 	  v0  = *V++;
       
   456 	  v1  = *V++;
       
   457 	  t0 +=  (q0 = (v0-t0)>>2);
       
   458 	  t1 +=  (q1 = (v1-t1)>>2);
       
   459 	  XPROD31( iX[0], -iX[1], t0, t1, &oX1[3], &oX2[0] );
       
   460 	  t0  = v0-q0;
       
   461 	  t1  = v1-q1;
       
   462 	  XPROD31( iX[2], -iX[3], t0, t1, &oX1[2], &oX2[1] );
       
   463 
       
   464 	  t0  = *T++;
       
   465 	  t1  = *T++;
       
   466 	  v0 += (q0 = (t0-v0)>>2);
       
   467 	  v1 += (q1 = (t1-v1)>>2);
       
   468 	  XPROD31( iX[4], -iX[5], v0, v1, &oX1[1], &oX2[2] );
       
   469 	  v0  = t0-q0;
       
   470 	  v1  = t1-q1;
       
   471 	  XPROD31( iX[6], -iX[7], v0, v1, &oX1[0], &oX2[3] );
       
   472 
       
   473 	  oX2+=4;
       
   474 	  iX+=8;
       
   475 	}while(iX<oX1);
       
   476 	break;
       
   477       }
       
   478     }
       
   479 
       
   480     iX=out+n2+n4;
       
   481     oX1=out+n4;
       
   482     oX2=oX1;
       
   483 
       
   484     do{
       
   485       oX1-=4;
       
   486       iX-=4;
       
   487 
       
   488       oX2[0] = -(oX1[3] = iX[3]);
       
   489       oX2[1] = -(oX1[2] = iX[2]);
       
   490       oX2[2] = -(oX1[1] = iX[1]);
       
   491       oX2[3] = -(oX1[0] = iX[0]);
       
   492 
       
   493       oX2+=4;
       
   494     }while(oX2<iX);
       
   495 
       
   496     iX=out+n2+n4;
       
   497     oX1=out+n2+n4;
       
   498     oX2=out+n2;
       
   499 
       
   500     do{
       
   501       oX1-=4;
       
   502       oX1[0]= iX[3];
       
   503       oX1[1]= iX[2];
       
   504       oX1[2]= iX[1];
       
   505       oX1[3]= iX[0];
       
   506       iX+=4;
       
   507     }while(oX1>oX2);
       
   508   }
       
   509 }
       
   510