misc/libtremor/tremor/mdct.c
changeset 7697 767d3c4153a1
parent 6045 9a7cc0f29430
child 7849 a12155461b34
equal deleted inserted replaced
7696:78a00bc68913 7697:767d3c4153a1
     4  *                                                                  *
     4  *                                                                  *
     5  * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
     5  * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
     6  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
     6  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
     7  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
     7  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
     8  *                                                                  *
     8  *                                                                  *
     9  * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002    *
     9  * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2003    *
    10  * BY THE Xiph.Org FOUNDATION http://www.xiph.org/                  *
    10  * BY THE Xiph.Org FOUNDATION http://www.xiph.org/                  *
    11  *                                                                  *
    11  *                                                                  *
    12  ********************************************************************
    12  ********************************************************************
    13 
    13 
    14  function: normalized modified discrete cosine transform
    14  function: normalized modified discrete cosine transform
    15            power of two length transform only [64 <= n ]
    15            power of two length transform only [64 <= n ]
    16  last mod: $Id: mdct.c,v 1.9 2002/10/16 09:17:39 xiphmont Exp $
    16  last mod: $Id: mdct.c,v 1.9.6.5 2003/04/29 04:03:27 xiphmont Exp $
    17 
    17 
    18  Original algorithm adapted long ago from _The use of multirate filter
    18  Original algorithm adapted long ago from _The use of multirate filter
    19  banks for coding of high quality digital audio_, by T. Sporer,
    19  banks for coding of high quality digital audio_, by T. Sporer,
    20  K. Brandenburg and B. Edler, collection of the European Signal
    20  K. Brandenburg and B. Edler, collection of the European Signal
    21  Processing Conference (EUSIPCO), Amsterdam, June 1992, Vol.1, pp
    21  Processing Conference (EUSIPCO), Amsterdam, June 1992, Vol.1, pp
    31  vehemently disagree.
    31  vehemently disagree.
    32 
    32 
    33  ********************************************************************/
    33  ********************************************************************/
    34 
    34 
    35 #include "ivorbiscodec.h"
    35 #include "ivorbiscodec.h"
    36 #include "codebook.h"
    36 #include "os.h"
    37 #include "misc.h"
    37 #include "misc.h"
    38 #include "mdct.h"
    38 #include "mdct.h"
    39 #include "mdct_lookup.h"
    39 #include "mdct_lookup.h"
    40 
    40 
       
    41 STIN void presymmetry(DATA_TYPE *in,int n2,int step){
       
    42   DATA_TYPE *aX;
       
    43   DATA_TYPE *bX;
       
    44   LOOKUP_T *T;
       
    45   int n4=n2>>1;
       
    46   
       
    47   aX            = in+n2-3;
       
    48   T             = sincos_lookup0;
       
    49 
       
    50   do{
       
    51     REG_TYPE  r0= aX[0];
       
    52     REG_TYPE  r2= aX[2];
       
    53     XPROD31( r0, r2, T[0], T[1], &aX[0], &aX[2] ); T+=step;
       
    54     aX-=4;
       
    55   }while(aX>=in+n4);
       
    56   do{
       
    57     REG_TYPE  r0= aX[0];
       
    58     REG_TYPE  r2= aX[2];
       
    59     XPROD31( r0, r2, T[1], T[0], &aX[0], &aX[2] ); T-=step;
       
    60     aX-=4;
       
    61   }while(aX>=in);
       
    62 
       
    63   aX            = in+n2-4;
       
    64   bX            = in;
       
    65   T             = sincos_lookup0;
       
    66   do{
       
    67     REG_TYPE  ri0= aX[0];
       
    68     REG_TYPE  ri2= aX[2];
       
    69     REG_TYPE  ro0= bX[0];
       
    70     REG_TYPE  ro2= bX[2];
       
    71     
       
    72     XNPROD31( ro2, ro0, T[1], T[0], &aX[0], &aX[2] ); T+=step;
       
    73     XNPROD31( ri2, ri0, T[0], T[1], &bX[0], &bX[2] );
       
    74     
       
    75     aX-=4;
       
    76     bX+=4;
       
    77   }while(aX>=in+n4);
       
    78 
       
    79 }
    41 
    80 
    42 /* 8 point butterfly (in place) */
    81 /* 8 point butterfly (in place) */
    43 STIN void mdct_butterfly_8(DATA_TYPE *x){
    82 STIN void mdct_butterfly_8(DATA_TYPE *x){
    44 
    83 
    45   REG_TYPE r0   = x[4] + x[0];
    84   REG_TYPE r0   = x[0] + x[1];
    46   REG_TYPE r1   = x[4] - x[0];
    85   REG_TYPE r1   = x[0] - x[1];
    47   REG_TYPE r2   = x[5] + x[1];
    86   REG_TYPE r2   = x[2] + x[3];
    48   REG_TYPE r3   = x[5] - x[1];
    87   REG_TYPE r3   = x[2] - x[3];
    49   REG_TYPE r4   = x[6] + x[2];
    88   REG_TYPE r4   = x[4] + x[5];
    50   REG_TYPE r5   = x[6] - x[2];
    89   REG_TYPE r5   = x[4] - x[5];
    51   REG_TYPE r6   = x[7] + x[3];
    90   REG_TYPE r6   = x[6] + x[7];
    52   REG_TYPE r7   = x[7] - x[3];
    91   REG_TYPE r7   = x[6] - x[7];
    53 
    92 
    54 	   x[0] = r5   + r3;
    93 	   x[0] = r5   + r3;
    55 	   x[1] = r7   - r1;
    94 	   x[1] = r7   - r1;
    56 	   x[2] = r5   - r3;
    95 	   x[2] = r5   - r3;
    57 	   x[3] = r7   + r1;
    96 	   x[3] = r7   + r1;
    62 	   MB();
   101 	   MB();
    63 }
   102 }
    64 
   103 
    65 /* 16 point butterfly (in place, 4 register) */
   104 /* 16 point butterfly (in place, 4 register) */
    66 STIN void mdct_butterfly_16(DATA_TYPE *x){
   105 STIN void mdct_butterfly_16(DATA_TYPE *x){
    67 
   106   
    68   REG_TYPE r0, r1;
   107   REG_TYPE r0, r1, r2, r3;
    69 
   108   
    70 	   r0 = x[ 0] - x[ 8]; x[ 8] += x[ 0];
   109 	   r0 = x[ 8] - x[ 9]; x[ 8] += x[ 9];
    71 	   r1 = x[ 1] - x[ 9]; x[ 9] += x[ 1];
   110 	   r1 = x[10] - x[11]; x[10] += x[11];
    72 	   x[ 0] = MULT31((r0 + r1) , cPI2_8);
   111 	   r2 = x[ 1] - x[ 0]; x[ 9]  = x[ 1] + x[0];
    73 	   x[ 1] = MULT31((r1 - r0) , cPI2_8);
   112 	   r3 = x[ 3] - x[ 2]; x[11]  = x[ 3] + x[2];
    74 	   MB();
   113 	   x[ 0] = MULT31((r0 - r1) , cPI2_8);
    75 
   114 	   x[ 1] = MULT31((r2 + r3) , cPI2_8);
    76 	   r0 = x[10] - x[ 2]; x[10] += x[ 2];
   115 	   x[ 2] = MULT31((r0 + r1) , cPI2_8);
    77 	   r1 = x[ 3] - x[11]; x[11] += x[ 3];
   116 	   x[ 3] = MULT31((r3 - r2) , cPI2_8);
    78 	   x[ 2] = r1; x[ 3] = r0;
   117 	   MB();
    79 	   MB();
   118 
    80 
   119 	   r2 = x[12] - x[13]; x[12] += x[13];
    81 	   r0 = x[12] - x[ 4]; x[12] += x[ 4];
   120 	   r3 = x[14] - x[15]; x[14] += x[15];
    82 	   r1 = x[13] - x[ 5]; x[13] += x[ 5];
   121 	   r0 = x[ 4] - x[ 5]; x[13]  = x[ 5] + x[ 4];
    83 	   x[ 4] = MULT31((r0 - r1) , cPI2_8);
   122 	   r1 = x[ 7] - x[ 6]; x[15]  = x[ 7] + x[ 6];
    84 	   x[ 5] = MULT31((r0 + r1) , cPI2_8);
   123 	   x[ 4] = r2; x[ 5] = r1; 
    85 	   MB();
   124 	   x[ 6] = r3; x[ 7] = r0;
    86 
       
    87 	   r0 = x[14] - x[ 6]; x[14] += x[ 6];
       
    88 	   r1 = x[15] - x[ 7]; x[15] += x[ 7];
       
    89 	   x[ 6] = r0; x[ 7] = r1;
       
    90 	   MB();
   125 	   MB();
    91 
   126 
    92 	   mdct_butterfly_8(x);
   127 	   mdct_butterfly_8(x);
    93 	   mdct_butterfly_8(x+8);
   128 	   mdct_butterfly_8(x+8);
    94 }
   129 }
    95 
   130 
    96 /* 32 point butterfly (in place, 4 register) */
   131 /* 32 point butterfly (in place, 4 register) */
    97 STIN void mdct_butterfly_32(DATA_TYPE *x){
   132 STIN void mdct_butterfly_32(DATA_TYPE *x){
    98 
   133 
    99   REG_TYPE r0, r1;
   134   REG_TYPE r0, r1, r2, r3;
   100 
   135 
   101 	   r0 = x[30] - x[14]; x[30] += x[14];           
   136 	   r0 = x[16] - x[17]; x[16] += x[17];
   102 	   r1 = x[31] - x[15]; x[31] += x[15];
   137 	   r1 = x[18] - x[19]; x[18] += x[19];
   103 	   x[14] = r0; x[15] = r1;
   138 	   r2 = x[ 1] - x[ 0]; x[17]  = x[ 1] + x[ 0];
   104 	   MB();
   139 	   r3 = x[ 3] - x[ 2]; x[19]  = x[ 3] + x[ 2];
   105 
   140 	   XNPROD31( r0, r1, cPI3_8, cPI1_8, &x[ 0], &x[ 2] );
   106 	   r0 = x[28] - x[12]; x[28] += x[12];           
   141 	   XPROD31 ( r2, r3, cPI1_8, cPI3_8, &x[ 1], &x[ 3] );
   107 	   r1 = x[29] - x[13]; x[29] += x[13];
   142 	   MB();
   108 	   XNPROD31( r0, r1, cPI1_8, cPI3_8, &x[12], &x[13] );
   143 
   109 	   MB();
   144 	   r0 = x[20] - x[21]; x[20] += x[21];
   110 
   145 	   r1 = x[22] - x[23]; x[22] += x[23];
   111 	   r0 = x[26] - x[10]; x[26] += x[10];
   146 	   r2 = x[ 5] - x[ 4]; x[21]  = x[ 5] + x[ 4];
   112 	   r1 = x[27] - x[11]; x[27] += x[11];
   147 	   r3 = x[ 7] - x[ 6]; x[23]  = x[ 7] + x[ 6];
   113 	   x[10] = MULT31((r0 - r1) , cPI2_8);
   148 	   x[ 4] = MULT31((r0 - r1) , cPI2_8);
   114 	   x[11] = MULT31((r0 + r1) , cPI2_8);
   149 	   x[ 5] = MULT31((r3 + r2) , cPI2_8);
   115 	   MB();
   150 	   x[ 6] = MULT31((r0 + r1) , cPI2_8);
   116 
   151 	   x[ 7] = MULT31((r3 - r2) , cPI2_8);
   117 	   r0 = x[24] - x[ 8]; x[24] += x[ 8];
   152 	   MB();
   118 	   r1 = x[25] - x[ 9]; x[25] += x[ 9];
   153 
   119 	   XNPROD31( r0, r1, cPI3_8, cPI1_8, &x[ 8], &x[ 9] );
   154 	   r0 = x[24] - x[25]; x[24] += x[25];           
   120 	   MB();
   155 	   r1 = x[26] - x[27]; x[26] += x[27];
   121 
   156 	   r2 = x[ 9] - x[ 8]; x[25]  = x[ 9] + x[ 8];
   122 	   r0 = x[22] - x[ 6]; x[22] += x[ 6];
   157 	   r3 = x[11] - x[10]; x[27]  = x[11] + x[10];
   123 	   r1 = x[ 7] - x[23]; x[23] += x[ 7];
   158 	   XNPROD31( r0, r1, cPI1_8, cPI3_8, &x[ 8], &x[10] );
   124 	   x[ 6] = r1; x[ 7] = r0;
   159 	   XPROD31 ( r2, r3, cPI3_8, cPI1_8, &x[ 9], &x[11] );
   125 	   MB();
   160 	   MB();
   126 
   161 
   127 	   r0 = x[ 4] - x[20]; x[20] += x[ 4];
   162 	   r0 = x[28] - x[29]; x[28] += x[29];           
   128 	   r1 = x[ 5] - x[21]; x[21] += x[ 5];
   163 	   r1 = x[30] - x[31]; x[30] += x[31];
   129 	   XPROD31 ( r0, r1, cPI3_8, cPI1_8, &x[ 4], &x[ 5] );
   164 	   r2 = x[12] - x[13]; x[29]  = x[13] + x[12];
   130 	   MB();
   165 	   r3 = x[15] - x[14]; x[31]  = x[15] + x[14];
   131 
   166 	   x[12] = r0; x[13] = r3; 
   132 	   r0 = x[ 2] - x[18]; x[18] += x[ 2];
   167 	   x[14] = r1; x[15] = r2;
   133 	   r1 = x[ 3] - x[19]; x[19] += x[ 3];
       
   134 	   x[ 2] = MULT31((r1 + r0) , cPI2_8);
       
   135 	   x[ 3] = MULT31((r1 - r0) , cPI2_8);
       
   136 	   MB();
       
   137 
       
   138 	   r0 = x[ 0] - x[16]; x[16] += x[ 0];
       
   139 	   r1 = x[ 1] - x[17]; x[17] += x[ 1];
       
   140 	   XPROD31 ( r0, r1, cPI1_8, cPI3_8, &x[ 0], &x[ 1] );
       
   141 	   MB();
   168 	   MB();
   142 
   169 
   143 	   mdct_butterfly_16(x);
   170 	   mdct_butterfly_16(x);
   144 	   mdct_butterfly_16(x+16);
   171 	   mdct_butterfly_16(x+16);
   145 }
   172 }
   146 
   173 
   147 /* N/stage point generic N stage butterfly (in place, 2 register) */
   174 /* N/stage point generic N stage butterfly (in place, 2 register) */
   148 STIN void mdct_butterfly_generic(DATA_TYPE *x,int points,int step){
   175 STIN void mdct_butterfly_generic(DATA_TYPE *x,int points,int step){
   149 
   176 
   150   LOOKUP_T *T   = sincos_lookup0;
   177   LOOKUP_T   *T  = sincos_lookup0;
   151   DATA_TYPE *x1        = x + points      - 8;
   178   DATA_TYPE *x1  = x + points - 4;
   152   DATA_TYPE *x2        = x + (points>>1) - 8;
   179   DATA_TYPE *x2  = x + (points>>1) - 4;
   153   REG_TYPE   r0;
   180   REG_TYPE   r0, r1, r2, r3;
   154   REG_TYPE   r1;
   181 
   155 
   182   do{
   156   do{
   183     r0 = x1[0] - x1[1]; x1[0] += x1[1];
   157     r0 = x1[6] - x2[6]; x1[6] += x2[6];
   184     r1 = x1[3] - x1[2]; x1[2] += x1[3];
   158     r1 = x2[7] - x1[7]; x1[7] += x2[7];
   185     r2 = x2[1] - x2[0]; x1[1]  = x2[1] + x2[0];
   159     XPROD31( r1, r0, T[0], T[1], &x2[6], &x2[7] ); T+=step;
   186     r3 = x2[3] - x2[2]; x1[3]  = x2[3] + x2[2];
   160 
   187     XPROD31( r1, r0, T[0], T[1], &x2[0], &x2[2] );
   161     r0 = x1[4] - x2[4]; x1[4] += x2[4];
   188     XPROD31( r2, r3, T[0], T[1], &x2[1], &x2[3] ); T+=step;
   162     r1 = x2[5] - x1[5]; x1[5] += x2[5];
   189     x1-=4; 
   163     XPROD31( r1, r0, T[0], T[1], &x2[4], &x2[5] ); T+=step;
   190     x2-=4;
   164 
       
   165     r0 = x1[2] - x2[2]; x1[2] += x2[2];
       
   166     r1 = x2[3] - x1[3]; x1[3] += x2[3];
       
   167     XPROD31( r1, r0, T[0], T[1], &x2[2], &x2[3] ); T+=step;
       
   168 
       
   169     r0 = x1[0] - x2[0]; x1[0] += x2[0];
       
   170     r1 = x2[1] - x1[1]; x1[1] += x2[1];
       
   171     XPROD31( r1, r0, T[0], T[1], &x2[0], &x2[1] ); T+=step;
       
   172 
       
   173     x1-=8; x2-=8;
       
   174   }while(T<sincos_lookup0+1024);
   191   }while(T<sincos_lookup0+1024);
   175   do{
   192   do{
   176     r0 = x1[6] - x2[6]; x1[6] += x2[6];
   193     r0 = x1[0] - x1[1]; x1[0] += x1[1];
   177     r1 = x1[7] - x2[7]; x1[7] += x2[7];
   194     r1 = x1[2] - x1[3]; x1[2] += x1[3];
   178     XNPROD31( r0, r1, T[0], T[1], &x2[6], &x2[7] ); T-=step;
   195     r2 = x2[0] - x2[1]; x1[1]  = x2[1] + x2[0];
   179 
   196     r3 = x2[3] - x2[2]; x1[3]  = x2[3] + x2[2];
   180     r0 = x1[4] - x2[4]; x1[4] += x2[4];
   197     XNPROD31( r0, r1, T[0], T[1], &x2[0], &x2[2] );
   181     r1 = x1[5] - x2[5]; x1[5] += x2[5];
   198     XNPROD31( r3, r2, T[0], T[1], &x2[1], &x2[3] ); T-=step;
   182     XNPROD31( r0, r1, T[0], T[1], &x2[4], &x2[5] ); T-=step;
   199     x1-=4; 
   183 
   200     x2-=4; 
   184     r0 = x1[2] - x2[2]; x1[2] += x2[2];
       
   185     r1 = x1[3] - x2[3]; x1[3] += x2[3];
       
   186     XNPROD31( r0, r1, T[0], T[1], &x2[2], &x2[3] ); T-=step;
       
   187 
       
   188     r0 = x1[0] - x2[0]; x1[0] += x2[0];
       
   189     r1 = x1[1] - x2[1]; x1[1] += x2[1];
       
   190     XNPROD31( r0, r1, T[0], T[1], &x2[0], &x2[1] ); T-=step;
       
   191 
       
   192     x1-=8; x2-=8;
       
   193   }while(T>sincos_lookup0);
       
   194   do{
       
   195     r0 = x2[6] - x1[6]; x1[6] += x2[6];
       
   196     r1 = x2[7] - x1[7]; x1[7] += x2[7];
       
   197     XPROD31( r0, r1, T[0], T[1], &x2[6], &x2[7] ); T+=step;
       
   198 
       
   199     r0 = x2[4] - x1[4]; x1[4] += x2[4];
       
   200     r1 = x2[5] - x1[5]; x1[5] += x2[5];
       
   201     XPROD31( r0, r1, T[0], T[1], &x2[4], &x2[5] ); T+=step;
       
   202 
       
   203     r0 = x2[2] - x1[2]; x1[2] += x2[2];
       
   204     r1 = x2[3] - x1[3]; x1[3] += x2[3];
       
   205     XPROD31( r0, r1, T[0], T[1], &x2[2], &x2[3] ); T+=step;
       
   206 
       
   207     r0 = x2[0] - x1[0]; x1[0] += x2[0];
       
   208     r1 = x2[1] - x1[1]; x1[1] += x2[1];
       
   209     XPROD31( r0, r1, T[0], T[1], &x2[0], &x2[1] ); T+=step;
       
   210 
       
   211     x1-=8; x2-=8;
       
   212   }while(T<sincos_lookup0+1024);
       
   213   do{
       
   214     r0 = x1[6] - x2[6]; x1[6] += x2[6];
       
   215     r1 = x2[7] - x1[7]; x1[7] += x2[7];
       
   216     XNPROD31( r1, r0, T[0], T[1], &x2[6], &x2[7] ); T-=step;
       
   217 
       
   218     r0 = x1[4] - x2[4]; x1[4] += x2[4];
       
   219     r1 = x2[5] - x1[5]; x1[5] += x2[5];
       
   220     XNPROD31( r1, r0, T[0], T[1], &x2[4], &x2[5] ); T-=step;
       
   221 
       
   222     r0 = x1[2] - x2[2]; x1[2] += x2[2];
       
   223     r1 = x2[3] - x1[3]; x1[3] += x2[3];
       
   224     XNPROD31( r1, r0, T[0], T[1], &x2[2], &x2[3] ); T-=step;
       
   225 
       
   226     r0 = x1[0] - x2[0]; x1[0] += x2[0];
       
   227     r1 = x2[1] - x1[1]; x1[1] += x2[1];
       
   228     XNPROD31( r1, r0, T[0], T[1], &x2[0], &x2[1] ); T-=step;
       
   229 
       
   230     x1-=8; x2-=8;
       
   231   }while(T>sincos_lookup0);
   201   }while(T>sincos_lookup0);
   232 }
   202 }
   233 
   203 
   234 STIN void mdct_butterflies(DATA_TYPE *x,int points,int shift){
   204 STIN void mdct_butterflies(DATA_TYPE *x,int points,int shift){
   235 
   205 
   236   int stages=8-shift;
   206   int stages=8-shift;
   237   int i,j;
   207   int i,j;
   238   
   208 
   239   for(i=0;--stages>0;i++){
   209   for(i=0;--stages>0;i++){
   240     for(j=0;j<(1<<i);j++)
   210     for(j=0;j<(1<<i);j++)
   241       mdct_butterfly_generic(x+(points>>i)*j,points>>i,4<<(i+shift));
   211       mdct_butterfly_generic(x+(points>>i)*j,points>>i,4<<(i+shift));
   242   }
   212   }
   243 
   213   
   244   for(j=0;j<points;j+=32)
   214   for(j=0;j<points;j+=32)
   245     mdct_butterfly_32(x+j);
   215     mdct_butterfly_32(x+j);
   246 
       
   247 }
   216 }
   248 
   217 
   249 static unsigned char bitrev[16]={0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15};
   218 static unsigned char bitrev[16]={0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15};
   250 
   219 
   251 STIN int bitrev12(int x){
   220 STIN int bitrev12(int x){
   252   return bitrev[x>>8]|(bitrev[(x&0x0f0)>>4]<<4)|(((int)bitrev[x&0x00f])<<8);
   221   return bitrev[x>>8]|(bitrev[(x&0x0f0)>>4]<<4)|(((int)bitrev[x&0x00f])<<8);
   253 }
   222 }
   254 
   223 
   255 STIN void mdct_bitreverse(DATA_TYPE *x,int n,int step,int shift){
   224 STIN void mdct_bitreverse(DATA_TYPE *x,int n,int shift){
   256 
       
   257   int          bit   = 0;
   225   int          bit   = 0;
       
   226   DATA_TYPE   *w     = x+(n>>1);
       
   227 
       
   228   do{
       
   229     DATA_TYPE  b     = bitrev12(bit++);
       
   230     DATA_TYPE *xx    = x + (b>>shift);
       
   231     REG_TYPE  r;
       
   232 
       
   233                w    -= 2;
       
   234 
       
   235 	       if(w>xx){
       
   236 
       
   237 		 r      = xx[0];
       
   238 		 xx[0]  = w[0];
       
   239 		 w[0]   = r;
       
   240 		 
       
   241 		 r      = xx[1];
       
   242 		 xx[1]  = w[1];
       
   243 		 w[1]   = r;
       
   244 	       }
       
   245   }while(w>x);
       
   246 }
       
   247 
       
   248 STIN void mdct_step7(DATA_TYPE *x,int n,int step){
   258   DATA_TYPE   *w0    = x;
   249   DATA_TYPE   *w0    = x;
   259   DATA_TYPE   *w1    = x = w0+(n>>1);
   250   DATA_TYPE   *w1    = x+(n>>1);
   260   LOOKUP_T    *T = (step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1;
   251   LOOKUP_T    *T = (step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1;
   261   LOOKUP_T    *Ttop  = T+1024;
   252   LOOKUP_T    *Ttop  = T+1024;
   262   DATA_TYPE    r2;
   253   REG_TYPE     r0, r1, r2, r3;
   263 
   254   
   264   do{
   255   do{
   265     DATA_TYPE r3     = bitrev12(bit++);
   256 	      w1    -= 2;
   266     DATA_TYPE *x0    = x + ((r3 ^ 0xfff)>>shift) -1;
   257 
   267     DATA_TYPE *x1    = x + (r3>>shift);
   258               r0     = w0[0]  + w1[0];
   268 
   259               r1     = w1[1]  - w0[1];	      
   269     REG_TYPE  r0     = x0[0]  + x1[0];
   260 	      r2     = MULT32(r0, T[1]) + MULT32(r1, T[0]);
   270     REG_TYPE  r1     = x1[1]  - x0[1];
   261 	      r3     = MULT32(r1, T[1]) - MULT32(r0, T[0]);
   271 
   262 	      T+=step;
   272 	      XPROD32( r0, r1, T[1], T[0], &r2, &r3 ); T+=step;
   263 
   273 
   264 	      r0     = (w0[1] + w1[1])>>1;
   274 	      w1    -= 4;
   265               r1     = (w0[0] - w1[0])>>1;
   275 
       
   276 	      r0     = (x0[1] + x1[1])>>1;
       
   277               r1     = (x0[0] - x1[0])>>1;
       
   278 	      w0[0]  = r0     + r2;
   266 	      w0[0]  = r0     + r2;
   279 	      w0[1]  = r1     + r3;
   267 	      w0[1]  = r1     + r3;
   280 	      w1[2]  = r0     - r2;
       
   281 	      w1[3]  = r3     - r1;
       
   282 
       
   283 	      r3     = bitrev12(bit++);
       
   284               x0     = x + ((r3 ^ 0xfff)>>shift) -1;
       
   285               x1     = x + (r3>>shift);
       
   286 
       
   287               r0     = x0[0]  + x1[0];
       
   288               r1     = x1[1]  - x0[1];
       
   289 
       
   290 	      XPROD32( r0, r1, T[1], T[0], &r2, &r3 ); T+=step;
       
   291 
       
   292               r0     = (x0[1] + x1[1])>>1;
       
   293               r1     = (x0[0] - x1[0])>>1;
       
   294 	      w0[2]  = r0     + r2;
       
   295 	      w0[3]  = r1     + r3;
       
   296 	      w1[0]  = r0     - r2;
   268 	      w1[0]  = r0     - r2;
   297 	      w1[1]  = r3     - r1;
   269 	      w1[1]  = r3     - r1;
   298 
   270 
   299 	      w0    += 4;
   271 	      w0    += 2;
   300   }while(T<Ttop);
   272   }while(T<Ttop);
   301   do{
   273   do{
   302     DATA_TYPE r3     = bitrev12(bit++);
   274 	      w1    -= 2;
   303     DATA_TYPE *x0    = x + ((r3 ^ 0xfff)>>shift) -1;
   275 
   304     DATA_TYPE *x1    = x + (r3>>shift);
   276               r0     = w0[0]  + w1[0];
   305 
   277               r1     = w1[1]  - w0[1];	
   306     REG_TYPE  r0     = x0[0]  + x1[0];
   278 	      T-=step;
   307     REG_TYPE  r1     = x1[1]  - x0[1];
   279 	      r2     = MULT32(r0, T[0]) + MULT32(r1, T[1]);
   308 
   280 	      r3     = MULT32(r1, T[0]) - MULT32(r0, T[1]);      
   309 	      T-=step; XPROD32( r0, r1, T[0], T[1], &r2, &r3 );
   281 
   310 
   282 	      r0     = (w0[1] + w1[1])>>1;
   311 	      w1    -= 4;
   283               r1     = (w0[0] - w1[0])>>1;
   312 
       
   313 	      r0     = (x0[1] + x1[1])>>1;
       
   314               r1     = (x0[0] - x1[0])>>1;
       
   315 	      w0[0]  = r0     + r2;
   284 	      w0[0]  = r0     + r2;
   316 	      w0[1]  = r1     + r3;
   285 	      w0[1]  = r1     + r3;
   317 	      w1[2]  = r0     - r2;
       
   318 	      w1[3]  = r3     - r1;
       
   319 
       
   320 	      r3     = bitrev12(bit++);
       
   321               x0     = x + ((r3 ^ 0xfff)>>shift) -1;
       
   322               x1     = x + (r3>>shift);
       
   323 
       
   324               r0     = x0[0]  + x1[0];
       
   325               r1     = x1[1]  - x0[1];
       
   326 
       
   327 	      T-=step; XPROD32( r0, r1, T[0], T[1], &r2, &r3 );
       
   328 
       
   329               r0     = (x0[1] + x1[1])>>1;
       
   330               r1     = (x0[0] - x1[0])>>1;
       
   331 	      w0[2]  = r0     + r2;
       
   332 	      w0[3]  = r1     + r3;
       
   333 	      w1[0]  = r0     - r2;
   286 	      w1[0]  = r0     - r2;
   334 	      w1[1]  = r3     - r1;
   287 	      w1[1]  = r3     - r1;
   335 
   288 
   336 	      w0    += 4;
   289 	      w0    += 2;
   337   }while(w0<w1);
   290   }while(w0<w1);
   338 }
   291 }
   339 
   292 
   340 void mdct_backward(int n, DATA_TYPE *in, DATA_TYPE *out){
   293 STIN void mdct_step8(DATA_TYPE *x, int n, int step){
   341   int n2=n>>1;
       
   342   int n4=n>>2;
       
   343   DATA_TYPE *iX;
       
   344   DATA_TYPE *oX;
       
   345   LOOKUP_T *T;
   294   LOOKUP_T *T;
   346   LOOKUP_T *V;
   295   LOOKUP_T *V;
       
   296   DATA_TYPE *iX =x+(n>>1);
       
   297   step>>=2;
       
   298 
       
   299   switch(step) {
       
   300   default: 
       
   301     T=(step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1;
       
   302     do{
       
   303       REG_TYPE     r0  =  x[0];
       
   304       REG_TYPE     r1  = -x[1];
       
   305                    XPROD31( r0, r1, T[0], T[1], x, x+1); T+=step;
       
   306                    x  +=2;
       
   307     }while(x<iX);
       
   308     break;
       
   309   
       
   310   case 1: 
       
   311     {
       
   312       /* linear interpolation between table values: offset=0.5, step=1 */
       
   313       REG_TYPE    t0,t1,v0,v1,r0,r1;
       
   314       T         = sincos_lookup0;
       
   315       V         = sincos_lookup1;
       
   316       t0        = (*T++)>>1;
       
   317       t1        = (*T++)>>1;
       
   318       do{
       
   319 	    r0  =  x[0];
       
   320 	    r1  = -x[1];	
       
   321 	    t0 += (v0 = (*V++)>>1);
       
   322 	    t1 += (v1 = (*V++)>>1);
       
   323 	    XPROD31( r0, r1, t0, t1, x, x+1 );
       
   324 	    
       
   325 	    r0  =  x[2];
       
   326 	    r1  = -x[3];
       
   327 	    v0 += (t0 = (*T++)>>1);
       
   328 	    v1 += (t1 = (*T++)>>1);
       
   329 	    XPROD31( r0, r1, v0, v1, x+2, x+3 );
       
   330 	    
       
   331 	    x += 4;
       
   332       }while(x<iX);
       
   333       break;
       
   334     }
       
   335     
       
   336   case 0: 
       
   337     {
       
   338       /* linear interpolation between table values: offset=0.25, step=0.5 */
       
   339       REG_TYPE    t0,t1,v0,v1,q0,q1,r0,r1;
       
   340       T         = sincos_lookup0;
       
   341       V         = sincos_lookup1;
       
   342       t0        = *T++;
       
   343       t1        = *T++;
       
   344       do{
       
   345 
       
   346 	
       
   347 	v0  = *V++;
       
   348 	v1  = *V++;
       
   349 	t0 +=  (q0 = (v0-t0)>>2);
       
   350 	t1 +=  (q1 = (v1-t1)>>2);
       
   351 	r0  =  x[0];
       
   352 	r1  = -x[1];	
       
   353 	XPROD31( r0, r1, t0, t1, x, x+1 );
       
   354 	t0  = v0-q0;
       
   355 	t1  = v1-q1;
       
   356 	r0  =  x[2];
       
   357 	r1  = -x[3];	
       
   358 	XPROD31( r0, r1, t0, t1, x+2, x+3 );
       
   359 	
       
   360 	t0  = *T++;
       
   361 	t1  = *T++;
       
   362 	v0 += (q0 = (t0-v0)>>2);
       
   363 	v1 += (q1 = (t1-v1)>>2);
       
   364 	r0  =  x[4];
       
   365 	r1  = -x[5];	
       
   366 	XPROD31( r0, r1, v0, v1, x+4, x+5 );
       
   367 	v0  = t0-q0;
       
   368 	v1  = t1-q1;
       
   369 	r0  =  x[6];
       
   370 	r1  = -x[7];	
       
   371 	XPROD31( r0, r1, v0, v1, x+5, x+6 );
       
   372 
       
   373 	x+=8;
       
   374       }while(x<iX);
       
   375       break;
       
   376     }
       
   377   }
       
   378 }
       
   379 
       
   380 /* partial; doesn't perform last-step deinterleave/unrolling.  That
       
   381    can be done more efficiently during pcm output */
       
   382 void mdct_backward(int n, DATA_TYPE *in){
   347   int shift;
   383   int shift;
   348   int step;
   384   int step;
   349 
   385   
   350   for (shift=6;!(n&(1<<shift));shift++);
   386   for (shift=4;!(n&(1<<shift));shift++);
   351   shift=13-shift;
   387   shift=13-shift;
   352   step=2<<shift;
   388   step=2<<shift;
   353    
   389    
   354   /* rotate */
   390   presymmetry(in,n>>1,step);
   355 
   391   mdct_butterflies(in,n>>1,shift);
   356   iX            = in+n2-7;
   392   mdct_bitreverse(in,n,shift);
   357   oX            = out+n2+n4;
   393   mdct_step7(in,n,step);
   358   T             = sincos_lookup0;
   394   mdct_step8(in,n,step);
   359 
   395 }
   360   do{
   396 
   361     oX-=4;
   397 void mdct_shift_right(int n, DATA_TYPE *in, DATA_TYPE *right){
   362     XPROD31( iX[4], iX[6], T[0], T[1], &oX[2], &oX[3] ); T+=step;
   398   int i;
   363     XPROD31( iX[0], iX[2], T[0], T[1], &oX[0], &oX[1] ); T+=step;
   399   n>>=2;
   364     iX-=8;
   400   in+=1;
   365   }while(iX>=in+n4);
   401 
   366   do{
   402   for(i=0;i<n;i++)
   367     oX-=4;
   403     right[i]=in[i<<1];
   368     XPROD31( iX[4], iX[6], T[1], T[0], &oX[2], &oX[3] ); T-=step;
   404 }
   369     XPROD31( iX[0], iX[2], T[1], T[0], &oX[0], &oX[1] ); T-=step;
   405 
   370     iX-=8;
   406 void mdct_unroll_lap(int n0,int n1,
   371   }while(iX>=in);
   407 		     int lW,int W,
   372 
   408 		     DATA_TYPE *in,
   373   iX            = in+n2-8;
   409 		     DATA_TYPE *right,
   374   oX            = out+n2+n4;
   410 		     LOOKUP_T *w0,
   375   T             = sincos_lookup0;
   411 		     LOOKUP_T *w1,
   376 
   412 		     ogg_int16_t *out,
   377   do{
   413 		     int step,
   378     T+=step; XNPROD31( iX[6], iX[4], T[0], T[1], &oX[0], &oX[1] );
   414 		     int start, /* samples, this frame */
   379     T+=step; XNPROD31( iX[2], iX[0], T[0], T[1], &oX[2], &oX[3] );
   415 		     int end    /* samples, this frame */){
   380     iX-=8;
   416 
   381     oX+=4;
   417   DATA_TYPE *l=in+(W&&lW ? n1>>1 : n0>>1);
   382   }while(iX>=in+n4);
   418   DATA_TYPE *r=right+(lW ? n1>>2 : n0>>2);
   383   do{
   419   DATA_TYPE *post;
   384     T-=step; XNPROD31( iX[6], iX[4], T[1], T[0], &oX[0], &oX[1] );
   420   LOOKUP_T *wR=(W && lW ? w1+(n1>>1) : w0+(n0>>1));
   385     T-=step; XNPROD31( iX[2], iX[0], T[1], T[0], &oX[2], &oX[3] );
   421   LOOKUP_T *wL=(W && lW ? w1         : w0        );
   386     iX-=8;
   422 
   387     oX+=4;
   423   int preLap=(lW && !W ? (n1>>2)-(n0>>2) : 0 );
   388   }while(iX>=in);
   424   int halfLap=(lW && W ? (n1>>2) : (n0>>2) );
   389 
   425   int postLap=(!lW && W ? (n1>>2)-(n0>>2) : 0 );
   390   mdct_butterflies(out+n2,n2,shift);
   426   int n,off;
   391   mdct_bitreverse(out,n,step,shift);
   427 
   392 
   428   /* preceeding direct-copy lapping from previous frame, if any */
   393   /* rotate + window */
   429   if(preLap){
   394 
   430     n      = (end<preLap?end:preLap);
   395   step>>=2;
   431     off    = (start<preLap?start:preLap);
   396   {
   432     post   = r-n;
   397     DATA_TYPE *oX1=out+n2+n4;
   433     r     -= off;
   398     DATA_TYPE *oX2=out+n2+n4;
   434     start -= off;
   399     DATA_TYPE *iX =out;
   435     end   -= n;
   400 
   436     while(r>post){
   401     switch(step) {
   437       *out = CLIP_TO_15((*--r)>>9);
   402       default: {
   438       out+=step;
   403         T=(step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1;
       
   404         do{
       
   405           oX1-=4;
       
   406 	  XPROD31( iX[0], -iX[1], T[0], T[1], &oX1[3], &oX2[0] ); T+=step;
       
   407 	  XPROD31( iX[2], -iX[3], T[0], T[1], &oX1[2], &oX2[1] ); T+=step;
       
   408 	  XPROD31( iX[4], -iX[5], T[0], T[1], &oX1[1], &oX2[2] ); T+=step;
       
   409 	  XPROD31( iX[6], -iX[7], T[0], T[1], &oX1[0], &oX2[3] ); T+=step;
       
   410 	  oX2+=4;
       
   411 	  iX+=8;
       
   412 	}while(iX<oX1);
       
   413 	break;
       
   414       }
       
   415 
       
   416       case 1: {
       
   417         /* linear interpolation between table values: offset=0.5, step=1 */
       
   418 	REG_TYPE  t0,t1,v0,v1;
       
   419         T         = sincos_lookup0;
       
   420         V         = sincos_lookup1;
       
   421 	t0        = (*T++)>>1;
       
   422 	t1        = (*T++)>>1;
       
   423         do{
       
   424           oX1-=4;
       
   425 
       
   426 	  t0 += (v0 = (*V++)>>1);
       
   427 	  t1 += (v1 = (*V++)>>1);
       
   428 	  XPROD31( iX[0], -iX[1], t0, t1, &oX1[3], &oX2[0] );
       
   429 	  v0 += (t0 = (*T++)>>1);
       
   430 	  v1 += (t1 = (*T++)>>1);
       
   431 	  XPROD31( iX[2], -iX[3], v0, v1, &oX1[2], &oX2[1] );
       
   432 	  t0 += (v0 = (*V++)>>1);
       
   433 	  t1 += (v1 = (*V++)>>1);
       
   434 	  XPROD31( iX[4], -iX[5], t0, t1, &oX1[1], &oX2[2] );
       
   435 	  v0 += (t0 = (*T++)>>1);
       
   436 	  v1 += (t1 = (*T++)>>1);
       
   437 	  XPROD31( iX[6], -iX[7], v0, v1, &oX1[0], &oX2[3] );
       
   438 
       
   439 	  oX2+=4;
       
   440 	  iX+=8;
       
   441 	}while(iX<oX1);
       
   442 	break;
       
   443       }
       
   444 
       
   445       case 0: {
       
   446         /* linear interpolation between table values: offset=0.25, step=0.5 */
       
   447 	REG_TYPE  t0,t1,v0,v1,q0,q1;
       
   448         T         = sincos_lookup0;
       
   449         V         = sincos_lookup1;
       
   450 	t0        = *T++;
       
   451 	t1        = *T++;
       
   452         do{
       
   453           oX1-=4;
       
   454 
       
   455 	  v0  = *V++;
       
   456 	  v1  = *V++;
       
   457 	  t0 +=  (q0 = (v0-t0)>>2);
       
   458 	  t1 +=  (q1 = (v1-t1)>>2);
       
   459 	  XPROD31( iX[0], -iX[1], t0, t1, &oX1[3], &oX2[0] );
       
   460 	  t0  = v0-q0;
       
   461 	  t1  = v1-q1;
       
   462 	  XPROD31( iX[2], -iX[3], t0, t1, &oX1[2], &oX2[1] );
       
   463 
       
   464 	  t0  = *T++;
       
   465 	  t1  = *T++;
       
   466 	  v0 += (q0 = (t0-v0)>>2);
       
   467 	  v1 += (q1 = (t1-v1)>>2);
       
   468 	  XPROD31( iX[4], -iX[5], v0, v1, &oX1[1], &oX2[2] );
       
   469 	  v0  = t0-q0;
       
   470 	  v1  = t1-q1;
       
   471 	  XPROD31( iX[6], -iX[7], v0, v1, &oX1[0], &oX2[3] );
       
   472 
       
   473 	  oX2+=4;
       
   474 	  iX+=8;
       
   475 	}while(iX<oX1);
       
   476 	break;
       
   477       }
       
   478     }
   439     }
   479 
   440   }
   480     iX=out+n2+n4;
   441   
   481     oX1=out+n4;
   442   /* cross-lap; two halves due to wrap-around */
   482     oX2=oX1;
   443   n      = (end<halfLap?end:halfLap);
   483 
   444   off    = (start<halfLap?start:halfLap);
   484     do{
   445   post   = r-n;
   485       oX1-=4;
   446   r     -= off;
   486       iX-=4;
   447   l     -= off*2;
   487 
   448   start -= off;
   488       oX2[0] = -(oX1[3] = iX[3]);
   449   wR    -= off;
   489       oX2[1] = -(oX1[2] = iX[2]);
   450   wL    += off;
   490       oX2[2] = -(oX1[1] = iX[1]);
   451   end   -= n;
   491       oX2[3] = -(oX1[0] = iX[0]);
   452   while(r>post){
   492 
   453     l-=2;
   493       oX2+=4;
   454     *out = CLIP_TO_15((MULT31(*--r,*--wR) + MULT31(*l,*wL++))>>9);
   494     }while(oX2<iX);
   455     out+=step;
   495 
   456   }
   496     iX=out+n2+n4;
   457 
   497     oX1=out+n2+n4;
   458   n      = (end<halfLap?end:halfLap);
   498     oX2=out+n2;
   459   off    = (start<halfLap?start:halfLap);
   499 
   460   post   = r+n;
   500     do{
   461   r     += off;
   501       oX1-=4;
   462   l     += off*2;
   502       oX1[0]= iX[3];
   463   start -= off;
   503       oX1[1]= iX[2];
   464   end   -= n;
   504       oX1[2]= iX[1];
   465   wR    -= off;
   505       oX1[3]= iX[0];
   466   wL    += off;
   506       iX+=4;
   467   while(r<post){
   507     }while(oX1>oX2);
   468     *out = CLIP_TO_15((MULT31(*r++,*--wR) - MULT31(*l,*wL++))>>9);
   508   }
   469     out+=step;
   509 }
   470     l+=2;
   510 
   471   }
       
   472 
       
   473   /* preceeding direct-copy lapping from previous frame, if any */
       
   474   if(postLap){
       
   475     n      = (end<postLap?end:postLap);
       
   476     off    = (start<postLap?start:postLap);
       
   477     post   = l+n*2;
       
   478     l     += off*2;
       
   479     while(l<post){
       
   480       *out = CLIP_TO_15((-*l)>>9);
       
   481       out+=step;
       
   482       l+=2;
       
   483     }
       
   484   }
       
   485 }
       
   486