misc/libtremor/tremor/asm_arm.h
branch: hedgeroid
changeset: 6045:9a7cc0f29430
parent: 6043:9bd2d6b1ba52
/********************************************************************
 *                                                                  *
 * THIS FILE IS PART OF THE OggVorbis 'TREMOR' CODEC SOURCE CODE.   *
 *                                                                  *
 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002    *
 * BY THE Xiph.Org FOUNDATION http://www.xiph.org/                  *
 *                                                                  *
 ********************************************************************

 function: arm7 and later wide math functions

 ********************************************************************/

#ifdef _ARM_ASSEM_

#if !defined(_V_WIDE_MATH) && !defined(_LOW_ACCURACY_)
#define _V_WIDE_MATH
       
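/* 32x32->64 bit multiply via smull, returning the high word of the
   product, i.e. (x*y)>>32. */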
static inline ogg_int32_t MULT32(ogg_int32_t x, ogg_int32_t y) {
  int lo,hi;
  asm volatile("smull\t%0, %1, %2, %3"
               : "=&r"(lo),"=&r"(hi)
               : "%r"(x),"r"(y)
               : "cc");
  return(hi);
}
       
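/* Multiply of two Q31 fixed-point values: (x*y)>>31. */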
static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) {
  return MULT32(x,y)<<1;
}
       
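/* (x*y)>>15 on the full 64-bit product; the movs/adc pair folds the
   last bit shifted out back in as rounding via the carry flag. */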
static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) {
  int lo,hi;
  asm volatile("smull\t%0, %1, %2, %3\n\t"
               "movs\t%0, %0, lsr #15\n\t"
               "adc\t%1, %0, %1, lsl #17\n\t"
               : "=&r"(lo),"=&r"(hi)
               : "%r"(x),"r"(y)
               : "cc");
  return(hi);
}
       
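/* Compiler memory barrier: stops the compiler from reordering memory
   accesses across this point; no instruction is emitted. */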
#define MB() asm volatile ("" : : : "memory")
       
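/* Cross product: *x = (a*t + b*v)>>32, *y = (b*t - a*v)>>32. */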
static inline void XPROD32(ogg_int32_t  a, ogg_int32_t  b,
                           ogg_int32_t  t, ogg_int32_t  v,
                           ogg_int32_t *x, ogg_int32_t *y)
{
  int x1, y1, l;
  asm( "smull\t%0, %1, %4, %6\n\t"
       "smlal\t%0, %1, %5, %7\n\t"
       "rsb\t%3, %4, #0\n\t"
       "smull\t%0, %2, %5, %6\n\t"
       "smlal\t%0, %2, %3, %7"
       : "=&r" (l), "=&r" (x1), "=&r" (y1), "=r" (a)
       : "3" (a), "r" (b), "r" (t), "r" (v)
       : "cc" );
  *x = x1;
  MB();
  *y = y1;
}
       
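/* As XPROD32, but for Q31 operands: *x = (a*t + b*v)>>31,
   *y = (b*t - a*v)>>31. */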
static inline void XPROD31(ogg_int32_t  a, ogg_int32_t  b,
                           ogg_int32_t  t, ogg_int32_t  v,
                           ogg_int32_t *x, ogg_int32_t *y)
{
  int x1, y1, l;
  asm( "smull\t%0, %1, %4, %6\n\t"
       "smlal\t%0, %1, %5, %7\n\t"
       "rsb\t%3, %4, #0\n\t"
       "smull\t%0, %2, %5, %6\n\t"
       "smlal\t%0, %2, %3, %7"
       : "=&r" (l), "=&r" (x1), "=&r" (y1), "=r" (a)
       : "3" (a), "r" (b), "r" (t), "r" (v)
       : "cc" );
  *x = x1 << 1;
  MB();
  *y = y1 << 1;
}
       
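/* Negated cross product: *x = (a*t - b*v)>>31, *y = (b*t + a*v)>>31. */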
static inline void XNPROD31(ogg_int32_t  a, ogg_int32_t  b,
                            ogg_int32_t  t, ogg_int32_t  v,
                            ogg_int32_t *x, ogg_int32_t *y)
{
  int x1, y1, l;
  asm( "rsb\t%2, %4, #0\n\t"
       "smull\t%0, %1, %3, %5\n\t"
       "smlal\t%0, %1, %2, %6\n\t"
       "smull\t%0, %2, %4, %5\n\t"
       "smlal\t%0, %2, %3, %6"
       : "=&r" (l), "=&r" (x1), "=&r" (y1)
       : "r" (a), "r" (b), "r" (t), "r" (v)
       : "cc" );
  *x = x1 << 1;
  MB();
  *y = y1 << 1;
}

#endif

#ifndef _V_CLIP_MATH
#define _V_CLIP_MATH
       
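/* Saturate x to a 16-bit sample.  Note that the negative clamp loads
   the immediate 0x8000 (+32768 as a full word), so only the low 16
   bits of the result carry the saturated value. */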
static inline ogg_int32_t CLIP_TO_15(ogg_int32_t x) {
  int tmp;
  asm volatile("subs\t%1, %0, #32768\n\t"
               "movpl\t%0, #0x7f00\n\t"
               "orrpl\t%0, %0, #0xff\n"
               "adds\t%1, %0, #32768\n\t"
               "movmi\t%0, #0x8000"
               : "+r"(x),"=r"(tmp)
               :
               : "cc");
  return(x);
}

#endif

#ifndef _V_LSP_MATH_ASM
#define _V_LSP_MATH_ASM
       
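/* Inner loop of the LSP-to-curve (floor 0) computation: walks ilsp[]
   in pairs from the top down, accumulating qi *= labs(ilsp[j]-wi) and
   pi *= labs(ilsp[j+1]-wi) over m coefficients, dropping 16 bits from
   both products when they grow into their high words (adding 16 to
   the shared exponent qexp), handling the odd final coefficient
   separately, then renormalizing to at most 16 significant bits. */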
static inline void lsp_loop_asm(ogg_uint32_t *qip,ogg_uint32_t *pip,
                                ogg_int32_t *qexpp,
                                ogg_int32_t *ilsp,ogg_int32_t wi,
                                ogg_int32_t m){

  ogg_uint32_t qi=*qip,pi=*pip;
  ogg_int32_t qexp=*qexpp;

  asm("mov     r0,%3;"
      "mov     r1,%5,asr#1;"
      "add     r0,r0,r1,lsl#3;"
      "1:"

      "ldmdb   r0!,{r1,r3};"
      "subs    r1,r1,%4;"          //ilsp[j]-wi
      "rsbmi   r1,r1,#0;"          //labs(ilsp[j]-wi)
      "umull   %0,r2,r1,%0;"       //qi*=labs(ilsp[j]-wi)

      "subs    r1,r3,%4;"          //ilsp[j+1]-wi
      "rsbmi   r1,r1,#0;"          //labs(ilsp[j+1]-wi)
      "umull   %1,r3,r1,%1;"       //pi*=labs(ilsp[j+1]-wi)

      "cmn     r2,r3;"             // shift down 16?
      "beq     0f;"
      "add     %2,%2,#16;"
      "mov     %0,%0,lsr #16;"
      "orr     %0,%0,r2,lsl #16;"
      "mov     %1,%1,lsr #16;"
      "orr     %1,%1,r3,lsl #16;"
      "0:"
      "cmp     r0,%3;\n"
      "bhi     1b;\n"

      // odd filter asymmetry
      "ands    r0,%5,#1;\n"
      "beq     2f;\n"
      "add     r0,%3,%5,lsl#2;\n"

      "ldr     r1,[r0,#-4];\n"
      "mov     r0,#0x4000;\n"

      "subs    r1,r1,%4;\n"          //ilsp[j]-wi
      "rsbmi   r1,r1,#0;\n"          //labs(ilsp[j]-wi)
      "umull   %0,r2,r1,%0;\n"       //qi*=labs(ilsp[j]-wi)
      "umull   %1,r3,r0,%1;\n"       //pi*=0x4000

      "cmn     r2,r3;\n"             // shift down 16?
      "beq     2f;\n"
      "add     %2,%2,#16;\n"
      "mov     %0,%0,lsr #16;\n"
      "orr     %0,%0,r2,lsl #16;\n"
      "mov     %1,%1,lsr #16;\n"
      "orr     %1,%1,r3,lsl #16;\n"

      //qi=(pi>>shift)*labs(ilsp[j]-wi);
      //pi=(qi>>shift)*labs(ilsp[j+1]-wi);
      //qexp+=shift;

      //}

      /* normalize to max 16 sig figs */
      "2:"
      "mov     r2,#0;"
      "orr     r1,%0,%1;"
      "tst     r1,#0xff000000;"
      "addne   r2,r2,#8;"
      "movne   r1,r1,lsr #8;"
      "tst     r1,#0x00f00000;"
      "addne   r2,r2,#4;"
      "movne   r1,r1,lsr #4;"
      "tst     r1,#0x000c0000;"
      "addne   r2,r2,#2;"
      "movne   r1,r1,lsr #2;"
      "tst     r1,#0x00020000;"
      "addne   r2,r2,#1;"
      "movne   r1,r1,lsr #1;"
      "tst     r1,#0x00010000;"
      "addne   r2,r2,#1;"
      "mov     %0,%0,lsr r2;"
      "mov     %1,%1,lsr r2;"
      "add     %2,%2,r2;"

      : "+r"(qi),"+r"(pi),"+r"(qexp)
      : "r"(ilsp),"r"(wi),"r"(m)
      : "r0","r1","r2","r3","cc");

  *qip=qi;
  *pip=pi;
  *qexpp=qexp;
}
       
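/* Left-justify qi (assumed to fit in 16 bits) so that bit 15 is set,
   decrementing qexp by the number of positions shifted. */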
   217 static inline void lsp_norm_asm(ogg_uint32_t *qip,ogg_int32_t *qexpp){
       
   218 
       
   219   ogg_uint32_t qi=*qip;
       
   220   ogg_int32_t qexp=*qexpp;
       
   221 
       
   222   asm("tst     %0,#0x0000ff00;"
       
   223       "moveq   %0,%0,lsl #8;"
       
   224       "subeq   %1,%1,#8;"
       
   225       "tst     %0,#0x0000f000;"
       
   226       "moveq   %0,%0,lsl #4;"
       
   227       "subeq   %1,%1,#4;"
       
   228       "tst     %0,#0x0000c000;"
       
   229       "moveq   %0,%0,lsl #2;"
       
   230       "subeq   %1,%1,#2;"
       
   231       "tst     %0,#0x00008000;"
       
   232       "moveq   %0,%0,lsl #1;"
       
   233       "subeq   %1,%1,#1;"
       
   234       : "+r"(qi),"+r"(qexp)
       
   235       :
       
   236       : "cc");
       
   237   *qip=qi;
       
   238   *qexpp=qexp;
       
   239 }
       
   240 
       
   241 #endif
       
   242 #endif
       
   243