/************************************************************************
 * Copyright (C) 2002-2009, Xiph.org Foundation
 * Copyright (C) 2010, Robin Watts for Pinknoise Productions Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *     * Neither the names of the Xiph.org Foundation nor Pinknoise
 * Productions Ltd nor the names of its contributors may be used to
 * endorse or promote products derived from this software without
 * specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ************************************************************************

 function: arm7 and later wide math functions

 ************************************************************************/
#ifdef _ARM_ASSEM_

#if !defined(_V_WIDE_MATH) && !defined(_LOW_ACCURACY_)
#define _V_WIDE_MATH
static inline ogg_int32_t MULT32(ogg_int32_t x, ogg_int32_t y) {
  int lo,hi;
  asm volatile("smull\t%0, %1, %2, %3"
               : "=&r"(lo),"=&r"(hi)
               : "%r"(x),"r"(y)
               : "cc");
  return(hi);
}
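/* MULT32 returns the high word of the full 64-bit product, i.e. a
   portable sketch (assuming the ogg_int64_t from os_types.h) would be:

     return (ogg_int32_t)(((ogg_int64_t)x * y) >> 32);
*/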
static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) {
  return MULT32(x,y)<<1;
}
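/* MULT31 is the Q31 fixed-point multiply: doubling the high word
   re-aligns the redundant sign bit, so it agrees with
   (ogg_int32_t)(((ogg_int64_t)x * y) >> 31) in all but the lowest
   bit, which is left clear here. */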
static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) {
  int lo,hi;
  asm volatile("smull %0, %1, %2, %3\n\t"
               "movs  %0, %0, lsr #15\n\t"
               "adc   %1, %0, %1, lsl #17\n\t"
               : "=&r"(lo),"=&r"(hi)
               : "%r"(x),"r"(y)
               : "cc");
  return(hi);
}
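/* In MULT31_SHIFT15 the movs/adc pair reassembles the 64-bit product
   shifted right by 15: lo>>15 supplies the low 17 bits, hi<<17 the
   rest, and the carry (bit 14 of lo, the last bit shifted out) rounds
   to nearest.  A portable sketch, assuming ogg_int64_t:

     return (ogg_int32_t)((((ogg_int64_t)x * y) + (1 << 14)) >> 15);
*/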
#define MB() asm volatile ("" : : : "memory")
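/* The "memory" clobber makes MB() a compiler-level barrier; it is
   used below, presumably to keep the store to *x from being moved
   past the store to *y when the output pointers may alias. */

/* x = (a*t + b*v)>>32, y = (b*t - a*v)>>32 */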
static inline void XPROD32(ogg_int32_t  a, ogg_int32_t  b,
                           ogg_int32_t  t, ogg_int32_t  v,
                           ogg_int32_t *x, ogg_int32_t *y)
{
  int x1, y1, l;
  asm( "smull %0, %1, %4, %6\n\t"
       "smlal %0, %1, %5, %7\n\t"
       "rsb   %3, %4, #0\n\t"
       "smull %0, %2, %5, %6\n\t"
       "smlal %0, %2, %3, %7"
       : "=&r" (l), "=&r" (x1), "=&r" (y1), "=r" (a)
       : "3" (a), "r" (b), "r" (t), "r" (v)
       : "cc" );
  *x = x1;
  MB();
  *y = y1;
}
/* x = (a*t + b*v)>>31, y = (b*t - a*v)>>31 */
static inline void XPROD31(ogg_int32_t  a, ogg_int32_t  b,
                           ogg_int32_t  t, ogg_int32_t  v,
                           ogg_int32_t *x, ogg_int32_t *y)
{
  int x1, y1, l;
  asm( "smull %0, %1, %4, %6\n\t"
       "smlal %0, %1, %5, %7\n\t"
       "rsb   %3, %4, #0\n\t"
       "smull %0, %2, %5, %6\n\t"
       "smlal %0, %2, %3, %7"
       : "=&r" (l), "=&r" (x1), "=&r" (y1), "=r" (a)
       : "3" (a), "r" (b), "r" (t), "r" (v)
       : "cc" );
  *x = x1 << 1;
  MB();
  *y = y1 << 1;
}
/* x = (a*t - b*v)>>31, y = (b*t + a*v)>>31 */
static inline void XNPROD31(ogg_int32_t  a, ogg_int32_t  b,
                            ogg_int32_t  t, ogg_int32_t  v,
                            ogg_int32_t *x, ogg_int32_t *y)
{
  int x1, y1, l;
  asm( "rsb   %2, %4, #0\n\t"
       "smull %0, %1, %3, %5\n\t"
       "smlal %0, %1, %2, %6\n\t"
       "smull %0, %2, %4, %5\n\t"
       "smlal %0, %2, %3, %6"
       : "=&r" (l), "=&r" (x1), "=&r" (y1)
       : "r" (a), "r" (b), "r" (t), "r" (v)
       : "cc" );
  *x = x1 << 1;
  MB();
  *y = y1 << 1;
}
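/* A portable sketch of XPROD31 (XNPROD31 is identical with the signs
   on the v terms swapped), assuming ogg_int64_t; it agrees with the
   asm above except in the lowest output bit, which the asm leaves
   clear:

     *x = (ogg_int32_t)(((ogg_int64_t)a*t + (ogg_int64_t)b*v) >> 31);
     *y = (ogg_int32_t)(((ogg_int64_t)b*t - (ogg_int64_t)a*v) >> 31);
*/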
#endif

#ifndef _V_CLIP_MATH
#define _V_CLIP_MATH
static inline ogg_int32_t CLIP_TO_15(ogg_int32_t x) {
  int tmp;
  asm volatile("subs  %1, %0, #32768\n\t"
               "movpl %0, #0x7f00\n\t"
               "orrpl %0, %0, #0xff\n"
               "adds  %1, %0, #32768\n\t"
               "movmi %0, #0x8000"
               : "+r"(x),"=r"(tmp)
               :
               : "cc");
  return(x);
}
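/* CLIP_TO_15 saturates to the signed 16-bit range; a sketch:

     if (x >  32767) x =  32767;
     if (x < -32768) x = -32768;

   The positive clamp is assembled as 0x7f00|0xff = 0x7fff because ARM
   immediates are 8 bits rotated; the negative clamp stores 0x8000,
   which reads as -32768 once the sample is truncated to 16 bits. */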
#endif

#ifndef _V_LSP_MATH_ASM
#define _V_LSP_MATH_ASM
static inline void lsp_loop_asm(ogg_uint32_t *qip,ogg_uint32_t *pip,
                                ogg_int32_t *qexpp,
                                ogg_int32_t *ilsp,ogg_int32_t wi,
                                ogg_int32_t m){

  ogg_uint32_t qi=*qip,pi=*pip;
  ogg_int32_t qexp=*qexpp;

  asm("mov     r0,%3;"
      "mov     r1,%5,asr#1;"
      "add     r0,r0,r1,lsl#3;"
      "1:"
      "ldmdb   r0!,{r1,r3};"
      "subs    r1,r1,%4;"          //ilsp[j]-wi
      "rsbmi   r1,r1,#0;"          //labs(ilsp[j]-wi)
      "umull   %0,r2,r1,%0;"       //qi*=labs(ilsp[j]-wi)

      "subs    r1,r3,%4;"          //ilsp[j+1]-wi
      "rsbmi   r1,r1,#0;"          //labs(ilsp[j+1]-wi)
      "umull   %1,r3,r1,%1;"       //pi*=labs(ilsp[j+1]-wi)

      "cmn     r2,r3;"             // shift down 16?
      "beq     0f;"
      "add     %2,%2,#16;"
      "mov     %0,%0,lsr #16;"
      "orr     %0,%0,r2,lsl #16;"
      "mov     %1,%1,lsr #16;"
      "orr     %1,%1,r3,lsl #16;"
      "0:"
      "cmp     r0,%3;\n"
      "bhi     1b;\n"

      // odd filter asymmetry
      "ands    r0,%5,#1;\n"
      "beq     2f;\n"
      "add     r0,%3,%5,lsl#2;\n"

      "ldr     r1,[r0,#-4];\n"
      "mov     r0,#0x4000;\n"

      "subs    r1,r1,%4;\n"        //ilsp[j]-wi
      "rsbmi   r1,r1,#0;\n"        //labs(ilsp[j]-wi)
      "umull   %0,r2,r1,%0;\n"     //qi*=labs(ilsp[j]-wi)
      "umull   %1,r3,r0,%1;\n"     //pi*=labs(ilsp[j+1]-wi)

      "cmn     r2,r3;\n"           // shift down 16?
      "beq     2f;\n"
      "add     %2,%2,#16;\n"
      "mov     %0,%0,lsr #16;\n"
      "orr     %0,%0,r2,lsl #16;\n"
      "mov     %1,%1,lsr #16;\n"
      "orr     %1,%1,r3,lsl #16;\n"

      //qi=(pi>>shift)*labs(ilsp[j]-wi);
      //pi=(qi>>shift)*labs(ilsp[j+1]-wi);
      //qexp+=shift;

      //}

      /* normalize to max 16 sig figs */
      "2:"
      "mov     r2,#0;"
      "orr     r1,%0,%1;"
      "tst     r1,#0xff000000;"
      "addne   r2,r2,#8;"
      "movne   r1,r1,lsr #8;"
      "tst     r1,#0x00f00000;"
      "addne   r2,r2,#4;"
      "movne   r1,r1,lsr #4;"
      "tst     r1,#0x000c0000;"
      "addne   r2,r2,#2;"
      "movne   r1,r1,lsr #2;"
      "tst     r1,#0x00020000;"
      "addne   r2,r2,#1;"
      "movne   r1,r1,lsr #1;"
      "tst     r1,#0x00010000;"
      "addne   r2,r2,#1;"
      "mov     %0,%0,lsr r2;"
      "mov     %1,%1,lsr r2;"
      "add     %2,%2,r2;"

      : "+r"(qi),"+r"(pi),"+r"(qexp)
      : "r"(ilsp),"r"(wi),"r"(m)
      : "r0","r1","r2","r3","cc");

  *qip=qi;
  *pip=pi;
  *qexpp=qexp;
}
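/* lsp_loop_asm accumulates the paired LSP products (presumably for
   Tremor's lsp-to-curve computation):

     qi *= labs(ilsp[j]   - wi);   for even j
     pi *= labs(ilsp[j+1] - wi);   for odd  j

   Whenever either 64-bit product grows into its high word, both are
   shifted down 16 bits and qexp is charged 16.  For odd m the last
   even term multiplies qi alone while pi takes the constant 0x4000,
   and the tail normalizes both down to at most 16 significant bits. */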
static inline void lsp_norm_asm(ogg_uint32_t *qip,ogg_int32_t *qexpp){

  ogg_uint32_t qi=*qip;
  ogg_int32_t qexp=*qexpp;

  asm("tst     %0,#0x0000ff00;"
      "moveq   %0,%0,lsl #8;"
      "subeq   %1,%1,#8;"
      "tst     %0,#0x0000f000;"
      "moveq   %0,%0,lsl #4;"
      "subeq   %1,%1,#4;"
      "tst     %0,#0x0000c000;"
      "moveq   %0,%0,lsl #2;"
      "subeq   %1,%1,#2;"
      "tst     %0,#0x00008000;"
      "moveq   %0,%0,lsl #1;"
      "subeq   %1,%1,#1;"
      : "+r"(qi),"+r"(qexp)
      :
      : "cc");
  *qip=qi;
  *qexpp=qexp;
}
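/* lsp_norm_asm left-justifies qi within 16 bits using a branchless
   binary search over the shift amount, charging the shifts to qexp.
   A sketch, assuming qi is nonzero and already below 0x10000:

     while (!(qi & 0x8000)) { qi <<= 1; qexp--; }
*/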
#endif
#endif