/*
 * HEVC video decoder
 *
 * Copyright (C) 2012 - 2013 Guillaume Martres
 * Copyright (C) 2013 - 2014 Pierre-Edouard Lepere
 *
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
  #ifndef AVCODEC_X86_HEVCDSP_H
  #define AVCODEC_X86_HEVCDSP_H

  #include <stddef.h>
  #include <stdint.h>

  /*
   * This header only declares routines; their implementations are provided
   * outside this file.  Symbol names follow the pattern
   *     ff_hevc_put_hevc_[<variant>_]<name>_<D>_<opt>
   * where <variant> is one of bi, uni, uni_w, bi_w (or absent), and <D> / <opt>
   * are the second and third macro arguments used below (8/10/12 and
   * sse4/avx2 in this file).
   */

  /*
   * Store the five variants of one routine (plain, bi, uni, uni_w, bi_w) into
   * the tables dst, dst_bi, dst_uni, dst_uni_w and dst_bi_w at
   * [idx1][idx2][idx3].  The routine names are assembled from name, D and opt
   * exactly as PEL_PROTOTYPE assembles them.
   *
   * The assignments are wrapped in do { ... } while (0) so that the macro
   * expands to a single statement and stays intact under an unbraced if/else.
   * Invoke it as a statement, followed by a semicolon.
   */
  #define PEL_LINK(dst, idx1, idx2, idx3, name, D, opt)                                                    \
      do {                                                                                                 \
          dst[idx1][idx2][idx3]          = ff_hevc_put_hevc_       ## name ## _ ## D ## _ ## opt;          \
          dst ## _bi[idx1][idx2][idx3]    = ff_hevc_put_hevc_bi_    ## name ## _ ## D ## _ ## opt;          \
          dst ## _uni[idx1][idx2][idx3]   = ff_hevc_put_hevc_uni_   ## name ## _ ## D ## _ ## opt;          \
          dst ## _uni_w[idx1][idx2][idx3] = ff_hevc_put_hevc_uni_w_ ## name ## _ ## D ## _ ## opt;          \
          dst ## _bi_w[idx1][idx2][idx3]  = ff_hevc_put_hevc_bi_w_  ## name ## _ ## D ## _ ## opt;          \
      } while (0)

  /*
   * Declare the five variants (plain, bi, uni, uni_w, bi_w) of the routine
   * identified by name, D and opt.  The expansion has no trailing semicolon;
   * the caller supplies it.
   */
  #define PEL_PROTOTYPE(name, D, opt)                                                                      \
      void ff_hevc_put_hevc_ ## name ## _ ## D ## _ ## opt(                                                \
          int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride,                                               \
          int height, intptr_t mx, intptr_t my, int width);                                                \
      void ff_hevc_put_hevc_bi_ ## name ## _ ## D ## _ ## opt(                                             \
          uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,                        \
          int16_t *src2, int height, intptr_t mx, intptr_t my, int width);                                 \
      void ff_hevc_put_hevc_uni_ ## name ## _ ## D ## _ ## opt(                                            \
          uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,                        \
          int height, intptr_t mx, intptr_t my, int width);                                                \
      void ff_hevc_put_hevc_uni_w_ ## name ## _ ## D ## _ ## opt(                                          \
          uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,                        \
          int height, int denom, int wx, int ox,                                                           \
          intptr_t mx, intptr_t my, int width);                                                            \
      void ff_hevc_put_hevc_bi_w_ ## name ## _ ## D ## _ ## opt(                                           \
          uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,                        \
          int16_t *src2, int height, int denom, int wx0, int wx1, int ox0, int ox1,                        \
          intptr_t mx, intptr_t my, int width)

  ///////////////////////////////////////////////////////////////////////////////
  // MC functions
  ///////////////////////////////////////////////////////////////////////////////

  /* Apply PEL_PROTOTYPE for the size suffixes 4, 6, 8, 12, 16, 24, 32, 48, 64. */
  #define EPEL_PROTOTYPES(fname, bitd, opt) \
      PEL_PROTOTYPE(fname##4,  bitd, opt);  \
      PEL_PROTOTYPE(fname##6,  bitd, opt);  \
      PEL_PROTOTYPE(fname##8,  bitd, opt);  \
      PEL_PROTOTYPE(fname##12, bitd, opt);  \
      PEL_PROTOTYPE(fname##16, bitd, opt);  \
      PEL_PROTOTYPE(fname##24, bitd, opt);  \
      PEL_PROTOTYPE(fname##32, bitd, opt);  \
      PEL_PROTOTYPE(fname##48, bitd, opt);  \
      PEL_PROTOTYPE(fname##64, bitd, opt)

  /* Same as EPEL_PROTOTYPES, but without the size suffix 6. */
  #define QPEL_PROTOTYPES(fname, bitd, opt) \
      PEL_PROTOTYPE(fname##4,  bitd, opt);  \
      PEL_PROTOTYPE(fname##8,  bitd, opt);  \
      PEL_PROTOTYPE(fname##12, bitd, opt);  \
      PEL_PROTOTYPE(fname##16, bitd, opt);  \
      PEL_PROTOTYPE(fname##24, bitd, opt);  \
      PEL_PROTOTYPE(fname##32, bitd, opt);  \
      PEL_PROTOTYPE(fname##48, bitd, opt);  \
      PEL_PROTOTYPE(fname##64, bitd, opt)

  /*
   * Declare the uni_w<width> and bi_w<width> weighting routines for one
   * width / bitd / opt combination.  Unlike the PEL_PROTOTYPE routines these
   * take int16_t sources and no mx/my/width arguments.
   */
  #define WEIGHTING_PROTOTYPE(width, bitd, opt)                                                            \
      void ff_hevc_put_hevc_uni_w ## width ## _ ## bitd ## _ ## opt(                                       \
          uint8_t *dst, ptrdiff_t dststride, int16_t *_src,                                                \
          int height, int denom, int _wx, int _ox);                                                        \
      void ff_hevc_put_hevc_bi_w ## width ## _ ## bitd ## _ ## opt(                                        \
          uint8_t *dst, ptrdiff_t dststride, int16_t *_src, int16_t *_src2,                                \
          int height, int denom, int _wx0, int _wx1, int _ox0, int _ox1)

  /* Apply WEIGHTING_PROTOTYPE for the widths 2, 4, 6, 8, 12, 16, 24, 32, 48, 64. */
  #define WEIGHTING_PROTOTYPES(bitd, opt)  \
      WEIGHTING_PROTOTYPE(2,  bitd, opt);  \
      WEIGHTING_PROTOTYPE(4,  bitd, opt);  \
      WEIGHTING_PROTOTYPE(6,  bitd, opt);  \
      WEIGHTING_PROTOTYPE(8,  bitd, opt);  \
      WEIGHTING_PROTOTYPE(12, bitd, opt);  \
      WEIGHTING_PROTOTYPE(16, bitd, opt);  \
      WEIGHTING_PROTOTYPE(24, bitd, opt);  \
      WEIGHTING_PROTOTYPE(32, bitd, opt);  \
      WEIGHTING_PROTOTYPE(48, bitd, opt);  \
      WEIGHTING_PROTOTYPE(64, bitd, opt)

  ///////////////////////////////////////////////////////////////////////////////
  // QPEL_PIXELS EPEL_PIXELS
  ///////////////////////////////////////////////////////////////////////////////
  EPEL_PROTOTYPES(pel_pixels,  8, sse4);
  EPEL_PROTOTYPES(pel_pixels, 10, sse4);
  EPEL_PROTOTYPES(pel_pixels, 12, sse4);

  /* The avx2 pel_pixels routines are declared one by one: only some of the
   * variant / size combinations are listed for this opt. */
  void ff_hevc_put_hevc_pel_pixels16_8_avx2(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width);
  void ff_hevc_put_hevc_pel_pixels24_8_avx2(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width);
  void ff_hevc_put_hevc_pel_pixels32_8_avx2(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width);
  void ff_hevc_put_hevc_pel_pixels48_8_avx2(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width);
  void ff_hevc_put_hevc_pel_pixels64_8_avx2(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width);

  void ff_hevc_put_hevc_pel_pixels16_10_avx2(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width);
  void ff_hevc_put_hevc_pel_pixels24_10_avx2(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width);
  void ff_hevc_put_hevc_pel_pixels32_10_avx2(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width);
  void ff_hevc_put_hevc_pel_pixels48_10_avx2(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width);
  void ff_hevc_put_hevc_pel_pixels64_10_avx2(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width);

  void ff_hevc_put_hevc_uni_pel_pixels32_8_avx2(uint8_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width);
  void ff_hevc_put_hevc_uni_pel_pixels48_8_avx2(uint8_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width);
  void ff_hevc_put_hevc_uni_pel_pixels64_8_avx2(uint8_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width);
  void ff_hevc_put_hevc_uni_pel_pixels96_8_avx2(uint8_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width);  /* used for 10bit */
  void ff_hevc_put_hevc_uni_pel_pixels128_8_avx2(uint8_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width); /* used for 10bit */

  void ff_hevc_put_hevc_bi_pel_pixels16_8_avx2(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2, int height, intptr_t mx, intptr_t my, int width);
  void ff_hevc_put_hevc_bi_pel_pixels24_8_avx2(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2, int height, intptr_t mx, intptr_t my, int width);
  void ff_hevc_put_hevc_bi_pel_pixels32_8_avx2(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2, int height, intptr_t mx, intptr_t my, int width);
  void ff_hevc_put_hevc_bi_pel_pixels48_8_avx2(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2, int height, intptr_t mx, intptr_t my, int width);
  void ff_hevc_put_hevc_bi_pel_pixels64_8_avx2(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2, int height, intptr_t mx, intptr_t my, int width);

  void ff_hevc_put_hevc_bi_pel_pixels16_10_avx2(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2, int height, intptr_t mx, intptr_t my, int width);
  void ff_hevc_put_hevc_bi_pel_pixels24_10_avx2(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2, int height, intptr_t mx, intptr_t my, int width);
  void ff_hevc_put_hevc_bi_pel_pixels32_10_avx2(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2, int height, intptr_t mx, intptr_t my, int width);
  void ff_hevc_put_hevc_bi_pel_pixels48_10_avx2(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2, int height, intptr_t mx, intptr_t my, int width);
  void ff_hevc_put_hevc_bi_pel_pixels64_10_avx2(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2, int height, intptr_t mx, intptr_t my, int width);

  ///////////////////////////////////////////////////////////////////////////////
  // EPEL
  ///////////////////////////////////////////////////////////////////////////////
  EPEL_PROTOTYPES(epel_h,   8, sse4);
  EPEL_PROTOTYPES(epel_h,  10, sse4);
  EPEL_PROTOTYPES(epel_h,  12, sse4);

  EPEL_PROTOTYPES(epel_v,   8, sse4);
  EPEL_PROTOTYPES(epel_v,  10, sse4);
  EPEL_PROTOTYPES(epel_v,  12, sse4);

  EPEL_PROTOTYPES(epel_hv,  8, sse4);
  EPEL_PROTOTYPES(epel_hv, 10, sse4);
  EPEL_PROTOTYPES(epel_hv, 12, sse4);

  /* avx2: only the size suffixes 16..64 at 8 and 10 are declared. */
  PEL_PROTOTYPE(epel_h16,  8, avx2);
  PEL_PROTOTYPE(epel_h24,  8, avx2);
  PEL_PROTOTYPE(epel_h32,  8, avx2);
  PEL_PROTOTYPE(epel_h48,  8, avx2);
  PEL_PROTOTYPE(epel_h64,  8, avx2);

  PEL_PROTOTYPE(epel_h16, 10, avx2);
  PEL_PROTOTYPE(epel_h24, 10, avx2);
  PEL_PROTOTYPE(epel_h32, 10, avx2);
  PEL_PROTOTYPE(epel_h48, 10, avx2);
  PEL_PROTOTYPE(epel_h64, 10, avx2);

  PEL_PROTOTYPE(epel_v16,  8, avx2);
  PEL_PROTOTYPE(epel_v24,  8, avx2);
  PEL_PROTOTYPE(epel_v32,  8, avx2);
  PEL_PROTOTYPE(epel_v48,  8, avx2);
  PEL_PROTOTYPE(epel_v64,  8, avx2);

  PEL_PROTOTYPE(epel_v16, 10, avx2);
  PEL_PROTOTYPE(epel_v24, 10, avx2);
  PEL_PROTOTYPE(epel_v32, 10, avx2);
  PEL_PROTOTYPE(epel_v48, 10, avx2);
  PEL_PROTOTYPE(epel_v64, 10, avx2);

  PEL_PROTOTYPE(epel_hv16,  8, avx2);
  PEL_PROTOTYPE(epel_hv24,  8, avx2);
  PEL_PROTOTYPE(epel_hv32,  8, avx2);
  PEL_PROTOTYPE(epel_hv48,  8, avx2);
  PEL_PROTOTYPE(epel_hv64,  8, avx2);

  PEL_PROTOTYPE(epel_hv16, 10, avx2);
  PEL_PROTOTYPE(epel_hv24, 10, avx2);
  PEL_PROTOTYPE(epel_hv32, 10, avx2);
  PEL_PROTOTYPE(epel_hv48, 10, avx2);
  PEL_PROTOTYPE(epel_hv64, 10, avx2);

  ///////////////////////////////////////////////////////////////////////////////
  // QPEL
  ///////////////////////////////////////////////////////////////////////////////
  QPEL_PROTOTYPES(qpel_h,   8, sse4);
  QPEL_PROTOTYPES(qpel_h,  10, sse4);
  QPEL_PROTOTYPES(qpel_h,  12, sse4);

  QPEL_PROTOTYPES(qpel_v,   8, sse4);
  QPEL_PROTOTYPES(qpel_v,  10, sse4);
  QPEL_PROTOTYPES(qpel_v,  12, sse4);

  QPEL_PROTOTYPES(qpel_hv,  8, sse4);
  QPEL_PROTOTYPES(qpel_hv, 10, sse4);
  QPEL_PROTOTYPES(qpel_hv, 12, sse4);

  /* avx2: only the size suffixes 16..64 at 8 and 10 are declared. */
  PEL_PROTOTYPE(qpel_h16,  8, avx2);
  PEL_PROTOTYPE(qpel_h24,  8, avx2);
  PEL_PROTOTYPE(qpel_h32,  8, avx2);
  PEL_PROTOTYPE(qpel_h48,  8, avx2);
  PEL_PROTOTYPE(qpel_h64,  8, avx2);

  PEL_PROTOTYPE(qpel_h16, 10, avx2);
  PEL_PROTOTYPE(qpel_h24, 10, avx2);
  PEL_PROTOTYPE(qpel_h32, 10, avx2);
  PEL_PROTOTYPE(qpel_h48, 10, avx2);
  PEL_PROTOTYPE(qpel_h64, 10, avx2);

  PEL_PROTOTYPE(qpel_v16,  8, avx2);
  PEL_PROTOTYPE(qpel_v24,  8, avx2);
  PEL_PROTOTYPE(qpel_v32,  8, avx2);
  PEL_PROTOTYPE(qpel_v48,  8, avx2);
  PEL_PROTOTYPE(qpel_v64,  8, avx2);

  PEL_PROTOTYPE(qpel_v16, 10, avx2);
  PEL_PROTOTYPE(qpel_v24, 10, avx2);
  PEL_PROTOTYPE(qpel_v32, 10, avx2);
  PEL_PROTOTYPE(qpel_v48, 10, avx2);
  PEL_PROTOTYPE(qpel_v64, 10, avx2);

  PEL_PROTOTYPE(qpel_hv16,  8, avx2);
  PEL_PROTOTYPE(qpel_hv24,  8, avx2);
  PEL_PROTOTYPE(qpel_hv32,  8, avx2);
  PEL_PROTOTYPE(qpel_hv48,  8, avx2);
  PEL_PROTOTYPE(qpel_hv64,  8, avx2);

  PEL_PROTOTYPE(qpel_hv16, 10, avx2);
  PEL_PROTOTYPE(qpel_hv24, 10, avx2);
  PEL_PROTOTYPE(qpel_hv32, 10, avx2);
  PEL_PROTOTYPE(qpel_hv48, 10, avx2);
  PEL_PROTOTYPE(qpel_hv64, 10, avx2);

  /* Weighting routines (uni_w<width> / bi_w<width>) for sse4. */
  WEIGHTING_PROTOTYPES( 8, sse4);
  WEIGHTING_PROTOTYPES(10, sse4);
  WEIGHTING_PROTOTYPES(12, sse4);

  ///////////////////////////////////////////////////////////////////////////////
  // TRANSFORM_ADD
  ///////////////////////////////////////////////////////////////////////////////
  /* Names follow ff_hevc_add_residual_<N>_<D>_<opt>; all share one signature. */
  void ff_hevc_add_residual_4_8_mmxext(uint8_t *dst, int16_t *res, ptrdiff_t stride);
  void ff_hevc_add_residual_8_8_sse2(uint8_t *dst, int16_t *res, ptrdiff_t stride);
  void ff_hevc_add_residual_16_8_sse2(uint8_t *dst, int16_t *res, ptrdiff_t stride);
  void ff_hevc_add_residual_32_8_sse2(uint8_t *dst, int16_t *res, ptrdiff_t stride);

  void ff_hevc_add_residual_8_8_avx(uint8_t *dst, int16_t *res, ptrdiff_t stride);
  void ff_hevc_add_residual_16_8_avx(uint8_t *dst, int16_t *res, ptrdiff_t stride);
  void ff_hevc_add_residual_32_8_avx(uint8_t *dst, int16_t *res, ptrdiff_t stride);

  void ff_hevc_add_residual_32_8_avx2(uint8_t *dst, int16_t *res, ptrdiff_t stride);

  void ff_hevc_add_residual_4_10_mmxext(uint8_t *dst, int16_t *res, ptrdiff_t stride);
  void ff_hevc_add_residual_8_10_sse2(uint8_t *dst, int16_t *res, ptrdiff_t stride);
  void ff_hevc_add_residual_16_10_sse2(uint8_t *dst, int16_t *res, ptrdiff_t stride);
  void ff_hevc_add_residual_32_10_sse2(uint8_t *dst, int16_t *res, ptrdiff_t stride);

  void ff_hevc_add_residual_16_10_avx2(uint8_t *dst, int16_t *res, ptrdiff_t stride);
  void ff_hevc_add_residual_32_10_avx2(uint8_t *dst, int16_t *res, ptrdiff_t stride);

  #endif // AVCODEC_X86_HEVCDSP_H