update libvpx to hash 133f83e2c tag v1.7.0 from https://chromium.googlesource.com/webm/libvpx

2025-08-13 01:26:58 +00:00 · 2019-01-17 20:16:34 +00:00
parent 9bfd5255b2
commit 731ee28fe7
599 changed files with 103914 additions and 65606 deletions
--- a/libs/libvpx/vp8/decoder/threading.c
+++ b/libs/libvpx/vp8/decoder/threading.c
@@ -20,6 +20,7 @@
 #include "vp8/common/loopfilter.h"
 #include "vp8/common/extend.h"
 #include "vpx_ports/vpx_timer.h"
+#include "decoderthreading.h"
 #include "detokenize.h"
 #include "vp8/common/reconintra4x4.h"
 #include "vp8/common/reconinter.h"
@@ -36,8 +37,6 @@
    memset((p), 0, (n) * sizeof(*(p)));                             \
  } while (0)

-void vp8_mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd);
-
 static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd,
                                       MB_ROW_DEC *mbrd, int count) {
  VP8_COMMON *const pc = &pbi->common;
@@ -50,9 +49,6 @@ static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd,
    mbd->subpixel_predict8x8 = xd->subpixel_predict8x8;
    mbd->subpixel_predict16x16 = xd->subpixel_predict16x16;

-    mbd->mode_info_context = pc->mi + pc->mode_info_stride * (i + 1);
-    mbd->mode_info_stride = pc->mode_info_stride;
-
    mbd->frame_type = pc->frame_type;
    mbd->pre = xd->pre;
    mbd->dst = xd->dst;
@@ -83,7 +79,8 @@ static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd,
    if (pc->full_pixel) mbd->fullpixel_mask = 0xfffffff8;
  }

-  for (i = 0; i < pc->mb_rows; ++i) pbi->mt_current_mb_col[i] = -1;
+  for (i = 0; i < pc->mb_rows; ++i)
+    vpx_atomic_store_release(&pbi->mt_current_mb_col[i], -1);
 }

 static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
@@ -251,12 +248,13 @@ static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,

 static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd,
                              int start_mb_row) {
-  volatile const int *last_row_current_mb_col;
-  volatile int *current_mb_col;
+  const vpx_atomic_int *last_row_current_mb_col;
+  vpx_atomic_int *current_mb_col;
  int mb_row;
  VP8_COMMON *pc = &pbi->common;
  const int nsync = pbi->sync_range;
-  const int first_row_no_sync_above = pc->mb_cols + nsync;
+  const vpx_atomic_int first_row_no_sync_above =
+      VPX_ATOMIC_INIT(pc->mb_cols + nsync);
  int num_part = 1 << pbi->common.multi_token_partition;
  int last_mb_row = start_mb_row;

@@ -289,6 +287,9 @@ static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd,

  xd->up_available = (start_mb_row != 0);

+  xd->mode_info_context = pc->mi + pc->mode_info_stride * start_mb_row;
+  xd->mode_info_stride = pc->mode_info_stride;
+
  for (mb_row = start_mb_row; mb_row < pc->mb_rows;
       mb_row += (pbi->decoding_thread_count + 1)) {
    int recon_yoffset, recon_uvoffset;
@@ -318,7 +319,7 @@ static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd,

    xd->left_available = 0;

-    xd->mb_to_top_edge = -((mb_row * 16)) << 3;
+    xd->mb_to_top_edge = -((mb_row * 16) << 3);
    xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;

    if (pbi->common.filter_level) {
@@ -355,14 +356,13 @@ static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd,
                             xd->dst.uv_stride);
    }

-    for (mb_col = 0; mb_col < pc->mb_cols; mb_col++) {
-      *current_mb_col = mb_col - 1;
+    for (mb_col = 0; mb_col < pc->mb_cols; ++mb_col) {
+      if (((mb_col - 1) % nsync) == 0) {
+        vpx_atomic_store_release(current_mb_col, mb_col - 1);
+      }

-      if ((mb_col & (nsync - 1)) == 0) {
-        while (mb_col > (*last_row_current_mb_col - nsync)) {
-          x86_pause_hint();
-          thread_sleep(1);
-        }
+      if (mb_row && !(mb_col & (nsync - 1))) {
+        vp8_atomic_spin_wait(mb_col, last_row_current_mb_col, nsync);
      }

      /* Distance of MB to the various image edges.
@@ -548,7 +548,7 @@ static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd,
    }

    /* last MB of row is ready just after extension is done */
-    *current_mb_col = mb_col + nsync;
+    vpx_atomic_store_release(current_mb_col, mb_col + nsync);

    ++xd->mode_info_context; /* skip prediction column */
    xd->up_available = 1;
@@ -568,10 +568,10 @@ static THREAD_FUNCTION thread_decoding_proc(void *p_data) {
  ENTROPY_CONTEXT_PLANES mb_row_left_context;

  while (1) {
-    if (pbi->b_multithreaded_rd == 0) break;
+    if (vpx_atomic_load_acquire(&pbi->b_multithreaded_rd) == 0) break;

    if (sem_wait(&pbi->h_event_start_decoding[ithread]) == 0) {
-      if (pbi->b_multithreaded_rd == 0) {
+      if (vpx_atomic_load_acquire(&pbi->b_multithreaded_rd) == 0) {
        break;
      } else {
        MACROBLOCKD *xd = &mbrd->mbd;
@@ -589,7 +589,7 @@ void vp8_decoder_create_threads(VP8D_COMP *pbi) {
  int core_count = 0;
  unsigned int ithread;

-  pbi->b_multithreaded_rd = 0;
+  vpx_atomic_init(&pbi->b_multithreaded_rd, 0);
  pbi->allocated_decoding_thread_count = 0;

  /* limit decoding threads to the max number of token partitions */
@@ -601,7 +601,7 @@ void vp8_decoder_create_threads(VP8D_COMP *pbi) {
  }

  if (core_count > 1) {
-    pbi->b_multithreaded_rd = 1;
+    vpx_atomic_init(&pbi->b_multithreaded_rd, 1);
    pbi->decoding_thread_count = core_count - 1;

    CALLOC_ARRAY(pbi->h_decoding_thread, pbi->decoding_thread_count);
@@ -712,7 +712,7 @@ void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) {
  int i;
  int uv_width;

-  if (pbi->b_multithreaded_rd) {
+  if (vpx_atomic_load_acquire(&pbi->b_multithreaded_rd)) {
    vp8mt_de_alloc_temp_buffers(pbi, prev_mb_rows);

    /* our internal buffers are always multiples of 16 */
@@ -730,27 +730,33 @@ void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) {

    uv_width = width >> 1;

-    /* Allocate an int for each mb row. */
-    CALLOC_ARRAY(pbi->mt_current_mb_col, pc->mb_rows);
+    /* Allocate a vpx_atomic_int for each mb row. */
+    CHECK_MEM_ERROR(pbi->mt_current_mb_col,
+                    vpx_malloc(sizeof(*pbi->mt_current_mb_col) * pc->mb_rows));
+    for (i = 0; i < pc->mb_rows; ++i)
+      vpx_atomic_init(&pbi->mt_current_mb_col[i], 0);

    /* Allocate memory for above_row buffers. */
    CALLOC_ARRAY(pbi->mt_yabove_row, pc->mb_rows);
    for (i = 0; i < pc->mb_rows; ++i)
-      CHECK_MEM_ERROR(pbi->mt_yabove_row[i],
-                      vpx_memalign(16, sizeof(unsigned char) *
-                                           (width + (VP8BORDERINPIXELS << 1))));
+      CHECK_MEM_ERROR(
+          pbi->mt_yabove_row[i],
+          vpx_memalign(
+              16, sizeof(unsigned char) * (width + (VP8BORDERINPIXELS << 1))));

    CALLOC_ARRAY(pbi->mt_uabove_row, pc->mb_rows);
    for (i = 0; i < pc->mb_rows; ++i)
-      CHECK_MEM_ERROR(pbi->mt_uabove_row[i],
-                      vpx_memalign(16, sizeof(unsigned char) *
-                                           (uv_width + VP8BORDERINPIXELS)));
+      CHECK_MEM_ERROR(
+          pbi->mt_uabove_row[i],
+          vpx_memalign(16,
+                       sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS)));

    CALLOC_ARRAY(pbi->mt_vabove_row, pc->mb_rows);
    for (i = 0; i < pc->mb_rows; ++i)
-      CHECK_MEM_ERROR(pbi->mt_vabove_row[i],
-                      vpx_memalign(16, sizeof(unsigned char) *
-                                           (uv_width + VP8BORDERINPIXELS)));
+      CHECK_MEM_ERROR(
+          pbi->mt_vabove_row[i],
+          vpx_memalign(16,
+                       sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS)));

    /* Allocate memory for left_col buffers. */
    CALLOC_ARRAY(pbi->mt_yleft_col, pc->mb_rows);
@@ -772,9 +778,9 @@ void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) {

 void vp8_decoder_remove_threads(VP8D_COMP *pbi) {
  /* shutdown MB Decoding thread; */
-  if (pbi->b_multithreaded_rd) {
+  if (vpx_atomic_load_acquire(&pbi->b_multithreaded_rd)) {
    int i;
-    pbi->b_multithreaded_rd = 0;
+    vpx_atomic_store_release(&pbi->b_multithreaded_rd, 0);

    /* allow all threads to exit */
    for (i = 0; i < pbi->allocated_decoding_thread_count; ++i) {