aboutsummaryrefslogtreecommitdiffstats
path: root/recipes-multimedia/rpidistro-ffmpeg
diff options
context:
space:
mode:
Diffstat (limited to 'recipes-multimedia/rpidistro-ffmpeg')
-rw-r--r--recipes-multimedia/rpidistro-ffmpeg/files/0001-ffmpeg-5.1.4-rpi_24.patch24074
-rw-r--r--recipes-multimedia/rpidistro-ffmpeg/files/2001-configure-setup-for-OE-core-usage.patch79
-rw-r--r--recipes-multimedia/rpidistro-ffmpeg/files/2004-libavcodec-omx-replace-opt-vc-path-with-usr-lib.patch35
-rw-r--r--recipes-multimedia/rpidistro-ffmpeg/rpidistro-ffmpeg_5.1.4.bb192
4 files changed, 24380 insertions, 0 deletions
diff --git a/recipes-multimedia/rpidistro-ffmpeg/files/0001-ffmpeg-5.1.4-rpi_24.patch b/recipes-multimedia/rpidistro-ffmpeg/files/0001-ffmpeg-5.1.4-rpi_24.patch
new file mode 100644
index 0000000..016cf40
--- /dev/null
+++ b/recipes-multimedia/rpidistro-ffmpeg/files/0001-ffmpeg-5.1.4-rpi_24.patch
@@ -0,0 +1,24074 @@
+
+Upstream-Status: Inappropriate [embedded specific]
+
+The RPI-Distro repo clones the original ffmpeg and applies patches to enable
+Raspberry Pi support.
+
+--- a/configure
++++ b/configure
+@@ -205,6 +205,7 @@ External library support:
+ --disable-bzlib disable bzlib [autodetect]
+ --disable-coreimage disable Apple CoreImage framework [autodetect]
+ --enable-chromaprint enable audio fingerprinting with chromaprint [no]
++ --disable-epoxy disable epoxy [autodetect]
+ --enable-frei0r enable frei0r video filtering [no]
+ --enable-gcrypt enable gcrypt, needed for rtmp(t)e support
+ if openssl, librtmp or gmp is not used [no]
+@@ -281,6 +282,7 @@ External library support:
+ if openssl, gnutls or mbedtls is not used [no]
+ --enable-libtwolame enable MP2 encoding via libtwolame [no]
+ --enable-libuavs3d enable AVS3 decoding via libuavs3d [no]
++ --disable-libudev disable libudev [autodetect]
+ --enable-libv4l2 enable libv4l2/v4l-utils [no]
+ --enable-libvidstab enable video stabilization using vid.stab [no]
+ --enable-libvmaf enable vmaf filter via libvmaf [no]
+@@ -343,12 +345,16 @@ External library support:
+ --enable-libmfx enable Intel MediaSDK (AKA Quick Sync Video) code via libmfx [no]
+ --enable-libnpp enable Nvidia Performance Primitives-based code [no]
+ --enable-mmal enable Broadcom Multi-Media Abstraction Layer (Raspberry Pi) via MMAL [no]
++ --enable-sand enable sand video formats [rpi]
++ --enable-vout-drm enable the vout_drm module - for internal testing only [no]
++ --enable-vout-egl enable the vout_egl module - for internal testing only [no]
+ --disable-nvdec disable Nvidia video decoding acceleration (via hwaccel) [autodetect]
+ --disable-nvenc disable Nvidia video encoding code [autodetect]
+ --enable-omx enable OpenMAX IL code [no]
+ --enable-omx-rpi enable OpenMAX IL code for Raspberry Pi [no]
+ --enable-rkmpp enable Rockchip Media Process Platform code [no]
+ --disable-v4l2-m2m disable V4L2 mem2mem code [autodetect]
++ --enable-v4l2-request enable V4L2 request API code [no]
+ --disable-vaapi disable Video Acceleration API (mainly Unix/Intel) code [autodetect]
+ --disable-vdpau disable Nvidia Video Decode and Presentation API for Unix code [autodetect]
+ --disable-videotoolbox disable VideoToolbox code [autodetect]
+@@ -1754,7 +1760,9 @@ EXTERNAL_AUTODETECT_LIBRARY_LIST="
+ avfoundation
+ bzlib
+ coreimage
++ epoxy
+ iconv
++ libudev
+ libxcb
+ libxcb_shm
+ libxcb_shape
+@@ -1924,6 +1932,7 @@ HWACCEL_LIBRARY_LIST="
+ mmal
+ omx
+ opencl
++ v4l2_request
+ "
+
+ DOCUMENT_LIST="
+@@ -1941,10 +1950,14 @@ FEATURE_LIST="
+ omx_rpi
+ runtime_cpudetect
+ safe_bitstream_reader
++ sand
+ shared
+ small
+ static
+ swscale_alpha
++ vout_drm
++ vout_egl
++ v4l2_req_hevc_vx
+ "
+
+ # this list should be kept in linking order
+@@ -2501,6 +2514,7 @@ CONFIG_EXTRA="
+ rtpdec
+ rtpenc_chain
+ rv34dsp
++ sand
+ scene_sad
+ sinewin
+ snappy
+@@ -3011,6 +3025,7 @@ d3d11va_deps="dxva_h ID3D11VideoDecoder
+ dxva2_deps="dxva2api_h DXVA2_ConfigPictureDecode ole32 user32"
+ ffnvcodec_deps_any="libdl LoadLibrary"
+ nvdec_deps="ffnvcodec"
++v4l2_request_deps="linux_videodev2_h linux_media_h v4l2_timeval_to_ns libdrm libudev"
+ vaapi_x11_deps="xlib_x11"
+ videotoolbox_hwaccel_deps="videotoolbox pthreads"
+ videotoolbox_hwaccel_extralibs="-framework QuartzCore"
+@@ -3054,6 +3069,8 @@ hevc_dxva2_hwaccel_deps="dxva2 DXVA_PicP
+ hevc_dxva2_hwaccel_select="hevc_decoder"
+ hevc_nvdec_hwaccel_deps="nvdec"
+ hevc_nvdec_hwaccel_select="hevc_decoder"
++hevc_v4l2request_hwaccel_deps="v4l2_request"
++hevc_v4l2request_hwaccel_select="hevc_decoder"
+ hevc_vaapi_hwaccel_deps="vaapi VAPictureParameterBufferHEVC"
+ hevc_vaapi_hwaccel_select="hevc_decoder"
+ hevc_vdpau_hwaccel_deps="vdpau VdpPictureInfoHEVC"
+@@ -3539,8 +3556,11 @@ sndio_indev_deps="sndio"
+ sndio_outdev_deps="sndio"
+ v4l2_indev_deps_any="linux_videodev2_h sys_videoio_h"
+ v4l2_indev_suggest="libv4l2"
++v4l2_outdev_deps="libdrm"
+ v4l2_outdev_deps_any="linux_videodev2_h sys_videoio_h"
+ v4l2_outdev_suggest="libv4l2"
++vout_drm_outdev_deps="libdrm"
++vout_egl_outdev_deps="xlib epoxy"
+ vfwcap_indev_deps="vfw32 vfwcap_defines"
+ xcbgrab_indev_deps="libxcb"
+ xcbgrab_indev_suggest="libxcb_shm libxcb_shape libxcb_xfixes"
+@@ -3745,6 +3765,7 @@ tonemap_opencl_filter_deps="opencl const
+ transpose_opencl_filter_deps="opencl"
+ transpose_vaapi_filter_deps="vaapi VAProcPipelineCaps_rotation_flags"
+ transpose_vulkan_filter_deps="vulkan spirv_compiler"
++unsand_filter_select="sand"
+ unsharp_opencl_filter_deps="opencl"
+ uspp_filter_deps="gpl avcodec"
+ vaguedenoiser_filter_deps="gpl"
+@@ -6296,6 +6317,12 @@ if enabled xlib; then
+ disable xlib
+ fi
+
++enabled libudev &&
++ check_pkg_config libudev libudev libudev.h udev_new
++
++enabled epoxy &&
++ check_pkg_config epoxy epoxy epoxy/egl.h epoxy_egl_version
++
+ check_headers direct.h
+ check_headers dirent.h
+ check_headers dxgidebug.h
+@@ -6735,8 +6762,16 @@ enabled rkmpp && { require_p
+ { enabled libdrm ||
+ die "ERROR: rkmpp requires --enable-libdrm"; }
+ }
++enabled v4l2_request && { enabled libdrm ||
++ die "ERROR: v4l2-request requires --enable-libdrm"; } &&
++ { enabled libudev ||
++ die "ERROR: v4l2-request requires libudev"; }
+ enabled vapoursynth && require_pkg_config vapoursynth "vapoursynth-script >= 42" VSScript.h vsscript_init
+
++enabled vout_drm && { enabled libdrm || die "ERROR: vout_drm requires --enable-libdrm"; }
++
++enabled vout_egl && { enabled epoxy || die "ERROR: vout_egl requires epoxy"; } &&
++ { enabled xlib || die "ERROR: vout_egl requires xlib"; }
+
+ if enabled gcrypt; then
+ GCRYPT_CONFIG="${cross_prefix}libgcrypt-config"
+@@ -6817,6 +6852,10 @@ if enabled v4l2_m2m; then
+ check_cc vp9_v4l2_m2m linux/videodev2.h "int i = V4L2_PIX_FMT_VP9;"
+ fi
+
++check_func_headers "linux/media.h linux/videodev2.h" v4l2_timeval_to_ns
++check_cc hevc_v4l2_request linux/videodev2.h "int i = V4L2_PIX_FMT_HEVC_SLICE;"
++disable v4l2_req_hevc_vx
++
+ check_headers sys/videoio.h
+ test_code cc sys/videoio.h "struct v4l2_frmsizeenum vfse; vfse.discrete.width = 0;" && enable_sanitized struct_v4l2_frmivalenum_discrete
+
+@@ -7305,6 +7344,9 @@ check_deps $CONFIG_LIST \
+
+ enabled threads && ! enabled pthreads && ! enabled atomics_native && die "non pthread threading without atomics not supported, try adding --enable-pthreads or --cpu=i486 or higher if you are on x86"
+
++# Sub-feature of hevc_v4l2request_hwaccel - can only be set once deps are done
++enabled hevc_v4l2request_hwaccel && disabled hevc_v4l2_request && enable v4l2_req_hevc_vx
++
+ case $target_os in
+ haiku)
+ disable memalign
+--- a/fftools/ffmpeg.c
++++ b/fftools/ffmpeg.c
+@@ -1953,8 +1953,8 @@ static int ifilter_send_frame(InputFilte
+ av_channel_layout_compare(&ifilter->ch_layout, &frame->ch_layout);
+ break;
+ case AVMEDIA_TYPE_VIDEO:
+- need_reinit |= ifilter->width != frame->width ||
+- ifilter->height != frame->height;
++ need_reinit |= ifilter->width != av_frame_cropped_width(frame) ||
++ ifilter->height != av_frame_cropped_height(frame);
+ break;
+ }
+
+@@ -1965,6 +1965,9 @@ static int ifilter_send_frame(InputFilte
+ (ifilter->hw_frames_ctx && ifilter->hw_frames_ctx->data != frame->hw_frames_ctx->data))
+ need_reinit = 1;
+
++ if (no_cvt_hw && fg->graph)
++ need_reinit = 0;
++
+ if (sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DISPLAYMATRIX)) {
+ if (!ifilter->displaymatrix || memcmp(sd->data, ifilter->displaymatrix, sizeof(int32_t) * 9))
+ need_reinit = 1;
+@@ -2220,8 +2223,7 @@ static int decode_video(InputStream *ist
+ decoded_frame->top_field_first = ist->top_field_first;
+
+ ist->frames_decoded++;
+-
+- if (ist->hwaccel_retrieve_data && decoded_frame->format == ist->hwaccel_pix_fmt) {
++ if (!no_cvt_hw && ist->hwaccel_retrieve_data && decoded_frame->format == ist->hwaccel_pix_fmt) {
+ err = ist->hwaccel_retrieve_data(ist->dec_ctx, decoded_frame);
+ if (err < 0)
+ goto fail;
+@@ -2418,7 +2420,12 @@ static int process_input_packet(InputStr
+ case AVMEDIA_TYPE_VIDEO:
+ ret = decode_video (ist, repeating ? NULL : avpkt, &got_output, &duration_pts, !pkt,
+ &decode_failed);
+- if (!repeating || !pkt || got_output) {
++ // Pi: Do not inc dts if no_cvt_hw set
++ // V4L2 H264 decode has long latency and sometimes spits out a long
++ // stream of output without input. In this case incrementing DTS is wrong.
++ // There may be cases where the condition as written is correct so only
++ // "fix" in the cases which cause problems
++ if (!repeating || !pkt || (got_output && !no_cvt_hw)) {
+ if (pkt && pkt->duration) {
+ duration_dts = av_rescale_q(pkt->duration, ist->st->time_base, AV_TIME_BASE_Q);
+ } else if(ist->dec_ctx->framerate.num != 0 && ist->dec_ctx->framerate.den != 0) {
+@@ -2564,12 +2571,15 @@ static enum AVPixelFormat get_format(AVC
+ break;
+
+ if (ist->hwaccel_id == HWACCEL_GENERIC ||
+- ist->hwaccel_id == HWACCEL_AUTO) {
++ ist->hwaccel_id == HWACCEL_AUTO ||
++ no_cvt_hw) {
+ for (i = 0;; i++) {
+ config = avcodec_get_hw_config(s->codec, i);
+ if (!config)
+ break;
+- if (!(config->methods &
++ if (no_cvt_hw && (config->methods & AV_CODEC_HW_CONFIG_METHOD_INTERNAL))
++ av_log(s, AV_LOG_DEBUG, "no_cvt_hw so trying pix_fmt %d with codec internal hwaccel\n", *p);
++ else if (!(config->methods &
+ AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX))
+ continue;
+ if (config->pix_fmt == *p)
+--- a/fftools/ffmpeg.h
++++ b/fftools/ffmpeg.h
+@@ -626,6 +626,7 @@ extern enum VideoSyncMethod video_sync_m
+ extern float frame_drop_threshold;
+ extern int do_benchmark;
+ extern int do_benchmark_all;
++extern int no_cvt_hw;
+ extern int do_deinterlace;
+ extern int do_hex_dump;
+ extern int do_pkt_dump;
+--- a/fftools/ffmpeg_filter.c
++++ b/fftools/ffmpeg_filter.c
+@@ -1175,8 +1175,8 @@ int ifilter_parameters_from_frame(InputF
+
+ ifilter->format = frame->format;
+
+- ifilter->width = frame->width;
+- ifilter->height = frame->height;
++ ifilter->width = av_frame_cropped_width(frame);
++ ifilter->height = av_frame_cropped_height(frame);
+ ifilter->sample_aspect_ratio = frame->sample_aspect_ratio;
+
+ ifilter->sample_rate = frame->sample_rate;
+--- a/fftools/ffmpeg_hw.c
++++ b/fftools/ffmpeg_hw.c
+@@ -75,6 +75,8 @@ static char *hw_device_default_name(enum
+ char *name;
+ size_t index_pos;
+ int index, index_limit = 1000;
++ if (!type_name)
++ return NULL;
+ index_pos = strlen(type_name);
+ name = av_malloc(index_pos + 4);
+ if (!name)
+--- a/fftools/ffmpeg_opt.c
++++ b/fftools/ffmpeg_opt.c
+@@ -162,6 +162,7 @@ enum VideoSyncMethod video_sync_method =
+ float frame_drop_threshold = 0;
+ int do_benchmark = 0;
+ int do_benchmark_all = 0;
++int no_cvt_hw = 0;
+ int do_hex_dump = 0;
+ int do_pkt_dump = 0;
+ int copy_ts = 0;
+@@ -3724,6 +3725,8 @@ const OptionDef options[] = {
+ "add timings for benchmarking" },
+ { "benchmark_all", OPT_BOOL | OPT_EXPERT, { &do_benchmark_all },
+ "add timings for each task" },
++ { "no_cvt_hw", OPT_BOOL | OPT_EXPERT, { &no_cvt_hw },
++ "do not auto-convert hw frames to sw" },
+ { "progress", HAS_ARG | OPT_EXPERT, { .func_arg = opt_progress },
+ "write program-readable progress information", "url" },
+ { "stdin", OPT_BOOL | OPT_EXPERT, { &stdin_interaction },
+--- a/libavcodec/Makefile
++++ b/libavcodec/Makefile
+@@ -161,7 +161,10 @@ OBJS-$(CONFIG_VIDEODSP) +
+ OBJS-$(CONFIG_VP3DSP) += vp3dsp.o
+ OBJS-$(CONFIG_VP56DSP) += vp56dsp.o
+ OBJS-$(CONFIG_VP8DSP) += vp8dsp.o
+-OBJS-$(CONFIG_V4L2_M2M) += v4l2_m2m.o v4l2_context.o v4l2_buffers.o v4l2_fmt.o
++OBJS-$(CONFIG_V4L2_M2M) += v4l2_m2m.o v4l2_context.o v4l2_buffers.o v4l2_fmt.o\
++ weak_link.o v4l2_req_dmabufs.o
++OBJS-$(CONFIG_V4L2_REQUEST) += v4l2_req_media.o v4l2_req_pollqueue.o v4l2_req_dmabufs.o\
++ v4l2_req_devscan.o weak_link.o
+ OBJS-$(CONFIG_WMA_FREQS) += wma_freqs.o
+ OBJS-$(CONFIG_WMV2DSP) += wmv2dsp.o
+
+@@ -972,6 +975,8 @@ OBJS-$(CONFIG_HEVC_D3D11VA_HWACCEL)
+ OBJS-$(CONFIG_HEVC_DXVA2_HWACCEL) += dxva2_hevc.o
+ OBJS-$(CONFIG_HEVC_NVDEC_HWACCEL) += nvdec_hevc.o
+ OBJS-$(CONFIG_HEVC_QSV_HWACCEL) += qsvdec.o
++OBJS-$(CONFIG_HEVC_V4L2REQUEST_HWACCEL) += v4l2_request_hevc.o v4l2_req_decode_q.o v4l2_req_hevc_v4.o
++OBJS-$(CONFIG_V4L2_REQ_HEVC_VX) += v4l2_req_hevc_v1.o v4l2_req_hevc_v2.o v4l2_req_hevc_v3.o
+ OBJS-$(CONFIG_HEVC_VAAPI_HWACCEL) += vaapi_hevc.o h265_profile_level.o
+ OBJS-$(CONFIG_HEVC_VDPAU_HWACCEL) += vdpau_hevc.o h265_profile_level.o
+ OBJS-$(CONFIG_MJPEG_NVDEC_HWACCEL) += nvdec_mjpeg.o
+--- a/libavcodec/avcodec.h
++++ b/libavcodec/avcodec.h
+@@ -2212,6 +2212,17 @@ typedef struct AVHWAccel {
+ * that avctx->hwaccel_priv_data is invalid.
+ */
+ int (*frame_params)(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx);
++
++ /**
++ * Called if parsing fails
++ *
++ * An error has occurred, end_frame will not be called
++ * start_frame & decode_slice may or may not have been called
++ * Optional
++ *
++ * @param avctx the codec context
++ */
++ void (*abort_frame)(AVCodecContext *avctx);
+ } AVHWAccel;
+
+ /**
+--- /dev/null
++++ b/libavcodec/hevc-ctrls-v1.h
+@@ -0,0 +1,229 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * These are the HEVC state controls for use with stateless HEVC
++ * codec drivers.
++ *
++ * It turns out that these structs are not stable yet and will undergo
++ * more changes. So keep them private until they are stable and ready to
++ * become part of the official public API.
++ */
++
++#ifndef _HEVC_CTRLS_H_
++#define _HEVC_CTRLS_H_
++
++#include <linux/videodev2.h>
++
++/* The pixel format isn't stable at the moment and will likely be renamed. */
++#define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */
++
++#define V4L2_CID_MPEG_VIDEO_HEVC_SPS (V4L2_CID_MPEG_BASE + 1008)
++#define V4L2_CID_MPEG_VIDEO_HEVC_PPS (V4L2_CID_MPEG_BASE + 1009)
++#define V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS (V4L2_CID_MPEG_BASE + 1010)
++#define V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX (V4L2_CID_MPEG_BASE + 1011)
++#define V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE (V4L2_CID_MPEG_BASE + 1015)
++#define V4L2_CID_MPEG_VIDEO_HEVC_START_CODE (V4L2_CID_MPEG_BASE + 1016)
++
++/* enum v4l2_ctrl_type type values */
++#define V4L2_CTRL_TYPE_HEVC_SPS 0x0120
++#define V4L2_CTRL_TYPE_HEVC_PPS 0x0121
++#define V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS 0x0122
++#define V4L2_CTRL_TYPE_HEVC_SCALING_MATRIX 0x0123
++
++enum v4l2_mpeg_video_hevc_decode_mode {
++ V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED,
++ V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED,
++};
++
++enum v4l2_mpeg_video_hevc_start_code {
++ V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE,
++ V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B,
++};
++
++#define V4L2_HEVC_SLICE_TYPE_B 0
++#define V4L2_HEVC_SLICE_TYPE_P 1
++#define V4L2_HEVC_SLICE_TYPE_I 2
++
++#define V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE (1ULL << 0)
++#define V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED (1ULL << 1)
++#define V4L2_HEVC_SPS_FLAG_AMP_ENABLED (1ULL << 2)
++#define V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET (1ULL << 3)
++#define V4L2_HEVC_SPS_FLAG_PCM_ENABLED (1ULL << 4)
++#define V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED (1ULL << 5)
++#define V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT (1ULL << 6)
++#define V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED (1ULL << 7)
++#define V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED (1ULL << 8)
++
++/* The controls are not stable at the moment and will likely be reworked. */
++struct v4l2_ctrl_hevc_sps {
++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Sequence parameter set */
++ __u16 pic_width_in_luma_samples;
++ __u16 pic_height_in_luma_samples;
++ __u8 bit_depth_luma_minus8;
++ __u8 bit_depth_chroma_minus8;
++ __u8 log2_max_pic_order_cnt_lsb_minus4;
++ __u8 sps_max_dec_pic_buffering_minus1;
++ __u8 sps_max_num_reorder_pics;
++ __u8 sps_max_latency_increase_plus1;
++ __u8 log2_min_luma_coding_block_size_minus3;
++ __u8 log2_diff_max_min_luma_coding_block_size;
++ __u8 log2_min_luma_transform_block_size_minus2;
++ __u8 log2_diff_max_min_luma_transform_block_size;
++ __u8 max_transform_hierarchy_depth_inter;
++ __u8 max_transform_hierarchy_depth_intra;
++ __u8 pcm_sample_bit_depth_luma_minus1;
++ __u8 pcm_sample_bit_depth_chroma_minus1;
++ __u8 log2_min_pcm_luma_coding_block_size_minus3;
++ __u8 log2_diff_max_min_pcm_luma_coding_block_size;
++ __u8 num_short_term_ref_pic_sets;
++ __u8 num_long_term_ref_pics_sps;
++ __u8 chroma_format_idc;
++ __u8 sps_max_sub_layers_minus1;
++
++ __u64 flags;
++};
++
++#define V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT (1ULL << 0)
++#define V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT (1ULL << 1)
++#define V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED (1ULL << 2)
++#define V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT (1ULL << 3)
++#define V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED (1ULL << 4)
++#define V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED (1ULL << 5)
++#define V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED (1ULL << 6)
++#define V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT (1ULL << 7)
++#define V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED (1ULL << 8)
++#define V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED (1ULL << 9)
++#define V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED (1ULL << 10)
++#define V4L2_HEVC_PPS_FLAG_TILES_ENABLED (1ULL << 11)
++#define V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED (1ULL << 12)
++#define V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED (1ULL << 13)
++#define V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 14)
++#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED (1ULL << 15)
++#define V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER (1ULL << 16)
++#define V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT (1ULL << 17)
++#define V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT (1ULL << 18)
++
++struct v4l2_ctrl_hevc_pps {
++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture parameter set */
++ __u8 num_extra_slice_header_bits;
++ __s8 init_qp_minus26;
++ __u8 diff_cu_qp_delta_depth;
++ __s8 pps_cb_qp_offset;
++ __s8 pps_cr_qp_offset;
++ __u8 num_tile_columns_minus1;
++ __u8 num_tile_rows_minus1;
++ __u8 column_width_minus1[20];
++ __u8 row_height_minus1[22];
++ __s8 pps_beta_offset_div2;
++ __s8 pps_tc_offset_div2;
++ __u8 log2_parallel_merge_level_minus2;
++
++ __u8 padding[4];
++ __u64 flags;
++};
++
++#define V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_BEFORE 0x01
++#define V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_AFTER 0x02
++#define V4L2_HEVC_DPB_ENTRY_RPS_LT_CURR 0x03
++
++#define V4L2_HEVC_DPB_ENTRIES_NUM_MAX 16
++
++struct v4l2_hevc_dpb_entry {
++ __u64 timestamp;
++ __u8 rps;
++ __u8 field_pic;
++ __u16 pic_order_cnt[2];
++ __u8 padding[2];
++};
++
++struct v4l2_hevc_pred_weight_table {
++ __s8 delta_luma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++ __s8 luma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++ __s8 delta_chroma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
++ __s8 chroma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
++
++ __s8 delta_luma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++ __s8 luma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++ __s8 delta_chroma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
++ __s8 chroma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
++
++ __u8 padding[6];
++
++ __u8 luma_log2_weight_denom;
++ __s8 delta_chroma_log2_weight_denom;
++};
++
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA (1ULL << 0)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA (1ULL << 1)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED (1ULL << 2)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO (1ULL << 3)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT (1ULL << 4)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0 (1ULL << 5)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_USE_INTEGER_MV (1ULL << 6)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED (1ULL << 7)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 8)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT (1ULL << 9)
++
++struct v4l2_ctrl_hevc_slice_params {
++ __u32 bit_size;
++ __u32 data_bit_offset;
++
++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
++ __u32 slice_segment_addr;
++ __u32 num_entry_point_offsets;
++
++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: NAL unit header */
++ __u8 nal_unit_type;
++ __u8 nuh_temporal_id_plus1;
++
++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
++ __u8 slice_type;
++ __u8 colour_plane_id;
++ __u16 slice_pic_order_cnt;
++ __u8 num_ref_idx_l0_active_minus1;
++ __u8 num_ref_idx_l1_active_minus1;
++ __u8 collocated_ref_idx;
++ __u8 five_minus_max_num_merge_cand;
++ __s8 slice_qp_delta;
++ __s8 slice_cb_qp_offset;
++ __s8 slice_cr_qp_offset;
++ __s8 slice_act_y_qp_offset;
++ __s8 slice_act_cb_qp_offset;
++ __s8 slice_act_cr_qp_offset;
++ __s8 slice_beta_offset_div2;
++ __s8 slice_tc_offset_div2;
++
++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture timing SEI message */
++ __u8 pic_struct;
++
++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
++ __u8 num_active_dpb_entries;
++ __u8 ref_idx_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++ __u8 ref_idx_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++
++ __u8 num_rps_poc_st_curr_before;
++ __u8 num_rps_poc_st_curr_after;
++ __u8 num_rps_poc_lt_curr;
++
++ __u8 padding;
++
++ __u32 entry_point_offset_minus1[256];
++
++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
++ struct v4l2_hevc_dpb_entry dpb[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++
++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Weighted prediction parameter */
++ struct v4l2_hevc_pred_weight_table pred_weight_table;
++
++ __u64 flags;
++};
++
++struct v4l2_ctrl_hevc_scaling_matrix {
++ __u8 scaling_list_4x4[6][16];
++ __u8 scaling_list_8x8[6][64];
++ __u8 scaling_list_16x16[6][64];
++ __u8 scaling_list_32x32[2][64];
++ __u8 scaling_list_dc_coef_16x16[6];
++ __u8 scaling_list_dc_coef_32x32[2];
++};
++
++#endif
+--- /dev/null
++++ b/libavcodec/hevc-ctrls-v2.h
+@@ -0,0 +1,257 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * These are the HEVC state controls for use with stateless HEVC
++ * codec drivers.
++ *
++ * It turns out that these structs are not stable yet and will undergo
++ * more changes. So keep them private until they are stable and ready to
++ * become part of the official public API.
++ */
++
++#ifndef _HEVC_CTRLS_H_
++#define _HEVC_CTRLS_H_
++
++#include <linux/videodev2.h>
++
++/* The pixel format isn't stable at the moment and will likely be renamed. */
++#define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */
++
++#define V4L2_CID_MPEG_VIDEO_HEVC_SPS (V4L2_CID_CODEC_BASE + 1008)
++#define V4L2_CID_MPEG_VIDEO_HEVC_PPS (V4L2_CID_CODEC_BASE + 1009)
++#define V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS (V4L2_CID_CODEC_BASE + 1010)
++#define V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX (V4L2_CID_CODEC_BASE + 1011)
++#define V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS (V4L2_CID_CODEC_BASE + 1012)
++#define V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE (V4L2_CID_CODEC_BASE + 1015)
++#define V4L2_CID_MPEG_VIDEO_HEVC_START_CODE (V4L2_CID_CODEC_BASE + 1016)
++
++/* enum v4l2_ctrl_type type values */
++#define V4L2_CTRL_TYPE_HEVC_SPS 0x0120
++#define V4L2_CTRL_TYPE_HEVC_PPS 0x0121
++#define V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS 0x0122
++#define V4L2_CTRL_TYPE_HEVC_SCALING_MATRIX 0x0123
++#define V4L2_CTRL_TYPE_HEVC_DECODE_PARAMS 0x0124
++
++enum v4l2_mpeg_video_hevc_decode_mode {
++ V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED,
++ V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED,
++};
++
++enum v4l2_mpeg_video_hevc_start_code {
++ V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE,
++ V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B,
++};
++
++#define V4L2_HEVC_SLICE_TYPE_B 0
++#define V4L2_HEVC_SLICE_TYPE_P 1
++#define V4L2_HEVC_SLICE_TYPE_I 2
++
++#define V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE (1ULL << 0)
++#define V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED (1ULL << 1)
++#define V4L2_HEVC_SPS_FLAG_AMP_ENABLED (1ULL << 2)
++#define V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET (1ULL << 3)
++#define V4L2_HEVC_SPS_FLAG_PCM_ENABLED (1ULL << 4)
++#define V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED (1ULL << 5)
++#define V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT (1ULL << 6)
++#define V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED (1ULL << 7)
++#define V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED (1ULL << 8)
++
++/* The controls are not stable at the moment and will likely be reworked. */
++struct v4l2_ctrl_hevc_sps {
++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Sequence parameter set */
++ __u16 pic_width_in_luma_samples;
++ __u16 pic_height_in_luma_samples;
++ __u8 bit_depth_luma_minus8;
++ __u8 bit_depth_chroma_minus8;
++ __u8 log2_max_pic_order_cnt_lsb_minus4;
++ __u8 sps_max_dec_pic_buffering_minus1;
++ __u8 sps_max_num_reorder_pics;
++ __u8 sps_max_latency_increase_plus1;
++ __u8 log2_min_luma_coding_block_size_minus3;
++ __u8 log2_diff_max_min_luma_coding_block_size;
++ __u8 log2_min_luma_transform_block_size_minus2;
++ __u8 log2_diff_max_min_luma_transform_block_size;
++ __u8 max_transform_hierarchy_depth_inter;
++ __u8 max_transform_hierarchy_depth_intra;
++ __u8 pcm_sample_bit_depth_luma_minus1;
++ __u8 pcm_sample_bit_depth_chroma_minus1;
++ __u8 log2_min_pcm_luma_coding_block_size_minus3;
++ __u8 log2_diff_max_min_pcm_luma_coding_block_size;
++ __u8 num_short_term_ref_pic_sets;
++ __u8 num_long_term_ref_pics_sps;
++ __u8 chroma_format_idc;
++ __u8 sps_max_sub_layers_minus1;
++
++ __u64 flags;
++};
++
++#define V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED (1ULL << 0)
++#define V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT (1ULL << 1)
++#define V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED (1ULL << 2)
++#define V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT (1ULL << 3)
++#define V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED (1ULL << 4)
++#define V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED (1ULL << 5)
++#define V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED (1ULL << 6)
++#define V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT (1ULL << 7)
++#define V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED (1ULL << 8)
++#define V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED (1ULL << 9)
++#define V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED (1ULL << 10)
++#define V4L2_HEVC_PPS_FLAG_TILES_ENABLED (1ULL << 11)
++#define V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED (1ULL << 12)
++#define V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED (1ULL << 13)
++#define V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 14)
++#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED (1ULL << 15)
++#define V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER (1ULL << 16)
++#define V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT (1ULL << 17)
++#define V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT (1ULL << 18)
++#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT (1ULL << 19)
++#define V4L2_HEVC_PPS_FLAG_UNIFORM_SPACING (1ULL << 20)
++
++struct v4l2_ctrl_hevc_pps {
++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture parameter set */
++ __u8 num_extra_slice_header_bits;
++ __u8 num_ref_idx_l0_default_active_minus1;
++ __u8 num_ref_idx_l1_default_active_minus1;
++ __s8 init_qp_minus26;
++ __u8 diff_cu_qp_delta_depth;
++ __s8 pps_cb_qp_offset;
++ __s8 pps_cr_qp_offset;
++ __u8 num_tile_columns_minus1;
++ __u8 num_tile_rows_minus1;
++ __u8 column_width_minus1[20];
++ __u8 row_height_minus1[22];
++ __s8 pps_beta_offset_div2;
++ __s8 pps_tc_offset_div2;
++ __u8 log2_parallel_merge_level_minus2;
++
++ __u8 padding[4];
++ __u64 flags;
++};
++
++#define V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_BEFORE 0x01
++#define V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_AFTER 0x02
++#define V4L2_HEVC_DPB_ENTRY_RPS_LT_CURR 0x03
++
++#define V4L2_HEVC_DPB_ENTRIES_NUM_MAX 16
++
++struct v4l2_hevc_dpb_entry {
++ __u64 timestamp;
++ __u8 rps;
++ __u8 field_pic;
++ __u16 pic_order_cnt[2];
++ __u8 padding[2];
++};
++
++struct v4l2_hevc_pred_weight_table {
++ __s8 delta_luma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++ __s8 luma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++ __s8 delta_chroma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
++ __s8 chroma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
++
++ __s8 delta_luma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++ __s8 luma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++ __s8 delta_chroma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
++ __s8 chroma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
++
++ __u8 padding[6];
++
++ __u8 luma_log2_weight_denom;
++ __s8 delta_chroma_log2_weight_denom;
++};
++
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA (1ULL << 0)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA (1ULL << 1)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED (1ULL << 2)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO (1ULL << 3)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT (1ULL << 4)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0 (1ULL << 5)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_USE_INTEGER_MV (1ULL << 6)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED (1ULL << 7)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 8)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT (1ULL << 9)
++
++struct v4l2_ctrl_hevc_slice_params {
++ __u32 bit_size;
++ __u32 data_bit_offset;
++
++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
++ __u32 slice_segment_addr;
++ __u32 num_entry_point_offsets;
++
++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: NAL unit header */
++ __u8 nal_unit_type;
++ __u8 nuh_temporal_id_plus1;
++
++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
++ __u8 slice_type;
++ __u8 colour_plane_id;
++ __u16 slice_pic_order_cnt;
++ __u8 num_ref_idx_l0_active_minus1;
++ __u8 num_ref_idx_l1_active_minus1;
++ __u8 collocated_ref_idx;
++ __u8 five_minus_max_num_merge_cand;
++ __s8 slice_qp_delta;
++ __s8 slice_cb_qp_offset;
++ __s8 slice_cr_qp_offset;
++ __s8 slice_act_y_qp_offset;
++ __s8 slice_act_cb_qp_offset;
++ __s8 slice_act_cr_qp_offset;
++ __s8 slice_beta_offset_div2;
++ __s8 slice_tc_offset_div2;
++
++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture timing SEI message */
++ __u8 pic_struct;
++
++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
++ __u8 ref_idx_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++ __u8 ref_idx_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++
++ __u8 padding[5];
++
++ __u32 entry_point_offset_minus1[256];
++
++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Weighted prediction parameter */
++ struct v4l2_hevc_pred_weight_table pred_weight_table;
++
++ __u64 flags;
++};
++
++#define V4L2_HEVC_DECODE_PARAM_FLAG_IRAP_PIC 0x1
++#define V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC 0x2
++#define V4L2_HEVC_DECODE_PARAM_FLAG_NO_OUTPUT_OF_PRIOR 0x4
++
++struct v4l2_ctrl_hevc_decode_params {
++ __s32 pic_order_cnt_val;
++ __u8 num_active_dpb_entries;
++ struct v4l2_hevc_dpb_entry dpb[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++ __u8 num_poc_st_curr_before;
++ __u8 num_poc_st_curr_after;
++ __u8 num_poc_lt_curr;
++ __u8 poc_st_curr_before[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++ __u8 poc_st_curr_after[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++ __u8 poc_lt_curr[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++ __u64 flags;
++};
++
++/* MPEG-class control IDs specific to the Hantro driver as defined by V4L2 */
++#define V4L2_CID_CODEC_HANTRO_BASE (V4L2_CTRL_CLASS_CODEC | 0x1200)
++/*
++ * V4L2_CID_HANTRO_HEVC_SLICE_HEADER_SKIP -
++ * the amount of data (in bits) to skip in the
++ * slice segment header.
++ * If non-IDR, the bits to be skipped go from syntax element "pic_output_flag"
++ * to before syntax element "slice_temporal_mvp_enabled_flag".
++ * If IDR, the skipped bits are just "pic_output_flag"
++ * (separate_colour_plane_flag is not supported).
++ */
++#define V4L2_CID_HANTRO_HEVC_SLICE_HEADER_SKIP (V4L2_CID_CODEC_HANTRO_BASE + 0)
++
++struct v4l2_ctrl_hevc_scaling_matrix { /* scaling lists; every entry is a single byte */
++ __u8 scaling_list_4x4[6][16]; /* 6 lists of 16 entries */
++ __u8 scaling_list_8x8[6][64]; /* 6 lists of 64 entries */
++ __u8 scaling_list_16x16[6][64]; /* 6 lists, 64 entries each (not 256) */
++ __u8 scaling_list_32x32[2][64]; /* 2 lists, 64 entries each (not 1024) */
++ __u8 scaling_list_dc_coef_16x16[6]; /* one entry per 16x16 list */
++ __u8 scaling_list_dc_coef_32x32[2]; /* one entry per 32x32 list */
++};
++
++#endif
+--- /dev/null
++++ b/libavcodec/hevc-ctrls-v3.h
+@@ -0,0 +1,255 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * These are the HEVC state controls for use with stateless HEVC
++ * codec drivers.
++ *
++ * It turns out that these structs are not stable yet and will undergo
++ * more changes. So keep them private until they are stable and ready to
++ * become part of the official public API.
++ */
++
++#ifndef _HEVC_CTRLS_H_
++#define _HEVC_CTRLS_H_
++
++#include <linux/videodev2.h>
++
++/* The pixel format isn't stable at the moment and will likely be renamed. */
++#define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */
++
++#define V4L2_CID_MPEG_VIDEO_HEVC_SPS (V4L2_CID_CODEC_BASE + 1008)
++#define V4L2_CID_MPEG_VIDEO_HEVC_PPS (V4L2_CID_CODEC_BASE + 1009)
++#define V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS (V4L2_CID_CODEC_BASE + 1010)
++#define V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX (V4L2_CID_CODEC_BASE + 1011)
++#define V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS (V4L2_CID_CODEC_BASE + 1012)
++#define V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE (V4L2_CID_CODEC_BASE + 1015)
++#define V4L2_CID_MPEG_VIDEO_HEVC_START_CODE (V4L2_CID_CODEC_BASE + 1016)
++
++/* enum v4l2_ctrl_type type values */
++#define V4L2_CTRL_TYPE_HEVC_SPS 0x0120
++#define V4L2_CTRL_TYPE_HEVC_PPS 0x0121
++#define V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS 0x0122
++#define V4L2_CTRL_TYPE_HEVC_SCALING_MATRIX 0x0123
++#define V4L2_CTRL_TYPE_HEVC_DECODE_PARAMS 0x0124
++
++enum v4l2_mpeg_video_hevc_decode_mode { /* presumably the values of V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE - confirm */
++ V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED, /* = 0 */
++ V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED, /* = 1 */
++};
++
++enum v4l2_mpeg_video_hevc_start_code { /* presumably the values of V4L2_CID_MPEG_VIDEO_HEVC_START_CODE - confirm */
++ V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE, /* = 0 */
++ V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B, /* = 1 */
++};
++
++#define V4L2_HEVC_SLICE_TYPE_B 0
++#define V4L2_HEVC_SLICE_TYPE_P 1
++#define V4L2_HEVC_SLICE_TYPE_I 2
++
++#define V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE (1ULL << 0)
++#define V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED (1ULL << 1)
++#define V4L2_HEVC_SPS_FLAG_AMP_ENABLED (1ULL << 2)
++#define V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET (1ULL << 3)
++#define V4L2_HEVC_SPS_FLAG_PCM_ENABLED (1ULL << 4)
++#define V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED (1ULL << 5)
++#define V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT (1ULL << 6)
++#define V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED (1ULL << 7)
++#define V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED (1ULL << 8)
++
++/* The controls are not stable at the moment and will likely be reworked. */
++struct v4l2_ctrl_hevc_sps { /* presumably the payload of V4L2_CID_MPEG_VIDEO_HEVC_SPS - confirm */
++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Sequence parameter set */
++ __u16 pic_width_in_luma_samples;
++ __u16 pic_height_in_luma_samples;
++ __u8 bit_depth_luma_minus8;
++ __u8 bit_depth_chroma_minus8;
++ __u8 log2_max_pic_order_cnt_lsb_minus4;
++ __u8 sps_max_dec_pic_buffering_minus1;
++ __u8 sps_max_num_reorder_pics;
++ __u8 sps_max_latency_increase_plus1;
++ __u8 log2_min_luma_coding_block_size_minus3;
++ __u8 log2_diff_max_min_luma_coding_block_size;
++ __u8 log2_min_luma_transform_block_size_minus2;
++ __u8 log2_diff_max_min_luma_transform_block_size;
++ __u8 max_transform_hierarchy_depth_inter;
++ __u8 max_transform_hierarchy_depth_intra;
++ __u8 pcm_sample_bit_depth_luma_minus1;
++ __u8 pcm_sample_bit_depth_chroma_minus1;
++ __u8 log2_min_pcm_luma_coding_block_size_minus3;
++ __u8 log2_diff_max_min_pcm_luma_coding_block_size;
++ __u8 num_short_term_ref_pic_sets;
++ __u8 num_long_term_ref_pics_sps;
++ __u8 chroma_format_idc;
++ __u8 sps_max_sub_layers_minus1;
++
++ __u64 flags; /* see V4L2_HEVC_SPS_FLAG_* above; the members above total 24 bytes, so no padding member precedes it */
++};
++
++#define V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED (1ULL << 0)
++#define V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT (1ULL << 1)
++#define V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED (1ULL << 2)
++#define V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT (1ULL << 3)
++#define V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED (1ULL << 4)
++#define V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED (1ULL << 5)
++#define V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED (1ULL << 6)
++#define V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT (1ULL << 7)
++#define V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED (1ULL << 8)
++#define V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED (1ULL << 9)
++#define V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED (1ULL << 10)
++#define V4L2_HEVC_PPS_FLAG_TILES_ENABLED (1ULL << 11)
++#define V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED (1ULL << 12)
++#define V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED (1ULL << 13)
++#define V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 14)
++#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED (1ULL << 15)
++#define V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER (1ULL << 16)
++#define V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT (1ULL << 17)
++#define V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT (1ULL << 18)
++#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT (1ULL << 19)
++#define V4L2_HEVC_PPS_FLAG_UNIFORM_SPACING (1ULL << 20)
++
++struct v4l2_ctrl_hevc_pps { /* presumably the payload of V4L2_CID_MPEG_VIDEO_HEVC_PPS - confirm */
++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture parameter set */
++ __u8 num_extra_slice_header_bits;
++ __u8 num_ref_idx_l0_default_active_minus1;
++ __u8 num_ref_idx_l1_default_active_minus1;
++ __s8 init_qp_minus26;
++ __u8 diff_cu_qp_delta_depth;
++ __s8 pps_cb_qp_offset;
++ __s8 pps_cr_qp_offset;
++ __u8 num_tile_columns_minus1;
++ __u8 num_tile_rows_minus1;
++ __u8 column_width_minus1[20]; /* room for 20 entries */
++ __u8 row_height_minus1[22]; /* room for 22 entries */
++ __s8 pps_beta_offset_div2;
++ __s8 pps_tc_offset_div2;
++ __u8 log2_parallel_merge_level_minus2;
++
++ __u8 padding[4]; /* explicit padding; NOTE(review): presumably expected to be zeroed - confirm */
++ __u64 flags; /* see V4L2_HEVC_PPS_FLAG_* above */
++};
++
++#define V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE 0x01
++
++#define V4L2_HEVC_DPB_ENTRIES_NUM_MAX 16
++
++struct v4l2_hevc_dpb_entry { /* one decoded picture buffer (DPB) entry */
++ __u64 timestamp; /* presumably identifies the buffer used as reference - confirm */
++ __u8 flags; /* see V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE above */
++ __u8 field_pic;
++ __u16 pic_order_cnt[2];
++ __u8 padding[2]; /* explicit tail padding: the 14 bytes of members above become 16 */
++};
++
++struct v4l2_hevc_pred_weight_table { /* weighted prediction tables for list 0 and list 1, each sized by V4L2_HEVC_DPB_ENTRIES_NUM_MAX */
++ __s8 delta_luma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++ __s8 luma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++ __s8 delta_chroma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; /* [..][2]: presumably the two chroma components - confirm */
++ __s8 chroma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
++
++ __s8 delta_luma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++ __s8 luma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++ __s8 delta_chroma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
++ __s8 chroma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
++
++ __u8 padding[6]; /* explicit padding: 192 bytes of tables + 6 + the two bytes below = 200 bytes */
++
++ __u8 luma_log2_weight_denom;
++ __s8 delta_chroma_log2_weight_denom;
++};
++
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA (1ULL << 0)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA (1ULL << 1)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED (1ULL << 2)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO (1ULL << 3)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT (1ULL << 4)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0 (1ULL << 5)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_USE_INTEGER_MV (1ULL << 6)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED (1ULL << 7)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 8)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT (1ULL << 9)
++
++struct v4l2_ctrl_hevc_slice_params { /* slice parameters; each member group names the H.265 section it comes from */
++ __u32 bit_size;
++ __u32 data_bit_offset; /* NOTE(review): a bit offset here; hevc-ctrls-v4.h carries data_byte_offset instead */
++
++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
++ __u32 slice_segment_addr;
++ __u32 num_entry_point_offsets;
++
++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: NAL unit header */
++ __u8 nal_unit_type;
++ __u8 nuh_temporal_id_plus1;
++
++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
++ __u8 slice_type; /* see V4L2_HEVC_SLICE_TYPE_* above */
++ __u8 colour_plane_id;
++ __u16 slice_pic_order_cnt;
++ __u8 num_ref_idx_l0_active_minus1;
++ __u8 num_ref_idx_l1_active_minus1;
++ __u8 collocated_ref_idx;
++ __u8 five_minus_max_num_merge_cand;
++ __s8 slice_qp_delta;
++ __s8 slice_cb_qp_offset;
++ __s8 slice_cr_qp_offset;
++ __s8 slice_act_y_qp_offset;
++ __s8 slice_act_cb_qp_offset;
++ __s8 slice_act_cr_qp_offset;
++ __s8 slice_beta_offset_div2;
++ __s8 slice_tc_offset_div2;
++
++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture timing SEI message */
++ __u8 pic_struct;
++
++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
++ __u8 ref_idx_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++ __u8 ref_idx_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++
++ __u8 padding[5]; /* explicit padding: the 67 bytes above become 72, so the __u32 array below is naturally aligned */
++
++ __u32 entry_point_offset_minus1[256]; /* room for 256 entries */
++
++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Weighted prediction parameter */
++ struct v4l2_hevc_pred_weight_table pred_weight_table;
++
++ __u64 flags; /* see V4L2_HEVC_SLICE_PARAMS_FLAG_* above */
++};
++
++#define V4L2_HEVC_DECODE_PARAM_FLAG_IRAP_PIC 0x1
++#define V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC 0x2
++#define V4L2_HEVC_DECODE_PARAM_FLAG_NO_OUTPUT_OF_PRIOR 0x4
++
++struct v4l2_ctrl_hevc_decode_params { /* decode parameters: picture order count, DPB entries and reference index lists */
++ __s32 pic_order_cnt_val;
++ __u8 num_active_dpb_entries; /* presumably the number of valid dpb[] entries - confirm */
++ struct v4l2_hevc_dpb_entry dpb[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; /* 16 entries */
++ __u8 num_poc_st_curr_before; /* presumably the number of used poc_st_curr_before[] entries */
++ __u8 num_poc_st_curr_after; /* presumably the number of used poc_st_curr_after[] entries */
++ __u8 num_poc_lt_curr; /* presumably the number of used poc_lt_curr[] entries */
++ __u8 poc_st_curr_before[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++ __u8 poc_st_curr_after[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++ __u8 poc_lt_curr[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++ __u64 flags; /* see the V4L2_HEVC_DECODE_PARAM_FLAG_* values defined above */
++};
++
++struct v4l2_ctrl_hevc_scaling_matrix { /* scaling lists; every entry is a single byte */
++ __u8 scaling_list_4x4[6][16]; /* 6 lists of 16 entries */
++ __u8 scaling_list_8x8[6][64]; /* 6 lists of 64 entries */
++ __u8 scaling_list_16x16[6][64]; /* 6 lists, 64 entries each (not 256) */
++ __u8 scaling_list_32x32[2][64]; /* 2 lists, 64 entries each (not 1024) */
++ __u8 scaling_list_dc_coef_16x16[6]; /* one entry per 16x16 list */
++ __u8 scaling_list_dc_coef_32x32[2]; /* one entry per 32x32 list */
++};
++
++/* MPEG-class control IDs specific to the Hantro driver as defined by V4L2 */
++#define V4L2_CID_CODEC_HANTRO_BASE (V4L2_CTRL_CLASS_CODEC | 0x1200)
++/*
++ * V4L2_CID_HANTRO_HEVC_SLICE_HEADER_SKIP -
++ * the amount of data (in bits) to skip in the
++ * slice segment header.
++ * If non-IDR, the bits to be skipped go from syntax element "pic_output_flag"
++ * to before syntax element "slice_temporal_mvp_enabled_flag".
++ * If IDR, the skipped bits are just "pic_output_flag"
++ * (separate_colour_plane_flag is not supported).
++ */
++#define V4L2_CID_HANTRO_HEVC_SLICE_HEADER_SKIP (V4L2_CID_CODEC_HANTRO_BASE + 0)
++
++#endif
+--- /dev/null
++++ b/libavcodec/hevc-ctrls-v4.h
+@@ -0,0 +1,524 @@
++/* SPDX-License-Identifier: ((GPL-2.0+ WITH Linux-syscall-note) OR BSD-3-Clause) */
++/*
++ * Video for Linux Two controls header file
++ *
++ * Copyright (C) 1999-2012 the contributors
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * Alternatively you can redistribute this file under the terms of the
++ * BSD license as stated below:
++ *
++ * Redistribution and use in source and binary forms, with or without
++ * modification, are permitted provided that the following conditions
++ * are met:
++ * 1. Redistributions of source code must retain the above copyright
++ * notice, this list of conditions and the following disclaimer.
++ * 2. Redistributions in binary form must reproduce the above copyright
++ * notice, this list of conditions and the following disclaimer in
++ * the documentation and/or other materials provided with the
++ * distribution.
++ * 3. The names of its contributors may not be used to endorse or promote
++ * products derived from this software without specific prior written
++ * permission.
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
++ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
++ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
++ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
++ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ * The contents of this header was split off from videodev2.h. All control
++ * definitions should be added to this header, which is included by
++ * videodev2.h.
++ */
++
++#ifndef AVCODEC_HEVC_CTRLS_V4_H
++#define AVCODEC_HEVC_CTRLS_V4_H
++
++#include <linux/const.h>
++#include <linux/types.h>
++
++#ifndef V4L2_CTRL_CLASS_CODEC_STATELESS
++#define V4L2_CTRL_CLASS_CODEC_STATELESS 0x00a40000 /* Stateless codecs controls */
++#endif
++#ifndef V4L2_CID_CODEC_STATELESS_BASE
++#define V4L2_CID_CODEC_STATELESS_BASE (V4L2_CTRL_CLASS_CODEC_STATELESS | 0x900)
++#endif
++
++#define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */
++
++#define V4L2_CID_STATELESS_HEVC_SPS (V4L2_CID_CODEC_STATELESS_BASE + 400)
++#define V4L2_CID_STATELESS_HEVC_PPS (V4L2_CID_CODEC_STATELESS_BASE + 401)
++#define V4L2_CID_STATELESS_HEVC_SLICE_PARAMS (V4L2_CID_CODEC_STATELESS_BASE + 402)
++#define V4L2_CID_STATELESS_HEVC_SCALING_MATRIX (V4L2_CID_CODEC_STATELESS_BASE + 403)
++#define V4L2_CID_STATELESS_HEVC_DECODE_PARAMS (V4L2_CID_CODEC_STATELESS_BASE + 404)
++#define V4L2_CID_STATELESS_HEVC_DECODE_MODE (V4L2_CID_CODEC_STATELESS_BASE + 405)
++#define V4L2_CID_STATELESS_HEVC_START_CODE (V4L2_CID_CODEC_STATELESS_BASE + 406)
++#define V4L2_CID_STATELESS_HEVC_ENTRY_POINT_OFFSETS (V4L2_CID_CODEC_STATELESS_BASE + 407)
++
++enum v4l2_stateless_hevc_decode_mode { /* presumably the values of V4L2_CID_STATELESS_HEVC_DECODE_MODE - confirm */
++ V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED, /* = 0 */
++ V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED, /* = 1 */
++};
++
++enum v4l2_stateless_hevc_start_code { /* presumably the values of V4L2_CID_STATELESS_HEVC_START_CODE - confirm */
++ V4L2_STATELESS_HEVC_START_CODE_NONE, /* = 0 */
++ V4L2_STATELESS_HEVC_START_CODE_ANNEX_B, /* = 1 */
++};
++
++#define V4L2_HEVC_SLICE_TYPE_B 0
++#define V4L2_HEVC_SLICE_TYPE_P 1
++#define V4L2_HEVC_SLICE_TYPE_I 2
++
++#define V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE (1ULL << 0)
++#define V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED (1ULL << 1)
++#define V4L2_HEVC_SPS_FLAG_AMP_ENABLED (1ULL << 2)
++#define V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET (1ULL << 3)
++#define V4L2_HEVC_SPS_FLAG_PCM_ENABLED (1ULL << 4)
++#define V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED (1ULL << 5)
++#define V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT (1ULL << 6)
++#define V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED (1ULL << 7)
++#define V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED (1ULL << 8)
++
++/**
++ * struct v4l2_ctrl_hevc_sps - ITU-T Rec. H.265: Sequence parameter set
++ *
++ * @video_parameter_set_id: specifies the value of the
++ * vps_video_parameter_set_id of the active VPS
++ * @seq_parameter_set_id: provides an identifier for the SPS for
++ * reference by other syntax elements
++ * @pic_width_in_luma_samples: specifies the width of each decoded picture
++ * in units of luma samples
++ * @pic_height_in_luma_samples: specifies the height of each decoded picture
++ * in units of luma samples
++ * @bit_depth_luma_minus8: this value plus 8 specifies the bit depth of the
++ * samples of the luma array
++ * @bit_depth_chroma_minus8: this value plus 8 specifies the bit depth of the
++ * samples of the chroma arrays
++ * @log2_max_pic_order_cnt_lsb_minus4: this value plus 4 specifies the value of
++ * the variable MaxPicOrderCntLsb
++ * @sps_max_dec_pic_buffering_minus1: this value plus 1 specifies the maximum
++ * required size of the decoded picture
++ * buffer for the codec video sequence
++ * @sps_max_num_reorder_pics: indicates the maximum allowed number of pictures
++ * @sps_max_latency_increase_plus1: not equal to 0 is used to compute the
++ * value of SpsMaxLatencyPictures array
++ * @log2_min_luma_coding_block_size_minus3: plus 3 specifies the minimum
++ * luma coding block size
++ * @log2_diff_max_min_luma_coding_block_size: specifies the difference between
++ * the maximum and minimum luma
++ * coding block size
++ * @log2_min_luma_transform_block_size_minus2: plus 2 specifies the minimum luma
++ * transform block size
++ * @log2_diff_max_min_luma_transform_block_size: specifies the difference between
++ * the maximum and minimum luma
++ * transform block size
++ * @max_transform_hierarchy_depth_inter: specifies the maximum hierarchy
++ * depth for transform units of
++ * coding units coded in inter
++ * prediction mode
++ * @max_transform_hierarchy_depth_intra: specifies the maximum hierarchy
++ * depth for transform units of
++ * coding units coded in intra
++ * prediction mode
++ * @pcm_sample_bit_depth_luma_minus1: this value plus 1 specifies the number of
++ * bits used to represent each of PCM sample
++ * values of the luma component
++ * @pcm_sample_bit_depth_chroma_minus1: this value plus 1 specifies the number
++ * of bits used to represent each of PCM
++ * sample values of the chroma components
++ * @log2_min_pcm_luma_coding_block_size_minus3: this value plus 3 specifies the
++ * minimum size of coding blocks
++ * @log2_diff_max_min_pcm_luma_coding_block_size: specifies the difference between
++ * the maximum and minimum size of
++ * coding blocks
++ * @num_short_term_ref_pic_sets: specifies the number of st_ref_pic_set()
++ * syntax structures included in the SPS
++ * @num_long_term_ref_pics_sps: specifies the number of candidate long-term
++ * reference pictures that are specified in the SPS
++ * @chroma_format_idc: specifies the chroma sampling
++ * @sps_max_sub_layers_minus1: this value plus 1 specifies the maximum number
++ * of temporal sub-layers
++ * @reserved: padding field. Should be zeroed by applications.
++ * @flags: see V4L2_HEVC_SPS_FLAG_{}
++ */
++struct v4l2_ctrl_hevc_sps {
++ __u8 video_parameter_set_id;
++ __u8 seq_parameter_set_id;
++ __u16 pic_width_in_luma_samples;
++ __u16 pic_height_in_luma_samples;
++ __u8 bit_depth_luma_minus8;
++ __u8 bit_depth_chroma_minus8;
++ __u8 log2_max_pic_order_cnt_lsb_minus4;
++ __u8 sps_max_dec_pic_buffering_minus1;
++ __u8 sps_max_num_reorder_pics;
++ __u8 sps_max_latency_increase_plus1;
++ __u8 log2_min_luma_coding_block_size_minus3;
++ __u8 log2_diff_max_min_luma_coding_block_size;
++ __u8 log2_min_luma_transform_block_size_minus2;
++ __u8 log2_diff_max_min_luma_transform_block_size;
++ __u8 max_transform_hierarchy_depth_inter;
++ __u8 max_transform_hierarchy_depth_intra;
++ __u8 pcm_sample_bit_depth_luma_minus1;
++ __u8 pcm_sample_bit_depth_chroma_minus1;
++ __u8 log2_min_pcm_luma_coding_block_size_minus3;
++ __u8 log2_diff_max_min_pcm_luma_coding_block_size;
++ __u8 num_short_term_ref_pic_sets;
++ __u8 num_long_term_ref_pics_sps;
++ __u8 chroma_format_idc;
++ __u8 sps_max_sub_layers_minus1;
++
++ __u8 reserved[6]; /* the 26 bytes of members above become 32, keeping flags 8-byte aligned */
++ __u64 flags;
++};
++
++#define V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED (1ULL << 0)
++#define V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT (1ULL << 1)
++#define V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED (1ULL << 2)
++#define V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT (1ULL << 3)
++#define V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED (1ULL << 4)
++#define V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED (1ULL << 5)
++#define V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED (1ULL << 6)
++#define V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT (1ULL << 7)
++#define V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED (1ULL << 8)
++#define V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED (1ULL << 9)
++#define V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED (1ULL << 10)
++#define V4L2_HEVC_PPS_FLAG_TILES_ENABLED (1ULL << 11)
++#define V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED (1ULL << 12)
++#define V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED (1ULL << 13)
++#define V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 14)
++#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED (1ULL << 15)
++#define V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER (1ULL << 16)
++#define V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT (1ULL << 17)
++#define V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT (1ULL << 18)
++#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT (1ULL << 19)
++#define V4L2_HEVC_PPS_FLAG_UNIFORM_SPACING (1ULL << 20)
++
++/**
++ * struct v4l2_ctrl_hevc_pps - ITU-T Rec. H.265: Picture parameter set
++ *
++ * @pic_parameter_set_id: identifies the PPS for reference by other
++ * syntax elements
++ * @num_extra_slice_header_bits: specifies the number of extra slice header
++ * bits that are present in the slice header RBSP
++ * for coded pictures referring to the PPS.
++ * @num_ref_idx_l0_default_active_minus1: this value plus 1 specifies the
++ * inferred value of num_ref_idx_l0_active_minus1
++ * @num_ref_idx_l1_default_active_minus1: this value plus 1 specifies the
++ * inferred value of num_ref_idx_l1_active_minus1
++ * @init_qp_minus26: this value plus 26 specifies the initial value of SliceQp Y for
++ * each slice referring to the PPS
++ * @diff_cu_qp_delta_depth: specifies the difference between the luma coding
++ * tree block size and the minimum luma coding block
++ * size of coding units that convey cu_qp_delta_abs
++ * and cu_qp_delta_sign_flag
++ * @pps_cb_qp_offset: specify the offsets to the luma quantization parameter Cb
++ * @pps_cr_qp_offset: specify the offsets to the luma quantization parameter Cr
++ * @num_tile_columns_minus1: this value plus 1 specifies the number of tile columns
++ * partitioning the picture
++ * @num_tile_rows_minus1: this value plus 1 specifies the number of tile rows partitioning
++ * the picture
++ * @column_width_minus1: this value plus 1 specifies the width of each tile column in
++ * units of coding tree blocks
++ * @row_height_minus1: this value plus 1 specifies the height of each tile row in
++ * units of coding tree blocks
++ * @pps_beta_offset_div2: specify the default deblocking parameter offsets for
++ * beta divided by 2
++ * @pps_tc_offset_div2: specify the default deblocking parameter offsets for tC
++ * divided by 2
++ * @log2_parallel_merge_level_minus2: this value plus 2 specifies the value of
++ * the variable Log2ParMrgLevel
++ * @reserved: padding field. Should be zeroed by applications.
++ * @flags: see V4L2_HEVC_PPS_FLAG_{}
++ */
++struct v4l2_ctrl_hevc_pps {
++ __u8 pic_parameter_set_id;
++ __u8 num_extra_slice_header_bits;
++ __u8 num_ref_idx_l0_default_active_minus1;
++ __u8 num_ref_idx_l1_default_active_minus1;
++ __s8 init_qp_minus26;
++ __u8 diff_cu_qp_delta_depth;
++ __s8 pps_cb_qp_offset;
++ __s8 pps_cr_qp_offset;
++ __u8 num_tile_columns_minus1;
++ __u8 num_tile_rows_minus1;
++ __u8 column_width_minus1[20];
++ __u8 row_height_minus1[22];
++ __s8 pps_beta_offset_div2;
++ __s8 pps_tc_offset_div2;
++ __u8 log2_parallel_merge_level_minus2;
++ __u8 reserved; /* the 55 bytes of members above become 56, keeping flags 8-byte aligned */
++ __u64 flags;
++};
++
++#define V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE 0x01
++
++#define V4L2_HEVC_SEI_PIC_STRUCT_FRAME 0
++#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_FIELD 1
++#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_FIELD 2
++#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_BOTTOM 3
++#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_TOP 4
++#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_BOTTOM_TOP 5
++#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM 6
++#define V4L2_HEVC_SEI_PIC_STRUCT_FRAME_DOUBLING 7
++#define V4L2_HEVC_SEI_PIC_STRUCT_FRAME_TRIPLING 8
++#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_PAIRED_PREVIOUS_BOTTOM 9
++#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_PAIRED_PREVIOUS_TOP 10
++#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_PAIRED_NEXT_BOTTOM 11
++#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_PAIRED_NEXT_TOP 12
++
++#define V4L2_HEVC_DPB_ENTRIES_NUM_MAX 16
++
++/**
++ * struct v4l2_hevc_dpb_entry - HEVC decoded picture buffer entry
++ *
++ * @timestamp: timestamp of the V4L2 capture buffer to use as reference.
++ * @flags: long term flag for the reference frame (see V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE)
++ * @field_pic: whether the reference is a field picture or a frame.
++ * @reserved: padding field. Should be zeroed by applications.
++ * @pic_order_cnt_val: the picture order count of the current picture.
++ */
++struct v4l2_hevc_dpb_entry {
++ __u64 timestamp;
++ __u8 flags;
++ __u8 field_pic;
++ __u16 reserved; /* the 10 bytes of members above become 12, keeping the __s32 below 4-byte aligned */
++ __s32 pic_order_cnt_val;
++};
++
++/**
++ * struct v4l2_hevc_pred_weight_table - HEVC weighted prediction parameters
++ *
++ * @delta_luma_weight_l0: the difference of the weighting factor applied
++ * to the luma prediction value for list 0
++ * @luma_offset_l0: the additive offset applied to the luma prediction value
++ * for list 0
++ * @delta_chroma_weight_l0: the difference of the weighting factor applied
++ * to the chroma prediction values for list 0
++ * @chroma_offset_l0: the difference of the additive offset applied to
++ * the chroma prediction values for list 0
++ * @delta_luma_weight_l1: the difference of the weighting factor applied
++ * to the luma prediction value for list 1
++ * @luma_offset_l1: the additive offset applied to the luma prediction value
++ * for list 1
++ * @delta_chroma_weight_l1: the difference of the weighting factor applied
++ * to the chroma prediction values for list 1
++ * @chroma_offset_l1: the difference of the additive offset applied to
++ * the chroma prediction values for list 1
++ * @luma_log2_weight_denom: the base 2 logarithm of the denominator for
++ * all luma weighting factors
++ * @delta_chroma_log2_weight_denom: the difference of the base 2 logarithm
++ * of the denominator for all chroma
++ * weighting factors
++ */
++struct v4l2_hevc_pred_weight_table {
++ __s8 delta_luma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++ __s8 luma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++ __s8 delta_chroma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; /* [..][2]: presumably the two chroma components - confirm */
++ __s8 chroma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
++
++ __s8 delta_luma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++ __s8 luma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++ __s8 delta_chroma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
++ __s8 chroma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
++
++ __u8 luma_log2_weight_denom; /* no padding member here, unlike the table in hevc-ctrls-v3.h */
++ __s8 delta_chroma_log2_weight_denom;
++};
++
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA (1ULL << 0)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA (1ULL << 1)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED (1ULL << 2)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO (1ULL << 3)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT (1ULL << 4)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0 (1ULL << 5)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_USE_INTEGER_MV (1ULL << 6)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED (1ULL << 7)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 8)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT (1ULL << 9)
++
++/**
++ * struct v4l2_ctrl_hevc_slice_params - HEVC slice parameters
++ *
++ * This control is a dynamically sized 1-dimensional array,
++ * V4L2_CTRL_FLAG_DYNAMIC_ARRAY flag must be set when using it.
++ *
++ * @bit_size: size (in bits) of the current slice data
++ * @data_byte_offset: offset (in bytes) to the video data in the current slice data
++ * @num_entry_point_offsets: specifies the number of entry point offset syntax
++ * elements in the slice header.
++ * @nal_unit_type: specifies the type of the NAL unit (NAL unit header field; the B/P/I coding type is @slice_type)
++ * @nuh_temporal_id_plus1: this value minus 1 specifies a temporal identifier for the NAL unit
++ * @slice_type: see V4L2_HEVC_SLICE_TYPE_{}
++ * @colour_plane_id: specifies the colour plane associated with the current slice
++ * @slice_pic_order_cnt: specifies the picture order count
++ * @num_ref_idx_l0_active_minus1: this value plus 1 specifies the maximum
++ * reference index for reference picture list 0
++ * that may be used to decode the slice
++ * @num_ref_idx_l1_active_minus1: this value plus 1 specifies the maximum
++ * reference index for reference picture list 1
++ * that may be used to decode the slice
++ * @collocated_ref_idx: specifies the reference index of the collocated picture used
++ * for temporal motion vector prediction
++ * @five_minus_max_num_merge_cand: specifies the maximum number of merging
++ * motion vector prediction candidates supported in
++ * the slice subtracted from 5
++ * @slice_qp_delta: specifies the initial value of QpY to be used for the coding
++ * blocks in the slice
++ * @slice_cb_qp_offset: specifies a difference to be added to the value of pps_cb_qp_offset
++ * @slice_cr_qp_offset: specifies a difference to be added to the value of pps_cr_qp_offset
++ * @slice_act_y_qp_offset: screen content extension parameters
++ * @slice_act_cb_qp_offset: screen content extension parameters
++ * @slice_act_cr_qp_offset: screen content extension parameters
++ * @slice_beta_offset_div2: specify the deblocking parameter offsets for beta divided by 2
++ * @slice_tc_offset_div2: specify the deblocking parameter offsets for tC divided by 2
++ * @pic_struct: indicates whether a picture should be displayed as a frame or as one or
++ * more fields
++ * @reserved0: padding field. Should be zeroed by applications.
++ * @slice_segment_addr: specifies the address of the first coding tree block in
++ * the slice segment
++ * @ref_idx_l0: the list of L0 reference elements as indices in the DPB
++ * @ref_idx_l1: the list of L1 reference elements as indices in the DPB
++ * @short_term_ref_pic_set_size: specifies the size of short-term reference
++ * pictures set included in the SPS
++ * @long_term_ref_pic_set_size: specifies the size of long-term reference
++ * pictures set included in the SPS
++ * @pred_weight_table: the prediction weight coefficients for inter-picture
++ * prediction
++ * @reserved1: padding field. Should be zeroed by applications.
++ * @flags: see V4L2_HEVC_SLICE_PARAMS_FLAG_{}
++ */
++struct v4l2_ctrl_hevc_slice_params {
++ __u32 bit_size;
++ __u32 data_byte_offset;
++ __u32 num_entry_point_offsets;
++
++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: NAL unit header */
++ __u8 nal_unit_type;
++ __u8 nuh_temporal_id_plus1;
++
++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
++ __u8 slice_type;
++ __u8 colour_plane_id;
++ __s32 slice_pic_order_cnt;
++ __u8 num_ref_idx_l0_active_minus1;
++ __u8 num_ref_idx_l1_active_minus1;
++ __u8 collocated_ref_idx;
++ __u8 five_minus_max_num_merge_cand;
++ __s8 slice_qp_delta;
++ __s8 slice_cb_qp_offset;
++ __s8 slice_cr_qp_offset;
++ __s8 slice_act_y_qp_offset;
++ __s8 slice_act_cb_qp_offset;
++ __s8 slice_act_cr_qp_offset;
++ __s8 slice_beta_offset_div2;
++ __s8 slice_tc_offset_div2;
++
++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture timing SEI message */
++ __u8 pic_struct;
++
++ __u8 reserved0[3]; /* the 33 bytes of members above become 36, keeping the __u32 below 4-byte aligned */
++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
++ __u32 slice_segment_addr;
++ __u8 ref_idx_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++ __u8 ref_idx_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++ __u16 short_term_ref_pic_set_size;
++ __u16 long_term_ref_pic_set_size;
++
++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Weighted prediction parameter */
++ struct v4l2_hevc_pred_weight_table pred_weight_table;
++
++ __u8 reserved1[2]; /* the 270 bytes of members above become 272, keeping flags 8-byte aligned */
++ __u64 flags;
++};
++
++#define V4L2_HEVC_DECODE_PARAM_FLAG_IRAP_PIC 0x1
++#define V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC 0x2
++#define V4L2_HEVC_DECODE_PARAM_FLAG_NO_OUTPUT_OF_PRIOR 0x4
++
++/**
++ * struct v4l2_ctrl_hevc_decode_params - HEVC decode parameters
++ *
++ * @pic_order_cnt_val: picture order count
++ * @short_term_ref_pic_set_size: specifies the size of short-term reference
++ * pictures set included in the SPS of the first slice
++ * @long_term_ref_pic_set_size: specifies the size of long-term reference
++ * pictures set included in the SPS of the first slice
++ * @num_active_dpb_entries: the number of entries in dpb
++ * @num_poc_st_curr_before: the number of reference pictures in the short-term
++ * set that come before the current frame
++ * @num_poc_st_curr_after: the number of reference pictures in the short-term
++ * set that come after the current frame
++ * @num_poc_lt_curr: the number of reference pictures in the long-term set
++ * @poc_st_curr_before: provides the index of the short term before references
++ * in DPB array
++ * @poc_st_curr_after: provides the index of the short term after references
++ * in DPB array
++ * @poc_lt_curr: provides the index of the long term references in DPB array
++ * @reserved: padding field. Should be zeroed by applications.
++ * @dpb: the decoded picture buffer, for meta-data about reference frames
++ * @flags: see V4L2_HEVC_DECODE_PARAM_FLAG_{}
++ */
++struct v4l2_ctrl_hevc_decode_params {
++ __s32 pic_order_cnt_val;
++ __u16 short_term_ref_pic_set_size;
++ __u16 long_term_ref_pic_set_size;
++ __u8 num_active_dpb_entries;
++ __u8 num_poc_st_curr_before;
++ __u8 num_poc_st_curr_after;
++ __u8 num_poc_lt_curr;
++ __u8 poc_st_curr_before[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++ __u8 poc_st_curr_after[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++ __u8 poc_lt_curr[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++ __u8 reserved[4];
++ struct v4l2_hevc_dpb_entry dpb[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++ __u64 flags;
++};
++
++/**
++ * struct v4l2_ctrl_hevc_scaling_matrix - HEVC scaling lists parameters
++ *
++ * @scaling_list_4x4: scaling list is used for the scaling process for
++ * transform coefficients. The values on each scaling
++ * list are expected in raster scan order
++ * @scaling_list_8x8: scaling list is used for the scaling process for
++ * transform coefficients. The values on each scaling
++ * list are expected in raster scan order
++ * @scaling_list_16x16: scaling list is used for the scaling process for
++ * transform coefficients. The values on each scaling
++ * list are expected in raster scan order
++ * @scaling_list_32x32: scaling list is used for the scaling process for
++ * transform coefficients. The values on each scaling
++ * list are expected in raster scan order
++ * @scaling_list_dc_coef_16x16: scaling list is used for the scaling process
++ * for transform coefficients. The values on each
++ * scaling list are expected in raster scan order.
++ * @scaling_list_dc_coef_32x32: scaling list is used for the scaling process
++ * for transform coefficients. The values on each
++ * scaling list are expected in raster scan order.
++ */
++struct v4l2_ctrl_hevc_scaling_matrix {
++ __u8 scaling_list_4x4[6][16];
++ __u8 scaling_list_8x8[6][64];
++ __u8 scaling_list_16x16[6][64];
++ __u8 scaling_list_32x32[2][64];
++ __u8 scaling_list_dc_coef_16x16[6];
++ __u8 scaling_list_dc_coef_32x32[2];
++};
++
++#endif
+--- a/libavcodec/hevc_parser.c
++++ b/libavcodec/hevc_parser.c
+@@ -97,6 +97,19 @@ static int hevc_parse_slice_header(AVCod
+ avctx->profile = ps->sps->ptl.general_ptl.profile_idc;
+ avctx->level = ps->sps->ptl.general_ptl.level_idc;
+
++ if (ps->sps->chroma_format_idc == 1) { /* use the VUI-signalled location when present, else default to LEFT */
++ avctx->chroma_sample_location = ps->sps->vui.chroma_loc_info_present_flag ?
++ ps->sps->vui.chroma_sample_loc_type_top_field + 1 :
++ AVCHROMA_LOC_LEFT;
++ }
++ else if (ps->sps->chroma_format_idc == 2 ||
++ ps->sps->chroma_format_idc == 3) {
++ avctx->chroma_sample_location = AVCHROMA_LOC_TOPLEFT;
++ }
++ else {
++ avctx->chroma_sample_location = AVCHROMA_LOC_UNSPECIFIED;
++ }
++
+ if (ps->vps->vps_timing_info_present_flag) {
+ num = ps->vps->vps_num_units_in_tick;
+ den = ps->vps->vps_time_scale;
+--- a/libavcodec/hevc_refs.c
++++ b/libavcodec/hevc_refs.c
+@@ -98,18 +98,22 @@ static HEVCFrame *alloc_frame(HEVCContex
+ if (!frame->rpl_buf)
+ goto fail;
+
+- frame->tab_mvf_buf = av_buffer_pool_get(s->tab_mvf_pool);
+- if (!frame->tab_mvf_buf)
+- goto fail;
+- frame->tab_mvf = (MvField *)frame->tab_mvf_buf->data;
++ if (s->tab_mvf_pool) {
++ frame->tab_mvf_buf = av_buffer_pool_get(s->tab_mvf_pool);
++ if (!frame->tab_mvf_buf)
++ goto fail;
++ frame->tab_mvf = (MvField *)frame->tab_mvf_buf->data;
++ }
+
+- frame->rpl_tab_buf = av_buffer_pool_get(s->rpl_tab_pool);
+- if (!frame->rpl_tab_buf)
+- goto fail;
+- frame->rpl_tab = (RefPicListTab **)frame->rpl_tab_buf->data;
+- frame->ctb_count = s->ps.sps->ctb_width * s->ps.sps->ctb_height;
+- for (j = 0; j < frame->ctb_count; j++)
+- frame->rpl_tab[j] = (RefPicListTab *)frame->rpl_buf->data;
++ if (s->rpl_tab_pool) {
++ frame->rpl_tab_buf = av_buffer_pool_get(s->rpl_tab_pool);
++ if (!frame->rpl_tab_buf)
++ goto fail;
++ frame->rpl_tab = (RefPicListTab **)frame->rpl_tab_buf->data;
++ frame->ctb_count = s->ps.sps->ctb_width * s->ps.sps->ctb_height;
++ for (j = 0; j < frame->ctb_count; j++)
++ frame->rpl_tab[j] = (RefPicListTab *)frame->rpl_buf->data;
++ }
+
+ frame->frame->top_field_first = s->sei.picture_timing.picture_struct == AV_PICTURE_STRUCTURE_TOP_FIELD;
+ frame->frame->interlaced_frame = (s->sei.picture_timing.picture_struct == AV_PICTURE_STRUCTURE_TOP_FIELD) || (s->sei.picture_timing.picture_struct == AV_PICTURE_STRUCTURE_BOTTOM_FIELD);
+@@ -284,14 +288,17 @@ static int init_slice_rpl(HEVCContext *s
+ int ctb_count = frame->ctb_count;
+ int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_segment_addr];
+ int i;
++ RefPicListTab * const tab = (RefPicListTab *)frame->rpl_buf->data + s->slice_idx;
+
+ if (s->slice_idx >= frame->rpl_buf->size / sizeof(RefPicListTab))
+ return AVERROR_INVALIDDATA;
+
+- for (i = ctb_addr_ts; i < ctb_count; i++)
+- frame->rpl_tab[i] = (RefPicListTab *)frame->rpl_buf->data + s->slice_idx;
++ if (frame->rpl_tab) {
++ for (i = ctb_addr_ts; i < ctb_count; i++)
++ frame->rpl_tab[i] = tab;
++ }
+
+- frame->refPicList = (RefPicList *)frame->rpl_tab[ctb_addr_ts];
++ frame->refPicList = tab->refPicList;
+
+ return 0;
+ }
+--- a/libavcodec/hevcdec.c
++++ b/libavcodec/hevcdec.c
+@@ -340,6 +340,19 @@ static void export_stream_params(HEVCCon
+
+ ff_set_sar(avctx, sps->vui.sar);
+
++ if (sps->chroma_format_idc == 1) { /* use the VUI-signalled location when present, else default to LEFT */
++ avctx->chroma_sample_location = sps->vui.chroma_loc_info_present_flag ?
++ sps->vui.chroma_sample_loc_type_top_field + 1 :
++ AVCHROMA_LOC_LEFT;
++ }
++ else if (sps->chroma_format_idc == 2 ||
++ sps->chroma_format_idc == 3) {
++ avctx->chroma_sample_location = AVCHROMA_LOC_TOPLEFT;
++ }
++ else {
++ avctx->chroma_sample_location = AVCHROMA_LOC_UNSPECIFIED;
++ }
++
+ if (sps->vui.video_signal_type_present_flag)
+ avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
+ : AVCOL_RANGE_MPEG;
+@@ -402,6 +415,7 @@ static enum AVPixelFormat get_format(HEV
+ #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + \
+ CONFIG_HEVC_D3D11VA_HWACCEL * 2 + \
+ CONFIG_HEVC_NVDEC_HWACCEL + \
++ CONFIG_HEVC_V4L2REQUEST_HWACCEL + \
+ CONFIG_HEVC_VAAPI_HWACCEL + \
+ CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL + \
+ CONFIG_HEVC_VDPAU_HWACCEL)
+@@ -429,6 +443,9 @@ static enum AVPixelFormat get_format(HEV
+ #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
+ *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
+ #endif
++#if CONFIG_HEVC_V4L2REQUEST_HWACCEL
++ *fmt++ = AV_PIX_FMT_DRM_PRIME;
++#endif
+ break;
+ case AV_PIX_FMT_YUV420P10:
+ #if CONFIG_HEVC_DXVA2_HWACCEL
+@@ -450,6 +467,9 @@ static enum AVPixelFormat get_format(HEV
+ #if CONFIG_HEVC_NVDEC_HWACCEL
+ *fmt++ = AV_PIX_FMT_CUDA;
+ #endif
++#if CONFIG_HEVC_V4L2REQUEST_HWACCEL
++ *fmt++ = AV_PIX_FMT_DRM_PRIME;
++#endif
+ break;
+ case AV_PIX_FMT_YUV444P:
+ #if CONFIG_HEVC_VDPAU_HWACCEL
+@@ -504,6 +524,16 @@ static int set_sps(HEVCContext *s, const
+ if (!sps)
+ return 0;
+
++ // If hwaccel then we don't need all the s/w decode helper arrays
++ if (s->avctx->hwaccel) {
++ export_stream_params(s, sps);
++
++ s->avctx->pix_fmt = pix_fmt;
++ s->ps.sps = sps;
++ s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data;
++ return 0;
++ }
++
+ ret = pic_arrays_init(s, sps);
+ if (ret < 0)
+ goto fail;
+@@ -3011,11 +3041,13 @@ static int hevc_frame_start(HEVCContext
+ ((s->ps.sps->height >> s->ps.sps->log2_min_cb_size) + 1);
+ int ret;
+
+- memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
+- memset(s->vertical_bs, 0, s->bs_width * s->bs_height);
+- memset(s->cbf_luma, 0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
+- memset(s->is_pcm, 0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1));
+- memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
++ if (s->horizontal_bs) {
++ memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
++ memset(s->vertical_bs, 0, s->bs_width * s->bs_height);
++ memset(s->cbf_luma, 0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
++ memset(s->is_pcm, 0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1));
++ memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
++ }
+
+ s->is_decoded = 0;
+ s->first_nal_type = s->nal_unit_type;
+@@ -3507,8 +3539,13 @@ static int hevc_decode_frame(AVCodecCont
+
+ s->ref = NULL;
+ ret = decode_nal_units(s, avpkt->data, avpkt->size);
+- if (ret < 0)
++ if (ret < 0) {
++ // Ensure that hwaccel knows this frame is over
++ if (s->avctx->hwaccel && s->avctx->hwaccel->abort_frame)
++ s->avctx->hwaccel->abort_frame(s->avctx);
++
+ return ret;
++ }
+
+ if (avctx->hwaccel) {
+ if (s->ref && (ret = avctx->hwaccel->end_frame(avctx)) < 0) {
+@@ -3558,15 +3595,19 @@ static int hevc_ref_frame(HEVCContext *s
+ dst->needs_fg = 1;
+ }
+
+- dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
+- if (!dst->tab_mvf_buf)
+- goto fail;
+- dst->tab_mvf = src->tab_mvf;
++ if (src->tab_mvf_buf) {
++ dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
++ if (!dst->tab_mvf_buf)
++ goto fail;
++ dst->tab_mvf = src->tab_mvf;
++ }
+
+- dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
+- if (!dst->rpl_tab_buf)
+- goto fail;
+- dst->rpl_tab = src->rpl_tab;
++ if (src->rpl_tab_buf) {
++ dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
++ if (!dst->rpl_tab_buf)
++ goto fail;
++ dst->rpl_tab = src->rpl_tab;
++ }
+
+ dst->rpl_buf = av_buffer_ref(src->rpl_buf);
+ if (!dst->rpl_buf)
+@@ -3900,6 +3941,9 @@ const FFCodec ff_hevc_decoder = {
+ #if CONFIG_HEVC_NVDEC_HWACCEL
+ HWACCEL_NVDEC(hevc),
+ #endif
++#if CONFIG_HEVC_V4L2REQUEST_HWACCEL
++ HWACCEL_V4L2REQUEST(hevc),
++#endif
+ #if CONFIG_HEVC_VAAPI_HWACCEL
+ HWACCEL_VAAPI(hevc),
+ #endif
+--- a/libavcodec/hwaccels.h
++++ b/libavcodec/hwaccels.h
+@@ -40,6 +40,7 @@ extern const AVHWAccel ff_hevc_d3d11va_h
+ extern const AVHWAccel ff_hevc_d3d11va2_hwaccel;
+ extern const AVHWAccel ff_hevc_dxva2_hwaccel;
+ extern const AVHWAccel ff_hevc_nvdec_hwaccel;
++extern const AVHWAccel ff_hevc_v4l2request_hwaccel;
+ extern const AVHWAccel ff_hevc_vaapi_hwaccel;
+ extern const AVHWAccel ff_hevc_vdpau_hwaccel;
+ extern const AVHWAccel ff_hevc_videotoolbox_hwaccel;
+--- a/libavcodec/hwconfig.h
++++ b/libavcodec/hwconfig.h
+@@ -24,6 +24,7 @@
+
+
+ #define HWACCEL_CAP_ASYNC_SAFE (1 << 0)
++#define HWACCEL_CAP_MT_SAFE (1 << 1)
+
+
+ typedef struct AVCodecHWConfigInternal {
+@@ -70,6 +71,8 @@ typedef struct AVCodecHWConfigInternal {
+ HW_CONFIG_HWACCEL(1, 1, 0, D3D11, D3D11VA, ff_ ## codec ## _d3d11va2_hwaccel)
+ #define HWACCEL_NVDEC(codec) \
+ HW_CONFIG_HWACCEL(1, 1, 0, CUDA, CUDA, ff_ ## codec ## _nvdec_hwaccel)
++#define HWACCEL_V4L2REQUEST(codec) \
++ HW_CONFIG_HWACCEL(1, 0, 0, DRM_PRIME, DRM, ff_ ## codec ## _v4l2request_hwaccel)
+ #define HWACCEL_VAAPI(codec) \
+ HW_CONFIG_HWACCEL(1, 1, 1, VAAPI, VAAPI, ff_ ## codec ## _vaapi_hwaccel)
+ #define HWACCEL_VDPAU(codec) \
+--- a/libavcodec/mmaldec.c
++++ b/libavcodec/mmaldec.c
+@@ -24,6 +24,9 @@
+ * MMAL Video Decoder
+ */
+
++#pragma GCC diagnostic push
++// Many many redundant decls in the header files
++#pragma GCC diagnostic ignored "-Wredundant-decls"
+ #include <bcm_host.h>
+ #include <interface/mmal/mmal.h>
+ #include <interface/mmal/mmal_parameters_video.h>
+@@ -31,6 +34,7 @@
+ #include <interface/mmal/util/mmal_util_params.h>
+ #include <interface/mmal/util/mmal_default_components.h>
+ #include <interface/mmal/vc/mmal_vc_api.h>
++#pragma GCC diagnostic pop
+ #include <stdatomic.h>
+
+ #include "avcodec.h"
+--- a/libavcodec/pthread_frame.c
++++ b/libavcodec/pthread_frame.c
+@@ -217,7 +217,8 @@ FF_ENABLE_DEPRECATION_WARNINGS
+
+ /* if the previous thread uses hwaccel then we take the lock to ensure
+ * the threads don't run concurrently */
+- if (avctx->hwaccel) {
++ if (avctx->hwaccel &&
++ !(avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)) {
+ pthread_mutex_lock(&p->parent->hwaccel_mutex);
+ p->hwaccel_serializing = 1;
+ }
+@@ -243,7 +244,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
+ p->hwaccel_serializing = 0;
+ pthread_mutex_unlock(&p->parent->hwaccel_mutex);
+ }
+- av_assert0(!avctx->hwaccel);
++ av_assert0(!avctx->hwaccel || (avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE));
+
+ if (p->async_serializing) {
+ p->async_serializing = 0;
+@@ -331,6 +332,12 @@ FF_ENABLE_DEPRECATION_WARNINGS
+ }
+
+ dst->hwaccel_flags = src->hwaccel_flags;
++ if (src->hwaccel &&
++ (src->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)) {
++ dst->hwaccel = src->hwaccel;
++ dst->hwaccel_context = src->hwaccel_context;
++ dst->internal->hwaccel_priv_data = src->internal->hwaccel_priv_data;
++ }
+
+ err = av_buffer_replace(&dst->internal->pool, src->internal->pool);
+ if (err < 0)
+@@ -461,10 +468,13 @@ static int submit_packet(PerThreadContex
+ }
+
+ /* transfer the stashed hwaccel state, if any */
+- av_assert0(!p->avctx->hwaccel);
+- FFSWAP(const AVHWAccel*, p->avctx->hwaccel, fctx->stash_hwaccel);
+- FFSWAP(void*, p->avctx->hwaccel_context, fctx->stash_hwaccel_context);
+- FFSWAP(void*, p->avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv);
++ av_assert0(!p->avctx->hwaccel || (p->avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE));
++ if (!p->avctx->hwaccel || /* skip the swap only when an MT-safe hwaccel is already attached */
++ !(p->avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)) {
++ FFSWAP(const AVHWAccel*, p->avctx->hwaccel, fctx->stash_hwaccel);
++ FFSWAP(void*, p->avctx->hwaccel_context, fctx->stash_hwaccel_context);
++ FFSWAP(void*, p->avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv);
++ }
+
+ av_packet_unref(p->avpkt);
+ ret = av_packet_ref(p->avpkt, avpkt);
+@@ -656,7 +666,9 @@ void ff_thread_finish_setup(AVCodecConte
+
+ if (!(avctx->active_thread_type&FF_THREAD_FRAME)) return;
+
+- if (avctx->hwaccel && !p->hwaccel_serializing) {
++ if (avctx->hwaccel &&
++ !(avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE) &&
++ !p->hwaccel_serializing) {
+ pthread_mutex_lock(&p->parent->hwaccel_mutex);
+ p->hwaccel_serializing = 1;
+ }
+@@ -673,9 +685,12 @@ void ff_thread_finish_setup(AVCodecConte
+ * this is done here so that this worker thread can wipe its own hwaccel
+ * state after decoding, without requiring synchronization */
+ av_assert0(!p->parent->stash_hwaccel);
+- p->parent->stash_hwaccel = avctx->hwaccel;
+- p->parent->stash_hwaccel_context = avctx->hwaccel_context;
+- p->parent->stash_hwaccel_priv = avctx->internal->hwaccel_priv_data;
++ if (avctx->hwaccel &&
++ !(avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)) {
++ p->parent->stash_hwaccel = avctx->hwaccel;
++ p->parent->stash_hwaccel_context = avctx->hwaccel_context;
++ p->parent->stash_hwaccel_priv = avctx->internal->hwaccel_priv_data;
++ }
+
+ pthread_mutex_lock(&p->progress_mutex);
+ if(atomic_load(&p->state) == STATE_SETUP_FINISHED){
+@@ -730,6 +745,15 @@ void ff_frame_thread_free(AVCodecContext
+
+ park_frame_worker_threads(fctx, thread_count);
+
++ if (fctx->prev_thread &&
++ avctx->hwaccel && (avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE) &&
++ avctx->internal->hwaccel_priv_data !=
++ fctx->prev_thread->avctx->internal->hwaccel_priv_data) {
++ if (update_context_from_thread(avctx, fctx->prev_thread->avctx, 1) < 0) {
++ av_log(avctx, AV_LOG_ERROR, "Failed to update user thread.\n");
++ }
++ }
++
+ for (i = 0; i < thread_count; i++) {
+ PerThreadContext *p = &fctx->threads[i];
+ AVCodecContext *ctx = p->avctx;
+@@ -778,10 +802,13 @@ void ff_frame_thread_free(AVCodecContext
+
+ /* if we have stashed hwaccel state, move it to the user-facing context,
+ * so it will be freed in avcodec_close() */
+- av_assert0(!avctx->hwaccel);
+- FFSWAP(const AVHWAccel*, avctx->hwaccel, fctx->stash_hwaccel);
+- FFSWAP(void*, avctx->hwaccel_context, fctx->stash_hwaccel_context);
+- FFSWAP(void*, avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv);
++ av_assert0(!avctx->hwaccel || (avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE));
++ if (!avctx->hwaccel || /* skip the swap only when an MT-safe hwaccel is already attached */
++ !(avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)) {
++ FFSWAP(const AVHWAccel*, avctx->hwaccel, fctx->stash_hwaccel);
++ FFSWAP(void*, avctx->hwaccel_context, fctx->stash_hwaccel_context);
++ FFSWAP(void*, avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv);
++ }
+
+ av_freep(&avctx->internal->thread_ctx);
+ }
+--- a/libavcodec/raw.c
++++ b/libavcodec/raw.c
+@@ -294,6 +294,12 @@ static const PixelFormatTag raw_pix_fmt_
+ { AV_PIX_FMT_RGB565LE,MKTAG( 3 , 0 , 0 , 0 ) }, /* flipped RGB565LE */
+ { AV_PIX_FMT_YUV444P, MKTAG('Y', 'V', '2', '4') }, /* YUV444P, swapped UV */
+
++ /* RPI (Might as well define for everything) */
++ { AV_PIX_FMT_SAND128, MKTAG('S', 'A', 'N', 'D') },
++ { AV_PIX_FMT_RPI4_8, MKTAG('S', 'A', 'N', 'D') },
++ { AV_PIX_FMT_SAND64_10, MKTAG('S', 'N', 'D', 'A') },
++ { AV_PIX_FMT_RPI4_10, MKTAG('S', 'N', 'D', 'B') },
++
+ { AV_PIX_FMT_NONE, 0 },
+ };
+
+--- a/libavcodec/rawenc.c
++++ b/libavcodec/rawenc.c
+@@ -24,6 +24,7 @@
+ * Raw Video Encoder
+ */
+
++#include "config.h"
+ #include "avcodec.h"
+ #include "codec_internal.h"
+ #include "encode.h"
+@@ -33,6 +34,10 @@
+ #include "libavutil/intreadwrite.h"
+ #include "libavutil/imgutils.h"
+ #include "libavutil/internal.h"
++#include "libavutil/avassert.h"
++#if CONFIG_SAND
++#include "libavutil/rpi_sand_fns.h"
++#endif
+
+ static av_cold int raw_encode_init(AVCodecContext *avctx)
+ {
+@@ -46,22 +51,114 @@ static av_cold int raw_encode_init(AVCod
+ return 0;
+ }
+
++#if CONFIG_SAND
++static int raw_sand8_as_yuv420(AVCodecContext *avctx, AVPacket *pkt,
++ const AVFrame *frame)
++{
++ const int width = av_frame_cropped_width(frame);
++ const int height = av_frame_cropped_height(frame);
++ const int x0 = frame->crop_left;
++ const int y0 = frame->crop_top;
++ const int size = width * height * 3 / 2;
++ uint8_t * dst;
++ int ret;
++
++ if ((ret = ff_get_encode_buffer(avctx, pkt, size, 0)) < 0)
++ return ret;
++
++ dst = pkt->data;
++
++ av_rpi_sand_to_planar_y8(dst, width, frame->data[0], frame->linesize[0], frame->linesize[3], x0, y0, width, height);
++ dst += width * height;
++ av_rpi_sand_to_planar_c8(dst, width / 2, dst + width * height / 4, width / 2,
++ frame->data[1], frame->linesize[1], av_rpi_sand_frame_stride2(frame), x0 / 2, y0 / 2, width / 2, height / 2);
++ return 0;
++}
++
++static int raw_sand16_as_yuv420(AVCodecContext *avctx, AVPacket *pkt,
++ const AVFrame *frame)
++{
++ const int width = av_frame_cropped_width(frame);
++ const int height = av_frame_cropped_height(frame);
++ const int x0 = frame->crop_left;
++ const int y0 = frame->crop_top;
++ const int size = width * height * 3;
++ uint8_t * dst;
++ int ret;
++
++ if ((ret = ff_get_encode_buffer(avctx, pkt, size, 0)) < 0)
++ return ret;
++
++ dst = pkt->data;
++
++ av_rpi_sand_to_planar_y16(dst, width * 2, frame->data[0], frame->linesize[0], frame->linesize[3], x0 * 2, y0, width * 2, height);
++ dst += width * height * 2;
++ av_rpi_sand_to_planar_c16(dst, width, dst + width * height / 2, width,
++ frame->data[1], frame->linesize[1], av_rpi_sand_frame_stride2(frame), x0, y0 / 2, width, height / 2);
++ return 0;
++}
++
++static int raw_sand30_as_yuv420(AVCodecContext *avctx, AVPacket *pkt,
++ const AVFrame *frame)
++{
++ const int width = av_frame_cropped_width(frame);
++ const int height = av_frame_cropped_height(frame);
++ const int x0 = frame->crop_left;
++ const int y0 = frame->crop_top;
++ const int size = width * height * 3;
++ uint8_t * dst;
++ int ret;
++
++ if ((ret = ff_get_encode_buffer(avctx, pkt, size, 0)) < 0)
++ return ret;
++
++ dst = pkt->data;
++
++ av_rpi_sand30_to_planar_y16(dst, width * 2, frame->data[0], frame->linesize[0], frame->linesize[3], x0, y0, width, height);
++ dst += width * height * 2;
++ av_rpi_sand30_to_planar_c16(dst, width, dst + width * height / 2, width,
++ frame->data[1], frame->linesize[1], av_rpi_sand_frame_stride2(frame), x0/2, y0 / 2, width/2, height / 2);
++ return 0;
++}
++#endif
++
++
+ static int raw_encode(AVCodecContext *avctx, AVPacket *pkt,
+- const AVFrame *frame, int *got_packet)
++ const AVFrame *src_frame, int *got_packet)
+ {
+- int ret = av_image_get_buffer_size(frame->format,
+- frame->width, frame->height, 1);
++ int ret;
++ AVFrame * frame = NULL;
+
+- if (ret < 0)
++#if CONFIG_SAND
++ if (av_rpi_is_sand_frame(src_frame)) { /* sand frames are written out via the raw_sand*_as_yuv420 helpers */
++ ret = av_rpi_is_sand8_frame(src_frame) ? raw_sand8_as_yuv420(avctx, pkt, src_frame) :
++ av_rpi_is_sand16_frame(src_frame) ? raw_sand16_as_yuv420(avctx, pkt, src_frame) :
++ av_rpi_is_sand30_frame(src_frame) ? raw_sand30_as_yuv420(avctx, pkt, src_frame) : AVERROR(EINVAL);
++ *got_packet = (ret == 0);
++ return ret;
++ }
++#endif
++
++ if ((frame = av_frame_clone(src_frame)) == NULL) {
++ ret = AVERROR(ENOMEM);
++ goto fail;
++ }
++
++ if ((ret = av_frame_apply_cropping(frame, AV_FRAME_CROP_UNALIGNED)) < 0)
++ goto fail;
++
++ ret = av_image_get_buffer_size(frame->format,
++ frame->width, frame->height, 1);
++ if (ret < 0)
++ goto fail;
+
+ if ((ret = ff_get_encode_buffer(avctx, pkt, ret, 0)) < 0)
+- return ret;
++ goto fail;
+ if ((ret = av_image_copy_to_buffer(pkt->data, pkt->size,
+ (const uint8_t **)frame->data, frame->linesize,
+ frame->format,
+ frame->width, frame->height, 1)) < 0)
+- return ret;
++ goto fail;
+
+ if(avctx->codec_tag == AV_RL32("yuv2") && ret > 0 &&
+ frame->format == AV_PIX_FMT_YUYV422) {
+@@ -77,8 +174,15 @@ static int raw_encode(AVCodecContext *av
+ AV_WB64(&pkt->data[8 * x], v << 48 | v >> 16);
+ }
+ }
++ pkt->flags |= AV_PKT_FLAG_KEY;
++ av_frame_free(&frame);
+ *got_packet = 1;
+ return 0;
++
++fail:
++ av_frame_free(&frame);
++ *got_packet = 0;
++ return ret;
+ }
+
+ const FFCodec ff_rawvideo_encoder = {
+--- a/libavcodec/v4l2_buffers.c
++++ b/libavcodec/v4l2_buffers.c
+@@ -21,6 +21,7 @@
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
++#include <drm_fourcc.h>
+ #include <linux/videodev2.h>
+ #include <sys/ioctl.h>
+ #include <sys/mman.h>
+@@ -28,57 +29,89 @@
+ #include <fcntl.h>
+ #include <poll.h>
+ #include "libavcodec/avcodec.h"
++#include "libavcodec/internal.h"
++#include "libavutil/avassert.h"
+ #include "libavutil/pixdesc.h"
++#include "libavutil/hwcontext.h"
+ #include "v4l2_context.h"
+ #include "v4l2_buffers.h"
+ #include "v4l2_m2m.h"
++#include "v4l2_req_dmabufs.h"
++#include "weak_link.h"
+
+ #define USEC_PER_SEC 1000000
+-static AVRational v4l2_timebase = { 1, USEC_PER_SEC };
++static const AVRational v4l2_timebase = { 1, USEC_PER_SEC };
+
+-static inline V4L2m2mContext *buf_to_m2mctx(V4L2Buffer *buf)
++static inline V4L2m2mContext *ctx_to_m2mctx(const V4L2Context *ctx)
+ {
+- return V4L2_TYPE_IS_OUTPUT(buf->context->type) ?
+- container_of(buf->context, V4L2m2mContext, output) :
+- container_of(buf->context, V4L2m2mContext, capture);
++ return V4L2_TYPE_IS_OUTPUT(ctx->type) ?
++ container_of(ctx, V4L2m2mContext, output) :
++ container_of(ctx, V4L2m2mContext, capture);
+ }
+
+-static inline AVCodecContext *logger(V4L2Buffer *buf)
++static inline V4L2m2mContext *buf_to_m2mctx(const V4L2Buffer * const buf)
+ {
+- return buf_to_m2mctx(buf)->avctx;
++ return ctx_to_m2mctx(buf->context);
+ }
+
+-static inline AVRational v4l2_get_timebase(V4L2Buffer *avbuf)
++static inline AVCodecContext *logger(const V4L2Buffer * const buf)
+ {
+- V4L2m2mContext *s = buf_to_m2mctx(avbuf);
++ return buf_to_m2mctx(buf)->avctx;
++}
+
+- if (s->avctx->pkt_timebase.num)
+- return s->avctx->pkt_timebase;
+- return s->avctx->time_base;
++static inline AVRational v4l2_get_timebase(const V4L2Buffer * const avbuf)
++{
++ const V4L2m2mContext *s = buf_to_m2mctx(avbuf);
++ const AVRational tb = s->avctx->pkt_timebase.num ?
++ s->avctx->pkt_timebase :
++ s->avctx->time_base;
++ return tb.num && tb.den ? tb : v4l2_timebase;
+ }
+
+-static inline void v4l2_set_pts(V4L2Buffer *out, int64_t pts)
++static inline struct timeval tv_from_int(const int64_t t)
+ {
+- int64_t v4l2_pts;
++ return (struct timeval){
++ .tv_usec = t % USEC_PER_SEC,
++ .tv_sec = t / USEC_PER_SEC
++ };
++}
+
+- if (pts == AV_NOPTS_VALUE)
+- pts = 0;
++static inline int64_t int_from_tv(const struct timeval t)
++{
++ return (int64_t)t.tv_sec * USEC_PER_SEC + t.tv_usec;
++}
+
++static inline void v4l2_set_pts(V4L2Buffer * const out, const int64_t pts)
++{
+ /* convert pts to v4l2 timebase */
+- v4l2_pts = av_rescale_q(pts, v4l2_get_timebase(out), v4l2_timebase);
+- out->buf.timestamp.tv_usec = v4l2_pts % USEC_PER_SEC;
+- out->buf.timestamp.tv_sec = v4l2_pts / USEC_PER_SEC;
++ const int64_t v4l2_pts =
++ pts == AV_NOPTS_VALUE ? 0 :
++ av_rescale_q(pts, v4l2_get_timebase(out), v4l2_timebase);
++ out->buf.timestamp = tv_from_int(v4l2_pts);
+ }
+
+-static inline int64_t v4l2_get_pts(V4L2Buffer *avbuf)
++static inline int64_t v4l2_get_pts(const V4L2Buffer * const avbuf)
+ {
+- int64_t v4l2_pts;
+-
++ const int64_t v4l2_pts = int_from_tv(avbuf->buf.timestamp);
++ return v4l2_pts != 0 ? v4l2_pts : AV_NOPTS_VALUE;
++#if 0
+ /* convert pts back to encoder timebase */
+- v4l2_pts = (int64_t)avbuf->buf.timestamp.tv_sec * USEC_PER_SEC +
+- avbuf->buf.timestamp.tv_usec;
++ return
++ avbuf->context->no_pts_rescale ? v4l2_pts :
++ v4l2_pts == 0 ? AV_NOPTS_VALUE :
++ av_rescale_q(v4l2_pts, v4l2_timebase, v4l2_get_timebase(avbuf));
++#endif
++}
+
+- return av_rescale_q(v4l2_pts, v4l2_timebase, v4l2_get_timebase(avbuf));
++static void set_buf_length(V4L2Buffer *out, unsigned int plane, uint32_t bytesused, uint32_t length)
++{
++ if (V4L2_TYPE_IS_MULTIPLANAR(out->buf.type)) {
++ out->planes[plane].bytesused = bytesused;
++ out->planes[plane].length = length;
++ } else {
++ out->buf.bytesused = bytesused;
++ out->buf.length = length;
++ }
+ }
+
+ static enum AVColorPrimaries v4l2_get_color_primaries(V4L2Buffer *buf)
+@@ -115,6 +148,105 @@ static enum AVColorPrimaries v4l2_get_co
+ return AVCOL_PRI_UNSPECIFIED;
+ }
+
++static void v4l2_set_color(V4L2Buffer *buf,
++ const enum AVColorPrimaries avcp,
++ const enum AVColorSpace avcs,
++ const enum AVColorTransferCharacteristic avxc)
++{
++ enum v4l2_ycbcr_encoding ycbcr = V4L2_YCBCR_ENC_DEFAULT;
++ enum v4l2_colorspace cs = V4L2_COLORSPACE_DEFAULT;
++ enum v4l2_xfer_func xfer = V4L2_XFER_FUNC_DEFAULT;
++
++ switch (avcp) { /* primaries give a first guess at cs (and ycbcr for some) */
++ case AVCOL_PRI_BT709:
++ cs = V4L2_COLORSPACE_REC709;
++ ycbcr = V4L2_YCBCR_ENC_709;
++ break;
++ case AVCOL_PRI_BT470M:
++ cs = V4L2_COLORSPACE_470_SYSTEM_M;
++ ycbcr = V4L2_YCBCR_ENC_601;
++ break;
++ case AVCOL_PRI_BT470BG:
++ cs = V4L2_COLORSPACE_470_SYSTEM_BG;
++ break;
++ case AVCOL_PRI_SMPTE170M:
++ cs = V4L2_COLORSPACE_SMPTE170M;
++ break;
++ case AVCOL_PRI_SMPTE240M:
++ cs = V4L2_COLORSPACE_SMPTE240M;
++ break;
++ case AVCOL_PRI_BT2020:
++ cs = V4L2_COLORSPACE_BT2020;
++ break;
++ case AVCOL_PRI_SMPTE428:
++ case AVCOL_PRI_SMPTE431:
++ case AVCOL_PRI_SMPTE432:
++ case AVCOL_PRI_EBU3213:
++ case AVCOL_PRI_RESERVED:
++ case AVCOL_PRI_FILM:
++ case AVCOL_PRI_UNSPECIFIED:
++ default:
++ break;
++ }
++
++ switch (avcs) { /* a recognised colour space overrides the cs picked from primaries */
++ case AVCOL_SPC_RGB:
++ cs = V4L2_COLORSPACE_SRGB;
++ break;
++ case AVCOL_SPC_BT709:
++ cs = V4L2_COLORSPACE_REC709;
++ break;
++ case AVCOL_SPC_FCC:
++ cs = V4L2_COLORSPACE_470_SYSTEM_M;
++ break;
++ case AVCOL_SPC_BT470BG:
++ cs = V4L2_COLORSPACE_470_SYSTEM_BG;
++ break;
++ case AVCOL_SPC_SMPTE170M:
++ cs = V4L2_COLORSPACE_SMPTE170M;
++ break;
++ case AVCOL_SPC_SMPTE240M:
++ cs = V4L2_COLORSPACE_SMPTE240M;
++ break;
++ case AVCOL_SPC_BT2020_CL:
++ cs = V4L2_COLORSPACE_BT2020;
++ ycbcr = V4L2_YCBCR_ENC_BT2020_CONST_LUM;
++ break;
++ case AVCOL_SPC_BT2020_NCL:
++ cs = V4L2_COLORSPACE_BT2020;
++ break;
++ default:
++ break;
++ }
++
++ switch (avxc) { /* select on the caller's AVCOL_TRC_* value; xfer holds the V4L2 result */
++ case AVCOL_TRC_BT709:
++ xfer = V4L2_XFER_FUNC_709;
++ break;
++ case AVCOL_TRC_IEC61966_2_1:
++ xfer = V4L2_XFER_FUNC_SRGB;
++ break;
++ case AVCOL_TRC_SMPTE240M:
++ xfer = V4L2_XFER_FUNC_SMPTE240M;
++ break;
++ case AVCOL_TRC_SMPTE2084:
++ xfer = V4L2_XFER_FUNC_SMPTE2084;
++ break;
++ default:
++ break;
++ }
++
++ if (V4L2_TYPE_IS_MULTIPLANAR(buf->buf.type)) {
++ buf->context->format.fmt.pix_mp.colorspace = cs;
++ buf->context->format.fmt.pix_mp.ycbcr_enc = ycbcr;
++ buf->context->format.fmt.pix_mp.xfer_func = xfer;
++ } else {
++ buf->context->format.fmt.pix.colorspace = cs;
++ buf->context->format.fmt.pix.ycbcr_enc = ycbcr;
++ buf->context->format.fmt.pix.xfer_func = xfer;
++ }
++}
++
+ static enum AVColorRange v4l2_get_color_range(V4L2Buffer *buf)
+ {
+ enum v4l2_quantization qt;
+@@ -133,6 +265,20 @@ static enum AVColorRange v4l2_get_color_
+ return AVCOL_RANGE_UNSPECIFIED;
+ }
+
++static void v4l2_set_color_range(V4L2Buffer *buf, const enum AVColorRange avcr)
++{
++ const enum v4l2_quantization q =
++ avcr == AVCOL_RANGE_MPEG ? V4L2_QUANTIZATION_LIM_RANGE :
++ avcr == AVCOL_RANGE_JPEG ? V4L2_QUANTIZATION_FULL_RANGE :
++ V4L2_QUANTIZATION_DEFAULT;
++
++ if (V4L2_TYPE_IS_MULTIPLANAR(buf->buf.type)) {
++ buf->context->format.fmt.pix_mp.quantization = q;
++ } else {
++ buf->context->format.fmt.pix.quantization = q;
++ }
++}
++
+ static enum AVColorSpace v4l2_get_color_space(V4L2Buffer *buf)
+ {
+ enum v4l2_ycbcr_encoding ycbcr;
+@@ -209,73 +355,218 @@ static enum AVColorTransferCharacteristi
+ return AVCOL_TRC_UNSPECIFIED;
+ }
+
+-static void v4l2_free_buffer(void *opaque, uint8_t *unused)
++static int v4l2_buf_is_interlaced(const V4L2Buffer * const buf)
+ {
+- V4L2Buffer* avbuf = opaque;
+- V4L2m2mContext *s = buf_to_m2mctx(avbuf);
++ return V4L2_FIELD_IS_INTERLACED(buf->buf.field);
++}
+
+- if (atomic_fetch_sub(&avbuf->context_refcount, 1) == 1) {
+- atomic_fetch_sub_explicit(&s->refcount, 1, memory_order_acq_rel);
++static int v4l2_buf_is_top_first(const V4L2Buffer * const buf)
++{
++ return buf->buf.field == V4L2_FIELD_INTERLACED_TB;
++}
+
+- if (s->reinit) {
+- if (!atomic_load(&s->refcount))
+- sem_post(&s->refsync);
+- } else {
+- if (s->draining && V4L2_TYPE_IS_OUTPUT(avbuf->context->type)) {
+- /* no need to queue more buffers to the driver */
+- avbuf->status = V4L2BUF_AVAILABLE;
+- }
+- else if (avbuf->context->streamon)
+- ff_v4l2_buffer_enqueue(avbuf);
+- }
++static void v4l2_set_interlace(V4L2Buffer * const buf, const int is_interlaced, const int is_tff)
++{
++ buf->buf.field = !is_interlaced ? V4L2_FIELD_NONE :
++ is_tff ? V4L2_FIELD_INTERLACED_TB : V4L2_FIELD_INTERLACED_BT;
++}
+
+- av_buffer_unref(&avbuf->context_ref);
++static uint8_t * v4l2_get_drm_frame(V4L2Buffer *avbuf) /* Fills avbuf->drm_frame from the buffer's plane info and pixel format; returns the descriptor as a byte pointer */
++{
++ AVDRMFrameDescriptor *drm_desc = &avbuf->drm_frame;
++ AVDRMLayerDescriptor *layer;
++
++ /* fill the DRM frame descriptor */
++ drm_desc->nb_objects = avbuf->num_planes; /* one DRM object per V4L2 plane */
++ drm_desc->nb_layers = 1;
++
++ layer = &drm_desc->layers[0];
++ layer->nb_planes = avbuf->num_planes;
++
++ for (int i = 0; i < avbuf->num_planes; i++) { /* default mapping: layer plane i lives in object i */
++ layer->planes[i].object_index = i;
++ layer->planes[i].offset = avbuf->plane_info[i].offset;
++ layer->planes[i].pitch = avbuf->plane_info[i].bytesperline;
++ }
++
++ switch (avbuf->context->av_pix_fmt) { /* choose the DRM fourcc for the context's AVPixelFormat */
++ case AV_PIX_FMT_0BGR:
++ layer->format = DRM_FORMAT_RGBX8888;
++ break;
++ case AV_PIX_FMT_RGB0:
++ layer->format = DRM_FORMAT_XBGR8888;
++ break;
++ case AV_PIX_FMT_0RGB:
++ layer->format = DRM_FORMAT_BGRX8888;
++ break;
++ case AV_PIX_FMT_BGR0:
++ layer->format = DRM_FORMAT_XRGB8888;
++ break;
++
++ case AV_PIX_FMT_ABGR:
++ layer->format = DRM_FORMAT_RGBA8888;
++ break;
++ case AV_PIX_FMT_RGBA:
++ layer->format = DRM_FORMAT_ABGR8888;
++ break;
++ case AV_PIX_FMT_ARGB:
++ layer->format = DRM_FORMAT_BGRA8888;
++ break;
++ case AV_PIX_FMT_BGRA:
++ layer->format = DRM_FORMAT_ARGB8888;
++ break;
++
++ case AV_PIX_FMT_BGR24:
++ layer->format = DRM_FORMAT_BGR888;
++ break;
++ case AV_PIX_FMT_RGB24:
++ layer->format = DRM_FORMAT_RGB888;
++ break;
++
++ case AV_PIX_FMT_YUYV422:
++
++ layer->format = DRM_FORMAT_YUYV;
++ layer->nb_planes = 1; /* forced to a single layer plane regardless of num_planes */
++
++ break;
++
++ case AV_PIX_FMT_NV12:
++ case AV_PIX_FMT_NV21:
++
++ layer->format = avbuf->context->av_pix_fmt == AV_PIX_FMT_NV12 ?
++ DRM_FORMAT_NV12 : DRM_FORMAT_NV21;
++
++ if (avbuf->num_planes > 1) /* separate V4L2 planes: the default mapping above already applies */
++ break;
++
++ layer->nb_planes = 2; /* single V4L2 plane: describe luma and chroma inside object 0 */
++
++ layer->planes[1].object_index = 0;
++ layer->planes[1].offset = avbuf->plane_info[0].bytesperline *
++ avbuf->context->format.fmt.pix.height; /* chroma placed bytesperline * height bytes into object 0 */
++ layer->planes[1].pitch = avbuf->plane_info[0].bytesperline;
++ break;
++
++ case AV_PIX_FMT_YUV420P:
++
++ layer->format = DRM_FORMAT_YUV420;
++
++ if (avbuf->num_planes > 1) /* separate V4L2 planes: the default mapping above already applies */
++ break;
++
++ layer->nb_planes = 3; /* single V4L2 plane: three layer planes, all in object 0 */
++
++ layer->planes[1].object_index = 0;
++ layer->planes[1].offset = avbuf->plane_info[0].bytesperline *
++ avbuf->context->format.fmt.pix.height;
++ layer->planes[1].pitch = avbuf->plane_info[0].bytesperline >> 1; /* half the plane-0 pitch */
++
++ layer->planes[2].object_index = 0;
++ layer->planes[2].offset = layer->planes[1].offset +
++ ((avbuf->plane_info[0].bytesperline *
++ avbuf->context->format.fmt.pix.height) >> 2); /* plane 2 starts a quarter of the plane-0 size after plane 1 */
++ layer->planes[2].pitch = avbuf->plane_info[0].bytesperline >> 1;
++ break;
++
++ default:
++ drm_desc->nb_layers = 0; /* no mapping for this pixel format: descriptor is left with zero layers */
++ break;
+ }
++
++ return (uint8_t *) drm_desc;
+ }
+
+-static int v4l2_buf_increase_ref(V4L2Buffer *in)
++static void v4l2_free_bufref(void *opaque, uint8_t *data) /* AVBuffer free callback used by wrap_avbuf(): data is an AVBufferRef whose payload is the V4L2Buffer */
+ {
+- V4L2m2mContext *s = buf_to_m2mctx(in);
++ AVBufferRef * bufref = (AVBufferRef *)data;
++ V4L2Buffer *avbuf = (V4L2Buffer *)bufref->data;
++ struct V4L2Context *ctx = ff_weak_link_lock(&avbuf->context_wl); /* NULL result means the buffer is no longer attached to a context */
+
+- if (in->context_ref)
+- atomic_fetch_add(&in->context_refcount, 1);
+- else {
+- in->context_ref = av_buffer_ref(s->self_ref);
+- if (!in->context_ref)
+- return AVERROR(ENOMEM);
++ if (ctx != NULL) {
++ // Buffer still attached to context
++ V4L2m2mContext * const s = ctx_to_m2mctx(ctx);
++
++ if (!s->output_drm && avbuf->dmabuf[0] != NULL) { /* pairs with dmabuf_read_start() issued when the frame was handed out as a software frame */
++ for (unsigned int i = 0; i != avbuf->num_planes; ++i)
++ dmabuf_read_end(avbuf->dmabuf[i]);
++ }
++
++ ff_mutex_lock(&ctx->lock);
++
++ ff_v4l2_buffer_set_avail(avbuf);
++ avbuf->buf.timestamp.tv_sec = 0; /* clear the timestamp left over from the previous use */
++ avbuf->buf.timestamp.tv_usec = 0;
++
++ if (V4L2_TYPE_IS_OUTPUT(ctx->type)) { /* OUTPUT-type buffers are only marked available here */
++ av_log(logger(avbuf), AV_LOG_DEBUG, "%s: Buffer avail\n", ctx->name);
++ }
++ else if (ctx->streamon) { /* other buffers go straight back to the driver while streaming */
++ av_log(logger(avbuf), AV_LOG_DEBUG, "%s: Buffer requeue\n", ctx->name);
++ ff_v4l2_buffer_enqueue(avbuf); // will set to IN_DRIVER
++ }
++ else {
++ av_log(logger(avbuf), AV_LOG_DEBUG, "%s: Buffer freed but streamoff\n", ctx->name);
++ }
+
+- in->context_refcount = 1;
++ ff_mutex_unlock(&ctx->lock);
+ }
+
+- in->status = V4L2BUF_RET_USER;
+- atomic_fetch_add_explicit(&s->refcount, 1, memory_order_relaxed);
++ ff_weak_link_unlock(avbuf->context_wl); /* called whether or not a context was found */
++ av_buffer_unref(&bufref); /* drop the reference that was passed in as data */
++}
+
+- return 0;
++static inline uint32_t ff_v4l2_buf_len(const struct v4l2_buffer * b, unsigned int i) /* length of plane i for multiplanar types, else of the whole buffer */
++{
++ return !V4L2_TYPE_IS_MULTIPLANAR(b->type) ? b->length : b->m.planes[i].length;
+ }
+
+-static int v4l2_buf_to_bufref(V4L2Buffer *in, int plane, AVBufferRef **buf)
++static int v4l2_buffer_export_drm(V4L2Buffer* avbuf) /* Gives every plane a dmabuf fd (own allocation or VIDIOC_EXPBUF) and records it in avbuf->drm_frame.objects; 0 or negative AVERROR */
+ {
+- int ret;
++ int i, ret;
++ const V4L2m2mContext * const s = buf_to_m2mctx(avbuf);
+
+- if (plane >= in->num_planes)
+- return AVERROR(EINVAL);
++ for (i = 0; i < avbuf->num_planes; i++) {
++ int dma_fd = -1;
++ const uint32_t blen = ff_v4l2_buf_len(&avbuf->buf, i);
+
+- /* even though most encoders return 0 in data_offset encoding vp8 does require this value */
+- *buf = av_buffer_create((char *)in->plane_info[plane].mm_addr + in->planes[plane].data_offset,
+- in->plane_info[plane].length, v4l2_free_buffer, in, 0);
+- if (!*buf)
+- return AVERROR(ENOMEM);
++ if (s->db_ctl != NULL) { /* a dmabuf allocator is configured: allocate the plane ourselves and hand its fd to V4L2 */
++ if ((avbuf->dmabuf[i] = dmabuf_alloc(s->db_ctl, blen)) == NULL)
++ return AVERROR(ENOMEM);
++ dma_fd = dmabuf_fd(avbuf->dmabuf[i]);
++ if (V4L2_TYPE_IS_MULTIPLANAR(avbuf->buf.type))
++ avbuf->buf.m.planes[i].m.fd = dma_fd;
++ else
++ avbuf->buf.m.fd = dma_fd;
+
+- ret = v4l2_buf_increase_ref(in);
+- if (ret)
+- av_buffer_unref(buf);
++ if (!s->output_drm) /* NOTE(review): dmabuf_map() result is stored unchecked - confirm what it returns on failure */
++ avbuf->plane_info[i].mm_addr = dmabuf_map(avbuf->dmabuf[i]);
++ }
++ else { /* no allocator: ask the driver to export its own buffer as a dmabuf fd */
++ struct v4l2_exportbuffer expbuf;
++ memset(&expbuf, 0, sizeof(expbuf));
++
++ expbuf.index = avbuf->buf.index;
++ expbuf.type = avbuf->buf.type;
++ expbuf.plane = i;
++
++ ret = ioctl(s->fd, VIDIOC_EXPBUF, &expbuf);
++ if (ret < 0)
++ return AVERROR(errno);
++ dma_fd = expbuf.fd;
++ }
+
+- return ret;
++ avbuf->drm_frame.objects[i].size = blen;
++ avbuf->drm_frame.objects[i].fd = dma_fd;
++ avbuf->drm_frame.objects[i].format_modifier = DRM_FORMAT_MOD_LINEAR;
++ }
++
++ return 0;
+ }
+
+ static int v4l2_bufref_to_buf(V4L2Buffer *out, int plane, const uint8_t* data, int size, int offset)
+ {
+ unsigned int bytesused, length;
++ int rv = 0;
+
+ if (plane >= out->num_planes)
+ return AVERROR(EINVAL);
+@@ -283,32 +574,61 @@ static int v4l2_bufref_to_buf(V4L2Buffer
+ length = out->plane_info[plane].length;
+ bytesused = FFMIN(size+offset, length);
+
+- memcpy((uint8_t*)out->plane_info[plane].mm_addr+offset, data, FFMIN(size, length-offset));
+-
+- if (V4L2_TYPE_IS_MULTIPLANAR(out->buf.type)) {
+- out->planes[plane].bytesused = bytesused;
+- out->planes[plane].length = length;
+- } else {
+- out->buf.bytesused = bytesused;
+- out->buf.length = length;
++ if (size > length - offset) {
++ size = length - offset;
++ rv = AVERROR(ENOMEM);
+ }
+
+- return 0;
++ memcpy((uint8_t*)out->plane_info[plane].mm_addr+offset, data, size);
++
++ set_buf_length(out, plane, bytesused, length);
++
++ return rv;
++}
++
++static AVBufferRef * wrap_avbuf(V4L2Buffer * const avbuf) /* Returns a new AVBufferRef that keeps avbuf's bufref alive and runs v4l2_free_bufref on release; NULL on allocation failure */
++{
++ AVBufferRef * bufref = av_buffer_ref(avbuf->context->bufrefs[avbuf->buf.index]);
++ AVBufferRef * newbuf;
++
++ if (!bufref)
++ return NULL;
++
++ newbuf = av_buffer_create((uint8_t *)bufref, sizeof(*bufref), v4l2_free_bufref, NULL, 0);
++ if (newbuf == NULL)
++ av_buffer_unref(&bufref);
++ else // only mark the buffer as handed to the user when a wrapper really exists to return it
++ avbuf->status = V4L2BUF_RET_USER;
++ return newbuf;
+ }
+
+ static int v4l2_buffer_buf_to_swframe(AVFrame *frame, V4L2Buffer *avbuf)
+ {
+- int i, ret;
++ int i;
+
+ frame->format = avbuf->context->av_pix_fmt;
+
+- for (i = 0; i < avbuf->num_planes; i++) {
+- ret = v4l2_buf_to_bufref(avbuf, i, &frame->buf[i]);
+- if (ret)
+- return ret;
++ frame->buf[0] = wrap_avbuf(avbuf);
++ if (frame->buf[0] == NULL)
++ return AVERROR(ENOMEM);
++
++ if (buf_to_m2mctx(avbuf)->output_drm) {
++ /* 1. get references to the actual data */
++ const int rv = ff_v4l2_context_frames_set(avbuf->context);
++ if (rv != 0)
++ return rv;
++
++ frame->data[0] = (uint8_t *) v4l2_get_drm_frame(avbuf);
++ frame->format = AV_PIX_FMT_DRM_PRIME;
++ frame->hw_frames_ctx = av_buffer_ref(avbuf->context->frames_ref);
++ return 0;
++ }
++
+
++ /* 1. get references to the actual data */
++ for (i = 0; i < avbuf->num_planes; i++) {
++ frame->data[i] = (uint8_t *)avbuf->plane_info[i].mm_addr + avbuf->planes[i].data_offset;
+ frame->linesize[i] = avbuf->plane_info[i].bytesperline;
+- frame->data[i] = frame->buf[i]->data;
+ }
+
+ /* fixup special cases */
+@@ -317,88 +637,152 @@ static int v4l2_buffer_buf_to_swframe(AV
+ case AV_PIX_FMT_NV21:
+ if (avbuf->num_planes > 1)
+ break;
+- frame->linesize[1] = avbuf->plane_info[0].bytesperline;
+- frame->data[1] = frame->buf[0]->data + avbuf->plane_info[0].bytesperline * avbuf->context->format.fmt.pix_mp.height;
++ frame->linesize[1] = frame->linesize[0];
++ frame->data[1] = frame->data[0] + frame->linesize[0] * ff_v4l2_get_format_height(&avbuf->context->format);
+ break;
+
+ case AV_PIX_FMT_YUV420P:
+ if (avbuf->num_planes > 1)
+ break;
+- frame->linesize[1] = avbuf->plane_info[0].bytesperline >> 1;
+- frame->linesize[2] = avbuf->plane_info[0].bytesperline >> 1;
+- frame->data[1] = frame->buf[0]->data + avbuf->plane_info[0].bytesperline * avbuf->context->format.fmt.pix_mp.height;
+- frame->data[2] = frame->data[1] + ((avbuf->plane_info[0].bytesperline * avbuf->context->format.fmt.pix_mp.height) >> 2);
++ frame->linesize[1] = frame->linesize[0] / 2;
++ frame->linesize[2] = frame->linesize[1];
++ frame->data[1] = frame->data[0] + frame->linesize[0] * ff_v4l2_get_format_height(&avbuf->context->format);
++ frame->data[2] = frame->data[1] + frame->linesize[1] * ff_v4l2_get_format_height(&avbuf->context->format) / 2;
+ break;
+
+ default:
+ break;
+ }
+
++ if (avbuf->dmabuf[0] != NULL) {
++ for (unsigned int i = 0; i != avbuf->num_planes; ++i)
++ dmabuf_read_start(avbuf->dmabuf[i]);
++ }
++
++ return 0;
++}
++
++static void cpy_2d(uint8_t * dst, int dst_stride, const uint8_t * src, int src_stride, int w, int h)
++{ /* Copies h rows of w bytes from src to dst; equal strides with at most 32 bytes of row padding take one bulk memcpy */
++ if (dst_stride == src_stride && w + 32 >= dst_stride) {
++ memcpy(dst, src, h > 0 ? (size_t)dst_stride * (h - 1) + w : 0); // stop at the end of the last row's payload; nothing for h <= 0
++ }
++ else {
++ while (--h >= 0) {
++ memcpy(dst, src, w);
++ dst += dst_stride;
++ src += src_stride;
++ }
++ }
++}
++
++static int is_chroma(const AVPixFmtDescriptor *desc, int i, int num_planes)
++{ /* Plane 0 is never chroma; the last plane is not chroma when the format carries the ALPHA flag */
++ return i != 0 && (i != num_planes - 1 || !(desc->flags & AV_PIX_FMT_FLAG_ALPHA));
++}
++
++static int v4l2_buffer_primeframe_to_buf(const AVFrame *frame, V4L2Buffer *out)
++{
++ const AVDRMFrameDescriptor *const src = (const AVDRMFrameDescriptor *)frame->data[0];
++ /* Attaches the single dmabuf of a DRM_PRIME frame to out and keeps a ref on the source; 0 or negative AVERROR */
++ if (frame->format != AV_PIX_FMT_DRM_PRIME || !src || !frame->buf[0]) // buf[0] is referenced below, so it must exist
++ return AVERROR(EINVAL);
++
++ av_assert0(out->buf.memory == V4L2_MEMORY_DMABUF);
++
++ if (V4L2_TYPE_IS_MULTIPLANAR(out->buf.type)) {
++ // Only currently cope with single buffer types
++ if (out->buf.length != 1)
++ return AVERROR_PATCHWELCOME;
++ if (src->nb_objects != 1)
++ return AVERROR(EINVAL);
++
++ out->planes[0].m.fd = src->objects[0].fd;
++ }
++ else {
++ if (src->nb_objects != 1)
++ return AVERROR(EINVAL);
++
++ out->buf.m.fd = src->objects[0].fd;
++ }
++
++ // No need to copy src AVDescriptor and if we did then we may confuse
++ // fd close on free
++ out->ref_buf = av_buffer_ref(frame->buf[0]);
++ if (out->ref_buf == NULL) return AVERROR(ENOMEM); // without the ref the source fd could be closed while still queued
+ return 0;
+ }
+
+ static int v4l2_buffer_swframe_to_buf(const AVFrame *frame, V4L2Buffer *out)
+ {
+- int i, ret;
+- struct v4l2_format fmt = out->context->format;
+- int pixel_format = V4L2_TYPE_IS_MULTIPLANAR(fmt.type) ?
+- fmt.fmt.pix_mp.pixelformat : fmt.fmt.pix.pixelformat;
+- int height = V4L2_TYPE_IS_MULTIPLANAR(fmt.type) ?
+- fmt.fmt.pix_mp.height : fmt.fmt.pix.height;
+- int is_planar_format = 0;
+-
+- switch (pixel_format) {
+- case V4L2_PIX_FMT_YUV420M:
+- case V4L2_PIX_FMT_YVU420M:
+-#ifdef V4L2_PIX_FMT_YUV422M
+- case V4L2_PIX_FMT_YUV422M:
+-#endif
+-#ifdef V4L2_PIX_FMT_YVU422M
+- case V4L2_PIX_FMT_YVU422M:
+-#endif
+-#ifdef V4L2_PIX_FMT_YUV444M
+- case V4L2_PIX_FMT_YUV444M:
+-#endif
+-#ifdef V4L2_PIX_FMT_YVU444M
+- case V4L2_PIX_FMT_YVU444M:
+-#endif
+- case V4L2_PIX_FMT_NV12M:
+- case V4L2_PIX_FMT_NV21M:
+- case V4L2_PIX_FMT_NV12MT_16X16:
+- case V4L2_PIX_FMT_NV12MT:
+- case V4L2_PIX_FMT_NV16M:
+- case V4L2_PIX_FMT_NV61M:
+- is_planar_format = 1;
+- }
+-
+- if (!is_planar_format) {
+- const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
+- int planes_nb = 0;
+- int offset = 0;
+-
+- for (i = 0; i < desc->nb_components; i++)
+- planes_nb = FFMAX(planes_nb, desc->comp[i].plane + 1);
+-
+- for (i = 0; i < planes_nb; i++) {
+- int size, h = height;
+- if (i == 1 || i == 2) {
++ int i;
++ int num_planes = 0;
++ int pel_strides[4] = {0};
++ /* Copies a software frame plane by plane into the mapped memory of out; returns 0, or -1 when the frame cannot be copied */
++ const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
++
++ if (desc == NULL || (desc->flags & AV_PIX_FMT_FLAG_HWACCEL) != 0) { // no descriptor exists for an unknown pixel format
++ av_log(NULL, AV_LOG_ERROR, "%s: Invalid or HWACCEL format cannot be copied\n", __func__);
++ return -1;
++ }
++
++ for (i = 0; i != desc->nb_components; ++i) { // count the planes and note the per-pixel byte step of each
++ if (desc->comp[i].plane >= num_planes)
++ num_planes = desc->comp[i].plane + 1;
++ pel_strides[desc->comp[i].plane] = desc->comp[i].step;
++ }
++
++ if (out->num_planes > 1) { // one V4L2 plane per frame plane
++ if (num_planes != out->num_planes) {
++ av_log(NULL, AV_LOG_ERROR, "%s: Num planes mismatch: %d != %d\n", __func__, num_planes, out->num_planes);
++ return -1;
++ }
++ for (i = 0; i != num_planes; ++i) {
++ int w = frame->width;
++ int h = frame->height;
++ if (is_chroma(desc, i, num_planes)) {
++ w = AV_CEIL_RSHIFT(w, desc->log2_chroma_w);
+ h = AV_CEIL_RSHIFT(h, desc->log2_chroma_h);
+ }
+- size = frame->linesize[i] * h;
+- ret = v4l2_bufref_to_buf(out, 0, frame->data[i], size, offset);
+- if (ret)
+- return ret;
+- offset += size;
++
++ cpy_2d(out->plane_info[i].mm_addr, out->plane_info[i].bytesperline,
++ frame->data[i], frame->linesize[i],
++ w * pel_strides[i], h);
++ set_buf_length(out, i, out->plane_info[i].bytesperline * h, out->plane_info[i].length);
+ }
+- return 0;
+ }
++ else
++ { // every frame plane is packed back to back into V4L2 plane 0
++ unsigned int offset = 0;
++
++ for (i = 0; i != num_planes; ++i) {
++ int w = frame->width;
++ int h = frame->height;
++ int dst_stride = out->plane_info[0].bytesperline;
++ uint8_t * const dst = (uint8_t *)out->plane_info[0].mm_addr + offset;
++
++ if (is_chroma(desc, i, num_planes)) {
++ // Is chroma
++ dst_stride >>= desc->log2_chroma_w;
++ offset += dst_stride * (out->context->height >> desc->log2_chroma_h); // NOTE(review): plain shift here, AV_CEIL_RSHIFT for h below - confirm context height alignment
++ w = AV_CEIL_RSHIFT(w, desc->log2_chroma_w);
++ h = AV_CEIL_RSHIFT(h, desc->log2_chroma_h);
++ }
++ else {
++ // Is luma or alpha
++ offset += dst_stride * out->context->height;
++ }
++ if (offset > out->plane_info[0].length) { // offset now marks the end of this plane, so this is a bounds check before copying
++ av_log(NULL, AV_LOG_ERROR, "%s: Plane total %u > buffer size %zu\n", __func__, offset, out->plane_info[0].length);
++ return -1;
++ }
+
+- for (i = 0; i < out->num_planes; i++) {
+- ret = v4l2_bufref_to_buf(out, i, frame->buf[i]->data, frame->buf[i]->size, 0);
+- if (ret)
+- return ret;
++ cpy_2d(dst, dst_stride,
++ frame->data[i], frame->linesize[i],
++ w * pel_strides[i], h);
++ }
++ set_buf_length(out, 0, offset, out->plane_info[0].length);
+ }
+-
+ return 0;
+ }
+
+@@ -408,16 +792,31 @@ static int v4l2_buffer_swframe_to_buf(co
+ *
+ ******************************************************************************/
+
+-int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out)
++int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out, const int64_t track_ts) /* Copies frame metadata and payload into out; a non-zero track_ts replaces frame->pts as the V4L2 timestamp */
+ {
+- v4l2_set_pts(out, frame->pts);
+-
+- return v4l2_buffer_swframe_to_buf(frame, out);
++ out->buf.flags = frame->key_frame ? /* set or clear only the KEYFRAME bit, keeping the other flags */
++ (out->buf.flags | V4L2_BUF_FLAG_KEYFRAME) :
++ (out->buf.flags & ~V4L2_BUF_FLAG_KEYFRAME);
++ // Beware that colour info is held in format rather than the actual
++ // v4l2 buffer struct so this may not be as useful as you might hope
++ v4l2_set_color(out, frame->color_primaries, frame->colorspace, frame->color_trc);
++ v4l2_set_color_range(out, frame->color_range);
++ // PTS & interlace are buffer vars
++ if (track_ts)
++ out->buf.timestamp = tv_from_int(track_ts);
++ else
++ v4l2_set_pts(out, frame->pts);
++ v4l2_set_interlace(out, frame->interlaced_frame, frame->top_field_first);
++
++ return frame->format == AV_PIX_FMT_DRM_PRIME ? /* DRM_PRIME frames are attached by fd, everything else is copied */
++ v4l2_buffer_primeframe_to_buf(frame, out) :
++ v4l2_buffer_swframe_to_buf(frame, out);
+ }
+
+ int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf)
+ {
+ int ret;
++ V4L2Context * const ctx = avbuf->context;
+
+ av_frame_unref(frame);
+
+@@ -428,17 +827,32 @@ int ff_v4l2_buffer_buf_to_avframe(AVFram
+
+ /* 2. get frame information */
+ frame->key_frame = !!(avbuf->buf.flags & V4L2_BUF_FLAG_KEYFRAME);
++ frame->pict_type = frame->key_frame ? AV_PICTURE_TYPE_I :
++ (avbuf->buf.flags & V4L2_BUF_FLAG_PFRAME) != 0 ? AV_PICTURE_TYPE_P :
++ (avbuf->buf.flags & V4L2_BUF_FLAG_BFRAME) != 0 ? AV_PICTURE_TYPE_B :
++ AV_PICTURE_TYPE_NONE;
+ frame->color_primaries = v4l2_get_color_primaries(avbuf);
+ frame->colorspace = v4l2_get_color_space(avbuf);
+ frame->color_range = v4l2_get_color_range(avbuf);
+ frame->color_trc = v4l2_get_color_trc(avbuf);
+ frame->pts = v4l2_get_pts(avbuf);
+ frame->pkt_dts = AV_NOPTS_VALUE;
++ frame->interlaced_frame = v4l2_buf_is_interlaced(avbuf);
++ frame->top_field_first = v4l2_buf_is_top_first(avbuf);
+
+ /* these values are updated also during re-init in v4l2_process_driver_event */
+- frame->height = avbuf->context->height;
+- frame->width = avbuf->context->width;
+- frame->sample_aspect_ratio = avbuf->context->sample_aspect_ratio;
++ frame->height = ctx->height;
++ frame->width = ctx->width;
++ frame->sample_aspect_ratio = ctx->sample_aspect_ratio;
++
++ if (ctx->selection.height && ctx->selection.width) {
++ frame->crop_left = ctx->selection.left < frame->width ? ctx->selection.left : 0;
++ frame->crop_top = ctx->selection.top < frame->height ? ctx->selection.top : 0;
++ frame->crop_right = ctx->selection.left + ctx->selection.width < frame->width ?
++ frame->width - (ctx->selection.left + ctx->selection.width) : 0;
++ frame->crop_bottom = ctx->selection.top + ctx->selection.height < frame->height ?
++ frame->height - (ctx->selection.top + ctx->selection.height) : 0;
++ }
+
+ /* 3. report errors upstream */
+ if (avbuf->buf.flags & V4L2_BUF_FLAG_ERROR) {
+@@ -451,15 +865,15 @@ int ff_v4l2_buffer_buf_to_avframe(AVFram
+
+ int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *avbuf)
+ {
+- int ret;
+-
+ av_packet_unref(pkt);
+- ret = v4l2_buf_to_bufref(avbuf, 0, &pkt->buf);
+- if (ret)
+- return ret;
++
++ pkt->buf = wrap_avbuf(avbuf);
++ if (pkt->buf == NULL)
++ return AVERROR(ENOMEM);
+
+ pkt->size = V4L2_TYPE_IS_MULTIPLANAR(avbuf->buf.type) ? avbuf->buf.m.planes[0].bytesused : avbuf->buf.bytesused;
+- pkt->data = pkt->buf->data;
++ pkt->data = (uint8_t*)avbuf->plane_info[0].mm_addr + avbuf->planes[0].data_offset;
++ pkt->flags = 0;
+
+ if (avbuf->buf.flags & V4L2_BUF_FLAG_KEYFRAME)
+ pkt->flags |= AV_PKT_FLAG_KEY;
+@@ -474,39 +888,108 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket
+ return 0;
+ }
+
+-int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out)
++int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket * const pkt, V4L2Buffer * const out,
++ const void *extdata, size_t extlen,
++ const int64_t timestamp) /* Copies extdata (if any) followed by the packet payload into plane 0 of out, then sets timestamp and KEYFRAME flag */
+ {
+ int ret;
+
+- ret = v4l2_bufref_to_buf(out, 0, pkt->data, pkt->size, 0);
+- if (ret)
++ if (extlen) { /* optional extra data goes at the start of the plane */
++ ret = v4l2_bufref_to_buf(out, 0, extdata, extlen, 0);
++ if (ret)
++ return ret;
++ }
++
++ ret = v4l2_bufref_to_buf(out, 0, pkt->data, pkt->size, extlen); /* packet payload follows at offset extlen */
++ if (ret && ret != AVERROR(ENOMEM)) /* ENOMEM from v4l2_bufref_to_buf means the data was truncated to fit: carry on and report it at the end */
+ return ret;
+
+- v4l2_set_pts(out, pkt->pts);
++ if (timestamp) /* a non-zero timestamp overrides the packet PTS */
++ out->buf.timestamp = tv_from_int(timestamp);
++ else
++ v4l2_set_pts(out, pkt->pts);
++
++ out->buf.flags = (pkt->flags & AV_PKT_FLAG_KEY) != 0 ? /* set or clear only the KEYFRAME bit */
++ (out->buf.flags | V4L2_BUF_FLAG_KEYFRAME) :
++ (out->buf.flags & ~V4L2_BUF_FLAG_KEYFRAME);
+
+- if (pkt->flags & AV_PKT_FLAG_KEY)
+- out->flags = V4L2_BUF_FLAG_KEYFRAME;
++ return ret;
++}
+
+- return 0;
++int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out) /* Plain variant of ff_v4l2_buffer_avpkt_to_buf_ext: no extra data, timestamp taken from pkt->pts */
++{
++ return ff_v4l2_buffer_avpkt_to_buf_ext(pkt, out, NULL, 0, 0);
++}
++
++
++static void v4l2_buffer_buffer_free(void *opaque, uint8_t *data)
++{
++ V4L2Buffer * const avbuf = (V4L2Buffer *)data;
++ int n;
++
++ /* AVBuffer free callback for a V4L2Buffer: unmap planes, release fds or dmabufs, drop refs, free the struct */
++ for (n = 0; n != FF_ARRAY_ELEMS(avbuf->plane_info); ++n) {
++ if (avbuf->plane_info[n].mm_addr != NULL)
++ munmap(avbuf->plane_info[n].mm_addr, avbuf->plane_info[n].length);
++ }
++
++ if (avbuf->dmabuf[0] != NULL) {
++ for (n = 0; n != FF_ARRAY_ELEMS(avbuf->dmabuf); ++n) {
++ dmabuf_free(avbuf->dmabuf[n]);
++ }
++ }
++ else {
++ for (n = 0; n != FF_ARRAY_ELEMS(avbuf->drm_frame.objects); ++n) {
++ if (avbuf->drm_frame.objects[n].fd != -1)
++ close(avbuf->drm_frame.objects[n].fd);
++ }
++ }
++
++ av_buffer_unref(&avbuf->ref_buf);
++
++ ff_weak_link_unref(&avbuf->context_wl);
++
++ av_free(avbuf);
+ }
+
+-int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index)
++
++int ff_v4l2_buffer_initialize(AVBufferRef ** pbufref, int index, V4L2Context *ctx, enum v4l2_memory mem)
+ {
+- V4L2Context *ctx = avbuf->context;
+ int ret, i;
++ V4L2Buffer * const avbuf = av_mallocz(sizeof(*avbuf));
++ AVBufferRef * bufref;
++ V4L2m2mContext * const s = ctx_to_m2mctx(ctx);
++ int want_mmap;
+
+- avbuf->buf.memory = V4L2_MEMORY_MMAP;
++ *pbufref = NULL;
++ if (avbuf == NULL)
++ return AVERROR(ENOMEM);
++
++ bufref = av_buffer_create((uint8_t*)avbuf, sizeof(*avbuf), v4l2_buffer_buffer_free, NULL, 0);
++ if (bufref == NULL) {
++ av_free(avbuf);
++ return AVERROR(ENOMEM);
++ }
++
++ avbuf->context = ctx;
++ avbuf->buf.memory = mem;
+ avbuf->buf.type = ctx->type;
+ avbuf->buf.index = index;
+
++ for (i = 0; i != FF_ARRAY_ELEMS(avbuf->drm_frame.objects); ++i) {
++ avbuf->drm_frame.objects[i].fd = -1;
++ }
++
++ avbuf->context_wl = ff_weak_link_ref(ctx->wl_master);
++
+ if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) {
+ avbuf->buf.length = VIDEO_MAX_PLANES;
+ avbuf->buf.m.planes = avbuf->planes;
+ }
+
+- ret = ioctl(buf_to_m2mctx(avbuf)->fd, VIDIOC_QUERYBUF, &avbuf->buf);
++ ret = ioctl(s->fd, VIDIOC_QUERYBUF, &avbuf->buf);
+ if (ret < 0)
+- return AVERROR(errno);
++ goto fail;
+
+ if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) {
+ avbuf->num_planes = 0;
+@@ -518,33 +1001,41 @@ int ff_v4l2_buffer_initialize(V4L2Buffer
+ } else
+ avbuf->num_planes = 1;
+
+- for (i = 0; i < avbuf->num_planes; i++) {
++ want_mmap = avbuf->buf.memory == V4L2_MEMORY_MMAP &&
++ (V4L2_TYPE_IS_OUTPUT(ctx->type) || !buf_to_m2mctx(avbuf)->output_drm);
+
++ for (i = 0; i < avbuf->num_planes; i++) {
+ avbuf->plane_info[i].bytesperline = V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ?
+ ctx->format.fmt.pix_mp.plane_fmt[i].bytesperline :
+ ctx->format.fmt.pix.bytesperline;
+
+ if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) {
+ avbuf->plane_info[i].length = avbuf->buf.m.planes[i].length;
+- avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.m.planes[i].length,
+- PROT_READ | PROT_WRITE, MAP_SHARED,
+- buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.planes[i].m.mem_offset);
++ avbuf->plane_info[i].offset = avbuf->buf.m.planes[i].data_offset;
++
++ if (want_mmap)
++ avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.m.planes[i].length,
++ PROT_READ | PROT_WRITE, MAP_SHARED,
++ buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.planes[i].m.mem_offset);
+ } else {
+ avbuf->plane_info[i].length = avbuf->buf.length;
+- avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.length,
+- PROT_READ | PROT_WRITE, MAP_SHARED,
+- buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.offset);
++ avbuf->plane_info[i].offset = 0;
++
++ if (want_mmap)
++ avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.length,
++ PROT_READ | PROT_WRITE, MAP_SHARED,
++ buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.offset);
+ }
+
+- if (avbuf->plane_info[i].mm_addr == MAP_FAILED)
+- return AVERROR(ENOMEM);
++ if (avbuf->plane_info[i].mm_addr == MAP_FAILED) {
++ avbuf->plane_info[i].mm_addr = NULL;
++ ret = AVERROR(ENOMEM);
++ goto fail;
++ }
+ }
+
+ avbuf->status = V4L2BUF_AVAILABLE;
+
+- if (V4L2_TYPE_IS_OUTPUT(ctx->type))
+- return 0;
+-
+ if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) {
+ avbuf->buf.m.planes = avbuf->planes;
+ avbuf->buf.length = avbuf->num_planes;
+@@ -554,20 +1045,52 @@ int ff_v4l2_buffer_initialize(V4L2Buffer
+ avbuf->buf.length = avbuf->planes[0].length;
+ }
+
+- return ff_v4l2_buffer_enqueue(avbuf);
++ if (V4L2_TYPE_IS_CAPTURE(ctx->type) && !want_mmap) {
++ // export_drm does dmabuf alloc if we aren't using v4l2 alloc
++ ret = v4l2_buffer_export_drm(avbuf);
++ if (ret) {
++ av_log(logger(avbuf), AV_LOG_ERROR, "Failed to get exported drm handles\n");
++ goto fail;
++ }
++ }
++
++ *pbufref = bufref;
++ return 0;
++
++fail:
++ av_buffer_unref(&bufref);
++ return ret;
+ }
+
+ int ff_v4l2_buffer_enqueue(V4L2Buffer* avbuf)
+ {
+ int ret;
++ int qc; /* context q_count after this buffer has been added, for the log line below */
+
+- avbuf->buf.flags = avbuf->flags;
++ if (avbuf->buf.timestamp.tv_sec || avbuf->buf.timestamp.tv_usec) { /* only log buffers that carry a timestamp */
++ av_log(logger(avbuf), AV_LOG_DEBUG, "--- %s pre VIDIOC_QBUF: index %d, ts=%ld.%06ld count=%d\n",
++ avbuf->context->name, avbuf->buf.index,
++ (long)avbuf->buf.timestamp.tv_sec, (long)avbuf->buf.timestamp.tv_usec, /* timeval fields are not always long: cast to match %ld */
++ avbuf->context->q_count);
++ }
+
+ ret = ioctl(buf_to_m2mctx(avbuf)->fd, VIDIOC_QBUF, &avbuf->buf);
+- if (ret < 0)
+- return AVERROR(errno);
++ if (ret < 0) {
++ int err = errno; /* save errno before av_log/strerror can change it */
++ av_log(logger(avbuf), AV_LOG_ERROR, "--- %s VIDIOC_QBUF: index %d FAIL err %d (%s)\n",
++ avbuf->context->name, avbuf->buf.index,
++ err, strerror(err));
++ return AVERROR(err);
++ }
+
++ // Lock not wanted - if called from buffer free then lock already obtained
++ qc = atomic_fetch_add(&avbuf->context->q_count, 1) + 1;
+ avbuf->status = V4L2BUF_IN_DRIVER;
++ pthread_cond_broadcast(&avbuf->context->cond);
++
++ av_log(logger(avbuf), AV_LOG_DEBUG, "--- %s VIDIOC_QBUF: index %d, ts=%ld.%06ld count=%d\n",
++ avbuf->context->name, avbuf->buf.index,
++ (long)avbuf->buf.timestamp.tv_sec, (long)avbuf->buf.timestamp.tv_usec, qc);
+
+ return 0;
+ }
+--- a/libavcodec/v4l2_buffers.h
++++ b/libavcodec/v4l2_buffers.h
+@@ -28,31 +28,47 @@
+ #include <stddef.h>
+ #include <linux/videodev2.h>
+
++#include "avcodec.h"
+ #include "libavutil/buffer.h"
+ #include "libavutil/frame.h"
++#include "libavutil/hwcontext_drm.h"
+ #include "packet.h"
+
+ enum V4L2Buffer_status {
+ V4L2BUF_AVAILABLE,
+ V4L2BUF_IN_DRIVER,
++ V4L2BUF_IN_USE,
+ V4L2BUF_RET_USER,
+ };
+
+ /**
+ * V4L2Buffer (wrapper for v4l2_buffer management)
+ */
++struct V4L2Context;
++struct ff_weak_link_client;
++struct dmabuf_h;
++
+ typedef struct V4L2Buffer {
+- /* each buffer needs to have a reference to its context */
++ /* each buffer needs to have a reference to its context
++ * The pointer is good enough for most operation but once the buffer has
++ * been passed to the user the buffer may become orphaned so for free ops
++ * the weak link must be used to ensure that the context is actually
++ * there
++ */
+ struct V4L2Context *context;
++ struct ff_weak_link_client *context_wl;
+
+- /* This object is refcounted per-plane, so we need to keep track
+- * of how many context-refs we are holding. */
+- AVBufferRef *context_ref;
+- atomic_uint context_refcount;
++ /* DRM descriptor */
++ AVDRMFrameDescriptor drm_frame;
++ /* For DRM_PRIME encode - need to keep a ref to the source buffer till we
++ * are done
++ */
++ AVBufferRef * ref_buf;
+
+ /* keep track of the mmap address and mmap length */
+ struct V4L2Plane_info {
+- int bytesperline;
++ size_t bytesperline;
++ size_t offset;
+ void * mm_addr;
+ size_t length;
+ } plane_info[VIDEO_MAX_PLANES];
+@@ -63,9 +79,9 @@ typedef struct V4L2Buffer {
+ struct v4l2_buffer buf;
+ struct v4l2_plane planes[VIDEO_MAX_PLANES];
+
+- int flags;
+ enum V4L2Buffer_status status;
+
++ struct dmabuf_h * dmabuf[VIDEO_MAX_PLANES]; // If externally alloced dmabufs - stash other info here
+ } V4L2Buffer;
+
+ /**
+@@ -101,6 +117,10 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket
+ */
+ int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out);
+
++int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket * const pkt, V4L2Buffer * const out,
++ const void *extdata, size_t extlen,
++ const int64_t timestamp);
++
+ /**
+ * Extracts the data from an AVFrame to a V4L2Buffer
+ *
+@@ -109,7 +129,7 @@ int ff_v4l2_buffer_avpkt_to_buf(const AV
+ *
+ * @returns 0 in case of success, a negative AVERROR code otherwise
+ */
+-int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out);
++int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out, const int64_t track_ts);
+
+ /**
+ * Initializes a V4L2Buffer
+@@ -119,7 +139,7 @@ int ff_v4l2_buffer_avframe_to_buf(const
+ *
+ * @returns 0 in case of success, a negative AVERROR code otherwise
+ */
+-int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index);
++int ff_v4l2_buffer_initialize(AVBufferRef **avbuf, int index, struct V4L2Context *ctx, enum v4l2_memory mem);
+
+ /**
+ * Enqueues a V4L2Buffer
+@@ -130,5 +150,12 @@ int ff_v4l2_buffer_initialize(V4L2Buffer
+ */
+ int ff_v4l2_buffer_enqueue(V4L2Buffer* avbuf);
+
++static inline void
++ff_v4l2_buffer_set_avail(V4L2Buffer* const avbuf) /* Marks avbuf as available and drops any source-buffer reference it still holds */
++{
++ avbuf->status = V4L2BUF_AVAILABLE;
++ av_buffer_unref(&avbuf->ref_buf); /* ref_buf is the source buffer kept for DRM_PRIME encode */
++}
++
+
+ #endif // AVCODEC_V4L2_BUFFERS_H
+--- a/libavcodec/v4l2_context.c
++++ b/libavcodec/v4l2_context.c
+@@ -27,11 +27,14 @@
+ #include <unistd.h>
+ #include <fcntl.h>
+ #include <poll.h>
++#include "libavutil/avassert.h"
++#include "libavutil/pixdesc.h"
+ #include "libavcodec/avcodec.h"
+ #include "libavcodec/internal.h"
+ #include "v4l2_buffers.h"
+ #include "v4l2_fmt.h"
+ #include "v4l2_m2m.h"
++#include "weak_link.h"
+
+ struct v4l2_format_update {
+ uint32_t v4l2_fmt;
+@@ -41,26 +44,168 @@ struct v4l2_format_update {
+ int update_avfmt;
+ };
+
+-static inline V4L2m2mContext *ctx_to_m2mctx(V4L2Context *ctx)
++
++static inline int64_t track_to_pts(AVCodecContext *avctx, unsigned int n) /* Tracking number to the 64-bit value used as the V4L2 timestamp; avctx is unused */
+ {
+- return V4L2_TYPE_IS_OUTPUT(ctx->type) ?
+- container_of(ctx, V4L2m2mContext, output) :
+- container_of(ctx, V4L2m2mContext, capture);
++ return (int64_t)n;
+ }
+
+-static inline AVCodecContext *logger(V4L2Context *ctx)
++static inline unsigned int pts_to_track(AVCodecContext *avctx, const int64_t pts) /* Inverse of track_to_pts: converts pts to unsigned int; avctx is unused */
+ {
+- return ctx_to_m2mctx(ctx)->avctx;
++ return (unsigned int)pts;
++}
++
++// FFmpeg requires us to propagate a number of vars from the coded pkt into
++// the decoded frame. The only thing that tracks like that in V4L2 stateful
++// is timestamp. PTS maps to timestamp for this decode. FFmpeg makes no
++// guarantees about PTS being unique or specified for every frame so replace
++// the supplied PTS with a simple incrementing number and keep a circular
++// buffer of all the things we want preserved (including the original PTS)
++// indexed by the tracking no.
++static int64_t
++xlat_pts_pkt_in(AVCodecContext *const avctx, xlat_track_t *const x, const AVPacket *const avpkt)
++{
++ int64_t seq_ts;
++ V4L2m2mTrackEl *slot;
++
++ if (++x->track_no == 0) // tracking number 0 is never issued, skip it on wrap
++ x->track_no = 1;
++ seq_ts = track_to_pts(avctx, x->track_no);
++ slot = x->track_els + x->track_no % FF_V4L2_M2M_TRACK_SIZE;
++
++ av_log(avctx, AV_LOG_TRACE, "In pkt PTS=%" PRId64 ", DTS=%" PRId64 ", track=%" PRId64 ", n=%u\n", avpkt->pts, avpkt->dts, seq_ts, x->track_no);
++ *slot = (V4L2m2mTrackEl){
++ .discard = 0,
++ .pending = 1,
++ .pkt_size = avpkt->size,
++ .pts = avpkt->pts,
++ .dts = avpkt->dts,
++ .reordered_opaque = avctx->reordered_opaque,
++ .pkt_pos = avpkt->pos,
++ .pkt_duration = avpkt->duration,
++ .track_pts = seq_ts
++ };
++ return seq_ts;
++}
++
++static int64_t
++xlat_pts_frame_in(AVCodecContext *const avctx, xlat_track_t *const x, const AVFrame *const frame)
++{
++ int64_t seq_ts;
++ V4L2m2mTrackEl *slot;
++
++ if (++x->track_no == 0) // tracking number 0 is never issued, skip it on wrap
++ x->track_no = 1;
++ seq_ts = track_to_pts(avctx, x->track_no);
++ slot = x->track_els + x->track_no % FF_V4L2_M2M_TRACK_SIZE;
++
++ av_log(avctx, AV_LOG_TRACE, "In frame PTS=%" PRId64 ", track=%" PRId64 ", n=%u\n", frame->pts, seq_ts, x->track_no);
++ *slot = (V4L2m2mTrackEl){
++ .discard = 0,
++ .pending = 1,
++ .pkt_size = 0,
++ .pts = frame->pts,
++ .dts = AV_NOPTS_VALUE,
++ .reordered_opaque = frame->reordered_opaque,
++ .pkt_pos = frame->pkt_pos,
++ .pkt_duration = frame->pkt_duration,
++ .track_pts = seq_ts
++ };
++ return seq_ts;
++}
++
++
++// Returns -1 if we should discard the frame
++static int
++xlat_pts_frame_out(AVCodecContext *const avctx,
++ xlat_track_t * const x,
++ AVFrame *const frame)
++{
++ const unsigned int slot = pts_to_track(avctx, frame->pts) % FF_V4L2_M2M_TRACK_SIZE;
++ V4L2m2mTrackEl *const el = x->track_els + slot;
++ const int tracked = frame->pts != AV_NOPTS_VALUE && frame->pts == el->track_pts;
++
++ if (tracked && el->discard) {
++ av_log(avctx, AV_LOG_DEBUG, "Discard frame (flushed): pts=%" PRId64 ", track[%d]=%" PRId64 "\n", frame->pts, slot, el->track_pts);
++ return -1;
++ }
++ if (!tracked) {
++ // Timestamp does not match the slot it indexes: clear the packet-derived fields
++ av_log(avctx, frame->pts == AV_NOPTS_VALUE ? AV_LOG_DEBUG : AV_LOG_WARNING,
++ "Frame tracking failure: pts=%" PRId64 ", track[%d]=%" PRId64 "\n", frame->pts, slot, el->track_pts);
++ frame->pts = AV_NOPTS_VALUE;
++ frame->pkt_dts = AV_NOPTS_VALUE;
++ frame->reordered_opaque = x->last_opaque;
++ frame->pkt_pos = -1;
++ frame->pkt_duration = 0;
++ frame->pkt_size = -1;
++ } else {
++ // Restore the values saved in the tracking slot; the PTS is handed back only once
++ frame->pts = el->pending ? el->pts : AV_NOPTS_VALUE;
++ frame->pkt_dts = el->dts;
++ frame->reordered_opaque = el->reordered_opaque;
++ frame->pkt_pos = el->pkt_pos;
++ frame->pkt_duration = el->pkt_duration;
++ frame->pkt_size = el->pkt_size;
++
++ x->last_opaque = el->reordered_opaque;
++ if (frame->pts != AV_NOPTS_VALUE)
++ x->last_pts = frame->pts;
++ el->pending = 0;
++ }
++
++ av_log(avctx, AV_LOG_TRACE, "Out frame PTS=%" PRId64 "/%"PRId64", DTS=%" PRId64 ", track=%"PRId64", n=%d\n",
++ frame->pts, frame->best_effort_timestamp, frame->pkt_dts, el->track_pts, slot);
++ return 0;
++}
++
++// Map a dequeued packet back to its tracking entry and restore its pts. Returns 0 to keep the packet, -1 if we should discard it
++static int
++xlat_pts_pkt_out(AVCodecContext *const avctx,
++ xlat_track_t * const x,
++ AVPacket *const pkt)
++{
++ unsigned int n = pts_to_track(avctx, pkt->pts) % FF_V4L2_M2M_TRACK_SIZE;
++ V4L2m2mTrackEl *const t = x->track_els + n;
++ if (pkt->pts == AV_NOPTS_VALUE || pkt->pts != t->track_pts) // no usable entry: no pts, or the slot holds a different track_pts
++ {
++ av_log(avctx, pkt->pts == AV_NOPTS_VALUE ? AV_LOG_DEBUG : AV_LOG_WARNING,
++ "Pkt tracking failure: pts=%" PRId64 ", track[%d]=%" PRId64 "\n", pkt->pts, n, t->track_pts);
++ pkt->pts = AV_NOPTS_VALUE;
++ }
++ else if (!t->discard)
++ {
++ pkt->pts = t->pending ? t->pts : AV_NOPTS_VALUE; // stored pts is handed out only while the entry is still pending
++
++ x->last_opaque = x->track_els[n].reordered_opaque;
++ if (pkt->pts != AV_NOPTS_VALUE)
++ x->last_pts = pkt->pts;
++ t->pending = 0;
++ }
++ else // entry matched but is flagged discard (the flag is set outside this function)
++ {
++ av_log(avctx, AV_LOG_DEBUG, "Discard packet (flushed): pts=%" PRId64 ", track[%d]=%" PRId64 "\n", pkt->pts, n, t->track_pts);
++ return -1;
++ }
++
++ // * Would like something much better than this...xlat(offset + out_count)?
++ pkt->dts = pkt->pts; // dts is simply mirrored from the (possibly AV_NOPTS_VALUE) pts
++ av_log(avctx, AV_LOG_TRACE, "Out pkt PTS=%" PRId64 ", track=%"PRId64", n=%d\n",
++ pkt->pts, t->track_pts, n);
++ return 0;
+ }
+
+-static inline unsigned int v4l2_get_width(struct v4l2_format *fmt)
++
++static inline V4L2m2mContext *ctx_to_m2mctx(const V4L2Context *ctx)
+ {
+- return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.width : fmt->fmt.pix.width;
++ return V4L2_TYPE_IS_OUTPUT(ctx->type) ? // recover the enclosing V4L2m2mContext from one of its two embedded contexts
++ container_of(ctx, V4L2m2mContext, output) : // ctx is the .output member
++ container_of(ctx, V4L2m2mContext, capture); // otherwise ctx is the .capture member
+ }
+
+-static inline unsigned int v4l2_get_height(struct v4l2_format *fmt)
++static inline AVCodecContext *logger(const V4L2Context *ctx)
+ {
+- return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.height : fmt->fmt.pix.height;
++ return ctx_to_m2mctx(ctx)->avctx; // codec context of the owning m2m context, used as the av_log() target
+ }
+
+ static AVRational v4l2_get_sar(V4L2Context *ctx)
+@@ -81,21 +226,29 @@ static AVRational v4l2_get_sar(V4L2Conte
+ return sar;
+ }
+
+-static inline unsigned int v4l2_resolution_changed(V4L2Context *ctx, struct v4l2_format *fmt2)
++static inline int ctx_buffers_alloced(const V4L2Context * const ctx)
++{
++ return ctx->bufrefs != NULL; // non-zero when the context currently has a bufrefs array
++}
++
++// Width/Height changed or we don't have an alloc in the first place? Compares ctx->format with fmt2; returns non-zero if so (and logs at DEBUG)
++static int ctx_resolution_changed(const V4L2Context *ctx, const struct v4l2_format *fmt2)
+ {
+- struct v4l2_format *fmt1 = &ctx->format;
+- int ret = V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ?
+- fmt1->fmt.pix_mp.width != fmt2->fmt.pix_mp.width ||
+- fmt1->fmt.pix_mp.height != fmt2->fmt.pix_mp.height
+- :
+- fmt1->fmt.pix.width != fmt2->fmt.pix.width ||
+- fmt1->fmt.pix.height != fmt2->fmt.pix.height;
++ const struct v4l2_format *fmt1 = &ctx->format;
++ int ret = !ctx_buffers_alloced(ctx) || // no buffers yet counts as "changed"
++ (V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? // pick the union member that matches the buffer type
++ fmt1->fmt.pix_mp.width != fmt2->fmt.pix_mp.width ||
++ fmt1->fmt.pix_mp.height != fmt2->fmt.pix_mp.height
++ :
++ fmt1->fmt.pix.width != fmt2->fmt.pix.width ||
++ fmt1->fmt.pix.height != fmt2->fmt.pix.height);
+
+ if (ret)
+- av_log(logger(ctx), AV_LOG_DEBUG, "%s changed (%dx%d) -> (%dx%d)\n",
++ av_log(logger(ctx), AV_LOG_DEBUG, "V4L2 %s changed: alloc=%d (%dx%d) -> (%dx%d)\n",
+ ctx->name,
+- v4l2_get_width(fmt1), v4l2_get_height(fmt1),
+- v4l2_get_width(fmt2), v4l2_get_height(fmt2));
++ ctx_buffers_alloced(ctx),
++ ff_v4l2_get_format_width(fmt1), ff_v4l2_get_format_height(fmt1),
++ ff_v4l2_get_format_width(fmt2), ff_v4l2_get_format_height(fmt2));
+
+ return ret;
+ }
+@@ -153,76 +306,100 @@ static inline void v4l2_save_to_context(
+ }
+ }
+
+-static int v4l2_start_decode(V4L2Context *ctx)
++static int get_default_selection(V4L2Context * const ctx, struct v4l2_rect *r) // fetch the compose rectangle into *r; returns 0 or AVERROR(errno)
+ {
+- struct v4l2_decoder_cmd cmd = {
+- .cmd = V4L2_DEC_CMD_START,
+- .flags = 0,
++ V4L2m2mContext * const s = ctx_to_m2mctx(ctx);
++ struct v4l2_selection selection = {
++ .type = V4L2_BUF_TYPE_VIDEO_CAPTURE, // fixed type, ctx->type is not consulted here
++ .target = V4L2_SEL_TGT_COMPOSE
+ };
+- int ret;
+
+- ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_DECODER_CMD, &cmd);
+- if (ret)
++ memset(r, 0, sizeof(*r)); // *r is left all-zero when the ioctl below fails
++ if (ioctl(s->fd, VIDIOC_G_SELECTION, &selection))
+ return AVERROR(errno);
+
++ *r = selection.r;
+ return 0;
+ }
+
+-/**
+- * handle resolution change event and end of stream event
+- * returns 1 if reinit was successful, negative if it failed
+- * returns 0 if reinit was not executed
+- */
+-static int v4l2_handle_event(V4L2Context *ctx)
++static int do_source_change(V4L2m2mContext * const s)
+ {
+- V4L2m2mContext *s = ctx_to_m2mctx(ctx);
+- struct v4l2_format cap_fmt = s->capture.format;
+- struct v4l2_event evt = { 0 };
++ AVCodecContext *const avctx = s->avctx;
++
+ int ret;
++ int reinit;
++ struct v4l2_format cap_fmt = s->capture.format;
+
+- ret = ioctl(s->fd, VIDIOC_DQEVENT, &evt);
+- if (ret < 0) {
+- av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_DQEVENT\n", ctx->name);
+- return 0;
+- }
++ s->capture.done = 0;
+
+- if (evt.type == V4L2_EVENT_EOS) {
+- ctx->done = 1;
++ ret = ioctl(s->fd, VIDIOC_G_FMT, &cap_fmt);
++ if (ret) {
++ av_log(avctx, AV_LOG_ERROR, "%s VIDIOC_G_FMT failed\n", s->capture.name);
+ return 0;
+ }
+
+- if (evt.type != V4L2_EVENT_SOURCE_CHANGE)
+- return 0;
++ get_default_selection(&s->capture, &s->capture.selection);
+
+- ret = ioctl(s->fd, VIDIOC_G_FMT, &cap_fmt);
+- if (ret) {
+- av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_G_FMT\n", s->capture.name);
+- return 0;
++ reinit = ctx_resolution_changed(&s->capture, &cap_fmt);
++ if ((s->quirks & FF_V4L2_QUIRK_REINIT_ALWAYS) != 0)
++ reinit = 1;
++
++ s->capture.format = cap_fmt;
++ if (reinit) {
++ s->capture.height = ff_v4l2_get_format_height(&cap_fmt);
++ s->capture.width = ff_v4l2_get_format_width(&cap_fmt);
+ }
+
+- if (v4l2_resolution_changed(&s->capture, &cap_fmt)) {
+- s->capture.height = v4l2_get_height(&cap_fmt);
+- s->capture.width = v4l2_get_width(&cap_fmt);
+- s->capture.sample_aspect_ratio = v4l2_get_sar(&s->capture);
+- } else {
+- v4l2_start_decode(ctx);
+- return 0;
++ // If we don't support selection (or it is bust) and we obviously have HD then kludge
++ if ((s->capture.selection.width == 0 || s->capture.selection.height == 0) &&
++ (s->capture.height == 1088 && s->capture.width == 1920)) {
++ s->capture.selection = (struct v4l2_rect){.width = 1920, .height = 1080};
+ }
+
+- s->reinit = 1;
++ s->capture.sample_aspect_ratio = v4l2_get_sar(&s->capture);
+
+- if (s->avctx)
+- ret = ff_set_dimensions(s->avctx, s->capture.width, s->capture.height);
+- if (ret < 0)
+- av_log(logger(ctx), AV_LOG_WARNING, "update avcodec height and width\n");
++ av_log(avctx, AV_LOG_DEBUG, "Source change: Fmt: %s, SAR: %d/%d, wxh %dx%d crop %dx%d @ %d,%d, reinit=%d\n",
++ av_fourcc2str(ff_v4l2_get_format_pixelformat(&cap_fmt)),
++ s->capture.sample_aspect_ratio.num, s->capture.sample_aspect_ratio.den,
++ s->capture.width, s->capture.height,
++ s->capture.selection.width, s->capture.selection.height,
++ s->capture.selection.left, s->capture.selection.top, reinit);
+
+- ret = ff_v4l2_m2m_codec_reinit(s);
+- if (ret) {
+- av_log(logger(ctx), AV_LOG_ERROR, "v4l2_m2m_codec_reinit\n");
+- return AVERROR(EINVAL);
++ ret = ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMOFF);
++ if (ret)
++ av_log(avctx, AV_LOG_ERROR, "capture VIDIOC_STREAMOFF failed\n");
++ s->draining = 0;
++
++ if (!reinit) {
++ /* Buffers are OK so just stream off to ack */
++ av_log(avctx, AV_LOG_DEBUG, "%s: Parameters only - restart decode\n", __func__);
++ }
++ else {
++ if (avctx)
++ ret = ff_set_dimensions(s->avctx,
++ s->capture.selection.width != 0 ? s->capture.selection.width : s->capture.width,
++ s->capture.selection.height != 0 ? s->capture.selection.height : s->capture.height);
++ if (ret < 0)
++ av_log(avctx, AV_LOG_WARNING, "update avcodec height and width failed\n");
++
++ ff_v4l2_context_release(&s->capture);
++
++ if (s->capture.width > ff_v4l2_get_format_width(&s->capture.format) ||
++ s->capture.height > ff_v4l2_get_format_height(&s->capture.format)) {
++ av_log(avctx, AV_LOG_ERROR, "Format post reinit too small: wanted %dx%d > got %dx%d\n",
++ s->capture.width, s->capture.height,
++ ff_v4l2_get_format_width(&s->capture.format), ff_v4l2_get_format_height(&s->capture.format));
++ return AVERROR(EINVAL);
++ }
++
++ // Update pixel format - should only actually do something on initial change
++ s->capture.av_pix_fmt =
++ ff_v4l2_format_v4l2_to_avfmt(ff_v4l2_get_format_pixelformat(&s->capture.format), AV_CODEC_ID_RAWVIDEO);
++ avctx->pix_fmt = s->output_drm ? AV_PIX_FMT_DRM_PRIME : s->capture.av_pix_fmt;
++ avctx->sw_pix_fmt = s->capture.av_pix_fmt;
+ }
+
+- /* reinit executed */
++ ret = ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMON);
+ return 1;
+ }
+
+@@ -266,171 +443,293 @@ static int v4l2_stop_encode(V4L2Context
+ return 0;
+ }
+
+-static V4L2Buffer* v4l2_dequeue_v4l2buf(V4L2Context *ctx, int timeout)
+-{
+- struct v4l2_plane planes[VIDEO_MAX_PLANES];
+- struct v4l2_buffer buf = { 0 };
+- V4L2Buffer *avbuf;
+- struct pollfd pfd = {
+- .events = POLLIN | POLLRDNORM | POLLPRI | POLLOUT | POLLWRNORM, /* default blocking capture */
+- .fd = ctx_to_m2mctx(ctx)->fd,
++// DQ a buffer
++// Amalgamates all the various ways there are of signalling EOS/Event to
++// generate a consistant EPIPE.
++//
++// Sets ctx->flag_last if next dq would produce EPIPE (i.e. stream has stopped)
++//
++// Returns:
++// 0 Success
++// AVERROR(EPIPE) Nothing more to read
++// AVERROR(ENOSPC) No buffers in Q to put result in
++// * AVERROR(..)
++
++ static int
++dq_buf(V4L2Context * const ctx, V4L2Buffer ** const ppavbuf)
++{
++ V4L2m2mContext * const m = ctx_to_m2mctx(ctx);
++ AVCodecContext * const avctx = m->avctx;
++ V4L2Buffer * avbuf;
++ const int is_mp = V4L2_TYPE_IS_MULTIPLANAR(ctx->type);
++
++ struct v4l2_plane planes[VIDEO_MAX_PLANES] = {{0}};
++
++ struct v4l2_buffer buf = {
++ .type = ctx->type,
++ .memory = V4L2_MEMORY_MMAP,
+ };
+- int i, ret;
+
+- if (!V4L2_TYPE_IS_OUTPUT(ctx->type) && ctx->buffers) {
+- for (i = 0; i < ctx->num_buffers; i++) {
+- if (ctx->buffers[i].status == V4L2BUF_IN_DRIVER)
+- break;
+- }
+- if (i == ctx->num_buffers)
+- av_log(logger(ctx), AV_LOG_WARNING, "All capture buffers returned to "
+- "userspace. Increase num_capture_buffers "
+- "to prevent device deadlock or dropped "
+- "packets/frames.\n");
++ *ppavbuf = NULL;
++
++ if (ctx->flag_last)
++ return AVERROR(EPIPE);
++
++ if (is_mp) {
++ buf.length = VIDEO_MAX_PLANES;
++ buf.m.planes = planes;
+ }
+
+- /* if we are draining and there are no more capture buffers queued in the driver we are done */
+- if (!V4L2_TYPE_IS_OUTPUT(ctx->type) && ctx_to_m2mctx(ctx)->draining) {
+- for (i = 0; i < ctx->num_buffers; i++) {
+- /* capture buffer initialization happens during decode hence
+- * detection happens at runtime
+- */
+- if (!ctx->buffers)
+- break;
++ while (ioctl(m->fd, VIDIOC_DQBUF, &buf) != 0) {
++ const int err = errno;
++ av_assert0(AVERROR(err) < 0);
++ if (err != EINTR) {
++ av_log(avctx, AV_LOG_DEBUG, "%s VIDIOC_DQBUF, errno (%s)\n",
++ ctx->name, av_err2str(AVERROR(err)));
++
++ if (err == EPIPE)
++ ctx->flag_last = 1;
+
+- if (ctx->buffers[i].status == V4L2BUF_IN_DRIVER)
+- goto start;
++ return AVERROR(err);
+ }
+- ctx->done = 1;
+- return NULL;
+ }
++ atomic_fetch_sub(&ctx->q_count, 1);
+
+-start:
+- if (V4L2_TYPE_IS_OUTPUT(ctx->type))
+- pfd.events = POLLOUT | POLLWRNORM;
+- else {
+- /* no need to listen to requests for more input while draining */
+- if (ctx_to_m2mctx(ctx)->draining)
+- pfd.events = POLLIN | POLLRDNORM | POLLPRI;
++ avbuf = (V4L2Buffer *)ctx->bufrefs[buf.index]->data;
++ ff_v4l2_buffer_set_avail(avbuf);
++ avbuf->buf = buf;
++ if (is_mp) {
++ memcpy(avbuf->planes, planes, sizeof(planes));
++ avbuf->buf.m.planes = avbuf->planes;
++ }
++ // Done with any attached buffer
++ av_buffer_unref(&avbuf->ref_buf);
++
++ if (V4L2_TYPE_IS_CAPTURE(ctx->type)) {
++ // Zero length cap buffer return == EOS
++ if ((is_mp ? buf.m.planes[0].bytesused : buf.bytesused) == 0) {
++ av_log(avctx, AV_LOG_DEBUG, "Buffer empty - reQ\n");
++
++ // Must reQ so we don't leak
++ // May not matter if the next thing we do is release all the
++ // buffers but better to be tidy.
++ ff_v4l2_buffer_enqueue(avbuf);
++
++ ctx->flag_last = 1;
++ return AVERROR(EPIPE);
++ }
++
++#ifdef V4L2_BUF_FLAG_LAST
++ // If flag_last set then this contains data but is the last frame
++ // so remember that but return OK
++ if ((buf.flags & V4L2_BUF_FLAG_LAST) != 0)
++ ctx->flag_last = 1;
++#endif
+ }
+
+- for (;;) {
+- ret = poll(&pfd, 1, timeout);
+- if (ret > 0)
+- break;
+- if (errno == EINTR)
++ *ppavbuf = avbuf;
++ return 0;
++}
++
++/**
++ * Dequeue one pending V4L2 event and act on source change / end of stream
++ * Expects to be called after the stream has stopped
++ *
++ * returns the do_source_change() result for V4L2_EVENT_SOURCE_CHANGE, 0 for any other non-EOS event
++ * returns AVERROR_EOF on V4L2_EVENT_EOS or when no event is queued (EAGAIN), other negative AVERROR if VIDIOC_DQEVENT fails
++ */
++static int
++get_event(V4L2m2mContext * const m)
++{
++ AVCodecContext * const avctx = m->avctx;
++ struct v4l2_event evt = { 0 };
++
++ while (ioctl(m->fd, VIDIOC_DQEVENT, &evt) != 0) { // retried only for EINTR; every other errno returns from inside the loop
++ const int rv = AVERROR(errno);
++ if (rv == AVERROR(EINTR))
+ continue;
+- return NULL;
++ if (rv == AVERROR(EAGAIN)) {
++ av_log(avctx, AV_LOG_WARNING, "V4L2 failed to get expected event - assume EOS\n");
++ return AVERROR_EOF;
++ }
++ av_log(avctx, AV_LOG_ERROR, "V4L2 VIDIOC_DQEVENT: %s\n", av_err2str(rv));
++ return rv;
+ }
+
+- /* 0. handle errors */
+- if (pfd.revents & POLLERR) {
+- /* if we are trying to get free buffers but none have been queued yet
+- no need to raise a warning */
+- if (timeout == 0) {
+- for (i = 0; i < ctx->num_buffers; i++) {
+- if (ctx->buffers[i].status != V4L2BUF_AVAILABLE)
+- av_log(logger(ctx), AV_LOG_WARNING, "%s POLLERR\n", ctx->name);
+- }
+- }
+- else
+- av_log(logger(ctx), AV_LOG_WARNING, "%s POLLERR\n", ctx->name);
++ av_log(avctx, AV_LOG_DEBUG, "Dq event %d\n", evt.type);
+
+- return NULL;
++ if (evt.type == V4L2_EVENT_EOS) {
++ av_log(avctx, AV_LOG_TRACE, "V4L2 VIDIOC_EVENT_EOS\n");
++ return AVERROR_EOF;
+ }
+
+- /* 1. handle resolution changes */
+- if (pfd.revents & POLLPRI) {
+- ret = v4l2_handle_event(ctx);
+- if (ret < 0) {
+- /* if re-init failed, abort */
+- ctx->done = 1;
+- return NULL;
++ if (evt.type == V4L2_EVENT_SOURCE_CHANGE)
++ return do_source_change(m);
++ // Any other event type is ignored
++ return 0;
++}
++
++static inline int
++dq_ok(const V4L2Context * const c)
++{
++ return c->streamon && atomic_load(&c->q_count) != 0; // streaming and q_count non-zero (q_count is decremented per DQBUF in dq_buf())
++}
++
++// Get a buffer
++// If output then just gets the buffer in the expected way
++// If capture then runs the capture state m/c to deal with res change etc.
++// If return value == 0 then *ppavbuf != NULL
++
++static int
++get_qbuf(V4L2Context * const ctx, V4L2Buffer ** const ppavbuf, const int timeout)
++{
++ V4L2m2mContext * const m = ctx_to_m2mctx(ctx);
++ AVCodecContext * const avctx = m->avctx;
++ const int is_cap = V4L2_TYPE_IS_CAPTURE(ctx->type);
++
++ const unsigned int poll_cap = (POLLIN | POLLRDNORM);
++ const unsigned int poll_out = (POLLOUT | POLLWRNORM);
++ const unsigned int poll_event = POLLPRI;
++
++ *ppavbuf = NULL;
++
++ for (;;) {
++ struct pollfd pfd = {
++ .fd = m->fd,
++ // If capture && stream not started then assume we are waiting for the initial event
++ .events = !is_cap ? poll_out :
++ !ff_v4l2_ctx_eos(ctx) && ctx->streamon ? poll_cap :
++ poll_event,
++ };
++ int ret;
++
++ if (ctx->done) {
++ av_log(avctx, AV_LOG_TRACE, "V4L2 %s already done\n", ctx->name);
++ return AVERROR_EOF;
+ }
+- if (ret) {
+- /* if re-init was successful drop the buffer (if there was one)
+- * since we had to reconfigure capture (unmap all buffers)
+- */
+- return NULL;
++
++ // If capture && timeout == -1 then also wait for rx buffer free
++ if (is_cap && timeout == -1 && dq_ok(&m->output) && !m->draining)
++ pfd.events |= poll_out;
++
++ // If nothing Qed all we will get is POLLERR - avoid that
++ if ((pfd.events == poll_out && !dq_ok(&m->output)) ||
++ (pfd.events == poll_cap && !dq_ok(&m->capture)) ||
++ (pfd.events == (poll_cap | poll_out) && !dq_ok(&m->capture) && !dq_ok(&m->output))) {
++ av_log(avctx, AV_LOG_TRACE, "V4L2 poll %s empty\n", ctx->name);
++ return AVERROR(ENOSPC);
+ }
+- }
+
+- /* 2. dequeue the buffer */
+- if (pfd.revents & (POLLIN | POLLRDNORM | POLLOUT | POLLWRNORM)) {
++ // Timeout kludged s.t. "forever" eventually gives up & produces logging
++ // If waiting for an event when we have seen a last_frame then we expect
++ // it to be ready already so force a short timeout
++ ret = poll(&pfd, 1,
++ ff_v4l2_ctx_eos(ctx) ? 10 :
++ timeout == -1 ? 3000 : timeout);
++ if (ret < 0) {
++ ret = AVERROR(errno); // Remember errno before logging etc.
++ av_assert0(ret < 0);
++ }
++
++ av_log(avctx, AV_LOG_TRACE, "V4L2 poll %s ret=%d, timeout=%d, events=%#x, revents=%#x\n",
++ ctx->name, ret, timeout, pfd.events, pfd.revents);
+
+- if (!V4L2_TYPE_IS_OUTPUT(ctx->type)) {
+- /* there is a capture buffer ready */
+- if (pfd.revents & (POLLIN | POLLRDNORM))
+- goto dequeue;
++ if (ret < 0) {
++ if (ret == AVERROR(EINTR))
++ continue;
++ av_log(avctx, AV_LOG_ERROR, "V4L2 %s poll error %d (%s)\n", ctx->name, AVUNERROR(ret), av_err2str(ret));
++ return ret;
++ }
+
+- /* the driver is ready to accept more input; instead of waiting for the capture
+- * buffer to complete we return NULL so input can proceed (we are single threaded)
+- */
+- if (pfd.revents & (POLLOUT | POLLWRNORM))
+- return NULL;
++ if (ret == 0) {
++ if (timeout == -1)
++ av_log(avctx, AV_LOG_ERROR, "V4L2 %s poll unexpected timeout: events=%#x\n", ctx->name, pfd.events);
++ if (ff_v4l2_ctx_eos(ctx)) {
++ av_log(avctx, AV_LOG_WARNING, "V4L2 %s poll event timeout\n", ctx->name);
++ ret = get_event(m);
++ if (ret < 0) {
++ ctx->done = 1;
++ return ret;
++ }
++ }
++ return AVERROR(EAGAIN);
+ }
+
+-dequeue:
+- memset(&buf, 0, sizeof(buf));
+- buf.memory = V4L2_MEMORY_MMAP;
+- buf.type = ctx->type;
+- if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) {
+- memset(planes, 0, sizeof(planes));
+- buf.length = VIDEO_MAX_PLANES;
+- buf.m.planes = planes;
++ if ((pfd.revents & POLLERR) != 0) {
++ av_log(avctx, AV_LOG_WARNING, "V4L2 %s POLLERR\n", ctx->name);
++ return AVERROR_UNKNOWN;
+ }
+
+- ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_DQBUF, &buf);
+- if (ret) {
+- if (errno != EAGAIN) {
++ if ((pfd.revents & poll_event) != 0) {
++ ret = get_event(m);
++ if (ret < 0) {
+ ctx->done = 1;
+- if (errno != EPIPE)
+- av_log(logger(ctx), AV_LOG_DEBUG, "%s VIDIOC_DQBUF, errno (%s)\n",
+- ctx->name, av_err2str(AVERROR(errno)));
++ return ret;
+ }
+- return NULL;
++ continue;
+ }
+
+- if (ctx_to_m2mctx(ctx)->draining && !V4L2_TYPE_IS_OUTPUT(ctx->type)) {
+- int bytesused = V4L2_TYPE_IS_MULTIPLANAR(buf.type) ?
+- buf.m.planes[0].bytesused : buf.bytesused;
+- if (bytesused == 0) {
+- ctx->done = 1;
+- return NULL;
+- }
+-#ifdef V4L2_BUF_FLAG_LAST
+- if (buf.flags & V4L2_BUF_FLAG_LAST)
+- ctx->done = 1;
+-#endif
++ if ((pfd.revents & poll_cap) != 0) {
++ ret = dq_buf(ctx, ppavbuf);
++ if (ret == AVERROR(EPIPE))
++ continue;
++ return ret;
+ }
+
+- avbuf = &ctx->buffers[buf.index];
+- avbuf->status = V4L2BUF_AVAILABLE;
+- avbuf->buf = buf;
+- if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) {
+- memcpy(avbuf->planes, planes, sizeof(planes));
+- avbuf->buf.m.planes = avbuf->planes;
++ if ((pfd.revents & poll_out) != 0) {
++ if (is_cap)
++ return AVERROR(EAGAIN);
++ return dq_buf(ctx, ppavbuf);
+ }
+- return avbuf;
++
++ av_log(avctx, AV_LOG_ERROR, "V4L2 poll unexpected events=%#x, revents=%#x\n", pfd.events, pfd.revents);
++ return AVERROR_UNKNOWN;
+ }
++}
+
+- return NULL;
++// Clear out flags and timestamps that should be set by the user
++// Returns the passed avbuf
++static V4L2Buffer *
++clean_v4l2_buffer(V4L2Buffer * const avbuf)
++{
++ struct v4l2_buffer *const buf = &avbuf->buf;
++ // Only these five fields of the embedded v4l2_buffer are reset; everything else is left as is
++ buf->flags = 0;
++ buf->field = V4L2_FIELD_ANY;
++ buf->timestamp = (struct timeval){0};
++ buf->timecode = (struct v4l2_timecode){0};
++ buf->sequence = 0;
++
++ return avbuf;
++}
++
++int
++ff_v4l2_dq_all(V4L2Context *const ctx, int timeout1)
++{
++ V4L2Buffer * avbuf; // dequeue result; get_qbuf() sets it to NULL on entry and non-NULL only on success
++ if (timeout1 != 0) { // optional first dequeue that waits up to timeout1; its failure is returned to the caller
++ int rv = get_qbuf(ctx, &avbuf, timeout1);
++ if (rv != 0)
++ return rv;
++ }
++ do { // then drain with timeout 0 until nothing more comes back; errors from these calls are ignored
++ get_qbuf(ctx, &avbuf, 0);
++ } while (avbuf);
++ return 0;
+ }
+
+ static V4L2Buffer* v4l2_getfree_v4l2buf(V4L2Context *ctx)
+ {
+- int timeout = 0; /* return when no more buffers to dequeue */
+ int i;
+
+ /* get back as many output buffers as possible */
+- if (V4L2_TYPE_IS_OUTPUT(ctx->type)) {
+- do {
+- } while (v4l2_dequeue_v4l2buf(ctx, timeout));
+- }
++ if (V4L2_TYPE_IS_OUTPUT(ctx->type))
++ ff_v4l2_dq_all(ctx, 0);
+
+ for (i = 0; i < ctx->num_buffers; i++) {
+- if (ctx->buffers[i].status == V4L2BUF_AVAILABLE)
+- return &ctx->buffers[i];
++ V4L2Buffer * const avbuf = (V4L2Buffer *)ctx->bufrefs[i]->data;
++ if (avbuf->status == V4L2BUF_AVAILABLE)
++ return clean_v4l2_buffer(avbuf);
+ }
+
+ return NULL;
+@@ -438,25 +737,45 @@ static V4L2Buffer* v4l2_getfree_v4l2buf(
+
+ static int v4l2_release_buffers(V4L2Context* ctx)
+ {
+- struct v4l2_requestbuffers req = {
+- .memory = V4L2_MEMORY_MMAP,
+- .type = ctx->type,
+- .count = 0, /* 0 -> unmaps buffers from the driver */
+- };
+- int i, j;
++ // Drop our references to all buffers, then ask the driver to free them (REQBUFS count=0)
++ int i, ret = 0; // ret stays 0 when fd == -1 and no ioctl is issued
++ const int fd = ctx_to_m2mctx(ctx)->fd;
+
+- for (i = 0; i < ctx->num_buffers; i++) {
+- V4L2Buffer *buffer = &ctx->buffers[i];
++ // Orphan any buffers in the wild
++ ff_weak_link_break(&ctx->wl_master);
+
+- for (j = 0; j < buffer->num_planes; j++) {
+- struct V4L2Plane_info *p = &buffer->plane_info[j];
+- if (p->mm_addr && p->length)
+- if (munmap(p->mm_addr, p->length) < 0)
+- av_log(logger(ctx), AV_LOG_ERROR, "%s unmap plane (%s))\n", ctx->name, av_err2str(AVERROR(errno)));
++ if (ctx->bufrefs) {
++ for (i = 0; i < ctx->num_buffers; i++)
++ av_buffer_unref(ctx->bufrefs + i);
++ }
++
++ if (fd != -1) {
++ struct v4l2_requestbuffers req = {
++ .memory = V4L2_MEMORY_MMAP,
++ .type = ctx->type,
++ .count = 0, /* 0 -> unmap all buffers from the driver */
++ };
++
++ while ((ret = ioctl(fd, VIDIOC_REQBUFS, &req)) == -1) {
++ if (errno == EINTR)
++ continue;
++ // Any other errno is not retried: record it, log once and leave the loop
++ ret = AVERROR(errno);
++
++ av_log(logger(ctx), AV_LOG_ERROR, "release all %s buffers (%s)\n",
++ ctx->name, av_err2str(AVERROR(errno)));
++ if (ctx_to_m2mctx(ctx)->output_drm)
++ av_log(logger(ctx), AV_LOG_ERROR,
++ "Make sure the DRM client releases all FB/GEM objects before closing the codec (ie):\n"
++ "for all buffers: \n"
++ " 1. drmModeRmFB(..)\n"
++ " 2. drmIoctl(.., DRM_IOCTL_GEM_CLOSE,... )\n");
++ break; // without this a persistent failure (e.g. buffers still held) would loop forever and overwrite ret
+ }
+ }
++ atomic_store(&ctx->q_count, 0);
+
+- return ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_REQBUFS, &req);
++ return ret;
+ }
+
+ static inline int v4l2_try_raw_format(V4L2Context* ctx, enum AVPixelFormat pixfmt)
+@@ -485,6 +804,8 @@ static inline int v4l2_try_raw_format(V4
+
+ static int v4l2_get_raw_format(V4L2Context* ctx, enum AVPixelFormat *p)
+ {
++ V4L2m2mContext* s = ctx_to_m2mctx(ctx);
++ V4L2m2mPriv *priv = s->avctx->priv_data;
+ enum AVPixelFormat pixfmt = ctx->av_pix_fmt;
+ struct v4l2_fmtdesc fdesc;
+ int ret;
+@@ -498,21 +819,22 @@ static int v4l2_get_raw_format(V4L2Conte
+ return 0;
+ }
+
+- for (;;) {
++ for (;; ++fdesc.index) {
+ ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_ENUM_FMT, &fdesc);
+ if (ret)
+ return AVERROR(EINVAL);
+
++ if (priv->pix_fmt != AV_PIX_FMT_NONE) {
++ if (fdesc.pixelformat != ff_v4l2_format_avfmt_to_v4l2(priv->pix_fmt))
++ continue;
++ }
++
+ pixfmt = ff_v4l2_format_v4l2_to_avfmt(fdesc.pixelformat, AV_CODEC_ID_RAWVIDEO);
+ ret = v4l2_try_raw_format(ctx, pixfmt);
+- if (ret){
+- fdesc.index++;
+- continue;
++ if (ret == 0) {
++ *p = pixfmt;
++ return 0;
+ }
+-
+- *p = pixfmt;
+-
+- return 0;
+ }
+
+ return AVERROR(EINVAL);
+@@ -555,30 +877,131 @@ static int v4l2_get_coded_format(V4L2Con
+ *
+ *****************************************************************************/
+
+-int ff_v4l2_context_set_status(V4L2Context* ctx, uint32_t cmd)
++
++static void flush_all_buffers_status(V4L2Context* const ctx)
++{
++ int i;
++ // Bookkeeping only: flips IN_DRIVER buffers back to available and zeroes q_count; no ioctl is made here
++ if (!ctx->bufrefs)
++ return;
++ // Buffers in any other state are left untouched
++ for (i = 0; i < ctx->num_buffers; ++i) {
++ struct V4L2Buffer * const buf = (struct V4L2Buffer *)ctx->bufrefs[i]->data;
++ if (buf->status == V4L2BUF_IN_DRIVER)
++ ff_v4l2_buffer_set_avail(buf);
++ }
++ atomic_store(&ctx->q_count, 0);
++}
++
++static int stuff_all_buffers(AVCodecContext * avctx, V4L2Context* ctx)
++{
++ int i;
++ int rv = 0; // must start at 0: nothing below assigns it when buffers exist and none is AVAILABLE
++ // Queue every AVAILABLE buffer to the driver, creating the buffers first if the context has none
++ if (!ctx->bufrefs) {
++ rv = ff_v4l2_context_init(ctx);
++ if (rv) {
++ av_log(avctx, AV_LOG_ERROR, "can't request capture buffers\n");
++ return rv;
++ }
++ }
++
++ ff_mutex_lock(&ctx->lock);
++ for (i = 0; i < ctx->num_buffers; ++i) {
++ struct V4L2Buffer * const buf = (struct V4L2Buffer *)ctx->bufrefs[i]->data;
++ if (buf->status == V4L2BUF_AVAILABLE) {
++ rv = ff_v4l2_buffer_enqueue(buf);
++ if (rv < 0) // stop at the first enqueue failure and report it
++ break;
++ }
++ }
++ ff_mutex_unlock(&ctx->lock);
++ return rv;
++}
++
++static int set_streamon(AVCodecContext * const avctx, V4L2Context*const ctx) // issue VIDIOC_STREAMON for ctx; returns 0 or AVERROR(errno)
+ {
+ int type = ctx->type;
+- int ret;
++ int ret = 0;
+
+- ret = ioctl(ctx_to_m2mctx(ctx)->fd, cmd, &type);
+- if (ret < 0)
+- return AVERROR(errno);
++ if (!V4L2_TYPE_IS_OUTPUT(ctx->type)) // non-output (capture) side: queue all available buffers first
++ stuff_all_buffers(avctx, ctx); // NOTE(review): return value is ignored here - confirm that is intended
+
+- ctx->streamon = (cmd == VIDIOC_STREAMON);
++ if (ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_STREAMON, &type) < 0) {
++ ret = AVERROR(errno);
++ av_log(avctx, AV_LOG_ERROR, "%s set status ON failed: err=%s\n", ctx->name,
++ av_err2str(ret));
++ return ret;
++ }
+
+- return 0;
++ ctx->first_buf = 1;
++ ctx->streamon = 1;
++ ctx->flag_last = 0; // a fresh stream has not seen its last buffer; dq_buf() returns EPIPE while this is set
++ av_log(avctx, AV_LOG_DEBUG, "%s set status ON OK\n", ctx->name);
++ return ret;
++}
++
++static int set_streamoff(AVCodecContext * const avctx, V4L2Context*const ctx)
++{
++ int type = ctx->type;
++ int ret = 0;
++ const int has_bufs = ctx_buffers_alloced(ctx);
++ // Issue VIDIOC_STREAMOFF for ctx and reset buffer bookkeeping; returns 0 or AVERROR(errno)
++ // Avoid doing anything if there is nothing we can do
++ if (!has_bufs && !ctx->streamon)
++ return 0;
++ // NOTE(review): lock is only taken when buffers exist - presumably it is only valid alongside them; confirm
++ if (has_bufs)
++ ff_mutex_lock(&ctx->lock);
++
++ if (ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_STREAMOFF, &type) < 0) {
++ ret = AVERROR(errno);
++ av_log(avctx, AV_LOG_ERROR, "%s set status OFF failed: err=%s\n", ctx->name,
++ av_err2str(ret));
++ }
++ else { // state is only updated when the ioctl succeeded
++ flush_all_buffers_status(ctx);
++
++ ctx->streamon = 0;
++ ctx->flag_last = 0;
++
++ av_log(avctx, AV_LOG_DEBUG, "%s set status OFF OK\n", ctx->name);
++ }
++
++ if (has_bufs)
++ ff_mutex_unlock(&ctx->lock);
++ return ret;
++}
++
++
++int ff_v4l2_context_set_status(V4L2Context* ctx, uint32_t cmd)
++{
++ AVCodecContext * const avctx = logger(ctx);
++ // Dispatch on cmd: only VIDIOC_STREAMOFF and VIDIOC_STREAMON are handled, anything else is logged and yields AVERROR_BUG
++ switch (cmd) {
++ case VIDIOC_STREAMOFF:
++ return set_streamoff(avctx, ctx);
++ case VIDIOC_STREAMON:
++ return set_streamon(avctx, ctx);
++ default:
++ av_log(avctx, AV_LOG_ERROR, "%s: Unexpected cmd: %d\n", __func__, cmd);
++ break;
++ }
++ return AVERROR_BUG;
+ }
+
+ int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* frame)
+ {
+- V4L2m2mContext *s = ctx_to_m2mctx(ctx);
++ V4L2m2mContext *const s = ctx_to_m2mctx(ctx);
++ AVCodecContext *const avctx = s->avctx;
++ int64_t track_ts;
+ V4L2Buffer* avbuf;
+ int ret;
+
+ if (!frame) {
+ ret = v4l2_stop_encode(ctx);
+ if (ret)
+- av_log(logger(ctx), AV_LOG_ERROR, "%s stop_encode\n", ctx->name);
++ av_log(avctx, AV_LOG_ERROR, "%s stop_encode\n", ctx->name);
+ s->draining= 1;
+ return 0;
+ }
+@@ -587,23 +1010,29 @@ int ff_v4l2_context_enqueue_frame(V4L2Co
+ if (!avbuf)
+ return AVERROR(EAGAIN);
+
+- ret = ff_v4l2_buffer_avframe_to_buf(frame, avbuf);
++ track_ts = xlat_pts_frame_in(avctx, &s->xlat, frame);
++
++ ret = ff_v4l2_buffer_avframe_to_buf(frame, avbuf, track_ts);
+ if (ret)
+ return ret;
+
+ return ff_v4l2_buffer_enqueue(avbuf);
+ }
+
+-int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt)
++int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt,
++ const void * extdata, size_t extlen)
+ {
+ V4L2m2mContext *s = ctx_to_m2mctx(ctx);
++ AVCodecContext *const avctx = s->avctx;
+ V4L2Buffer* avbuf;
+ int ret;
++ int64_t track_ts;
+
+ if (!pkt->size) {
+ ret = v4l2_stop_decode(ctx);
++ // Log but otherwise ignore stop failure
+ if (ret)
+- av_log(logger(ctx), AV_LOG_ERROR, "%s stop_decode\n", ctx->name);
++ av_log(avctx, AV_LOG_ERROR, "%s stop_decode failed: err=%d\n", ctx->name, ret);
+ s->draining = 1;
+ return 0;
+ }
+@@ -612,8 +1041,13 @@ int ff_v4l2_context_enqueue_packet(V4L2C
+ if (!avbuf)
+ return AVERROR(EAGAIN);
+
+- ret = ff_v4l2_buffer_avpkt_to_buf(pkt, avbuf);
+- if (ret)
++ track_ts = xlat_pts_pkt_in(avctx, &s->xlat, pkt);
++
++ ret = ff_v4l2_buffer_avpkt_to_buf_ext(pkt, avbuf, extdata, extlen, track_ts);
++ if (ret == AVERROR(ENOMEM))
++ av_log(logger(ctx), AV_LOG_ERROR, "Buffer overflow in %s: pkt->size=%d > buf->length=%d\n",
++ __func__, pkt->size, avbuf->planes[0].length);
++ else if (ret)
+ return ret;
+
+ return ff_v4l2_buffer_enqueue(avbuf);
+@@ -621,42 +1055,77 @@ int ff_v4l2_context_enqueue_packet(V4L2C
+
+ int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* frame, int timeout)
+ {
++ V4L2m2mContext *s = ctx_to_m2mctx(ctx);
++ AVCodecContext *const avctx = s->avctx;
+ V4L2Buffer *avbuf;
++ int rv;
+
+- /*
+- * timeout=-1 blocks until:
+- * 1. decoded frame available
+- * 2. an input buffer is ready to be dequeued
+- */
+- avbuf = v4l2_dequeue_v4l2buf(ctx, timeout);
+- if (!avbuf) {
+- if (ctx->done)
+- return AVERROR_EOF;
+-
+- return AVERROR(EAGAIN);
+- }
++ do { // keep dequeuing until a frame is accepted by the PTS tracker
++ if ((rv = get_qbuf(ctx, &avbuf, timeout)) != 0) // any get_qbuf() error is passed straight to the caller
++ return rv;
++ if ((rv = ff_v4l2_buffer_buf_to_avframe(frame, avbuf)) != 0)
++ return rv;
++ } while (xlat_pts_frame_out(avctx, &s->xlat, frame) != 0); // non-zero means the frame is to be discarded, so fetch another
+
+- return ff_v4l2_buffer_buf_to_avframe(frame, avbuf);
++ return 0;
+ }
+
+-int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt)
++int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt, int timeout)
+ {
++ V4L2m2mContext *s = ctx_to_m2mctx(ctx);
++ AVCodecContext *const avctx = s->avctx;
+ V4L2Buffer *avbuf;
++ int rv;
+
+- /*
+- * blocks until:
+- * 1. encoded packet available
+- * 2. an input buffer ready to be dequeued
+- */
+- avbuf = v4l2_dequeue_v4l2buf(ctx, -1);
+- if (!avbuf) {
+- if (ctx->done)
+- return AVERROR_EOF;
++ do { // keep dequeuing until a packet is accepted by the PTS tracker
++ if ((rv = get_qbuf(ctx, &avbuf, timeout)) != 0)
++ return rv == AVERROR(ENOSPC) ? AVERROR(EAGAIN) : rv; // Caller not currently expecting ENOSPC
++ if ((rv = ff_v4l2_buffer_buf_to_avpkt(pkt, avbuf)) != 0)
++ return rv;
++ } while (xlat_pts_pkt_out(avctx, &s->xlat, pkt) != 0); // non-zero means the packet is to be discarded, so fetch another
+
+- return AVERROR(EAGAIN);
++ return 0;
++}
++
++// Return 0 terminated list of drm fourcc video formats for this context
++// NULL if none found or error; *pN (if pN is non-NULL) receives the number of entries
++// Returned list is malloced so must be freed
++uint32_t * ff_v4l2_context_enum_drm_formats(V4L2Context *ctx, unsigned int *pN)
++{
++ unsigned int i;
++ unsigned int n = 0;
++ unsigned int size = 0;
++ uint32_t * e = NULL;
++ if (pN) *pN = 0; // pN is optional, as the guarded update in the loop already assumes
++
++ for (i = 0; i < 1024; ++i) {
++ struct v4l2_fmtdesc fdesc = {
++ .index = i,
++ .type = ctx->type
++ };
++
++ if (ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_ENUM_FMT, &fdesc)) // enumeration ends at the first failing index: return what we have
++ return e;
++
++ if (n + 1 >= size) { // grow so there is always room for the new entry plus the 0 terminator
++ unsigned int newsize = (size == 0) ? 16 : size * 2;
++ uint32_t * t = av_realloc(e, newsize * sizeof(*t));
++ if (!t)
++ return e;
++ e = t;
++ size = newsize;
++ }
++
++ e[n] = fdesc.pixelformat;
++ e[++n] = 0;
++ if (pN)
++ *pN = n;
+ }
+
+- return ff_v4l2_buffer_buf_to_avpkt(pkt, avbuf);
++ // If we've looped 1024 times we are clearly confused
++ if (pN) *pN = 0;
++ av_free(e);
++ return NULL;
+ }
+
+ int ff_v4l2_context_get_format(V4L2Context* ctx, int probe)
+@@ -688,78 +1157,194 @@ int ff_v4l2_context_get_format(V4L2Conte
+
+ int ff_v4l2_context_set_format(V4L2Context* ctx)
+ {
+- return ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_S_FMT, &ctx->format);
++    const int fd = ctx_to_m2mctx(ctx)->fd;
++    int rv = ioctl(fd, VIDIOC_S_FMT, &ctx->format);
++    uint32_t got;
++
++    if (rv != 0)
++        return rv;
++
++    // The driver may have chosen a buffer size below our minimum. If so ask
++    // for the minimum explicitly and set the format a second time.
++    // Only plane[0] is looked at: the limit is meant for encoded streams
++    // where we might know a bit more about the shape than the driver.
++    got = V4L2_TYPE_IS_MULTIPLANAR(ctx->format.type) ?
++        ctx->format.fmt.pix_mp.plane_fmt[0].sizeimage :
++        ctx->format.fmt.pix.sizeimage;
++    if (got >= ctx->min_buf_size)
++        return 0;
++
++    if (V4L2_TYPE_IS_MULTIPLANAR(ctx->format.type))
++        ctx->format.fmt.pix_mp.plane_fmt[0].sizeimage = ctx->min_buf_size;
++    else
++        ctx->format.fmt.pix.sizeimage = ctx->min_buf_size;
++
++    return ioctl(fd, VIDIOC_S_FMT, &ctx->format);
+ }
+
+ void ff_v4l2_context_release(V4L2Context* ctx)
+ {
+ int ret;
+
+- if (!ctx->buffers)
++ if (!ctx->bufrefs)
+ return;
+
+ ret = v4l2_release_buffers(ctx);
+ if (ret)
+ av_log(logger(ctx), AV_LOG_WARNING, "V4L2 failed to unmap the %s buffers\n", ctx->name);
+
+- av_freep(&ctx->buffers);
++ av_freep(&ctx->bufrefs);
++ av_buffer_unref(&ctx->frames_ref);
++
++ ff_mutex_destroy(&ctx->lock);
++ pthread_cond_destroy(&ctx->cond);
+ }
+
+-int ff_v4l2_context_init(V4L2Context* ctx)
++// Request req_buffers buffers of memory type mem from the driver (VIDIOC_REQBUFS) and call ff_v4l2_buffer_initialize() on each. Returns 0 on success, otherwise the error of the step that failed.
++static int create_buffers(V4L2Context* const ctx, const unsigned int req_buffers, const enum v4l2_memory mem)
+ {
+- V4L2m2mContext *s = ctx_to_m2mctx(ctx);
++ V4L2m2mContext * const s = ctx_to_m2mctx(ctx);
+ struct v4l2_requestbuffers req;
+- int ret, i;
+-
+- if (!v4l2_type_supported(ctx)) {
+- av_log(logger(ctx), AV_LOG_ERROR, "type %i not supported\n", ctx->type);
+- return AVERROR_PATCHWELCOME;
+- }
++ int ret, i;
+
+- ret = ioctl(s->fd, VIDIOC_G_FMT, &ctx->format);
+- if (ret)
+- av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_G_FMT failed\n", ctx->name);
++ av_assert0(ctx->bufrefs == NULL);
+
+ memset(&req, 0, sizeof(req));
+- req.count = ctx->num_buffers;
+- req.memory = V4L2_MEMORY_MMAP;
++ req.count = req_buffers;
++ req.memory = mem;
+ req.type = ctx->type;
+- ret = ioctl(s->fd, VIDIOC_REQBUFS, &req);
+- if (ret < 0) {
+- av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_REQBUFS failed: %s\n", ctx->name, strerror(errno));
+- return AVERROR(errno);
++ while ((ret = ioctl(s->fd, VIDIOC_REQBUFS, &req)) == -1) { // Loop only repeats when errno is EINTR
++ if (errno != EINTR) {
++ ret = AVERROR(errno);
++ av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_REQBUFS failed: %s\n", ctx->name, av_err2str(ret));
++ return ret;
++ }
+ }
+
+ ctx->num_buffers = req.count;
+- ctx->buffers = av_mallocz(ctx->num_buffers * sizeof(V4L2Buffer));
+- if (!ctx->buffers) {
++ ctx->bufrefs = av_mallocz(ctx->num_buffers * sizeof(*ctx->bufrefs));
++ if (!ctx->bufrefs) {
+ av_log(logger(ctx), AV_LOG_ERROR, "%s malloc enomem\n", ctx->name);
+- return AVERROR(ENOMEM);
++ ret = AVERROR(ENOMEM); // ret still holds the successful ioctl result here; without this the failure would be reported as success
++ goto fail_release;
+ }
+
+- for (i = 0; i < req.count; i++) {
+- ctx->buffers[i].context = ctx;
+- ret = ff_v4l2_buffer_initialize(&ctx->buffers[i], i);
+- if (ret < 0) {
++ ctx->wl_master = ff_weak_link_new(ctx);
++ if (!ctx->wl_master) {
++ ret = AVERROR(ENOMEM);
++ goto fail_release;
++ }
++
++ for (i = 0; i < ctx->num_buffers; i++) {
++ ret = ff_v4l2_buffer_initialize(&ctx->bufrefs[i], i, ctx, mem);
++ if (ret) {
+ av_log(logger(ctx), AV_LOG_ERROR, "%s buffer[%d] initialization (%s)\n", ctx->name, i, av_err2str(ret));
+- goto error;
++ goto fail_release;
+ }
+ }
+
+ av_log(logger(ctx), AV_LOG_DEBUG, "%s: %s %02d buffers initialized: %04ux%04u, sizeimage %08u, bytesperline %08u\n", ctx->name,
+ V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? av_fourcc2str(ctx->format.fmt.pix_mp.pixelformat) : av_fourcc2str(ctx->format.fmt.pix.pixelformat),
+ req.count,
+- v4l2_get_width(&ctx->format),
+- v4l2_get_height(&ctx->format),
++ ff_v4l2_get_format_width(&ctx->format),
++ ff_v4l2_get_format_height(&ctx->format),
+ V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? ctx->format.fmt.pix_mp.plane_fmt[0].sizeimage : ctx->format.fmt.pix.sizeimage,
+ V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? ctx->format.fmt.pix_mp.plane_fmt[0].bytesperline : ctx->format.fmt.pix.bytesperline);
+
+ return 0;
+
+-error:
++fail_release:
+ v4l2_release_buffers(ctx);
++ av_freep(&ctx->bufrefs);
++ return ret;
++}
++
++int ff_v4l2_context_frames_set(V4L2Context *const ctx)
++{
++    V4L2m2mContext * const m2m = ctx_to_m2mctx(ctx);
++    // A zero dimension in the context falls back to the codec context's value
++    const int width  = ctx->width  ? ctx->width  : m2m->avctx->width;
++    const int height = ctx->height ? ctx->height : m2m->avctx->height;
++    AVHWFramesContext *fc;
++    int err;
++
++    // Keep an existing frames context if it already matches format and size
++    if (ctx->frames_ref) {
++        fc = (AVHWFramesContext*)ctx->frames_ref->data;
++        if (fc->sw_format == ctx->av_pix_fmt && fc->width == width && fc->height == height)
++            return 0;
++        av_buffer_unref(&ctx->frames_ref);
++    }
++
++    if ((ctx->frames_ref = av_hwframe_ctx_alloc(m2m->device_ref)) == NULL)
++        return AVERROR(ENOMEM);
++
++    fc = (AVHWFramesContext*)ctx->frames_ref->data;
++    fc->format    = AV_PIX_FMT_DRM_PRIME;
++    fc->sw_format = ctx->av_pix_fmt;
++    fc->width     = width;
++    fc->height    = height;
++    if ((err = av_hwframe_ctx_init(ctx->frames_ref)) < 0) {
++        av_log(m2m->avctx, AV_LOG_ERROR, "Failed to create hwframes context: %s\n", av_err2str(err));
++        av_buffer_unref(&ctx->frames_ref);
++        return err;
++    }
++
++    av_log(m2m->avctx, AV_LOG_DEBUG, "%s: HWFramesContext set to %s, %dx%d\n", __func__,
++           av_get_pix_fmt_name(ctx->av_pix_fmt), width, height);
++    return 0;
++}
++
++// Set up ctx: init its lock, cond and queue count, read the current format (VIDIOC_G_FMT), settle the buffer count and create the buffers. Returns 0 or a negative AVERROR.
++int ff_v4l2_context_init(V4L2Context* ctx)
++{
++    struct v4l2_queryctrl qctrl;
++    V4L2m2mContext * const s = ctx_to_m2mctx(ctx);
++    int ret;
++    // It is not valid to reinit a context without a previous release
++    av_assert0(ctx->bufrefs == NULL);
++
++    if (!v4l2_type_supported(ctx)) {
++        av_log(logger(ctx), AV_LOG_ERROR, "type %i not supported\n", ctx->type);
++        return AVERROR_PATCHWELCOME;
++    }
++
++    ff_mutex_init(&ctx->lock, NULL);
++    pthread_cond_init(&ctx->cond, NULL);
++    atomic_init(&ctx->q_count, 0);
++
++    ret = ioctl(s->fd, VIDIOC_G_FMT, &ctx->format);
++    if (ret) {
++        ret = AVERROR(errno);
++        av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_G_FMT failed: %s\n", ctx->name, av_err2str(ret));
++        goto fail_unlock;
++    }
++
++    memset(&qctrl, 0, sizeof(qctrl));
++    qctrl.id = V4L2_CID_MIN_BUFFERS_FOR_OUTPUT;
++    if (ioctl(s->fd, VIDIOC_QUERYCTRL, &qctrl) != 0) {
++        ret = AVERROR(errno);
++        if (ret != AVERROR(EINVAL)) {
++            av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_QUERYCTRL failed: %s\n", ctx->name, av_err2str(ret));
++            goto fail_unlock;
++        }
++        // Control unsupported - set default if wanted
++        if (ctx->num_buffers < 2)
++            ctx->num_buffers = 4;
++    }
++    else {
++        if (ctx->num_buffers < 2)   // No usable request: driver minimum plus 2
++            ctx->num_buffers = qctrl.minimum + 2;
++        ctx->num_buffers = av_clip(ctx->num_buffers, qctrl.minimum, qctrl.maximum);
++    }
+
+- av_freep(&ctx->buffers);
++    if ((ret = create_buffers(ctx, ctx->num_buffers, ctx->buf_mem)) < 0)
++        goto fail_unlock;
++
++    return 0;
+
++fail_unlock:
++    ff_mutex_destroy(&ctx->lock);
++    pthread_cond_destroy(&ctx->cond);   // Pairs with pthread_cond_init above; previously left undestroyed on failure
+ return ret;
+ }
+--- a/libavcodec/v4l2_context.h
++++ b/libavcodec/v4l2_context.h
+@@ -32,6 +32,8 @@
+ #include "libavutil/rational.h"
+ #include "codec_id.h"
+ #include "packet.h"
++#include "libavutil/buffer.h"
++#include "libavutil/thread.h"
+ #include "v4l2_buffers.h"
+
+ typedef struct V4L2Context {
+@@ -71,11 +73,18 @@ typedef struct V4L2Context {
+ */
+ int width, height;
+ AVRational sample_aspect_ratio;
++ struct v4l2_rect selection;
+
+ /**
+- * Indexed array of V4L2Buffers
++ * If the default size of buffer is less than this then try to
++ * set to this.
+ */
+- V4L2Buffer *buffers;
++ uint32_t min_buf_size;
++
++ /**
++ * Indexed array of pointers to V4L2Buffers
++ */
++ AVBufferRef **bufrefs;
+
+ /**
+ * Readonly after init.
+@@ -83,16 +92,38 @@ typedef struct V4L2Context {
+ int num_buffers;
+
+ /**
++ * Buffer memory type V4L2_MEMORY_MMAP or V4L2_MEMORY_DMABUF
++ */
++ enum v4l2_memory buf_mem;
++
++ /**
+ * Whether the stream has been started (VIDIOC_STREAMON has been sent).
+ */
+ int streamon;
+
++ /* 1st buffer after stream on */
++ int first_buf;
++
+ /**
+ * Either no more buffers available or an unrecoverable error was notified
+ * by the V4L2 kernel driver: once set the context has to be exited.
+ */
+ int done;
+
++ int flag_last;
++
++ /**
++ * If NZ then when Qing frame/pkt use this rather than the
++ * "real" PTS
++ */
++ uint64_t track_ts;
++
++ AVBufferRef *frames_ref;
++ atomic_int q_count;
++ struct ff_weak_link_master *wl_master;
++
++ AVMutex lock;
++ pthread_cond_t cond;
+ } V4L2Context;
+
+ /**
+@@ -104,6 +135,14 @@ typedef struct V4L2Context {
+ int ff_v4l2_context_init(V4L2Context* ctx);
+
+ /**
++ * (re)set the hwframecontext from the current v4l2 context
++ *
++ * @param[in] ctx A pointer to a V4L2Context. See V4L2Context description for required variables.
++ * @return 0 in case of success, a negative value representing the error otherwise.
++ */
++int ff_v4l2_context_frames_set(V4L2Context *const ctx);
++
++/**
+ * Sets the V4L2Context format in the v4l2 driver.
+ *
+ * @param[in] ctx A pointer to a V4L2Context. See V4L2Context description for required variables.
+@@ -121,6 +160,19 @@ int ff_v4l2_context_set_format(V4L2Conte
+ int ff_v4l2_context_get_format(V4L2Context* ctx, int probe);
+
+ /**
++ * Get the list of drm fourcc pixel formats for this context
++ *
++ * @param[in] ctx A pointer to a V4L2Context. See V4L2Context
++ * description for required variables.
++ * @param[out] pN A pointer to receive the number of formats
++ * found. May be NULL if not wanted.
++ * @return Pointer to malloced list of zero terminated formats,
++ * NULL if none or error. As list is malloced it must be
++ * freed.
++ */
++uint32_t * ff_v4l2_context_enum_drm_formats(V4L2Context *ctx, unsigned int *pN);
++
++/**
+ * Releases a V4L2Context.
+ *
+ * @param[in] ctx A pointer to a V4L2Context.
+@@ -148,7 +200,7 @@ int ff_v4l2_context_set_status(V4L2Conte
+ * @param[inout] pkt The AVPacket to dequeue to.
+ * @return 0 in case of success, AVERROR(EAGAIN) if no buffer was ready, another negative error in case of error.
+ */
+-int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt);
++int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt, int timeout);
+
+ /**
+ * Dequeues a buffer from a V4L2Context to an AVFrame.
+@@ -157,7 +209,10 @@ int ff_v4l2_context_dequeue_packet(V4L2C
+ * @param[in] ctx The V4L2Context to dequeue from.
+ * @param[inout] f The AVFrame to dequeue to.
+ * @param[in] timeout The timeout for dequeue (-1 to block, 0 to return immediately, or milliseconds)
++ *
+ * @return 0 in case of success, AVERROR(EAGAIN) if no buffer was ready, another negative error in case of error.
++ * AVERROR(ENOSPC) if no buffer available to put
++ * the frame in
+ */
+ int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* f, int timeout);
+
+@@ -171,7 +226,7 @@ int ff_v4l2_context_dequeue_frame(V4L2Co
+ * @param[in] pkt A pointer to an AVPacket.
+ * @return 0 in case of success, a negative error otherwise.
+ */
+-int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt);
++int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt, const void * ext_data, size_t ext_size);
+
+ /**
+ * Enqueues a buffer to a V4L2Context from an AVFrame
+@@ -184,4 +239,28 @@ int ff_v4l2_context_enqueue_packet(V4L2C
+ */
+ int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* f);
+
++/**
++ * Dequeue all buffers on this queue
++ *
++ * Used to recycle output buffers
++ *
++ * @param[in] ctx The V4L2Context to dequeue from.
++ * @param[in] timeout1 A timeout on dequeuing the 1st buffer,
++ * all others have a timeout of zero
++ * @return AVERROR(EAGAIN) if timeout1 non-zero then the return
++ * of the first dequeue operation, 0 otherwise.
++ */
++int ff_v4l2_dq_all(V4L2Context *const ctx, int timeout1);
++
++/**
++ * Returns the number of buffers currently queued
++ *
++ * @param[in] ctx The V4L2Context to evaluate
++ */
++static inline int
++ff_v4l2_context_q_count(const V4L2Context* const ctx)
++{
++ return atomic_load(&ctx->q_count);
++}
++
+ #endif // AVCODEC_V4L2_CONTEXT_H
+--- a/libavcodec/v4l2_fmt.c
++++ b/libavcodec/v4l2_fmt.c
+@@ -42,6 +42,14 @@ static const struct fmt_conversion {
+ { AV_FMT(RGB24), AV_CODEC(RAWVIDEO), V4L2_FMT(RGB24) },
+ { AV_FMT(BGR0), AV_CODEC(RAWVIDEO), V4L2_FMT(BGR32) },
+ { AV_FMT(0RGB), AV_CODEC(RAWVIDEO), V4L2_FMT(RGB32) },
++ { AV_FMT(BGR0), AV_CODEC(RAWVIDEO), V4L2_FMT(BGRX32) },
++ { AV_FMT(RGB0), AV_CODEC(RAWVIDEO), V4L2_FMT(RGBX32) },
++ { AV_FMT(0BGR), AV_CODEC(RAWVIDEO), V4L2_FMT(XBGR32) },
++ { AV_FMT(0RGB), AV_CODEC(RAWVIDEO), V4L2_FMT(XRGB32) },
++ { AV_FMT(BGRA), AV_CODEC(RAWVIDEO), V4L2_FMT(BGRA32) },
++ { AV_FMT(RGBA), AV_CODEC(RAWVIDEO), V4L2_FMT(RGBA32) },
++ { AV_FMT(ABGR), AV_CODEC(RAWVIDEO), V4L2_FMT(ABGR32) },
++ { AV_FMT(ARGB), AV_CODEC(RAWVIDEO), V4L2_FMT(ARGB32) },
+ { AV_FMT(GRAY8), AV_CODEC(RAWVIDEO), V4L2_FMT(GREY) },
+ { AV_FMT(YUV420P), AV_CODEC(RAWVIDEO), V4L2_FMT(YUV420) },
+ { AV_FMT(YUYV422), AV_CODEC(RAWVIDEO), V4L2_FMT(YUYV) },
+--- a/libavcodec/v4l2_m2m.c
++++ b/libavcodec/v4l2_m2m.c
+@@ -34,6 +34,15 @@
+ #include "v4l2_context.h"
+ #include "v4l2_fmt.h"
+ #include "v4l2_m2m.h"
++#include "v4l2_req_dmabufs.h"
++
++static void
++xlat_init(xlat_track_t * const x)
++{
++ memset(x, 0, sizeof(*x));
++ x->last_pts = AV_NOPTS_VALUE;
++}
++
+
+ static inline int v4l2_splane_video(struct v4l2_capability *cap)
+ {
+@@ -67,7 +76,9 @@ static int v4l2_prepare_contexts(V4L2m2m
+
+ s->capture.done = s->output.done = 0;
+ s->capture.name = "capture";
++ s->capture.buf_mem = s->db_ctl != NULL ? V4L2_MEMORY_DMABUF : V4L2_MEMORY_MMAP;
+ s->output.name = "output";
++ s->output.buf_mem = s->input_drm ? V4L2_MEMORY_DMABUF : V4L2_MEMORY_MMAP;
+ atomic_init(&s->refcount, 0);
+ sem_init(&s->refsync, 0, 0);
+
+@@ -84,18 +95,58 @@ static int v4l2_prepare_contexts(V4L2m2m
+ if (v4l2_mplane_video(&cap)) {
+ s->capture.type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE;
+ s->output.type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE;
++ s->output.format.type = s->output.type;
+ return 0;
+ }
+
+ if (v4l2_splane_video(&cap)) {
+ s->capture.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+ s->output.type = V4L2_BUF_TYPE_VIDEO_OUTPUT;
++ s->output.format.type = s->output.type;
+ return 0;
+ }
+
+ return AVERROR(EINVAL);
+ }
+
++// Ask the driver (VIDIOC_TRY_FMT) whether the output queue can take avctx's pixel format at avctx's width x height. Returns 0 if so, a negative AVERROR otherwise.
++static int check_size(AVCodecContext * const avctx, V4L2m2mContext * const s)
++{
++    struct v4l2_format fmt = {.type = s->output.type};
++    int rv;
++    uint32_t pixfmt = ff_v4l2_format_avfmt_to_v4l2(avctx->pix_fmt);
++    unsigned int w;
++    unsigned int h;
++
++    if (V4L2_TYPE_IS_MULTIPLANAR(fmt.type)) {
++        fmt.fmt.pix_mp.pixelformat = pixfmt;
++        fmt.fmt.pix_mp.width = avctx->width;
++        fmt.fmt.pix_mp.height = avctx->height;
++    }
++    else {
++        fmt.fmt.pix.pixelformat = pixfmt;
++        fmt.fmt.pix.width = avctx->width;
++        fmt.fmt.pix.height = avctx->height;
++    }
++
++    rv = ioctl(s->fd, VIDIOC_TRY_FMT, &fmt);
++    if (rv != 0) {
++        rv = AVERROR(errno);
++        av_log(avctx, AV_LOG_ERROR, "%s: Tryfmt failed: %s\n", __func__, av_err2str(rv));
++        return rv;
++    }
++
++    w = ff_v4l2_get_format_width(&fmt);
++    h = ff_v4l2_get_format_height(&fmt);
++
++    if (w < avctx->width || h < avctx->height) {    // Size in fmt after TRY_FMT is smaller than requested
++        av_log(avctx, AV_LOG_WARNING, "%s: Size check failed: asked for %dx%d, got: %ux%u\n", __func__, avctx->width, avctx->height, w, h);
++        return AVERROR(EINVAL);
++    }
++
++    return 0;
++}
++
+ static int v4l2_probe_driver(V4L2m2mContext *s)
+ {
+ void *log_ctx = s->avctx;
+@@ -115,6 +166,11 @@ static int v4l2_probe_driver(V4L2m2mCont
+ goto done;
+ }
+
++ // If being given frames (encode) check that V4L2 can cope with the size
++ if (s->output.av_codec_id == AV_CODEC_ID_RAWVIDEO &&
++ (ret = check_size(s->avctx, s)) != 0)
++ goto done;
++
+ ret = ff_v4l2_context_get_format(&s->capture, 1);
+ if (ret) {
+ av_log(log_ctx, AV_LOG_DEBUG, "v4l2 capture format not supported\n");
+@@ -214,13 +270,7 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mCont
+ av_log(log_ctx, AV_LOG_ERROR, "capture VIDIOC_STREAMOFF\n");
+
+ /* 2. unmap the capture buffers (v4l2 and ffmpeg):
+- * we must wait for all references to be released before being allowed
+- * to queue new buffers.
+ */
+- av_log(log_ctx, AV_LOG_DEBUG, "waiting for user to release AVBufferRefs\n");
+- if (atomic_load(&s->refcount))
+- while(sem_wait(&s->refsync) == -1 && errno == EINTR);
+-
+ ff_v4l2_context_release(&s->capture);
+
+ /* 3. get the new capture format */
+@@ -239,7 +289,6 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mCont
+
+ /* 5. complete reinit */
+ s->draining = 0;
+- s->reinit = 0;
+
+ return 0;
+ }
+@@ -256,6 +305,9 @@ static void v4l2_m2m_destroy_context(voi
+ av_frame_unref(s->frame);
+ av_frame_free(&s->frame);
+ av_packet_unref(&s->buf_pkt);
++ av_freep(&s->extdata_data);
++
++ av_log(s->avctx, AV_LOG_DEBUG, "V4L2 Context destroyed\n");
+
+ av_free(s);
+ }
+@@ -268,6 +320,11 @@ int ff_v4l2_m2m_codec_end(V4L2m2mPriv *p
+ if (!s)
+ return 0;
+
++ av_log(s->avctx, AV_LOG_DEBUG, "V4L2 Codec end\n");
++
++ if (s->avctx && av_codec_is_decoder(s->avctx->codec))
++ av_packet_unref(&s->buf_pkt);
++
+ if (s->fd >= 0) {
+ ret = ff_v4l2_context_set_status(&s->output, VIDIOC_STREAMOFF);
+ if (ret)
+@@ -279,8 +336,20 @@ int ff_v4l2_m2m_codec_end(V4L2m2mPriv *p
+ }
+
+ ff_v4l2_context_release(&s->output);
++ av_buffer_unref(&s->device_ref);
++
++ dmabufs_ctl_unref(&s->db_ctl);
++
++ if (s->fd != -1) {
++ close(s->fd);
++ s->fd = -1;
++ }
+
+ s->self_ref = NULL;
++ // This is only called on avctx close so after this point we don't have that
++ // Crash sooner if we find we are using it (can still log with avctx = NULL)
++ s->avctx = NULL;
++ priv->context = NULL;
+ av_buffer_unref(&priv->context_ref);
+
+ return 0;
+@@ -324,35 +393,38 @@ int ff_v4l2_m2m_codec_init(V4L2m2mPriv *
+ return v4l2_configure_contexts(s);
+ }
+
+-int ff_v4l2_m2m_create_context(V4L2m2mPriv *priv, V4L2m2mContext **s)
++int ff_v4l2_m2m_create_context(V4L2m2mPriv *priv, V4L2m2mContext **pps)
+ {
+- *s = av_mallocz(sizeof(V4L2m2mContext));
+- if (!*s)
++ V4L2m2mContext * const s = av_mallocz(sizeof(V4L2m2mContext));
++ *pps = NULL; // Caller sees NULL on every failure path; set to s only on success
++ if (!s)
+ return AVERROR(ENOMEM);
+
+- priv->context_ref = av_buffer_create((uint8_t *) *s, sizeof(V4L2m2mContext),
++ priv->context_ref = av_buffer_create((uint8_t *)s, sizeof(*s),
+ &v4l2_m2m_destroy_context, NULL, 0);
+ if (!priv->context_ref) {
+- av_freep(s);
++ av_free(s);
+ return AVERROR(ENOMEM);
+ }
+
+ /* assign the context */
+- priv->context = *s;
+- (*s)->priv = priv;
++ priv->context = s;
++ s->priv = priv;
+
+ /* populate it */
+- priv->context->capture.num_buffers = priv->num_capture_buffers;
+- priv->context->output.num_buffers = priv->num_output_buffers;
+- priv->context->self_ref = priv->context_ref;
+- priv->context->fd = -1;
++ s->capture.num_buffers = priv->num_capture_buffers;
++ s->output.num_buffers = priv->num_output_buffers;
++ s->self_ref = priv->context_ref;
++ s->fd = -1;
++ xlat_init(&s->xlat);
+
+ priv->context->frame = av_frame_alloc();
+ if (!priv->context->frame) {
+ av_buffer_unref(&priv->context_ref);
+- *s = NULL; /* freed when unreferencing context_ref */
++ priv->context = NULL; /* s was freed by the unref above (v4l2_m2m_destroy_context) - do not leave priv pointing at it */
+ return AVERROR(ENOMEM);
+ }
+
++ *pps = s;
+ return 0;
+ }
+--- a/libavcodec/v4l2_m2m.h
++++ b/libavcodec/v4l2_m2m.h
+@@ -30,6 +30,7 @@
+ #include <linux/videodev2.h>
+
+ #include "libavcodec/avcodec.h"
++#include "libavutil/pixfmt.h"
+ #include "v4l2_context.h"
+
+ #define container_of(ptr, type, member) ({ \
+@@ -38,7 +39,39 @@
+
+ #define V4L_M2M_DEFAULT_OPTS \
+ { "num_output_buffers", "Number of buffers in the output context",\
+- OFFSET(num_output_buffers), AV_OPT_TYPE_INT, { .i64 = 16 }, 6, INT_MAX, FLAGS }
++ OFFSET(num_output_buffers), AV_OPT_TYPE_INT, { .i64 = 16 }, 2, INT_MAX, FLAGS }
++
++#define FF_V4L2_M2M_TRACK_SIZE 128
++typedef struct V4L2m2mTrackEl {
++ int discard; // If we see this buffer it's been flushed, so discard
++ int pending; // Cleared for every element by xlat_flush()
++ int pkt_size;
++ int64_t pts;
++ int64_t dts;
++ int64_t reordered_opaque;
++ int64_t pkt_pos;
++ int64_t pkt_duration;
++ int64_t track_pts;
++} V4L2m2mTrackEl;
++
++typedef struct pts_stats_s
++{
++ void * logctx;
++ const char * name; // For debug
++ unsigned int last_count;
++ unsigned int last_interval;
++ int64_t last_pts;
++ int64_t guess;
++} pts_stats_t;
++
++typedef struct xlat_track_s {
++ unsigned int track_no;
++ int64_t last_pts; // Last valid PTS decoded
++ int64_t last_opaque;
++ V4L2m2mTrackEl track_els[FF_V4L2_M2M_TRACK_SIZE];
++} xlat_track_t;
++
++struct dmabufs_ctl;
+
+ typedef struct V4L2m2mContext {
+ char devname[PATH_MAX];
+@@ -52,10 +85,10 @@ typedef struct V4L2m2mContext {
+ AVCodecContext *avctx;
+ sem_t refsync;
+ atomic_uint refcount;
+- int reinit;
+
+ /* null frame/packet received */
+ int draining;
++ int running;
+ AVPacket buf_pkt;
+
+ /* Reference to a frame. Only used during encoding */
+@@ -66,6 +99,36 @@ typedef struct V4L2m2mContext {
+
+ /* reference back to V4L2m2mPriv */
+ void *priv;
++
++ AVBufferRef *device_ref;
++
++ /* generate DRM frames */
++ int output_drm;
++
++ /* input frames are drmprime */
++ int input_drm;
++
++ /* Frame tracking */
++ xlat_track_t xlat;
++
++ pts_stats_t pts_stat;
++
++ /* req pkt */
++ int req_pkt;
++ int reorder_size;
++
++ /* Ext data sent */
++ int extdata_sent;
++ /* Ext data sent in packet - overrides ctx */
++ void * extdata_data;
++ size_t extdata_size;
++
++#define FF_V4L2_QUIRK_REINIT_ALWAYS 1
++#define FF_V4L2_QUIRK_ENUM_FRAMESIZES_BROKEN 2
++ /* Quirks */
++ unsigned int quirks;
++
++ struct dmabufs_ctl * db_ctl;
+ } V4L2m2mContext;
+
+ typedef struct V4L2m2mPriv {
+@@ -76,6 +139,8 @@ typedef struct V4L2m2mPriv {
+
+ int num_output_buffers;
+ int num_capture_buffers;
++ const char * dmabuf_alloc;
++ enum AVPixelFormat pix_fmt;
+ } V4L2m2mPriv;
+
+ /**
+@@ -129,4 +194,26 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mCont
+ */
+ int ff_v4l2_m2m_codec_full_reinit(V4L2m2mContext *ctx);
+
++
++static inline unsigned int ff_v4l2_get_format_width(const struct v4l2_format * const fmt)
++{
++ return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.width : fmt->fmt.pix.width;
++}
++
++static inline unsigned int ff_v4l2_get_format_height(const struct v4l2_format * const fmt)
++{
++ return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.height : fmt->fmt.pix.height;
++}
++
++static inline uint32_t ff_v4l2_get_format_pixelformat(const struct v4l2_format * const fmt)
++{
++ return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.pixelformat : fmt->fmt.pix.pixelformat;
++}
++
++static inline int ff_v4l2_ctx_eos(const V4L2Context * const ctx)
++{
++ return ctx->flag_last;
++}
++
++
+ #endif /* AVCODEC_V4L2_M2M_H */
+--- a/libavcodec/v4l2_m2m_dec.c
++++ b/libavcodec/v4l2_m2m_dec.c
+@@ -21,8 +21,14 @@
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
++#include "config_components.h"
++
+ #include <linux/videodev2.h>
+ #include <sys/ioctl.h>
++
++#include "libavutil/avassert.h"
++#include "libavutil/hwcontext.h"
++#include "libavutil/hwcontext_drm.h"
+ #include "libavutil/pixfmt.h"
+ #include "libavutil/pixdesc.h"
+ #include "libavutil/opt.h"
+@@ -30,75 +36,279 @@
+ #include "codec_internal.h"
+ #include "libavcodec/decode.h"
+
++#include "libavcodec/hwaccels.h"
++#include "libavcodec/internal.h"
++#include "libavcodec/hwconfig.h"
++
+ #include "v4l2_context.h"
+ #include "v4l2_m2m.h"
+ #include "v4l2_fmt.h"
++#include "v4l2_req_dmabufs.h"
+
+-static int v4l2_try_start(AVCodecContext *avctx)
++#if CONFIG_H264_DECODER
++#include "h264_parse.h"
++#endif
++#if CONFIG_HEVC_DECODER
++#include "hevc_parse.h"
++#endif
++
++// Pick 64 for max last count - that is >1sec at 60fps
++#define STATS_LAST_COUNT_MAX 64
++#define STATS_INTERVAL_MAX (1 << 30)
++
++#ifndef FF_API_BUFFER_SIZE_T
++#define FF_API_BUFFER_SIZE_T 1
++#endif
++
++#define DUMP_FAILED_EXTRADATA 0
++
++#if DUMP_FAILED_EXTRADATA
++static inline char hex1(unsigned int x)
+ {
+- V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context;
+- V4L2Context *const capture = &s->capture;
+- V4L2Context *const output = &s->output;
+- struct v4l2_selection selection = { 0 };
+- int ret;
++ x &= 0xf;
++ return x <= 9 ? '0' + x : 'a' + x - 10;
++}
+
+- /* 1. start the output process */
+- if (!output->streamon) {
+- ret = ff_v4l2_context_set_status(output, VIDIOC_STREAMON);
+- if (ret < 0) {
+- av_log(avctx, AV_LOG_DEBUG, "VIDIOC_STREAMON on output context\n");
+- return ret;
+- }
++static inline char * hex2(char * s, unsigned int x)
++{
++ *s++ = hex1(x >> 4);
++ *s++ = hex1(x);
++ return s;
++}
++
++static inline char * hex4(char * s, unsigned int x)
++{
++ s = hex2(s, x >> 8);
++ s = hex2(s, x);
++ return s;
++}
++
++static inline char * dash2(char * s)
++{
++ *s++ = '-';
++ *s++ = '-';
++ return s;
++}
++
++static void
++data16(char * s, const unsigned int offset, const uint8_t * m, const size_t len)
++{
++ size_t i;
++ s = hex4(s, offset);
++ m += offset;
++ for (i = 0; i != 8; ++i) {
++ *s++ = ' ';
++ s = len > i + offset ? hex2(s, *m++) : dash2(s);
+ }
++ *s++ = ' ';
++ *s++ = ':';
++ for (; i != 16; ++i) {
++ *s++ = ' ';
++ s = len > i + offset ? hex2(s, *m++) : dash2(s);
++ }
++ *s++ = 0;
++}
+
+- if (capture->streamon)
+- return 0;
++static void
++log_dump(void * logctx, int lvl, const void * const data, const size_t len)
++{
++ size_t i;
++ for (i = 0; i < len; i += 16) {
++ char buf[80];
++ data16(buf, i, data, len);
++ av_log(logctx, lvl, "%s\n", buf);
++ }
++}
++#endif
+
+- /* 2. get the capture format */
+- capture->format.type = capture->type;
+- ret = ioctl(s->fd, VIDIOC_G_FMT, &capture->format);
+- if (ret) {
+- av_log(avctx, AV_LOG_WARNING, "VIDIOC_G_FMT ioctl\n");
+- return ret;
++static unsigned int pts_stats_interval(const pts_stats_t * const stats)
++{
++ return stats->last_interval;
++}
++
++static int64_t pts_stats_guess(const pts_stats_t * const stats, const int fail_bad_guess)
++{
++    const int unreliable = stats->last_interval == 0 || stats->last_count >= STATS_LAST_COUNT_MAX;
++
++    if (stats->last_count <= 1)     // Nothing to extrapolate: last_pts is the answer as it stands
++        return stats->last_pts;
++    if (stats->last_pts == AV_NOPTS_VALUE || (fail_bad_guess && unreliable))
++        return AV_NOPTS_VALUE;
++    return stats->last_pts + (int64_t)stats->last_interval * (int64_t)(stats->last_count - 1);
++}
++
++static void pts_stats_add(pts_stats_t * const stats, int64_t pts)
++{
++ if (pts == AV_NOPTS_VALUE || pts == stats->last_pts) {
++ if (stats->last_count < STATS_LAST_COUNT_MAX)
++ ++stats->last_count;
++ return;
+ }
+
+- /* 2.1 update the AVCodecContext */
+- avctx->pix_fmt = ff_v4l2_format_v4l2_to_avfmt(capture->format.fmt.pix_mp.pixelformat, AV_CODEC_ID_RAWVIDEO);
+- capture->av_pix_fmt = avctx->pix_fmt;
++ if (stats->last_pts != AV_NOPTS_VALUE) {
++ const int64_t interval = pts - stats->last_pts;
+
+- /* 3. set the crop parameters */
+- selection.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+- selection.r.height = avctx->coded_height;
+- selection.r.width = avctx->coded_width;
+- ret = ioctl(s->fd, VIDIOC_S_SELECTION, &selection);
+- if (!ret) {
+- ret = ioctl(s->fd, VIDIOC_G_SELECTION, &selection);
+- if (ret) {
+- av_log(avctx, AV_LOG_WARNING, "VIDIOC_G_SELECTION ioctl\n");
+- } else {
+- av_log(avctx, AV_LOG_DEBUG, "crop output %dx%d\n", selection.r.width, selection.r.height);
+- /* update the size of the resulting frame */
+- capture->height = selection.r.height;
+- capture->width = selection.r.width;
++ if (interval < 0 || interval >= STATS_INTERVAL_MAX ||
++ stats->last_count >= STATS_LAST_COUNT_MAX) {
++ if (stats->last_interval != 0)
++ av_log(stats->logctx, AV_LOG_DEBUG, "%s: %s: Bad interval: %" PRId64 "/%d\n",
++ __func__, stats->name, interval, stats->last_count);
++ stats->last_interval = 0;
++ }
++ else {
++ const int64_t frame_time = interval / (int64_t)stats->last_count;
++
++ if (frame_time != stats->last_interval)
++ av_log(stats->logctx, AV_LOG_DEBUG, "%s: %s: New interval: %u->%" PRId64 "/%d=%" PRId64 "\n",
++ __func__, stats->name, stats->last_interval, interval, stats->last_count, frame_time);
++ stats->last_interval = frame_time;
+ }
+ }
+
+- /* 4. init the capture context now that we have the capture format */
+- if (!capture->buffers) {
+- ret = ff_v4l2_context_init(capture);
+- if (ret) {
+- av_log(avctx, AV_LOG_ERROR, "can't request capture buffers\n");
+- return AVERROR(ENOMEM);
++ stats->last_pts = pts;
++ stats->last_count = 1;
++}
++
++static void pts_stats_init(pts_stats_t * const stats, void * logctx, const char * name)
++{
++ *stats = (pts_stats_t){
++ .logctx = logctx,
++ .name = name,
++ .last_count = 1,
++ .last_interval = 0,
++ .last_pts = AV_NOPTS_VALUE
++ };
++}
++
++// Copy H.264 extradata to abdata in start-code (00 00 00 01) prefixed form, unpacking avcC if detected.
++// If abdata == NULL nothing is written and only the space required is counted. Returns the size in bytes or AVERROR(EINVAL).
++static int
++h264_xd_copy(const uint8_t * const extradata, const int extrasize, uint8_t * abdata)
++{
++    const uint8_t * const xdend = extradata + extrasize;
++    const uint8_t * p = extradata;
++    size_t dlen = 0;    // Output bytes so far; a counter, not a pointer, so a NULL abdata is never used in pointer arithmetic
++    unsigned int n;
++    unsigned int len;
++    const unsigned int hdrlen = 4;
++    unsigned int need_pps = 1;
++
++    if (extrasize < 8)
++        return AVERROR(EINVAL);
++
++    if (p[0] == 0 && p[1] == 0) {
++        // Assume a couple of leading zeros are good enough to indicate NAL
++        if (abdata)
++            memcpy(abdata, p, extrasize);
++        return extrasize;
++    }
++
++    // avcC starts with a 1
++    if (p[0] != 1)
++        return AVERROR(EINVAL);
++
++    p += 5;
++    n = *p++ & 0x1f;    // Entry count of the first list (low 5 bits)
++
++doxps:
++    while (n--) {
++        if (xdend - p < 2)
++            return AVERROR(EINVAL);
++        len = (p[0] << 8) | p[1];   // 16-bit big-endian entry length
++        p += 2;
++        if (xdend - p < (ptrdiff_t)len)
++            return AVERROR(EINVAL);
++        if (abdata) {
++            abdata[dlen + 0] = 0;
++            abdata[dlen + 1] = 0;
++            abdata[dlen + 2] = 0;
++            abdata[dlen + 3] = 1;
++            memcpy(abdata + dlen + hdrlen, p, len);
+ }
++        dlen += len + hdrlen;
++        p += len;
++    }
++    if (need_pps) {
++        need_pps = 0;   // One more pass only, for the list that follows the first
++        if (p >= xdend)
++            return AVERROR(EINVAL);
++        n = *p++;
++        goto doxps;
+ }
+
+- /* 5. start the capture process */
+- ret = ff_v4l2_context_set_status(capture, VIDIOC_STREAMON);
+- if (ret) {
+- av_log(avctx, AV_LOG_DEBUG, "VIDIOC_STREAMON, on capture context\n");
++    return (int)dlen;
++}
++
++// Free *pdst_data and replace it with a newly allocated copy of src_data (H.264 is converted by h264_xd_copy)
++static int
++copy_extradata(AVCodecContext * const avctx,
++               const void * const src_data, const int src_len,
++               void ** const pdst_data, size_t * const pdst_len)
++{
++    const int is_h264 = (avctx->codec_id == AV_CODEC_ID_H264);
++    void * dst;
++    int dst_len;
++
++    *pdst_len = 0;
++    av_freep(pdst_data);
++
++    if (is_h264)
++        dst_len = h264_xd_copy(src_data, src_len, NULL);    // NULL destination: size only
++    else
++        dst_len = src_len < 0 ? AVERROR(EINVAL) : src_len;
++    if (dst_len <= 0)   // 0 => nothing to copy (not an error), -ve => error value
++        return dst_len;
++
++    if ((dst = av_malloc(dst_len + AV_INPUT_BUFFER_PADDING_SIZE)) == NULL)
++        return AVERROR(ENOMEM);
++    if (is_h264)
++        h264_xd_copy(src_data, src_len, dst);
++    else
++        memcpy(dst, src_data, dst_len);
++    *pdst_data = dst;
++    *pdst_len = dst_len;
++    return 0;
++}
++
++
++
++static int check_output_streamon(AVCodecContext *const avctx, V4L2m2mContext *const s)
++{
++ int ret;
++ struct v4l2_decoder_cmd cmd = {
++ .cmd = V4L2_DEC_CMD_START,
++ .flags = 0,
++ };
++
++ if (s->output.streamon)
++ return 0;
++
++ ret = ff_v4l2_context_set_status(&s->output, VIDIOC_STREAMON);
++ if (ret != 0) {
++ av_log(avctx, AV_LOG_ERROR, "VIDIOC_STREAMON on output context: %s\n", av_err2str(ret));
+ return ret;
+ }
+
++ // STREAMON should do implicit START so this just for those that don't.
++ // It is optional so don't worry if it fails
++ if (ioctl(s->fd, VIDIOC_DECODER_CMD, &cmd) < 0) {
++ ret = AVERROR(errno);
++ av_log(avctx, AV_LOG_WARNING, "VIDIOC_DECODER_CMD start error: %s\n", av_err2str(ret));
++ }
++ else {
++ av_log(avctx, AV_LOG_TRACE, "VIDIOC_DECODER_CMD start OK\n");
++ }
++ return 0;
++}
++// Start decode if not yet started; as written this only ensures the output queue is streaming
++static int v4l2_try_start(AVCodecContext *avctx)
++{
++ V4L2m2mContext * const s = ((V4L2m2mPriv*)avctx->priv_data)->context;
++ int ret;
++
++ /* 1. start the output process */
++ if ((ret = check_output_streamon(avctx, s)) != 0)
++ return ret;
+ return 0;
+ }
+
+@@ -133,51 +343,823 @@ static int v4l2_prepare_decoder(V4L2m2mC
+ return 0;
+ }
+
+-static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)
++static void
++set_best_effort_pts(AVCodecContext *const avctx,
++                    pts_stats_t * const ps,
++                    AVFrame *const frame)
++{
++    int64_t guess;
++
++    // Log this frame's PTS with the stats before asking them for a guess
++    pts_stats_add(ps, frame->pts);
++    guess = pts_stats_guess(ps, 1);
++    // If the PTS stats can't produce a guess then DTS is the fallback
++    frame->best_effort_timestamp = (guess == AV_NOPTS_VALUE) ? frame->pkt_dts : guess;
++
++    // Emulating s/w decode DTS isn't useful (the "correct" answer only confuses) so mirror PTS
++    frame->pkt_dts = frame->pts;
++    av_log(avctx, AV_LOG_TRACE, "Out PTS=%" PRId64 "/%"PRId64", DTS=%" PRId64 "\n",
++           frame->pts, frame->best_effort_timestamp, frame->pkt_dts);
++}
++// Reset the tracker: forget the last pts/opaque and mark every slot as not pending + discard
++static void
++xlat_flush(xlat_track_t * const x)
++{
++    unsigned int slot = FF_V4L2_M2M_TRACK_SIZE;
++    // track_no is deliberately left alone - this ensures that any frames still
++    // in the decoder that turn up later get discarded.
++
++    x->last_opaque = 0;
++    x->last_pts = AV_NOPTS_VALUE;
++    while (slot-- != 0) {
++        x->track_els[slot].discard = 1;
++        x->track_els[slot].pending = 0;
++    }
++}
++// Zero the whole tracker, then apply xlat_flush() so every slot starts out marked discard
++static void
++xlat_init(xlat_track_t * const x)
++{
++ memset(x, 0, sizeof(*x));
++ xlat_flush(x);
++}
++// Estimate how many frames are still inside the decoder; can be -ve before the first decoded frame
++static int
++xlat_pending(const V4L2m2mContext * const s)
++{
++ const xlat_track_t *const x = &s->xlat;
++ unsigned int n = x->track_no % FF_V4L2_M2M_TRACK_SIZE;
++ int i;
++ const int64_t now = pts_stats_guess(&s->pts_stat, 0);
++ int64_t first_dts = AV_NOPTS_VALUE;
++ int no_dts_count = 0;
++ unsigned int interval = pts_stats_interval(&s->pts_stat);
++ // NOTE(review): start index uses % but the step uses & (SIZE - 1) - only equivalent if SIZE is a power of 2; confirm
++ for (i = 0; i < FF_V4L2_M2M_TRACK_SIZE; ++i, n = (n - 1) & (FF_V4L2_M2M_TRACK_SIZE - 1)) {
++ const V4L2m2mTrackEl * const t = x->track_els + n;
++ // NB: the else below pairs with the inner if (the t->dts test), not the outer one
++ if (first_dts == AV_NOPTS_VALUE)
++ if (t->dts == AV_NOPTS_VALUE)
++ ++no_dts_count;
++ else
++ first_dts = t->dts;
++
++ // Discard only set on never-set or flushed entries
++ // So if we get here we've never successfully decoded a frame so allow
++ // more frames into the buffer before stalling
++ if (t->discard)
++ return i - 16;
++
++ // If we've got this frame out then everything before this point
++ // must have entered the decoder
++ if (!t->pending)
++ break;
++
++ // If we've never seen a pts all we can do is count frames
++ if (now == AV_NOPTS_VALUE)
++ continue;
++
++ if (t->dts != AV_NOPTS_VALUE && now >= t->dts)
++ break;
++ }
++
++ if (first_dts != AV_NOPTS_VALUE && now != AV_NOPTS_VALUE && interval != 0 && s->reorder_size != 0) {
++ const int iframes = (first_dts - now) / (int)interval;
++ const int t = iframes - s->reorder_size + no_dts_count;
++
++// av_log(s->avctx, AV_LOG_DEBUG, "Last:%"PRId64", Now:%"PRId64", First:%"PRId64", delta=%"PRId64", frames=%d, nodts=%d\n",
++// x->last_dts, now, first_dts, first_dts - now, iframes, no_dts_count);
++
++ if (iframes > 0 && iframes < 64 && t < i) {
++ return t;
++ }
++ }
++
++ return i;
++}
++// Non-zero once the output (coded data) queue has been set streaming
++static inline int stream_started(const V4L2m2mContext * const s) {
++ return s->output.streamon;
++}
++// Status codes returned by try_enqueue_src() (meanings are tabulated in the comment below)
++#define NQ_OK 0
++#define NQ_Q_FULL 1
++#define NQ_SRC_EMPTY 2
++#define NQ_NONE 3
++#define NQ_DRAINING 4
++#define NQ_DEAD 5
++// TRY_DQ: status permits a capture dequeue attempt; RETRY_NQ: the enqueue is worth retrying
++#define TRY_DQ(nq_status) ((nq_status) >= NQ_OK && (nq_status) <= NQ_DRAINING)
++#define RETRY_NQ(nq_status) ((nq_status) == NQ_Q_FULL || (nq_status) == NQ_NONE)
++
++// do_not_get If true then no new packet will be got but status will
++// be set appropriately
++
++// AVERROR_EOF Flushing an already flushed stream
++// -ve Error (all errors except EOF are unexpected)
++// NQ_OK (0) OK
++// NQ_Q_FULL Dst full (retry if we think V4L2 Q has space now)
++// NQ_SRC_EMPTY Src empty (do not retry)
++// NQ_NONE Enqueue not attempted
++// NQ_DRAINING At EOS, dQ dest until EOS there too
++// NQ_DEAD Not running (do not retry, do not attempt capture dQ)
++
++static int try_enqueue_src(AVCodecContext * const avctx, V4L2m2mContext * const s, const int do_not_get)
+ {
+- V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context;
+- V4L2Context *const capture = &s->capture;
+- V4L2Context *const output = &s->output;
+ int ret;
+
+- if (!s->buf_pkt.size) {
+- ret = ff_decode_get_packet(avctx, &s->buf_pkt);
++ // If we don't already have a coded packet - get a new one
++ // We will already have a coded pkt if the output Q was full last time we
++ // tried to Q it
++ if (!s->buf_pkt.size && !do_not_get) {
++ unsigned int i;
++
++ for (i = 0; i < 256; ++i) {
++ uint8_t * side_data;
++ size_t side_size;
++
++ ret = ff_decode_get_packet(avctx, &s->buf_pkt);
++ if (ret != 0)
++ break;
++
++ // New extradata is the only side-data we understand
++ side_data = av_packet_get_side_data(&s->buf_pkt, AV_PKT_DATA_NEW_EXTRADATA, &side_size);
++ if (side_data) {
++ av_log(avctx, AV_LOG_DEBUG, "New extradata\n");
++ if ((ret = copy_extradata(avctx, side_data, (int)side_size, &s->extdata_data, &s->extdata_size)) < 0)
++ av_log(avctx, AV_LOG_WARNING, "Failed to copy new extra data: %s\n", av_err2str(ret));
++ s->extdata_sent = 0;
++ }
++
++ if (s->buf_pkt.size != 0)
++ break;
++
++ if (s->buf_pkt.side_data_elems == 0) {
++ av_log(avctx, AV_LOG_WARNING, "Empty pkt from ff_decode_get_packet - treating as EOF\n");
++ ret = AVERROR_EOF;
++ break;
++ }
++ // NOTE(review): buf_pkt still owns this pkt's side data when re-got - confirm it is unref'd (possible leak)
++ // Retry a side-data only pkt
++ }
++ // If i >= 256 something has gone wrong
++ if (i >= 256) {
++ av_log(avctx, AV_LOG_ERROR, "Too many side-data only packets\n");
++ return AVERROR(EIO);
++ }
++
++ if (ret == AVERROR(EAGAIN)) {
++ if (!stream_started(s)) {
++ av_log(avctx, AV_LOG_TRACE, "%s: receive_frame before 1st coded packet\n", __func__);
++ return NQ_DEAD;
++ }
++ return NQ_SRC_EMPTY;
++ }
++
++ if (ret == AVERROR_EOF) {
++ // EOF - enter drain mode
++ av_log(avctx, AV_LOG_TRACE, "--- EOS req: ret=%d, size=%d, started=%d, drain=%d\n",
++ ret, s->buf_pkt.size, stream_started(s), s->draining);
++ if (!stream_started(s)) {
++ av_log(avctx, AV_LOG_DEBUG, "EOS on flushed stream\n");
++ s->draining = 1;
++ s->capture.done = 1;
++ return AVERROR_EOF;
++ }
++
++ if (!s->draining) {
++ // Calling enqueue with an empty pkt starts drain
++ av_assert0(s->buf_pkt.size == 0);
++ ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, NULL, 0);
++ if (ret) {
++ av_log(avctx, AV_LOG_ERROR, "Failed to start drain: ret=%d\n", ret);
++ return ret;
++ }
++ }
++ return NQ_DRAINING;
++ }
++
+ if (ret < 0) {
+- if (ret == AVERROR(EAGAIN))
+- return ff_v4l2_context_dequeue_frame(capture, frame, 0);
+- else if (ret != AVERROR_EOF)
+- return ret;
++ av_log(avctx, AV_LOG_ERROR, "Failed to get coded packet: err=%d\n", ret);
++ return ret;
+ }
+ }
+
+- if (s->draining)
+- goto dequeue;
++ if (s->draining) {
++ if (s->buf_pkt.size) {
++ av_log(avctx, AV_LOG_WARNING, "Unexpected input whilst draining\n");
++ av_packet_unref(&s->buf_pkt);
++ }
++ return NQ_DRAINING;
++ }
++
++ if (!s->buf_pkt.size)
++ return NQ_NONE;
+
+- ret = ff_v4l2_context_enqueue_packet(output, &s->buf_pkt);
+- if (ret < 0 && ret != AVERROR(EAGAIN))
+- goto fail;
++ if ((ret = check_output_streamon(avctx, s)) != 0)
++ return ret;
+
+- /* if EAGAIN don't unref packet and try to enqueue in the next iteration */
+- if (ret != AVERROR(EAGAIN))
++ if (s->extdata_sent)
++ ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, NULL, 0);
++ else
++ ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, s->extdata_data, s->extdata_size);
++
++ if (ret == AVERROR(EAGAIN)) {
++ // Out of input buffers - keep packet
++ ret = NQ_Q_FULL;
++ }
++ else {
++ // In all other cases we are done with this packet
+ av_packet_unref(&s->buf_pkt);
++ s->extdata_sent = 1;
+
+- if (!s->draining) {
+- ret = v4l2_try_start(avctx);
+ if (ret) {
+- /* cant recover */
+- if (ret != AVERROR(ENOMEM))
+- ret = 0;
+- goto fail;
++ av_log(avctx, AV_LOG_ERROR, "Packet enqueue failure: err=%d\n", ret);
++ return ret;
++ }
++ }
++
++ // Start if we haven't
++ {
++ const int ret2 = v4l2_try_start(avctx);
++ if (ret2) {
++ av_log(avctx, AV_LOG_DEBUG, "Start failure: err=%d\n", ret2);
++ ret = (ret2 == AVERROR(ENOMEM)) ? ret2 : NQ_DEAD;
+ }
+ }
+
+-dequeue:
+- return ff_v4l2_context_dequeue_frame(capture, frame, -1);
+-fail:
+- av_packet_unref(&s->buf_pkt);
+ return ret;
+ }
+
++static int qbuf_wait(AVCodecContext * const avctx, V4L2Context * const ctx)
++{
++    int rv = 0;
++    // Wait (holding ctx->lock) until a buffer is queued (q_count != 0) or streaming has stopped
++    ff_mutex_lock(&ctx->lock);
++    // NB pthread_cond_wait() returns its error number directly; it does not set errno
++    while (atomic_load(&ctx->q_count) == 0 && ctx->streamon) {
++        if ((rv = pthread_cond_wait(&ctx->cond, &ctx->lock)) != 0) {
++            rv = AVERROR(rv);
++            av_log(avctx, AV_LOG_ERROR, "Cond wait failure: %s\n", av_err2str(rv));
++            break;
++        }
++    }
++
++    ff_mutex_unlock(&ctx->lock);
++    return rv;
++}
++// receive_frame callback: alternates enqueueing coded packets with dequeueing decoded frames
++static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)
++{
++    V4L2m2mContext *const s = ((V4L2m2mPriv*)avctx->priv_data)->context;
++    int src_rv = -1;
++    int dst_rv = 1;  // Non-zero (done), non-negative (error) number
++    unsigned int i = 0;
++
++    do {
++        const int pending = xlat_pending(s);
++        const int prefer_dq = (pending > 4);
++        const int last_src_rv = src_rv;
++
++        av_log(avctx, AV_LOG_TRACE, "Pending=%d, src_rv=%d, req_pkt=%d\n", pending, src_rv, s->req_pkt);
++
++        // Enqueue another pkt for decode if
++        // (a) We don't have a lot of stuff in the buffer already OR
++        // (b) ... we (think we) do but we've failed to get a frame already OR
++        // (c) We've dequeued a lot of frames without asking for input
++        src_rv = try_enqueue_src(avctx, s, !(!prefer_dq || i != 0 || s->req_pkt > 2));
++
++        // If we got a frame last time or we've already tried to get a frame and
++        // we have nothing to enqueue then return now. rv will be AVERROR(EAGAIN)
++        // indicating that we want more input.
++        // This should mean that once decode starts we enter a stable state where
++        // we alternately ask for input and produce output
++        if ((i != 0 || s->req_pkt) && src_rv == NQ_SRC_EMPTY)
++            break;
++
++        if (src_rv == NQ_Q_FULL && last_src_rv == NQ_Q_FULL) {
++            av_log(avctx, AV_LOG_WARNING, "Poll thinks src Q has space; none found\n");
++            break;
++        }
++
++        // Try to get a new frame if
++        // (a) we haven't already got one AND
++        // (b) enqueue returned a status indicating that decode should be attempted
++        if (dst_rv != 0 && TRY_DQ(src_rv)) {
++            // Pick a timeout depending on state
++            // The pending count isn't completely reliable so it is good enough
++            // hint that we want a frame but not good enough to require it in
++            // all cases; however if it has got > 31 that exceeds its margin of
++            // error so require a frame to prevent ridiculous levels of latency
++            const int t =
++                src_rv == NQ_Q_FULL ? -1 :
++                src_rv == NQ_DRAINING ? 300 :
++                prefer_dq ? (s->running && pending > 31 ? 100 : 5) : 0;
++
++            // Dequeue frame will unref any previous contents of frame
++            // if it returns success so we don't need an explicit unref
++            // when discarding
++            // This returns AVERROR(EAGAIN) on timeout or if
++            // there is room in the input Q and timeout == -1
++            dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, t);
++
++            // Failure due to no buffer in Q?
++            if (dst_rv == AVERROR(ENOSPC)) {
++                // Wait & retry
++                if ((dst_rv = qbuf_wait(avctx, &s->capture)) == 0) {
++                    dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, t);
++                }
++            }
++
++            if (dst_rv == 0) {
++                set_best_effort_pts(avctx, &s->pts_stat, frame);
++                if (!s->running) {
++                    s->running = 1;
++                    av_log(avctx, AV_LOG_VERBOSE, "Decode running\n");
++                }
++            }
++
++            if (dst_rv == AVERROR(EAGAIN) && src_rv == NQ_DRAINING) {
++                av_log(avctx, AV_LOG_WARNING, "Timeout in drain - assume EOF\n");
++                dst_rv = AVERROR_EOF;
++                s->capture.done = 1;
++            }
++            else if (dst_rv == AVERROR_EOF && (s->draining || s->capture.done))
++                av_log(avctx, AV_LOG_DEBUG, "Dequeue EOF: draining=%d, cap.done=%d\n",
++                       s->draining, s->capture.done);
++            else if (dst_rv && dst_rv != AVERROR(EAGAIN))
++                av_log(avctx, AV_LOG_ERROR, "Packet dequeue failure: draining=%d, cap.done=%d, err=%d\n",
++                       s->draining, s->capture.done, dst_rv);
++        }
++
++        ++i;
++        if (i >= 256) {
++            av_log(avctx, AV_LOG_ERROR, "Unexpectedly large retry count: %u\n", i);
++            src_rv = AVERROR(EIO);
++        }
++
++        // Continue trying to enqueue packets if either
++        // (a) we succeeded last time OR
++        // (b) we didn't ret a frame and we can retry the input
++    } while (src_rv == NQ_OK || (dst_rv == AVERROR(EAGAIN) && RETRY_NQ(src_rv)));
++
++    // Ensure that the frame contains nothing if we aren't returning a frame
++    // (might happen when discarding)
++    if (dst_rv)
++        av_frame_unref(frame);
++
++    // If we got a frame this time ask for a pkt next time
++    s->req_pkt = (dst_rv == 0) ? s->req_pkt + 1 : 0;
++
++#if 0
++    if (dst_rv == 0)
++    {
++        static int z = 0;
++        if (++z > 50) {
++            av_log(avctx, AV_LOG_ERROR, "Streamoff and die?\n");
++            ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMOFF);
++            return -1;
++        }
++    }
++#endif
++    // Result priority: a frame (0), else an enqueue error, else a dequeue error, else "need more input"
++    return dst_rv == 0 ? 0 :
++        src_rv < 0 ? src_rv :
++        dst_rv < 0 ? dst_rv :
++        AVERROR(EAGAIN);
++}
++
++#if 0
++#include <time.h>
++static int64_t us_time(void)
++{
++ struct timespec ts;
++ clock_gettime(CLOCK_MONOTONIC, &ts);
++ return (int64_t)ts.tv_sec * 1000000 + ts.tv_nsec / 1000;
++}
++
++static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)
++{
++ int ret;
++ const int64_t now = us_time();
++ int64_t done;
++ av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__);
++ ret = v4l2_receive_frame2(avctx, frame);
++ done = us_time();
++ av_log(avctx, AV_LOG_TRACE, ">>> %s: rx time=%" PRId64 ", rv=%d\n", __func__, done - now, ret);
++ return ret;
++}
++#endif
++// Map an FFmpeg profile to the V4L2 profile menu index for codec_id; ~0 if there is no mapping
++static uint32_t
++avprofile_to_v4l2(const enum AVCodecID codec_id, const int avprofile)
++{
++    switch (codec_id) {
++    case AV_CODEC_ID_H264:
++        switch (avprofile) {
++        case FF_PROFILE_H264_BASELINE:
++            return V4L2_MPEG_VIDEO_H264_PROFILE_BASELINE;
++        case FF_PROFILE_H264_CONSTRAINED_BASELINE:
++            return V4L2_MPEG_VIDEO_H264_PROFILE_CONSTRAINED_BASELINE;
++        case FF_PROFILE_H264_MAIN:
++            return V4L2_MPEG_VIDEO_H264_PROFILE_MAIN;
++        case FF_PROFILE_H264_EXTENDED:
++            return V4L2_MPEG_VIDEO_H264_PROFILE_EXTENDED;
++        case FF_PROFILE_H264_HIGH:
++            return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH;
++        case FF_PROFILE_H264_HIGH_10:
++            return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_10;
++        case FF_PROFILE_H264_HIGH_10_INTRA:
++            return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_10_INTRA;
++        case FF_PROFILE_H264_MULTIVIEW_HIGH:  // has its own V4L2 value - must not alias High 4:2:2
++            return V4L2_MPEG_VIDEO_H264_PROFILE_MULTIVIEW_HIGH;
++        case FF_PROFILE_H264_HIGH_422:
++            return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_422;
++        case FF_PROFILE_H264_HIGH_422_INTRA:
++            return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_422_INTRA;
++        case FF_PROFILE_H264_STEREO_HIGH:
++            return V4L2_MPEG_VIDEO_H264_PROFILE_STEREO_HIGH;
++        case FF_PROFILE_H264_HIGH_444_PREDICTIVE:
++            return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_444_PREDICTIVE;
++        case FF_PROFILE_H264_HIGH_444_INTRA:
++            return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_444_INTRA;
++        case FF_PROFILE_H264_CAVLC_444:
++            return V4L2_MPEG_VIDEO_H264_PROFILE_CAVLC_444_INTRA;
++        case FF_PROFILE_H264_HIGH_444:
++        default:
++            break;
++// V4L2_MPEG_VIDEO_H264_PROFILE_SCALABLE_BASELINE = 12,
++// V4L2_MPEG_VIDEO_H264_PROFILE_SCALABLE_HIGH = 13,
++// V4L2_MPEG_VIDEO_H264_PROFILE_SCALABLE_HIGH_INTRA = 14,
++// V4L2_MPEG_VIDEO_H264_PROFILE_CONSTRAINED_HIGH = 17,
++        }
++        break;
++    case AV_CODEC_ID_MPEG2VIDEO:
++    case AV_CODEC_ID_MPEG4:
++    case AV_CODEC_ID_VC1:
++    case AV_CODEC_ID_VP8:
++    case AV_CODEC_ID_VP9:
++    case AV_CODEC_ID_AV1:
++        // Most profiles are a simple number that matches the V4L2 enum
++        return avprofile;
++    default:
++        break;
++    }
++    return ~(uint32_t)0;
++}
++
++// Mirrors Chrome's profile check: the profile passes iff it is a valid menu entry of the V4L2
++// profile control. Returns 0 if OK (or the check can't be made), AVERROR(ENOENT) if not offered
++static int
++check_profile(AVCodecContext *const avctx, V4L2m2mContext *const s)
++{
++    struct v4l2_queryctrl query_ctrl;
++    struct v4l2_querymenu query_menu;
++    uint32_t profile_id;
++
++    // An unset profile is almost certainly zero or -99 - do not reject
++    if (avctx->profile <= 0) {
++        av_log(avctx, AV_LOG_VERBOSE, "Profile %d <= 0 - check skipped\n", avctx->profile);
++        return 0;
++    }
++
++    // Pick the V4L2 profile control for this codec (query_ctrl is fully initialised after the switch)
++    switch (avctx->codec_id) {
++    case AV_CODEC_ID_MPEG2VIDEO:
++        profile_id = V4L2_CID_MPEG_VIDEO_MPEG2_PROFILE;
++        break;
++    case AV_CODEC_ID_MPEG4:
++        profile_id = V4L2_CID_MPEG_VIDEO_MPEG4_PROFILE;
++        break;
++    case AV_CODEC_ID_H264:
++        profile_id = V4L2_CID_MPEG_VIDEO_H264_PROFILE;
++        break;
++    case AV_CODEC_ID_VP8:
++        profile_id = V4L2_CID_MPEG_VIDEO_VP8_PROFILE;
++        break;
++    case AV_CODEC_ID_VP9:
++        profile_id = V4L2_CID_MPEG_VIDEO_VP9_PROFILE;
++        break;
++#ifdef V4L2_CID_MPEG_VIDEO_AV1_PROFILE
++    case AV_CODEC_ID_AV1:
++        profile_id = V4L2_CID_MPEG_VIDEO_AV1_PROFILE;
++        break;
++#endif
++    default:
++        av_log(avctx, AV_LOG_VERBOSE, "Can't map profile for codec id %d; profile check skipped\n", avctx->codec_id);
++        return 0;
++    }
++
++    query_ctrl = (struct v4l2_queryctrl){.id = profile_id};
++    if (ioctl(s->fd, VIDIOC_QUERYCTRL, &query_ctrl) != 0) {
++        av_log(avctx, AV_LOG_VERBOSE, "Query profile ctrl (%#x) not supported: assume OK\n", query_ctrl.id);
++    }
++    else {
++        av_log(avctx, AV_LOG_DEBUG, "%s: Control supported: %#x\n", __func__, query_ctrl.id);
++
++        query_menu = (struct v4l2_querymenu){
++            .id = query_ctrl.id,
++            .index = avprofile_to_v4l2(avctx->codec_id, avctx->profile),
++        };
++        // Range check as int64_t: index is unsigned but the control's minimum/maximum are signed
++        if ((int64_t)query_menu.index > query_ctrl.maximum ||
++            (int64_t)query_menu.index < query_ctrl.minimum ||
++            ioctl(s->fd, VIDIOC_QUERYMENU, &query_menu) != 0) {
++            return AVERROR(ENOENT);
++        }
++    }
++
++    return 0;
++}
++// Check the coded size against the driver's frame-size enum for fcc: 0 = OK (or can't tell), -ve = no match / error
++static int
++check_size(AVCodecContext * const avctx, V4L2m2mContext * const s, const uint32_t fcc)
++{
++    unsigned int i;
++    const uint32_t w = avctx->coded_width;
++    const uint32_t h = avctx->coded_height;
++
++    if (w == 0 || h == 0 || fcc == 0) {
++        av_log(avctx, AV_LOG_TRACE, "%s: Size %dx%d or fcc %s empty\n", __func__, w, h, av_fourcc2str(fcc));
++        return 0;
++    }
++    if ((s->quirks & FF_V4L2_QUIRK_ENUM_FRAMESIZES_BROKEN) != 0) {
++        av_log(avctx, AV_LOG_TRACE, "%s: Skipped (quirk): Size %dx%d, fcc %s\n", __func__, w, h, av_fourcc2str(fcc));
++        return 0;
++    }
++
++    for (i = 0;; ++i) {
++        struct v4l2_frmsizeenum fs = {
++            .index = i,
++            .pixel_format = fcc,
++        };
++
++        while (ioctl(s->fd, VIDIOC_ENUM_FRAMESIZES, &fs) != 0) {
++            const int err = AVERROR(errno);
++            if (err == AVERROR(EINTR))
++                continue;
++            if (i == 0 && err == AVERROR(ENOTTY)) {
++                av_log(avctx, AV_LOG_DEBUG, "Framesize enum not supported\n");
++                return 0;
++            }
++            if (err != AVERROR(EINVAL)) {
++                av_log(avctx, AV_LOG_ERROR, "Failed to enum framesizes: %s\n", av_err2str(err));
++                return err;
++            }
++            av_log(avctx, AV_LOG_WARNING, "Failed to find Size=%dx%d, fmt=%s in %u frame size enums\n",
++                   w, h, av_fourcc2str(fcc), i);
++            return err;
++        }
++
++        switch (fs.type) {
++        case V4L2_FRMSIZE_TYPE_DISCRETE:
++            av_log(avctx, AV_LOG_TRACE, "%s[%d]: Discrete: %dx%d\n", __func__, i,
++                   fs.discrete.width,fs.discrete.height);
++            if (w == fs.discrete.width && h == fs.discrete.height)
++                return 0;
++            break;
++        case V4L2_FRMSIZE_TYPE_STEPWISE:  // a zero step is treated as "no alignment constraint" (avoids % 0)
++            av_log(avctx, AV_LOG_TRACE, "%s[%d]: Stepwise: Min: %dx%d Max: %dx%d, Step: %dx%d\n", __func__, i,
++                   fs.stepwise.min_width, fs.stepwise.min_height,
++                   fs.stepwise.max_width, fs.stepwise.max_height,
++                   fs.stepwise.step_width,fs.stepwise.step_height);
++            if (w >= fs.stepwise.min_width && w <= fs.stepwise.max_width &&
++                h >= fs.stepwise.min_height && h <= fs.stepwise.max_height &&
++                (fs.stepwise.step_width == 0 || (w - fs.stepwise.min_width) % fs.stepwise.step_width == 0) &&
++                (fs.stepwise.step_height == 0 || (h - fs.stepwise.min_height) % fs.stepwise.step_height == 0))
++                return 0;
++            break;
++        case V4L2_FRMSIZE_TYPE_CONTINUOUS:
++            av_log(avctx, AV_LOG_TRACE, "%s[%d]: Continuous: Min: %dx%d Max: %dx%d, Step: %dx%d\n", __func__, i,
++                   fs.stepwise.min_width, fs.stepwise.min_height,
++                   fs.stepwise.max_width, fs.stepwise.max_height,
++                   fs.stepwise.step_width,fs.stepwise.step_height);
++            if (w >= fs.stepwise.min_width && w <= fs.stepwise.max_width &&
++                h >= fs.stepwise.min_height && h <= fs.stepwise.max_height)
++                return 0;
++            break;
++        default:
++            av_log(avctx, AV_LOG_ERROR, "Unexpected framesize enum: %d\n", fs.type);
++            return AVERROR(EINVAL);
++        }
++    }
++}
++// Query the driver's capabilities and OR any driver-specific quirk flags into s->quirks
++static int
++get_quirks(AVCodecContext * const avctx, V4L2m2mContext * const s)
++{
++    struct v4l2_capability cap;
++
++    memset(&cap, 0, sizeof(cap));
++    for (;;) {
++        int err;
++        if (ioctl(s->fd, VIDIOC_QUERYCAP, &cap) == 0)
++            break;
++        if ((err = errno) == EINTR)
++            continue;  // interrupted - just ask again
++        av_log(avctx, AV_LOG_ERROR, "V4L2: Failed to get capabilities: %s\n", strerror(err));
++        return AVERROR(err);
++    }
++
++    // With so few entries a lookup table isn't worth it yet; revisit if
++    // more drivers need quirks.
++    // meson-vdec (Amlogic) always raises a resolution-changed event after output
++    // streamon; userspace must (re)allocate the capture buffers and streamon
++    // capture to clear it, even if the buffers were already the right size.
++    if (!strcmp(cap.driver, "meson-vdec"))
++        s->quirks |= FF_V4L2_QUIRK_REINIT_ALWAYS | FF_V4L2_QUIRK_ENUM_FRAMESIZES_BROKEN;
++
++    av_log(avctx, AV_LOG_DEBUG, "Driver '%s': Quirks=%#x\n", cap.driver, s->quirks);
++    return 0;
++}
++// This heuristic is for H264 but use for everything. The value is used as the output
++// context's min_buf_size; it saturates at UINT32_MAX instead of wrapping on absurd sizes
++static uint32_t max_coded_size(const AVCodecContext * const avctx)
++{
++    // Widen before multiplying: int * int can overflow and wxh * 3 can wrap in 32 bits
++    uint64_t size = (uint64_t)(uint32_t)avctx->coded_width * (uint32_t)avctx->coded_height;
++    size = size * 3 / 2;
++    // H.264 Annex A table A-1 gives minCR which is either 2 or 4
++    // unfortunately that doesn't yield an actually useful limit
++    // and it should be noted that frame 0 is special cased to allow
++    // a bigger number which really isn't helpful for us. So just pick
++    // frame_size / 2
++    size /= 2;
++    // Add 64k to allow for any overheads and/or encoder hopefulness
++    // with small WxH
++    size += 1 << 16;
++    return size > UINT32_MAX ? UINT32_MAX : (uint32_t)size;
++}
++// Parse avctx->extradata (H264 / HEVC only) to set avctx->profile, avctx->level and s->reorder_size
++static void
++parse_extradata(AVCodecContext * const avctx, V4L2m2mContext * const s)
++{
++ s->reorder_size = 0;
++
++ if (!avctx->extradata || !avctx->extradata_size)
++ return;
++
++ switch (avctx->codec_id) {
++#if CONFIG_H264_DECODER
++ case AV_CODEC_ID_H264:
++ {
++ H264ParamSets ps;
++ int is_avc = 0;
++ int nal_length_size = 0;
++ int ret;
++
++ memset(&ps, 0, sizeof(ps));
++
++ ret = ff_h264_decode_extradata(avctx->extradata, avctx->extradata_size,
++ &ps, &is_avc, &nal_length_size,
++ avctx->err_recognition, avctx);
++ if (ret > 0) {
++ const SPS * sps = NULL;
++ unsigned int i;
++ for (i = 0; i != MAX_SPS_COUNT; ++i) {
++ if (ps.sps_list[i]) {
++ sps = (const SPS *)ps.sps_list[i]->data;
++ break;
++ }
++ }
++ if (sps) {
++ avctx->profile = ff_h264_get_profile(sps);
++ avctx->level = sps->level_idc;
++ s->reorder_size = sps->num_reorder_frames;
++ }
++ }
++ ff_h264_ps_uninit(&ps);
++ break;
++ }
++#endif
++#if CONFIG_HEVC_DECODER
++ case AV_CODEC_ID_HEVC:
++ {
++ HEVCParamSets ps;
++ HEVCSEI sei;
++ int is_nalff = 0;
++ int nal_length_size = 0;
++ int ret;
++
++ memset(&ps, 0, sizeof(ps));
++ memset(&sei, 0, sizeof(sei));
++ // NOTE(review): confirm ff_hevc_decode_extradata() can return > 0 on success; if it returns 0 the block below never runs
++ ret = ff_hevc_decode_extradata(avctx->extradata, avctx->extradata_size,
++ &ps, &sei, &is_nalff, &nal_length_size,
++ avctx->err_recognition, 0, avctx);
++ if (ret > 0) {
++ const HEVCSPS * sps = NULL;
++ unsigned int i;
++ for (i = 0; i != HEVC_MAX_SPS_COUNT; ++i) {
++ if (ps.sps_list[i]) {
++ sps = (const HEVCSPS *)ps.sps_list[i]->data;
++ break;
++ }
++ }
++ if (sps) {
++ avctx->profile = sps->ptl.general_ptl.profile_idc;
++ avctx->level = sps->ptl.general_ptl.level_idc;
++ s->reorder_size = sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering;
++ }
++ }
++ ff_hevc_ps_uninit(&ps);
++ ff_hevc_reset_sei(&sei);
++ break;
++ }
++#endif
++ default:
++ break;
++ }
++}
++// Build the candidate pix fmt list from the capture context, let ff_get_format() choose, then set the capture format
++static int
++choose_capture_format(AVCodecContext * const avctx, V4L2m2mContext * const s)
++{
++ const V4L2m2mPriv * const priv = avctx->priv_data;
++ unsigned int fmts_n;
++ uint32_t *fmts = ff_v4l2_context_enum_drm_formats(&s->capture, &fmts_n);
++ enum AVPixelFormat *fmts2 = NULL;
++ enum AVPixelFormat gf_pix_fmt;
++ unsigned int i;
++ unsigned int n = 0;
++ unsigned int pref_n = 1;
++ int rv = AVERROR(ENOENT);
++
++ if (!fmts)
++ return AVERROR(ENOENT);
++ // +3 leaves room for the leading DRM_PRIME entry, the duplicated preferred format and the NONE terminator
++ if ((fmts2 = av_malloc(sizeof(*fmts2) * (fmts_n + 3))) == NULL) {
++ rv = AVERROR(ENOMEM);
++ goto error;
++ }
++
++ // Filter for formats that are supported by ffmpeg and
++ // can accommodate the stream size
++ fmts2[n++] = AV_PIX_FMT_DRM_PRIME;
++ for (i = 0; i != fmts_n; ++i) {
++ const enum AVPixelFormat f = ff_v4l2_format_v4l2_to_avfmt(fmts[i], AV_CODEC_ID_RAWVIDEO);
++ av_log(avctx, AV_LOG_TRACE, "VLC pix %s -> %s\n", av_fourcc2str(fmts[i]), av_get_pix_fmt_name(f));
++ if (f == AV_PIX_FMT_NONE)
++ continue;
++
++ if (check_size(avctx, s, fmts[i]) != 0)
++ continue;
++
++ if (f == priv->pix_fmt)
++ pref_n = n;
++ fmts2[n++] = f;
++ }
++
++ if (n < 2) {
++ av_log(avctx, AV_LOG_DEBUG, "%s: No usable formats found\n", __func__);
++ goto error;
++ }
++
++ if (n != 2) {
++ // ffmpeg.c really only expects one s/w format. It thinks that the
++ // last format in the list is the s/w format of the h/w format but
++ // also chooses the first non-h/w format as the preferred s/w format.
++ // The only way of reconciling this is to dup our preferred format into
++ // both last & first place :-(
++ const enum AVPixelFormat t = fmts2[pref_n];
++ fmts2[pref_n] = fmts2[1];
++ fmts2[1] = t;
++ fmts2[n++] = t;
++ }
++
++ fmts2[n] = AV_PIX_FMT_NONE;
++
++ gf_pix_fmt = ff_get_format(avctx, fmts2);
++ av_log(avctx, AV_LOG_DEBUG, "avctx requested=%d (%s) %dx%d; get_format requested=%d (%s)\n",
++ avctx->pix_fmt, av_get_pix_fmt_name(avctx->pix_fmt),
++ avctx->coded_width, avctx->coded_height,
++ gf_pix_fmt, av_get_pix_fmt_name(gf_pix_fmt));
++
++ if (gf_pix_fmt == AV_PIX_FMT_NONE)
++ goto error;
++
++ if (gf_pix_fmt == AV_PIX_FMT_DRM_PRIME || avctx->pix_fmt == AV_PIX_FMT_DRM_PRIME) {
++ avctx->pix_fmt = AV_PIX_FMT_DRM_PRIME;
++ s->capture.av_pix_fmt = avctx->sw_pix_fmt;
++ s->output_drm = 1;
++ }
++ else {
++ avctx->pix_fmt = gf_pix_fmt;
++ s->capture.av_pix_fmt = gf_pix_fmt;
++ s->output_drm = 0;
++ }
++
++ // Get format converts capture.av_pix_fmt back into a V4L2 format in the context
++ if ((rv = ff_v4l2_context_get_format(&s->capture, 0)) != 0)
++ goto error;
++ rv = ff_v4l2_context_set_format(&s->capture);
++
++error:
++ av_free(fmts2);
++ av_free(fmts);
++ return rv;
++}
++
+ static av_cold int v4l2_decode_init(AVCodecContext *avctx)
+ {
+ V4L2Context *capture, *output;
+@@ -185,10 +1167,27 @@ static av_cold int v4l2_decode_init(AVCo
+ V4L2m2mPriv *priv = avctx->priv_data;
+ int ret;
+
++ av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__);
++
++ if (avctx->codec_id == AV_CODEC_ID_H264) {
++ if (avctx->ticks_per_frame == 1) {
++ if(avctx->time_base.den < INT_MAX/2) {
++ avctx->time_base.den *= 2;
++ } else
++ avctx->time_base.num /= 2;
++ }
++ avctx->ticks_per_frame = 2;
++ }
++
+ ret = ff_v4l2_m2m_create_context(priv, &s);
+ if (ret < 0)
+ return ret;
+
++ parse_extradata(avctx, s);
++
++ xlat_init(&s->xlat);
++ pts_stats_init(&s->pts_stat, avctx, "decoder");
++
+ capture = &s->capture;
+ output = &s->output;
+
+@@ -196,14 +1195,45 @@ static av_cold int v4l2_decode_init(AVCo
+ * by the v4l2 driver; this event will trigger a full pipeline reconfig and
+ * the proper values will be retrieved from the kernel driver.
+ */
+- output->height = capture->height = avctx->coded_height;
+- output->width = capture->width = avctx->coded_width;
++// output->height = capture->height = avctx->coded_height;
++// output->width = capture->width = avctx->coded_width;
++ output->height = capture->height = 0;
++ output->width = capture->width = 0;
+
+ output->av_codec_id = avctx->codec_id;
+ output->av_pix_fmt = AV_PIX_FMT_NONE;
++ output->min_buf_size = max_coded_size(avctx);
+
+ capture->av_codec_id = AV_CODEC_ID_RAWVIDEO;
+ capture->av_pix_fmt = avctx->pix_fmt;
++ capture->min_buf_size = 0;
++
++ capture->av_pix_fmt = AV_PIX_FMT_NONE;
++ s->output_drm = 0;
++
++ s->db_ctl = NULL;
++ if (priv->dmabuf_alloc != NULL && strcmp(priv->dmabuf_alloc, "v4l2") != 0) {
++ if (strcmp(priv->dmabuf_alloc, "cma") == 0)
++ s->db_ctl = dmabufs_ctl_new();
++ else {
++ av_log(avctx, AV_LOG_ERROR, "Unknown dmabuf alloc method: '%s'\n", priv->dmabuf_alloc);
++ return AVERROR(EINVAL);
++ }
++ if (!s->db_ctl) {
++ av_log(avctx, AV_LOG_ERROR, "Can't open dmabuf provider '%s'\n", priv->dmabuf_alloc);
++ return AVERROR(ENOMEM);
++ }
++ }
++
++ s->device_ref = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_DRM);
++ if (!s->device_ref) {
++ ret = AVERROR(ENOMEM);
++ return ret;
++ }
++
++ ret = av_hwdevice_ctx_init(s->device_ref);
++ if (ret < 0)
++ return ret;
+
+ s->avctx = avctx;
+ ret = ff_v4l2_m2m_codec_init(priv);
+@@ -212,12 +1242,90 @@ static av_cold int v4l2_decode_init(AVCo
+ return ret;
+ }
+
+- return v4l2_prepare_decoder(s);
++ if (avctx->extradata &&
++ (ret = copy_extradata(avctx, avctx->extradata, avctx->extradata_size, &s->extdata_data, &s->extdata_size)) != 0) {
++ av_log(avctx, AV_LOG_ERROR, "Failed to copy extradata from context: %s\n", av_err2str(ret));
++#if DUMP_FAILED_EXTRADATA
++ log_dump(avctx, AV_LOG_INFO, avctx->extradata, avctx->extradata_size);
++#endif
++ return ret;
++ }
++
++ if ((ret = get_quirks(avctx, s)) != 0)
++ return ret;
++
++ if ((ret = check_profile(avctx, s)) != 0) {
++ av_log(avctx, AV_LOG_WARNING, "Profile %d not supported by decode\n", avctx->profile);
++ return ret;
++ }
++
++ // Size check done as part of format filtering
++ if ((ret = choose_capture_format(avctx, s)) != 0)
++ return ret;
++
++ if ((ret = v4l2_prepare_decoder(s)) < 0)
++ return ret;
++
++ return 0;
+ }
+
+ static av_cold int v4l2_decode_close(AVCodecContext *avctx)
+ {
+- return ff_v4l2_m2m_codec_end(avctx->priv_data);
++ int rv; // result of ff_v4l2_m2m_codec_end(), held so it can be trace-logged before returning
++ av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__);
++ rv = ff_v4l2_m2m_codec_end(avctx->priv_data);
++ av_log(avctx, AV_LOG_TRACE, ">>> %s: rv=%d\n", __func__, rv);
++ return rv;
++}
++// Flush callback: stops the output queue, drops buffered input and resets the decode state vars
++static void v4l2_decode_flush(AVCodecContext *avctx)
++{
++ // An alternative and more drastic form of flush is to simply do this:
++ // v4l2_decode_close(avctx);
++ // v4l2_decode_init(avctx);
++ // The downside is that this keeps a decoder open until all the frames
++ // associated with it have been returned. This is a bit wasteful on
++ // possibly limited h/w resources and fails on a Pi for this reason unless
++ // more GPU mem is allocated than is the default.
++
++ V4L2m2mPriv * const priv = avctx->priv_data;
++ V4L2m2mContext * const s = priv->context;
++ V4L2Context * const output = &s->output;
++ V4L2Context * const capture = &s->capture;
++
++ av_log(avctx, AV_LOG_TRACE, "<<< %s: streamon=%d\n", __func__, output->streamon);
++
++ // Reflushing everything is benign, quick and avoids having to worry about
++ // states like EOS processing so don't try to optimize out (having got it
++ // wrong once)
++
++ ff_v4l2_context_set_status(output, VIDIOC_STREAMOFF);
++
++ // Clear any buffered input packet
++ av_packet_unref(&s->buf_pkt);
++
++ // Clear a pending EOS
++ if (ff_v4l2_ctx_eos(capture)) {
++ // Arguably we could delay this but this is easy and doesn't require
++ // thought or extra vars
++ ff_v4l2_context_set_status(capture, VIDIOC_STREAMOFF);
++ ff_v4l2_context_set_status(capture, VIDIOC_STREAMON);
++ }
++
++ // V4L2 makes no guarantees about whether decoded frames are flushed or not
++ // so mark all frames we are tracking to be discarded if they appear
++ xlat_flush(&s->xlat);
++
++ // resend extradata
++ s->extdata_sent = 0;
++ // clear status vars
++ s->running = 0;
++ s->draining = 0;
++ output->done = 0;
++ capture->done = 0;
++
++ // Stream on will occur when we actually submit a new frame
++ av_log(avctx, AV_LOG_TRACE, ">>> %s\n", __func__);
+ }
+
+ #define OFFSET(x) offsetof(V4L2m2mPriv, x)
+@@ -226,10 +1334,17 @@ static av_cold int v4l2_decode_close(AVC
+ static const AVOption options[] = {
+ V4L_M2M_DEFAULT_OPTS,
+ { "num_capture_buffers", "Number of buffers in the capture context",
+- OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 20}, 20, INT_MAX, FLAGS },
++ OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 20}, 2, INT_MAX, FLAGS },
++ { "pixel_format", "Pixel format to be used by the decoder", OFFSET(pix_fmt), AV_OPT_TYPE_PIXEL_FMT, {.i64 = AV_PIX_FMT_NONE}, AV_PIX_FMT_NONE, AV_PIX_FMT_NB, FLAGS },
++ { "dmabuf_alloc", "Dmabuf alloc method", OFFSET(dmabuf_alloc), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS },
+ { NULL},
+ };
+
++static const AVCodecHWConfigInternal *v4l2_m2m_hw_configs[] = {
++ HW_CONFIG_INTERNAL(DRM_PRIME),
++ NULL
++};
++
+ #define M2MDEC_CLASS(NAME) \
+ static const AVClass v4l2_m2m_ ## NAME ## _dec_class = { \
+ .class_name = #NAME "_v4l2m2m_decoder", \
+@@ -250,10 +1365,16 @@ static const AVOption options[] = {
+ .init = v4l2_decode_init, \
+ FF_CODEC_RECEIVE_FRAME_CB(v4l2_receive_frame), \
+ .close = v4l2_decode_close, \
++ .flush = v4l2_decode_flush, \
+ .bsfs = bsf_name, \
+ .p.capabilities = AV_CODEC_CAP_HARDWARE | AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING, \
+ .caps_internal = FF_CODEC_CAP_SETS_PKT_DTS | FF_CODEC_CAP_INIT_CLEANUP, \
+ .p.wrapper_name = "v4l2m2m", \
++ .p.pix_fmts = (const enum AVPixelFormat[]) { AV_PIX_FMT_DRM_PRIME, \
++ AV_PIX_FMT_NV12, \
++ AV_PIX_FMT_YUV420P, \
++ AV_PIX_FMT_NONE}, \
++ .hw_configs = v4l2_m2m_hw_configs, \
+ }
+
+ M2MDEC(h264, "H.264", AV_CODEC_ID_H264, "h264_mp4toannexb");
+--- a/libavcodec/v4l2_m2m_enc.c
++++ b/libavcodec/v4l2_m2m_enc.c
+@@ -24,6 +24,8 @@
+ #include <linux/videodev2.h>
+ #include <sys/ioctl.h>
+ #include <search.h>
++#include <drm_fourcc.h>
++
+ #include "encode.h"
+ #include "libavcodec/avcodec.h"
+ #include "libavutil/pixdesc.h"
+@@ -38,6 +40,34 @@
+ #define MPEG_CID(x) V4L2_CID_MPEG_VIDEO_##x
+ #define MPEG_VIDEO(x) V4L2_MPEG_VIDEO_##x
+
++// P030 should be defined in drm_fourcc.h and hopefully will be sometime
++// in the future but until then...
++#ifndef DRM_FORMAT_P030
++#define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0')
++#endif
++
++#ifndef DRM_FORMAT_NV15
++#define DRM_FORMAT_NV15 fourcc_code('N', 'V', '1', '5')
++#endif
++
++#ifndef DRM_FORMAT_NV20
++#define DRM_FORMAT_NV20 fourcc_code('N', 'V', '2', '0')
++#endif
++
++#ifndef V4L2_CID_CODEC_BASE
++#define V4L2_CID_CODEC_BASE V4L2_CID_MPEG_BASE
++#endif
++
++// V4L2_PIX_FMT_NV12_10_COL128 and V4L2_PIX_FMT_NV12_COL128 should be defined
++// in videodev2.h and hopefully will be sometime in the future but until then...
++#ifndef V4L2_PIX_FMT_NV12_10_COL128
++#define V4L2_PIX_FMT_NV12_10_COL128 v4l2_fourcc('N', 'C', '3', '0')
++#endif
++
++#ifndef V4L2_PIX_FMT_NV12_COL128
++#define V4L2_PIX_FMT_NV12_COL128 v4l2_fourcc('N', 'C', '1', '2') /* 12 Y/CbCr 4:2:0 128 pixel wide column */
++#endif
++
+ static inline void v4l2_set_timeperframe(V4L2m2mContext *s, unsigned int num, unsigned int den)
+ {
+ struct v4l2_streamparm parm = { 0 };
+@@ -148,15 +178,14 @@ static inline int v4l2_mpeg4_profile_fro
+ static int v4l2_check_b_frame_support(V4L2m2mContext *s)
+ {
+ if (s->avctx->max_b_frames)
+- av_log(s->avctx, AV_LOG_WARNING, "Encoder does not support b-frames yet\n");
++ av_log(s->avctx, AV_LOG_WARNING, "Encoder does not support %d b-frames yet\n", s->avctx->max_b_frames);
+
+- v4l2_set_ext_ctrl(s, MPEG_CID(B_FRAMES), 0, "number of B-frames", 0);
++ v4l2_set_ext_ctrl(s, MPEG_CID(B_FRAMES), s->avctx->max_b_frames, "number of B-frames", 1);
+ v4l2_get_ext_ctrl(s, MPEG_CID(B_FRAMES), &s->avctx->max_b_frames, "number of B-frames", 0);
+ if (s->avctx->max_b_frames == 0)
+ return 0;
+
+ avpriv_report_missing_feature(s->avctx, "DTS/PTS calculation for V4L2 encoding");
+-
+ return AVERROR_PATCHWELCOME;
+ }
+
+@@ -271,17 +300,208 @@ static int v4l2_prepare_encoder(V4L2m2mC
+ return 0;
+ }
+
++// Derive the V4L2 pixelformat, width, height and stride in *format from a
++// DRM_PRIME frame. Returns 0, or AVERROR(EINVAL) if the layout is unsupported.
++static int avdrm_to_v4l2(struct v4l2_format * const format, const AVFrame * const frame)
++{
++    const AVDRMFrameDescriptor *const src = (const AVDRMFrameDescriptor *)frame->data[0];
++    const uint32_t drm_fmt = src->layers[0].format;
++    // Treat INVALID as LINEAR
++    const uint64_t mod = src->objects[0].format_modifier == DRM_FORMAT_MOD_INVALID ?
++        DRM_FORMAT_MOD_LINEAR : src->objects[0].format_modifier;
++    uint32_t pix_fmt = 0;
++    uint32_t w = 0, h = 0;
++    uint32_t bpl = src->layers[0].planes[0].pitch;
++
++    // We really don't expect multiple layers
++    // All formats that we currently cope with are single object
++    // A zero pitch is rejected too as it is used as a divisor below
++    if (src->nb_layers != 1 || src->nb_objects != 1 || bpl == 0)
++        return AVERROR(EINVAL);
++
++    switch (drm_fmt) {
++    case DRM_FORMAT_YUV420:
++        if (mod == DRM_FORMAT_MOD_LINEAR) {
++            if (src->layers[0].nb_planes != 3)
++                break;
++            pix_fmt = V4L2_PIX_FMT_YUV420;
++            h = src->layers[0].planes[1].offset / bpl;
++            w = bpl;
++        }
++        break;
++
++    case DRM_FORMAT_NV12:
++        if (mod == DRM_FORMAT_MOD_LINEAR) {
++            if (src->layers[0].nb_planes != 2)
++                break;
++            pix_fmt = V4L2_PIX_FMT_NV12;
++            h = src->layers[0].planes[1].offset / bpl;
++            w = bpl;
++        }
++        else if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) {
++            if (src->layers[0].nb_planes != 2)
++                break;
++            pix_fmt = V4L2_PIX_FMT_NV12_COL128;
++            w = bpl;
++            h = src->layers[0].planes[1].offset / 128;
++            bpl = fourcc_mod_broadcom_param(mod);
++        }
++        break;
++
++    case DRM_FORMAT_P030:
++        if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) {
++            if (src->layers[0].nb_planes != 2)
++                break;
++            pix_fmt = V4L2_PIX_FMT_NV12_10_COL128;
++            w = bpl / 2;  // Matching lie to how we construct this
++            h = src->layers[0].planes[1].offset / 128;
++            bpl = fourcc_mod_broadcom_param(mod);
++        }
++        break;
++
++    default:
++        break;
++    }
++
++    if (!pix_fmt)
++        return AVERROR(EINVAL);
++
++    if (V4L2_TYPE_IS_MULTIPLANAR(format->type)) {
++        struct v4l2_pix_format_mplane *const pix = &format->fmt.pix_mp;
++
++        pix->width = w;
++        pix->height = h;
++        pix->pixelformat = pix_fmt;
++        pix->plane_fmt[0].bytesperline = bpl;
++        pix->num_planes = 1;
++    }
++    else {
++        struct v4l2_pix_format *const pix = &format->fmt.pix;
++
++        pix->width = w;
++        pix->height = h;
++        pix->pixelformat = pix_fmt;
++        pix->bytesperline = bpl;
++    }
++
++    return 0;
++}
++
++// Decide whether two V4L2 formats are similar enough to be usable: same type,
++// pixelformat, plane count and per-plane bytesperline. Returns 1 if so, else 0.
++static int fmt_eq(const struct v4l2_format * const a, const struct v4l2_format * const b)
++{
++    if (a->type != b->type)
++        return 0;
++
++    if (!V4L2_TYPE_IS_MULTIPLANAR(a->type)) {
++        const struct v4l2_pix_format *const fa = &a->fmt.pix;
++        const struct v4l2_pix_format *const fb = &b->fmt.pix;
++        return fa->pixelformat == fb->pixelformat &&
++               fa->bytesperline == fb->bytesperline;
++    }
++    else {
++        const struct v4l2_pix_format_mplane *const ma = &a->fmt.pix_mp;
++        const struct v4l2_pix_format_mplane *const mb = &b->fmt.pix_mp;
++        unsigned int plane;
++
++        if (ma->pixelformat != mb->pixelformat || ma->num_planes != mb->num_planes)
++            return 0;
++        for (plane = 0; plane < ma->num_planes; ++plane) {
++            if (ma->plane_fmt[plane].bytesperline != mb->plane_fmt[plane].bytesperline)
++                return 0;
++        }
++    }
++    return 1;
++}
++
++static inline int q_full(const V4L2Context *const output)
++{   // Non-zero when the context's queue count equals its buffer count
++    return output->num_buffers == ff_v4l2_context_q_count(output);
++}
++
++// Queue one source frame on the output context; a NULL frame signals EOS
+ static int v4l2_send_frame(AVCodecContext *avctx, const AVFrame *frame)
+ {
+     V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context;
+     V4L2Context *const output = &s->output;
++    int rv;
++    const int needs_slot = q_full(output);
++
++    av_log(avctx, AV_LOG_TRACE, "<<< %s; needs_slot=%d\n", __func__, needs_slot);
++
++    // Signal EOF if needed (doesn't need q slot)
++    if (!frame) {
++        av_log(avctx, AV_LOG_TRACE, "--- %s: EOS\n", __func__);
++        return ff_v4l2_context_enqueue_frame(output, frame);
++    }
++
++    if ((rv = ff_v4l2_dq_all(output, needs_slot? 500 : 0)) != 0) {
++        // We should be able to return AVERROR(EAGAIN) to indicate buffer
++        // exhaustion, but ffmpeg currently treats that as fatal.
++        av_log(avctx, AV_LOG_WARNING, "Failed to get buffer for src frame: %s\n", av_err2str(rv));
++        return rv;
++    }
++
++    if (s->input_drm && !output->streamon) {
++        struct v4l2_format req_format = {.type = output->format.type};
++
++        // Set format when we first get a buffer
++        if ((rv = avdrm_to_v4l2(&req_format, frame)) != 0) {
++            av_log(avctx, AV_LOG_ERROR, "Failed to get V4L2 format from DRM_PRIME frame\n");
++            return rv;
++        }
++
++        ff_v4l2_context_release(output);
++
++        output->format = req_format;
++
++        if ((rv = ff_v4l2_context_set_format(output)) != 0) {
++            av_log(avctx, AV_LOG_ERROR, "Failed to set V4L2 format\n");
++            return rv;
++        }
++
++        if (!fmt_eq(&req_format, &output->format)) {
++            av_log(avctx, AV_LOG_ERROR, "Format mismatch after setup\n");
++            return AVERROR(EINVAL);
++        }
++
++        output->selection.top = frame->crop_top;
++        output->selection.left = frame->crop_left;
++        output->selection.width = av_frame_cropped_width(frame);
++        output->selection.height = av_frame_cropped_height(frame);
++
++        if ((rv = ff_v4l2_context_init(output)) != 0) {
++            av_log(avctx, AV_LOG_ERROR, "Failed to (re)init context\n");
++            return rv;
++        }
++
++        {
++            struct v4l2_selection selection = {
++                .type = V4L2_BUF_TYPE_VIDEO_OUTPUT,
++                .target = V4L2_SEL_TGT_CROP,
++                .r = output->selection
++            };
++            if (ioctl(s->fd, VIDIOC_S_SELECTION, &selection) != 0)
++                av_log(avctx, AV_LOG_WARNING, "S_SELECTION (CROP) %dx%d @ %d,%d failed: %s\n",
++                       selection.r.width, selection.r.height, selection.r.left, selection.r.top,
++                       av_err2str(AVERROR(errno)));
++            else // Only report OK when the ioctl actually succeeded
++                av_log(avctx, AV_LOG_TRACE, "S_SELECTION (CROP) %dx%d @ %d,%d OK\n",
++                       selection.r.width, selection.r.height, selection.r.left, selection.r.top);
++        }
++    }
+
+ #ifdef V4L2_CID_MPEG_VIDEO_FORCE_KEY_FRAME
+-    if (frame && frame->pict_type == AV_PICTURE_TYPE_I)
++    if (frame->pict_type == AV_PICTURE_TYPE_I)
+         v4l2_set_ext_ctrl(s, MPEG_CID(FORCE_KEY_FRAME), 0, "force key frame", 1);
+ #endif
+
+-    return ff_v4l2_context_enqueue_frame(output, frame);
++    rv = ff_v4l2_context_enqueue_frame(output, frame);
++    if (rv)
++        av_log(avctx, AV_LOG_ERROR, "Enqueue frame failed: %s\n", av_err2str(rv));
++
++    return rv;
+ }
+
+ static int v4l2_receive_packet(AVCodecContext *avctx, AVPacket *avpkt)
+@@ -292,6 +512,11 @@ static int v4l2_receive_packet(AVCodecCo
+ AVFrame *frame = s->frame;
+ int ret;
+
++ av_log(avctx, AV_LOG_TRACE, "<<< %s: qlen out %d cap %d\n", __func__,
++ ff_v4l2_context_q_count(output), ff_v4l2_context_q_count(capture));
++
++ ff_v4l2_dq_all(output, 0);
++
+ if (s->draining)
+ goto dequeue;
+
+@@ -328,7 +553,115 @@ static int v4l2_receive_packet(AVCodecCo
+ }
+
+ dequeue:
+- return ff_v4l2_context_dequeue_packet(capture, avpkt);
++ // Dequeue a frame
++ for (;;) {
++ int t = q_full(output) ? -1 : s->draining ? 300 : 0;
++ int rv2;
++
++ // If output is full wait for either a packet or output to become not full
++ ret = ff_v4l2_context_dequeue_packet(capture, avpkt, t);
++
++ // If output was full retry packet dequeue
++ t = (ret != AVERROR(EAGAIN) || t != -1) ? 0 : 300;
++ rv2 = ff_v4l2_dq_all(output, t);
++ if (t == 0 || rv2 != 0)
++ break;
++ }
++ if (ret)
++ return (s->draining && ret == AVERROR(EAGAIN)) ? AVERROR_EOF : ret;
++
++ if (capture->first_buf == 1) {
++ uint8_t * data;
++ const int len = avpkt->size;
++
++ // 1st buffer after streamon should be SPS/PPS
++ capture->first_buf = 2;
++
++ // Clear both possible stores so there is no chance of confusion
++ av_freep(&s->extdata_data);
++ s->extdata_size = 0;
++ av_freep(&avctx->extradata);
++ avctx->extradata_size = 0;
++
++ if ((data = av_malloc(len + AV_INPUT_BUFFER_PADDING_SIZE)) == NULL)
++ goto fail_no_mem;
++
++ memcpy(data, avpkt->data, len);
++ av_packet_unref(avpkt);
++
++ // We need to copy the header, but keep local if not global
++ if ((avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) != 0) {
++ avctx->extradata = data;
++ avctx->extradata_size = len;
++ }
++ else {
++ s->extdata_data = data;
++ s->extdata_size = len;
++ }
++
++ ret = ff_v4l2_context_dequeue_packet(capture, avpkt, 0);
++ ff_v4l2_dq_all(output, 0);
++ if (ret)
++ return ret;
++ }
++
++ // First frame must be key so mark as such even if encoder forgot
++ if (capture->first_buf == 2) {
++ avpkt->flags |= AV_PKT_FLAG_KEY;
++
++ // Add any extradata to the 1st packet we emit as we cannot create it at init
++ if (avctx->extradata_size > 0 && avctx->extradata) {
++ void * const side = av_packet_new_side_data(avpkt,
++ AV_PKT_DATA_NEW_EXTRADATA,
++ avctx->extradata_size);
++ if (!side)
++ goto fail_no_mem;
++
++ memcpy(side, avctx->extradata, avctx->extradata_size);
++ }
++ }
++
++ // Add SPS/PPS to the start of every key frame if non-global headers
++ if ((avpkt->flags & AV_PKT_FLAG_KEY) != 0 && s->extdata_size != 0) {
++ const size_t newlen = s->extdata_size + avpkt->size;
++ AVBufferRef * const buf = av_buffer_alloc(newlen + AV_INPUT_BUFFER_PADDING_SIZE);
++
++ if (buf == NULL)
++ goto fail_no_mem;
++
++ memcpy(buf->data, s->extdata_data, s->extdata_size);
++ memcpy(buf->data + s->extdata_size, avpkt->data, avpkt->size);
++
++ av_buffer_unref(&avpkt->buf);
++ avpkt->buf = buf;
++ avpkt->data = buf->data;
++ avpkt->size = newlen;
++ }
++ else if (ff_v4l2_context_q_count(capture) < 2) {
++ // Avoid running out of capture buffers
++ // In most cases the buffers will be returned quickly in which case
++ // we don't copy and can use the v4l2 buffers directly but sometimes
++ // ffmpeg seems to hold onto all of them for a long time (.mkv
++ // creation?) so avoid deadlock in those cases.
++ AVBufferRef * const buf = av_buffer_alloc(avpkt->size + AV_INPUT_BUFFER_PADDING_SIZE);
++ if (buf == NULL)
++ goto fail_no_mem;
++
++ memcpy(buf->data, avpkt->data, avpkt->size);
++ av_buffer_unref(&avpkt->buf); // Will recycle the V4L2 buffer
++
++ avpkt->buf = buf;
++ avpkt->data = buf->data;
++ }
++
++ capture->first_buf = 0;
++ return 0;
++
++fail_no_mem:
++ av_log(avctx, AV_LOG_ERROR, "Rx pkt failed: No memory\n");
++ ret = AVERROR(ENOMEM);
++ av_packet_unref(avpkt);
++ return ret;
+ }
+
+ static av_cold int v4l2_encode_init(AVCodecContext *avctx)
+@@ -340,6 +673,8 @@ static av_cold int v4l2_encode_init(AVCo
+ uint32_t v4l2_fmt_output;
+ int ret;
+
++ av_log(avctx, AV_LOG_INFO, " <<< %s: fmt=%d/%d\n", __func__, avctx->pix_fmt, avctx->sw_pix_fmt);
++
+ ret = ff_v4l2_m2m_create_context(priv, &s);
+ if (ret < 0)
+ return ret;
+@@ -347,13 +682,17 @@ static av_cold int v4l2_encode_init(AVCo
+ capture = &s->capture;
+ output = &s->output;
+
++ s->input_drm = (avctx->pix_fmt == AV_PIX_FMT_DRM_PRIME);
++
+ /* common settings output/capture */
+ output->height = capture->height = avctx->height;
+ output->width = capture->width = avctx->width;
+
+ /* output context */
+ output->av_codec_id = AV_CODEC_ID_RAWVIDEO;
+- output->av_pix_fmt = avctx->pix_fmt;
++ output->av_pix_fmt = !s->input_drm ? avctx->pix_fmt :
++ avctx->sw_pix_fmt != AV_PIX_FMT_NONE ? avctx->sw_pix_fmt :
++ AV_PIX_FMT_YUV420P;
+
+ /* capture context */
+ capture->av_codec_id = avctx->codec_id;
+@@ -372,7 +711,7 @@ static av_cold int v4l2_encode_init(AVCo
+ v4l2_fmt_output = output->format.fmt.pix.pixelformat;
+
+ pix_fmt_output = ff_v4l2_format_v4l2_to_avfmt(v4l2_fmt_output, AV_CODEC_ID_RAWVIDEO);
+- if (pix_fmt_output != avctx->pix_fmt) {
++ if (!s->input_drm && pix_fmt_output != avctx->pix_fmt) {
+ const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt_output);
+ av_log(avctx, AV_LOG_ERROR, "Encoder requires %s pixel format.\n", desc->name);
+ return AVERROR(EINVAL);
+@@ -390,9 +729,10 @@ static av_cold int v4l2_encode_close(AVC
+ #define FLAGS AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
+
+ #define V4L_M2M_CAPTURE_OPTS \
+- V4L_M2M_DEFAULT_OPTS,\
++ { "num_output_buffers", "Number of buffers in the output context",\
++ OFFSET(num_output_buffers), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, FLAGS },\
+ { "num_capture_buffers", "Number of buffers in the capture context", \
+- OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 4 }, 4, INT_MAX, FLAGS }
++ OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 8 }, 8, INT_MAX, FLAGS }
+
+ static const AVOption mpeg4_options[] = {
+ V4L_M2M_CAPTURE_OPTS,
+--- /dev/null
++++ b/libavcodec/v4l2_req_decode_q.c
+@@ -0,0 +1,84 @@
++#include <memory.h>
++#include <semaphore.h>
++#include <pthread.h>
++
++#include "v4l2_req_decode_q.h"
++
++int decode_q_in_q(const req_decode_ent * const d)
++{
++ return d->in_q;
++}
++
++// Append d to the tail of q while holding q_lock and flag it as in the queue.
++void decode_q_add(req_decode_q * const q, req_decode_ent * const d)
++{
++    pthread_mutex_lock(&q->q_lock);
++    if (q->head) {
++        d->prev = q->tail;
++        q->tail->next = d;
++    }
++    else {
++        d->prev = NULL;
++        q->head = d;
++    }
++    q->tail = d;
++    d->next = NULL;
++    d->in_q = 1;
++    pthread_mutex_unlock(&q->q_lock);
++}
++
++// Unlink d from q (does nothing if d is not flagged as queued). If d was the
++// head then waiters on q_cond are woken once the lock has been dropped.
++void decode_q_remove(req_decode_q * const q, req_decode_ent * const d)
++{
++    int was_head;
++
++    if (!d->in_q)
++        return;
++
++    pthread_mutex_lock(&q->q_lock);
++    was_head = (d->prev == NULL); // Only need to signal if we were head
++    if (was_head)
++        q->head = d->next;
++    else
++        d->prev->next = d->next;
++
++    if (d->next == NULL)
++        q->tail = d->prev;
++    else
++        d->next->prev = d->prev;
++
++    // Not strictly needed but makes debug easier
++    d->next = NULL;
++    d->prev = NULL;
++    d->in_q = 0;
++    pthread_mutex_unlock(&q->q_lock);
++
++    if (was_head)
++        pthread_cond_broadcast(&q->q_cond);
++}
++
++// Block on q_cond until d is the entry at the head of q.
++void decode_q_wait(req_decode_q * const q, req_decode_ent * const d)
++{
++    pthread_mutex_lock(&q->q_lock);
++    // The head is re-tested after every wake-up
++    while (d != q->head)
++        pthread_cond_wait(&q->q_cond, &q->q_lock);
++    pthread_mutex_unlock(&q->q_lock);
++}
++
++void decode_q_uninit(req_decode_q * const q)
++{
++ pthread_mutex_destroy(&q->q_lock);
++ pthread_cond_destroy(&q->q_cond);
++}
++
++void decode_q_init(req_decode_q * const q)
++{
++ memset(q, 0, sizeof(*q));
++ pthread_mutex_init(&q->q_lock, NULL);
++ pthread_cond_init(&q->q_cond, NULL);
++}
++
++
+--- /dev/null
++++ b/libavcodec/v4l2_req_decode_q.h
+@@ -0,0 +1,27 @@
++#ifndef AVCODEC_V4L2_REQ_DECODE_Q_H
++#define AVCODEC_V4L2_REQ_DECODE_Q_H
++
++#include <pthread.h>
++
++typedef struct req_decode_ent {
++ struct req_decode_ent * next;
++ struct req_decode_ent * prev;
++ int in_q;
++} req_decode_ent;
++
++typedef struct req_decode_q {
++ pthread_mutex_t q_lock;
++ pthread_cond_t q_cond;
++ req_decode_ent * head;
++ req_decode_ent * tail;
++} req_decode_q;
++
++int decode_q_in_q(const req_decode_ent * const d);
++void decode_q_add(req_decode_q * const q, req_decode_ent * const d);
++void decode_q_remove(req_decode_q * const q, req_decode_ent * const d);
++void decode_q_wait(req_decode_q * const q, req_decode_ent * const d);
++void decode_q_uninit(req_decode_q * const q);
++void decode_q_init(req_decode_q * const q);
++
++#endif
++
+--- /dev/null
++++ b/libavcodec/v4l2_req_devscan.c
+@@ -0,0 +1,451 @@
++#include <errno.h>
++#include <fcntl.h>
++#include <libudev.h>
++#include <stdlib.h>
++#include <string.h>
++#include <unistd.h>
++
++#include <sys/ioctl.h>
++#include <sys/sysmacros.h>
++
++#include <linux/media.h>
++#include <linux/videodev2.h>
++
++#include "v4l2_req_devscan.h"
++#include "v4l2_req_utils.h"
++
++struct decdev {
++ enum v4l2_buf_type src_type;
++ uint32_t src_fmt_v4l2;
++ const char * vname;
++ const char * mname;
++};
++
++struct devscan {
++ struct decdev env;
++ unsigned int dev_size;
++ unsigned int dev_count;
++ struct decdev *devs;
++};
++
++static int video_src_pixfmt_supported(uint32_t fmt)
++{
++ return 1;
++}
++
++// Zero *format, then fill in type, width, height and pixelformat for the given
++// buffer type. sizeimage is set to 4 MiB for OUTPUT types and 0 otherwise.
++static void v4l2_setup_format(struct v4l2_format *format, unsigned int type,
++                              unsigned int width, unsigned int height,
++                              unsigned int pixelformat)
++{
++    const unsigned int sizeimage = V4L2_TYPE_IS_OUTPUT(type) ? 4 * 1024 * 1024 : 0;
++
++    memset(format, 0, sizeof(*format));
++    format->type = type;
++
++    if (!V4L2_TYPE_IS_MULTIPLANAR(type)) {
++        format->fmt.pix.width = width;
++        format->fmt.pix.height = height;
++        format->fmt.pix.sizeimage = sizeimage;
++        format->fmt.pix.pixelformat = pixelformat;
++    } else {
++        format->fmt.pix_mp.width = width;
++        format->fmt.pix_mp.height = height;
++        format->fmt.pix_mp.plane_fmt[0].sizeimage = sizeimage;
++        format->fmt.pix_mp.pixelformat = pixelformat;
++    }
++}
++
++static int v4l2_set_format(int video_fd, unsigned int type, unsigned int pixelformat,
++ unsigned int width, unsigned int height)
++{
++ struct v4l2_format format;
++
++ v4l2_setup_format(&format, type, width, height, pixelformat);
++
++ return ioctl(video_fd, VIDIOC_S_FMT, &format) ? -errno : 0;
++}
++
++// Run VIDIOC_QUERYCAP on video_fd. If capabilities is non-NULL it receives
++// device_caps when V4L2_CAP_DEVICE_CAPS is reported, else the plain
++// capabilities field. Returns 0 on success or -errno if the ioctl fails.
++static int v4l2_query_capabilities(int video_fd, unsigned int *capabilities)
++{
++    struct v4l2_capability cap = { 0 };
++
++    if (ioctl(video_fd, VIDIOC_QUERYCAP, &cap) < 0)
++        return -errno;
++
++    if (!capabilities)
++        return 0;
++
++    *capabilities = (cap.capabilities & V4L2_CAP_DEVICE_CAPS) ?
++        cap.device_caps : cap.capabilities;
++
++    return 0;
++}
++
++// Append an entry to scan->devs, growing the array (4 entries, then doubling)
++// when full. Both names are strdup'd. Returns 0 on success or -ENOMEM.
++static int devscan_add(struct devscan *const scan,
++                       enum v4l2_buf_type src_type,
++                       uint32_t src_fmt_v4l2,
++                       const char * vname,
++                       const char * mname)
++{
++    struct decdev *ent;
++
++    if (scan->dev_count >= scan->dev_size) {
++        const unsigned int new_size = scan->dev_size ? scan->dev_size * 2 : 4;
++        struct decdev *const grown = realloc(scan->devs, new_size * sizeof(*grown));
++        if (grown == NULL)
++            return -ENOMEM;
++        scan->devs = grown;
++        scan->dev_size = new_size;
++    }
++
++    ent = &scan->devs[scan->dev_count];
++    ent->src_type = src_type;
++    ent->src_fmt_v4l2 = src_fmt_v4l2;
++    if ((ent->vname = strdup(vname)) == NULL)
++        return -ENOMEM;
++    if ((ent->mname = strdup(mname)) == NULL) {
++        free((char *)ent->vname);
++        return -ENOMEM;
++    }
++    ++scan->dev_count;
++    return 0;
++}
++
++// Free *pScan along with every device entry it owns and reset *pScan to NULL.
++void devscan_delete(struct devscan **const pScan)
++{
++    struct devscan * const scan = *pScan;
++    unsigned int remaining;
++
++    if (scan == NULL)
++        return;
++    *pScan = NULL;
++    for (remaining = scan->dev_count; remaining != 0; --remaining) {
++        free((char*)scan->devs[remaining - 1].mname);
++        free((char*)scan->devs[remaining - 1].vname);
++    }
++    free(scan->devs);
++    free(scan);
++}
++
++#define REQ_BUF_CAPS (\
++ V4L2_BUF_CAP_SUPPORTS_DMABUF |\
++ V4L2_BUF_CAP_SUPPORTS_REQUESTS |\
++ V4L2_BUF_CAP_SUPPORTS_M2M_HOLD_CAPTURE_BUF)
++
++// Probe each source format that fd enumerates for buffer type type_v4l2 and
++// add those whose buffers offer all of REQ_BUF_CAPS to scan.
++static void probe_formats(void * const dc,
++                          struct devscan *const scan,
++                          const int fd,
++                          const unsigned int type_v4l2,
++                          const char *const mpath,
++                          const char *const vpath)
++{
++    unsigned int i;
++    for (i = 0;; ++i) {
++        int rv;
++        struct v4l2_fmtdesc fmtdesc = { .index = i, .type = type_v4l2 };
++        struct v4l2_requestbuffers rbufs = { .count = 0, .type = type_v4l2, .memory = V4L2_MEMORY_MMAP };
++        while (ioctl(fd, VIDIOC_ENUM_FMT, &fmtdesc)) {
++            if (errno == EINTR)
++                continue;
++            if (errno != EINVAL)
++                request_err(dc, "Enum[%d] failed for type=%d\n", i, type_v4l2);
++            return;
++        }
++        if (!video_src_pixfmt_supported(fmtdesc.pixelformat))
++            continue;
++
++        if (v4l2_set_format(fd, type_v4l2, fmtdesc.pixelformat, 720, 480)) {
++            request_debug(dc, "Set failed for type=%d, pf=%.4s\n", type_v4l2, (char*)&fmtdesc.pixelformat);
++            continue;
++        }
++
++        // Retry only if interrupted; any other failure skips this format
++        // (a "continue" inside a retry loop would just spin on the ioctl)
++        do {
++            rv = ioctl(fd, VIDIOC_REQBUFS, &rbufs);
++        } while (rv != 0 && errno == EINTR);
++        if (rv != 0) {
++            request_debug(dc, "%s: Reqbufs failed\n", vpath);
++            continue;
++        }
++
++        if ((rbufs.capabilities & REQ_BUF_CAPS) != REQ_BUF_CAPS) {
++            request_debug(dc, "%s: Buf caps %#x insufficient\n", vpath, rbufs.capabilities);
++            continue;
++        }
++
++        request_debug(dc, "Adding: %s,%s pix=%#x, type=%d\n",
++                      mpath, vpath, fmtdesc.pixelformat, type_v4l2);
++        if (devscan_add(scan, type_v4l2, fmtdesc.pixelformat, vpath, mpath) != 0)
++            return; // devscan_add only fails with -ENOMEM: stop probing
++    }
++}
++
++
++static int probe_video_device(void * const dc,
++ struct udev_device *const device,
++ struct devscan *const scan,
++ const char *const mpath)
++{
++ int ret;
++ unsigned int capabilities = 0;
++ int video_fd = -1;
++
++ const char *path = udev_device_get_devnode(device);
++ if (!path) {
++ request_err(dc, "%s: get video device devnode failed\n", __func__);
++ ret = -EINVAL;
++ goto fail;
++ }
++
++ video_fd = open(path, O_RDWR, 0);
++ if (video_fd == -1) {
++ ret = -errno;
++ request_err(dc, "%s: opening %s failed, %s (%d)\n", __func__, path, strerror(errno), errno);
++ goto fail;
++ }
++
++ ret = v4l2_query_capabilities(video_fd, &capabilities);
++ if (ret < 0) {
++ request_err(dc, "%s: get video capability failed, %s (%d)\n", __func__, strerror(-ret), -ret);
++ goto fail;
++ }
++
++ request_debug(dc, "%s: path=%s capabilities=%#x\n", __func__, path, capabilities);
++
++ if (!(capabilities & V4L2_CAP_STREAMING)) {
++ request_debug(dc, "%s: missing required streaming capability\n", __func__);
++ ret = -EINVAL;
++ goto fail;
++ }
++
++ if (!(capabilities & (V4L2_CAP_VIDEO_M2M_MPLANE | V4L2_CAP_VIDEO_M2M))) {
++ request_debug(dc, "%s: missing required mem2mem capability\n", __func__);
++ ret = -EINVAL;
++ goto fail;
++ }
++
++ /* Should check capture formats too... */
++ if ((capabilities & V4L2_CAP_VIDEO_M2M) != 0)
++ probe_formats(dc, scan, video_fd, V4L2_BUF_TYPE_VIDEO_OUTPUT, mpath, path);
++ if ((capabilities & V4L2_CAP_VIDEO_M2M_MPLANE) != 0)
++ probe_formats(dc, scan, video_fd, V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE, mpath, path);
++
++ close(video_fd);
++ return 0;
++
++fail:
++ if (video_fd >= 0)
++ close(video_fd);
++ return ret;
++}
++
++// Probe one media device: fetch its topology and run probe_video_device() on
++// each V4L video interface. Returns 0 or a negative error code.
++static int probe_media_device(void * const dc,
++                              struct udev_device *const device,
++                              struct devscan *const scan)
++{
++    int ret = 0; // Result when the topology holds no V4L video interface
++    int rv;
++    struct media_device_info device_info = { 0 };
++    struct media_v2_topology topology = { 0 };
++    struct media_v2_interface *interfaces = NULL;
++    struct udev *udev = udev_device_get_udev(device);
++    struct udev_device *video_device;
++    dev_t devnum;
++    int media_fd = -1;
++
++    const char *path = udev_device_get_devnode(device);
++    if (!path) {
++        request_err(dc, "%s: get media device devnode failed\n", __func__);
++        ret = -EINVAL;
++        goto fail;
++    }
++
++    media_fd = open(path, O_RDWR, 0);
++    if (media_fd < 0) {
++        ret = -errno;
++        request_err(dc, "%s: opening %s failed, %s (%d)\n", __func__, path, strerror(-ret), -ret);
++        goto fail;
++    }
++
++    rv = ioctl(media_fd, MEDIA_IOC_DEVICE_INFO, &device_info);
++    if (rv < 0) {
++        ret = -errno;
++        request_err(dc, "%s: get media device info failed, %s (%d)\n", __func__, strerror(-ret), -ret);
++        goto fail;
++    }
++
++    rv = ioctl(media_fd, MEDIA_IOC_G_TOPOLOGY, &topology);
++    if (rv < 0) {
++        ret = -errno;
++        request_err(dc, "%s: get media topology failed, %s (%d)\n", __func__, strerror(-ret), -ret);
++        goto fail;
++    }
++
++    if (topology.num_interfaces <= 0) {
++        request_err(dc, "%s: media device has no interfaces\n", __func__);
++        ret = -EINVAL;
++        goto fail;
++    }
++
++    interfaces = calloc(topology.num_interfaces, sizeof(*interfaces));
++    if (!interfaces) {
++        request_err(dc, "%s: allocating media interface struct failed\n", __func__);
++        ret = -ENOMEM;
++        goto fail;
++    }
++
++    topology.ptr_interfaces = (__u64)(uintptr_t)interfaces;
++    rv = ioctl(media_fd, MEDIA_IOC_G_TOPOLOGY, &topology);
++    if (rv < 0) {
++        ret = -errno;
++        request_err(dc, "%s: get media topology failed, %s (%d)\n", __func__, strerror(-ret), -ret);
++        goto fail;
++    }
++
++    for (int i = 0; i < topology.num_interfaces; i++) {
++        if (interfaces[i].intf_type != MEDIA_INTF_T_V4L_VIDEO)
++            continue;
++
++        devnum = makedev(interfaces[i].devnode.major, interfaces[i].devnode.minor);
++        video_device = udev_device_new_from_devnum(udev, 'c', devnum);
++        if (!video_device) {
++            ret = -errno;
++            request_err(dc, "%s: video_device[%d]=%p\n", __func__, i, video_device);
++            continue;
++        }
++        ret = probe_video_device(dc, video_device, scan, path);
++        udev_device_unref(video_device);
++        if (ret != 0)
++            goto fail;
++    }
++
++fail:
++    free(interfaces);
++    if (media_fd != -1)
++        close(media_fd);
++    return ret;
++}
++
++const char *decdev_media_path(const struct decdev *const dev)
++{
++ return !dev ? NULL : dev->mname;
++}
++
++const char *decdev_video_path(const struct decdev *const dev)
++{
++ return !dev ? NULL : dev->vname;
++}
++
++enum v4l2_buf_type decdev_src_type(const struct decdev *const dev)
++{
++ return !dev ? 0 : dev->src_type;
++}
++
++uint32_t decdev_src_pixelformat(const struct decdev *const dev)
++{
++ return !dev ? 0 : dev->src_fmt_v4l2;
++}
++
++
++// Pick a decode device: the environment override if both of its paths are
++// set, else the first entry matching src_fmt_v4l2 (any entry when that is 0).
++const struct decdev *devscan_find(struct devscan *const scan,
++                                  const uint32_t src_fmt_v4l2)
++{
++    unsigned int idx;
++
++    if (scan->env.mname != NULL && scan->env.vname != NULL)
++        return &scan->env;
++
++    for (idx = 0; idx < scan->dev_count; ++idx) {
++        const struct decdev *const dev = &scan->devs[idx];
++        if (src_fmt_v4l2 == 0 || dev->src_fmt_v4l2 == src_fmt_v4l2)
++            return dev;
++    }
++    return NULL;
++}
++
++// Build the device list in *pscan. If both LIBVA_V4L2_REQUEST_*_PATH variables
++// are set they are used as-is, otherwise udev "media" devices are probed.
++int devscan_build(void * const dc, struct devscan **pscan)
++{
++    int ret;
++    struct udev *udev = NULL; // Tested in the fail path so must start as NULL
++    struct udev_enumerate *enumerate;
++    struct udev_list_entry *devices, *entry;
++    struct udev_device *device;
++    struct devscan * scan;
++
++    *pscan = NULL;
++
++    scan = calloc(1, sizeof(*scan));
++    if (!scan) {
++        ret = -ENOMEM;
++        goto fail;
++    }
++
++    scan->env.mname = getenv("LIBVA_V4L2_REQUEST_MEDIA_PATH");
++    scan->env.vname = getenv("LIBVA_V4L2_REQUEST_VIDEO_PATH");
++    if (scan->env.mname && scan->env.vname) {
++        request_info(dc, "Media/video device env overrides found: %s,%s\n",
++                     scan->env.mname, scan->env.vname);
++        *pscan = scan;
++        return 0;
++    }
++
++    udev = udev_new();
++    if (!udev) {
++        request_err(dc, "%s: allocating udev context failed\n", __func__);
++        ret = -ENOMEM;
++        goto fail;
++    }
++
++    enumerate = udev_enumerate_new(udev);
++    if (!enumerate) {
++        request_err(dc, "%s: allocating udev enumerator failed\n", __func__);
++        ret = -ENOMEM;
++        goto fail;
++    }
++
++    udev_enumerate_add_match_subsystem(enumerate, "media");
++    udev_enumerate_scan_devices(enumerate);
++    devices = udev_enumerate_get_list_entry(enumerate);
++    udev_list_entry_foreach(entry, devices) {
++        const char *path = udev_list_entry_get_name(entry);
++        if (!path)
++            continue;
++
++        device = udev_device_new_from_syspath(udev, path);
++        if (!device)
++            continue;
++
++        probe_media_device(dc, device, scan);
++        udev_device_unref(device);
++    }
++
++    udev_enumerate_unref(enumerate);
++    udev_unref(udev);
++
++    *pscan = scan;
++    return 0;
++
++fail:
++    if (udev)
++        udev_unref(udev);
++    devscan_delete(&scan);
++    return ret;
++}
++
+--- /dev/null
++++ b/libavcodec/v4l2_req_devscan.h
+@@ -0,0 +1,23 @@
++#ifndef _DEVSCAN_H_
++#define _DEVSCAN_H_
++
++#include <stdint.h>
++
++struct devscan;
++struct decdev;
++enum v4l2_buf_type;
++
++/* These return pointers to data in the devscan structure and so are valid
++ * for the lifetime of that structure
++ */
++const char *decdev_media_path(const struct decdev *const dev);
++const char *decdev_video_path(const struct decdev *const dev);
++enum v4l2_buf_type decdev_src_type(const struct decdev *const dev);
++uint32_t decdev_src_pixelformat(const struct decdev *const dev);
++
++const struct decdev *devscan_find(struct devscan *const scan, const uint32_t src_fmt_v4l2);
++
++int devscan_build(void * const dc, struct devscan **pscan);
++void devscan_delete(struct devscan **const pScan);
++
++#endif
+--- /dev/null
++++ b/libavcodec/v4l2_req_dmabufs.c
+@@ -0,0 +1,409 @@
++#include <stdatomic.h>
++#include <stdio.h>
++#include <stdlib.h>
++#include <unistd.h>
++#include <inttypes.h>
++#include <fcntl.h>
++#include <errno.h>
++#include <string.h>
++#include <sys/ioctl.h>
++#include <sys/mman.h>
++#include <linux/mman.h>
++#include <linux/dma-buf.h>
++#include <linux/dma-heap.h>
++
++#include "v4l2_req_dmabufs.h"
++#include "v4l2_req_utils.h"
++
++#define TRACE_ALLOC 0
++
++#ifndef __O_CLOEXEC
++#define __O_CLOEXEC 0
++#endif
++
++struct dmabufs_ctl;
++struct dmabuf_h;
++
++struct dmabuf_fns {
++ int (*buf_alloc)(struct dmabufs_ctl * dbsc, struct dmabuf_h * dh, size_t size);
++ void (*buf_free)(struct dmabuf_h * dh);
++ int (*ctl_new)(struct dmabufs_ctl * dbsc);
++ void (*ctl_free)(struct dmabufs_ctl * dbsc);
++};
++
++struct dmabufs_ctl {
++ atomic_int ref_count;
++ int fd;
++ size_t page_size;
++ void * v;
++ const struct dmabuf_fns * fns;
++};
++
++struct dmabuf_h {
++ int fd;
++ size_t size;
++ size_t len;
++ void * mapptr;
++ void * v;
++ const struct dmabuf_fns * fns;
++};
++
++#if TRACE_ALLOC
++static unsigned int total_bufs = 0;
++static size_t total_size = 0;
++#endif
++
++struct dmabuf_h * dmabuf_import_mmap(void * mapptr, size_t size)
++{
++    // Wrap an existing mapping in a handle; there is no backing fd (fd = -1)
++    struct dmabuf_h * handle = NULL;
++
++    // A failed mapping cannot be imported
++    if (mapptr != MAP_FAILED)
++        handle = malloc(sizeof(*handle));
++
++    if (handle != NULL) {
++        // Members not named here (len, v, fns) are zero-initialised
++        *handle = (struct dmabuf_h) {
++            .mapptr = mapptr,
++            .size = size,
++            .fd = -1
++        };
++    }
++    return handle;
++}
++
++struct dmabuf_h * dmabuf_import(int fd, size_t size)
++{
++    struct dmabuf_h *dh;
++    // Reject size 0 before dup() so a refused import never leaks the new fd
++    if (size == 0 ||
++        (fd = dup(fd)) < 0)
++        return NULL;
++
++    dh = malloc(sizeof(*dh));
++    if (!dh) {
++        close(fd);  // Drop our duplicate only; the caller's fd is untouched
++        return NULL;
++    }
++    // The handle owns the duplicate; it stays unmapped (MAP_FAILED) for now
++    *dh = (struct dmabuf_h) {
++        .fd = fd,
++        .size = size,
++        .mapptr = MAP_FAILED
++    };
++
++#if TRACE_ALLOC
++    ++total_bufs;
++    total_size += dh->size;
++    request_log("%s: Import: %zd, total=%zd, bufs=%d\n", __func__, dh->size, total_size, total_bufs);
++#endif
++
++    return dh;
++}
++
++struct dmabuf_h * dmabuf_realloc(struct dmabufs_ctl * dbsc, struct dmabuf_h * old, size_t size)
++{
++ struct dmabuf_h * dh;
++ if (old != NULL) {
++ if (old->size >= size) {
++ return old; // Existing buffer is already big enough - hand it straight back
++ }
++ dmabuf_free(old); // Too small: freed here, contents are not carried over
++ }
++ // 'old' (if any) is gone from here on, so a NULL return leaves the caller with no buffer
++ if (size == 0 ||
++ (dh = malloc(sizeof(*dh))) == NULL)
++ return NULL;
++
++ *dh = (struct dmabuf_h){
++ .fd = -1,
++ .mapptr = MAP_FAILED,
++ .fns = dbsc->fns
++ };
++
++ if (dh->fns->buf_alloc(dbsc, dh, size) != 0)
++ goto fail;
++
++ // On success buf_alloc is expected to have filled in dh->fd and dh->size
++#if TRACE_ALLOC
++ ++total_bufs;
++ total_size += dh->size;
++ request_log("%s: Alloc: %zd, total=%zd, bufs=%d\n", __func__, dh->size, total_size, total_bufs);
++#endif
++
++ return dh;
++
++fail:
++ free(dh);
++ return NULL;
++}
++
++int dmabuf_sync(struct dmabuf_h * const dh, unsigned int flags)
++{
++    struct dma_buf_sync req = { .flags = flags };
++    // A handle without an fd has nothing to synchronise
++    if (dh->fd == -1)
++        return 0;
++    for (;;) {
++        if (ioctl(dh->fd, DMA_BUF_IOCTL_SYNC, &req) != -1)
++            return 0;
++        if (errno != EINTR) {   // EINTR simply retries the ioctl
++            const int saved_errno = errno;
++            request_log("%s: ioctl failed: flags=%#x\n", __func__, flags);
++            return -saved_errno;
++        }
++    }
++}
++
++int dmabuf_write_start(struct dmabuf_h * const dh)
++{
++ return dmabuf_sync(dh, DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE);
++}
++
++int dmabuf_write_end(struct dmabuf_h * const dh)
++{
++ return dmabuf_sync(dh, DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE);
++}
++
++int dmabuf_read_start(struct dmabuf_h * const dh)
++{
++ if (!dmabuf_map(dh))
++ return -1;
++ return dmabuf_sync(dh, DMA_BUF_SYNC_START | DMA_BUF_SYNC_READ);
++}
++
++int dmabuf_read_end(struct dmabuf_h * const dh)
++{
++ return dmabuf_sync(dh, DMA_BUF_SYNC_END | DMA_BUF_SYNC_READ);
++}
++
++
++void * dmabuf_map(struct dmabuf_h * const dh)
++{
++ if (!dh)
++ return NULL;
++ if (dh->mapptr != MAP_FAILED)
++ return dh->mapptr;
++ dh->mapptr = mmap(NULL, dh->size,
++ PROT_READ | PROT_WRITE,
++ MAP_SHARED | MAP_POPULATE,
++ dh->fd, 0);
++ if (dh->mapptr == MAP_FAILED) {
++ request_log("%s: Map failed\n", __func__);
++ return NULL;
++ }
++ return dh->mapptr;
++}
++
++int dmabuf_fd(const struct dmabuf_h * const dh)
++{
++ if (!dh)
++ return -1;
++ return dh->fd;
++}
++
++size_t dmabuf_size(const struct dmabuf_h * const dh)
++{
++ if (!dh)
++ return 0;
++ return dh->size;
++}
++
++size_t dmabuf_len(const struct dmabuf_h * const dh)
++{
++ if (!dh)
++ return 0;
++ return dh->len;
++}
++
++void dmabuf_len_set(struct dmabuf_h * const dh, const size_t len)
++{
++ dh->len = len;
++}
++
++void dmabuf_free(struct dmabuf_h * dh)
++{
++    if (!dh)
++        return;
++
++#if TRACE_ALLOC
++    --total_bufs;
++    total_size -= dh->size;
++    request_log("%s: Free: %zd, total=%zd, bufs=%d\n", __func__, dh->size, total_size, total_bufs);
++#endif
++    // Release in order: allocator state, the mapping, the fd, then the handle
++    if (dh->fns != NULL && dh->fns->buf_free)
++        dh->fns->buf_free(dh);
++
++    if (dh->mapptr != MAP_FAILED && dh->mapptr != NULL)
++        munmap(dh->mapptr, dh->size);
++    // Close once: Linux frees the fd even on EINTR, so a retry could hit a reused fd
++    if (dh->fd != -1)
++        close(dh->fd);
++    free(dh);
++}
++
++static struct dmabufs_ctl * dmabufs_ctl_new2(const struct dmabuf_fns * const fns)
++{
++ struct dmabufs_ctl * dbsc = calloc(1, sizeof(*dbsc));
++
++ if (!dbsc)
++ return NULL;
++
++ dbsc->fd = -1;
++ dbsc->fns = fns;
++ dbsc->page_size = (size_t)sysconf(_SC_PAGE_SIZE);
++
++ if (fns->ctl_new(dbsc) != 0)
++ goto fail;
++
++ return dbsc;
++
++fail:
++ free(dbsc);
++ return NULL;
++}
++
++static void dmabufs_ctl_free(struct dmabufs_ctl * const dbsc)
++{
++ request_debug(NULL, "Free dmabuf ctl\n");
++
++ dbsc->fns->ctl_free(dbsc);
++
++ free(dbsc);
++}
++
++void dmabufs_ctl_unref(struct dmabufs_ctl ** const pDbsc)
++{
++ struct dmabufs_ctl * const dbsc = *pDbsc;
++ // Drop one reference; the caller's pointer is always cleared, even if others remain
++ if (!dbsc)
++ return;
++ *pDbsc = NULL;
++ // ref_count is zero-based (calloc'd at creation): a previous value of 0 means last reference
++ if (atomic_fetch_sub(&dbsc->ref_count, 1) != 0)
++ return;
++
++ dmabufs_ctl_free(dbsc);
++}
++
++struct dmabufs_ctl * dmabufs_ctl_ref(struct dmabufs_ctl * const dbsc)
++{
++ atomic_fetch_add(&dbsc->ref_count, 1);
++ return dbsc;
++}
++
++//-----------------------------------------------------------------------------
++//
++// Alloc dmabuf via CMA
++
++static int ctl_cma_new2(struct dmabufs_ctl * dbsc, const char * const * names)
++{
++ for (; *names != NULL; ++names)
++ {
++ while ((dbsc->fd = open(*names, O_RDWR | __O_CLOEXEC)) == -1 &&
++ errno == EINTR)
++ /* Loop */;
++ if (dbsc->fd != -1)
++ {
++ request_debug(NULL, "%s: Using dma_heap device %s\n", __func__, *names);
++ return 0;
++ }
++ request_debug(NULL, "%s: Not using dma_heap device %s: %s\n", __func__, *names, strerror(errno));
++ }
++ request_log("Unable to open any dma_heap device\n");
++ return -1;
++}
++
++static int ctl_cma_new(struct dmabufs_ctl * dbsc)
++{
++ static const char * const names[] = {
++ "/dev/dma_heap/linux,cma",
++ "/dev/dma_heap/reserved",
++ NULL
++ };
++
++ return ctl_cma_new2(dbsc, names);
++}
++
++static void ctl_cma_free(struct dmabufs_ctl * dbsc)
++{
++    // Close once: retrying close() after EINTR could close a reused descriptor
++    if (dbsc->fd != -1)
++        close(dbsc->fd);
++}
++
++static int buf_cma_alloc(struct dmabufs_ctl * const dbsc, struct dmabuf_h * dh, size_t size)
++{
++    // Allocate 'size' bytes, rounded up to whole pages, from the dma-heap
++    // opened on dbsc->fd. Returns 0 on success or -errno on failure.
++    struct dma_heap_allocation_data data = {
++        .len = (size + dbsc->page_size - 1) & ~(dbsc->page_size - 1),
++        .fd = 0,
++        .fd_flags = O_RDWR,
++        .heap_flags = 0
++    };
++
++    while (ioctl(dbsc->fd, DMA_HEAP_IOCTL_ALLOC, &data)) {
++        const int err = errno;
++        if (err == EINTR)
++            continue;  // Interrupted, not failed: retry without logging an error
++        request_log("Failed to alloc %" PRIu64 " from dma-heap(fd=%d): %d (%s)\n",
++                    (uint64_t)data.len,
++                    dbsc->fd,
++                    err,
++                    strerror(err));
++        return -err;
++    }
++
++    // Record the new fd and the page-rounded length that was requested
++    dh->fd = data.fd;
++    dh->size = (size_t)data.len;
++
++    return 0;
++}
++
++static void buf_cma_free(struct dmabuf_h * dh)
++{
++ // Nothing needed
++}
++
++static const struct dmabuf_fns dmabuf_cma_fns = {
++ .buf_alloc = buf_cma_alloc,
++ .buf_free = buf_cma_free,
++ .ctl_new = ctl_cma_new,
++ .ctl_free = ctl_cma_free,
++};
++
++struct dmabufs_ctl * dmabufs_ctl_new(void)
++{
++ request_debug(NULL, "Dmabufs using CMA\n");
++ return dmabufs_ctl_new2(&dmabuf_cma_fns);
++}
++
++static int ctl_cma_new_vidbuf_cached(struct dmabufs_ctl * dbsc)
++{
++ static const char * const names[] = {
++ "/dev/dma_heap/vidbuf_cached",
++ "/dev/dma_heap/linux,cma",
++ "/dev/dma_heap/reserved",
++ NULL
++ };
++
++ return ctl_cma_new2(dbsc, names);
++}
++
++static const struct dmabuf_fns dmabuf_vidbuf_cached_fns = {
++ .buf_alloc = buf_cma_alloc,
++ .buf_free = buf_cma_free,
++ .ctl_new = ctl_cma_new_vidbuf_cached,
++ .ctl_free = ctl_cma_free,
++};
++
++struct dmabufs_ctl * dmabufs_ctl_new_vidbuf_cached(void)
++{
++ request_debug(NULL, "Dmabufs using Vidbuf\n");
++ return dmabufs_ctl_new2(&dmabuf_vidbuf_cached_fns);
++}
++
+--- /dev/null
++++ b/libavcodec/v4l2_req_dmabufs.h
+@@ -0,0 +1,45 @@
++#ifndef DMABUFS_H
++#define DMABUFS_H
++
++#include <stddef.h>
++
++struct dmabufs_ctl;
++struct dmabuf_h;
++
++struct dmabufs_ctl * dmabufs_ctl_new(void);
++struct dmabufs_ctl * dmabufs_ctl_new_vidbuf_cached(void);
++void dmabufs_ctl_unref(struct dmabufs_ctl ** const pdbsc);
++struct dmabufs_ctl * dmabufs_ctl_ref(struct dmabufs_ctl * const dbsc);
++
++// Need not preserve old contents
++// On NULL return old buffer is freed
++struct dmabuf_h * dmabuf_realloc(struct dmabufs_ctl * dbsc, struct dmabuf_h *, size_t size);
++
++static inline struct dmabuf_h * dmabuf_alloc(struct dmabufs_ctl * dbsc, size_t size) {
++ return dmabuf_realloc(dbsc, NULL, size);
++}
++/* Create from existing fd - dups(fd) */
++struct dmabuf_h * dmabuf_import(int fd, size_t size);
++/* Import an existing mmap - returns NULL if mapptr == MAP_FAILED */
++struct dmabuf_h * dmabuf_import_mmap(void * mapptr, size_t size);
++
++void * dmabuf_map(struct dmabuf_h * const dh);
++
++/* flags from linux/dma-buf.h DMA_BUF_SYNC_xxx */
++int dmabuf_sync(struct dmabuf_h * const dh, unsigned int flags);
++
++int dmabuf_write_start(struct dmabuf_h * const dh);
++int dmabuf_write_end(struct dmabuf_h * const dh);
++int dmabuf_read_start(struct dmabuf_h * const dh);
++int dmabuf_read_end(struct dmabuf_h * const dh);
++
++int dmabuf_fd(const struct dmabuf_h * const dh);
++/* Allocated size */
++size_t dmabuf_size(const struct dmabuf_h * const dh);
++/* Bytes in use */
++size_t dmabuf_len(const struct dmabuf_h * const dh);
++/* Set bytes in use */
++void dmabuf_len_set(struct dmabuf_h * const dh, const size_t len);
++void dmabuf_free(struct dmabuf_h * dh);
++
++#endif
+--- /dev/null
++++ b/libavcodec/v4l2_req_hevc_v1.c
+@@ -0,0 +1,3 @@
++#define HEVC_CTRLS_VERSION 1
++#include "v4l2_req_hevc_vx.c"
++
+--- /dev/null
++++ b/libavcodec/v4l2_req_hevc_v2.c
+@@ -0,0 +1,3 @@
++#define HEVC_CTRLS_VERSION 2
++#include "v4l2_req_hevc_vx.c"
++
+--- /dev/null
++++ b/libavcodec/v4l2_req_hevc_v3.c
+@@ -0,0 +1,3 @@
++#define HEVC_CTRLS_VERSION 3
++#include "v4l2_req_hevc_vx.c"
++
+--- /dev/null
++++ b/libavcodec/v4l2_req_hevc_v4.c
+@@ -0,0 +1,3 @@
++#define HEVC_CTRLS_VERSION 4
++#include "v4l2_req_hevc_vx.c"
++
+--- /dev/null
++++ b/libavcodec/v4l2_req_hevc_vx.c
+@@ -0,0 +1,1362 @@
++// File included by v4l2_req_hevc_v* - not compiled on its own
++
++#include "decode.h"
++#include "hevcdec.h"
++#include "hwconfig.h"
++#include "internal.h"
++#include "thread.h"
++
++#if HEVC_CTRLS_VERSION == 1
++#include "hevc-ctrls-v1.h"
++
++// Fixup renamed entries
++#define V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT
++
++#elif HEVC_CTRLS_VERSION == 2
++#include "hevc-ctrls-v2.h"
++#elif HEVC_CTRLS_VERSION == 3
++#include "hevc-ctrls-v3.h"
++#elif HEVC_CTRLS_VERSION == 4
++#include <linux/v4l2-controls.h>
++#if !defined(V4L2_CID_STATELESS_HEVC_SPS)
++#include "hevc-ctrls-v4.h"
++#endif
++#else
++#error Unknown HEVC_CTRLS_VERSION
++#endif
++
++#ifndef V4L2_CID_STATELESS_HEVC_SPS
++#define V4L2_CID_STATELESS_HEVC_SPS V4L2_CID_MPEG_VIDEO_HEVC_SPS
++#define V4L2_CID_STATELESS_HEVC_PPS V4L2_CID_MPEG_VIDEO_HEVC_PPS
++#define V4L2_CID_STATELESS_HEVC_SLICE_PARAMS V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS
++#define V4L2_CID_STATELESS_HEVC_SCALING_MATRIX V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX
++#define V4L2_CID_STATELESS_HEVC_DECODE_PARAMS V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS
++#define V4L2_CID_STATELESS_HEVC_DECODE_MODE V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE
++#define V4L2_CID_STATELESS_HEVC_START_CODE V4L2_CID_MPEG_VIDEO_HEVC_START_CODE
++
++#define V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED
++#define V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED
++#define V4L2_STATELESS_HEVC_START_CODE_NONE V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE
++#define V4L2_STATELESS_HEVC_START_CODE_ANNEX_B V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B
++#endif
++
++#include "v4l2_request_hevc.h"
++
++#include "libavutil/hwcontext_drm.h"
++
++#include <semaphore.h>
++#include <pthread.h>
++
++#include "v4l2_req_devscan.h"
++#include "v4l2_req_dmabufs.h"
++#include "v4l2_req_pollqueue.h"
++#include "v4l2_req_media.h"
++#include "v4l2_req_utils.h"
++
++// Attached to buf[0] in frame
++// Pooled in hwcontext so generally create once - 1/frame
++typedef struct V4L2MediaReqDescriptor {
++ AVDRMFrameDescriptor drm;
++
++ // Media
++ uint64_t timestamp;
++ struct qent_dst * qe_dst;
++
++ // Decode only - should be NULL by the time we emit the frame
++ struct req_decode_ent decode_ent;
++
++ struct media_request *req;
++ struct qent_src *qe_src;
++
++#if HEVC_CTRLS_VERSION >= 2
++ struct v4l2_ctrl_hevc_decode_params dec;
++#endif
++
++ size_t num_slices;
++ size_t alloced_slices;
++ struct v4l2_ctrl_hevc_slice_params * slice_params;
++ struct slice_info * slices;
++
++ size_t num_offsets;
++ size_t alloced_offsets;
++ uint32_t *offsets;
++
++} V4L2MediaReqDescriptor;
++
++struct slice_info {
++ const uint8_t * ptr;
++ size_t len; // bytes
++ size_t n_offsets;
++};
++
++// Handy container for accumulating controls before setting
++struct req_controls {
++ int has_scaling;
++ struct timeval tv;
++ struct v4l2_ctrl_hevc_sps sps;
++ struct v4l2_ctrl_hevc_pps pps;
++ struct v4l2_ctrl_hevc_scaling_matrix scaling_matrix;
++};
++
++//static uint8_t nalu_slice_start_code[] = { 0x00, 0x00, 0x01 };
++
++
++// Get an FFmpeg format from the v4l2 format
++static enum AVPixelFormat pixel_format_from_format(const struct v4l2_format *const format)
++{
++ switch (V4L2_TYPE_IS_MULTIPLANAR(format->type) ?
++ format->fmt.pix_mp.pixelformat : format->fmt.pix.pixelformat) {
++ case V4L2_PIX_FMT_YUV420:
++ return AV_PIX_FMT_YUV420P;
++ case V4L2_PIX_FMT_NV12:
++ return AV_PIX_FMT_NV12;
++#if CONFIG_SAND
++ case V4L2_PIX_FMT_NV12_COL128:
++ return AV_PIX_FMT_RPI4_8;
++ case V4L2_PIX_FMT_NV12_10_COL128:
++ return AV_PIX_FMT_RPI4_10;
++#endif
++ default:
++ break;
++ }
++ return AV_PIX_FMT_NONE;
++}
++
++static inline uint64_t frame_capture_dpb(const AVFrame * const frame)
++{
++ const V4L2MediaReqDescriptor *const rd = (V4L2MediaReqDescriptor *)frame->data[0];
++ return rd->timestamp;
++}
++
++static inline void frame_set_capture_dpb(AVFrame * const frame, const uint64_t dpb_stamp)
++{
++ V4L2MediaReqDescriptor *const rd = (V4L2MediaReqDescriptor *)frame->data[0];
++ rd->timestamp = dpb_stamp;
++}
++
++static void fill_pred_table(const HEVCContext *h, struct v4l2_hevc_pred_weight_table *table)
++{
++ int32_t luma_weight_denom, chroma_weight_denom;
++ const SliceHeader *sh = &h->sh;
++
++ if (sh->slice_type == HEVC_SLICE_I ||
++ (sh->slice_type == HEVC_SLICE_P && !h->ps.pps->weighted_pred_flag) ||
++ (sh->slice_type == HEVC_SLICE_B && !h->ps.pps->weighted_bipred_flag))
++ return;
++
++ table->luma_log2_weight_denom = sh->luma_log2_weight_denom;
++
++ if (h->ps.sps->chroma_format_idc)
++ table->delta_chroma_log2_weight_denom = sh->chroma_log2_weight_denom - sh->luma_log2_weight_denom;
++
++ luma_weight_denom = (1 << sh->luma_log2_weight_denom);
++ chroma_weight_denom = (1 << sh->chroma_log2_weight_denom);
++
++ for (int i = 0; i < 15 && i < sh->nb_refs[L0]; i++) {
++ table->delta_luma_weight_l0[i] = sh->luma_weight_l0[i] - luma_weight_denom;
++ table->luma_offset_l0[i] = sh->luma_offset_l0[i];
++ table->delta_chroma_weight_l0[i][0] = sh->chroma_weight_l0[i][0] - chroma_weight_denom;
++ table->delta_chroma_weight_l0[i][1] = sh->chroma_weight_l0[i][1] - chroma_weight_denom;
++ table->chroma_offset_l0[i][0] = sh->chroma_offset_l0[i][0];
++ table->chroma_offset_l0[i][1] = sh->chroma_offset_l0[i][1];
++ }
++
++ if (sh->slice_type != HEVC_SLICE_B)
++ return;
++
++ for (int i = 0; i < 15 && i < sh->nb_refs[L1]; i++) {
++ table->delta_luma_weight_l1[i] = sh->luma_weight_l1[i] - luma_weight_denom;
++ table->luma_offset_l1[i] = sh->luma_offset_l1[i];
++ table->delta_chroma_weight_l1[i][0] = sh->chroma_weight_l1[i][0] - chroma_weight_denom;
++ table->delta_chroma_weight_l1[i][1] = sh->chroma_weight_l1[i][1] - chroma_weight_denom;
++ table->chroma_offset_l1[i][0] = sh->chroma_offset_l1[i][0];
++ table->chroma_offset_l1[i][1] = sh->chroma_offset_l1[i][1];
++ }
++}
++
++#if HEVC_CTRLS_VERSION <= 2
++static int find_frame_rps_type(const HEVCContext *h, uint64_t timestamp)
++{
++ const HEVCFrame *frame;
++ int i;
++
++ for (i = 0; i < h->rps[ST_CURR_BEF].nb_refs; i++) {
++ frame = h->rps[ST_CURR_BEF].ref[i];
++ if (frame && timestamp == frame_capture_dpb(frame->frame))
++ return V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_BEFORE;
++ }
++
++ for (i = 0; i < h->rps[ST_CURR_AFT].nb_refs; i++) {
++ frame = h->rps[ST_CURR_AFT].ref[i];
++ if (frame && timestamp == frame_capture_dpb(frame->frame))
++ return V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_AFTER;
++ }
++
++ for (i = 0; i < h->rps[LT_CURR].nb_refs; i++) {
++ frame = h->rps[LT_CURR].ref[i];
++ if (frame && timestamp == frame_capture_dpb(frame->frame))
++ return V4L2_HEVC_DPB_ENTRY_RPS_LT_CURR;
++ }
++
++ return 0;
++}
++#endif
++
++static unsigned int
++get_ref_pic_index(const HEVCContext *h, const HEVCFrame *frame,
++ const struct v4l2_hevc_dpb_entry * const entries,
++ const unsigned int num_entries)
++{
++ uint64_t timestamp;
++
++ if (!frame)
++ return 0;
++
++ timestamp = frame_capture_dpb(frame->frame);
++
++ for (unsigned int i = 0; i < num_entries; i++) {
++ if (entries[i].timestamp == timestamp)
++ return i;
++ }
++
++ return 0;
++}
++
++static const uint8_t * ptr_from_index(const uint8_t * b, unsigned int idx)
++{
++ unsigned int z = 0; // Number of consecutive zero bytes just consumed
++ while (idx--) { // Consume idx bytes; the extra 0x03 bytes skipped below are not counted
++ if (*b++ == 0) {
++ ++z;
++ if (z >= 2 && *b == 3) { // 00 00 03: presumably an emulation-prevention byte - skip it. NOTE(review): peeks one byte ahead
++ ++b;
++ z = 0;
++ }
++ }
++ else {
++ z = 0;
++ }
++ }
++ return b; // Pointer just past everything consumed or skipped
++}
++
++static int slice_add(V4L2MediaReqDescriptor * const rd)
++{
++ if (rd->num_slices >= rd->alloced_slices) {
++ struct v4l2_ctrl_hevc_slice_params * p2;
++ struct slice_info * s2;
++ size_t n2 = rd->alloced_slices == 0 ? 8 : rd->alloced_slices * 2;
++
++ p2 = av_realloc_array(rd->slice_params, n2, sizeof(*p2));
++ if (p2 == NULL)
++ return AVERROR(ENOMEM);
++ rd->slice_params = p2;
++
++ s2 = av_realloc_array(rd->slices, n2, sizeof(*s2));
++ if (s2 == NULL)
++ return AVERROR(ENOMEM);
++ rd->slices = s2;
++
++ rd->alloced_slices = n2;
++ }
++ ++rd->num_slices;
++ return 0;
++}
++
++static int offsets_add(V4L2MediaReqDescriptor *const rd, const size_t n, const unsigned * const offsets)
++{
++    if (rd->num_offsets + n > rd->alloced_offsets) {  // Grow so that n more offsets fit
++        size_t n2 = rd->alloced_offsets == 0 ? 128 : rd->alloced_offsets * 2;  // Seed from the offsets capacity, not the slice capacity
++        void * p2;
++        while (rd->num_offsets + n > n2)  // Keep doubling until everything fits
++            n2 *= 2;
++        if ((p2 = av_realloc_array(rd->offsets, n2, sizeof(*rd->offsets))) == NULL)
++            return AVERROR(ENOMEM);  // Recorded capacity is unchanged on failure
++        rd->offsets = p2;
++        rd->alloced_offsets = n2;
++    }
++    for (size_t i = 0; i != n; ++i)
++        rd->offsets[rd->num_offsets++] = offsets[i] - 1;  // Appended as "minus 1" values
++    return 0;
++}
++
++static unsigned int
++fill_dpb_entries(const HEVCContext * const h, struct v4l2_hevc_dpb_entry * const entries)
++{
++ unsigned int i;
++ unsigned int n = 0;
++ const HEVCFrame * const pic = h->ref;
++ // Emit one entry per DPB frame, other than the current picture, that is flagged as a reference
++ for (i = 0; i < FF_ARRAY_ELEMS(h->DPB); i++) {
++ const HEVCFrame * const frame = &h->DPB[i];
++ if (frame != pic && (frame->flags & (HEVC_FRAME_FLAG_LONG_REF | HEVC_FRAME_FLAG_SHORT_REF))) {
++ struct v4l2_hevc_dpb_entry * const entry = entries + n++;
++ // NOTE(review): n is never checked against the capacity of entries[] - confirm callers guarantee room
++ entry->timestamp = frame_capture_dpb(frame->frame);
++#if HEVC_CTRLS_VERSION <= 2
++ entry->rps = find_frame_rps_type(h, entry->timestamp);
++#else
++ entry->flags = (frame->flags & HEVC_FRAME_FLAG_LONG_REF) == 0 ? 0 :
++ V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE;
++#endif
++ entry->field_pic = frame->frame->interlaced_frame;
++
++#if HEVC_CTRLS_VERSION <= 3
++ /* TODO: Interleaved: Get the POC for each field. */
++ entry->pic_order_cnt[0] = frame->poc;
++ entry->pic_order_cnt[1] = frame->poc;
++#else
++ entry->pic_order_cnt_val = frame->poc;
++#endif
++ }
++ }
++ return n; // Number of entries written
++}
++
++static void fill_slice_params(const HEVCContext * const h,
++#if HEVC_CTRLS_VERSION >= 2
++ const struct v4l2_ctrl_hevc_decode_params * const dec,
++#endif
++ struct v4l2_ctrl_hevc_slice_params *slice_params,
++ uint32_t bit_size, uint32_t bit_offset)
++{
++ const SliceHeader * const sh = &h->sh;
++#if HEVC_CTRLS_VERSION >= 2
++ const struct v4l2_hevc_dpb_entry *const dpb = dec->dpb;
++ const unsigned int dpb_n = dec->num_active_dpb_entries;
++#else
++ struct v4l2_hevc_dpb_entry *const dpb = slice_params->dpb;
++ unsigned int dpb_n;
++#endif
++ unsigned int i;
++ RefPicList *rpl;
++
++ *slice_params = (struct v4l2_ctrl_hevc_slice_params) {
++ .bit_size = bit_size,
++#if HEVC_CTRLS_VERSION <= 3
++ .data_bit_offset = bit_offset,
++#else
++ .data_byte_offset = bit_offset / 8 + 1,
++#endif
++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
++ .slice_segment_addr = sh->slice_segment_addr,
++
++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: NAL unit header */
++ .nal_unit_type = h->nal_unit_type,
++ .nuh_temporal_id_plus1 = h->temporal_id + 1,
++
++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
++ .slice_type = sh->slice_type,
++ .colour_plane_id = sh->colour_plane_id,
++ .slice_pic_order_cnt = h->ref->poc,
++ .num_ref_idx_l0_active_minus1 = sh->nb_refs[L0] ? sh->nb_refs[L0] - 1 : 0,
++ .num_ref_idx_l1_active_minus1 = sh->nb_refs[L1] ? sh->nb_refs[L1] - 1 : 0,
++ .collocated_ref_idx = sh->slice_temporal_mvp_enabled_flag ? sh->collocated_ref_idx : 0,
++ .five_minus_max_num_merge_cand = sh->slice_type == HEVC_SLICE_I ? 0 : 5 - sh->max_num_merge_cand,
++ .slice_qp_delta = sh->slice_qp_delta,
++ .slice_cb_qp_offset = sh->slice_cb_qp_offset,
++ .slice_cr_qp_offset = sh->slice_cr_qp_offset,
++ .slice_act_y_qp_offset = 0,
++ .slice_act_cb_qp_offset = 0,
++ .slice_act_cr_qp_offset = 0,
++ .slice_beta_offset_div2 = sh->beta_offset / 2,
++ .slice_tc_offset_div2 = sh->tc_offset / 2,
++
++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture timing SEI message */
++ .pic_struct = h->sei.picture_timing.picture_struct,
++
++#if HEVC_CTRLS_VERSION < 2
++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
++ .num_rps_poc_st_curr_before = h->rps[ST_CURR_BEF].nb_refs,
++ .num_rps_poc_st_curr_after = h->rps[ST_CURR_AFT].nb_refs,
++ .num_rps_poc_lt_curr = h->rps[LT_CURR].nb_refs,
++#endif
++ };
++
++ if (sh->slice_sample_adaptive_offset_flag[0])
++ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA;
++
++ if (sh->slice_sample_adaptive_offset_flag[1])
++ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA;
++
++ if (sh->slice_temporal_mvp_enabled_flag)
++ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED;
++
++ if (sh->mvd_l1_zero_flag)
++ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO;
++
++ if (sh->cabac_init_flag)
++ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT;
++
++ if (sh->collocated_list == L0)
++ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0;
++
++ if (sh->disable_deblocking_filter_flag)
++ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED;
++
++ if (sh->slice_loop_filter_across_slices_enabled_flag)
++ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED;
++
++ if (sh->dependent_slice_segment_flag)
++ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT;
++
++#if HEVC_CTRLS_VERSION < 2
++ dpb_n = fill_dpb_entries(h, dpb);
++ slice_params->num_active_dpb_entries = dpb_n;
++#endif
++
++ if (sh->slice_type != HEVC_SLICE_I) {
++ rpl = &h->ref->refPicList[0];
++ for (i = 0; i < rpl->nb_refs; i++)
++ slice_params->ref_idx_l0[i] = get_ref_pic_index(h, rpl->ref[i], dpb, dpb_n);
++ }
++
++ if (sh->slice_type == HEVC_SLICE_B) {
++ rpl = &h->ref->refPicList[1];
++ for (i = 0; i < rpl->nb_refs; i++)
++ slice_params->ref_idx_l1[i] = get_ref_pic_index(h, rpl->ref[i], dpb, dpb_n);
++ }
++
++ fill_pred_table(h, &slice_params->pred_weight_table);
++
++ slice_params->num_entry_point_offsets = sh->num_entry_point_offsets;
++#if HEVC_CTRLS_VERSION <= 3
++ if (slice_params->num_entry_point_offsets > 256) {
++ slice_params->num_entry_point_offsets = 256;
++ av_log(NULL, AV_LOG_ERROR, "%s: Currently only 256 entry points are supported, but slice has %d entry points.\n", __func__, sh->num_entry_point_offsets);
++ }
++
++ for (i = 0; i < slice_params->num_entry_point_offsets; i++)
++ slice_params->entry_point_offset_minus1[i] = sh->entry_point_offset[i] - 1;
++#endif
++}
++
++#if HEVC_CTRLS_VERSION >= 2
++static void
++fill_decode_params(const HEVCContext * const h,
++ struct v4l2_ctrl_hevc_decode_params * const dec)
++{
++ unsigned int i;
++
++ *dec = (struct v4l2_ctrl_hevc_decode_params){
++ .pic_order_cnt_val = h->poc,
++ .num_poc_st_curr_before = h->rps[ST_CURR_BEF].nb_refs,
++ .num_poc_st_curr_after = h->rps[ST_CURR_AFT].nb_refs,
++ .num_poc_lt_curr = h->rps[LT_CURR].nb_refs,
++ };
++
++ dec->num_active_dpb_entries = fill_dpb_entries(h, dec->dpb);
++
++ // The docn does seem to ask that we fit our 32 bit signed POC into
++ // a U8 so... (To be fair 16 bits would be enough)
++ // Luckily we (Pi) don't use these fields
++ for (i = 0; i != h->rps[ST_CURR_BEF].nb_refs; ++i)
++ dec->poc_st_curr_before[i] = h->rps[ST_CURR_BEF].ref[i]->poc;
++ for (i = 0; i != h->rps[ST_CURR_AFT].nb_refs; ++i)
++ dec->poc_st_curr_after[i] = h->rps[ST_CURR_AFT].ref[i]->poc;
++ for (i = 0; i != h->rps[LT_CURR].nb_refs; ++i)
++ dec->poc_lt_curr[i] = h->rps[LT_CURR].ref[i]->poc;
++
++ if (IS_IRAP(h))
++ dec->flags |= V4L2_HEVC_DECODE_PARAM_FLAG_IRAP_PIC;
++ if (IS_IDR(h))
++ dec->flags |= V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC;
++ if (h->sh.no_output_of_prior_pics_flag)
++ dec->flags |= V4L2_HEVC_DECODE_PARAM_FLAG_NO_OUTPUT_OF_PRIOR;
++
++}
++#endif
++
++static void fill_sps(struct v4l2_ctrl_hevc_sps *ctrl, const HEVCSPS *sps)
++{
++    /* ISO/IEC 23008-2, ITU-T Rec. H.265: Sequence parameter set */
++    /* Each member is named once; anything not named (including flags) starts as zero */
++    *ctrl = (struct v4l2_ctrl_hevc_sps) {
++        .chroma_format_idc = sps->chroma_format_idc,
++        .pic_width_in_luma_samples = sps->width,
++        .pic_height_in_luma_samples = sps->height,
++        .bit_depth_luma_minus8 = sps->bit_depth - 8,
++        .bit_depth_chroma_minus8 = sps->bit_depth - 8,
++        .log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_poc_lsb - 4,
++        .sps_max_dec_pic_buffering_minus1 = sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering - 1,
++        .sps_max_num_reorder_pics = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics,
++        .sps_max_latency_increase_plus1 = sps->temporal_layer[sps->max_sub_layers - 1].max_latency_increase + 1,
++        .log2_min_luma_coding_block_size_minus3 = sps->log2_min_cb_size - 3,
++        .log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_coding_block_size,
++        .log2_min_luma_transform_block_size_minus2 = sps->log2_min_tb_size - 2,
++        .log2_diff_max_min_luma_transform_block_size = sps->log2_max_trafo_size - sps->log2_min_tb_size,
++        .max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter,
++        .max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra,
++        .pcm_sample_bit_depth_luma_minus1 = sps->pcm.bit_depth - 1,
++        .pcm_sample_bit_depth_chroma_minus1 = sps->pcm.bit_depth_chroma - 1,
++        .log2_min_pcm_luma_coding_block_size_minus3 = sps->pcm.log2_min_pcm_cb_size - 3,
++        .log2_diff_max_min_pcm_luma_coding_block_size = sps->pcm.log2_max_pcm_cb_size - sps->pcm.log2_min_pcm_cb_size,
++        .num_short_term_ref_pic_sets = sps->nb_st_rps,
++        .num_long_term_ref_pics_sps = sps->num_long_term_ref_pics_sps,
++        .sps_max_sub_layers_minus1 = sps->max_sub_layers - 1,
++    };
++
++    if (sps->separate_colour_plane_flag)
++        ctrl->flags |= V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE;
++
++    if (sps->scaling_list_enable_flag)
++        ctrl->flags |= V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED;
++
++    if (sps->amp_enabled_flag)
++        ctrl->flags |= V4L2_HEVC_SPS_FLAG_AMP_ENABLED;
++
++    if (sps->sao_enabled)
++        ctrl->flags |= V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET;
++
++    if (sps->pcm_enabled_flag)
++        ctrl->flags |= V4L2_HEVC_SPS_FLAG_PCM_ENABLED;
++
++    if (sps->pcm.loop_filter_disable_flag)
++        ctrl->flags |= V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED;
++
++    if (sps->long_term_ref_pics_present_flag)
++        ctrl->flags |= V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT;
++
++    if (sps->sps_temporal_mvp_enabled_flag)
++        ctrl->flags |= V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED;
++
++    if (sps->sps_strong_intra_smoothing_enable_flag)
++        ctrl->flags |= V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED;
++}
++
++static void fill_scaling_matrix(const ScalingList * const sl,
++ struct v4l2_ctrl_hevc_scaling_matrix * const sm)
++{
++ unsigned int i;
++
++ for (i = 0; i < 6; i++) {
++ unsigned int j;
++
++ for (j = 0; j < 16; j++)
++ sm->scaling_list_4x4[i][j] = sl->sl[0][i][j];
++ for (j = 0; j < 64; j++) {
++ sm->scaling_list_8x8[i][j] = sl->sl[1][i][j];
++ sm->scaling_list_16x16[i][j] = sl->sl[2][i][j];
++ if (i < 2)
++ sm->scaling_list_32x32[i][j] = sl->sl[3][i * 3][j];
++ }
++ sm->scaling_list_dc_coef_16x16[i] = sl->sl_dc[0][i];
++ if (i < 2)
++ sm->scaling_list_dc_coef_32x32[i] = sl->sl_dc[1][i * 3];
++ }
++}
++
++static void fill_pps(struct v4l2_ctrl_hevc_pps * const ctrl, const HEVCPPS * const pps)
++{
++ uint64_t flags = 0;
++
++ if (pps->dependent_slice_segments_enabled_flag)
++ flags |= V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED;
++
++ if (pps->output_flag_present_flag)
++ flags |= V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT;
++
++ if (pps->sign_data_hiding_flag)
++ flags |= V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED;
++
++ if (pps->cabac_init_present_flag)
++ flags |= V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT;
++
++ if (pps->constrained_intra_pred_flag)
++ flags |= V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED;
++
++ if (pps->transform_skip_enabled_flag)
++ flags |= V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED;
++
++ if (pps->cu_qp_delta_enabled_flag)
++ flags |= V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED;
++
++ if (pps->pic_slice_level_chroma_qp_offsets_present_flag)
++ flags |= V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT;
++
++ if (pps->weighted_pred_flag)
++ flags |= V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED;
++
++ if (pps->weighted_bipred_flag)
++ flags |= V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED;
++
++ if (pps->transquant_bypass_enable_flag)
++ flags |= V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED;
++
++ if (pps->tiles_enabled_flag)
++ flags |= V4L2_HEVC_PPS_FLAG_TILES_ENABLED;
++
++ if (pps->entropy_coding_sync_enabled_flag)
++ flags |= V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED;
++
++ if (pps->loop_filter_across_tiles_enabled_flag)
++ flags |= V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED;
++
++ if (pps->seq_loop_filter_across_slices_enabled_flag)
++ flags |= V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED;
++
++ if (pps->deblocking_filter_override_enabled_flag)
++ flags |= V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED;
++
++ if (pps->disable_dbf)
++ flags |= V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER;
++
++ if (pps->lists_modification_present_flag)
++ flags |= V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT;
++
++ if (pps->slice_header_extension_present_flag)
++ flags |= V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT;
++
++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture parameter set */
++ *ctrl = (struct v4l2_ctrl_hevc_pps) {
++ .num_extra_slice_header_bits = pps->num_extra_slice_header_bits,
++ .init_qp_minus26 = pps->pic_init_qp_minus26,
++ .diff_cu_qp_delta_depth = pps->diff_cu_qp_delta_depth,
++ .pps_cb_qp_offset = pps->cb_qp_offset,
++ .pps_cr_qp_offset = pps->cr_qp_offset,
++ .pps_beta_offset_div2 = pps->beta_offset / 2,
++ .pps_tc_offset_div2 = pps->tc_offset / 2,
++ .log2_parallel_merge_level_minus2 = pps->log2_parallel_merge_level - 2,
++ .flags = flags
++ };
++
++
++ if (pps->tiles_enabled_flag) {
++ ctrl->num_tile_columns_minus1 = pps->num_tile_columns - 1;
++ ctrl->num_tile_rows_minus1 = pps->num_tile_rows - 1;
++
++ for (int i = 0; i < pps->num_tile_columns; i++)
++ ctrl->column_width_minus1[i] = pps->column_width[i] - 1;
++
++ for (int i = 0; i < pps->num_tile_rows; i++)
++ ctrl->row_height_minus1[i] = pps->row_height[i] - 1;
++ }
++}
++
++// Post-process hook, called before finally returning the frame to the user.
++// The corrupt flag is set here as this is actually the frame structure that
++// is going to the user (in MT land each thread has its own pool)
++static int frame_post_process(void *logctx, AVFrame *frame)
++{
++ V4L2MediaReqDescriptor *rd = (V4L2MediaReqDescriptor*)frame->data[0];
++ // Start from "not corrupt"; the flag is set again below only on a failed wait
++// av_log(NULL, AV_LOG_INFO, "%s\n", __func__);
++ frame->flags &= ~AV_FRAME_FLAG_CORRUPT;
++ if (rd->qe_dst) {
++ MediaBufsStatus stat = qent_dst_wait(rd->qe_dst);
++ if (stat != MEDIABUFS_STATUS_SUCCESS) {
++ av_log(logctx, AV_LOG_ERROR, "%s: Decode fail\n", __func__);
++ frame->flags |= AV_FRAME_FLAG_CORRUPT;
++ }
++ }
++ // Always 0: a failed decode is signalled through the flag, not the return value
++ return 0;
++}
++
++static inline struct timeval cvt_dpb_to_tv(uint64_t t)
++{
++    // Divide by 1000 first, then split the result at one million
++    const uint64_t scaled = t / 1000;
++    struct timeval tv = { .tv_sec = scaled / 1000000, .tv_usec = scaled % 1000000 };
++
++    return tv;
++}
++
++static inline uint64_t cvt_timestamp_to_dpb(const unsigned int t)
++{
++    return UINT64_C(1000) * t; // t is widened to 64 bits before the multiply
++}
++
++static int v4l2_request_hevc_start_frame(AVCodecContext *avctx,
++ av_unused const uint8_t *buffer,
++ av_unused uint32_t size)
++{
++ const HEVCContext *h = avctx->priv_data;
++ V4L2MediaReqDescriptor *const rd = (V4L2MediaReqDescriptor *)h->ref->frame->data[0];
++ V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data;
++ // Put this frame's entry on the decode queue before any other setup
++// av_log(NULL, AV_LOG_INFO, "%s\n", __func__);
++ decode_q_add(&ctx->decode_q, &rd->decode_ent);
++ // No slices collected yet; stamp the frame with the next context timestamp
++ rd->num_slices = 0;
++ ctx->timestamp++;
++ rd->timestamp = cvt_timestamp_to_dpb(ctx->timestamp);
++ // Install frame_post_process() as this frame's post-process hook
++ {
++ FrameDecodeData * const fdd = (FrameDecodeData*)h->ref->frame->private_ref->data;
++ fdd->post_process = frame_post_process;
++ }
++
++ // qe_dst is bound to the data buffer (rd); it is unreffed when that buffer is freed
++ if (!rd->qe_dst)
++ {
++ if ((rd->qe_dst = mediabufs_dst_qent_alloc(ctx->mbufs, ctx->dbufs)) == NULL) {
++ av_log(avctx, AV_LOG_ERROR, "%s: Failed to get dst buffer\n", __func__);
++ return AVERROR(ENOMEM);
++ }
++ }
++
++ ff_thread_finish_setup(avctx); // Allow next thread to enter start_frame
++
++ return 0;
++}
++
++// Fill a DRM frame descriptor from a V4L2 format; returns 0, or -1 if the
++static int drm_from_format(AVDRMFrameDescriptor * const desc, const struct v4l2_format * const format)
++{
++ AVDRMLayerDescriptor *layer = &desc->layers[0];
++ unsigned int width;
++ unsigned int height;
++ unsigned int bpl;
++ uint32_t pixelformat;
++ // (cont.) pixel format is unsupported. Object fd & size are zapped - set them later
++ if (V4L2_TYPE_IS_MULTIPLANAR(format->type)) {
++ width = format->fmt.pix_mp.width;
++ height = format->fmt.pix_mp.height;
++ pixelformat = format->fmt.pix_mp.pixelformat;
++ bpl = format->fmt.pix_mp.plane_fmt[0].bytesperline;
++ }
++ else {
++ width = format->fmt.pix.width;
++ height = format->fmt.pix.height;
++ pixelformat = format->fmt.pix.pixelformat;
++ bpl = format->fmt.pix.bytesperline;
++ }
++ // Map the V4L2 pixel format onto a DRM fourcc + format modifier
++ switch (pixelformat) {
++ case V4L2_PIX_FMT_NV12:
++ layer->format = DRM_FORMAT_NV12;
++ desc->objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR;
++ break;
++#if CONFIG_SAND
++ case V4L2_PIX_FMT_NV12_COL128:
++ layer->format = DRM_FORMAT_NV12;
++ desc->objects[0].format_modifier = DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(bpl);
++ break;
++ case V4L2_PIX_FMT_NV12_10_COL128:
++ layer->format = DRM_FORMAT_P030;
++ desc->objects[0].format_modifier = DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(bpl);
++ break;
++#endif
++#ifdef DRM_FORMAT_MOD_ALLWINNER_TILED
++ case V4L2_PIX_FMT_SUNXI_TILED_NV12:
++ layer->format = DRM_FORMAT_NV12;
++ desc->objects[0].format_modifier = DRM_FORMAT_MOD_ALLWINNER_TILED;
++ break;
++#endif
++#if defined(V4L2_PIX_FMT_NV15) && defined(DRM_FORMAT_NV15)
++ case V4L2_PIX_FMT_NV15:
++ layer->format = DRM_FORMAT_NV15;
++ desc->objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR;
++ break;
++#endif
++ case V4L2_PIX_FMT_NV16:
++ layer->format = DRM_FORMAT_NV16;
++ desc->objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR;
++ break;
++#if defined(V4L2_PIX_FMT_NV20) && defined(DRM_FORMAT_NV20)
++ case V4L2_PIX_FMT_NV20:
++ layer->format = DRM_FORMAT_NV20;
++ desc->objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR;
++ break;
++#endif
++ default:
++ return -1; // unsupported format: nothing below this switch is written
++ }
++ // One object; fd = -1 and size = 0 are placeholders for the caller to fill in
++ desc->nb_objects = 1;
++ desc->objects[0].fd = -1;
++ desc->objects[0].size = 0;
++
++ desc->nb_layers = 1;
++ layer->nb_planes = 2;
++ // Plane 0 starts the object; plane 1's offset/pitch depend on the layout below
++ layer->planes[0].object_index = 0;
++ layer->planes[0].offset = 0;
++ layer->planes[0].pitch = bpl;
++#if CONFIG_SAND
++ if (pixelformat == V4L2_PIX_FMT_NV12_COL128) {
++ layer->planes[1].object_index = 0;
++ layer->planes[1].offset = height * 128;
++ layer->planes[0].pitch = width;
++ layer->planes[1].pitch = width;
++ }
++ else if (pixelformat == V4L2_PIX_FMT_NV12_10_COL128) {
++ layer->planes[1].object_index = 0;
++ layer->planes[1].offset = height * 128;
++ layer->planes[0].pitch = width * 2; // Lies but it keeps DRM import happy
++ layer->planes[1].pitch = width * 2;
++ }
++ else
++#endif
++ {
++ layer->planes[1].object_index = 0;
++ layer->planes[1].offset = layer->planes[0].pitch * height;
++ layer->planes[1].pitch = layer->planes[0].pitch;
++ }
++
++ return 0;
++}
++
++static int
++set_req_ctls(V4L2RequestContextHEVC *ctx, struct media_request * const mreq,
++ struct req_controls *const controls,
++#if HEVC_CTRLS_VERSION >= 2
++ struct v4l2_ctrl_hevc_decode_params * const dec,
++#endif
++ struct v4l2_ctrl_hevc_slice_params * const slices, const unsigned int slice_count,
++ void * const offsets, const size_t offset_count)
++{
++ int rv;
++#if HEVC_CTRLS_VERSION >= 2
++ unsigned int n = 3;
++#else
++ unsigned int n = 2;
++#endif
++ // n = controls always present; 6 slots cover those plus the optional ones below
++ struct v4l2_ext_control control[6] = {
++ {
++ .id = V4L2_CID_STATELESS_HEVC_SPS,
++ .ptr = &controls->sps,
++ .size = sizeof(controls->sps),
++ },
++ {
++ .id = V4L2_CID_STATELESS_HEVC_PPS,
++ .ptr = &controls->pps,
++ .size = sizeof(controls->pps),
++ },
++#if HEVC_CTRLS_VERSION >= 2
++ {
++ .id = V4L2_CID_STATELESS_HEVC_DECODE_PARAMS,
++ .ptr = dec,
++ .size = sizeof(*dec),
++ },
++#endif
++ };
++ // Optional: slice parameter array (slice_count elements)
++ if (slices)
++ control[n++] = (struct v4l2_ext_control) {
++ .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS,
++ .ptr = slices,
++ .size = sizeof(*slices) * slice_count,
++ };
++ // Optional: scaling matrix, only when the caller filled one in
++ if (controls->has_scaling)
++ control[n++] = (struct v4l2_ext_control) {
++ .id = V4L2_CID_STATELESS_HEVC_SCALING_MATRIX,
++ .ptr = &controls->scaling_matrix,
++ .size = sizeof(controls->scaling_matrix),
++ };
++ // Optional (V4+ controls only): entry point offsets (offset_count elements)
++#if HEVC_CTRLS_VERSION >= 4
++ if (offsets)
++ control[n++] = (struct v4l2_ext_control) {
++ .id = V4L2_CID_STATELESS_HEVC_ENTRY_POINT_OFFSETS,
++ .ptr = offsets,
++ .size = sizeof(((struct V4L2MediaReqDescriptor *)0)->offsets[0]) * offset_count,
++ };
++#endif
++ // Apply the n collected controls to request mreq; the helper's result is returned as-is
++ rv = mediabufs_ctl_set_ext_ctrls(ctx->mbufs, mreq, control, n);
++
++ return rv;
++}
++
++// This only works because we started out from a single coded frame buffer
++// that will remain intact until after end_frame
++static int v4l2_request_hevc_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size)
++{
++ const HEVCContext * const h = avctx->priv_data;
++ V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data;
++ V4L2MediaReqDescriptor * const rd = (V4L2MediaReqDescriptor*)h->ref->frame->data[0];
++ int bcount = get_bits_count(&h->HEVClc->gb);
++ uint32_t boff = (ptr_from_index(buffer, bcount/8 + 1) - (buffer + bcount/8 + 1)) * 8 + bcount;
++ // n = index of this slice; block_start = first slice of its max_slices-sized group
++ const unsigned int n = rd->num_slices;
++ const unsigned int block_start = (n / ctx->max_slices) * ctx->max_slices;
++
++ int rv;
++ struct slice_info * si;
++
++ // This looks dodgy but we know that FFmpeg has parsed this from a buffer
++ // that contains the entire frame including the start code
++ if (ctx->start_code == V4L2_STATELESS_HEVC_START_CODE_ANNEX_B) {
++ buffer -= 3;
++ size += 3;
++ boff += 24;
++ if (buffer[0] != 0 || buffer[1] != 0 || buffer[2] != 1) {
++ av_log(avctx, AV_LOG_ERROR, "Start code requested but missing %02x:%02x:%02x\n",
++ buffer[0], buffer[1], buffer[2]);
++ }
++ }
++ // Frame mode: keep one span from the first slice's start to this slice's end
++ if (ctx->decode_mode == V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED) {
++ if (rd->slices == NULL) {
++ if ((rd->slices = av_mallocz(sizeof(*rd->slices))) == NULL)
++ return AVERROR(ENOMEM);
++ rd->slices->ptr = buffer;
++ rd->num_slices = 1;
++ }
++ rd->slices->len = buffer - rd->slices->ptr + size;
++ return 0;
++ }
++ // Slice mode: record this slice individually
++ if ((rv = slice_add(rd)) != 0)
++ return rv;
++
++ si = rd->slices + n;
++ si->ptr = buffer;
++ si->len = size;
++ si->n_offsets = rd->num_offsets;
++ // Not first in its group: rebase size/boff on the group's first slice and extend that slice
++ if (n != block_start) {
++ struct slice_info *const si0 = rd->slices + block_start;
++ const size_t offset = (buffer - si0->ptr);
++ boff += offset * 8;
++ size += offset;
++ si0->len = si->len + offset;
++ }
++
++#if HEVC_CTRLS_VERSION >= 2
++ if (n == 0)
++ fill_decode_params(h, &rd->dec);
++ fill_slice_params(h, &rd->dec, rd->slice_params + n, size * 8, boff);
++#else
++ fill_slice_params(h, rd->slice_params + n, size * 8, boff);
++#endif
++ if (ctx->max_offsets != 0 &&
++ (rv = offsets_add(rd, h->sh.num_entry_point_offsets, h->sh.entry_point_offset)) != 0)
++ return rv;
++
++ return 0;
++}
++
++static void v4l2_request_hevc_abort_frame(AVCodecContext * const avctx)
++{
++    const HEVCContext * const h = avctx->priv_data;
++    V4L2RequestContextHEVC * ctx;
++    V4L2MediaReqDescriptor * rd;
++    if (h->ref == NULL) // no current frame: nothing to undo
++        return;
++    ctx = avctx->internal->hwaccel_priv_data;
++    rd = (V4L2MediaReqDescriptor *)h->ref->frame->data[0];
++    media_request_abort(&rd->req);                     // drop the frame's request, if any
++    mediabufs_src_qent_abort(ctx->mbufs, &rd->qe_src); // and its source buffer
++    decode_q_remove(&ctx->decode_q, &rd->decode_ent);  // finally leave the decode queue
++}
++
++static int send_slice(AVCodecContext * const avctx,
++ V4L2MediaReqDescriptor * const rd,
++ struct req_controls *const controls,
++ const unsigned int i, const unsigned int j)
++{
++ V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data;
++ // Sends slices [i, j) as one media request; is_last when j reaches the slice count
++ const int is_last = (j == rd->num_slices);
++ struct slice_info *const si = rd->slices + i;
++ struct media_request * req = NULL;
++ struct qent_src * src = NULL;
++ MediaBufsStatus stat;
++ void * offsets = rd->offsets + rd->slices[i].n_offsets;
++ size_t n_offsets = (is_last ? rd->num_offsets : rd->slices[j].n_offsets) - rd->slices[i].n_offsets;
++
++ if ((req = media_request_get(ctx->mpool)) == NULL) {
++ av_log(avctx, AV_LOG_ERROR, "%s: Failed to alloc media request\n", __func__);
++ return AVERROR(ENOMEM);
++ }
++ // Attach the SPS/PPS/etc. plus the j - i slice params to the request
++ if (set_req_ctls(ctx, req,
++ controls,
++#if HEVC_CTRLS_VERSION >= 2
++ &rd->dec,
++#endif
++ rd->slice_params + i, j - i,
++ offsets, n_offsets)) {
++ av_log(avctx, AV_LOG_ERROR, "%s: Failed to set req ctls\n", __func__);
++ goto fail1;
++ }
++
++ if ((src = mediabufs_src_qent_get(ctx->mbufs)) == NULL) {
++ av_log(avctx, AV_LOG_ERROR, "%s: Failed to get src buffer\n", __func__);
++ goto fail1;
++ }
++ // Copy si->len bytes of bitstream starting at si->ptr into the source buffer
++ if (qent_src_data_copy(src, 0, si->ptr, si->len, ctx->dbufs) != 0) {
++ av_log(avctx, AV_LOG_ERROR, "%s: Failed data copy\n", __func__);
++ goto fail2;
++ }
++
++ if (qent_src_params_set(src, &controls->tv)) {
++ av_log(avctx, AV_LOG_ERROR, "%s: Failed src param set\n", __func__);
++ goto fail2;
++ }
++ // The destination buffer is only passed with the request that starts at slice 0
++ stat = mediabufs_start_request(ctx->mbufs, &req, &src,
++ i == 0 ? rd->qe_dst : NULL,
++ is_last);
++ // NOTE(review): no req/src cleanup here; presumably start_request consumes both - confirm
++ if (stat != MEDIABUFS_STATUS_SUCCESS) {
++ av_log(avctx, AV_LOG_ERROR, "%s: Failed to start request\n", __func__);
++ return AVERROR_UNKNOWN;
++ }
++ return 0;
++
++fail2:
++ mediabufs_src_qent_abort(ctx->mbufs, &src);
++fail1:
++ media_request_abort(&req);
++ return AVERROR_UNKNOWN;
++}
++
++static int v4l2_request_hevc_end_frame(AVCodecContext *avctx)
++{
++ const HEVCContext * const h = avctx->priv_data;
++ V4L2MediaReqDescriptor *rd = (V4L2MediaReqDescriptor*)h->ref->frame->data[0];
++ V4L2RequestContextHEVC *ctx = avctx->internal->hwaccel_priv_data;
++ struct req_controls rc;
++ unsigned int i;
++ int rv;
++
++ // It is possible, though maybe a bug, to get an end_frame without
++ // a previous start_frame. If we do then give up.
++ if (!decode_q_in_q(&rd->decode_ent)) {
++ av_log(avctx, AV_LOG_DEBUG, "%s: Frame not in decode Q\n", __func__);
++ return AVERROR_INVALIDDATA;
++ }
++
++ {
++ const ScalingList *sl = h->ps.pps->scaling_list_data_present_flag ?
++ &h->ps.pps->scaling_list :
++ h->ps.sps->scaling_list_enable_flag ?
++ &h->ps.sps->scaling_list : NULL;
++ // sl: the PPS scaling list if present, else the SPS one if enabled, else none
++
++ memset(&rc, 0, sizeof(rc));
++ rc.tv = cvt_dpb_to_tv(rd->timestamp);
++ fill_sps(&rc.sps, h->ps.sps);
++ fill_pps(&rc.pps, h->ps.pps);
++ if (sl) {
++ rc.has_scaling = 1;
++ fill_scaling_matrix(sl, &rc.scaling_matrix);
++ }
++ }
++ // From here on every exit path must remove rd from the decode queue
++ decode_q_wait(&ctx->decode_q, &rd->decode_ent);
++
++ // qe_dst needs to be bound to the data buffer and only returned when that is
++ // Alloc almost certainly wants to be serialised if there is any chance of blocking
++ // so we get the next frame to be free in the thread that needs it for decode first.
++ //
++ // In our current world this probably isn't a concern but put it here anyway
++ if (!rd->qe_dst)
++ {
++ if ((rd->qe_dst = mediabufs_dst_qent_alloc(ctx->mbufs, ctx->dbufs)) == NULL) {
++ av_log(avctx, AV_LOG_ERROR, "%s: Failed to get dst buffer\n", __func__);
++ rv = AVERROR(ENOMEM);
++ goto fail;
++ }
++ }
++
++ // Send as slices, in groups of at most ctx->max_slices per request
++ for (i = 0; i < rd->num_slices; i += ctx->max_slices) {
++ const unsigned int e = FFMIN(rd->num_slices, i + ctx->max_slices);
++ if ((rv = send_slice(avctx, rd, &rc, i, e)) != 0)
++ goto fail;
++ }
++ // NOTE(review): drm_from_format() returns -1 for an unknown format; that is ignored here
++ // Set the drm_prime descriptor
++ drm_from_format(&rd->drm, mediabufs_dst_fmt(ctx->mbufs));
++ rd->drm.objects[0].fd = dmabuf_fd(qent_dst_dmabuf(rd->qe_dst, 0));
++ rd->drm.objects[0].size = dmabuf_size(qent_dst_dmabuf(rd->qe_dst, 0));
++
++ decode_q_remove(&ctx->decode_q, &rd->decode_ent);
++ return 0;
++
++fail:
++ decode_q_remove(&ctx->decode_q, &rd->decode_ent);
++ return rv;
++}
++
++static inline int
++ctrl_valid(const struct v4l2_query_ext_ctrl * const c, const int64_t v)
++{
++    return !(v < c->minimum || v > c->maximum); // 1 iff minimum <= v <= maximum
++}
++
++// Initial check & init: returns 0 if the driver's HEVC controls fit this build
++static int
++probe(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx)
++{
++ const HEVCContext *h = avctx->priv_data;
++ const HEVCSPS * const sps = h->ps.sps;
++ struct v4l2_ctrl_hevc_sps ctrl_sps;
++ unsigned int i;
++
++ // Check for var slice array
++ struct v4l2_query_ext_ctrl qc[] = {
++ { .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS },
++ { .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, },
++ { .id = V4L2_CID_STATELESS_HEVC_SPS },
++ { .id = V4L2_CID_STATELESS_HEVC_PPS },
++ { .id = V4L2_CID_STATELESS_HEVC_SCALING_MATRIX },
++#if HEVC_CTRLS_VERSION >= 2
++ { .id = V4L2_CID_STATELESS_HEVC_DECODE_PARAMS },
++#endif
++ };
++ // Order & size must match!
++ static const size_t ctrl_sizes[] = {
++ sizeof(struct v4l2_ctrl_hevc_slice_params),
++ sizeof(int32_t),
++ sizeof(struct v4l2_ctrl_hevc_sps),
++ sizeof(struct v4l2_ctrl_hevc_pps),
++ sizeof(struct v4l2_ctrl_hevc_scaling_matrix),
++#if HEVC_CTRLS_VERSION >= 2
++ sizeof(struct v4l2_ctrl_hevc_decode_params),
++#endif
++ };
++ const unsigned int noof_ctrls = FF_ARRAY_ELEMS(qc);
++ // V2 controls are rejected on drivers >= 5.18.0, V3 controls on drivers older than that
++#if HEVC_CTRLS_VERSION == 2
++ if (mediabufs_ctl_driver_version(ctx->mbufs) >= MEDIABUFS_DRIVER_VERSION(5, 18, 0))
++ return AVERROR(EINVAL);
++#elif HEVC_CTRLS_VERSION == 3
++ if (mediabufs_ctl_driver_version(ctx->mbufs) < MEDIABUFS_DRIVER_VERSION(5, 18, 0))
++ return AVERROR(EINVAL);
++#endif
++ // Query result is not checked directly; a control left with type == 0 counts as missing
++ mediabufs_ctl_query_ext_ctrls(ctx->mbufs, qc, noof_ctrls);
++ i = 0;
++#if HEVC_CTRLS_VERSION >= 4
++ // Skip slice check if no slice mode
++ if (qc[1].type != 0 && !ctrl_valid(qc + 1, V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED))
++ i = 1;
++#else
++ // Fail frame mode silently for anything prior to V4
++ if (qc[1].type == 0 || !ctrl_valid(qc + 1, V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED))
++ return AVERROR(EINVAL);
++#endif
++ for (; i != noof_ctrls; ++i) {
++ if (qc[i].type == 0) {
++ av_log(avctx, AV_LOG_DEBUG, "Probed V%d control %#x missing\n", HEVC_CTRLS_VERSION, qc[i].id);
++ return AVERROR(EINVAL);
++ }
++ if (ctrl_sizes[i] != (size_t)qc[i].elem_size) {
++ av_log(avctx, AV_LOG_DEBUG, "Probed V%d control %d size mismatch %zu != %zu\n",
++ HEVC_CTRLS_VERSION, i, ctrl_sizes[i], (size_t)qc[i].elem_size);
++ return AVERROR(EINVAL);
++ }
++ }
++ // Final check: the driver must accept an SPS built from the current stream
++ fill_sps(&ctrl_sps, sps);
++
++ if (mediabufs_set_ext_ctrl(ctx->mbufs, NULL, V4L2_CID_STATELESS_HEVC_SPS, &ctrl_sps, sizeof(ctrl_sps))) {
++ av_log(avctx, AV_LOG_ERROR, "Failed to set initial SPS\n");
++ return AVERROR(EINVAL);
++ }
++
++ return 0;
++}
++
++// Final init: choose decode mode, start code & array limits, then set them on the device
++static int
++set_controls(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx)
++{
++ int ret;
++ // Indices into querys[] are used literally below - keep the order
++ struct v4l2_query_ext_ctrl querys[] = {
++ { .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, },
++ { .id = V4L2_CID_STATELESS_HEVC_START_CODE, },
++ { .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS, },
++#if HEVC_CTRLS_VERSION >= 4
++ { .id = V4L2_CID_STATELESS_HEVC_ENTRY_POINT_OFFSETS, },
++#endif
++ };
++
++ struct v4l2_ext_control ctrls[] = {
++ { .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, },
++ { .id = V4L2_CID_STATELESS_HEVC_START_CODE, },
++ };
++
++ mediabufs_ctl_query_ext_ctrls(ctx->mbufs, querys, FF_ARRAY_ELEMS(querys));
++ // max_slices: 1 unless SLICE_PARAMS is a 1-D dynamic array with non-zero dims[0]
++ ctx->max_slices = (!(querys[2].flags & V4L2_CTRL_FLAG_DYNAMIC_ARRAY) ||
++ querys[2].nr_of_dims != 1 || querys[2].dims[0] == 0) ?
++ 1 : querys[2].dims[0];
++ av_log(avctx, AV_LOG_DEBUG, "%s: Max slices %d\n", __func__, ctx->max_slices);
++ // max_offsets: 0 (offsets not sent) unless the control exists as a 1-D array
++#if HEVC_CTRLS_VERSION >= 4
++ ctx->max_offsets = (querys[3].type == 0 || querys[3].nr_of_dims != 1) ?
++ 0 : querys[3].dims[0];
++ av_log(avctx, AV_LOG_DEBUG, "%s: Entry point offsets %d\n", __func__, ctx->max_offsets);
++#else
++ ctx->max_offsets = 0;
++#endif
++ // Decode mode: the driver default if it is a known mode, else frame-based, else slice-based
++ if (querys[0].default_value == V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED ||
++ querys[0].default_value == V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED)
++ ctx->decode_mode = querys[0].default_value;
++ else if (ctrl_valid(querys + 0, V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED))
++ ctx->decode_mode = V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED;
++ else if (ctrl_valid(querys + 0, V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED))
++ ctx->decode_mode = V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED;
++ else {
++ av_log(avctx, AV_LOG_ERROR, "%s: unsupported decode mode\n", __func__);
++ return AVERROR(EINVAL);
++ }
++ // Start code: the driver default if it is a known value, else Annex B, else none
++ if (querys[1].default_value == V4L2_STATELESS_HEVC_START_CODE_NONE ||
++ querys[1].default_value == V4L2_STATELESS_HEVC_START_CODE_ANNEX_B)
++ ctx->start_code = querys[1].default_value;
++ else if (ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_ANNEX_B))
++ ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_ANNEX_B;
++ else if (ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_NONE))
++ ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_NONE;
++ else {
++ av_log(avctx, AV_LOG_ERROR, "%s: unsupported start code\n", __func__);
++ return AVERROR(EINVAL);
++ }
++
++ // If we are in slice mode & START_CODE_NONE supported then pick that
++ // as it doesn't require the slightly dodgy look backwards in our raw buffer
++ if (ctx->decode_mode == V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED &&
++ ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_NONE))
++ ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_NONE;
++
++ ctrls[0].value = ctx->decode_mode;
++ ctrls[1].value = ctx->start_code;
++ // NOTE(review): AVERROR(-ret) assumes ret is a negative errno on failure - confirm
++ ret = mediabufs_ctl_set_ext_ctrls(ctx->mbufs, NULL, ctrls, FF_ARRAY_ELEMS(ctrls));
++ return !ret ? 0 : AVERROR(-ret);
++}
++
++static void v4l2_req_frame_free(void *opaque, uint8_t *data)
++{
++    AVCodecContext *avctx = opaque;
++    V4L2MediaReqDescriptor * const rd = (V4L2MediaReqDescriptor*)data;
++    // Free callback for buffers made by v4l2_req_frame_alloc(): data is the descriptor
++    av_log(NULL, AV_LOG_DEBUG, "%s: avctx=%p data=%p\n", __func__, avctx, data);
++    // Drop this frame's reference on its destination buffer
++    qent_dst_unref(&rd->qe_dst);
++
++    // We don't expect req or qe_src to be set
++    if (rd->req || rd->qe_src)
++        av_log(NULL, AV_LOG_ERROR, "%s: qe_src %p or req %p not NULL\n", __func__, rd->qe_src, rd->req); // args in message order
++    // Per-frame arrays, then the descriptor itself
++    av_freep(&rd->slices);
++    av_freep(&rd->slice_params);
++    av_freep(&rd->offsets);
++
++    av_free(rd);
++}
++
++static AVBufferRef *v4l2_req_frame_alloc(void *opaque, int size)
++{
++    AVCodecContext * const avctx = opaque;
++    AVBufferRef *ref = NULL;
++    uint8_t *data;
++
++    // Zero-filled backing store of the requested size; no store, no buffer
++    if ((data = av_mallocz(size)) == NULL)
++        return NULL;
++
++    av_log(avctx, AV_LOG_DEBUG, "%s: avctx=%p size=%d data=%p\n", __func__, avctx, size, data);
++
++    // v4l2_req_frame_free is installed as the free callback, with avctx as
++    // its opaque argument
++    ref = av_buffer_create(data, size, v4l2_req_frame_free, avctx, 0);
++    if (ref == NULL) {
++        // No wrapper was created, so data is still ours to release
++        av_freep(&data);
++    }
++    return ref;
++}
++
++#if 0 // Compiled out: only referenced from the disabled pool setup in frame_params()
++static void v4l2_req_pool_free(void *opaque)
++{
++ av_log(NULL, AV_LOG_DEBUG, "%s: opaque=%p\n", __func__, opaque);
++}
++
++static void v4l2_req_hwframe_ctx_free(AVHWFramesContext *hwfc)
++{
++ av_log(NULL, AV_LOG_DEBUG, "%s: hwfc=%p pool=%p\n", __func__, hwfc, hwfc->pool);
++
++ av_buffer_pool_uninit(&hwfc->pool);
++}
++#endif
++
++static int frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx)
++{
++ V4L2RequestContextHEVC *ctx = avctx->internal->hwaccel_priv_data;
++ AVHWFramesContext *hwfc = (AVHWFramesContext*)hw_frames_ctx->data;
++ const struct v4l2_format *vfmt = mediabufs_dst_fmt(ctx->mbufs);
++ // Frames are DRM_PRIME; sw_format and size come from the current destination format
++ hwfc->format = AV_PIX_FMT_DRM_PRIME;
++ hwfc->sw_format = pixel_format_from_format(vfmt);
++ if (V4L2_TYPE_IS_MULTIPLANAR(vfmt->type)) {
++ hwfc->width = vfmt->fmt.pix_mp.width;
++ hwfc->height = vfmt->fmt.pix_mp.height;
++ } else {
++ hwfc->width = vfmt->fmt.pix.width;
++ hwfc->height = vfmt->fmt.pix.height;
++ }
++#if 0
++ hwfc->pool = av_buffer_pool_init2(sizeof(V4L2MediaReqDescriptor), avctx, v4l2_req_frame_alloc, v4l2_req_pool_free);
++ if (!hwfc->pool)
++ return AVERROR(ENOMEM);
++
++ hwfc->free = v4l2_req_hwframe_ctx_free;
++
++ hwfc->initial_pool_size = 1;
++
++ switch (avctx->codec_id) {
++ case AV_CODEC_ID_VP9:
++ hwfc->initial_pool_size += 8;
++ break;
++ case AV_CODEC_ID_VP8:
++ hwfc->initial_pool_size += 3;
++ break;
++ default:
++ hwfc->initial_pool_size += 2;
++ }
++#endif
++ av_log(avctx, AV_LOG_DEBUG, "%s: avctx=%p ctx=%p hw_frames_ctx=%p hwfc=%p pool=%p width=%d height=%d initial_pool_size=%d\n", __func__, avctx, ctx, hw_frames_ctx, hwfc, hwfc->pool, hwfc->width, hwfc->height, hwfc->initial_pool_size);
++ // With the pool code above compiled out, pool and initial_pool_size are only logged here
++ return 0;
++}
++
++static int alloc_frame(AVCodecContext * avctx, AVFrame *frame)
++{
++    int rv;
++    // A zeroed V4L2MediaReqDescriptor wrapped in an AVBufferRef: buf[0] holds the
++    // reference and data[0] is the pointer the decode hooks cast back to the descriptor
++    frame->buf[0] = v4l2_req_frame_alloc(avctx, sizeof(V4L2MediaReqDescriptor));
++    if (!frame->buf[0])
++        return AVERROR(ENOMEM);
++    frame->data[0] = frame->buf[0]->data;
++    // av_buffer_ref() returns NULL on allocation failure: do not hand back a
++    // frame that silently lacks its hw_frames_ctx reference
++    frame->hw_frames_ctx = av_buffer_ref(avctx->hw_frames_ctx);
++    if (!frame->hw_frames_ctx)
++        rv = AVERROR(ENOMEM);
++    else if ((rv = ff_attach_decode_data(frame)) != 0)
++        av_log(avctx, AV_LOG_ERROR, "Failed to attach decode data to frame\n");
++    if (rv != 0)
++        av_frame_unref(frame); // any failure past this point releases what was attached
++    return rv;
++}
++
++const v4l2_req_decode_fns V(ff_v4l2_req_hevc) = {
++ .src_pix_fmt_v4l2 = V4L2_PIX_FMT_HEVC_SLICE,
++ .name = "V4L2 HEVC stateless V" STR(HEVC_CTRLS_VERSION),
++ .probe = probe,
++ .set_controls = set_controls,
++ // Per-frame decode hooks and frame allocation, all defined earlier in this file
++ .start_frame = v4l2_request_hevc_start_frame,
++ .decode_slice = v4l2_request_hevc_decode_slice,
++ .end_frame = v4l2_request_hevc_end_frame,
++ .abort_frame = v4l2_request_hevc_abort_frame,
++ .frame_params = frame_params,
++ .alloc_frame = alloc_frame,
++};
++
+--- /dev/null
++++ b/libavcodec/v4l2_req_media.c
+@@ -0,0 +1,1808 @@
++/*
++ * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sub license, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial portions
++ * of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
++ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
++ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
++ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
++ */
++
++#include <errno.h>
++#include <fcntl.h>
++#include <poll.h>
++#include <pthread.h>
++#include <semaphore.h>
++#include <stdatomic.h>
++#include <stdbool.h>
++#include <stdlib.h>
++#include <string.h>
++#include <unistd.h>
++#include <linux/media.h>
++#include <linux/mman.h>
++#include <sys/ioctl.h>
++#include <sys/select.h>
++#include <sys/ioctl.h>
++#include <sys/mman.h>
++
++#include <linux/videodev2.h>
++
++#include "v4l2_req_dmabufs.h"
++#include "v4l2_req_media.h"
++#include "v4l2_req_pollqueue.h"
++#include "v4l2_req_utils.h"
++#include "weak_link.h"
++
++
++/*
++ * floor(log2(x)) for x > 0; returns 0 for x == 0.
++ *
++ * Binary search on the bit position. The first probe is half the width of
++ * size_t, so values of 4GiB and above are handled on 64-bit builds too
++ * (fixed 16/8/4/2/1 steps on their own can never report more than 31).
++ * Results for values below 2^32 are unchanged.
++ */
++static unsigned int log2_size(size_t x)
++{
++    unsigned int n = 0;
++    unsigned int step;
++
++    for (step = (unsigned int)sizeof(x) * 4; step != 0; step >>= 1) {
++        /* Any bit set above the low 'step' bits => answer is at least 'step' */
++        if ((x >> step) != 0) {
++            n += step;
++            x >>= step;
++        }
++    }
++
++    return n;
++}
++
++static size_t round_up_size(const size_t x)
++{
++    /* Admit no size < 256 */
++    const unsigned int n = x < 256 ? 8 : log2_size(x) - 1;
++    const size_t three = (size_t)3 << n; /* size_t shifts: plain int 4 << n overflows for big x */
++    /* Next step up on the 3*2^n / 4*2^n ladder, always strictly above x */
++    return x >= three ? (size_t)4 << n : three;
++}
++
++struct media_request;
++
++struct media_pool {
++ int fd; // media device fd, opened in media_pool_new()
++ sem_t sem; // waited on in media_request_get(), posted in media_request_done()
++ pthread_mutex_t lock; // held while free_reqs is read or changed
++ unsigned int pool_n; // number of elements in pool_reqs
++ struct media_request * pool_reqs; // calloc'd array of all requests
++ struct media_request * free_reqs; // head of the free list, linked via ->next
++ struct pollqueue * pq; // poll queue passed to media_pool_new()
++};
++
++struct media_request {
++ struct media_request * next; // free-list link; NULL once handed out by media_request_get()
++ struct media_pool * mp; // owning pool
++ int fd; // request fd from MEDIA_IOC_REQUEST_ALLOC, -1 if none
++ struct polltask * pt; // poll task that runs media_request_done() for this request
++};
++
++static inline enum v4l2_memory
++mediabufs_memory_to_v4l2(const enum mediabufs_memory m)
++{
++ return (enum v4l2_memory)m; // plain cast: assumes both enums share numeric values - TODO confirm
++}
++
++const char *
++mediabufs_memory_name(const enum mediabufs_memory m)
++{
++    // Returns a string literal naming m; values not listed give "Unknown"
++    const char * name;
++    if (m == MEDIABUFS_MEMORY_UNSET)
++        name = "Unset";
++    else if (m == MEDIABUFS_MEMORY_MMAP)
++        name = "MMap";
++    else if (m == MEDIABUFS_MEMORY_USERPTR)
++        name = "UserPtr";
++    else if (m == MEDIABUFS_MEMORY_OVERLAY)
++        name = "Overlay";
++    else if (m == MEDIABUFS_MEMORY_DMABUF)
++        name = "DMABuf";
++    else
++        name = "Unknown";
++    return name;
++}
++
++
++static inline int do_trywait(sem_t *const sem)
++{
++    int rv; // 0 on success, else -errno; an interrupted call is simply retried
++    do {
++        rv = sem_trywait(sem) ? -errno : 0;
++    } while (rv == -EINTR);
++    return rv;
++}
++
++static inline int do_wait(sem_t *const sem)
++{
++    int rv; // 0 on success, else -errno; an interrupted call is simply retried
++    do {
++        rv = sem_wait(sem) ? -errno : 0;
++    } while (rv == -EINTR);
++    return rv;
++}
++
++static int request_buffers(int video_fd, unsigned int type,
++                           enum mediabufs_memory memory, unsigned int buffers_count)
++{
++    struct v4l2_requestbuffers req;
++    int err;
++
++    // Zero everything first so fields not set below reach the driver as 0
++    memset(&req, 0, sizeof(req));
++    req.count = buffers_count;
++    req.memory = mediabufs_memory_to_v4l2(memory);
++    req.type = type;
++
++    if (ioctl(video_fd, VIDIOC_REQBUFS, &req) >= 0)
++        return 0;
++
++    // Take errno before logging can disturb it; it is returned negated
++    err = errno;
++    request_log("Unable to request %d type %d buffers: %s\n", buffers_count, type, strerror(err));
++    return -err;
++}
++
++
++static int set_stream(int video_fd, unsigned int type, bool enable)
++{
++    // Both ioctls take a pointer to the buffer type
++    enum v4l2_buf_type buf_type = type;
++    int err;
++
++    if (ioctl(video_fd, enable ? VIDIOC_STREAMON : VIDIOC_STREAMOFF,
++              &buf_type) >= 0)
++        return 0;
++
++    // Take errno before logging can disturb it; it is returned negated
++    err = errno;
++    request_log("Unable to %sable stream: %s\n",
++                enable ? "en" : "dis", strerror(err));
++    return -err;
++}
++
++
++
++struct media_request * media_request_get(struct media_pool * const mp)
++{
++    struct media_request *req;
++
++    /* Timeout handled by poll code */
++    if (do_wait(&mp->sem) != 0)
++        return NULL;
++
++    // Pop the head of the free list (if there is one) under the pool lock
++    pthread_mutex_lock(&mp->lock);
++    if ((req = mp->free_reqs) != NULL) {
++        mp->free_reqs = req->next;
++        req->next = NULL;
++    }
++    pthread_mutex_unlock(&mp->lock);
++    return req;
++}
++
++int media_request_fd(const struct media_request * const req)
++{
++ return req->fd; // the request's own fd (from MEDIA_IOC_REQUEST_ALLOC), not the media device fd
++}
++
++int media_request_start(struct media_request * const req)
++{
++ while (ioctl(req->fd, MEDIA_REQUEST_IOC_QUEUE, NULL) == -1) // queue the request
++ {
++ const int err = errno;
++ if (err == EINTR) // interrupted: try again
++ continue;
++ request_log("%s: Failed to Q media: (%d) %s\n", __func__, err, strerror(err));
++ return -err; // any other failure: negated errno
++ }
++ // Arm the request's poll task; 2000 is presumably a timeout in ms - confirm
++ pollqueue_add_task(req->pt, 2000);
++ return 0;
++}
++
++static void media_request_done(void *v, short revents) // revents is not examined
++{
++ struct media_request *const req = v;
++ struct media_pool *const mp = req->mp;
++
++ /* ** Not sure what to do about timeout */
++ // A failed reinit is only logged; the request goes back on the free list regardless
++ if (ioctl(req->fd, MEDIA_REQUEST_IOC_REINIT, NULL) < 0)
++ request_log("Unable to reinit media request: %s\n",
++ strerror(errno));
++ // Push onto the free list under the lock, then post the semaphore getters wait on
++ pthread_mutex_lock(&mp->lock);
++ req->next = mp->free_reqs;
++ mp->free_reqs = req;
++ pthread_mutex_unlock(&mp->lock);
++ sem_post(&mp->sem);
++}
++
++int media_request_abort(struct media_request ** const preq)
++{
++    struct media_request * const req = *preq;
++
++    // A NULL request is accepted and ignored; the result is 0 either way
++    if (req != NULL) {
++        *preq = NULL; // clear the caller's handle before recycling the request
++        media_request_done(req, 0);
++    }
++    return 0;
++}
++
++static void free_req_pool(struct media_request * const pool, const unsigned int n)
++{
++    struct media_request * req;
++    // Every slot: delete its poll task (if any) and close its fd (if open)
++    for (req = pool; req != pool + n; ++req) {
++        if (req->pt)
++            polltask_delete(&req->pt);
++        if (req->fd != -1)
++            close(req->fd);
++    }
++    free(pool);
++}
++
++struct media_pool * media_pool_new(const char * const media_path,
++                                   struct pollqueue * const pq,
++                                   const unsigned int n)
++{
++    struct media_pool * const mp = calloc(1, sizeof(*mp));
++    unsigned int i;
++    // Builds a pool of n media requests on media_path; returns NULL on any failure
++    if (!mp)
++        goto fail0;
++
++    mp->pq = pq;
++    pthread_mutex_init(&mp->lock, NULL);
++    mp->fd = open(media_path, O_RDWR | O_NONBLOCK);
++    if (mp->fd == -1) {
++        request_log("Failed to open '%s': %s\n", media_path, strerror(errno));
++        goto fail1;
++    }
++
++    if ((mp->pool_reqs = calloc(n, sizeof(*mp->pool_reqs))) == NULL)
++        goto fail3;
++    mp->pool_n = n;
++    for (i = 0; i != n; ++i) {
++        mp->pool_reqs[i].mp = mp;
++        mp->pool_reqs[i].fd = -1;
++    }
++    // fd = -1 / pt = NULL above let free_req_pool() cope with a partly built pool
++    for (i = 0; i != n; ++i) {
++        struct media_request * const req = mp->pool_reqs + i;
++
++        if (ioctl(mp->fd, MEDIA_IOC_REQUEST_ALLOC, &req->fd) == -1) {
++            request_log("Failed to alloc request %d: %s\n", i, strerror(errno));
++            goto fail4;
++        }
++
++        req->pt = polltask_new(pq, req->fd, POLLPRI, media_request_done, req);
++        if (!req->pt)
++            goto fail4;
++        // Push onto the free list
++        req->next = mp->free_reqs;
++        mp->free_reqs = req;
++    }
++    // The semaphore counts free requests; fail cleanly if it cannot be set up
++    if (sem_init(&mp->sem, 0, n) != 0)
++        goto fail4;
++    return mp;
++
++fail4:
++    free_req_pool(mp->pool_reqs, mp->pool_n);
++fail3:
++    close(mp->fd);
++fail1: // the mutex was initialised before open(), so it is destroyed on this path too
++    pthread_mutex_destroy(&mp->lock);
++    free(mp);
++fail0:
++    return NULL;
++}
++
++void media_pool_delete(struct media_pool ** pMp)
++{
++    struct media_pool * const pool = *pMp;
++
++    if (pool != NULL) {
++        *pMp = NULL; // clear the caller's handle before tearing down
++        // Requests first (poll tasks + request fds), then the device fd
++        free_req_pool(pool->pool_reqs, pool->pool_n);
++        close(pool->fd);
++        sem_destroy(&pool->sem);
++        pthread_mutex_destroy(&pool->lock);
++        free(pool);
++    }
++}
++
++
++#define INDEX_UNSET (~(uint32_t)0)
++
++enum qent_status {
++ QENT_NEW = 0, // Initial state - shouldn't last
++ QENT_FREE, // On the free chain
++ QENT_PENDING, // User holds the entry
++ QENT_WAITING, // On the in-use chain
++ QENT_DONE, // Frame received
++ QENT_ERROR, // Error
++ QENT_IMPORT // NOTE(review): meaning not shown in this section - presumably an imported buffer; confirm
++};
++
++struct qent_base {
++ atomic_int ref_count; // starts at 0 (QENT_BASE_INITIALIZER)
++ struct qent_base *next; // list links, maintained by the ql_* helpers
++ struct qent_base *prev;
++ enum qent_status status; // starts as QENT_NEW
++ enum mediabufs_memory memtype;
++ uint32_t index; // INDEX_UNSET until assigned
++ struct dmabuf_h *dh[VIDEO_MAX_PLANES]; // one handle slot per plane; all freed by qe_base_uninit()
++ struct timeval timestamp;
++};
++
++struct qent_src {
++ struct qent_base base; // must stay first: base_to_src() casts a qent_base * back to this type
++ int fixed_size; // NOTE(review): use not visible in this section - confirm semantics
++};
++
++struct qent_dst {
++ struct qent_base base; // must stay first: base_to_dst() casts a qent_base * back to this type
++ bool waiting; // NOTE(review): presumably guarded by lock/cond below - confirm
++ pthread_mutex_t lock; // set up in qe_dst_new(), destroyed in qe_dst_free()
++ pthread_cond_t cond; // set up in qe_dst_new(), destroyed in qe_dst_free()
++ struct ff_weak_link_client * mbc_wl; // weak-link reference taken in qe_dst_new(), dropped in qe_dst_free()
++};
++
++struct qe_list_head {
++ struct qent_base *head; // first entry, NULL when the list is empty
++ struct qent_base *tail; // last entry; ql_add_tail() appends after it
++};
++
++struct buf_pool {
++ enum mediabufs_memory memtype;
++ pthread_mutex_t lock; // NOTE(review): presumably guards the two lists below - confirm
++ sem_t free_sem; // NOTE(review): presumably counts entries on the free list - confirm
++ struct qe_list_head free; // list of free entries
++ struct qe_list_head inuse; // list of entries in use
++};
++
++
++/* Downcast a base entry to the dst entry that embeds it as its first member */
++static inline struct qent_dst *base_to_dst(struct qent_base *be)
++{
++    struct qent_dst *const dst = (struct qent_dst *)be;
++    return dst;
++}
++
++/* Downcast a base entry to the src entry that embeds it as its first member */
++static inline struct qent_src *base_to_src(struct qent_base *be)
++{
++    struct qent_src *const src = (struct qent_src *)be;
++    return src;
++}
++
++
++/*
++ * Designated initializer for a struct qent_base of memory type mtype.
++ * Members not named here (list links, dh[], timestamp) are zero-initialized.
++ */
++#define QENT_BASE_INITIALIZER(mtype) {\
++ .ref_count = ATOMIC_VAR_INIT(0),\
++ .status = QENT_NEW,\
++ .memtype = (mtype),\
++ .index = INDEX_UNSET\
++}
++
++/* Free every plane's dmabuf handle and leave all slots NULL */
++static void qe_base_uninit(struct qent_base *const be)
++{
++    struct dmabuf_h **slot = be->dh;
++    struct dmabuf_h **const end = be->dh + VIDEO_MAX_PLANES;
++
++    for (; slot != end; ++slot) {
++        dmabuf_free(*slot);
++        *slot = NULL;
++    }
++}
++
++/* Release a source entry and its buffers; NULL is a no-op */
++static void qe_src_free(struct qent_src *const be_src)
++{
++    if (be_src != NULL) {
++        qe_base_uninit(&be_src->base);
++        free(be_src);
++    }
++}
++
++/* Allocate a source entry of the given memory type; returns NULL if malloc fails */
++static struct qent_src * qe_src_new(enum mediabufs_memory mtype)
++{
++    struct qent_src *const ent = malloc(sizeof(struct qent_src));
++
++    if (ent != NULL) {
++        *ent = (struct qent_src){
++            .base = QENT_BASE_INITIALIZER(mtype)
++        };
++    }
++    return ent;
++}
++
++/*
++ * Release a destination entry: drop its weak link, destroy its cond/mutex,
++ * free its buffers, then free the entry itself. NULL is a no-op.
++ */
++static void qe_dst_free(struct qent_dst *const be_dst)
++{
++    if (be_dst != NULL) {
++        ff_weak_link_unref(&be_dst->mbc_wl);
++        pthread_cond_destroy(&be_dst->cond);
++        pthread_mutex_destroy(&be_dst->lock);
++        qe_base_uninit(&be_dst->base);
++        free(be_dst);
++    }
++}
++
++/*
++ * Allocate a destination entry of the given memory type holding a new
++ * reference on weak link wl. Returns NULL if malloc fails.
++ */
++static struct qent_dst* qe_dst_new(struct ff_weak_link_master * const wl, const enum mediabufs_memory memtype)
++{
++    struct qent_dst *const ent = malloc(sizeof(struct qent_dst));
++
++    if (ent != NULL) {
++        *ent = (struct qent_dst){
++            .base = QENT_BASE_INITIALIZER(memtype),
++            .lock = PTHREAD_MUTEX_INITIALIZER,
++            .cond = PTHREAD_COND_INITIALIZER,
++            .mbc_wl = ff_weak_link_ref(wl)
++        };
++    }
++    return ent;
++}
++
++/* Append be to the end of list ql */
++static void ql_add_tail(struct qe_list_head * const ql, struct qent_base * be)
++{
++    struct qent_base *const old_tail = ql->tail;
++
++    /* An empty list gets be as its head; otherwise hang it off the old tail */
++    if (old_tail == NULL)
++        ql->head = be;
++    else
++        old_tail->next = be;
++
++    be->prev = old_tail;
++    be->next = NULL;
++    ql->tail = be;
++}
++
++/*
++ * Unlink be from list ql and return it with both links cleared.
++ * A NULL be is passed straight through as NULL.
++ */
++static struct qent_base * ql_extract(struct qe_list_head * const ql, struct qent_base * be)
++{
++    struct qent_base *after;
++    struct qent_base *before;
++
++    if (be == NULL)
++        return NULL;
++
++    after = be->next;
++    before = be->prev;
++
++    /* Patch the successor (or the list tail if be was last) */
++    if (after != NULL)
++        after->prev = before;
++    else
++        ql->tail = before;
++
++    /* Patch the predecessor (or the list head if be was first) */
++    if (before != NULL)
++        before->next = after;
++    else
++        ql->head = after;
++
++    be->next = NULL;
++    be->prev = NULL;
++    return be;
++}
++
++
++/* Append be to the pool's free list; takes no lock and does not touch free_sem */
++static void bq_put_free(struct buf_pool *const bp, struct qent_base * be)
++{
++    struct qe_list_head *const free_list = &bp->free;
++    ql_add_tail(free_list, be);
++}
++
++/* Pop the head of the pool's free list (NULL if empty); takes no lock */
++static struct qent_base * bq_get_free(struct buf_pool *const bp)
++{
++    struct qe_list_head *const free_list = &bp->free;
++    return ql_extract(free_list, free_list->head);
++}
++
++/* Unlink a specific entry from the pool's in-use list; takes no lock */
++static struct qent_base * bq_extract_inuse(struct buf_pool *const bp, struct qent_base *const be)
++{
++    struct qe_list_head *const inuse_list = &bp->inuse;
++    return ql_extract(inuse_list, be);
++}
++
++/* Pop the head of the pool's in-use list (NULL if empty); takes no lock */
++static struct qent_base * bq_get_inuse(struct buf_pool *const bp)
++{
++    struct qe_list_head *const inuse_list = &bp->inuse;
++    return ql_extract(inuse_list, inuse_list->head);
++}
++
++/* Drain the free list, destroying every entry as a source entry */
++static void bq_free_all_free_src(struct buf_pool *const bp)
++{
++    struct qent_base *ent;
++
++    for (ent = bq_get_free(bp); ent != NULL; ent = bq_get_free(bp))
++        qe_src_free(base_to_src(ent));
++}
++
++/* Drain the in-use list, destroying every entry as a source entry */
++static void bq_free_all_inuse_src(struct buf_pool *const bp)
++{
++    struct qent_base *ent;
++
++    for (ent = bq_get_inuse(bp); ent != NULL; ent = bq_get_inuse(bp))
++        qe_src_free(base_to_src(ent));
++}
++
++/* Drain the free list, destroying every entry as a destination entry */
++static void bq_free_all_free_dst(struct buf_pool *const bp)
++{
++    struct qent_base *ent;
++
++    for (ent = bq_get_free(bp); ent != NULL; ent = bq_get_free(bp))
++        qe_dst_free(base_to_dst(ent));
++}
++
++/*
++ * Reset an entry's per-use state and return it to the pool's free list,
++ * then post free_sem so a waiter can pick it up.
++ */
++static void queue_put_free(struct buf_pool *const bp, struct qent_base *be)
++{
++    unsigned int plane = 0;
++
++    pthread_mutex_lock(&bp->lock);
++
++    /* Clear out state vars */
++    be->timestamp.tv_sec = 0;
++    be->timestamp.tv_usec = 0;
++    be->status = QENT_FREE;
++
++    /* Zero the payload length of every populated plane */
++    while (plane < VIDEO_MAX_PLANES && be->dh[plane] != NULL) {
++        dmabuf_len_set(be->dh[plane], 0);
++        ++plane;
++    }
++
++    bq_put_free(bp, be);
++    pthread_mutex_unlock(&bp->lock);
++
++    sem_post(&bp->free_sem);
++}
++
++/* True if the pool's in-use list is non-empty; reads the tail pointer without taking the pool lock */
++static bool queue_is_inuse(const struct buf_pool *const bp)
++{
++    const struct qent_base *const last = bp->inuse.tail;
++    return last != NULL;
++}
++
++/* Append be to the pool's in-use list and mark it QENT_WAITING; NULL is a no-op */
++static void queue_put_inuse(struct buf_pool *const bp, struct qent_base *be)
++{
++    if (be != NULL) {
++        pthread_mutex_lock(&bp->lock);
++        ql_add_tail(&bp->inuse, be);
++        be->status = QENT_WAITING;
++        pthread_mutex_unlock(&bp->lock);
++    }
++}
++
++/*
++ * Take an entry from the free list after do_wait() succeeds on free_sem.
++ * Returns NULL if do_wait() reports failure.
++ */
++static struct qent_base *queue_get_free(struct buf_pool *const bp)
++{
++    struct qent_base *ent = NULL;
++
++    if (!do_wait(&bp->free_sem)) {
++        pthread_mutex_lock(&bp->lock);
++        ent = bq_get_free(bp);
++        pthread_mutex_unlock(&bp->lock);
++    }
++    return ent;
++}
++
++/*
++ * Take an entry from the free list after do_trywait() succeeds on free_sem.
++ * Returns NULL if do_trywait() reports failure.
++ */
++static struct qent_base *queue_tryget_free(struct buf_pool *const bp)
++{
++    struct qent_base *ent = NULL;
++
++    if (!do_trywait(&bp->free_sem)) {
++        pthread_mutex_lock(&bp->lock);
++        ent = bq_get_free(bp);
++        pthread_mutex_unlock(&bp->lock);
++    }
++    return ent;
++}
++
++/*
++ * Find the in-use entry whose index matches, unlink it and return it.
++ * Returns NULL if no in-use entry has that index.
++ */
++static struct qent_base * queue_find_extract_index(struct buf_pool *const bp, const unsigned int index)
++{
++    struct qent_base *hit;
++
++    pthread_mutex_lock(&bp->lock);
++
++    /* Expect 1st in Q, but allow anywhere */
++    hit = bp->inuse.head;
++    while (hit != NULL && hit->index != index)
++        hit = hit->next;
++
++    if (hit != NULL)
++        bq_extract_inuse(bp, hit);
++
++    pthread_mutex_unlock(&bp->lock);
++
++    return hit;
++}
++
++/* Destroy the pool's semaphore and mutex and free the pool; entries are not freed here */
++static void queue_delete(struct buf_pool *const bp)
++{
++    sem_t *const sem = &bp->free_sem;
++    pthread_mutex_t *const mtx = &bp->lock;
++
++    sem_destroy(sem);
++    pthread_mutex_destroy(mtx);
++    free(bp);
++}
++
++/*
++ * Allocate an empty, zeroed buffer pool with its lock initialized and
++ * free_sem at 0. vfd is not used. Returns NULL if calloc fails.
++ */
++static struct buf_pool* queue_new(const int vfd)
++{
++    struct buf_pool *const pool = calloc(1, sizeof(struct buf_pool));
++
++    if (pool != NULL) {
++        pthread_mutex_init(&pool->lock, NULL);
++        sem_init(&pool->free_sem, 0, 0);
++    }
++    return pool;
++}
++
++
++/* Control block tying a V4L2 video fd to its source and destination buffer pools */
++struct mediabufs_ctl {
++ atomic_int ref_count; /* 0 is single ref for easier atomics */
++ void * dc; // Passed as the context argument to request_err/info/warn/debug
++ int vfd; // V4L2 video device fd; target of the VIDIOC_* ioctls
++ bool stream_on; // Set by mediabufs_stream_on(), cleared by mediabufs_stream_off()
++ bool polling; // True while pt is queued on the poll queue
++ bool dst_fixed; // Dst Q is fixed size
++ pthread_mutex_t lock;
++ struct buf_pool * src;
++ struct buf_pool * dst;
++ struct polltask * pt; // Poll task whose callback is mediabufs_poll_cb()
++ struct pollqueue * pq;
++ struct ff_weak_link_master * this_wlm; // Master end of the weak link held by dst entries
++
++ enum mediabufs_memory src_memtype;
++ enum mediabufs_memory dst_memtype;
++ struct v4l2_format src_fmt;
++ struct v4l2_format dst_fmt;
++ struct v4l2_capability capability;
++};
++
++/*
++ * Build a v4l2_buffer describing entry be and queue it on vfd with VIDIOC_QBUF.
++ *
++ * be        entry to queue; its index, memtype and dh[] describe the buffer
++ * vfd       V4L2 video device fd
++ * mreq      media request to attach; only used when !is_dst, may be NULL
++ * fmt       format whose type selects single- or multi-planar layout
++ * is_dst    destination buffer: payload lengths and timestamp are zeroed first
++ * hold_flag with a request attached, also set V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF
++ *
++ * Returns 0 on success or -errno on failure. EINTR is retried.
++ */
++static int qe_v4l2_queue(struct qent_base *const be,
++ const int vfd, struct media_request *const mreq,
++ const struct v4l2_format *const fmt,
++ const bool is_dst, const bool hold_flag)
++{
++ struct v4l2_buffer buffer = {
++ .type = fmt->type,
++ .memory = mediabufs_memory_to_v4l2(be->memtype),
++ .index = be->index
++ };
++ struct v4l2_plane planes[VIDEO_MAX_PLANES] = {{0}};
++
++ if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) {
++ unsigned int i;
++ // One v4l2_plane per populated dh[] slot; stops at the first NULL handle
++ for (i = 0; i < VIDEO_MAX_PLANES && be->dh[i]; ++i) {
++ if (is_dst)
++ dmabuf_len_set(be->dh[i], 0);
++
++ /* *** Really need a pixdesc rather than a format so we can fill in data_offset */
++ planes[i].length = dmabuf_size(be->dh[i]);
++ planes[i].bytesused = dmabuf_len(be->dh[i]);
++ if (be->memtype == MEDIABUFS_MEMORY_DMABUF)
++ planes[i].m.fd = dmabuf_fd(be->dh[i]);
++ else
++ planes[i].m.mem_offset = 0;
++ }
++ buffer.m.planes = planes;
++ buffer.length = i; // Multi-planar: length is the number of planes filled in above
++ }
++ else {
++ if (is_dst)
++ dmabuf_len_set(be->dh[0], 0);
++
++ buffer.bytesused = dmabuf_len(be->dh[0]);
++ buffer.length = dmabuf_size(be->dh[0]);
++ if (be->memtype == MEDIABUFS_MEMORY_DMABUF)
++ buffer.m.fd = dmabuf_fd(be->dh[0]);
++ else
++ buffer.m.offset = 0;
++ }
++
++ // Requests are only attached to source buffers
++ if (!is_dst && mreq) {
++ buffer.flags |= V4L2_BUF_FLAG_REQUEST_FD;
++ buffer.request_fd = media_request_fd(mreq);
++ if (hold_flag)
++ buffer.flags |= V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF;
++ }
++
++ if (is_dst)
++ be->timestamp = (struct timeval){0,0};
++
++ buffer.timestamp = be->timestamp;
++
++ // Retry while the ioctl is interrupted; any other error is fatal for this call
++ while (ioctl(vfd, VIDIOC_QBUF, &buffer)) {
++ const int err = errno;
++ if (err != EINTR) {
++ request_log("%s: Failed to Q buffer: err=%d (%s)\n", __func__, err, strerror(err));
++ return -err;
++ }
++ }
++ return 0;
++}
++
++/*
++ * Dequeue one buffer of the type described by f from vfd (VIDIOC_DQBUF) and
++ * return the matching entry, removed from bp's in-use list.
++ *
++ * For capture types the payload length of each plane is recorded on the
++ * entry's dmabuf handles; for other types it is reset to 0. The entry's
++ * timestamp is copied from the V4L2 buffer and its status becomes
++ * QENT_ERROR if V4L2_BUF_FLAG_ERROR is set, QENT_DONE otherwise.
++ *
++ * Returns NULL if the ioctl fails (EINTR is retried) or if no in-use entry
++ * has the dequeued index.
++ */
++static struct qent_base * qe_dequeue(struct buf_pool *const bp,
++                                     const int vfd,
++                                     const struct v4l2_format * const f)
++{
++    struct qent_base *be;
++    int rc;
++    const bool mp = V4L2_TYPE_IS_MULTIPLANAR(f->type);
++    const bool is_capture = V4L2_TYPE_IS_CAPTURE(f->type);
++    struct v4l2_plane planes[VIDEO_MAX_PLANES] = {{0}};
++    struct v4l2_buffer buffer = {
++        .type = f->type,
++        .memory = mediabufs_memory_to_v4l2(bp->memtype)
++    };
++    if (mp) {
++        buffer.length = f->fmt.pix_mp.num_planes;
++        buffer.m.planes = planes;
++    }
++
++    while ((rc = ioctl(vfd, VIDIOC_DQBUF, &buffer)) != 0 &&
++           errno == EINTR)
++        /* Loop */;
++    if (rc) {
++        request_log("Error DQing buffer type %d: %s\n", f->type, strerror(errno));
++        return NULL;
++    }
++
++    be = queue_find_extract_index(bp, buffer.index);
++    if (!be) {
++        request_log("Failed to find index %d in Q\n", buffer.index);
++        return NULL;
++    }
++
++    if (mp) {
++        unsigned int i;
++        for (i = 0; i != buffer.length; ++i)
++            dmabuf_len_set(be->dh[i], is_capture ? planes[i].bytesused : 0);
++    }
++    else {
++        /* Single-planar: bytesused is the payload size; length is only the
++         * buffer capacity, so it must not be recorded as the data length */
++        dmabuf_len_set(be->dh[0], is_capture ? buffer.bytesused : 0);
++    }
++
++    be->timestamp = buffer.timestamp;
++    be->status = (buffer.flags & V4L2_BUF_FLAG_ERROR) ? QENT_ERROR : QENT_DONE;
++    return be;
++}
++
++/*
++ * Mark a destination entry as no longer waiting, wake every thread blocked
++ * on its condition variable, then drop one reference on it.
++ */
++static void qe_dst_done(struct qent_dst * dst_be)
++{
++ pthread_mutex_lock(&dst_be->lock);
++ dst_be->waiting = false;
++ pthread_cond_broadcast(&dst_be->cond);
++ pthread_mutex_unlock(&dst_be->lock);
++
++ // Unref after the lock is released: the unref may recycle or free the entry
++ qent_dst_unref(&dst_be);
++}
++
++/*
++ * Atomically (under the entry lock) set the waiting flag and return the
++ * value it had before, i.e. true if the entry was already waiting.
++ */
++static bool qe_dst_waiting(struct qent_dst *const dst_be)
++{
++    bool was_waiting;
++
++    pthread_mutex_lock(&dst_be->lock);
++    was_waiting = dst_be->waiting;
++    dst_be->waiting = true;
++    pthread_mutex_unlock(&dst_be->lock);
++
++    return was_waiting;
++}
++
++
++/* Polling is wanted while either the src or the dst pool has entries in use */
++static bool mediabufs_wants_poll(const struct mediabufs_ctl *const mbc)
++{
++    if (queue_is_inuse(mbc->src))
++        return true;
++    return queue_is_inuse(mbc->dst);
++}
++
++/*
++ * Poll-task callback for the video fd.
++ *
++ * v       the mediabufs_ctl registered with the poll task
++ * revents poll events that fired; 0 is reported as a timeout
++ *
++ * POLLOUT dequeues a finished source buffer, POLLIN a finished destination
++ * buffer. The task is re-armed while any buffer is still in use. Completed
++ * entries are handed back only after mbc->lock has been released.
++ */
++static void mediabufs_poll_cb(void * v, short revents)
++{
++ struct mediabufs_ctl *mbc = v;
++ struct qent_src *src_be = NULL;
++ struct qent_dst *dst_be = NULL;
++
++ if (!revents)
++ request_err(mbc->dc, "%s: Timeout\n", __func__);
++
++ pthread_mutex_lock(&mbc->lock);
++ mbc->polling = false;
++
++ if ((revents & POLLOUT) != 0)
++ src_be = base_to_src(qe_dequeue(mbc->src, mbc->vfd, &mbc->src_fmt));
++ if ((revents & POLLIN) != 0)
++ dst_be = base_to_dst(qe_dequeue(mbc->dst, mbc->vfd, &mbc->dst_fmt));
++
++ /* Reschedule */
++ if (mediabufs_wants_poll(mbc)) {
++ mbc->polling = true;
++ pollqueue_add_task(mbc->pt, 2000); // presumably a timeout in ms - TODO confirm against pollqueue
++ }
++ pthread_mutex_unlock(&mbc->lock);
++
++ // Source entries go back on the free list; destination entries are signalled and unreffed
++ if (src_be)
++ queue_put_free(mbc->src, &src_be->base);
++ if (dst_be)
++ qe_dst_done(dst_be);
++}
++
++/* Store *timestamp on a source entry. Always returns 0. */
++int qent_src_params_set(struct qent_src *const be_src, const struct timeval * timestamp)
++{
++    be_src->base.timestamp = *timestamp;
++    return 0;
++}
++
++/* Return (by value) the timestamp currently stored on a destination entry */
++struct timeval qent_dst_timestamp_get(const struct qent_dst *const be_dst)
++{
++    const struct qent_base *const be = &be_dst->base;
++    return be->timestamp;
++}
++
++/*
++ * Make sure plane 0 of be can hold at least len bytes.
++ *
++ * Nothing is done if a buffer already exists and is big enough. Otherwise
++ * the buffer is (re)allocated from dbsc at round_up_size(len). As noted at
++ * the call site in qent_src_data_copy(), realloc does not copy old contents.
++ *
++ * dbsc may be NULL, meaning "growing is not permitted".
++ *
++ * Returns 0 on success, -ENOMEM if growth is needed but dbsc is NULL or the
++ * reallocation fails.
++ */
++static int qent_base_realloc(struct qent_base *const be, const size_t len, struct dmabufs_ctl * dbsc)
++{
++    /* Treat a missing buffer as size 0 so a NULL handle never reaches dmabuf_size() */
++    const size_t cursize = be->dh[0] ? dmabuf_size(be->dh[0]) : 0;
++
++    if (!be->dh[0] || len > cursize) {
++        const size_t newsize = round_up_size(len);
++        /* size_t arguments need %zu (the unsigned form), not %zd */
++        request_log("%s: Overrun %zu > %zu; trying %zu\n", __func__, len, cursize, newsize);
++        if (!dbsc) {
++            request_log("%s: No dmbabuf_ctrl for realloc\n", __func__);
++            return -ENOMEM;
++        }
++        if ((be->dh[0] = dmabuf_realloc(dbsc, be->dh[0], newsize)) == NULL) {
++            request_log("%s: Realloc %zu failed\n", __func__, newsize);
++            return -ENOMEM;
++        }
++    }
++    return 0;
++}
++
++/*
++ * Ensure a source entry's plane-0 buffer can hold len bytes.
++ * Returns whatever qent_base_realloc() returns (0 or -ENOMEM).
++ */
++int qent_src_alloc(struct qent_src *const be_src, const size_t len, struct dmabufs_ctl * dbsc)
++{
++    return qent_base_realloc(&be_src->base, len, dbsc);
++}
++
++
++/*
++ * Copy len bytes from src into plane 0 of a source entry at byte offset.
++ *
++ * The buffer is grown first if needed, but only when offset is 0 and the
++ * entry is not fixed-size: growing does not preserve existing contents.
++ *
++ * Returns 0 on success, a negative errno from qent_base_realloc() if the
++ * buffer is too small and cannot be grown, or -1 if the buffer cannot be
++ * mapped.
++ */
++int qent_src_data_copy(struct qent_src *const be_src, const size_t offset, const void *const src, const size_t len, struct dmabufs_ctl * dbsc)
++{
++    void * dst;
++    struct qent_base *const be = &be_src->base;
++    int rv;
++
++    // Realloc doesn't copy so don't alloc if offset != 0
++    if ((rv = qent_base_realloc(be, offset + len,
++                                be_src->fixed_size || offset ? NULL : dbsc)) != 0)
++        return rv;
++
++    dmabuf_write_start(be->dh[0]);
++    dst = dmabuf_map(be->dh[0]);
++    if (!dst) {
++        /* Balance the write_start above before bailing out */
++        dmabuf_write_end(be->dh[0]);
++        return -1;
++    }
++    memcpy((char*)dst + offset, src, len);
++    /* NOTE(review): records len, not offset + len, as the payload length -
++     * confirm this is what callers with a non-zero offset expect */
++    dmabuf_len_set(be->dh[0], len);
++    dmabuf_write_end(be->dh[0]);
++    return 0;
++}
++
++/*
++ * Return the dmabuf handle of the given plane of a destination entry,
++ * or NULL if plane is out of range. The slot itself may also be NULL.
++ */
++const struct dmabuf_h * qent_dst_dmabuf(const struct qent_dst *const be_dst, unsigned int plane)
++{
++    const size_t nslots = sizeof(be_dst->base.dh) / sizeof(be_dst->base.dh[0]);
++
++    if (plane >= nslots)
++        return NULL;
++    return be_dst->base.dh[plane];
++}
++
++/* Return dup() of the fd backing the given plane of a destination entry */
++int qent_dst_dup_fd(const struct qent_dst *const be_dst, unsigned int plane)
++{
++    const struct dmabuf_h *const dh = qent_dst_dmabuf(be_dst, plane);
++    const int fd = dmabuf_fd(dh);
++
++    return dup(fd);
++}
++
++/*
++ * Queue a source buffer (and optionally a destination buffer) on the video
++ * device and start the associated media request.
++ *
++ * mbc      control block
++ * pmreq    in: request to use; *pmreq is always set to NULL on return
++ * psrc_be  in: source entry to queue; *psrc_be is always set to NULL on return
++ * dst_be   destination entry to queue, or NULL to queue none
++ * is_final false asks the driver to hold the capture buffer
++ *          (V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF) for further requests
++ *
++ * Returns MEDIABUFS_STATUS_SUCCESS, or MEDIABUFS_ERROR_OPERATION_FAILED if
++ * queuing fails or the request cannot be started.
++ */
++MediaBufsStatus mediabufs_start_request(struct mediabufs_ctl *const mbc,
++ struct media_request **const pmreq,
++ struct qent_src **const psrc_be,
++ struct qent_dst *const dst_be,
++ const bool is_final)
++{
++ struct media_request * mreq = *pmreq;
++ struct qent_src *const src_be = *psrc_be;
++
++ // Req & src are always both "consumed"
++ *pmreq = NULL;
++ *psrc_be = NULL;
++
++ pthread_mutex_lock(&mbc->lock);
++
++ if (!src_be)
++ goto fail1;
++
++ if (dst_be) {
++ // qe_dst_waiting() also sets the waiting flag as a side effect
++ if (qe_dst_waiting(dst_be)) {
++ request_info(mbc->dc, "Request buffer already waiting on start\n");
++ goto fail1;
++ }
++ dst_be->base.timestamp = (struct timeval){0,0};
++ if (qe_v4l2_queue(&dst_be->base, mbc->vfd, NULL, &mbc->dst_fmt, true, false))
++ goto fail1;
++
++ // Extra reference for the in-use queue; dropped by qe_dst_done()
++ qent_dst_ref(dst_be);
++ queue_put_inuse(mbc->dst, &dst_be->base);
++ }
++
++ if (qe_v4l2_queue(&src_be->base, mbc->vfd, mreq, &mbc->src_fmt, false, !is_final))
++ goto fail1;
++ queue_put_inuse(mbc->src, &src_be->base);
++
++ if (!mbc->polling && mediabufs_wants_poll(mbc)) {
++ mbc->polling = true;
++ pollqueue_add_task(mbc->pt, 2000);
++ }
++ pthread_mutex_unlock(&mbc->lock);
++
++ // NOTE(review): buffers stay queued if starting the request fails here
++ if (media_request_start(mreq))
++ return MEDIABUFS_ERROR_OPERATION_FAILED;
++
++ return MEDIABUFS_STATUS_SUCCESS;
++
++fail1:
++ media_request_abort(&mreq);
++ if (src_be)
++ queue_put_free(mbc->src, &src_be->base);
++
++// *** TODO: If src Q fails this doesn't unwind properly - separate dst Q from src Q
++ if (dst_be) {
++ dst_be->base.status = QENT_ERROR;
++ qe_dst_done(dst_be);
++ }
++ pthread_mutex_unlock(&mbc->lock);
++ return MEDIABUFS_ERROR_OPERATION_FAILED;
++}
++
++
++/*
++ * (Re)allocate the dmabufs of be to the sizes given by fmt: one buffer per
++ * plane for multi-planar formats, a single buffer otherwise.
++ *
++ * If a multi-planar allocation fails part way, the planes below the failing
++ * one are freed. Returns 0 on success, -1 on failure.
++ */
++static int qe_alloc_from_fmt(struct qent_base *const be,
++                             struct dmabufs_ctl *const dbsc,
++                             const struct v4l2_format *const fmt)
++{
++    unsigned int plane;
++    unsigned int nplanes;
++
++    if (!V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) {
++        const size_t size = fmt->fmt.pix.sizeimage;
++
++        be->dh[0] = dmabuf_realloc(dbsc, be->dh[0], size);
++        return be->dh[0] == NULL ? -1 : 0;
++    }
++
++    nplanes = fmt->fmt.pix_mp.num_planes;
++    for (plane = 0; plane != nplanes; ++plane) {
++        be->dh[plane] = dmabuf_realloc(dbsc, be->dh[plane],
++                                       fmt->fmt.pix_mp.plane_fmt[plane].sizeimage);
++        if (be->dh[plane] != NULL)
++            continue;
++
++        /* On failure tidy up and die */
++        while (plane != 0) {
++            --plane;
++            dmabuf_free(be->dh[plane]);
++            be->dh[plane] = NULL;
++        }
++        return -1;
++    }
++    return 0;
++}
++
++/*
++ * Fill *fmt for buftype with the requested pixel format, size and (if
++ * non-zero) buffer size, then apply it with VIDIOC_S_FMT.
++ *
++ * Returns MEDIABUFS_ERROR_OPERATION_FAILED if the ioctl fails (EINTR is
++ * retried), MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE if the driver answers
++ * with a smaller width/height or a different pixel format, and
++ * MEDIABUFS_STATUS_SUCCESS otherwise. *fmt holds the driver's answer.
++ */
++static MediaBufsStatus fmt_set(struct v4l2_format *const fmt, const int fd,
++                               const enum v4l2_buf_type buftype,
++                               uint32_t pixfmt,
++                               const unsigned int width, const unsigned int height,
++                               const size_t bufsize)
++{
++    const bool multiplanar = V4L2_TYPE_IS_MULTIPLANAR(buftype);
++    bool acceptable;
++
++    *fmt = (struct v4l2_format){.type = buftype};
++
++    if (!multiplanar) {
++        struct v4l2_pix_format *const pix = &fmt->fmt.pix;
++
++        pix->width = width;
++        pix->height = height;
++        pix->pixelformat = pixfmt;
++        pix->sizeimage = bufsize;
++    }
++    else {
++        struct v4l2_pix_format_mplane *const pix_mp = &fmt->fmt.pix_mp;
++
++        pix_mp->width = width;
++        pix_mp->height = height;
++        pix_mp->pixelformat = pixfmt;
++        /* Only describe a plane when the caller supplied a size */
++        if (bufsize) {
++            pix_mp->num_planes = 1;
++            pix_mp->plane_fmt[0].sizeimage = bufsize;
++        }
++    }
++
++    for (;;) {
++        if (ioctl(fd, VIDIOC_S_FMT, fmt) == 0)
++            break;
++        if (errno != EINTR)
++            return MEDIABUFS_ERROR_OPERATION_FAILED;
++    }
++
++    // Treat anything where we don't get at least what we asked for as a fail
++    if (multiplanar)
++        acceptable = fmt->fmt.pix_mp.width >= width &&
++                     fmt->fmt.pix_mp.height >= height &&
++                     fmt->fmt.pix_mp.pixelformat == pixfmt;
++    else
++        acceptable = fmt->fmt.pix.width >= width &&
++                     fmt->fmt.pix.height >= height &&
++                     fmt->fmt.pix.pixelformat == pixfmt;
++
++    if (!acceptable)
++        return MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE;
++    return MEDIABUFS_STATUS_SUCCESS;
++}
++
++/*
++ * Walk the driver's format list (VIDIOC_ENUM_FMT) for type_v4l2 and set the
++ * first format that:
++ *   - has all of flags_must and none of flags_not,
++ *   - is accepted by accept_fn(accept_v, &fmtdesc), and
++ *   - can be applied by fmt_set() at width x height.
++ *
++ * Returns MEDIABUFS_STATUS_SUCCESS with *fmt set, or
++ * MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE once enumeration fails (EINTR is
++ * retried), which is how the end of the list is reached.
++ */
++static MediaBufsStatus find_fmt_flags(struct v4l2_format *const fmt,
++                                      const int fd,
++                                      const unsigned int type_v4l2,
++                                      const uint32_t flags_must,
++                                      const uint32_t flags_not,
++                                      const unsigned int width,
++                                      const unsigned int height,
++                                      mediabufs_dst_fmt_accept_fn *const accept_fn,
++                                      void *const accept_v)
++{
++    unsigned int next_index = 0;
++
++    for (;;) {
++        struct v4l2_fmtdesc fmtdesc = {
++            .index = next_index++,
++            .type = type_v4l2
++        };
++
++        while (ioctl(fd, VIDIOC_ENUM_FMT, &fmtdesc) != 0) {
++            if (errno != EINTR)
++                return MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE;
++        }
++
++        /* Checks run in this order and stop at the first one that fails */
++        if ((fmtdesc.flags & flags_must) == flags_must &&
++            (fmtdesc.flags & flags_not) == 0 &&
++            accept_fn(accept_v, &fmtdesc) &&
++            fmt_set(fmt, fd, fmtdesc.type, fmtdesc.pixelformat,
++                    width, height, 0) == MEDIABUFS_STATUS_SUCCESS)
++            return MEDIABUFS_STATUS_SUCCESS;
++    }
++}
++
++
++/* Wait for qent done */
++
++/*
++ * Block until the destination entry is no longer marked waiting (or the
++ * condition wait itself fails), then map its status to a result:
++ * QENT_DONE -> success, QENT_ERROR -> decoding error, anything else ->
++ * operation failed.
++ */
++MediaBufsStatus qent_dst_wait(struct qent_dst *const be_dst)
++{
++    enum qent_status final_status;
++
++    pthread_mutex_lock(&be_dst->lock);
++    while (be_dst->waiting) {
++        if (pthread_cond_wait(&be_dst->cond, &be_dst->lock) != 0)
++            break;
++    }
++    final_status = be_dst->base.status;
++    pthread_mutex_unlock(&be_dst->lock);
++
++    switch (final_status) {
++    case QENT_DONE:
++        return MEDIABUFS_STATUS_SUCCESS;
++    case QENT_ERROR:
++        return MEDIABUFS_ERROR_DECODING_ERROR;
++    default:
++        return MEDIABUFS_ERROR_OPERATION_FAILED;
++    }
++}
++
++/*
++ * Return a mapped pointer to the data of plane buf_no of a destination
++ * entry, as produced by dmabuf_map().
++ *
++ * Returns NULL if buf_no is not a valid plane index (the same bound that
++ * qent_dst_dmabuf() enforces), rather than indexing past the end of dh[].
++ */
++const uint8_t * qent_dst_data(struct qent_dst *const be_dst, unsigned int buf_no)
++{
++    struct qent_base *const be = &be_dst->base;
++
++    if (buf_no >= sizeof(be->dh)/sizeof(be->dh[0]))
++        return NULL;
++    return dmabuf_map(be->dh[buf_no]);
++}
++
++/*
++ * Begin a read on every populated plane of a destination entry.
++ * If a plane fails, the reads already started on lower planes are ended and
++ * MEDIABUFS_ERROR_ALLOCATION_FAILED is returned.
++ */
++MediaBufsStatus qent_dst_read_start(struct qent_dst *const be_dst)
++{
++    struct dmabuf_h *const *const dh = be_dst->base.dh;
++    unsigned int plane = 0;
++
++    while (plane != VIDEO_MAX_PLANES && dh[plane] != NULL) {
++        if (dmabuf_read_start(dh[plane]) != 0) {
++            /* Undo the planes that did start, highest first */
++            while (plane != 0) {
++                --plane;
++                dmabuf_read_end(dh[plane]);
++            }
++            return MEDIABUFS_ERROR_ALLOCATION_FAILED;
++        }
++        ++plane;
++    }
++    return MEDIABUFS_STATUS_SUCCESS;
++}
++
++/*
++ * End the read on every populated plane of a destination entry.
++ * All planes are attempted; MEDIABUFS_ERROR_OPERATION_FAILED is returned if
++ * any of them reports a failure.
++ */
++MediaBufsStatus qent_dst_read_stop(struct qent_dst *const be_dst)
++{
++    struct dmabuf_h *const *const dh = be_dst->base.dh;
++    MediaBufsStatus result = MEDIABUFS_STATUS_SUCCESS;
++    unsigned int plane = 0;
++
++    while (plane != VIDEO_MAX_PLANES && dh[plane] != NULL) {
++        if (dmabuf_read_end(dh[plane]) != 0)
++            result = MEDIABUFS_ERROR_OPERATION_FAILED;
++        ++plane;
++    }
++    return result;
++}
++
++/* Take one more reference on a destination entry; NULL is passed through */
++struct qent_dst * qent_dst_ref(struct qent_dst * const be_dst)
++{
++    if (be_dst == NULL)
++        return NULL;
++
++    atomic_fetch_add(&be_dst->base.ref_count, 1);
++    return be_dst;
++}
++
++/*
++ * Drop one reference on *pbe_dst and set *pbe_dst to NULL.
++ * A NULL *pbe_dst is ignored.
++ *
++ * When the last reference goes (ref_count was 0 before the decrement) the
++ * entry is returned to its owner's dst free list if the owning
++ * mediabufs_ctl can still be reached through the weak link; otherwise the
++ * entry is freed.
++ */
++void qent_dst_unref(struct qent_dst ** const pbe_dst)
++{
++ struct qent_dst * const be_dst = *pbe_dst;
++ struct mediabufs_ctl * mbc;
++ if (!be_dst)
++ return;
++ *pbe_dst = NULL;
++
++ // ref_count is zero-based, so a previous value of 0 means this was the last ref
++ if (atomic_fetch_sub(&be_dst->base.ref_count, 1) != 0)
++ return;
++
++ if ((mbc = ff_weak_link_lock(&be_dst->mbc_wl)) != NULL) {
++ queue_put_free(mbc->dst, &be_dst->base);
++ ff_weak_link_unlock(be_dst->mbc_wl);
++ }
++ else {
++ // Weak link could not be locked (NULL returned): nothing to recycle into
++ qe_dst_free(be_dst);
++ }
++}
++
++/*
++ * Attach an externally supplied dmabuf fd as plane `plane` of a destination
++ * entry.
++ *
++ * Only valid for entries in the QENT_IMPORT state whose plane slot is still
++ * empty, and only for plane indices within dh[].
++ *
++ * Returns MEDIABUFS_ERROR_OPERATION_FAILED if those preconditions are not
++ * met, MEDIABUFS_ERROR_ALLOCATION_FAILED if dmabuf_import() fails, and
++ * MEDIABUFS_STATUS_SUCCESS otherwise.
++ */
++MediaBufsStatus qent_dst_import_fd(struct qent_dst *const be_dst,
++                                   unsigned int plane,
++                                   int fd, size_t size)
++{
++    struct qent_base *const be = &be_dst->base;
++    struct dmabuf_h * dh;
++
++    /* Reject out-of-range planes before dh[plane] is read or written */
++    if (plane >= sizeof(be->dh)/sizeof(be->dh[0]))
++        return MEDIABUFS_ERROR_OPERATION_FAILED;
++
++    if (be->status != QENT_IMPORT || be->dh[plane])
++        return MEDIABUFS_ERROR_OPERATION_FAILED;
++
++    dh = dmabuf_import(fd, size);
++    if (!dh)
++        return MEDIABUFS_ERROR_ALLOCATION_FAILED;
++
++    be->dh[plane] = dh;
++    return MEDIABUFS_STATUS_SUCCESS;
++}
++
++/*
++ * Ask the driver (VIDIOC_CREATE_BUFS) for n destination buffers in the
++ * current dst format and assign the resulting V4L2 indices to qes[].
++ *
++ * mbc  control block; mbc->dst->memtype and mbc->dst_fmt describe the buffers
++ * n    number of buffers requested; qes[] must hold at least n entries
++ * qes  entries that receive consecutive indices starting at cbuf.index
++ *
++ * Returns the number of buffers created (a warning is logged if that
++ * differs from n), or a negative errno on failure. EINTR is retried.
++ */
++static int create_dst_bufs(struct mediabufs_ctl *const mbc, unsigned int n, struct qent_dst * const qes[])
++{
++    unsigned int i;
++
++    struct v4l2_create_buffers cbuf = {
++        .count = n,
++        .memory = mediabufs_memory_to_v4l2(mbc->dst->memtype),
++        .format = mbc->dst_fmt,
++    };
++
++    while (ioctl(mbc->vfd, VIDIOC_CREATE_BUFS, &cbuf)) {
++        /* Keep errno positive here: it is compared against EINTR and only
++         * negated on return, so callers can tell an error from a count */
++        const int err = errno;
++        if (err != EINTR) {
++            request_err(mbc->dc, "%s: Failed to create V4L2 buffer\n", __func__);
++            return -err;
++        }
++    }
++
++    if (cbuf.count != n)
++        request_warn(mbc->dc, "%s: Created %d of %d V4L2 buffers requested\n", __func__, cbuf.count, n);
++
++    for (i = 0; i != cbuf.count; ++i)
++        qes[i]->base.index = cbuf.index + i;
++
++    return cbuf.count;
++}
++
++/*
++ * Populate the dmabuf handles of be from V4L2 buffer number n on mbc->vfd.
++ *
++ * The buffer is queried with VIDIOC_QUERYBUF and each plane is then either
++ * exported as a dmabuf fd (x_dmabuf true, VIDIOC_EXPBUF) or mmap()ed from
++ * the video fd (x_dmabuf false).
++ *
++ * Nothing is done if be already has a handle in plane 0.
++ *
++ * Returns 0 on success or MEDIABUFS_ERROR_OPERATION_FAILED; on a
++ * multi-planar failure the planes already imported are freed again.
++ */
++static MediaBufsStatus
++qe_import_from_buf(struct mediabufs_ctl *const mbc, struct qent_base * const be, const struct v4l2_format *const fmt,
++                   const unsigned int n, const bool x_dmabuf)
++{
++    struct v4l2_buffer buf = {
++        .index = n,
++        .type = fmt->type,
++    };
++    struct v4l2_plane planes[VIDEO_MAX_PLANES];
++    int ret;
++
++    if (be->dh[0])
++        return 0;
++
++    if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) {
++        memset(planes, 0, sizeof(planes));
++        buf.m.planes = planes;
++        buf.length = VIDEO_MAX_PLANES;
++    }
++
++    if ((ret = ioctl(mbc->vfd, VIDIOC_QUERYBUF, &buf)) != 0) {
++        request_err(mbc->dc, "VIDIOC_QUERYBUF failed");
++        return MEDIABUFS_ERROR_OPERATION_FAILED;
++    }
++
++    if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type))
++    {
++        unsigned int i;
++        for (i = 0; i != buf.length; ++i) {
++            if (x_dmabuf) {
++                struct v4l2_exportbuffer xbuf = {
++                    .type = buf.type,
++                    .index = buf.index,
++                    .plane = i,
++                    .flags = O_RDWR, // *** Arguably O_RDONLY would be fine
++                };
++                if (ioctl(mbc->vfd, VIDIOC_EXPBUF, &xbuf) == 0) {
++                    be->dh[i] = dmabuf_import(xbuf.fd, planes[i].length);
++                    close(xbuf.fd); // dmabuf_import dups the fd so close this one
++                }
++            }
++            else {
++                be->dh[i] = dmabuf_import_mmap(
++                    mmap(NULL, planes[i].length,
++                         PROT_READ | PROT_WRITE,
++                         MAP_SHARED | MAP_POPULATE,
++                         mbc->vfd, planes[i].m.mem_offset),
++                    planes[i].length);
++            }
++            /* On failure tidy up and die */
++            if (!be->dh[i]) {
++                while (i--) {
++                    dmabuf_free(be->dh[i]);
++                    be->dh[i] = NULL;
++                }
++                return MEDIABUFS_ERROR_OPERATION_FAILED;
++            }
++        }
++    }
++    else
++    {
++        if (x_dmabuf) {
++            struct v4l2_exportbuffer xbuf = {
++                .type = buf.type,
++                .index = buf.index,
++                .flags = O_RDWR, // *** Arguably O_RDONLY would be fine
++            };
++            if (ioctl(mbc->vfd, VIDIOC_EXPBUF, &xbuf) == 0) {
++                be->dh[0] = dmabuf_import(xbuf.fd, buf.length);
++                /* Same as the multi-planar path: dmabuf_import dups the fd,
++                 * so the exported one must be closed or it leaks */
++                close(xbuf.fd);
++            }
++        }
++        else {
++            be->dh[0] = dmabuf_import_mmap(
++                mmap(NULL, buf.length,
++                     PROT_READ | PROT_WRITE,
++                     MAP_SHARED | MAP_POPULATE,
++                     mbc->vfd, buf.m.offset),
++                buf.length);
++        }
++        /* On failure tidy up and die */
++        if (!be->dh[0]) {
++            return MEDIABUFS_ERROR_OPERATION_FAILED;
++        }
++    }
++
++    return 0;
++}
++
++/*
++ * Obtain a destination entry ready to be handed to the caller.
++ *
++ * mbc  control block, or NULL to create a stand-alone entry in the
++ *      QENT_IMPORT state (buffers are then attached with qent_dst_import_fd())
++ * dbsc dmabuf allocator used when the dst pool is not MMAP
++ *
++ * With a fixed-size dst queue this waits for a free entry; otherwise it
++ * takes a free entry if one is available or creates a new entry plus one
++ * new V4L2 buffer.
++ *
++ * Returns the entry with status QENT_PENDING and ref_count 0 (a single
++ * reference), or NULL on failure.
++ */
++struct qent_dst* mediabufs_dst_qent_alloc(struct mediabufs_ctl *const mbc, struct dmabufs_ctl *const dbsc)
++{
++ struct qent_dst * be_dst;
++
++ if (mbc == NULL) {
++ be_dst = qe_dst_new(NULL, MEDIABUFS_MEMORY_DMABUF);
++ if (be_dst)
++ be_dst->base.status = QENT_IMPORT;
++ return be_dst;
++ }
++
++ if (mbc->dst_fixed) {
++ be_dst = base_to_dst(queue_get_free(mbc->dst));
++ if (!be_dst)
++ return NULL;
++ }
++ else {
++ be_dst = base_to_dst(queue_tryget_free(mbc->dst));
++ if (!be_dst) {
++ be_dst = qe_dst_new(mbc->this_wlm, mbc->dst->memtype);
++ if (!be_dst)
++ return NULL;
++
++ // The new entry has no V4L2 buffer yet, so it can simply be freed on failure
++ if (create_dst_bufs(mbc, 1, &be_dst) != 1) {
++ qe_dst_free(be_dst);
++ return NULL;
++ }
++ }
++ }
++
++ if (mbc->dst->memtype == MEDIABUFS_MEMORY_MMAP) {
++ if (qe_import_from_buf(mbc, &be_dst->base, &mbc->dst_fmt, be_dst->base.index, true)) {
++ request_err(mbc->dc, "Failed to export as dmabuf\n");
++ queue_put_free(mbc->dst, &be_dst->base);
++ return NULL;
++ }
++ }
++ else {
++ if (qe_alloc_from_fmt(&be_dst->base, dbsc, &mbc->dst_fmt)) {
++ /* Given how create buf works we can't uncreate it on alloc failure
++ * all we can do is put it on the free Q
++ */
++ queue_put_free(mbc->dst, &be_dst->base);
++ return NULL;
++ }
++ }
++
++ be_dst->base.status = QENT_PENDING;
++ atomic_store(&be_dst->base.ref_count, 0);
++ return be_dst;
++}
++
++/* Return a pointer to the control block's current destination format */
++const struct v4l2_format *mediabufs_dst_fmt(struct mediabufs_ctl *const mbc)
++{
++    const struct v4l2_format *const fmt = &mbc->dst_fmt;
++    return fmt;
++}
++
++/*
++ * Negotiate the destination format for width x height.
++ *
++ * Two passes are made over the driver's formats: first those without
++ * V4L2_FMT_FLAG_EMULATED, then those with it. In each pass the first format
++ * that accept_fn(accept_v, ...) approves and the driver can set is used.
++ *
++ * Returns the status of the first pass that yields anything other than
++ * MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE, or that error if both passes
++ * fail to find a format.
++ */
++MediaBufsStatus mediabufs_dst_fmt_set(struct mediabufs_ctl *const mbc,
++                                      const unsigned int width,
++                                      const unsigned int height,
++                                      mediabufs_dst_fmt_accept_fn *const accept_fn,
++                                      void *const accept_v)
++{
++    static const struct {
++        unsigned int flags_must;
++        unsigned int flags_not;
++    } trys[] = {
++        {0, V4L2_FMT_FLAG_EMULATED},
++        {V4L2_FMT_FLAG_EMULATED, 0},
++    };
++    /* Captured up front: each attempt rewrites mbc->dst_fmt */
++    const enum v4l2_buf_type buf_type = mbc->dst_fmt.type;
++    MediaBufsStatus status;
++    unsigned int pass = 0;
++
++    do {
++        status = find_fmt_flags(&mbc->dst_fmt, mbc->vfd,
++                                buf_type,
++                                trys[pass].flags_must,
++                                trys[pass].flags_not,
++                                width, height, accept_fn, accept_v);
++        if (status != MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE)
++            break;
++    } while (++pass != sizeof(trys)/sizeof(trys[0]));
++
++    return status;
++}
++
++// ** This is a mess if we get partial alloc but without any way to remove
++// individual V4L2 Q members we are somewhat stuffed
++/*
++ * Create n destination queue slots of the given memory type and put them on
++ * the dst free list.
++ *
++ * mbc     control block
++ * n       number of slots wanted; at most the capacity of the local qes[]
++ * fixed   stored as mbc->dst_fixed on success
++ * memtype memory type recorded on the dst pool and on each new entry
++ *
++ * Returns MEDIABUFS_STATUS_SUCCESS only if all n V4L2 buffers were created.
++ * On a partial creation the entries that did get a V4L2 buffer stay on the
++ * free list and the rest are freed; MEDIABUFS_ERROR_ALLOCATION_FAILED is
++ * returned.
++ */
++MediaBufsStatus mediabufs_dst_slots_create(struct mediabufs_ctl *const mbc, const unsigned int n, const bool fixed, const enum mediabufs_memory memtype)
++{
++    unsigned int i;
++    int a;
++    unsigned int queued = 0;   /* entries handed over to the free list */
++    unsigned int qc;
++    struct qent_dst * qes[32];
++
++    if (n > sizeof(qes)/sizeof(qes[0]))
++        return MEDIABUFS_ERROR_ALLOCATION_FAILED;
++
++    mbc->dst->memtype = memtype;
++
++    // Create qents first as it is hard to get rid of the V4L2 buffers on error
++    for (qc = 0; qc != n; ++qc)
++    {
++        if ((qes[qc] = qe_dst_new(mbc->this_wlm, mbc->dst->memtype)) == NULL)
++            goto fail;
++    }
++
++    a = create_dst_bufs(mbc, n, qes);
++
++    /* Only counts in 0..n are usable: a negative value is an error and a
++     * larger one would index past the n entries that exist in qes[] */
++    if (a < 0 || (unsigned int)a > n)
++        goto fail;
++
++    for (queued = 0; queued != (unsigned int)a; ++queued)
++        queue_put_free(mbc->dst, &qes[queued]->base);
++
++    if (queued != n)
++        goto fail;
++
++    mbc->dst_fixed = fixed;
++    return MEDIABUFS_STATUS_SUCCESS;
++
++fail:
++    /* Free every entry that was created but never reached the free list */
++    for (i = queued; i != qc; ++i)
++        qe_dst_free(qes[i]);
++
++    return MEDIABUFS_ERROR_ALLOCATION_FAILED;
++}
++
++/*
++ * Take a source entry from the src free list, waiting for one if necessary,
++ * and mark it QENT_PENDING.
++ *
++ * Returns NULL if no entry could be obtained (queue_get_free() failed).
++ */
++struct qent_src *mediabufs_src_qent_get(struct mediabufs_ctl *const mbc)
++{
++    struct qent_base * buf = queue_get_free(mbc->src);
++
++    /* queue_get_free() returns NULL when its wait fails; don't dereference it */
++    if (!buf)
++        return NULL;
++
++    buf->status = QENT_PENDING;
++    return base_to_src(buf);
++}
++
++/*
++ * Give an unused source entry back to the src free list and clear the
++ * caller's pointer. A NULL *pqe_src is ignored.
++ */
++void mediabufs_src_qent_abort(struct mediabufs_ctl *const mbc, struct qent_src **const pqe_src)
++{
++    struct qent_src *const ent = *pqe_src;
++
++    if (ent != NULL) {
++        *pqe_src = NULL;
++        queue_put_free(mbc->src, &ent->base);
++    }
++}
++
++/*
++ * Check whether the driver supports memory type m for format f.
++ *
++ * A zero-count VIDIOC_CREATE_BUFS is issued to read the queue's
++ * capabilities. MMAP is accepted whenever that ioctl succeeds; DMABUF
++ * additionally needs V4L2_BUF_CAP_SUPPORTS_DMABUF; anything else is
++ * unsupported.
++ */
++static MediaBufsStatus
++chk_memory_type(struct mediabufs_ctl *const mbc,
++                const struct v4l2_format * const f,
++                const enum mediabufs_memory m)
++{
++    struct v4l2_create_buffers probe = {
++        .count = 0,
++        .memory = V4L2_MEMORY_MMAP,
++        .format = *f
++    };
++
++    if (ioctl(mbc->vfd, VIDIOC_CREATE_BUFS, &probe) != 0)
++        return MEDIABUFS_ERROR_OPERATION_FAILED;
++
++    if (m == MEDIABUFS_MEMORY_DMABUF) {
++        // 0 = Unknown but assume not in that case
++        if ((probe.capabilities & V4L2_BUF_CAP_SUPPORTS_DMABUF) == 0)
++            return MEDIABUFS_ERROR_UNSUPPORTED_MEMORY;
++        return MEDIABUFS_STATUS_SUCCESS;
++    }
++
++    if (m == MEDIABUFS_MEMORY_MMAP)
++        return MEDIABUFS_STATUS_SUCCESS;
++
++    return MEDIABUFS_ERROR_UNSUPPORTED_MEMORY;
++}
++
++/* Check whether memtype is usable for the current source format */
++MediaBufsStatus
++mediabufs_src_chk_memtype(struct mediabufs_ctl *const mbc, const enum mediabufs_memory memtype)
++{
++    const struct v4l2_format *const fmt = &mbc->src_fmt;
++    return chk_memory_type(mbc, fmt, memtype);
++}
++
++/* Check whether memtype is usable for the current destination format */
++MediaBufsStatus
++mediabufs_dst_chk_memtype(struct mediabufs_ctl *const mbc, const enum mediabufs_memory memtype)
++{
++    const struct v4l2_format *const fmt = &mbc->dst_fmt;
++    return chk_memory_type(mbc, fmt, memtype);
++}
++
++/* src format must have been set up before this */
++/*
++ * Create the pool of source buffers. The src format must have been set up
++ * before this is called.
++ *
++ * mbc     control block
++ * dbsc    dmabuf allocator, used for MEDIABUFS_MEMORY_DMABUF
++ * n       number of buffers wanted; reduced if the driver grants fewer
++ * memtype MEDIABUFS_MEMORY_MMAP (buffers are mmap()ed from the driver) or
++ *         MEDIABUFS_MEMORY_DMABUF (buffers are allocated from dbsc)
++ *
++ * Any entries already on the src free list are destroyed first. On failure
++ * everything created so far is destroyed and the driver's buffers are
++ * released with a zero-count VIDIOC_REQBUFS.
++ *
++ * Returns MEDIABUFS_STATUS_SUCCESS or MEDIABUFS_ERROR_OPERATION_FAILED.
++ */
++MediaBufsStatus mediabufs_src_pool_create(struct mediabufs_ctl *const mbc,
++                                          struct dmabufs_ctl * const dbsc,
++                                          unsigned int n, const enum mediabufs_memory memtype)
++{
++    unsigned int i;
++    struct v4l2_requestbuffers req = {
++        .count = n,
++        .type = mbc->src_fmt.type,
++        .memory = mediabufs_memory_to_v4l2(memtype)
++    };
++
++    bq_free_all_free_src(mbc->src);
++
++    while (ioctl(mbc->vfd, VIDIOC_REQBUFS, &req) == -1) {
++        if (errno != EINTR) {
++            request_err(mbc->dc, "%s: Failed to request src bufs\n", __func__);
++            return MEDIABUFS_ERROR_OPERATION_FAILED;
++        }
++    }
++
++    if (n > req.count) {
++        request_info(mbc->dc, "Only allocated %d of %d src buffers requested\n", req.count, n);
++        n = req.count;
++    }
++
++    for (i = 0; i != n; ++i) {
++        struct qent_src *const be_src = qe_src_new(memtype);
++        if (!be_src) {
++            request_err(mbc->dc, "Failed to create src be %d\n", i);
++            goto fail;
++        }
++        switch (memtype) {
++            case MEDIABUFS_MEMORY_MMAP:
++                if (qe_import_from_buf(mbc, &be_src->base, &mbc->src_fmt, i, false)) {
++                    qe_src_free(be_src);
++                    goto fail;
++                }
++                be_src->fixed_size = 1;
++                break;
++            case MEDIABUFS_MEMORY_DMABUF:
++                if (qe_alloc_from_fmt(&be_src->base, dbsc, &mbc->src_fmt)) {
++                    qe_src_free(be_src);
++                    goto fail;
++                }
++                be_src->fixed_size = !mediabufs_src_resizable(mbc);
++                break;
++            default:
++                request_err(mbc->dc, "Unexpected memorty type\n");
++                /* be_src is not on any list yet, so free it here like the
++                 * other failure arms do */
++                qe_src_free(be_src);
++                goto fail;
++        }
++        be_src->base.index = i;
++
++        queue_put_free(mbc->src, &be_src->base);
++    }
++
++    mbc->src->memtype = memtype;
++    return MEDIABUFS_STATUS_SUCCESS;
++
++fail:
++    bq_free_all_free_src(mbc->src);
++    /* Ask the driver to drop the buffers requested above */
++    req.count = 0;
++    while (ioctl(mbc->vfd, VIDIOC_REQBUFS, &req) == -1 &&
++           errno == EINTR)
++        /* Loop */;
++
++    return MEDIABUFS_ERROR_OPERATION_FAILED;
++}
++
++
++
++/*
++ * Set stuff order:
++ * Set src fmt
++ * Set parameters (sps) on vfd
++ * Negotiate dst format (dst_fmt_set)
++ * Create src buffers
++ * Alloc a dst buffer or Create dst slots
++*/
++/*
++ * Turn streaming on for the source queue and then the destination queue.
++ * Already-on is a no-op success. If the destination fails to start, the
++ * source is turned off again before the error is returned.
++ */
++MediaBufsStatus mediabufs_stream_on(struct mediabufs_ctl *const mbc)
++{
++    const int vfd = mbc->vfd;
++
++    if (mbc->stream_on)
++        return MEDIABUFS_STATUS_SUCCESS;
++
++    if (set_stream(vfd, mbc->src_fmt.type, true) < 0) {
++        request_log("Failed to set stream on src type %d\n", mbc->src_fmt.type);
++        return MEDIABUFS_ERROR_OPERATION_FAILED;
++    }
++
++    if (set_stream(vfd, mbc->dst_fmt.type, true) >= 0) {
++        mbc->stream_on = true;
++        return MEDIABUFS_STATUS_SUCCESS;
++    }
++
++    /* Destination failed: roll the source back */
++    request_log("Failed to set stream on dst type %d\n", mbc->dst_fmt.type);
++    set_stream(vfd, mbc->src_fmt.type, false);
++    return MEDIABUFS_ERROR_OPERATION_FAILED;
++}
++
++/*
++ * Turn streaming off for the destination queue and then the source queue.
++ * Already-off is a no-op success. Both queues are always attempted and the
++ * stream is marked off regardless; an error is returned if either failed.
++ */
++MediaBufsStatus mediabufs_stream_off(struct mediabufs_ctl *const mbc)
++{
++    const int vfd = mbc->vfd;
++    MediaBufsStatus result = MEDIABUFS_STATUS_SUCCESS;
++
++    if (!mbc->stream_on)
++        return MEDIABUFS_STATUS_SUCCESS;
++
++    if (set_stream(vfd, mbc->dst_fmt.type, false) < 0) {
++        request_log("Failed to set stream off dst type %d\n", mbc->dst_fmt.type);
++        result = MEDIABUFS_ERROR_OPERATION_FAILED;
++    }
++
++    if (set_stream(vfd, mbc->src_fmt.type, false) < 0) {
++        request_log("Failed to set stream off src type %d\n", mbc->src_fmt.type);
++        result = MEDIABUFS_ERROR_OPERATION_FAILED;
++    }
++
++    mbc->stream_on = false;
++    return result;
++}
++
++/*
++ * Apply n extended controls with VIDIOC_S_EXT_CTRLS.
++ * If mreq is non-NULL the controls are set on that request rather than
++ * directly on the device.
++ *
++ * Returns 0 on success or -errno on failure. EINTR is retried.
++ */
++int mediabufs_ctl_set_ext_ctrls(struct mediabufs_ctl * mbc, struct media_request * const mreq, struct v4l2_ext_control control_array[], unsigned int n)
++{
++    struct v4l2_ext_controls controls = {
++        .controls = control_array,
++        .count = n
++    };
++
++    if (mreq != NULL) {
++        controls.which = V4L2_CTRL_WHICH_REQUEST_VAL;
++        controls.request_fd = media_request_fd(mreq);
++    }
++
++    for (;;) {
++        int err;
++
++        if (ioctl(mbc->vfd, VIDIOC_S_EXT_CTRLS, &controls) == 0)
++            return 0;
++
++        err = errno;
++        if (err != EINTR) {
++            request_err(mbc->dc, "Unable to set controls: %s\n", strerror(err));
++            return -err;
++        }
++    }
++}
++
++/*
++ * Set a single pointer-type extended control (id, data, size), optionally
++ * on request mreq. Maps the result of mediabufs_ctl_set_ext_ctrls() to a
++ * MediaBufsStatus.
++ */
++MediaBufsStatus mediabufs_set_ext_ctrl(struct mediabufs_ctl *const mbc,
++                                       struct media_request * const mreq,
++                                       unsigned int id, void *data,
++                                       unsigned int size)
++{
++    struct v4l2_ext_control control = {
++        .id = id,
++        .ptr = data,
++        .size = size
++    };
++
++    if (mediabufs_ctl_set_ext_ctrls(mbc, mreq, &control, 1) != 0)
++        return MEDIABUFS_ERROR_OPERATION_FAILED;
++    return MEDIABUFS_STATUS_SUCCESS;
++}
++
++/*
++ * Set the source format on the device via fmt_set() and log on failure.
++ * Returns fmt_set()'s status unchanged.
++ */
++MediaBufsStatus mediabufs_src_fmt_set(struct mediabufs_ctl *const mbc,
++                                      enum v4l2_buf_type buf_type,
++                                      const uint32_t pixfmt,
++                                      const uint32_t width, const uint32_t height,
++                                      const size_t bufsize)
++{
++    const MediaBufsStatus status =
++        fmt_set(&mbc->src_fmt, mbc->vfd, buf_type, pixfmt, width, height, bufsize);
++
++    if (status == MEDIABUFS_STATUS_SUCCESS)
++        return status;
++
++    request_err(mbc->dc, "Failed to set src buftype %d, format %#x %dx%d\n", buf_type, pixfmt, width, height);
++    return status;
++}
++
++int mediabufs_ctl_query_ext_ctrls(struct mediabufs_ctl * mbc, struct v4l2_query_ext_ctrl ctrls[], unsigned int n)
++{
++    // Query n controls in place; a failed entry gets type 0. Returns 0 or the last failure's -errno
++    int last_err = 0;
++    for (struct v4l2_query_ext_ctrl *q = ctrls; q != ctrls + n; ++q) {
++        while (ioctl(mbc->vfd, VIDIOC_QUERY_EXT_CTRL, q) != 0) {
++            const int e = errno;
++            if (e == EINTR)
++                continue;
++            // Often used for probing - errors are to be expected
++            request_debug(mbc->dc, "Failed to query ext id=%#x, err=%d\n", q->id, e);
++            q->type = 0; // 0 is invalid
++            last_err = -e;
++            break;
++        }
++    }
++    return last_err;
++}
++
++int mediabufs_src_resizable(const struct mediabufs_ctl *const mbc)
++{
++#if 1
++ return 0; // Hard-wired: source buffers are currently always reported as not resizable
++#else
++ // Single planar OUTPUT can only take exact size buffers
++ // Multiplanar will take larger than negotiated
++ return V4L2_TYPE_IS_MULTIPLANAR(mbc->src_fmt.type);
++#endif
++}
++
++static void mediabufs_ctl_delete(struct mediabufs_ctl *const mbc)
++{
++ if (!mbc)
++ return;
++ // Final teardown, called from mediabufs_ctl_unref() once the last reference is dropped
++ // Break the weak link first
++ ff_weak_link_break(&mbc->this_wlm);
++
++ polltask_delete(&mbc->pt); // Waits for the poll task to be dequeued / its callback to finish if it was queued or running
++
++ mediabufs_stream_off(mbc);
++
++ // Empty v4l2 buffer stash
++ request_buffers(mbc->vfd, mbc->src_fmt.type, V4L2_MEMORY_MMAP, 0);
++ request_buffers(mbc->vfd, mbc->dst_fmt.type, V4L2_MEMORY_MMAP, 0);
++
++ bq_free_all_free_src(mbc->src);
++ bq_free_all_inuse_src(mbc->src);
++ bq_free_all_free_dst(mbc->dst);
++ // In-use dst entries are not freed here: each is marked QENT_ERROR with a zero timestamp and completed
++ {
++ struct qent_dst *dst_be;
++ while ((dst_be = base_to_dst(bq_get_inuse(mbc->dst))) != NULL) {
++ dst_be->base.timestamp = (struct timeval){0};
++ dst_be->base.status = QENT_ERROR;
++ qe_dst_done(dst_be); // presumably releases whoever is waiting on this entry - confirm in qe_dst_done()
++ }
++ }
++
++ queue_delete(mbc->dst);
++ queue_delete(mbc->src);
++ close(mbc->vfd);
++ pthread_mutex_destroy(&mbc->lock);
++
++ free(mbc);
++}
++
++struct mediabufs_ctl * mediabufs_ctl_ref(struct mediabufs_ctl *const mbc)
++{
++ atomic_fetch_add(&mbc->ref_count, 1); // Take one more reference; mbc is dereferenced unconditionally so must not be NULL
++ return mbc; // Same pointer back, so the call can be used inline in an assignment
++}
++
++void mediabufs_ctl_unref(struct mediabufs_ctl **const pmbc)
++{
++ struct mediabufs_ctl *const mbc = *pmbc;
++ int n;
++ // Drop the caller's reference and NULL *pmbc; a NULL *pmbc is a no-op
++ if (!mbc)
++ return;
++ *pmbc = NULL;
++ n = atomic_fetch_sub(&mbc->ref_count, 1); // n is the count before the decrement
++ if (n) // The count is zero-based: a previous value of 0 means this was the last reference
++ return;
++ mediabufs_ctl_delete(mbc);
++}
++
++unsigned int mediabufs_ctl_driver_version(struct mediabufs_ctl *const mbc)
++{
++ return mbc->capability.version; // Version field of the capability struct filled by VIDIOC_QUERYCAP in set_capabilities()
++}
++
++static int set_capabilities(struct mediabufs_ctl *const mbc)
++{
++    // Probe the device capabilities and pick the src/dst buffer types from them.
++    // Returns 0, -errno if VIDIOC_QUERYCAP fails, or -EINVAL if the device is not M2M.
++    const struct v4l2_capability *const cap = &mbc->capability;
++    uint32_t effective;
++
++    if (ioctl(mbc->vfd, VIDIOC_QUERYCAP, &mbc->capability) != 0) {
++        const int saved = errno;
++        request_err(mbc->dc, "Failed to get capabilities: %s\n", strerror(saved));
++        return -saved;
++    }
++
++    // Use the per-device caps when the driver flags them as present
++    if ((cap->capabilities & V4L2_CAP_DEVICE_CAPS) != 0)
++        effective = cap->device_caps;
++    else
++        effective = cap->capabilities;
++
++    if ((effective & V4L2_CAP_VIDEO_M2M_MPLANE) == 0 && (effective & V4L2_CAP_VIDEO_M2M) == 0) {
++        request_err(mbc->dc, "No M2M capabilities (%#x)\n", effective);
++        return -EINVAL;
++    }
++
++    // Multi-planar takes precedence when both kinds are advertised
++    mbc->src_fmt.type = (effective & V4L2_CAP_VIDEO_M2M_MPLANE) ? V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE : V4L2_BUF_TYPE_VIDEO_OUTPUT;
++    mbc->dst_fmt.type = (effective & V4L2_CAP_VIDEO_M2M_MPLANE) ? V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE : V4L2_BUF_TYPE_VIDEO_CAPTURE;
++    return 0;
++}
++
++/* One of these per context: opens vpath, checks it is an M2M device and creates the src/dst queues; NULL on failure */
++struct mediabufs_ctl * mediabufs_ctl_new(void * const dc, const char * vpath, struct pollqueue *const pq)
++{
++    struct mediabufs_ctl *const mbc = calloc(1, sizeof(*mbc)); // Zeroed, so ref_count starts at 0
++
++    if (!mbc)
++        return NULL;
++
++    mbc->dc = dc; // Only used here as the logging context
++    mbc->pq = pq;
++    pthread_mutex_init(&mbc->lock, NULL);
++
++    /* Pick a default - could we scan for this? */
++    if (vpath == NULL)
++        vpath = "/dev/media0";
++
++    while ((mbc->vfd = open(vpath, O_RDWR)) == -1)
++    {
++        const int err = errno;
++        if (err != EINTR) {
++            request_err(dc, "Failed to open video dev '%s': %s\n", vpath, strerror(err));
++            goto fail0;
++        }
++    }
++
++    if (set_capabilities(mbc)) {
++        request_err(dc, "Bad capabilities for video dev '%s'\n", vpath);
++        goto fail1;
++    }
++
++    mbc->src = queue_new(mbc->vfd);
++    if (!mbc->src)
++        goto fail1;
++    mbc->dst = queue_new(mbc->vfd);
++    if (!mbc->dst)
++        goto fail2;
++    mbc->pt = polltask_new(pq, mbc->vfd, POLLIN | POLLOUT, mediabufs_poll_cb, mbc);
++    if (!mbc->pt)
++        goto fail3;
++    mbc->this_wlm = ff_weak_link_new(mbc);
++    if (!mbc->this_wlm)
++        goto fail4;
++
++    /* Cannot add polltask now - polling with nothing pending
++     * generates infinite error polls
++     */
++    return mbc;
++
++fail4: // Unwind: each label releases what was acquired before the jump to it
++    polltask_delete(&mbc->pt);
++fail3:
++    queue_delete(mbc->dst);
++fail2:
++    queue_delete(mbc->src);
++fail1:
++    close(mbc->vfd);
++fail0:
++    pthread_mutex_destroy(&mbc->lock); // Initialised before any failure exit, so always valid to destroy here
++    free(mbc);
++    request_info(dc, "%s: FAILED\n", __func__);
++    return NULL;
++}
++
++
++
+--- /dev/null
++++ b/libavcodec/v4l2_req_media.h
+@@ -0,0 +1,171 @@
++/*
++e.h
++*
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sub license, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial portions
++ * of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
++ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
++ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
++ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
++ */
++
++#ifndef _MEDIA_H_
++#define _MEDIA_H_
++
++#include <stdbool.h>
++#include <stdint.h>
++
++struct v4l2_format;
++struct v4l2_fmtdesc;
++struct v4l2_query_ext_ctrl;
++
++struct pollqueue;
++struct media_request;
++struct media_pool;
++
++typedef enum media_buf_status {
++ MEDIABUFS_STATUS_SUCCESS = 0, // Keep at 0: callers test a returned status directly as a failure flag
++ MEDIABUFS_ERROR_OPERATION_FAILED,
++ MEDIABUFS_ERROR_DECODING_ERROR,
++ MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE,
++ MEDIABUFS_ERROR_UNSUPPORTED_RT_FORMAT,
++ MEDIABUFS_ERROR_ALLOCATION_FAILED,
++ MEDIABUFS_ERROR_UNSUPPORTED_MEMORY,
++} MediaBufsStatus;
++
++struct media_pool * media_pool_new(const char * const media_path,
++ struct pollqueue * const pq,
++ const unsigned int n);
++void media_pool_delete(struct media_pool ** pmp);
++
++// Obtain a media request
++// Will block if none available - has a 2sec timeout
++struct media_request * media_request_get(struct media_pool * const mp);
++int media_request_fd(const struct media_request * const req);
++
++// Start this request
++// Request structure is returned to pool once done
++int media_request_start(struct media_request * const req);
++
++// Return an *unstarted* media_request to the pool
++// May later be upgraded to allow for aborting a started req
++int media_request_abort(struct media_request ** const preq);
++
++
++struct mediabufs_ctl;
++struct qent_src;
++struct qent_dst;
++struct dmabuf_h;
++struct dmabufs_ctl;
++
++// 1-1 mapping to V4L2 type - just defined separately to avoid some include versioning difficulties
++enum mediabufs_memory {
++ MEDIABUFS_MEMORY_UNSET = 0,
++ MEDIABUFS_MEMORY_MMAP = 1,
++ MEDIABUFS_MEMORY_USERPTR = 2,
++ MEDIABUFS_MEMORY_OVERLAY = 3,
++ MEDIABUFS_MEMORY_DMABUF = 4,
++};
++
++int qent_src_params_set(struct qent_src *const be, const struct timeval * timestamp);
++struct timeval qent_dst_timestamp_get(const struct qent_dst *const be_dst);
++
++// prealloc
++int qent_src_alloc(struct qent_src *const be_src, const size_t len, struct dmabufs_ctl * dbsc);
++// dbsc may be NULL if realloc not required
++int qent_src_data_copy(struct qent_src *const be_src, const size_t offset, const void *const src, const size_t len, struct dmabufs_ctl * dbsc);
++const struct dmabuf_h * qent_dst_dmabuf(const struct qent_dst *const be, unsigned int plane);
++int qent_dst_dup_fd(const struct qent_dst *const be, unsigned int plane);
++MediaBufsStatus qent_dst_wait(struct qent_dst *const be);
++void qent_dst_delete(struct qent_dst *const be);
++// Returns a qent_dst to its mbc free Q or deletes it if the mbc is dead
++void qent_dst_unref(struct qent_dst ** const pbe_dst);
++struct qent_dst * qent_dst_ref(struct qent_dst * const be_dst);
++
++const uint8_t * qent_dst_data(struct qent_dst *const be, unsigned int buf_no);
++MediaBufsStatus qent_dst_read_start(struct qent_dst *const be);
++MediaBufsStatus qent_dst_read_stop(struct qent_dst *const be);
++/* Import an fd unattached to any mediabuf */
++MediaBufsStatus qent_dst_import_fd(struct qent_dst *const be_dst,
++ unsigned int plane,
++ int fd, size_t size);
++
++const char * mediabufs_memory_name(const enum mediabufs_memory m);
++
++MediaBufsStatus mediabufs_start_request(struct mediabufs_ctl *const mbc,
++ struct media_request **const pmreq,
++ struct qent_src **const psrc_be,
++ struct qent_dst *const dst_be,
++ const bool is_final);
++// Get / alloc a dst buffer & associate with a slot
++// If the dst pool is empty then behaviour depends on the fixed flag passed to
++// dst_slots_create. Default is !fixed = unlimited alloc
++struct qent_dst* mediabufs_dst_qent_alloc(struct mediabufs_ctl *const mbc,
++ struct dmabufs_ctl *const dbsc);
++// Create dst slots without alloc
++// If fixed true then qent_alloc will only get slots from this pool and will
++// block until a qent has been unrefed
++MediaBufsStatus mediabufs_dst_slots_create(struct mediabufs_ctl *const mbc, const unsigned int n, const bool fixed, const enum mediabufs_memory memtype);
++
++MediaBufsStatus mediabufs_stream_on(struct mediabufs_ctl *const mbc);
++MediaBufsStatus mediabufs_stream_off(struct mediabufs_ctl *const mbc);
++const struct v4l2_format *mediabufs_dst_fmt(struct mediabufs_ctl *const mbc);
++
++typedef int mediabufs_dst_fmt_accept_fn(void * v, const struct v4l2_fmtdesc *fmtdesc);
++
++MediaBufsStatus mediabufs_dst_fmt_set(struct mediabufs_ctl *const mbc,
++ const unsigned int width,
++ const unsigned int height,
++ mediabufs_dst_fmt_accept_fn *const accept_fn,
++ void *const accept_v);
++struct qent_src *mediabufs_src_qent_get(struct mediabufs_ctl *const mbc);
++void mediabufs_src_qent_abort(struct mediabufs_ctl *const mbc, struct qent_src **const pqe_src);
++
++int mediabufs_ctl_set_ext_ctrls(struct mediabufs_ctl * mbc, struct media_request * const mreq,
++ struct v4l2_ext_control control_array[], unsigned int n);
++MediaBufsStatus mediabufs_set_ext_ctrl(struct mediabufs_ctl *const mbc,
++ struct media_request * const mreq,
++ unsigned int id, void *data,
++ unsigned int size);
++int mediabufs_ctl_query_ext_ctrls(struct mediabufs_ctl * mbc, struct v4l2_query_ext_ctrl ctrls[], unsigned int n);
++
++int mediabufs_src_resizable(const struct mediabufs_ctl *const mbc);
++
++MediaBufsStatus mediabufs_src_fmt_set(struct mediabufs_ctl *const mbc,
++ enum v4l2_buf_type buf_type,
++ const uint32_t pixfmt,
++ const uint32_t width, const uint32_t height,
++ const size_t bufsize);
++
++MediaBufsStatus mediabufs_src_pool_create(struct mediabufs_ctl *const rw,
++ struct dmabufs_ctl * const dbsc,
++ unsigned int n,
++ const enum mediabufs_memory memtype);
++
++// Want to have appropriate formats set first
++MediaBufsStatus mediabufs_src_chk_memtype(struct mediabufs_ctl *const mbc, const enum mediabufs_memory memtype);
++MediaBufsStatus mediabufs_dst_chk_memtype(struct mediabufs_ctl *const mbc, const enum mediabufs_memory memtype);
++
++#define MEDIABUFS_DRIVER_VERSION(a, b, c) (((a) << 16) | ((b) << 8) | (c))
++unsigned int mediabufs_ctl_driver_version(struct mediabufs_ctl *const mbc);
++
++struct mediabufs_ctl * mediabufs_ctl_new(void * const dc,
++ const char *vpath, struct pollqueue *const pq);
++void mediabufs_ctl_unref(struct mediabufs_ctl **const pmbc);
++struct mediabufs_ctl * mediabufs_ctl_ref(struct mediabufs_ctl *const mbc);
++
++
++#endif
+--- /dev/null
++++ b/libavcodec/v4l2_req_pollqueue.c
+@@ -0,0 +1,361 @@
++#include <errno.h>
++#include <limits.h>
++#include <poll.h>
++#include <pthread.h>
++#include <semaphore.h>
++#include <stdatomic.h>
++#include <stdbool.h>
++#include <stdlib.h>
++#include <stdint.h>
++#include <stdio.h>
++#include <string.h>
++#include <unistd.h>
++#include <sys/eventfd.h>
++
++#include "v4l2_req_pollqueue.h"
++#include "v4l2_req_utils.h"
++
++
++struct pollqueue;
++
++enum polltask_state {
++ POLLTASK_UNQUEUED = 0, // Not on the queue; also the zero-initialised state of a new task
++ POLLTASK_QUEUED, // On the queue, waiting for an event or its timeout
++ POLLTASK_RUNNING, // Taken off the queue; its callback is being run by the poll thread
++ POLLTASK_Q_KILL, // Delete requested while not running; the poll thread unlinks it and posts kill_sem
++ POLLTASK_RUN_KILL, // Delete requested while the callback runs; kill_sem is posted once it returns
++};
++
++struct polltask {
++ struct polltask *next; // Queue links; both NULL while the task is not on the queue
++ struct polltask *prev;
++ struct pollqueue *q; // Owning queue; a reference is taken in polltask_new() and dropped in polltask_delete()
++ enum polltask_state state; // Changed with q->lock held
++
++ int fd; // fd and event mask handed to poll()
++ short events;
++
++ void (*fn)(void *v, short revents); // Callback, invoked by the poll thread with q->lock released
++ void * v; // Opaque argument passed to fn
++
++ uint64_t timeout; /* CLOCK_MONOTONIC time, 0 => never */
++ sem_t kill_sem; // Posted by the poll thread to release a waiting polltask_delete()
++};
++
++struct pollqueue {
++ atomic_int ref_count; // Zero-based: the queue is freed when an unref finds the previous value was 0
++ pthread_mutex_t lock; // Held while the task list, task states or no_prod are touched
++
++ struct polltask *head; // Doubly linked list of queued tasks; new tasks are appended at tail
++ struct polltask *tail;
++
++ bool kill; // Set by pollqueue_free() to make the worker loop exit
++ bool no_prod; // True while the worker dispatches callbacks; suppresses wake-up writes
++ int prod_fd; // eventfd written to by pollqueue_prod() to wake the worker out of poll()
++ struct polltask *prod_pt; // Task watching prod_fd; re-queued by prod_fn() after each wake-up
++ pthread_t worker; // Thread running poll_thread()
++};
++
++struct polltask *polltask_new(struct pollqueue *const pq,
++                              const int fd, const short events,
++                              void (*const fn)(void *v, short revents),
++                              void *const v)
++{
++    // Allocate an unqueued task that will call fn(v, revents) for fd/events.
++    // The task holds a reference on pq. Returns NULL if events is 0 or the
++    // allocation fails.
++    struct polltask *task;
++
++    if (events == 0)
++        return NULL;
++
++    task = calloc(1, sizeof(*task));
++    if (task == NULL)
++        return NULL;
++
++    // calloc leaves next/prev NULL, state POLLTASK_UNQUEUED and timeout 0
++    task->q = pollqueue_ref(pq);
++    task->fd = fd;
++    task->events = events;
++    task->fn = fn;
++    task->v = v;
++
++    sem_init(&task->kill_sem, 0, 0);
++
++    return task;
++}
++
++static void pollqueue_rem_task(struct pollqueue *const pq, struct polltask *const pt)
++{
++    // Unlink pt from pq's doubly linked list and clear its own links
++    struct polltask *const before = pt->prev;
++    struct polltask *const after = pt->next;
++
++    // Join the neighbours, updating head/tail where pt was at an end
++    *(before ? &before->next : &pq->head) = after;
++    *(after ? &after->prev : &pq->tail) = before;
++
++    pt->prev = NULL;
++    pt->next = NULL;
++}
++
++static void polltask_free(struct polltask * const pt)
++{
++ sem_destroy(&pt->kill_sem); // Counterpart of the sem_init() in polltask_new()
++ free(pt); // Neither unlinks pt nor drops its queue reference - callers deal with those
++}
++
++static int pollqueue_prod(const struct pollqueue *const pq)
++{
++ static const uint64_t one = 1; // Written to the eventfd as one 8-byte value
++ return write(pq->prod_fd, &one, sizeof(one)); // Makes prod_fd readable so the worker's poll() returns; result is write()'s own
++}
++
++void polltask_delete(struct polltask **const ppt)
++{
++ struct polltask *const pt = *ppt;
++ struct pollqueue * pq;
++ enum polltask_state state;
++ bool prodme;
++ // Delete *ppt, first waiting for the poll thread to let go of it; a NULL *ppt is a no-op
++ if (!pt)
++ return;
++
++ pq = pt->q;
++ pthread_mutex_lock(&pq->lock);
++ state = pt->state; // Remember the state we found; it decides below whether we need to wait
++ pt->state = (state == POLLTASK_RUNNING) ? POLLTASK_RUN_KILL : POLLTASK_Q_KILL;
++ prodme = !pq->no_prod;
++ pthread_mutex_unlock(&pq->lock);
++ // An unqueued task is unknown to the poll thread, so nothing will post kill_sem: skip the wait
++ if (state != POLLTASK_UNQUEUED) {
++ if (prodme)
++ pollqueue_prod(pq); // Wake the worker so it notices the kill state promptly
++ while (sem_wait(&pt->kill_sem) && errno == EINTR)
++ /* loop */;
++ }
++
++ // Leave zapping the ref until we have DQed the PT as might well be
++ // legitimately used in it
++ *ppt = NULL;
++ polltask_free(pt);
++ pollqueue_unref(&pq); // Drops the reference taken in polltask_new()
++}
++
++static uint64_t pollqueue_now(int timeout)
++{
++    // CLOCK_MONOTONIC time in ms plus timeout; a computed 0 becomes 1, and 0 is returned only if the clock read fails
++    struct timespec ts;
++    uint64_t deadline_ms;
++    if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0)
++        return 0;
++    deadline_ms = (uint64_t)ts.tv_sec * 1000 + (ts.tv_nsec / 1000000) + timeout;
++    return deadline_ms != 0 ? deadline_ms : (uint64_t)1;
++}
++
++void pollqueue_add_task(struct polltask *const pt, const int timeout)
++{
++ bool prodme = false;
++ struct pollqueue * const pq = pt->q;
++ // Append pt to its queue; timeout is in ms, negative means no timeout. Ignored if pt is being deleted
++ pthread_mutex_lock(&pq->lock);
++ if (pt->state != POLLTASK_Q_KILL && pt->state != POLLTASK_RUN_KILL) {
++ if (pq->tail)
++ pq->tail->next = pt;
++ else
++ pq->head = pt;
++ pt->prev = pq->tail;
++ pt->next = NULL;
++ pt->state = POLLTASK_QUEUED; // NOTE(review): no check that pt is not already queued - callers presumably guarantee that
++ pt->timeout = timeout < 0 ? 0 : pollqueue_now(timeout);
++ pq->tail = pt;
++ prodme = !pq->no_prod; // No wake-up needed while the worker is in its dispatch loop
++ }
++ pthread_mutex_unlock(&pq->lock);
++ if (prodme)
++ pollqueue_prod(pq);
++}
++
++static void *poll_thread(void *v)
++{
++    // Worker loop: build a pollfd array from the queued tasks, poll(), then run the callback
++    // of each task with an event or an expired timeout. Exits on pq->kill or realloc/poll failure.
++    struct pollqueue *const pq = v;
++    struct pollfd *a = NULL;
++    size_t asize = 0;
++
++    pthread_mutex_lock(&pq->lock);
++    do {
++        unsigned int i;
++        unsigned int n = 0;
++        struct polltask *pt;
++        struct polltask *pt_next;
++        uint64_t now = pollqueue_now(0);
++        int timeout = -1;
++        int rv;
++
++        for (pt = pq->head; pt; pt = pt_next) {
++            int64_t t;
++            pt_next = pt->next;
++            // Deleted while queued: unlink it and release the waiting polltask_delete()
++            if (pt->state == POLLTASK_Q_KILL) {
++                pollqueue_rem_task(pq, pt);
++                sem_post(&pt->kill_sem);
++                continue;
++            }
++
++            if (n >= asize) {
++                const size_t new_size = asize ? asize * 2 : 4;
++                struct pollfd *const grown = realloc(a, new_size * sizeof(*a));
++                // On failure 'a' still owns the old array, so the exit path frees it
++                if (!grown) {
++                    request_log("Failed to realloc poll array to %zu\n", new_size);
++                    goto fail_locked;
++                }
++                a = grown;
++                asize = new_size;
++            }
++
++            a[n++] = (struct pollfd){
++                .fd = pt->fd,
++                .events = pt->events
++            };
++
++            t = (int64_t)(pt->timeout - now); // ms until this task's deadline (timeout 0 = never)
++            if (pt->timeout && t < INT_MAX &&
++                (timeout < 0 || (int)t < timeout))
++                timeout = (t < 0) ? 0 : (int)t;
++        }
++        pthread_mutex_unlock(&pq->lock);
++
++        if ((rv = poll(a, n, timeout)) == -1) {
++            if (errno != EINTR) {
++                request_log("Poll error: %s\n", strerror(errno));
++                goto fail_unlocked;
++            }
++        }
++
++        pthread_mutex_lock(&pq->lock);
++        now = pollqueue_now(0);
++
++        /* Prodding in this loop is pointless and might lead to infinite looping */
++        pq->no_prod = true;
++        for (i = 0, pt = pq->head; i < n; ++i, pt = pt_next) {
++            pt_next = pt->next;
++
++            /* Pending? */
++            if (a[i].revents ||
++                (pt->timeout && (int64_t)(now - pt->timeout) >= 0)) {
++                pollqueue_rem_task(pq, pt);
++                if (pt->state == POLLTASK_QUEUED)
++                    pt->state = POLLTASK_RUNNING;
++                if (pt->state == POLLTASK_Q_KILL)
++                    pt->state = POLLTASK_RUN_KILL;
++                pthread_mutex_unlock(&pq->lock);
++
++                /* This can add new entries to the Q but as those are added
++                 * to the tail our existing chain remains intact */
++                pt->fn(pt->v, a[i].revents);
++
++                pthread_mutex_lock(&pq->lock);
++                if (pt->state == POLLTASK_RUNNING)
++                    pt->state = POLLTASK_UNQUEUED;
++                if (pt->state == POLLTASK_RUN_KILL)
++                    sem_post(&pt->kill_sem);
++            }
++        }
++        pq->no_prod = false;
++
++    } while (!pq->kill);
++
++fail_locked:
++    pthread_mutex_unlock(&pq->lock);
++fail_unlocked:
++    free(a);
++    return NULL;
++}
++
++static void prod_fn(void *v, short revents)
++{
++ struct pollqueue *const pq = v;
++ char buf[8];
++ if (revents)
++ read(pq->prod_fd, buf, 8); // Drain the 8-byte eventfd counter so prod_fd stops polling readable; result ignored
++ if (!pq->kill)
++ pollqueue_add_task(pq->prod_pt, -1); // Re-arm: a task is dequeued before its callback runs
++}
++
++struct pollqueue * pollqueue_new(void)
++{
++    // Create a queue and its worker thread; it is returned with ref_count 0 (one owner), NULL on failure
++    struct pollqueue *pq = malloc(sizeof(*pq));
++    if (!pq)
++        return NULL;
++    *pq = (struct pollqueue){
++        .ref_count = ATOMIC_VAR_INIT(0),
++        .lock = PTHREAD_MUTEX_INITIALIZER,
++        .head = NULL, .tail = NULL,
++        .kill = false,
++        .prod_fd = -1
++    };
++
++    pq->prod_fd = eventfd(0, EFD_NONBLOCK);
++    if (pq->prod_fd == -1) // eventfd() signals failure with -1; 1 is a perfectly valid descriptor
++        goto fail1;
++    pq->prod_pt = polltask_new(pq, pq->prod_fd, POLLIN, prod_fn, pq);
++    if (!pq->prod_pt)
++        goto fail2;
++    pollqueue_add_task(pq->prod_pt, -1);
++    if (pthread_create(&pq->worker, NULL, poll_thread, pq))
++        goto fail3;
++    // Reset ref count: polltask_new() took a reference on pq for prod_pt
++    atomic_store(&pq->ref_count, 0);
++    return pq;
++
++fail3:
++    polltask_free(pq->prod_pt);
++fail2:
++    close(pq->prod_fd);
++fail1:
++    free(pq);
++    return NULL;
++}
++
++static void pollqueue_free(struct pollqueue *const pq)
++{
++ void *rv;
++ // Stop the worker thread, then release everything pollqueue_new() created
++ pthread_mutex_lock(&pq->lock);
++ pq->kill = true;
++ pollqueue_prod(pq); // Wake the worker so it re-evaluates its loop condition and sees kill
++ pthread_mutex_unlock(&pq->lock);
++
++ pthread_join(pq->worker, &rv); // NOTE(review): would self-join if the final unref ever ran on the worker thread - confirm callers
++ polltask_free(pq->prod_pt);
++ pthread_mutex_destroy(&pq->lock);
++ close(pq->prod_fd);
++ free(pq);
++}
++
++struct pollqueue * pollqueue_ref(struct pollqueue *const pq)
++{
++ atomic_fetch_add(&pq->ref_count, 1); // Take one more reference; pq is dereferenced unconditionally so must not be NULL
++ return pq; // Same pointer back, so the call can be used inside an initializer
++}
++
++void pollqueue_unref(struct pollqueue **const ppq)
++{
++ struct pollqueue * const pq = *ppq;
++ // Drop the caller's reference and NULL *ppq; a NULL *ppq is a no-op
++ if (!pq)
++ return;
++ *ppq = NULL;
++ // fetch_sub returns the previous value; the count is zero-based so 0 means this was the last reference
++ if (atomic_fetch_sub(&pq->ref_count, 1) != 0)
++ return;
++
++ pollqueue_free(pq);
++}
++
++
++
+--- /dev/null
++++ b/libavcodec/v4l2_req_pollqueue.h
+@@ -0,0 +1,18 @@
++#ifndef POLLQUEUE_H_
++#define POLLQUEUE_H_
++
++struct polltask;
++struct pollqueue;
++
++struct polltask *polltask_new(struct pollqueue *const pq,
++ const int fd, const short events,
++ void (*const fn)(void *v, short revents),
++ void *const v);
++void polltask_delete(struct polltask **const ppt);
++
++void pollqueue_add_task(struct polltask *const pt, const int timeout);
++struct pollqueue * pollqueue_new(void);
++void pollqueue_unref(struct pollqueue **const ppq);
++struct pollqueue * pollqueue_ref(struct pollqueue *const pq);
++
++#endif /* POLLQUEUE_H_ */
+--- /dev/null
++++ b/libavcodec/v4l2_req_utils.h
+@@ -0,0 +1,27 @@
++#ifndef AVCODEC_V4L2_REQ_UTILS_H
++#define AVCODEC_V4L2_REQ_UTILS_H
++
++#include <stdint.h>
++#include "libavutil/log.h"
++
++#define request_log(...) av_log(NULL, AV_LOG_INFO, __VA_ARGS__)
++
++#define request_err(_ctx, ...) av_log(_ctx, AV_LOG_ERROR, __VA_ARGS__)
++#define request_warn(_ctx, ...) av_log(_ctx, AV_LOG_WARNING, __VA_ARGS__)
++#define request_info(_ctx, ...) av_log(_ctx, AV_LOG_INFO, __VA_ARGS__)
++#define request_debug(_ctx, ...) av_log(_ctx, AV_LOG_DEBUG, __VA_ARGS__)
++
++static inline char safechar(char c) {
++    return (c <= 0x20 || c >= 0x7f) ? '.' : c; // Only 0x21..0x7e pass through; anything else prints as '.'
++}
++
++static inline const char * strfourcc(char tbuf[5], uint32_t fcc) {
++    // Write fcc's four bytes, least significant first, into tbuf as a
++    // NUL-terminated string (each byte filtered by safechar()); returns tbuf.
++    for (unsigned int k = 0; k < 4; ++k)
++        tbuf[k] = safechar((fcc >> (8 * k)) & 0xff);
++    tbuf[4] = '\0';
++    return tbuf;
++}
++
++#endif
+--- /dev/null
++++ b/libavcodec/v4l2_request_hevc.c
+@@ -0,0 +1,351 @@
++/*
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++
++#include "config.h"
++#include "decode.h"
++#include "hevcdec.h"
++#include "hwconfig.h"
++#include "internal.h"
++
++#include "v4l2_request_hevc.h"
++
++#include "libavutil/hwcontext_drm.h"
++#include "libavutil/pixdesc.h"
++
++#include "v4l2_req_devscan.h"
++#include "v4l2_req_dmabufs.h"
++#include "v4l2_req_pollqueue.h"
++#include "v4l2_req_media.h"
++#include "v4l2_req_utils.h"
++
++static size_t bit_buf_size(unsigned int w, unsigned int h, unsigned int bits_minus8)
++{
++    // Source buffer size estimate for a w x h picture; widen first so w * h cannot wrap in unsigned int
++    const size_t wxh = (size_t)w * h;
++    size_t bits_alloc;
++
++    /* Annex A gives a min compression of 2 @ lvl 3.1 (wxh <= 983040)
++     * and min 4 thereafter but avoid the oddity of 983041 having a
++     * lower limit than 983040.
++     * Multiply by 3/2 for 4:2:0
++     */
++    bits_alloc = wxh < 983040 ? wxh * 3 / 4 :
++        wxh < 983040 * 2 ? 983040 * 3 / 4 :
++        wxh * 3 / 8;
++    /* Allow for bit depth */
++    bits_alloc += (bits_alloc * bits_minus8) / 8;
++    /* Add a few bytes (16k) for overhead */
++    bits_alloc += 0x4000;
++    return bits_alloc;
++}
++
++static int v4l2_req_hevc_start_frame(AVCodecContext *avctx,
++                                     av_unused const uint8_t *buffer,
++                                     av_unused uint32_t size)
++{
++    // Dispatch to the start_frame hook of the API variant selected at init
++    return ((const V4L2RequestContextHEVC *)avctx->internal->hwaccel_priv_data)->fns->start_frame(avctx, buffer, size);
++}
++
++static int v4l2_req_hevc_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size)
++{
++    // Dispatch to the decode_slice hook of the API variant selected at init
++    return ((V4L2RequestContextHEVC *)avctx->internal->hwaccel_priv_data)->fns->decode_slice(avctx, buffer, size);
++}
++
++static int v4l2_req_hevc_end_frame(AVCodecContext *avctx)
++{
++    // Dispatch to the end_frame hook of the API variant selected at init
++    return ((V4L2RequestContextHEVC *)avctx->internal->hwaccel_priv_data)->fns->end_frame(avctx);
++}
++
++static void v4l2_req_hevc_abort_frame(AVCodecContext * const avctx)
++{
++    // Dispatch to the abort_frame hook of the API variant selected at init
++    ((V4L2RequestContextHEVC *)avctx->internal->hwaccel_priv_data)->fns->abort_frame(avctx);
++}
++
++static int v4l2_req_hevc_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx)
++{
++    // Dispatch to the frame_params hook of the API variant selected at init
++    return ((V4L2RequestContextHEVC *)avctx->internal->hwaccel_priv_data)->fns->frame_params(avctx, hw_frames_ctx);
++}
++
++static int v4l2_req_hevc_alloc_frame(AVCodecContext * avctx, AVFrame *frame)
++{
++    // Dispatch to the alloc_frame hook of the API variant selected at init
++    return ((V4L2RequestContextHEVC *)avctx->internal->hwaccel_priv_data)->fns->alloc_frame(avctx, frame);
++}
++
++
++static int v4l2_request_hevc_uninit(AVCodecContext *avctx)
++{
++ V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data;
++
++ av_log(avctx, AV_LOG_DEBUG, "<<< %s\n", __func__);
++
++ decode_q_wait(&ctx->decode_q, NULL); // Wait for all other threads to be out of decode
++ // Release in the same order as the failure unwind at the end of v4l2_request_hevc_init()
++ mediabufs_ctl_unref(&ctx->mbufs);
++ media_pool_delete(&ctx->mpool);
++ pollqueue_unref(&ctx->pq);
++ dmabufs_ctl_unref(&ctx->dbufs);
++ devscan_delete(&ctx->devscan);
++
++ decode_q_uninit(&ctx->decode_q);
++
++// if (avctx->hw_frames_ctx) {
++// AVHWFramesContext *hwfc = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
++// av_buffer_pool_flush(hwfc->pool);
++// }
++ return 0; // Always reports success
++}
++
++static int dst_fmt_accept_cb(void * v, const struct v4l2_fmtdesc *fmtdesc)
++{
++    // Destination-format filter passed to mediabufs_dst_fmt_set(); v is the
++    // AVCodecContext. Returns 1 when the offered pixel format is one we
++    // handle for the SPS bit depth (8 or 10), otherwise 0.
++    AVCodecContext *const avctx = v;
++    const HEVCContext *const hevc = avctx->priv_data;
++    const uint32_t fourcc = fmtdesc->pixelformat;
++
++    switch (hevc->ps.sps->bit_depth) {
++    case 8:
++        return fourcc == V4L2_PIX_FMT_NV12_COL128 || fourcc == V4L2_PIX_FMT_NV12;
++    case 10:
++        return fourcc == V4L2_PIX_FMT_NV12_10_COL128;
++    default:
++        return 0;
++    }
++}
++
++static int v4l2_request_hevc_init(AVCodecContext *avctx)
++{
++ const HEVCContext *h = avctx->priv_data;
++ V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data;
++ const HEVCSPS * const sps = h->ps.sps;
++ int ret;
++ const struct decdev * decdev;
++ const uint32_t src_pix_fmt = V2(ff_v4l2_req_hevc, 4).src_pix_fmt_v4l2; // Assuming constant for all APIs but avoiding V4L2 includes
++ size_t src_size;
++ enum mediabufs_memory src_memtype;
++ enum mediabufs_memory dst_memtype;
++
++ av_log(avctx, AV_LOG_DEBUG, "<<< %s\n", __func__);
++
++ // Give up immediately if this is something that we have no code to deal with
++ if (h->ps.sps->chroma_format_idc != 1) {
++ av_log(avctx, AV_LOG_WARNING, "chroma_format_idc(%d) != 1: Not implemented\n", h->ps.sps->chroma_format_idc);
++ return AVERROR_PATCHWELCOME;
++ }
++ if (!(h->ps.sps->bit_depth == 10 || h->ps.sps->bit_depth == 8) ||
++ h->ps.sps->bit_depth != h->ps.sps->bit_depth_chroma) {
++ av_log(avctx, AV_LOG_WARNING, "Bit depth Y:%d C:%d: Not implemented\n", h->ps.sps->bit_depth, h->ps.sps->bit_depth_chroma);
++ return AVERROR_PATCHWELCOME;
++ }
++
++ if ((ret = devscan_build(avctx, &ctx->devscan)) != 0) {
++ av_log(avctx, AV_LOG_WARNING, "Failed to find any V4L2 devices\n");
++ return (AVERROR(-ret)); // NOTE(review): assumes devscan_build() returns a negative errno - confirm
++ }
++ ret = AVERROR(ENOMEM); // Assume mem fail by default for these
++
++ if ((decdev = devscan_find(ctx->devscan, src_pix_fmt)) == NULL)
++ {
++ av_log(avctx, AV_LOG_WARNING, "Failed to find a V4L2 device for H265\n");
++ ret = AVERROR(ENODEV);
++ goto fail0;
++ }
++ av_log(avctx, AV_LOG_DEBUG, "Trying V4L2 devices: %s,%s\n",
++ decdev_media_path(decdev), decdev_video_path(decdev));
++
++ if ((ctx->pq = pollqueue_new()) == NULL) {
++ av_log(avctx, AV_LOG_ERROR, "Unable to create pollqueue\n");
++ goto fail1;
++ }
++
++ if ((ctx->mpool = media_pool_new(decdev_media_path(decdev), ctx->pq, 4)) == NULL) {
++ av_log(avctx, AV_LOG_ERROR, "Unable to create media pool\n");
++ goto fail2;
++ }
++
++ if ((ctx->mbufs = mediabufs_ctl_new(avctx, decdev_video_path(decdev), ctx->pq)) == NULL) {
++ av_log(avctx, AV_LOG_ERROR, "Unable to create media controls\n");
++ goto fail3;
++ }
++
++ // Version test for functional Pi5 HEVC iommu.
++ // rpivid kernel patch was merged in 6.1.57
++ // *** Remove when it is unlikely that there are any broken kernels left
++ if (mediabufs_ctl_driver_version(ctx->mbufs) >= MEDIABUFS_DRIVER_VERSION(6,1,57))
++ ctx->dbufs = dmabufs_ctl_new_vidbuf_cached();
++ else
++ ctx->dbufs = dmabufs_ctl_new();
++ // No dmabuf allocator is not fatal: fall back to MMAP buffers for both queues
++ if (ctx->dbufs == NULL) {
++ av_log(avctx, AV_LOG_DEBUG, "Unable to open dmabufs - try mmap buffers\n");
++ src_memtype = MEDIABUFS_MEMORY_MMAP;
++ dst_memtype = MEDIABUFS_MEMORY_MMAP;
++ }
++ else {
++ av_log(avctx, AV_LOG_DEBUG, "Dmabufs opened - try dmabuf buffers\n");
++ src_memtype = MEDIABUFS_MEMORY_DMABUF;
++ dst_memtype = MEDIABUFS_MEMORY_DMABUF;
++ }
++
++ // Ask for an initial bitbuf size of max size / 4
++ // We will realloc if we need more
++ // Must use sps->h/w as avctx contains cropped size
++retry_src_memtype:
++ src_size = bit_buf_size(sps->width, sps->height, sps->bit_depth - 8);
++ if (src_memtype == MEDIABUFS_MEMORY_DMABUF && mediabufs_src_resizable(ctx->mbufs))
++ src_size /= 4;
++ // Kludge for conformance tests which break Annex A limits
++ else if (src_size < 0x40000)
++ src_size = 0x40000;
++
++ if (mediabufs_src_fmt_set(ctx->mbufs, decdev_src_type(decdev), src_pix_fmt,
++ sps->width, sps->height, src_size)) {
++ char tbuf1[5];
++ av_log(avctx, AV_LOG_ERROR, "Failed to set source format: %s %dx%d\n", strfourcc(tbuf1, src_pix_fmt), sps->width, sps->height);
++ goto fail4;
++ }
++ // If the source queue rejects DMABUF, retry once from the size calculation using MMAP
++ if (mediabufs_src_chk_memtype(ctx->mbufs, src_memtype)) {
++ if (src_memtype == MEDIABUFS_MEMORY_DMABUF) {
++ src_memtype = MEDIABUFS_MEMORY_MMAP;
++ goto retry_src_memtype;
++ }
++ av_log(avctx, AV_LOG_ERROR, "Failed to get src memory type\n");
++ goto fail4;
++ }
++ // Probe the API variants, newest first; the first successful probe supplies ctx->fns
++ if (V2(ff_v4l2_req_hevc, 4).probe(avctx, ctx) == 0)
++ ctx->fns = &V2(ff_v4l2_req_hevc, 4);
++#if CONFIG_V4L2_REQ_HEVC_VX
++ else if (V2(ff_v4l2_req_hevc, 3).probe(avctx, ctx) == 0)
++ ctx->fns = &V2(ff_v4l2_req_hevc, 3);
++ else if (V2(ff_v4l2_req_hevc, 2).probe(avctx, ctx) == 0)
++ ctx->fns = &V2(ff_v4l2_req_hevc, 2);
++ else if (V2(ff_v4l2_req_hevc, 1).probe(avctx, ctx) == 0)
++ ctx->fns = &V2(ff_v4l2_req_hevc, 1);
++#endif
++ else {
++ av_log(avctx, AV_LOG_ERROR, "No HEVC version probed successfully\n");
++ ret = AVERROR(EINVAL);
++ goto fail4;
++ }
++
++ av_log(avctx, AV_LOG_DEBUG, "%s probed successfully: driver v %#x\n",
++ ctx->fns->name, mediabufs_ctl_driver_version(ctx->mbufs));
++
++ if (mediabufs_dst_fmt_set(ctx->mbufs, sps->width, sps->height, dst_fmt_accept_cb, avctx)) {
++ char tbuf1[5];
++ av_log(avctx, AV_LOG_ERROR, "Failed to set destination format: %s %dx%d\n", strfourcc(tbuf1, src_pix_fmt), sps->width, sps->height); // NOTE(review): logs the *source* fourcc
++ goto fail4;
++ }
++
++ if (mediabufs_src_pool_create(ctx->mbufs, ctx->dbufs, 6, src_memtype)) {
++ av_log(avctx, AV_LOG_ERROR, "Failed to create source pool\n");
++ goto fail4;
++ }
++
++ {
++ unsigned int dst_slots = sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering +
++ avctx->thread_count + (avctx->extra_hw_frames > 0 ? avctx->extra_hw_frames : 6);
++ av_log(avctx, AV_LOG_DEBUG, "Slots=%d: Reordering=%d, threads=%d, hw+=%d\n", dst_slots,
++ sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering,
++ avctx->thread_count, avctx->extra_hw_frames);
++
++ if (mediabufs_dst_chk_memtype(ctx->mbufs, dst_memtype)) {
++ if (dst_memtype != MEDIABUFS_MEMORY_DMABUF) {
++ av_log(avctx, AV_LOG_ERROR, "Failed to get dst memory type\n");
++ goto fail4;
++ }
++ av_log(avctx, AV_LOG_DEBUG, "Dst DMABUF not supported - trying mmap\n");
++ dst_memtype = MEDIABUFS_MEMORY_MMAP;
++ }
++
++ // extra_hw_frames is -1 if unset
++ if (mediabufs_dst_slots_create(ctx->mbufs, dst_slots, (avctx->extra_hw_frames > 0), dst_memtype)) {
++ av_log(avctx, AV_LOG_ERROR, "Failed to create destination slots\n");
++ goto fail4;
++ }
++ }
++
++ if (mediabufs_stream_on(ctx->mbufs)) {
++ av_log(avctx, AV_LOG_ERROR, "Failed stream on\n");
++ goto fail4;
++ }
++
++ if ((ret = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_DRM)) != 0) {
++ av_log(avctx, AV_LOG_ERROR, "Failed to create frame ctx\n");
++ goto fail4;
++ }
++
++ if ((ret = ctx->fns->set_controls(avctx, ctx)) != 0) {
++ av_log(avctx, AV_LOG_ERROR, "Failed set controls\n");
++ goto fail5;
++ }
++
++ decode_q_init(&ctx->decode_q);
++
++ // Set our s/w format
++ avctx->sw_pix_fmt = ((AVHWFramesContext *)avctx->hw_frames_ctx->data)->sw_format;
++
++ av_log(avctx, AV_LOG_INFO, "Hwaccel %s; devices: %s,%s; buffers: src %s, dst %s; swfmt=%s\n",
++ ctx->fns->name,
++ decdev_media_path(decdev), decdev_video_path(decdev),
++ mediabufs_memory_name(src_memtype), mediabufs_memory_name(dst_memtype),
++ av_get_pix_fmt_name(avctx->sw_pix_fmt));
++
++ return 0;
++ // Failure unwind; ret stays AVERROR(ENOMEM) unless a failing step set something else
++fail5:
++ av_buffer_unref(&avctx->hw_frames_ctx);
++fail4:
++ mediabufs_ctl_unref(&ctx->mbufs);
++fail3:
++ media_pool_delete(&ctx->mpool);
++fail2:
++ pollqueue_unref(&ctx->pq);
++fail1:
++ dmabufs_ctl_unref(&ctx->dbufs);
++fail0:
++ devscan_delete(&ctx->devscan);
++ return ret;
++}
++
++// Hwaccel descriptor named "hevc_v4l2request": HEVC video decode whose
++// output pixel format is AV_PIX_FMT_DRM_PRIME. Each hook below is wired to
++// the function named on its line; priv_data_size makes the codec layer
++// allocate one V4L2RequestContextHEVC per use.
++const AVHWAccel ff_hevc_v4l2request_hwaccel = {
++ .name = "hevc_v4l2request",
++ .type = AVMEDIA_TYPE_VIDEO,
++ .id = AV_CODEC_ID_HEVC,
++ .pix_fmt = AV_PIX_FMT_DRM_PRIME,
++ .alloc_frame = v4l2_req_hevc_alloc_frame,
++ .start_frame = v4l2_req_hevc_start_frame,
++ .decode_slice = v4l2_req_hevc_decode_slice,
++ .end_frame = v4l2_req_hevc_end_frame,
++ .abort_frame = v4l2_req_hevc_abort_frame,
++ .init = v4l2_request_hevc_init,
++ .uninit = v4l2_request_hevc_uninit,
++ .priv_data_size = sizeof(V4L2RequestContextHEVC),
++ .frame_params = v4l2_req_hevc_frame_params,
++ .caps_internal = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_MT_SAFE,
++};
+--- /dev/null
++++ b/libavcodec/v4l2_request_hevc.h
+@@ -0,0 +1,102 @@
++#ifndef AVCODEC_V4L2_REQUEST_HEVC_H
++#define AVCODEC_V4L2_REQUEST_HEVC_H
++
++#include <stdint.h>
++#include <drm_fourcc.h>
++#include "v4l2_req_decode_q.h"
++
++// Fallback definitions for DRM fourcc codes that the installed drm_fourcc.h
++// may not provide; each one is only defined when it is missing.
++// (The NV15/NV20 fallbacks used to be repeated a second time below P030;
++// the repeat could never take effect and has been dropped.)
++#ifndef DRM_FORMAT_NV15
++#define DRM_FORMAT_NV15 fourcc_code('N', 'V', '1', '5')
++#endif
++
++#ifndef DRM_FORMAT_NV20
++#define DRM_FORMAT_NV20 fourcc_code('N', 'V', '2', '0')
++#endif
++
++// P030 should be defined in drm_fourcc.h and hopefully will be sometime
++// in the future but until then...
++#ifndef DRM_FORMAT_P030
++#define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0')
++#endif
++
++#include <linux/videodev2.h>
++// Fall back to V4L2_CID_MPEG_BASE when the header does not define
++// V4L2_CID_CODEC_BASE.
++#ifndef V4L2_CID_CODEC_BASE
++#define V4L2_CID_CODEC_BASE V4L2_CID_MPEG_BASE
++#endif
++
++// V4L2_PIX_FMT_NV12_10_COL128 and V4L2_PIX_FMT_NV12_COL128 should be defined
++// in linux/videodev2.h and hopefully will be sometime in the future but
++// until then...
++#ifndef V4L2_PIX_FMT_NV12_10_COL128
++#define V4L2_PIX_FMT_NV12_10_COL128 v4l2_fourcc('N', 'C', '3', '0')
++#endif
++
++#ifndef V4L2_PIX_FMT_NV12_COL128
++#define V4L2_PIX_FMT_NV12_COL128 v4l2_fourcc('N', 'C', '1', '2') /* 12 Y/CbCr 4:2:0 128 pixel wide column */
++#endif
++
++// Fallback value for headers that do not define this control flag.
++#ifndef V4L2_CTRL_FLAG_DYNAMIC_ARRAY
++#define V4L2_CTRL_FLAG_DYNAMIC_ARRAY 0x0800
++#endif
++
++// VCAT pastes "_v<version>" onto a name. V2 adds one level of indirection so
++// that its arguments are macro-expanded before being pasted, and V(n) uses
++// HEVC_CTRLS_VERSION as the version.
++#define VCAT(name, version) name##_v##version
++#define V2(n,v) VCAT(n, v)
++#define V(n) V2(n, HEVC_CTRLS_VERSION)
++
++// STR(x) stringifies x after macro expansion; S2 is the raw stringify step.
++#define S2(x) #x
++#define STR(x) S2(x)
++
++// 1 per decoder
++struct v4l2_req_decode_fns;
++
++// Private hwaccel state (the hwaccel descriptor's priv_data_size is the size
++// of this struct).
++typedef struct V4L2RequestContextHEVC {
++// V4L2RequestContext base;
++ const struct v4l2_req_decode_fns * fns; // function table selected by probing in init
++
++ unsigned int timestamp; // ?? maybe uint64_t
++
++ int decode_mode;
++ int start_code;
++ unsigned int max_slices; // 0 => not wanted (frame mode)
++ unsigned int max_offsets; // 0 => not wanted
++
++ req_decode_q decode_q; // set up with decode_q_init() at the end of init
++
++ // Helper objects; the init failure path releases them in the order
++ // mbufs, mpool, pq, dbufs, devscan.
++ struct devscan *devscan;
++ struct dmabufs_ctl *dbufs;
++ struct pollqueue *pq;
++ struct media_pool * mpool;
++ struct mediabufs_ctl *mbufs;
++} V4L2RequestContextHEVC;
++
++// Function table for one versioned implementation (instances are named
++// ff_v4l2_req_hevc_v<N>).
++typedef struct v4l2_req_decode_fns {
++ int src_pix_fmt_v4l2;
++ const char * name; // logged by init once a table has been selected
++
++ // Init setup
++ // probe: init tries the tables in turn and keeps the first whose probe
++ // returns 0.
++ int (*probe)(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx);
++ // set_controls: called by init after the frames context exists; a
++ // non-zero return fails init.
++ int (*set_controls)(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx);
++
++ // Passthrough of hwaccel fns
++ int (*start_frame)(AVCodecContext *avctx, const uint8_t *buf, uint32_t buf_size);
++ int (*decode_slice)(AVCodecContext *avctx, const uint8_t *buf, uint32_t buf_size);
++ int (*end_frame)(AVCodecContext *avctx);
++ void (*abort_frame)(AVCodecContext *avctx);
++ int (*frame_params)(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx);
++ int (*alloc_frame)(AVCodecContext * avctx, AVFrame *frame);
++} v4l2_req_decode_fns;
++
++
++// One function table per version; V2() expands these to
++// ff_v4l2_req_hevc_v1 .. ff_v4l2_req_hevc_v4.
++extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 1);
++extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 2);
++extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 3);
++extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 4);
++
++#endif
+--- /dev/null
++++ b/libavcodec/weak_link.c
+@@ -0,0 +1,103 @@
++#include <stdlib.h>
++#include <pthread.h>
++#include <stdatomic.h>
++#include "weak_link.h"
++
++// Shared state behind a weak link. Master and client handles both point at
++// this object (a client handle is a cast of the same address).
++struct ff_weak_link_master {
++ atomic_int ref_count; /* 0 is single ref for easier atomics */
++ pthread_rwlock_t lock; // read-locked by ff_weak_link_lock(), write-locked while breaking
++ void * ptr; // target object; NULL once ff_weak_link_break() has run
++};
++
++// Recover the shared link object behind an opaque client handle; the two
++// types name the same address, so NULL maps to NULL.
++static inline struct ff_weak_link_master * weak_link_x(struct ff_weak_link_client * c)
++{
++    struct ff_weak_link_master * const master = (struct ff_weak_link_master *)c;
++
++    return master;
++}
++
++// Allocate a link that points at p and holds a single reference
++// (ref_count == 0 means one reference).
++// Returns NULL if the allocation or the rwlock initialisation fails.
++struct ff_weak_link_master * ff_weak_link_new(void * p)
++{
++    struct ff_weak_link_master * link = malloc(sizeof(struct ff_weak_link_master));
++
++    if (link != NULL) {
++        atomic_init(&link->ref_count, 0);
++        link->ptr = p;
++
++        if (pthread_rwlock_init(&link->lock, NULL) != 0) {
++            free(link);
++            link = NULL;
++        }
++    }
++
++    return link;
++}
++
++// Drop one reference. Whoever drops the last one (the counter read 0 before
++// the decrement) destroys the lock and frees the link.
++static void weak_link_do_unref(struct ff_weak_link_master * const w)
++{
++    const int before = atomic_fetch_sub(&w->ref_count, 1);
++
++    if (before == 0) {
++        pthread_rwlock_destroy(&w->lock);
++        free(w);
++    }
++}
++
++// Unref & break link.
++// Clears *ppLink, sets the target pointer to NULL while holding the write
++// lock, then drops the caller's reference. Does nothing if *ppLink is NULL.
++void ff_weak_link_break(struct ff_weak_link_master ** ppLink)
++{
++    struct ff_weak_link_master * const master = *ppLink;
++
++    if (master != NULL) {
++        *ppLink = NULL;
++
++        pthread_rwlock_wrlock(&master->lock);
++        master->ptr = NULL;
++        pthread_rwlock_unlock(&master->lock);
++
++        weak_link_do_unref(master);
++    }
++}
++
++// Take an additional reference on w and return it as a client handle.
++// A NULL master gives a NULL client.
++struct ff_weak_link_client* ff_weak_link_ref(struct ff_weak_link_master * w)
++{
++    if (w != NULL)
++        atomic_fetch_add(&w->ref_count, 1);
++
++    return (struct ff_weak_link_client*)w;
++}
++
++// Release a client handle: clears *ppLink and drops its reference.
++// Does nothing if *ppLink is already NULL.
++void ff_weak_link_unref(struct ff_weak_link_client ** ppLink)
++{
++    struct ff_weak_link_master * const master = weak_link_x(*ppLink);
++
++    if (master != NULL) {
++        *ppLink = NULL;
++        weak_link_do_unref(master);
++    }
++}
++
++// Try to pin the target of a client handle.
++// Returns the target pointer with the link's read lock still held (release
++// it with ff_weak_link_unlock()). If the link has been broken, or the read
++// lock cannot be taken, the handle is released: *ppLink is set to NULL, its
++// reference is dropped and NULL is returned. A NULL *ppLink returns NULL.
++void * ff_weak_link_lock(struct ff_weak_link_client ** ppLink)
++{
++    struct ff_weak_link_master * const master = weak_link_x(*ppLink);
++
++    if (master == NULL)
++        return NULL;
++
++    if (pthread_rwlock_rdlock(&master->lock) == 0) {
++        void * const target = master->ptr;
++
++        if (target != NULL)
++            return target;  // live link: read lock stays held
++
++        pthread_rwlock_unlock(&master->lock);
++    }
++
++    // Broken link (or lock failure): drop this client's handle
++    *ppLink = NULL;
++    weak_link_do_unref(master);
++    return NULL;
++}
++
++// Release the read lock taken by a successful ff_weak_link_lock().
++// Ignores a NULL c (so can be on the return path of both broken & live links)
++void ff_weak_link_unlock(struct ff_weak_link_client * c)
++{
++    if (c == NULL)
++        return;
++
++    pthread_rwlock_unlock(&weak_link_x(c)->lock);
++}
++
++
+--- /dev/null
++++ b/libavcodec/weak_link.h
+@@ -0,0 +1,23 @@
++struct ff_weak_link_master;
++struct ff_weak_link_client;
++
++struct ff_weak_link_master * ff_weak_link_new(void * p);
++void ff_weak_link_break(struct ff_weak_link_master ** ppLink);
++
++struct ff_weak_link_client* ff_weak_link_ref(struct ff_weak_link_master * w);
++void ff_weak_link_unref(struct ff_weak_link_client ** ppLink);
++
++// Returns NULL if link broken - in this case it will also zap
++// *ppLink and unref the weak_link.
++// Returns NULL if *ppLink is NULL (so a link once broken stays broken)
++//
++// The above does mean that there is a race if this is called simultainiously
++// by two threads using the same weak_link_client (so don't do that)
++void * ff_weak_link_lock(struct ff_weak_link_client ** ppLink);
++void ff_weak_link_unlock(struct ff_weak_link_client * c);
++
++
++
++
++
++
+--- a/libavdevice/Makefile
++++ b/libavdevice/Makefile
+@@ -48,6 +48,8 @@ OBJS-$(CONFIG_SNDIO_OUTDEV)
+ OBJS-$(CONFIG_V4L2_INDEV) += v4l2.o v4l2-common.o timefilter.o
+ OBJS-$(CONFIG_V4L2_OUTDEV) += v4l2enc.o v4l2-common.o
+ OBJS-$(CONFIG_VFWCAP_INDEV) += vfwcap.o
++OBJS-$(CONFIG_VOUT_DRM_OUTDEV) += drm_vout.o
++OBJS-$(CONFIG_VOUT_EGL_OUTDEV) += egl_vout.o
+ OBJS-$(CONFIG_XCBGRAB_INDEV) += xcbgrab.o
+ OBJS-$(CONFIG_XV_OUTDEV) += xv.o
+
+--- a/libavdevice/alldevices.c
++++ b/libavdevice/alldevices.c
+@@ -51,6 +51,8 @@ extern const AVOutputFormat ff_sndio_mux
+ extern const AVInputFormat ff_v4l2_demuxer;
+ extern const AVOutputFormat ff_v4l2_muxer;
+ extern const AVInputFormat ff_vfwcap_demuxer;
++extern const AVOutputFormat ff_vout_drm_muxer;
++extern const AVOutputFormat ff_vout_egl_muxer;
+ extern const AVInputFormat ff_xcbgrab_demuxer;
+ extern const AVOutputFormat ff_xv_muxer;
+
+--- /dev/null
++++ b/libavdevice/drm_vout.c
+@@ -0,0 +1,680 @@
++/*
++ * Copyright (c) 2020 John Cox for Raspberry Pi Trading
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++
++// *** This module is a work in progress and its utility is strictly
++// limited to testing.
++
++#include "libavutil/opt.h"
++#include "libavutil/pixdesc.h"
++#include "libavutil/hwcontext_drm.h"
++#include "libavformat/internal.h"
++#include "avdevice.h"
++
++#include "pthread.h"
++#include <semaphore.h>
++#include <unistd.h>
++
++#include <xf86drm.h>
++#include <xf86drmMode.h>
++#include <drm_fourcc.h>
++
++#define TRACE_ALL 0
++
++#define DRM_MODULE "vc4"
++
++#define ERRSTR strerror(errno)
++
++// Output selection filled in by find_crtc() and find_plane().
++struct drm_setup {
++ int conId; // connector id; 0 makes find_crtc() pick the first connector that has a CRTC
++ uint32_t crtcId;
++ int crtcIdx; // position of crtcId in the resource CRTC list (-1 until found)
++ uint32_t planeId; // plane selected by find_plane()
++ unsigned int out_fourcc; // DRM format that planeId was selected for
++ struct {
++ int x, y, width, height;
++ } compose; // destination rectangle, copied from the CRTC in find_crtc()
++};
++
++// DRM objects tied to one queued frame; released by da_uninit().
++typedef struct drm_aux_s {
++ unsigned int fb_handle; // framebuffer id from drmModeAddFB2WithModifiers(), 0 if none
++ uint32_t bo_handles[AV_DRM_MAX_PLANES]; // GEM handles from drmPrimeFDToHandle(), 0 if unused
++ AVFrame * frame; // frame kept alive while this slot is in use
++} drm_aux_t;
++
++// Aux size should only need to be 2, but on a few streams (Hobbit) under FKMS
++// we get initial flicker probably due to dodgy drm timing
++#define AUX_SIZE 3
++// Private data of the vout_drm output device.
++typedef struct drm_display_env_s
++{
++ AVClass *class;
++
++ int drm_fd; // fd from drmOpen(); -1 when not open
++ uint32_t con_id; // connector id reported by find_crtc()
++ struct drm_setup setup;
++ enum AVPixelFormat avfmt;
++
++ int show_all; // "show_all" option: if 0, write_packet does not wait for the display thread and drops the frame instead
++ const char * drm_module; // "drm_module" option, default DRM_MODULE
++
++ unsigned int ano; // index of the aux slot the next frame will use
++ drm_aux_t aux[AUX_SIZE];
++
++ // Single-entry hand-off between write_packet and display_thread
++ pthread_t q_thread;
++ sem_t q_sem_in; // posted when q_next holds a frame, or to make the thread check q_terminate
++ sem_t q_sem_out; // posted by the thread when q_next may be written
++ int q_terminate; // set by deinit before posting q_sem_in
++ AVFrame * q_next;
++
++} drm_display_env_t;
++
++
++// Muxer write_trailer hook: nothing to flush, always succeeds.
++// Only emits a debug trace when TRACE_ALL is enabled.
++static int drm_vout_write_trailer(AVFormatContext *s)
++{
++#if TRACE_ALL
++    av_log(s, AV_LOG_DEBUG, "%s\n", __func__);
++#endif
++
++    return 0;
++}
++
++// Muxer write_header hook: checks that the output is laid out the way this
++// device needs it - no more than one stream, and stream 0 must be video
++// carrying wrapped AVFrames.
++// Returns 0 if acceptable, AVERROR(EINVAL) otherwise.
++static int drm_vout_write_header(AVFormatContext *s)
++{
++    const AVCodecParameters * const par = s->streams[0]->codecpar;
++    int acceptable;
++
++#if TRACE_ALL
++    av_log(s, AV_LOG_DEBUG, "%s\n", __func__);
++#endif
++
++    acceptable = s->nb_streams <= 1 &&
++                 par->codec_type == AVMEDIA_TYPE_VIDEO &&
++                 par->codec_id == AV_CODEC_ID_WRAPPED_AVFRAME;
++
++    if (acceptable)
++        return 0;
++
++    av_log(s, AV_LOG_ERROR, "Only supports one wrapped avframe stream\n");
++    return AVERROR(EINVAL);
++}
++
++// Select a plane that can be attached to the CRTC at index crtcidx and that
++// supports `format`, then try to set its "zpos" property.
++//
++// avctx      logging context
++// drmfd      open DRM device fd
++// crtcidx    CRTC index, tested as a bit in each plane's possible_crtcs
++// format     DRM fourcc the plane has to support
++// pplane_id  receives the id of the selected plane
++//
++// Returns 0 when a plane was selected (a failure to set zpos is only logged),
++// -1 if no plane matches or a DRM query fails.
++static int find_plane(struct AVFormatContext * const avctx,
++                      const int drmfd, const int crtcidx, const uint32_t format,
++                      uint32_t * const pplane_id)
++{
++    drmModePlaneResPtr planes;
++    drmModeObjectPropertiesPtr props = NULL;
++    drmModePropertyPtr prop = NULL;
++    unsigned int i;
++    int found = 0;
++    int ret = -1;
++
++    planes = drmModeGetPlaneResources(drmfd);
++    if (!planes)
++    {
++        av_log(avctx, AV_LOG_WARNING, "drmModeGetPlaneResources failed: %s\n", ERRSTR);
++        return -1;
++    }
++
++    for (i = 0; i < planes->count_planes && !found; ++i) {
++        unsigned int j;
++        drmModePlanePtr plane = drmModeGetPlane(drmfd, planes->planes[i]);
++
++        // This used to test `planes`, so a failed lookup was dereferenced
++        // below; a failure also must not fall through to the property code
++        // because no plane id has been chosen.
++        if (!plane)
++        {
++            av_log(avctx, AV_LOG_WARNING, "drmModeGetPlane failed: %s\n", ERRSTR);
++            goto fail;
++        }
++
++        if (plane->possible_crtcs & (1 << crtcidx)) {
++            for (j = 0; j < plane->count_formats; ++j) {
++                if (plane->formats[j] == format) {
++                    *pplane_id = plane->plane_id;
++                    found = 1;
++                    break;
++                }
++            }
++        }
++
++        drmModeFreePlane(plane);
++    }
++
++    if (!found)
++        goto fail;
++
++    props = drmModeObjectGetProperties(drmfd, *pplane_id, DRM_MODE_OBJECT_PLANE);
++    if (!props)
++        goto fail;
++
++    for (i = 0; i != props->count_props; ++i) {
++        // Release the property fetched by the previous iteration
++        if (prop)
++            drmModeFreeProperty(prop);
++        prop = drmModeGetProperty(drmfd, props->props[i]);
++        if (!prop)
++            goto fail;
++        if (strcmp("zpos", prop->name) == 0) {
++            // values[1] is used as the new zpos (presumably the upper bound
++            // of a range property - confirm against the driver)
++            if (drmModeObjectSetProperty(drmfd, *pplane_id, DRM_MODE_OBJECT_PLANE, props->props[i], prop->values[1]) == 0)
++                av_log(avctx, AV_LOG_DEBUG, "ZPOS set to %d\n", (int)prop->values[1]);
++            else
++                av_log(avctx, AV_LOG_WARNING, "Failed to set ZPOS on DRM plane\n");
++            break;
++        }
++    }
++
++    ret = 0;
++fail:
++    if (props)
++        drmModeFreeObjectProperties(props);
++    if (prop)
++        drmModeFreeProperty(prop);
++    drmModeFreePlaneResources(planes);
++    return ret;
++}
++
++// Release everything an aux slot holds: the DRM framebuffer, the GEM buffer
++// handles and the frame reference. Handles are zeroed afterwards, so calling
++// this again on the same slot does nothing.
++static void da_uninit(drm_display_env_t * const de, drm_aux_t * da)
++{
++    unsigned int plane;
++
++    if (da->fb_handle != 0) {
++        drmModeRmFB(de->drm_fd, da->fb_handle);
++        da->fb_handle = 0;
++    }
++
++    for (plane = 0; plane < AV_DRM_MAX_PLANES; ++plane) {
++        if (da->bo_handles[plane] != 0) {
++            struct drm_gem_close close_req = {.handle = da->bo_handles[plane]};
++
++            drmIoctl(de->drm_fd, DRM_IOCTL_GEM_CLOSE, &close_req);
++            da->bo_handles[plane] = 0;
++        }
++    }
++
++    av_frame_free(&da->frame);
++}
++
++// Put one DRM-PRIME frame on screen using the current aux slot.
++//
++// Takes over `frame`: it is either freed here (no usable plane) or stored in
++// the aux slot and freed by a later da_uninit() on that slot.
++// Returns 0 on success, -1 on an early failure, otherwise the result of
++// drmModeSetPlane().
++static int do_display(AVFormatContext * const s, drm_display_env_t * const de, AVFrame * frame)
++{
++ const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor*)frame->data[0];
++ drm_aux_t * da = de->aux + de->ano;
++ const uint32_t format = desc->layers[0].format;
++ int ret = 0;
++
++#if TRACE_ALL
++ av_log(s, AV_LOG_DEBUG, "<<< %s: fd=%d\n", __func__, desc->objects[0].fd);
++#endif
++
++ // (Re)select a plane whenever the frame format differs from the one the
++ // current plane was chosen for
++ if (de->setup.out_fourcc != format) {
++ if (find_plane(s, de->drm_fd, de->setup.crtcIdx, format, &de->setup.planeId)) {
++ av_frame_free(&frame);
++ av_log(s, AV_LOG_WARNING, "No plane for format: %#x\n", format);
++ return -1;
++ }
++ de->setup.out_fourcc = format;
++ }
++
++ // Relative vblank wait; retried on EINTR, any other failure is ignored
++ {
++ drmVBlank vbl = {
++ .request = {
++ .type = DRM_VBLANK_RELATIVE,
++ .sequence = 0
++ }
++ };
++
++ while (drmWaitVBlank(de->drm_fd, &vbl)) {
++ if (errno != EINTR) {
++// av_log(s, AV_LOG_WARNING, "drmWaitVBlank failed: %s\n", ERRSTR);
++ break;
++ }
++ }
++ }
++
++ // Drop whatever this slot still holds from an earlier frame
++ da_uninit(de, da);
++
++ {
++ uint32_t pitches[4] = {0};
++ uint32_t offsets[4] = {0};
++ uint64_t modifiers[4] = {0};
++ uint32_t bo_handles[4] = {0};
++ int has_mods = 0;
++ int i, j, n;
++
++ // From here on the slot owns the frame, including on the error returns
++ da->frame = frame;
++
++ // Import every dmabuf object as a GEM handle
++ for (i = 0; i < desc->nb_objects; ++i) {
++ if (drmPrimeFDToHandle(de->drm_fd, desc->objects[i].fd, da->bo_handles + i) != 0) {
++ av_log(s, AV_LOG_WARNING, "drmPrimeFDToHandle[%d](%d) failed: %s\n", i, desc->objects[i].fd, ERRSTR);
++ return -1;
++ }
++ if (desc->objects[i].format_modifier != DRM_FORMAT_MOD_LINEAR &&
++ desc->objects[i].format_modifier != DRM_FORMAT_MOD_INVALID)
++ has_mods = 1;
++ }
++
++ // Flatten the planes of all layers into the per-plane arrays
++ // NOTE(review): n is not checked against the 4-entry arrays above;
++ // presumably descriptors never carry more than 4 planes in total - confirm
++ n = 0;
++ for (i = 0; i < desc->nb_layers; ++i) {
++ for (j = 0; j < desc->layers[i].nb_planes; ++j) {
++ const AVDRMPlaneDescriptor * const p = desc->layers[i].planes + j;
++ const AVDRMObjectDescriptor * const obj = desc->objects + p->object_index;
++ pitches[n] = p->pitch;
++ offsets[n] = p->offset;
++ modifiers[n] = obj->format_modifier;
++ bo_handles[n] = da->bo_handles[p->object_index];
++ ++n;
++ }
++ }
++
++#if 1 && TRACE_ALL
++ av_log(s, AV_LOG_DEBUG, "%dx%d, fmt: %x, boh=%d,%d,%d,%d, pitch=%d,%d,%d,%d,"
++ " offset=%d,%d,%d,%d, mod=%llx,%llx,%llx,%llx\n",
++ av_frame_cropped_width(frame),
++ av_frame_cropped_height(frame),
++ desc->layers[0].format,
++ bo_handles[0],
++ bo_handles[1],
++ bo_handles[2],
++ bo_handles[3],
++ pitches[0],
++ pitches[1],
++ pitches[2],
++ pitches[3],
++ offsets[0],
++ offsets[1],
++ offsets[2],
++ offsets[3],
++ (long long)modifiers[0],
++ (long long)modifiers[1],
++ (long long)modifiers[2],
++ (long long)modifiers[3]
++ );
++#endif
++
++ // Modifiers are only passed when some object has one that is neither
++ // LINEAR nor INVALID
++ if (drmModeAddFB2WithModifiers(de->drm_fd,
++ av_frame_cropped_width(frame),
++ av_frame_cropped_height(frame),
++ desc->layers[0].format, bo_handles,
++ pitches, offsets,
++ has_mods ? modifiers : NULL,
++ &da->fb_handle,
++ has_mods ? DRM_MODE_FB_MODIFIERS : 0) != 0) {
++ av_log(s, AV_LOG_WARNING, "drmModeAddFB2WithModifiers failed: %s\n", ERRSTR);
++ return -1;
++ }
++ }
++
++ // Source size is passed shifted left by 16; destination is the compose
++ // rectangle recorded by find_crtc()
++ ret = drmModeSetPlane(de->drm_fd, de->setup.planeId, de->setup.crtcId,
++ da->fb_handle, 0,
++ de->setup.compose.x, de->setup.compose.y,
++ de->setup.compose.width,
++ de->setup.compose.height,
++ 0, 0,
++ av_frame_cropped_width(frame) << 16,
++ av_frame_cropped_height(frame) << 16);
++
++ if (ret != 0) {
++ av_log(s, AV_LOG_WARNING, "drmModeSetPlane failed: %s\n", ERRSTR);
++ }
++
++ // Move on to the next aux slot (wrapping at AUX_SIZE); this is skipped by
++ // the early -1 returns above
++ de->ano = de->ano + 1 >= AUX_SIZE ? 0 : de->ano + 1;
++
++ return ret;
++}
++
++// Take the semaphore, restarting when interrupted by a signal.
++// nowait != 0 uses sem_trywait() instead of blocking.
++// Returns 0 once the semaphore was taken, otherwise -errno (e.g. -EAGAIN
++// when nowait is set and the semaphore is not available).
++static int do_sem_wait(sem_t * const sem, const int nowait)
++{
++    for (;;) {
++        const int rc = nowait ? sem_trywait(sem) : sem_wait(sem);
++
++        if (rc == 0)
++            return 0;
++        if (errno != EINTR)
++            return -errno;
++    }
++}
++
++// Display thread entry point; v is the AVFormatContext.
++// Posts q_sem_out to say a frame may be queued, then for every q_sem_in post
++// takes q_next, re-posts q_sem_out and displays the frame. Leaves the loop
++// when q_terminate is set, releasing all aux slots and any frame still queued.
++static void * display_thread(void * v)
++{
++    AVFormatContext * const s = v;
++    drm_display_env_t * const de = s->priv_data;
++    int i;
++
++#if TRACE_ALL
++    av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__);
++#endif
++
++    sem_post(&de->q_sem_out);
++
++    do_sem_wait(&de->q_sem_in, 0);
++    while (!de->q_terminate) {
++        AVFrame * const pending = de->q_next;
++
++        // Free the queue entry before the (slow) display call
++        de->q_next = NULL;
++        sem_post(&de->q_sem_out);
++
++        do_display(s, de, pending);
++
++        do_sem_wait(&de->q_sem_in, 0);
++    }
++
++#if TRACE_ALL
++    av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__);
++#endif
++
++    i = 0;
++    while (i != AUX_SIZE) {
++        da_uninit(de, de->aux + i);
++        ++i;
++    }
++
++    av_frame_free(&de->q_next);
++
++    return NULL;
++}
++
++// Muxer write_packet hook. pkt->data carries a wrapped AVFrame.
++//
++// DRM-PRIME frames are referenced, VAAPI frames are mapped to DRM-PRIME, and
++// the result is handed to the display thread. If show_all is 0 and the
++// thread has not taken the previous frame yet, the new frame is dropped.
++// Frames flagged corrupt are discarded.
++//
++// Returns 0 when the frame was queued, dropped or discarded,
++// AVERROR(EINVAL) for an unsupported format or a failed mapping,
++// AVERROR(ENOMEM) or the av_frame_ref() error if the frame cannot be
++// referenced.
++static int drm_vout_write_packet(AVFormatContext *s, AVPacket *pkt)
++{
++    const AVFrame * const src_frame = (AVFrame *)pkt->data;
++    AVFrame * frame;
++    drm_display_env_t * const de = s->priv_data;
++    int ret;
++
++#if TRACE_ALL
++    av_log(s, AV_LOG_DEBUG, "%s\n", __func__);
++#endif
++
++    if ((src_frame->flags & AV_FRAME_FLAG_CORRUPT) != 0) {
++        av_log(s, AV_LOG_WARNING, "Discard corrupt frame: fmt=%d, ts=%" PRId64 "\n", src_frame->format, src_frame->pts);
++        return 0;
++    }
++
++    if (src_frame->format != AV_PIX_FMT_DRM_PRIME &&
++        src_frame->format != AV_PIX_FMT_VAAPI) {
++        av_log(s, AV_LOG_WARNING, "Frame (format=%d) not DRM_PRiME\n", src_frame->format);
++        return AVERROR(EINVAL);
++    }
++
++    // The allocation result used to be dereferenced without a check
++    frame = av_frame_alloc();
++    if (!frame)
++        return AVERROR(ENOMEM);
++
++    if (src_frame->format == AV_PIX_FMT_DRM_PRIME) {
++        // A failed ref would otherwise queue an empty frame for display
++        ret = av_frame_ref(frame, src_frame);
++        if (ret < 0) {
++            av_frame_free(&frame);
++            return ret;
++        }
++    }
++    else {
++        frame->format = AV_PIX_FMT_DRM_PRIME;
++        if (av_hwframe_map(frame, src_frame, 0) != 0)
++        {
++            av_log(s, AV_LOG_WARNING, "Failed to map frame (format=%d) to DRM_PRiME\n", src_frame->format);
++            av_frame_free(&frame);
++            return AVERROR(EINVAL);
++        }
++    }
++
++    // Non-blocking unless show_all is set: a busy display thread means drop
++    ret = do_sem_wait(&de->q_sem_out, !de->show_all);
++    if (ret) {
++        av_frame_free(&frame);
++    }
++    else {
++        de->q_next = frame;
++        sem_post(&de->q_sem_in);
++    }
++
++    return 0;
++}
++
++// Muxer write_uncoded_frame hook: not implemented. Logs the request and
++// always returns AVERROR_PATCHWELCOME.
++static int drm_vout_write_frame(AVFormatContext *s, int stream_index, AVFrame **ppframe,
++                                unsigned flags)
++{
++    const int rv = AVERROR_PATCHWELCOME;
++
++    av_log(s, AV_LOG_ERROR, "%s: NIF: idx=%d, flags=%#x\n", __func__, stream_index, flags);
++
++    return rv;
++}
++
++// Muxer control_message hook. AV_APP_TO_DEV_WINDOW_REPAINT is accepted and
++// ignored (returns 0); every other message returns AVERROR(ENOSYS).
++static int drm_vout_control_message(AVFormatContext *s, int type, void *data, size_t data_size)
++{
++#if TRACE_ALL
++    av_log(s, AV_LOG_DEBUG, "%s: %d\n", __func__, type);
++#endif
++
++    if (type == AV_APP_TO_DEV_WINDOW_REPAINT)
++        return 0;
++
++    return AVERROR(ENOSYS);
++}
++
++// Choose the connector and CRTC to display on and record the CRTC geometry.
++//
++// avctx   logging context
++// drmfd   open DRM device fd
++// s       in: conId (0 = pick the first connector that currently has a CRTC)
++//         out: conId, crtcId, crtcIdx and the compose rectangle
++// pConId  if not NULL, receives the chosen connector id
++//
++// Returns 0 on success, -1 on failure. All libdrm objects obtained here are
++// released on every path.
++//
++// Changes from the previous version: the "no suitable connector" path no
++// longer leaks the resource list, failed connector/encoder/CRTC lookups are
++// no longer dereferenced, and diagnostics go through av_log() rather than
++// printf()/fprintf().
++static int find_crtc(struct AVFormatContext * const avctx, int drmfd, struct drm_setup *s, uint32_t * const pConId)
++{
++    int ret = -1;
++    int i;
++    drmModeRes *res = drmModeGetResources(drmfd);
++    drmModeConnector *c;
++
++    if(!res)
++    {
++        av_log(avctx, AV_LOG_WARNING, "drmModeGetResources failed: %s\n", ERRSTR);
++        return -1;
++    }
++
++    if (res->count_crtcs <= 0)
++    {
++        av_log(avctx, AV_LOG_WARNING, "drm: no crts\n");
++        goto fail_res;
++    }
++
++    if (!s->conId) {
++        av_log(avctx, AV_LOG_INFO,
++               "No connector ID specified. Choosing default from list:\n");
++
++        for (i = 0; i < res->count_connectors; i++) {
++            drmModeConnector *con =
++                drmModeGetConnector(drmfd, res->connectors[i]);
++            drmModeEncoder *enc = NULL;
++            drmModeCrtc *crtc = NULL;
++
++            // Skip connectors that cannot be queried
++            if (!con)
++                continue;
++
++            if (con->encoder_id) {
++                enc = drmModeGetEncoder(drmfd, con->encoder_id);
++                if (enc && enc->crtc_id) {
++                    crtc = drmModeGetCrtc(drmfd, enc->crtc_id);
++                }
++            }
++
++            // First connector with a CRTC wins; the loop continues only to
++            // log the remaining connectors
++            if (!s->conId && crtc) {
++                s->conId = con->connector_id;
++                s->crtcId = crtc->crtc_id;
++            }
++
++            av_log(avctx, AV_LOG_DEBUG, "Connector %d (crtc %d): type %d, %dx%d%s\n",
++                   con->connector_id,
++                   crtc ? crtc->crtc_id : 0,
++                   con->connector_type,
++                   crtc ? crtc->width : 0,
++                   crtc ? crtc->height : 0,
++                   (s->conId == (int)con->connector_id ?
++                    " (chosen)" : ""));
++
++            if (crtc)
++                drmModeFreeCrtc(crtc);
++            if (enc)
++                drmModeFreeEncoder(enc);
++            drmModeFreeConnector(con);
++        }
++
++        if (!s->conId) {
++            av_log(avctx, AV_LOG_ERROR,
++                   "No suitable enabled connector found.\n");
++            goto fail_res;
++        }
++    }
++
++    // Translate the CRTC id into its index in the resource list
++    s->crtcIdx = -1;
++
++    for (i = 0; i < res->count_crtcs; ++i) {
++        if (s->crtcId == res->crtcs[i]) {
++            s->crtcIdx = i;
++            break;
++        }
++    }
++
++    if (s->crtcIdx == -1)
++    {
++        av_log(avctx, AV_LOG_WARNING, "drm: CRTC %u not found\n", s->crtcId);
++        goto fail_res;
++    }
++
++    if (res->count_connectors <= 0)
++    {
++        av_log(avctx, AV_LOG_WARNING, "drm: no connectors\n");
++        goto fail_res;
++    }
++
++    c = drmModeGetConnector(drmfd, s->conId);
++    if (!c)
++    {
++        av_log(avctx, AV_LOG_WARNING, "drmModeGetConnector failed: %s\n", ERRSTR);
++        goto fail_res;
++    }
++
++    if (!c->count_modes)
++    {
++        av_log(avctx, AV_LOG_WARNING, "connector supports no mode\n");
++        goto fail_conn;
++    }
++
++    {
++        drmModeCrtc *crtc = drmModeGetCrtc(drmfd, s->crtcId);
++        if (!crtc)
++        {
++            av_log(avctx, AV_LOG_WARNING, "drmModeGetCrtc failed: %s\n", ERRSTR);
++            goto fail_conn;
++        }
++        s->compose.x = crtc->x;
++        s->compose.y = crtc->y;
++        s->compose.width = crtc->width;
++        s->compose.height = crtc->height;
++        drmModeFreeCrtc(crtc);
++    }
++
++    if (pConId)
++        *pConId = c->connector_id;
++    ret = 0;
++
++fail_conn:
++    drmModeFreeConnector(c);
++
++fail_res:
++    drmModeFreeResources(res);
++
++    return ret;
++}
++
++// Muxer init hook: open the DRM device named by the drm_module option,
++// choose connector and CRTC, then start the display thread.
++// Returns 0 on success or a negative AVERROR code.
++//
++// deinit is called if init fails so no need to clean up explicitly here
++static int drm_vout_init(struct AVFormatContext * s)
++{
++    drm_display_env_t * const de = s->priv_data;
++    int rv;
++
++    av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__);
++
++    de->drm_fd = -1;
++    de->con_id = 0;
++    de->setup = (struct drm_setup){0};
++    de->q_terminate = 0;
++
++    if ((de->drm_fd = drmOpen(de->drm_module, NULL)) < 0)
++    {
++        rv = AVERROR(errno);
++        av_log(s, AV_LOG_ERROR, "Failed to drmOpen %s: %s\n", de->drm_module, av_err2str(rv));
++        return rv;
++    }
++
++    if (find_crtc(s, de->drm_fd, &de->setup, &de->con_id) != 0)
++    {
++        av_log(s, AV_LOG_ERROR, "failed to find valid mode\n");
++        rv = AVERROR(EINVAL);
++        goto fail_close;
++    }
++
++    sem_init(&de->q_sem_in, 0, 0);
++    sem_init(&de->q_sem_out, 0, 0);
++    {
++        // pthread_create() reports failure through its return value and
++        // does not set errno, so the error must be built from that value
++        const int err = pthread_create(&de->q_thread, NULL, display_thread, s);
++        if (err) {
++            rv = AVERROR(err);
++            av_log(s, AV_LOG_ERROR, "Failed to create display thread: %s\n", av_err2str(rv));
++            goto fail_close;
++        }
++    }
++
++    av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__);
++
++    return 0;
++
++fail_close:
++    close(de->drm_fd);
++    de->drm_fd = -1;
++    av_log(s, AV_LOG_DEBUG, ">>> %s: FAIL\n", __func__);
++
++    return rv;
++}
++
++// Muxer deinit hook: stop the display thread and release all DRM state.
++//
++// This also runs after a failed init. Every init failure path leaves
++// drm_fd at -1 and no display thread running, so the thread and semaphore
++// teardown is only done when drm_fd is valid; previously it posted to
++// possibly uninitialised semaphores and joined a thread that was never
++// created.
++// NOTE(review): if init failed in pthread_create() the two semaphores were
++// initialised but are not destroyed here.
++static void drm_vout_deinit(struct AVFormatContext * s)
++{
++    drm_display_env_t * const de = s->priv_data;
++
++    av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__);
++
++    if (de->drm_fd >= 0) {
++        // Wake the thread so that it sees q_terminate, then wait for it
++        de->q_terminate = 1;
++        sem_post(&de->q_sem_in);
++        pthread_join(de->q_thread, NULL);
++        sem_destroy(&de->q_sem_in);
++        sem_destroy(&de->q_sem_out);
++    }
++
++    for (unsigned int i = 0; i != AUX_SIZE; ++i)
++        da_uninit(de, de->aux + i);
++
++    av_frame_free(&de->q_next);
++
++    if (de->drm_fd >= 0) {
++        close(de->drm_fd);
++        de->drm_fd = -1;
++    }
++
++    av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__);
++}
++
++
++// AVOption table for the device's private data; OFFSET() locates a field
++// inside drm_display_env_t.
++//   show_all   - boolean, default 0
++//   drm_module - string, default DRM_MODULE; passed to drmOpen() by init
++#define OFFSET(x) offsetof(drm_display_env_t, x)
++static const AVOption options[] = {
++ { "show_all", "show all frames", OFFSET(show_all), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM },
++ { "drm_module", "drm_module name to use, default=" DRM_MODULE, OFFSET(drm_module), AV_OPT_TYPE_STRING, { .str = DRM_MODULE }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM },
++ { NULL }
++};
++
++// AVClass for the device: exposes the option table above and puts the
++// device in the video-output device category.
++static const AVClass drm_vout_class = {
++ .class_name = "drm vid outdev",
++ .item_name = av_default_item_name,
++ .option = options,
++ .version = LIBAVUTIL_VERSION_INT,
++ .category = AV_CLASS_CATEGORY_DEVICE_VIDEO_OUTPUT,
++};
++
++// Output device "vout_drm": takes one stream of wrapped AVFrames and shows
++// it through DRM. No output file is used (AVFMT_NOFILE).
++// Defined const to agree with the `extern const AVOutputFormat
++// ff_vout_drm_muxer` declaration added to alldevices.c.
++const AVOutputFormat ff_vout_drm_muxer = {
++    .name = "vout_drm",
++    .long_name = NULL_IF_CONFIG_SMALL("Drm video output device"),
++    .priv_data_size = sizeof(drm_display_env_t),
++    .audio_codec = AV_CODEC_ID_NONE,
++    .video_codec = AV_CODEC_ID_WRAPPED_AVFRAME,
++    .write_header = drm_vout_write_header,
++    .write_packet = drm_vout_write_packet,
++    .write_uncoded_frame = drm_vout_write_frame,
++    .write_trailer = drm_vout_write_trailer,
++    .control_message = drm_vout_control_message,
++    .flags = AVFMT_NOFILE | AVFMT_VARIABLE_FPS | AVFMT_NOTIMESTAMPS,
++    .priv_class = &drm_vout_class,
++    .init = drm_vout_init,
++    .deinit = drm_vout_deinit,
++};
++
+--- /dev/null
++++ b/libavdevice/egl_vout.c
+@@ -0,0 +1,781 @@
++/*
++ * Copyright (c) 2020 John Cox for Raspberry Pi Trading
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++
++// *** This module is a work in progress and its utility is strictly
++// limited to testing.
++// Amongst other issues it doesn't wait for the pic to be displayed before
++// returning the buffer so flickering does occur.
++
++#include <epoxy/gl.h>
++#include <epoxy/egl.h>
++
++#include "libavutil/opt.h"
++#include "libavutil/avassert.h"
++#include "libavutil/pixdesc.h"
++#include "libavutil/imgutils.h"
++#include "libavutil/hwcontext_drm.h"
++#include "libavformat/internal.h"
++#include "avdevice.h"
++
++#include "pthread.h"
++#include <semaphore.h>
++#include <stdatomic.h>
++#include <unistd.h>
++
++#include <X11/Xlib.h>
++#include <X11/Xutil.h>
++
++#include "libavutil/rpi_sand_fns.h"
++
++#define TRACE_ALL 0
++
++// X11 / EGL handles used by the EGL output device.
++// NOTE(review): crtcId, crtcIdx, planeId and compose mirror the fields of
++// the DRM device's setup struct; their use is not visible here - confirm
++// whether they are still needed.
++struct egl_setup {
++ int conId;
++
++ Display *dpy; // X display
++ EGLDisplay egl_dpy;
++ EGLContext ctx; // presumably the context returned by make_window() - confirm
++ EGLSurface surf; // presumably the surface returned by make_window() - confirm
++ Window win; // presumably the window returned by make_window() - confirm
++
++ uint32_t crtcId;
++ int crtcIdx;
++ uint32_t planeId;
++ struct {
++ int x, y, width, height;
++ } compose;
++};
++
++// Per-buffer state: a file descriptor paired with a GL texture name.
++// NOTE(review): presumably one entry per imported dmabuf - confirm against
++// the code that fills the aux[] array.
++typedef struct egl_aux_s {
++ int fd;
++ GLuint texture;
++
++} egl_aux_t;
++
++// Private data of the EGL output device.
++typedef struct egl_display_env_s {
++ AVClass *class;
++
++ struct egl_setup setup;
++ enum AVPixelFormat avfmt;
++
++ int show_all;
++ int window_width, window_height; // 0 makes make_window() use 1280 / 720
++ int window_x, window_y; // requested window position
++ int fullscreen; // non-zero: window covers the default screen, without border
++
++ egl_aux_t aux[32];
++
++ // Display thread and its frame hand-off state
++ pthread_t q_thread;
++ pthread_mutex_t q_lock;
++ sem_t display_start_sem;
++ sem_t q_sem;
++ int q_terminate;
++ AVFrame *q_this;
++ AVFrame *q_next;
++
++} egl_display_env_t;
++
++
++/**
++ * Remove window border/decorations.
++ *
++ * Sets the _MOTIF_WM_HINTS property of w with the decorations flag set and
++ * a decorations value of 0. Does nothing if the atom does not exist on the
++ * display.
++ *
++ * dpy  X display
++ * w    window to change
++ */
++static void
++no_border(Display *dpy, Window w)
++{
++    static const unsigned MWM_HINTS_DECORATIONS = (1 << 1);
++    static const int PROP_MOTIF_WM_HINTS_ELEMENTS = 5;
++
++    typedef struct {
++        unsigned long flags;
++        unsigned long functions;
++        unsigned long decorations;
++        long inputMode;
++        unsigned long status;
++    } PropMotifWmHints;
++
++    /* Zero everything: all five elements are sent to the server below, and
++     * functions/inputMode/status used to be sent uninitialised */
++    PropMotifWmHints motif_hints = {0};
++    Atom prop, proptype;
++    unsigned long flags = 0;
++
++    /* setup the property */
++    motif_hints.flags = MWM_HINTS_DECORATIONS;
++    motif_hints.decorations = flags;
++
++    /* get the atom for the property */
++    prop = XInternAtom(dpy, "_MOTIF_WM_HINTS", True);
++    if (!prop) {
++        /* something went wrong! */
++        return;
++    }
++
++    /* not sure this is correct, seems to work, XA_WM_HINTS didn't work */
++    proptype = prop;
++
++    XChangeProperty(dpy, w,                         /* display, window */
++                    prop, proptype,                 /* property, type */
++                    32,                             /* format: 32-bit datums */
++                    PropModeReplace,                /* mode */
++                    (unsigned char *)&motif_hints,  /* data */
++                    PROP_MOTIF_WM_HINTS_ELEMENTS    /* nelements */
++                    );
++}
++
++
++/*
++ * Create an X11 window together with an OpenGL ES 2 context and a window
++ * surface, and make them current.
++ *
++ * s        logging context
++ * de       supplies the fullscreen flag and the window geometry (a zero
++ *          width or height selects 1280 / 720)
++ * dpy      X display
++ * egl_dpy  EGL display
++ * name     window and icon name
++ * winRet, ctxRet, surfRet
++ *          receive the window, context and surface; written on success only
++ *
++ * Returns 0 on success, -1 on failure.
++ * NOTE(review): objects created before a failure are not released here.
++ */
++static int
++make_window(struct AVFormatContext *const s,
++            egl_display_env_t *const de,
++            Display *dpy, EGLDisplay egl_dpy, const char *name,
++            Window *winRet, EGLContext *ctxRet, EGLSurface *surfRet)
++{
++    int scrnum = DefaultScreen(dpy);
++    XSetWindowAttributes attr;
++    unsigned long mask;
++    Window root = RootWindow(dpy, scrnum);
++    Window win;
++    EGLContext ctx;
++    const int fullscreen = de->fullscreen;
++    EGLConfig config;
++    int x = de->window_x;
++    int y = de->window_y;
++    int width = de->window_width ? de->window_width : 1280;
++    int height = de->window_height ? de->window_height : 720;
++
++    if (fullscreen) {
++        /* Cover the whole default screen */
++        x = 0; y = 0;
++        width = DisplayWidth(dpy, scrnum);
++        height = DisplayHeight(dpy, scrnum);
++    }
++
++    {
++        EGLint num_configs = 0;
++        static const EGLint attribs[] = {
++            EGL_RED_SIZE, 1,
++            EGL_GREEN_SIZE, 1,
++            EGL_BLUE_SIZE, 1,
++            EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT,
++            EGL_NONE
++        };
++
++        /* The call can succeed without returning a config, which would
++         * leave `config` unset, so the count is checked as well */
++        if (!eglChooseConfig(egl_dpy, attribs, &config, 1, &num_configs) ||
++            num_configs < 1) {
++            av_log(s, AV_LOG_ERROR, "Error: couldn't get an EGL visual config\n");
++            return -1;
++        }
++    }
++
++    {
++        EGLint vid;
++        if (!eglGetConfigAttrib(egl_dpy, config, EGL_NATIVE_VISUAL_ID, &vid)) {
++            av_log(s, AV_LOG_ERROR, "Error: eglGetConfigAttrib() failed\n");
++            return -1;
++        }
++
++        {
++            XVisualInfo visTemplate = {
++                .visualid = vid,
++            };
++            int num_visuals;
++            XVisualInfo *visinfo = XGetVisualInfo(dpy, VisualIDMask,
++                                                  &visTemplate, &num_visuals);
++
++            /* No visual matches the id: visinfo is dereferenced below */
++            if (!visinfo) {
++                av_log(s, AV_LOG_ERROR, "Error: couldn't get X visual\n");
++                return -1;
++            }
++
++            /* window attributes */
++            attr.background_pixel = 0;
++            attr.border_pixel = 0;
++            attr.colormap = XCreateColormap(dpy, root, visinfo->visual, AllocNone);
++            attr.event_mask = StructureNotifyMask | ExposureMask | KeyPressMask;
++            /* XXX this is a bad way to get a borderless window! */
++            mask = CWBackPixel | CWBorderPixel | CWColormap | CWEventMask;
++
++            win = XCreateWindow(dpy, root, x, y, width, height,
++                                0, visinfo->depth, InputOutput,
++                                visinfo->visual, mask, &attr);
++            XFree(visinfo);
++        }
++    }
++
++    if (fullscreen)
++        no_border(dpy, win);
++
++    /* set hints and properties */
++    {
++        XSizeHints sizehints;
++        sizehints.x = x;
++        sizehints.y = y;
++        sizehints.width = width;
++        sizehints.height = height;
++        sizehints.flags = USSize | USPosition;
++        XSetNormalHints(dpy, win, &sizehints);
++        XSetStandardProperties(dpy, win, name, name,
++                               None, (char **)NULL, 0, &sizehints);
++    }
++
++    eglBindAPI(EGL_OPENGL_ES_API);
++
++    {
++        static const EGLint ctx_attribs[] = {
++            EGL_CONTEXT_CLIENT_VERSION, 2,
++            EGL_NONE
++        };
++        ctx = eglCreateContext(egl_dpy, config, EGL_NO_CONTEXT, ctx_attribs);
++        if (!ctx) {
++            av_log(s, AV_LOG_ERROR, "Error: eglCreateContext failed\n");
++            return -1;
++        }
++    }
++
++
++    XMapWindow(dpy, win);
++
++    {
++        EGLSurface surf = eglCreateWindowSurface(egl_dpy, config, (EGLNativeWindowType)win, NULL);
++        if (!surf) {
++            av_log(s, AV_LOG_ERROR, "Error: eglCreateWindowSurface failed\n");
++            return -1;
++        }
++
++        if (!eglMakeCurrent(egl_dpy, surf, surf, ctx)) {
++            /* This message used to name eglCreateContext */
++            av_log(s, AV_LOG_ERROR, "Error: eglMakeCurrent failed\n");
++            return -1;
++        }
++
++        *winRet = win;
++        *ctxRet = ctx;
++        *surfRet = surf;
++    }
++
++    return 0;
++}
++/* Compile `source` as a shader of kind `target`.
++ * Returns the GL shader name, or 0 on failure after logging the compile log. */
++static GLint
++compile_shader(struct AVFormatContext *const avctx, GLenum target, const char *source)
++{
++    GLuint s = glCreateShader(target);
++    GLint ok = 0;
++
++    if (s == 0) {
++        av_log(avctx, AV_LOG_ERROR, "Failed to create shader\n");
++        return 0;
++    }
++
++    glShaderSource(s, 1, (const GLchar **)&source, NULL);
++    glCompileShader(s);
++    glGetShaderiv(s, GL_COMPILE_STATUS, &ok);
++
++    if (!ok) {
++        GLchar *info = NULL;
++        GLint size = 0;
++
++        glGetShaderiv(s, GL_INFO_LOG_LENGTH, &size);
++        /* The log is best effort: it may be empty and malloc may fail */
++        if (size > 1 && (info = malloc(size)) != NULL)
++            glGetShaderInfoLog(s, size, NULL, info);
++        av_log(avctx, AV_LOG_ERROR, "Failed to compile shader: %ssource:\n%s\n",
++               info != NULL ? info : "<empty log>\n", source);
++        free(info);
++        glDeleteShader(s); /* release the failed shader object */
++        return 0;
++    }
++
++    return s;
++}
++/* Link shaders `vs` and `fs` into a new program.
++ * Returns the GL program name, or 0 on failure after logging the link log. */
++static GLuint link_program(struct AVFormatContext *const s, GLint vs, GLint fs)
++{
++    GLuint prog = glCreateProgram();
++    GLint ok = 0;
++
++    if (prog == 0) {
++        av_log(s, AV_LOG_ERROR, "Failed to create program\n");
++        return 0;
++    }
++
++    glAttachShader(prog, vs);
++    glAttachShader(prog, fs);
++    glLinkProgram(prog);
++    glGetProgramiv(prog, GL_LINK_STATUS, &ok);
++
++    if (!ok) {
++        /* Some drivers return a size of 1 for an empty log. This is the size
++         * of a log that contains only a terminating NUL character.
++         */
++        GLint size = 0;
++        GLchar *info = NULL;
++
++        glGetProgramiv(prog, GL_INFO_LOG_LENGTH, &size);
++        /* Only fetch the log when there is one and the buffer was allocated */
++        if (size > 1 && (info = malloc(size)) != NULL)
++            glGetProgramInfoLog(prog, size, NULL, info);
++        av_log(s, AV_LOG_ERROR, "Failed to link: %s\n",
++               (info != NULL) ? info : "<empty log>");
++        free(info);
++        glDeleteProgram(prog); /* release the failed program object */
++        return 0;
++    }
++
++    return prog;
++}
++/* Build and activate the program that draws the external (frame) texture on a
++ * full-viewport quad. Returns 0 on success, -1 on failure (already logged). */
++static int
++gl_setup(struct AVFormatContext *const s)
++{
++    const char *vs =
++        "attribute vec4 pos;\n"
++        "varying vec2 texcoord;\n"
++        "\n"
++        "void main() {\n"
++        " gl_Position = pos;\n"
++        " texcoord.x = (pos.x + 1.0) / 2.0;\n"
++        " texcoord.y = (-pos.y + 1.0) / 2.0;\n"
++        "}\n";
++    const char *fs =
++        "#extension GL_OES_EGL_image_external : enable\n"
++        "precision mediump float;\n"
++        "uniform samplerExternalOES s;\n"
++        "varying vec2 texcoord;\n"
++        "void main() {\n"
++        " gl_FragColor = texture2D(s, texcoord);\n"
++        "}\n";
++    /* Corners of the viewport in clip space; static as GL keeps the pointer */
++    static const float verts[] = { -1, -1,   1, -1,   1, 1,   -1, 1 };
++    GLuint vs_s, fs_s = 0, prog = 0;
++    GLint pos;
++
++    if ((vs_s = compile_shader(s, GL_VERTEX_SHADER, vs)) != 0 &&
++        (fs_s = compile_shader(s, GL_FRAGMENT_SHADER, fs)) != 0)
++        prog = link_program(s, vs_s, fs_s);
++    /* The program keeps what it needs; glDeleteShader() ignores a 0 name */
++    glDeleteShader(vs_s);
++    glDeleteShader(fs_s);
++    if (prog == 0)
++        return -1;
++
++    glUseProgram(prog);
++    /* Do not assume "pos" was assigned attribute location 0 by the linker */
++    if ((pos = glGetAttribLocation(prog, "pos")) < 0) {
++        av_log(s, AV_LOG_ERROR, "Failed to find vertex attribute\n");
++        return -1;
++    }
++    glVertexAttribPointer(pos, 2, GL_FLOAT, GL_FALSE, 0, verts);
++    glEnableVertexAttribArray(pos);
++    return 0;
++}
++/* write_trailer callback: no teardown is done here, it only traces when TRACE_ALL is set */
++static int egl_vout_write_trailer(AVFormatContext *s)
++{
++#if TRACE_ALL
++ av_log(s, AV_LOG_INFO, "%s\n", __func__);
++#endif
++
++ return 0;
++}
++/* write_header callback: accept no more than one stream, and only when
++ * stream 0 is video carrying wrapped AVFrames; anything else is EINVAL. */
++static int egl_vout_write_header(AVFormatContext *s)
++{
++    const AVCodecParameters *const par = s->streams[0]->codecpar;
++
++#if TRACE_ALL
++    av_log(s, AV_LOG_INFO, "%s\n", __func__);
++#endif
++    if (s->nb_streams <= 1 &&
++        par->codec_type == AVMEDIA_TYPE_VIDEO &&
++        par->codec_id == AV_CODEC_ID_WRAPPED_AVFRAME)
++        return 0;
++
++    av_log(s, AV_LOG_ERROR, "Only supports one wrapped avframe stream\n");
++    return AVERROR(EINVAL);
++}
++/* Show one DRM-PRIME frame: import its dma-buf planes as an EGLImage bound to
++ * an external texture, draw it and swap buffers. Returns 0, or <0 on failure. */
++static int do_display(AVFormatContext *const s, egl_display_env_t *const de, AVFrame *const frame)
++{
++    const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)frame->data[0];
++    egl_aux_t *da = NULL;
++    unsigned int i;
++
++#if TRACE_ALL
++    av_log(s, AV_LOG_INFO, "<<< %s\n", __func__);
++#endif
++    /* Take the first slot that is free or already tied to this buffer's fd */
++    for (i = 0; i != 32; ++i) {
++        if (de->aux[i].fd == -1 || de->aux[i].fd == desc->objects[0].fd) {
++            da = de->aux + i;
++            break;
++        }
++    }
++
++    if (da == NULL) {
++        av_log(s, AV_LOG_INFO, "%s: Out of handles\n", __func__);
++        return AVERROR(EINVAL);
++    }
++
++    if (da->texture == 0) {
++        EGLint attribs[50];
++        EGLint *a = attribs;
++        int layer, plane, n;
++        static const EGLint anames[] = {
++            EGL_DMA_BUF_PLANE0_FD_EXT,
++            EGL_DMA_BUF_PLANE0_OFFSET_EXT,
++            EGL_DMA_BUF_PLANE0_PITCH_EXT,
++            EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT,
++            EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT,
++            EGL_DMA_BUF_PLANE1_FD_EXT,
++            EGL_DMA_BUF_PLANE1_OFFSET_EXT,
++            EGL_DMA_BUF_PLANE1_PITCH_EXT,
++            EGL_DMA_BUF_PLANE1_MODIFIER_LO_EXT,
++            EGL_DMA_BUF_PLANE1_MODIFIER_HI_EXT,
++            EGL_DMA_BUF_PLANE2_FD_EXT,
++            EGL_DMA_BUF_PLANE2_OFFSET_EXT,
++            EGL_DMA_BUF_PLANE2_PITCH_EXT,
++            EGL_DMA_BUF_PLANE2_MODIFIER_LO_EXT,
++            EGL_DMA_BUF_PLANE2_MODIFIER_HI_EXT,
++        };
++        const EGLint *b = anames;
++        const EGLint *const b_end = anames + sizeof(anames) / sizeof(anames[0]);
++
++        *a++ = EGL_WIDTH;
++        *a++ = av_frame_cropped_width(frame);
++        *a++ = EGL_HEIGHT;
++        *a++ = av_frame_cropped_height(frame);
++        *a++ = EGL_LINUX_DRM_FOURCC_EXT;
++        *a++ = desc->layers[0].format;
++
++        for (layer = 0; layer < desc->nb_layers; ++layer) {
++            for (plane = 0; plane < desc->layers[layer].nb_planes; ++plane) {
++                const AVDRMPlaneDescriptor *const p = desc->layers[layer].planes + plane;
++                const AVDRMObjectDescriptor *const obj = desc->objects + p->object_index;
++                /* anames[] only names three planes: never read beyond it */
++                if (b == b_end) {
++                    av_log(s, AV_LOG_ERROR, "%s: Too many planes\n", __func__);
++                    return AVERROR(EINVAL);
++                }
++                *a++ = *b++;
++                *a++ = obj->fd;
++                *a++ = *b++;
++                *a++ = p->offset;
++                *a++ = *b++;
++                *a++ = p->pitch;
++                if (obj->format_modifier == 0) {
++                    b += 2; /* skip the two modifier names for this plane */
++                }
++                else {
++                    *a++ = *b++;
++                    *a++ = (EGLint)(obj->format_modifier & 0xFFFFFFFF);
++                    *a++ = *b++;
++                    *a++ = (EGLint)(obj->format_modifier >> 32);
++                }
++            }
++        }
++        *a = EGL_NONE;
++#if TRACE_ALL
++        for (a = attribs, n = 0; *a != EGL_NONE; a += 2, ++n) {
++            av_log(s, AV_LOG_INFO, "[%2d] %4x: %d\n", n, a[0], a[1]);
++        }
++#endif
++        {
++            const EGLImage image = eglCreateImageKHR(de->setup.egl_dpy,
++                                                     EGL_NO_CONTEXT,
++                                                     EGL_LINUX_DMA_BUF_EXT,
++                                                     NULL, attribs);
++            if (!image) {
++                av_log(s, AV_LOG_ERROR, "Failed to import fd %d\n", desc->objects[0].fd);
++                return -1;
++            }
++            glGenTextures(1, &da->texture);
++            glBindTexture(GL_TEXTURE_EXTERNAL_OES, da->texture);
++            glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
++            glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
++            glEGLImageTargetTexture2DOES(GL_TEXTURE_EXTERNAL_OES, image);
++            eglDestroyImageKHR(de->setup.egl_dpy, image);
++        }
++        da->fd = desc->objects[0].fd;
++    }
++
++    glClearColor(0.5, 0.5, 0.5, 0.5);
++    glClear(GL_COLOR_BUFFER_BIT);
++    glBindTexture(GL_TEXTURE_EXTERNAL_OES, da->texture);
++    glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
++    eglSwapBuffers(de->setup.egl_dpy, de->setup.surf);
++
++    /* The texture lives for a single frame: delete it and free the slot */
++    glDeleteTextures(1, &da->texture);
++    da->texture = 0;
++    da->fd = -1;
++    return 0;
++}
++/* Display thread: opens the X display, sets up EGL, window and GL state, then shows queued frames until q_terminate is set */
++static void* display_thread(void *v)
++{
++ AVFormatContext *const s = v;
++ egl_display_env_t *const de = s->priv_data;
++
++#if TRACE_ALL
++ av_log(s, AV_LOG_INFO, "<<< %s\n", __func__);
++#endif
++ {
++ EGLint egl_major, egl_minor;
++
++ de->setup.dpy = XOpenDisplay(NULL);
++ if (!de->setup.dpy) {
++ av_log(s, AV_LOG_ERROR, "Couldn't open X display\n");
++ goto fail;
++ }
++
++ de->setup.egl_dpy = eglGetDisplay(de->setup.dpy);
++ if (!de->setup.egl_dpy) {
++ av_log(s, AV_LOG_ERROR, "eglGetDisplay() failed\n");
++ goto fail;
++ }
++
++ if (!eglInitialize(de->setup.egl_dpy, &egl_major, &egl_minor)) {
++ av_log(s, AV_LOG_ERROR, "Error: eglInitialize() failed\n");
++ goto fail;
++ }
++
++ av_log(s, AV_LOG_INFO, "EGL version %d.%d\n", egl_major, egl_minor);
++
++ if (!epoxy_has_egl_extension(de->setup.egl_dpy, "EGL_KHR_image_base")) {
++ av_log(s, AV_LOG_ERROR, "Missing EGL KHR image extension\n");
++ goto fail;
++ }
++ }
++ /* No usable window size configured: fall back to 1280x720 */
++ if (!de->window_width || !de->window_height) {
++ de->window_width = 1280;
++ de->window_height = 720;
++ }
++ if (make_window(s, de, de->setup.dpy, de->setup.egl_dpy, "ffmpeg-vout",
++ &de->setup.win, &de->setup.ctx, &de->setup.surf)) {
++ av_log(s, AV_LOG_ERROR, "%s: make_window failed\n", __func__);
++ goto fail;
++ }
++
++ if (gl_setup(s)) {
++ av_log(s, AV_LOG_ERROR, "%s: gl_setup failed\n", __func__);
++ goto fail;
++ }
++ /* Setup succeeded: release egl_vout_init(), which waits on display_start_sem */
++#if TRACE_ALL
++ av_log(s, AV_LOG_INFO, "--- %s: Start done\n", __func__);
++#endif
++ sem_post(&de->display_start_sem);
++ /* Frame loop: woken through q_sem for a queued frame, or with q_terminate set to stop */
++ for (;;) {
++ AVFrame *frame;
++
++ while (sem_wait(&de->q_sem) != 0) {
++ av_assert0(errno == EINTR);
++ }
++
++ if (de->q_terminate)
++ break;
++
++ pthread_mutex_lock(&de->q_lock);
++ frame = de->q_next;
++ de->q_next = NULL;
++ pthread_mutex_unlock(&de->q_lock);
++
++ do_display(s, de, frame);
++ /* Keep the frame just shown referenced; release the one shown before it */
++ av_frame_free(&de->q_this);
++ de->q_this = frame;
++ }
++
++#if TRACE_ALL
++ av_log(s, AV_LOG_INFO, ">>> %s\n", __func__);
++#endif
++
++ return NULL;
++ /* Startup failure: flag it through q_terminate and wake egl_vout_init() */
++fail:
++#if TRACE_ALL
++ av_log(s, AV_LOG_INFO, ">>> %s: FAIL\n", __func__);
++#endif
++ de->q_terminate = 1;
++ sem_post(&de->display_start_sem);
++
++ return NULL;
++}
++/* write_packet callback: take a DRM-PRIME reference to the wrapped frame (mapping
++ * VAAPI frames first) and queue it for display. Returns 0 or a negative AVERROR. */
++static int egl_vout_write_packet(AVFormatContext *s, AVPacket *pkt)
++{
++    const AVFrame *const src_frame = (AVFrame *)pkt->data;
++    egl_display_env_t *const de = s->priv_data;
++    AVFrame *frame, *displaced;
++    int ret;
++
++#if TRACE_ALL
++    av_log(s, AV_LOG_INFO, "%s\n", __func__);
++#endif
++    if (src_frame->format != AV_PIX_FMT_DRM_PRIME &&
++        src_frame->format != AV_PIX_FMT_VAAPI) {
++        av_log(s, AV_LOG_WARNING, "Frame (format=%d) not DRM_PRiME\n", src_frame->format);
++        return AVERROR(EINVAL);
++    }
++    /* Allocation and referencing can fail: report it instead of crashing */
++    if ((frame = av_frame_alloc()) == NULL)
++        return AVERROR(ENOMEM);
++    if (src_frame->format == AV_PIX_FMT_DRM_PRIME) {
++        if ((ret = av_frame_ref(frame, src_frame)) < 0) {
++            av_frame_free(&frame);
++            return ret;
++        }
++    }
++    else {
++        frame->format = AV_PIX_FMT_DRM_PRIME;
++        if (av_hwframe_map(frame, src_frame, 0) != 0) {
++            av_log(s, AV_LOG_WARNING, "Failed to map frame (format=%d) to DRM_PRiME\n", src_frame->format);
++            av_frame_free(&frame);
++            return AVERROR(EINVAL);
++        }
++    }
++
++    // Really hacky sync
++    while (de->show_all && de->q_next)
++        usleep(3000);
++    pthread_mutex_lock(&de->q_lock);
++    displaced = de->q_next;
++    de->q_next = frame;
++    pthread_mutex_unlock(&de->q_lock);
++    /* Empty slot: wake the display thread. Otherwise drop the frame we replaced */
++    if (displaced == NULL)
++        sem_post(&de->q_sem);
++    else
++        av_frame_free(&displaced);
++    return 0;
++}
++/* write_uncoded_frame callback: not implemented - logs the call and returns AVERROR_PATCHWELCOME */
++static int egl_vout_write_frame(AVFormatContext *s, int stream_index, AVFrame **ppframe,
++ unsigned flags)
++{
++ av_log(s, AV_LOG_ERROR, "%s: NIF: idx=%d, flags=%#x\n", __func__, stream_index, flags);
++ return AVERROR_PATCHWELCOME;
++}
++/* control_message callback.
++ * AV_APP_TO_DEV_WINDOW_REPAINT is accepted and ignored (returns 0);
++ * every other message type is reported as unsupported with
++ * AVERROR(ENOSYS). */
++static int egl_vout_control_message(AVFormatContext *s, int type, void *data, size_t data_size)
++{
++#if TRACE_ALL
++    av_log(s, AV_LOG_INFO, "%s: %d\n", __func__, type);
++#endif
++    if (type == AV_APP_TO_DEV_WINDOW_REPAINT)
++        return 0;
++
++    return AVERROR(ENOSYS);
++}
++/* init callback: reset the private state, start the display thread and wait
++ * until it reports whether display setup worked. Returns 0, or -1 on failure.
++ * deinit is called if init fails so no need to clean up explicitly here */
++static int egl_vout_init(struct AVFormatContext *s)
++{
++    egl_display_env_t *const de = s->priv_data;
++    unsigned int i;
++
++    av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__);
++    de->setup = (struct egl_setup) { 0 };
++    for (i = 0; i != 32; ++i)
++        de->aux[i].fd = -1;
++
++    de->q_terminate = 0;
++    pthread_mutex_init(&de->q_lock, NULL);
++    sem_init(&de->q_sem, 0, 0);
++    sem_init(&de->display_start_sem, 0, 0);
++    av_assert0(pthread_create(&de->q_thread, NULL, display_thread, s) == 0);
++
++    /* A signal must not end the wait early: q_terminate is only meaningful
++     * once the display thread has posted display_start_sem */
++    while (sem_wait(&de->display_start_sem) != 0)
++        av_assert0(errno == EINTR);
++    if (de->q_terminate) {
++        av_log(s, AV_LOG_ERROR, "%s: Display startup failure\n", __func__);
++        return -1;
++    }
++
++    av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__);
++    return 0;
++}
++/* deinit callback: stop and join the display thread, then release the
++ * synchronisation objects created by egl_vout_init() and any held frames. */
++static void egl_vout_deinit(struct AVFormatContext *s)
++{
++    egl_display_env_t *const de = s->priv_data;
++
++    av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__);
++
++    de->q_terminate = 1;
++    sem_post(&de->q_sem);
++    pthread_join(de->q_thread, NULL);
++    sem_destroy(&de->q_sem);
++    sem_destroy(&de->display_start_sem); /* pairs with sem_init() in egl_vout_init() */
++    pthread_mutex_destroy(&de->q_lock);
++    av_frame_free(&de->q_next);
++    av_frame_free(&de->q_this);
++    av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__);
++}
++/* Private options of the device; OFFSET(x) is the offset of field x in egl_display_env_t */
++#define OFFSET(x) offsetof(egl_display_env_t, x)
++static const AVOption options[] = {
++ { "show_all", "show all frames", OFFSET(show_all), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM },
++ { "window_size", "set window forced size", OFFSET(window_width), AV_OPT_TYPE_IMAGE_SIZE, { .str = NULL }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM },
++ { "window_x", "set window x offset", OFFSET(window_x), AV_OPT_TYPE_INT, { .i64 = 0 }, -INT_MAX, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM },
++ { "window_y", "set window y offset", OFFSET(window_y), AV_OPT_TYPE_INT, { .i64 = 0 }, -INT_MAX, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM },
++ { "fullscreen", "set fullscreen display", OFFSET(fullscreen), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM },
++ { NULL }
++ /* NOTE(review): "window_size" is an image-size option, so it presumably relies on window_height directly following window_width - confirm against egl_display_env_t */
++};
++/* AVClass for the device's private context: exposes the options table and marks it as a video output device */
++static const AVClass egl_vout_class = {
++ .class_name = "egl vid outdev",
++ .item_name = av_default_item_name,
++ .option = options,
++ .version = LIBAVUTIL_VERSION_INT,
++ .category = AV_CLASS_CATEGORY_DEVICE_VIDEO_OUTPUT,
++};
++/* Output device "vout_egl": wrapped-AVFrame video only, no audio, no output file (AVFMT_NOFILE) */
++AVOutputFormat ff_vout_egl_muxer = {
++ .name = "vout_egl",
++ .long_name = NULL_IF_CONFIG_SMALL("Egl video output device"),
++ .priv_data_size = sizeof(egl_display_env_t),
++ .audio_codec = AV_CODEC_ID_NONE,
++ .video_codec = AV_CODEC_ID_WRAPPED_AVFRAME,
++ .write_header = egl_vout_write_header,
++ .write_packet = egl_vout_write_packet,
++ .write_uncoded_frame = egl_vout_write_frame,
++ .write_trailer = egl_vout_write_trailer,
++ .control_message = egl_vout_control_message,
++ .flags = AVFMT_NOFILE | AVFMT_VARIABLE_FPS | AVFMT_NOTIMESTAMPS,
++ .priv_class = &egl_vout_class,
++ .init = egl_vout_init,
++ .deinit = egl_vout_deinit,
++};
++
+--- a/libavfilter/Makefile
++++ b/libavfilter/Makefile
+@@ -254,6 +254,7 @@ OBJS-$(CONFIG_DEFLATE_FILTER)
+ OBJS-$(CONFIG_DEFLICKER_FILTER) += vf_deflicker.o
+ OBJS-$(CONFIG_DEINTERLACE_QSV_FILTER) += vf_deinterlace_qsv.o
+ OBJS-$(CONFIG_DEINTERLACE_VAAPI_FILTER) += vf_deinterlace_vaapi.o vaapi_vpp.o
++OBJS-$(CONFIG_DEINTERLACE_V4L2M2M_FILTER) += vf_deinterlace_v4l2m2m.o
+ OBJS-$(CONFIG_DEJUDDER_FILTER) += vf_dejudder.o
+ OBJS-$(CONFIG_DELOGO_FILTER) += vf_delogo.o
+ OBJS-$(CONFIG_DENOISE_VAAPI_FILTER) += vf_misc_vaapi.o vaapi_vpp.o
+@@ -509,6 +510,7 @@ OBJS-$(CONFIG_TRANSPOSE_VAAPI_FILTER)
+ OBJS-$(CONFIG_TRANSPOSE_VULKAN_FILTER) += vf_transpose_vulkan.o vulkan.o vulkan_filter.o
+ OBJS-$(CONFIG_TRIM_FILTER) += trim.o
+ OBJS-$(CONFIG_UNPREMULTIPLY_FILTER) += vf_premultiply.o framesync.o
++OBJS-$(CONFIG_UNSAND_FILTER) += vf_unsand.o
+ OBJS-$(CONFIG_UNSHARP_FILTER) += vf_unsharp.o
+ OBJS-$(CONFIG_UNSHARP_OPENCL_FILTER) += vf_unsharp_opencl.o opencl.o \
+ opencl/unsharp.o
+--- a/libavfilter/aarch64/Makefile
++++ b/libavfilter/aarch64/Makefile
+@@ -1,3 +1,5 @@
++OBJS-$(CONFIG_BWDIF_FILTER) += aarch64/vf_bwdif_init_aarch64.o
+ OBJS-$(CONFIG_NLMEANS_FILTER) += aarch64/vf_nlmeans_init.o
+
++NEON-OBJS-$(CONFIG_BWDIF_FILTER) += aarch64/vf_bwdif_neon.o
+ NEON-OBJS-$(CONFIG_NLMEANS_FILTER) += aarch64/vf_nlmeans_neon.o
+--- /dev/null
++++ b/libavfilter/aarch64/vf_bwdif_init_aarch64.c
+@@ -0,0 +1,125 @@
++/*
++ * bwdif aarch64 NEON optimisations
++ *
++ * Copyright (c) 2023 John Cox <jc@kynesim.co.uk>
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "libavutil/common.h"
++#include "libavfilter/bwdif.h"
++#include "libavutil/aarch64/cpu.h"
++/* Prototypes of the NEON routines that the helper functions in this file wrap */
++void ff_bwdif_filter_edge_neon(void *dst1, void *prev1, void *cur1, void *next1,
++ int w, int prefs, int mrefs, int prefs2, int mrefs2,
++ int parity, int clip_max, int spat);
++
++void ff_bwdif_filter_intra_neon(void *dst1, void *cur1, int w, int prefs, int mrefs,
++ int prefs3, int mrefs3, int parity, int clip_max);
++
++void ff_bwdif_filter_line_neon(void *dst1, void *prev1, void *cur1, void *next1,
++ int w, int prefs, int mrefs, int prefs2, int mrefs2,
++ int prefs3, int mrefs3, int prefs4, int mrefs4,
++ int parity, int clip_max);
++
++void ff_bwdif_filter_line3_neon(void * dst1, int d_stride,
++ const void * prev1, const void * cur1, const void * next1, int s_stride,
++ int w, int parity, int clip_max);
++
++/* filter_line3 hook: NEON handles the first w0 columns, the C version finishes any columns left over */
++static void filter_line3_helper(void * dst1, int d_stride,
++ const void * prev1, const void * cur1, const void * next1, int s_stride,
++ int w, int parity, int clip_max)
++{
++ // Asm works on 16 byte chunks
++ // If w is a multiple of 16 then all is good - if not then if width rounded
++ // up to nearest 16 will fit in both src & dst strides then allow the asm
++ // to write over the padding bytes as that is almost certainly faster than
++ // having to invoke the C version to clean up the tail.
++ const int w1 = FFALIGN(w, 16);
++ const int w0 = clip_max != 255 ? 0 :
++ d_stride <= w1 && s_stride <= w1 ? w : w & ~15;
++ // NOTE(review): the text above reads like "w1 <= stride" but the test is "stride <= w1" - confirm the intent before changing it
++ ff_bwdif_filter_line3_neon(dst1, d_stride,
++ prev1, cur1, next1, s_stride,
++ w0, parity, clip_max);
++
++ if (w0 < w)
++ ff_bwdif_filter_line3_c((char *)dst1 + w0, d_stride,
++ (const char *)prev1 + w0, (const char *)cur1 + w0, (const char *)next1 + w0, s_stride,
++ w - w0, parity, clip_max);
++}
++/* filter_line hook: NEON for the whole 16-column chunks when clip_max is 255,
++ * the C version for whatever columns are left over. */
++static void filter_line_helper(void *dst1, void *prev1, void *cur1, void *next1,
++                               int w, int prefs, int mrefs, int prefs2, int mrefs2,
++                               int prefs3, int mrefs3, int prefs4, int mrefs4,
++                               int parity, int clip_max)
++{
++    const int simd_w = (clip_max == 255) ? (w & ~15) : 0;
++    const int tail_w = w - simd_w;
++    ff_bwdif_filter_line_neon(dst1, prev1, cur1, next1, simd_w,
++                              prefs, mrefs, prefs2, mrefs2, prefs3, mrefs3, prefs4, mrefs4, parity, clip_max);
++    if (tail_w > 0)
++        ff_bwdif_filter_line_c((char *)dst1 + simd_w, (char *)prev1 + simd_w, (char *)cur1 + simd_w, (char *)next1 + simd_w,
++                               tail_w, prefs, mrefs, prefs2, mrefs2, prefs3, mrefs3, prefs4, mrefs4, parity, clip_max);
++}
++/* filter_edge hook: NEON for the whole 16-column chunks when clip_max is 255,
++ * the C version for whatever columns are left over. */
++static void filter_edge_helper(void *dst1, void *prev1, void *cur1, void *next1,
++                               int w, int prefs, int mrefs, int prefs2, int mrefs2,
++                               int parity, int clip_max, int spat)
++{
++    const int simd_w = (clip_max == 255) ? (w & ~15) : 0;
++    const int tail_w = w - simd_w;
++
++    ff_bwdif_filter_edge_neon(dst1, prev1, cur1, next1, simd_w,
++                              prefs, mrefs, prefs2, mrefs2, parity, clip_max, spat);
++    if (tail_w > 0)
++        ff_bwdif_filter_edge_c((char *)dst1 + simd_w, (char *)prev1 + simd_w, (char *)cur1 + simd_w, (char *)next1 + simd_w,
++                               tail_w, prefs, mrefs, prefs2, mrefs2, parity, clip_max, spat);
++}
++/* filter_intra hook: NEON for the whole 16-column chunks when clip_max is 255,
++ * the C version for whatever columns are left over. */
++static void filter_intra_helper(void *dst1, void *cur1, int w, int prefs, int mrefs,
++                                int prefs3, int mrefs3, int parity, int clip_max)
++{
++    const int simd_w = (clip_max == 255) ? (w & ~15) : 0;
++    const int tail_w = w - simd_w;
++    ff_bwdif_filter_intra_neon(dst1, cur1, simd_w, prefs, mrefs, prefs3, mrefs3, parity, clip_max);
++    if (tail_w > 0)
++        ff_bwdif_filter_intra_c((char *)dst1 + simd_w, (char *)cur1 + simd_w,
++                                tail_w, prefs, mrefs, prefs3, mrefs3, parity, clip_max);
++}
++/* Install the NEON-backed helpers into the bwdif context.
++ * The context is left untouched unless bit_depth is 8 and the CPU flags
++ * report NEON. */
++void
++ff_bwdif_init_aarch64(BWDIFContext *s, int bit_depth)
++{
++    const int cpu_flags = av_get_cpu_flags();
++    const int usable = bit_depth == 8 && have_neon(cpu_flags);
++
++    if (!usable)
++        return;
++
++    s->filter_line3 = filter_line3_helper;
++    s->filter_edge  = filter_edge_helper;
++    s->filter_line  = filter_line_helper;
++    s->filter_intra = filter_intra_helper;
++}
++
+--- /dev/null
++++ b/libavfilter/aarch64/vf_bwdif_neon.S
+@@ -0,0 +1,788 @@
++/*
++ * bwdif aarch64 NEON optimisations
++ *
++ * Copyright (c) 2023 John Cox <jc@kynesim.co.uk>
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++
++#include "libavutil/aarch64/asm.S"
++
++// Space taken on the stack by an int (32-bit)
++#ifdef __APPLE__
++.set SP_INT, 4 // NOTE(review): presumably Apple's ABI packs stacked ints at 4 bytes - confirm
++#else
++.set SP_INT, 8 // used as the stride of the [sp, #SP_INT*n] argument loads
++#endif
++// SQSHRUNN: b.16b = high byte of the unsigned-saturated 16-bit (x >> (n - 8)) for the 16 words in s0-s3; overwrites s0 and s1
++.macro SQSHRUNN b, s0, s1, s2, s3, n
++ sqshrun \s0\().4h, \s0\().4s, #\n - 8
++ sqshrun2 \s0\().8h, \s1\().4s, #\n - 8
++ sqshrun \s1\().4h, \s2\().4s, #\n - 8
++ sqshrun2 \s1\().8h, \s3\().4s, #\n - 8
++ uzp2 \b\().16b, \s0\().16b, \s1\().16b
++.endm
++// SMULL4K: a0-a3 (4S each) = signed widening product of the 16 halfwords in s0, s1 with element k
++.macro SMULL4K a0, a1, a2, a3, s0, s1, k
++ smull \a0\().4s, \s0\().4h, \k
++ smull2 \a1\().4s, \s0\().8h, \k
++ smull \a2\().4s, \s1\().4h, \k
++ smull2 \a3\().4s, \s1\().8h, \k
++.endm
++// UMULL4K: a0-a3 (4S each) = unsigned widening product of the 16 halfwords in s0, s1 with element k
++.macro UMULL4K a0, a1, a2, a3, s0, s1, k
++ umull \a0\().4s, \s0\().4h, \k
++ umull2 \a1\().4s, \s0\().8h, \k
++ umull \a2\().4s, \s1\().4h, \k
++ umull2 \a3\().4s, \s1\().8h, \k
++.endm
++// UMLAL4K: a0-a3 (4S each) += unsigned widening product of the 16 halfwords in s0, s1 with element k
++.macro UMLAL4K a0, a1, a2, a3, s0, s1, k
++ umlal \a0\().4s, \s0\().4h, \k
++ umlal2 \a1\().4s, \s0\().8h, \k
++ umlal \a2\().4s, \s1\().4h, \k
++ umlal2 \a3\().4s, \s1\().8h, \k
++.endm
++// UMLSL4K: a0-a3 (4S each) -= unsigned widening product of the 16 halfwords in s0, s1 with element k
++.macro UMLSL4K a0, a1, a2, a3, s0, s1, k
++ umlsl \a0\().4s, \s0\().4h, \k
++ umlsl2 \a1\().4s, \s0\().8h, \k
++ umlsl \a2\().4s, \s1\().4h, \k
++ umlsl2 \a3\().4s, \s1\().8h, \k
++.endm
++// SPAT_CHECK: raise diff (16 bytes) to at least the two spatial terms sketched below, except where diff is 0; t0-t3 are scratch
++// int b = m2s1 - m1;
++// int f = p2s1 - p1;
++// int dc = c0s1 - m1;
++// int de = c0s1 - p1;
++// int sp_max = FFMIN(p1 - c0s1, m1 - c0s1);
++// sp_max = FFMIN(sp_max, FFMAX(-b,-f));
++// int sp_min = FFMIN(c0s1 - p1, c0s1 - m1);
++// sp_min = FFMIN(sp_min, FFMAX(b,f));
++// diff = diff == 0 ? 0 : FFMAX3(diff, sp_min, sp_max);
++.macro SPAT_CHECK diff, m2s1, m1, c0s1, p1, p2s1, t0, t1, t2, t3
++ uqsub \t0\().16b, \p1\().16b, \c0s1\().16b
++ uqsub \t2\().16b, \m1\().16b, \c0s1\().16b
++ umin \t2\().16b, \t0\().16b, \t2\().16b
++
++ uqsub \t1\().16b, \m1\().16b, \m2s1\().16b
++ uqsub \t3\().16b, \p1\().16b, \p2s1\().16b
++ umax \t3\().16b, \t3\().16b, \t1\().16b
++ umin \t3\().16b, \t3\().16b, \t2\().16b
++
++ uqsub \t0\().16b, \c0s1\().16b, \p1\().16b
++ uqsub \t2\().16b, \c0s1\().16b, \m1\().16b
++ umin \t2\().16b, \t0\().16b, \t2\().16b
++
++ uqsub \t1\().16b, \m2s1\().16b, \m1\().16b
++ uqsub \t0\().16b, \p2s1\().16b, \p1\().16b
++ umax \t0\().16b, \t0\().16b, \t1\().16b
++ umin \t2\().16b, \t2\().16b, \t0\().16b
++
++ cmeq \t1\().16b, \diff\().16b, #0
++ umax \diff\().16b, \diff\().16b, \t3\().16b
++ umax \diff\().16b, \diff\().16b, \t2\().16b
++ bic \diff\().16b, \diff\().16b, \t1\().16b
++.endm
++// DIFF_CLIP: i0 = s0 clamped per byte to [d0 - diff, d0 + diff], bounds saturating; t0, t1 are scratch
++// i0 = s0;
++// if (i0 > d0 + diff0)
++// i0 = d0 + diff0;
++// else if (i0 < d0 - diff0)
++// i0 = d0 - diff0;
++//
++// i0 = s0 is safe
++.macro DIFF_CLIP i0, s0, d0, diff, t0, t1
++ uqadd \t0\().16b, \d0\().16b, \diff\().16b
++ uqsub \t1\().16b, \d0\().16b, \diff\().16b
++ umin \i0\().16b, \s0\().16b, \t0\().16b
++ umax \i0\().16b, \i0\().16b, \t1\().16b
++.endm
++// INTERPOL: pick i1 or i2 per byte, then DIFF_CLIP the pick around d0 into i0; t0-t2 are scratch
++// i0 = FFABS(m1 - p1) > td0 ? i1 : i2;
++// DIFF_CLIP
++//
++// i0 = i1 is safe
++.macro INTERPOL i0, i1, i2, m1, d0, p1, td0, diff, t0, t1, t2
++ uabd \t0\().16b, \m1\().16b, \p1\().16b
++ cmhi \t0\().16b, \t0\().16b, \td0\().16b
++ bsl \t0\().16b, \i1\().16b, \i2\().16b
++ DIFF_CLIP \i0, \t0, \d0, \diff, \t1, \t2
++.endm
++// PUSH_VREGS: lower sp by 64 and store d8-d15 there (undone by POP_VREGS)
++.macro PUSH_VREGS
++ stp d8, d9, [sp, #-64]!
++ stp d10, d11, [sp, #16]
++ stp d12, d13, [sp, #32]
++ stp d14, d15, [sp, #48]
++.endm
++// POP_VREGS: reload d8-d15 from the stack and raise sp by 64 (undoes PUSH_VREGS)
++.macro POP_VREGS
++ ldp d14, d15, [sp, #48]
++ ldp d12, d13, [sp, #32]
++ ldp d10, d11, [sp, #16]
++ ldp d8, d9, [sp], #64
++.endm
++// LDR_COEFFS: load the eight halfwords at `coeffs` into d, using t0 to hold the address
++.macro LDR_COEFFS d, t0
++ movrel \t0, coeffs, 0
++ ld1 {\d\().8h}, [\t0]
++.endm
++// Filter coefficient table read by LDR_COEFFS; the C reference values are:
++// static const uint16_t coef_lf[2] = { 4309, 213 };
++// static const uint16_t coef_hf[3] = { 5570, 3801, 1016 };
++// static const uint16_t coef_sp[2] = { 5077, 981 };
++
++const coeffs, align=4 // align 4 means align on 2^4 boundary
++ .hword 4309 * 4, 213 * 4 // lf[0]*4 = v0.h[0]
++ .hword 5570, 3801, 1016, -3801 // hf[0] = v0.h[2], -hf[1] = v0.h[5]
++ .hword 5077, 981 // sp[0] = v0.h[6]
++endconst
++
++// ===========================================================================
++//
++// void ff_bwdif_filter_line3_neon(
++// void * dst1, // x0
++// int d_stride, // w1
++// const void * prev1, // x2
++// const void * cur1, // x3
++// const void * next1, // x4
++// int s_stride, // w5
++// int w, // w6
++// int parity, // w7
++// int clip_max); // [sp, #0] (Ignored)
++
++function ff_bwdif_filter_line3_neon, export=1
++ // Sanity check w
++ cmp w6, #0
++ ble 99f // nothing to do when w <= 0
++
++ LDR_COEFFS v0, x17
++
++// #define prev2 cur
++// const uint8_t * restrict next2 = parity ? prev : next;
++ cmp w7, #0
++ csel x17, x2, x4, ne // x17 = next2
++
++ // We want all the V registers - save all the ones we must
++ PUSH_VREGS
++
++ // Some rearrangement of initial values for nice layout of refs in regs
++ mov w10, w6 // w10 = loop count
++ neg w9, w5 // w9 = mref
++ lsl w8, w9, #1 // w8 = mref2
++ add w7, w9, w9, LSL #1 // w7 = mref3
++ lsl w6, w9, #2 // w6 = mref4
++ mov w11, w5 // w11 = pref
++ lsl w12, w5, #1 // w12 = pref2
++ add w13, w5, w5, LSL #1 // w13 = pref3
++ lsl w14, w5, #2 // w14 = pref4
++ add w15, w5, w5, LSL #2 // w15 = pref5
++ add w16, w14, w12 // w16 = pref6
++
++ lsl w5, w1, #1 // w5 = d_stride * 2
++
++// for (x = 0; x < w; x++) {
++// int diff0, diff2;
++// int d0, d2;
++// int temporal_diff0, temporal_diff2;
++//
++// int i1, i2;
++// int j1, j2;
++// int p6, p5, p4, p3, p2, p1, c0, m1, m2, m3, m4;
++
++10:
++// c0 = prev2[0] + next2[0]; // c0 = v20, v21
++// d0 = c0 >> 1; // d0 = v10
++// temporal_diff0 = FFABS(prev2[0] - next2[0]); // td0 = v11
++ ldr q31, [x3]
++ ldr q21, [x17]
++ uhadd v10.16b, v31.16b, v21.16b
++ uabd v11.16b, v31.16b, v21.16b
++ uaddl v20.8h, v21.8b, v31.8b
++ uaddl2 v21.8h, v21.16b, v31.16b
++
++ ldr q31, [x3, w6, sxtw]
++ ldr q23, [x17, w6, sxtw]
++
++// i1 = coef_hf[0] * c0; // i1 = v2-v5
++ UMULL4K v2, v3, v4, v5, v20, v21, v0.h[2]
++
++ ldr q30, [x3, w14, sxtw]
++ ldr q25, [x17, w14, sxtw]
++
++// m4 = prev2[mrefs4] + next2[mrefs4]; // m4 = v22,v23
++ uaddl v22.8h, v23.8b, v31.8b
++ uaddl2 v23.8h, v23.16b, v31.16b
++
++// p4 = prev2[prefs4] + next2[prefs4]; // p4 = v24,v25, (p4 >> 1) = v12
++ uhadd v12.16b, v25.16b, v30.16b
++ uaddl v24.8h, v25.8b, v30.8b
++ uaddl2 v25.8h, v25.16b, v30.16b
++
++// j1 = -coef_hf[1] * (c0 + p4); // j1 = v6-v9 (-c0:v20,v21)
++ add v20.8h, v20.8h, v24.8h
++ add v21.8h, v21.8h, v25.8h
++ SMULL4K v6, v7, v8, v9, v20, v21, v0.h[5]
++
++// m3 = cur[mrefs3]; // m3 = v20
++ ldr q20, [x3, w7, sxtw]
++
++// p3 = cur[prefs3]; // p3 = v21
++ ldr q21, [x3, w13, sxtw]
++
++// i1 += coef_hf[2] * (m4 + p4); // (-m4:v22,v23) (-p4:v24,v25)
++ add v22.8h, v22.8h, v24.8h
++ add v23.8h, v23.8h, v25.8h
++ UMLAL4K v2, v3, v4, v5, v22, v23, v0.h[4]
++
++ ldr q29, [x3, w8, sxtw]
++ ldr q23, [x17, w8, sxtw]
++
++// i1 -= coef_lf[1] * 4 * (m3 + p3); // -
++ uaddl v30.8h, v20.8b, v21.8b
++ uaddl2 v31.8h, v20.16b, v21.16b
++
++ ldr q28, [x3, w16, sxtw]
++ ldr q25, [x17, w16, sxtw]
++
++ UMLSL4K v2, v3, v4, v5, v30, v31, v0.h[1]
++
++// m2 = prev2[mrefs2] + next2[mrefs2]; // m2 = v22,v23, (m2 >> 1) = v13
++ uhadd v13.16b, v23.16b, v29.16b
++ uaddl v22.8h, v23.8b, v29.8b
++ uaddl2 v23.8h, v23.16b, v29.16b
++
++ ldr q31, [x3, w12, sxtw]
++ ldr q27, [x17, w12, sxtw]
++
++// p6 = prev2[prefs6] + next2[prefs6]; // p6 = v24,v25
++ uaddl v24.8h, v25.8b, v28.8b
++ uaddl2 v25.8h, v25.16b, v28.16b
++
++// j1 += coef_hf[2] * (m2 + p6); // (-p6:v24,v25)
++ add v24.8h, v24.8h, v22.8h
++ add v25.8h, v25.8h, v23.8h
++ UMLAL4K v6, v7, v8, v9, v24, v25, v0.h[4]
++
++// m1 = cur[mrefs]; // m1 = v24
++ ldr q24, [x3, w9, sxtw]
++
++// p5 = cur[prefs5]; // p5 = v25
++ ldr q25, [x3, w15, sxtw]
++
++// p2 = prev2[prefs2] + next2[prefs2]; // p2 = v26, v27
++// temporal_diff2 = FFABS(prev2[prefs2] - next2[prefs2]); // td2 = v14
++// d2 = p2 >> 1; // d2 = v15
++ uabd v14.16b, v31.16b, v27.16b
++ uhadd v15.16b, v31.16b, v27.16b
++ uaddl v26.8h, v27.8b, v31.8b
++ uaddl2 v27.8h, v27.16b, v31.16b
++
++// j1 += coef_hf[0] * p2; // -
++ UMLAL4K v6, v7, v8, v9, v26, v27, v0.h[2]
++
++// i1 -= coef_hf[1] * (m2 + p2); // (-m2:v22,v23*) (-p2:v26*,v27*)
++ add v22.8h, v22.8h, v26.8h
++ add v23.8h, v23.8h, v27.8h
++ UMLSL4K v2, v3, v4, v5, v22, v23, v0.h[3]
++
++// p1 = cur[prefs]; // p1 = v22
++ ldr q22, [x3, w11, sxtw]
++
++// j1 -= coef_lf[1] * 4 * (m1 + p5); // -
++ uaddl v26.8h, v24.8b, v25.8b
++ uaddl2 v27.8h, v24.16b, v25.16b
++ UMLSL4K v6, v7, v8, v9, v26, v27, v0.h[1]
++
++// j2 = (coef_sp[0] * (p1 + p3) - coef_sp[1] * (m1 + p5)) >> 13; // (-p5:v25*) j2=v16
++ uaddl v18.8h, v22.8b, v21.8b
++ uaddl2 v19.8h, v22.16b, v21.16b
++ UMULL4K v28, v29, v30, v31, v18, v19, v0.h[6]
++
++ uaddl v18.8h, v24.8b, v25.8b
++ uaddl2 v19.8h, v24.16b, v25.16b
++ UMLSL4K v28, v29, v30, v31, v18, v19, v0.h[7]
++
++ SQSHRUNN v16, v28, v29, v30, v31, 13
++
++// i2 = (coef_sp[0] * (m1 + p1) - coef_sp[1] * (m3 + p3)) >> 13; // (-m3:v20*) i2=v17
++ uaddl v18.8h, v22.8b, v24.8b
++ uaddl2 v19.8h, v22.16b, v24.16b
++ UMULL4K v28, v29, v30, v31, v18, v19, v0.h[6]
++
++ uaddl v18.8h, v20.8b, v21.8b
++ uaddl2 v19.8h, v20.16b, v21.16b
++ UMLSL4K v28, v29, v30, v31, v18, v19, v0.h[7]
++
++ SQSHRUNN v17, v28, v29, v30, v31, 13
++
++// i1 += coef_lf[0] * 4 * (m1 + p1); // p1 = v22, m1 = v24
++ uaddl v26.8h, v24.8b, v22.8b
++ uaddl2 v27.8h, v24.16b, v22.16b
++ UMLAL4K v2, v3, v4, v5, v26, v27, v0.h[0]
++
++ ldr q31, [x2, w9, sxtw]
++ ldr q29, [x4, w9, sxtw]
++
++// j1 += coef_lf[0] * 4 * (p1 + p3); // p1 = v22, p3 = v21
++ uaddl v26.8h, v21.8b, v22.8b
++ uaddl2 v27.8h, v21.16b, v22.16b
++ UMLAL4K v6, v7, v8, v9, v26, v27, v0.h[0]
++
++ ldr q30, [x2, w11, sxtw]
++ ldr q28, [x4, w11, sxtw]
++
++// i1 >>= 15; // i1 = v2, -v3, -v4*, -v5*
++ SQSHRUNN v2, v2, v3, v4, v5, 15
++
++// j1 >>= 15; // j1 = v3, -v6*, -v7*, -v8*, -v9*
++ SQSHRUNN v3, v6, v7, v8, v9, 15
++
++// {
++// int t1 =(FFABS(prev[mrefs] - m1) + FFABS(prev[prefs] - p1)) >> 1;
++// int t2 =(FFABS(next[mrefs] - m1) + FFABS(next[prefs] - p1)) >> 1;
++ uabd v30.16b, v22.16b, v30.16b
++ uabd v31.16b, v24.16b, v31.16b
++ uabd v28.16b, v22.16b, v28.16b
++ uabd v29.16b, v24.16b, v29.16b
++ uhadd v31.16b, v31.16b, v30.16b
++ uhadd v29.16b, v29.16b, v28.16b
++
++ ldr q27, [x2, w13, sxtw]
++ ldr q26, [x4, w13, sxtw]
++
++// diff0 = FFMAX3(temporal_diff0 >> 1, t1, t2); // diff0=v18
++ ushr v18.16b, v11.16b, #1
++ umax v18.16b, v18.16b, v31.16b
++ umax v18.16b, v18.16b, v29.16b
++// } // v28, v30 preserved for next block
++// { // tdiff2 = v14
++// int t1 =(FFABS(prev[prefs] - p1) + FFABS(prev[prefs3] - p3)) >> 1;
++// int t2 =(FFABS(next[prefs] - p1) + FFABS(next[prefs3] - p3)) >> 1;
++ uabd v31.16b, v21.16b, v27.16b
++ uabd v29.16b, v21.16b, v26.16b
++ uhadd v31.16b, v31.16b, v30.16b
++ uhadd v29.16b, v29.16b, v28.16b
++
++// diff2 = FFMAX3(temporal_diff2 >> 1, t1, t2); // diff2=v19
++ ushr v19.16b, v14.16b, #1
++ umax v19.16b, v19.16b, v31.16b
++ umax v19.16b, v19.16b, v29.16b
++// }
++
++ // diff0 = v18, (m2 >> 1) = v13, m1 = v24, d0 = v10, p1 = v22, d2 = v15
++ SPAT_CHECK v18, v13, v24, v10, v22, v15, v31, v30, v29, v28
++
++ // diff2 = v19, d0 = v10, p1 = v22, d2 = v15, p3 = v21, (p4 >> 1) = v12
++ SPAT_CHECK v19, v10, v22, v15, v21, v12, v31, v30, v29, v28
++
++ // j1 = v3, j2 = v16, p1 = v22, d2 = v15, p3 = v21, td2 = v14, diff2 = v19
++ INTERPOL v3, v3, v16, v22, v15, v21, v14, v19, v31, v30, v29
++
++// dst[d_stride * 2] = av_clip_uint8(interpol);
++ str q3, [x0, w5, sxtw]
++
++// dst[d_stride] = p1;
++ str q22, [x0, w1, sxtw]
++
++ // i1 = v2, i2 = v17, m1 = v24, d0 = v10, p1 = v22, td0 = v11, diff0 = v18
++ INTERPOL v2, v2, v17, v24, v10, v22, v11, v18, v31, v30, v29
++
++// dst[0] = av_clip_uint8(interpol);
++ str q2, [x0], #16
++// }
++//
++// dst++;
++// cur++;
++// prev++;
++// prev2++;
++// next++;
++// }
++ subs w10, w10, #16
++ add x2, x2, #16
++ add x3, x3, #16
++ add x4, x4, #16
++ add x17, x17, #16
++ bgt 10b // 16 columns per pass; loop while any remain
++
++ POP_VREGS
++99:
++ ret
++endfunc
++
++// ===========================================================================
++//
++// void ff_bwdif_filter_line_neon(
++// void *dst1, // x0
++// void *prev1, // x1
++// void *cur1, // x2
++// void *next1, // x3
++// int w, // w4
++// int prefs, // w5
++// int mrefs, // w6
++// int prefs2, // w7
++// int mrefs2, // [sp, #0]
++// int prefs3, // [sp, #SP_INT]
++// int mrefs3, // [sp, #SP_INT*2]
++// int prefs4, // [sp, #SP_INT*3]
++// int mrefs4, // [sp, #SP_INT*4]
++// int parity, // [sp, #SP_INT*5]
++// int clip_max) // [sp, #SP_INT*6]
++
++function ff_bwdif_filter_line_neon, export=1
++ // Sanity check w
++ cmp w4, #0
++ ble 99f
++
++ // Rearrange regs to be the same as line3 for ease of debug!
++ mov w10, w4 // w10 = loop count
++ mov w9, w6 // w9 = mref
++ mov w12, w7 // w12 = pref2
++ mov w11, w5 // w11 = pref
++ ldr w8, [sp, #0] // w8 = mref2
++ ldr w7, [sp, #SP_INT*2] // w7 = mref3
++ ldr w6, [sp, #SP_INT*4] // w6 = mref4
++ ldr w13, [sp, #SP_INT] // w13 = pref3
++ ldr w14, [sp, #SP_INT*3] // w14 = pref4
++
++ mov x4, x3
++ mov x3, x2
++ mov x2, x1
++
++ LDR_COEFFS v0, x17
++
++// #define prev2 cur
++// const uint8_t * restrict next2 = parity ? prev : next;
++ ldr w17, [sp, #SP_INT*5] // parity
++ cmp w17, #0
++ csel x17, x2, x4, ne
++
++ PUSH_VREGS
++
++// for (x = 0; x < w; x++) {
++// int diff0, diff2;
++// int d0, d2;
++// int temporal_diff0, temporal_diff2;
++//
++// int i1, i2;
++// int j1, j2;
++// int p6, p5, p4, p3, p2, p1, c0, m1, m2, m3, m4;
++
++10:
++// c0 = prev2[0] + next2[0]; // c0 = v20, v21
++// d0 = c0 >> 1; // d0 = v10
++// temporal_diff0 = FFABS(prev2[0] - next2[0]); // td0 = v11
++ ldr q31, [x3]
++ ldr q21, [x17]
++ uhadd v10.16b, v31.16b, v21.16b
++ uabd v11.16b, v31.16b, v21.16b
++ uaddl v20.8h, v21.8b, v31.8b
++ uaddl2 v21.8h, v21.16b, v31.16b
++
++ ldr q31, [x3, w6, sxtw]
++ ldr q23, [x17, w6, sxtw]
++
++// i1 = coef_hf[0] * c0; // i1 = v2-v5
++ UMULL4K v2, v3, v4, v5, v20, v21, v0.h[2]
++
++ ldr q30, [x3, w14, sxtw]
++ ldr q25, [x17, w14, sxtw]
++
++// m4 = prev2[mrefs4] + next2[mrefs4]; // m4 = v22,v23
++ uaddl v22.8h, v23.8b, v31.8b
++ uaddl2 v23.8h, v23.16b, v31.16b
++
++// p4 = prev2[prefs4] + next2[prefs4]; // p4 = v24,v25, (p4 >> 1) = v12
++ uhadd v12.16b, v25.16b, v30.16b
++ uaddl v24.8h, v25.8b, v30.8b
++ uaddl2 v25.8h, v25.16b, v30.16b
++
++// m3 = cur[mrefs3]; // m3 = v20
++ ldr q20, [x3, w7, sxtw]
++
++// p3 = cur[prefs3]; // p3 = v21
++ ldr q21, [x3, w13, sxtw]
++
++// i1 += coef_hf[2] * (m4 + p4); // (-m4:v22,v23) (-p4:v24,v25)
++ add v22.8h, v22.8h, v24.8h
++ add v23.8h, v23.8h, v25.8h
++ UMLAL4K v2, v3, v4, v5, v22, v23, v0.h[4]
++
++ ldr q29, [x3, w8, sxtw]
++ ldr q23, [x17, w8, sxtw]
++
++// i1 -= coef_lf[1] * 4 * (m3 + p3); // -
++ uaddl v30.8h, v20.8b, v21.8b
++ uaddl2 v31.8h, v20.16b, v21.16b
++
++ UMLSL4K v2, v3, v4, v5, v30, v31, v0.h[1]
++
++ ldr q31, [x3, w12, sxtw]
++ ldr q27, [x17, w12, sxtw]
++
++// m2 = prev2[mrefs2] + next2[mrefs2]; // m2 = v22,v23, (m2 >> 1) = v13
++ uhadd v13.16b, v23.16b, v29.16b
++ uaddl v22.8h, v23.8b, v29.8b
++ uaddl2 v23.8h, v23.16b, v29.16b
++
++// m1 = cur[mrefs]; // m1 = v24
++ ldr q24, [x3, w9, sxtw]
++
++// p2 = prev2[prefs2] + next2[prefs2]; // p2 = v26, v27
++// temporal_diff2 = FFABS(prev2[prefs2] - next2[prefs2]); // td2 = v14
++// d2 = p2 >> 1; // d2 = v15
++ uabd v14.16b, v31.16b, v27.16b
++ uhadd v15.16b, v31.16b, v27.16b
++ uaddl v26.8h, v27.8b, v31.8b
++ uaddl2 v27.8h, v27.16b, v31.16b
++
++// i1 -= coef_hf[1] * (m2 + p2); // (-m2:v22,v23*) (-p2:v26*,v27*)
++ add v22.8h, v22.8h, v26.8h
++ add v23.8h, v23.8h, v27.8h
++ UMLSL4K v2, v3, v4, v5, v22, v23, v0.h[3]
++
++// p1 = cur[prefs]; // p1 = v22
++ ldr q22, [x3, w11, sxtw]
++
++// i2 = (coef_sp[0] * (m1 + p1) - coef_sp[1] * (m3 + p3)) >> 13; // (-m3:v20*) i2=v17
++ uaddl v18.8h, v22.8b, v24.8b
++ uaddl2 v19.8h, v22.16b, v24.16b
++ UMULL4K v28, v29, v30, v31, v18, v19, v0.h[6]
++
++ uaddl v18.8h, v20.8b, v21.8b
++ uaddl2 v19.8h, v20.16b, v21.16b
++ UMLSL4K v28, v29, v30, v31, v18, v19, v0.h[7]
++
++ SQSHRUNN v17, v28, v29, v30, v31, 13
++
++// i1 += coef_lf[0] * 4 * (m1 + p1); // p1 = v22, m1 = v24
++ uaddl v26.8h, v24.8b, v22.8b
++ uaddl2 v27.8h, v24.16b, v22.16b
++ UMLAL4K v2, v3, v4, v5, v26, v27, v0.h[0]
++
++ ldr q31, [x2, w9, sxtw]
++ ldr q29, [x4, w9, sxtw]
++
++ ldr q30, [x2, w11, sxtw]
++ ldr q28, [x4, w11, sxtw]
++
++// i1 >>= 15; // i1 = v2, -v3, -v4*, -v5*
++ SQSHRUNN v2, v2, v3, v4, v5, 15
++
++// {
++// int t1 =(FFABS(prev[mrefs] - m1) + FFABS(prev[prefs] - p1)) >> 1;
++// int t2 =(FFABS(next[mrefs] - m1) + FFABS(next[prefs] - p1)) >> 1;
++ uabd v30.16b, v22.16b, v30.16b
++ uabd v31.16b, v24.16b, v31.16b
++ uabd v28.16b, v22.16b, v28.16b
++ uabd v29.16b, v24.16b, v29.16b
++ uhadd v31.16b, v31.16b, v30.16b
++ uhadd v29.16b, v29.16b, v28.16b
++
++// diff0 = FFMAX3(temporal_diff0 >> 1, t1, t2); // diff0=v18
++ ushr v18.16b, v11.16b, #1
++ umax v18.16b, v18.16b, v31.16b
++ umax v18.16b, v18.16b, v29.16b
++
++ // diff0 = v18, (m2 >> 1) = v13, m1 = v24, d0 = v10, p1 = v22, d2 = v15
++ SPAT_CHECK v18, v13, v24, v10, v22, v15, v31, v30, v29, v28
++
++ // i1 = v2, i2 = v17, m1 = v24, d0 = v10, p1 = v22, td0 = v11, diff0 = v18
++ INTERPOL v2, v2, v17, v24, v10, v22, v11, v18, v31, v30, v29
++
++// dst[0] = av_clip_uint8(interpol);
++ str q2, [x0], #16
++// }
++//
++// dst++;
++// cur++;
++// prev++;
++// prev2++;
++// next++;
++// }
++
++ subs w10, w10, #16
++ add x2, x2, #16
++ add x3, x3, #16
++ add x4, x4, #16
++ add x17, x17, #16
++ bgt 10b
++
++ POP_VREGS
++99:
++ ret
++endfunc
++
++// ============================================================================
++//
++// void ff_bwdif_filter_edge_neon(
++// void *dst1, // x0
++// void *prev1, // x1
++// void *cur1, // x2
++// void *next1, // x3
++// int w, // w4
++// int prefs, // w5
++// int mrefs, // w6
++// int prefs2, // w7
++// int mrefs2, // [sp, #0]
++// int parity, // [sp, #SP_INT]
++// int clip_max, // [sp, #SP_INT*2] unused
++// int spat); // [sp, #SP_INT*3]
++
++function ff_bwdif_filter_edge_neon, export=1
++ // Sanity check w
++ cmp w4, #0
++ ble 99f
++
++// #define prev2 cur
++// const uint8_t * restrict next2 = parity ? prev : next;
++
++ ldr w8, [sp, #0] // mrefs2
++
++ ldr w17, [sp, #SP_INT] // parity
++ ldr w16, [sp, #SP_INT*3] // spat
++ cmp w17, #0
++ csel x17, x1, x3, ne
++
++// for (x = 0; x < w; x++) {
++
++10:
++// int m1 = cur[mrefs];
++// int d = (prev2[0] + next2[0]) >> 1;
++// int p1 = cur[prefs];
++// int temporal_diff0 = FFABS(prev2[0] - next2[0]);
++// int temporal_diff1 =(FFABS(prev[mrefs] - m1) + FFABS(prev[prefs] - p1)) >> 1;
++// int temporal_diff2 =(FFABS(next[mrefs] - m1) + FFABS(next[prefs] - p1)) >> 1;
++// int diff = FFMAX3(temporal_diff0 >> 1, temporal_diff1, temporal_diff2);
++ ldr q31, [x2]
++ ldr q21, [x17]
++ uhadd v16.16b, v31.16b, v21.16b // d0 = v16
++ uabd v17.16b, v31.16b, v21.16b // td0 = v17
++ ldr q24, [x2, w6, sxtw] // m1 = v24
++ ldr q22, [x2, w5, sxtw] // p1 = v22
++
++ ldr q0, [x1, w6, sxtw] // prev[mrefs]
++ ldr q2, [x1, w5, sxtw] // prev[prefs]
++ ldr q1, [x3, w6, sxtw] // next[mrefs]
++ ldr q3, [x3, w5, sxtw] // next[prefs]
++
++ ushr v29.16b, v17.16b, #1
++
++ uabd v31.16b, v0.16b, v24.16b
++ uabd v30.16b, v2.16b, v22.16b
++ uhadd v0.16b, v31.16b, v30.16b // td1 = q0
++
++ uabd v31.16b, v1.16b, v24.16b
++ uabd v30.16b, v3.16b, v22.16b
++ uhadd v1.16b, v31.16b, v30.16b // td2 = q1
++
++ umax v0.16b, v0.16b, v29.16b
++ umax v0.16b, v0.16b, v1.16b // diff = v0
++
++// if (spat) {
++// SPAT_CHECK()
++// }
++// i0 = (m1 + p1) >> 1;
++ cbz w16, 1f
++
++ ldr q31, [x2, w8, sxtw]
++ ldr q18, [x17, w8, sxtw]
++ ldr q30, [x2, w7, sxtw]
++ ldr q19, [x17, w7, sxtw]
++ uhadd v18.16b, v18.16b, v31.16b
++ uhadd v19.16b, v19.16b, v30.16b
++
++ SPAT_CHECK v0, v18, v24, v16, v22, v19, v31, v30, v29, v28
++
++1:
++ uhadd v2.16b, v22.16b, v24.16b
++
++ // i0 = v2, s0 = v2, d0 = v16, diff = v0, t0 = v31, t1 = v30
++ DIFF_CLIP v2, v2, v16, v0, v31, v30
++
++// dst[0] = av_clip(interpol, 0, clip_max);
++ str q2, [x0], #16
++
++// dst++;
++// cur++;
++// }
++ subs w4, w4, #16
++ add x1, x1, #16
++ add x2, x2, #16
++ add x3, x3, #16
++ add x17, x17, #16
++ bgt 10b
++
++99:
++ ret
++endfunc
++
++// ============================================================================
++//
++// void ff_bwdif_filter_intra_neon(
++// void *dst1, // x0
++// void *cur1, // x1
++// int w, // w2
++// int prefs, // w3
++// int mrefs, // w4
++// int prefs3, // w5
++// int mrefs3, // w6
++// int parity, // w7 unused
++// int clip_max) // [sp, #0] unused
++
++function ff_bwdif_filter_intra_neon, export=1
++ cmp w2, #0
++ ble 99f
++
++ LDR_COEFFS v0, x17
++
++// for (x = 0; x < w; x++) {
++10:
++
++// interpol = (coef_sp[0] * (cur[mrefs] + cur[prefs]) - coef_sp[1] * (cur[mrefs3] + cur[prefs3])) >> 13;
++ ldr q31, [x1, w4, sxtw]
++ ldr q30, [x1, w3, sxtw]
++ ldr q29, [x1, w6, sxtw]
++ ldr q28, [x1, w5, sxtw]
++
++ uaddl v20.8h, v31.8b, v30.8b
++ uaddl2 v21.8h, v31.16b, v30.16b
++
++ UMULL4K v2, v3, v4, v5, v20, v21, v0.h[6]
++
++ uaddl v20.8h, v29.8b, v28.8b
++ uaddl2 v21.8h, v29.16b, v28.16b
++
++ UMLSL4K v2, v3, v4, v5, v20, v21, v0.h[7]
++
++// dst[0] = av_clip(interpol, 0, clip_max);
++ SQSHRUNN v2, v2, v3, v4, v5, 13
++ str q2, [x0], #16
++
++// dst++;
++// cur++;
++// }
++
++ subs w2, w2, #16
++ add x1, x1, #16
++ bgt 10b
++
++99:
++ ret
++endfunc
+--- a/libavfilter/allfilters.c
++++ b/libavfilter/allfilters.c
+@@ -242,6 +242,7 @@ extern const AVFilter ff_vf_derain;
+ extern const AVFilter ff_vf_deshake;
+ extern const AVFilter ff_vf_deshake_opencl;
+ extern const AVFilter ff_vf_despill;
++extern const AVFilter ff_vf_deinterlace_v4l2m2m;
+ extern const AVFilter ff_vf_detelecine;
+ extern const AVFilter ff_vf_dilation;
+ extern const AVFilter ff_vf_dilation_opencl;
+@@ -414,6 +415,7 @@ extern const AVFilter ff_vf_scale;
+ extern const AVFilter ff_vf_scale_cuda;
+ extern const AVFilter ff_vf_scale_npp;
+ extern const AVFilter ff_vf_scale_qsv;
++extern const AVFilter ff_vf_scale_v4l2m2m;
+ extern const AVFilter ff_vf_scale_vaapi;
+ extern const AVFilter ff_vf_scale_vulkan;
+ extern const AVFilter ff_vf_scale2ref;
+@@ -483,6 +485,7 @@ extern const AVFilter ff_vf_trim;
+ extern const AVFilter ff_vf_unpremultiply;
+ extern const AVFilter ff_vf_unsharp;
+ extern const AVFilter ff_vf_unsharp_opencl;
++extern const AVFilter ff_vf_unsand;
+ extern const AVFilter ff_vf_untile;
+ extern const AVFilter ff_vf_uspp;
+ extern const AVFilter ff_vf_v360;
+--- a/libavfilter/buffersink.c
++++ b/libavfilter/buffersink.c
+@@ -62,6 +62,11 @@ typedef struct BufferSinkContext {
+ int sample_rates_size;
+
+ AVFrame *peeked_frame;
++
++ union {
++ av_buffersink_alloc_video_frame * video;
++ } alloc_cb;
++ void * alloc_v;
+ } BufferSinkContext;
+
+ #define NB_ITEMS(list) (list ## _size / sizeof(*list))
+@@ -154,6 +159,22 @@ int attribute_align_arg av_buffersink_ge
+ return get_frame_internal(ctx, frame, 0, nb_samples);
+ }
+
++static AVFrame * alloc_video_buffer(AVFilterLink *link, int w, int h)
++{
++ AVFilterContext * const ctx = link->dst;
++ BufferSinkContext * const bs = ctx->priv;
++ return bs->alloc_cb.video ? bs->alloc_cb.video(ctx, bs->alloc_v, w, h) :
++ ff_default_get_video_buffer(link, w, h);
++}
++
++int av_buffersink_set_alloc_video_frame(AVFilterContext *ctx, av_buffersink_alloc_video_frame * cb, void * v)
++{
++ BufferSinkContext * const bs = ctx->priv;
++ bs->alloc_cb.video = cb;
++ bs->alloc_v = v;
++ return 0;
++}
++
+ #if FF_API_BUFFERSINK_ALLOC
+ AVBufferSinkParams *av_buffersink_params_alloc(void)
+ {
+@@ -403,6 +424,7 @@ static const AVFilterPad avfilter_vsink_
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_VIDEO,
++ .get_buffer = {.video = alloc_video_buffer},
+ },
+ };
+
+--- a/libavfilter/buffersink.h
++++ b/libavfilter/buffersink.h
+@@ -202,6 +202,9 @@ int av_buffersink_get_frame(AVFilterCont
+ */
+ int av_buffersink_get_samples(AVFilterContext *ctx, AVFrame *frame, int nb_samples);
+
++typedef AVFrame * av_buffersink_alloc_video_frame(AVFilterContext * ctx, void * v, int w, int h);
++int av_buffersink_set_alloc_video_frame(AVFilterContext *ctx, av_buffersink_alloc_video_frame * cb, void * v);
++
+ /**
+ * @}
+ */
+--- a/libavfilter/buffersrc.c
++++ b/libavfilter/buffersrc.c
+@@ -204,7 +204,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
+
+ switch (ctx->outputs[0]->type) {
+ case AVMEDIA_TYPE_VIDEO:
+- CHECK_VIDEO_PARAM_CHANGE(ctx, s, frame->width, frame->height,
++ CHECK_VIDEO_PARAM_CHANGE(ctx, s, av_frame_cropped_width(frame), av_frame_cropped_height(frame),
+ frame->format, frame->pts);
+ break;
+ case AVMEDIA_TYPE_AUDIO:
+--- a/libavfilter/bwdif.h
++++ b/libavfilter/bwdif.h
+@@ -35,8 +35,29 @@ typedef struct BWDIFContext {
+ void (*filter_edge)(void *dst, void *prev, void *cur, void *next,
+ int w, int prefs, int mrefs, int prefs2, int mrefs2,
+ int parity, int clip_max, int spat);
++ void (*filter_line3)(void *dst, int dstride,
++ const void *prev, const void *cur, const void *next, int prefs,
++ int w, int parity, int clip_max);
+ } BWDIFContext;
+
+-void ff_bwdif_init_x86(BWDIFContext *bwdif);
++void ff_bwdif_init_filter_line(BWDIFContext *bwdif, int bit_depth);
++void ff_bwdif_init_x86(BWDIFContext *bwdif, int bit_depth);
++void ff_bwdif_init_aarch64(BWDIFContext *bwdif, int bit_depth);
++
++void ff_bwdif_filter_edge_c(void *dst1, void *prev1, void *cur1, void *next1,
++ int w, int prefs, int mrefs, int prefs2, int mrefs2,
++ int parity, int clip_max, int spat);
++
++void ff_bwdif_filter_intra_c(void *dst1, void *cur1, int w, int prefs, int mrefs,
++ int prefs3, int mrefs3, int parity, int clip_max);
++
++void ff_bwdif_filter_line_c(void *dst1, void *prev1, void *cur1, void *next1,
++ int w, int prefs, int mrefs, int prefs2, int mrefs2,
++ int prefs3, int mrefs3, int prefs4, int mrefs4,
++ int parity, int clip_max);
++
++void ff_bwdif_filter_line3_c(void * dst1, int d_stride,
++ const void * prev1, const void * cur1, const void * next1, int s_stride,
++ int w, int parity, int clip_max);
+
+ #endif /* AVFILTER_BWDIF_H */
+--- a/libavfilter/vf_bwdif.c
++++ b/libavfilter/vf_bwdif.c
+@@ -122,8 +122,8 @@ typedef struct ThreadData {
+ next2++; \
+ }
+
+-static void filter_intra(void *dst1, void *cur1, int w, int prefs, int mrefs,
+- int prefs3, int mrefs3, int parity, int clip_max)
++void ff_bwdif_filter_intra_c(void *dst1, void *cur1, int w, int prefs, int mrefs,
++ int prefs3, int mrefs3, int parity, int clip_max)
+ {
+ uint8_t *dst = dst1;
+ uint8_t *cur = cur1;
+@@ -132,10 +132,10 @@ static void filter_intra(void *dst1, voi
+ FILTER_INTRA()
+ }
+
+-static void filter_line_c(void *dst1, void *prev1, void *cur1, void *next1,
+- int w, int prefs, int mrefs, int prefs2, int mrefs2,
+- int prefs3, int mrefs3, int prefs4, int mrefs4,
+- int parity, int clip_max)
++void ff_bwdif_filter_line_c(void *dst1, void *prev1, void *cur1, void *next1,
++ int w, int prefs, int mrefs, int prefs2, int mrefs2,
++ int prefs3, int mrefs3, int prefs4, int mrefs4,
++ int parity, int clip_max)
+ {
+ uint8_t *dst = dst1;
+ uint8_t *prev = prev1;
+@@ -150,9 +150,34 @@ static void filter_line_c(void *dst1, vo
+ FILTER2()
+ }
+
+-static void filter_edge(void *dst1, void *prev1, void *cur1, void *next1,
+- int w, int prefs, int mrefs, int prefs2, int mrefs2,
+- int parity, int clip_max, int spat)
++#define NEXT_LINE()\
++ dst += d_stride; \
++ prev += prefs; \
++ cur += prefs; \
++ next += prefs;
++
++void ff_bwdif_filter_line3_c(void * dst1, int d_stride,
++ const void * prev1, const void * cur1, const void * next1, int s_stride,
++ int w, int parity, int clip_max)
++{
++ const int prefs = s_stride;
++ uint8_t * dst = dst1;
++ const uint8_t * prev = prev1;
++ const uint8_t * cur = cur1;
++ const uint8_t * next = next1;
++
++ ff_bwdif_filter_line_c(dst, (void*)prev, (void*)cur, (void*)next, w,
++ prefs, -prefs, prefs * 2, - prefs * 2, prefs * 3, -prefs * 3, prefs * 4, -prefs * 4, parity, clip_max);
++ NEXT_LINE();
++ memcpy(dst, cur, w);
++ NEXT_LINE();
++ ff_bwdif_filter_line_c(dst, (void*)prev, (void*)cur, (void*)next, w,
++ prefs, -prefs, prefs * 2, - prefs * 2, prefs * 3, -prefs * 3, prefs * 4, -prefs * 4, parity, clip_max);
++}
++
++void ff_bwdif_filter_edge_c(void *dst1, void *prev1, void *cur1, void *next1,
++ int w, int prefs, int mrefs, int prefs2, int mrefs2,
++ int parity, int clip_max, int spat)
+ {
+ uint8_t *dst = dst1;
+ uint8_t *prev = prev1;
+@@ -212,6 +237,13 @@ static void filter_edge_16bit(void *dst1
+ FILTER2()
+ }
+
++// Round job start line down to multiple of 4 so that if filter_line3 exists
++// and the frame is a multiple of 4 high then filter_line will never be called
++static inline int job_start(const int jobnr, const int nb_jobs, const int h)
++{
++ return jobnr >= nb_jobs ? h : ((h * jobnr) / nb_jobs) & ~3;
++}
++
+ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+ {
+ BWDIFContext *s = ctx->priv;
+@@ -221,8 +253,8 @@ static int filter_slice(AVFilterContext
+ int clip_max = (1 << (yadif->csp->comp[td->plane].depth)) - 1;
+ int df = (yadif->csp->comp[td->plane].depth + 7) / 8;
+ int refs = linesize / df;
+- int slice_start = (td->h * jobnr ) / nb_jobs;
+- int slice_end = (td->h * (jobnr+1)) / nb_jobs;
++ int slice_start = job_start(jobnr, nb_jobs, td->h);
++ int slice_end = job_start(jobnr + 1, nb_jobs, td->h);
+ int y;
+
+ for (y = slice_start; y < slice_end; y++) {
+@@ -244,6 +276,11 @@ static int filter_slice(AVFilterContext
+ refs << 1, -(refs << 1),
+ td->parity ^ td->tff, clip_max,
+ (y < 2) || ((y + 3) > td->h) ? 0 : 1);
++ } else if (s->filter_line3 && y + 2 < slice_end && y + 6 < td->h) {
++ s->filter_line3(dst, td->frame->linesize[td->plane],
++ prev, cur, next, linesize, td->w,
++ td->parity ^ td->tff, clip_max);
++ y += 2;
+ } else {
+ s->filter_line(dst, prev, cur, next, td->w,
+ refs, -refs, refs << 1, -(refs << 1),
+@@ -265,22 +302,31 @@ static void filter(AVFilterContext *ctx,
+ YADIFContext *yadif = &bwdif->yadif;
+ ThreadData td = { .frame = dstpic, .parity = parity, .tff = tff };
+ int i;
++ int last_plane = -1;
+
+ for (i = 0; i < yadif->csp->nb_components; i++) {
+ int w = dstpic->width;
+ int h = dstpic->height;
++ const AVComponentDescriptor * const comp = yadif->csp->comp + i;
++
++ // If the last plane was the same as this plane assume we've dealt
++ // with all the pels already
++ if (last_plane == comp->plane)
++ continue;
++ last_plane = comp->plane;
+
+ if (i == 1 || i == 2) {
+ w = AV_CEIL_RSHIFT(w, yadif->csp->log2_chroma_w);
+ h = AV_CEIL_RSHIFT(h, yadif->csp->log2_chroma_h);
+ }
+
+- td.w = w;
+- td.h = h;
+- td.plane = i;
++ // comp step is in bytes but td.w is in pels
++ td.w = w * comp->step / ((comp->depth + 7) / 8);
++ td.h = h;
++ td.plane = comp->plane;
+
+ ff_filter_execute(ctx, filter_slice, &td, NULL,
+- FFMIN(h, ff_filter_get_nb_threads(ctx)));
++ FFMIN((h+3)/4, ff_filter_get_nb_threads(ctx)));
+ }
+ if (yadif->current_field == YADIF_FIELD_END) {
+ yadif->current_field = YADIF_FIELD_NORMAL;
+@@ -313,6 +359,7 @@ static const enum AVPixelFormat pix_fmts
+ AV_PIX_FMT_YUVA420P9, AV_PIX_FMT_YUVA422P9, AV_PIX_FMT_YUVA444P9,
+ AV_PIX_FMT_YUVA420P10, AV_PIX_FMT_YUVA422P10, AV_PIX_FMT_YUVA444P10,
+ AV_PIX_FMT_YUVA420P16, AV_PIX_FMT_YUVA422P16, AV_PIX_FMT_YUVA444P16,
++ AV_PIX_FMT_NV12,
+ AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10,
+ AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16,
+ AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRAP16,
+@@ -340,21 +387,29 @@ static int config_props(AVFilterLink *li
+
+ yadif->csp = av_pix_fmt_desc_get(link->format);
+ yadif->filter = filter;
+- if (yadif->csp->comp[0].depth > 8) {
++ ff_bwdif_init_filter_line(s, yadif->csp->comp[0].depth);
++
++ return 0;
++}
++
++av_cold void ff_bwdif_init_filter_line(BWDIFContext *s, int bit_depth)
++{
++ s->filter_line3 = 0;
++ if (bit_depth > 8) {
+ s->filter_intra = filter_intra_16bit;
+ s->filter_line = filter_line_c_16bit;
+ s->filter_edge = filter_edge_16bit;
+ } else {
+- s->filter_intra = filter_intra;
+- s->filter_line = filter_line_c;
+- s->filter_edge = filter_edge;
++ s->filter_intra = ff_bwdif_filter_intra_c;
++ s->filter_line = ff_bwdif_filter_line_c;
++ s->filter_edge = ff_bwdif_filter_edge_c;
+ }
+
+ #if ARCH_X86
+- ff_bwdif_init_x86(s);
++ ff_bwdif_init_x86(s, bit_depth);
++#elif ARCH_AARCH64
++ ff_bwdif_init_aarch64(s, bit_depth);
+ #endif
+-
+- return 0;
+ }
+
+
+--- /dev/null
++++ b/libavfilter/vf_deinterlace_v4l2m2m.c
+@@ -0,0 +1,2102 @@
++/*
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++/**
++ * @file
++ * deinterlace video filter - V4L2 M2M
++ */
++
++#include <drm_fourcc.h>
++
++#include <linux/videodev2.h>
++
++#include <dirent.h>
++#include <fcntl.h>
++#include <poll.h>
++#include <stdatomic.h>
++#include <stdio.h>
++#include <string.h>
++#include <sys/ioctl.h>
++#include <sys/mman.h>
++#include <unistd.h>
++
++#include "config.h"
++
++#include "libavutil/avassert.h"
++#include "libavutil/avstring.h"
++#include "libavutil/common.h"
++#include "libavutil/hwcontext.h"
++#include "libavutil/hwcontext_drm.h"
++#include "libavutil/internal.h"
++#include "libavutil/mathematics.h"
++#include "libavutil/opt.h"
++#include "libavutil/pixdesc.h"
++#include "libavutil/time.h"
++
++#define FF_INTERNAL_FIELDS 1
++#include "framequeue.h"
++#include "filters.h"
++#include "avfilter.h"
++#include "formats.h"
++#include "internal.h"
++#include "scale_eval.h"
++#include "video.h"
++
++#ifndef DRM_FORMAT_P030
++#define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0') /* 2x2 subsampled Cr:Cb plane 10 bits per channel packed */
++#endif
++
++// V4L2_PIX_FMT_NV12_10_COL128 and V4L2_PIX_FMT_NV12_COL128 should be defined
++// in drm_fourcc.h hopefully will be sometime in the future but until then...
++#ifndef V4L2_PIX_FMT_NV12_10_COL128
++#define V4L2_PIX_FMT_NV12_10_COL128 v4l2_fourcc('N', 'C', '3', '0')
++#endif
++
++#ifndef V4L2_PIX_FMT_NV12_COL128
++#define V4L2_PIX_FMT_NV12_COL128 v4l2_fourcc('N', 'C', '1', '2') /* 12 Y/CbCr 4:2:0 128 pixel wide column */
++#endif
++
++typedef struct V4L2Queue V4L2Queue;
++typedef struct DeintV4L2M2MContextShared DeintV4L2M2MContextShared;
++
++typedef enum filter_type_v4l2_e
++{
++ FILTER_V4L2_DEINTERLACE = 1,
++ FILTER_V4L2_SCALE,
++} filter_type_v4l2_t;
++
++typedef struct V4L2Buffer {
++ int enqueued;
++ int reenqueue;
++ struct v4l2_buffer buffer;
++ AVFrame frame;
++ struct v4l2_plane planes[VIDEO_MAX_PLANES];
++ int num_planes;
++ AVDRMFrameDescriptor drm_frame;
++ V4L2Queue *q;
++} V4L2Buffer;
++
++typedef struct V4L2Queue {
++ struct v4l2_format format;
++ struct v4l2_selection sel;
++ int eos;
++ int num_buffers;
++ V4L2Buffer *buffers;
++ const char * name;
++ DeintV4L2M2MContextShared *ctx;
++} V4L2Queue;
++
++typedef struct pts_stats_s
++{
++ void * logctx;
++ const char * name; // For debug
++ unsigned int last_count;
++ unsigned int last_interval;
++ int64_t last_pts;
++} pts_stats_t;
++
++#define PTS_TRACK_SIZE 32
++typedef struct pts_track_el_s
++{
++ uint32_t n;
++ unsigned int interval;
++ AVFrame * props;
++} pts_track_el_t;
++
++typedef struct pts_track_s
++{
++ uint32_t n;
++ uint32_t last_n;
++ int got_2;
++ void * logctx;
++ pts_stats_t stats;
++ pts_track_el_t a[PTS_TRACK_SIZE];
++} pts_track_t;
++
++typedef enum drain_state_e
++{
++ DRAIN_NONE = 0, // Not draining
++ DRAIN_TIMEOUT, // Drain until normal timeout setup yields no frame
++ DRAIN_LAST, // Drain with long timeout; last_frame is expected to be received on output
++ DRAIN_EOS, // Drain with long timeout; EOS expected
++ DRAIN_DONE // Drained
++} drain_state_t;
++
++typedef struct DeintV4L2M2MContextShared {
++ void * logctx; // For logging - will be NULL when done
++ filter_type_v4l2_t filter_type;
++
++ int fd;
++ int done; // fd closed - awaiting all refs dropped
++ int width;
++ int height;
++
++ int drain; // EOS received (inlink status)
++ drain_state_t drain_state;
++ int64_t drain_pts; // PTS associated with inlink status
++
++ unsigned int frames_rx;
++ unsigned int frames_tx;
++
++ // from options
++ int output_width;
++ int output_height;
++ enum AVPixelFormat output_format;
++
++ int has_enc_stop;
++ // We expect to get exactly the same number of frames out as we put in
++ // We can drain by matching input to output
++ int one_to_one;
++
++ int orig_width;
++ int orig_height;
++ atomic_uint refcount;
++
++ AVBufferRef *hw_frames_ctx;
++
++ unsigned int field_order;
++
++ pts_track_t track;
++
++ V4L2Queue output;
++ V4L2Queue capture;
++} DeintV4L2M2MContextShared;
++
++typedef struct DeintV4L2M2MContext { // NOTE(review): presumably the filter's private context - confirm against the AVFilter definition
++ const AVClass *class;
++
++ DeintV4L2M2MContextShared *shared; // state shared via the refcounted DeintV4L2M2MContextShared
++
++ char * w_expr;
++ char * h_expr;
++ char * output_format_string;
++
++ int force_original_aspect_ratio;
++ int force_divisible_by;
++
++ char *colour_primaries_string;
++ char *colour_transfer_string;
++ char *colour_matrix_string;
++ int colour_range;
++ char *chroma_location_string;
++
++ enum AVColorPrimaries colour_primaries; // NOTE(review): presumably parsed from the *_string fields above - confirm
++ enum AVColorTransferCharacteristic colour_transfer;
++ enum AVColorSpace colour_matrix;
++ enum AVChromaLocation chroma_location;
++} DeintV4L2M2MContext;
++
++
++static inline int drain_frame_expected(const drain_state_t d)
++{
++ return d == DRAIN_EOS || d == DRAIN_LAST;
++}
++
++// These just list the ones we know we can cope with
++static uint32_t
++fmt_av_to_v4l2(const enum AVPixelFormat avfmt)
++{
++ switch (avfmt) {
++ case AV_PIX_FMT_YUV420P:
++ return V4L2_PIX_FMT_YUV420;
++ case AV_PIX_FMT_NV12:
++ return V4L2_PIX_FMT_NV12;
++#if CONFIG_SAND
++ case AV_PIX_FMT_RPI4_8:
++ case AV_PIX_FMT_SAND128:
++ return V4L2_PIX_FMT_NV12_COL128;
++#endif
++ default:
++ break;
++ }
++ return 0;
++}
++
++static enum AVPixelFormat
++fmt_v4l2_to_av(const uint32_t pixfmt)
++{
++ switch (pixfmt) {
++ case V4L2_PIX_FMT_YUV420:
++ return AV_PIX_FMT_YUV420P;
++ case V4L2_PIX_FMT_NV12:
++ return AV_PIX_FMT_NV12;
++#if CONFIG_SAND
++ case V4L2_PIX_FMT_NV12_COL128:
++ return AV_PIX_FMT_RPI4_8;
++#endif
++ default:
++ break;
++ }
++ return AV_PIX_FMT_NONE;
++}
++
++static unsigned int pts_stats_interval(const pts_stats_t * const stats)
++{
++ return stats->last_interval;
++}
++
++// Pick 64 for max last count - that is >1sec at 60fps
++#define STATS_LAST_COUNT_MAX 64
++#define STATS_INTERVAL_MAX (1 << 30)
++static void pts_stats_add(pts_stats_t * const stats, int64_t pts) // Fold one frame's pts into the per-frame interval estimate
++{
++ if (pts == AV_NOPTS_VALUE || pts == stats->last_pts) { // no new timestamp: only count the frame (saturating)
++ if (stats->last_count < STATS_LAST_COUNT_MAX)
++ ++stats->last_count;
++ return;
++ }
++
++ if (stats->last_pts != AV_NOPTS_VALUE) {
++ const int64_t interval = pts - stats->last_pts;
++
++ if (interval < 0 || interval >= STATS_INTERVAL_MAX ||
++ stats->last_count >= STATS_LAST_COUNT_MAX) {
++ if (stats->last_interval != 0)
++ av_log(stats->logctx, AV_LOG_DEBUG, "%s: %s: Bad interval: %" PRId64 "/%u\n", // last_count is unsigned, so %u
++ __func__, stats->name, interval, stats->last_count);
++ stats->last_interval = 0; // 0 means "interval unknown"
++ }
++ else {
++ const int64_t frame_time = interval / (int64_t)stats->last_count; // spread the gap over the frames seen since last_pts
++
++ if (frame_time != stats->last_interval)
++ av_log(stats->logctx, AV_LOG_DEBUG, "%s: %s: New interval: %u->%" PRId64 "/%u=%" PRId64 "\n", // last_count is unsigned, so %u
++ __func__, stats->name, stats->last_interval, interval, stats->last_count, frame_time);
++ stats->last_interval = frame_time; // fits: interval < STATS_INTERVAL_MAX was checked above
++ }
++ }
++
++ stats->last_pts = pts;
++ stats->last_count = 1;
++}
++
++static void pts_stats_init(pts_stats_t * const stats, void * logctx, const char * name)
++{
++ *stats = (pts_stats_t){
++ .logctx = logctx,
++ .name = name,
++ .last_count = 1,
++ .last_interval = 0,
++ .last_pts = AV_NOPTS_VALUE
++ };
++}
++
++static inline uint32_t pts_track_next_n(pts_track_t * const trk) // Advance and return the track sequence number
++{
++ if (++trk->n == 0) // skip 0 on wrap-around: pts_track_get_frame treats n == 0 as a special value
++ trk->n = 1;
++ return trk->n;
++}
++
++static int pts_track_get_frame(pts_track_t * const trk, const struct timeval tv, AVFrame * const dst)
++{
++ uint32_t n = (uint32_t)(tv.tv_usec / 2 + tv.tv_sec * 500000);
++ pts_track_el_t * t;
++
++ // As a first guess assume that n==0 means last frame
++ if (n == 0) {
++ n = trk->last_n;
++ if (n == 0)
++ goto fail;
++ }
++
++ t = trk->a + (n & (PTS_TRACK_SIZE - 1));
++
++ if (t->n != n) {
++ av_log(trk->logctx, AV_LOG_ERROR, "%s: track failure: got %u, expected %u\n", __func__, n, trk->n);
++ goto fail;
++ }
++
++ // 1st frame is simple - just believe it
++ if (n != trk->last_n) {
++ trk->last_n = n;
++ trk->got_2 = 0;
++ return av_frame_copy_props(dst, t->props);
++ }
++
++ // Only believe in a single interpolated frame
++ if (trk->got_2)
++ goto fail;
++ trk->got_2 = 1;
++
++ av_frame_copy_props(dst, t->props);
++
++
++ // If we can't guess - don't
++ if (t->interval == 0) {
++ dst->best_effort_timestamp = AV_NOPTS_VALUE;
++ dst->pts = AV_NOPTS_VALUE;
++ dst->pkt_dts = AV_NOPTS_VALUE;
++ }
++ else {
++ if (dst->best_effort_timestamp != AV_NOPTS_VALUE)
++ dst->best_effort_timestamp += t->interval / 2;
++ if (dst->pts != AV_NOPTS_VALUE)
++ dst->pts += t->interval / 2;
++ if (dst->pkt_dts != AV_NOPTS_VALUE)
++ dst->pkt_dts += t->interval / 2;
++ }
++
++ return 0;
++
++fail:
++ trk->last_n = 0;
++ trk->got_2 = 0;
++ dst->pts = AV_NOPTS_VALUE;
++ dst->pkt_dts = AV_NOPTS_VALUE;
++ return 0;
++}
++
++// We are only ever expecting in-order frames so nothing more clever is required
++static unsigned int
++pts_track_count(const pts_track_t * const trk)
++{
++ return (trk->n - trk->last_n) & (PTS_TRACK_SIZE - 1);
++}
++
++static struct timeval pts_track_add_frame(pts_track_t * const trk, const AVFrame * const src)
++{
++ const uint32_t n = pts_track_next_n(trk);
++ pts_track_el_t * const t = trk->a + (n & (PTS_TRACK_SIZE - 1));
++
++ pts_stats_add(&trk->stats, src->pts);
++
++ t->n = n;
++ t->interval = pts_stats_interval(&trk->stats); // guess that next interval is the same as the last
++ av_frame_unref(t->props);
++ av_frame_copy_props(t->props, src);
++
++ // We now know what the previous interval was, rather than having to guess,
++ // so set it. There is a better than decent chance that this is before
++ // we use it.
++ if (t->interval != 0) {
++ pts_track_el_t * const prev_t = trk->a + ((n - 1) & (PTS_TRACK_SIZE - 1));
++ prev_t->interval = t->interval;
++ }
++
++ // In case deinterlace interpolates frames use every other usec
++ return (struct timeval){.tv_sec = n / 500000, .tv_usec = (n % 500000) * 2};
++}
++
++/*
++ * Reset every slot of the tracking ring: clear its sequence number and free
++ * its property frame (av_frame_free also NULLs the pointer).
++ */
++static void pts_track_uninit(pts_track_t * const trk)
++{
++    pts_track_el_t *el = trk->a;
++    pts_track_el_t * const end = trk->a + PTS_TRACK_SIZE;
++
++    for (; el != end; ++el) {
++        el->n = 0;
++        av_frame_free(&el->props);
++    }
++}
++
++/*
++ * Initialise the tracker: sequence numbering starts at 1, the stats helper is
++ * set up with the name "track", and one property frame is allocated per slot.
++ *
++ * Returns 0 on success or AVERROR(ENOMEM) if a frame cannot be allocated, in
++ * which case pts_track_uninit() is run over the whole ring before returning.
++ */
++static int pts_track_init(pts_track_t * const trk, void *logctx)
++{
++    unsigned int idx = 0;
++
++    trk->n = 1;
++    pts_stats_init(&trk->stats, logctx, "track");
++
++    while (idx != PTS_TRACK_SIZE) {
++        pts_track_el_t * const el = &trk->a[idx++];
++
++        el->n = 0;
++        el->props = av_frame_alloc();
++        if (el->props == NULL) {
++            pts_track_uninit(trk);
++            return AVERROR(ENOMEM);
++        }
++    }
++    return 0;
++}
++
++/* Bytes per line of plane plane_n (multiplanar) or of the single-planar format. */
++static inline uint32_t
++fmt_bpl(const struct v4l2_format * const fmt, const unsigned int plane_n)
++{
++    if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type))
++        return fmt->fmt.pix_mp.plane_fmt[plane_n].bytesperline;
++    return fmt->fmt.pix.bytesperline;
++}
++
++/* Height stored in fmt, read from whichever union member matches fmt->type. */
++static inline uint32_t
++fmt_height(const struct v4l2_format * const fmt)
++{
++    if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type))
++        return fmt->fmt.pix_mp.height;
++    return fmt->fmt.pix.height;
++}
++
++/* Width stored in fmt, read from whichever union member matches fmt->type. */
++static inline uint32_t
++fmt_width(const struct v4l2_format * const fmt)
++{
++    if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type))
++        return fmt->fmt.pix_mp.width;
++    return fmt->fmt.pix.width;
++}
++
++/* V4L2 pixel format fourcc stored in fmt, for either planar API flavour. */
++static inline uint32_t
++fmt_pixelformat(const struct v4l2_format * const fmt)
++{
++    if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type))
++        return fmt->fmt.pix_mp.pixelformat;
++    return fmt->fmt.pix.pixelformat;
++}
++
++/* bytesused of plane 0 (multiplanar) or of the buffer itself (single-planar). */
++static inline uint32_t
++buf_bytesused0(const struct v4l2_buffer * const buf)
++{
++    if (V4L2_TYPE_IS_MULTIPLANAR(buf->type))
++        return buf->m.planes[0].bytesused;
++    return buf->bytesused;
++}
++
++/*
++ * Zero the queue's format and selection structs, then stamp both with the
++ * given V4L2 buffer type.
++ */
++static void
++init_format(V4L2Queue * const q, const uint32_t format_type)
++{
++    memset(&q->sel, 0, sizeof(q->sel));
++    memset(&q->format, 0, sizeof(q->format));
++
++    q->sel.type = format_type;
++    q->format.type = format_type;
++}
++
++/*
++ * Query the capabilities of the device open on ctx->fd and, if it is usable,
++ * initialise the capture and output queue formats with the matching buffer
++ * types (multiplanar preferred over single-planar).
++ *
++ * Returns 0 on success, AVERROR(errno) if VIDIOC_QUERYCAP fails, or
++ * AVERROR(EINVAL) if the device is not the ISP (scale filter only), cannot
++ * stream, or is not a mem2mem device.
++ */
++static int deint_v4l2m2m_prepare_context(DeintV4L2M2MContextShared *ctx)
++{
++    struct v4l2_capability cap;
++
++    memset(&cap, 0, sizeof(cap));
++    // Report the real errno rather than the raw ioctl return (-1), in line
++    // with every other ioctl failure path in this file
++    if (ioctl(ctx->fd, VIDIOC_QUERYCAP, &cap) < 0)
++        return AVERROR(errno);
++
++    // The scale filter is only accepted on the device reporting this card name
++    if (ctx->filter_type == FILTER_V4L2_SCALE &&
++        strcmp("bcm2835-codec-isp", cap.card) != 0)
++    {
++        av_log(ctx->logctx, AV_LOG_DEBUG, "Not ISP\n");
++        return AVERROR(EINVAL);
++    }
++
++    if (!(cap.capabilities & V4L2_CAP_STREAMING)) {
++        av_log(ctx->logctx, AV_LOG_DEBUG, "No streaming\n");
++        return AVERROR(EINVAL);
++    }
++
++    if (cap.capabilities & V4L2_CAP_VIDEO_M2M_MPLANE) {
++        init_format(&ctx->capture, V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE);
++        init_format(&ctx->output, V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE);
++    }
++    else if (cap.capabilities & V4L2_CAP_VIDEO_M2M) {
++        init_format(&ctx->capture, V4L2_BUF_TYPE_VIDEO_CAPTURE);
++        init_format(&ctx->output, V4L2_BUF_TYPE_VIDEO_OUTPUT);
++    }
++    else {
++        av_log(ctx->logctx, AV_LOG_DEBUG, "Not M2M\n");
++        return AVERROR(EINVAL);
++    }
++
++    return 0;
++}
++
++/*
++ * Probe-only format check: asks the driver (VIDIOC_TRY_FMT) whether the given
++ * size / pixel format / field order is acceptable on this queue's buffer
++ * type. Works on a local v4l2_format, so the queue's own format is untouched.
++ *
++ * avfmt == AV_PIX_FMT_NONE means "any format the driver maps to a known
++ * AVPixelFormat"; YUV420 is offered as the starting point in that case.
++ *
++ * Returns 0 if acceptable, AVERROR(EINVAL) otherwise.
++ */
++static int deint_v4l2m2m_try_format(V4L2Queue *queue, const uint32_t width, const uint32_t height, const enum AVPixelFormat avfmt)
++{
++    DeintV4L2M2MContextShared *ctx = queue->ctx;
++    struct v4l2_format fmt = {.type = queue->format.type};
++    // Pick YUV to test with if not otherwise specified
++    const uint32_t pixelformat = (avfmt != AV_PIX_FMT_NONE) ? fmt_av_to_v4l2(avfmt) : V4L2_PIX_FMT_YUV420;
++    enum AVPixelFormat r_avfmt;
++    uint32_t r_field;
++    int field;
++    int ret;
++
++    // A G_FMT failure is logged but does not abort the probe
++    ret = ioctl(ctx->fd, VIDIOC_G_FMT, &fmt);
++    if (ret)
++        av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_G_FMT failed: %d\n", ret);
++
++    // Only the source side of the deinterlacer is offered interlaced content
++    field = V4L2_FIELD_NONE;
++    if (ctx->filter_type == FILTER_V4L2_DEINTERLACE && V4L2_TYPE_IS_OUTPUT(fmt.type))
++        field = V4L2_FIELD_INTERLACED_TB;
++
++    if (V4L2_TYPE_IS_MULTIPLANAR(fmt.type)) {
++        struct v4l2_pix_format_mplane * const mp = &fmt.fmt.pix_mp;
++
++        mp->pixelformat = pixelformat;
++        mp->field = field;
++        mp->width = width;
++        mp->height = height;
++    } else {
++        struct v4l2_pix_format * const sp = &fmt.fmt.pix;
++
++        sp->pixelformat = pixelformat;
++        sp->field = field;
++        sp->width = width;
++        sp->height = height;
++    }
++
++    av_log(ctx->logctx, AV_LOG_TRACE, "%s: Trying format for type %d, wxh: %dx%d, fmt: %08x, size %u bpl %u pre\n", __func__,
++           fmt.type, fmt.fmt.pix_mp.width, fmt.fmt.pix_mp.height,
++           fmt.fmt.pix_mp.pixelformat,
++           fmt.fmt.pix_mp.plane_fmt[0].sizeimage, fmt.fmt.pix_mp.plane_fmt[0].bytesperline);
++
++    if (ioctl(ctx->fd, VIDIOC_TRY_FMT, &fmt) != 0)
++        return AVERROR(EINVAL);
++
++    av_log(ctx->logctx, AV_LOG_TRACE, "%s: Trying format for type %d, wxh: %dx%d, fmt: %08x, size %u bpl %u post\n", __func__,
++           fmt.type, fmt.fmt.pix_mp.width, fmt.fmt.pix_mp.height,
++           fmt.fmt.pix_mp.pixelformat,
++           fmt.fmt.pix_mp.plane_fmt[0].sizeimage, fmt.fmt.pix_mp.plane_fmt[0].bytesperline);
++
++    // The driver may have substituted another pixel format; see what came back
++    r_avfmt = fmt_v4l2_to_av(fmt_pixelformat(&fmt));
++    if (avfmt != AV_PIX_FMT_NONE && r_avfmt != avfmt) {
++        av_log(ctx->logctx, AV_LOG_DEBUG, "Unable to set format %s on %s port\n", av_get_pix_fmt_name(avfmt), V4L2_TYPE_IS_CAPTURE(fmt.type) ? "dest" : "src");
++        return AVERROR(EINVAL);
++    }
++    if (r_avfmt == AV_PIX_FMT_NONE) {
++        av_log(ctx->logctx, AV_LOG_DEBUG, "No supported format on %s port\n", V4L2_TYPE_IS_CAPTURE(fmt.type) ? "dest" : "src");
++        return AVERROR(EINVAL);
++    }
++
++    // The field order must have survived the TRY_FMT round trip unchanged
++    r_field = V4L2_TYPE_IS_MULTIPLANAR(fmt.type) ? fmt.fmt.pix_mp.field : fmt.fmt.pix.field;
++    if (r_field != field) {
++        av_log(ctx->logctx, AV_LOG_DEBUG, "format not supported for type %d\n", fmt.type);
++
++        return AVERROR(EINVAL);
++    }
++
++    return 0;
++}
++
++/*
++ * Apply q->format to the device with VIDIOC_S_FMT and then try to apply
++ * q->sel with VIDIOC_S_SELECTION.
++ *
++ * Returns 0 on success, AVERROR(errno) if S_FMT fails, or AVERROR(EINVAL) if
++ * the driver replaced the requested pixel format with a different one.
++ * A failing S_SELECTION is only logged as a warning; the call still
++ * returns 0.
++ */
++static int
++do_s_fmt(V4L2Queue * const q)
++{
++    DeintV4L2M2MContextShared * const ctx = q->ctx;
++    // Remember what was asked for: S_FMT rewrites q->format in place
++    const uint32_t pixelformat = fmt_pixelformat(&q->format);
++    int ret;
++
++    ret = ioctl(ctx->fd, VIDIOC_S_FMT, &q->format);
++    if (ret) {
++        ret = AVERROR(errno);
++        av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_S_FMT failed: %s\n", av_err2str(ret));
++        return ret;
++    }
++
++    if (pixelformat != fmt_pixelformat(&q->format)) {
++        av_log(ctx->logctx, AV_LOG_ERROR, "Format not supported: %s; S_FMT returned %s\n", av_fourcc2str(pixelformat), av_fourcc2str(fmt_pixelformat(&q->format)));
++        return AVERROR(EINVAL);
++    }
++
++    // Source (OUTPUT) queues crop, destination (CAPTURE) queues compose.
++    // Two separate statements: the original joined them with a comma operator.
++    q->sel.target = V4L2_TYPE_IS_OUTPUT(q->sel.type) ? V4L2_SEL_TGT_CROP : V4L2_SEL_TGT_COMPOSE;
++    q->sel.flags  = V4L2_TYPE_IS_OUTPUT(q->sel.type) ? V4L2_SEL_FLAG_LE : V4L2_SEL_FLAG_GE;
++
++    // Selection is best effort: warn, but do not fail the format setup
++    if (ioctl(ctx->fd, VIDIOC_S_SELECTION, &q->sel))
++        av_log(ctx->logctx, AV_LOG_WARNING, "VIDIOC_S_SELECTION failed: %s\n", av_err2str(AVERROR(errno)));
++
++    return 0;
++}
++
++/*
++ * Translate an FFmpeg colour description into the V4L2 colorspace, Y'CbCr
++ * encoding and transfer function fields of fmt (pix or pix_mp, depending on
++ * fmt->type).
++ *
++ * avcp  colour primaries; selects the colorspace and, for BT.709 and BT.470M,
++ *       the Y'CbCr encoding
++ * avcs  colour matrix; applied after avcp, so a recognised value overrides
++ *       the colorspace chosen from the primaries
++ * avxc  transfer characteristic; selects the transfer function
++ *
++ * Values with no mapping leave the corresponding V4L2_*_DEFAULT in place.
++ */
++static void
++set_fmt_color(struct v4l2_format *const fmt,
++              const enum AVColorPrimaries avcp,
++              const enum AVColorSpace avcs,
++              const enum AVColorTransferCharacteristic avxc)
++{
++    enum v4l2_ycbcr_encoding ycbcr = V4L2_YCBCR_ENC_DEFAULT;
++    enum v4l2_colorspace cs = V4L2_COLORSPACE_DEFAULT;
++    enum v4l2_xfer_func xfer = V4L2_XFER_FUNC_DEFAULT;
++
++    switch (avcp) {
++    case AVCOL_PRI_BT709:
++        cs = V4L2_COLORSPACE_REC709;
++        ycbcr = V4L2_YCBCR_ENC_709;
++        break;
++    case AVCOL_PRI_BT470M:
++        cs = V4L2_COLORSPACE_470_SYSTEM_M;
++        ycbcr = V4L2_YCBCR_ENC_601;
++        break;
++    case AVCOL_PRI_BT470BG:
++        cs = V4L2_COLORSPACE_470_SYSTEM_BG;
++        break;
++    case AVCOL_PRI_SMPTE170M:
++        cs = V4L2_COLORSPACE_SMPTE170M;
++        break;
++    case AVCOL_PRI_SMPTE240M:
++        cs = V4L2_COLORSPACE_SMPTE240M;
++        break;
++    case AVCOL_PRI_BT2020:
++        cs = V4L2_COLORSPACE_BT2020;
++        break;
++    // No V4L2 equivalent is mapped for these - keep the defaults
++    case AVCOL_PRI_SMPTE428:
++    case AVCOL_PRI_SMPTE431:
++    case AVCOL_PRI_SMPTE432:
++    case AVCOL_PRI_EBU3213:
++    case AVCOL_PRI_RESERVED:
++    case AVCOL_PRI_FILM:
++    case AVCOL_PRI_UNSPECIFIED:
++    default:
++        break;
++    }
++
++    switch (avcs) {
++    case AVCOL_SPC_RGB:
++        cs = V4L2_COLORSPACE_SRGB;
++        break;
++    case AVCOL_SPC_BT709:
++        cs = V4L2_COLORSPACE_REC709;
++        break;
++    case AVCOL_SPC_FCC:
++        cs = V4L2_COLORSPACE_470_SYSTEM_M;
++        break;
++    case AVCOL_SPC_BT470BG:
++        cs = V4L2_COLORSPACE_470_SYSTEM_BG;
++        break;
++    case AVCOL_SPC_SMPTE170M:
++        cs = V4L2_COLORSPACE_SMPTE170M;
++        break;
++    case AVCOL_SPC_SMPTE240M:
++        cs = V4L2_COLORSPACE_SMPTE240M;
++        break;
++    case AVCOL_SPC_BT2020_CL:
++        cs = V4L2_COLORSPACE_BT2020;
++        ycbcr = V4L2_YCBCR_ENC_BT2020_CONST_LUM;
++        break;
++    case AVCOL_SPC_BT2020_NCL:
++        cs = V4L2_COLORSPACE_BT2020;
++        break;
++    default:
++        break;
++    }
++
++    // Switch on the caller's transfer characteristic. The original switched
++    // on the local 'xfer' (always V4L2_XFER_FUNC_DEFAULT here), so no case
++    // could ever match and avxc was silently ignored.
++    switch (avxc) {
++    case AVCOL_TRC_BT709:
++        xfer = V4L2_XFER_FUNC_709;
++        break;
++    case AVCOL_TRC_IEC61966_2_1:
++        xfer = V4L2_XFER_FUNC_SRGB;
++        break;
++    case AVCOL_TRC_SMPTE240M:
++        xfer = V4L2_XFER_FUNC_SMPTE240M;
++        break;
++    case AVCOL_TRC_SMPTE2084:
++        xfer = V4L2_XFER_FUNC_SMPTE2084;
++        break;
++    default:
++        break;
++    }
++
++    if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) {
++        fmt->fmt.pix_mp.colorspace = cs;
++        fmt->fmt.pix_mp.ycbcr_enc = ycbcr;
++        fmt->fmt.pix_mp.xfer_func = xfer;
++    } else {
++        fmt->fmt.pix.colorspace = cs;
++        fmt->fmt.pix.ycbcr_enc = ycbcr;
++        fmt->fmt.pix.xfer_func = xfer;
++    }
++}
++
++/*
++ * Store the V4L2 quantization matching avcr in fmt: MPEG -> limited range,
++ * JPEG -> full range, anything else -> driver default.
++ */
++static void
++set_fmt_color_range(struct v4l2_format *const fmt, const enum AVColorRange avcr)
++{
++    enum v4l2_quantization quant;
++
++    switch (avcr) {
++    case AVCOL_RANGE_MPEG:
++        quant = V4L2_QUANTIZATION_LIM_RANGE;
++        break;
++    case AVCOL_RANGE_JPEG:
++        quant = V4L2_QUANTIZATION_FULL_RANGE;
++        break;
++    default:
++        quant = V4L2_QUANTIZATION_DEFAULT;
++        break;
++    }
++
++    if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type))
++        fmt->fmt.pix_mp.quantization = quant;
++    else
++        fmt->fmt.pix.quantization = quant;
++}
++
++/*
++ * Derive FFmpeg colour primaries from a V4L2 format. The Y'CbCr encoding is
++ * consulted first, then the colorspace; AVCOL_PRI_UNSPECIFIED is returned if
++ * neither gives a match.
++ */
++static enum AVColorPrimaries get_color_primaries(const struct v4l2_format *const fmt)
++{
++    const int mplane = V4L2_TYPE_IS_MULTIPLANAR(fmt->type);
++    const enum v4l2_colorspace cs = mplane ?
++        fmt->fmt.pix_mp.colorspace :
++        fmt->fmt.pix.colorspace;
++    const enum v4l2_ycbcr_encoding ycbcr = mplane ?
++        fmt->fmt.pix_mp.ycbcr_enc :
++        fmt->fmt.pix.ycbcr_enc;
++
++    if (ycbcr == V4L2_YCBCR_ENC_XV709 || ycbcr == V4L2_YCBCR_ENC_709)
++        return AVCOL_PRI_BT709;
++    if (ycbcr == V4L2_YCBCR_ENC_XV601 || ycbcr == V4L2_YCBCR_ENC_601)
++        return AVCOL_PRI_BT470M;
++
++    switch (cs) {
++    case V4L2_COLORSPACE_470_SYSTEM_BG:
++        return AVCOL_PRI_BT470BG;
++    case V4L2_COLORSPACE_SMPTE170M:
++        return AVCOL_PRI_SMPTE170M;
++    case V4L2_COLORSPACE_SMPTE240M:
++        return AVCOL_PRI_SMPTE240M;
++    case V4L2_COLORSPACE_BT2020:
++        return AVCOL_PRI_BT2020;
++    default:
++        return AVCOL_PRI_UNSPECIFIED;
++    }
++}
++
++/*
++ * Derive the FFmpeg colour matrix from the colorspace of a V4L2 format. For
++ * BT.2020 the Y'CbCr encoding picks constant vs non-constant luminance.
++ * Returns AVCOL_SPC_UNSPECIFIED for colorspaces with no mapping.
++ */
++static enum AVColorSpace get_color_space(const struct v4l2_format *const fmt)
++{
++    const int mplane = V4L2_TYPE_IS_MULTIPLANAR(fmt->type);
++    const enum v4l2_colorspace cs = mplane ?
++        fmt->fmt.pix_mp.colorspace :
++        fmt->fmt.pix.colorspace;
++    const enum v4l2_ycbcr_encoding ycbcr = mplane ?
++        fmt->fmt.pix_mp.ycbcr_enc :
++        fmt->fmt.pix.ycbcr_enc;
++    enum AVColorSpace spc = AVCOL_SPC_UNSPECIFIED;
++
++    switch (cs) {
++    case V4L2_COLORSPACE_SRGB:
++        spc = AVCOL_SPC_RGB;
++        break;
++    case V4L2_COLORSPACE_REC709:
++        spc = AVCOL_SPC_BT709;
++        break;
++    case V4L2_COLORSPACE_470_SYSTEM_M:
++        spc = AVCOL_SPC_FCC;
++        break;
++    case V4L2_COLORSPACE_470_SYSTEM_BG:
++        spc = AVCOL_SPC_BT470BG;
++        break;
++    case V4L2_COLORSPACE_SMPTE170M:
++        spc = AVCOL_SPC_SMPTE170M;
++        break;
++    case V4L2_COLORSPACE_SMPTE240M:
++        spc = AVCOL_SPC_SMPTE240M;
++        break;
++    case V4L2_COLORSPACE_BT2020:
++        spc = (ycbcr == V4L2_YCBCR_ENC_BT2020_CONST_LUM) ?
++            AVCOL_SPC_BT2020_CL : AVCOL_SPC_BT2020_NCL;
++        break;
++    default:
++        break;
++    }
++
++    return spc;
++}
++
++/*
++ * Derive the FFmpeg transfer characteristic from a V4L2 format. Lookup order:
++ * explicit transfer function, then colorspace, then Y'CbCr encoding. Returns
++ * AVCOL_TRC_UNSPECIFIED if none of them maps.
++ */
++static enum AVColorTransferCharacteristic get_color_trc(const struct v4l2_format *const fmt)
++{
++    const int mplane = V4L2_TYPE_IS_MULTIPLANAR(fmt->type);
++    const enum v4l2_colorspace cs = mplane ?
++        fmt->fmt.pix_mp.colorspace :
++        fmt->fmt.pix.colorspace;
++    const enum v4l2_ycbcr_encoding ycbcr = mplane ?
++        fmt->fmt.pix_mp.ycbcr_enc :
++        fmt->fmt.pix.ycbcr_enc;
++    const enum v4l2_xfer_func xfer = mplane ?
++        fmt->fmt.pix_mp.xfer_func :
++        fmt->fmt.pix.xfer_func;
++
++    if (xfer == V4L2_XFER_FUNC_709)
++        return AVCOL_TRC_BT709;
++    if (xfer == V4L2_XFER_FUNC_SRGB)
++        return AVCOL_TRC_IEC61966_2_1;
++
++    switch (cs) {
++    case V4L2_COLORSPACE_470_SYSTEM_M:
++        return AVCOL_TRC_GAMMA22;
++    case V4L2_COLORSPACE_470_SYSTEM_BG:
++        return AVCOL_TRC_GAMMA28;
++    case V4L2_COLORSPACE_SMPTE170M:
++        return AVCOL_TRC_SMPTE170M;
++    case V4L2_COLORSPACE_SMPTE240M:
++        return AVCOL_TRC_SMPTE240M;
++    default:
++        break;
++    }
++
++    if (ycbcr == V4L2_YCBCR_ENC_XV709 || ycbcr == V4L2_YCBCR_ENC_XV601)
++        return AVCOL_TRC_BT1361_ECG;
++
++    return AVCOL_TRC_UNSPECIFIED;
++}
++
++/*
++ * Map the V4L2 quantization of fmt to an FFmpeg colour range: limited ->
++ * MPEG, full -> JPEG, anything else -> AVCOL_RANGE_UNSPECIFIED.
++ */
++static enum AVColorRange get_color_range(const struct v4l2_format *const fmt)
++{
++    enum v4l2_quantization quant;
++
++    if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type))
++        quant = fmt->fmt.pix_mp.quantization;
++    else
++        quant = fmt->fmt.pix.quantization;
++
++    if (quant == V4L2_QUANTIZATION_LIM_RANGE)
++        return AVCOL_RANGE_MPEG;
++    if (quant == V4L2_QUANTIZATION_FULL_RANGE)
++        return AVCOL_RANGE_JPEG;
++    return AVCOL_RANGE_UNSPECIFIED;
++}
++
++/*
++ * Fill q->format and q->sel from a DRM PRIME source frame.
++ *
++ * The V4L2 pixel format, width, height and bytes-per-line are derived from
++ * the frame's AVDRMFrameDescriptor (layer 0 format, object 0 modifier, plane
++ * pitches and offsets); colour info and the crop rectangle come from the
++ * AVFrame itself. Only single-layer, single-object descriptors are accepted.
++ *
++ * Returns 0 on success or AVERROR(EINVAL) if the layout is not one of the
++ * handled format/modifier combinations.
++ */
++static int set_src_fmt(V4L2Queue * const q, const AVFrame * const frame)
++{
++    struct v4l2_format *const format = &q->format;
++    const AVDRMFrameDescriptor *const src = (const AVDRMFrameDescriptor *)frame->data[0];
++
++    const uint32_t drm_fmt = src->layers[0].format;
++    // Treat INVALID as LINEAR
++    const uint64_t mod = src->objects[0].format_modifier == DRM_FORMAT_MOD_INVALID ?
++        DRM_FORMAT_MOD_LINEAR : src->objects[0].format_modifier;
++    uint32_t pix_fmt = 0;
++    uint32_t w = 0;
++    uint32_t h = 0;
++    uint32_t bpl = src->layers[0].planes[0].pitch;
++
++    // We really don't expect multiple layers
++    // All formats that we currently cope with are single object
++
++    if (src->nb_layers != 1 || src->nb_objects != 1)
++        return AVERROR(EINVAL);
++
++    switch (drm_fmt) {
++    case DRM_FORMAT_YUV420:
++        if (mod == DRM_FORMAT_MOD_LINEAR) {
++            // A zero pitch would divide by zero below - reject it as unsupported
++            if (src->layers[0].nb_planes != 3 || bpl == 0)
++                break;
++            pix_fmt = V4L2_PIX_FMT_YUV420;
++            // Height is inferred from where the second plane starts
++            h = src->layers[0].planes[1].offset / bpl;
++            w = bpl;
++        }
++        break;
++
++    case DRM_FORMAT_NV12:
++        if (mod == DRM_FORMAT_MOD_LINEAR) {
++            // A zero pitch would divide by zero below - reject it as unsupported
++            if (src->layers[0].nb_planes != 2 || bpl == 0)
++                break;
++            pix_fmt = V4L2_PIX_FMT_NV12;
++            h = src->layers[0].planes[1].offset / bpl;
++            w = bpl;
++        }
++#if CONFIG_SAND
++        else if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) {
++            if (src->layers[0].nb_planes != 2)
++                break;
++            pix_fmt = V4L2_PIX_FMT_NV12_COL128;
++            w = bpl;
++            h = src->layers[0].planes[1].offset / 128;
++            // For this layout bytesperline is taken from the modifier parameter
++            bpl = fourcc_mod_broadcom_param(mod);
++        }
++#endif
++        break;
++
++    case DRM_FORMAT_P030:
++#if CONFIG_SAND
++        if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) {
++            if (src->layers[0].nb_planes != 2)
++                break;
++            pix_fmt = V4L2_PIX_FMT_NV12_10_COL128;
++            w = bpl / 2;  // Matching lie to how we construct this
++            h = src->layers[0].planes[1].offset / 128;
++            bpl = fourcc_mod_broadcom_param(mod);
++        }
++#endif
++        break;
++
++    default:
++        break;
++    }
++
++    // pix_fmt stays 0 for every combination not handled above
++    if (!pix_fmt)
++        return AVERROR(EINVAL);
++
++    if (V4L2_TYPE_IS_MULTIPLANAR(format->type)) {
++        struct v4l2_pix_format_mplane *const pix = &format->fmt.pix_mp;
++
++        pix->width = w;
++        pix->height = h;
++        pix->pixelformat = pix_fmt;
++        pix->plane_fmt[0].bytesperline = bpl;
++        pix->num_planes = 1;
++    }
++    else {
++        struct v4l2_pix_format *const pix = &format->fmt.pix;
++
++        pix->width = w;
++        pix->height = h;
++        pix->pixelformat = pix_fmt;
++        pix->bytesperline = bpl;
++    }
++
++    set_fmt_color(format, frame->color_primaries, frame->colorspace, frame->color_trc);
++    set_fmt_color_range(format, frame->color_range);
++
++    // Selection rectangle = frame size minus its crop margins
++    q->sel.r.width = frame->width - (frame->crop_left + frame->crop_right);
++    q->sel.r.height = frame->height - (frame->crop_top + frame->crop_bottom);
++    q->sel.r.left = frame->crop_left;
++    q->sel.r.top = frame->crop_top;
++
++    return 0;
++}
++
++
++/*
++ * Build the destination format for queue and apply it with do_s_fmt().
++ *
++ * The format's width/height are rounded up to a multiple of 16 while the
++ * selection rectangle keeps the exact requested size at origin (0,0). Colour
++ * description is taken from priv.
++ *
++ * Returns whatever do_s_fmt() returns.
++ */
++static int set_dst_format(DeintV4L2M2MContext * const priv, V4L2Queue *queue, uint32_t pixelformat, uint32_t field, int width, int height)
++{
++    struct v4l2_selection *const sel = &queue->sel;
++    struct v4l2_format * const fmt = &queue->format;
++    // Align w/h to 16 here in case there are alignment requirements at the next
++    // stage of the filter chain (also RPi deinterlace setup is bust and this
++    // fixes it)
++    const int aligned_w = FFALIGN(width, 16);
++    const int aligned_h = FFALIGN(height, 16);
++
++    memset(&fmt->fmt, 0, sizeof(fmt->fmt));
++
++    if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) {
++        struct v4l2_pix_format_mplane * const mp = &fmt->fmt.pix_mp;
++
++        mp->pixelformat = pixelformat;
++        mp->field = field;
++        mp->width = aligned_w;
++        mp->height = aligned_h;
++    } else {
++        struct v4l2_pix_format * const sp = &fmt->fmt.pix;
++
++        sp->pixelformat = pixelformat;
++        sp->field = field;
++        sp->width = aligned_w;
++        sp->height = aligned_h;
++    }
++
++    set_fmt_color(fmt, priv->colour_primaries, priv->colour_matrix, priv->colour_transfer);
++    set_fmt_color_range(fmt, priv->colour_range);
++
++    sel->r.left = 0;
++    sel->r.top = 0;
++    sel->r.width = width;
++    sel->r.height = height;
++
++    return do_s_fmt(queue);
++}
++
++/*
++ * Open the device node and check that it can serve this filter: capabilities
++ * first, then the destination format, then the source format.
++ *
++ * On success returns 0 and leaves the device open in ctx->fd. On any failure
++ * after open() the fd is closed, ctx->fd is set to -1 and the failing step's
++ * error is returned. If open() itself fails, AVERROR(errno) is returned.
++ */
++static int deint_v4l2m2m_probe_device(DeintV4L2M2MContextShared *ctx, char *node)
++{
++    int ret;
++
++    ctx->fd = open(node, O_RDWR | O_NONBLOCK, 0);
++    if (ctx->fd < 0)
++        return AVERROR(errno);
++
++    if ((ret = deint_v4l2m2m_prepare_context(ctx)) != 0) {
++        av_log(ctx->logctx, AV_LOG_DEBUG, "Failed to prepare context\n");
++    }
++    else if ((ret = deint_v4l2m2m_try_format(&ctx->capture, ctx->output_width, ctx->output_height, ctx->output_format)) != 0) {
++        av_log(ctx->logctx, AV_LOG_DEBUG, "Failed to try dst format\n");
++    }
++    else if ((ret = deint_v4l2m2m_try_format(&ctx->output, ctx->width, ctx->height, AV_PIX_FMT_NONE)) != 0) {
++        av_log(ctx->logctx, AV_LOG_DEBUG, "Failed to try src format\n");
++    }
++    else {
++        // Every check passed - keep the device open for the caller
++        return 0;
++    }
++
++    close(ctx->fd);
++    ctx->fd = -1;
++
++    return ret;
++}
++
++/*
++ * Scan /dev for entries whose name starts with "video" and probe each one
++ * until a device passes deint_v4l2m2m_probe_device().
++ *
++ * Returns 0 with the chosen device open in ctx->fd, AVERROR(errno) if /dev
++ * cannot be opened, or the error of the last probe (AVERROR(EINVAL) if
++ * nothing was probed) with ctx->fd set to -1.
++ */
++static int deint_v4l2m2m_find_device(DeintV4L2M2MContextShared *ctx)
++{
++    char node[PATH_MAX];
++    struct dirent *entry;
++    int ret = AVERROR(EINVAL);
++    DIR * const dirp = opendir("/dev");
++
++    if (dirp == NULL)
++        return AVERROR(errno);
++
++    while ((entry = readdir(dirp)) != NULL) {
++        if (strncmp(entry->d_name, "video", 5) != 0)
++            continue;
++
++        snprintf(node, sizeof(node), "/dev/%s", entry->d_name);
++        av_log(ctx->logctx, AV_LOG_DEBUG, "probing device %s\n", node);
++
++        ret = deint_v4l2m2m_probe_device(ctx, node);
++        if (ret == 0)
++            break;
++    }
++
++    closedir(dirp);
++
++    if (ret == 0) {
++        av_log(ctx->logctx, AV_LOG_INFO, "Using device %s\n", node);
++        return 0;
++    }
++
++    av_log(ctx->logctx, AV_LOG_ERROR, "Could not find a valid device\n");
++    ctx->fd = -1;
++
++    return ret;
++}
++
++/*
++ * Hand buf to the driver with VIDIOC_QBUF and mark it as enqueued.
++ * Returns 0 on success or AVERROR(errno) (buf is then left unmarked).
++ */
++static int deint_v4l2m2m_enqueue_buffer(V4L2Buffer *buf)
++{
++    if (ioctl(buf->q->ctx->fd, VIDIOC_QBUF, &buf->buffer) < 0)
++        return AVERROR(errno);
++
++    buf->enqueued = 1;
++    return 0;
++}
++
++/* Mark every object slot of the descriptor as holding no fd (-1). */
++static void
++drm_frame_init(AVDRMFrameDescriptor * const d)
++{
++    unsigned int slot = 0;
++
++    while (slot != AV_DRM_MAX_PLANES)
++        d->objects[slot++].fd = -1;
++}
++
++/*
++ * Close the fd of each of the descriptor's nb_objects objects that holds one
++ * and reset it to -1.
++ */
++static void
++drm_frame_uninit(AVDRMFrameDescriptor * const d)
++{
++    unsigned int slot;
++
++    for (slot = 0; slot != d->nb_objects; ++slot) {
++        if (d->objects[slot].fd == -1)
++            continue;
++
++        close(d->objects[slot].fd);
++        d->objects[slot].fd = -1;
++    }
++}
++
++/*
++ * Release an array of n V4L2Buffers: close the DRM fds of each element, free
++ * the array and NULL the caller's pointer. A NULL array is a no-op.
++ */
++static void
++avbufs_delete(V4L2Buffer** ppavbufs, const unsigned int n)
++{
++    V4L2Buffer* const bufs = *ppavbufs;
++    unsigned int idx;
++
++    if (!bufs)
++        return;
++
++    // Detach the array from its owner before tearing it down
++    *ppavbufs = NULL;
++
++    for (idx = 0; idx < n; idx++)
++        drm_frame_uninit(&bufs[idx].drm_frame);
++
++    av_free(bufs);
++}
++
++/*
++ * Describe avbuf as a DRM PRIME frame: fill avbuf->drm_frame with one layer
++ * whose planes follow the queue's current format, then export each V4L2 plane
++ * as a dmabuf fd (VIDIOC_EXPBUF) into the descriptor's objects.
++ *
++ * When the V4L2 buffer has a single plane, the chroma plane(s) are described
++ * as further DRM planes at offsets inside object 0.
++ *
++ * Returns 0 on success, AVERROR(EINVAL) for a pixel format not handled here,
++ * or AVERROR(errno) if an export fails. In the latter case nb_objects counts
++ * the fds exported so far.
++ */
++static int v4l2_buffer_export_drm(V4L2Queue * const q, V4L2Buffer * const avbuf)
++{
++    struct v4l2_exportbuffer expbuf;
++    int i, ret;
++    uint64_t mod = DRM_FORMAT_MOD_LINEAR;
++
++    AVDRMFrameDescriptor * const drm_desc = &avbuf->drm_frame;
++    AVDRMLayerDescriptor * const layer = &drm_desc->layers[0];
++    const struct v4l2_format *const fmt = &q->format;
++    const uint32_t height = fmt_height(fmt);
++    ptrdiff_t bpl0;
++
++    /* fill the DRM frame descriptor */
++    drm_desc->nb_layers = 1;
++    layer->nb_planes = avbuf->num_planes;
++
++    // Default: one DRM plane per V4L2 plane, each in its own object.
++    // (Uses the function-level 'i'; the original shadowed it here.)
++    for (i = 0; i < avbuf->num_planes; i++) {
++        layer->planes[i].object_index = i;
++        layer->planes[i].offset = 0;
++        layer->planes[i].pitch = fmt_bpl(fmt, i);
++    }
++    bpl0 = layer->planes[0].pitch;
++
++    switch (fmt_pixelformat(fmt)) {
++#if CONFIG_SAND
++    case V4L2_PIX_FMT_NV12_COL128:
++        mod = DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(bpl0);
++        layer->format = DRM_FORMAT_NV12;
++
++        if (avbuf->num_planes > 1)
++            break;
++
++        layer->nb_planes = 2;
++        layer->planes[1].object_index = 0;
++        layer->planes[1].offset = height * 128;
++        layer->planes[0].pitch = fmt_width(fmt);
++        layer->planes[1].pitch = layer->planes[0].pitch;
++        break;
++#endif
++
++    // V4L2 fourcc on the case label, DRM fourcc for the layer format; both
++    // NV12 codes have the same value, so behaviour is unchanged
++    case V4L2_PIX_FMT_NV12:
++        layer->format = DRM_FORMAT_NV12;
++
++        if (avbuf->num_planes > 1)
++            break;
++
++        layer->nb_planes = 2;
++        layer->planes[1].object_index = 0;
++        layer->planes[1].offset = bpl0 * height;
++        layer->planes[1].pitch = bpl0;
++        break;
++
++    case V4L2_PIX_FMT_YUV420:
++        layer->format = DRM_FORMAT_YUV420;
++
++        if (avbuf->num_planes > 1)
++            break;
++
++        layer->nb_planes = 3;
++        layer->planes[1].object_index = 0;
++        layer->planes[1].offset = bpl0 * height;
++        layer->planes[1].pitch = bpl0 / 2;
++        layer->planes[2].object_index = 0;
++        layer->planes[2].offset = layer->planes[1].offset + ((bpl0 * height) / 4);
++        layer->planes[2].pitch = bpl0 / 2;
++        break;
++
++    default:
++        drm_desc->nb_layers = 0;
++        return AVERROR(EINVAL);
++    }
++
++    drm_desc->nb_objects = 0;
++    for (i = 0; i < avbuf->num_planes; i++) {
++        memset(&expbuf, 0, sizeof(expbuf));
++
++        expbuf.index = avbuf->buffer.index;
++        expbuf.type = avbuf->buffer.type;
++        expbuf.plane = i;
++
++        ret = ioctl(avbuf->q->ctx->fd, VIDIOC_EXPBUF, &expbuf);
++        if (ret < 0)
++            return AVERROR(errno);
++
++        drm_desc->objects[i].size = V4L2_TYPE_IS_MULTIPLANAR(avbuf->buffer.type) ?
++            avbuf->buffer.m.planes[i].length : avbuf->buffer.length;
++        drm_desc->objects[i].fd = expbuf.fd;
++        drm_desc->objects[i].format_modifier = mod;
++        // Keep the count current so a later failure leaves an accurate count
++        drm_desc->nb_objects = i + 1;
++    }
++
++    return 0;
++}
++
++/*
++ * Request queue->num_buffers buffers from the driver and build the matching
++ * V4L2Buffer array in queue->buffers.
++ *
++ * OUTPUT-type queues use DMABUF memory, all others MMAP. Buffers of
++ * non-OUTPUT queues are queued to the driver straight away and exported as
++ * DRM PRIME descriptors.
++ *
++ * queue->num_buffers is updated to the count the driver actually granted.
++ * Returns 0 on success or a negative AVERROR code; after a failure in the
++ * per-buffer setup the array is released and num_buffers reset to 0.
++ */
++static int deint_v4l2m2m_allocate_buffers(V4L2Queue *queue)
++{
++    struct v4l2_format *fmt = &queue->format;
++    DeintV4L2M2MContextShared *ctx = queue->ctx;
++    struct v4l2_requestbuffers req;
++    int ret, i, multiplanar;
++    uint32_t memory;
++
++    memory = V4L2_TYPE_IS_OUTPUT(fmt->type) ?
++        V4L2_MEMORY_DMABUF : V4L2_MEMORY_MMAP;
++
++    multiplanar = V4L2_TYPE_IS_MULTIPLANAR(fmt->type);
++
++    memset(&req, 0, sizeof(req));
++    req.count = queue->num_buffers;
++    req.memory = memory;
++    req.type = fmt->type;
++
++    ret = ioctl(ctx->fd, VIDIOC_REQBUFS, &req);
++    if (ret < 0) {
++        // Snapshot errno first: strerror()/av_log() may change it before it
++        // is turned into the return value
++        const int err = errno;
++
++        av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_REQBUFS failed: %s\n", strerror(err));
++
++        return AVERROR(err);
++    }
++
++    queue->num_buffers = req.count;
++    queue->buffers = av_mallocz(queue->num_buffers * sizeof(V4L2Buffer));
++    if (!queue->buffers) {
++        av_log(ctx->logctx, AV_LOG_ERROR, "malloc enomem\n");
++
++        return AVERROR(ENOMEM);
++    }
++
++    // First pass: initialise every element so the fail path can safely run
++    // over the whole array
++    for (i = 0; i < queue->num_buffers; i++) {
++        V4L2Buffer * const buf = &queue->buffers[i];
++
++        buf->enqueued = 0;
++        buf->q = queue;
++
++        buf->buffer.type = fmt->type;
++        buf->buffer.memory = memory;
++        buf->buffer.index = i;
++
++        if (multiplanar) {
++            buf->buffer.length = VIDEO_MAX_PLANES;
++            buf->buffer.m.planes = buf->planes;
++        }
++
++        drm_frame_init(&buf->drm_frame);
++    }
++
++    // Second pass: query the driver's view of each buffer and, for
++    // non-OUTPUT queues, queue and export it
++    for (i = 0; i < queue->num_buffers; i++) {
++        V4L2Buffer * const buf = &queue->buffers[i];
++
++        ret = ioctl(ctx->fd, VIDIOC_QUERYBUF, &buf->buffer);
++        if (ret < 0) {
++            ret = AVERROR(errno);
++
++            goto fail;
++        }
++
++        // For multiplanar buffers QUERYBUF reports the plane count in length
++        buf->num_planes = multiplanar ? buf->buffer.length : 1;
++
++        if (!V4L2_TYPE_IS_OUTPUT(fmt->type)) {
++            ret = deint_v4l2m2m_enqueue_buffer(buf);
++            if (ret)
++                goto fail;
++
++            ret = v4l2_buffer_export_drm(queue, buf);
++            if (ret)
++                goto fail;
++        }
++    }
++
++    return 0;
++
++fail:
++    avbufs_delete(&queue->buffers, queue->num_buffers);
++    queue->num_buffers = 0;
++    return ret;
++}
++
++/*
++ * Start streaming on queue (VIDIOC_STREAMON).
++ * Returns 0 on success or AVERROR(errno) on failure.
++ */
++static int deint_v4l2m2m_streamon(V4L2Queue *queue)
++{
++    DeintV4L2M2MContextShared * const ctx = queue->ctx;
++    int type = queue->format.type;
++    int ret, err;
++
++    ret = ioctl(ctx->fd, VIDIOC_STREAMON, &type);
++    // Snapshot errno immediately: av_log() below may change it before it is
++    // used for the return value
++    err = AVERROR(errno);
++    av_log(ctx->logctx, AV_LOG_DEBUG, "%s: type:%d ret:%d errno:%d\n", __func__, type, ret, err);
++
++    return ret < 0 ? err : 0;
++}
++
++/*
++ * Stop streaming on queue (VIDIOC_STREAMOFF).
++ * Returns 0 on success or AVERROR(errno) on failure.
++ */
++static int deint_v4l2m2m_streamoff(V4L2Queue *queue)
++{
++    DeintV4L2M2MContextShared * const ctx = queue->ctx;
++    int type = queue->format.type;
++    int ret, err;
++
++    ret = ioctl(ctx->fd, VIDIOC_STREAMOFF, &type);
++    // Snapshot errno immediately: av_log() below may change it before it is
++    // used for the return value
++    err = AVERROR(errno);
++    av_log(ctx->logctx, AV_LOG_DEBUG, "%s: type:%d ret:%d errno:%d\n", __func__, type, ret, err);
++
++    return ret < 0 ? err : 0;
++}
++
++/*
++ * Wait up to timeout ms for the device to become ready for this queue's
++ * direction and dequeue one buffer (VIDIOC_DQBUF).
++ *
++ * Returns the matching element of queue->buffers, updated with the driver's
++ * v4l2_buffer (and plane array) and marked not-enqueued, or NULL on timeout,
++ * poll error, POLLERR, or DQBUF failure.
++ */
++static V4L2Buffer* deint_v4l2m2m_dequeue_buffer(V4L2Queue *queue, int timeout)
++{
++    struct v4l2_plane planes[VIDEO_MAX_PLANES];
++    DeintV4L2M2MContextShared *ctx = queue->ctx;
++    struct v4l2_buffer buf = { 0 };
++    V4L2Buffer* avbuf = NULL;
++    struct pollfd pfd;
++    short events;
++    int ret;
++
++    if (V4L2_TYPE_IS_OUTPUT(queue->format.type))
++        events = POLLOUT | POLLWRNORM;
++    else
++        events = POLLIN | POLLRDNORM;
++
++    pfd.events = events;
++    pfd.revents = 0;
++    pfd.fd = ctx->fd;
++
++    for (;;) {
++        ret = poll(&pfd, 1, timeout);
++        if (ret > 0)
++            break;
++        // Retry only when poll() itself was interrupted. On a timeout
++        // (ret == 0) errno is not set by poll(), so a stale EINTR left by an
++        // earlier call must not turn the timeout into a busy retry loop.
++        if (ret < 0 && errno == EINTR)
++            continue;
++        return NULL;
++    }
++
++    if (pfd.revents & POLLERR)
++        return NULL;
++
++    if (pfd.revents & events) {
++        memset(&buf, 0, sizeof(buf));
++        // NOTE(review): MMAP is used here for OUTPUT queues too, although
++        // those are allocated as DMABUF - confirm the driver does not care
++        buf.memory = V4L2_MEMORY_MMAP;
++        buf.type = queue->format.type;
++        if (V4L2_TYPE_IS_MULTIPLANAR(queue->format.type)) {
++            memset(planes, 0, sizeof(planes));
++            buf.length = VIDEO_MAX_PLANES;
++            buf.m.planes = planes;
++        }
++
++        ret = ioctl(ctx->fd, VIDIOC_DQBUF, &buf);
++        if (ret) {
++            // EAGAIN is not worth a log line
++            if (errno != EAGAIN)
++                av_log(ctx->logctx, AV_LOG_DEBUG, "VIDIOC_DQBUF, errno (%s)\n",
++                       av_err2str(AVERROR(errno)));
++            return NULL;
++        }
++
++        avbuf = &queue->buffers[buf.index];
++        avbuf->enqueued = 0;
++        avbuf->buffer = buf;
++        if (V4L2_TYPE_IS_MULTIPLANAR(queue->format.type)) {
++            // Re-point the plane array at avbuf's own storage; the local
++            // 'planes' goes out of scope when this function returns
++            memcpy(avbuf->planes, planes, sizeof(planes));
++            avbuf->buffer.m.planes = avbuf->planes;
++        }
++        return avbuf;
++    }
++
++    return NULL;
++}
++
++/*
++ * Return the first buffer of queue that is not marked enqueued, or NULL if
++ * all of them are.
++ */
++static V4L2Buffer *deint_v4l2m2m_find_free_buf(V4L2Queue *queue)
++{
++    int idx;
++
++    for (idx = 0; idx < queue->num_buffers; idx++) {
++        V4L2Buffer * const candidate = &queue->buffers[idx];
++
++        if (!candidate->enqueued)
++            return candidate;
++    }
++    return NULL;
++}
++
++/*
++ * Unref the AVFrame attached to every buffer of queue that is still marked
++ * enqueued. A NULL queue or a queue without buffers is a no-op.
++ */
++static void deint_v4l2m2m_unref_queued(V4L2Queue *queue)
++{
++    int idx;
++
++    if (queue == NULL || queue->buffers == NULL)
++        return;
++
++    for (idx = 0; idx < queue->num_buffers; idx++) {
++        V4L2Buffer * const b = queue->buffers + idx;
++
++        if (b->enqueued)
++            av_frame_unref(&b->frame);
++    }
++}
++
++/*
++ * Dequeue, without waiting (timeout 0), every buffer the driver has finished
++ * with on queue and drop the frame reference each one holds.
++ */
++static void recycle_q(V4L2Queue * const queue)
++{
++    for (;;) {
++        V4L2Buffer * const done = deint_v4l2m2m_dequeue_buffer(queue, 0);
++
++        if (!done)
++            break;
++        av_frame_unref(&done->frame);
++    }
++}
++
++/* Number of buffers of queue currently marked enqueued (0 if it has none). */
++static int count_enqueued(V4L2Queue *queue)
++{
++    int total = 0;
++    int idx;
++
++    if (!queue->buffers)
++        return 0;
++
++    for (idx = 0; idx < queue->num_buffers; idx++)
++        total += queue->buffers[idx].enqueued ? 1 : 0;
++
++    return total;
++}
++
++/*
++ * Queue a DRM PRIME source frame on queue.
++ *
++ * A free V4L2 buffer is pointed at the frame's dmabuf fd(s), tagged with the
++ * frame's field order and a tracking timestamp, takes over the frame
++ * reference (frame is left empty via av_frame_move_ref) and is queued to the
++ * driver.
++ *
++ * Returns 0 on success, AVERROR(EAGAIN) if no buffer is free, or the error
++ * from deint_v4l2m2m_enqueue_buffer().
++ */
++static int deint_v4l2m2m_enqueue_frame(V4L2Queue * const queue, AVFrame * const frame)
++{
++    DeintV4L2M2MContextShared *const ctx = queue->ctx;
++    AVDRMFrameDescriptor * const desc = (AVDRMFrameDescriptor *)frame->data[0];
++    V4L2Buffer *buf;
++    int obj;
++
++    // On an OUTPUT queue, first reclaim buffers the driver has finished with
++    if (V4L2_TYPE_IS_OUTPUT(queue->format.type))
++        recycle_q(queue);
++
++    buf = deint_v4l2m2m_find_free_buf(queue);
++    if (buf == NULL) {
++        av_log(ctx->logctx, AV_LOG_ERROR, "%s: error %d finding free buf\n", __func__, 0);
++        return AVERROR(EAGAIN);
++    }
++
++    if (V4L2_TYPE_IS_MULTIPLANAR(buf->buffer.type)) {
++        for (obj = 0; obj < desc->nb_objects; obj++)
++            buf->buffer.m.planes[obj].m.fd = desc->objects[obj].fd;
++    } else {
++        buf->buffer.m.fd = desc->objects[0].fd;
++    }
++
++    if (!frame->interlaced_frame)
++        buf->buffer.field = V4L2_FIELD_NONE;
++    else if (frame->top_field_first)
++        buf->buffer.field = V4L2_FIELD_INTERLACED_TB;
++    else
++        buf->buffer.field = V4L2_FIELD_INTERLACED_BT;
++
++    if (buf->buffer.field != ctx->field_order) {
++        av_log(ctx->logctx, AV_LOG_DEBUG, "%s: Field changed: %d->%d\n", __func__, ctx->field_order, buf->buffer.field);
++        ctx->field_order = buf->buffer.field;
++    }
++
++    // The timestamp carries the tracking sequence number, not a real time
++    buf->buffer.timestamp = pts_track_add_frame(&ctx->track, frame);
++
++    buf->drm_frame.objects[0].fd = desc->objects[0].fd;
++
++    // The buffer now owns the frame reference
++    av_frame_move_ref(&buf->frame, frame);
++
++    return deint_v4l2m2m_enqueue_buffer(buf);
++}
++
++/*
++ * Drop one reference to the shared context. When the last reference goes:
++ * stop streaming on both queues (if the device is open), release the capture
++ * buffers, unref frames still attached to queued output buffers, drop the hw
++ * frames context, free the buffer arrays, close the device and free ctx.
++ */
++static void deint_v4l2m2m_destroy_context(DeintV4L2M2MContextShared *ctx)
++{
++    V4L2Queue *capture;
++    V4L2Queue *output;
++
++    // atomic_fetch_sub returns the previous value: 1 means this was the last ref
++    if (atomic_fetch_sub(&ctx->refcount, 1) != 1)
++        return;
++
++    capture = &ctx->capture;
++    output = &ctx->output;
++
++    av_log(NULL, AV_LOG_DEBUG, "%s - destroying context\n", __func__);
++
++    if (ctx->fd >= 0) {
++        deint_v4l2m2m_streamoff(capture);
++        deint_v4l2m2m_streamoff(output);
++    }
++
++    avbufs_delete(&capture->buffers, capture->num_buffers);
++
++    deint_v4l2m2m_unref_queued(output);
++
++    av_buffer_unref(&ctx->hw_frames_ctx);
++
++    // av_free() accepts NULL, so no guards are needed. capture->buffers is
++    // already NULL here because avbufs_delete() cleared it.
++    av_free(capture->buffers);
++    av_free(output->buffers);
++
++    if (ctx->fd >= 0) {
++        close(ctx->fd);
++        ctx->fd = -1;
++    }
++
++    av_free(ctx);
++}
++
++/*
++ * AVBuffer free callback for frames produced by this filter (opaque is the
++ * V4L2Buffer). Unless the context is flagged done, the buffer is handed back
++ * to the driver; the context reference held for the frame is then dropped.
++ */
++static void v4l2_free_buffer(void *opaque, uint8_t *unused)
++{
++    V4L2Buffer * const vbuf = opaque;
++    DeintV4L2M2MContextShared * const shared = vbuf->q->ctx;
++
++    if (shared->done == 0)
++        deint_v4l2m2m_enqueue_buffer(vbuf);
++
++    deint_v4l2m2m_destroy_context(shared);
++}
++
++// Dequeue one buffer from queue and present it in frame as an
++// AV_PIX_FMT_DRM_PRIME frame backed by the buffer's DRM descriptor.
++//
++// timeout in ms
++//
++// Returns 0 on success, AVERROR_EOF once the queue has reached EOS,
++// AVERROR(EAGAIN) if no buffer could be dequeued, AVERROR(EINVAL) for an
++// empty capture buffer that is not the last one, or AVERROR(ENOMEM) if the
++// AVBuffer wrapper cannot be created.
++static int deint_v4l2m2m_dequeue_frame(V4L2Queue *queue, AVFrame* frame, int timeout)
++{
++ DeintV4L2M2MContextShared *ctx = queue->ctx;
++ V4L2Buffer* avbuf;
++ enum AVColorPrimaries color_primaries;
++ enum AVColorSpace colorspace;
++ enum AVColorTransferCharacteristic color_trc;
++ enum AVColorRange color_range;
++
++ av_log(ctx->logctx, AV_LOG_TRACE, "<<< %s\n", __func__);
++
++ if (queue->eos) {
++ av_log(ctx->logctx, AV_LOG_TRACE, ">>> %s: EOS\n", __func__);
++ return AVERROR_EOF;
++ }
++
++ avbuf = deint_v4l2m2m_dequeue_buffer(queue, timeout);
++ if (!avbuf) {
++ av_log(ctx->logctx, AV_LOG_DEBUG, "%s: No buffer to dequeue (timeout=%d)\n", __func__, timeout);
++ return AVERROR(EAGAIN);
++ }
++
++ if (V4L2_TYPE_IS_CAPTURE(avbuf->buffer.type)) {
++ // The driver flags the final buffer with V4L2_BUF_FLAG_LAST
++ if ((avbuf->buffer.flags & V4L2_BUF_FLAG_LAST) != 0)
++ queue->eos = 1;
++ // NOTE(review): an empty buffer is dropped here without being re-queued -
++ // confirm that is intended for the non-EOS (EINVAL) case
++ if (buf_bytesused0(&avbuf->buffer) == 0)
++ return queue->eos ? AVERROR_EOF : AVERROR(EINVAL);
++ }
++
++ // Fill in PTS and ancillary info from src frame
++ pts_track_get_frame(&ctx->track, avbuf->buffer.timestamp, frame);
++
++ // The frame's data is the buffer's DRM descriptor; v4l2_free_buffer runs
++ // when the last reference to it is dropped
++ frame->buf[0] = av_buffer_create((uint8_t *) &avbuf->drm_frame,
++ sizeof(avbuf->drm_frame), v4l2_free_buffer,
++ avbuf, AV_BUFFER_FLAG_READONLY);
++ if (!frame->buf[0]) {
++ // NOTE(review): avbuf has been dequeued but is not re-queued on this
++ // path - confirm whether it should be
++ av_log(ctx->logctx, AV_LOG_ERROR, "%s: error %d creating buffer\n", __func__, 0);
++ return AVERROR(ENOMEM);
++ }
++
++ // Context reference held on behalf of the frame; v4l2_free_buffer drops it
++ // via deint_v4l2m2m_destroy_context
++ atomic_fetch_add(&ctx->refcount, 1);
++
++ frame->data[0] = (uint8_t *)&avbuf->drm_frame;
++ frame->format = AV_PIX_FMT_DRM_PRIME;
++ if (ctx->hw_frames_ctx)
++ frame->hw_frames_ctx = av_buffer_ref(ctx->hw_frames_ctx);
++ frame->height = ctx->output_height;
++ frame->width = ctx->output_width;
++
++ color_primaries = get_color_primaries(&ctx->capture.format);
++ colorspace = get_color_space(&ctx->capture.format);
++ color_trc = get_color_trc(&ctx->capture.format);
++ color_range = get_color_range(&ctx->capture.format);
++
++ // If the color parameters are unspecified by V4L2 then leave alone as they
++ // will have been copied from src
++ if (color_primaries != AVCOL_PRI_UNSPECIFIED)
++ frame->color_primaries = color_primaries;
++ if (colorspace != AVCOL_SPC_UNSPECIFIED)
++ frame->colorspace = colorspace;
++ if (color_trc != AVCOL_TRC_UNSPECIFIED)
++ frame->color_trc = color_trc;
++ if (color_range != AVCOL_RANGE_UNSPECIFIED)
++ frame->color_range = color_range;
++
++ if (ctx->filter_type == FILTER_V4L2_DEINTERLACE) {
++ // Not interlaced now
++ frame->interlaced_frame = 0; // *** Fill in from dst buffer?
++ frame->top_field_first = 0;
++ // Pkt duration halved
++ frame->pkt_duration /= 2;
++ }
++
++ // A driver-reported buffer error is passed on as a flag; the frame is
++ // still returned
++ if (avbuf->buffer.flags & V4L2_BUF_FLAG_ERROR) {
++ av_log(ctx->logctx, AV_LOG_ERROR, "driver decode error\n");
++ frame->decode_error_flags |= FF_DECODE_ERROR_INVALID_BITSTREAM;
++ }
++
++ av_log(ctx->logctx, AV_LOG_TRACE, ">>> %s: PTS=%"PRId64"\n", __func__, frame->pts);
++ return 0;
++}
++
++static int deint_v4l2m2m_config_props(AVFilterLink *outlink)
++{
++ AVFilterLink *inlink = outlink->src->inputs[0];
++ AVFilterContext *avctx = outlink->src;
++ DeintV4L2M2MContext *priv = avctx->priv;
++ DeintV4L2M2MContextShared *ctx = priv->shared;
++ int ret;
++
++ ctx->height = avctx->inputs[0]->h;
++ ctx->width = avctx->inputs[0]->w;
++
++ if (ctx->filter_type == FILTER_V4L2_SCALE) {
++ if ((ret = ff_scale_eval_dimensions(priv,
++ priv->w_expr, priv->h_expr,
++ inlink, outlink,
++ &ctx->output_width, &ctx->output_height)) < 0)
++ return ret;
++
++ ff_scale_adjust_dimensions(inlink, &ctx->output_width, &ctx->output_height,
++ priv->force_original_aspect_ratio, priv->force_divisible_by);
++ }
++ else {
++ ctx->output_width = ctx->width;
++ ctx->output_height = ctx->height;
++ }
++
++ av_log(priv, AV_LOG_DEBUG, "%s: %dx%d->%dx%d FR: %d/%d->%d/%d\n", __func__,
++ ctx->width, ctx->height, ctx->output_width, ctx->output_height,
++ inlink->frame_rate.num, inlink->frame_rate.den, outlink->frame_rate.num, outlink->frame_rate.den);
++
++ outlink->time_base = inlink->time_base;
++ outlink->w = ctx->output_width;
++ outlink->h = ctx->output_height;
++ outlink->format = inlink->format;
++ if (ctx->filter_type == FILTER_V4L2_DEINTERLACE && inlink->frame_rate.den != 0)
++ outlink->frame_rate = (AVRational){inlink->frame_rate.num * 2, inlink->frame_rate.den};
++
++ if (inlink->sample_aspect_ratio.num)
++ outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h * inlink->w, outlink->w * inlink->h}, inlink->sample_aspect_ratio);
++ else
++ outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
++
++ ret = deint_v4l2m2m_find_device(ctx);
++ if (ret)
++ return ret;
++
++ if (inlink->hw_frames_ctx) {
++ ctx->hw_frames_ctx = av_buffer_ref(inlink->hw_frames_ctx);
++ if (!ctx->hw_frames_ctx)
++ return AVERROR(ENOMEM);
++ }
++ return 0;
++}
++
++static uint32_t desc_pixelformat(const AVDRMFrameDescriptor * const drm_desc)
++{
++ const uint64_t mod = drm_desc->objects[0].format_modifier;
++ const int is_linear = (mod == DRM_FORMAT_MOD_LINEAR || mod == DRM_FORMAT_MOD_INVALID); // an INVALID modifier is treated as linear
++
++ // Only currently support single object things; a return of 0 means "no V4L2 format"
++ if (drm_desc->nb_objects != 1)
++ return 0;
++
++ switch (drm_desc->layers[0].format) { // only layer 0 is inspected
++ case DRM_FORMAT_YUV420:
++ return is_linear ? V4L2_PIX_FMT_YUV420 : 0;
++ case DRM_FORMAT_NV12:
++ return is_linear ? V4L2_PIX_FMT_NV12 :
++#if CONFIG_SAND
++ fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128 ? V4L2_PIX_FMT_NV12_COL128 :
++#endif
++ 0;
++ default:
++ break;
++ }
++ return 0; // any other DRM fourcc is unsupported
++}
++
++static int deint_v4l2m2m_filter_frame(AVFilterLink *link, AVFrame *in)
++{
++ AVFilterContext *avctx = link->dst;
++ DeintV4L2M2MContext *priv = avctx->priv;
++ DeintV4L2M2MContextShared *ctx = priv->shared;
++ V4L2Queue *capture = &ctx->capture;
++ V4L2Queue *output = &ctx->output;
++ int ret;
++
++ av_log(priv, AV_LOG_DEBUG, "<<< %s: input pts: %"PRId64" dts: %"PRId64" field :%d interlaced: %d aspect:%d/%d\n",
++ __func__, in->pts, in->pkt_dts, in->top_field_first, in->interlaced_frame, in->sample_aspect_ratio.num, in->sample_aspect_ratio.den);
++ av_log(priv, AV_LOG_DEBUG, "--- %s: in status in %d/ot %d; out status in %d/out %d\n", __func__,
++ avctx->inputs[0]->status_in, avctx->inputs[0]->status_out, avctx->outputs[0]->status_in, avctx->outputs[0]->status_out);
++
++ if (ctx->field_order == V4L2_FIELD_ANY) {
++ const AVDRMFrameDescriptor * const drm_desc = (AVDRMFrameDescriptor *)in->data[0];
++ uint32_t pixelformat = desc_pixelformat(drm_desc);
++
++ if (pixelformat == 0) {
++ av_log(avctx, AV_LOG_ERROR, "Unsupported DRM format %s in %d objects, modifier %#" PRIx64 "\n",
++ av_fourcc2str(drm_desc->layers[0].format),
++ drm_desc->nb_objects, drm_desc->objects[0].format_modifier);
++ return AVERROR(EINVAL);
++ }
++
++ ctx->orig_width = drm_desc->layers[0].planes[0].pitch;
++ ctx->orig_height = drm_desc->layers[0].planes[1].offset / ctx->orig_width;
++
++ av_log(priv, AV_LOG_DEBUG, "%s: %dx%d (%td,%td)\n", __func__, ctx->width, ctx->height,
++ drm_desc->layers[0].planes[0].pitch, drm_desc->layers[0].planes[1].offset);
++
++ if ((ret = set_src_fmt(output, in)) != 0) {
++ av_log(avctx, AV_LOG_WARNING, "Unknown input DRM format: %s mod: %#" PRIx64 "\n",
++ av_fourcc2str(drm_desc->layers[0].format), drm_desc->objects[0].format_modifier);
++ return ret;
++ }
++
++ ret = do_s_fmt(output);
++ if (ret) {
++ av_log(avctx, AV_LOG_WARNING, "Failed to set source format\n");
++ return ret;
++ }
++
++ if (ctx->output_format != AV_PIX_FMT_NONE)
++ pixelformat = fmt_av_to_v4l2(ctx->output_format);
++ ret = set_dst_format(priv, capture, pixelformat, V4L2_FIELD_NONE, ctx->output_width, ctx->output_height);
++ if (ret) {
++ av_log(avctx, AV_LOG_WARNING, "Failed to set destination format\n");
++ return ret;
++ }
++
++ ret = deint_v4l2m2m_allocate_buffers(capture);
++ if (ret) {
++ av_log(avctx, AV_LOG_WARNING, "Failed to allocate destination buffers\n");
++ return ret;
++ }
++
++ ret = deint_v4l2m2m_streamon(capture);
++ if (ret) {
++ av_log(avctx, AV_LOG_WARNING, "Failed set destination streamon: %s\n", av_err2str(ret));
++ return ret;
++ }
++
++ ret = deint_v4l2m2m_allocate_buffers(output);
++ if (ret) {
++ av_log(avctx, AV_LOG_WARNING, "Failed to allocate src buffers\n");
++ return ret;
++ }
++
++ ret = deint_v4l2m2m_streamon(output);
++ if (ret) {
++ av_log(avctx, AV_LOG_WARNING, "Failed set src streamon: %s\n", av_err2str(ret));
++ return ret;
++ }
++
++ if (in->top_field_first)
++ ctx->field_order = V4L2_FIELD_INTERLACED_TB;
++ else
++ ctx->field_order = V4L2_FIELD_INTERLACED_BT;
++
++ {
++ struct v4l2_encoder_cmd ecmd = {
++ .cmd = V4L2_ENC_CMD_STOP
++ };
++ ctx->has_enc_stop = 0;
++ if (ioctl(ctx->fd, VIDIOC_TRY_ENCODER_CMD, &ecmd) == 0) {
++ av_log(ctx->logctx, AV_LOG_DEBUG, "Test encode stop succeeded\n");
++ ctx->has_enc_stop = 1;
++ }
++ else {
++ av_log(ctx->logctx, AV_LOG_DEBUG, "Test encode stop fail: %s\n", av_err2str(AVERROR(errno)));
++ }
++
++ }
++ }
++
++ ret = deint_v4l2m2m_enqueue_frame(output, in);
++
++ av_log(priv, AV_LOG_TRACE, ">>> %s: %s\n", __func__, av_err2str(ret));
++ return ret;
++}
++
++static int
++ack_inlink(AVFilterContext * const avctx, DeintV4L2M2MContextShared *const s,
++ AVFilterLink * const inlink)
++{
++ int instatus;
++ int64_t inpts;
++
++ if (ff_inlink_acknowledge_status(inlink, &instatus, &inpts) <= 0)
++ return 0;
++
++ s->drain = instatus;
++ s->drain_pts = inpts;
++ s->drain_state = DRAIN_TIMEOUT;
++
++ if (s->field_order == V4L2_FIELD_ANY) { // Not yet started
++ s->drain_state = DRAIN_DONE;
++ }
++ else if (s->one_to_one) {
++ s->drain_state = DRAIN_LAST;
++ }
++ else if (s->has_enc_stop) {
++ struct v4l2_encoder_cmd ecmd = {
++ .cmd = V4L2_ENC_CMD_STOP
++ };
++ if (ioctl(s->fd, VIDIOC_ENCODER_CMD, &ecmd) == 0) {
++ av_log(avctx->priv, AV_LOG_DEBUG, "Do Encode stop\n");
++ s->drain_state = DRAIN_EOS;
++ }
++ else {
++ av_log(avctx->priv, AV_LOG_WARNING, "Encode stop fail: %s\n", av_err2str(AVERROR(errno)));
++ }
++ }
++ return 1;
++}
++
++static int deint_v4l2m2m_activate(AVFilterContext *avctx)
++{
++ DeintV4L2M2MContext * const priv = avctx->priv;
++ DeintV4L2M2MContextShared *const s = priv->shared;
++ AVFilterLink * const outlink = avctx->outputs[0];
++ AVFilterLink * const inlink = avctx->inputs[0];
++ int n = 0;
++ int cn = 99;
++ int did_something = 0;
++
++ av_log(priv, AV_LOG_TRACE, "<<< %s\n", __func__);
++
++ FF_FILTER_FORWARD_STATUS_BACK_ALL(outlink, avctx);
++
++ ack_inlink(avctx, s, inlink);
++
++ if (s->field_order != V4L2_FIELD_ANY) // Can't DQ if no setup!
++ {
++ AVFrame * frame = av_frame_alloc();
++ int rv;
++
++ recycle_q(&s->output);
++ n = count_enqueued(&s->output);
++
++ if (frame == NULL) {
++ av_log(priv, AV_LOG_ERROR, "%s: error allocating frame\n", __func__);
++ return AVERROR(ENOMEM);
++ }
++
++ rv = deint_v4l2m2m_dequeue_frame(&s->capture, frame,
++ drain_frame_expected(s->drain_state) || n > 4 ? 300 : 0);
++ if (rv != 0) {
++ av_frame_free(&frame);
++ if (rv == AVERROR_EOF) {
++ av_log(priv, AV_LOG_DEBUG, "%s: --- DQ EOF\n", __func__);
++ s->drain_state = DRAIN_DONE;
++ }
++ else if (rv == AVERROR(EAGAIN)) {
++ if (s->drain_state != DRAIN_NONE) {
++ av_log(priv, AV_LOG_DEBUG, "%s: --- DQ empty - drain done\n", __func__);
++ s->drain_state = DRAIN_DONE;
++ }
++ }
++ else {
++ av_log(priv, AV_LOG_ERROR, ">>> %s: DQ fail: %s\n", __func__, av_err2str(rv));
++ return rv;
++ }
++ }
++ else {
++ frame->interlaced_frame = 0;
++ // frame is always consumed by filter_frame - even on error despite
++ // a somewhat confusing comment in the header
++ rv = ff_filter_frame(outlink, frame);
++ ++s->frames_tx;
++
++ av_log(priv, AV_LOG_TRACE, "%s: Filtered: %s\n", __func__, av_err2str(rv));
++ did_something = 1;
++
++ if (s->drain_state != DRAIN_NONE && pts_track_count(&s->track) == 0) {
++ av_log(priv, AV_LOG_DEBUG, "%s: --- DQ last - drain done\n", __func__);
++ s->drain_state = DRAIN_DONE;
++ }
++ }
++
++ cn = count_enqueued(&s->capture);
++ }
++
++ if (s->drain_state == DRAIN_DONE) {
++ ff_outlink_set_status(outlink, s->drain, s->drain_pts);
++ av_log(priv, AV_LOG_TRACE, ">>> %s: Status done: %s\n", __func__, av_err2str(s->drain));
++ return 0;
++ }
++
++ recycle_q(&s->output);
++ n = count_enqueued(&s->output);
++
++ while (n < 6 && !s->drain) {
++ AVFrame * frame;
++ int rv;
++
++ if ((rv = ff_inlink_consume_frame(inlink, &frame)) < 0) {
++ av_log(priv, AV_LOG_ERROR, "%s: consume in failed: %s\n", __func__, av_err2str(rv));
++ return rv;
++ }
++
++ if (frame == NULL) {
++ av_log(priv, AV_LOG_TRACE, "%s: No frame\n", __func__);
++ if (!ack_inlink(avctx, s, inlink)) {
++ ff_inlink_request_frame(inlink);
++ av_log(priv, AV_LOG_TRACE, "%s: req frame\n", __func__);
++ }
++ break;
++ }
++ ++s->frames_rx;
++
++ rv = deint_v4l2m2m_filter_frame(inlink, frame);
++ av_frame_free(&frame);
++
++ if (rv != 0)
++ return rv;
++
++ av_log(priv, AV_LOG_TRACE, "%s: Q frame\n", __func__);
++ did_something = 1;
++ ++n;
++ }
++
++ if ((n > 4 || s->drain) && ff_outlink_frame_wanted(outlink)) {
++ ff_filter_set_ready(avctx, 1);
++ did_something = 1;
++ av_log(priv, AV_LOG_TRACE, "%s: ready\n", __func__);
++ }
++
++ av_log(priv, AV_LOG_TRACE, ">>> %s: OK (n=%d, cn=%d)\n", __func__, n, cn);
++ return did_something ? 0 : FFERROR_NOT_READY;
++}
++
++static av_cold int common_v4l2m2m_init(AVFilterContext * const avctx, const filter_type_v4l2_t filter_type)
++{
++ DeintV4L2M2MContext * const priv = avctx->priv;
++ DeintV4L2M2MContextShared * const ctx = av_mallocz(sizeof(DeintV4L2M2MContextShared));
++
++ if (!ctx) {
++ av_log(priv, AV_LOG_ERROR, "%s: error %d allocating context\n", __func__, 0);
++ return AVERROR(ENOMEM);
++ }
++ priv->shared = ctx;
++ ctx->logctx = priv;
++ ctx->filter_type = filter_type;
++ ctx->fd = -1;
++ ctx->output.ctx = ctx;
++ ctx->output.num_buffers = 8;
++ ctx->output.name = "OUTPUT";
++ ctx->capture.ctx = ctx;
++ ctx->capture.num_buffers = 12;
++ ctx->capture.name = "CAPTURE";
++ ctx->done = 0;
++ ctx->field_order = V4L2_FIELD_ANY;
++
++ pts_track_init(&ctx->track, priv);
++
++ atomic_init(&ctx->refcount, 1);
++
++ if (priv->output_format_string) {
++ ctx->output_format = av_get_pix_fmt(priv->output_format_string);
++ if (ctx->output_format == AV_PIX_FMT_NONE) {
++ av_log(avctx, AV_LOG_ERROR, "Invalid ffmpeg output format '%s'.\n", priv->output_format_string);
++ return AVERROR(EINVAL);
++ }
++ if (fmt_av_to_v4l2(ctx->output_format) == 0) {
++ av_log(avctx, AV_LOG_ERROR, "Unsupported output format for V4L2: %s.\n", av_get_pix_fmt_name(ctx->output_format));
++ return AVERROR(EINVAL);
++ }
++ } else {
++ // Use the input format once that is configured.
++ ctx->output_format = AV_PIX_FMT_NONE;
++ }
++
++#define STRING_OPTION(var_name, func_name, default_value) do { \
++ if (priv->var_name ## _string) { \
++ int var = av_ ## func_name ## _from_name(priv->var_name ## _string); \
++ if (var < 0) { \
++ av_log(avctx, AV_LOG_ERROR, "Invalid %s.\n", #var_name); \
++ return AVERROR(EINVAL); \
++ } \
++ priv->var_name = var; \
++ } else { \
++ priv->var_name = default_value; \
++ } \
++ } while (0)
++
++ STRING_OPTION(colour_primaries, color_primaries, AVCOL_PRI_UNSPECIFIED);
++ STRING_OPTION(colour_transfer, color_transfer, AVCOL_TRC_UNSPECIFIED);
++ STRING_OPTION(colour_matrix, color_space, AVCOL_SPC_UNSPECIFIED);
++ STRING_OPTION(chroma_location, chroma_location, AVCHROMA_LOC_UNSPECIFIED);
++
++ return 0;
++}
++
++static av_cold int deint_v4l2m2m_init(AVFilterContext *avctx)
++{
++ return common_v4l2m2m_init(avctx, FILTER_V4L2_DEINTERLACE);
++}
++
++static av_cold int scale_v4l2m2m_init(AVFilterContext *avctx)
++{
++    // The common setup allocates priv->shared, so it has to succeed before shared is touched
++    const int err = common_v4l2m2m_init(avctx, FILTER_V4L2_SCALE);
++    DeintV4L2M2MContext *scale_priv;
++
++    if (err != 0)
++        return err;
++
++    scale_priv = avctx->priv;
++
++    // Flag the scaler's shared context as one_to_one
++    scale_priv->shared->one_to_one = 1;
++    return 0;
++}
++
++static void deint_v4l2m2m_uninit(AVFilterContext *avctx)
++{
++    DeintV4L2M2MContext *priv = avctx->priv;
++    DeintV4L2M2MContextShared *ctx = priv->shared;
++    if (ctx == NULL) // uninit also runs after a failed init, when shared was never allocated
++        return;
++    av_log(priv, AV_LOG_VERBOSE, "Frames Rx: %u, Frames Tx: %u\n", ctx->frames_rx, ctx->frames_tx);
++    ctx->done = 1;
++    ctx->logctx = NULL; // Log to NULL works, log to missing crashes
++    pts_track_uninit(&ctx->track);
++    deint_v4l2m2m_destroy_context(ctx);
++}
++
++static const AVOption deinterlace_v4l2m2m_options[] = {
++ { NULL },
++};
++
++AVFILTER_DEFINE_CLASS(deinterlace_v4l2m2m);
++
++#define OFFSET(x) offsetof(DeintV4L2M2MContext, x)
++#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM)
++
++static const AVOption scale_v4l2m2m_options[] = {
++ { "w", "Output video width",
++ OFFSET(w_expr), AV_OPT_TYPE_STRING, {.str = "iw"}, .flags = FLAGS },
++ { "h", "Output video height",
++ OFFSET(h_expr), AV_OPT_TYPE_STRING, {.str = "ih"}, .flags = FLAGS },
++ { "format", "Output video format (software format of hardware frames)",
++ OFFSET(output_format_string), AV_OPT_TYPE_STRING, .flags = FLAGS },
++ // These colour properties match the ones of the same name in vf_scale.
++ { "out_color_matrix", "Output colour matrix coefficient set",
++ OFFSET(colour_matrix_string), AV_OPT_TYPE_STRING, { .str = NULL }, .flags = FLAGS },
++ { "out_range", "Output colour range",
++ OFFSET(colour_range), AV_OPT_TYPE_INT, { .i64 = AVCOL_RANGE_UNSPECIFIED },
++ AVCOL_RANGE_UNSPECIFIED, AVCOL_RANGE_JPEG, FLAGS, "range" },
++ { "full", "Full range",
++ 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range" },
++ { "limited", "Limited range",
++ 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range" },
++ { "jpeg", "Full range",
++ 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range" },
++ { "mpeg", "Limited range",
++ 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range" },
++ { "tv", "Limited range",
++ 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range" },
++ { "pc", "Full range",
++ 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range" },
++ // These colour properties match the ones in the VAAPI scaler
++ { "out_color_primaries", "Output colour primaries",
++ OFFSET(colour_primaries_string), AV_OPT_TYPE_STRING,
++ { .str = NULL }, .flags = FLAGS },
++ { "out_color_transfer", "Output colour transfer characteristics",
++ OFFSET(colour_transfer_string), AV_OPT_TYPE_STRING,
++ { .str = NULL }, .flags = FLAGS },
++ { "out_chroma_location", "Output chroma sample location",
++ OFFSET(chroma_location_string), AV_OPT_TYPE_STRING,
++ { .str = NULL }, .flags = FLAGS },
++ { "force_original_aspect_ratio", "decrease or increase w/h if necessary to keep the original AR", OFFSET(force_original_aspect_ratio), AV_OPT_TYPE_INT, { .i64 = 0}, 0, 2, FLAGS, "force_oar" },
++ { "force_divisible_by", "enforce that the output resolution is divisible by a defined integer when force_original_aspect_ratio is used", OFFSET(force_divisible_by), AV_OPT_TYPE_INT, { .i64 = 1}, 1, 256, FLAGS },
++ { NULL },
++};
++
++AVFILTER_DEFINE_CLASS(scale_v4l2m2m);
++
++static const AVFilterPad deint_v4l2m2m_inputs[] = {
++ {
++ .name = "default",
++ .type = AVMEDIA_TYPE_VIDEO,
++ },
++};
++
++static const AVFilterPad deint_v4l2m2m_outputs[] = {
++ {
++ .name = "default",
++ .type = AVMEDIA_TYPE_VIDEO,
++ .config_props = deint_v4l2m2m_config_props,
++ },
++};
++
++const AVFilter ff_vf_deinterlace_v4l2m2m = { // const: FFmpeg 5.x declares filters "extern const AVFilter"
++    .name           = "deinterlace_v4l2m2m",
++    .description    = NULL_IF_CONFIG_SMALL("V4L2 M2M deinterlacer"),
++    .priv_size      = sizeof(DeintV4L2M2MContext),
++    .init           = &deint_v4l2m2m_init,
++    .uninit         = &deint_v4l2m2m_uninit,
++    FILTER_INPUTS(deint_v4l2m2m_inputs),
++    FILTER_OUTPUTS(deint_v4l2m2m_outputs),
++    FILTER_SINGLE_PIXFMT(AV_PIX_FMT_DRM_PRIME), // video pads: a pixel format, not a sample format
++    .priv_class     = &deinterlace_v4l2m2m_class,
++    .activate       = deint_v4l2m2m_activate,
++};
++
++const AVFilter ff_vf_scale_v4l2m2m = { // const: FFmpeg 5.x declares filters "extern const AVFilter"
++    .name           = "scale_v4l2m2m",
++    .description    = NULL_IF_CONFIG_SMALL("V4L2 M2M scaler"),
++    .priv_size      = sizeof(DeintV4L2M2MContext),
++    .init           = &scale_v4l2m2m_init,
++    .uninit         = &deint_v4l2m2m_uninit,
++    FILTER_INPUTS(deint_v4l2m2m_inputs),
++    FILTER_OUTPUTS(deint_v4l2m2m_outputs),
++    FILTER_SINGLE_PIXFMT(AV_PIX_FMT_DRM_PRIME), // video pads: a pixel format, not a sample format
++    .priv_class     = &scale_v4l2m2m_class,
++    .activate       = deint_v4l2m2m_activate,
++};
++
+--- /dev/null
++++ b/libavfilter/vf_unsand.c
+@@ -0,0 +1,228 @@
++/*
++ * Copyright (c) 2007 Bobby Bingham
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++/**
++ * @file
++ * unsand video filter: convert sand pixel formats to planar yuv
++ */
++
++#include <string.h>
++
++#include "libavutil/internal.h"
++#include "libavutil/mem.h"
++#include "libavutil/pixdesc.h"
++#include "libavutil/opt.h"
++#include "libavutil/rpi_sand_fns.h"
++
++#include "avfilter.h"
++#include "formats.h"
++#include "internal.h"
++#include "video.h"
++
++typedef struct UnsandContext {
++ const AVClass *class;
++} UnsandContext;
++
++static av_cold void uninit(AVFilterContext *ctx)
++{
++// UnsandContext *s = ctx->priv;
++}
++
++static av_cold int init(AVFilterContext *ctx)
++{
++// UnsandContext *s = ctx->priv;
++
++ return 0;
++}
++
++
++static int filter_frame(AVFilterLink *link, AVFrame *in)
++{
++ AVFilterLink * const outlink = link->dst->outputs[0];
++ AVFrame *out = NULL;
++ int rv = 0;
++
++ if (outlink->format == in->format) {
++ // If nothing to do then do nothing
++ out = in;
++ }
++ else
++ {
++ if ((out = ff_get_video_buffer(outlink, av_frame_cropped_width(in), av_frame_cropped_height(in))) == NULL)
++ {
++ rv = AVERROR(ENOMEM);
++ goto fail;
++ }
++ if (av_rpi_sand_to_planar_frame(out, in) != 0)
++ {
++ rv = -1;
++ goto fail;
++ }
++
++ av_frame_free(&in);
++ }
++
++ return ff_filter_frame(outlink, out);
++
++fail:
++ av_frame_free(&out);
++ av_frame_free(&in);
++ return rv;
++}
++
++#if 0
++static void dump_fmts(const AVFilterFormats * fmts)
++{
++ int i;
++ if (fmts== NULL) {
++ printf("NULL\n");
++ return;
++ }
++ for (i = 0; i < fmts->nb_formats; ++i) {
++ printf(" %d", fmts->formats[i]);
++ }
++ printf("\n");
++}
++#endif
++
++static int query_formats(AVFilterContext *ctx)
++{
++    // Output format list = the input's list with sand formats mapped to planar YUV420P/YUV420P10
++    int ret;
++
++    // If we aren't connected at both ends then just do nothing
++    if (ctx->inputs[0] == NULL || ctx->outputs[0] == NULL)
++        return 0;
++
++    // Our output formats depend on our input formats and we can't/don't
++    // want to convert between bit depths so we need to wait for the source
++    // to have an opinion before we do
++    if (ctx->inputs[0]->incfg.formats == NULL)
++        return AVERROR(EAGAIN);
++
++    // Accept anything
++    if (ctx->inputs[0]->outcfg.formats == NULL &&
++        (ret = ff_formats_ref(ctx->inputs[0]->incfg.formats, &ctx->inputs[0]->outcfg.formats)) < 0)
++        return ret;
++
++    // Filter out sand formats
++    // Generate a container if we don't already have one
++    if (ctx->outputs[0]->incfg.formats == NULL)
++    {
++        // Somewhat rubbish way of ensuring we have a good structure
++        static const enum AVPixelFormat out_fmts[] = {AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE};
++        AVFilterFormats *formats = ff_make_format_list(out_fmts);
++
++        if (formats == NULL)
++            return AVERROR(ENOMEM);
++        if ((ret = ff_formats_ref(formats, &ctx->outputs[0]->incfg.formats)) < 0)
++            return ret;
++    }
++
++    // Replace old format list with new filtered list derived from what our
++    // input says it can do
++    {
++        const AVFilterFormats * const src_ff = ctx->inputs[0]->outcfg.formats;
++        AVFilterFormats * const dst_ff = ctx->outputs[0]->incfg.formats;
++        enum AVPixelFormat *dst_fmts = av_malloc(sizeof(enum AVPixelFormat) * src_ff->nb_formats);
++        int i, n = 0;
++        int seen_420p = 0, seen_420p10 = 0;
++
++        if (dst_fmts == NULL) // allocation failed: leave the existing output list untouched
++            return AVERROR(ENOMEM);
++
++        for (i = 0; i < src_ff->nb_formats; ++i) {
++            const enum AVPixelFormat f = src_ff->formats[i];
++
++            switch (f){
++                case AV_PIX_FMT_YUV420P:
++                case AV_PIX_FMT_SAND128:
++                case AV_PIX_FMT_RPI4_8:
++                    if (!seen_420p) {
++                        seen_420p = 1;
++                        dst_fmts[n++] = AV_PIX_FMT_YUV420P;
++                    }
++                    break;
++                case AV_PIX_FMT_SAND64_10:
++                case AV_PIX_FMT_YUV420P10:
++                case AV_PIX_FMT_RPI4_10:
++                    if (!seen_420p10) {
++                        seen_420p10 = 1;
++                        dst_fmts[n++] = AV_PIX_FMT_YUV420P10;
++                    }
++                    break;
++                default:
++                    dst_fmts[n++] = f;
++                    break;
++            }
++        }
++
++        av_freep(&dst_ff->formats);
++        dst_ff->formats = dst_fmts;
++        dst_ff->nb_formats = n;
++    }
++
++//    printf("Unsand: %s calc: ", __func__);
++//    dump_fmts(ctx->outputs[0]->incfg.formats);
++
++    return 0;
++}
++
++
++#define OFFSET(x) offsetof(UnsandContext, x)
++static const AVOption unsand_options[] = {
++ { NULL }
++};
++
++
++AVFILTER_DEFINE_CLASS(unsand);
++
++static const AVFilterPad avfilter_vf_unsand_inputs[] = {
++    {
++        .name         = "default",
++        .type         = AVMEDIA_TYPE_VIDEO,
++        .filter_frame = filter_frame,
++    },
++    // No { NULL } terminator: FILTER_INPUTS() counts every array element as a pad
++};
++
++static const AVFilterPad avfilter_vf_unsand_outputs[] = {
++ {
++ .name = "default",
++ .type = AVMEDIA_TYPE_VIDEO
++ },
++};
++
++const AVFilter ff_vf_unsand = { // const: FFmpeg 5.x declares filters "extern const AVFilter"
++    .name        = "unsand",
++    .description = NULL_IF_CONFIG_SMALL("Convert sand pix fmt to yuv"),
++
++    .init        = init,
++    .uninit      = uninit,
++
++    FILTER_QUERY_FUNC(query_formats),
++
++    .priv_size   = sizeof(UnsandContext),
++    .priv_class  = &unsand_class,
++
++    FILTER_INPUTS(avfilter_vf_unsand_inputs),
++    FILTER_OUTPUTS(avfilter_vf_unsand_outputs),
++};
++
+--- a/libavfilter/x86/vf_bwdif_init.c
++++ b/libavfilter/x86/vf_bwdif_init.c
+@@ -42,11 +42,9 @@ void ff_bwdif_filter_line_12bit_ssse3(vo
+ int mrefs2, int prefs3, int mrefs3, int prefs4,
+ int mrefs4, int parity, int clip_max);
+
+-av_cold void ff_bwdif_init_x86(BWDIFContext *bwdif)
++av_cold void ff_bwdif_init_x86(BWDIFContext *bwdif, int bit_depth)
+ {
+- YADIFContext *yadif = &bwdif->yadif;
+ int cpu_flags = av_get_cpu_flags();
+- int bit_depth = (!yadif->csp) ? 8 : yadif->csp->comp[0].depth;
+
+ if (bit_depth <= 8) {
+ if (EXTERNAL_SSE2(cpu_flags))
+--- a/libavformat/matroskaenc.c
++++ b/libavformat/matroskaenc.c
+@@ -75,6 +75,10 @@
+
+ #define IS_WEBM(mkv) (CONFIG_WEBM_MUXER && CONFIG_MATROSKA_MUXER ? \
+ ((mkv)->mode == MODE_WEBM) : CONFIG_WEBM_MUXER)
++
++/* Reserved size for H264 headers if not extant at init time */
++#define MAX_H264_HEADER_SIZE 1024
++
+ #define IS_SEEKABLE(pb, mkv) (((pb)->seekable & AVIO_SEEKABLE_NORMAL) && \
+ !(mkv)->is_live)
+
+@@ -1119,8 +1123,12 @@ static int mkv_assemble_native_codecpriv
+ case AV_CODEC_ID_WAVPACK:
+ return put_wv_codecpriv(dyn_cp, extradata, extradata_size);
+ case AV_CODEC_ID_H264:
+- return ff_isom_write_avcc(dyn_cp, extradata,
+- extradata_size);
++ if (extradata_size)
++ return ff_isom_write_avcc(dyn_cp, extradata,
++ extradata_size);
++ else
++ *size_to_reserve = MAX_H264_HEADER_SIZE;
++ break;
+ case AV_CODEC_ID_HEVC:
+ return ff_isom_write_hvcc(dyn_cp, extradata,
+ extradata_size, 0);
+@@ -2726,8 +2734,8 @@ static int mkv_check_new_extra_data(AVFo
+ }
+ break;
+ #endif
+- // FIXME: Remove the following once libaom starts propagating proper extradata during init()
+- // See https://bugs.chromium.org/p/aomedia/issues/detail?id=2208
++ // FIXME: Remove the following once libaom starts propagating extradata during init()
++ // See https://bugs.chromium.org/p/aomedia/issues/detail?id=2012
+ case AV_CODEC_ID_AV1:
+ if (side_data_size && mkv->track.bc && !par->extradata_size) {
+ // If the reserved space doesn't suffice, only write
+@@ -2739,6 +2747,16 @@ static int mkv_check_new_extra_data(AVFo
+ } else if (!par->extradata_size)
+ return AVERROR_INVALIDDATA;
+ break;
++ // H264 V4L2 has a similar issue
++ case AV_CODEC_ID_H264:
++ if (side_data_size && mkv->track.bc && !par->extradata_size) {
++ ret = mkv_update_codecprivate(s, mkv, side_data, side_data_size,
++ par, mkv->track.bc, track, 0);
++ if (ret < 0)
++ return ret;
++ } else if (!par->extradata_size)
++ return AVERROR_INVALIDDATA;
++ break;
+ default:
+ if (side_data_size)
+ av_log(s, AV_LOG_DEBUG, "Ignoring new extradata in a packet for stream %d.\n", pkt->stream_index);
+@@ -3171,9 +3189,15 @@ static int mkv_init(struct AVFormatConte
+ track->reformat = mkv_reformat_wavpack;
+ break;
+ case AV_CODEC_ID_H264:
++ // Default to reformat if no extradata as the only current
++ // encoder which does this is v4l2m2m which needs reformat
++ if (par->extradata_size == 0 ||
++ (par->extradata_size > 3 &&
++ (AV_RB24(par->extradata) == 1 || AV_RB32(par->extradata) == 1)))
++ track->reformat = mkv_reformat_h2645;
++ break;
+ case AV_CODEC_ID_HEVC:
+- if ((par->codec_id == AV_CODEC_ID_H264 && par->extradata_size > 0 ||
+- par->codec_id == AV_CODEC_ID_HEVC && par->extradata_size > 6) &&
++ if (par->extradata_size > 6 &&
+ (AV_RB24(par->extradata) == 1 || AV_RB32(par->extradata) == 1))
+ track->reformat = mkv_reformat_h2645;
+ break;
+--- a/libavformat/movenc.c
++++ b/libavformat/movenc.c
+@@ -6318,6 +6318,7 @@ static int mov_write_single_packet(AVFor
+ if (trk->par->codec_id == AV_CODEC_ID_MP4ALS ||
+ trk->par->codec_id == AV_CODEC_ID_AAC ||
+ trk->par->codec_id == AV_CODEC_ID_AV1 ||
++ trk->par->codec_id == AV_CODEC_ID_H264 ||
+ trk->par->codec_id == AV_CODEC_ID_FLAC) {
+ size_t side_size;
+ uint8_t *side = av_packet_get_side_data(pkt, AV_PKT_DATA_NEW_EXTRADATA, &side_size);
+--- a/libavformat/rtpenc.c
++++ b/libavformat/rtpenc.c
+@@ -19,6 +19,7 @@
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
++#include "avc.h"
+ #include "avformat.h"
+ #include "mpegts.h"
+ #include "internal.h"
+@@ -584,8 +585,25 @@ static int rtp_write_packet(AVFormatCont
+ ff_rtp_send_vc2hq(s1, pkt->data, size, st->codecpar->field_order != AV_FIELD_PROGRESSIVE ? 1 : 0);
+ break;
+ case AV_CODEC_ID_H264:
++ {
++ uint8_t *side_data;
++ size_t side_data_size = 0;
++
++ side_data = av_packet_get_side_data(pkt, AV_PKT_DATA_NEW_EXTRADATA,
++ &side_data_size);
++
++ if (side_data_size != 0) {
++ int ps_size = side_data_size;
++ uint8_t * ps_buf = NULL;
++
++ ff_avc_write_annexb_extradata(side_data, &ps_buf, &ps_size);
++ av_log(s1, AV_LOG_TRACE, "H264: write side data=%d\n", ps_size);
++ ff_rtp_send_h264_hevc(s1, ps_buf ? ps_buf : side_data, ps_size);
++ av_free(ps_buf);
++ }
+ ff_rtp_send_h264_hevc(s1, pkt->data, size);
+ break;
++ }
+ case AV_CODEC_ID_H261:
+ ff_rtp_send_h261(s1, pkt->data, size);
+ break;
+--- a/libavutil/Makefile
++++ b/libavutil/Makefile
+@@ -72,6 +72,7 @@ HEADERS = adler32.h
+ rational.h \
+ replaygain.h \
+ ripemd.h \
++ rpi_sand_fns.h \
+ samplefmt.h \
+ sha.h \
+ sha512.h \
+@@ -191,6 +192,7 @@ OBJS-$(CONFIG_MACOS_KPERF)
+ OBJS-$(CONFIG_MEDIACODEC) += hwcontext_mediacodec.o
+ OBJS-$(CONFIG_OPENCL) += hwcontext_opencl.o
+ OBJS-$(CONFIG_QSV) += hwcontext_qsv.o
++OBJS-$(CONFIG_SAND) += rpi_sand_fns.o
+ OBJS-$(CONFIG_VAAPI) += hwcontext_vaapi.o
+ OBJS-$(CONFIG_VIDEOTOOLBOX) += hwcontext_videotoolbox.o
+ OBJS-$(CONFIG_VDPAU) += hwcontext_vdpau.o
+@@ -211,6 +213,7 @@ SKIPHEADERS-$(CONFIG_D3D11VA) +
+ SKIPHEADERS-$(CONFIG_DXVA2) += hwcontext_dxva2.h
+ SKIPHEADERS-$(CONFIG_QSV) += hwcontext_qsv.h
+ SKIPHEADERS-$(CONFIG_OPENCL) += hwcontext_opencl.h
++SKIPHEADERS                            += rpi_sand_fn_pw.h
+ SKIPHEADERS-$(CONFIG_VAAPI) += hwcontext_vaapi.h
+ SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX) += hwcontext_videotoolbox.h
+ SKIPHEADERS-$(CONFIG_VDPAU) += hwcontext_vdpau.h
+--- a/libavutil/aarch64/Makefile
++++ b/libavutil/aarch64/Makefile
+@@ -1,4 +1,6 @@
+ OBJS += aarch64/cpu.o \
+ aarch64/float_dsp_init.o \
+
+-NEON-OBJS += aarch64/float_dsp_neon.o
++NEON-OBJS += aarch64/float_dsp_neon.o \
++ aarch64/rpi_sand_neon.o \
++
+--- /dev/null
++++ b/libavutil/aarch64/rpi_sand_neon.S
+@@ -0,0 +1,672 @@
++/*
++Copyright (c) 2021 Michael Eiler
++
++Redistribution and use in source and binary forms, with or without
++modification, are permitted provided that the following conditions are met:
++ * Redistributions of source code must retain the above copyright
++ notice, this list of conditions and the following disclaimer.
++ * Redistributions in binary form must reproduce the above copyright
++ notice, this list of conditions and the following disclaimer in the
++ documentation and/or other materials provided with the distribution.
++ * Neither the name of the copyright holder nor the
++ names of its contributors may be used to endorse or promote products
++ derived from this software without specific prior written permission.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++Authors: Michael Eiler <eiler.mike@gmail.com>
++*/
++
++#include "asm.S"
++
++// void ff_rpi_sand8_lines_to_planar_y8(
++// uint8_t * dest, : x0
++// unsigned int dst_stride, : w1, bytes per dest row (>= _w)
++// const uint8_t * src, : x2
++// unsigned int src_stride1, : w3, always 128
++// unsigned int src_stride2, : w4
++// unsigned int _x, : w5, ignored (assumed 0)
++// unsigned int y, : w6, first source row to copy
++// unsigned int _w, : w7
++// unsigned int h); : [sp, #0]
++
++function ff_rpi_sand8_lines_to_planar_y8, export=1
++ // w15 contains the number of rows we need to process
++ ldr w15, [sp, #0]
++
++ // w8 will contain the number of full blocks per row
++ // w8 = floor(_w/stride1); it must come from _w (w7), not from dst_stride (w1)
++ // stride1 is assumed to always be 128
++ mov w8, w7
++ lsr w8, w8, #7
++ sub w1, w1, w7 // w1 = dst_stride - _w, dest bytes to skip after every row
++ // in case the width of the image is not a multiple of 128, there will
++ // be an incomplete block at the end of every row
++ // w9 contains the number of pixels stored within this block
++ // w9 = _w - w8 * 128
++ lsl w9, w8, #7
++ sub w9, w7, w9
++
++ // this is the value we have to add to the src pointer after reading a complete block
++ // it will move the address to the start of the next block
++ // w10 = stride2 * stride1 - stride1
++ mov w10, w4
++ lsl w10, w10, #7
++ sub w10, w10, #128
++
++ // w11 is the row offset, meaning the start offset of the current row within every column
++ // it starts at y * stride1 and is increased by stride1 within every iteration of the row_loop
++ lsl w11, w6, #7
++
++ // w12 = 0, processed row count
++ eor w12, w12, w12
++row_loop:
++ // start of the first block within the current row
++ // x13 = row offset + src
++ mov x13, x2
++ add x13, x13, x11
++
++ // w14 = 0, processed block count
++ eor w14, w14, w14
++
++ cmp w8, #0
++ beq no_main_y8
++
++block_loop:
++ // copy 128 bytes (a full block) into the vector registers v0-v7 and increase the src address by 128
++ // v0-v7 are not callee saved, so there is no need to back them up
++ ld1 { v0.16b, v1.16b, v2.16b, v3.16b}, [x13], #64
++ ld1 { v4.16b, v5.16b, v6.16b, v7.16b}, [x13], #64
++
++ // write these registers back to the destination vector and increase the dst address by 128
++ st1 { v0.16b, v1.16b, v2.16b, v3.16b }, [x0], #64
++ st1 { v4.16b, v5.16b, v6.16b, v7.16b }, [x0], #64
++
++ // move the source register to the beginning of the next block (x13 = src + block offset)
++ add x13, x13, x10
++ // increase the block counter
++ add w14, w14, #1
++
++ // continue with the block_loop while there are full blocks left to copy
++ cmp w8, w14
++ bgt block_loop
++
++ // handle the last block at the end of each row
++ // at most 127 byte values copied from src to dst
++no_main_y8:
++ eor w5, w5, w5 // i = 0
++incomplete_block_loop_y8:
++ cmp w5, w9
++ bge incomplete_block_loop_end_y8
++
++ ldrb w6, [x13] // w6 (y) is free to reuse here, the row offset lives in w11
++ strb w6, [x0]
++ add x13, x13, #1
++ add x0, x0, #1
++
++ add w5, w5, #1
++ b incomplete_block_loop_y8
++incomplete_block_loop_end_y8:
++ // step over the dest row padding so the next row starts at dest + dst_stride
++ add x0, x0, w1, sxtw
++ // increase the row offset by 128 (stride1)
++ add w11, w11, #128
++ // increment the row counter
++ add w12, w12, #1
++
++ // process the next row while the row counter is below h
++ cmp w15, w12
++ bgt row_loop
++
++ ret
++endfunc
++
++
++
++// void ff_rpi_sand8_lines_to_planar_c8(
++// uint8_t * dst_u, : x0
++// unsigned int dst_stride_u, : w1, bytes per U row (>= _w)
++// uint8_t * dst_v, : x2
++// unsigned int dst_stride_v, : w3, bytes per V row (>= _w)
++// const uint8_t * src, : x4
++// unsigned int stride1, : w5 == 128
++// unsigned int stride2, : w6
++// unsigned int _x, : w7, ignored (assumed 0)
++// unsigned int y, : [sp, #0], first source row to copy
++// unsigned int _w, : [sp, #8], U/V pairs per row
++// unsigned int h); : [sp, #16]
++
++function ff_rpi_sand8_lines_to_planar_c8, export=1
++ // w7 = width (this overwrites _x, which is not used)
++ ldr w7, [sp, #8]
++
++ // w15 contains the number of rows we need to process
++ // counts down
++ ldr w15, [sp, #16]
++
++ // number of full blocks, w8 = _w / (stride1 >> 1) == _w / 64 == _w >> 6
++ mov w8, w7
++ lsr w8, w8, #6
++
++ // number of pixels in block at the end of every row
++ // w9 = _w - (w8 * 64)
++ lsl w9, w8, #6
++ sub w9, w7, w9
++
++ // Skip at the end of the line to account for stride (w12 for U, w3 for V)
++ sub w12, w1, w7
++ sub w3, w3, w7
++ // address delta to the beginning of the next block
++ // w10 = (stride2 * stride1 - stride1) = stride2 * 128 - 128
++ lsl w10, w6, #7
++ sub w10, w10, #128
++
++ ldr w11, [sp, #0] // w11 = y
++ lsl w11, w11, #7 // w11 = row address start offset = y * stride1
++
++row_loop_c8:
++ // start of the first block within the current row
++ // x13 = row offset + src
++ mov x13, x4
++ add x13, x13, x11
++
++ // w14 = 0, processed block count
++ eor w14, w14, w14
++
++ cmp w8, #0
++ beq no_main_c8
++
++block_loop_c8:
++ // load the full block -> 128 bytes, the block contains 64 interleaved U and V values
++ ld2 { v0.16b, v1.16b }, [x13], #32
++ ld2 { v2.16b, v3.16b }, [x13], #32
++ ld2 { v4.16b, v5.16b }, [x13], #32
++ ld2 { v6.16b, v7.16b }, [x13], #32
++
++ // swap register so that we can write them out with a single instruction
++ mov v16.16b, v1.16b
++ mov v17.16b, v3.16b
++ mov v18.16b, v5.16b
++ mov v1.16b, v2.16b
++ mov v2.16b, v4.16b
++ mov v3.16b, v6.16b
++ mov v4.16b, v16.16b
++ mov v5.16b, v17.16b
++ mov v6.16b, v18.16b
++
++ st1 { v0.16b, v1.16b, v2.16b, v3.16b }, [x0], #64
++ st1 { v4.16b, v5.16b, v6.16b, v7.16b }, [x2], #64
++
++ // increment block counter and move src to the beginning of the next block
++ add w14, w14, #1
++ add x13, x13, x10
++
++ // jump to block_loop_c8 iff the block count is smaller than the number of full blocks
++ cmp w8, w14
++ bgt block_loop_c8
++
++no_main_c8:
++ // handle incomplete block at the end of every row
++ eor w5, w5, w5 // pair counter for the tail, counts up to w9
++incomplete_block_loop_c8:
++ cmp w5, w9
++ bge incomplete_block_loop_end_c8
++
++ ldrb w1, [x13] // w1 is free to reuse, the U stride delta lives in w12
++ strb w1, [x0]
++ add x13, x13, #1
++
++ ldrb w1, [x13]
++ strb w1, [x2]
++ add x13, x13, #1
++
++ add x0, x0, #1
++ add x2, x2, #1
++
++ add w5, w5, #1
++ b incomplete_block_loop_c8
++incomplete_block_loop_end_c8:
++
++ // increase row_offset by stride1 and skip the padding of both dest rows
++ add w11, w11, #128
++ add x0, x0, w12, sxtw
++ add x2, x2, w3, sxtw
++
++ // jump to row_loop_c8 iff there are rows left to process
++ subs w15, w15, #1
++ bgt row_loop_c8
++
++ ret
++endfunc
++
++// Unzip chroma: de-interleave two groups of three 8h vectors into U and V
++//
++// On entry:
++// a0 = V0, U2, ...
++// a1 = U0, V1, ...
++// a2 = U1, V2, ...
++// b0 = V8, U10, ...
++// b1 = U8, V9, ...
++// b2 = U9, V10, ...
++//
++// On exit (U in d0-d2, V written back over a0-a2):
++// d0 = U0, U3, ...
++// d1 = U1, U4, ... and d2 = U2, U5, ...
++// a0 = V0, V3, ..
++// a1 = V1, V4, ... and a2 = V2, V5, ...
++//
++// Reg order for USAND is a1, a0, a2 (i.e. swap natural order of 1st 2 dest regs)
++
++.macro UZPH_C d0, d1, d2, a0, a1, a2, b0, b1, b2
++ uzp1 \d0\().8h, \a1\().8h, \b1\().8h // even lanes of a1:b1
++ uzp1 \d1\().8h, \a2\().8h, \b2\().8h // even lanes of a2:b2
++ uzp2 \d2\().8h, \a0\().8h, \b0\().8h // odd lanes of a0:b0, must read a0 before it is overwritten below
++
++ uzp1 \a0\().8h, \a0\().8h, \b0\().8h // even lanes of a0:b0
++ uzp2 \a1\().8h, \a1\().8h, \b1\().8h // odd lanes of a1:b1
++ uzp2 \a2\().8h, \a2\().8h, \b2\().8h // odd lanes of a2:b2
++.endm
++
++// SAND30 -> 10bit: split the 32-bit words of a0:a1 into d0/d1/d2 = bits 0-9 / 10-19 / 20-29 as 8h
++.macro USAND10 d0, d1, d2, a0, a1
++ shrn \d2\().4h, \a0\().4s, #14 // bits 14-29 of each word, field 2 sits in the top 10 bits
++ shrn \d1\().4h, \a0\().4s, #10 // bits 10-25 of each word, field 1 sits in the low 10 bits
++
++ shrn2 \d2\().8h, \a1\().4s, #14 // same for the words of a1, into the upper half
++ shrn2 \d1\().8h, \a1\().4s, #10
++ uzp1 \d0\().8h, \a0\().8h, \a1\().8h // low halfword of every word, field 0 sits in the low 10 bits
++
++ ushr \d2\().8h, \d2\().8h, #6 // move field 2 down to bits 0-9
++ bic \d0\().8h, #0xfc, lsl #8 // clear bits 10-15
++ bic \d1\().8h, #0xfc, lsl #8 // clear bits 10-15
++.endm
++
++// SAND30 -> 8bit: a0-a3 (16 words) -> d0/d1/d2 = top 8 bits of field 0/1/2 as 16b; t0-t2 are scratch
++.macro USAND8 d0, d1, d2, a0, a1, a2, a3, t0, t1, t2
++ shrn \d1\().4h, \a0\().4s, #12 // bits 12-27, low byte = bits 12-19 (field 1 >> 2)
++ shrn2 \d1\().8h, \a1\().4s, #12
++ uzp1 \d0\().8h, \a0\().8h, \a1\().8h // low halfwords (bits 0-15)
++ uzp2 \d2\().8h, \a0\().8h, \a1\().8h // high halfwords (bits 16-31)
++
++ shrn \t1\().4h, \a2\().4s, #12 // same three steps for a2:a3 into the scratch registers
++ shrn2 \t1\().8h, \a3\().4s, #12
++ uzp1 \t0\().8h, \a2\().8h, \a3\().8h
++ uzp2 \t2\().8h, \a2\().8h, \a3\().8h
++
++ shrn \d0\().8b, \d0\().8h, #2 // bits 2-9 (field 0 >> 2)
++ shrn2 \d0\().16b, \t0\().8h, #2
++ shrn \d2\().8b, \d2\().8h, #6 // bits 22-29 (field 2 >> 2)
++ shrn2 \d2\().16b, \t2\().8h, #6
++ uzp1 \d1\().16b, \d1\().16b, \t1\().16b // keep the low byte of every halfword
++.endm
++
++
++// void ff_rpi_sand30_lines_to_planar_c16(
++// uint8_t * dst_u, // [x0]
++// unsigned int dst_stride_u, // [w1]
++// uint8_t * dst_v, // [x2]
++// unsigned int dst_stride_v, // [w3]
++// const uint8_t * src, // [x4]
++// unsigned int stride1, // [w5] 128
++// unsigned int stride2, // [w6]
++// unsigned int _x, // [w7] 0
++// unsigned int y, // [sp, #0] -> w7
++// unsigned int _w, // [sp, #8] -> w8 (w9 = count left in the current row)
++// unsigned int h); // [sp, #16] -> w10
++
++function ff_rpi_sand30_lines_to_planar_c16, export=1
++ ldr w7, [sp, #0] // y
++ ldr w8, [sp, #8] // _w
++ ldr w10, [sp, #16] // h
++ lsl w6, w6, #7 // Fixup stride2
++ sub w6, w6, #64 // x6 = stride2 * 128 - 64, post-increment of the 2nd load below
++ uxtw x6, w6
++ sub w1, w1, w8, LSL #1 // Fixup chroma strides
++ sub w3, w3, w8, LSL #1 // both become stride - 2 * _w (2 bytes per sample)
++ lsl w7, w7, #7 // Add y to src
++ add x4, x4, w7, UXTW
++10: // start of a row
++ mov w13, #0 // w13 is not read anywhere else in this function
++ mov x5, x4 // x5 = read pointer for this row
++ mov w9, w8 // w9 = samples per plane still to write in this row
++1:
++ ld1 {v0.4s-v3.4s}, [x5], #64
++ ld1 {v4.4s-v7.4s}, [x5], x6 // 128 bytes read, x5 advanced by stride2 * 128 in total
++ subs w9, w9, #48 // 128 bytes hold 48 U and 48 V samples; flags are used by blt/bne below
++
++ USAND10 v17, v16, v18, v0, v1
++ USAND10 v20, v19, v21, v2, v3
++ UZPH_C v0, v1, v2, v16, v17, v18, v19, v20, v21
++ USAND10 v23, v22, v24, v4, v5
++ USAND10 v26, v25, v27, v6, v7
++ UZPH_C v4, v5, v6, v22, v23, v24, v25, v26, v27
++
++ blt 2f // fewer than 48 samples were left: partial write
++
++ st3 {v0.8h-v2.8h}, [x0], #48
++ st3 {v4.8h-v6.8h}, [x0], #48
++ st3 {v16.8h-v18.8h}, [x2], #48
++ st3 {v22.8h-v24.8h}, [x2], #48
++
++ bne 1b // st3 does not change the flags: loop unless the row ended exactly
++11: // end of a row
++ subs w10, w10, #1
++ add x4, x4, #128 // next source row within the stripe
++ add x0, x0, w1, UXTW
++ add x2, x2, w3, UXTW
++ bne 10b
++99:
++ ret
++
++// Partial final write (w9 = samples left - 48, so it is negative here)
++2:
++ cmp w9, #24-48 // at least 24 samples left?
++ blt 1f
++ st3 {v0.8h - v2.8h}, [x0], #48
++ st3 {v16.8h - v18.8h}, [x2], #48
++ beq 11b // exactly 24: row done
++ mov v0.16b, v4.16b // bring the second 24 samples into the working registers
++ mov v1.16b, v5.16b
++ sub w9, w9, #24
++ mov v2.16b, v6.16b
++ mov v16.16b, v22.16b
++ mov v17.16b, v23.16b
++ mov v18.16b, v24.16b
++1:
++ cmp w9, #12-48 // at least 12 samples left?
++ blt 1f
++ st3 {v0.4h - v2.4h}, [x0], #24
++ st3 {v16.4h - v18.4h}, [x2], #24
++ beq 11b
++ mov v0.d[0], v0.d[1] // shift the upper halves down
++ sub w9, w9, #12
++ mov v1.d[0], v1.d[1]
++ mov v2.d[0], v2.d[1]
++ mov v16.d[0], v16.d[1]
++ mov v17.d[0], v17.d[1]
++ mov v18.d[0], v18.d[1]
++1:
++ cmp w9, #6-48 // at least 6 samples left?
++ blt 1f
++ st3 {v0.h - v2.h}[0], [x0], #6
++ st3 {v0.h - v2.h}[1], [x0], #6
++ st3 {v16.h - v18.h}[0], [x2], #6
++ st3 {v16.h - v18.h}[1], [x2], #6
++ beq 11b
++ mov v0.s[0], v0.s[1] // shift down by two halfword lanes
++ sub w9, w9, #6
++ mov v1.s[0], v1.s[1]
++ mov v2.s[0], v2.s[1]
++ mov v16.s[0], v16.s[1]
++ mov v17.s[0], v17.s[1]
++ mov v18.s[0], v18.s[1]
++1:
++ cmp w9, #3-48 // at least 3 samples left?
++ blt 1f
++ st3 {v0.h - v2.h}[0], [x0], #6
++ st3 {v16.h - v18.h}[0], [x2], #6
++ beq 11b
++ mov v0.h[0], v0.h[1] // at most 2 samples remain, so v2 and v18 are not needed
++ sub w9, w9, #3
++ mov v1.h[0], v1.h[1]
++ mov v16.h[0], v16.h[1]
++ mov v17.h[0], v17.h[1]
++1:
++ cmp w9, #2-48 // 2 samples left?
++ blt 1f
++ st2 {v0.h - v1.h}[0], [x0], #4
++ st2 {v16.h - v17.h}[0], [x2], #4
++ b 11b
++1: // exactly 1 sample left
++ st1 {v0.h}[0], [x0], #2
++ st1 {v16.h}[0], [x2], #2
++ b 11b
++endfunc
++
++
++//void ff_rpi_sand30_lines_to_planar_p010(
++// uint8_t * dest,
++// unsigned int dst_stride,
++// const uint8_t * src,
++// unsigned int src_stride1,
++// unsigned int src_stride2,
++// unsigned int _x,
++// unsigned int y,
++// unsigned int _w,
++// unsigned int h);
++
++// void ff_rpi_sand30_lines_to_planar_y16(
++// uint8_t * dest, : x0
++// unsigned int dst_stride, : w1
++// const uint8_t * src, : x2
++// unsigned int src_stride1, : w3, always 128
++// unsigned int src_stride2, : w4
++// unsigned int _x, : w5
++// unsigned int y, : w6
++// unsigned int _w, : w7
++// unsigned int h); : [sp, #0]
++//
++// Assumes that we are starting on a stripe boundary and that overreading
++// within the stripe is OK. However it does respect the dest size for writing
++
++function ff_rpi_sand30_lines_to_planar_y16, export=1
++ lsl w4, w4, #7
++ sub w4, w4, #64 // x4 = stride2 * 128 - 64, post-increment of the 2nd load below
++ uxtw x4, w4
++ sub w1, w1, w7, lsl #1 // w1 = dst_stride - 2 * _w (2 bytes per pixel)
++ uxtw x6, w6
++ add x8, x2, x6, lsl #7 // x8 = src + y * 128, start of the current row
++ ldr w6, [sp, #0] // w6 = h, rows left
++
++10: // start of a row
++ mov x2, x8
++ mov w5, w7 // w5 = pixels still to write in this row
++1:
++ ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x2], #64
++ ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x2], x4
++
++ subs w5, w5, #96 // 128 bytes hold 96 pixels; flags are used by blt/bne below
++
++ USAND10 v16, v17, v18, v0, v1
++ USAND10 v19, v20, v21, v2, v3
++ USAND10 v22, v23, v24, v4, v5
++ USAND10 v25, v26, v27, v6, v7
++
++ blt 2f // fewer than 96 pixels were left: partial write
++
++ st3 {v16.8h, v17.8h, v18.8h}, [x0], #48
++ st3 {v19.8h, v20.8h, v21.8h}, [x0], #48
++ st3 {v22.8h, v23.8h, v24.8h}, [x0], #48
++ st3 {v25.8h, v26.8h, v27.8h}, [x0], #48
++
++ bne 1b // loop unless the row ended exactly
++
++11: // end of a row
++ subs w6, w6, #1
++ add x0, x0, w1, uxtw
++ add x8, x8, #128 // next source row within the stripe
++ bne 10b
++
++ ret
++
++// Partial final write (w5 = pixels left - 96, so it is negative here)
++2:
++ cmp w5, #48-96 // at least 48 pixels left?
++ blt 1f
++ st3 {v16.8h, v17.8h, v18.8h}, [x0], #48
++ st3 {v19.8h, v20.8h, v21.8h}, [x0], #48
++ beq 11b
++ mov v16.16b, v22.16b // bring the second 48 pixels into v16-v21
++ mov v17.16b, v23.16b
++ sub w5, w5, #48
++ mov v18.16b, v24.16b
++ mov v19.16b, v25.16b
++ mov v20.16b, v26.16b
++ mov v21.16b, v27.16b
++1:
++ cmp w5, #24-96 // at least 24 pixels left?
++ blt 1f
++ st3 {v16.8h, v17.8h, v18.8h}, [x0], #48
++ beq 11b
++ mov v16.16b, v19.16b
++ mov v17.16b, v20.16b
++ sub w5, w5, #24
++ mov v18.16b, v21.16b
++1:
++ cmp w5, #12-96 // at least 12 pixels left?
++ blt 1f
++ st3 {v16.4h, v17.4h, v18.4h}, [x0], #24
++ beq 11b
++ mov v16.d[0], v16.d[1] // shift the upper halves down
++ sub w5, w5, #12
++ mov v17.d[0], v17.d[1]
++ mov v18.d[0], v18.d[1]
++1:
++ cmp w5, #6-96 // at least 6 pixels left?
++ blt 1f
++ st3 {v16.h, v17.h, v18.h}[0], [x0], #6
++ st3 {v16.h, v17.h, v18.h}[1], [x0], #6
++ beq 11b
++ mov v16.s[0], v16.s[1] // shift down by two halfword lanes
++ sub w5, w5, #6
++ mov v17.s[0], v17.s[1]
++ mov v18.s[0], v18.s[1]
++1:
++ cmp w5, #3-96 // at least 3 pixels left?
++ blt 1f
++ st3 {v16.h, v17.h, v18.h}[0], [x0], #6
++ beq 11b
++ mov v16.h[0], v16.h[1] // at most 2 pixels remain, so v18 is not needed
++ sub w5, w5, #3
++ mov v17.h[0], v17.h[1]
++1:
++ cmp w5, #2-96 // 2 pixels left?
++ blt 1f
++ st2 {v16.h, v17.h}[0], [x0], #4
++ b 11b
++1: // exactly 1 pixel left
++ st1 {v16.h}[0], [x0], #2
++ b 11b
++
++endfunc
++
++// void ff_rpi_sand30_lines_to_planar_y8(
++// uint8_t * dest, : x0
++// unsigned int dst_stride, : w1
++// const uint8_t * src, : x2
++// unsigned int src_stride1, : w3, always 128
++// unsigned int src_stride2, : w4
++// unsigned int _x, : w5
++// unsigned int y, : w6
++// unsigned int _w, : w7
++// unsigned int h); : [sp, #0]
++//
++// Assumes that we are starting on a stripe boundary and that overreading
++// within the stripe is OK. However it does respect the dest size for writing
++
++function ff_rpi_sand30_lines_to_planar_y8, export=1
++ lsl w4, w4, #7
++ sub w4, w4, #64 // x4 = stride2 * 128 - 64, post-increment of the 2nd load below
++ uxtw x4, w4
++ sub w1, w1, w7 // w1 = dst_stride - _w (1 byte per pixel)
++ uxtw x6, w6
++ add x8, x2, x6, lsl #7 // x8 = src + y * 128, start of the current row
++ ldr w6, [sp, #0] // w6 = h, rows left
++
++10: // start of a row
++ mov x2, x8
++ mov w5, w7 // w5 = pixels still to write in this row
++1:
++ ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x2], #64
++ ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x2], x4
++
++ subs w5, w5, #96 // 128 bytes hold 96 pixels; flags are used by blt/bne below
++
++ // first 48 pixels -> v16-v18, second 48 -> v19-v21; v22-v24 are scratch only
++ USAND8 v16, v17, v18, v0, v1, v2, v3, v22, v23, v24
++ USAND8 v19, v20, v21, v4, v5, v6, v7, v22, v23, v24
++
++ blt 2f // fewer than 96 pixels were left: partial write
++
++ st3 {v16.16b, v17.16b, v18.16b}, [x0], #48
++ st3 {v19.16b, v20.16b, v21.16b}, [x0], #48
++
++ bne 1b // loop unless the row ended exactly
++
++11: // end of a row
++ subs w6, w6, #1
++ add x0, x0, w1, uxtw
++ add x8, x8, #128 // next source row within the stripe
++ bne 10b
++
++ ret
++
++// Partial final write (w5 = pixels left - 96, so it is negative here)
++2:
++ cmp w5, #48-96 // at least 48 pixels left?
++ blt 1f
++ st3 {v16.16b, v17.16b, v18.16b}, [x0], #48
++ beq 11b
++ mov v16.16b, v19.16b // second 48 pixels live in v19-v21 (not in the v22-v24 scratch)
++ mov v17.16b, v20.16b
++ sub w5, w5, #48
++ mov v18.16b, v21.16b
++1:
++ cmp w5, #24-96 // at least 24 pixels left?
++ blt 1f
++ st3 {v16.8b, v17.8b, v18.8b}, [x0], #24
++ beq 11b
++ mov v16.d[0], v16.d[1] // shift the upper halves down
++ sub w5, w5, #24
++ mov v17.d[0], v17.d[1]
++ mov v18.d[0], v18.d[1]
++1:
++ cmp w5, #12-96 // at least 12 pixels left?
++ blt 1f
++ st3 {v16.b, v17.b, v18.b}[0], [x0], #3
++ st3 {v16.b, v17.b, v18.b}[1], [x0], #3
++ st3 {v16.b, v17.b, v18.b}[2], [x0], #3
++ st3 {v16.b, v17.b, v18.b}[3], [x0], #3
++ beq 11b
++ mov v16.s[0], v16.s[1] // shift down by four byte lanes
++ sub w5, w5, #12
++ mov v17.s[0], v17.s[1]
++ mov v18.s[0], v18.s[1]
++1:
++ cmp w5, #6-96 // at least 6 pixels left?
++ blt 1f
++ st3 {v16.b, v17.b, v18.b}[0], [x0], #3
++ st3 {v16.b, v17.b, v18.b}[1], [x0], #3
++ beq 11b
++ mov v16.h[0], v16.h[1] // shift down by two byte lanes
++ sub w5, w5, #6
++ mov v17.h[0], v17.h[1]
++ mov v18.h[0], v18.h[1]
++1:
++ cmp w5, #3-96 // at least 3 pixels left?
++ blt 1f
++ st3 {v16.b, v17.b, v18.b}[0], [x0], #3
++ beq 11b
++ mov v16.b[0], v16.b[1] // at most 2 pixels remain, so v18 is not needed
++ sub w5, w5, #3
++ mov v17.b[0], v17.b[1]
++1:
++ cmp w5, #2-96 // 2 pixels left?
++ blt 1f
++ st2 {v16.b, v17.b}[0], [x0], #2
++ b 11b
++1: // exactly 1 pixel left
++ st1 {v16.b}[0], [x0], #1
++ b 11b
++
++endfunc
++
+--- /dev/null
++++ b/libavutil/aarch64/rpi_sand_neon.h
+@@ -0,0 +1,59 @@
++/*
++Copyright (c) 2021 Michael Eiler
++
++Redistribution and use in source and binary forms, with or without
++modification, are permitted provided that the following conditions are met:
++ * Redistributions of source code must retain the above copyright
++ notice, this list of conditions and the following disclaimer.
++ * Redistributions in binary form must reproduce the above copyright
++ notice, this list of conditions and the following disclaimer in the
++ documentation and/or other materials provided with the distribution.
++ * Neither the name of the copyright holder nor the
++ names of its contributors may be used to endorse or promote products
++ derived from this software without specific prior written permission.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++Authors: Michael Eiler <eiler.mike@gmail.com>
++*/
++
++#pragma once
++#include <stdint.h>
++#ifdef __cplusplus
++extern "C" {
++#endif
++// SAND8 luma stripes -> planar 8-bit rows; implemented in rpi_sand_neon.S
++void ff_rpi_sand8_lines_to_planar_y8(uint8_t * dest, unsigned int dst_stride,
++ const uint8_t * src, unsigned int src_stride1, unsigned int src_stride2,
++ unsigned int _x, unsigned int y, unsigned int _w, unsigned int h);
++// SAND8 interleaved chroma stripes -> separate 8-bit U and V planes
++void ff_rpi_sand8_lines_to_planar_c8(uint8_t * dst_u, unsigned int dst_stride_u,
++ uint8_t * dst_v, unsigned int dst_stride_v, const uint8_t * src,
++ unsigned int stride1, unsigned int stride2, unsigned int _x, unsigned int y,
++ unsigned int _w, unsigned int h);
++// SAND30 luma stripes (three 10-bit samples per 32-bit word) -> 16-bit rows
++void ff_rpi_sand30_lines_to_planar_y16(uint8_t * dest, unsigned int dst_stride,
++ const uint8_t * src, unsigned int src_stride1, unsigned int src_stride2,
++ unsigned int _x, unsigned int y, unsigned int _w, unsigned int h);
++// SAND30 interleaved chroma stripes -> separate 16-bit U and V planes
++void ff_rpi_sand30_lines_to_planar_c16(uint8_t * dst_u, unsigned int dst_stride_u,
++ uint8_t * dst_v, unsigned int dst_stride_v, const uint8_t * src, unsigned int stride1,
++ unsigned int stride2, unsigned int _x, unsigned int y, unsigned int _w, unsigned int h);
++// SAND30 luma stripes -> 8-bit rows (top 8 bits of every 10-bit sample)
++void ff_rpi_sand30_lines_to_planar_y8(uint8_t * dest, unsigned int dst_stride,
++ const uint8_t * src, unsigned int src_stride1, unsigned int src_stride2,
++ unsigned int _x, unsigned int y, unsigned int _w, unsigned int h);
++
++#ifdef __cplusplus
++}
++#endif
++
+--- a/libavutil/arm/Makefile
++++ b/libavutil/arm/Makefile
+@@ -6,3 +6,4 @@ VFP-OBJS += arm/float_dsp_init_vfp.o
+
+ NEON-OBJS += arm/float_dsp_init_neon.o \
+ arm/float_dsp_neon.o \
++ arm/rpi_sand_neon.o \
+--- /dev/null
++++ b/libavutil/arm/rpi_sand_neon.S
+@@ -0,0 +1,925 @@
++/*
++Copyright (c) 2018 Raspberry Pi (Trading) Ltd.
++All rights reserved.
++
++Redistribution and use in source and binary forms, with or without
++modification, are permitted provided that the following conditions are met:
++ * Redistributions of source code must retain the above copyright
++ notice, this list of conditions and the following disclaimer.
++ * Redistributions in binary form must reproduce the above copyright
++ notice, this list of conditions and the following disclaimer in the
++ documentation and/or other materials provided with the distribution.
++ * Neither the name of the copyright holder nor the
++ names of its contributors may be used to endorse or promote products
++ derived from this software without specific prior written permission.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++Authors: John Cox
++*/
++
++#include "libavutil/arm/asm.S"
++
++
++@ General notes:
++@ Having done some timing on this in sand8->y8 (Pi4)
++@ vst1 (680fps) is a bit faster than vstm (660fps)
++@ vldm (680fps) is noticeably faster than vld1 (480fps)
++@ (or it might be that a mix is what is required)
++@
++@ At least on a Pi4 it is no more expensive to have a single auto-inc register
++@ for dest address than it is to have 2 used alternately (On Pi3 Ben asserted
++@ the latter was better)
++@
++@ vstm will bus error on unaligned access (so will vldm), vst1 is safe unless
++@ the memory is uncached.
++@ As these are Sand -> planar we can assume that src is going to be aligned but
++@ it is possible that dest isn't (converting to .yuv or other packed format).
++@ Luckily vst1 is faster than vstm :-) so all is well
++@ vst1 has alignment requirements of el size so maybe splitting vst1.32 into 4
++@ .8 stores would let us do non-word aligned stores into uncached but it
++@ probably isn't worth it.
++
++
++
++
++@ void ff_rpi_sand128b_stripe_to_8_10(
++@ uint8_t * dest, // [r0]
++@ const uint8_t * src1, // [r1]
++@ const uint8_t * src2, // [r2]
++@ unsigned int lines); // [r3]
++
++.macro stripe2_to_8, bit_depth
++ vpush {q4-q7} @ q4-q7 are callee saved and are overwritten below
++1:
++ vldm r1!, {q0-q7} @ 128 bytes (64 x 16 bit) from src1
++ subs r3, #1 @ one line done; flags are consumed by the bne below
++ vldm r2!, {q8-q15} @ 128 bytes (64 x 16 bit) from src2
++ vqrshrn.u16 d0, q0, #\bit_depth - 8
++ vqrshrn.u16 d1, q1, #\bit_depth - 8
++ vqrshrn.u16 d2, q2, #\bit_depth - 8
++ vqrshrn.u16 d3, q3, #\bit_depth - 8
++ vqrshrn.u16 d4, q4, #\bit_depth - 8
++ vqrshrn.u16 d5, q5, #\bit_depth - 8
++ vqrshrn.u16 d6, q6, #\bit_depth - 8
++ vqrshrn.u16 d7, q7, #\bit_depth - 8
++ vqrshrn.u16 d8, q8, #\bit_depth - 8
++ vqrshrn.u16 d9, q9, #\bit_depth - 8
++ vqrshrn.u16 d10, q10, #\bit_depth - 8
++ vqrshrn.u16 d11, q11, #\bit_depth - 8
++ vqrshrn.u16 d12, q12, #\bit_depth - 8
++ vqrshrn.u16 d13, q13, #\bit_depth - 8
++ vqrshrn.u16 d14, q14, #\bit_depth - 8
++ vqrshrn.u16 d15, q15, #\bit_depth - 8
++ vstm r0!, {q0-q7} @ 128 narrowed bytes out: src1 part first, then src2 part
++ bne 1b
++ vpop {q4-q7}
++ bx lr
++.endm
++
++function ff_rpi_sand128b_stripe_to_8_10, export=1
++ stripe2_to_8 10 @ 10-bit input, so every sample is shifted right by 2 with rounding
++endfunc
++
++@ void ff_rpi_sand8_lines_to_planar_y8(
++@ uint8_t * dest, // [r0]
++@ unsigned int dst_stride, // [r1]
++@ const uint8_t * src, // [r2]
++@ unsigned int src_stride1, // [r3] Ignored - assumed 128
++@ unsigned int src_stride2, // [sp, #0] -> r3
++@ unsigned int _x, // [sp, #4] Ignored - 0
++@ unsigned int y, // [sp, #8] (r7 in prefix)
++@ unsigned int _w, // [sp, #12] -> r6 (cur r5)
++@ unsigned int h); // [sp, #16] -> r7
++@
++@ Assumes that we are starting on a stripe boundary and that overreading
++@ within the stripe is OK. However it does respect the dest size for writing
++
++function ff_rpi_sand8_lines_to_planar_y8, export=1
++ push {r4-r8, lr} @ +24: stack args are now at [sp, #24] and up
++ ldr r3, [sp, #24] @ src_stride2
++ ldr r6, [sp, #36] @ _w
++ ldr r7, [sp, #32] @ y
++ lsl r3, #7 @ r3 = stride2 * 128, distance between stripes
++ sub r1, r6 @ r1 = dst_stride - _w
++ add r8, r2, r7, lsl #7 @ r8 = src + y * 128, start of the current row
++ ldr r7, [sp, #40] @ h, rows left
++
++10: @ start of a row
++ mov r2, r8
++ add r4, r0, #24 @ r4 is not read anywhere else in this function
++ mov r5, r6 @ r5 = bytes still to write in this row
++ mov lr, #0 @ lr is not read anywhere else in this function
++1:
++ vldm r2, {q8-q15} @ 128 bytes from the current stripe
++ add r2, r3 @ same row in the next stripe
++ subs r5, #128
++ blt 2f @ fewer than 128 bytes were left: partial write
++ vst1.8 {d16, d17, d18, d19}, [r0]!
++ vst1.8 {d20, d21, d22, d23}, [r0]!
++ vst1.8 {d24, d25, d26, d27}, [r0]!
++ vst1.8 {d28, d29, d30, d31}, [r0]!
++ bne 1b @ loop unless the row ended exactly
++11: @ end of a row
++ subs r7, #1
++ add r0, r1
++ add r8, #128
++ bne 10b
++
++ pop {r4-r8, pc}
++
++@ Partial final write (r5 = bytes left - 128, so it is negative here)
++2:
++ cmp r5, #64-128 @ at least 64 bytes left?
++ blt 1f
++ vst1.8 {d16, d17, d18, d19}, [r0]!
++ vst1.8 {d20, d21, d22, d23}, [r0]!
++ beq 11b
++ vmov q8, q12 @ bring the second 64 bytes into q8-q11
++ vmov q9, q13
++ sub r5, #64
++ vmov q10, q14
++ vmov q11, q15
++1:
++ cmp r5, #32-128 @ at least 32 bytes left?
++ blt 1f
++ vst1.8 {d16, d17, d18, d19}, [r0]!
++ beq 11b
++ vmov q8, q10
++ sub r5, #32
++ vmov q9, q11
++1:
++ cmp r5, #16-128 @ at least 16 bytes left?
++ blt 1f
++ vst1.8 {d16, d17}, [r0]!
++ beq 11b
++ sub r5, #16
++ vmov q8, q9
++1:
++ cmp r5, #8-128 @ at least 8 bytes left?
++ blt 1f
++ vst1.8 {d16}, [r0]!
++ beq 11b
++ sub r5, #8
++ vmov d16, d17
++1:
++ cmp r5, #4-128 @ at least 4 bytes left?
++ blt 1f
++ vst1.32 {d16[0]}, [r0]!
++ beq 11b
++ sub r5, #4
++ vshr.u64 d16, #32 @ move the upper 4 bytes of d16 down
++1:
++ cmp r5, #2-128 @ 2 or 3 bytes left?
++ blt 1f
++ vst1.16 {d16[0]}, [r0]!
++ beq 11b @ exactly 2: row done
++ vst1.8 {d16[2]}, [r0]! @ third byte
++ b 11b
++1: @ exactly 1 byte left
++ vst1.8 {d16[0]}, [r0]!
++ b 11b
++endfunc
++
++@ void ff_rpi_sand8_lines_to_planar_c8(
++@ uint8_t * dst_u, // [r0]
++@ unsigned int dst_stride_u, // [r1]
++@ uint8_t * dst_v, // [r2]
++@ unsigned int dst_stride_v, // [r3]
++@ const uint8_t * src, // [sp, #0] -> r4, r5
++@ unsigned int stride1, // [sp, #4] 128
++@ unsigned int stride2, // [sp, #8] -> r8
++@ unsigned int _x, // [sp, #12] 0
++@ unsigned int y, // [sp, #16] (r7 in prefix)
++@ unsigned int _w, // [sp, #20] -> r12, r6
++@ unsigned int h); // [sp, #24] -> r7
++@
++@ Assumes that we are starting on a stripe boundary and that overreading
++@ within the stripe is OK. However it does respect the dest size for writing
++
++function ff_rpi_sand8_lines_to_planar_c8, export=1
++ push {r4-r8, lr} @ +24
++
++ ldr r5, [sp, #24] @ src
++ ldr r8, [sp, #32] @ stride2
++ ldr r7, [sp, #40] @ y
++ ldr r6, [sp, #44] @ _w
++ lsl r8, #7 @ r8 = stride2 * 128, distance between stripes
++ add r5, r5, r7, lsl #7 @ r5 = src + y * 128, start of the current row
++ sub r1, r1, r6 @ r1 = dst_stride_u - _w
++ sub r3, r3, r6 @ r3 = dst_stride_v - _w
++ ldr r7, [sp, #48] @ h, rows left
++ vpush {q4-q7} @ callee saved, loaded last because the offsets above assume +24 only
++
++10: @ start of a row
++ mov r4, r5
++ mov r12, r6 @ r12 = U/V pairs still to write in this row
++1:
++ subs r12, #64 @ one stripe row holds 64 U/V pairs
++ vldm r4, {q0-q7}
++ add r4, r8
++ it gt
++ vldmgt r4, {q8-q15} @ second stripe is only read if more than 64 pairs were left
++ add r4, r8
++
++ vuzp.8 q0, q1 @ even bytes (U) into the first register, odd bytes (V) into the second
++ vuzp.8 q2, q3
++ vuzp.8 q4, q5
++ vuzp.8 q6, q7
++
++ vuzp.8 q8, q9
++ vuzp.8 q10, q11
++ vuzp.8 q12, q13
++ vuzp.8 q14, q15
++ subs r12, #64 @ flags are consumed by the blt/bne below
++
++ @ Rearrange regs so we can use vst1 with 4 regs
++ vswp q1, q2
++ vswp q5, q6
++ vswp q9, q10
++ vswp q13, q14
++ blt 2f @ fewer than 128 pairs were left: partial write
++
++ vst1.8 {d0, d1, d2, d3 }, [r0]!
++ vst1.8 {d8, d9, d10, d11}, [r0]!
++ vst1.8 {d16, d17, d18, d19}, [r0]!
++ vst1.8 {d24, d25, d26, d27}, [r0]!
++
++ vst1.8 {d4, d5, d6, d7 }, [r2]!
++ vst1.8 {d12, d13, d14, d15}, [r2]!
++ vst1.8 {d20, d21, d22, d23}, [r2]!
++ vst1.8 {d28, d29, d30, d31}, [r2]!
++ bne 1b @ loop unless the row ended exactly
++11: @ end of a row
++ subs r7, #1
++ add r5, #128
++ add r0, r1
++ add r2, r3
++ bne 10b
++ vpop {q4-q7}
++ pop {r4-r8,pc}
++
++2: @ Partial final write (r12 = pairs left - 128, so it is negative here)
++ cmp r12, #64-128 @ at least 64 pairs left?
++ blt 1f
++ vst1.8 {d0, d1, d2, d3 }, [r0]!
++ vst1.8 {d8, d9, d10, d11}, [r0]!
++ vst1.8 {d4, d5, d6, d7 }, [r2]!
++ vst1.8 {d12, d13, d14, d15}, [r2]!
++ beq 11b
++ sub r12, #64
++ vmov q0, q8 @ bring the data of the second stripe into q0-q7
++ vmov q1, q9
++ vmov q2, q10
++ vmov q3, q11
++ vmov q4, q12
++ vmov q5, q13
++ vmov q6, q14
++ vmov q7, q15
++1:
++ cmp r12, #32-128 @ at least 32 pairs left?
++ blt 1f
++ vst1.8 {d0, d1, d2, d3 }, [r0]!
++ vst1.8 {d4, d5, d6, d7 }, [r2]!
++ beq 11b
++ sub r12, #32
++ vmov q0, q4
++ vmov q1, q5
++ vmov q2, q6
++ vmov q3, q7
++1:
++ cmp r12, #16-128 @ at least 16 pairs left?
++ blt 1f
++ vst1.8 {d0, d1 }, [r0]!
++ vst1.8 {d4, d5 }, [r2]!
++ beq 11b
++ sub r12, #16
++ vmov q0, q1
++ vmov q2, q3
++1:
++ cmp r12, #8-128 @ at least 8 pairs left?
++ blt 1f
++ vst1.8 {d0}, [r0]!
++ vst1.8 {d4}, [r2]!
++ beq 11b
++ sub r12, #8
++ vmov d0, d1
++ vmov d4, d5
++1:
++ cmp r12, #4-128 @ at least 4 pairs left?
++ blt 1f
++ vst1.32 {d0[0]}, [r0]!
++ vst1.32 {d4[0]}, [r2]!
++ beq 11b
++ sub r12, #4
++ vmov s0, s1 @ move the upper 4 bytes of d0 down
++ vmov s8, s9 @ move the upper 4 bytes of d4 down
++1:
++ cmp r12, #2-128 @ 2 or 3 pairs left?
++ blt 1f
++ vst1.16 {d0[0]}, [r0]!
++ vst1.16 {d4[0]}, [r2]!
++ beq 11b @ exactly 2: row done
++ vst1.8 {d0[2]}, [r0]! @ third byte of each plane
++ vst1.8 {d4[2]}, [r2]!
++ b 11b
++1: @ exactly 1 pair left
++ vst1.8 {d0[0]}, [r0]!
++ vst1.8 {d4[0]}, [r2]!
++ b 11b
++endfunc
++
++
++
++@ void ff_rpi_sand30_lines_to_planar_y16(
++@ uint8_t * dest, // [r0]
++@ unsigned int dst_stride, // [r1]
++@ const uint8_t * src, // [r2]
++@ unsigned int src_stride1, // [r3] Ignored - assumed 128
++@ unsigned int src_stride2, // [sp, #0] -> r3
++@ unsigned int _x, // [sp, #4] Ignored - 0
++@ unsigned int y, // [sp, #8] (r7 in prefix)
++@ unsigned int _w, // [sp, #12] -> r6 (cur r5)
++@ unsigned int h); // [sp, #16] -> r7
++@
++@ Assumes that we are starting on a stripe boundary and that overreading
++@ within the stripe is OK. However it does respect the dest size for writing
++
++function ff_rpi_sand30_lines_to_planar_y16, export=1
++ push {r4-r8, lr} @ +24
++ ldr r3, [sp, #24] @ src_stride2
++ ldr r6, [sp, #36] @ _w
++ ldr r7, [sp, #32] @ y
++ mov r12, #48 @ post-increment used by the paired vst3 stores
++ sub r3, #1
++ lsl r3, #7 @ r3 = (stride2 - 1) * 128, added once a 128 byte stripe row is consumed
++ sub r1, r1, r6, lsl #1 @ r1 = dst_stride - 2 * _w (2 bytes per pixel)
++ add r8, r2, r7, lsl #7 @ r8 = src + y * 128, start of the current row
++ ldr r7, [sp, #40] @ h, rows left
++
++10: @ start of a row
++ mov r2, r8
++ add r4, r0, #24 @ r4 = second store pointer, 24 bytes ahead of r0
++ mov r5, r6 @ r5 = pixels still to write in this row
++ mov lr, #0 @ lr = byte offset inside the current 128 byte stripe row
++1:
++ vldm r2!, {q10-q13} @ 64 bytes = 16 words = 48 pixels
++ add lr, #64
++
++ vshrn.u32 d4 , q10, #14 @ Cannot vshrn.u32 #20!
++ ands lr, #127 @ Z set after every second load (128 bytes consumed)
++ vshrn.u32 d2, q10, #10
++ vmovn.u32 d0, q10
++
++ vshrn.u32 d5, q11, #14
++ it eq
++ addeq r2, r3 @ jump to the same row of the next stripe
++ vshrn.u32 d3, q11, #10
++ vmovn.u32 d1, q11
++
++ subs r5, #48 @ flags are consumed by the blt/bne below
++ vshr.u16 q2, #6 @ q2 = bits 20-29 of every word
++ vbic.u16 q0, #0xfc00 @ q0 = bits 0-9
++ vbic.u16 q1, #0xfc00 @ q1 = bits 10-19
++
++ vshrn.u32 d20, q12, #14 @ same unpacking for q12/q13 into q8, q9, q10
++ vshrn.u32 d18, q12, #10
++ vmovn.u32 d16, q12
++
++ vshrn.u32 d21, q13, #14
++ vshrn.u32 d19, q13, #10
++ vmovn.u32 d17, q13
++
++ vshr.u16 q10, #6
++ vbic.u16 q8, #0xfc00
++ vbic.u16 q9 , #0xfc00
++ blt 2f @ fewer than 48 pixels were left: partial write
++
++ vst3.16 {d0, d2, d4}, [r0], r12 @ each vst3 writes 24 bytes; r0 and r4 alternate 24 bytes apart
++ vst3.16 {d1, d3, d5}, [r4], r12
++ vst3.16 {d16, d18, d20}, [r0], r12
++ vst3.16 {d17, d19, d21}, [r4], r12
++
++ bne 1b @ loop unless the row ended exactly
++
++11: @ end of a row
++ subs r7, #1
++ add r0, r1
++ add r8, #128
++ bne 10b
++
++ pop {r4-r8, pc}
++
++@ Partial final write (r5 = pixels left - 48, so it is negative here)
++2:
++ cmp r5, #24-48 @ at least 24 pixels left?
++ blt 1f
++ vst3.16 {d0, d2, d4}, [r0], r12
++ vst3.16 {d1, d3, d5}, [r4]
++ beq 11b
++ vmov q0, q8 @ bring the second 24 pixels into q0-q2
++ sub r5, #24
++ vmov q1, q9
++ vmov q2, q10
++1:
++ cmp r5, #12-48 @ at least 12 pixels left?
++ blt 1f
++ vst3.16 {d0, d2, d4}, [r0]!
++ beq 11b
++ vmov d0, d1
++ sub r5, #12
++ vmov d2, d3
++ vmov d4, d5
++1:
++ cmp r5, #6-48 @ at least 6 pixels left?
++ add r4, r0, #6 @ avoid [r0]! on sequential instructions
++ blt 1f
++ vst3.16 {d0[0], d2[0], d4[0]}, [r0]
++ vst3.16 {d0[1], d2[1], d4[1]}, [r4]
++ add r0, #12
++ beq 11b
++ vmov s0, s1 @ shift down by two halfword lanes
++ sub r5, #6
++ vmov s4, s5
++ vmov s8, s9
++1:
++ cmp r5, #3-48 @ at least 3 pixels left?
++ blt 1f
++ vst3.16 {d0[0], d2[0], d4[0]}, [r0]!
++ beq 11b
++ sub r5, #3
++ vshr.u32 d0, #16 @ at most 2 pixels remain, so d4 is not needed
++ vshr.u32 d2, #16
++1:
++ cmp r5, #2-48 @ 2 pixels left?
++ blt 1f
++ vst2.16 {d0[0], d2[0]}, [r0]!
++ b 11b
++1: @ exactly 1 pixel left
++ vst1.16 {d0[0]}, [r0]!
++ b 11b
++
++endfunc
++
++
++@ void ff_rpi_sand30_lines_to_planar_c16(
++@ uint8_t * dst_u, // [r0]
++@ unsigned int dst_stride_u, // [r1]
++@ uint8_t * dst_v, // [r2]
++@ unsigned int dst_stride_v, // [r3]
++@ const uint8_t * src, // [sp, #0] -> r4, r5
++@ unsigned int stride1, // [sp, #4] 128
++@ unsigned int stride2, // [sp, #8] -> r8
++@ unsigned int _x, // [sp, #12] 0
++@ unsigned int y, // [sp, #16] (r7 in prefix)
++@ unsigned int _w, // [sp, #20] -> r6, r9
++@ unsigned int h); // [sp, #24] -> r7
++@
++@ Assumes that we are starting on a stripe boundary and that overreading
++@ within the stripe is OK. However it does respect the dest size for writing
++
++function ff_rpi_sand30_lines_to_planar_c16, export=1
++ push {r4-r10, lr} @ +32
++ ldr r5, [sp, #32] @ src
++ ldr r8, [sp, #40] @ stride2
++ ldr r7, [sp, #48] @ y
++ ldr r9, [sp, #52] @ _w
++ mov r12, #48 @ post-increment used by the first store of each vst3 pair
++ sub r8, #1
++ lsl r8, #7 @ r8 = (stride2 - 1) * 128, added once a 128 byte stripe row is consumed
++ add r5, r5, r7, lsl #7 @ r5 = src + y * 128, start of the current row
++ sub r1, r1, r9, lsl #1 @ r1 = dst_stride_u - 2 * _w (2 bytes per sample)
++ sub r3, r3, r9, lsl #1 @ r3 = dst_stride_v - 2 * _w
++ ldr r7, [sp, #56] @ h, rows left
++10: @ start of a row
++ mov lr, #0 @ lr = byte offset inside the current 128 byte stripe row
++ mov r4, r5
++ mov r6, r9 @ r6 = samples per plane still to write in this row
++1:
++ vldm r4!, {q0-q3} @ 64 bytes = 16 words = 24 U and 24 V samples
++ add lr, #64
++
++ @ N.B. unpack [0,1,2] -> (reg order) 1, 0, 2
++ vshrn.u32 d20, q0, #14
++ vmovn.u32 d18, q0
++ vshrn.u32 d0, q0, #10
++ ands lr, #127 @ Z set after every second load (128 bytes consumed)
++
++ vshrn.u32 d21, q1, #14
++ vmovn.u32 d19, q1
++ vshrn.u32 d1, q1, #10
++
++ vshrn.u32 d22, q2, #10
++ vmovn.u32 d2, q2
++ vshrn.u32 d4, q2, #14
++
++ add r10, r0, #24 @ r10 = second U store pointer, 24 bytes ahead of r0
++ vshrn.u32 d23, q3, #10
++ vmovn.u32 d3, q3
++ vshrn.u32 d5, q3, #14
++
++ it eq
++ addeq r4, r8 @ jump to the same row of the next stripe
++ vuzp.16 q0, q11
++ vuzp.16 q9, q1
++ vuzp.16 q10, q2
++
++ @ q0 V0, V3,..
++ @ q9 U0, U3...
++ @ q10 U1, U4...
++ @ q11 U2, U5,..
++ @ q1 V1, V4,
++ @ q2 V2, V5,..
++
++ subs r6, #24 @ flags are consumed by the blt/bne below
++ vbic.u16 q11, #0xfc00 @ clear bits 10-15
++ vbic.u16 q9, #0xfc00
++ vshr.u16 q10, #6 @ these two came from the #14 shifts, so move them down by 6
++ vshr.u16 q2, #6
++ vbic.u16 q0, #0xfc00
++ vbic.u16 q1, #0xfc00
++
++ blt 2f @ fewer than 24 samples were left: partial write
++
++ vst3.16 {d18, d20, d22}, [r0], r12 @ U: 24 bytes at r0 and 24 at r10, r0 advances by 48
++ vst3.16 {d19, d21, d23}, [r10]
++ add r10, r2, #24
++ vst3.16 {d0, d2, d4}, [r2], r12 @ V: same layout through r2 and r10
++ vst3.16 {d1, d3, d5}, [r10]
++
++ bne 1b @ loop unless the row ended exactly
++
++11: @ end of a row
++ subs r7, #1
++ add r5, #128
++ add r0, r1
++ add r2, r3
++ bne 10b
++
++ pop {r4-r10, pc}
++
++@ Partial final write (r6 = samples left - 24, so it is negative here)
++2:
++ cmp r6, #-12 @ at least 12 samples left?
++ blt 1f
++ vst3.16 {d18, d20, d22}, [r0]!
++ vst3.16 {d0, d2, d4}, [r2]!
++ beq 11b
++ vmov d18, d19 @ shift the upper halves down
++ vmov d20, d21
++ vmov d22, d23
++ sub r6, #12
++ vmov d0, d1
++ vmov d2, d3
++ vmov d4, d5
++1:
++ cmp r6, #-18 @ at least 6 samples left? (vzip does not change the flags)
++ @ Rezip here as it makes the remaining tail handling easier
++ vzip.16 d0, d18 @ after this: even lanes hold V, odd lanes hold U
++ vzip.16 d2, d20
++ vzip.16 d4, d22
++ blt 1f
++ vst3.16 {d0[1], d2[1], d4[1]}, [r0]!
++ vst3.16 {d0[0], d2[0], d4[0]}, [r2]!
++ vst3.16 {d0[3], d2[3], d4[3]}, [r0]!
++ vst3.16 {d0[2], d2[2], d4[2]}, [r2]!
++ beq 11b
++ vmov d0, d18 @ upper halves of the zipped data
++ vmov d2, d20
++ sub r6, #6
++ vmov d4, d22
++1:
++ cmp r6, #-21 @ at least 3 samples left?
++ blt 1f
++ vst3.16 {d0[1], d2[1], d4[1]}, [r0]!
++ vst3.16 {d0[0], d2[0], d4[0]}, [r2]!
++ beq 11b
++ vmov s4, s5 @ at most 2 samples remain, so d4 is not needed
++ sub r6, #3
++ vmov s0, s1
++1:
++ cmp r6, #-22 @ 2 samples left?
++ blt 1f
++ vst2.16 {d0[1], d2[1]}, [r0]!
++ vst2.16 {d0[0], d2[0]}, [r2]!
++ b 11b
++1: @ exactly 1 sample left
++ vst1.16 {d0[1]}, [r0]!
++ vst1.16 {d0[0]}, [r2]!
++ b 11b
++
++endfunc
++
++@ void ff_rpi_sand30_lines_to_planar_p010(
++@ uint8_t * dest, // [r0]
++@ unsigned int dst_stride, // [r1]
++@ const uint8_t * src, // [r2]
++@ unsigned int src_stride1, // [r3] Ignored - assumed 128
++@ unsigned int src_stride2, // [sp, #0] -> r3
++@ unsigned int _x, // [sp, #4] Ignored - 0
++@ unsigned int y, // [sp, #8] (r7 in prefix)
++@ unsigned int _w, // [sp, #12] -> r6 (cur r5)
++@ unsigned int h); // [sp, #16] -> r7
++@
++@ Assumes that we are starting on a stripe boundary and that overreading
++@ within the stripe is OK. However it does respect the dest size for writing
++
++function ff_rpi_sand30_lines_to_planar_p010, export=1
++ push {r4-r8, lr} @ +24 (stack args are now at [sp, #24] onwards)
++ ldr r3, [sp, #24] @ r3 = src_stride2
++ ldr r6, [sp, #36] @ r6 = _w
++ ldr r7, [sp, #32] @ y
++ mov r12, #48 @ r12 = post-increment used by the full-width stores (r0/r4 leapfrog in 24-byte halves)
++ vmov.u16 q15, #0xffc0 @ q15 = mask that keeps bits 15:6 of every 16-bit lane
++ sub r3, #1
++ lsl r3, #7 @ r3 = (src_stride2 - 1) * 128
++ sub r1, r1, r6, lsl #1 @ r1 = dst_stride - _w * 2 (added to r0 at the end of each line)
++ add r8, r2, r7, lsl #7 @ r8 = src + y * 128 (start of the first line to convert)
++ ldr r7, [sp, #40] @ r7 = h (line countdown)
++
++10: @ Per-line loop
++ mov r2, r8 @ r2 = read pointer for this line
++ add r4, r0, #24 @ r4 = second write pointer, 24 bytes ahead of r0
++ mov r5, r6 @ r5 = width still to write on this line
++ mov lr, #0 @ lr = bytes consumed from the current 128-byte source row
++1:
++ vldm r2!, {q10-q13} @ Load 64 bytes (16 words), r2 += 64
++ add lr, #64
++
++ vshl.u32 q14, q10, #6 @ Word bits 9:0 -> bits 15:6 of the low half
++ ands lr, #127 @ Z set once 128 source bytes have been consumed
++ vshrn.u32 d4, q10, #14 @ Word bits 29:20 -> bits 15:6 (masked below)
++ vshrn.u32 d2, q10, #4 @ Word bits 19:10 -> bits 15:6 (masked below)
++ vmovn.u32 d0, q14
++
++ vshl.u32 q14, q11, #6
++ it eq
++ addeq r2, r3 @ After 128 bytes step r2 on by (src_stride2 - 1) * 128
++ vshrn.u32 d5, q11, #14
++ vshrn.u32 d3, q11, #4
++ vmovn.u32 d1, q14
++
++ subs r5, #48 @ 48 samples per iteration; NEON ops below leave these flags intact for blt/bne
++ vand q2, q15
++ vand q1, q15
++ vand q0, q15
++
++ vshl.u32 q14, q12, #6
++ vshrn.u32 d20, q12, #14
++ vshrn.u32 d18, q12, #4
++ vmovn.u32 d16, q14
++
++ vshl.u32 q14, q13, #6
++ vshrn.u32 d21, q13, #14
++ vshrn.u32 d19, q13, #4
++ vmovn.u32 d17, q14
++
++ vand q10, q15
++ vand q9, q15
++ vand q8, q15
++ blt 2f @ Fewer than 48 samples were left: go to the partial write
++
++ vst3.16 {d0, d2, d4}, [r0], r12 @ Each vst3.16 writes 24 bytes (12 interleaved samples)
++ vst3.16 {d1, d3, d5}, [r4], r12
++ vst3.16 {d16, d18, d20}, [r0], r12
++ vst3.16 {d17, d19, d21}, [r4], r12
++
++ bne 1b @ Loop unless the line ended exactly here
++
++11: @ End of line
++ subs r7, #1
++ add r0, r1 @ r0 -> start of the next destination line
++ add r8, #128 @ Next source line
++ bne 10b
++
++ pop {r4-r8, pc}
++
++@ Partial final write
++@ On entry r5 = (samples still to write) - 48; each stage below tests it against
++@ a smaller chunk size, writes that chunk if it fits and shuffles the remaining
++@ data down into the low registers/lanes.
++2:
++ cmp r5, #24-48
++ blt 1f
++ vst3.16 {d0, d2, d4}, [r0], r12 @ 24 samples via r0 (+48) and r4
++ vst3.16 {d1, d3, d5}, [r4]
++ beq 11b @ Flags still from the cmp above
++ vmov q0, q8
++ sub r5, #24
++ vmov q1, q9
++ vmov q2, q10
++1:
++ cmp r5, #12-48
++ blt 1f
++ vst3.16 {d0, d2, d4}, [r0]! @ 12 samples, r0 += 24
++ beq 11b
++ vmov d0, d1
++ sub r5, #12
++ vmov d2, d3
++ vmov d4, d5
++1:
++ cmp r5, #6-48
++ add r4, r0, #6 @ avoid [r0]! on sequential instructions
++ blt 1f
++ vst3.16 {d0[0], d2[0], d4[0]}, [r0] @ 6 bytes each, no writeback
++ vst3.16 {d0[1], d2[1], d4[1]}, [r4]
++ add r0, #12 @ Account for the 12 bytes just written
++ beq 11b
++ vmov s0, s1
++ sub r5, #6
++ vmov s4, s5
++ vmov s8, s9
++1:
++ cmp r5, #3-48
++ blt 1f
++ vst3.16 {d0[0], d2[0], d4[0]}, [r0]! @ 3 samples, r0 += 6
++ beq 11b
++ sub r5, #3
++ vshr.u32 d0, #16 @ Move 16-bit lane 1 down to lane 0
++ vshr.u32 d2, #16
++1:
++ cmp r5, #2-48
++ blt 1f
++ vst2.16 {d0[0], d2[0]}, [r0]! @ 2 samples
++ b 11b
++1:
++ vst1.16 {d0[0]}, [r0]! @ 1 sample
++ b 11b
++
++endfunc
++
++
++@ void ff_rpi_sand30_lines_to_planar_y8(
++@ uint8_t * dest, // [r0]
++@ unsigned int dst_stride, // [r1]
++@ const uint8_t * src, // [r2]
++@ unsigned int src_stride1, // [r3] Ignored - assumed 128
++@ unsigned int src_stride2, // [sp, #0] -> r3
++@ unsigned int _x, // [sp, #4] Ignored - 0
++@ unsigned int y, // [sp, #8] (r7 in prefix)
++@ unsigned int _w, // [sp, #12] -> r6 (cur r5)
++@ unsigned int h); // [sp, #16] -> r7
++@
++@ Assumes that we are starting on a stripe boundary and that overreading
++@ within the stripe is OK. However it does respect the dest size for writing
++
++function ff_rpi_sand30_lines_to_planar_y8, export=1
++ push {r4-r8, lr} @ +24 (stack args are now at [sp, #24] onwards)
++ ldr r3, [sp, #24] @ r3 = src_stride2
++ ldr r6, [sp, #36] @ r6 = _w
++ ldr r7, [sp, #32] @ y
++ mov r12, #48 @ r12 = post-increment used by the full-width stores (r0/r4 leapfrog in 24-byte halves)
++ lsl r3, #7 @ r3 = src_stride2 * 128 (r2 has no writeback on the load, so no "- 1" here)
++ sub r1, r1, r6 @ r1 = dst_stride - _w (added to r0 at the end of each line)
++ add r8, r2, r7, lsl #7 @ r8 = src + y * 128 (start of the first line to convert)
++ ldr r7, [sp, #40] @ r7 = h (line countdown)
++
++10: @ Per-line loop
++ mov r2, r8 @ r2 = read pointer for this line
++ add r4, r0, #24 @ r4 = second write pointer, 24 bytes ahead of r0
++ mov r5, r6 @ r5 = width still to write on this line
++1:
++ vldm r2, {q8-q15} @ Load 128 bytes (32 words), no writeback
++
++ subs r5, #96 @ 96 samples per iteration; NEON ops below leave these flags intact for blt/bne
++
++ vmovn.u32 d0, q8 @ Word bits 15:0 (first 10-bit sample in bits 9:0)
++ vshrn.u32 d2, q8, #12 @ Word bits 27:12 (second sample's top 8 bits in the low byte)
++ vshrn.u32 d4, q8, #16 @ Cannot vshrn.u32 #20!
++
++ add r2, r3 @ Step r2 on by src_stride2 * 128 for the next 128-byte load
++
++ vmovn.u32 d1, q9
++ vshrn.u32 d3, q9, #12
++ vshrn.u32 d5, q9, #16
++
++ pld [r2, #0]
++
++ vshrn.u16 d0, q0, #2 @ First sample: bits 9:2 -> 8 bits
++ vmovn.u16 d1, q1 @ Second sample: low byte is already its top 8 bits
++ vshrn.u16 d2, q2, #6 @ Third sample: bits 13:6 of the >>16 value -> 8 bits
++
++ vmovn.u32 d16, q10
++ vshrn.u32 d18, q10, #12
++ vshrn.u32 d20, q10, #16
++
++ vmovn.u32 d17, q11
++ vshrn.u32 d19, q11, #12
++ vshrn.u32 d21, q11, #16
++
++ pld [r2, #64]
++
++ vshrn.u16 d4, q8, #2
++ vmovn.u16 d5, q9
++ vshrn.u16 d6, q10, #6
++
++ vmovn.u32 d16, q12
++ vshrn.u32 d18, q12, #12
++ vshrn.u32 d20, q12, #16
++
++ vmovn.u32 d17, q13
++ vshrn.u32 d19, q13, #12
++ vshrn.u32 d21, q13, #16
++
++ vshrn.u16 d16, q8, #2
++ vmovn.u16 d17, q9
++ vshrn.u16 d18, q10, #6
++
++ vmovn.u32 d20, q14
++ vshrn.u32 d22, q14, #12
++ vshrn.u32 d24, q14, #16
++
++ vmovn.u32 d21, q15
++ vshrn.u32 d23, q15, #12
++ vshrn.u32 d25, q15, #16
++
++ vshrn.u16 d20, q10, #2
++ vmovn.u16 d21, q11
++ vshrn.u16 d22, q12, #6
++
++ blt 2f @ Fewer than 96 samples were left: go to the partial write
++
++ vst3.8 {d0, d1, d2}, [r0], r12 @ Each vst3.8 writes 24 bytes (24 interleaved samples)
++ vst3.8 {d4, d5, d6}, [r4], r12
++ vst3.8 {d16, d17, d18}, [r0], r12
++ vst3.8 {d20, d21, d22}, [r4], r12
++
++ bne 1b @ Loop unless the line ended exactly here
++
++11: @ End of line: r0 must equal line start + _w here
++ subs r7, #1
++ add r0, r1 @ r0 -> start of the next destination line
++ add r8, #128 @ Next source line
++ bne 10b
++
++ pop {r4-r8, pc}
++
++@ Partial final write
++@ On entry r5 = (samples still to write) - 96; each stage below tests it against
++@ a smaller chunk size, writes that chunk if it fits and shuffles the remaining
++@ data down into the low registers/lanes.
++2:
++ cmp r5, #48-96
++ blt 1f
++ vst3.8 {d0, d1, d2}, [r0], r12 @ 48 samples via r0 (+48) and r4
++ vst3.8 {d4, d5, d6}, [r4], r12
++ beq 11b @ Flags still from the cmp above
++ vmov q0, q8
++ vmov q2, q10
++ sub r5, #48
++ vmov d2, d18
++ vmov d6, d22
++1:
++ cmp r5, #24-96
++ blt 1f
++ vst3.8 {d0, d1, d2}, [r0]! @ 24 samples, r0 += 24
++ beq 11b
++ vmov q0, q2
++ sub r5, #24
++ vmov d2, d6
++1:
++ cmp r5, #12-96
++ blt 1f
++ vst3.8 {d0[0], d1[0], d2[0]}, [r0]! @ 4 lane stores x 3 bytes, r0 += 12 in total
++ vst3.8 {d0[1], d1[1], d2[1]}, [r0]!
++ vst3.8 {d0[2], d1[2], d2[2]}, [r0]!
++ vst3.8 {d0[3], d1[3], d2[3]}, [r0]!
++ beq 11b
++ vmov s0, s1
++ sub r5, #12
++ vmov s2, s3
++ vmov s4, s5
++1:
++ cmp r5, #6-96
++ blt 1f
++ vst3.8 {d0[0], d1[0], d2[0]}, [r0]! @ 2 lane stores x 3 bytes, r0 += 6 in total
++ vst3.8 {d0[1], d1[1], d2[1]}, [r0]!
++ @ No "add r0, #12" here: the writeback stores above already advanced r0 past the 6 bytes written
++ beq 11b
++ vshr.u32 d0, #16 @ Move byte lanes 2,3 down to lanes 0,1
++ sub r5, #6
++ vshr.u32 d1, #16
++ vshr.u32 d2, #16
++1:
++ cmp r5, #3-96
++ blt 1f
++ vst3.8 {d0[0], d1[0], d2[0]}, [r0]! @ 3 samples, r0 += 3
++ beq 11b
++ sub r5, #3
++ vshr.u32 d0, #8 @ Move byte lane 1 down to lane 0
++ vshr.u32 d1, #8
++1:
++ cmp r5, #2-96
++ blt 1f
++ vst2.8 {d0[0], d1[0]}, [r0]! @ 2 samples
++ b 11b
++1:
++ vst1.8 {d0[0]}, [r0]! @ 1 sample
++ b 11b
++
++endfunc
++
++
+--- /dev/null
++++ b/libavutil/arm/rpi_sand_neon.h
+@@ -0,0 +1,110 @@
++/*
++Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
++All rights reserved.
++
++Redistribution and use in source and binary forms, with or without
++modification, are permitted provided that the following conditions are met:
++ * Redistributions of source code must retain the above copyright
++ notice, this list of conditions and the following disclaimer.
++ * Redistributions in binary form must reproduce the above copyright
++ notice, this list of conditions and the following disclaimer in the
++ documentation and/or other materials provided with the distribution.
++ * Neither the name of the copyright holder nor the
++ names of its contributors may be used to endorse or promote products
++ derived from this software without specific prior written permission.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++Authors: John Cox
++*/
++
++#ifndef AVUTIL_ARM_SAND_NEON_H
++#define AVUTIL_ARM_SAND_NEON_H
++
++void ff_rpi_sand128b_stripe_to_8_10(
++ uint8_t * dest, // [r0]
++ const uint8_t * src1, // [r1]
++ const uint8_t * src2, // [r2]
++ unsigned int lines); // [r3]
++
++void ff_rpi_sand8_lines_to_planar_y8(
++ uint8_t * dest, // [r0]
++ unsigned int dst_stride, // [r1]
++ const uint8_t * src, // [r2]
++ unsigned int src_stride1, // [r3] Ignored - assumed 128
++ unsigned int src_stride2, // [sp, #0] -> r3
++ unsigned int _x, // [sp, #4] Ignored - 0
++ unsigned int y, // [sp, #8] (r7 in prefix)
++ unsigned int _w, // [sp, #12] -> r6 (cur r5)
++ unsigned int h); // [sp, #16] -> r7
++
++void ff_rpi_sand8_lines_to_planar_c8(
++ uint8_t * dst_u, // [r0]
++ unsigned int dst_stride_u, // [r1]
++ uint8_t * dst_v, // [r2]
++ unsigned int dst_stride_v, // [r3]
++ const uint8_t * src, // [sp, #0] -> r4, r5
++ unsigned int stride1, // [sp, #4] 128
++ unsigned int stride2, // [sp, #8] -> r8
++ unsigned int _x, // [sp, #12] 0
++ unsigned int y, // [sp, #16] (r7 in prefix)
++ unsigned int _w, // [sp, #20] -> r12, r6
++ unsigned int h); // [sp, #24] -> r7
++
++void ff_rpi_sand30_lines_to_planar_y16(
++ uint8_t * dest, // [r0]
++ unsigned int dst_stride, // [r1]
++ const uint8_t * src, // [r2]
++ unsigned int src_stride1, // [r3] Ignored - assumed 128
++ unsigned int src_stride2, // [sp, #0] -> r3
++ unsigned int _x, // [sp, #4] Ignored - 0
++ unsigned int y, // [sp, #8] (r7 in prefix)
++ unsigned int _w, // [sp, #12] -> r6 (cur r5)
++ unsigned int h); // [sp, #16] -> r7
++
++void ff_rpi_sand30_lines_to_planar_c16(
++ uint8_t * dst_u, // [r0]
++ unsigned int dst_stride_u, // [r1]
++ uint8_t * dst_v, // [r2]
++ unsigned int dst_stride_v, // [r3]
++ const uint8_t * src, // [sp, #0] -> r4, r5
++ unsigned int stride1, // [sp, #4] 128
++ unsigned int stride2, // [sp, #8] -> r8
++ unsigned int _x, // [sp, #12] 0
++ unsigned int y, // [sp, #16] (r7 in prefix)
++ unsigned int _w, // [sp, #20] -> r6, r9
++ unsigned int h); // [sp, #24] -> r7
++
++void ff_rpi_sand30_lines_to_planar_p010(
++ uint8_t * dest, // [r0]
++ unsigned int dst_stride, // [r1]
++ const uint8_t * src, // [r2]
++ unsigned int src_stride1, // [r3] Ignored - assumed 128
++ unsigned int src_stride2, // [sp, #0] -> r3
++ unsigned int _x, // [sp, #4] Ignored - 0
++ unsigned int y, // [sp, #8] (r7 in prefix)
++ unsigned int _w, // [sp, #12] -> r6 (cur r5)
++ unsigned int h); // [sp, #16] -> r7
++
++void ff_rpi_sand30_lines_to_planar_y8(
++ uint8_t * dest, // [r0]
++ unsigned int dst_stride, // [r1]
++ const uint8_t * src, // [r2]
++ unsigned int src_stride1, // [r3] Ignored - assumed 128
++ unsigned int src_stride2, // [sp, #0] -> r3
++ unsigned int _x, // [sp, #4] Ignored - 0
++ unsigned int y, // [sp, #8] (r7 in prefix)
++ unsigned int _w, // [sp, #12] -> r6 (cur r5)
++ unsigned int h); // [sp, #16] -> r7
++
++#endif // AVUTIL_ARM_SAND_NEON_H
++
+--- a/libavutil/frame.c
++++ b/libavutil/frame.c
+@@ -16,6 +16,8 @@
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
++#include "config.h"
++
+ #include "channel_layout.h"
+ #include "avassert.h"
+ #include "buffer.h"
+@@ -27,6 +29,9 @@
+ #include "mem.h"
+ #include "samplefmt.h"
+ #include "hwcontext.h"
++#if CONFIG_SAND
++#include "rpi_sand_fns.h"
++#endif
+
+ #if FF_API_OLD_CHANNEL_LAYOUT
+ #define CHECK_CHANNELS_CONSISTENCY(frame) \
+@@ -875,6 +880,12 @@ int av_frame_apply_cropping(AVFrame *fra
+ (frame->crop_top + frame->crop_bottom) >= frame->height)
+ return AVERROR(ERANGE);
+
++#if CONFIG_SAND
++ // Sand cannot be cropped - do not try
++ if (av_rpi_is_sand_format(frame->format))
++ return 0;
++#endif
++
+ desc = av_pix_fmt_desc_get(frame->format);
+ if (!desc)
+ return AVERROR_BUG;
+--- a/libavutil/frame.h
++++ b/libavutil/frame.h
+@@ -940,6 +940,16 @@ int av_frame_apply_cropping(AVFrame *fra
+ */
+ const char *av_frame_side_data_name(enum AVFrameSideDataType type);
+
++
++static inline int av_frame_cropped_width(const AVFrame * const frame) // Frame width minus left and right crop
++{
++ return frame->width - (frame->crop_left + frame->crop_right);
++}
++static inline int av_frame_cropped_height(const AVFrame * const frame) // Frame height minus top and bottom crop
++{
++ return frame->height - (frame->crop_top + frame->crop_bottom);
++}
++
+ /**
+ * @}
+ */
+--- a/libavutil/hwcontext_drm.c
++++ b/libavutil/hwcontext_drm.c
+@@ -21,6 +21,7 @@
+ #include <fcntl.h>
+ #include <sys/mman.h>
+ #include <unistd.h>
++#include <sys/ioctl.h>
+
+ /* This was introduced in version 4.6. And may not exist all without an
+ * optional package. So to prevent a hard dependency on needing the Linux
+@@ -31,6 +32,7 @@
+ #endif
+
+ #include <drm.h>
++#include <libdrm/drm_fourcc.h>
+ #include <xf86drm.h>
+
+ #include "avassert.h"
+@@ -38,7 +40,9 @@
+ #include "hwcontext_drm.h"
+ #include "hwcontext_internal.h"
+ #include "imgutils.h"
+-
++#if CONFIG_SAND
++#include "libavutil/rpi_sand_fns.h"
++#endif
+
+ static void drm_device_free(AVHWDeviceContext *hwdev)
+ {
+@@ -53,6 +57,11 @@ static int drm_device_create(AVHWDeviceC
+ AVDRMDeviceContext *hwctx = hwdev->hwctx;
+ drmVersionPtr version;
+
++ if (device == NULL) {
++ hwctx->fd = -1;
++ return 0;
++ }
++
+ hwctx->fd = open(device, O_RDWR);
+ if (hwctx->fd < 0)
+ return AVERROR(errno);
+@@ -139,6 +148,8 @@ static int drm_map_frame(AVHWFramesConte
+ if (flags & AV_HWFRAME_MAP_WRITE)
+ mmap_prot |= PROT_WRITE;
+
++ if (dst->format == AV_PIX_FMT_NONE)
++ dst->format = hwfc->sw_format;
+ #if HAVE_LINUX_DMA_BUF_H
+ if (flags & AV_HWFRAME_MAP_READ)
+ map->sync_flags |= DMA_BUF_SYNC_READ;
+@@ -185,6 +196,23 @@ static int drm_map_frame(AVHWFramesConte
+
+ dst->width = src->width;
+ dst->height = src->height;
++ dst->crop_top = src->crop_top;
++ dst->crop_bottom = src->crop_bottom;
++ dst->crop_left = src->crop_left;
++ dst->crop_right = src->crop_right;
++
++#if CONFIG_SAND
++ // Rework for sand frames
++ if (av_rpi_is_sand_frame(dst)) {
++ // As it stands the sand formats hold stride2 in linesize[3]
++ // linesize[0] & [1] contain stride1 which is always 128 for everything we do
++ // * Arguably this should be reworked s.t. stride2 is in linesize[0] & [1]
++ dst->linesize[3] = fourcc_mod_broadcom_param(desc->objects[0].format_modifier);
++ dst->linesize[0] = 128;
++ dst->linesize[1] = 128;
++ // *** Are we sure src->height is actually what we want ???
++ }
++#endif
+
+ err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src,
+ &drm_unmap_frame, map);
+@@ -206,16 +234,29 @@ static int drm_transfer_get_formats(AVHW
+ enum AVHWFrameTransferDirection dir,
+ enum AVPixelFormat **formats)
+ {
+- enum AVPixelFormat *pix_fmts;
++ enum AVPixelFormat *p;
+
+- pix_fmts = av_malloc_array(2, sizeof(*pix_fmts));
+- if (!pix_fmts)
++ p = *formats = av_malloc_array(3, sizeof(*p));
++ if (!p)
+ return AVERROR(ENOMEM);
+
+- pix_fmts[0] = ctx->sw_format;
+- pix_fmts[1] = AV_PIX_FMT_NONE;
++ // **** Offer native sand too ????
++ *p++ =
++#if CONFIG_SAND
++ ctx->sw_format == AV_PIX_FMT_RPI4_8 || ctx->sw_format == AV_PIX_FMT_SAND128 ?
++ AV_PIX_FMT_YUV420P :
++ ctx->sw_format == AV_PIX_FMT_RPI4_10 ?
++ AV_PIX_FMT_YUV420P10LE :
++#endif
++ ctx->sw_format;
++
++#if CONFIG_SAND
++ if (ctx->sw_format == AV_PIX_FMT_RPI4_10 ||
++ ctx->sw_format == AV_PIX_FMT_RPI4_8 || ctx->sw_format == AV_PIX_FMT_SAND128)
++ *p++ = AV_PIX_FMT_NV12;
++#endif
+
+- *formats = pix_fmts;
++ *p = AV_PIX_FMT_NONE;
+ return 0;
+ }
+
+@@ -231,18 +272,62 @@ static int drm_transfer_data_from(AVHWFr
+ map = av_frame_alloc();
+ if (!map)
+ return AVERROR(ENOMEM);
+- map->format = dst->format;
+
++ // Map to default
++ map->format = AV_PIX_FMT_NONE;
+ err = drm_map_frame(hwfc, map, src, AV_HWFRAME_MAP_READ);
+ if (err)
+ goto fail;
+
+- map->width = dst->width;
+- map->height = dst->height;
++#if 0
++ av_log(hwfc, AV_LOG_INFO, "%s: src fmt=%d (%d), dst fmt=%d (%d) s=%dx%d l=%d/%d/%d/%d, d=%dx%d l=%d/%d/%d\n", __func__,
++ hwfc->sw_format, AV_PIX_FMT_RPI4_8, dst->format, AV_PIX_FMT_YUV420P10LE,
++ map->width, map->height,
++ map->linesize[0],
++ map->linesize[1],
++ map->linesize[2],
++ map->linesize[3],
++ dst->width, dst->height,
++ dst->linesize[0],
++ dst->linesize[1],
++ dst->linesize[2]);
++#endif
++#if CONFIG_SAND
++ if (av_rpi_is_sand_frame(map)) {
++ // Preserve crop - later ffmpeg code assumes that we have in that it
++ // overwrites any crop that we create with the old values
++ const unsigned int w = FFMIN(dst->width, map->width);
++ const unsigned int h = FFMIN(dst->height, map->height);
++
++ map->crop_top = 0;
++ map->crop_bottom = 0;
++ map->crop_left = 0;
++ map->crop_right = 0;
++
++ if (av_rpi_sand_to_planar_frame(dst, map) != 0)
++ {
++ av_log(hwfc, AV_LOG_ERROR, "%s: Incompatible output pixfmt for sand\n", __func__);
++ err = AVERROR(EINVAL);
++ goto fail;
++ }
++
++ dst->width = w;
++ dst->height = h;
++ }
++ else
++#endif
++ {
++ // Kludge mapped h/w s.t. frame_copy works
++ map->width = dst->width;
++ map->height = dst->height;
++ err = av_frame_copy(dst, map);
++ }
+
+- err = av_frame_copy(dst, map);
+ if (err)
++ {
++ av_log(hwfc, AV_LOG_ERROR, "%s: Copy fail\n", __func__);
+ goto fail;
++ }
+
+ err = 0;
+ fail:
+@@ -257,7 +342,10 @@ static int drm_transfer_data_to(AVHWFram
+ int err;
+
+ if (src->width > hwfc->width || src->height > hwfc->height)
++ { // Report the src dimensions, since those are what the test above rejected
++ av_log(hwfc, AV_LOG_ERROR, "%s: H/w mismatch: %d/%d, %d/%d\n", __func__, src->width, hwfc->width, src->height, hwfc->height);
+ return AVERROR(EINVAL);
++ }
+
+ map = av_frame_alloc();
+ if (!map)
+--- a/libavutil/hwcontext_vulkan.c
++++ b/libavutil/hwcontext_vulkan.c
+@@ -57,6 +57,14 @@
+ #define CHECK_CU(x) FF_CUDA_CHECK_DL(cuda_cu, cu, x)
+ #endif
+
++// Sometimes missing definitions
++#ifndef VK_EXT_VIDEO_DECODE_H264_EXTENSION_NAME
++#define VK_EXT_VIDEO_DECODE_H264_EXTENSION_NAME "VK_EXT_video_decode_h264"
++#endif
++#ifndef VK_EXT_VIDEO_DECODE_H265_EXTENSION_NAME
++#define VK_EXT_VIDEO_DECODE_H265_EXTENSION_NAME "VK_EXT_video_decode_h265"
++#endif
++
+ typedef struct VulkanQueueCtx {
+ VkFence fence;
+ VkQueue queue;
+--- a/libavutil/pixdesc.c
++++ b/libavutil/pixdesc.c
+@@ -2491,6 +2491,50 @@ static const AVPixFmtDescriptor av_pix_f
+ },
+ .flags = AV_PIX_FMT_FLAG_PLANAR,
+ },
++ [AV_PIX_FMT_SAND128] = {
++ .name = "sand128",
++ .nb_components = 3,
++ .log2_chroma_w = 1,
++ .log2_chroma_h = 1,
++ .comp = {
++ { 0, 1, 0, 0, 8 }, /* Y */
++ { 1, 2, 0, 0, 8 }, /* U */
++ { 1, 2, 1, 0, 8 }, /* V */
++ },
++ .flags = 0,
++ },
++ [AV_PIX_FMT_SAND64_10] = {
++ .name = "sand64_10",
++ .nb_components = 3,
++ .log2_chroma_w = 1,
++ .log2_chroma_h = 1,
++ .comp = {
++ { 0, 2, 0, 0, 10 }, /* Y */
++ { 1, 4, 0, 0, 10 }, /* U */
++ { 1, 4, 2, 0, 10 }, /* V */
++ },
++ .flags = 0,
++ },
++ [AV_PIX_FMT_SAND64_16] = {
++ .name = "sand64_16",
++ .nb_components = 3,
++ .log2_chroma_w = 1,
++ .log2_chroma_h = 1,
++ .comp = {
++ { 0, 2, 0, 0, 16 }, /* Y */
++ { 1, 4, 0, 0, 16 }, /* U */
++ { 1, 4, 2, 0, 16 }, /* V */
++ },
++ .flags = 0,
++ },
++ [AV_PIX_FMT_RPI4_8] = {
++ .name = "rpi4_8",
++ .flags = AV_PIX_FMT_FLAG_HWACCEL,
++ },
++ [AV_PIX_FMT_RPI4_10] = {
++ .name = "rpi4_10",
++ .flags = AV_PIX_FMT_FLAG_HWACCEL,
++ },
+ };
+
+ static const char * const color_range_names[] = {
+--- a/libavutil/pixfmt.h
++++ b/libavutil/pixfmt.h
+@@ -349,6 +349,14 @@ enum AVPixelFormat {
+
+ AV_PIX_FMT_Y210BE, ///< packed YUV 4:2:2 like YUYV422, 20bpp, data in the high bits, big-endian
+ AV_PIX_FMT_Y210LE, ///< packed YUV 4:2:2 like YUYV422, 20bpp, data in the high bits, little-endian
++// RPI - not on ifdef so can be got at by calling progs
++// #define so code that uses this can know it is there
++#define AVUTIL_HAVE_PIX_FMT_SAND 1
++ AV_PIX_FMT_SAND128, ///< 4:2:0 8-bit 128x*Y stripe, 64x*UV stripe, then next x stripe, mysterious padding
++ AV_PIX_FMT_SAND64_10, ///< 4:2:0 10-bit 64x*Y stripe, 32x*UV stripe, then next x stripe, mysterious padding
++ AV_PIX_FMT_SAND64_16, ///< 4:2:0 16-bit 64x*Y stripe, 32x*UV stripe, then next x stripe, mysterious padding
++ AV_PIX_FMT_RPI4_8, ///< HW acceleration format "rpi4_8" (pixdesc.c entry sets only the name and AV_PIX_FMT_FLAG_HWACCEL)
++ AV_PIX_FMT_RPI4_10, ///< HW acceleration format "rpi4_10" (pixdesc.c entry sets only the name and AV_PIX_FMT_FLAG_HWACCEL)
+
+ AV_PIX_FMT_X2RGB10LE, ///< packed RGB 10:10:10, 30bpp, (msb)2X 10R 10G 10B(lsb), little-endian, X=unused/undefined
+ AV_PIX_FMT_X2RGB10BE, ///< packed RGB 10:10:10, 30bpp, (msb)2X 10R 10G 10B(lsb), big-endian, X=unused/undefined
+--- /dev/null
++++ b/libavutil/rpi_sand_fn_pw.h
+@@ -0,0 +1,227 @@
++/*
++Copyright (c) 2018 Raspberry Pi (Trading) Ltd.
++All rights reserved.
++
++Redistribution and use in source and binary forms, with or without
++modification, are permitted provided that the following conditions are met:
++ * Redistributions of source code must retain the above copyright
++ notice, this list of conditions and the following disclaimer.
++ * Redistributions in binary form must reproduce the above copyright
++ notice, this list of conditions and the following disclaimer in the
++ documentation and/or other materials provided with the distribution.
++ * Neither the name of the copyright holder nor the
++ names of its contributors may be used to endorse or promote products
++ derived from this software without specific prior written permission.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++Authors: John Cox
++*/
++
++// * Included twice from rpi_sand_fns.c, once with PW == 1 and once with PW == 2
++
++#define STRCAT(x,y) x##y
++
++#if PW == 1
++#define pixel uint8_t
++#define FUNC(f) STRCAT(f, 8)
++#elif PW == 2
++#define pixel uint16_t
++#define FUNC(f) STRCAT(f, 16)
++#else
++#error Unexpected PW
++#endif
++
++// Fetches a single patch - offscreen fixup not done here
++// w <= stride1
++// unclipped
++void FUNC(av_rpi_sand_to_planar_y)(uint8_t * dst, const unsigned int dst_stride,
++ const uint8_t * src,
++ unsigned int stride1, unsigned int stride2,
++ unsigned int _x, unsigned int y,
++ unsigned int _w, unsigned int h)
++{
++ const unsigned int x = _x; // x & w are used as byte offsets/counts below
++ const unsigned int w = _w;
++ const unsigned int mask = stride1 - 1; // Offset-within-stripe mask; presumably stride1 is a power of 2 - TODO confirm
++
++#if PW == 1 && HAVE_SAND_ASM
++ if (_x == 0 && have_neon(av_get_cpu_flags())) { // Runtime NEON check, as the sand30 functions in rpi_sand_fns.c do
++ ff_rpi_sand8_lines_to_planar_y8(dst, dst_stride,
++ src, stride1, stride2, _x, y, _w, h);
++ return;
++ }
++#endif
++
++ if ((x & ~mask) == ((x + w) & ~mask)) {
++ // All in one sand stripe
++ const uint8_t * p = src + (x & mask) + y * stride1 + (x & ~mask) * stride2;
++ for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p += stride1) {
++ memcpy(dst, p, w);
++ }
++ }
++ else
++ {
++ // Two+ stripe
++ const unsigned int sstride = stride1 * stride2; // Bytes from one stripe to the same row of the next
++ const uint8_t * p1 = src + (x & mask) + y * stride1 + (x & ~mask) * stride2;
++ const uint8_t * p2 = p1 + sstride - (x & mask); // Start of the row in the following stripe
++ const unsigned int w1 = stride1 - (x & mask); // Bytes taken from the first (partial) stripe
++ const unsigned int w3 = (x + w) & mask; // Bytes taken from the last (partial) stripe
++ const unsigned int w2 = w - (w1 + w3); // Bytes taken from whole stripes in between
++
++ for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p1 += stride1, p2 += stride1) {
++ unsigned int j;
++ const uint8_t * p = p2;
++ uint8_t * d = dst;
++ memcpy(d, p1, w1);
++ d += w1;
++ for (j = 0; j < w2; j += stride1, d += stride1, p += sstride) {
++ memcpy(d, p, stride1);
++ }
++ memcpy(d, p, w3);
++ }
++ }
++}
++
++// x & w in bytes but not of interleave (i.e. offset = x*2 for U&V)
++
++void FUNC(av_rpi_sand_to_planar_c)(uint8_t * dst_u, const unsigned int dst_stride_u,
++ uint8_t * dst_v, const unsigned int dst_stride_v,
++ const uint8_t * src,
++ unsigned int stride1, unsigned int stride2,
++ unsigned int _x, unsigned int y,
++ unsigned int _w, unsigned int h)
++{
++ const unsigned int x = _x * 2; // Source holds U & V interleaved, so offsets/widths double
++ const unsigned int w = _w * 2;
++ const unsigned int mask = stride1 - 1; // Offset-within-stripe mask; presumably stride1 is a power of 2 - TODO confirm
++
++#if PW == 1 && HAVE_SAND_ASM
++ if (_x == 0 && have_neon(av_get_cpu_flags())) { // Runtime NEON check, as the sand30 functions in rpi_sand_fns.c do
++ ff_rpi_sand8_lines_to_planar_c8(dst_u, dst_stride_u, dst_v, dst_stride_v,
++ src, stride1, stride2, _x, y, _w, h);
++ return;
++ }
++#endif
++
++ if ((x & ~mask) == ((x + w) & ~mask)) {
++ // All in one sand stripe
++ const uint8_t * p1 = src + (x & mask) + y * stride1 + (x & ~mask) * stride2;
++ for (unsigned int i = 0; i != h; ++i, dst_u += dst_stride_u, dst_v += dst_stride_v, p1 += stride1) {
++ pixel * du = (pixel *)dst_u;
++ pixel * dv = (pixel *)dst_v;
++ const pixel * p = (const pixel *)p1;
++ for (unsigned int k = 0; k < w; k += 2 * PW) { // One U and one V pixel per step
++ *du++ = *p++;
++ *dv++ = *p++;
++ }
++ }
++ }
++ else
++ {
++ // Two+ stripe
++ const unsigned int sstride = stride1 * stride2; // Bytes from one stripe to the same row of the next
++ const unsigned int sstride_p = (sstride - stride1) / PW; // End of a stripe row to the start of the next stripe's row, in pixels
++
++ const uint8_t * p1 = src + (x & mask) + y * stride1 + (x & ~mask) * stride2;
++ const uint8_t * p2 = p1 + sstride - (x & mask); // Start of the row in the following stripe
++ const unsigned int w1 = stride1 - (x & mask); // Bytes taken from the first (partial) stripe
++ const unsigned int w3 = (x + w) & mask; // Bytes taken from the last (partial) stripe
++ const unsigned int w2 = w - (w1 + w3); // Bytes taken from whole stripes in between
++
++ for (unsigned int i = 0; i != h; ++i, dst_u += dst_stride_u, dst_v += dst_stride_v, p1 += stride1, p2 += stride1) {
++ unsigned int j;
++ const pixel * p = (const pixel *)p1;
++ pixel * du = (pixel *)dst_u;
++ pixel * dv = (pixel *)dst_v;
++ for (unsigned int k = 0; k < w1; k += 2 * PW) {
++ *du++ = *p++;
++ *dv++ = *p++;
++ }
++ for (j = 0, p = (const pixel *)p2; j < w2; j += stride1, p += sstride_p) {
++ for (unsigned int k = 0; k < stride1; k += 2 * PW) {
++ *du++ = *p++;
++ *dv++ = *p++;
++ }
++ }
++ for (unsigned int k = 0; k < w3; k += 2 * PW) {
++ *du++ = *p++;
++ *dv++ = *p++;
++ }
++ }
++ }
++}
++
++void FUNC(av_rpi_planar_to_sand_c)(uint8_t * dst_c,
++ unsigned int stride1, unsigned int stride2,
++ const uint8_t * src_u, const unsigned int src_stride_u,
++ const uint8_t * src_v, const unsigned int src_stride_v,
++ unsigned int _x, unsigned int y,
++ unsigned int _w, unsigned int h)
++{
++ const unsigned int x = _x * 2; // Destination holds U & V interleaved, so offsets/widths double
++ const unsigned int w = _w * 2;
++ const unsigned int mask = stride1 - 1; // Offset-within-stripe mask; presumably stride1 is a power of 2 - TODO confirm
++ if ((x & ~mask) == ((x + w) & ~mask)) {
++ // All in one sand stripe
++ uint8_t * p1 = dst_c + (x & mask) + y * stride1 + (x & ~mask) * stride2;
++ for (unsigned int i = 0; i != h; ++i, src_u += src_stride_u, src_v += src_stride_v, p1 += stride1) {
++ const pixel * su = (const pixel *)src_u;
++ const pixel * sv = (const pixel *)src_v;
++ pixel * p = (pixel *)p1;
++ for (unsigned int k = 0; k < w; k += 2 * PW) { // One U and one V pixel per step
++ *p++ = *su++;
++ *p++ = *sv++;
++ }
++ }
++ }
++ else
++ {
++ // Two+ stripe
++ const unsigned int sstride = stride1 * stride2; // Bytes from one stripe to the same row of the next
++ const unsigned int sstride_p = (sstride - stride1) / PW; // End of a stripe row to the start of the next stripe's row, in pixels
++
++ uint8_t * p1 = dst_c + (x & mask) + y * stride1 + (x & ~mask) * stride2; // Written through: must not be const
++ uint8_t * p2 = p1 + sstride - (x & mask); // Start of the row in the following stripe (also written through)
++ const unsigned int w1 = stride1 - (x & mask); // Bytes put in the first (partial) stripe
++ const unsigned int w3 = (x + w) & mask; // Bytes put in the last (partial) stripe
++ const unsigned int w2 = w - (w1 + w3); // Bytes put in whole stripes in between
++
++ for (unsigned int i = 0; i != h; ++i, src_u += src_stride_u, src_v += src_stride_v, p1 += stride1, p2 += stride1) {
++ unsigned int j;
++ const pixel * su = (const pixel *)src_u;
++ const pixel * sv = (const pixel *)src_v;
++ pixel * p = (pixel *)p1;
++ for (unsigned int k = 0; k < w1; k += 2 * PW) {
++ *p++ = *su++;
++ *p++ = *sv++;
++ }
++ for (j = 0, p = (pixel *)p2; j < w2; j += stride1, p += sstride_p) {
++ for (unsigned int k = 0; k < stride1; k += 2 * PW) {
++ *p++ = *su++;
++ *p++ = *sv++;
++ }
++ }
++ for (unsigned int k = 0; k < w3; k += 2 * PW) {
++ *p++ = *su++;
++ *p++ = *sv++;
++ }
++ }
++ }
++}
++
++
++#undef pixel
++#undef STRCAT
++#undef FUNC
++
+--- /dev/null
++++ b/libavutil/rpi_sand_fns.c
+@@ -0,0 +1,447 @@
++/*
++Copyright (c) 2018 Raspberry Pi (Trading) Ltd.
++All rights reserved.
++
++Redistribution and use in source and binary forms, with or without
++modification, are permitted provided that the following conditions are met:
++ * Redistributions of source code must retain the above copyright
++ notice, this list of conditions and the following disclaimer.
++ * Redistributions in binary form must reproduce the above copyright
++ notice, this list of conditions and the following disclaimer in the
++ documentation and/or other materials provided with the distribution.
++ * Neither the name of the copyright holder nor the
++ names of its contributors may be used to endorse or promote products
++ derived from this software without specific prior written permission.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++Authors: John Cox
++*/
++
++#include "config.h"
++#include <stdint.h>
++#include <string.h>
++#include "rpi_sand_fns.h"
++#include "avassert.h"
++#include "frame.h"
++
++#if ARCH_ARM && HAVE_NEON
++#include "libavutil/arm/cpu.h"
++#include "libavutil/arm/rpi_sand_neon.h"
++#define HAVE_SAND_ASM 1
++#elif ARCH_AARCH64 && HAVE_NEON
++#include "libavutil/aarch64/cpu.h"
++#include "libavutil/aarch64/rpi_sand_neon.h"
++#define HAVE_SAND_ASM 1
++#else
++#define HAVE_SAND_ASM 0
++#endif
++
++#define PW 1
++#include "rpi_sand_fn_pw.h"
++#undef PW
++
++#define PW 2
++#include "rpi_sand_fn_pw.h"
++#undef PW
++
++#if 1
++// Simple round
++static void cpy16_to_8(uint8_t * dst, const uint8_t * _src, unsigned int n, const unsigned int shr) // Narrow n 16-bit values to bytes: (v + rnd) >> shr
++{
++ const unsigned int rnd = (1 << shr) >> 1; // Half of the step removed by the shift (0 when shr == 0)
++ const uint16_t * src = (const uint16_t *)_src; // Source is read as 16-bit values
++
++ for (; n != 0; --n) {
++ *dst++ = (*src++ + rnd) >> shr; // Result is truncated to 8 bits by the store; no clamping is done
++ }
++}
++#else
++// Dithered variation
++static void cpy16_to_8(uint8_t * dst, const uint8_t * _src, unsigned int n, const unsigned int shr) // Compiled out by the "#if 1" above
++{
++ unsigned int rnd = (1 << shr) >> 1; // Running remainder, seeded with half a step
++ const unsigned int mask = ((1 << shr) - 1); // Selects the bits that the shift discards
++ const uint16_t * src = (const uint16_t *)_src;
++
++ for (; n != 0; --n) {
++ rnd = *src++ + (rnd & mask); // Carry the previous value's discarded low bits into this one
++ *dst++ = rnd >> shr;
++ }
++}
++#endif
++
++// Fetches a single patch - offscreen fixup not done here
++// w <= stride1
++// unclipped
++// _x & _w in pixels, strides in bytes
++void av_rpi_sand30_to_planar_y16(uint8_t * dst, const unsigned int dst_stride,
++ const uint8_t * src,
++ unsigned int stride1, unsigned int stride2,
++ unsigned int _x, unsigned int y,
++ unsigned int _w, unsigned int h)
++{
++ const unsigned int x0 = (_x / 3) * 4; // Byte offset of the word
++ const unsigned int xskip0 = _x - (x0 >> 2) * 3; // Pixels to skip at the start of the first word (0..2)
++ const unsigned int x1 = ((_x + _w) / 3) * 4; // Byte offset of the word holding the first pixel past the end
++ const unsigned int xrem1 = _x + _w - (x1 >> 2) * 3; // Pixels wanted from that final word (0..2)
++ const unsigned int mask = stride1 - 1; // Offset-within-stripe mask; presumably stride1 is a power of 2 - TODO confirm
++ const uint8_t * p0 = src + (x0 & mask) + y * stride1 + (x0 & ~mask) * stride2;
++ const unsigned int slice_inc = ((stride2 - 1) * stride1) >> 2; // RHS of a stripe to LHS of next in words
++
++#if HAVE_SAND_ASM
++ if (_x == 0 && have_neon(av_get_cpu_flags())) { // Asm path is only used for x == 0 and when NEON is reported at runtime
++ ff_rpi_sand30_lines_to_planar_y16(dst, dst_stride, src, stride1, stride2, _x, y, _w, h);
++ return;
++ }
++#endif
++
++ if (x0 == x1) {
++ // *******************
++ // Partial single word xfer - not implemented: returns with dst untouched
++ return;
++ }
++
++ for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p0 += stride1)
++ {
++ unsigned int x = x0; // Byte offset of the word about to be read
++ const uint32_t * p = (const uint32_t *)p0;
++ uint16_t * d = (uint16_t *)dst;
++
++ if (xskip0 != 0) { // Leading partial word: emit only its later pixels
++ const uint32_t p3 = *p++;
++
++ if (xskip0 == 1)
++ *d++ = (p3 >> 10) & 0x3ff;
++ *d++ = (p3 >> 20) & 0x3ff;
++
++ if (((x += 4) & mask) == 0) // Crossed the end of a stripe row: jump to the next stripe
++ p += slice_inc;
++ }
++
++ while (x != x1) { // Whole words: three 10-bit pixels from bits 9:0, 19:10 and 29:20
++ const uint32_t p3 = *p++;
++ *d++ = p3 & 0x3ff;
++ *d++ = (p3 >> 10) & 0x3ff;
++ *d++ = (p3 >> 20) & 0x3ff;
++
++ if (((x += 4) & mask) == 0)
++ p += slice_inc;
++ }
++
++ if (xrem1 != 0) { // Trailing partial word: emit only its first one or two pixels
++ const uint32_t p3 = *p;
++
++ *d++ = p3 & 0x3ff;
++ if (xrem1 == 2)
++ *d++ = (p3 >> 10) & 0x3ff;
++ }
++ }
++}
++
++
++void av_rpi_sand30_to_planar_c16(uint8_t * dst_u, const unsigned int dst_stride_u,
++ uint8_t * dst_v, const unsigned int dst_stride_v,
++ const uint8_t * src,
++ unsigned int stride1, unsigned int stride2,
++ unsigned int _x, unsigned int y,
++ unsigned int _w, unsigned int h)
++{ // Splits sand30 chroma (3 U/V pairs per two 32-bit words) into separate 16-bit U and V planes
++ const unsigned int x0 = (_x / 3) * 8; // Byte offset of the word pair holding the first U/V pair
++ const unsigned int xskip0 = _x - (x0 >> 3) * 3; // U/V pairs to skip at the start of that word pair (_x % 3)
++ const unsigned int x1 = ((_x + _w) / 3) * 8; // Byte offset of the word pair holding pair _x + _w
++ const unsigned int xrem1 = _x + _w - (x1 >> 3) * 3; // Pairs still wanted from that last word pair
++ const unsigned int mask = stride1 - 1; // In-stripe byte offset mask (relies on stride1 being a power of 2)
++ const uint8_t * p0 = src + (x0 & mask) + y * stride1 + (x0 & ~mask) * stride2;
++ const unsigned int slice_inc = ((stride2 - 1) * stride1) >> 2; // RHS of a stripe to LHS of next in words
++
++#if HAVE_SAND_ASM
++ if (_x == 0 && have_neon(av_get_cpu_flags())) { // Asm path is only taken for patches starting at x == 0
++ ff_rpi_sand30_lines_to_planar_c16(dst_u, dst_stride_u, dst_v, dst_stride_v,
++ src, stride1, stride2, _x, y, _w, h);
++ return;
++ }
++#endif
++
++ if (x0 == x1) {
++ // TODO: partial single word-pair xfer (whole patch inside one
++ // source word pair) is not implemented - nothing is written
++ return;
++ }
++
++ for (unsigned int i = 0; i != h; ++i, dst_u += dst_stride_u, dst_v += dst_stride_v, p0 += stride1)
++ {
++ unsigned int x = x0;
++ const uint32_t * p = (const uint32_t *)p0;
++ uint16_t * du = (uint16_t *)dst_u;
++ uint16_t * dv = (uint16_t *)dst_v;
++
++ if (xskip0 != 0) { // Leading partial word pair: emit only its last 3 - xskip0 U/V pairs
++ const uint32_t p3a = *p++;
++ const uint32_t p3b = *p++;
++
++ if (xskip0 == 1)
++ {
++ *du++ = (p3a >> 20) & 0x3ff;
++ *dv++ = (p3b >> 0) & 0x3ff;
++ }
++ *du++ = (p3b >> 10) & 0x3ff;
++ *dv++ = (p3b >> 20) & 0x3ff;
++
++ if (((x += 8) & mask) == 0) // Reached the end of a stripe row - step to the next stripe
++ p += slice_inc;
++ }
++
++ while (x != x1) { // Whole word pairs: fields alternate U, V, U | V, U, V from the low bits up
++ const uint32_t p3a = *p++;
++ const uint32_t p3b = *p++;
++
++ *du++ = p3a & 0x3ff;
++ *dv++ = (p3a >> 10) & 0x3ff;
++ *du++ = (p3a >> 20) & 0x3ff;
++ *dv++ = p3b & 0x3ff;
++ *du++ = (p3b >> 10) & 0x3ff;
++ *dv++ = (p3b >> 20) & 0x3ff;
++
++ if (((x += 8) & mask) == 0)
++ p += slice_inc;
++ }
++
++ if (xrem1 != 0) { // Trailing partial word pair: emit only its first xrem1 U/V pairs
++ const uint32_t p3a = *p++;
++ const uint32_t p3b = *p++;
++
++ *du++ = p3a & 0x3ff;
++ *dv++ = (p3a >> 10) & 0x3ff;
++ if (xrem1 == 2)
++ {
++ *du++ = (p3a >> 20) & 0x3ff;
++ *dv++ = p3b & 0x3ff;
++ }
++ }
++ }
++}
++
++// Fetches a single patch of a sand30 plane (3 x 10-bit pixels per 32-bit word) as 8-bit samples - offscreen fixup not done here
++// w <= stride1
++// Simple truncation: each 10-bit sample just loses its bottom 2 bits
++// _x & _w in pixels, strides in bytes
++void av_rpi_sand30_to_planar_y8(uint8_t * dst, const unsigned int dst_stride,
++ const uint8_t * src,
++ unsigned int stride1, unsigned int stride2,
++ unsigned int _x, unsigned int y,
++ unsigned int _w, unsigned int h)
++{
++ const unsigned int x0 = (_x / 3) * 4; // Byte offset of the word holding the first pixel
++ const unsigned int xskip0 = _x - (x0 >> 2) * 3; // Pixels to skip at the start of that word (_x % 3)
++ const unsigned int x1 = ((_x + _w) / 3) * 4; // Byte offset of the word holding pixel _x + _w
++ const unsigned int xrem1 = _x + _w - (x1 >> 2) * 3; // Pixels still wanted from that last word ((_x + _w) % 3)
++ const unsigned int mask = stride1 - 1; // In-stripe byte offset mask (relies on stride1 being a power of 2)
++ const uint8_t * p0 = src + (x0 & mask) + y * stride1 + (x0 & ~mask) * stride2;
++ const unsigned int slice_inc = ((stride2 - 1) * stride1) >> 2; // RHS of a stripe to LHS of next in words
++
++#if HAVE_SAND_ASM
++ if (_x == 0 && have_neon(av_get_cpu_flags())) { // Same runtime NEON guard as the y16/c16 variants
++ ff_rpi_sand30_lines_to_planar_y8(dst, dst_stride, src, stride1, stride2, _x, y, _w, h);
++ return;
++ }
++#endif
++
++ if (x0 == x1) {
++ // TODO: partial single word xfer (whole patch inside one source
++ // word) is not implemented - nothing is written to dst
++ return;
++ }
++
++ for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p0 += stride1)
++ {
++ unsigned int x = x0;
++ const uint32_t * p = (const uint32_t *)p0;
++ uint8_t * d = dst;
++
++ if (xskip0 != 0) { // Leading partial word: emit only its last 3 - xskip0 pixels
++ const uint32_t p3 = *p++;
++
++ if (xskip0 == 1)
++ *d++ = (p3 >> 12) & 0xff;
++ *d++ = (p3 >> 22) & 0xff;
++
++ if (((x += 4) & mask) == 0) // Reached the end of a stripe row - step to the next stripe
++ p += slice_inc;
++ }
++
++ while (x != x1) { // Whole words: top 8 bits of each of the 3 10-bit fields
++ const uint32_t p3 = *p++;
++ *d++ = (p3 >> 2) & 0xff;
++ *d++ = (p3 >> 12) & 0xff;
++ *d++ = (p3 >> 22) & 0xff;
++
++ if (((x += 4) & mask) == 0)
++ p += slice_inc;
++ }
++
++ if (xrem1 != 0) { // Trailing partial word: emit only its first xrem1 pixels
++ const uint32_t p3 = *p;
++
++ *d++ = (p3 >> 2) & 0xff;
++ if (xrem1 == 2)
++ *d++ = (p3 >> 12) & 0xff;
++ }
++ }
++}
++
++
++
++// Converts a sand plane of 16-bit samples to a sand plane of 8-bit samples; w/h in pixels
++void av_rpi_sand16_to_sand8(uint8_t * dst, const unsigned int dst_stride1, const unsigned int dst_stride2,
++ const uint8_t * src, const unsigned int src_stride1, const unsigned int src_stride2,
++ unsigned int w, unsigned int h, const unsigned int shr)
++{
++ const unsigned int n = dst_stride1 / 2; // 16-bit samples in one src stripe row == half a dst stripe row
++ unsigned int j; // x position in pixels of the dst stripe being written
++
++ // This is true for our current layouts
++ av_assert0(dst_stride1 == src_stride1);
++
++ // As we have the same stride1 for src & dest and src is wider than dest
++ // then if we loop on src we can always write contiguously to dest
++ // We make no effort to copy an exact width - round up to nearest src stripe
++ // as we will always have storage in dest for that
++
++#if ARCH_ARM && HAVE_NEON
++ if (shr == 3 && src_stride1 == 128) { // Asm converter is only used for this shift / stripe width
++ for (j = 0; j + n < w; j += dst_stride1) {
++ uint8_t * d = dst + j * dst_stride2;
++ const uint8_t * s1 = src + j * 2 * src_stride2;
++ const uint8_t * s2 = s1 + src_stride1 * src_stride2;
++
++ ff_rpi_sand128b_stripe_to_8_10(d, s1, s2, h);
++ }
++ }
++ else
++#endif
++ {
++ for (j = 0; j + n < w; j += dst_stride1) { // One full dst stripe per pass, built from 2 src stripes
++ uint8_t * d = dst + j * dst_stride2;
++ const uint8_t * s1 = src + j * 2 * src_stride2; // Src stripe feeding the left half of the dst stripe
++ const uint8_t * s2 = s1 + src_stride1 * src_stride2; // Next src stripe, feeding the right half
++
++ for (unsigned int i = 0; i != h; ++i, s1 += src_stride1, s2 += src_stride1, d += dst_stride1) {
++ cpy16_to_8(d, s1, n, shr);
++ cpy16_to_8(d + n, s2, n, shr);
++ }
++ }
++ }
++
++ // Fix up a trailing dest half stripe
++ if (j < w) { // Only one src stripe remains, so only the left half of the dst stripe is written
++ uint8_t * d = dst + j * dst_stride2;
++ const uint8_t * s1 = src + j * 2 * src_stride2;
++
++ for (unsigned int i = 0; i != h; ++i, s1 += src_stride1, d += dst_stride1) {
++ cpy16_to_8(d, s1, n, shr);
++ }
++ }
++}
++
++int av_rpi_sand_to_planar_frame(AVFrame * const dst, const AVFrame * const src)
++{ // Converts the cropped area of sand frame src into dst's format; returns -1 for unsupported format pairs
++ const int w = av_frame_cropped_width(src);
++ const int h = av_frame_cropped_height(src);
++ const int x = src->crop_left;
++ const int y = src->crop_top;
++
++ // We will crop as part of the conversion
++ dst->crop_top = 0;
++ dst->crop_left = 0;
++ dst->crop_bottom = 0;
++ dst->crop_right = 0;
++
++ switch (src->format){
++ case AV_PIX_FMT_SAND128:
++ case AV_PIX_FMT_RPI4_8:
++ switch (dst->format){
++ case AV_PIX_FMT_YUV420P:
++ av_rpi_sand_to_planar_y8(dst->data[0], dst->linesize[0],
++ src->data[0],
++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
++ x, y, w, h);
++ av_rpi_sand_to_planar_c8(dst->data[1], dst->linesize[1],
++ dst->data[2], dst->linesize[2],
++ src->data[1],
++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
++ x/2, y/2, w/2, h/2); // Chroma plane is addressed in chroma pels (luma / 2)
++ break;
++ case AV_PIX_FMT_NV12:
++ av_rpi_sand_to_planar_y8(dst->data[0], dst->linesize[0],
++ src->data[0],
++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
++ x, y, w, h);
++ av_rpi_sand_to_planar_y8(dst->data[1], dst->linesize[1],
++ src->data[1],
++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
++ x/2, y/2, w, h/2); // Chroma copied with the y8 fn, width w; NOTE(review): x/2 vs w looks inconsistent when crop_left != 0 - confirm
++ break;
++ default:
++ return -1;
++ }
++ break;
++ case AV_PIX_FMT_SAND64_10:
++ switch (dst->format){
++ case AV_PIX_FMT_YUV420P10:
++ av_rpi_sand_to_planar_y16(dst->data[0], dst->linesize[0],
++ src->data[0],
++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
++ x*2, y, w*2, h); // x & w doubled here (2 bytes per sample)
++ av_rpi_sand_to_planar_c16(dst->data[1], dst->linesize[1],
++ dst->data[2], dst->linesize[2],
++ src->data[1],
++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
++ x, y/2, w, h/2); // (x/2)*2 and (w/2)*2 written as x and w
++ break;
++ default:
++ return -1;
++ }
++ break;
++ case AV_PIX_FMT_RPI4_10:
++ switch (dst->format){
++ case AV_PIX_FMT_YUV420P10:
++ av_rpi_sand30_to_planar_y16(dst->data[0], dst->linesize[0],
++ src->data[0],
++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
++ x, y, w, h);
++ av_rpi_sand30_to_planar_c16(dst->data[1], dst->linesize[1],
++ dst->data[2], dst->linesize[2],
++ src->data[1],
++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
++ x/2, y/2, w/2, h/2);
++ break;
++ case AV_PIX_FMT_NV12:
++ av_rpi_sand30_to_planar_y8(dst->data[0], dst->linesize[0],
++ src->data[0],
++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
++ x, y, w, h);
++ av_rpi_sand30_to_planar_y8(dst->data[1], dst->linesize[1],
++ src->data[1],
++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
++ x/2, y/2, w, h/2); // Chroma copied with the y8 fn, width w; NOTE(review): x/2 vs w looks inconsistent when crop_left != 0 - confirm
++ break;
++ default:
++ return -1;
++ }
++ break;
++ default:
++ return -1;
++ }
++
++ return av_frame_copy_props(dst, src); // Pixels converted - copy the frame properties and return that result
++}
+--- /dev/null
++++ b/libavutil/rpi_sand_fns.h
+@@ -0,0 +1,188 @@
++/*
++Copyright (c) 2018 Raspberry Pi (Trading) Ltd.
++All rights reserved.
++
++Redistribution and use in source and binary forms, with or without
++modification, are permitted provided that the following conditions are met:
++ * Redistributions of source code must retain the above copyright
++ notice, this list of conditions and the following disclaimer.
++ * Redistributions in binary form must reproduce the above copyright
++ notice, this list of conditions and the following disclaimer in the
++ documentation and/or other materials provided with the distribution.
++ * Neither the name of the copyright holder nor the
++ names of its contributors may be used to endorse or promote products
++ derived from this software without specific prior written permission.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++Authors: John Cox
++*/
++
++#ifndef AVUTIL_RPI_SAND_FNS
++#define AVUTIL_RPI_SAND_FNS
++
++#include "libavutil/frame.h"
++
++// For all these fns _x & _w are measured as coord * PW
++// For the C fns coords are in chroma pels (so luma / 2)
++// Strides are in bytes
++
++void av_rpi_sand_to_planar_y8(uint8_t * dst, const unsigned int dst_stride,
++ const uint8_t * src,
++ unsigned int stride1, unsigned int stride2,
++ unsigned int _x, unsigned int y,
++ unsigned int _w, unsigned int h);
++void av_rpi_sand_to_planar_y16(uint8_t * dst, const unsigned int dst_stride,
++ const uint8_t * src,
++ unsigned int stride1, unsigned int stride2,
++ unsigned int _x, unsigned int y,
++ unsigned int _w, unsigned int h);
++
++void av_rpi_sand_to_planar_c8(uint8_t * dst_u, const unsigned int dst_stride_u,
++ uint8_t * dst_v, const unsigned int dst_stride_v,
++ const uint8_t * src,
++ unsigned int stride1, unsigned int stride2,
++ unsigned int _x, unsigned int y,
++ unsigned int _w, unsigned int h);
++void av_rpi_sand_to_planar_c16(uint8_t * dst_u, const unsigned int dst_stride_u,
++ uint8_t * dst_v, const unsigned int dst_stride_v,
++ const uint8_t * src,
++ unsigned int stride1, unsigned int stride2,
++ unsigned int _x, unsigned int y,
++ unsigned int _w, unsigned int h);
++
++void av_rpi_planar_to_sand_c8(uint8_t * dst_c,
++ unsigned int stride1, unsigned int stride2,
++ const uint8_t * src_u, const unsigned int src_stride_u,
++ const uint8_t * src_v, const unsigned int src_stride_v,
++ unsigned int _x, unsigned int y,
++ unsigned int _w, unsigned int h);
++void av_rpi_planar_to_sand_c16(uint8_t * dst_c,
++ unsigned int stride1, unsigned int stride2,
++ const uint8_t * src_u, const unsigned int src_stride_u,
++ const uint8_t * src_v, const unsigned int src_stride_v,
++ unsigned int _x, unsigned int y,
++ unsigned int _w, unsigned int h);
++
++void av_rpi_sand30_to_planar_y16(uint8_t * dst, const unsigned int dst_stride,
++ const uint8_t * src,
++ unsigned int stride1, unsigned int stride2,
++ unsigned int _x, unsigned int y,
++ unsigned int _w, unsigned int h);
++void av_rpi_sand30_to_planar_c16(uint8_t * dst_u, const unsigned int dst_stride_u,
++ uint8_t * dst_v, const unsigned int dst_stride_v,
++ const uint8_t * src,
++ unsigned int stride1, unsigned int stride2,
++ unsigned int _x, unsigned int y,
++ unsigned int _w, unsigned int h);
++
++void av_rpi_sand30_to_planar_y8(uint8_t * dst, const unsigned int dst_stride,
++ const uint8_t * src,
++ unsigned int stride1, unsigned int stride2,
++ unsigned int _x, unsigned int y,
++ unsigned int _w, unsigned int h);
++
++// w/h in pixels
++void av_rpi_sand16_to_sand8(uint8_t * dst, const unsigned int dst_stride1, const unsigned int dst_stride2,
++ const uint8_t * src, const unsigned int src_stride1, const unsigned int src_stride2,
++ unsigned int w, unsigned int h, const unsigned int shr);
++
++
++// dst must contain required pixel format & allocated data buffers
++// Cropping on the src buffer will be honoured and dst crop will be set to zero
++int av_rpi_sand_to_planar_frame(AVFrame * const dst, const AVFrame * const src);
++
++
++static inline unsigned int av_rpi_sand_frame_stride1(const AVFrame * const frame)
++{
++#ifdef RPI_ZC_SAND128_ONLY
++ // If we are sure we only support 128 byte sand formats replace the
++ // var with a constant which should allow for better optimisation
++ return 128;
++#else
++ return frame->linesize[0]; // stride1 is carried in linesize[0]
++#endif
++}
++
++static inline unsigned int av_rpi_sand_frame_stride2(const AVFrame * const frame)
++{
++ return frame->linesize[3]; // stride2 is carried in linesize[3]
++}
++
++
++static inline int av_rpi_is_sand_format(const int format)
++{
++ return (format >= AV_PIX_FMT_SAND128 && format <= AV_PIX_FMT_RPI4_10); // Range test - presumably the sand formats are contiguous in the enum; confirm in pixfmt.h
++}
++
++static inline int av_rpi_is_sand_frame(const AVFrame * const frame)
++{
++ return av_rpi_is_sand_format(frame->format); // Frame wrapper around the format test
++}
++
++static inline int av_rpi_is_sand8_frame(const AVFrame * const frame)
++{
++ return (frame->format == AV_PIX_FMT_SAND128 || frame->format == AV_PIX_FMT_RPI4_8); // Non-zero for either of the two 8-bit sand formats
++}
++
++static inline int av_rpi_is_sand16_frame(const AVFrame * const frame)
++{
++ return (frame->format >= AV_PIX_FMT_SAND64_10 && frame->format <= AV_PIX_FMT_SAND64_16); // Range test - presumably the SAND64 formats are contiguous in the enum; confirm in pixfmt.h
++}
++
++static inline int av_rpi_is_sand30_frame(const AVFrame * const frame)
++{
++ return (frame->format == AV_PIX_FMT_RPI4_10); // RPI4_10 is the only format treated as sand30
++}
++
++static inline int av_rpi_sand_frame_xshl(const AVFrame * const frame)
++{
++ return av_rpi_is_sand8_frame(frame) ? 0 : 1; // Shift turning a pixel x into a byte x: 0 for 8-bit, else 1. NOTE(review): also 1 for RPI4_10 - confirm callers never use it for sand30
++}
++
++// If x is measured in bytes (not pixels) then this works for sand64_16 as
++// well as sand128 - but in the general case we work that out
++
++static inline unsigned int av_rpi_sand_frame_off_y(const AVFrame * const frame, const unsigned int x_y, const unsigned int y)
++{
++ const unsigned int stride1 = av_rpi_sand_frame_stride1(frame);
++ const unsigned int stride2 = av_rpi_sand_frame_stride2(frame);
++ const unsigned int x = x_y << av_rpi_sand_frame_xshl(frame); // Luma pixel x -> byte x
++ const unsigned int x1 = x & (stride1 - 1); // Byte offset within the stripe (relies on stride1 being a power of 2)
++ const unsigned int x2 = x ^ x1; // Byte x of the start of the stripe
++
++ return x1 + stride1 * y + stride2 * x2; // Byte offset of (x_y, y) from the start of the luma plane
++}
++
++static inline unsigned int av_rpi_sand_frame_off_c(const AVFrame * const frame, const unsigned int x_c, const unsigned int y_c)
++{
++ const unsigned int stride1 = av_rpi_sand_frame_stride1(frame);
++ const unsigned int stride2 = av_rpi_sand_frame_stride2(frame);
++ const unsigned int x = x_c << (av_rpi_sand_frame_xshl(frame) + 1); // Chroma pel x -> byte x (one extra doubling vs luma)
++ const unsigned int x1 = x & (stride1 - 1); // Byte offset within the stripe (relies on stride1 being a power of 2)
++ const unsigned int x2 = x ^ x1; // Byte x of the start of the stripe
++
++ return x1 + stride1 * y_c + stride2 * x2; // Byte offset of (x_c, y_c) from the start of the chroma plane
++}
++
++static inline uint8_t * av_rpi_sand_frame_pos_y(const AVFrame * const frame, const unsigned int x, const unsigned int y)
++{
++ return frame->data[0] + av_rpi_sand_frame_off_y(frame, x, y); // Luma lives in data[0]
++}
++
++static inline uint8_t * av_rpi_sand_frame_pos_c(const AVFrame * const frame, const unsigned int x, const unsigned int y)
++{
++ return frame->data[1] + av_rpi_sand_frame_off_c(frame, x, y); // Chroma lives in data[1]
++}
++
++#endif
++
+--- a/libswscale/aarch64/rgb2rgb.c
++++ b/libswscale/aarch64/rgb2rgb.c
+@@ -30,6 +30,12 @@
+ void ff_interleave_bytes_neon(const uint8_t *src1, const uint8_t *src2,
+ uint8_t *dest, int width, int height,
+ int src1Stride, int src2Stride, int dstStride);
++void ff_bgr24toyv12_aarch64(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
++ uint8_t *vdst, int width, int height, int lumStride,
++ int chromStride, int srcStride, int32_t *rgb2yuv);
++void ff_rgb24toyv12_aarch64(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
++ uint8_t *vdst, int width, int height, int lumStride,
++ int chromStride, int srcStride, int32_t *rgb2yuv);
+
+ av_cold void rgb2rgb_init_aarch64(void)
+ {
+@@ -37,5 +43,7 @@ av_cold void rgb2rgb_init_aarch64(void)
+
+ if (have_neon(cpu_flags)) {
+ interleaveBytes = ff_interleave_bytes_neon;
++ ff_rgb24toyv12 = ff_rgb24toyv12_aarch64;
++ ff_bgr24toyv12 = ff_bgr24toyv12_aarch64;
+ }
+ }
+--- a/libswscale/aarch64/rgb2rgb_neon.S
++++ b/libswscale/aarch64/rgb2rgb_neon.S
+@@ -77,3 +77,359 @@ function ff_interleave_bytes_neon, expor
+ 0:
+ ret
+ endfunc
++
++// Zero-extend the 16 bytes in each of r2/g2/b2 to halfwords: low 8 -> r0/g0/b0, high 8 -> r1/g1/b1
++.macro XRGB3Y r0, g0, b0, r1, g1, b1, r2, g2, b2
++ uxtl \r0\().8h, \r2\().8b // Bytes 0-7
++ uxtl \g0\().8h, \g2\().8b
++ uxtl \b0\().8h, \b2\().8b
++
++ uxtl2 \r1\().8h, \r2\().16b // Bytes 8-15
++ uxtl2 \g1\().8h, \g2\().16b
++ uxtl2 \b1\().8h, \b2\().16b
++.endm
++
++// Expand rgb2 into r0+r1/g0+g1/b0+b1
++// and pick every other el to put back into rgb2 for chroma
++.macro XRGB3YC r0, g0, b0, r1, g1, b1, r2, g2, b2
++ XRGB3Y \r0, \g0, \b0, \r1, \g1, \b1, \r2, \g2, \b2
++
++ bic \r2\().8h, #0xff, LSL #8 // Clear the top byte of each halfword, leaving bytes 0, 2, 4... as 8 halfwords
++ bic \g2\().8h, #0xff, LSL #8
++ bic \b2\().8h, #0xff, LSL #8
++.endm
++
++.macro SMLAL3 d0, d1, s0, s1, s2, c0, c1, c2
++ smull \d0\().4s, \s0\().4h, \c0 // d0 = s0 * c0 + s1 * c1 + s2 * c2 for halfwords 0-3 (32-bit sums)
++ smlal \d0\().4s, \s1\().4h, \c1
++ smlal \d0\().4s, \s2\().4h, \c2
++ smull2 \d1\().4s, \s0\().8h, \c0 // d1 = the same sum for halfwords 4-7
++ smlal2 \d1\().4s, \s1\().8h, \c1
++ smlal2 \d1\().4s, \s2\().8h, \c2
++.endm
++
++// Narrow the four .4s Y accumulators s0-s3 to 16 bytes in d0; d0 may be s0
++// s0, s2 corrupted
++.macro SHRN_Y d0, s0, s1, s2, s3, k128h
++ shrn \s0\().4h, \s0\().4s, #12 // s0:s1 -> 8 halfwords in s0
++ shrn2 \s0\().8h, \s1\().4s, #12
++ add \s0\().8h, \s0\().8h, \k128h\().8h // +128 (>> 3 = 16)
++ sqrshrun \d0\().8b, \s0\().8h, #3 // Rounding >> 3, saturate to unsigned bytes 0-7 of d0
++ shrn \s2\().4h, \s2\().4s, #12 // s2:s3 -> 8 halfwords in s2
++ shrn2 \s2\().8h, \s3\().4s, #12
++ add \s2\().8h, \s2\().8h, \k128h\().8h
++ sqrshrun2 \d0\().16b, \s2\().8h, #3 // Was hard-coded v28: only right because every caller passes v28 as s2
++.endm
++
++.macro SHRN_C d0, s0, s1, k128b
++ shrn \s0\().4h, \s0\().4s, #14 // s0:s1 (.4s chroma sums) -> 8 halfwords in s0; s0 is corrupted
++ shrn2 \s0\().8h, \s1\().4s, #14
++ sqrshrn \s0\().8b, \s0\().8h, #1 // Rounding >> 1, saturate to signed bytes
++ add \d0\().8b, \s0\().8b, \k128b\().8b // +128
++.endm
++
++.macro STB2V s0, n, a
++ st1 {\s0\().b}[(\n+0)], [\a], #1 // Store byte lanes n and n+1 of s0 at [a], advancing a by 1 each time
++ st1 {\s0\().b}[(\n+1)], [\a], #1
++.endm
++
++.macro STB4V s0, n, a
++ STB2V \s0, (\n+0), \a // Store byte lanes n..n+3 of s0 at [a], advancing a by 4 in total
++ STB2V \s0, (\n+2), \a
++.endm
++
++
++// void ff_rgb24toyv12_aarch64(
++// const uint8_t *src, // x0
++// uint8_t *ydst, // x1
++// uint8_t *udst, // x2
++// uint8_t *vdst, // x3
++// int width, // w4
++// int height, // w5
++// int lumStride, // w6
++// int chromStride, // w7
++// int srcStr, // [sp, #0]
++// int32_t *rgb2yuv); // [sp, #8]
++
++function ff_rgb24toyv12_aarch64, export=1
++ ldr x15, [sp, #8] // x15 = rgb2yuv
++ ld3 {v3.s, v4.s, v5.s}[0], [x15], #12 // De-interleave the table: lane 0 of v3/v4/v5 = rgb2yuv[0..2]
++ ld3 {v3.s, v4.s, v5.s}[1], [x15], #12 // lane 1 = rgb2yuv[3..5]
++ ld3 {v3.s, v4.s, v5.s}[2], [x15] // lane 2 = rgb2yuv[6..8]
++ mov v6.16b, v3.16b // Swap v3 <-> v5 via v6 (v6 is set up again after label 99)
++ mov v3.16b, v5.16b
++ mov v5.16b, v6.16b
++ b 99f // Continue in the shared body at label 99 inside ff_bgr24toyv12_aarch64
++endfunc
++
++// void ff_bgr24toyv12_aarch64(
++// const uint8_t *src, // x0
++// uint8_t *ydst, // x1
++// uint8_t *udst, // x2
++// uint8_t *vdst, // x3
++// int width, // w4
++// int height, // w5
++// int lumStride, // w6
++// int chromStride, // w7
++// int srcStr, // [sp, #0]
++// int32_t *rgb2yuv); // [sp, #8] (including Mac)
++
++// regs
++// v0-2 Src bytes - reused as chroma src
++// v3-5 Coeffs (packed very inefficiently - could be squashed)
++// v6 128h
++// v7 128b
++// v8-15 Reserved
++// v16-18 Lo Src expanded as H
++// v19 -
++// v20-22 Hi Src expanded as H
++// v23 -
++// v24 U out
++// v25 U tmp
++// v26 Y out
++// v27-29 Y tmp
++// v30 V out
++// v31 V tmp
++
++function ff_bgr24toyv12_aarch64, export=1
++ ldr x15, [sp, #8] // x15 = rgb2yuv
++ ld3 {v3.s, v4.s, v5.s}[0], [x15], #12 // De-interleave the table: lane 0 of v3/v4/v5 = rgb2yuv[0..2]
++ ld3 {v3.s, v4.s, v5.s}[1], [x15], #12 // lane 1 = rgb2yuv[3..5]
++ ld3 {v3.s, v4.s, v5.s}[2], [x15] // lane 2 = rgb2yuv[6..8]
++
++99:
++ ldr w14, [sp, #0] // w14 = srcStr
++ movi v7.8b, #128 // v7 = 128 in each byte (added by SHRN_C)
++ uxtl v6.8h, v7.8b // v6 = 128 in each halfword (added by SHRN_Y)
++ // Ensure if nothing to do then we do nothing
++ cmp w4, #0
++ b.le 90f
++ cmp w5, #0
++ b.le 90f
++ // If w % 16 != 0 then -16 so we do main loop 1 fewer times with
++ // the remainder done in the tail
++ tst w4, #15
++ b.eq 1f
++ sub w4, w4, #16
++1:
++
++// -------------------- Even line body - YUV
++11:
++ subs w9, w4, #0 // w9 = pels left for the 16-wide loop; negative when width < 16
++ mov x10, x0
++ mov x11, x1
++ mov x12, x2
++ mov x13, x3
++ b.lt 12f
++
++ ld3 {v0.16b, v1.16b, v2.16b}, [x10], #48
++ subs w9, w9, #16
++ b.le 13f
++
++10:
++ XRGB3YC v16, v17, v18, v20, v21, v22, v0, v1, v2
++
++ // Testing shows it is faster to stack the smull/smlal ops together
++ // rather than interleave them between channels and indeed even the
++ // shift/add sections seem happier not interleaved
++
++ // Y0
++ SMLAL3 v26, v27, v16, v17, v18, v3.h[0], v4.h[0], v5.h[0]
++ // Y1
++ SMLAL3 v28, v29, v20, v21, v22, v3.h[0], v4.h[0], v5.h[0]
++ SHRN_Y v26, v26, v27, v28, v29, v6
++
++ // U
++ // Vector subscript *2 as we loaded into S but are only using H
++ SMLAL3 v24, v25, v0, v1, v2, v3.h[2], v4.h[2], v5.h[2]
++
++ // V
++ SMLAL3 v30, v31, v0, v1, v2, v3.h[4], v4.h[4], v5.h[4]
++
++ ld3 {v0.16b, v1.16b, v2.16b}, [x10], #48
++
++ SHRN_C v24, v24, v25, v7
++ SHRN_C v30, v30, v31, v7
++
++ subs w9, w9, #16
++
++ st1 {v26.16b}, [x11], #16
++ st1 {v24.8b}, [x12], #8
++ st1 {v30.8b}, [x13], #8
++
++ b.gt 10b
++
++// -------------------- Even line tail - YUV
++// If width % 16 == 0 then simply runs once with preloaded RGB
++// If other then deals with preload & then does remaining tail
++
++13:
++ // Body is simple copy of main loop body minus preload
++
++ XRGB3YC v16, v17, v18, v20, v21, v22, v0, v1, v2
++ // Y0
++ SMLAL3 v26, v27, v16, v17, v18, v3.h[0], v4.h[0], v5.h[0]
++ // Y1
++ SMLAL3 v28, v29, v20, v21, v22, v3.h[0], v4.h[0], v5.h[0]
++ SHRN_Y v26, v26, v27, v28, v29, v6
++ // U
++ SMLAL3 v24, v25, v0, v1, v2, v3.h[2], v4.h[2], v5.h[2]
++ // V
++ SMLAL3 v30, v31, v0, v1, v2, v3.h[4], v4.h[4], v5.h[4]
++
++ cmp w9, #-16
++
++ SHRN_C v24, v24, v25, v7
++ SHRN_C v30, v30, v31, v7
++
++ // Here:
++ // w9 == 0 width % 16 == 0, tail done
++ // w9 > -16 1st tail done (16 pels), remainder still to go
++ // w9 == -16 shouldn't happen
++ // w9 > -32 2nd tail done
++ // w9 <= -32 shouldn't happen
++
++ b.lt 2f
++ st1 {v26.16b}, [x11], #16
++ st1 {v24.8b}, [x12], #8
++ st1 {v30.8b}, [x13], #8
++ cbz w9, 3f
++
++12:
++ sub w9, w9, #16 // Low 4 bits of w9 now select the 8/4/2/1 pel pieces of the remainder
++
++ tbz w9, #3, 1f
++ ld3 {v0.8b, v1.8b, v2.8b}, [x10], #24
++1: tbz w9, #2, 1f
++ ld3 {v0.b, v1.b, v2.b}[8], [x10], #3
++ ld3 {v0.b, v1.b, v2.b}[9], [x10], #3
++ ld3 {v0.b, v1.b, v2.b}[10], [x10], #3
++ ld3 {v0.b, v1.b, v2.b}[11], [x10], #3
++1: tbz w9, #1, 1f
++ ld3 {v0.b, v1.b, v2.b}[12], [x10], #3
++ ld3 {v0.b, v1.b, v2.b}[13], [x10], #3
++1: tbz w9, #0, 13b
++ ld3 {v0.b, v1.b, v2.b}[14], [x10], #3
++ b 13b
++
++2:
++ tbz w9, #3, 1f
++ st1 {v26.8b}, [x11], #8
++ STB4V v24, 0, x12
++ STB4V v30, 0, x13
++1: tbz w9, #2, 1f
++ STB4V v26, 8, x11
++ STB2V v24, 4, x12
++ STB2V v30, 4, x13
++1: tbz w9, #1, 1f
++ STB2V v26, 12, x11
++ st1 {v24.b}[6], [x12], #1
++ st1 {v30.b}[6], [x13], #1
++1: tbz w9, #0, 1f
++ st1 {v26.b}[14], [x11]
++ st1 {v24.b}[7], [x12]
++ st1 {v30.b}[7], [x13]
++1:
++3:
++
++// -------------------- Odd line body - Y only
++
++ subs w5, w5, #1
++ b.eq 90f
++
++ subs w9, w4, #0
++ add x0, x0, w14, sxtw
++ add x1, x1, w6, sxtw
++ mov x10, x0
++ mov x11, x1
++ b.lt 12f
++
++ ld3 {v0.16b, v1.16b, v2.16b}, [x10], #48
++ subs w9, w9, #16
++ b.le 13f
++
++10:
++ XRGB3Y v16, v17, v18, v20, v21, v22, v0, v1, v2
++ // Y0
++ SMLAL3 v26, v27, v16, v17, v18, v3.h[0], v4.h[0], v5.h[0]
++ // Y1
++ SMLAL3 v28, v29, v20, v21, v22, v3.h[0], v4.h[0], v5.h[0]
++
++ ld3 {v0.16b, v1.16b, v2.16b}, [x10], #48
++
++ SHRN_Y v26, v26, v27, v28, v29, v6
++
++ subs w9, w9, #16
++
++ st1 {v26.16b}, [x11], #16
++
++ b.gt 10b
++
++// -------------------- Odd line tail - Y
++// If width % 16 == 0 then simply runs once with preloaded RGB
++// If other then deals with preload & then does remaining tail
++
++13:
++ // Body is simple copy of main loop body minus preload
++
++ XRGB3Y v16, v17, v18, v20, v21, v22, v0, v1, v2
++ // Y0
++ SMLAL3 v26, v27, v16, v17, v18, v3.h[0], v4.h[0], v5.h[0]
++ // Y1
++ SMLAL3 v28, v29, v20, v21, v22, v3.h[0], v4.h[0], v5.h[0]
++
++ cmp w9, #-16
++
++ SHRN_Y v26, v26, v27, v28, v29, v6
++
++ // Here:
++ // w9 == 0 width % 16 == 0, tail done
++ // w9 > -16 1st tail done (16 pels), remainder still to go
++ // w9 == -16 shouldn't happen
++ // w9 > -32 2nd tail done
++ // w9 <= -32 shouldn't happen
++
++ b.lt 2f
++ st1 {v26.16b}, [x11], #16
++ cbz w9, 3f
++
++12:
++ sub w9, w9, #16
++
++ tbz w9, #3, 1f
++ ld3 {v0.8b, v1.8b, v2.8b}, [x10], #24
++1: tbz w9, #2, 1f
++ ld3 {v0.b, v1.b, v2.b}[8], [x10], #3
++ ld3 {v0.b, v1.b, v2.b}[9], [x10], #3
++ ld3 {v0.b, v1.b, v2.b}[10], [x10], #3
++ ld3 {v0.b, v1.b, v2.b}[11], [x10], #3
++1: tbz w9, #1, 1f
++ ld3 {v0.b, v1.b, v2.b}[12], [x10], #3
++ ld3 {v0.b, v1.b, v2.b}[13], [x10], #3
++1: tbz w9, #0, 13b
++ ld3 {v0.b, v1.b, v2.b}[14], [x10], #3
++ b 13b
++
++2:
++ tbz w9, #3, 1f
++ st1 {v26.8b}, [x11], #8
++1: tbz w9, #2, 1f
++ STB4V v26, 8, x11
++1: tbz w9, #1, 1f
++ STB2V v26, 12, x11
++1: tbz w9, #0, 1f
++ st1 {v26.b}[14], [x11]
++1:
++3:
++
++// ------------------- Loop to start
++
++ add x0, x0, w14, sxtw
++ add x1, x1, w6, sxtw
++ add x2, x2, w7, sxtw
++ add x3, x3, w7, sxtw
++ subs w5, w5, #1
++ b.gt 11b
++90:
++ ret
++endfunc
+--- a/libswscale/rgb2rgb.c
++++ b/libswscale/rgb2rgb.c
+@@ -83,6 +83,31 @@ void (*ff_rgb24toyv12)(const uint8_t *sr
+ int width, int height,
+ int lumStride, int chromStride, int srcStride,
+ int32_t *rgb2yuv);
++void (*ff_bgr24toyv12)(const uint8_t *src, uint8_t *ydst,
++ uint8_t *udst, uint8_t *vdst,
++ int width, int height,
++ int lumStride, int chromStride, int srcStride,
++ int32_t *rgb2yuv);
++void (*ff_rgbxtoyv12)(const uint8_t *src, uint8_t *ydst,
++ uint8_t *udst, uint8_t *vdst,
++ int width, int height,
++ int lumStride, int chromStride, int srcStride,
++ int32_t *rgb2yuv);
++void (*ff_bgrxtoyv12)(const uint8_t *src, uint8_t *ydst,
++ uint8_t *udst, uint8_t *vdst,
++ int width, int height,
++ int lumStride, int chromStride, int srcStride,
++ int32_t *rgb2yuv);
++void (*ff_xrgbtoyv12)(const uint8_t *src, uint8_t *ydst,
++ uint8_t *udst, uint8_t *vdst,
++ int width, int height,
++ int lumStride, int chromStride, int srcStride,
++ int32_t *rgb2yuv);
++void (*ff_xbgrtoyv12)(const uint8_t *src, uint8_t *ydst,
++ uint8_t *udst, uint8_t *vdst,
++ int width, int height,
++ int lumStride, int chromStride, int srcStride,
++ int32_t *rgb2yuv);
+ void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height,
+ int srcStride, int dstStride);
+ void (*interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst,
+--- a/libswscale/rgb2rgb.h
++++ b/libswscale/rgb2rgb.h
+@@ -79,6 +79,9 @@ void rgb12to15(const uint8_t *src, ui
+ void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
+ uint8_t *vdst, int width, int height, int lumStride,
+ int chromStride, int srcStride, int32_t *rgb2yuv);
++void ff_bgr24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
++ uint8_t *vdst, int width, int height, int lumStride,
++ int chromStride, int srcStride, int32_t *rgb2yuv);
+
+ /**
+ * Height should be a multiple of 2 and width should be a multiple of 16.
+@@ -128,6 +131,26 @@ extern void (*ff_rgb24toyv12)(const uint
+ int width, int height,
+ int lumStride, int chromStride, int srcStride,
+ int32_t *rgb2yuv);
++extern void (*ff_bgr24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
++ int width, int height,
++ int lumStride, int chromStride, int srcStride,
++ int32_t *rgb2yuv);
++extern void (*ff_rgbxtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
++ int width, int height,
++ int lumStride, int chromStride, int srcStride,
++ int32_t *rgb2yuv);
++extern void (*ff_bgrxtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
++ int width, int height,
++ int lumStride, int chromStride, int srcStride,
++ int32_t *rgb2yuv);
++extern void (*ff_xrgbtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
++ int width, int height,
++ int lumStride, int chromStride, int srcStride,
++ int32_t *rgb2yuv);
++extern void (*ff_xbgrtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
++ int width, int height,
++ int lumStride, int chromStride, int srcStride,
++ int32_t *rgb2yuv);
+ extern void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height,
+ int srcStride, int dstStride);
+
+--- a/libswscale/rgb2rgb_template.c
++++ b/libswscale/rgb2rgb_template.c
+@@ -646,13 +646,14 @@ static inline void uyvytoyv12_c(const ui
+ * others are ignored in the C version.
+ * FIXME: Write HQ version.
+ */
+-void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
++static void rgb24toyv12_x(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
+ uint8_t *vdst, int width, int height, int lumStride,
+- int chromStride, int srcStride, int32_t *rgb2yuv)
++ int chromStride, int srcStride, int32_t *rgb2yuv,
++ const uint8_t x[9])
+ {
+- int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX];
+- int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
+- int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX];
++ int32_t ry = rgb2yuv[x[0]], gy = rgb2yuv[x[1]], by = rgb2yuv[x[2]];
++ int32_t ru = rgb2yuv[x[3]], gu = rgb2yuv[x[4]], bu = rgb2yuv[x[5]];
++ int32_t rv = rgb2yuv[x[6]], gv = rgb2yuv[x[7]], bv = rgb2yuv[x[8]];
+ int y;
+ const int chromWidth = width >> 1;
+
+@@ -678,6 +679,19 @@ void ff_rgb24toyv12_c(const uint8_t *src
+ Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
+ ydst[2 * i + 1] = Y;
+ }
++ if ((width & 1) != 0) {
++ unsigned int b = src[6 * i + 0];
++ unsigned int g = src[6 * i + 1];
++ unsigned int r = src[6 * i + 2];
++
++ unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
++ unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128;
++ unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128;
++
++ udst[i] = U;
++ vdst[i] = V;
++ ydst[2 * i] = Y;
++ }
+ ydst += lumStride;
+ src += srcStride;
+
+@@ -700,6 +714,15 @@ void ff_rgb24toyv12_c(const uint8_t *src
+ Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
+ ydst[2 * i + 1] = Y;
+ }
++ if ((width & 1) != 0) {
++ unsigned int b = src[6 * i + 0];
++ unsigned int g = src[6 * i + 1];
++ unsigned int r = src[6 * i + 2];
++
++ unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
++
++ ydst[2 * i] = Y;
++ }
+ udst += chromStride;
+ vdst += chromStride;
+ ydst += lumStride;
+@@ -707,6 +730,147 @@ void ff_rgb24toyv12_c(const uint8_t *src
+ }
+ }
+
++static const uint8_t x_rgb[9] = { // rgb2yuv[] indices for ry,gy,by / ru,gu,bu / rv,gv,bv in rgb24toyv12_x
++ RY_IDX, GY_IDX, BY_IDX,
++ RU_IDX, GU_IDX, BU_IDX,
++ RV_IDX, GV_IDX, BV_IDX,
++};
++
++static const uint8_t x_bgr[9] = { // Same layout as x_rgb but with the R and B indices exchanged in every row
++ BY_IDX, GY_IDX, RY_IDX,
++ BU_IDX, GU_IDX, RU_IDX,
++ BV_IDX, GV_IDX, RV_IDX,
++};
++
++void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
++ uint8_t *vdst, int width, int height, int lumStride,
++ int chromStride, int srcStride, int32_t *rgb2yuv)
++{
++ rgb24toyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_rgb); // Shared worker with the x_rgb coefficient order
++}
++
++void ff_bgr24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
++ uint8_t *vdst, int width, int height, int lumStride,
++ int chromStride, int srcStride, int32_t *rgb2yuv)
++{
++ rgb24toyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_bgr); // Shared worker with the R and B coefficients exchanged
++}
++
++static void rgbxtoyv12_x(const uint8_t *src, uint8_t *ydst, uint8_t *udst, // packed 4-bytes-per-pixel source -> separate Y, U and V planes
++ uint8_t *vdst, int width, int height, int lumStride,
++ int chromStride, int srcStride, int32_t *rgb2yuv,
++ const uint8_t x[9]) // x[]: which rgb2yuv[] entry feeds each of the nine coefficients below
++{
++ int32_t ry = rgb2yuv[x[0]], gy = rgb2yuv[x[1]], by = rgb2yuv[x[2]];
++ int32_t ru = rgb2yuv[x[3]], gu = rgb2yuv[x[4]], bu = rgb2yuv[x[5]];
++ int32_t rv = rgb2yuv[x[6]], gv = rgb2yuv[x[7]], bv = rgb2yuv[x[8]];
++ int y;
++ const int chromWidth = width >> 1; // number of complete two-pixel pairs per row
++
++ for (y = 0; y < height; y += 2) { // two source rows per iteration; udst/vdst advance once per iteration
++ int i;
++ for (i = 0; i < chromWidth; i++) { // first row of the pair: writes Y for both pixels and U/V from the first pixel only
++ unsigned int b = src[8 * i + 2]; // bytes 0..2 of the first pixel are read; byte 3 is never touched
++ unsigned int g = src[8 * i + 1];
++ unsigned int r = src[8 * i + 0];
++
++ unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
++ unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128;
++ unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128;
++
++ udst[i] = U;
++ vdst[i] = V;
++ ydst[2 * i] = Y;
++
++ b = src[8 * i + 6]; // bytes 4..6: second pixel of the pair (byte 7 is never touched)
++ g = src[8 * i + 5];
++ r = src[8 * i + 4];
++
++ Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
++ ydst[2 * i + 1] = Y;
++ }
++ if ((width & 1) != 0) { // odd width: i == chromWidth here, so this handles the single trailing pixel
++ unsigned int b = src[8 * i + 2];
++ unsigned int g = src[8 * i + 1];
++ unsigned int r = src[8 * i + 0];
++
++ unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
++ unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128;
++ unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128;
++
++ udst[i] = U;
++ vdst[i] = V;
++ ydst[2 * i] = Y;
++ }
++ ydst += lumStride;
++ src += srcStride;
++
++ if (y+1 == height) // odd height: there is no second row in the final pair
++ break;
++
++ for (i = 0; i < chromWidth; i++) { // second row of the pair: Y only, no U/V is computed or stored
++ unsigned int b = src[8 * i + 2];
++ unsigned int g = src[8 * i + 1];
++ unsigned int r = src[8 * i + 0];
++
++ unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
++
++ ydst[2 * i] = Y;
++
++ b = src[8 * i + 6];
++ g = src[8 * i + 5];
++ r = src[8 * i + 4];
++
++ Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
++ ydst[2 * i + 1] = Y;
++ }
++ if ((width & 1) != 0) { // odd width: trailing pixel of the second row, Y only
++ unsigned int b = src[8 * i + 2];
++ unsigned int g = src[8 * i + 1];
++ unsigned int r = src[8 * i + 0];
++
++ unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
++
++ ydst[2 * i] = Y;
++ }
++ udst += chromStride; // one chroma row consumed per two luma rows
++ vdst += chromStride;
++ ydst += lumStride;
++ src += srcStride;
++ }
++}
++
++static void ff_rgbxtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
++ uint8_t *vdst, int width, int height, int lumStride,
++ int chromStride, int srcStride, int32_t *rgb2yuv)
++{ // forwards to rgbxtoyv12_x with src unmodified and the x_rgb index table
++ rgbxtoyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_rgb);
++}
++
++static void ff_bgrxtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
++ uint8_t *vdst, int width, int height, int lumStride,
++ int chromStride, int srcStride, int32_t *rgb2yuv)
++{ // forwards to rgbxtoyv12_x with src unmodified and the x_bgr (R/B exchanged) index table
++ rgbxtoyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_bgr);
++}
++
++// As the general code does no SIMD-like ops simply adding 1 to the src address
++// will fix the ignored alpha position
++static void ff_xrgbtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
++ uint8_t *vdst, int width, int height, int lumStride,
++ int chromStride, int srcStride, int32_t *rgb2yuv)
++{ // src + 1 skips the leading byte of each 4-byte pixel; the helper then reads the following three bytes
++ rgbxtoyv12_x(src + 1, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_rgb);
++}
++
++static void ff_xbgrtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
++ uint8_t *vdst, int width, int height, int lumStride,
++ int chromStride, int srcStride, int32_t *rgb2yuv)
++{ // src + 1 skips the leading byte of each 4-byte pixel; x_bgr selects the R/B-exchanged coefficients
++ rgbxtoyv12_x(src + 1, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_bgr);
++}
++
++
+ static void interleaveBytes_c(const uint8_t *src1, const uint8_t *src2,
+ uint8_t *dest, int width, int height,
+ int src1Stride, int src2Stride, int dstStride)
+@@ -980,6 +1144,11 @@ static av_cold void rgb2rgb_init_c(void)
+ yuy2toyv12 = yuy2toyv12_c;
+ planar2x = planar2x_c;
+ ff_rgb24toyv12 = ff_rgb24toyv12_c;
++ ff_bgr24toyv12 = ff_bgr24toyv12_c;
++ ff_rgbxtoyv12 = ff_rgbxtoyv12_c;
++ ff_bgrxtoyv12 = ff_bgrxtoyv12_c;
++ ff_xrgbtoyv12 = ff_xrgbtoyv12_c;
++ ff_xbgrtoyv12 = ff_xbgrtoyv12_c;
+ interleaveBytes = interleaveBytes_c;
+ deinterleaveBytes = deinterleaveBytes_c;
+ vu9_to_vu12 = vu9_to_vu12_c;
+--- a/libswscale/swscale_unscaled.c
++++ b/libswscale/swscale_unscaled.c
+@@ -1654,6 +1654,91 @@ static int bgr24ToYv12Wrapper(SwsContext
+ return srcSliceH;
+ }
+
++static int rgb24ToYv12Wrapper(SwsContext *c, const uint8_t *src[],
++ int srcStride[], int srcSliceY, int srcSliceH,
++ uint8_t *dst[], int dstStride[])
++{ // unscaled slice converter: hands srcSliceH rows starting at srcSliceY to the packed-to-planar routine
++ ff_bgr24toyv12( // NOTE(review): the rgb24 wrapper calls the bgr24-named converter; confirm the crossed naming against bgr24ToYv12Wrapper
++ src[0],
++ dst[0] + srcSliceY * dstStride[0],
++ dst[1] + (srcSliceY >> 1) * dstStride[1], // chroma planes are addressed at half the luma row index
++ dst[2] + (srcSliceY >> 1) * dstStride[2],
++ c->srcW, srcSliceH,
++ dstStride[0], dstStride[1], srcStride[0],
++ c->input_rgb2yuv_table);
++ if (dst[3]) // a fourth destination plane exists: fill the slice's rows with 255
++ fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
++ return srcSliceH; // reports the number of rows handled
++}
++
++static int bgrxToYv12Wrapper(SwsContext *c, const uint8_t *src[],
++ int srcStride[], int srcSliceY, int srcSliceH,
++ uint8_t *dst[], int dstStride[])
++{ // unscaled slice converter: hands srcSliceH rows starting at srcSliceY to ff_bgrxtoyv12
++ ff_bgrxtoyv12(
++ src[0],
++ dst[0] + srcSliceY * dstStride[0],
++ dst[1] + (srcSliceY >> 1) * dstStride[1], // chroma planes are addressed at half the luma row index
++ dst[2] + (srcSliceY >> 1) * dstStride[2],
++ c->srcW, srcSliceH,
++ dstStride[0], dstStride[1], srcStride[0],
++ c->input_rgb2yuv_table);
++ if (dst[3]) // a fourth destination plane exists: fill the slice's rows with 255
++ fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
++ return srcSliceH; // reports the number of rows handled
++}
++
++static int rgbxToYv12Wrapper(SwsContext *c, const uint8_t *src[],
++ int srcStride[], int srcSliceY, int srcSliceH,
++ uint8_t *dst[], int dstStride[])
++{ // unscaled slice converter: hands srcSliceH rows starting at srcSliceY to ff_rgbxtoyv12
++ ff_rgbxtoyv12(
++ src[0],
++ dst[0] + srcSliceY * dstStride[0],
++ dst[1] + (srcSliceY >> 1) * dstStride[1], // chroma planes are addressed at half the luma row index
++ dst[2] + (srcSliceY >> 1) * dstStride[2],
++ c->srcW, srcSliceH,
++ dstStride[0], dstStride[1], srcStride[0],
++ c->input_rgb2yuv_table);
++ if (dst[3]) // a fourth destination plane exists: fill the slice's rows with 255
++ fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
++ return srcSliceH; // reports the number of rows handled
++}
++
++static int xbgrToYv12Wrapper(SwsContext *c, const uint8_t *src[],
++ int srcStride[], int srcSliceY, int srcSliceH,
++ uint8_t *dst[], int dstStride[])
++{ // unscaled slice converter: hands srcSliceH rows starting at srcSliceY to ff_xbgrtoyv12
++ ff_xbgrtoyv12(
++ src[0],
++ dst[0] + srcSliceY * dstStride[0],
++ dst[1] + (srcSliceY >> 1) * dstStride[1], // chroma planes are addressed at half the luma row index
++ dst[2] + (srcSliceY >> 1) * dstStride[2],
++ c->srcW, srcSliceH,
++ dstStride[0], dstStride[1], srcStride[0],
++ c->input_rgb2yuv_table);
++ if (dst[3]) // a fourth destination plane exists: fill the slice's rows with 255
++ fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
++ return srcSliceH; // reports the number of rows handled
++}
++
++static int xrgbToYv12Wrapper(SwsContext *c, const uint8_t *src[],
++ int srcStride[], int srcSliceY, int srcSliceH,
++ uint8_t *dst[], int dstStride[])
++{ // unscaled slice converter: hands srcSliceH rows starting at srcSliceY to ff_xrgbtoyv12
++ ff_xrgbtoyv12(
++ src[0],
++ dst[0] + srcSliceY * dstStride[0],
++ dst[1] + (srcSliceY >> 1) * dstStride[1], // chroma planes are addressed at half the luma row index
++ dst[2] + (srcSliceY >> 1) * dstStride[2],
++ c->srcW, srcSliceH,
++ dstStride[0], dstStride[1], srcStride[0],
++ c->input_rgb2yuv_table);
++ if (dst[3]) // a fourth destination plane exists: fill the slice's rows with 255
++ fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
++ return srcSliceH; // reports the number of rows handled
++}
++
+ static int yvu9ToYv12Wrapper(SwsContext *c, const uint8_t *src[],
+ int srcStride[], int srcSliceY, int srcSliceH,
+ uint8_t *dst[], int dstStride[])
+@@ -1977,7 +2062,6 @@ void ff_get_unscaled_swscale(SwsContext
+ const enum AVPixelFormat dstFormat = c->dstFormat;
+ const int flags = c->flags;
+ const int dstH = c->dstH;
+- const int dstW = c->dstW;
+ int needsDither;
+
+ needsDither = isAnyRGB(dstFormat) &&
+@@ -2035,8 +2119,34 @@ void ff_get_unscaled_swscale(SwsContext
+ /* bgr24toYV12 */
+ if (srcFormat == AV_PIX_FMT_BGR24 &&
+ (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) &&
+- !(flags & SWS_ACCURATE_RND) && !(dstW&1))
++ !(flags & SWS_ACCURATE_RND))
+ c->convert_unscaled = bgr24ToYv12Wrapper;
++ /* rgb24toYV12 */
++ if (srcFormat == AV_PIX_FMT_RGB24 &&
++ (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) &&
++ !(flags & SWS_ACCURATE_RND))
++ c->convert_unscaled = rgb24ToYv12Wrapper;
++
++ /* bgrxtoYV12 */
++ if (((srcFormat == AV_PIX_FMT_BGRA && dstFormat == AV_PIX_FMT_YUV420P) ||
++ (srcFormat == AV_PIX_FMT_BGR0 && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) &&
++ !(flags & SWS_ACCURATE_RND))
++ c->convert_unscaled = bgrxToYv12Wrapper;
++ /* rgbx24toYV12 */
++ if (((srcFormat == AV_PIX_FMT_RGBA && dstFormat == AV_PIX_FMT_YUV420P) ||
++ (srcFormat == AV_PIX_FMT_RGB0 && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) &&
++ !(flags & SWS_ACCURATE_RND))
++ c->convert_unscaled = rgbxToYv12Wrapper;
++ /* xbgrtoYV12 */
++ if (((srcFormat == AV_PIX_FMT_ABGR && dstFormat == AV_PIX_FMT_YUV420P) ||
++ (srcFormat == AV_PIX_FMT_0BGR && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) &&
++ !(flags & SWS_ACCURATE_RND))
++ c->convert_unscaled = xbgrToYv12Wrapper;
++ /* xrgb24toYV12 */
++ if (((srcFormat == AV_PIX_FMT_ARGB && dstFormat == AV_PIX_FMT_YUV420P) ||
++ (srcFormat == AV_PIX_FMT_0RGB && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) &&
++ !(flags & SWS_ACCURATE_RND))
++ c->convert_unscaled = xrgbToYv12Wrapper;
+
+ /* RGB/BGR -> RGB/BGR (no dither needed forms) */
+ if (isAnyRGB(srcFormat) && isAnyRGB(dstFormat) && findRgbConvFn(c)
+--- a/libswscale/tests/swscale.c
++++ b/libswscale/tests/swscale.c
+@@ -23,6 +23,7 @@
+ #include <string.h>
+ #include <inttypes.h>
+ #include <stdarg.h>
++#include <time.h>
+
+ #undef HAVE_AV_CONFIG_H
+ #include "libavutil/cpu.h"
+@@ -78,6 +79,15 @@ struct Results {
+ uint32_t crc;
+ };
+
++static int time_rep = 0; // set by the -t option; when non-zero, doTest times a loop of repeated sws_scale calls
++
++static uint64_t utime(void) // current CLOCK_MONOTONIC reading expressed in microseconds
++{
++ struct timespec ts;
++ clock_gettime(CLOCK_MONOTONIC, &ts); // return value is not checked
++ return ts.tv_nsec / 1000 + (uint64_t)ts.tv_sec * 1000000; // ns -> us plus s -> us, widened to 64 bits
++}
++
+ // test by ref -> src -> dst -> out & compare out against ref
+ // ref & out are YV12
+ static int doTest(const uint8_t * const ref[4], int refStride[4], int w, int h,
+@@ -174,7 +184,7 @@ static int doTest(const uint8_t * const
+ goto end;
+ }
+
+- printf(" %s %dx%d -> %s %3dx%3d flags=%2d",
++ printf(" %s %4dx%4d -> %s %4dx%4d flags=%2d",
+ desc_src->name, srcW, srcH,
+ desc_dst->name, dstW, dstH,
+ flags);
+@@ -182,6 +192,17 @@ static int doTest(const uint8_t * const
+
+ sws_scale(dstContext, (const uint8_t * const*)src, srcStride, 0, srcH, dst, dstStride);
+
++ if (time_rep != 0)
++ {
++ const uint64_t now = utime();
++ uint64_t done;
++ for (i = 1; i != time_rep; ++i) {
++ sws_scale(dstContext, (const uint8_t * const*)src, srcStride, 0, srcH, dst, dstStride);
++ }
++ done = utime();
++ printf(" T=%7"PRId64"us ", done-now);
++ }
++
+ for (i = 0; i < 4 && dstStride[i]; i++)
+ crc = av_crc(av_crc_get_table(AV_CRC_32_IEEE), crc, dst[i],
+ dstStride[i] * dstH);
+@@ -355,56 +376,78 @@ static int fileTest(const uint8_t * cons
+ return 0;
+ }
+
+-#define W 96
+-#define H 96
+-
+ int main(int argc, char **argv)
+ {
++ unsigned int W = 96;
++ unsigned int H = 96;
++ unsigned int W2;
++ unsigned int H2;
++ unsigned int S;
+ enum AVPixelFormat srcFormat = AV_PIX_FMT_NONE;
+ enum AVPixelFormat dstFormat = AV_PIX_FMT_NONE;
+- uint8_t *rgb_data = av_malloc(W * H * 4);
+- const uint8_t * const rgb_src[4] = { rgb_data, NULL, NULL, NULL };
+- int rgb_stride[4] = { 4 * W, 0, 0, 0 };
+- uint8_t *data = av_malloc(4 * W * H);
+- const uint8_t * const src[4] = { data, data + W * H, data + W * H * 2, data + W * H * 3 };
+- int stride[4] = { W, W, W, W };
+ int x, y;
+ struct SwsContext *sws;
+ AVLFG rand;
+ int res = -1;
+ int i;
+ FILE *fp = NULL;
+-
+- if (!rgb_data || !data)
+- return -1;
++ uint8_t *rgb_data;
++ uint8_t * rgb_src[4] = { NULL };
++ int rgb_stride[4] = { 0 };
++ uint8_t *data;
++ uint8_t * src[4] = { NULL };
++ int stride[4] = { 0 };
+
+ for (i = 1; i < argc; i += 2) {
++ const char * const arg2 = argv[i+1];
++
+ if (argv[i][0] != '-' || i + 1 == argc)
+ goto bad_option;
+ if (!strcmp(argv[i], "-ref")) {
+- fp = fopen(argv[i + 1], "r");
++ fp = fopen(arg2, "r");
+ if (!fp) {
+- fprintf(stderr, "could not open '%s'\n", argv[i + 1]);
++ fprintf(stderr, "could not open '%s'\n", arg2);
+ goto error;
+ }
+ } else if (!strcmp(argv[i], "-cpuflags")) {
+ unsigned flags = av_get_cpu_flags();
+- int ret = av_parse_cpu_caps(&flags, argv[i + 1]);
++ int ret = av_parse_cpu_caps(&flags, arg2);
+ if (ret < 0) {
+- fprintf(stderr, "invalid cpu flags %s\n", argv[i + 1]);
++ fprintf(stderr, "invalid cpu flags %s\n", arg2);
+ return ret;
+ }
+ av_force_cpu_flags(flags);
+ } else if (!strcmp(argv[i], "-src")) {
+- srcFormat = av_get_pix_fmt(argv[i + 1]);
++ srcFormat = av_get_pix_fmt(arg2);
+ if (srcFormat == AV_PIX_FMT_NONE) {
+- fprintf(stderr, "invalid pixel format %s\n", argv[i + 1]);
++ fprintf(stderr, "invalid pixel format %s\n", arg2);
+ return -1;
+ }
+ } else if (!strcmp(argv[i], "-dst")) {
+- dstFormat = av_get_pix_fmt(argv[i + 1]);
++ dstFormat = av_get_pix_fmt(arg2);
+ if (dstFormat == AV_PIX_FMT_NONE) {
+- fprintf(stderr, "invalid pixel format %s\n", argv[i + 1]);
++ fprintf(stderr, "invalid pixel format %s\n", arg2);
++ return -1;
++ }
++ } else if (!strcmp(argv[i], "-w")) {
++ char * p = NULL;
++ W = strtoul(arg2, &p, 0);
++ if (!W || *p) {
++ fprintf(stderr, "bad width %s\n", arg2);
++ return -1;
++ }
++ } else if (!strcmp(argv[i], "-h")) {
++ char * p = NULL;
++ H = strtoul(arg2, &p, 0);
++ if (!H || *p) {
++ fprintf(stderr, "bad height '%s'\n", arg2);
++ return -1;
++ }
++ } else if (!strcmp(argv[i], "-t")) {
++ char * p = NULL;
++ time_rep = (int)strtol(arg2, &p, 0);
++ if (*p) {
++ fprintf(stderr, "bad time repetitions '%s'\n", arg2);
+ return -1;
+ }
+ } else {
+@@ -414,15 +457,34 @@ bad_option:
+ }
+ }
+
+- sws = sws_getContext(W / 12, H / 12, AV_PIX_FMT_RGB32, W, H,
++ S = (W + 15) & ~15;
++ rgb_data = av_mallocz(S * H * 4);
++ rgb_src[0] = rgb_data;
++ rgb_stride[0] = 4 * S;
++ data = av_mallocz(4 * S * H);
++ src[0] = data;
++ src[1] = data + S * H;
++ src[2] = data + S * H * 2;
++ src[3] = data + S * H * 3;
++ stride[0] = S;
++ stride[1] = S;
++ stride[2] = S;
++ stride[3] = S;
++ H2 = H < 96 ? 8 : H / 12;
++ W2 = W < 96 ? 8 : W / 12;
++
++ if (!rgb_data || !data)
++ return -1;
++
++ sws = sws_getContext(W2, H2, AV_PIX_FMT_RGB32, W, H,
+ AV_PIX_FMT_YUVA420P, SWS_BILINEAR, NULL, NULL, NULL);
+
+ av_lfg_init(&rand, 1);
+
+ for (y = 0; y < H; y++)
+ for (x = 0; x < W * 4; x++)
+- rgb_data[ x + y * 4 * W] = av_lfg_get(&rand);
+- res = sws_scale(sws, rgb_src, rgb_stride, 0, H / 12, (uint8_t * const *) src, stride);
++ rgb_data[ x + y * 4 * S] = av_lfg_get(&rand);
++ res = sws_scale(sws, (const uint8_t * const *)rgb_src, rgb_stride, 0, H2, (uint8_t * const *) src, stride);
+ if (res < 0 || res != H) {
+ res = -1;
+ goto error;
+@@ -431,10 +493,10 @@ bad_option:
+ av_free(rgb_data);
+
+ if(fp) {
+- res = fileTest(src, stride, W, H, fp, srcFormat, dstFormat);
++ res = fileTest((const uint8_t * const *)src, stride, W, H, fp, srcFormat, dstFormat);
+ fclose(fp);
+ } else {
+- selfTest(src, stride, W, H, srcFormat, dstFormat);
++ selfTest((const uint8_t * const *)src, stride, W, H, srcFormat, dstFormat);
+ res = 0;
+ }
+ error:
+--- /dev/null
++++ b/pi-util/BUILD.txt
+@@ -0,0 +1,67 @@
++Building Pi FFmpeg
++==================
++
++Currently only building on a Pi is supported.
++This builds ffmpeg the way I've tested it
++
++Get all dependencies - the current package dependencies are good enough
++
++$ sudo apt-get build-dep ffmpeg
++
++Configure using the pi-util/conf_native.sh script
++-------------------------------------------------
++
++This sets the normal release options and creates an output dir to build into.
++The directory name will depend on system and options but will be under out/
++
++There are a few choices here
++ --mmal build including the legacy mmal-based decoders and zero-copy code
++ this requires appropriate libraries which currently will exist for
++ armv7 but not arm64
++ --noshared
++ Build a static image rather than a shared library one. Static is
++ easier for testing as there is no need to worry about library
++ paths being confused and therefore running the wrong code, Shared
++ is what is needed, in most cases, when building for use by other
++ programs.
++ --usr Set install dir to /usr (i.e. system default) rather than in
++ <builddir>/install
++
++So for a static build
++---------------------
++
++$ pi-util/conf_native.sh --noshared
++
++$ make -j8 -C out/<wherever the script said it was building to>
++
++You can now run ffmpeg directly from where it was built
++
++For a shared build
++------------------
++
++There are two choices here
++
++$ pi-util/conf_native.sh
++$ make -j8 -C out/<builddir> install
++
++This sets the install prefix to <builddir>/install and is probably what you
++want if you don't want to overwrite the system files.
++
++You can now set LD_LIBRARY_PATH appropriately and run ffmpeg from where it was
++built. You can copy the contents of <builddir>/install to /usr and that mostly
++works. The only downside is that paths in pkgconfig end up being set to the
++install directory in your build directory which may be less than ideal when
++building other packages.
++
++The alternative if you just want to replace the system libs is:
++
++$ pi-util/conf_native.sh --usr
++$ make -j8 -C out/<builddir>
++$ sudo pi-util/clean_usr_libs.sh
++$ sudo make -j8 -C out/<builddir> install
++
++The clean_usr_libs.sh step wipes any existing libs & includes (for all
++architectures) from the system which helps avoid confusion when running other
++progs as you can be sure you're not running old code which is unfortunately
++easy to do otherwise.
++
+--- /dev/null
++++ b/pi-util/NOTES.txt
+@@ -0,0 +1,69 @@
++Notes on the hevc_rpi decoder & associated support code
++-------------------------------------------------------
++
++There are 3 main parts to the existing code:
++
++1) The decoder - this is all in libavcodec as rpi_hevc*.
++
++2) A few filters to deal with Sand frames and a small patch to
++automatically select the sand->i420 converter when required.
++
++3) A kludge in ffmpeg.c to display the decoded video. This could & should
++be converted into a proper ffmpeg display module.
++
++
++Decoder
++-------
++
++The decoder is a modified version of the existing ffmpeg hevc decoder.
++Generally it is ~100% faster than the existing ffmpeg hevc s/w decoder.
++More complex bitstreams can be up to ~200% faster but particularly easy
++streams can cut its advantage down to ~50%. This means that a Pi3+ can
++display nearly all 8-bit 1080p30 streams and with some overclocking it can
++display most lower bitrate 10-bit 1080p30 streams - this latter case is
++not helped by the requirement to downsample to 8-bit before display on a
++Pi.
++
++It has had co-processor offload added for inter-pred and large block
++residual transform. Various parts have had optimized ARM NEON assembler
++added and the existing ARM asm sections have been profiled and
++re-optimized for A53. The main C code has been substantially reworked at
++its lower levels in an attempt to optimize it and minimize memory
++bandwidth. To some extent code paths that deal with frame types that it
++doesn't support have been pruned.
++
++It outputs frames in Broadcom Sand format. This is a somewhat annoying
++layout that doesn't fit into ffmpeg's standard frame descriptions. It has
++vertical stripes of 128 horizontal pixels (64 in 10 bit forms) with Y for
++the stripe followed by interleaved U & V, that is then followed by the Y
++for the next stripe, etc. The final stripe is always padded to
++stripe-width. This is used in an attempt to help with cache locality and
++cut down on the number of dram bank switches. It is annoying to use for
++inter-pred with conventional processing but the way the Pi QPU (which is
++used for inter-pred) works means that it has negligible downsides here and
++the improved memory performance exceeds the overhead of the increased
++complexity in the rest of the code.
++
++Frames must be allocated out of GPU memory (as otherwise they can't be
++accessed by the co-processors). Utility functions (in rpi_zc.c) have been
++written to make this easier. As the frames are already in GPU memory they
++can be displayed by the Pi h/w without any further copying.
++
++
++Known non-features
++------------------
++
++Frame allocation should probably be done in some other way in order to fit
++into the standard framework better.
++
++Sand frames are currently declared as software frames, there is an
++argument that they should be hardware frames but they aren't really.
++
++There must be a better way of auto-selecting the hevc_rpi decoder over the
++normal s/w hevc decoder, but I became confused by the existing h/w
++acceleration framework and what I wanted to do didn't seem to fit in
++neatly.
++
++Display should be a proper device rather than a kludge in ffmpeg.c
++
++
+--- /dev/null
++++ b/pi-util/TESTMESA.txt
+@@ -0,0 +1,82 @@
++# Setup & Build instructions for testing Argon30 mesa support (on Pi4)
++
++# These assume that the drm_mmal test for Sand8 has been built on this Pi
++# as build relies on many of the same files
++
++# 1st get everything required to build ffmpeg
++# If sources aren't already enabled on your Pi then enable them
++sudo su
++sed "s/#deb-src/deb-src/" /etc/apt/sources.list > /tmp/sources.list
++sed "s/#deb-src/deb-src/" /etc/apt/sources.list.d/raspi.list > /tmp/raspi.list
++mv /tmp/sources.list /etc/apt/
++mv /tmp/raspi.list /etc/apt/sources.list.d/
++apt update
++
++# Get dependencies
++sudo apt build-dep ffmpeg
++
++sudo apt install meson libepoxy-dev libxcb-dri3-dev libxcb1-dev libx11-dev libx11-xcb-dev libdrm-dev
++
++# Enable H265 V4L2 request decoder
++sudo su
++echo dtoverlay=rpivid-v4l2 >> /boot/config.txt
++# You may also want to add more CMA if you are going to try 4k videos
++# Change the dtoverlay=vc4-fkms-v3d line in config.txt to read
++# dtoverlay=vc4-fkms-v3d,cma-512
++reboot
++# Check it has turned up
++ls -la /dev/video*
++# This should include video19
++# crw-rw----+ 1 root video 81, 7 Aug 4 17:25 /dev/video19
++
++# Currently on the Pi the linux headers from the debian distro don't match
++# the kernel that we ship and we need to update them - hopefully this step
++# will be unneeded in the future
++sudo apt install git bc bison flex libssl-dev make
++git clone --depth=1 https://github.com/raspberrypi/linux --branch rpi-5.10.y
++cd linux
++KERNEL=kernel7l
++make bcm2711_defconfig
++make headers_install
++sudo cp -r usr/include/linux /usr/include
++cd ..
++
++# Config - this builds a statically linked ffmpeg which is easier for testing
++pi-util/conf_native.sh --noshared
++
++# Build (this is a bit dull)
++# If you want to poke the source the libavdevice/egl_vout.c contains the
++# output code -
++cd out/armv7-static-rel
++
++# Check that you have actually configured V4L2 request
++grep HEVC_V4L2REQUEST config.h
++# You are hoping for
++# #define CONFIG_HEVC_V4L2REQUEST_HWACCEL 1
++# if you get 0 then the config has failed
++
++make -j6
++
++# Grab test streams
++wget http://www.jell.yfish.us/media/jellyfish-3-mbps-hd-h264.mkv
++wget http://www.jell.yfish.us/media/jellyfish-3-mbps-hd-hevc.mkv
++wget http://www.jell.yfish.us/media/jellyfish-3-mbps-hd-hevc-10bit.mkv
++
++# Test i420 output (works currently)
++./ffmpeg -no_cvt_hw -vcodec h264_v4l2m2m -i jellyfish-3-mbps-hd-h264.mkv -f vout_egl -
++
++# Test Sand8 output - doesn't currently work but should once you have
++# Sand8 working in drm_mmal. I can't guarantee that this will work as
++# I can't test this path with a known working format, but the debug looks
++# good. If this doesn't work & drm_mmal does with sand8 then come back to me
++# The "show_all 1" forces vout to display every frame otherwise it drops any
++# frame that would cause it to block
++./ffmpeg -no_cvt_hw -hwaccel drm -vcodec hevc -i jellyfish-3-mbps-hd-hevc.mkv -show_all 1 -f vout_egl -
++
++# Test Sand30 - doesn't currently work
++# (Beware that when FFmpeg errors out it often leaves your terminal window
++# in a state where you need to reset it)
++./ffmpeg -no_cvt_hw -hwaccel drm -vcodec hevc -i jellyfish-3-mbps-hd-hevc-10bit.mkv -f vout_egl -
++
++
++
+--- /dev/null
++++ b/pi-util/clean_usr_libs.sh
+@@ -0,0 +1,42 @@
++set -e # stop at the first command that fails
++U=/usr/include/arm-linux-gnueabihf # header directories under the arm-linux-gnueabihf include path
++rm -rf $U/libavcodec
++rm -rf $U/libavdevice
++rm -rf $U/libavfilter
++rm -rf $U/libavformat
++rm -rf $U/libavutil
++rm -rf $U/libswresample
++rm -rf $U/libswscale
++U=/usr/include/aarch64-linux-gnu # header directories under the aarch64-linux-gnu include path
++rm -rf $U/libavcodec
++rm -rf $U/libavdevice
++rm -rf $U/libavfilter
++rm -rf $U/libavformat
++rm -rf $U/libavutil
++rm -rf $U/libswresample
++rm -rf $U/libswscale
++U=/usr/lib/arm-linux-gnueabihf # library files (any suffix after the dot) in the arm-linux-gnueabihf lib dir
++rm -f $U/libavcodec.*
++rm -f $U/libavdevice.*
++rm -f $U/libavfilter.*
++rm -f $U/libavformat.*
++rm -f $U/libavutil.*
++rm -f $U/libswresample.*
++rm -f $U/libswscale.*
++U=/usr/lib/arm-linux-gnueabihf/neon/vfp # same library names in the neon/vfp subdirectory
++rm -f $U/libavcodec.*
++rm -f $U/libavdevice.*
++rm -f $U/libavfilter.*
++rm -f $U/libavformat.*
++rm -f $U/libavutil.*
++rm -f $U/libswresample.*
++rm -f $U/libswscale.*
++U=/usr/lib/aarch64-linux-gnu # library files in the aarch64-linux-gnu lib dir
++rm -f $U/libavcodec.*
++rm -f $U/libavdevice.*
++rm -f $U/libavfilter.*
++rm -f $U/libavformat.*
++rm -f $U/libavutil.*
++rm -f $U/libswresample.*
++rm -f $U/libswscale.*
++
+--- /dev/null
++++ b/pi-util/conf_arm64_native.sh
+@@ -0,0 +1,45 @@
++echo "Configure for ARM64 native build"
++
++#RPI_KEEPS="-save-temps=obj"
++
++SHARED_LIBS="--enable-shared"
++if [ "$1" == "--noshared" ]; then
++ SHARED_LIBS="--disable-shared"
++ echo Static libs
++ OUT=out/arm64-static-rel
++else
++ echo Shared libs
++ OUT=out/arm64-shared-rel
++fi
++
++mkdir -p $OUT
++cd $OUT
++
++A=aarch64-linux-gnu
++USR_PREFIX=`pwd`/install
++LIB_PREFIX=$USR_PREFIX/lib/$A
++INC_PREFIX=$USR_PREFIX/include/$A
++
++../../configure \
++ --prefix=$USR_PREFIX\
++ --libdir=$LIB_PREFIX\
++ --incdir=$INC_PREFIX\
++ --disable-stripping\
++ --disable-thumb\
++ --disable-mmal\
++ --enable-sand\
++ --enable-v4l2-request\
++ --enable-libdrm\
++ --enable-epoxy\
++ --enable-libudev\
++ --enable-vout-drm\
++ --enable-vout-egl\
++ $SHARED_LIBS\
++ --extra-cflags="-ggdb"
++
++# --enable-decoder=hevc_rpi\
++# --enable-extra-warnings\
++# --arch=armv71\
++
++# gcc option for getting asm listing
++# -Wa,-ahls
+--- /dev/null
++++ b/pi-util/conf_h265.2016.csv
+@@ -0,0 +1,195 @@
++1,HEVC_v1/AMP_A_Samsung_7,AMP_A_Samsung_7.bin,AMP_A_Samsung_7.md5,8
++1,HEVC_v1/AMP_B_Samsung_7,AMP_B_Samsung_7.bin,AMP_B_Samsung_7.md5,8
++1,HEVC_v1/AMP_D_Hisilicon_3,AMP_D_Hisilicon.bit,AMP_D_Hisilicon_3.yuv.md5,8
++1,HEVC_v1/AMP_E_Hisilicon_3,AMP_E_Hisilicon.bit,AMP_E_Hisilicon_3.yuv.md5,8
++1,HEVC_v1/AMP_F_Hisilicon_3,AMP_F_Hisilicon_3.bit,AMP_F_Hisilicon_3.yuv.md5,8
++1,HEVC_v1/AMVP_A_MTK_4,AMVP_A_MTK_4.bit,AMVP_A_MTK_4.md5,8
++1,HEVC_v1/AMVP_B_MTK_4,AMVP_B_MTK_4.bit,AMVP_B_MTK_4.md5,8
++1,HEVC_v1/AMVP_C_Samsung_7,AMVP_C_Samsung_7.bin,AMVP_C_Samsung_7.md5,8
++1,HEVC_v1/BUMPING_A_ericsson_1,BUMPING_A_ericsson_1.bit,BUMPING_A_ericsson_1.md5,8
++1,HEVC_v1/CAINIT_A_SHARP_4,CAINIT_A_SHARP_4.bit,CAINIT_A_SHARP_4.md5,8
++1,HEVC_v1/CAINIT_B_SHARP_4,CAINIT_B_SHARP_4.bit,CAINIT_B_SHARP_4.md5,8
++1,HEVC_v1/CAINIT_C_SHARP_3,CAINIT_C_SHARP_3.bit,CAINIT_C_SHARP_3.md5,8
++1,HEVC_v1/CAINIT_D_SHARP_3,CAINIT_D_SHARP_3.bit,CAINIT_D_SHARP_3.md5,8
++1,HEVC_v1/CAINIT_E_SHARP_3,CAINIT_E_SHARP_3.bit,CAINIT_E_SHARP_3.md5,8
++1,HEVC_v1/CAINIT_F_SHARP_3,CAINIT_F_SHARP_3.bit,CAINIT_F_SHARP_3.md5,8
++1,HEVC_v1/CAINIT_G_SHARP_3,CAINIT_G_SHARP_3.bit,CAINIT_G_SHARP_3.md5,8
++1,HEVC_v1/CAINIT_H_SHARP_3,CAINIT_H_SHARP_3.bit,CAINIT_H_SHARP_3.md5,8
++1,HEVC_v1/CIP_A_Panasonic_3,CIP_A_Panasonic_3.bit,CIP_A_Panasonic_3_yuv.md5,8
++1,HEVC_v1/cip_B_NEC_3,cip_B_NEC_3.bit,cip_B_NEC_3.md5,8
++1,HEVC_v1/CIP_C_Panasonic_2,CIP_C_Panasonic_2.bit,CIP_C_Panasonic_2_yuv.md5,8
++1,HEVC_v1/CONFWIN_A_Sony_1,CONFWIN_A_Sony_1.bit,CONFWIN_A_Sony_1.md5,8
++1,HEVC_v1/DBLK_A_MAIN10_VIXS_4,DBLK_A_MAIN10_VIXS_4.bit,DBLK_A_MAIN10_VIXS_4.md5,10
++1,HEVC_v1/DBLK_A_SONY_3,DBLK_A_SONY_3.bit,DBLK_A_SONY_3.bit.yuv.md5,8
++1,HEVC_v1/DBLK_B_SONY_3,DBLK_B_SONY_3.bit,DBLK_B_SONY_3.bit.yuv.md5,8
++1,HEVC_v1/DBLK_C_SONY_3,DBLK_C_SONY_3.bit,DBLK_C_SONY_3.bit.yuv.md5,8
++1,HEVC_v1/DBLK_D_VIXS_2,DBLK_D_VIXS_2.bit,DBLK_D_VIXS_2_yuv.md5,8
++1,HEVC_v1/DBLK_E_VIXS_2,DBLK_E_VIXS_2.bit,DBLK_E_VIXS_2_yuv.md5,8
++1,HEVC_v1/DBLK_F_VIXS_2,DBLK_F_VIXS_2.bit,DBLK_F_VIXS_2_yuv.md5,8
++1,HEVC_v1/DBLK_G_VIXS_2,DBLK_G_VIXS_2.bit,DBLK_G_VIXS_2_yuv.md5,8
++1,HEVC_v1/DELTAQP_A_BRCM_4,DELTAQP_A_BRCM_4.bit,DELTAQP_A_BRCM_4_yuv.md5,8
++1,HEVC_v1/DELTAQP_B_SONY_3,DELTAQP_B_SONY_3.bit,DELTAQP_B_SONY_3.bit.yuv.md5,8
++1,HEVC_v1/DELTAQP_C_SONY_3,DELTAQP_C_SONY_3.bit,DELTAQP_C_SONY_3.bit.yuv.md5,8
++1,HEVC_v1/DSLICE_A_HHI_5,DSLICE_A_HHI_5.bin,DSLICE_A_HHI_5.md5,8
++1,HEVC_v1/DSLICE_B_HHI_5,DSLICE_B_HHI_5.bin,DSLICE_B_HHI_5.md5,8
++1,HEVC_v1/DSLICE_C_HHI_5,DSLICE_C_HHI_5.bin,DSLICE_C_HHI_5.md5,8
++1,HEVC_v1/ENTP_A_QUALCOMM_1,ENTP_A_Qualcomm_1.bit,ENTP_A_Qualcomm_1.md5,8
++1,HEVC_v1/ENTP_B_Qualcomm_1,ENTP_B_Qualcomm_1.bit,ENTP_B_Qualcomm_1.md5,8
++1,HEVC_v1/ENTP_C_Qualcomm_1,ENTP_C_Qualcomm_1.bit,ENTP_C_Qualcomm_1.md5,8
++1,HEVC_v1/EXT_A_ericsson_4,EXT_A_ericsson_4.bit,EXT_A_ericsson_4.md5,8
++1,HEVC_v1/FILLER_A_Sony_1,FILLER_A_Sony_1.bit,FILLER_A_Sony_1.md5,8
++1,HEVC_v1/HRD_A_Fujitsu_3,HRD_A_Fujitsu_3.bin,HRD_A_Fujitsu_3.md5,8
++1,HEVC_v1/INITQP_A_Sony_1,INITQP_A_Sony_1.bit,INITQP_A_Sony_1.md5,8
++1,HEVC_v1/INITQP_B_Main10_Sony_1,INITQP_B_Main10_Sony_1.bit,INITQP_B_Main10_Sony_1.md5,10
++1,HEVC_v1/ipcm_A_NEC_3,ipcm_A_NEC_3.bit,ipcm_A_NEC_3.md5,8
++1,HEVC_v1/ipcm_B_NEC_3,ipcm_B_NEC_3.bit,ipcm_B_NEC_3.md5,8
++1,HEVC_v1/ipcm_C_NEC_3,ipcm_C_NEC_3.bit,ipcm_C_NEC_3.md5,8
++1,HEVC_v1/ipcm_D_NEC_3,ipcm_D_NEC_3.bit,ipcm_D_NEC_3.md5,8
++1,HEVC_v1/ipcm_E_NEC_2,ipcm_E_NEC_2.bit,ipcm_E_NEC_2.md5,8
++1,HEVC_v1/IPRED_A_docomo_2,IPRED_A_docomo_2.bit,IPRED_A_docomo_2.md5,8
++1,HEVC_v1/IPRED_B_Nokia_3,IPRED_B_Nokia_3.bit,IPRED_B_Nokia_3_yuv.md5,8
++1,HEVC_v1/IPRED_C_Mitsubishi_3,IPRED_C_Mitsubishi_3.bit,IPRED_C_Mitsubishi_3_yuv.md5,8
++1,HEVC_v1/LS_A_Orange_2,LS_A_Orange_2.bit,LS_A_Orange_2_yuv.md5,8
++1,HEVC_v1/LS_B_Orange_4,LS_B_Orange_4.bit,LS_B_Orange_4_yuv.md5,8
++1,HEVC_v1/LTRPSPS_A_Qualcomm_1,LTRPSPS_A_Qualcomm_1.bit,LTRPSPS_A_Qualcomm_1.md5,8
++1,HEVC_v1/MAXBINS_A_TI_5,MAXBINS_A_TI_5.bit,MAXBINS_A_TI_5_yuv.md5,8
++1,HEVC_v1/MAXBINS_B_TI_5,MAXBINS_B_TI_5.bit,MAXBINS_B_TI_5_yuv.md5,8
++1,HEVC_v1/MAXBINS_C_TI_5,MAXBINS_C_TI_5.bit,MAXBINS_C_TI_5_yuv.md5,8
++1,HEVC_v1/MERGE_A_TI_3,MERGE_A_TI_3.bit,MERGE_A_TI_3.md5,8
++1,HEVC_v1/MERGE_B_TI_3,MERGE_B_TI_3.bit,MERGE_B_TI_3.md5,8
++1,HEVC_v1/MERGE_C_TI_3,MERGE_C_TI_3.bit,MERGE_C_TI_3.md5,8
++1,HEVC_v1/MERGE_D_TI_3,MERGE_D_TI_3.bit,MERGE_D_TI_3.md5,8
++1,HEVC_v1/MERGE_E_TI_3,MERGE_E_TI_3.bit,MERGE_E_TI_3.md5,8
++1,HEVC_v1/MERGE_F_MTK_4,MERGE_F_MTK_4.bit,MERGE_F_MTK_4.md5,8
++1,HEVC_v1/MERGE_G_HHI_4,MERGE_G_HHI_4.bit,MERGE_G_HHI_4.md5,8
++1,HEVC_v1/MVCLIP_A_qualcomm_3,MVCLIP_A_qualcomm_3.bit,MVCLIP_A_qualcomm_3.yuv.md5,8
++1,HEVC_v1/MVDL1ZERO_A_docomo_4,MVDL1ZERO_A_docomo_4.bit,MVDL1ZERO_A_docomo_4.md5,8
++1,HEVC_v1/MVEDGE_A_qualcomm_3,MVEDGE_A_qualcomm_3.bit,MVEDGE_A_qualcomm_3.yuv.md5,8
++1,HEVC_v1/NoOutPrior_A_Qualcomm_1,NoOutPrior_A_Qualcomm_1.bit,NoOutPrior_A_Qualcomm_1.md5,8
++1,HEVC_v1/NoOutPrior_B_Qualcomm_1,NoOutPrior_B_Qualcomm_1.bit,NoOutPrior_B_Qualcomm_1.md5,8
++1,HEVC_v1/NUT_A_ericsson_5,NUT_A_ericsson_5.bit,NUT_A_ericsson_5.md5,8
++1,HEVC_v1/OPFLAG_A_Qualcomm_1,OPFLAG_A_Qualcomm_1.bit,OPFLAG_A_Qualcomm_1.md5,8
++1,HEVC_v1/OPFLAG_B_Qualcomm_1,OPFLAG_B_Qualcomm_1.bit,OPFLAG_B_Qualcomm_1.md5,8
++1,HEVC_v1/OPFLAG_C_Qualcomm_1,OPFLAG_C_Qualcomm_1.bit,OPFLAG_C_Qualcomm_1.md5,8
++1,HEVC_v1/PICSIZE_A_Bossen_1,PICSIZE_A_Bossen_1.bin,PICSIZE_A_Bossen_1.md5,8
++1,HEVC_v1/PICSIZE_B_Bossen_1,PICSIZE_B_Bossen_1.bin,PICSIZE_B_Bossen_1.md5,8
++1,HEVC_v1/PICSIZE_C_Bossen_1,PICSIZE_C_Bossen_1.bin,PICSIZE_C_Bossen_1.md5,8
++1,HEVC_v1/PICSIZE_D_Bossen_1,PICSIZE_D_Bossen_1.bin,PICSIZE_D_Bossen_1.md5,8
++1,HEVC_v1/PMERGE_A_TI_3,PMERGE_A_TI_3.bit,PMERGE_A_TI_3.md5,8
++1,HEVC_v1/PMERGE_B_TI_3,PMERGE_B_TI_3.bit,PMERGE_B_TI_3.md5,8
++1,HEVC_v1/PMERGE_C_TI_3,PMERGE_C_TI_3.bit,PMERGE_C_TI_3.md5,8
++1,HEVC_v1/PMERGE_D_TI_3,PMERGE_D_TI_3.bit,PMERGE_D_TI_3.md5,8
++1,HEVC_v1/PMERGE_E_TI_3,PMERGE_E_TI_3.bit,PMERGE_E_TI_3.md5,8
++1,HEVC_v1/POC_A_Bossen_3,POC_A_Bossen_3.bin,POC_A_Bossen_3.md5,8
++1,HEVC_v1/PPS_A_qualcomm_7,PPS_A_qualcomm_7.bit,PPS_A_qualcomm_7.yuv.md5,8
++1,HEVC_v1/PS_B_VIDYO_3,PS_B_VIDYO_3.bit,PS_B_VIDYO_3_yuv.md5,8
++1,HEVC_v1/RAP_A_docomo_6,RAP_A_docomo_6.bit,RAP_A_docomo_6.md5,8
++1,HEVC_v1/RAP_B_Bossen_2,RAP_B_Bossen_2.bit,RAP_B_Bossen_2.md5,8
++1,HEVC_v1/RPLM_A_qualcomm_4,RPLM_A_qualcomm_4.bit,RPLM_A_qualcomm_4.yuv.md5,8
++1,HEVC_v1/RPLM_B_qualcomm_4,RPLM_B_qualcomm_4.bit,RPLM_B_qualcomm_4.yuv.md5,8
++1,HEVC_v1/RPS_A_docomo_5,RPS_A_docomo_5.bit,RPS_A_docomo_5.md5,8
++1,HEVC_v1/RPS_B_qualcomm_5,RPS_B_qualcomm_5.bit,RPS_B_qualcomm_5.yuv.md5,8
++1,HEVC_v1/RPS_C_ericsson_5,RPS_C_ericsson_5.bit,RPS_C_ericsson_5.md5,8
++1,HEVC_v1/RPS_D_ericsson_6,RPS_D_ericsson_6.bit,RPS_D_ericsson_6.md5,8
++1,HEVC_v1/RPS_E_qualcomm_5,RPS_E_qualcomm_5.bit,RPS_E_qualcomm_5.yuv.md5,8
++1,HEVC_v1/RPS_F_docomo_2,RPS_F_docomo_2.bit,RPS_F_docomo_2.md5,8
++1,HEVC_v1/RQT_A_HHI_4,RQT_A_HHI_4.bit,RQT_A_HHI_4.md5,8
++1,HEVC_v1/RQT_B_HHI_4,RQT_B_HHI_4.bit,RQT_B_HHI_4.md5,8
++1,HEVC_v1/RQT_C_HHI_4,RQT_C_HHI_4.bit,RQT_C_HHI_4.md5,8
++1,HEVC_v1/RQT_D_HHI_4,RQT_D_HHI_4.bit,RQT_D_HHI_4.md5,8
++1,HEVC_v1/RQT_E_HHI_4,RQT_E_HHI_4.bit,RQT_E_HHI_4.md5,8
++1,HEVC_v1/RQT_F_HHI_4,RQT_F_HHI_4.bit,RQT_F_HHI_4.md5,8
++1,HEVC_v1/RQT_G_HHI_4,RQT_G_HHI_4.bit,RQT_G_HHI_4.md5,8
++1,HEVC_v1/SAO_A_MediaTek_4,SAO_A_MediaTek_4.bit,SAO_A_MediaTek_4.md5,8
++1,HEVC_v1/SAO_B_MediaTek_5,SAO_B_MediaTek_5.bit,SAO_B_MediaTek_5.md5,8
++1,HEVC_v1/SAO_C_Samsung_5,SAO_C_Samsung_5.bin,SAO_C_Samsung_5.md5,8
++1,HEVC_v1/SAO_D_Samsung_5,SAO_D_Samsung_5.bin,SAO_D_Samsung_5.md5,8
++1,HEVC_v1/SAO_E_Canon_4,SAO_E_Canon_4.bit,SAO_E_Canon_4.md5,8
++1,HEVC_v1/SAO_F_Canon_3,SAO_F_Canon_3.bit,SAO_F_Canon_3.md5,8
++1,HEVC_v1/SAO_G_Canon_3,SAO_G_Canon_3.bit,SAO_G_Canon_3.md5,8
++1,HEVC_v1/SAO_H_Parabola_1,SAO_H_Parabola_1.bit,SAO_H_Parabola_1.md5,8
++1,HEVC_v1/SAODBLK_A_MainConcept_4,SAODBLK_A_MainConcept_4.bin,SAODBLK_A_MainConcept_4_md5.txt,8
++1,HEVC_v1/SAODBLK_B_MainConcept_4,SAODBLK_B_MainConcept_4.bin,SAODBLK_B_MainConcept_4_md5.txt,8
++1,HEVC_v1/SDH_A_Orange_4,SDH_A_Orange_4.bit,SDH_A_Orange_4_yuv.md5,8
++1,HEVC_v1/SLICES_A_Rovi_3,SLICES_A_Rovi_3.bin,SLICES_A_Rovi_3.md5,8
++1,HEVC_v1/SLIST_A_Sony_5,SLIST_A_Sony_5.bin,SLIST_A_Sony_5_yuv.md5,8
++1,HEVC_v1/SLIST_B_Sony_9,SLIST_B_Sony_9.bin,SLIST_B_Sony_9_yuv.md5,8
++1,HEVC_v1/SLIST_C_Sony_4,SLIST_C_Sony_4.bin,SLIST_C_Sony_4_yuv.md5,8
++1,HEVC_v1/SLIST_D_Sony_9,str.bin,SLIST_D_Sony_9_yuv.md5,8
++1,HEVC_v1/SLPPLP_A_VIDYO_2,SLPPLP_A_VIDYO_2.bit,SLPPLP_A_VIDYO_2_yuv.md5,8
++1,HEVC_v1/STRUCT_A_Samsung_7,STRUCT_A_Samsung_7.bin,STRUCT_A_Samsung_7.md5,8
++1,HEVC_v1/STRUCT_B_Samsung_7,STRUCT_B_Samsung_7.bin,STRUCT_B_Samsung_7.md5,8
++1,HEVC_v1/TILES_A_Cisco_2,TILES_A_Cisco_2.bin,TILES_A_Cisco_2_yuv.md5,8
++1,HEVC_v1/TILES_B_Cisco_1,TILES_B_Cisco_1.bin,TILES_B_Cisco_1_yuv.md5,8
++1,HEVC_v1/TMVP_A_MS_3,TMVP_A_MS_3.bit,TMVP_A_MS_3.yuv.md5,8
++1,HEVC_v1/TSCL_A_VIDYO_5,TSCL_A_VIDYO_5.bit,TSCL_A_VIDYO_5_yuv.md5,8
++1,HEVC_v1/TSCL_B_VIDYO_4,TSCL_B_VIDYO_4.bit,TSCL_B_VIDYO_4_yuv.md5,8
++1,HEVC_v1/TSKIP_A_MS_3,TSKIP_A_MS_3.bit,TSKIP_A_MS_3.yuv.md5,8
++3,HEVC_v1/TSUNEQBD_A_MAIN10_Technicolor_2,TSUNEQBD_A_MAIN10_Technicolor_2.bit,TSUNEQBD_A_MAIN10_Technicolor_2_yuv.md5, # unequal bit depth,10
++1,HEVC_v1/TUSIZE_A_Samsung_1,TUSIZE_A_Samsung_1.bin,TUSIZE_A_Samsung_1.md5,8
++1,HEVC_v1/VPSID_A_VIDYO_2,VPSID_A_VIDYO_2.bit,VPSID_A_VIDYO_2_yuv.md5,8
++3,HEVC_v1/VPSSPSPPS_A_MainConcept_1,VPSSPSPPS_A_MainConcept_1.bin,VPSSPSPPS_A_MainConcept_1_md5.txt, # ???,8
++1,HEVC_v1/WP_A_MAIN10_Toshiba_3,WP_A_MAIN10_Toshiba_3.bit,WP_A_MAIN10_Toshiba_3_yuv.md5,10
++1,HEVC_v1/WP_A_Toshiba_3,WP_A_Toshiba_3.bit,WP_A_Toshiba_3_yuv.md5,8
++1,HEVC_v1/WP_B_Toshiba_3,WP_B_Toshiba_3.bit,WP_B_Toshiba_3_yuv.md5,8
++1,HEVC_v1/WP_MAIN10_B_Toshiba_3,WP_MAIN10_B_Toshiba_3.bit,WP_MAIN10_B_Toshiba_3_yuv.md5,10
++1,HEVC_v1/WPP_A_ericsson_MAIN10_2,WPP_A_ericsson_MAIN10_2.bit,WPP_A_ericsson_MAIN10_yuv.md5,10
++1,HEVC_v1/WPP_A_ericsson_MAIN_2,WPP_A_ericsson_MAIN_2.bit,WPP_A_ericsson_MAIN_2_yuv.md5,8
++1,HEVC_v1/WPP_B_ericsson_MAIN10_2,WPP_B_ericsson_MAIN10_2.bit,WPP_B_ericsson_MAIN10_yuv.md5,10
++1,HEVC_v1/WPP_B_ericsson_MAIN_2,WPP_B_ericsson_MAIN_2.bit,WPP_B_ericsson_MAIN_2_yuv.md5,8
++1,HEVC_v1/WPP_C_ericsson_MAIN10_2,WPP_C_ericsson_MAIN10_2.bit,WPP_C_ericsson_MAIN10_yuv.md5,10
++1,HEVC_v1/WPP_C_ericsson_MAIN_2,WPP_C_ericsson_MAIN_2.bit,WPP_C_ericsson_MAIN_2_yuv.md5,8
++1,HEVC_v1/WPP_D_ericsson_MAIN10_2,WPP_D_ericsson_MAIN10_2.bit,WPP_D_ericsson_MAIN10_yuv.md5,10
++1,HEVC_v1/WPP_D_ericsson_MAIN_2,WPP_D_ericsson_MAIN_2.bit,WPP_D_ericsson_MAIN_2_yuv.md5,8
++1,HEVC_v1/WPP_E_ericsson_MAIN10_2,WPP_E_ericsson_MAIN10_2.bit,WPP_E_ericsson_MAIN10_yuv.md5,10
++1,HEVC_v1/WPP_E_ericsson_MAIN_2,WPP_E_ericsson_MAIN_2.bit,WPP_E_ericsson_MAIN_2_yuv.md5,8
++1,HEVC_v1/WPP_F_ericsson_MAIN10_2,WPP_F_ericsson_MAIN10_2.bit,WPP_F_ericsson_MAIN10_yuv.md5,10
++1,HEVC_v1/WPP_F_ericsson_MAIN_2,WPP_F_ericsson_MAIN_2.bit,WPP_F_ericsson_MAIN_2_yuv.md5,8
++1,RExt/ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_2,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_2.bit,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_yuv_2.md5,0
++0,RExt/Bitdepth_A_RExt_Sony_1,Bitdepth_A_RExt_Sony_1.bin,md5sum.txt,8
++0,RExt/Bitdepth_B_RExt_Sony_1,Bitdepth_B_RExt_Sony_1.bin,md5sum.txt,8
++0,RExt/CCP_10bit_RExt_QCOM,CCP_10bit_RExt_QCOM.bin,CCP_10bit_RExt_QCOM_md5sum.txt,10
++0,RExt/CCP_12bit_RExt_QCOM,CCP_12bit_RExt_QCOM.bin,CCP_12bit_RExt_QCOM_md5sum.txt,8
++0,RExt/CCP_8bit_RExt_QCOM,CCP_8bit_RExt_QCOM.bin,CCP_8bit_RExt_QCOM_md5sum.txt,8
++1,RExt/ExplicitRdpcm_A_BBC_1,ExplicitRdpcm_A_BBC_1.bit,md5sum.txt,0
++0,RExt/ExplicitRdpcm_B_BBC_2,ExplicitRdpcm_B_BBC_1.bit,md5sum.txt,8
++0,RExt/EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_10BIT_RExt_Sony_1,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_10BIT_RExt_Sony_1.bit,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_10BIT_RExt_Sony_1.md5,10
++0,RExt/EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_12BIT_RExt_Sony_1,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_12BIT_RExt_Sony_1.bit,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_12BIT_RExt_Sony_1.md5,8
++0,RExt/EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_16BIT_RExt_Sony_1,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_16BIT_RExt_Sony_1.bit,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_16BIT_RExt_Sony_1.md5,8
++0,RExt/EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_8BIT_RExt_Sony_1,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_8BIT_RExt_Sony_1.bit,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_8BIT_RExt_Sony_1.md5,8
++0,RExt/EXTPREC_MAIN_444_16_INTRA_10BIT_RExt_Sony_1,EXTPREC_MAIN_444_16_INTRA_10BIT_RExt_Sony_1.bit,EXTPREC_MAIN_444_16_INTRA_10BIT_RExt_Sony_1.md5,10
++0,RExt/EXTPREC_MAIN_444_16_INTRA_12BIT_RExt_Sony_1,EXTPREC_MAIN_444_16_INTRA_12BIT_RExt_Sony_1.bit,EXTPREC_MAIN_444_16_INTRA_12BIT_RExt_Sony_1.md5,8
++0,RExt/EXTPREC_MAIN_444_16_INTRA_16BIT_RExt_Sony_1,EXTPREC_MAIN_444_16_INTRA_16BIT_RExt_Sony_1.bit,EXTPREC_MAIN_444_16_INTRA_16BIT_RExt_Sony_1.md5,8
++0,RExt/EXTPREC_MAIN_444_16_INTRA_8BIT_RExt_Sony_1,EXTPREC_MAIN_444_16_INTRA_8BIT_RExt_Sony_1.bit,EXTPREC_MAIN_444_16_INTRA_8BIT_RExt_Sony_1.md5,8
++1,RExt/GENERAL_10b_420_RExt_Sony_1,GENERAL_10b_420_RExt_Sony_1.bit,GENERAL_10b_420_RExt_Sony_1.md5,10
++1,RExt/GENERAL_10b_422_RExt_Sony_1,GENERAL_10b_422_RExt_Sony_1.bit,GENERAL_10b_422_RExt_Sony_1.md5,0
++1,RExt/GENERAL_10b_444_RExt_Sony_2,GENERAL_10b_444_RExt_Sony_2.bit,GENERAL_10b_444_RExt_Sony_2.md5,0
++1,RExt/GENERAL_12b_400_RExt_Sony_1,GENERAL_12b_400_RExt_Sony_1.bit,GENERAL_12b_400_RExt_Sony_1.md5,0
++1,RExt/GENERAL_12b_420_RExt_Sony_1,GENERAL_12b_420_RExt_Sony_1.bit,GENERAL_12b_420_RExt_Sony_1.md5,0
++1,RExt/GENERAL_12b_422_RExt_Sony_1,GENERAL_12b_422_RExt_Sony_1.bit,GENERAL_12b_422_RExt_Sony_1.md5,0
++1,RExt/GENERAL_12b_444_RExt_Sony_2,GENERAL_12b_444_RExt_Sony_2.bit,GENERAL_12b_444_RExt_Sony_2.md5,0
++0,RExt/GENERAL_16b_400_RExt_Sony_1,GENERAL_16b_400_RExt_Sony_1.bit,GENERAL_16b_400_RExt_Sony_1.md5,0
++0,RExt/GENERAL_16b_444_highThroughput_RExt_Sony_2,GENERAL_16b_444_highThroughput_RExt_Sony_2.bit,GENERAL_16b_444_highThroughput_RExt_Sony_2.md5,8
++0,RExt/GENERAL_16b_444_RExt_Sony_2,GENERAL_16b_444_RExt_Sony_2.bit,GENERAL_16b_444_RExt_Sony_2.md5,8
++1,RExt/GENERAL_8b_400_RExt_Sony_1,GENERAL_8b_400_RExt_Sony_1.bit,GENERAL_8b_400_RExt_Sony_1.md5,0
++1,RExt/GENERAL_8b_420_RExt_Sony_1,GENERAL_8b_420_RExt_Sony_1.bit,GENERAL_8b_420_RExt_Sony_1.md5,8
++1,RExt/GENERAL_8b_444_RExt_Sony_2,GENERAL_8b_444_RExt_Sony_2.bit,GENERAL_8b_444_RExt_Sony_2.md5,0
++1,RExt/IPCM_A_RExt_NEC_2,IPCM_A_RExt_NEC_2.bit,IPCM_A_RExt_NEC_2_yuv.md5,0
++1,RExt/IPCM_B_RExt_NEC,IPCM_B_RExt_NEC.bit,IPCM_B_RExt_NEC_yuv.md5,0
++1,RExt/Main_422_10_A_RExt_Sony_2,Main_422_10_A_RExt_Sony_2.bin,md5sum.txt,0
++1,RExt/Main_422_10_B_RExt_Sony_2,Main_422_10_B_RExt_Sony_2.bin,md5sum.txt,0
++1,RExt/PERSIST_RPARAM_A_RExt_Sony_3,PERSIST_RPARAM_A_RExt_Sony_3.bit,PERSIST_RPARAM_A_RExt_Sony_3.md5,0
++1,RExt/QMATRIX_A_RExt_Sony_1,QMATRIX_A_RExt_Sony_1.bit,QMATRIX_A_RExt_Sony_1.md5,0
++0,RExt/SAO_A_RExt_MediaTek_1,SAO_A_RExt_MediaTek_1.bit,SAO_A_RExt_MediaTek_1.md5, # Runs out of memory - could be fixed,8
++0,RExt/TSCTX_10bit_I_RExt_SHARP_1,TSCTX_10bit_I_RExt_SHARP_1.bin,TSCTX_10bit_I_RExt_SHARP_1.md5,10
++0,RExt/TSCTX_10bit_RExt_SHARP_1,TSCTX_10bit_RExt_SHARP_1.bin,TSCTX_10bit_RExt_SHARP_1.md5,10
++0,RExt/TSCTX_12bit_I_RExt_SHARP_1,TSCTX_12bit_I_RExt_SHARP_1.bin,TSCTX_12bit_I_RExt_SHARP_1.md5,8
++0,RExt/TSCTX_12bit_RExt_SHARP_1,TSCTX_12bit_RExt_SHARP_1.bin,TSCTX_12bit_RExt_SHARP_1.md5,8
++0,RExt/TSCTX_8bit_I_RExt_SHARP_1,TSCTX_8bit_I_RExt_SHARP_1.bin,TSCTX_8bit_I_RExt_SHARP_1.md5,8
++0,RExt/TSCTX_8bit_RExt_SHARP_1,TSCTX_8bit_RExt_SHARP_1.bin,TSCTX_8bit_RExt_SHARP_1.md5,8
++0,RExt/WAVETILES_RExt_Sony_2,WAVETILES_RExt_Sony_2.bit,WAVETILES_RExt_Sony_2.md5,8
++1,local/sao_cu16_mobile_344x280,sao_cu16_mobile_344x280.265,sao_cu16_mobile_344x280.md5,8
++1,local/dblk_cu16_mobile_344x280,dblk_cu16_mobile_344x280.265,dblk_cu16_mobile_344x280.md5,8
++1,local/dblksao_cu16_mobile_344x280,dblksao_cu16_mobile_344x280.265,dblksao_cu16_mobile_344x280.md5,8
++1,local/dblk_pu32_horses_832x448,dblk_pu32_horses_832x448.265,dblk_pu32_horses_832x448.md5,8
++1,local/intra_pred_21_laps,intra_pred_21_laps.265,intra_pred_21_laps.md5,8
+--- /dev/null
++++ b/pi-util/conf_h265.2016_HEVC_v1.csv
+@@ -0,0 +1,147 @@
++1,AMP_A_Samsung_7,AMP_A_Samsung_7.bin,AMP_A_Samsung_7.md5
++1,AMP_B_Samsung_7,AMP_B_Samsung_7.bin,AMP_B_Samsung_7.md5
++1,AMP_D_Hisilicon_3,AMP_D_Hisilicon.bit,AMP_D_Hisilicon_3.yuv.md5
++1,AMP_E_Hisilicon_3,AMP_E_Hisilicon.bit,AMP_E_Hisilicon_3.yuv.md5
++1,AMP_F_Hisilicon_3,AMP_F_Hisilicon_3.bit,AMP_F_Hisilicon_3.yuv.md5
++1,AMVP_A_MTK_4,AMVP_A_MTK_4.bit,AMVP_A_MTK_4.md5
++1,AMVP_B_MTK_4,AMVP_B_MTK_4.bit,AMVP_B_MTK_4.md5
++1,AMVP_C_Samsung_7,AMVP_C_Samsung_7.bin,AMVP_C_Samsung_7.md5
++1,BUMPING_A_ericsson_1,BUMPING_A_ericsson_1.bit,BUMPING_A_ericsson_1.md5
++1,CAINIT_A_SHARP_4,CAINIT_A_SHARP_4.bit,CAINIT_A_SHARP_4.md5
++1,CAINIT_B_SHARP_4,CAINIT_B_SHARP_4.bit,CAINIT_B_SHARP_4.md5
++1,CAINIT_C_SHARP_3,CAINIT_C_SHARP_3.bit,CAINIT_C_SHARP_3.md5
++1,CAINIT_D_SHARP_3,CAINIT_D_SHARP_3.bit,CAINIT_D_SHARP_3.md5
++1,CAINIT_E_SHARP_3,CAINIT_E_SHARP_3.bit,CAINIT_E_SHARP_3.md5
++1,CAINIT_F_SHARP_3,CAINIT_F_SHARP_3.bit,CAINIT_F_SHARP_3.md5
++1,CAINIT_G_SHARP_3,CAINIT_G_SHARP_3.bit,CAINIT_G_SHARP_3.md5
++1,CAINIT_H_SHARP_3,CAINIT_H_SHARP_3.bit,CAINIT_H_SHARP_3.md5
++1,CIP_A_Panasonic_3,CIP_A_Panasonic_3.bit,CIP_A_Panasonic_3_yuv.md5
++1,cip_B_NEC_3,cip_B_NEC_3.bit,cip_B_NEC_3.md5
++1,CIP_C_Panasonic_2,CIP_C_Panasonic_2.bit,CIP_C_Panasonic_2_yuv.md5
++1,CONFWIN_A_Sony_1,CONFWIN_A_Sony_1.bit,CONFWIN_A_Sony_1.md5
++1,DBLK_A_MAIN10_VIXS_4,DBLK_A_MAIN10_VIXS_4.bit,DBLK_A_MAIN10_VIXS_4.md5
++1,DBLK_A_SONY_3,DBLK_A_SONY_3.bit,DBLK_A_SONY_3.bit.yuv.md5
++1,DBLK_B_SONY_3,DBLK_B_SONY_3.bit,DBLK_B_SONY_3.bit.yuv.md5
++1,DBLK_C_SONY_3,DBLK_C_SONY_3.bit,DBLK_C_SONY_3.bit.yuv.md5
++1,DBLK_D_VIXS_2,DBLK_D_VIXS_2.bit,DBLK_D_VIXS_2_yuv.md5
++1,DBLK_E_VIXS_2,DBLK_E_VIXS_2.bit,DBLK_E_VIXS_2_yuv.md5
++1,DBLK_F_VIXS_2,DBLK_F_VIXS_2.bit,DBLK_F_VIXS_2_yuv.md5
++1,DBLK_G_VIXS_2,DBLK_G_VIXS_2.bit,DBLK_G_VIXS_2_yuv.md5
++1,DELTAQP_A_BRCM_4,DELTAQP_A_BRCM_4.bit,DELTAQP_A_BRCM_4_yuv.md5
++1,DELTAQP_B_SONY_3,DELTAQP_B_SONY_3.bit,DELTAQP_B_SONY_3.bit.yuv.md5
++1,DELTAQP_C_SONY_3,DELTAQP_C_SONY_3.bit,DELTAQP_C_SONY_3.bit.yuv.md5
++1,DSLICE_A_HHI_5,DSLICE_A_HHI_5.bin,DSLICE_A_HHI_5.md5
++1,DSLICE_B_HHI_5,DSLICE_B_HHI_5.bin,DSLICE_B_HHI_5.md5
++1,DSLICE_C_HHI_5,DSLICE_C_HHI_5.bin,DSLICE_C_HHI_5.md5
++1,ENTP_A_QUALCOMM_1,ENTP_A_Qualcomm_1.bit,ENTP_A_Qualcomm_1.md5
++1,ENTP_B_Qualcomm_1,ENTP_B_Qualcomm_1.bit,ENTP_B_Qualcomm_1.md5
++1,ENTP_C_Qualcomm_1,ENTP_C_Qualcomm_1.bit,ENTP_C_Qualcomm_1.md5
++1,EXT_A_ericsson_4,EXT_A_ericsson_4.bit,EXT_A_ericsson_4.md5
++1,FILLER_A_Sony_1,FILLER_A_Sony_1.bit,FILLER_A_Sony_1.md5
++1,HRD_A_Fujitsu_3,HRD_A_Fujitsu_3.bin,HRD_A_Fujitsu_3.md5
++1,INITQP_A_Sony_1,INITQP_A_Sony_1.bit,INITQP_A_Sony_1.md5
++1,INITQP_B_Main10_Sony_1,INITQP_B_Main10_Sony_1.bit,INITQP_B_Main10_Sony_1.md5
++1,ipcm_A_NEC_3,ipcm_A_NEC_3.bit,ipcm_A_NEC_3.md5
++1,ipcm_B_NEC_3,ipcm_B_NEC_3.bit,ipcm_B_NEC_3.md5
++1,ipcm_C_NEC_3,ipcm_C_NEC_3.bit,ipcm_C_NEC_3.md5
++1,ipcm_D_NEC_3,ipcm_D_NEC_3.bit,ipcm_D_NEC_3.md5
++1,ipcm_E_NEC_2,ipcm_E_NEC_2.bit,ipcm_E_NEC_2.md5
++1,IPRED_A_docomo_2,IPRED_A_docomo_2.bit,IPRED_A_docomo_2.md5
++1,IPRED_B_Nokia_3,IPRED_B_Nokia_3.bit,IPRED_B_Nokia_3_yuv.md5
++1,IPRED_C_Mitsubishi_3,IPRED_C_Mitsubishi_3.bit,IPRED_C_Mitsubishi_3_yuv.md5
++1,LS_A_Orange_2,LS_A_Orange_2.bit,LS_A_Orange_2_yuv.md5
++1,LS_B_Orange_4,LS_B_Orange_4.bit,LS_B_Orange_4_yuv.md5
++1,LTRPSPS_A_Qualcomm_1,LTRPSPS_A_Qualcomm_1.bit,LTRPSPS_A_Qualcomm_1.md5
++1,MAXBINS_A_TI_5,MAXBINS_A_TI_5.bit,MAXBINS_A_TI_5_yuv.md5
++1,MAXBINS_B_TI_5,MAXBINS_B_TI_5.bit,MAXBINS_B_TI_5_yuv.md5
++1,MAXBINS_C_TI_5,MAXBINS_C_TI_5.bit,MAXBINS_C_TI_5_yuv.md5
++1,MERGE_A_TI_3,MERGE_A_TI_3.bit,MERGE_A_TI_3.md5
++1,MERGE_B_TI_3,MERGE_B_TI_3.bit,MERGE_B_TI_3.md5
++1,MERGE_C_TI_3,MERGE_C_TI_3.bit,MERGE_C_TI_3.md5
++1,MERGE_D_TI_3,MERGE_D_TI_3.bit,MERGE_D_TI_3.md5
++1,MERGE_E_TI_3,MERGE_E_TI_3.bit,MERGE_E_TI_3.md5
++1,MERGE_F_MTK_4,MERGE_F_MTK_4.bit,MERGE_F_MTK_4.md5
++1,MERGE_G_HHI_4,MERGE_G_HHI_4.bit,MERGE_G_HHI_4.md5
++1,MVCLIP_A_qualcomm_3,MVCLIP_A_qualcomm_3.bit,MVCLIP_A_qualcomm_3.yuv.md5
++1,MVDL1ZERO_A_docomo_4,MVDL1ZERO_A_docomo_4.bit,MVDL1ZERO_A_docomo_4.md5
++1,MVEDGE_A_qualcomm_3,MVEDGE_A_qualcomm_3.bit,MVEDGE_A_qualcomm_3.yuv.md5
++1,NoOutPrior_A_Qualcomm_1,NoOutPrior_A_Qualcomm_1.bit,NoOutPrior_A_Qualcomm_1.md5
++1,NoOutPrior_B_Qualcomm_1,NoOutPrior_B_Qualcomm_1.bit,NoOutPrior_B_Qualcomm_1.md5
++1,NUT_A_ericsson_5,NUT_A_ericsson_5.bit,NUT_A_ericsson_5.md5
++1,OPFLAG_A_Qualcomm_1,OPFLAG_A_Qualcomm_1.bit,OPFLAG_A_Qualcomm_1.md5
++1,OPFLAG_B_Qualcomm_1,OPFLAG_B_Qualcomm_1.bit,OPFLAG_B_Qualcomm_1.md5
++1,OPFLAG_C_Qualcomm_1,OPFLAG_C_Qualcomm_1.bit,OPFLAG_C_Qualcomm_1.md5
++1,PICSIZE_A_Bossen_1,PICSIZE_A_Bossen_1.bin,PICSIZE_A_Bossen_1.md5
++1,PICSIZE_B_Bossen_1,PICSIZE_B_Bossen_1.bin,PICSIZE_B_Bossen_1.md5
++1,PICSIZE_C_Bossen_1,PICSIZE_C_Bossen_1.bin,PICSIZE_C_Bossen_1.md5
++1,PICSIZE_D_Bossen_1,PICSIZE_D_Bossen_1.bin,PICSIZE_D_Bossen_1.md5
++1,PMERGE_A_TI_3,PMERGE_A_TI_3.bit,PMERGE_A_TI_3.md5
++1,PMERGE_B_TI_3,PMERGE_B_TI_3.bit,PMERGE_B_TI_3.md5
++1,PMERGE_C_TI_3,PMERGE_C_TI_3.bit,PMERGE_C_TI_3.md5
++1,PMERGE_D_TI_3,PMERGE_D_TI_3.bit,PMERGE_D_TI_3.md5
++1,PMERGE_E_TI_3,PMERGE_E_TI_3.bit,PMERGE_E_TI_3.md5
++1,POC_A_Bossen_3,POC_A_Bossen_3.bin,POC_A_Bossen_3.md5
++1,PPS_A_qualcomm_7,PPS_A_qualcomm_7.bit,PPS_A_qualcomm_7.yuv.md5
++1,PS_B_VIDYO_3,PS_B_VIDYO_3.bit,PS_B_VIDYO_3_yuv.md5
++1,RAP_A_docomo_6,RAP_A_docomo_6.bit,RAP_A_docomo_6.md5
++1,RAP_B_Bossen_2,RAP_B_Bossen_2.bit,RAP_B_Bossen_2.md5
++1,RPLM_A_qualcomm_4,RPLM_A_qualcomm_4.bit,RPLM_A_qualcomm_4.yuv.md5
++1,RPLM_B_qualcomm_4,RPLM_B_qualcomm_4.bit,RPLM_B_qualcomm_4.yuv.md5
++1,RPS_A_docomo_5,RPS_A_docomo_5.bit,RPS_A_docomo_5.md5
++1,RPS_B_qualcomm_5,RPS_B_qualcomm_5.bit,RPS_B_qualcomm_5.yuv.md5
++1,RPS_C_ericsson_5,RPS_C_ericsson_5.bit,RPS_C_ericsson_5.md5
++1,RPS_D_ericsson_6,RPS_D_ericsson_6.bit,RPS_D_ericsson_6.md5
++1,RPS_E_qualcomm_5,RPS_E_qualcomm_5.bit,RPS_E_qualcomm_5.yuv.md5
++1,RPS_F_docomo_2,RPS_F_docomo_2.bit,RPS_F_docomo_2.md5
++1,RQT_A_HHI_4,RQT_A_HHI_4.bit,RQT_A_HHI_4.md5
++1,RQT_B_HHI_4,RQT_B_HHI_4.bit,RQT_B_HHI_4.md5
++1,RQT_C_HHI_4,RQT_C_HHI_4.bit,RQT_C_HHI_4.md5
++1,RQT_D_HHI_4,RQT_D_HHI_4.bit,RQT_D_HHI_4.md5
++1,RQT_E_HHI_4,RQT_E_HHI_4.bit,RQT_E_HHI_4.md5
++1,RQT_F_HHI_4,RQT_F_HHI_4.bit,RQT_F_HHI_4.md5
++1,RQT_G_HHI_4,RQT_G_HHI_4.bit,RQT_G_HHI_4.md5
++1,SAO_A_MediaTek_4,SAO_A_MediaTek_4.bit,SAO_A_MediaTek_4.md5
++1,SAO_B_MediaTek_5,SAO_B_MediaTek_5.bit,SAO_B_MediaTek_5.md5
++1,SAO_C_Samsung_5,SAO_C_Samsung_5.bin,SAO_C_Samsung_5.md5
++1,SAO_D_Samsung_5,SAO_D_Samsung_5.bin,SAO_D_Samsung_5.md5
++1,SAO_E_Canon_4,SAO_E_Canon_4.bit,SAO_E_Canon_4.md5
++1,SAO_F_Canon_3,SAO_F_Canon_3.bit,SAO_F_Canon_3.md5
++1,SAO_G_Canon_3,SAO_G_Canon_3.bit,SAO_G_Canon_3.md5
++1,SAO_H_Parabola_1,SAO_H_Parabola_1.bit,SAO_H_Parabola_1.md5
++2,SAODBLK_A_MainConcept_4,SAODBLK_A_MainConcept_4.bin,SAODBLK_A_MainConcept_4_md5.txt
++2,SAODBLK_B_MainConcept_4,SAODBLK_B_MainConcept_4.bin,SAODBLK_B_MainConcept_4_md5.txt
++1,SDH_A_Orange_4,SDH_A_Orange_4.bit,SDH_A_Orange_4_yuv.md5
++1,SLICES_A_Rovi_3,SLICES_A_Rovi_3.bin,SLICES_A_Rovi_3.md5
++1,SLIST_A_Sony_5,SLIST_A_Sony_5.bin,SLIST_A_Sony_5_yuv.md5
++1,SLIST_B_Sony_9,SLIST_B_Sony_9.bin,SLIST_B_Sony_9_yuv.md5
++1,SLIST_C_Sony_4,SLIST_C_Sony_4.bin,SLIST_C_Sony_4_yuv.md5
++1,SLIST_D_Sony_9,str.bin,SLIST_D_Sony_9_yuv.md5
++1,SLPPLP_A_VIDYO_2,SLPPLP_A_VIDYO_2.bit,SLPPLP_A_VIDYO_2_yuv.md5
++1,STRUCT_A_Samsung_7,STRUCT_A_Samsung_7.bin,STRUCT_A_Samsung_7.md5
++1,STRUCT_B_Samsung_7,STRUCT_B_Samsung_7.bin,STRUCT_B_Samsung_7.md5
++1,TILES_A_Cisco_2,TILES_A_Cisco_2.bin,TILES_A_Cisco_2_yuv.md5
++1,TILES_B_Cisco_1,TILES_B_Cisco_1.bin,TILES_B_Cisco_1_yuv.md5
++1,TMVP_A_MS_3,TMVP_A_MS_3.bit,TMVP_A_MS_3.yuv.md5
++1,TSCL_A_VIDYO_5,TSCL_A_VIDYO_5.bit,TSCL_A_VIDYO_5_yuv.md5
++1,TSCL_B_VIDYO_4,TSCL_B_VIDYO_4.bit,TSCL_B_VIDYO_4_yuv.md5
++1,TSKIP_A_MS_3,TSKIP_A_MS_3.bit,TSKIP_A_MS_3.yuv.md5
++3,TSUNEQBD_A_MAIN10_Technicolor_2,TSUNEQBD_A_MAIN10_Technicolor_2.bit,TSUNEQBD_A_MAIN10_Technicolor_2_yuv.md5, # unequal bit depth
++1,TUSIZE_A_Samsung_1,TUSIZE_A_Samsung_1.bin,TUSIZE_A_Samsung_1.md5
++1,VPSID_A_VIDYO_2,VPSID_A_VIDYO_2.bit,VPSID_A_VIDYO_2_yuv.md5
++3,VPSSPSPPS_A_MainConcept_1,VPSSPSPPS_A_MainConcept_1.bin,VPSSPSPPS_A_MainConcept_1_md5.txt, # ???
++1,WP_A_MAIN10_Toshiba_3,WP_A_MAIN10_Toshiba_3.bit,WP_A_MAIN10_Toshiba_3_yuv.md5
++1,WP_A_Toshiba_3,WP_A_Toshiba_3.bit,WP_A_Toshiba_3_yuv.md5
++1,WP_B_Toshiba_3,WP_B_Toshiba_3.bit,WP_B_Toshiba_3_yuv.md5
++1,WP_MAIN10_B_Toshiba_3,WP_MAIN10_B_Toshiba_3.bit,WP_MAIN10_B_Toshiba_3_yuv.md5
++1,WPP_A_ericsson_MAIN10_2,WPP_A_ericsson_MAIN10_2.bit,WPP_A_ericsson_MAIN10_yuv.md5
++1,WPP_A_ericsson_MAIN_2,WPP_A_ericsson_MAIN_2.bit,WPP_A_ericsson_MAIN_2_yuv.md5
++1,WPP_B_ericsson_MAIN10_2,WPP_B_ericsson_MAIN10_2.bit,WPP_B_ericsson_MAIN10_yuv.md5
++1,WPP_B_ericsson_MAIN_2,WPP_B_ericsson_MAIN_2.bit,WPP_B_ericsson_MAIN_2_yuv.md5
++1,WPP_C_ericsson_MAIN10_2,WPP_C_ericsson_MAIN10_2.bit,WPP_C_ericsson_MAIN10_yuv.md5
++1,WPP_C_ericsson_MAIN_2,WPP_C_ericsson_MAIN_2.bit,WPP_C_ericsson_MAIN_2_yuv.md5
++1,WPP_D_ericsson_MAIN10_2,WPP_D_ericsson_MAIN10_2.bit,WPP_D_ericsson_MAIN10_yuv.md5
++1,WPP_D_ericsson_MAIN_2,WPP_D_ericsson_MAIN_2.bit,WPP_D_ericsson_MAIN_2_yuv.md5
++1,WPP_E_ericsson_MAIN10_2,WPP_E_ericsson_MAIN10_2.bit,WPP_E_ericsson_MAIN10_yuv.md5
++1,WPP_E_ericsson_MAIN_2,WPP_E_ericsson_MAIN_2.bit,WPP_E_ericsson_MAIN_2_yuv.md5
++1,WPP_F_ericsson_MAIN10_2,WPP_F_ericsson_MAIN10_2.bit,WPP_F_ericsson_MAIN10_yuv.md5
++1,WPP_F_ericsson_MAIN_2,WPP_F_ericsson_MAIN_2.bit,WPP_F_ericsson_MAIN_2_yuv.md5
+--- /dev/null
++++ b/pi-util/conf_h265.csv
+@@ -0,0 +1,144 @@
++1,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_1,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_1.bit,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_1.md5
++1,AMP_A_Samsung_6,AMP_A_Samsung_6.bin,AMP_A_Samsung_6.md5
++1,AMP_B_Samsung_6,AMP_B_Samsung_6.bin,AMP_B_Samsung_6.md5
++1,AMP_D_Hisilicon_3,AMP_D_Hisilicon.bit,AMP_D_Hisilicon_3.yuv.md5
++1,AMP_E_Hisilicon_3,AMP_E_Hisilicon.bit,AMP_E_Hisilicon_3.yuv.md5
++1,AMP_F_Hisilicon_3,AMP_F_Hisilicon_3.bit,AMP_F_Hisilicon_3.yuv.md5
++1,AMVP_A_MTK_4,AMVP_A_MTK_4.bit,AMVP_A_MTK_4.md5
++1,AMVP_B_MTK_4,AMVP_B_MTK_4.bit,AMVP_B_MTK_4.md5
++1,AMVP_C_Samsung_6,AMVP_C_Samsung_6.bin,AMVP_C_Samsung_6.md5
++1,BUMPING_A_ericsson_1,BUMPING_A_ericsson_1.bit,BUMPING_A_ericsson_1.md5
++1,CAINIT_A_SHARP_4,CAINIT_A_SHARP_4.bit,CAINIT_A_SHARP_4.md5
++1,CAINIT_B_SHARP_4,CAINIT_B_SHARP_4.bit,CAINIT_B_SHARP_4.md5
++1,CAINIT_C_SHARP_3,CAINIT_C_SHARP_3.bit,CAINIT_C_SHARP_3.md5
++1,CAINIT_D_SHARP_3,CAINIT_D_SHARP_3.bit,CAINIT_D_SHARP_3.md5
++1,CAINIT_E_SHARP_3,CAINIT_E_SHARP_3.bit,CAINIT_E_SHARP_3.md5
++1,CAINIT_F_SHARP_3,CAINIT_F_SHARP_3.bit,CAINIT_F_SHARP_3.md5
++1,CAINIT_G_SHARP_3,CAINIT_G_SHARP_3.bit,CAINIT_G_SHARP_3.md5
++1,CAINIT_H_SHARP_3,CAINIT_H_SHARP_3.bit,CAINIT_H_SHARP_3.md5
++1,CIP_A_Panasonic_3,CIP_A_Panasonic_3.bit,CIP_A_Panasonic_3_yuv.md5
++1,cip_B_NEC_3,cip_B_NEC_3.bit,cip_B_NEC_3.md5
++1,CIP_C_Panasonic_2,CIP_C_Panasonic_2.bit,CIP_C_Panasonic_2_yuv.md5
++1,CONFWIN_A_Sony_1,CONFWIN_A_Sony_1.bit,CONFWIN_A_Sony_1.md5
++1,DBLK_A_MAIN10_VIXS_3,DBLK_A_MAIN10_VIXS_3.bit,DBLK_A_MAIN10_VIXS_3.md5
++1,DBLK_A_SONY_3,DBLK_A_SONY_3.bit,DBLK_A_SONY_3.bit.yuv.md5
++1,DBLK_B_SONY_3,DBLK_B_SONY_3.bit,DBLK_B_SONY_3.bit.yuv.md5
++1,DBLK_C_SONY_3,DBLK_C_SONY_3.bit,DBLK_C_SONY_3.bit.yuv.md5
++1,DBLK_D_VIXS_2,DBLK_D_VIXS_2.bit,DBLK_D_VIXS_2_yuv.md5
++1,DBLK_E_VIXS_2,DBLK_E_VIXS_2.bit,DBLK_E_VIXS_2_yuv.md5
++1,DBLK_F_VIXS_2,DBLK_F_VIXS_2.bit,DBLK_F_VIXS_2_yuv.md5
++1,DBLK_G_VIXS_2,DBLK_G_VIXS_2.bit,DBLK_G_VIXS_2_yuv.md5
++1,DELTAQP_A_BRCM_4,DELTAQP_A_BRCM_4.bit,DELTAQP_A_BRCM_4_yuv.md5
++1,DELTAQP_B_SONY_3,DELTAQP_B_SONY_3.bit,DELTAQP_B_SONY_3.bit.yuv.md5
++1,DELTAQP_C_SONY_3,DELTAQP_C_SONY_3.bit,DELTAQP_C_SONY_3.bit.yuv.md5
++1,DSLICE_A_HHI_5,DSLICE_A_HHI_5.bin,DSLICE_A_HHI_5.md5
++1,DSLICE_B_HHI_5,DSLICE_B_HHI_5.bin,DSLICE_B_HHI_5.md5
++1,DSLICE_C_HHI_5,DSLICE_C_HHI_5.bin,DSLICE_C_HHI_5.md5
++1,ENTP_A_QUALCOMM_1,ENTP_A_Qualcomm_1.bit,ENTP_A_Qualcomm_1.md5
++1,ENTP_B_Qualcomm_1,ENTP_B_Qualcomm_1.bit,ENTP_B_Qualcomm_1.md5
++1,ENTP_C_Qualcomm_1,ENTP_C_Qualcomm_1.bit,ENTP_C_Qualcomm_1.md5
++1,EXT_A_ericsson_4,EXT_A_ericsson_4.bit,EXT_A_ericsson_4.md5
++1,FILLER_A_Sony_1,FILLER_A_Sony_1.bit,FILLER_A_Sony_1.md5
++1,HRD_A_Fujitsu_3,HRD_A_Fujitsu_3.bin,HRD_A_Fujitsu_3.md5
++1,INITQP_A_Sony_1,INITQP_A_Sony_1.bit,INITQP_A_Sony_1.md5
++1,INITQP_B_Main10_Sony_1,INITQP_B_Main10_Sony_1.bit,INITQP_B_Main10_Sony_1.md5
++1,ipcm_A_NEC_3,ipcm_A_NEC_3.bit,ipcm_A_NEC_3.md5
++1,ipcm_B_NEC_3,ipcm_B_NEC_3.bit,ipcm_B_NEC_3.md5
++1,ipcm_C_NEC_3,ipcm_C_NEC_3.bit,ipcm_C_NEC_3.md5
++1,ipcm_D_NEC_3,ipcm_D_NEC_3.bit,ipcm_D_NEC_3.md5
++1,ipcm_E_NEC_2,ipcm_E_NEC_2.bit,ipcm_E_NEC_2.md5
++1,IPRED_A_docomo_2,IPRED_A_docomo_2.bit,IPRED_A_docomo_2.md5
++1,IPRED_B_Nokia_3,IPRED_B_Nokia_3.bit,IPRED_B_Nokia_3_yuv.md5
++1,IPRED_C_Mitsubishi_3,IPRED_C_Mitsubishi_3.bit,IPRED_C_Mitsubishi_3_yuv.md5
++1,LS_A_Orange_2,LS_A_Orange_2.bit,LS_A_Orange_2_yuv.md5
++1,LS_B_Orange_4,LS_B_Orange_4.bit,LS_B_Orange_4_yuv.md5
++1,LTRPSPS_A_Qualcomm_1,LTRPSPS_A_Qualcomm_1.bit,LTRPSPS_A_Qualcomm_1.md5
++1,MAXBINS_A_TI_4,MAXBINS_A_TI_4.bit,MAXBINS_A_TI_4.md5
++1,MAXBINS_B_TI_4,MAXBINS_B_TI_4.bit,MAXBINS_B_TI_4.md5
++1,MAXBINS_C_TI_4,MAXBINS_C_TI_4.bit,MAXBINS_C_TI_4.md5
++1,MERGE_A_TI_3,MERGE_A_TI_3.bit,MERGE_A_TI_3.md5
++1,MERGE_B_TI_3,MERGE_B_TI_3.bit,MERGE_B_TI_3.md5
++1,MERGE_C_TI_3,MERGE_C_TI_3.bit,MERGE_C_TI_3.md5
++1,MERGE_D_TI_3,MERGE_D_TI_3.bit,MERGE_D_TI_3.md5
++1,MERGE_E_TI_3,MERGE_E_TI_3.bit,MERGE_E_TI_3.md5
++1,MERGE_F_MTK_4,MERGE_F_MTK_4.bit,MERGE_F_MTK_4.md5
++1,MERGE_G_HHI_4,MERGE_G_HHI_4.bit,MERGE_G_HHI_4.md5
++1,MVCLIP_A_qualcomm_3,MVCLIP_A_qualcomm_3.bit,MVCLIP_A_qualcomm_3.yuv.md5
++1,MVDL1ZERO_A_docomo_4,MVDL1ZERO_A_docomo_4.bit,MVDL1ZERO_A_docomo_4.md5
++1,MVEDGE_A_qualcomm_3,MVEDGE_A_qualcomm_3.bit,MVEDGE_A_qualcomm_3.yuv.md5
++1,NoOutPrior_A_Qualcomm_1,NoOutPrior_A_Qualcomm_1.bit,NoOutPrior_A_Qualcomm_1.md5
++1,NoOutPrior_B_Qualcomm_1,NoOutPrior_B_Qualcomm_1.bit,NoOutPrior_B_Qualcomm_1.md5
++1,NUT_A_ericsson_5,NUT_A_ericsson_5.bit,NUT_A_ericsson_5.md5
++1,OPFLAG_A_Qualcomm_1,OPFLAG_A_Qualcomm_1.bit,OPFLAG_A_Qualcomm_1.md5
++1,OPFLAG_B_Qualcomm_1,OPFLAG_B_Qualcomm_1.bit,OPFLAG_B_Qualcomm_1.md5
++1,OPFLAG_C_Qualcomm_1,OPFLAG_C_Qualcomm_1.bit,OPFLAG_C_Qualcomm_1.md5
++1,PICSIZE_A_Bossen_1,PICSIZE_A_Bossen_1.bin,PICSIZE_A_Bossen_1.md5
++1,PICSIZE_B_Bossen_1,PICSIZE_B_Bossen_1.bin,PICSIZE_B_Bossen_1.md5
++1,PICSIZE_C_Bossen_1,PICSIZE_C_Bossen_1.bin,PICSIZE_C_Bossen_1.md5
++1,PICSIZE_D_Bossen_1,PICSIZE_D_Bossen_1.bin,PICSIZE_D_Bossen_1.md5
++1,PMERGE_A_TI_3,PMERGE_A_TI_3.bit,PMERGE_A_TI_3.md5
++1,PMERGE_B_TI_3,PMERGE_B_TI_3.bit,PMERGE_B_TI_3.md5
++1,PMERGE_C_TI_3,PMERGE_C_TI_3.bit,PMERGE_C_TI_3.md5
++1,PMERGE_D_TI_3,PMERGE_D_TI_3.bit,PMERGE_D_TI_3.md5
++1,PMERGE_E_TI_3,PMERGE_E_TI_3.bit,PMERGE_E_TI_3.md5
++1,POC_A_Bossen_3,POC_A_Bossen_3.bin,POC_A_Bossen_3.md5
++1,PPS_A_qualcomm_7,PPS_A_qualcomm_7.bit,PPS_A_qualcomm_7.yuv.md5
++1,PS_B_VIDYO_3,PS_B_VIDYO_3.bit,PS_B_VIDYO_3_yuv.md5
++1,RAP_A_docomo_6,RAP_A_docomo_6.bit,RAP_A_docomo_6.md5
++1,RAP_B_Bossen_2,RAP_B_Bossen_2.bit,RAP_B_Bossen_2.md5
++1,RPLM_A_qualcomm_4,RPLM_A_qualcomm_4.bit,RPLM_A_qualcomm_4.yuv.md5
++1,RPLM_B_qualcomm_4,RPLM_B_qualcomm_4.bit,RPLM_B_qualcomm_4.yuv.md5
++1,RPS_A_docomo_5,RPS_A_docomo_5.bit,RPS_A_docomo_5.md5
++1,RPS_B_qualcomm_5,RPS_B_qualcomm_5.bit,RPS_B_qualcomm_5.yuv.md5
++1,RPS_C_ericsson_5,RPS_C_ericsson_5.bit,RPS_C_ericsson_5.md5
++1,RPS_D_ericsson_6,RPS_D_ericsson_6.bit,RPS_D_ericsson_6.md5
++1,RPS_E_qualcomm_5,RPS_E_qualcomm_5.bit,RPS_E_qualcomm_5.yuv.md5
++1,RPS_F_docomo_2,RPS_F_docomo_2.bit,RPS_F_docomo_2.md5
++1,RQT_A_HHI_4,RQT_A_HHI_4.bit,RQT_A_HHI_4.md5
++1,RQT_B_HHI_4,RQT_B_HHI_4.bit,RQT_B_HHI_4.md5
++1,RQT_C_HHI_4,RQT_C_HHI_4.bit,RQT_C_HHI_4.md5
++1,RQT_D_HHI_4,RQT_D_HHI_4.bit,RQT_D_HHI_4.md5
++1,RQT_E_HHI_4,RQT_E_HHI_4.bit,RQT_E_HHI_4.md5
++1,RQT_F_HHI_4,RQT_F_HHI_4.bit,RQT_F_HHI_4.md5
++1,RQT_G_HHI_4,RQT_G_HHI_4.bit,RQT_G_HHI_4.md5
++1,SAO_A_MediaTek_4,SAO_A_MediaTek_4.bit,SAO_A_MediaTek_4.md5
++1,SAO_B_MediaTek_5,SAO_B_MediaTek_5.bit,SAO_B_MediaTek_5.md5
++1,SAO_C_Samsung_5,SAO_C_Samsung_5.bin,SAO_C_Samsung_5.md5
++1,SAO_D_Samsung_5,SAO_D_Samsung_5.bin,SAO_D_Samsung_5.md5
++1,SAO_E_Canon_4,SAO_E_Canon_4.bit,SAO_E_Canon_4.md5
++1,SAO_F_Canon_3,SAO_F_Canon_3.bit,SAO_F_Canon_3.md5
++1,SAO_G_Canon_3,SAO_G_Canon_3.bit,SAO_G_Canon_3.md5
++1,SDH_A_Orange_4,SDH_A_Orange_4.bit,SDH_A_Orange_4_yuv.md5
++1,SLICES_A_Rovi_3,SLICES_A_Rovi_3.bin,SLICES_A_Rovi_3.md5
++1,SLIST_A_Sony_4,str.bin,SLIST_A_Sony_4_yuv.md5
++1,SLIST_B_Sony_8,str.bin,SLIST_B_Sony_8_yuv.md5
++1,SLIST_C_Sony_3,str.bin,SLIST_C_Sony_3_yuv.md5
++1,SLIST_D_Sony_9,str.bin,SLIST_D_Sony_9_yuv.md5
++1,SLPPLP_A_VIDYO_2,SLPPLP_A_VIDYO_2.bit,SLPPLP_A_VIDYO_2_yuv.md5
++1,STRUCT_A_Samsung_6,STRUCT_A_Samsung_6.bin,STRUCT_A_Samsung_6.md5
++1,STRUCT_B_Samsung_6,STRUCT_B_Samsung_6.bin,STRUCT_B_Samsung_6.md5
++1,TILES_A_Cisco_2,TILES_A_Cisco_2.bin,TILES_A_Cisco_2_yuv.md5
++1,TILES_B_Cisco_1,TILES_B_Cisco_1.bin,TILES_B_Cisco_1_yuv.md5
++1,TMVP_A_MS_3,TMVP_A_MS_3.bit,TMVP_A_MS_3.yuv.md5
++1,TSCL_A_VIDYO_5,TSCL_A_VIDYO_5.bit,TSCL_A_VIDYO_5_yuv.md5
++1,TSCL_B_VIDYO_4,TSCL_B_VIDYO_4.bit,TSCL_B_VIDYO_4_yuv.md5
++1,TSKIP_A_MS_3,TSKIP_A_MS_3.bit,TSKIP_A_MS_3.yuv.md5
++0,TSUNEQBD_A_MAIN10_Technicolor_2,TSUNEQBD_A_MAIN10_Technicolor_2.bit,TSUNEQBD_A_MAIN10_Technicolor_2_yuv.md5, # Y/C bit depth unmatched
++1,TUSIZE_A_Samsung_1,TUSIZE_A_Samsung_1.bin,TUSIZE_A_Samsung_1.md5
++1,VPSID_A_VIDYO_2,VPSID_A_VIDYO_2.bit,VPSID_A_VIDYO_2_yuv.md5
++1,WP_A_MAIN10_Toshiba_3,WP_A_MAIN10_Toshiba_3.bit,WP_A_MAIN10_Toshiba_3_yuv.md5
++1,WP_A_Toshiba_3,WP_A_Toshiba_3.bit,WP_A_Toshiba_3_yuv.md5
++1,WP_B_Toshiba_3,WP_B_Toshiba_3.bit,WP_B_Toshiba_3_yuv.md5
++1,WP_MAIN10_B_Toshiba_3,WP_MAIN10_B_Toshiba_3.bit,WP_MAIN10_B_Toshiba_3_yuv.md5
++1,WPP_A_ericsson_MAIN10_2,WPP_A_ericsson_MAIN10_2.bit,WPP_A_ericsson_MAIN10_yuv.md5
++1,WPP_A_ericsson_MAIN_2,WPP_A_ericsson_MAIN_2.bit,WPP_A_ericsson_MAIN_2_yuv.md5
++1,WPP_B_ericsson_MAIN10_2,WPP_B_ericsson_MAIN10_2.bit,WPP_B_ericsson_MAIN10_yuv.md5
++1,WPP_B_ericsson_MAIN_2,WPP_B_ericsson_MAIN_2.bit,WPP_B_ericsson_MAIN_2_yuv.md5
++1,WPP_C_ericsson_MAIN10_2,WPP_C_ericsson_MAIN10_2.bit,WPP_C_ericsson_MAIN10_yuv.md5
++1,WPP_C_ericsson_MAIN_2,WPP_C_ericsson_MAIN_2.bit,WPP_C_ericsson_MAIN_2_yuv.md5
++1,WPP_D_ericsson_MAIN10_2,WPP_D_ericsson_MAIN10_2.bit,WPP_D_ericsson_MAIN10_yuv.md5
++1,WPP_D_ericsson_MAIN_2,WPP_D_ericsson_MAIN_2.bit,WPP_D_ericsson_MAIN_2_yuv.md5
++1,WPP_E_ericsson_MAIN10_2,WPP_E_ericsson_MAIN10_2.bit,WPP_E_ericsson_MAIN10_yuv.md5
++1,WPP_E_ericsson_MAIN_2,WPP_E_ericsson_MAIN_2.bit,WPP_E_ericsson_MAIN_2_yuv.md5
++1,WPP_F_ericsson_MAIN10_2,WPP_F_ericsson_MAIN10_2.bit,WPP_F_ericsson_MAIN10_yuv.md5
++1,WPP_F_ericsson_MAIN_2,WPP_F_ericsson_MAIN_2.bit,WPP_F_ericsson_MAIN_2_yuv.md5
+--- /dev/null
++++ b/pi-util/conf_native.sh
+@@ -0,0 +1,135 @@
++# Configure an out-of-tree ffmpeg build for the machine this script runs on.
++# Run it from the top of the source tree: FFSRC is the current directory.
++echo "Configure for native build"
++
++FFSRC=$(pwd)
++BUILDBASE=$FFSRC/out
++
++#RPI_KEEPS="-save-temps=obj"
++RPI_KEEPS=""
++
++NOSHARED=       # set by --noshared
++MMAL=           # set by --mmal
++USR_PREFIX=     # set by --usr; otherwise defaults to $OUT/install below
++TOOLCHAIN=      # set by --tsan
++R=rel           # last component of the build directory name: rel or tsan
++
++while [ "$1" != "" ] ; do
++    case $1 in
++        --noshared)
++            NOSHARED=1
++            ;;
++        --mmal)
++            MMAL=1
++            ;;
++        --usr)
++            USR_PREFIX=/usr
++            ;;
++        --tsan)
++            TOOLCHAIN="--toolchain=gcc-tsan"
++            R=tsan
++            ;;
++        *)
++            echo "Usage $0: [--noshared] [--mmal] [--usr] [--tsan]"
++            echo " noshared Build static libs and executable - good for testing"
++            echo " mmal Build mmal decoders"
++            echo " usr Set install prefix to /usr [default=<build-dir>/install]"
++            echo " tsan Build with the gcc-tsan toolchain"
++            exit 1
++            ;;
++    esac
++    shift
++done
++
++MCOPTS=
++RPI_INCLUDES=
++RPI_LIBDIRS=
++RPI_DEFINES=
++RPI_EXTRALIBS=
++
++# uname -m reports the kernel type, which may not share the 32/64-bitness of
++# userspace, so getconf LONG_BIT decides that; uname only checks the processor.
++# Comparisons use "=" (not "=="): there is no #! line, so a POSIX sh may run this.
++MC=$(uname -m)
++LB=$(getconf LONG_BIT)
++if [ "$MC" = "armv7l" ] || [ "$MC" = "aarch64" ]; then
++    if [ "$LB" = "32" ]; then
++        echo "M/C armv7"
++        A=arm-linux-gnueabihf
++        B=armv7
++        MCOPTS="--arch=armv6t2 --cpu=cortex-a7"
++        RPI_DEFINES=-mfpu=neon-vfpv4
++    elif [ "$LB" = "64" ]; then
++        echo "M/C aarch64"
++        A=aarch64-linux-gnu
++        B=arm64
++    else
++        echo "Unknown LONG_BIT name: $LB"
++        exit 1
++    fi
++else
++    echo "Unknown machine name: $MC"
++    exit 1
++fi
++
++if [ -n "$MMAL" ]; then
++    RPI_OPT_VC=/opt/vc
++    RPI_INCLUDES="-I$RPI_OPT_VC/include -I$RPI_OPT_VC/include/interface/vcos/pthreads -I$RPI_OPT_VC/include/interface/vmcs_host/linux"
++    RPI_LIBDIRS="-L$RPI_OPT_VC/lib"
++    RPI_DEFINES="$RPI_DEFINES -D__VCCOREVER__=0x4000000"
++    RPI_EXTRALIBS="-Wl,--start-group -lbcm_host -lmmal -lmmal_util -lmmal_core -lvcos -lvcsm -lvchostif -lvchiq_arm -Wl,--end-group"
++    RPIOPTS="--enable-mmal"
++else
++    RPIOPTS="--disable-mmal"
++fi
++
++C=$(lsb_release -sc)   # output of lsb_release -sc, used in the build directory name
++V=$(cat RELEASE)       # contents of ./RELEASE, used in the build directory name
++
++SHARED_LIBS="--enable-shared"
++if [ -n "$NOSHARED" ]; then
++    SHARED_LIBS="--disable-shared"
++    OUT=$BUILDBASE/$B-$C-$V-static-$R
++    echo Static libs
++else
++    echo Shared libs
++    OUT=$BUILDBASE/$B-$C-$V-shared-$R
++fi
++
++if [ -z "$USR_PREFIX" ]; then
++    USR_PREFIX=$OUT/install
++fi
++LIB_PREFIX=$USR_PREFIX/lib/$A
++INC_PREFIX=$USR_PREFIX/include/$A
++
++echo Destination directory: $OUT
++mkdir -p "$OUT" || exit 1
++# Nothing under here need worry git - including this .gitignore!
++echo "**" > "$BUILDBASE/.gitignore"
++cd "$OUT" || exit 1   # never run configure from the wrong directory
++
++"$FFSRC/configure" \
++    --prefix="$USR_PREFIX" \
++    --libdir="$LIB_PREFIX" \
++    --incdir="$INC_PREFIX" \
++    $MCOPTS \
++    $TOOLCHAIN \
++    --disable-stripping \
++    --disable-thumb \
++    --enable-sand \
++    --enable-v4l2-request \
++    --enable-libdrm \
++    --enable-vout-egl \
++    --enable-vout-drm \
++    --enable-gpl \
++    $SHARED_LIBS \
++    $RPIOPTS \
++    --extra-cflags="-ggdb $RPI_KEEPS $RPI_DEFINES $RPI_INCLUDES" \
++    --extra-cxxflags="$RPI_DEFINES $RPI_INCLUDES" \
++    --extra-ldflags="$RPI_LIBDIRS" \
++    --extra-libs="$RPI_EXTRALIBS" \
++    --extra-version="rpi"
++
++echo "Configured into $OUT"
++
++# gcc option for getting asm listing: -Wa,-ahls
+--- /dev/null
++++ b/pi-util/ffconf.py
+@@ -0,0 +1,215 @@
++#!/usr/bin/env python3
++
++import string
++import os
++import subprocess
++import re
++import argparse
++import sys
++import csv
++from stat import *
++
++CODEC_HEVC_RPI = 1
++HWACCEL_RPI = 2
++HWACCEL_DRM = 3
++HWACCEL_VAAPI = 4
++
++def testone(fileroot, srcname, es_file, md5_file, pix, dectype, vcodec, ffmpeg_exec):
++    """Decode one conformance stream and check its MD5 against the reference.
++
++    fileroot    -- directory containing es_file and md5_file
++    srcname     -- test name; '/'-separated leading components become
++                   sub-directories of /tmp for the .log and .dec.md5 files
++    es_file     -- elementary stream to decode
++    md5_file    -- file whose first 32-hex-digit run is the expected MD5
++    pix         -- "8", "10" or "12" selects the output pix_fmt on the
++                   hwaccel path; any other value leaves it unset
++    dectype     -- HWACCEL_RPI / HWACCEL_DRM / HWACCEL_VAAPI select that
++                   hwaccel; anything else decodes with vcodec instead
++    vcodec      -- decoder name used when no hwaccel is selected
++    ffmpeg_exec -- ffmpeg executable to run
++
++    Returns 0 on match, 1 on mismatch, 2 if no decoded MD5 could be read,
++    3 if no reference MD5 could be read.
++    """
++    hwaccel = {HWACCEL_RPI: "rpi", HWACCEL_DRM: "drm", HWACCEL_VAAPI: "vaapi"}.get(dectype, "")
++    pix_name = {"8": "yuv420p", "10": "yuv420p10le", "12": "yuv420p12le"}.get(pix)
++    pix_fmt = ["-pix_fmt", pix_name] if pix_name else []
++
++    # Mirror the directory part of the test name under /tmp
++    *dirs, name = srcname.split('/')
++    tmp_root = os.path.join("/tmp", *dirs)
++    os.makedirs(tmp_root, exist_ok=True)
++
++    # Remove any output left by a previous run
++    dec_file = os.path.join(tmp_root, name + ".dec.md5")
++    try:
++        os.remove(dec_file)
++    except OSError:
++        pass
++
++    src_file = os.path.join(fileroot, es_file)
++    # Unaligned needed for cropping conformance
++    if hwaccel:
++        ffargs = [ffmpeg_exec, "-flags", "unaligned", "-hwaccel", hwaccel, "-vcodec", "hevc", "-i", src_file] + pix_fmt + ["-f", "md5", dec_file]
++    else:
++        ffargs = [ffmpeg_exec, "-flags", "unaligned", "-vcodec", vcodec, "-i", src_file, "-f", "md5", dec_file]
++
++    # 'with' closes the log even if ffmpeg cannot be started
++    with open(os.path.join(tmp_root, name + ".log"), "wt") as flog:
++        subprocess.call(ffargs, stdout=flog, stderr=subprocess.STDOUT)
++
++        m1 = None
++        m2 = None
++        # Best effort: an unreadable file just leaves its match as None.
++        # 'Exception' (not a bare except) lets Ctrl-C still stop the run.
++        try:
++            with open(os.path.join(fileroot, md5_file)) as f:
++                for line in f:
++                    m1 = re.search("[0-9a-f]{32}", line.lower())
++                    if m1:
++                        break
++
++            with open(dec_file) as f:
++                m2 = re.search("[0-9a-f]{32}", f.readline())
++        except Exception:
++            pass
++
++        if m1 and m2 and m1.group() == m2.group():
++            print("Match: " + m1.group(), file=flog)
++            rv = 0
++        elif not m1:
++            print("****** Cannot find m1", file=flog)
++            rv = 3
++        elif not m2:
++            print("****** Cannot find m2", file=flog)
++            rv = 2
++        else:
++            print("****** Mismatch: " + m1.group() + " != " + m2.group(), file=flog)
++            rv = 1
++    return rv
++
++def scandir(root):
++    # One (1, dir-name, stream-file, md5-file) row per sub-directory of root, in
++    # case-insensitive name order; "?" stands for a file that was not found.
++    rows = []
++    for name in sorted(os.listdir(root), key=str.lower):
++        test_path = os.path.join(root, name)
++        if not S_ISDIR(os.stat(test_path).st_mode):
++            continue
++        es_file = "?"
++        md5_file = "?"
++        for f in os.listdir(test_path):
++            base, ext = os.path.splitext(f)
++            if base[0] == '.':
++                continue    # hidden file
++            if ext in (".bit", ".bin"):
++                es_file = f
++                continue
++            is_md5 = ext == ".md5" or (ext == ".txt" and base.endswith(("_md5", "md5sum")))
++            # The first MD5 file found is kept unless a later one ends in "yuv"
++            if is_md5 and (md5_file == "?" or base[-3:] == "yuv"):
++                md5_file = f
++        rows.append((1, name, es_file, md5_file))
++    return rows
++
++def runtest(name, tests):
++    # True if name should run: no filter was given, or some filter string is a
++    # prefix of name or appears immediately after a '/' inside it.
++    if not tests:
++        return True
++    selected = (name.startswith(t) or ("/" + t) in name for t in tests)
++    return any(selected)
++
++def doconf(csva, tests, test_root, vcodec, dectype, ffmpeg_exec):
++    # Run every selected row of the conformance CSV and print a summary.
++    # Row layout: expectation, name, stream file, md5 file, pix.
++    # Expectation 0 = skip, 2 = mismatch expected, 3 = missing output expected;
++    # any other value expects a pass.
++    unexpected_text = {1: ": * FAIL *", 2: ": * CRASH *", 3: ": * MD5 MISSING *"}
++    unx_failures = []
++    unx_success = []
++    n_ok = 0
++    n_bad = 0
++    for row in csva:
++        exp_test = int(row[0])
++        if not exp_test or not runtest(row[1], tests):
++            continue
++        name = row[1]
++        print ("==== ", name, end="")
++        sys.stdout.flush()
++
++        rv = testone(os.path.join(test_root, name), name, row[2], row[3], row[4],
++                     dectype=dectype, vcodec=vcodec, ffmpeg_exec=ffmpeg_exec)
++
++        if rv != 0:
++            n_bad += 1
++            if exp_test == 2 and rv == 1:
++                print(": fail")
++            elif exp_test == 3 and rv == 2:
++                # Call an expected "crash" an abort
++                print(": abort")
++            else:
++                unx_failures.append(name)
++                print(unexpected_text.get(rv, ": * BANG *"))
++        else:
++            n_ok += 1
++            if exp_test == 2:
++                print(": * OK *")
++                unx_success.append(name)
++            else:
++                print(": ok")
++
++    # Totals are only printed when nothing unexpected happened
++    if not (unx_failures or unx_success):
++        print("All tests normal:", n_ok, "ok,", n_bad, "failed")
++    else:
++        print("Unexpected Failures:", unx_failures)
++        print("Unexpected Success: ", unx_success)
++
++
++class ConfCSVDialect(csv.Dialect):  # comma-separated, '"'-quoted, blanks after a comma ignored
++    delimiter = ','
++    quotechar = '"'
++    doublequote = True
++    skipinitialspace = True
++    quoting = csv.QUOTE_MINIMAL
++    lineterminator = '\n'
++    strict = True
++
++if __name__ == '__main__':
++    argp = argparse.ArgumentParser(description="FFmpeg h265 conformance tester")
++    argp.add_argument("tests", nargs='*')
++    argp.add_argument("--pi4", action='store_true', help="Force pi4 cmd line")
++    argp.add_argument("--drm", action='store_true', help="Force v4l2 drm cmd line")
++    argp.add_argument("--vaapi", action='store_true', help="Force vaapi cmd line")
++    argp.add_argument("--test_root", default="/opt/conform/h265.2016", help="Root dir for test")
++    argp.add_argument("--csvgen", action='store_true', help="Generate CSV file for dir")
++    argp.add_argument("--csv", default="pi-util/conf_h265.2016.csv", help="CSV filename")
++    argp.add_argument("--vcodec", default="hevc_rpi", help="vcodec name to use")
++    argp.add_argument("--ffmpeg", default="./ffmpeg", help="ffmpeg exec name")
++    args = argp.parse_args()
++
++    if args.csvgen:
++        csv.writer(sys.stdout).writerows(scandir(args.test_root))
++        exit(0)
++
++    with open(args.csv, 'rt') as csvfile:
++        csva = list(csv.reader(csvfile, ConfCSVDialect()))
++
++    # --pi4 beats --drm beats --vaapi; with none given, probe the filesystem
++    if args.pi4:
++        dectype = HWACCEL_RPI
++    elif args.drm:
++        dectype = HWACCEL_DRM
++    elif args.vaapi:
++        dectype = HWACCEL_VAAPI
++    elif os.path.exists("/sys/module/rpivid_hevc"):
++        dectype = HWACCEL_DRM
++    elif os.path.exists("/dev/rpivid-hevcmem"):
++        dectype = HWACCEL_RPI
++    else:
++        dectype = CODEC_HEVC_RPI
++
++    doconf(csva, args.tests, args.test_root, args.vcodec, dectype, args.ffmpeg)
++
+--- /dev/null
++++ b/pi-util/ffperf.py
+@@ -0,0 +1,128 @@
++#!/usr/bin/env python3
++
++import time
++import string
++import os
++import tempfile
++import subprocess
++import re
++import argparse
++import sys
++import csv
++from stat import *
++
++class tstats:
++    close_threshold = 0.01  # is_close(): largest relative difference in elapsed
++
++    def __init__(self, stats_dict=None):
++        # Without a dict no attributes are set; time_file() assigns them itself
++        if stats_dict is None:
++            return
++        self.name = stats_dict["name"]
++        self.elapsed = float(stats_dict["elapsed"])
++        self.user = float(stats_dict["user"])
++        self.sys = float(stats_dict["sys"])
++
++    def times_str(self):
++        cpu = self.user + self.sys
++        return "time=%6.2f, cpu=%6.2f (%4.2f%%)" % (self.elapsed, cpu, (cpu * 100.0) / self.elapsed)
++
++    def dict(self):
++        return {key: getattr(self, key) for key in ("name", "elapsed", "user", "sys")}
++
++    def is_close(self, other):
++        return self.close_threshold > abs(other.elapsed - self.elapsed) / self.elapsed
++
++    def __lt__(self, other):
++        return self.elapsed < other.elapsed
++    def __gt__(self, other):
++        return self.elapsed > other.elapsed
++
++    def time_file(name, prefix, ffmpeg="./ffmpeg"):
++        # Called on the class (no self).  Output goes to the module-level flog.
++        cmd = [ffmpeg, "-no_cvt_hw", "-vcodec", "hevc_rpi", "-t", "30", "-i", prefix + name, "-f", "vout_rpi", os.devnull]
++        stats = tstats()
++        stats.name = name
++        start_time = time.clock_gettime(time.CLOCK_MONOTONIC)
++        cproc = subprocess.Popen(cmd, bufsize=-1, stdout=flog, stderr=flog)
++        rusage = os.wait4(cproc.pid, 0)[2]
++        stats.elapsed = time.clock_gettime(time.CLOCK_MONOTONIC) - start_time
++        stats.user = rusage.ru_utime
++        stats.sys = rusage.ru_stime
++        return stats
++
++
++def common_prefix(s1, s2):
++    # Longest common leading part of s1 and s2.  n is the fallback index, so
++    # an empty argument yields "" instead of raising UnboundLocalError.
++    n = min(len(s1), len(s2))
++    return s1[:next((i for i in range(n) if s1[i] != s2[i]), n)]
++
++def main():
++    # Time every stream --repeat times and write the fastest run of each to the CSV
++    global flog   # read by tstats.time_file()
++    argp = argparse.ArgumentParser(description="FFmpeg performance tester", epilog="""
++To blank the screen before starting use "xdg-screensaver activate"
++(For some reason this doesn't seem to work from within python).
++""")
++
++    argp.add_argument("streams", nargs='*')
++    argp.add_argument("--csv_out", default="ffperf_out.csv", help="CSV output filename")
++    argp.add_argument("--csv_in", help="CSV input filename")
++    argp.add_argument("--prefix", help="Filename prefix (include terminal '/' if a directory).")
++    argp.add_argument("--repeat", default=3, type=int, help="Run repeat count")
++    argp.add_argument("--ffmpeg", default="./ffmpeg", help="FFmpeg executable")
++    args = argp.parse_args()
++
++    csv_out = csv.DictWriter(open(args.csv_out, 'w', newline=''), ["name", "elapsed", "user", "sys"])
++    csv_out.writeheader()
++
++    stats_in = {}
++    if args.csv_in is not None:
++        with open(args.csv_in, 'r', newline='') as f_in:
++            stats_in = {row["name"]: tstats(row) for row in csv.DictReader(f_in)}
++
++    flog = open(os.path.join(tempfile.gettempdir(), "ffperf.log"), "wt")
++
++    streams = args.streams
++    if streams and args.prefix is None:
++        # Split off the directory part common to all streams as the prefix
++        prefix = streams[0]
++        for other in streams[1:]:
++            prefix = common_prefix(prefix, other)
++        head, sep, _ = prefix.rpartition(os.sep)
++        prefix = head + sep
++        streams = [s[len(prefix):] for s in streams]
++    elif streams:
++        prefix = args.prefix
++    elif stats_in:
++        # No streams named: rerun the ones listed in the input CSV
++        prefix = "" if args.prefix is None else args.prefix
++        streams = list(stats_in)
++    else:
++        print ("No source streams specified")
++        return 1
++
++    for f in sorted(streams, key=lambda x : "~" * x.count(os.sep) + x.lower()):
++        print ("====", f)
++
++        # Placeholder that any run shorter than 999s replaces
++        best = tstats({"name":f, "elapsed":999, "user":999, "sys":999})
++        for _ in range(args.repeat):
++            t = tstats.time_file(f, prefix, args.ffmpeg)
++            print ("...", t.times_str())
++            if best > t:
++                best = t
++
++        if best.name in stats_in:
++            pstat = stats_in[best.name]
++            print("---" if pstat.is_close(best) else "<<<" if best < pstat else ">>>", pstat.times_str())
++        csv_out.writerow(best.dict())
++        print ()
++
++    return 0
++
++
++if __name__ == '__main__':
++ exit(main())
++
+--- /dev/null
++++ b/pi-util/genpatch.sh
+@@ -0,0 +1,35 @@
++set -e
++# Tag the current commit (unless --notag) and write a diff against n<VERSION>
++NOPATCH=
++if [ "$1" = "--notag" ]; then
++    shift
++    NOPATCH=1
++fi
++
++if [ -z "$1" ]; then
++    echo "Usage: $0 [--notag] <patch_tag>"
++    echo "e.g.: $0 mmal_4"
++    exit 1
++fi
++
++VERSION=`cat RELEASE`
++if [ -z "$VERSION" ]; then
++    echo "Can't find version RELEASE"
++    exit 1
++fi
++
++PATCHFILE="../ffmpeg-$VERSION-$1.patch"
++
++if [ -n "$NOPATCH" ]; then
++    echo Not tagged
++else
++    # Only continue if we are all committed
++    git diff --name-status --exit-code
++
++    PATCHTAG="pi/$VERSION/$1"
++    echo "Tagging: $PATCHTAG"
++
++    git tag "$PATCHTAG"
++fi
++echo "Generating patch: $PATCHFILE"
++git diff "n$VERSION" -- > "$PATCHFILE"
+--- /dev/null
++++ b/pi-util/make_array.py
+@@ -0,0 +1,23 @@
++#!/usr/bin/env python
++
++# Usage
++# make_array file.bin
++# Produces file.h with array of bytes.
++# NOTE: Python 2 syntax (print statements); this will not run under Python 3.
++import sys
++for file in sys.argv[1:]:
++    # Split on the last dot only, so paths such as ./x.bin or a.b.bin work
++    prefix,suffix = file.rsplit('.', 1); assert suffix=='bin'
++    name=prefix.split('/')[-1]
++    print 'Converting',file
++    with open(prefix+'.h','wb') as out:
++        print >>out, 'static const unsigned char',name,'[] = {'
++        with open(file,'rb') as fd:
++            i = 0
++            for byte in fd.read():
++                print >>out, '0x%02x, ' % ord(byte),
++                i = i + 1
++                if i % 8 == 0:
++                    print >>out, ' // %04x' % (i - 8)
++        print >>out,'};'
++
+--- /dev/null
++++ b/pi-util/mkinst.sh
+@@ -0,0 +1,5 @@
++set -e
++# Run the install step, then copy everything under ./install into the sysroot below
++make install
++# NOTE(review): the destination is a hard-coded relative path; adjust for your tree
++cp -r install/* ../vlc/sysroot/raspian_stretch_pi1-sysroot/usr
+--- /dev/null
++++ b/pi-util/patkodi.sh
+@@ -0,0 +1,9 @@
++set -e # stop at the first failing command
++KODIBASE=/home/jc/rpi/kodi/xbmc # hard-coded absolute path; edit before use
++JOBS=-j20
++make $JOBS
++git diff xbmc/release/4.3-kodi > $KODIBASE/tools/depends/target/ffmpeg/pfcd_hevc_optimisations.patch # diff against that ref, written into the Kodi tree
++make -C $KODIBASE/tools/depends/target/ffmpeg $JOBS
++make -C $KODIBASE/build install
++
++
+--- /dev/null
++++ b/pi-util/perfcmp.py
+@@ -0,0 +1,101 @@
++#!/usr/bin/env python3
++
++import time
++import string
++import os
++import tempfile
++import subprocess
++import re
++import argparse
++import sys
++import csv
++from stat import *
++
++class tstats:
++    close_threshold = 0.01  # is_close(): largest relative difference in elapsed
++
++    def __init__(self, stats_dict=None):
++        if stats_dict is None:
++            return
++        self.name = stats_dict["name"]
++        self.elapsed = float(stats_dict["elapsed"])
++        self.user = float(stats_dict["user"])
++        self.sys = float(stats_dict["sys"])
++
++    def times_str(self):
++        cpu = self.user + self.sys
++        return "time=%6.2f, cpu=%6.2f (%4.2f%%)" % (self.elapsed, cpu, (cpu * 100.0) / self.elapsed)
++
++    def dict(self):
++        return {key: getattr(self, key) for key in ("name", "elapsed", "user", "sys")}
++
++    def is_close(self, other):
++        return self.close_threshold > abs(other.elapsed - self.elapsed) / self.elapsed
++
++    def __lt__(self, other):
++        return self.elapsed < other.elapsed
++    def __gt__(self, other):
++        return self.elapsed > other.elapsed
++
++    def time_file(name, prefix):  # NOTE(review): reads a global flog that this script never assigns
++        cmd = ["./ffmpeg", "-t", "30", "-i", prefix + name, "-f", "null", os.devnull]
++        stats = tstats()
++        stats.name = name
++        start_time = time.clock_gettime(time.CLOCK_MONOTONIC)
++        cproc = subprocess.Popen(cmd, bufsize=-1, stdout=flog, stderr=flog)
++        rusage = os.wait4(cproc.pid, 0)[2]
++        stats.elapsed = time.clock_gettime(time.CLOCK_MONOTONIC) - start_time
++        stats.user = rusage.ru_utime
++        stats.sys = rusage.ru_stime
++        return stats
++
++
++def common_prefix(s1, s2):
++    # Longest common leading part of s1 and s2.  n is the fallback index, so
++    # an empty argument yields "" instead of raising UnboundLocalError.
++    n = min(len(s1), len(s2))
++    return s1[:next((i for i in range(n) if s1[i] != s2[i]), n)]
++
++def main():
++    # Print, stream by stream, how elapsed time differs between two ffperf CSVs
++    argp = argparse.ArgumentParser(description="FFmpeg performance compare")
++    argp.add_argument("stream0", help="CSV to compare")
++    argp.add_argument("stream1", nargs='?', default="ffperf_out.csv", help="CSV to compare")
++    args = argp.parse_args()
++
++    def load(csv_name):
++        with open(csv_name, 'r', newline='') as f_in:
++            return {x["name"]:tstats(x) for x in csv.DictReader(f_in)}
++
++    stats0 = load(args.stream0)
++    stats1 = load(args.stream1)
++    print (args.stream0, "<<-->>", args.stream1)
++    print ()
++
++    thresh = 0.3    # percent difference represented by one arrow character
++    tc = 6          # width of the arrow column
++    for f in sorted(stats0.keys() | stats1.keys(), key=lambda x : "~" * x.count(os.sep) + x.lower()):
++        if f not in stats0 or f not in stats1:
++            # Only present in one of the two files
++            print (" XX :", f)
++            continue
++
++        s0 = stats0[f]
++        s1 = stats1[f]
++        pcent = ((s0.elapsed - s1.elapsed) / s0.elapsed) * 100.0
++
++        nchar = min(tc - 1, int(abs(pcent) / thresh))
++        if nchar == 0:
++            cc = " -- "
++        elif pcent < 0:
++            cc = "<" * nchar + " " * (tc - nchar)
++        else:
++            cc = " " * (tc - nchar) + ">" * nchar
++        print ("%6.2f %s%6.2f (%+5.2f) : %s" % (s0.elapsed, cc, s1.elapsed, pcent, f))
++
++    return 0
++
++
++if __name__ == '__main__':
++ exit(main())
++
+--- /dev/null
++++ b/pi-util/qem.sh
+@@ -0,0 +1,9 @@
++TARGET_DIR=../src/eupton_vc4dev_2012a/software/vc4/DEV/applications/tutorials/user_shader_example_tex
++QASM=python\ ../local/bin/qasm.py
++SRC_FILE=libavcodec/rpi_hevc_shader.qasm
++DST_BASE=shader
++# Copy the command header, then write $DST_BASE.c and $DST_BASE.h from the qasm source into TARGET_DIR
++cp libavcodec/rpi_hevc_shader_cmd.h $TARGET_DIR
++$QASM -mc_c:$DST_BASE,$DST_BASE,$DST_BASE $SRC_FILE > $TARGET_DIR/$DST_BASE.c
++$QASM -mc_h:$DST_BASE,$DST_BASE,$DST_BASE $SRC_FILE > $TARGET_DIR/$DST_BASE.h
++
+--- /dev/null
++++ b/pi-util/testfilt.py
+@@ -0,0 +1,83 @@
++#!/usr/bin/env python3
++
++import string
++import os
++import subprocess
++import re
++import argparse
++import sys
++import csv
++from stat import *
++
++class validator:  # base class: a flag that starts False and can only be set
++    def __init__(self):
++        self.ok = False
++
++    def setok(self):
++        self.ok = True
++
++    def isok(self):
++        return self.ok
++
++class valid_regex(validator):  # satisfied once any scanned line matches the regex
++    def __init__(self, regex):
++        super().__init__()
++        self.regex = re.compile(regex)
++
++    def scanline(self, line):
++        if not self.isok() and self.regex.search(line):
++            self.setok()
++
++
++def validate(validators, flog):
++    # Show every log line to every validator, then print "Test failed" once
++    # per unsatisfied validator, or "OK" when there is none.
++    for line in flog:
++        for v in validators:
++            v.scanline(line)
++
++    n_failed = 0
++    for v in validators:
++        if not v.isok():
++            n_failed += 1
++            print("Test failed")
++    if n_failed == 0:
++        print("OK")
++    return n_failed == 0
++
++def runtest(name, ffmpeg, args, suffix, validators):
++    # Run ffmpeg with args plus an output file /tmp/testfilt/<name>/<name><suffix>,
++    # capture its output in <name>.log beside it, then return validate()'s
++    # verdict on that log.
++    log_root = os.path.join("/tmp", "testfilt", name)
++    ofilename = os.path.join(log_root, name + suffix)
++    logname = os.path.join(log_root, name + ".log")
++    os.makedirs(log_root, exist_ok=True)
++
++    try:
++        os.remove(ofilename)
++    except OSError:
++        pass
++
++    # 'with' makes sure the log is closed before it is read back
++    with open(logname, "wb") as flog:
++        subprocess.call([ffmpeg] + args + [ofilename], stdout=flog, stderr=subprocess.STDOUT, text=False)
++
++    with open(logname, "rt") as flog:
++        return validate(validators, flog)
++
++def sayok(log_root, flog):  # ignores both arguments; always prints and returns True
++ print("Woohoo")
++ return True
++
++if __name__ == '__main__':
++ # Only option: which ffmpeg executable to run
++ argp = argparse.ArgumentParser(description="FFmpeg filter tester")
++ argp.add_argument("--ffmpeg", default="./ffmpeg", help="ffmpeg exec name")
++ args = argp.parse_args()
++ # NOTE(review): one fixed test; its input is a hard-coded absolute path and it passes only if the log reports 900 frames
++ runtest("ATest", args.ffmpeg, ["-v", "verbose", "-no_cvt_hw", "-an", "-c:v", "h264_v4l2m2m", "-i",
++ "/home/johncox/server/TestMedia/Sony/jellyfish-10-mbps-hd-h264.mkv",
++# "/home/jc/rpi/streams/jellyfish-3-mbps-hd-h264.mkv",
++ "-c:v", "h264_v4l2m2m", "-b:v", "2M"], ".mkv",
++ [valid_regex(r'Output stream #0:0 \(video\): 900 frames encoded; 900 packets muxed')])
+--- /dev/null
++++ b/pi-util/v3dusage.py
+@@ -0,0 +1,128 @@
++#!/usr/bin/env python
++
++import sys
++import argparse
++import re
++
++def do_logparse(logname):
++ # Parse the log file 'logname' and print per-unit time totals and QPU/L2 counters (Python 2 print syntax)
++ rmatch = re.compile(r'^([0-9]+\.[0-9]{3}): (done )?((vpu0)|(vpu1)|(qpu1)) ([A-Z_]+) cb:([0-9a-f]+) ')
++ rqcycle = re.compile(r'^([0-9]+\.[0-9]{3}): v3d: QPU Total clock cycles for all QPUs doing vertex/coordinate shading +([0-9]+)$')
++ rqtscycle = re.compile(r'^([0-9]+\.[0-9]{3}): v3d: QPU Total clock cycles for all QPUs stalled waiting for TMUs +([0-9]+)$')
++ rl2hits = re.compile(r'^([0-9]+\.[0-9]{3}): v3d: L2C Total Level 2 cache ([a-z]+) +([0-9]+)$')
++ # Totals are keyed by unit name: "vpu0", "vpu1" or "qpu1.<n>", plus 'idle' in ttotal
++ ttotal = {'idle':0.0}
++ tstart = {}
++ qctotal = {}
++ qtstotal = {}
++ l2hits = {}
++ l2total = {}
++ time0 = None
++ idle_start = None
++ qpu_op_no = 0
++ op_count = 0
++
++ with open(logname, "rt") as infile:
++ for line in infile:
++ match = rmatch.match(line)
++ if match:
++# print match.group(1), ":", match.group(2), ":", match.group(3), ":", match.group(7), ":"
++ time = float(match.group(1))
++ unit = match.group(3)
++ opstart = not match.group(2)
++ optype = match.group(7)
++ hascb = match.group(8) != "0"
++
++ if unit == 'qpu1':
++ unit = unit + "." + str(qpu_op_no)
++ if not opstart:
++ if hascb or optype == 'EXECUTE_SYNC':
++ qpu_op_no = 0
++ else:
++ qpu_op_no += 1
++
++ # Ignore sync type
++ if optype == 'EXECUTE_SYNC':
++ continue
++ # NOTE(review): a timestamp of 0.000 is falsy, so 'not time0' / 'not idle_start' treat it as unset
++ if not time0:
++ time0 = time
++
++ if opstart:
++ tstart[unit] = time;
++ elif unit in tstart:
++ op_count += 1
++ if not unit in ttotal:
++ ttotal[unit] = 0.0
++ ttotal[unit] += time - tstart[unit]
++ del tstart[unit]
++
++ if not idle_start and not tstart:
++ idle_start = time
++ elif idle_start and tstart:
++ ttotal['idle'] += time - idle_start
++ idle_start = None
++
++ match = rqcycle.match(line)
++ if match:
++ unit = "qpu1." + str(qpu_op_no)
++ if not unit in qctotal:
++ qctotal[unit] = 0
++ qctotal[unit] += int(match.group(2))
++
++ match = rqtscycle.match(line)
++ if match:
++ unit = "qpu1." + str(qpu_op_no)
++ if not unit in qtstotal:
++ qtstotal[unit] = 0
++ qtstotal[unit] += int(match.group(2))
++
++ match = rl2hits.match(line)
++ if match:
++ unit = "qpu1." + str(qpu_op_no)
++ if not unit in l2total:
++ l2total[unit] = 0
++ l2hits[unit] = 0
++ l2total[unit] += int(match.group(3))
++ if match.group(2) == "hits":
++ l2hits[unit] += int(match.group(3))
++
++ # 'time' below still holds the timestamp of the last line matched by rmatch
++ if not time0:
++ print "No v3d profile records found"
++ else:
++ tlogged = time - time0
++
++ print "Logged time:", tlogged, " Op count:", op_count
++ for unit in sorted(ttotal):
++ print b'%6s: %10.3f %7.3f%%' % (unit, ttotal[unit], ttotal[unit] * 100.0 / tlogged)
++ print
++ for unit in sorted(qctotal):
++ if not unit in qtstotal:
++ qtstotal[unit] = 0;
++ print b'%6s: Qcycles: %10d, TMU stall: %10d (%7.3f%%)' % (unit, qctotal[unit], qtstotal[unit], (qtstotal[unit] * 100.0)/qctotal[unit])
++ if unit in l2total:
++ print b' L2Total: %10d, hits: %10d (%7.3f%%)' % (l2total[unit], l2hits[unit], (l2hits[unit] * 100.0)/l2total[unit])
++
++
++
++if __name__ == '__main__':
++ argp = argparse.ArgumentParser(
++ formatter_class=argparse.RawDescriptionHelpFormatter,
++ description="QPU/VPU perf summary from VC logging",
++ epilog = """
++Will also summarise TMU stalls if logging requests set in qpu noflush param
++in the profiled code.
++
++Example use:
++ vcgencmd set_logging level=0xc0
++ <command to profile>
++ sudo vcdbg log msg >& t.log
++ v3dusage.py t.log
++""")
++ # Single positional argument: the log file handed to do_logparse()
++ argp.add_argument("logfile")
++ args = argp.parse_args()
++
++ do_logparse(args.logfile)
++
+--- a/tests/checkasm/Makefile
++++ b/tests/checkasm/Makefile
+@@ -38,6 +38,7 @@ CHECKASMOBJS-$(CONFIG_AVCODEC)
+ # libavfilter tests
+ AVFILTEROBJS-$(CONFIG_AFIR_FILTER) += af_afir.o
+ AVFILTEROBJS-$(CONFIG_BLEND_FILTER) += vf_blend.o
++AVFILTEROBJS-$(CONFIG_BWDIF_FILTER) += vf_bwdif.o
+ AVFILTEROBJS-$(CONFIG_COLORSPACE_FILTER) += vf_colorspace.o
+ AVFILTEROBJS-$(CONFIG_EQ_FILTER) += vf_eq.o
+ AVFILTEROBJS-$(CONFIG_GBLUR_FILTER) += vf_gblur.o
+@@ -56,8 +57,9 @@ CHECKASMOBJS-$(CONFIG_SWSCALE) += $(SWS
+ AVUTILOBJS += av_tx.o
+ AVUTILOBJS += fixed_dsp.o
+ AVUTILOBJS += float_dsp.o
++AVUTILOBJS-$(CONFIG_SAND) += rpi_sand.o
+
+-CHECKASMOBJS-$(CONFIG_AVUTIL) += $(AVUTILOBJS)
++CHECKASMOBJS-$(CONFIG_AVUTIL) += $(AVUTILOBJS) $(AVUTILOBJS-yes)
+
+ CHECKASMOBJS-$(ARCH_AARCH64) += aarch64/checkasm.o
+ CHECKASMOBJS-$(HAVE_ARMV5TE_EXTERNAL) += arm/checkasm.o
+--- a/tests/checkasm/checkasm.c
++++ b/tests/checkasm/checkasm.c
+@@ -173,6 +173,9 @@ static const struct {
+ #if CONFIG_BLEND_FILTER
+ { "vf_blend", checkasm_check_blend },
+ #endif
++ #if CONFIG_BWDIF_FILTER
++ { "vf_bwdif", checkasm_check_vf_bwdif },
++ #endif
+ #if CONFIG_COLORSPACE_FILTER
+ { "vf_colorspace", checkasm_check_colorspace },
+ #endif
+@@ -201,6 +204,9 @@ static const struct {
+ { "fixed_dsp", checkasm_check_fixed_dsp },
+ { "float_dsp", checkasm_check_float_dsp },
+ { "av_tx", checkasm_check_av_tx },
++ #if CONFIG_SAND
++ { "rpi_sand", checkasm_check_rpi_sand },
++ #endif
+ #endif
+ { NULL }
+ };
+--- a/tests/checkasm/checkasm.h
++++ b/tests/checkasm/checkasm.h
+@@ -72,6 +72,7 @@ void checkasm_check_motion(void);
+ void checkasm_check_nlmeans(void);
+ void checkasm_check_opusdsp(void);
+ void checkasm_check_pixblockdsp(void);
++void checkasm_check_rpi_sand(void);
+ void checkasm_check_sbrdsp(void);
+ void checkasm_check_synth_filter(void);
+ void checkasm_check_sw_gbrp(void);
+@@ -81,6 +82,7 @@ void checkasm_check_utvideodsp(void);
+ void checkasm_check_v210dec(void);
+ void checkasm_check_v210enc(void);
+ void checkasm_check_vc1dsp(void);
++void checkasm_check_vf_bwdif(void);
+ void checkasm_check_vf_eq(void);
+ void checkasm_check_vf_gblur(void);
+ void checkasm_check_vf_hflip(void);
+--- /dev/null
++++ b/tests/checkasm/rpi_sand.c
+@@ -0,0 +1,118 @@
++/*
++ * Copyright (c) 2023 John Cox
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License along
++ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
++ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
++ */
++
++#include <string.h>
++#include "checkasm.h"
++#include "libavutil/common.h"
++#include "libavutil/rpi_sand_fns.h"
++
++#if ARCH_ARM
++#include "libavutil/arm/cpu.h"
++#include "libavutil/arm/rpi_sand_neon.h"
++#elif ARCH_AARCH64
++#include "libavutil/aarch64/cpu.h"
++#include "libavutil/aarch64/rpi_sand_neon.h"
++#endif
++
++static inline uint32_t pack30(unsigned int a, unsigned int b, unsigned int c)
++{   /* Packs the low 10 bits of each argument: a at bit 0, b at bit 10, c at bit 20 */
++    return ((c & 0x3ff) << 20) | ((b & 0x3ff) << 10) | (a & 0x3ff);
++}
++
++void checkasm_check_rpi_sand(void)
++{
++ const unsigned int w = 1280;
++ const unsigned int h = 66;
++ const unsigned int stride1 = 128;
++ const unsigned int stride2 = h*3/2;
++ const unsigned int ssize = ((w+95)/96)*128*h*3/2;
++ const unsigned int ysize = ((w + 32) * (h + 32) * 2);
++
++ uint8_t * sbuf0 = malloc(ssize);
++ uint8_t * sbuf1 = malloc(ssize);
++ uint8_t * ybuf0 = malloc(ysize);
++ uint8_t * ybuf1 = malloc(ysize);
++ uint8_t * vbuf0 = malloc(ysize);
++ uint8_t * vbuf1 = malloc(ysize);
++ uint8_t * yframe0 = (w + 32) * 16 + ybuf0;
++ uint8_t * yframe1 = (w + 32) * 16 + ybuf1;
++ uint8_t * vframe0 = (w + 32) * 16 + vbuf0;
++ uint8_t * vframe1 = (w + 32) * 16 + vbuf1;
++ unsigned int i;
++ /* Fill the source buffer from rnd() and keep an identical second copy */
++ for (i = 0; i != ssize; i += 4)
++ *(uint32_t*)(sbuf0 + i) = rnd();
++ memcpy(sbuf1, sbuf0, ssize);
++
++ if (check_func(have_neon(av_get_cpu_flags()) ? ff_rpi_sand30_lines_to_planar_y16 : av_rpi_sand30_to_planar_y16, "rpi_sand30_to_planar_y16")) {
++ declare_func(void, uint8_t * dst, const unsigned int dst_stride,
++ const uint8_t * src,
++ unsigned int stride1, unsigned int stride2,
++ unsigned int _x, unsigned int y,
++ unsigned int _w, unsigned int h);
++
++ memset(ybuf0, 0xbb, ysize);
++ memset(ybuf1, 0xbb, ysize);
++
++ call_ref(yframe0, (w + 32) * 2, sbuf0, stride1, stride2, 0, 0, w, h);
++ call_new(yframe1, (w + 32) * 2, sbuf1, stride1, stride2, 0, 0, w, h);
++ /* Whole buffers are compared, so the 0xbb fill around the frame and the source must match too */
++ if (memcmp(sbuf0, sbuf1, ssize)
++ || memcmp(ybuf0, ybuf1, ysize))
++ fail();
++ /* NOTE(review): this benches into ybuf1, while the calls above write to yframe1 */
++ bench_new(ybuf1, (w + 32) * 2, sbuf1, stride1, stride2, 0, 0, w, h);
++ }
++
++ if (check_func(have_neon(av_get_cpu_flags()) ? ff_rpi_sand30_lines_to_planar_c16 : av_rpi_sand30_to_planar_c16, "rpi_sand30_to_planar_c16")) {
++ declare_func(void, uint8_t * u_dst, const unsigned int u_stride,
++ uint8_t * v_dst, const unsigned int v_stride,
++ const uint8_t * src,
++ unsigned int stride1, unsigned int stride2,
++ unsigned int _x, unsigned int y,
++ unsigned int _w, unsigned int h);
++
++ memset(ybuf0, 0xbb, ysize);
++ memset(ybuf1, 0xbb, ysize);
++ memset(vbuf0, 0xbb, ysize);
++ memset(vbuf1, 0xbb, ysize);
++ /* The y buffers are reused here as the u_dst plane; the v buffers are v_dst */
++ call_ref(yframe0, (w + 32), vframe0, (w + 32), sbuf0, stride1, stride2, 0, 0, w/2, h/2);
++ call_new(yframe1, (w + 32), vframe1, (w + 32), sbuf1, stride1, stride2, 0, 0, w/2, h/2);
++
++ if (memcmp(sbuf0, sbuf1, ssize)
++ || memcmp(ybuf0, ybuf1, ysize)
++ || memcmp(vbuf0, vbuf1, ysize))
++ fail();
++
++ bench_new(yframe1, (w + 32), vframe1, (w + 32), sbuf1, stride1, stride2, 0, 0, w/2, h/2);
++ }
++
++
++ report("sand30");
++
++ free(sbuf0);
++ free(sbuf1);
++ free(ybuf0);
++ free(ybuf1);
++ free(vbuf0);
++ free(vbuf1);
++}
++
+--- /dev/null
++++ b/tests/checkasm/vf_bwdif.c
+@@ -0,0 +1,256 @@
++/*
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License along
++ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
++ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
++ */
++
++#include <string.h>
++#include "checkasm.h"
++#include "libavcodec/internal.h"
++#include "libavfilter/bwdif.h"
++#include "libavutil/mem_internal.h"
++
++#define WIDTH 256
++/* Fill count elements of buf0 and buf1 with the same rnd() & mask values */
++#define randomize_buffers(buf0, buf1, mask, count) \
++ for (size_t i = 0; i < count; i++) \
++ buf0[i] = buf1[i] = rnd() & mask
++/* Same, but every element is either mask or 0 (note: this macro ends in ';') */
++#define randomize_overflow_check(buf0, buf1, mask, count) \
++ for (size_t i = 0; i < count; i++) \
++ buf0[i] = buf1[i] = (rnd() & 1) != 0 ? mask : 0;
++/* Run call_ref and call_new on identical random buffers; fail() if the outputs or any input buffer differ */
++#define BODY(type, depth) \
++ do { \
++ type prev0[9*WIDTH], prev1[9*WIDTH]; \
++ type next0[9*WIDTH], next1[9*WIDTH]; \
++ type cur0[9*WIDTH], cur1[9*WIDTH]; \
++ type dst0[WIDTH], dst1[WIDTH]; \
++ const int stride = WIDTH; \
++ const int mask = (1<<depth)-1; \
++ \
++ declare_func(void, void *dst, void *prev, void *cur, void *next, \
++ int w, int prefs, int mrefs, int prefs2, int mrefs2, \
++ int prefs3, int mrefs3, int prefs4, int mrefs4, \
++ int parity, int clip_max); \
++ \
++ randomize_buffers(prev0, prev1, mask, 9*WIDTH); \
++ randomize_buffers(next0, next1, mask, 9*WIDTH); \
++ randomize_buffers( cur0, cur1, mask, 9*WIDTH); \
++ \
++ call_ref(dst0, prev0 + 4*WIDTH, cur0 + 4*WIDTH, next0 + 4*WIDTH, \
++ WIDTH, stride, -stride, 2*stride, -2*stride, \
++ 3*stride, -3*stride, 4*stride, -4*stride, \
++ 0, mask); \
++ call_new(dst1, prev1 + 4*WIDTH, cur1 + 4*WIDTH, next1 + 4*WIDTH, \
++ WIDTH, stride, -stride, 2*stride, -2*stride, \
++ 3*stride, -3*stride, 4*stride, -4*stride, \
++ 0, mask); \
++ \
++ if (memcmp(dst0, dst1, sizeof dst0) \
++ || memcmp(prev0, prev1, sizeof prev0) \
++ || memcmp(next0, next1, sizeof next0) \
++ || memcmp( cur0, cur1, sizeof cur0)) \
++ fail(); \
++ bench_new(dst1, prev1 + 4*WIDTH, cur1 + 4*WIDTH, next1 + 4*WIDTH, \
++ WIDTH, stride, -stride, 2*stride, -2*stride, \
++ 3*stride, -3*stride, 4*stride, -4*stride, \
++ 0, mask); \
++ } while (0)
++
++void checkasm_check_vf_bwdif(void) // checkasm tests for the bwdif filter_line, filter_line3, filter_edge and filter_intra pointers
++{
++ BWDIFContext ctx_8, ctx_10; // initialised below with 8 and 10 (presumably the bit depth)
++
++ ff_bwdif_init_filter_line(&ctx_8, 8);
++ ff_bwdif_init_filter_line(&ctx_10, 10);
++
++ if (check_func(ctx_8.filter_line, "bwdif8")) {
++ BODY(uint8_t, 8);
++ report("bwdif8");
++ }
++
++ if (check_func(ctx_10.filter_line, "bwdif10")) {
++ BODY(uint16_t, 10);
++ report("bwdif10");
++ }
++
++ if (!ctx_8.filter_line3) // pointer still NULL after init: test the C implementation
++ ctx_8.filter_line3 = ff_bwdif_filter_line3_c;
++
++ {
++ LOCAL_ALIGNED_16(uint8_t, prev0, [11*WIDTH]);
++ LOCAL_ALIGNED_16(uint8_t, prev1, [11*WIDTH]);
++ LOCAL_ALIGNED_16(uint8_t, next0, [11*WIDTH]);
++ LOCAL_ALIGNED_16(uint8_t, next1, [11*WIDTH]);
++ LOCAL_ALIGNED_16(uint8_t, cur0, [11*WIDTH]);
++ LOCAL_ALIGNED_16(uint8_t, cur1, [11*WIDTH]);
++ LOCAL_ALIGNED_16(uint8_t, dst0, [WIDTH*3]);
++ LOCAL_ALIGNED_16(uint8_t, dst1, [WIDTH*3]);
++ const int stride = WIDTH;
++ const int mask = (1<<8)-1; // 0xff; also passed as the clip_max argument
++ int parity;
++
++ for (parity = 0; parity != 2; ++parity) {
++ if (check_func(ctx_8.filter_line3, "bwdif8.line3.rnd.p%d", parity)) {
++
++ declare_func(void, void * dst1, int d_stride,
++ const void * prev1, const void * cur1, const void * next1, int prefs,
++ int w, int parity, int clip_max);
++
++ randomize_buffers(prev0, prev1, mask, 11*WIDTH);
++ randomize_buffers(next0, next1, mask, 11*WIDTH);
++ randomize_buffers( cur0, cur1, mask, 11*WIDTH);
++
++ call_ref(dst0, stride, // input pointers are offset 4 rows into the 11-row buffers
++ prev0 + stride * 4, cur0 + stride * 4, next0 + stride * 4, stride,
++ WIDTH, parity, mask);
++ call_new(dst1, stride,
++ prev1 + stride * 4, cur1 + stride * 4, next1 + stride * 4, stride,
++ WIDTH, parity, mask);
++
++ if (memcmp(dst0, dst1, WIDTH*3) // fail on any output mismatch or any difference between the paired input buffers
++ || memcmp(prev0, prev1, WIDTH*11)
++ || memcmp(next0, next1, WIDTH*11)
++ || memcmp( cur0, cur1, WIDTH*11))
++ fail();
++
++ bench_new(dst1, stride,
++ prev1 + stride * 4, cur1 + stride * 4, next1 + stride * 4, stride,
++ WIDTH, parity, mask);
++ }
++ }
++
++ // Use just 0s and ~0s to try to provoke bad cropping or overflow
++ // Parity makes no difference to this test so just test 0
++ if (check_func(ctx_8.filter_line3, "bwdif8.line3.overflow")) {
++
++ declare_func(void, void * dst1, int d_stride,
++ const void * prev1, const void * cur1, const void * next1, int prefs,
++ int w, int parity, int clip_max);
++
++ randomize_overflow_check(prev0, prev1, mask, 11*WIDTH);
++ randomize_overflow_check(next0, next1, mask, 11*WIDTH);
++ randomize_overflow_check( cur0, cur1, mask, 11*WIDTH);
++
++ call_ref(dst0, stride,
++ prev0 + stride * 4, cur0 + stride * 4, next0 + stride * 4, stride,
++ WIDTH, 0, mask);
++ call_new(dst1, stride,
++ prev1 + stride * 4, cur1 + stride * 4, next1 + stride * 4, stride,
++ WIDTH, 0, mask);
++
++ if (memcmp(dst0, dst1, WIDTH*3)
++ || memcmp(prev0, prev1, WIDTH*11)
++ || memcmp(next0, next1, WIDTH*11)
++ || memcmp( cur0, cur1, WIDTH*11))
++ fail();
++
++ // No point in benchmarking this case
++ }
++
++ report("bwdif8.line3");
++ }
++
++ {
++ LOCAL_ALIGNED_16(uint8_t, prev0, [11*WIDTH]);
++ LOCAL_ALIGNED_16(uint8_t, prev1, [11*WIDTH]);
++ LOCAL_ALIGNED_16(uint8_t, next0, [11*WIDTH]);
++ LOCAL_ALIGNED_16(uint8_t, next1, [11*WIDTH]);
++ LOCAL_ALIGNED_16(uint8_t, cur0, [11*WIDTH]);
++ LOCAL_ALIGNED_16(uint8_t, cur1, [11*WIDTH]);
++ LOCAL_ALIGNED_16(uint8_t, dst0, [WIDTH*3]);
++ LOCAL_ALIGNED_16(uint8_t, dst1, [WIDTH*3]);
++ const int stride = WIDTH;
++ const int mask = (1<<8)-1; // 0xff; also passed as the clip_max argument
++ int spat;
++ int parity;
++
++ for (spat = 0; spat != 2; ++spat) { // every spat/parity combination gets its own check_func name
++ for (parity = 0; parity != 2; ++parity) {
++ if (check_func(ctx_8.filter_edge, "bwdif8.edge.s%d.p%d", spat, parity)) {
++
++ declare_func(void, void *dst1, void *prev1, void *cur1, void *next1,
++ int w, int prefs, int mrefs, int prefs2, int mrefs2,
++ int parity, int clip_max, int spat);
++
++ randomize_buffers(prev0, prev1, mask, 11*WIDTH);
++ randomize_buffers(next0, next1, mask, 11*WIDTH);
++ randomize_buffers( cur0, cur1, mask, 11*WIDTH);
++ memset(dst0, 0xba, WIDTH * 3); // identical fill so bytes the filter does not write still compare equal
++ memset(dst1, 0xba, WIDTH * 3);
++
++ call_ref(dst0 + stride,
++ prev0 + stride * 4, cur0 + stride * 4, next0 + stride * 4, WIDTH,
++ stride, -stride, stride * 2, -stride * 2,
++ parity, mask, spat);
++ call_new(dst1 + stride,
++ prev1 + stride * 4, cur1 + stride * 4, next1 + stride * 4, WIDTH,
++ stride, -stride, stride * 2, -stride * 2,
++ parity, mask, spat);
++
++ if (memcmp(dst0, dst1, WIDTH*3)
++ || memcmp(prev0, prev1, WIDTH*11)
++ || memcmp(next0, next1, WIDTH*11)
++ || memcmp( cur0, cur1, WIDTH*11))
++ fail();
++
++ bench_new(dst1 + stride,
++ prev1 + stride * 4, cur1 + stride * 4, next1 + stride * 4, WIDTH,
++ stride, -stride, stride * 2, -stride * 2,
++ parity, mask, spat);
++ }
++ }
++ }
++
++ report("bwdif8.edge");
++ }
++
++ if (check_func(ctx_8.filter_intra, "bwdif8.intra")) {
++ LOCAL_ALIGNED_16(uint8_t, cur0, [11*WIDTH]);
++ LOCAL_ALIGNED_16(uint8_t, cur1, [11*WIDTH]);
++ LOCAL_ALIGNED_16(uint8_t, dst0, [WIDTH*3]);
++ LOCAL_ALIGNED_16(uint8_t, dst1, [WIDTH*3]);
++ const int stride = WIDTH;
++ const int mask = (1<<8)-1; // 0xff; also passed as the clip_max argument
++
++ declare_func(void, void *dst1, void *cur1, int w, int prefs, int mrefs,
++ int prefs3, int mrefs3, int parity, int clip_max);
++
++ randomize_buffers( cur0, cur1, mask, 11*WIDTH);
++ memset(dst0, 0xba, WIDTH * 3); // identical fill so bytes the filter does not write still compare equal
++ memset(dst1, 0xba, WIDTH * 3);
++
++ call_ref(dst0 + stride,
++ cur0 + stride * 4, WIDTH,
++ stride, -stride, stride * 3, -stride * 3,
++ 0, mask);
++ call_new(dst1 + stride,
++ cur0 + stride * 4, WIDTH, // NOTE(review): reads cur0 like call_ref (the other tests pass the *1 buffers here); cur1 is only used by randomize_buffers and the memcmp
++ stride, -stride, stride * 3, -stride * 3,
++ 0, mask);
++
++ if (memcmp(dst0, dst1, WIDTH*3)
++ || memcmp( cur0, cur1, WIDTH*11))
++ fail();
++
++ bench_new(dst1 + stride,
++ cur0 + stride * 4, WIDTH,
++ stride, -stride, stride * 3, -stride * 3,
++ 0, mask);
++
++ report("bwdif8.intra");
++ }
++}
+--- a/tests/fate/checkasm.mak
++++ b/tests/fate/checkasm.mak
+@@ -26,6 +26,7 @@ FATE_CHECKASM = fate-checkasm-aacpsdsp
+ fate-checkasm-motion \
+ fate-checkasm-opusdsp \
+ fate-checkasm-pixblockdsp \
++ fate-checkasm-rpi_sand \
+ fate-checkasm-sbrdsp \
+ fate-checkasm-synth_filter \
+ fate-checkasm-sw_gbrp \
+@@ -36,6 +37,7 @@ FATE_CHECKASM = fate-checkasm-aacpsdsp
+ fate-checkasm-v210enc \
+ fate-checkasm-vc1dsp \
+ fate-checkasm-vf_blend \
++ fate-checkasm-vf_bwdif \
+ fate-checkasm-vf_colorspace \
+ fate-checkasm-vf_eq \
+ fate-checkasm-vf_gblur \
diff --git a/recipes-multimedia/rpidistro-ffmpeg/files/2001-configure-setup-for-OE-core-usage.patch b/recipes-multimedia/rpidistro-ffmpeg/files/2001-configure-setup-for-OE-core-usage.patch
new file mode 100644
index 0000000..5a064b8
--- /dev/null
+++ b/recipes-multimedia/rpidistro-ffmpeg/files/2001-configure-setup-for-OE-core-usage.patch
@@ -0,0 +1,79 @@
+From 702742f9575c87ac8c496d76daf51af7d4aaebd7 Mon Sep 17 00:00:00 2001
+From: Vincent Davis Jr <vince@underview.tech>
+Date: Sun, 9 Jun 2024 18:09:25 -0400
+Subject: [PATCH] configure: setup for OE-core usage
+
+Upstream-Status: Inappropriate
+
+The RPi-Distro repo forks upstream ffmpeg and applies patches to enable
+Raspberry Pi support.
+
+Add global CFLAGS and LDFLAGS so that, when ./configure
+runs its tests, it is able to locate the proper headers
+and libs in a cross-compile environment.
+
+Add a new check for opengl. None of the headers checked
+above exist, and we should be using GLESv2 anyway.
+
+Update where compiler finds OMX_Core.h
+
+Only check that the sdl2 version is at least 2.0.1 (drop the < 3.0.0 cap)
+
+Signed-off-by: Vincent Davis Jr <vince@underview.tech>
+---
+ configure | 14 +++++++++-----
+ 1 file changed, 9 insertions(+), 5 deletions(-)
+
+diff --git a/configure b/configure
+index 7214c221..7a541e63 100755
+--- a/configure
++++ b/configure
+@@ -5898,6 +5898,9 @@ enable_weak_pic() {
+ }
+
+ enabled pic && enable_weak_pic
++# Set CFLAGS and LDFLAGS globally
++add_cflags -I${sysroot}/usr/include/ -I${sysroot}/usr/include/IL -I${sysroot}/usr/include/drm
++add_ldflags -L${sysroot}/usr/lib
+
+ test_cc <<EOF || die "Symbol mangling check failed."
+ int ff_extern;
+@@ -6716,8 +6719,8 @@ enabled mbedtls && { check_pkg_config mbedtls mbedtls mbedtls/x509_crt
+ enabled mediacodec && { enabled jni || die "ERROR: mediacodec requires --enable-jni"; }
+ enabled mmal && { check_lib mmal interface/mmal/mmal.h mmal_port_connect -lmmal_core -lmmal_util -lmmal_vc_client -lbcm_host ||
+ { ! enabled cross_compile &&
+- add_cflags -isystem/opt/vc/include/ -isystem/opt/vc/include/interface/vmcs_host/linux -isystem/opt/vc/include/interface/vcos/pthreads -fgnu89-inline &&
+- add_ldflags -L/opt/vc/lib/ &&
++ add_cflags -I${sysroot}/usr/include -I${sysroot}/usr/include/interface/vmcs_host/linux -I${sysroot}/usr/include/interface/vcos/pthreads -fgnu89-inline &&
++ add_ldflags -L${sysroot}/usr/lib &&
+ check_lib mmal interface/mmal/mmal.h mmal_port_connect -lmmal_core -lmmal_util -lmmal_vc_client -lbcm_host; } ||
+ die "ERROR: mmal not found" &&
+ check_func_headers interface/mmal/mmal.h "MMAL_PARAMETER_VIDEO_MAX_NUM_CALLBACKS"; }
+@@ -6737,12 +6740,13 @@ enabled opengl && { check_lib opengl GL/glx.h glXGetProcAddress "-lGL
+ check_lib opengl windows.h wglGetProcAddress "-lopengl32 -lgdi32" ||
+ check_lib opengl OpenGL/gl3.h glGetError "-Wl,-framework,OpenGL" ||
+ check_lib opengl ES2/gl.h glGetError "-isysroot=${sysroot} -Wl,-framework,OpenGLES" ||
++ check_lib opengl GLES2/gl2.h glGetError "-lGLESv2" ||
+ die "ERROR: opengl not found."
+ }
+ enabled omx_rpi && { test_code cc OMX_Core.h OMX_IndexConfigBrcmVideoRequestIFrame ||
+ { ! enabled cross_compile &&
+- add_cflags -isystem/opt/vc/include/IL &&
+- test_code cc OMX_Core.h OMX_IndexConfigBrcmVideoRequestIFrame; } ||
++ add_cflags -I${sysroot}/usr/include/IL &&
++ test_code cc IL/OMX_Core.h OMX_IndexConfigBrcmVideoRequestIFrame; } ||
+ die "ERROR: OpenMAX IL headers from raspberrypi/firmware not found"; } &&
+ enable omx
+ enabled omx && require_headers OMX_Core.h
+@@ -6788,7 +6792,7 @@ fi
+
+ if enabled sdl2; then
+ SDL2_CONFIG="${cross_prefix}sdl2-config"
+- test_pkg_config sdl2 "sdl2 >= 2.0.1 sdl2 < 3.0.0" SDL_events.h SDL_PollEvent
++ test_pkg_config sdl2 "sdl2 >= 2.0.1" SDL_events.h SDL_PollEvent
+ if disabled sdl2 && "${SDL2_CONFIG}" --version > /dev/null 2>&1; then
+ sdl2_cflags=$("${SDL2_CONFIG}" --cflags)
+ sdl2_extralibs=$("${SDL2_CONFIG}" --libs)
+--
+2.34.1
+
diff --git a/recipes-multimedia/rpidistro-ffmpeg/files/2004-libavcodec-omx-replace-opt-vc-path-with-usr-lib.patch b/recipes-multimedia/rpidistro-ffmpeg/files/2004-libavcodec-omx-replace-opt-vc-path-with-usr-lib.patch
new file mode 100644
index 0000000..02c07de
--- /dev/null
+++ b/recipes-multimedia/rpidistro-ffmpeg/files/2004-libavcodec-omx-replace-opt-vc-path-with-usr-lib.patch
@@ -0,0 +1,35 @@
+From 0dfb56e12fa709794525cda1471091f6699905d5 Mon Sep 17 00:00:00 2001
+From: Vincent Davis Jr <vince@underview.tech>
+Date: Thu, 8 Dec 2022 10:49:03 -0600
+Subject: [PATCH] libavcodec: omx replace /opt/vc path with /usr/lib
+
+Upstream-Status: Inappropriate
+
+The RPi-Distro repo forks upstream ffmpeg and applies patches to enable
+Raspberry Pi support.
+
+Configure omx.c for OE usage, where libbcm_host.so
+and libopenmaxil.so are installed in /usr/lib rather
+than in /opt/vc/lib.
+
+Signed-off-by: Vincent Davis Jr <vince@underview.tech>
+---
+ libavcodec/omx.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/libavcodec/omx.c b/libavcodec/omx.c
+index 0a6a3083..8c6e9193 100644
+--- a/libavcodec/omx.c
++++ b/libavcodec/omx.c
+@@ -141,7 +141,7 @@ static av_cold OMXContext *omx_init(void *logctx, const char *libname, const cha
+ {
+ static const char * const libnames[] = {
+ #if CONFIG_OMX_RPI
+- "/opt/vc/lib/libopenmaxil.so", "/opt/vc/lib/libbcm_host.so",
++ "/usr/lib/libopenmaxil.so", "/usr/lib/libbcm_host.so",
+ #else
+ "libOMX_Core.so", NULL,
+ "libOmxCore.so", NULL,
+--
+2.38.1
+
diff --git a/recipes-multimedia/rpidistro-ffmpeg/rpidistro-ffmpeg_5.1.4.bb b/recipes-multimedia/rpidistro-ffmpeg/rpidistro-ffmpeg_5.1.4.bb
new file mode 100644
index 0000000..e8f640b
--- /dev/null
+++ b/recipes-multimedia/rpidistro-ffmpeg/rpidistro-ffmpeg_5.1.4.bb
@@ -0,0 +1,192 @@
+# Descriptive metadata for the recipe.
+SUMMARY = "A complete, cross-platform solution to record, convert and stream audio and video."
+DESCRIPTION = "FFmpeg is the leading multimedia framework, able to decode, encode, transcode, \
+ mux, demux, stream, filter and play pretty much anything that humans and machines \
+ have created. It supports the most obscure ancient formats up to the cutting edge."
+HOMEPAGE = "https://www.ffmpeg.org/"
+SECTION = "libs"
+
+# Recipe-wide licence expression, followed by per-package overrides.
+# Most library packages are GPL-2.0-or-later when 'gpl' is in PACKAGECONFIG and
+# LGPL-2.1-or-later otherwise; ${PN} and libpostproc are always GPL-2.0-or-later.
+LICENSE = "GPL-2.0-or-later & LGPL-2.1-or-later & ISC & MIT & BSD-2-Clause & BSD-3-Clause & IJG"
+LICENSE:${PN} = "GPL-2.0-or-later"
+LICENSE:libavcodec = "${@bb.utils.contains('PACKAGECONFIG', 'gpl', 'GPL-2.0-or-later', 'LGPL-2.1-or-later', d)}"
+LICENSE:libavdevice = "${@bb.utils.contains('PACKAGECONFIG', 'gpl', 'GPL-2.0-or-later', 'LGPL-2.1-or-later', d)}"
+LICENSE:libavfilter = "${@bb.utils.contains('PACKAGECONFIG', 'gpl', 'GPL-2.0-or-later', 'LGPL-2.1-or-later', d)}"
+LICENSE:libavformat = "${@bb.utils.contains('PACKAGECONFIG', 'gpl', 'GPL-2.0-or-later', 'LGPL-2.1-or-later', d)}"
+LICENSE:libavutil = "${@bb.utils.contains('PACKAGECONFIG', 'gpl', 'GPL-2.0-or-later', 'LGPL-2.1-or-later', d)}"
+LICENSE:libpostproc = "GPL-2.0-or-later"
+LICENSE:libswresample = "${@bb.utils.contains('PACKAGECONFIG', 'gpl', 'GPL-2.0-or-later', 'LGPL-2.1-or-later', d)}"
+LICENSE:libswscale = "${@bb.utils.contains('PACKAGECONFIG', 'gpl', 'GPL-2.0-or-later', 'LGPL-2.1-or-later', d)}"
+LICENSE_FLAGS = "commercial"
+
+# Checksums of the licence texts shipped in the source tree.
+LIC_FILES_CHKSUM = "file://COPYING.GPLv2;md5=b234ee4d69f5fce4486a80fdaf4a4263 \
+ file://COPYING.GPLv3;md5=d32239bcb673463ab874e80d47fae504 \
+ file://COPYING.LGPLv2.1;md5=bd7a443320af8c812e4c18d1b79df004 \
+ file://COPYING.LGPLv3;md5=e6a600fd5e1d9cbde2d983680233ad02"
+
+# Build fails when thumb is enabled: https://bugzilla.yoctoproject.org/show_bug.cgi?id=7717
+ARM_INSTRUCTION_SET:armv4 = "arm"
+ARM_INSTRUCTION_SET:armv5 = "arm"
+ARM_INSTRUCTION_SET:armv6 = "arm"
+# Should be API compatible with libav (which was a fork of ffmpeg)
+# libpostproc was previously packaged from a separate recipe
+PROVIDES = "ffmpeg libav libpostproc"
+RPROVIDES:${PN} = "${PROVIDES}"
+DEPENDS = "nasm-native"
+
+inherit autotools pkgconfig
+# Default feature set (weak default, so it can be overridden). Conditional entries:
+# - 'mmal sand vout-drm' are added only when MACHINE_FEATURES does NOT contain 'vc4graphics'
+# - 'mips32r2' is added when AVAILTUNES contains it
+# - 'opengl' is added when DISTRO_FEATURES contains opengl
+# - 'xv xcb' are added when DISTRO_FEATURES contains x11
+# - 'epoxy vout-egl' are added only when DISTRO_FEATURES contains both x11 and opengl
+PACKAGECONFIG ??= "avdevice avfilter avcodec avformat swresample swscale postproc ffplay \
+ v4l2 drm udev alsa bzlib lzma pic pthreads shared theora zlib libvorbis x264 gpl \
+ ${@bb.utils.contains('MACHINE_FEATURES', 'vc4graphics', '', 'mmal sand vout-drm', d)} \
+ ${@bb.utils.contains('AVAILTUNES', 'mips32r2', 'mips32r2', '', d)} \
+ ${@bb.utils.contains('DISTRO_FEATURES', 'opengl', 'opengl', '', d)} \
+ ${@bb.utils.contains('DISTRO_FEATURES', 'x11', 'xv xcb', '', d)} \
+ ${@bb.utils.contains('DISTRO_FEATURES', 'x11 opengl', 'epoxy vout-egl', '', d)}"
+
+# Sources: the RPi-Distro ffmpeg repository (pios/bookworm branch, fetched over
+# https) plus the three local patches from files/.
+SRC_URI = "\
+ git://git@github.com/RPi-Distro/ffmpeg;protocol=https;branch=pios/bookworm \
+ file://0001-ffmpeg-5.1.4-rpi_24.patch \
+ file://2001-configure-setup-for-OE-core-usage.patch \
+ file://2004-libavcodec-omx-replace-opt-vc-path-with-usr-lib.patch \
+ "
+
+# Exact commit to build.
+SRCREV = "1c363463c432c5ed492c7b759abb6e015b93b6b5"
+
+# The git fetcher unpacks into ${WORKDIR}/git.
+S = "${WORKDIR}/git"
+
+# Each PACKAGECONFIG[flag] below uses the fields
+# "configure args when enabled,configure args when disabled,build dependencies".
+# An empty field means nothing is passed / no dependency is added for that case.
+
+# libraries to build in addition to avutil
+PACKAGECONFIG[avdevice] = "--enable-avdevice,--disable-avdevice"
+PACKAGECONFIG[avfilter] = "--enable-avfilter,--disable-avfilter"
+PACKAGECONFIG[avcodec] = "--enable-avcodec,--disable-avcodec"
+PACKAGECONFIG[avformat] = "--enable-avformat,--disable-avformat"
+PACKAGECONFIG[swresample] = "--enable-swresample,--disable-swresample"
+PACKAGECONFIG[swscale] = "--enable-swscale,--disable-swscale"
+PACKAGECONFIG[postproc] = "--enable-postproc,--disable-postproc"
+#PACKAGECONFIG[avresample] = "--enable-avresample,--disable-avresample"
+
+# features to support
+PACKAGECONFIG[ffplay] = "--enable-ffplay,--disable-ffplay"
+PACKAGECONFIG[alsa] = "--enable-alsa,--disable-alsa,alsa-lib"
+PACKAGECONFIG[altivec] = "--enable-altivec,--disable-altivec,"
+PACKAGECONFIG[bzlib] = "--enable-bzlib,--disable-bzlib,bzip2"
+PACKAGECONFIG[fdk-aac] = "--enable-libfdk-aac --enable-nonfree,--disable-libfdk-aac,fdk-aac"
+PACKAGECONFIG[gpl] = "--enable-gpl,--disable-gpl"
+PACKAGECONFIG[opengl] = "--enable-opengl,--disable-opengl,virtual/libgles2"
+PACKAGECONFIG[gsm] = "--enable-libgsm,--disable-libgsm,libgsm"
+PACKAGECONFIG[jack] = "--enable-indev=jack,--disable-indev=jack,jack"
+PACKAGECONFIG[libvorbis] = "--enable-libvorbis,--disable-libvorbis,libvorbis"
+PACKAGECONFIG[libopus] = "--enable-libopus,--disable-libopus,libopus"
+PACKAGECONFIG[lzma] = "--enable-lzma,--disable-lzma,xz"
+PACKAGECONFIG[mfx] = "--enable-libmfx,--disable-libmfx,intel-mediasdk"
+PACKAGECONFIG[mp3lame] = "--enable-libmp3lame,--disable-libmp3lame,lame"
+PACKAGECONFIG[openssl] = "--enable-openssl,--disable-openssl,openssl"
+PACKAGECONFIG[sdl2] = "--enable-sdl2,--disable-sdl2,virtual/libsdl2"
+PACKAGECONFIG[speex] = "--enable-libspeex,--disable-libspeex,speex"
+PACKAGECONFIG[srt] = "--enable-libsrt,--disable-libsrt,srt"
+PACKAGECONFIG[theora] = "--enable-libtheora,--disable-libtheora,libtheora libogg"
+PACKAGECONFIG[vaapi] = "--enable-vaapi,--disable-vaapi,libva"
+PACKAGECONFIG[vdpau] = "--enable-vdpau,--disable-vdpau,libvdpau"
+PACKAGECONFIG[vpx] = "--enable-libvpx,--disable-libvpx,libvpx"
+PACKAGECONFIG[x264] = "--enable-libx264,--disable-libx264,x264"
+PACKAGECONFIG[xcb] = "--enable-libxcb,--disable-libxcb,libxcb"
+PACKAGECONFIG[xv] = "--enable-outdev=xv,--disable-outdev=xv,libxv"
+PACKAGECONFIG[zlib] = "--enable-zlib,--disable-zlib,zlib"
+PACKAGECONFIG[snappy] = "--enable-libsnappy,--disable-libsnappy,snappy"
+PACKAGECONFIG[udev] = "--enable-libudev,--disable-libudev,udev"
+PACKAGECONFIG[drm] = "--enable-libdrm,--disable-libdrm,libdrm"
+PACKAGECONFIG[epoxy] = "--enable-epoxy,--disable-epoxy,libepoxy"
+# The following five entries have an empty "disabled" field: when they are
+# switched off, no --disable-* option is passed to configure for them.
+PACKAGECONFIG[v4l2] = "--enable-libv4l2 --enable-v4l2-m2m,,v4l-utils"
+PACKAGECONFIG[mmal] = "--enable-omx --enable-omx-rpi --enable-mmal,,userland"
+PACKAGECONFIG[sand] = "--enable-sand,,"
+PACKAGECONFIG[vout-drm] = "--enable-vout-drm,,libdrm"
+PACKAGECONFIG[vout-egl] = "--enable-vout-egl,,virtual/egl"
+
+# other configuration options
+PACKAGECONFIG[mips32r2] = ",--disable-mipsdsp --disable-mipsdspr2"
+PACKAGECONFIG[pic] = "--enable-pic"
+PACKAGECONFIG[pthreads] = "--enable-pthreads,--disable-pthreads"
+PACKAGECONFIG[shared] = "--enable-shared"
+PACKAGECONFIG[strip] = ",--disable-stripping"
+
+# Set to 'yes' when PACKAGECONFIG contains an option from the list below
+# (currently only openssl); EXTRA_OECONF then adds --enable-nonfree.
+# fdk-aac is not listed because its own entry already passes --enable-nonfree.
+USE_NONFREE = "${@bb.utils.contains_any('PACKAGECONFIG', [ 'openssl' ], 'yes', '', d)}"
+
+def cpu(d):
+    """Return the value of the first -mcpu= option found in TUNE_CCARGS, or 'generic' when there is none."""
+    flag = '-mcpu='
+    tune_args = (d.getVar('TUNE_CCARGS') or '').split()
+    matches = [opt[len(flag):] for opt in tune_args if opt.startswith(flag)]
+    return matches[0] if matches else 'generic'
+
+# Arguments passed to the configure script by do_configure: cross toolchain,
+# target sysroot, install directories and the -mcpu value derived by cpu().
+# NOTE(review): EXTRA_FFCONF is not assigned anywhere in this recipe; presumably
+# it is a hook for bbappends or local configuration - confirm.
+EXTRA_OECONF = " \
+ ${@bb.utils.contains('USE_NONFREE', 'yes', '--enable-nonfree', '', d)} \
+ \
+ --cross-prefix=${TARGET_PREFIX} \
+ \
+ --ld="${CCLD}" \
+ --cc="${CC}" \
+ --cxx="${CXX}" \
+ --arch=${TARGET_ARCH} \
+ --target-os="linux" \
+ --enable-cross-compile \
+ --extra-cflags="${CFLAGS} ${HOST_CC_ARCH}${TOOLCHAIN_OPTIONS}" \
+ --extra-ldflags="${LDFLAGS}" \
+ --sysroot="${STAGING_DIR_TARGET}" \
+ ${EXTRA_FFCONF} \
+ --libdir=${libdir} \
+ --shlibdir=${libdir} \
+ --datadir=${datadir}/ffmpeg \
+ --cpu=${@cpu(d)} \
+ --pkg-config=pkg-config \
+"
+# Assembly is switched off for the linux-gnux32 override.
+EXTRA_OECONF:append:linux-gnux32 = " --disable-asm"
+
+# Some patches introduce assembly files which need preprocessing with
+# gcc e.g. src/libavutil/aarch64/rpi_sand_neon.S
+TOOLCHAIN = "gcc"
+# gold crashes on x86, another solution is to --disable-asm but that's more hacky
+# ld.gold: internal error in relocate_section, at ../../gold/i386.cc:3684
+LDFLAGS:append:x86 = "${@bb.utils.contains('DISTRO_FEATURES', 'ld-is-gold', ' -fuse-ld=bfd ', '', d)}"
+# V=1 is passed to make (presumably for verbose build output).
+EXTRA_OEMAKE = "V=1"
+
+# Override do_configure to call the source tree's configure script directly
+# with the arguments collected in EXTRA_OECONF.
+do_configure() {
+ ${S}/configure ${EXTRA_OECONF}
+}
+
+# patch out build host paths for reproducibility: before compiling for the
+# target, every occurrence of the WORKDIR path is removed from ${B}/config.h
+do_compile:prepend:class-target() {
+ sed -i -e "s,${WORKDIR},,g" ${B}/config.h
+}
+
+# Put every library into its own package; these names are prepended to
+# PACKAGES so they are matched before the default packages.
+PACKAGES =+ "libavcodec \
+ libavdevice \
+ libavfilter \
+ libavformat \
+ libavresample \
+ libavutil \
+ libpostproc \
+ libswresample \
+ libswscale"
+
+# configure is run with --datadir=${datadir}/ffmpeg, so package that same
+# directory instead of a hard-coded /usr/share path.
+FILES:${PN}:append = " ${datadir}/ffmpeg"
+# Each library package ships only its versioned shared objects.
+FILES:libavcodec = "${libdir}/libavcodec${SOLIBS}"
+FILES:libavdevice = "${libdir}/libavdevice${SOLIBS}"
+FILES:libavfilter = "${libdir}/libavfilter${SOLIBS}"
+FILES:libavformat = "${libdir}/libavformat${SOLIBS}"
+FILES:libavresample = "${libdir}/libavresample${SOLIBS}"
+FILES:libavutil = "${libdir}/libavutil${SOLIBS}"
+FILES:libpostproc = "${libdir}/libpostproc${SOLIBS}"
+FILES:libswresample = "${libdir}/libswresample${SOLIBS}"
+FILES:libswscale = "${libdir}/libswscale${SOLIBS}"
+# ffmpeg disables PIC on some platforms (e.g. x86-32)
+INSANE_SKIP:${MLPREFIX}libavcodec = "textrel"
+INSANE_SKIP:${MLPREFIX}libavdevice = "textrel"
+INSANE_SKIP:${MLPREFIX}libavfilter = "textrel"
+INSANE_SKIP:${MLPREFIX}libavformat = "textrel"
+INSANE_SKIP:${MLPREFIX}libavutil = "textrel"
+INSANE_SKIP:${MLPREFIX}libavresample = "textrel"
+INSANE_SKIP:${MLPREFIX}libswscale = "textrel"
+INSANE_SKIP:${MLPREFIX}libswresample = "textrel"
+INSANE_SKIP:${MLPREFIX}libpostproc = "textrel"
+
+# Only enable it for rpi class of machines: the default pattern is "null",
+# and the rpi override replaces it with a match-anything pattern.
+COMPATIBLE_HOST = "null"
+COMPATIBLE_HOST:rpi = "(.*)"
+