diff options
Diffstat (limited to 'recipes-multimedia/rpidistro-ffmpeg')
4 files changed, 24380 insertions, 0 deletions
diff --git a/recipes-multimedia/rpidistro-ffmpeg/files/0001-ffmpeg-5.1.4-rpi_24.patch b/recipes-multimedia/rpidistro-ffmpeg/files/0001-ffmpeg-5.1.4-rpi_24.patch new file mode 100644 index 0000000..016cf40 --- /dev/null +++ b/recipes-multimedia/rpidistro-ffmpeg/files/0001-ffmpeg-5.1.4-rpi_24.patch @@ -0,0 +1,24074 @@ + +Upstream-Status: Inappropriate + +RPI-Distro repo clones original ffmpeg and applies patches to enable +raspiberry pi support. + +--- a/configure ++++ b/configure +@@ -205,6 +205,7 @@ External library support: + --disable-bzlib disable bzlib [autodetect] + --disable-coreimage disable Apple CoreImage framework [autodetect] + --enable-chromaprint enable audio fingerprinting with chromaprint [no] ++ --disable-epoxy disable epoxy [autodetect] + --enable-frei0r enable frei0r video filtering [no] + --enable-gcrypt enable gcrypt, needed for rtmp(t)e support + if openssl, librtmp or gmp is not used [no] +@@ -281,6 +282,7 @@ External library support: + if openssl, gnutls or mbedtls is not used [no] + --enable-libtwolame enable MP2 encoding via libtwolame [no] + --enable-libuavs3d enable AVS3 decoding via libuavs3d [no] ++ --disable-libudev disable libudev [autodetect] + --enable-libv4l2 enable libv4l2/v4l-utils [no] + --enable-libvidstab enable video stabilization using vid.stab [no] + --enable-libvmaf enable vmaf filter via libvmaf [no] +@@ -343,12 +345,16 @@ External library support: + --enable-libmfx enable Intel MediaSDK (AKA Quick Sync Video) code via libmfx [no] + --enable-libnpp enable Nvidia Performance Primitives-based code [no] + --enable-mmal enable Broadcom Multi-Media Abstraction Layer (Raspberry Pi) via MMAL [no] ++ --enable-sand enable sand video formats [rpi] ++ --enable-vout-drm enable the vout_drm module - for internal testing only [no] ++ --enable-vout-egl enable the vout_egl module - for internal testing only [no] + --disable-nvdec disable Nvidia video decoding acceleration (via hwaccel) [autodetect] + --disable-nvenc disable Nvidia video encoding code [autodetect] + --enable-omx enable OpenMAX IL code [no] + --enable-omx-rpi enable OpenMAX IL code for Raspberry Pi [no] + --enable-rkmpp enable Rockchip Media Process Platform code [no] + --disable-v4l2-m2m disable V4L2 mem2mem code [autodetect] ++ --enable-v4l2-request enable V4L2 request API code [no] + --disable-vaapi disable Video Acceleration API (mainly Unix/Intel) code [autodetect] + --disable-vdpau disable Nvidia Video Decode and Presentation API for Unix code [autodetect] + --disable-videotoolbox disable VideoToolbox code [autodetect] +@@ -1754,7 +1760,9 @@ EXTERNAL_AUTODETECT_LIBRARY_LIST=" + avfoundation + bzlib + coreimage ++ epoxy + iconv ++ libudev + libxcb + libxcb_shm + libxcb_shape +@@ -1924,6 +1932,7 @@ HWACCEL_LIBRARY_LIST=" + mmal + omx + opencl ++ v4l2_request + " + + DOCUMENT_LIST=" +@@ -1941,10 +1950,14 @@ FEATURE_LIST=" + omx_rpi + runtime_cpudetect + safe_bitstream_reader ++ sand + shared + small + static + swscale_alpha ++ vout_drm ++ vout_egl ++ v4l2_req_hevc_vx + " + + # this list should be kept in linking order +@@ -2501,6 +2514,7 @@ CONFIG_EXTRA=" + rtpdec + rtpenc_chain + rv34dsp ++ sand + scene_sad + sinewin + snappy +@@ -3011,6 +3025,7 @@ d3d11va_deps="dxva_h ID3D11VideoDecoder + dxva2_deps="dxva2api_h DXVA2_ConfigPictureDecode ole32 user32" + ffnvcodec_deps_any="libdl LoadLibrary" + nvdec_deps="ffnvcodec" ++v4l2_request_deps="linux_videodev2_h linux_media_h v4l2_timeval_to_ns libdrm libudev" + vaapi_x11_deps="xlib_x11" + videotoolbox_hwaccel_deps="videotoolbox pthreads" + videotoolbox_hwaccel_extralibs="-framework QuartzCore" +@@ -3054,6 +3069,8 @@ hevc_dxva2_hwaccel_deps="dxva2 DXVA_PicP + hevc_dxva2_hwaccel_select="hevc_decoder" + hevc_nvdec_hwaccel_deps="nvdec" + hevc_nvdec_hwaccel_select="hevc_decoder" ++hevc_v4l2request_hwaccel_deps="v4l2_request" ++hevc_v4l2request_hwaccel_select="hevc_decoder" + hevc_vaapi_hwaccel_deps="vaapi VAPictureParameterBufferHEVC" + hevc_vaapi_hwaccel_select="hevc_decoder" + hevc_vdpau_hwaccel_deps="vdpau VdpPictureInfoHEVC" +@@ -3539,8 +3556,11 @@ sndio_indev_deps="sndio" + sndio_outdev_deps="sndio" + v4l2_indev_deps_any="linux_videodev2_h sys_videoio_h" + v4l2_indev_suggest="libv4l2" ++v4l2_outdev_deps="libdrm" + v4l2_outdev_deps_any="linux_videodev2_h sys_videoio_h" + v4l2_outdev_suggest="libv4l2" ++vout_drm_outdev_deps="libdrm" ++vout_egl_outdev_deps="xlib epoxy" + vfwcap_indev_deps="vfw32 vfwcap_defines" + xcbgrab_indev_deps="libxcb" + xcbgrab_indev_suggest="libxcb_shm libxcb_shape libxcb_xfixes" +@@ -3745,6 +3765,7 @@ tonemap_opencl_filter_deps="opencl const + transpose_opencl_filter_deps="opencl" + transpose_vaapi_filter_deps="vaapi VAProcPipelineCaps_rotation_flags" + transpose_vulkan_filter_deps="vulkan spirv_compiler" ++unsand_filter_select="sand" + unsharp_opencl_filter_deps="opencl" + uspp_filter_deps="gpl avcodec" + vaguedenoiser_filter_deps="gpl" +@@ -6296,6 +6317,12 @@ if enabled xlib; then + disable xlib + fi + ++enabled libudev && ++ check_pkg_config libudev libudev libudev.h udev_new ++ ++enabled epoxy && ++ check_pkg_config epoxy epoxy epoxy/egl.h epoxy_egl_version ++ + check_headers direct.h + check_headers dirent.h + check_headers dxgidebug.h +@@ -6735,8 +6762,16 @@ enabled rkmpp && { require_p + { enabled libdrm || + die "ERROR: rkmpp requires --enable-libdrm"; } + } ++enabled v4l2_request && { enabled libdrm || ++ die "ERROR: v4l2-request requires --enable-libdrm"; } && ++ { enabled libudev || ++ die "ERROR: v4l2-request requires libudev"; } + enabled vapoursynth && require_pkg_config vapoursynth "vapoursynth-script >= 42" VSScript.h vsscript_init + ++enabled vout_drm && { enabled libdrm || die "ERROR: vout_drm requires --enable-libdrm"; } ++ ++enabled vout_egl && { enabled epoxy || die "ERROR: vout_egl requires epoxy"; } && ++ { enabled xlib || die "ERROR: vout_egl requires xlib"; } + + if enabled gcrypt; then + GCRYPT_CONFIG="${cross_prefix}libgcrypt-config" +@@ -6817,6 +6852,10 @@ if enabled v4l2_m2m; then + check_cc vp9_v4l2_m2m linux/videodev2.h "int i = V4L2_PIX_FMT_VP9;" + fi + ++check_func_headers "linux/media.h linux/videodev2.h" v4l2_timeval_to_ns ++check_cc hevc_v4l2_request linux/videodev2.h "int i = V4L2_PIX_FMT_HEVC_SLICE;" ++disable v4l2_req_hevc_vx ++ + check_headers sys/videoio.h + test_code cc sys/videoio.h "struct v4l2_frmsizeenum vfse; vfse.discrete.width = 0;" && enable_sanitized struct_v4l2_frmivalenum_discrete + +@@ -7305,6 +7344,9 @@ check_deps $CONFIG_LIST \ + + enabled threads && ! enabled pthreads && ! enabled atomics_native && die "non pthread threading without atomics not supported, try adding --enable-pthreads or --cpu=i486 or higher if you are on x86" + ++# Sub-feature of hevc_v4l2request_hwaccel - can only be set once deps are done ++enabled hevc_v4l2request_hwaccel && disabled hevc_v4l2_request && enable v4l2_req_hevc_vx ++ + case $target_os in + haiku) + disable memalign +--- a/fftools/ffmpeg.c ++++ b/fftools/ffmpeg.c +@@ -1953,8 +1953,8 @@ static int ifilter_send_frame(InputFilte + av_channel_layout_compare(&ifilter->ch_layout, &frame->ch_layout); + break; + case AVMEDIA_TYPE_VIDEO: +- need_reinit |= ifilter->width != frame->width || +- ifilter->height != frame->height; ++ need_reinit |= ifilter->width != av_frame_cropped_width(frame) || ++ ifilter->height != av_frame_cropped_height(frame); + break; + } + +@@ -1965,6 +1965,9 @@ static int ifilter_send_frame(InputFilte + (ifilter->hw_frames_ctx && ifilter->hw_frames_ctx->data != frame->hw_frames_ctx->data)) + need_reinit = 1; + ++ if (no_cvt_hw && fg->graph) ++ need_reinit = 0; ++ + if (sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DISPLAYMATRIX)) { + if (!ifilter->displaymatrix || memcmp(sd->data, ifilter->displaymatrix, sizeof(int32_t) * 9)) + need_reinit = 1; +@@ -2220,8 +2223,7 @@ static int decode_video(InputStream *ist + decoded_frame->top_field_first = ist->top_field_first; + + ist->frames_decoded++; +- +- if (ist->hwaccel_retrieve_data && decoded_frame->format == ist->hwaccel_pix_fmt) { ++ if (!no_cvt_hw && ist->hwaccel_retrieve_data && decoded_frame->format == ist->hwaccel_pix_fmt) { + err = ist->hwaccel_retrieve_data(ist->dec_ctx, decoded_frame); + if (err < 0) + goto fail; +@@ -2418,7 +2420,12 @@ static int process_input_packet(InputStr + case AVMEDIA_TYPE_VIDEO: + ret = decode_video (ist, repeating ? NULL : avpkt, &got_output, &duration_pts, !pkt, + &decode_failed); +- if (!repeating || !pkt || got_output) { ++ // Pi: Do not inc dts if no_cvt_hw set ++ // V4L2 H264 decode has long latency and sometimes spits out a long ++ // stream of output without input. In this case incrementing DTS is wrong. ++ // There may be cases where the condition as written is correct so only ++ // "fix" in the cases which cause problems ++ if (!repeating || !pkt || (got_output && !no_cvt_hw)) { + if (pkt && pkt->duration) { + duration_dts = av_rescale_q(pkt->duration, ist->st->time_base, AV_TIME_BASE_Q); + } else if(ist->dec_ctx->framerate.num != 0 && ist->dec_ctx->framerate.den != 0) { +@@ -2564,12 +2571,15 @@ static enum AVPixelFormat get_format(AVC + break; + + if (ist->hwaccel_id == HWACCEL_GENERIC || +- ist->hwaccel_id == HWACCEL_AUTO) { ++ ist->hwaccel_id == HWACCEL_AUTO || ++ no_cvt_hw) { + for (i = 0;; i++) { + config = avcodec_get_hw_config(s->codec, i); + if (!config) + break; +- if (!(config->methods & ++ if (no_cvt_hw && (config->methods & AV_CODEC_HW_CONFIG_METHOD_INTERNAL)) ++ av_log(s, AV_LOG_DEBUG, "no_cvt_hw so trying pix_fmt %d with codec internal hwaccel\n", *p); ++ else if (!(config->methods & + AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX)) + continue; + if (config->pix_fmt == *p) +--- a/fftools/ffmpeg.h ++++ b/fftools/ffmpeg.h +@@ -626,6 +626,7 @@ extern enum VideoSyncMethod video_sync_m + extern float frame_drop_threshold; + extern int do_benchmark; + extern int do_benchmark_all; ++extern int no_cvt_hw; + extern int do_deinterlace; + extern int do_hex_dump; + extern int do_pkt_dump; +--- a/fftools/ffmpeg_filter.c ++++ b/fftools/ffmpeg_filter.c +@@ -1175,8 +1175,8 @@ int ifilter_parameters_from_frame(InputF + + ifilter->format = frame->format; + +- ifilter->width = frame->width; +- ifilter->height = frame->height; ++ ifilter->width = av_frame_cropped_width(frame); ++ ifilter->height = av_frame_cropped_height(frame); + ifilter->sample_aspect_ratio = frame->sample_aspect_ratio; + + ifilter->sample_rate = frame->sample_rate; +--- a/fftools/ffmpeg_hw.c ++++ b/fftools/ffmpeg_hw.c +@@ -75,6 +75,8 @@ static char *hw_device_default_name(enum + char *name; + size_t index_pos; + int index, index_limit = 1000; ++ if (!type_name) ++ return NULL; + index_pos = strlen(type_name); + name = av_malloc(index_pos + 4); + if (!name) +--- a/fftools/ffmpeg_opt.c ++++ b/fftools/ffmpeg_opt.c +@@ -162,6 +162,7 @@ enum VideoSyncMethod video_sync_method = + float frame_drop_threshold = 0; + int do_benchmark = 0; + int do_benchmark_all = 0; ++int no_cvt_hw = 0; + int do_hex_dump = 0; + int do_pkt_dump = 0; + int copy_ts = 0; +@@ -3724,6 +3725,8 @@ const OptionDef options[] = { + "add timings for benchmarking" }, + { "benchmark_all", OPT_BOOL | OPT_EXPERT, { &do_benchmark_all }, + "add timings for each task" }, ++ { "no_cvt_hw", OPT_BOOL | OPT_EXPERT, { &no_cvt_hw }, ++ "do not auto-convert hw frames to sw" }, + { "progress", HAS_ARG | OPT_EXPERT, { .func_arg = opt_progress }, + "write program-readable progress information", "url" }, + { "stdin", OPT_BOOL | OPT_EXPERT, { &stdin_interaction }, +--- a/libavcodec/Makefile ++++ b/libavcodec/Makefile +@@ -161,7 +161,10 @@ OBJS-$(CONFIG_VIDEODSP) + + OBJS-$(CONFIG_VP3DSP) += vp3dsp.o + OBJS-$(CONFIG_VP56DSP) += vp56dsp.o + OBJS-$(CONFIG_VP8DSP) += vp8dsp.o +-OBJS-$(CONFIG_V4L2_M2M) += v4l2_m2m.o v4l2_context.o v4l2_buffers.o v4l2_fmt.o ++OBJS-$(CONFIG_V4L2_M2M) += v4l2_m2m.o v4l2_context.o v4l2_buffers.o v4l2_fmt.o\ ++ weak_link.o v4l2_req_dmabufs.o ++OBJS-$(CONFIG_V4L2_REQUEST) += v4l2_req_media.o v4l2_req_pollqueue.o v4l2_req_dmabufs.o\ ++ v4l2_req_devscan.o weak_link.o + OBJS-$(CONFIG_WMA_FREQS) += wma_freqs.o + OBJS-$(CONFIG_WMV2DSP) += wmv2dsp.o + +@@ -972,6 +975,8 @@ OBJS-$(CONFIG_HEVC_D3D11VA_HWACCEL) + OBJS-$(CONFIG_HEVC_DXVA2_HWACCEL) += dxva2_hevc.o + OBJS-$(CONFIG_HEVC_NVDEC_HWACCEL) += nvdec_hevc.o + OBJS-$(CONFIG_HEVC_QSV_HWACCEL) += qsvdec.o ++OBJS-$(CONFIG_HEVC_V4L2REQUEST_HWACCEL) += v4l2_request_hevc.o v4l2_req_decode_q.o v4l2_req_hevc_v4.o ++OBJS-$(CONFIG_V4L2_REQ_HEVC_VX) += v4l2_req_hevc_v1.o v4l2_req_hevc_v2.o v4l2_req_hevc_v3.o + OBJS-$(CONFIG_HEVC_VAAPI_HWACCEL) += vaapi_hevc.o h265_profile_level.o + OBJS-$(CONFIG_HEVC_VDPAU_HWACCEL) += vdpau_hevc.o h265_profile_level.o + OBJS-$(CONFIG_MJPEG_NVDEC_HWACCEL) += nvdec_mjpeg.o +--- a/libavcodec/avcodec.h ++++ b/libavcodec/avcodec.h +@@ -2212,6 +2212,17 @@ typedef struct AVHWAccel { + * that avctx->hwaccel_priv_data is invalid. + */ + int (*frame_params)(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx); ++ ++ /** ++ * Called if parsing fails ++ * ++ * An error has occured, end_frame will not be called ++ * start_frame & decode_slice may or may not have been called ++ * Optional ++ * ++ * @param avctx the codec context ++ */ ++ void (*abort_frame)(AVCodecContext *avctx); + } AVHWAccel; + + /** +--- /dev/null ++++ b/libavcodec/hevc-ctrls-v1.h +@@ -0,0 +1,229 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* ++ * These are the HEVC state controls for use with stateless HEVC ++ * codec drivers. ++ * ++ * It turns out that these structs are not stable yet and will undergo ++ * more changes. So keep them private until they are stable and ready to ++ * become part of the official public API. ++ */ ++ ++#ifndef _HEVC_CTRLS_H_ ++#define _HEVC_CTRLS_H_ ++ ++#include <linux/videodev2.h> ++ ++/* The pixel format isn't stable at the moment and will likely be renamed. */ ++#define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */ ++ ++#define V4L2_CID_MPEG_VIDEO_HEVC_SPS (V4L2_CID_MPEG_BASE + 1008) ++#define V4L2_CID_MPEG_VIDEO_HEVC_PPS (V4L2_CID_MPEG_BASE + 1009) ++#define V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS (V4L2_CID_MPEG_BASE + 1010) ++#define V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX (V4L2_CID_MPEG_BASE + 1011) ++#define V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE (V4L2_CID_MPEG_BASE + 1015) ++#define V4L2_CID_MPEG_VIDEO_HEVC_START_CODE (V4L2_CID_MPEG_BASE + 1016) ++ ++/* enum v4l2_ctrl_type type values */ ++#define V4L2_CTRL_TYPE_HEVC_SPS 0x0120 ++#define V4L2_CTRL_TYPE_HEVC_PPS 0x0121 ++#define V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS 0x0122 ++#define V4L2_CTRL_TYPE_HEVC_SCALING_MATRIX 0x0123 ++ ++enum v4l2_mpeg_video_hevc_decode_mode { ++ V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED, ++ V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED, ++}; ++ ++enum v4l2_mpeg_video_hevc_start_code { ++ V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE, ++ V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B, ++}; ++ ++#define V4L2_HEVC_SLICE_TYPE_B 0 ++#define V4L2_HEVC_SLICE_TYPE_P 1 ++#define V4L2_HEVC_SLICE_TYPE_I 2 ++ ++#define V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE (1ULL << 0) ++#define V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED (1ULL << 1) ++#define V4L2_HEVC_SPS_FLAG_AMP_ENABLED (1ULL << 2) ++#define V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET (1ULL << 3) ++#define V4L2_HEVC_SPS_FLAG_PCM_ENABLED (1ULL << 4) ++#define V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED (1ULL << 5) ++#define V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT (1ULL << 6) ++#define V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED (1ULL << 7) ++#define V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED (1ULL << 8) ++ ++/* The controls are not stable at the moment and will likely be reworked. */ ++struct v4l2_ctrl_hevc_sps { ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Sequence parameter set */ ++ __u16 pic_width_in_luma_samples; ++ __u16 pic_height_in_luma_samples; ++ __u8 bit_depth_luma_minus8; ++ __u8 bit_depth_chroma_minus8; ++ __u8 log2_max_pic_order_cnt_lsb_minus4; ++ __u8 sps_max_dec_pic_buffering_minus1; ++ __u8 sps_max_num_reorder_pics; ++ __u8 sps_max_latency_increase_plus1; ++ __u8 log2_min_luma_coding_block_size_minus3; ++ __u8 log2_diff_max_min_luma_coding_block_size; ++ __u8 log2_min_luma_transform_block_size_minus2; ++ __u8 log2_diff_max_min_luma_transform_block_size; ++ __u8 max_transform_hierarchy_depth_inter; ++ __u8 max_transform_hierarchy_depth_intra; ++ __u8 pcm_sample_bit_depth_luma_minus1; ++ __u8 pcm_sample_bit_depth_chroma_minus1; ++ __u8 log2_min_pcm_luma_coding_block_size_minus3; ++ __u8 log2_diff_max_min_pcm_luma_coding_block_size; ++ __u8 num_short_term_ref_pic_sets; ++ __u8 num_long_term_ref_pics_sps; ++ __u8 chroma_format_idc; ++ __u8 sps_max_sub_layers_minus1; ++ ++ __u64 flags; ++}; ++ ++#define V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT (1ULL << 0) ++#define V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT (1ULL << 1) ++#define V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED (1ULL << 2) ++#define V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT (1ULL << 3) ++#define V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED (1ULL << 4) ++#define V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED (1ULL << 5) ++#define V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED (1ULL << 6) ++#define V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT (1ULL << 7) ++#define V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED (1ULL << 8) ++#define V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED (1ULL << 9) ++#define V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED (1ULL << 10) ++#define V4L2_HEVC_PPS_FLAG_TILES_ENABLED (1ULL << 11) ++#define V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED (1ULL << 12) ++#define V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED (1ULL << 13) ++#define V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 14) ++#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED (1ULL << 15) ++#define V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER (1ULL << 16) ++#define V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT (1ULL << 17) ++#define V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT (1ULL << 18) ++ ++struct v4l2_ctrl_hevc_pps { ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture parameter set */ ++ __u8 num_extra_slice_header_bits; ++ __s8 init_qp_minus26; ++ __u8 diff_cu_qp_delta_depth; ++ __s8 pps_cb_qp_offset; ++ __s8 pps_cr_qp_offset; ++ __u8 num_tile_columns_minus1; ++ __u8 num_tile_rows_minus1; ++ __u8 column_width_minus1[20]; ++ __u8 row_height_minus1[22]; ++ __s8 pps_beta_offset_div2; ++ __s8 pps_tc_offset_div2; ++ __u8 log2_parallel_merge_level_minus2; ++ ++ __u8 padding[4]; ++ __u64 flags; ++}; ++ ++#define V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_BEFORE 0x01 ++#define V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_AFTER 0x02 ++#define V4L2_HEVC_DPB_ENTRY_RPS_LT_CURR 0x03 ++ ++#define V4L2_HEVC_DPB_ENTRIES_NUM_MAX 16 ++ ++struct v4l2_hevc_dpb_entry { ++ __u64 timestamp; ++ __u8 rps; ++ __u8 field_pic; ++ __u16 pic_order_cnt[2]; ++ __u8 padding[2]; ++}; ++ ++struct v4l2_hevc_pred_weight_table { ++ __s8 delta_luma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __s8 luma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __s8 delta_chroma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; ++ __s8 chroma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; ++ ++ __s8 delta_luma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __s8 luma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __s8 delta_chroma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; ++ __s8 chroma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; ++ ++ __u8 padding[6]; ++ ++ __u8 luma_log2_weight_denom; ++ __s8 delta_chroma_log2_weight_denom; ++}; ++ ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA (1ULL << 0) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA (1ULL << 1) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED (1ULL << 2) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO (1ULL << 3) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT (1ULL << 4) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0 (1ULL << 5) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_USE_INTEGER_MV (1ULL << 6) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED (1ULL << 7) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 8) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT (1ULL << 9) ++ ++struct v4l2_ctrl_hevc_slice_params { ++ __u32 bit_size; ++ __u32 data_bit_offset; ++ ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ ++ __u32 slice_segment_addr; ++ __u32 num_entry_point_offsets; ++ ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: NAL unit header */ ++ __u8 nal_unit_type; ++ __u8 nuh_temporal_id_plus1; ++ ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ ++ __u8 slice_type; ++ __u8 colour_plane_id; ++ __u16 slice_pic_order_cnt; ++ __u8 num_ref_idx_l0_active_minus1; ++ __u8 num_ref_idx_l1_active_minus1; ++ __u8 collocated_ref_idx; ++ __u8 five_minus_max_num_merge_cand; ++ __s8 slice_qp_delta; ++ __s8 slice_cb_qp_offset; ++ __s8 slice_cr_qp_offset; ++ __s8 slice_act_y_qp_offset; ++ __s8 slice_act_cb_qp_offset; ++ __s8 slice_act_cr_qp_offset; ++ __s8 slice_beta_offset_div2; ++ __s8 slice_tc_offset_div2; ++ ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture timing SEI message */ ++ __u8 pic_struct; ++ ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ ++ __u8 num_active_dpb_entries; ++ __u8 ref_idx_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __u8 ref_idx_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ ++ __u8 num_rps_poc_st_curr_before; ++ __u8 num_rps_poc_st_curr_after; ++ __u8 num_rps_poc_lt_curr; ++ ++ __u8 padding; ++ ++ __u32 entry_point_offset_minus1[256]; ++ ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ ++ struct v4l2_hevc_dpb_entry dpb[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Weighted prediction parameter */ ++ struct v4l2_hevc_pred_weight_table pred_weight_table; ++ ++ __u64 flags; ++}; ++ ++struct v4l2_ctrl_hevc_scaling_matrix { ++ __u8 scaling_list_4x4[6][16]; ++ __u8 scaling_list_8x8[6][64]; ++ __u8 scaling_list_16x16[6][64]; ++ __u8 scaling_list_32x32[2][64]; ++ __u8 scaling_list_dc_coef_16x16[6]; ++ __u8 scaling_list_dc_coef_32x32[2]; ++}; ++ ++#endif +--- /dev/null ++++ b/libavcodec/hevc-ctrls-v2.h +@@ -0,0 +1,257 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* ++ * These are the HEVC state controls for use with stateless HEVC ++ * codec drivers. ++ * ++ * It turns out that these structs are not stable yet and will undergo ++ * more changes. So keep them private until they are stable and ready to ++ * become part of the official public API. ++ */ ++ ++#ifndef _HEVC_CTRLS_H_ ++#define _HEVC_CTRLS_H_ ++ ++#include <linux/videodev2.h> ++ ++/* The pixel format isn't stable at the moment and will likely be renamed. */ ++#define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */ ++ ++#define V4L2_CID_MPEG_VIDEO_HEVC_SPS (V4L2_CID_CODEC_BASE + 1008) ++#define V4L2_CID_MPEG_VIDEO_HEVC_PPS (V4L2_CID_CODEC_BASE + 1009) ++#define V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS (V4L2_CID_CODEC_BASE + 1010) ++#define V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX (V4L2_CID_CODEC_BASE + 1011) ++#define V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS (V4L2_CID_CODEC_BASE + 1012) ++#define V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE (V4L2_CID_CODEC_BASE + 1015) ++#define V4L2_CID_MPEG_VIDEO_HEVC_START_CODE (V4L2_CID_CODEC_BASE + 1016) ++ ++/* enum v4l2_ctrl_type type values */ ++#define V4L2_CTRL_TYPE_HEVC_SPS 0x0120 ++#define V4L2_CTRL_TYPE_HEVC_PPS 0x0121 ++#define V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS 0x0122 ++#define V4L2_CTRL_TYPE_HEVC_SCALING_MATRIX 0x0123 ++#define V4L2_CTRL_TYPE_HEVC_DECODE_PARAMS 0x0124 ++ ++enum v4l2_mpeg_video_hevc_decode_mode { ++ V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED, ++ V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED, ++}; ++ ++enum v4l2_mpeg_video_hevc_start_code { ++ V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE, ++ V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B, ++}; ++ ++#define V4L2_HEVC_SLICE_TYPE_B 0 ++#define V4L2_HEVC_SLICE_TYPE_P 1 ++#define V4L2_HEVC_SLICE_TYPE_I 2 ++ ++#define V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE (1ULL << 0) ++#define V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED (1ULL << 1) ++#define V4L2_HEVC_SPS_FLAG_AMP_ENABLED (1ULL << 2) ++#define V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET (1ULL << 3) ++#define V4L2_HEVC_SPS_FLAG_PCM_ENABLED (1ULL << 4) ++#define V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED (1ULL << 5) ++#define V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT (1ULL << 6) ++#define V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED (1ULL << 7) ++#define V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED (1ULL << 8) ++ ++/* The controls are not stable at the moment and will likely be reworked. */ ++struct v4l2_ctrl_hevc_sps { ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Sequence parameter set */ ++ __u16 pic_width_in_luma_samples; ++ __u16 pic_height_in_luma_samples; ++ __u8 bit_depth_luma_minus8; ++ __u8 bit_depth_chroma_minus8; ++ __u8 log2_max_pic_order_cnt_lsb_minus4; ++ __u8 sps_max_dec_pic_buffering_minus1; ++ __u8 sps_max_num_reorder_pics; ++ __u8 sps_max_latency_increase_plus1; ++ __u8 log2_min_luma_coding_block_size_minus3; ++ __u8 log2_diff_max_min_luma_coding_block_size; ++ __u8 log2_min_luma_transform_block_size_minus2; ++ __u8 log2_diff_max_min_luma_transform_block_size; ++ __u8 max_transform_hierarchy_depth_inter; ++ __u8 max_transform_hierarchy_depth_intra; ++ __u8 pcm_sample_bit_depth_luma_minus1; ++ __u8 pcm_sample_bit_depth_chroma_minus1; ++ __u8 log2_min_pcm_luma_coding_block_size_minus3; ++ __u8 log2_diff_max_min_pcm_luma_coding_block_size; ++ __u8 num_short_term_ref_pic_sets; ++ __u8 num_long_term_ref_pics_sps; ++ __u8 chroma_format_idc; ++ __u8 sps_max_sub_layers_minus1; ++ ++ __u64 flags; ++}; ++ ++#define V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED (1ULL << 0) ++#define V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT (1ULL << 1) ++#define V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED (1ULL << 2) ++#define V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT (1ULL << 3) ++#define V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED (1ULL << 4) ++#define V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED (1ULL << 5) ++#define V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED (1ULL << 6) ++#define V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT (1ULL << 7) ++#define V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED (1ULL << 8) ++#define V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED (1ULL << 9) ++#define V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED (1ULL << 10) ++#define V4L2_HEVC_PPS_FLAG_TILES_ENABLED (1ULL << 11) ++#define V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED (1ULL << 12) ++#define V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED (1ULL << 13) ++#define V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 14) ++#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED (1ULL << 15) ++#define V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER (1ULL << 16) ++#define V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT (1ULL << 17) ++#define V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT (1ULL << 18) ++#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT (1ULL << 19) ++#define V4L2_HEVC_PPS_FLAG_UNIFORM_SPACING (1ULL << 20) ++ ++struct v4l2_ctrl_hevc_pps { ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture parameter set */ ++ __u8 num_extra_slice_header_bits; ++ __u8 num_ref_idx_l0_default_active_minus1; ++ __u8 num_ref_idx_l1_default_active_minus1; ++ __s8 init_qp_minus26; ++ __u8 diff_cu_qp_delta_depth; ++ __s8 pps_cb_qp_offset; ++ __s8 pps_cr_qp_offset; ++ __u8 num_tile_columns_minus1; ++ __u8 num_tile_rows_minus1; ++ __u8 column_width_minus1[20]; ++ __u8 row_height_minus1[22]; ++ __s8 pps_beta_offset_div2; ++ __s8 pps_tc_offset_div2; ++ __u8 log2_parallel_merge_level_minus2; ++ ++ __u8 padding[4]; ++ __u64 flags; ++}; ++ ++#define V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_BEFORE 0x01 ++#define V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_AFTER 0x02 ++#define V4L2_HEVC_DPB_ENTRY_RPS_LT_CURR 0x03 ++ ++#define V4L2_HEVC_DPB_ENTRIES_NUM_MAX 16 ++ ++struct v4l2_hevc_dpb_entry { ++ __u64 timestamp; ++ __u8 rps; ++ __u8 field_pic; ++ __u16 pic_order_cnt[2]; ++ __u8 padding[2]; ++}; ++ ++struct v4l2_hevc_pred_weight_table { ++ __s8 delta_luma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __s8 luma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __s8 delta_chroma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; ++ __s8 chroma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; ++ ++ __s8 delta_luma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __s8 luma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __s8 delta_chroma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; ++ __s8 chroma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; ++ ++ __u8 padding[6]; ++ ++ __u8 luma_log2_weight_denom; ++ __s8 delta_chroma_log2_weight_denom; ++}; ++ ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA (1ULL << 0) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA (1ULL << 1) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED (1ULL << 2) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO (1ULL << 3) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT (1ULL << 4) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0 (1ULL << 5) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_USE_INTEGER_MV (1ULL << 6) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED (1ULL << 7) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 8) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT (1ULL << 9) ++ ++struct v4l2_ctrl_hevc_slice_params { ++ __u32 bit_size; ++ __u32 data_bit_offset; ++ ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ ++ __u32 slice_segment_addr; ++ __u32 num_entry_point_offsets; ++ ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: NAL unit header */ ++ __u8 nal_unit_type; ++ __u8 nuh_temporal_id_plus1; ++ ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ ++ __u8 slice_type; ++ __u8 colour_plane_id; ++ __u16 slice_pic_order_cnt; ++ __u8 num_ref_idx_l0_active_minus1; ++ __u8 num_ref_idx_l1_active_minus1; ++ __u8 collocated_ref_idx; ++ __u8 five_minus_max_num_merge_cand; ++ __s8 slice_qp_delta; ++ __s8 slice_cb_qp_offset; ++ __s8 slice_cr_qp_offset; ++ __s8 slice_act_y_qp_offset; ++ __s8 slice_act_cb_qp_offset; ++ __s8 slice_act_cr_qp_offset; ++ __s8 slice_beta_offset_div2; ++ __s8 slice_tc_offset_div2; ++ ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture timing SEI message */ ++ __u8 pic_struct; ++ ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ ++ __u8 ref_idx_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __u8 ref_idx_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ ++ __u8 padding[5]; ++ ++ __u32 entry_point_offset_minus1[256]; ++ ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Weighted prediction parameter */ ++ struct v4l2_hevc_pred_weight_table pred_weight_table; ++ ++ __u64 flags; ++}; ++ ++#define V4L2_HEVC_DECODE_PARAM_FLAG_IRAP_PIC 0x1 ++#define V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC 0x2 ++#define V4L2_HEVC_DECODE_PARAM_FLAG_NO_OUTPUT_OF_PRIOR 0x4 ++ ++struct v4l2_ctrl_hevc_decode_params { ++ __s32 pic_order_cnt_val; ++ __u8 num_active_dpb_entries; ++ struct v4l2_hevc_dpb_entry dpb[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __u8 num_poc_st_curr_before; ++ __u8 num_poc_st_curr_after; ++ __u8 num_poc_lt_curr; ++ __u8 poc_st_curr_before[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __u8 poc_st_curr_after[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __u8 poc_lt_curr[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __u64 flags; ++}; ++ ++/* MPEG-class control IDs specific to the Hantro driver as defined by V4L2 */ ++#define V4L2_CID_CODEC_HANTRO_BASE (V4L2_CTRL_CLASS_CODEC | 0x1200) ++/* ++ * V4L2_CID_HANTRO_HEVC_SLICE_HEADER_SKIP - ++ * the number of data (in bits) to skip in the ++ * slice segment header. ++ * If non-IDR, the bits to be skipped go from syntax element "pic_output_flag" ++ * to before syntax element "slice_temporal_mvp_enabled_flag". ++ * If IDR, the skipped bits are just "pic_output_flag" ++ * (separate_colour_plane_flag is not supported). ++ */ ++#define V4L2_CID_HANTRO_HEVC_SLICE_HEADER_SKIP (V4L2_CID_CODEC_HANTRO_BASE + 0) ++ ++struct v4l2_ctrl_hevc_scaling_matrix { ++ __u8 scaling_list_4x4[6][16]; ++ __u8 scaling_list_8x8[6][64]; ++ __u8 scaling_list_16x16[6][64]; ++ __u8 scaling_list_32x32[2][64]; ++ __u8 scaling_list_dc_coef_16x16[6]; ++ __u8 scaling_list_dc_coef_32x32[2]; ++}; ++ ++#endif +--- /dev/null ++++ b/libavcodec/hevc-ctrls-v3.h +@@ -0,0 +1,255 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* ++ * These are the HEVC state controls for use with stateless HEVC ++ * codec drivers. ++ * ++ * It turns out that these structs are not stable yet and will undergo ++ * more changes. So keep them private until they are stable and ready to ++ * become part of the official public API. ++ */ ++ ++#ifndef _HEVC_CTRLS_H_ ++#define _HEVC_CTRLS_H_ ++ ++#include <linux/videodev2.h> ++ ++/* The pixel format isn't stable at the moment and will likely be renamed. */ ++#define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */ ++ ++#define V4L2_CID_MPEG_VIDEO_HEVC_SPS (V4L2_CID_CODEC_BASE + 1008) ++#define V4L2_CID_MPEG_VIDEO_HEVC_PPS (V4L2_CID_CODEC_BASE + 1009) ++#define V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS (V4L2_CID_CODEC_BASE + 1010) ++#define V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX (V4L2_CID_CODEC_BASE + 1011) ++#define V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS (V4L2_CID_CODEC_BASE + 1012) ++#define V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE (V4L2_CID_CODEC_BASE + 1015) ++#define V4L2_CID_MPEG_VIDEO_HEVC_START_CODE (V4L2_CID_CODEC_BASE + 1016) ++ ++/* enum v4l2_ctrl_type type values */ ++#define V4L2_CTRL_TYPE_HEVC_SPS 0x0120 ++#define V4L2_CTRL_TYPE_HEVC_PPS 0x0121 ++#define V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS 0x0122 ++#define V4L2_CTRL_TYPE_HEVC_SCALING_MATRIX 0x0123 ++#define V4L2_CTRL_TYPE_HEVC_DECODE_PARAMS 0x0124 ++ ++enum v4l2_mpeg_video_hevc_decode_mode { ++ V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED, ++ V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED, ++}; ++ ++enum v4l2_mpeg_video_hevc_start_code { ++ V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE, ++ V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B, ++}; ++ ++#define V4L2_HEVC_SLICE_TYPE_B 0 ++#define V4L2_HEVC_SLICE_TYPE_P 1 ++#define V4L2_HEVC_SLICE_TYPE_I 2 ++ ++#define V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE (1ULL << 0) ++#define V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED (1ULL << 1) ++#define V4L2_HEVC_SPS_FLAG_AMP_ENABLED (1ULL << 2) ++#define V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET (1ULL << 3) ++#define V4L2_HEVC_SPS_FLAG_PCM_ENABLED (1ULL << 4) ++#define V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED (1ULL << 5) ++#define V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT (1ULL << 6) ++#define V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED (1ULL << 7) ++#define V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED (1ULL << 8) ++ ++/* The controls are not stable at the moment and will likely be reworked. */ ++struct v4l2_ctrl_hevc_sps { ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Sequence parameter set */ ++ __u16 pic_width_in_luma_samples; ++ __u16 pic_height_in_luma_samples; ++ __u8 bit_depth_luma_minus8; ++ __u8 bit_depth_chroma_minus8; ++ __u8 log2_max_pic_order_cnt_lsb_minus4; ++ __u8 sps_max_dec_pic_buffering_minus1; ++ __u8 sps_max_num_reorder_pics; ++ __u8 sps_max_latency_increase_plus1; ++ __u8 log2_min_luma_coding_block_size_minus3; ++ __u8 log2_diff_max_min_luma_coding_block_size; ++ __u8 log2_min_luma_transform_block_size_minus2; ++ __u8 log2_diff_max_min_luma_transform_block_size; ++ __u8 max_transform_hierarchy_depth_inter; ++ __u8 max_transform_hierarchy_depth_intra; ++ __u8 pcm_sample_bit_depth_luma_minus1; ++ __u8 pcm_sample_bit_depth_chroma_minus1; ++ __u8 log2_min_pcm_luma_coding_block_size_minus3; ++ __u8 log2_diff_max_min_pcm_luma_coding_block_size; ++ __u8 num_short_term_ref_pic_sets; ++ __u8 num_long_term_ref_pics_sps; ++ __u8 chroma_format_idc; ++ __u8 sps_max_sub_layers_minus1; ++ ++ __u64 flags; ++}; ++ ++#define V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED (1ULL << 0) ++#define V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT (1ULL << 1) ++#define V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED (1ULL << 2) ++#define V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT (1ULL << 3) ++#define V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED (1ULL << 4) ++#define V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED (1ULL << 5) ++#define V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED (1ULL << 6) ++#define V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT (1ULL << 7) ++#define V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED (1ULL << 8) ++#define V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED (1ULL << 9) ++#define V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED (1ULL << 10) ++#define V4L2_HEVC_PPS_FLAG_TILES_ENABLED (1ULL << 11) ++#define V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED (1ULL << 12) ++#define V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED (1ULL << 13) ++#define V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 14) ++#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED (1ULL << 15) ++#define V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER (1ULL << 16) ++#define V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT (1ULL << 17) ++#define V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT (1ULL << 18) ++#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT (1ULL << 19) ++#define V4L2_HEVC_PPS_FLAG_UNIFORM_SPACING (1ULL << 20) ++ ++struct v4l2_ctrl_hevc_pps { ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture parameter set */ ++ __u8 num_extra_slice_header_bits; ++ __u8 num_ref_idx_l0_default_active_minus1; ++ __u8 num_ref_idx_l1_default_active_minus1; ++ __s8 init_qp_minus26; ++ __u8 diff_cu_qp_delta_depth; ++ __s8 pps_cb_qp_offset; ++ __s8 pps_cr_qp_offset; ++ __u8 num_tile_columns_minus1; ++ __u8 num_tile_rows_minus1; ++ __u8 column_width_minus1[20]; ++ __u8 row_height_minus1[22]; ++ __s8 pps_beta_offset_div2; ++ __s8 pps_tc_offset_div2; ++ __u8 log2_parallel_merge_level_minus2; ++ ++ __u8 padding[4]; ++ __u64 flags; ++}; ++ ++#define V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE 0x01 ++ ++#define V4L2_HEVC_DPB_ENTRIES_NUM_MAX 16 ++ ++struct v4l2_hevc_dpb_entry { ++ __u64 timestamp; ++ __u8 flags; ++ __u8 field_pic; ++ __u16 pic_order_cnt[2]; ++ __u8 padding[2]; ++}; ++ ++struct v4l2_hevc_pred_weight_table { ++ __s8 delta_luma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __s8 luma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __s8 delta_chroma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; ++ __s8 chroma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; ++ ++ __s8 delta_luma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __s8 luma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __s8 delta_chroma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; ++ __s8 chroma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; ++ ++ __u8 padding[6]; ++ ++ __u8 luma_log2_weight_denom; ++ __s8 delta_chroma_log2_weight_denom; ++}; ++ ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA (1ULL << 0) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA (1ULL << 1) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED (1ULL << 2) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO (1ULL << 3) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT (1ULL << 4) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0 (1ULL << 5) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_USE_INTEGER_MV (1ULL << 6) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED (1ULL << 7) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 8) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT (1ULL << 9) ++ ++struct v4l2_ctrl_hevc_slice_params { ++ __u32 bit_size; ++ __u32 data_bit_offset; ++ ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ ++ __u32 slice_segment_addr; ++ __u32 num_entry_point_offsets; ++ ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: NAL unit header */ ++ __u8 nal_unit_type; ++ __u8 nuh_temporal_id_plus1; ++ ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ ++ __u8 slice_type; ++ __u8 colour_plane_id; ++ __u16 slice_pic_order_cnt; ++ __u8 num_ref_idx_l0_active_minus1; ++ __u8 num_ref_idx_l1_active_minus1; ++ __u8 collocated_ref_idx; ++ __u8 five_minus_max_num_merge_cand; ++ __s8 slice_qp_delta; ++ __s8 slice_cb_qp_offset; ++ __s8 slice_cr_qp_offset; ++ __s8 slice_act_y_qp_offset; ++ __s8 slice_act_cb_qp_offset; ++ __s8 slice_act_cr_qp_offset; ++ __s8 slice_beta_offset_div2; ++ __s8 slice_tc_offset_div2; ++ ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture timing SEI message */ ++ __u8 pic_struct; ++ ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ ++ __u8 ref_idx_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __u8 ref_idx_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ ++ __u8 padding[5]; ++ ++ __u32 entry_point_offset_minus1[256]; ++ ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Weighted prediction parameter */ ++ struct v4l2_hevc_pred_weight_table pred_weight_table; ++ ++ __u64 flags; ++}; ++ ++#define V4L2_HEVC_DECODE_PARAM_FLAG_IRAP_PIC 0x1 ++#define V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC 0x2 ++#define V4L2_HEVC_DECODE_PARAM_FLAG_NO_OUTPUT_OF_PRIOR 0x4 ++ ++struct v4l2_ctrl_hevc_decode_params { ++ __s32 pic_order_cnt_val; ++ __u8 num_active_dpb_entries; ++ struct v4l2_hevc_dpb_entry dpb[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __u8 num_poc_st_curr_before; ++ __u8 num_poc_st_curr_after; ++ __u8 num_poc_lt_curr; ++ __u8 poc_st_curr_before[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __u8 poc_st_curr_after[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __u8 poc_lt_curr[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __u64 flags; ++}; ++ ++struct v4l2_ctrl_hevc_scaling_matrix { ++ __u8 scaling_list_4x4[6][16]; ++ __u8 scaling_list_8x8[6][64]; ++ __u8 scaling_list_16x16[6][64]; ++ __u8 scaling_list_32x32[2][64]; ++ __u8 scaling_list_dc_coef_16x16[6]; ++ __u8 scaling_list_dc_coef_32x32[2]; ++}; ++ ++/* MPEG-class control IDs specific to the Hantro driver as defined by V4L2 */ ++#define V4L2_CID_CODEC_HANTRO_BASE (V4L2_CTRL_CLASS_CODEC | 0x1200) ++/* ++ * V4L2_CID_HANTRO_HEVC_SLICE_HEADER_SKIP - ++ * the number of data (in bits) to skip in the ++ * slice segment header. ++ * If non-IDR, the bits to be skipped go from syntax element "pic_output_flag" ++ * to before syntax element "slice_temporal_mvp_enabled_flag". ++ * If IDR, the skipped bits are just "pic_output_flag" ++ * (separate_colour_plane_flag is not supported). ++ */ ++#define V4L2_CID_HANTRO_HEVC_SLICE_HEADER_SKIP (V4L2_CID_CODEC_HANTRO_BASE + 0) ++ ++#endif +--- /dev/null ++++ b/libavcodec/hevc-ctrls-v4.h +@@ -0,0 +1,524 @@ ++/* SPDX-License-Identifier: ((GPL-2.0+ WITH Linux-syscall-note) OR BSD-3-Clause) */ ++/* ++ * Video for Linux Two controls header file ++ * ++ * Copyright (C) 1999-2012 the contributors ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * Alternatively you can redistribute this file under the terms of the ++ * BSD license as stated below: ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in ++ * the documentation and/or other materials provided with the ++ * distribution. ++ * 3. The names of its contributors may not be used to endorse or promote ++ * products derived from this software without specific prior written ++ * permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT ++ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED ++ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR ++ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF ++ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING ++ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ++ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ * ++ * The contents of this header was split off from videodev2.h. All control ++ * definitions should be added to this header, which is included by ++ * videodev2.h. ++ */ ++ ++#ifndef AVCODEC_HEVC_CTRLS_V4_H ++#define AVCODEC_HEVC_CTRLS_V4_H ++ ++#include <linux/const.h> ++#include <linux/types.h> ++ ++#ifndef V4L2_CTRL_CLASS_CODEC_STATELESS ++#define V4L2_CTRL_CLASS_CODEC_STATELESS 0x00a40000 /* Stateless codecs controls */ ++#endif ++#ifndef V4L2_CID_CODEC_STATELESS_BASE ++#define V4L2_CID_CODEC_STATELESS_BASE (V4L2_CTRL_CLASS_CODEC_STATELESS | 0x900) ++#endif ++ ++#define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */ ++ ++#define V4L2_CID_STATELESS_HEVC_SPS (V4L2_CID_CODEC_STATELESS_BASE + 400) ++#define V4L2_CID_STATELESS_HEVC_PPS (V4L2_CID_CODEC_STATELESS_BASE + 401) ++#define V4L2_CID_STATELESS_HEVC_SLICE_PARAMS (V4L2_CID_CODEC_STATELESS_BASE + 402) ++#define V4L2_CID_STATELESS_HEVC_SCALING_MATRIX (V4L2_CID_CODEC_STATELESS_BASE + 403) ++#define V4L2_CID_STATELESS_HEVC_DECODE_PARAMS (V4L2_CID_CODEC_STATELESS_BASE + 404) ++#define V4L2_CID_STATELESS_HEVC_DECODE_MODE (V4L2_CID_CODEC_STATELESS_BASE + 405) ++#define V4L2_CID_STATELESS_HEVC_START_CODE (V4L2_CID_CODEC_STATELESS_BASE + 406) ++#define V4L2_CID_STATELESS_HEVC_ENTRY_POINT_OFFSETS (V4L2_CID_CODEC_STATELESS_BASE + 407) ++ ++enum v4l2_stateless_hevc_decode_mode { ++ V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED, ++ V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED, ++}; ++ ++enum v4l2_stateless_hevc_start_code { ++ V4L2_STATELESS_HEVC_START_CODE_NONE, ++ V4L2_STATELESS_HEVC_START_CODE_ANNEX_B, ++}; ++ ++#define V4L2_HEVC_SLICE_TYPE_B 0 ++#define V4L2_HEVC_SLICE_TYPE_P 1 ++#define V4L2_HEVC_SLICE_TYPE_I 2 ++ ++#define V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE (1ULL << 0) ++#define V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED (1ULL << 1) ++#define V4L2_HEVC_SPS_FLAG_AMP_ENABLED (1ULL << 2) ++#define V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET (1ULL << 3) ++#define V4L2_HEVC_SPS_FLAG_PCM_ENABLED (1ULL << 4) ++#define V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED (1ULL << 5) ++#define V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT (1ULL << 6) ++#define V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED (1ULL << 7) ++#define V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED (1ULL << 8) ++ ++/** ++ * struct v4l2_ctrl_hevc_sps - ITU-T Rec. H.265: Sequence parameter set ++ * ++ * @video_parameter_set_id: specifies the value of the ++ * vps_video_parameter_set_id of the active VPS ++ * @seq_parameter_set_id: provides an identifier for the SPS for ++ * reference by other syntax elements ++ * @pic_width_in_luma_samples: specifies the width of each decoded picture ++ * in units of luma samples ++ * @pic_height_in_luma_samples: specifies the height of each decoded picture ++ * in units of luma samples ++ * @bit_depth_luma_minus8: this value plus 8specifies the bit depth of the ++ * samples of the luma array ++ * @bit_depth_chroma_minus8: this value plus 8 specifies the bit depth of the ++ * samples of the chroma arrays ++ * @log2_max_pic_order_cnt_lsb_minus4: this value plus 4 specifies the value of ++ * the variable MaxPicOrderCntLsb ++ * @sps_max_dec_pic_buffering_minus1: this value plus 1 specifies the maximum ++ * required size of the decoded picture ++ * buffer for the codec video sequence ++ * @sps_max_num_reorder_pics: indicates the maximum allowed number of pictures ++ * @sps_max_latency_increase_plus1: not equal to 0 is used to compute the ++ * value of SpsMaxLatencyPictures array ++ * @log2_min_luma_coding_block_size_minus3: plus 3 specifies the minimum ++ * luma coding block size ++ * @log2_diff_max_min_luma_coding_block_size: specifies the difference between ++ * the maximum and minimum luma ++ * coding block size ++ * @log2_min_luma_transform_block_size_minus2: plus 2 specifies the minimum luma ++ * transform block size ++ * @log2_diff_max_min_luma_transform_block_size: specifies the difference between ++ * the maximum and minimum luma ++ * transform block size ++ * @max_transform_hierarchy_depth_inter: specifies the maximum hierarchy ++ * depth for transform units of ++ * coding units coded in inter ++ * prediction mode ++ * @max_transform_hierarchy_depth_intra: specifies the maximum hierarchy ++ * depth for transform units of ++ * coding units coded in intra ++ * prediction mode ++ * @pcm_sample_bit_depth_luma_minus1: this value plus 1 specifies the number of ++ * bits used to represent each of PCM sample ++ * values of the luma component ++ * @pcm_sample_bit_depth_chroma_minus1: this value plus 1 specifies the number ++ * of bits used to represent each of PCM ++ * sample values of the chroma components ++ * @log2_min_pcm_luma_coding_block_size_minus3: this value plus 3 specifies the ++ * minimum size of coding blocks ++ * @log2_diff_max_min_pcm_luma_coding_block_size: specifies the difference between ++ * the maximum and minimum size of ++ * coding blocks ++ * @num_short_term_ref_pic_sets: specifies the number of st_ref_pic_set() ++ * syntax structures included in the SPS ++ * @num_long_term_ref_pics_sps: specifies the number of candidate long-term ++ * reference pictures that are specified in the SPS ++ * @chroma_format_idc: specifies the chroma sampling ++ * @sps_max_sub_layers_minus1: this value plus 1 specifies the maximum number ++ * of temporal sub-layers ++ * @reserved: padding field. Should be zeroed by applications. ++ * @flags: see V4L2_HEVC_SPS_FLAG_{} ++ */ ++struct v4l2_ctrl_hevc_sps { ++ __u8 video_parameter_set_id; ++ __u8 seq_parameter_set_id; ++ __u16 pic_width_in_luma_samples; ++ __u16 pic_height_in_luma_samples; ++ __u8 bit_depth_luma_minus8; ++ __u8 bit_depth_chroma_minus8; ++ __u8 log2_max_pic_order_cnt_lsb_minus4; ++ __u8 sps_max_dec_pic_buffering_minus1; ++ __u8 sps_max_num_reorder_pics; ++ __u8 sps_max_latency_increase_plus1; ++ __u8 log2_min_luma_coding_block_size_minus3; ++ __u8 log2_diff_max_min_luma_coding_block_size; ++ __u8 log2_min_luma_transform_block_size_minus2; ++ __u8 log2_diff_max_min_luma_transform_block_size; ++ __u8 max_transform_hierarchy_depth_inter; ++ __u8 max_transform_hierarchy_depth_intra; ++ __u8 pcm_sample_bit_depth_luma_minus1; ++ __u8 pcm_sample_bit_depth_chroma_minus1; ++ __u8 log2_min_pcm_luma_coding_block_size_minus3; ++ __u8 log2_diff_max_min_pcm_luma_coding_block_size; ++ __u8 num_short_term_ref_pic_sets; ++ __u8 num_long_term_ref_pics_sps; ++ __u8 chroma_format_idc; ++ __u8 sps_max_sub_layers_minus1; ++ ++ __u8 reserved[6]; ++ __u64 flags; ++}; ++ ++#define V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED (1ULL << 0) ++#define V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT (1ULL << 1) ++#define V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED (1ULL << 2) ++#define V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT (1ULL << 3) ++#define V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED (1ULL << 4) ++#define V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED (1ULL << 5) ++#define V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED (1ULL << 6) ++#define V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT (1ULL << 7) ++#define V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED (1ULL << 8) ++#define V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED (1ULL << 9) ++#define V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED (1ULL << 10) ++#define V4L2_HEVC_PPS_FLAG_TILES_ENABLED (1ULL << 11) ++#define V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED (1ULL << 12) ++#define V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED (1ULL << 13) ++#define V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 14) ++#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED (1ULL << 15) ++#define V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER (1ULL << 16) ++#define V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT (1ULL << 17) ++#define V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT (1ULL << 18) ++#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT (1ULL << 19) ++#define V4L2_HEVC_PPS_FLAG_UNIFORM_SPACING (1ULL << 20) ++ ++/** ++ * struct v4l2_ctrl_hevc_pps - ITU-T Rec. H.265: Picture parameter set ++ * ++ * @pic_parameter_set_id: identifies the PPS for reference by other ++ * syntax elements ++ * @num_extra_slice_header_bits: specifies the number of extra slice header ++ * bits that are present in the slice header RBSP ++ * for coded pictures referring to the PPS. ++ * @num_ref_idx_l0_default_active_minus1: this value plus 1 specifies the ++ * inferred value of num_ref_idx_l0_active_minus1 ++ * @num_ref_idx_l1_default_active_minus1: this value plus 1 specifies the ++ * inferred value of num_ref_idx_l1_active_minus1 ++ * @init_qp_minus26: this value plus 26 specifies the initial value of SliceQp Y for ++ * each slice referring to the PPS ++ * @diff_cu_qp_delta_depth: specifies the difference between the luma coding ++ * tree block size and the minimum luma coding block ++ * size of coding units that convey cu_qp_delta_abs ++ * and cu_qp_delta_sign_flag ++ * @pps_cb_qp_offset: specify the offsets to the luma quantization parameter Cb ++ * @pps_cr_qp_offset: specify the offsets to the luma quantization parameter Cr ++ * @num_tile_columns_minus1: this value plus 1 specifies the number of tile columns ++ * partitioning the picture ++ * @num_tile_rows_minus1: this value plus 1 specifies the number of tile rows partitioning ++ * the picture ++ * @column_width_minus1: this value plus 1 specifies the width of the each tile column in ++ * units of coding tree blocks ++ * @row_height_minus1: this value plus 1 specifies the height of the each tile row in ++ * units of coding tree blocks ++ * @pps_beta_offset_div2: specify the default deblocking parameter offsets for ++ * beta divided by 2 ++ * @pps_tc_offset_div2: specify the default deblocking parameter offsets for tC ++ * divided by 2 ++ * @log2_parallel_merge_level_minus2: this value plus 2 specifies the value of ++ * the variable Log2ParMrgLevel ++ * @reserved: padding field. Should be zeroed by applications. ++ * @flags: see V4L2_HEVC_PPS_FLAG_{} ++ */ ++struct v4l2_ctrl_hevc_pps { ++ __u8 pic_parameter_set_id; ++ __u8 num_extra_slice_header_bits; ++ __u8 num_ref_idx_l0_default_active_minus1; ++ __u8 num_ref_idx_l1_default_active_minus1; ++ __s8 init_qp_minus26; ++ __u8 diff_cu_qp_delta_depth; ++ __s8 pps_cb_qp_offset; ++ __s8 pps_cr_qp_offset; ++ __u8 num_tile_columns_minus1; ++ __u8 num_tile_rows_minus1; ++ __u8 column_width_minus1[20]; ++ __u8 row_height_minus1[22]; ++ __s8 pps_beta_offset_div2; ++ __s8 pps_tc_offset_div2; ++ __u8 log2_parallel_merge_level_minus2; ++ __u8 reserved; ++ __u64 flags; ++}; ++ ++#define V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE 0x01 ++ ++#define V4L2_HEVC_SEI_PIC_STRUCT_FRAME 0 ++#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_FIELD 1 ++#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_FIELD 2 ++#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_BOTTOM 3 ++#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_TOP 4 ++#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_BOTTOM_TOP 5 ++#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM 6 ++#define V4L2_HEVC_SEI_PIC_STRUCT_FRAME_DOUBLING 7 ++#define V4L2_HEVC_SEI_PIC_STRUCT_FRAME_TRIPLING 8 ++#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_PAIRED_PREVIOUS_BOTTOM 9 ++#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_PAIRED_PREVIOUS_TOP 10 ++#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_PAIRED_NEXT_BOTTOM 11 ++#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_PAIRED_NEXT_TOP 12 ++ ++#define V4L2_HEVC_DPB_ENTRIES_NUM_MAX 16 ++ ++/** ++ * struct v4l2_hevc_dpb_entry - HEVC decoded picture buffer entry ++ * ++ * @timestamp: timestamp of the V4L2 capture buffer to use as reference. ++ * @flags: long term flag for the reference frame ++ * @field_pic: whether the reference is a field picture or a frame. ++ * @reserved: padding field. Should be zeroed by applications. ++ * @pic_order_cnt_val: the picture order count of the current picture. ++ */ ++struct v4l2_hevc_dpb_entry { ++ __u64 timestamp; ++ __u8 flags; ++ __u8 field_pic; ++ __u16 reserved; ++ __s32 pic_order_cnt_val; ++}; ++ ++/** ++ * struct v4l2_hevc_pred_weight_table - HEVC weighted prediction parameters ++ * ++ * @delta_luma_weight_l0: the difference of the weighting factor applied ++ * to the luma prediction value for list 0 ++ * @luma_offset_l0: the additive offset applied to the luma prediction value ++ * for list 0 ++ * @delta_chroma_weight_l0: the difference of the weighting factor applied ++ * to the chroma prediction values for list 0 ++ * @chroma_offset_l0: the difference of the additive offset applied to ++ * the chroma prediction values for list 0 ++ * @delta_luma_weight_l1: the difference of the weighting factor applied ++ * to the luma prediction value for list 1 ++ * @luma_offset_l1: the additive offset applied to the luma prediction value ++ * for list 1 ++ * @delta_chroma_weight_l1: the difference of the weighting factor applied ++ * to the chroma prediction values for list 1 ++ * @chroma_offset_l1: the difference of the additive offset applied to ++ * the chroma prediction values for list 1 ++ * @luma_log2_weight_denom: the base 2 logarithm of the denominator for ++ * all luma weighting factors ++ * @delta_chroma_log2_weight_denom: the difference of the base 2 logarithm ++ * of the denominator for all chroma ++ * weighting factors ++ */ ++struct v4l2_hevc_pred_weight_table { ++ __s8 delta_luma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __s8 luma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __s8 delta_chroma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; ++ __s8 chroma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; ++ ++ __s8 delta_luma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __s8 luma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __s8 delta_chroma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; ++ __s8 chroma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; ++ ++ __u8 luma_log2_weight_denom; ++ __s8 delta_chroma_log2_weight_denom; ++}; ++ ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA (1ULL << 0) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA (1ULL << 1) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED (1ULL << 2) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO (1ULL << 3) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT (1ULL << 4) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0 (1ULL << 5) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_USE_INTEGER_MV (1ULL << 6) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED (1ULL << 7) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 8) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT (1ULL << 9) ++ ++/** ++ * struct v4l2_ctrl_hevc_slice_params - HEVC slice parameters ++ * ++ * This control is a dynamically sized 1-dimensional array, ++ * V4L2_CTRL_FLAG_DYNAMIC_ARRAY flag must be set when using it. ++ * ++ * @bit_size: size (in bits) of the current slice data ++ * @data_byte_offset: offset (in bytes) to the video data in the current slice data ++ * @num_entry_point_offsets: specifies the number of entry point offset syntax ++ * elements in the slice header. ++ * @nal_unit_type: specifies the coding type of the slice (B, P or I) ++ * @nuh_temporal_id_plus1: minus 1 specifies a temporal identifier for the NAL unit ++ * @slice_type: see V4L2_HEVC_SLICE_TYPE_{} ++ * @colour_plane_id: specifies the colour plane associated with the current slice ++ * @slice_pic_order_cnt: specifies the picture order count ++ * @num_ref_idx_l0_active_minus1: this value plus 1 specifies the maximum ++ * reference index for reference picture list 0 ++ * that may be used to decode the slice ++ * @num_ref_idx_l1_active_minus1: this value plus 1 specifies the maximum ++ * reference index for reference picture list 1 ++ * that may be used to decode the slice ++ * @collocated_ref_idx: specifies the reference index of the collocated picture used ++ * for temporal motion vector prediction ++ * @five_minus_max_num_merge_cand: specifies the maximum number of merging ++ * motion vector prediction candidates supported in ++ * the slice subtracted from 5 ++ * @slice_qp_delta: specifies the initial value of QpY to be used for the coding ++ * blocks in the slice ++ * @slice_cb_qp_offset: specifies a difference to be added to the value of pps_cb_qp_offset ++ * @slice_cr_qp_offset: specifies a difference to be added to the value of pps_cr_qp_offset ++ * @slice_act_y_qp_offset: screen content extension parameters ++ * @slice_act_cb_qp_offset: screen content extension parameters ++ * @slice_act_cr_qp_offset: screen content extension parameters ++ * @slice_beta_offset_div2: specify the deblocking parameter offsets for beta divided by 2 ++ * @slice_tc_offset_div2: specify the deblocking parameter offsets for tC divided by 2 ++ * @pic_struct: indicates whether a picture should be displayed as a frame or as one or ++ * more fields ++ * @reserved0: padding field. Should be zeroed by applications. ++ * @slice_segment_addr: specifies the address of the first coding tree block in ++ * the slice segment ++ * @ref_idx_l0: the list of L0 reference elements as indices in the DPB ++ * @ref_idx_l1: the list of L1 reference elements as indices in the DPB ++ * @short_term_ref_pic_set_size: specifies the size of short-term reference ++ * pictures set included in the SPS ++ * @long_term_ref_pic_set_size: specifies the size of long-term reference ++ * pictures set include in the SPS ++ * @pred_weight_table: the prediction weight coefficients for inter-picture ++ * prediction ++ * @reserved1: padding field. Should be zeroed by applications. ++ * @flags: see V4L2_HEVC_SLICE_PARAMS_FLAG_{} ++ */ ++struct v4l2_ctrl_hevc_slice_params { ++ __u32 bit_size; ++ __u32 data_byte_offset; ++ __u32 num_entry_point_offsets; ++ ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: NAL unit header */ ++ __u8 nal_unit_type; ++ __u8 nuh_temporal_id_plus1; ++ ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ ++ __u8 slice_type; ++ __u8 colour_plane_id; ++ __s32 slice_pic_order_cnt; ++ __u8 num_ref_idx_l0_active_minus1; ++ __u8 num_ref_idx_l1_active_minus1; ++ __u8 collocated_ref_idx; ++ __u8 five_minus_max_num_merge_cand; ++ __s8 slice_qp_delta; ++ __s8 slice_cb_qp_offset; ++ __s8 slice_cr_qp_offset; ++ __s8 slice_act_y_qp_offset; ++ __s8 slice_act_cb_qp_offset; ++ __s8 slice_act_cr_qp_offset; ++ __s8 slice_beta_offset_div2; ++ __s8 slice_tc_offset_div2; ++ ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture timing SEI message */ ++ __u8 pic_struct; ++ ++ __u8 reserved0[3]; ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ ++ __u32 slice_segment_addr; ++ __u8 ref_idx_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __u8 ref_idx_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __u16 short_term_ref_pic_set_size; ++ __u16 long_term_ref_pic_set_size; ++ ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Weighted prediction parameter */ ++ struct v4l2_hevc_pred_weight_table pred_weight_table; ++ ++ __u8 reserved1[2]; ++ __u64 flags; ++}; ++ ++#define V4L2_HEVC_DECODE_PARAM_FLAG_IRAP_PIC 0x1 ++#define V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC 0x2 ++#define V4L2_HEVC_DECODE_PARAM_FLAG_NO_OUTPUT_OF_PRIOR 0x4 ++ ++/** ++ * struct v4l2_ctrl_hevc_decode_params - HEVC decode parameters ++ * ++ * @pic_order_cnt_val: picture order count ++ * @short_term_ref_pic_set_size: specifies the size of short-term reference ++ * pictures set included in the SPS of the first slice ++ * @long_term_ref_pic_set_size: specifies the size of long-term reference ++ * pictures set include in the SPS of the first slice ++ * @num_active_dpb_entries: the number of entries in dpb ++ * @num_poc_st_curr_before: the number of reference pictures in the short-term ++ * set that come before the current frame ++ * @num_poc_st_curr_after: the number of reference pictures in the short-term ++ * set that come after the current frame ++ * @num_poc_lt_curr: the number of reference pictures in the long-term set ++ * @poc_st_curr_before: provides the index of the short term before references ++ * in DPB array ++ * @poc_st_curr_after: provides the index of the short term after references ++ * in DPB array ++ * @poc_lt_curr: provides the index of the long term references in DPB array ++ * @reserved: padding field. Should be zeroed by applications. ++ * @dpb: the decoded picture buffer, for meta-data about reference frames ++ * @flags: see V4L2_HEVC_DECODE_PARAM_FLAG_{} ++ */ ++struct v4l2_ctrl_hevc_decode_params { ++ __s32 pic_order_cnt_val; ++ __u16 short_term_ref_pic_set_size; ++ __u16 long_term_ref_pic_set_size; ++ __u8 num_active_dpb_entries; ++ __u8 num_poc_st_curr_before; ++ __u8 num_poc_st_curr_after; ++ __u8 num_poc_lt_curr; ++ __u8 poc_st_curr_before[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __u8 poc_st_curr_after[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __u8 poc_lt_curr[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __u8 reserved[4]; ++ struct v4l2_hevc_dpb_entry dpb[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __u64 flags; ++}; ++ ++/** ++ * struct v4l2_ctrl_hevc_scaling_matrix - HEVC scaling lists parameters ++ * ++ * @scaling_list_4x4: scaling list is used for the scaling process for ++ * transform coefficients. The values on each scaling ++ * list are expected in raster scan order ++ * @scaling_list_8x8: scaling list is used for the scaling process for ++ * transform coefficients. The values on each scaling ++ * list are expected in raster scan order ++ * @scaling_list_16x16: scaling list is used for the scaling process for ++ * transform coefficients. The values on each scaling ++ * list are expected in raster scan order ++ * @scaling_list_32x32: scaling list is used for the scaling process for ++ * transform coefficients. The values on each scaling ++ * list are expected in raster scan order ++ * @scaling_list_dc_coef_16x16: scaling list is used for the scaling process ++ * for transform coefficients. The values on each ++ * scaling list are expected in raster scan order. ++ * @scaling_list_dc_coef_32x32: scaling list is used for the scaling process ++ * for transform coefficients. The values on each ++ * scaling list are expected in raster scan order. ++ */ ++struct v4l2_ctrl_hevc_scaling_matrix { ++ __u8 scaling_list_4x4[6][16]; ++ __u8 scaling_list_8x8[6][64]; ++ __u8 scaling_list_16x16[6][64]; ++ __u8 scaling_list_32x32[2][64]; ++ __u8 scaling_list_dc_coef_16x16[6]; ++ __u8 scaling_list_dc_coef_32x32[2]; ++}; ++ ++#endif +--- a/libavcodec/hevc_parser.c ++++ b/libavcodec/hevc_parser.c +@@ -97,6 +97,19 @@ static int hevc_parse_slice_header(AVCod + avctx->profile = ps->sps->ptl.general_ptl.profile_idc; + avctx->level = ps->sps->ptl.general_ptl.level_idc; + ++ if (ps->sps->chroma_format_idc == 1) { ++ avctx->chroma_sample_location = ps->sps->vui.chroma_loc_info_present_flag ? ++ ps->sps->vui.chroma_sample_loc_type_top_field + 1 : ++ AVCHROMA_LOC_LEFT; ++ } ++ else if (ps->sps->chroma_format_idc == 2 || ++ ps->sps->chroma_format_idc == 3) { ++ avctx->chroma_sample_location = AVCHROMA_LOC_TOPLEFT;; ++ } ++ else { ++ avctx->chroma_sample_location = AVCHROMA_LOC_UNSPECIFIED; ++ } ++ + if (ps->vps->vps_timing_info_present_flag) { + num = ps->vps->vps_num_units_in_tick; + den = ps->vps->vps_time_scale; +--- a/libavcodec/hevc_refs.c ++++ b/libavcodec/hevc_refs.c +@@ -98,18 +98,22 @@ static HEVCFrame *alloc_frame(HEVCContex + if (!frame->rpl_buf) + goto fail; + +- frame->tab_mvf_buf = av_buffer_pool_get(s->tab_mvf_pool); +- if (!frame->tab_mvf_buf) +- goto fail; +- frame->tab_mvf = (MvField *)frame->tab_mvf_buf->data; ++ if (s->tab_mvf_pool) { ++ frame->tab_mvf_buf = av_buffer_pool_get(s->tab_mvf_pool); ++ if (!frame->tab_mvf_buf) ++ goto fail; ++ frame->tab_mvf = (MvField *)frame->tab_mvf_buf->data; ++ } + +- frame->rpl_tab_buf = av_buffer_pool_get(s->rpl_tab_pool); +- if (!frame->rpl_tab_buf) +- goto fail; +- frame->rpl_tab = (RefPicListTab **)frame->rpl_tab_buf->data; +- frame->ctb_count = s->ps.sps->ctb_width * s->ps.sps->ctb_height; +- for (j = 0; j < frame->ctb_count; j++) +- frame->rpl_tab[j] = (RefPicListTab *)frame->rpl_buf->data; ++ if (s->rpl_tab_pool) { ++ frame->rpl_tab_buf = av_buffer_pool_get(s->rpl_tab_pool); ++ if (!frame->rpl_tab_buf) ++ goto fail; ++ frame->rpl_tab = (RefPicListTab **)frame->rpl_tab_buf->data; ++ frame->ctb_count = s->ps.sps->ctb_width * s->ps.sps->ctb_height; ++ for (j = 0; j < frame->ctb_count; j++) ++ frame->rpl_tab[j] = (RefPicListTab *)frame->rpl_buf->data; ++ } + + frame->frame->top_field_first = s->sei.picture_timing.picture_struct == AV_PICTURE_STRUCTURE_TOP_FIELD; + frame->frame->interlaced_frame = (s->sei.picture_timing.picture_struct == AV_PICTURE_STRUCTURE_TOP_FIELD) || (s->sei.picture_timing.picture_struct == AV_PICTURE_STRUCTURE_BOTTOM_FIELD); +@@ -284,14 +288,17 @@ static int init_slice_rpl(HEVCContext *s + int ctb_count = frame->ctb_count; + int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_segment_addr]; + int i; ++ RefPicListTab * const tab = (RefPicListTab *)frame->rpl_buf->data + s->slice_idx; + + if (s->slice_idx >= frame->rpl_buf->size / sizeof(RefPicListTab)) + return AVERROR_INVALIDDATA; + +- for (i = ctb_addr_ts; i < ctb_count; i++) +- frame->rpl_tab[i] = (RefPicListTab *)frame->rpl_buf->data + s->slice_idx; ++ if (frame->rpl_tab) { ++ for (i = ctb_addr_ts; i < ctb_count; i++) ++ frame->rpl_tab[i] = tab; ++ } + +- frame->refPicList = (RefPicList *)frame->rpl_tab[ctb_addr_ts]; ++ frame->refPicList = tab->refPicList; + + return 0; + } +--- a/libavcodec/hevcdec.c ++++ b/libavcodec/hevcdec.c +@@ -340,6 +340,19 @@ static void export_stream_params(HEVCCon + + ff_set_sar(avctx, sps->vui.sar); + ++ if (sps->chroma_format_idc == 1) { ++ avctx->chroma_sample_location = sps->vui.chroma_loc_info_present_flag ? ++ sps->vui.chroma_sample_loc_type_top_field + 1 : ++ AVCHROMA_LOC_LEFT; ++ } ++ else if (sps->chroma_format_idc == 2 || ++ sps->chroma_format_idc == 3) { ++ avctx->chroma_sample_location = AVCHROMA_LOC_TOPLEFT;; ++ } ++ else { ++ avctx->chroma_sample_location = AVCHROMA_LOC_UNSPECIFIED; ++ } ++ + if (sps->vui.video_signal_type_present_flag) + avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG + : AVCOL_RANGE_MPEG; +@@ -402,6 +415,7 @@ static enum AVPixelFormat get_format(HEV + #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + \ + CONFIG_HEVC_D3D11VA_HWACCEL * 2 + \ + CONFIG_HEVC_NVDEC_HWACCEL + \ ++ CONFIG_HEVC_V4L2REQUEST_HWACCEL + \ + CONFIG_HEVC_VAAPI_HWACCEL + \ + CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL + \ + CONFIG_HEVC_VDPAU_HWACCEL) +@@ -429,6 +443,9 @@ static enum AVPixelFormat get_format(HEV + #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL + *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX; + #endif ++#if CONFIG_HEVC_V4L2REQUEST_HWACCEL ++ *fmt++ = AV_PIX_FMT_DRM_PRIME; ++#endif + break; + case AV_PIX_FMT_YUV420P10: + #if CONFIG_HEVC_DXVA2_HWACCEL +@@ -450,6 +467,9 @@ static enum AVPixelFormat get_format(HEV + #if CONFIG_HEVC_NVDEC_HWACCEL + *fmt++ = AV_PIX_FMT_CUDA; + #endif ++#if CONFIG_HEVC_V4L2REQUEST_HWACCEL ++ *fmt++ = AV_PIX_FMT_DRM_PRIME; ++#endif + break; + case AV_PIX_FMT_YUV444P: + #if CONFIG_HEVC_VDPAU_HWACCEL +@@ -504,6 +524,16 @@ static int set_sps(HEVCContext *s, const + if (!sps) + return 0; + ++ // If hwaccel then we don't need all the s/w decode helper arrays ++ if (s->avctx->hwaccel) { ++ export_stream_params(s, sps); ++ ++ s->avctx->pix_fmt = pix_fmt; ++ s->ps.sps = sps; ++ s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data; ++ return 0; ++ } ++ + ret = pic_arrays_init(s, sps); + if (ret < 0) + goto fail; +@@ -3011,11 +3041,13 @@ static int hevc_frame_start(HEVCContext + ((s->ps.sps->height >> s->ps.sps->log2_min_cb_size) + 1); + int ret; + +- memset(s->horizontal_bs, 0, s->bs_width * s->bs_height); +- memset(s->vertical_bs, 0, s->bs_width * s->bs_height); +- memset(s->cbf_luma, 0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height); +- memset(s->is_pcm, 0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1)); +- memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address)); ++ if (s->horizontal_bs) { ++ memset(s->horizontal_bs, 0, s->bs_width * s->bs_height); ++ memset(s->vertical_bs, 0, s->bs_width * s->bs_height); ++ memset(s->cbf_luma, 0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height); ++ memset(s->is_pcm, 0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1)); ++ memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address)); ++ } + + s->is_decoded = 0; + s->first_nal_type = s->nal_unit_type; +@@ -3507,8 +3539,13 @@ static int hevc_decode_frame(AVCodecCont + + s->ref = NULL; + ret = decode_nal_units(s, avpkt->data, avpkt->size); +- if (ret < 0) ++ if (ret < 0) { ++ // Ensure that hwaccel knows this frame is over ++ if (s->avctx->hwaccel && s->avctx->hwaccel->abort_frame) ++ s->avctx->hwaccel->abort_frame(s->avctx); ++ + return ret; ++ } + + if (avctx->hwaccel) { + if (s->ref && (ret = avctx->hwaccel->end_frame(avctx)) < 0) { +@@ -3558,15 +3595,19 @@ static int hevc_ref_frame(HEVCContext *s + dst->needs_fg = 1; + } + +- dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf); +- if (!dst->tab_mvf_buf) +- goto fail; +- dst->tab_mvf = src->tab_mvf; ++ if (src->tab_mvf_buf) { ++ dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf); ++ if (!dst->tab_mvf_buf) ++ goto fail; ++ dst->tab_mvf = src->tab_mvf; ++ } + +- dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf); +- if (!dst->rpl_tab_buf) +- goto fail; +- dst->rpl_tab = src->rpl_tab; ++ if (src->rpl_tab_buf) { ++ dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf); ++ if (!dst->rpl_tab_buf) ++ goto fail; ++ dst->rpl_tab = src->rpl_tab; ++ } + + dst->rpl_buf = av_buffer_ref(src->rpl_buf); + if (!dst->rpl_buf) +@@ -3900,6 +3941,9 @@ const FFCodec ff_hevc_decoder = { + #if CONFIG_HEVC_NVDEC_HWACCEL + HWACCEL_NVDEC(hevc), + #endif ++#if CONFIG_HEVC_V4L2REQUEST_HWACCEL ++ HWACCEL_V4L2REQUEST(hevc), ++#endif + #if CONFIG_HEVC_VAAPI_HWACCEL + HWACCEL_VAAPI(hevc), + #endif +--- a/libavcodec/hwaccels.h ++++ b/libavcodec/hwaccels.h +@@ -40,6 +40,7 @@ extern const AVHWAccel ff_hevc_d3d11va_h + extern const AVHWAccel ff_hevc_d3d11va2_hwaccel; + extern const AVHWAccel ff_hevc_dxva2_hwaccel; + extern const AVHWAccel ff_hevc_nvdec_hwaccel; ++extern const AVHWAccel ff_hevc_v4l2request_hwaccel; + extern const AVHWAccel ff_hevc_vaapi_hwaccel; + extern const AVHWAccel ff_hevc_vdpau_hwaccel; + extern const AVHWAccel ff_hevc_videotoolbox_hwaccel; +--- a/libavcodec/hwconfig.h ++++ b/libavcodec/hwconfig.h +@@ -24,6 +24,7 @@ + + + #define HWACCEL_CAP_ASYNC_SAFE (1 << 0) ++#define HWACCEL_CAP_MT_SAFE (1 << 1) + + + typedef struct AVCodecHWConfigInternal { +@@ -70,6 +71,8 @@ typedef struct AVCodecHWConfigInternal { + HW_CONFIG_HWACCEL(1, 1, 0, D3D11, D3D11VA, ff_ ## codec ## _d3d11va2_hwaccel) + #define HWACCEL_NVDEC(codec) \ + HW_CONFIG_HWACCEL(1, 1, 0, CUDA, CUDA, ff_ ## codec ## _nvdec_hwaccel) ++#define HWACCEL_V4L2REQUEST(codec) \ ++ HW_CONFIG_HWACCEL(1, 0, 0, DRM_PRIME, DRM, ff_ ## codec ## _v4l2request_hwaccel) + #define HWACCEL_VAAPI(codec) \ + HW_CONFIG_HWACCEL(1, 1, 1, VAAPI, VAAPI, ff_ ## codec ## _vaapi_hwaccel) + #define HWACCEL_VDPAU(codec) \ +--- a/libavcodec/mmaldec.c ++++ b/libavcodec/mmaldec.c +@@ -24,6 +24,9 @@ + * MMAL Video Decoder + */ + ++#pragma GCC diagnostic push ++// Many many redundant decls in the header files ++#pragma GCC diagnostic ignored "-Wredundant-decls" + #include <bcm_host.h> + #include <interface/mmal/mmal.h> + #include <interface/mmal/mmal_parameters_video.h> +@@ -31,6 +34,7 @@ + #include <interface/mmal/util/mmal_util_params.h> + #include <interface/mmal/util/mmal_default_components.h> + #include <interface/mmal/vc/mmal_vc_api.h> ++#pragma GCC diagnostic pop + #include <stdatomic.h> + + #include "avcodec.h" +--- a/libavcodec/pthread_frame.c ++++ b/libavcodec/pthread_frame.c +@@ -217,7 +217,8 @@ FF_ENABLE_DEPRECATION_WARNINGS + + /* if the previous thread uses hwaccel then we take the lock to ensure + * the threads don't run concurrently */ +- if (avctx->hwaccel) { ++ if (avctx->hwaccel && ++ !(avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)) { + pthread_mutex_lock(&p->parent->hwaccel_mutex); + p->hwaccel_serializing = 1; + } +@@ -243,7 +244,7 @@ FF_ENABLE_DEPRECATION_WARNINGS + p->hwaccel_serializing = 0; + pthread_mutex_unlock(&p->parent->hwaccel_mutex); + } +- av_assert0(!avctx->hwaccel); ++ av_assert0(!avctx->hwaccel || (avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)); + + if (p->async_serializing) { + p->async_serializing = 0; +@@ -331,6 +332,12 @@ FF_ENABLE_DEPRECATION_WARNINGS + } + + dst->hwaccel_flags = src->hwaccel_flags; ++ if (src->hwaccel && ++ (src->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)) { ++ dst->hwaccel = src->hwaccel; ++ dst->hwaccel_context = src->hwaccel_context; ++ dst->internal->hwaccel_priv_data = src->internal->hwaccel_priv_data; ++ } + + err = av_buffer_replace(&dst->internal->pool, src->internal->pool); + if (err < 0) +@@ -461,10 +468,13 @@ static int submit_packet(PerThreadContex + } + + /* transfer the stashed hwaccel state, if any */ +- av_assert0(!p->avctx->hwaccel); +- FFSWAP(const AVHWAccel*, p->avctx->hwaccel, fctx->stash_hwaccel); +- FFSWAP(void*, p->avctx->hwaccel_context, fctx->stash_hwaccel_context); +- FFSWAP(void*, p->avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv); ++ av_assert0(!p->avctx->hwaccel || (p->avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)); ++ if (p->avctx->hwaccel && ++ !(p->avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)) { ++ FFSWAP(const AVHWAccel*, p->avctx->hwaccel, fctx->stash_hwaccel); ++ FFSWAP(void*, p->avctx->hwaccel_context, fctx->stash_hwaccel_context); ++ FFSWAP(void*, p->avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv); ++ } + + av_packet_unref(p->avpkt); + ret = av_packet_ref(p->avpkt, avpkt); +@@ -656,7 +666,9 @@ void ff_thread_finish_setup(AVCodecConte + + if (!(avctx->active_thread_type&FF_THREAD_FRAME)) return; + +- if (avctx->hwaccel && !p->hwaccel_serializing) { ++ if (avctx->hwaccel && ++ !(avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE) && ++ !p->hwaccel_serializing) { + pthread_mutex_lock(&p->parent->hwaccel_mutex); + p->hwaccel_serializing = 1; + } +@@ -673,9 +685,12 @@ void ff_thread_finish_setup(AVCodecConte + * this is done here so that this worker thread can wipe its own hwaccel + * state after decoding, without requiring synchronization */ + av_assert0(!p->parent->stash_hwaccel); +- p->parent->stash_hwaccel = avctx->hwaccel; +- p->parent->stash_hwaccel_context = avctx->hwaccel_context; +- p->parent->stash_hwaccel_priv = avctx->internal->hwaccel_priv_data; ++ if (avctx->hwaccel && ++ !(avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)) { ++ p->parent->stash_hwaccel = avctx->hwaccel; ++ p->parent->stash_hwaccel_context = avctx->hwaccel_context; ++ p->parent->stash_hwaccel_priv = avctx->internal->hwaccel_priv_data; ++ } + + pthread_mutex_lock(&p->progress_mutex); + if(atomic_load(&p->state) == STATE_SETUP_FINISHED){ +@@ -730,6 +745,15 @@ void ff_frame_thread_free(AVCodecContext + + park_frame_worker_threads(fctx, thread_count); + ++ if (fctx->prev_thread && ++ avctx->hwaccel && (avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE) && ++ avctx->internal->hwaccel_priv_data != ++ fctx->prev_thread->avctx->internal->hwaccel_priv_data) { ++ if (update_context_from_thread(avctx, fctx->prev_thread->avctx, 1) < 0) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to update user thread.\n"); ++ } ++ } ++ + for (i = 0; i < thread_count; i++) { + PerThreadContext *p = &fctx->threads[i]; + AVCodecContext *ctx = p->avctx; +@@ -778,10 +802,13 @@ void ff_frame_thread_free(AVCodecContext + + /* if we have stashed hwaccel state, move it to the user-facing context, + * so it will be freed in avcodec_close() */ +- av_assert0(!avctx->hwaccel); +- FFSWAP(const AVHWAccel*, avctx->hwaccel, fctx->stash_hwaccel); +- FFSWAP(void*, avctx->hwaccel_context, fctx->stash_hwaccel_context); +- FFSWAP(void*, avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv); ++ av_assert0(!avctx->hwaccel || (avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)); ++ if (avctx->hwaccel && ++ !(avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)) { ++ FFSWAP(const AVHWAccel*, avctx->hwaccel, fctx->stash_hwaccel); ++ FFSWAP(void*, avctx->hwaccel_context, fctx->stash_hwaccel_context); ++ FFSWAP(void*, avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv); ++ } + + av_freep(&avctx->internal->thread_ctx); + } +--- a/libavcodec/raw.c ++++ b/libavcodec/raw.c +@@ -294,6 +294,12 @@ static const PixelFormatTag raw_pix_fmt_ + { AV_PIX_FMT_RGB565LE,MKTAG( 3 , 0 , 0 , 0 ) }, /* flipped RGB565LE */ + { AV_PIX_FMT_YUV444P, MKTAG('Y', 'V', '2', '4') }, /* YUV444P, swapped UV */ + ++ /* RPI (Might as well define for everything) */ ++ { AV_PIX_FMT_SAND128, MKTAG('S', 'A', 'N', 'D') }, ++ { AV_PIX_FMT_RPI4_8, MKTAG('S', 'A', 'N', 'D') }, ++ { AV_PIX_FMT_SAND64_10, MKTAG('S', 'N', 'D', 'A') }, ++ { AV_PIX_FMT_RPI4_10, MKTAG('S', 'N', 'D', 'B') }, ++ + { AV_PIX_FMT_NONE, 0 }, + }; + +--- a/libavcodec/rawenc.c ++++ b/libavcodec/rawenc.c +@@ -24,6 +24,7 @@ + * Raw Video Encoder + */ + ++#include "config.h" + #include "avcodec.h" + #include "codec_internal.h" + #include "encode.h" +@@ -33,6 +34,10 @@ + #include "libavutil/intreadwrite.h" + #include "libavutil/imgutils.h" + #include "libavutil/internal.h" ++#include "libavutil/avassert.h" ++#if CONFIG_SAND ++#include "libavutil/rpi_sand_fns.h" ++#endif + + static av_cold int raw_encode_init(AVCodecContext *avctx) + { +@@ -46,22 +51,114 @@ static av_cold int raw_encode_init(AVCod + return 0; + } + ++#if CONFIG_SAND ++static int raw_sand8_as_yuv420(AVCodecContext *avctx, AVPacket *pkt, ++ const AVFrame *frame) ++{ ++ const int width = av_frame_cropped_width(frame); ++ const int height = av_frame_cropped_height(frame); ++ const int x0 = frame->crop_left; ++ const int y0 = frame->crop_top; ++ const int size = width * height * 3 / 2; ++ uint8_t * dst; ++ int ret; ++ ++ if ((ret = ff_get_encode_buffer(avctx, pkt, size, 0)) < 0) ++ return ret; ++ ++ dst = pkt->data; ++ ++ av_rpi_sand_to_planar_y8(dst, width, frame->data[0], frame->linesize[0], frame->linesize[3], x0, y0, width, height); ++ dst += width * height; ++ av_rpi_sand_to_planar_c8(dst, width / 2, dst + width * height / 4, width / 2, ++ frame->data[1], frame->linesize[1], av_rpi_sand_frame_stride2(frame), x0 / 2, y0 / 2, width / 2, height / 2); ++ return 0; ++} ++ ++static int raw_sand16_as_yuv420(AVCodecContext *avctx, AVPacket *pkt, ++ const AVFrame *frame) ++{ ++ const int width = av_frame_cropped_width(frame); ++ const int height = av_frame_cropped_height(frame); ++ const int x0 = frame->crop_left; ++ const int y0 = frame->crop_top; ++ const int size = width * height * 3; ++ uint8_t * dst; ++ int ret; ++ ++ if ((ret = ff_get_encode_buffer(avctx, pkt, size, 0)) < 0) ++ return ret; ++ ++ dst = pkt->data; ++ ++ av_rpi_sand_to_planar_y16(dst, width * 2, frame->data[0], frame->linesize[0], frame->linesize[3], x0 * 2, y0, width * 2, height); ++ dst += width * height * 2; ++ av_rpi_sand_to_planar_c16(dst, width, dst + width * height / 2, width, ++ frame->data[1], frame->linesize[1], av_rpi_sand_frame_stride2(frame), x0, y0 / 2, width, height / 2); ++ return 0; ++} ++ ++static int raw_sand30_as_yuv420(AVCodecContext *avctx, AVPacket *pkt, ++ const AVFrame *frame) ++{ ++ const int width = av_frame_cropped_width(frame); ++ const int height = av_frame_cropped_height(frame); ++ const int x0 = frame->crop_left; ++ const int y0 = frame->crop_top; ++ const int size = width * height * 3; ++ uint8_t * dst; ++ int ret; ++ ++ if ((ret = ff_get_encode_buffer(avctx, pkt, size, 0)) < 0) ++ return ret; ++ ++ dst = pkt->data; ++ ++ av_rpi_sand30_to_planar_y16(dst, width * 2, frame->data[0], frame->linesize[0], frame->linesize[3], x0, y0, width, height); ++ dst += width * height * 2; ++ av_rpi_sand30_to_planar_c16(dst, width, dst + width * height / 2, width, ++ frame->data[1], frame->linesize[1], av_rpi_sand_frame_stride2(frame), x0/2, y0 / 2, width/2, height / 2); ++ return 0; ++} ++#endif ++ ++ + static int raw_encode(AVCodecContext *avctx, AVPacket *pkt, +- const AVFrame *frame, int *got_packet) ++ const AVFrame *src_frame, int *got_packet) + { +- int ret = av_image_get_buffer_size(frame->format, +- frame->width, frame->height, 1); ++ int ret; ++ AVFrame * frame = NULL; + +- if (ret < 0) ++#if CONFIG_SAND ++ if (av_rpi_is_sand_frame(src_frame)) { ++ ret = av_rpi_is_sand8_frame(src_frame) ? raw_sand8_as_yuv420(avctx, pkt, src_frame) : ++ av_rpi_is_sand16_frame(src_frame) ? raw_sand16_as_yuv420(avctx, pkt, src_frame) : ++ av_rpi_is_sand30_frame(src_frame) ? raw_sand30_as_yuv420(avctx, pkt, src_frame) : -1; ++ *got_packet = (ret == 0); + return ret; ++ } ++#endif ++ ++ if ((frame = av_frame_clone(src_frame)) == NULL) { ++ ret = AVERROR(ENOMEM); ++ goto fail; ++ } ++ ++ if ((ret = av_frame_apply_cropping(frame, AV_FRAME_CROP_UNALIGNED)) < 0) ++ goto fail; ++ ++ ret = av_image_get_buffer_size(frame->format, ++ frame->width, frame->height, 1); ++ if (ret < 0) ++ goto fail; + + if ((ret = ff_get_encode_buffer(avctx, pkt, ret, 0)) < 0) +- return ret; ++ goto fail; + if ((ret = av_image_copy_to_buffer(pkt->data, pkt->size, + (const uint8_t **)frame->data, frame->linesize, + frame->format, + frame->width, frame->height, 1)) < 0) +- return ret; ++ goto fail; + + if(avctx->codec_tag == AV_RL32("yuv2") && ret > 0 && + frame->format == AV_PIX_FMT_YUYV422) { +@@ -77,8 +174,15 @@ static int raw_encode(AVCodecContext *av + AV_WB64(&pkt->data[8 * x], v << 48 | v >> 16); + } + } ++ pkt->flags |= AV_PKT_FLAG_KEY; ++ av_frame_free(&frame); + *got_packet = 1; + return 0; ++ ++fail: ++ av_frame_free(&frame); ++ *got_packet = 0; ++ return ret; + } + + const FFCodec ff_rawvideo_encoder = { +--- a/libavcodec/v4l2_buffers.c ++++ b/libavcodec/v4l2_buffers.c +@@ -21,6 +21,7 @@ + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + ++#include <drm_fourcc.h> + #include <linux/videodev2.h> + #include <sys/ioctl.h> + #include <sys/mman.h> +@@ -28,57 +29,89 @@ + #include <fcntl.h> + #include <poll.h> + #include "libavcodec/avcodec.h" ++#include "libavcodec/internal.h" ++#include "libavutil/avassert.h" + #include "libavutil/pixdesc.h" ++#include "libavutil/hwcontext.h" + #include "v4l2_context.h" + #include "v4l2_buffers.h" + #include "v4l2_m2m.h" ++#include "v4l2_req_dmabufs.h" ++#include "weak_link.h" + + #define USEC_PER_SEC 1000000 +-static AVRational v4l2_timebase = { 1, USEC_PER_SEC }; ++static const AVRational v4l2_timebase = { 1, USEC_PER_SEC }; + +-static inline V4L2m2mContext *buf_to_m2mctx(V4L2Buffer *buf) ++static inline V4L2m2mContext *ctx_to_m2mctx(const V4L2Context *ctx) + { +- return V4L2_TYPE_IS_OUTPUT(buf->context->type) ? +- container_of(buf->context, V4L2m2mContext, output) : +- container_of(buf->context, V4L2m2mContext, capture); ++ return V4L2_TYPE_IS_OUTPUT(ctx->type) ? ++ container_of(ctx, V4L2m2mContext, output) : ++ container_of(ctx, V4L2m2mContext, capture); + } + +-static inline AVCodecContext *logger(V4L2Buffer *buf) ++static inline V4L2m2mContext *buf_to_m2mctx(const V4L2Buffer * const buf) + { +- return buf_to_m2mctx(buf)->avctx; ++ return ctx_to_m2mctx(buf->context); + } + +-static inline AVRational v4l2_get_timebase(V4L2Buffer *avbuf) ++static inline AVCodecContext *logger(const V4L2Buffer * const buf) + { +- V4L2m2mContext *s = buf_to_m2mctx(avbuf); ++ return buf_to_m2mctx(buf)->avctx; ++} + +- if (s->avctx->pkt_timebase.num) +- return s->avctx->pkt_timebase; +- return s->avctx->time_base; ++static inline AVRational v4l2_get_timebase(const V4L2Buffer * const avbuf) ++{ ++ const V4L2m2mContext *s = buf_to_m2mctx(avbuf); ++ const AVRational tb = s->avctx->pkt_timebase.num ? ++ s->avctx->pkt_timebase : ++ s->avctx->time_base; ++ return tb.num && tb.den ? tb : v4l2_timebase; + } + +-static inline void v4l2_set_pts(V4L2Buffer *out, int64_t pts) ++static inline struct timeval tv_from_int(const int64_t t) + { +- int64_t v4l2_pts; ++ return (struct timeval){ ++ .tv_usec = t % USEC_PER_SEC, ++ .tv_sec = t / USEC_PER_SEC ++ }; ++} + +- if (pts == AV_NOPTS_VALUE) +- pts = 0; ++static inline int64_t int_from_tv(const struct timeval t) ++{ ++ return (int64_t)t.tv_sec * USEC_PER_SEC + t.tv_usec; ++} + ++static inline void v4l2_set_pts(V4L2Buffer * const out, const int64_t pts) ++{ + /* convert pts to v4l2 timebase */ +- v4l2_pts = av_rescale_q(pts, v4l2_get_timebase(out), v4l2_timebase); +- out->buf.timestamp.tv_usec = v4l2_pts % USEC_PER_SEC; +- out->buf.timestamp.tv_sec = v4l2_pts / USEC_PER_SEC; ++ const int64_t v4l2_pts = ++ pts == AV_NOPTS_VALUE ? 0 : ++ av_rescale_q(pts, v4l2_get_timebase(out), v4l2_timebase); ++ out->buf.timestamp = tv_from_int(v4l2_pts); + } + +-static inline int64_t v4l2_get_pts(V4L2Buffer *avbuf) ++static inline int64_t v4l2_get_pts(const V4L2Buffer * const avbuf) + { +- int64_t v4l2_pts; +- ++ const int64_t v4l2_pts = int_from_tv(avbuf->buf.timestamp); ++ return v4l2_pts != 0 ? v4l2_pts : AV_NOPTS_VALUE; ++#if 0 + /* convert pts back to encoder timebase */ +- v4l2_pts = (int64_t)avbuf->buf.timestamp.tv_sec * USEC_PER_SEC + +- avbuf->buf.timestamp.tv_usec; ++ return ++ avbuf->context->no_pts_rescale ? v4l2_pts : ++ v4l2_pts == 0 ? AV_NOPTS_VALUE : ++ av_rescale_q(v4l2_pts, v4l2_timebase, v4l2_get_timebase(avbuf)); ++#endif ++} + +- return av_rescale_q(v4l2_pts, v4l2_timebase, v4l2_get_timebase(avbuf)); ++static void set_buf_length(V4L2Buffer *out, unsigned int plane, uint32_t bytesused, uint32_t length) ++{ ++ if (V4L2_TYPE_IS_MULTIPLANAR(out->buf.type)) { ++ out->planes[plane].bytesused = bytesused; ++ out->planes[plane].length = length; ++ } else { ++ out->buf.bytesused = bytesused; ++ out->buf.length = length; ++ } + } + + static enum AVColorPrimaries v4l2_get_color_primaries(V4L2Buffer *buf) +@@ -115,6 +148,105 @@ static enum AVColorPrimaries v4l2_get_co + return AVCOL_PRI_UNSPECIFIED; + } + ++static void v4l2_set_color(V4L2Buffer *buf, ++ const enum AVColorPrimaries avcp, ++ const enum AVColorSpace avcs, ++ const enum AVColorTransferCharacteristic avxc) ++{ ++ enum v4l2_ycbcr_encoding ycbcr = V4L2_YCBCR_ENC_DEFAULT; ++ enum v4l2_colorspace cs = V4L2_COLORSPACE_DEFAULT; ++ enum v4l2_xfer_func xfer = V4L2_XFER_FUNC_DEFAULT; ++ ++ switch (avcp) { ++ case AVCOL_PRI_BT709: ++ cs = V4L2_COLORSPACE_REC709; ++ ycbcr = V4L2_YCBCR_ENC_709; ++ break; ++ case AVCOL_PRI_BT470M: ++ cs = V4L2_COLORSPACE_470_SYSTEM_M; ++ ycbcr = V4L2_YCBCR_ENC_601; ++ break; ++ case AVCOL_PRI_BT470BG: ++ cs = V4L2_COLORSPACE_470_SYSTEM_BG; ++ break; ++ case AVCOL_PRI_SMPTE170M: ++ cs = V4L2_COLORSPACE_SMPTE170M; ++ break; ++ case AVCOL_PRI_SMPTE240M: ++ cs = V4L2_COLORSPACE_SMPTE240M; ++ break; ++ case AVCOL_PRI_BT2020: ++ cs = V4L2_COLORSPACE_BT2020; ++ break; ++ case AVCOL_PRI_SMPTE428: ++ case AVCOL_PRI_SMPTE431: ++ case AVCOL_PRI_SMPTE432: ++ case AVCOL_PRI_EBU3213: ++ case AVCOL_PRI_RESERVED: ++ case AVCOL_PRI_FILM: ++ case AVCOL_PRI_UNSPECIFIED: ++ default: ++ break; ++ } ++ ++ switch (avcs) { ++ case AVCOL_SPC_RGB: ++ cs = V4L2_COLORSPACE_SRGB; ++ break; ++ case AVCOL_SPC_BT709: ++ cs = V4L2_COLORSPACE_REC709; ++ break; ++ case AVCOL_SPC_FCC: ++ cs = V4L2_COLORSPACE_470_SYSTEM_M; ++ break; ++ case AVCOL_SPC_BT470BG: ++ cs = V4L2_COLORSPACE_470_SYSTEM_BG; ++ break; ++ case AVCOL_SPC_SMPTE170M: ++ cs = V4L2_COLORSPACE_SMPTE170M; ++ break; ++ case AVCOL_SPC_SMPTE240M: ++ cs = V4L2_COLORSPACE_SMPTE240M; ++ break; ++ case AVCOL_SPC_BT2020_CL: ++ cs = V4L2_COLORSPACE_BT2020; ++ ycbcr = V4L2_YCBCR_ENC_BT2020_CONST_LUM; ++ break; ++ case AVCOL_SPC_BT2020_NCL: ++ cs = V4L2_COLORSPACE_BT2020; ++ break; ++ default: ++ break; ++ } ++ ++ switch (xfer) { ++ case AVCOL_TRC_BT709: ++ xfer = V4L2_XFER_FUNC_709; ++ break; ++ case AVCOL_TRC_IEC61966_2_1: ++ xfer = V4L2_XFER_FUNC_SRGB; ++ break; ++ case AVCOL_TRC_SMPTE240M: ++ xfer = V4L2_XFER_FUNC_SMPTE240M; ++ break; ++ case AVCOL_TRC_SMPTE2084: ++ xfer = V4L2_XFER_FUNC_SMPTE2084; ++ break; ++ default: ++ break; ++ } ++ ++ if (V4L2_TYPE_IS_MULTIPLANAR(buf->buf.type)) { ++ buf->context->format.fmt.pix_mp.colorspace = cs; ++ buf->context->format.fmt.pix_mp.ycbcr_enc = ycbcr; ++ buf->context->format.fmt.pix_mp.xfer_func = xfer; ++ } else { ++ buf->context->format.fmt.pix.colorspace = cs; ++ buf->context->format.fmt.pix.ycbcr_enc = ycbcr; ++ buf->context->format.fmt.pix.xfer_func = xfer; ++ } ++} ++ + static enum AVColorRange v4l2_get_color_range(V4L2Buffer *buf) + { + enum v4l2_quantization qt; +@@ -133,6 +265,20 @@ static enum AVColorRange v4l2_get_color_ + return AVCOL_RANGE_UNSPECIFIED; + } + ++static void v4l2_set_color_range(V4L2Buffer *buf, const enum AVColorRange avcr) ++{ ++ const enum v4l2_quantization q = ++ avcr == AVCOL_RANGE_MPEG ? V4L2_QUANTIZATION_LIM_RANGE : ++ avcr == AVCOL_RANGE_JPEG ? V4L2_QUANTIZATION_FULL_RANGE : ++ V4L2_QUANTIZATION_DEFAULT; ++ ++ if (V4L2_TYPE_IS_MULTIPLANAR(buf->buf.type)) { ++ buf->context->format.fmt.pix_mp.quantization = q; ++ } else { ++ buf->context->format.fmt.pix.quantization = q; ++ } ++} ++ + static enum AVColorSpace v4l2_get_color_space(V4L2Buffer *buf) + { + enum v4l2_ycbcr_encoding ycbcr; +@@ -209,73 +355,218 @@ static enum AVColorTransferCharacteristi + return AVCOL_TRC_UNSPECIFIED; + } + +-static void v4l2_free_buffer(void *opaque, uint8_t *unused) ++static int v4l2_buf_is_interlaced(const V4L2Buffer * const buf) + { +- V4L2Buffer* avbuf = opaque; +- V4L2m2mContext *s = buf_to_m2mctx(avbuf); ++ return V4L2_FIELD_IS_INTERLACED(buf->buf.field); ++} + +- if (atomic_fetch_sub(&avbuf->context_refcount, 1) == 1) { +- atomic_fetch_sub_explicit(&s->refcount, 1, memory_order_acq_rel); ++static int v4l2_buf_is_top_first(const V4L2Buffer * const buf) ++{ ++ return buf->buf.field == V4L2_FIELD_INTERLACED_TB; ++} + +- if (s->reinit) { +- if (!atomic_load(&s->refcount)) +- sem_post(&s->refsync); +- } else { +- if (s->draining && V4L2_TYPE_IS_OUTPUT(avbuf->context->type)) { +- /* no need to queue more buffers to the driver */ +- avbuf->status = V4L2BUF_AVAILABLE; +- } +- else if (avbuf->context->streamon) +- ff_v4l2_buffer_enqueue(avbuf); +- } ++static void v4l2_set_interlace(V4L2Buffer * const buf, const int is_interlaced, const int is_tff) ++{ ++ buf->buf.field = !is_interlaced ? V4L2_FIELD_NONE : ++ is_tff ? V4L2_FIELD_INTERLACED_TB : V4L2_FIELD_INTERLACED_BT; ++} + +- av_buffer_unref(&avbuf->context_ref); ++static uint8_t * v4l2_get_drm_frame(V4L2Buffer *avbuf) ++{ ++ AVDRMFrameDescriptor *drm_desc = &avbuf->drm_frame; ++ AVDRMLayerDescriptor *layer; ++ ++ /* fill the DRM frame descriptor */ ++ drm_desc->nb_objects = avbuf->num_planes; ++ drm_desc->nb_layers = 1; ++ ++ layer = &drm_desc->layers[0]; ++ layer->nb_planes = avbuf->num_planes; ++ ++ for (int i = 0; i < avbuf->num_planes; i++) { ++ layer->planes[i].object_index = i; ++ layer->planes[i].offset = avbuf->plane_info[i].offset; ++ layer->planes[i].pitch = avbuf->plane_info[i].bytesperline; ++ } ++ ++ switch (avbuf->context->av_pix_fmt) { ++ case AV_PIX_FMT_0BGR: ++ layer->format = DRM_FORMAT_RGBX8888; ++ break; ++ case AV_PIX_FMT_RGB0: ++ layer->format = DRM_FORMAT_XBGR8888; ++ break; ++ case AV_PIX_FMT_0RGB: ++ layer->format = DRM_FORMAT_BGRX8888; ++ break; ++ case AV_PIX_FMT_BGR0: ++ layer->format = DRM_FORMAT_XRGB8888; ++ break; ++ ++ case AV_PIX_FMT_ABGR: ++ layer->format = DRM_FORMAT_RGBA8888; ++ break; ++ case AV_PIX_FMT_RGBA: ++ layer->format = DRM_FORMAT_ABGR8888; ++ break; ++ case AV_PIX_FMT_ARGB: ++ layer->format = DRM_FORMAT_BGRA8888; ++ break; ++ case AV_PIX_FMT_BGRA: ++ layer->format = DRM_FORMAT_ARGB8888; ++ break; ++ ++ case AV_PIX_FMT_BGR24: ++ layer->format = DRM_FORMAT_BGR888; ++ break; ++ case AV_PIX_FMT_RGB24: ++ layer->format = DRM_FORMAT_RGB888; ++ break; ++ ++ case AV_PIX_FMT_YUYV422: ++ ++ layer->format = DRM_FORMAT_YUYV; ++ layer->nb_planes = 1; ++ ++ break; ++ ++ case AV_PIX_FMT_NV12: ++ case AV_PIX_FMT_NV21: ++ ++ layer->format = avbuf->context->av_pix_fmt == AV_PIX_FMT_NV12 ? ++ DRM_FORMAT_NV12 : DRM_FORMAT_NV21; ++ ++ if (avbuf->num_planes > 1) ++ break; ++ ++ layer->nb_planes = 2; ++ ++ layer->planes[1].object_index = 0; ++ layer->planes[1].offset = avbuf->plane_info[0].bytesperline * ++ avbuf->context->format.fmt.pix.height; ++ layer->planes[1].pitch = avbuf->plane_info[0].bytesperline; ++ break; ++ ++ case AV_PIX_FMT_YUV420P: ++ ++ layer->format = DRM_FORMAT_YUV420; ++ ++ if (avbuf->num_planes > 1) ++ break; ++ ++ layer->nb_planes = 3; ++ ++ layer->planes[1].object_index = 0; ++ layer->planes[1].offset = avbuf->plane_info[0].bytesperline * ++ avbuf->context->format.fmt.pix.height; ++ layer->planes[1].pitch = avbuf->plane_info[0].bytesperline >> 1; ++ ++ layer->planes[2].object_index = 0; ++ layer->planes[2].offset = layer->planes[1].offset + ++ ((avbuf->plane_info[0].bytesperline * ++ avbuf->context->format.fmt.pix.height) >> 2); ++ layer->planes[2].pitch = avbuf->plane_info[0].bytesperline >> 1; ++ break; ++ ++ default: ++ drm_desc->nb_layers = 0; ++ break; + } ++ ++ return (uint8_t *) drm_desc; + } + +-static int v4l2_buf_increase_ref(V4L2Buffer *in) ++static void v4l2_free_bufref(void *opaque, uint8_t *data) + { +- V4L2m2mContext *s = buf_to_m2mctx(in); ++ AVBufferRef * bufref = (AVBufferRef *)data; ++ V4L2Buffer *avbuf = (V4L2Buffer *)bufref->data; ++ struct V4L2Context *ctx = ff_weak_link_lock(&avbuf->context_wl); + +- if (in->context_ref) +- atomic_fetch_add(&in->context_refcount, 1); +- else { +- in->context_ref = av_buffer_ref(s->self_ref); +- if (!in->context_ref) +- return AVERROR(ENOMEM); ++ if (ctx != NULL) { ++ // Buffer still attached to context ++ V4L2m2mContext * const s = ctx_to_m2mctx(ctx); ++ ++ if (!s->output_drm && avbuf->dmabuf[0] != NULL) { ++ for (unsigned int i = 0; i != avbuf->num_planes; ++i) ++ dmabuf_read_end(avbuf->dmabuf[i]); ++ } ++ ++ ff_mutex_lock(&ctx->lock); ++ ++ ff_v4l2_buffer_set_avail(avbuf); ++ avbuf->buf.timestamp.tv_sec = 0; ++ avbuf->buf.timestamp.tv_usec = 0; ++ ++ if (V4L2_TYPE_IS_OUTPUT(ctx->type)) { ++ av_log(logger(avbuf), AV_LOG_DEBUG, "%s: Buffer avail\n", ctx->name); ++ } ++ else if (ctx->streamon) { ++ av_log(logger(avbuf), AV_LOG_DEBUG, "%s: Buffer requeue\n", ctx->name); ++ ff_v4l2_buffer_enqueue(avbuf); // will set to IN_DRIVER ++ } ++ else { ++ av_log(logger(avbuf), AV_LOG_DEBUG, "%s: Buffer freed but streamoff\n", ctx->name); ++ } + +- in->context_refcount = 1; ++ ff_mutex_unlock(&ctx->lock); + } + +- in->status = V4L2BUF_RET_USER; +- atomic_fetch_add_explicit(&s->refcount, 1, memory_order_relaxed); ++ ff_weak_link_unlock(avbuf->context_wl); ++ av_buffer_unref(&bufref); ++} + +- return 0; ++static inline uint32_t ff_v4l2_buf_len(const struct v4l2_buffer * b, unsigned int i) ++{ ++ return V4L2_TYPE_IS_MULTIPLANAR(b->type) ? b->m.planes[i].length : b->length; + } + +-static int v4l2_buf_to_bufref(V4L2Buffer *in, int plane, AVBufferRef **buf) ++static int v4l2_buffer_export_drm(V4L2Buffer* avbuf) + { +- int ret; ++ int i, ret; ++ const V4L2m2mContext * const s = buf_to_m2mctx(avbuf); + +- if (plane >= in->num_planes) +- return AVERROR(EINVAL); ++ for (i = 0; i < avbuf->num_planes; i++) { ++ int dma_fd = -1; ++ const uint32_t blen = ff_v4l2_buf_len(&avbuf->buf, i); + +- /* even though most encoders return 0 in data_offset encoding vp8 does require this value */ +- *buf = av_buffer_create((char *)in->plane_info[plane].mm_addr + in->planes[plane].data_offset, +- in->plane_info[plane].length, v4l2_free_buffer, in, 0); +- if (!*buf) +- return AVERROR(ENOMEM); ++ if (s->db_ctl != NULL) { ++ if ((avbuf->dmabuf[i] = dmabuf_alloc(s->db_ctl, blen)) == NULL) ++ return AVERROR(ENOMEM); ++ dma_fd = dmabuf_fd(avbuf->dmabuf[i]); ++ if (V4L2_TYPE_IS_MULTIPLANAR(avbuf->buf.type)) ++ avbuf->buf.m.planes[i].m.fd = dma_fd; ++ else ++ avbuf->buf.m.fd = dma_fd; + +- ret = v4l2_buf_increase_ref(in); +- if (ret) +- av_buffer_unref(buf); ++ if (!s->output_drm) ++ avbuf->plane_info[i].mm_addr = dmabuf_map(avbuf->dmabuf[i]); ++ } ++ else { ++ struct v4l2_exportbuffer expbuf; ++ memset(&expbuf, 0, sizeof(expbuf)); ++ ++ expbuf.index = avbuf->buf.index; ++ expbuf.type = avbuf->buf.type; ++ expbuf.plane = i; ++ ++ ret = ioctl(s->fd, VIDIOC_EXPBUF, &expbuf); ++ if (ret < 0) ++ return AVERROR(errno); ++ dma_fd = expbuf.fd; ++ } + +- return ret; ++ avbuf->drm_frame.objects[i].size = blen; ++ avbuf->drm_frame.objects[i].fd = dma_fd; ++ avbuf->drm_frame.objects[i].format_modifier = DRM_FORMAT_MOD_LINEAR; ++ } ++ ++ return 0; + } + + static int v4l2_bufref_to_buf(V4L2Buffer *out, int plane, const uint8_t* data, int size, int offset) + { + unsigned int bytesused, length; ++ int rv = 0; + + if (plane >= out->num_planes) + return AVERROR(EINVAL); +@@ -283,32 +574,61 @@ static int v4l2_bufref_to_buf(V4L2Buffer + length = out->plane_info[plane].length; + bytesused = FFMIN(size+offset, length); + +- memcpy((uint8_t*)out->plane_info[plane].mm_addr+offset, data, FFMIN(size, length-offset)); +- +- if (V4L2_TYPE_IS_MULTIPLANAR(out->buf.type)) { +- out->planes[plane].bytesused = bytesused; +- out->planes[plane].length = length; +- } else { +- out->buf.bytesused = bytesused; +- out->buf.length = length; ++ if (size > length - offset) { ++ size = length - offset; ++ rv = AVERROR(ENOMEM); + } + +- return 0; ++ memcpy((uint8_t*)out->plane_info[plane].mm_addr+offset, data, size); ++ ++ set_buf_length(out, plane, bytesused, length); ++ ++ return rv; ++} ++ ++static AVBufferRef * wrap_avbuf(V4L2Buffer * const avbuf) ++{ ++ AVBufferRef * bufref = av_buffer_ref(avbuf->context->bufrefs[avbuf->buf.index]); ++ AVBufferRef * newbuf; ++ ++ if (!bufref) ++ return NULL; ++ ++ newbuf = av_buffer_create((uint8_t *)bufref, sizeof(*bufref), v4l2_free_bufref, NULL, 0); ++ if (newbuf == NULL) ++ av_buffer_unref(&bufref); ++ ++ avbuf->status = V4L2BUF_RET_USER; ++ return newbuf; + } + + static int v4l2_buffer_buf_to_swframe(AVFrame *frame, V4L2Buffer *avbuf) + { +- int i, ret; ++ int i; + + frame->format = avbuf->context->av_pix_fmt; + +- for (i = 0; i < avbuf->num_planes; i++) { +- ret = v4l2_buf_to_bufref(avbuf, i, &frame->buf[i]); +- if (ret) +- return ret; ++ frame->buf[0] = wrap_avbuf(avbuf); ++ if (frame->buf[0] == NULL) ++ return AVERROR(ENOMEM); ++ ++ if (buf_to_m2mctx(avbuf)->output_drm) { ++ /* 1. get references to the actual data */ ++ const int rv = ff_v4l2_context_frames_set(avbuf->context); ++ if (rv != 0) ++ return rv; ++ ++ frame->data[0] = (uint8_t *) v4l2_get_drm_frame(avbuf); ++ frame->format = AV_PIX_FMT_DRM_PRIME; ++ frame->hw_frames_ctx = av_buffer_ref(avbuf->context->frames_ref); ++ return 0; ++ } ++ + ++ /* 1. get references to the actual data */ ++ for (i = 0; i < avbuf->num_planes; i++) { ++ frame->data[i] = (uint8_t *)avbuf->plane_info[i].mm_addr + avbuf->planes[i].data_offset; + frame->linesize[i] = avbuf->plane_info[i].bytesperline; +- frame->data[i] = frame->buf[i]->data; + } + + /* fixup special cases */ +@@ -317,88 +637,152 @@ static int v4l2_buffer_buf_to_swframe(AV + case AV_PIX_FMT_NV21: + if (avbuf->num_planes > 1) + break; +- frame->linesize[1] = avbuf->plane_info[0].bytesperline; +- frame->data[1] = frame->buf[0]->data + avbuf->plane_info[0].bytesperline * avbuf->context->format.fmt.pix_mp.height; ++ frame->linesize[1] = frame->linesize[0]; ++ frame->data[1] = frame->data[0] + frame->linesize[0] * ff_v4l2_get_format_height(&avbuf->context->format); + break; + + case AV_PIX_FMT_YUV420P: + if (avbuf->num_planes > 1) + break; +- frame->linesize[1] = avbuf->plane_info[0].bytesperline >> 1; +- frame->linesize[2] = avbuf->plane_info[0].bytesperline >> 1; +- frame->data[1] = frame->buf[0]->data + avbuf->plane_info[0].bytesperline * avbuf->context->format.fmt.pix_mp.height; +- frame->data[2] = frame->data[1] + ((avbuf->plane_info[0].bytesperline * avbuf->context->format.fmt.pix_mp.height) >> 2); ++ frame->linesize[1] = frame->linesize[0] / 2; ++ frame->linesize[2] = frame->linesize[1]; ++ frame->data[1] = frame->data[0] + frame->linesize[0] * ff_v4l2_get_format_height(&avbuf->context->format); ++ frame->data[2] = frame->data[1] + frame->linesize[1] * ff_v4l2_get_format_height(&avbuf->context->format) / 2; + break; + + default: + break; + } + ++ if (avbuf->dmabuf[0] != NULL) { ++ for (unsigned int i = 0; i != avbuf->num_planes; ++i) ++ dmabuf_read_start(avbuf->dmabuf[i]); ++ } ++ ++ return 0; ++} ++ ++static void cpy_2d(uint8_t * dst, int dst_stride, const uint8_t * src, int src_stride, int w, int h) ++{ ++ if (dst_stride == src_stride && w + 32 >= dst_stride) { ++ memcpy(dst, src, dst_stride * h); ++ } ++ else { ++ while (--h >= 0) { ++ memcpy(dst, src, w); ++ dst += dst_stride; ++ src += src_stride; ++ } ++ } ++} ++ ++static int is_chroma(const AVPixFmtDescriptor *desc, int i, int num_planes) ++{ ++ return i != 0 && !(i == num_planes - 1 && (desc->flags & AV_PIX_FMT_FLAG_ALPHA)); ++} ++ ++static int v4l2_buffer_primeframe_to_buf(const AVFrame *frame, V4L2Buffer *out) ++{ ++ const AVDRMFrameDescriptor *const src = (const AVDRMFrameDescriptor *)frame->data[0]; ++ ++ if (frame->format != AV_PIX_FMT_DRM_PRIME || !src) ++ return AVERROR(EINVAL); ++ ++ av_assert0(out->buf.memory == V4L2_MEMORY_DMABUF); ++ ++ if (V4L2_TYPE_IS_MULTIPLANAR(out->buf.type)) { ++ // Only currently cope with single buffer types ++ if (out->buf.length != 1) ++ return AVERROR_PATCHWELCOME; ++ if (src->nb_objects != 1) ++ return AVERROR(EINVAL); ++ ++ out->planes[0].m.fd = src->objects[0].fd; ++ } ++ else { ++ if (src->nb_objects != 1) ++ return AVERROR(EINVAL); ++ ++ out->buf.m.fd = src->objects[0].fd; ++ } ++ ++ // No need to copy src AVDescriptor and if we did then we may confuse ++ // fd close on free ++ out->ref_buf = av_buffer_ref(frame->buf[0]); ++ + return 0; + } + + static int v4l2_buffer_swframe_to_buf(const AVFrame *frame, V4L2Buffer *out) + { +- int i, ret; +- struct v4l2_format fmt = out->context->format; +- int pixel_format = V4L2_TYPE_IS_MULTIPLANAR(fmt.type) ? +- fmt.fmt.pix_mp.pixelformat : fmt.fmt.pix.pixelformat; +- int height = V4L2_TYPE_IS_MULTIPLANAR(fmt.type) ? +- fmt.fmt.pix_mp.height : fmt.fmt.pix.height; +- int is_planar_format = 0; +- +- switch (pixel_format) { +- case V4L2_PIX_FMT_YUV420M: +- case V4L2_PIX_FMT_YVU420M: +-#ifdef V4L2_PIX_FMT_YUV422M +- case V4L2_PIX_FMT_YUV422M: +-#endif +-#ifdef V4L2_PIX_FMT_YVU422M +- case V4L2_PIX_FMT_YVU422M: +-#endif +-#ifdef V4L2_PIX_FMT_YUV444M +- case V4L2_PIX_FMT_YUV444M: +-#endif +-#ifdef V4L2_PIX_FMT_YVU444M +- case V4L2_PIX_FMT_YVU444M: +-#endif +- case V4L2_PIX_FMT_NV12M: +- case V4L2_PIX_FMT_NV21M: +- case V4L2_PIX_FMT_NV12MT_16X16: +- case V4L2_PIX_FMT_NV12MT: +- case V4L2_PIX_FMT_NV16M: +- case V4L2_PIX_FMT_NV61M: +- is_planar_format = 1; +- } +- +- if (!is_planar_format) { +- const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format); +- int planes_nb = 0; +- int offset = 0; +- +- for (i = 0; i < desc->nb_components; i++) +- planes_nb = FFMAX(planes_nb, desc->comp[i].plane + 1); +- +- for (i = 0; i < planes_nb; i++) { +- int size, h = height; +- if (i == 1 || i == 2) { ++ int i; ++ int num_planes = 0; ++ int pel_strides[4] = {0}; ++ ++ const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format); ++ ++ if ((desc->flags & AV_PIX_FMT_FLAG_HWACCEL) != 0) { ++ av_log(NULL, AV_LOG_ERROR, "%s: HWACCEL cannot be copied\n", __func__); ++ return -1; ++ } ++ ++ for (i = 0; i != desc->nb_components; ++i) { ++ if (desc->comp[i].plane >= num_planes) ++ num_planes = desc->comp[i].plane + 1; ++ pel_strides[desc->comp[i].plane] = desc->comp[i].step; ++ } ++ ++ if (out->num_planes > 1) { ++ if (num_planes != out->num_planes) { ++ av_log(NULL, AV_LOG_ERROR, "%s: Num planes mismatch: %d != %d\n", __func__, num_planes, out->num_planes); ++ return -1; ++ } ++ for (i = 0; i != num_planes; ++i) { ++ int w = frame->width; ++ int h = frame->height; ++ if (is_chroma(desc, i, num_planes)) { ++ w = AV_CEIL_RSHIFT(w, desc->log2_chroma_w); + h = AV_CEIL_RSHIFT(h, desc->log2_chroma_h); + } +- size = frame->linesize[i] * h; +- ret = v4l2_bufref_to_buf(out, 0, frame->data[i], size, offset); +- if (ret) +- return ret; +- offset += size; ++ ++ cpy_2d(out->plane_info[i].mm_addr, out->plane_info[i].bytesperline, ++ frame->data[i], frame->linesize[i], ++ w * pel_strides[i], h); ++ set_buf_length(out, i, out->plane_info[i].bytesperline * h, out->plane_info[i].length); + } +- return 0; + } ++ else ++ { ++ unsigned int offset = 0; ++ ++ for (i = 0; i != num_planes; ++i) { ++ int w = frame->width; ++ int h = frame->height; ++ int dst_stride = out->plane_info[0].bytesperline; ++ uint8_t * const dst = (uint8_t *)out->plane_info[0].mm_addr + offset; ++ ++ if (is_chroma(desc, i, num_planes)) { ++ // Is chroma ++ dst_stride >>= desc->log2_chroma_w; ++ offset += dst_stride * (out->context->height >> desc->log2_chroma_h); ++ w = AV_CEIL_RSHIFT(w, desc->log2_chroma_w); ++ h = AV_CEIL_RSHIFT(h, desc->log2_chroma_h); ++ } ++ else { ++ // Is luma or alpha ++ offset += dst_stride * out->context->height; ++ } ++ if (offset > out->plane_info[0].length) { ++ av_log(NULL, AV_LOG_ERROR, "%s: Plane total %u > buffer size %zu\n", __func__, offset, out->plane_info[0].length); ++ return -1; ++ } + +- for (i = 0; i < out->num_planes; i++) { +- ret = v4l2_bufref_to_buf(out, i, frame->buf[i]->data, frame->buf[i]->size, 0); +- if (ret) +- return ret; ++ cpy_2d(dst, dst_stride, ++ frame->data[i], frame->linesize[i], ++ w * pel_strides[i], h); ++ } ++ set_buf_length(out, 0, offset, out->plane_info[0].length); + } +- + return 0; + } + +@@ -408,16 +792,31 @@ static int v4l2_buffer_swframe_to_buf(co + * + ******************************************************************************/ + +-int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out) ++int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out, const int64_t track_ts) + { +- v4l2_set_pts(out, frame->pts); +- +- return v4l2_buffer_swframe_to_buf(frame, out); ++ out->buf.flags = frame->key_frame ? ++ (out->buf.flags | V4L2_BUF_FLAG_KEYFRAME) : ++ (out->buf.flags & ~V4L2_BUF_FLAG_KEYFRAME); ++ // Beware that colour info is held in format rather than the actual ++ // v4l2 buffer struct so this may not be as useful as you might hope ++ v4l2_set_color(out, frame->color_primaries, frame->colorspace, frame->color_trc); ++ v4l2_set_color_range(out, frame->color_range); ++ // PTS & interlace are buffer vars ++ if (track_ts) ++ out->buf.timestamp = tv_from_int(track_ts); ++ else ++ v4l2_set_pts(out, frame->pts); ++ v4l2_set_interlace(out, frame->interlaced_frame, frame->top_field_first); ++ ++ return frame->format == AV_PIX_FMT_DRM_PRIME ? ++ v4l2_buffer_primeframe_to_buf(frame, out) : ++ v4l2_buffer_swframe_to_buf(frame, out); + } + + int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf) + { + int ret; ++ V4L2Context * const ctx = avbuf->context; + + av_frame_unref(frame); + +@@ -428,17 +827,32 @@ int ff_v4l2_buffer_buf_to_avframe(AVFram + + /* 2. get frame information */ + frame->key_frame = !!(avbuf->buf.flags & V4L2_BUF_FLAG_KEYFRAME); ++ frame->pict_type = frame->key_frame ? AV_PICTURE_TYPE_I : ++ (avbuf->buf.flags & V4L2_BUF_FLAG_PFRAME) != 0 ? AV_PICTURE_TYPE_P : ++ (avbuf->buf.flags & V4L2_BUF_FLAG_BFRAME) != 0 ? AV_PICTURE_TYPE_B : ++ AV_PICTURE_TYPE_NONE; + frame->color_primaries = v4l2_get_color_primaries(avbuf); + frame->colorspace = v4l2_get_color_space(avbuf); + frame->color_range = v4l2_get_color_range(avbuf); + frame->color_trc = v4l2_get_color_trc(avbuf); + frame->pts = v4l2_get_pts(avbuf); + frame->pkt_dts = AV_NOPTS_VALUE; ++ frame->interlaced_frame = v4l2_buf_is_interlaced(avbuf); ++ frame->top_field_first = v4l2_buf_is_top_first(avbuf); + + /* these values are updated also during re-init in v4l2_process_driver_event */ +- frame->height = avbuf->context->height; +- frame->width = avbuf->context->width; +- frame->sample_aspect_ratio = avbuf->context->sample_aspect_ratio; ++ frame->height = ctx->height; ++ frame->width = ctx->width; ++ frame->sample_aspect_ratio = ctx->sample_aspect_ratio; ++ ++ if (ctx->selection.height && ctx->selection.width) { ++ frame->crop_left = ctx->selection.left < frame->width ? ctx->selection.left : 0; ++ frame->crop_top = ctx->selection.top < frame->height ? ctx->selection.top : 0; ++ frame->crop_right = ctx->selection.left + ctx->selection.width < frame->width ? ++ frame->width - (ctx->selection.left + ctx->selection.width) : 0; ++ frame->crop_bottom = ctx->selection.top + ctx->selection.height < frame->height ? ++ frame->height - (ctx->selection.top + ctx->selection.height) : 0; ++ } + + /* 3. report errors upstream */ + if (avbuf->buf.flags & V4L2_BUF_FLAG_ERROR) { +@@ -451,15 +865,15 @@ int ff_v4l2_buffer_buf_to_avframe(AVFram + + int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *avbuf) + { +- int ret; +- + av_packet_unref(pkt); +- ret = v4l2_buf_to_bufref(avbuf, 0, &pkt->buf); +- if (ret) +- return ret; ++ ++ pkt->buf = wrap_avbuf(avbuf); ++ if (pkt->buf == NULL) ++ return AVERROR(ENOMEM); + + pkt->size = V4L2_TYPE_IS_MULTIPLANAR(avbuf->buf.type) ? avbuf->buf.m.planes[0].bytesused : avbuf->buf.bytesused; +- pkt->data = pkt->buf->data; ++ pkt->data = (uint8_t*)avbuf->plane_info[0].mm_addr + avbuf->planes[0].data_offset; ++ pkt->flags = 0; + + if (avbuf->buf.flags & V4L2_BUF_FLAG_KEYFRAME) + pkt->flags |= AV_PKT_FLAG_KEY; +@@ -474,39 +888,108 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket + return 0; + } + +-int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out) ++int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket * const pkt, V4L2Buffer * const out, ++ const void *extdata, size_t extlen, ++ const int64_t timestamp) + { + int ret; + +- ret = v4l2_bufref_to_buf(out, 0, pkt->data, pkt->size, 0); +- if (ret) ++ if (extlen) { ++ ret = v4l2_bufref_to_buf(out, 0, extdata, extlen, 0); ++ if (ret) ++ return ret; ++ } ++ ++ ret = v4l2_bufref_to_buf(out, 0, pkt->data, pkt->size, extlen); ++ if (ret && ret != AVERROR(ENOMEM)) + return ret; + +- v4l2_set_pts(out, pkt->pts); ++ if (timestamp) ++ out->buf.timestamp = tv_from_int(timestamp); ++ else ++ v4l2_set_pts(out, pkt->pts); ++ ++ out->buf.flags = (pkt->flags & AV_PKT_FLAG_KEY) != 0 ? ++ (out->buf.flags | V4L2_BUF_FLAG_KEYFRAME) : ++ (out->buf.flags & ~V4L2_BUF_FLAG_KEYFRAME); + +- if (pkt->flags & AV_PKT_FLAG_KEY) +- out->flags = V4L2_BUF_FLAG_KEYFRAME; ++ return ret; ++} + +- return 0; ++int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out) ++{ ++ return ff_v4l2_buffer_avpkt_to_buf_ext(pkt, out, NULL, 0, 0); ++} ++ ++ ++static void v4l2_buffer_buffer_free(void *opaque, uint8_t *data) ++{ ++ V4L2Buffer * const avbuf = (V4L2Buffer *)data; ++ int i; ++ ++ for (i = 0; i != FF_ARRAY_ELEMS(avbuf->plane_info); ++i) { ++ struct V4L2Plane_info *p = avbuf->plane_info + i; ++ if (p->mm_addr != NULL) ++ munmap(p->mm_addr, p->length); ++ } ++ ++ if (avbuf->dmabuf[0] == NULL) { ++ for (i = 0; i != FF_ARRAY_ELEMS(avbuf->drm_frame.objects); ++i) { ++ if (avbuf->drm_frame.objects[i].fd != -1) ++ close(avbuf->drm_frame.objects[i].fd); ++ } ++ } ++ else { ++ for (i = 0; i != FF_ARRAY_ELEMS(avbuf->dmabuf); ++i) { ++ dmabuf_free(avbuf->dmabuf[i]); ++ } ++ } ++ ++ av_buffer_unref(&avbuf->ref_buf); ++ ++ ff_weak_link_unref(&avbuf->context_wl); ++ ++ av_free(avbuf); + } + +-int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index) ++ ++int ff_v4l2_buffer_initialize(AVBufferRef ** pbufref, int index, V4L2Context *ctx, enum v4l2_memory mem) + { +- V4L2Context *ctx = avbuf->context; + int ret, i; ++ V4L2Buffer * const avbuf = av_mallocz(sizeof(*avbuf)); ++ AVBufferRef * bufref; ++ V4L2m2mContext * const s = ctx_to_m2mctx(ctx); ++ int want_mmap; + +- avbuf->buf.memory = V4L2_MEMORY_MMAP; ++ *pbufref = NULL; ++ if (avbuf == NULL) ++ return AVERROR(ENOMEM); ++ ++ bufref = av_buffer_create((uint8_t*)avbuf, sizeof(*avbuf), v4l2_buffer_buffer_free, NULL, 0); ++ if (bufref == NULL) { ++ av_free(avbuf); ++ return AVERROR(ENOMEM); ++ } ++ ++ avbuf->context = ctx; ++ avbuf->buf.memory = mem; + avbuf->buf.type = ctx->type; + avbuf->buf.index = index; + ++ for (i = 0; i != FF_ARRAY_ELEMS(avbuf->drm_frame.objects); ++i) { ++ avbuf->drm_frame.objects[i].fd = -1; ++ } ++ ++ avbuf->context_wl = ff_weak_link_ref(ctx->wl_master); ++ + if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { + avbuf->buf.length = VIDEO_MAX_PLANES; + avbuf->buf.m.planes = avbuf->planes; + } + +- ret = ioctl(buf_to_m2mctx(avbuf)->fd, VIDIOC_QUERYBUF, &avbuf->buf); ++ ret = ioctl(s->fd, VIDIOC_QUERYBUF, &avbuf->buf); + if (ret < 0) +- return AVERROR(errno); ++ goto fail; + + if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { + avbuf->num_planes = 0; +@@ -518,33 +1001,41 @@ int ff_v4l2_buffer_initialize(V4L2Buffer + } else + avbuf->num_planes = 1; + +- for (i = 0; i < avbuf->num_planes; i++) { ++ want_mmap = avbuf->buf.memory == V4L2_MEMORY_MMAP && ++ (V4L2_TYPE_IS_OUTPUT(ctx->type) || !buf_to_m2mctx(avbuf)->output_drm); + ++ for (i = 0; i < avbuf->num_planes; i++) { + avbuf->plane_info[i].bytesperline = V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? + ctx->format.fmt.pix_mp.plane_fmt[i].bytesperline : + ctx->format.fmt.pix.bytesperline; + + if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { + avbuf->plane_info[i].length = avbuf->buf.m.planes[i].length; +- avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.m.planes[i].length, +- PROT_READ | PROT_WRITE, MAP_SHARED, +- buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.planes[i].m.mem_offset); ++ avbuf->plane_info[i].offset = avbuf->buf.m.planes[i].data_offset; ++ ++ if (want_mmap) ++ avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.m.planes[i].length, ++ PROT_READ | PROT_WRITE, MAP_SHARED, ++ buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.planes[i].m.mem_offset); + } else { + avbuf->plane_info[i].length = avbuf->buf.length; +- avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.length, +- PROT_READ | PROT_WRITE, MAP_SHARED, +- buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.offset); ++ avbuf->plane_info[i].offset = 0; ++ ++ if (want_mmap) ++ avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.length, ++ PROT_READ | PROT_WRITE, MAP_SHARED, ++ buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.offset); + } + +- if (avbuf->plane_info[i].mm_addr == MAP_FAILED) +- return AVERROR(ENOMEM); ++ if (avbuf->plane_info[i].mm_addr == MAP_FAILED) { ++ avbuf->plane_info[i].mm_addr = NULL; ++ ret = AVERROR(ENOMEM); ++ goto fail; ++ } + } + + avbuf->status = V4L2BUF_AVAILABLE; + +- if (V4L2_TYPE_IS_OUTPUT(ctx->type)) +- return 0; +- + if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { + avbuf->buf.m.planes = avbuf->planes; + avbuf->buf.length = avbuf->num_planes; +@@ -554,20 +1045,52 @@ int ff_v4l2_buffer_initialize(V4L2Buffer + avbuf->buf.length = avbuf->planes[0].length; + } + +- return ff_v4l2_buffer_enqueue(avbuf); ++ if (V4L2_TYPE_IS_CAPTURE(ctx->type) && !want_mmap) { ++ // export_drm does dmabuf alloc if we aren't using v4l2 alloc ++ ret = v4l2_buffer_export_drm(avbuf); ++ if (ret) { ++ av_log(logger(avbuf), AV_LOG_ERROR, "Failed to get exported drm handles\n"); ++ goto fail; ++ } ++ } ++ ++ *pbufref = bufref; ++ return 0; ++ ++fail: ++ av_buffer_unref(&bufref); ++ return ret; + } + + int ff_v4l2_buffer_enqueue(V4L2Buffer* avbuf) + { + int ret; ++ int qc; + +- avbuf->buf.flags = avbuf->flags; ++ if (avbuf->buf.timestamp.tv_sec || avbuf->buf.timestamp.tv_usec) { ++ av_log(logger(avbuf), AV_LOG_DEBUG, "--- %s pre VIDIOC_QBUF: index %d, ts=%ld.%06ld count=%d\n", ++ avbuf->context->name, avbuf->buf.index, ++ avbuf->buf.timestamp.tv_sec, avbuf->buf.timestamp.tv_usec, ++ avbuf->context->q_count); ++ } + + ret = ioctl(buf_to_m2mctx(avbuf)->fd, VIDIOC_QBUF, &avbuf->buf); +- if (ret < 0) +- return AVERROR(errno); ++ if (ret < 0) { ++ int err = errno; ++ av_log(logger(avbuf), AV_LOG_ERROR, "--- %s VIDIOC_QBUF: index %d FAIL err %d (%s)\n", ++ avbuf->context->name, avbuf->buf.index, ++ err, strerror(err)); ++ return AVERROR(err); ++ } + ++ // Lock not wanted - if called from buffer free then lock already obtained ++ qc = atomic_fetch_add(&avbuf->context->q_count, 1) + 1; + avbuf->status = V4L2BUF_IN_DRIVER; ++ pthread_cond_broadcast(&avbuf->context->cond); ++ ++ av_log(logger(avbuf), AV_LOG_DEBUG, "--- %s VIDIOC_QBUF: index %d, ts=%ld.%06ld count=%d\n", ++ avbuf->context->name, avbuf->buf.index, ++ avbuf->buf.timestamp.tv_sec, avbuf->buf.timestamp.tv_usec, qc); + + return 0; + } +--- a/libavcodec/v4l2_buffers.h ++++ b/libavcodec/v4l2_buffers.h +@@ -28,31 +28,47 @@ + #include <stddef.h> + #include <linux/videodev2.h> + ++#include "avcodec.h" + #include "libavutil/buffer.h" + #include "libavutil/frame.h" ++#include "libavutil/hwcontext_drm.h" + #include "packet.h" + + enum V4L2Buffer_status { + V4L2BUF_AVAILABLE, + V4L2BUF_IN_DRIVER, ++ V4L2BUF_IN_USE, + V4L2BUF_RET_USER, + }; + + /** + * V4L2Buffer (wrapper for v4l2_buffer management) + */ ++struct V4L2Context; ++struct ff_weak_link_client; ++struct dmabuf_h; ++ + typedef struct V4L2Buffer { +- /* each buffer needs to have a reference to its context */ ++ /* each buffer needs to have a reference to its context ++ * The pointer is good enough for most operation but once the buffer has ++ * been passed to the user the buffer may become orphaned so for free ops ++ * the weak link must be used to ensure that the context is actually ++ * there ++ */ + struct V4L2Context *context; ++ struct ff_weak_link_client *context_wl; + +- /* This object is refcounted per-plane, so we need to keep track +- * of how many context-refs we are holding. */ +- AVBufferRef *context_ref; +- atomic_uint context_refcount; ++ /* DRM descriptor */ ++ AVDRMFrameDescriptor drm_frame; ++ /* For DRM_PRIME encode - need to keep a ref to the source buffer till we ++ * are done ++ */ ++ AVBufferRef * ref_buf; + + /* keep track of the mmap address and mmap length */ + struct V4L2Plane_info { +- int bytesperline; ++ size_t bytesperline; ++ size_t offset; + void * mm_addr; + size_t length; + } plane_info[VIDEO_MAX_PLANES]; +@@ -63,9 +79,9 @@ typedef struct V4L2Buffer { + struct v4l2_buffer buf; + struct v4l2_plane planes[VIDEO_MAX_PLANES]; + +- int flags; + enum V4L2Buffer_status status; + ++ struct dmabuf_h * dmabuf[VIDEO_MAX_PLANES]; // If externally alloced dmabufs - stash other info here + } V4L2Buffer; + + /** +@@ -101,6 +117,10 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket + */ + int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out); + ++int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket * const pkt, V4L2Buffer * const out, ++ const void *extdata, size_t extlen, ++ const int64_t timestamp); ++ + /** + * Extracts the data from an AVFrame to a V4L2Buffer + * +@@ -109,7 +129,7 @@ int ff_v4l2_buffer_avpkt_to_buf(const AV + * + * @returns 0 in case of success, a negative AVERROR code otherwise + */ +-int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out); ++int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out, const int64_t track_ts); + + /** + * Initializes a V4L2Buffer +@@ -119,7 +139,7 @@ int ff_v4l2_buffer_avframe_to_buf(const + * + * @returns 0 in case of success, a negative AVERROR code otherwise + */ +-int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index); ++int ff_v4l2_buffer_initialize(AVBufferRef **avbuf, int index, struct V4L2Context *ctx, enum v4l2_memory mem); + + /** + * Enqueues a V4L2Buffer +@@ -130,5 +150,12 @@ int ff_v4l2_buffer_initialize(V4L2Buffer + */ + int ff_v4l2_buffer_enqueue(V4L2Buffer* avbuf); + ++static inline void ++ff_v4l2_buffer_set_avail(V4L2Buffer* const avbuf) ++{ ++ avbuf->status = V4L2BUF_AVAILABLE; ++ av_buffer_unref(&avbuf->ref_buf); ++} ++ + + #endif // AVCODEC_V4L2_BUFFERS_H +--- a/libavcodec/v4l2_context.c ++++ b/libavcodec/v4l2_context.c +@@ -27,11 +27,14 @@ + #include <unistd.h> + #include <fcntl.h> + #include <poll.h> ++#include "libavutil/avassert.h" ++#include "libavutil/pixdesc.h" + #include "libavcodec/avcodec.h" + #include "libavcodec/internal.h" + #include "v4l2_buffers.h" + #include "v4l2_fmt.h" + #include "v4l2_m2m.h" ++#include "weak_link.h" + + struct v4l2_format_update { + uint32_t v4l2_fmt; +@@ -41,26 +44,168 @@ struct v4l2_format_update { + int update_avfmt; + }; + +-static inline V4L2m2mContext *ctx_to_m2mctx(V4L2Context *ctx) ++ ++static inline int64_t track_to_pts(AVCodecContext *avctx, unsigned int n) + { +- return V4L2_TYPE_IS_OUTPUT(ctx->type) ? +- container_of(ctx, V4L2m2mContext, output) : +- container_of(ctx, V4L2m2mContext, capture); ++ return (int64_t)n; + } + +-static inline AVCodecContext *logger(V4L2Context *ctx) ++static inline unsigned int pts_to_track(AVCodecContext *avctx, const int64_t pts) + { +- return ctx_to_m2mctx(ctx)->avctx; ++ return (unsigned int)pts; ++} ++ ++// FFmpeg requires us to propagate a number of vars from the coded pkt into ++// the decoded frame. The only thing that tracks like that in V4L2 stateful ++// is timestamp. PTS maps to timestamp for this decode. FFmpeg makes no ++// guarantees about PTS being unique or specified for every frame so replace ++// the supplied PTS with a simple incrementing number and keep a circular ++// buffer of all the things we want preserved (including the original PTS) ++// indexed by the tracking no. ++static int64_t ++xlat_pts_pkt_in(AVCodecContext *const avctx, xlat_track_t *const x, const AVPacket *const avpkt) ++{ ++ int64_t track_pts; ++ ++ // Avoid 0 ++ if (++x->track_no == 0) ++ x->track_no = 1; ++ ++ track_pts = track_to_pts(avctx, x->track_no); ++ ++ av_log(avctx, AV_LOG_TRACE, "In pkt PTS=%" PRId64 ", DTS=%" PRId64 ", track=%" PRId64 ", n=%u\n", avpkt->pts, avpkt->dts, track_pts, x->track_no); ++ x->track_els[x->track_no % FF_V4L2_M2M_TRACK_SIZE] = (V4L2m2mTrackEl){ ++ .discard = 0, ++ .pending = 1, ++ .pkt_size = avpkt->size, ++ .pts = avpkt->pts, ++ .dts = avpkt->dts, ++ .reordered_opaque = avctx->reordered_opaque, ++ .pkt_pos = avpkt->pos, ++ .pkt_duration = avpkt->duration, ++ .track_pts = track_pts ++ }; ++ return track_pts; ++} ++ ++static int64_t ++xlat_pts_frame_in(AVCodecContext *const avctx, xlat_track_t *const x, const AVFrame *const frame) ++{ ++ int64_t track_pts; ++ ++ // Avoid 0 ++ if (++x->track_no == 0) ++ x->track_no = 1; ++ ++ track_pts = track_to_pts(avctx, x->track_no); ++ ++ av_log(avctx, AV_LOG_TRACE, "In frame PTS=%" PRId64 ", track=%" PRId64 ", n=%u\n", frame->pts, track_pts, x->track_no); ++ x->track_els[x->track_no % FF_V4L2_M2M_TRACK_SIZE] = (V4L2m2mTrackEl){ ++ .discard = 0, ++ .pending = 1, ++ .pkt_size = 0, ++ .pts = frame->pts, ++ .dts = AV_NOPTS_VALUE, ++ .reordered_opaque = frame->reordered_opaque, ++ .pkt_pos = frame->pkt_pos, ++ .pkt_duration = frame->pkt_duration, ++ .track_pts = track_pts ++ }; ++ return track_pts; ++} ++ ++ ++// Returns -1 if we should discard the frame ++static int ++xlat_pts_frame_out(AVCodecContext *const avctx, ++ xlat_track_t * const x, ++ AVFrame *const frame) ++{ ++ unsigned int n = pts_to_track(avctx, frame->pts) % FF_V4L2_M2M_TRACK_SIZE; ++ V4L2m2mTrackEl *const t = x->track_els + n; ++ if (frame->pts == AV_NOPTS_VALUE || frame->pts != t->track_pts) ++ { ++ av_log(avctx, frame->pts == AV_NOPTS_VALUE ? AV_LOG_DEBUG : AV_LOG_WARNING, ++ "Frame tracking failure: pts=%" PRId64 ", track[%d]=%" PRId64 "\n", frame->pts, n, t->track_pts); ++ frame->pts = AV_NOPTS_VALUE; ++ frame->pkt_dts = AV_NOPTS_VALUE; ++ frame->reordered_opaque = x->last_opaque; ++ frame->pkt_pos = -1; ++ frame->pkt_duration = 0; ++ frame->pkt_size = -1; ++ } ++ else if (!t->discard) ++ { ++ frame->pts = t->pending ? t->pts : AV_NOPTS_VALUE; ++ frame->pkt_dts = t->dts; ++ frame->reordered_opaque = t->reordered_opaque; ++ frame->pkt_pos = t->pkt_pos; ++ frame->pkt_duration = t->pkt_duration; ++ frame->pkt_size = t->pkt_size; ++ ++ x->last_opaque = x->track_els[n].reordered_opaque; ++ if (frame->pts != AV_NOPTS_VALUE) ++ x->last_pts = frame->pts; ++ t->pending = 0; ++ } ++ else ++ { ++ av_log(avctx, AV_LOG_DEBUG, "Discard frame (flushed): pts=%" PRId64 ", track[%d]=%" PRId64 "\n", frame->pts, n, t->track_pts); ++ return -1; ++ } ++ ++ av_log(avctx, AV_LOG_TRACE, "Out frame PTS=%" PRId64 "/%"PRId64", DTS=%" PRId64 ", track=%"PRId64", n=%d\n", ++ frame->pts, frame->best_effort_timestamp, frame->pkt_dts, t->track_pts, n); ++ return 0; ++} ++ ++// Returns -1 if we should discard the frame ++static int ++xlat_pts_pkt_out(AVCodecContext *const avctx, ++ xlat_track_t * const x, ++ AVPacket *const pkt) ++{ ++ unsigned int n = pts_to_track(avctx, pkt->pts) % FF_V4L2_M2M_TRACK_SIZE; ++ V4L2m2mTrackEl *const t = x->track_els + n; ++ if (pkt->pts == AV_NOPTS_VALUE || pkt->pts != t->track_pts) ++ { ++ av_log(avctx, pkt->pts == AV_NOPTS_VALUE ? AV_LOG_DEBUG : AV_LOG_WARNING, ++ "Pkt tracking failure: pts=%" PRId64 ", track[%d]=%" PRId64 "\n", pkt->pts, n, t->track_pts); ++ pkt->pts = AV_NOPTS_VALUE; ++ } ++ else if (!t->discard) ++ { ++ pkt->pts = t->pending ? t->pts : AV_NOPTS_VALUE; ++ ++ x->last_opaque = x->track_els[n].reordered_opaque; ++ if (pkt->pts != AV_NOPTS_VALUE) ++ x->last_pts = pkt->pts; ++ t->pending = 0; ++ } ++ else ++ { ++ av_log(avctx, AV_LOG_DEBUG, "Discard packet (flushed): pts=%" PRId64 ", track[%d]=%" PRId64 "\n", pkt->pts, n, t->track_pts); ++ return -1; ++ } ++ ++ // * Would like something much better than this...xlat(offset + out_count)? ++ pkt->dts = pkt->pts; ++ av_log(avctx, AV_LOG_TRACE, "Out pkt PTS=%" PRId64 ", track=%"PRId64", n=%d\n", ++ pkt->pts, t->track_pts, n); ++ return 0; + } + +-static inline unsigned int v4l2_get_width(struct v4l2_format *fmt) ++ ++static inline V4L2m2mContext *ctx_to_m2mctx(const V4L2Context *ctx) + { +- return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.width : fmt->fmt.pix.width; ++ return V4L2_TYPE_IS_OUTPUT(ctx->type) ? ++ container_of(ctx, V4L2m2mContext, output) : ++ container_of(ctx, V4L2m2mContext, capture); + } + +-static inline unsigned int v4l2_get_height(struct v4l2_format *fmt) ++static inline AVCodecContext *logger(const V4L2Context *ctx) + { +- return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.height : fmt->fmt.pix.height; ++ return ctx_to_m2mctx(ctx)->avctx; + } + + static AVRational v4l2_get_sar(V4L2Context *ctx) +@@ -81,21 +226,29 @@ static AVRational v4l2_get_sar(V4L2Conte + return sar; + } + +-static inline unsigned int v4l2_resolution_changed(V4L2Context *ctx, struct v4l2_format *fmt2) ++static inline int ctx_buffers_alloced(const V4L2Context * const ctx) ++{ ++ return ctx->bufrefs != NULL; ++} ++ ++// Width/Height changed or we don't have an alloc in the first place? ++static int ctx_resolution_changed(const V4L2Context *ctx, const struct v4l2_format *fmt2) + { +- struct v4l2_format *fmt1 = &ctx->format; +- int ret = V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? +- fmt1->fmt.pix_mp.width != fmt2->fmt.pix_mp.width || +- fmt1->fmt.pix_mp.height != fmt2->fmt.pix_mp.height +- : +- fmt1->fmt.pix.width != fmt2->fmt.pix.width || +- fmt1->fmt.pix.height != fmt2->fmt.pix.height; ++ const struct v4l2_format *fmt1 = &ctx->format; ++ int ret = !ctx_buffers_alloced(ctx) || ++ (V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? ++ fmt1->fmt.pix_mp.width != fmt2->fmt.pix_mp.width || ++ fmt1->fmt.pix_mp.height != fmt2->fmt.pix_mp.height ++ : ++ fmt1->fmt.pix.width != fmt2->fmt.pix.width || ++ fmt1->fmt.pix.height != fmt2->fmt.pix.height); + + if (ret) +- av_log(logger(ctx), AV_LOG_DEBUG, "%s changed (%dx%d) -> (%dx%d)\n", ++ av_log(logger(ctx), AV_LOG_DEBUG, "V4L2 %s changed: alloc=%d (%dx%d) -> (%dx%d)\n", + ctx->name, +- v4l2_get_width(fmt1), v4l2_get_height(fmt1), +- v4l2_get_width(fmt2), v4l2_get_height(fmt2)); ++ ctx_buffers_alloced(ctx), ++ ff_v4l2_get_format_width(fmt1), ff_v4l2_get_format_height(fmt1), ++ ff_v4l2_get_format_width(fmt2), ff_v4l2_get_format_height(fmt2)); + + return ret; + } +@@ -153,76 +306,100 @@ static inline void v4l2_save_to_context( + } + } + +-static int v4l2_start_decode(V4L2Context *ctx) ++static int get_default_selection(V4L2Context * const ctx, struct v4l2_rect *r) + { +- struct v4l2_decoder_cmd cmd = { +- .cmd = V4L2_DEC_CMD_START, +- .flags = 0, ++ V4L2m2mContext * const s = ctx_to_m2mctx(ctx); ++ struct v4l2_selection selection = { ++ .type = V4L2_BUF_TYPE_VIDEO_CAPTURE, ++ .target = V4L2_SEL_TGT_COMPOSE + }; +- int ret; + +- ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_DECODER_CMD, &cmd); +- if (ret) ++ memset(r, 0, sizeof(*r)); ++ if (ioctl(s->fd, VIDIOC_G_SELECTION, &selection)) + return AVERROR(errno); + ++ *r = selection.r; + return 0; + } + +-/** +- * handle resolution change event and end of stream event +- * returns 1 if reinit was successful, negative if it failed +- * returns 0 if reinit was not executed +- */ +-static int v4l2_handle_event(V4L2Context *ctx) ++static int do_source_change(V4L2m2mContext * const s) + { +- V4L2m2mContext *s = ctx_to_m2mctx(ctx); +- struct v4l2_format cap_fmt = s->capture.format; +- struct v4l2_event evt = { 0 }; ++ AVCodecContext *const avctx = s->avctx; ++ + int ret; ++ int reinit; ++ struct v4l2_format cap_fmt = s->capture.format; + +- ret = ioctl(s->fd, VIDIOC_DQEVENT, &evt); +- if (ret < 0) { +- av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_DQEVENT\n", ctx->name); +- return 0; +- } ++ s->capture.done = 0; + +- if (evt.type == V4L2_EVENT_EOS) { +- ctx->done = 1; ++ ret = ioctl(s->fd, VIDIOC_G_FMT, &cap_fmt); ++ if (ret) { ++ av_log(avctx, AV_LOG_ERROR, "%s VIDIOC_G_FMT failed\n", s->capture.name); + return 0; + } + +- if (evt.type != V4L2_EVENT_SOURCE_CHANGE) +- return 0; ++ get_default_selection(&s->capture, &s->capture.selection); + +- ret = ioctl(s->fd, VIDIOC_G_FMT, &cap_fmt); +- if (ret) { +- av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_G_FMT\n", s->capture.name); +- return 0; ++ reinit = ctx_resolution_changed(&s->capture, &cap_fmt); ++ if ((s->quirks & FF_V4L2_QUIRK_REINIT_ALWAYS) != 0) ++ reinit = 1; ++ ++ s->capture.format = cap_fmt; ++ if (reinit) { ++ s->capture.height = ff_v4l2_get_format_height(&cap_fmt); ++ s->capture.width = ff_v4l2_get_format_width(&cap_fmt); + } + +- if (v4l2_resolution_changed(&s->capture, &cap_fmt)) { +- s->capture.height = v4l2_get_height(&cap_fmt); +- s->capture.width = v4l2_get_width(&cap_fmt); +- s->capture.sample_aspect_ratio = v4l2_get_sar(&s->capture); +- } else { +- v4l2_start_decode(ctx); +- return 0; ++ // If we don't support selection (or it is bust) and we obviously have HD then kludge ++ if ((s->capture.selection.width == 0 || s->capture.selection.height == 0) && ++ (s->capture.height == 1088 && s->capture.width == 1920)) { ++ s->capture.selection = (struct v4l2_rect){.width = 1920, .height = 1080}; + } + +- s->reinit = 1; ++ s->capture.sample_aspect_ratio = v4l2_get_sar(&s->capture); + +- if (s->avctx) +- ret = ff_set_dimensions(s->avctx, s->capture.width, s->capture.height); +- if (ret < 0) +- av_log(logger(ctx), AV_LOG_WARNING, "update avcodec height and width\n"); ++ av_log(avctx, AV_LOG_DEBUG, "Source change: Fmt: %s, SAR: %d/%d, wxh %dx%d crop %dx%d @ %d,%d, reinit=%d\n", ++ av_fourcc2str(ff_v4l2_get_format_pixelformat(&cap_fmt)), ++ s->capture.sample_aspect_ratio.num, s->capture.sample_aspect_ratio.den, ++ s->capture.width, s->capture.height, ++ s->capture.selection.width, s->capture.selection.height, ++ s->capture.selection.left, s->capture.selection.top, reinit); + +- ret = ff_v4l2_m2m_codec_reinit(s); +- if (ret) { +- av_log(logger(ctx), AV_LOG_ERROR, "v4l2_m2m_codec_reinit\n"); +- return AVERROR(EINVAL); ++ ret = ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMOFF); ++ if (ret) ++ av_log(avctx, AV_LOG_ERROR, "capture VIDIOC_STREAMOFF failed\n"); ++ s->draining = 0; ++ ++ if (!reinit) { ++ /* Buffers are OK so just stream off to ack */ ++ av_log(avctx, AV_LOG_DEBUG, "%s: Parameters only - restart decode\n", __func__); ++ } ++ else { ++ if (avctx) ++ ret = ff_set_dimensions(s->avctx, ++ s->capture.selection.width != 0 ? s->capture.selection.width : s->capture.width, ++ s->capture.selection.height != 0 ? s->capture.selection.height : s->capture.height); ++ if (ret < 0) ++ av_log(avctx, AV_LOG_WARNING, "update avcodec height and width failed\n"); ++ ++ ff_v4l2_context_release(&s->capture); ++ ++ if (s->capture.width > ff_v4l2_get_format_width(&s->capture.format) || ++ s->capture.height > ff_v4l2_get_format_height(&s->capture.format)) { ++ av_log(avctx, AV_LOG_ERROR, "Format post reinit too small: wanted %dx%d > got %dx%d\n", ++ s->capture.width, s->capture.height, ++ ff_v4l2_get_format_width(&s->capture.format), ff_v4l2_get_format_height(&s->capture.format)); ++ return AVERROR(EINVAL); ++ } ++ ++ // Update pixel format - should only actually do something on initial change ++ s->capture.av_pix_fmt = ++ ff_v4l2_format_v4l2_to_avfmt(ff_v4l2_get_format_pixelformat(&s->capture.format), AV_CODEC_ID_RAWVIDEO); ++ avctx->pix_fmt = s->output_drm ? AV_PIX_FMT_DRM_PRIME : s->capture.av_pix_fmt; ++ avctx->sw_pix_fmt = s->capture.av_pix_fmt; + } + +- /* reinit executed */ ++ ret = ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMON); + return 1; + } + +@@ -266,171 +443,293 @@ static int v4l2_stop_encode(V4L2Context + return 0; + } + +-static V4L2Buffer* v4l2_dequeue_v4l2buf(V4L2Context *ctx, int timeout) +-{ +- struct v4l2_plane planes[VIDEO_MAX_PLANES]; +- struct v4l2_buffer buf = { 0 }; +- V4L2Buffer *avbuf; +- struct pollfd pfd = { +- .events = POLLIN | POLLRDNORM | POLLPRI | POLLOUT | POLLWRNORM, /* default blocking capture */ +- .fd = ctx_to_m2mctx(ctx)->fd, ++// DQ a buffer ++// Amalgamates all the various ways there are of signalling EOS/Event to ++// generate a consistant EPIPE. ++// ++// Sets ctx->flag_last if next dq would produce EPIPE (i.e. stream has stopped) ++// ++// Returns: ++// 0 Success ++// AVERROR(EPIPE) Nothing more to read ++// AVERROR(ENOSPC) No buffers in Q to put result in ++// * AVERROR(..) ++ ++ static int ++dq_buf(V4L2Context * const ctx, V4L2Buffer ** const ppavbuf) ++{ ++ V4L2m2mContext * const m = ctx_to_m2mctx(ctx); ++ AVCodecContext * const avctx = m->avctx; ++ V4L2Buffer * avbuf; ++ const int is_mp = V4L2_TYPE_IS_MULTIPLANAR(ctx->type); ++ ++ struct v4l2_plane planes[VIDEO_MAX_PLANES] = {{0}}; ++ ++ struct v4l2_buffer buf = { ++ .type = ctx->type, ++ .memory = V4L2_MEMORY_MMAP, + }; +- int i, ret; + +- if (!V4L2_TYPE_IS_OUTPUT(ctx->type) && ctx->buffers) { +- for (i = 0; i < ctx->num_buffers; i++) { +- if (ctx->buffers[i].status == V4L2BUF_IN_DRIVER) +- break; +- } +- if (i == ctx->num_buffers) +- av_log(logger(ctx), AV_LOG_WARNING, "All capture buffers returned to " +- "userspace. Increase num_capture_buffers " +- "to prevent device deadlock or dropped " +- "packets/frames.\n"); ++ *ppavbuf = NULL; ++ ++ if (ctx->flag_last) ++ return AVERROR(EPIPE); ++ ++ if (is_mp) { ++ buf.length = VIDEO_MAX_PLANES; ++ buf.m.planes = planes; + } + +- /* if we are draining and there are no more capture buffers queued in the driver we are done */ +- if (!V4L2_TYPE_IS_OUTPUT(ctx->type) && ctx_to_m2mctx(ctx)->draining) { +- for (i = 0; i < ctx->num_buffers; i++) { +- /* capture buffer initialization happens during decode hence +- * detection happens at runtime +- */ +- if (!ctx->buffers) +- break; ++ while (ioctl(m->fd, VIDIOC_DQBUF, &buf) != 0) { ++ const int err = errno; ++ av_assert0(AVERROR(err) < 0); ++ if (err != EINTR) { ++ av_log(avctx, AV_LOG_DEBUG, "%s VIDIOC_DQBUF, errno (%s)\n", ++ ctx->name, av_err2str(AVERROR(err))); ++ ++ if (err == EPIPE) ++ ctx->flag_last = 1; + +- if (ctx->buffers[i].status == V4L2BUF_IN_DRIVER) +- goto start; ++ return AVERROR(err); + } +- ctx->done = 1; +- return NULL; + } ++ atomic_fetch_sub(&ctx->q_count, 1); + +-start: +- if (V4L2_TYPE_IS_OUTPUT(ctx->type)) +- pfd.events = POLLOUT | POLLWRNORM; +- else { +- /* no need to listen to requests for more input while draining */ +- if (ctx_to_m2mctx(ctx)->draining) +- pfd.events = POLLIN | POLLRDNORM | POLLPRI; ++ avbuf = (V4L2Buffer *)ctx->bufrefs[buf.index]->data; ++ ff_v4l2_buffer_set_avail(avbuf); ++ avbuf->buf = buf; ++ if (is_mp) { ++ memcpy(avbuf->planes, planes, sizeof(planes)); ++ avbuf->buf.m.planes = avbuf->planes; ++ } ++ // Done with any attached buffer ++ av_buffer_unref(&avbuf->ref_buf); ++ ++ if (V4L2_TYPE_IS_CAPTURE(ctx->type)) { ++ // Zero length cap buffer return == EOS ++ if ((is_mp ? buf.m.planes[0].bytesused : buf.bytesused) == 0) { ++ av_log(avctx, AV_LOG_DEBUG, "Buffer empty - reQ\n"); ++ ++ // Must reQ so we don't leak ++ // May not matter if the next thing we do is release all the ++ // buffers but better to be tidy. ++ ff_v4l2_buffer_enqueue(avbuf); ++ ++ ctx->flag_last = 1; ++ return AVERROR(EPIPE); ++ } ++ ++#ifdef V4L2_BUF_FLAG_LAST ++ // If flag_last set then this contains data but is the last frame ++ // so remember that but return OK ++ if ((buf.flags & V4L2_BUF_FLAG_LAST) != 0) ++ ctx->flag_last = 1; ++#endif + } + +- for (;;) { +- ret = poll(&pfd, 1, timeout); +- if (ret > 0) +- break; +- if (errno == EINTR) ++ *ppavbuf = avbuf; ++ return 0; ++} ++ ++/** ++ * handle resolution change event and end of stream event ++ * Expects to be called after the stream has stopped ++ * ++ * returns 1 if reinit was successful, negative if it failed ++ * returns 0 if reinit was not executed ++ */ ++static int ++get_event(V4L2m2mContext * const m) ++{ ++ AVCodecContext * const avctx = m->avctx; ++ struct v4l2_event evt = { 0 }; ++ ++ while (ioctl(m->fd, VIDIOC_DQEVENT, &evt) != 0) { ++ const int rv = AVERROR(errno); ++ if (rv == AVERROR(EINTR)) + continue; +- return NULL; ++ if (rv == AVERROR(EAGAIN)) { ++ av_log(avctx, AV_LOG_WARNING, "V4L2 failed to get expected event - assume EOS\n"); ++ return AVERROR_EOF; ++ } ++ av_log(avctx, AV_LOG_ERROR, "V4L2 VIDIOC_DQEVENT: %s\n", av_err2str(rv)); ++ return rv; + } + +- /* 0. handle errors */ +- if (pfd.revents & POLLERR) { +- /* if we are trying to get free buffers but none have been queued yet +- no need to raise a warning */ +- if (timeout == 0) { +- for (i = 0; i < ctx->num_buffers; i++) { +- if (ctx->buffers[i].status != V4L2BUF_AVAILABLE) +- av_log(logger(ctx), AV_LOG_WARNING, "%s POLLERR\n", ctx->name); +- } +- } +- else +- av_log(logger(ctx), AV_LOG_WARNING, "%s POLLERR\n", ctx->name); ++ av_log(avctx, AV_LOG_DEBUG, "Dq event %d\n", evt.type); + +- return NULL; ++ if (evt.type == V4L2_EVENT_EOS) { ++ av_log(avctx, AV_LOG_TRACE, "V4L2 VIDIOC_EVENT_EOS\n"); ++ return AVERROR_EOF; + } + +- /* 1. handle resolution changes */ +- if (pfd.revents & POLLPRI) { +- ret = v4l2_handle_event(ctx); +- if (ret < 0) { +- /* if re-init failed, abort */ +- ctx->done = 1; +- return NULL; ++ if (evt.type == V4L2_EVENT_SOURCE_CHANGE) ++ return do_source_change(m); ++ ++ return 0; ++} ++ ++static inline int ++dq_ok(const V4L2Context * const c) ++{ ++ return c->streamon && atomic_load(&c->q_count) != 0; ++} ++ ++// Get a buffer ++// If output then just gets the buffer in the expected way ++// If capture then runs the capture state m/c to deal with res change etc. ++// If return value == 0 then *ppavbuf != NULL ++ ++static int ++get_qbuf(V4L2Context * const ctx, V4L2Buffer ** const ppavbuf, const int timeout) ++{ ++ V4L2m2mContext * const m = ctx_to_m2mctx(ctx); ++ AVCodecContext * const avctx = m->avctx; ++ const int is_cap = V4L2_TYPE_IS_CAPTURE(ctx->type); ++ ++ const unsigned int poll_cap = (POLLIN | POLLRDNORM); ++ const unsigned int poll_out = (POLLOUT | POLLWRNORM); ++ const unsigned int poll_event = POLLPRI; ++ ++ *ppavbuf = NULL; ++ ++ for (;;) { ++ struct pollfd pfd = { ++ .fd = m->fd, ++ // If capture && stream not started then assume we are waiting for the initial event ++ .events = !is_cap ? poll_out : ++ !ff_v4l2_ctx_eos(ctx) && ctx->streamon ? poll_cap : ++ poll_event, ++ }; ++ int ret; ++ ++ if (ctx->done) { ++ av_log(avctx, AV_LOG_TRACE, "V4L2 %s already done\n", ctx->name); ++ return AVERROR_EOF; + } +- if (ret) { +- /* if re-init was successful drop the buffer (if there was one) +- * since we had to reconfigure capture (unmap all buffers) +- */ +- return NULL; ++ ++ // If capture && timeout == -1 then also wait for rx buffer free ++ if (is_cap && timeout == -1 && dq_ok(&m->output) && !m->draining) ++ pfd.events |= poll_out; ++ ++ // If nothing Qed all we will get is POLLERR - avoid that ++ if ((pfd.events == poll_out && !dq_ok(&m->output)) || ++ (pfd.events == poll_cap && !dq_ok(&m->capture)) || ++ (pfd.events == (poll_cap | poll_out) && !dq_ok(&m->capture) && !dq_ok(&m->output))) { ++ av_log(avctx, AV_LOG_TRACE, "V4L2 poll %s empty\n", ctx->name); ++ return AVERROR(ENOSPC); + } +- } + +- /* 2. dequeue the buffer */ +- if (pfd.revents & (POLLIN | POLLRDNORM | POLLOUT | POLLWRNORM)) { ++ // Timeout kludged s.t. "forever" eventually gives up & produces logging ++ // If waiting for an event when we have seen a last_frame then we expect ++ // it to be ready already so force a short timeout ++ ret = poll(&pfd, 1, ++ ff_v4l2_ctx_eos(ctx) ? 10 : ++ timeout == -1 ? 3000 : timeout); ++ if (ret < 0) { ++ ret = AVERROR(errno); // Remember errno before logging etc. ++ av_assert0(ret < 0); ++ } ++ ++ av_log(avctx, AV_LOG_TRACE, "V4L2 poll %s ret=%d, timeout=%d, events=%#x, revents=%#x\n", ++ ctx->name, ret, timeout, pfd.events, pfd.revents); + +- if (!V4L2_TYPE_IS_OUTPUT(ctx->type)) { +- /* there is a capture buffer ready */ +- if (pfd.revents & (POLLIN | POLLRDNORM)) +- goto dequeue; ++ if (ret < 0) { ++ if (ret == AVERROR(EINTR)) ++ continue; ++ av_log(avctx, AV_LOG_ERROR, "V4L2 %s poll error %d (%s)\n", ctx->name, AVUNERROR(ret), av_err2str(ret)); ++ return ret; ++ } + +- /* the driver is ready to accept more input; instead of waiting for the capture +- * buffer to complete we return NULL so input can proceed (we are single threaded) +- */ +- if (pfd.revents & (POLLOUT | POLLWRNORM)) +- return NULL; ++ if (ret == 0) { ++ if (timeout == -1) ++ av_log(avctx, AV_LOG_ERROR, "V4L2 %s poll unexpected timeout: events=%#x\n", ctx->name, pfd.events); ++ if (ff_v4l2_ctx_eos(ctx)) { ++ av_log(avctx, AV_LOG_WARNING, "V4L2 %s poll event timeout\n", ctx->name); ++ ret = get_event(m); ++ if (ret < 0) { ++ ctx->done = 1; ++ return ret; ++ } ++ } ++ return AVERROR(EAGAIN); + } + +-dequeue: +- memset(&buf, 0, sizeof(buf)); +- buf.memory = V4L2_MEMORY_MMAP; +- buf.type = ctx->type; +- if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { +- memset(planes, 0, sizeof(planes)); +- buf.length = VIDEO_MAX_PLANES; +- buf.m.planes = planes; ++ if ((pfd.revents & POLLERR) != 0) { ++ av_log(avctx, AV_LOG_WARNING, "V4L2 %s POLLERR\n", ctx->name); ++ return AVERROR_UNKNOWN; + } + +- ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_DQBUF, &buf); +- if (ret) { +- if (errno != EAGAIN) { ++ if ((pfd.revents & poll_event) != 0) { ++ ret = get_event(m); ++ if (ret < 0) { + ctx->done = 1; +- if (errno != EPIPE) +- av_log(logger(ctx), AV_LOG_DEBUG, "%s VIDIOC_DQBUF, errno (%s)\n", +- ctx->name, av_err2str(AVERROR(errno))); ++ return ret; + } +- return NULL; ++ continue; + } + +- if (ctx_to_m2mctx(ctx)->draining && !V4L2_TYPE_IS_OUTPUT(ctx->type)) { +- int bytesused = V4L2_TYPE_IS_MULTIPLANAR(buf.type) ? +- buf.m.planes[0].bytesused : buf.bytesused; +- if (bytesused == 0) { +- ctx->done = 1; +- return NULL; +- } +-#ifdef V4L2_BUF_FLAG_LAST +- if (buf.flags & V4L2_BUF_FLAG_LAST) +- ctx->done = 1; +-#endif ++ if ((pfd.revents & poll_cap) != 0) { ++ ret = dq_buf(ctx, ppavbuf); ++ if (ret == AVERROR(EPIPE)) ++ continue; ++ return ret; + } + +- avbuf = &ctx->buffers[buf.index]; +- avbuf->status = V4L2BUF_AVAILABLE; +- avbuf->buf = buf; +- if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { +- memcpy(avbuf->planes, planes, sizeof(planes)); +- avbuf->buf.m.planes = avbuf->planes; ++ if ((pfd.revents & poll_out) != 0) { ++ if (is_cap) ++ return AVERROR(EAGAIN); ++ return dq_buf(ctx, ppavbuf); + } +- return avbuf; ++ ++ av_log(avctx, AV_LOG_ERROR, "V4L2 poll unexpected events=%#x, revents=%#x\n", pfd.events, pfd.revents); ++ return AVERROR_UNKNOWN; + } ++} + +- return NULL; ++// Clear out flags and timestamps that should should be set by the user ++// Returns the passed avbuf ++static V4L2Buffer * ++clean_v4l2_buffer(V4L2Buffer * const avbuf) ++{ ++ struct v4l2_buffer *const buf = &avbuf->buf; ++ ++ buf->flags = 0; ++ buf->field = V4L2_FIELD_ANY; ++ buf->timestamp = (struct timeval){0}; ++ buf->timecode = (struct v4l2_timecode){0}; ++ buf->sequence = 0; ++ ++ return avbuf; ++} ++ ++int ++ff_v4l2_dq_all(V4L2Context *const ctx, int timeout1) ++{ ++ V4L2Buffer * avbuf; ++ if (timeout1 != 0) { ++ int rv = get_qbuf(ctx, &avbuf, timeout1); ++ if (rv != 0) ++ return rv; ++ } ++ do { ++ get_qbuf(ctx, &avbuf, 0); ++ } while (avbuf); ++ return 0; + } + + static V4L2Buffer* v4l2_getfree_v4l2buf(V4L2Context *ctx) + { +- int timeout = 0; /* return when no more buffers to dequeue */ + int i; + + /* get back as many output buffers as possible */ +- if (V4L2_TYPE_IS_OUTPUT(ctx->type)) { +- do { +- } while (v4l2_dequeue_v4l2buf(ctx, timeout)); +- } ++ if (V4L2_TYPE_IS_OUTPUT(ctx->type)) ++ ff_v4l2_dq_all(ctx, 0); + + for (i = 0; i < ctx->num_buffers; i++) { +- if (ctx->buffers[i].status == V4L2BUF_AVAILABLE) +- return &ctx->buffers[i]; ++ V4L2Buffer * const avbuf = (V4L2Buffer *)ctx->bufrefs[i]->data; ++ if (avbuf->status == V4L2BUF_AVAILABLE) ++ return clean_v4l2_buffer(avbuf); + } + + return NULL; +@@ -438,25 +737,45 @@ static V4L2Buffer* v4l2_getfree_v4l2buf( + + static int v4l2_release_buffers(V4L2Context* ctx) + { +- struct v4l2_requestbuffers req = { +- .memory = V4L2_MEMORY_MMAP, +- .type = ctx->type, +- .count = 0, /* 0 -> unmaps buffers from the driver */ +- }; +- int i, j; ++ int i; ++ int ret = 0; ++ const int fd = ctx_to_m2mctx(ctx)->fd; + +- for (i = 0; i < ctx->num_buffers; i++) { +- V4L2Buffer *buffer = &ctx->buffers[i]; ++ // Orphan any buffers in the wild ++ ff_weak_link_break(&ctx->wl_master); + +- for (j = 0; j < buffer->num_planes; j++) { +- struct V4L2Plane_info *p = &buffer->plane_info[j]; +- if (p->mm_addr && p->length) +- if (munmap(p->mm_addr, p->length) < 0) +- av_log(logger(ctx), AV_LOG_ERROR, "%s unmap plane (%s))\n", ctx->name, av_err2str(AVERROR(errno))); ++ if (ctx->bufrefs) { ++ for (i = 0; i < ctx->num_buffers; i++) ++ av_buffer_unref(ctx->bufrefs + i); ++ } ++ ++ if (fd != -1) { ++ struct v4l2_requestbuffers req = { ++ .memory = V4L2_MEMORY_MMAP, ++ .type = ctx->type, ++ .count = 0, /* 0 -> unmap all buffers from the driver */ ++ }; ++ ++ while ((ret = ioctl(fd, VIDIOC_REQBUFS, &req)) == -1) { ++ if (errno == EINTR) ++ continue; ++ ++ ret = AVERROR(errno); ++ ++ av_log(logger(ctx), AV_LOG_ERROR, "release all %s buffers (%s)\n", ++ ctx->name, av_err2str(AVERROR(errno))); ++ ++ if (ctx_to_m2mctx(ctx)->output_drm) ++ av_log(logger(ctx), AV_LOG_ERROR, ++ "Make sure the DRM client releases all FB/GEM objects before closing the codec (ie):\n" ++ "for all buffers: \n" ++ " 1. drmModeRmFB(..)\n" ++ " 2. drmIoctl(.., DRM_IOCTL_GEM_CLOSE,... )\n"); + } + } ++ atomic_store(&ctx->q_count, 0); + +- return ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_REQBUFS, &req); ++ return ret; + } + + static inline int v4l2_try_raw_format(V4L2Context* ctx, enum AVPixelFormat pixfmt) +@@ -485,6 +804,8 @@ static inline int v4l2_try_raw_format(V4 + + static int v4l2_get_raw_format(V4L2Context* ctx, enum AVPixelFormat *p) + { ++ V4L2m2mContext* s = ctx_to_m2mctx(ctx); ++ V4L2m2mPriv *priv = s->avctx->priv_data; + enum AVPixelFormat pixfmt = ctx->av_pix_fmt; + struct v4l2_fmtdesc fdesc; + int ret; +@@ -498,21 +819,22 @@ static int v4l2_get_raw_format(V4L2Conte + return 0; + } + +- for (;;) { ++ for (;; ++fdesc.index) { + ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_ENUM_FMT, &fdesc); + if (ret) + return AVERROR(EINVAL); + ++ if (priv->pix_fmt != AV_PIX_FMT_NONE) { ++ if (fdesc.pixelformat != ff_v4l2_format_avfmt_to_v4l2(priv->pix_fmt)) ++ continue; ++ } ++ + pixfmt = ff_v4l2_format_v4l2_to_avfmt(fdesc.pixelformat, AV_CODEC_ID_RAWVIDEO); + ret = v4l2_try_raw_format(ctx, pixfmt); +- if (ret){ +- fdesc.index++; +- continue; ++ if (ret == 0) { ++ *p = pixfmt; ++ return 0; + } +- +- *p = pixfmt; +- +- return 0; + } + + return AVERROR(EINVAL); +@@ -555,30 +877,131 @@ static int v4l2_get_coded_format(V4L2Con + * + *****************************************************************************/ + +-int ff_v4l2_context_set_status(V4L2Context* ctx, uint32_t cmd) ++ ++static void flush_all_buffers_status(V4L2Context* const ctx) ++{ ++ int i; ++ ++ if (!ctx->bufrefs) ++ return; ++ ++ for (i = 0; i < ctx->num_buffers; ++i) { ++ struct V4L2Buffer * const buf = (struct V4L2Buffer *)ctx->bufrefs[i]->data; ++ if (buf->status == V4L2BUF_IN_DRIVER) ++ ff_v4l2_buffer_set_avail(buf); ++ } ++ atomic_store(&ctx->q_count, 0); ++} ++ ++static int stuff_all_buffers(AVCodecContext * avctx, V4L2Context* ctx) ++{ ++ int i; ++ int rv; ++ ++ if (!ctx->bufrefs) { ++ rv = ff_v4l2_context_init(ctx); ++ if (rv) { ++ av_log(avctx, AV_LOG_ERROR, "can't request capture buffers\n"); ++ return rv; ++ } ++ } ++ ++ ff_mutex_lock(&ctx->lock); ++ for (i = 0; i < ctx->num_buffers; ++i) { ++ struct V4L2Buffer * const buf = (struct V4L2Buffer *)ctx->bufrefs[i]->data; ++ if (buf->status == V4L2BUF_AVAILABLE) { ++ rv = ff_v4l2_buffer_enqueue(buf); ++ if (rv < 0) ++ break; ++ } ++ } ++ ff_mutex_unlock(&ctx->lock); ++ return rv; ++} ++ ++static int set_streamon(AVCodecContext * const avctx, V4L2Context*const ctx) + { + int type = ctx->type; +- int ret; ++ int ret = 0; + +- ret = ioctl(ctx_to_m2mctx(ctx)->fd, cmd, &type); +- if (ret < 0) +- return AVERROR(errno); ++ if (!V4L2_TYPE_IS_OUTPUT(ctx->type)) ++ stuff_all_buffers(avctx, ctx); + +- ctx->streamon = (cmd == VIDIOC_STREAMON); ++ if (ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_STREAMON, &type) < 0) { ++ ret = AVERROR(errno); ++ av_log(avctx, AV_LOG_ERROR, "%s set status ON failed: err=%s\n", ctx->name, ++ av_err2str(ret)); ++ return ret; ++ } + +- return 0; ++ ctx->first_buf = 1; ++ ctx->streamon = 1; ++ ctx->flag_last = 0; ++ av_log(avctx, AV_LOG_DEBUG, "%s set status ON OK\n", ctx->name); ++ return ret; ++} ++ ++static int set_streamoff(AVCodecContext * const avctx, V4L2Context*const ctx) ++{ ++ int type = ctx->type; ++ int ret = 0; ++ const int has_bufs = ctx_buffers_alloced(ctx); ++ ++ // Avoid doing anything if there is nothing we can do ++ if (!has_bufs && !ctx->streamon) ++ return 0; ++ ++ if (has_bufs) ++ ff_mutex_lock(&ctx->lock); ++ ++ if (ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_STREAMOFF, &type) < 0) { ++ ret = AVERROR(errno); ++ av_log(avctx, AV_LOG_ERROR, "%s set status ON failed: err=%s\n", ctx->name, ++ av_err2str(ret)); ++ } ++ else { ++ flush_all_buffers_status(ctx); ++ ++ ctx->streamon = 0; ++ ctx->flag_last = 0; ++ ++ av_log(avctx, AV_LOG_DEBUG, "%s set status OFF OK\n", ctx->name); ++ } ++ ++ if (has_bufs) ++ ff_mutex_unlock(&ctx->lock); ++ return ret; ++} ++ ++ ++int ff_v4l2_context_set_status(V4L2Context* ctx, uint32_t cmd) ++{ ++ AVCodecContext * const avctx = logger(ctx); ++ ++ switch (cmd) { ++ case VIDIOC_STREAMOFF: ++ return set_streamoff(avctx, ctx); ++ case VIDIOC_STREAMON: ++ return set_streamon(avctx, ctx); ++ default: ++ av_log(avctx, AV_LOG_ERROR, "%s: Unexpected cmd: %d\n", __func__, cmd); ++ break; ++ } ++ return AVERROR_BUG; + } + + int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* frame) + { +- V4L2m2mContext *s = ctx_to_m2mctx(ctx); ++ V4L2m2mContext *const s = ctx_to_m2mctx(ctx); ++ AVCodecContext *const avctx = s->avctx; ++ int64_t track_ts; + V4L2Buffer* avbuf; + int ret; + + if (!frame) { + ret = v4l2_stop_encode(ctx); + if (ret) +- av_log(logger(ctx), AV_LOG_ERROR, "%s stop_encode\n", ctx->name); ++ av_log(avctx, AV_LOG_ERROR, "%s stop_encode\n", ctx->name); + s->draining= 1; + return 0; + } +@@ -587,23 +1010,29 @@ int ff_v4l2_context_enqueue_frame(V4L2Co + if (!avbuf) + return AVERROR(EAGAIN); + +- ret = ff_v4l2_buffer_avframe_to_buf(frame, avbuf); ++ track_ts = xlat_pts_frame_in(avctx, &s->xlat, frame); ++ ++ ret = ff_v4l2_buffer_avframe_to_buf(frame, avbuf, track_ts); + if (ret) + return ret; + + return ff_v4l2_buffer_enqueue(avbuf); + } + +-int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt) ++int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt, ++ const void * extdata, size_t extlen) + { + V4L2m2mContext *s = ctx_to_m2mctx(ctx); ++ AVCodecContext *const avctx = s->avctx; + V4L2Buffer* avbuf; + int ret; ++ int64_t track_ts; + + if (!pkt->size) { + ret = v4l2_stop_decode(ctx); ++ // Log but otherwise ignore stop failure + if (ret) +- av_log(logger(ctx), AV_LOG_ERROR, "%s stop_decode\n", ctx->name); ++ av_log(avctx, AV_LOG_ERROR, "%s stop_decode failed: err=%d\n", ctx->name, ret); + s->draining = 1; + return 0; + } +@@ -612,8 +1041,13 @@ int ff_v4l2_context_enqueue_packet(V4L2C + if (!avbuf) + return AVERROR(EAGAIN); + +- ret = ff_v4l2_buffer_avpkt_to_buf(pkt, avbuf); +- if (ret) ++ track_ts = xlat_pts_pkt_in(avctx, &s->xlat, pkt); ++ ++ ret = ff_v4l2_buffer_avpkt_to_buf_ext(pkt, avbuf, extdata, extlen, track_ts); ++ if (ret == AVERROR(ENOMEM)) ++ av_log(logger(ctx), AV_LOG_ERROR, "Buffer overflow in %s: pkt->size=%d > buf->length=%d\n", ++ __func__, pkt->size, avbuf->planes[0].length); ++ else if (ret) + return ret; + + return ff_v4l2_buffer_enqueue(avbuf); +@@ -621,42 +1055,77 @@ int ff_v4l2_context_enqueue_packet(V4L2C + + int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* frame, int timeout) + { ++ V4L2m2mContext *s = ctx_to_m2mctx(ctx); ++ AVCodecContext *const avctx = s->avctx; + V4L2Buffer *avbuf; ++ int rv; + +- /* +- * timeout=-1 blocks until: +- * 1. decoded frame available +- * 2. an input buffer is ready to be dequeued +- */ +- avbuf = v4l2_dequeue_v4l2buf(ctx, timeout); +- if (!avbuf) { +- if (ctx->done) +- return AVERROR_EOF; +- +- return AVERROR(EAGAIN); +- } ++ do { ++ if ((rv = get_qbuf(ctx, &avbuf, timeout)) != 0) ++ return rv; ++ if ((rv = ff_v4l2_buffer_buf_to_avframe(frame, avbuf)) != 0) ++ return rv; ++ } while (xlat_pts_frame_out(avctx, &s->xlat, frame) != 0); + +- return ff_v4l2_buffer_buf_to_avframe(frame, avbuf); ++ return 0; + } + +-int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt) ++int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt, int timeout) + { ++ V4L2m2mContext *s = ctx_to_m2mctx(ctx); ++ AVCodecContext *const avctx = s->avctx; + V4L2Buffer *avbuf; ++ int rv; + +- /* +- * blocks until: +- * 1. encoded packet available +- * 2. an input buffer ready to be dequeued +- */ +- avbuf = v4l2_dequeue_v4l2buf(ctx, -1); +- if (!avbuf) { +- if (ctx->done) +- return AVERROR_EOF; ++ do { ++ if ((rv = get_qbuf(ctx, &avbuf, timeout)) != 0) ++ return rv == AVERROR(ENOSPC) ? AVERROR(EAGAIN) : rv; // Caller not currently expecting ENOSPC ++ if ((rv = ff_v4l2_buffer_buf_to_avpkt(pkt, avbuf)) != 0) ++ return rv; ++ } while (xlat_pts_pkt_out(avctx, &s->xlat, pkt) != 0); + +- return AVERROR(EAGAIN); ++ return 0; ++} ++ ++// Return 0 terminated list of drm fourcc video formats for this context ++// NULL if none found or error ++// Returned list is malloced so must be freed ++uint32_t * ff_v4l2_context_enum_drm_formats(V4L2Context *ctx, unsigned int *pN) ++{ ++ unsigned int i; ++ unsigned int n = 0; ++ unsigned int size = 0; ++ uint32_t * e = NULL; ++ *pN = 0; ++ ++ for (i = 0; i < 1024; ++i) { ++ struct v4l2_fmtdesc fdesc = { ++ .index = i, ++ .type = ctx->type ++ }; ++ ++ if (ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_ENUM_FMT, &fdesc)) ++ return e; ++ ++ if (n + 1 >= size) { ++ unsigned int newsize = (size == 0) ? 16 : size * 2; ++ uint32_t * t = av_realloc(e, newsize * sizeof(*t)); ++ if (!t) ++ return e; ++ e = t; ++ size = newsize; ++ } ++ ++ e[n] = fdesc.pixelformat; ++ e[++n] = 0; ++ if (pN) ++ *pN = n; + } + +- return ff_v4l2_buffer_buf_to_avpkt(pkt, avbuf); ++ // If we've looped 1024 times we are clearly confused ++ *pN = 0; ++ av_free(e); ++ return NULL; + } + + int ff_v4l2_context_get_format(V4L2Context* ctx, int probe) +@@ -688,78 +1157,194 @@ int ff_v4l2_context_get_format(V4L2Conte + + int ff_v4l2_context_set_format(V4L2Context* ctx) + { +- return ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_S_FMT, &ctx->format); ++ int ret; ++ ++ ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_S_FMT, &ctx->format); ++ if (ret != 0) ++ return ret; ++ ++ // Check returned size against min size and if smaller have another go ++ // Only worry about plane[0] as this is meant to enforce limits for ++ // encoded streams where we might know a bit more about the shape ++ // than the driver ++ if (V4L2_TYPE_IS_MULTIPLANAR(ctx->format.type)) { ++ if (ctx->min_buf_size <= ctx->format.fmt.pix_mp.plane_fmt[0].sizeimage) ++ return 0; ++ ctx->format.fmt.pix_mp.plane_fmt[0].sizeimage = ctx->min_buf_size; ++ } ++ else { ++ if (ctx->min_buf_size <= ctx->format.fmt.pix.sizeimage) ++ return 0; ++ ctx->format.fmt.pix.sizeimage = ctx->min_buf_size; ++ } ++ ++ ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_S_FMT, &ctx->format); ++ return ret; + } + + void ff_v4l2_context_release(V4L2Context* ctx) + { + int ret; + +- if (!ctx->buffers) ++ if (!ctx->bufrefs) + return; + + ret = v4l2_release_buffers(ctx); + if (ret) + av_log(logger(ctx), AV_LOG_WARNING, "V4L2 failed to unmap the %s buffers\n", ctx->name); + +- av_freep(&ctx->buffers); ++ av_freep(&ctx->bufrefs); ++ av_buffer_unref(&ctx->frames_ref); ++ ++ ff_mutex_destroy(&ctx->lock); ++ pthread_cond_destroy(&ctx->cond); + } + +-int ff_v4l2_context_init(V4L2Context* ctx) ++ ++static int create_buffers(V4L2Context* const ctx, const unsigned int req_buffers, const enum v4l2_memory mem) + { +- V4L2m2mContext *s = ctx_to_m2mctx(ctx); ++ V4L2m2mContext * const s = ctx_to_m2mctx(ctx); + struct v4l2_requestbuffers req; +- int ret, i; +- +- if (!v4l2_type_supported(ctx)) { +- av_log(logger(ctx), AV_LOG_ERROR, "type %i not supported\n", ctx->type); +- return AVERROR_PATCHWELCOME; +- } ++ int ret; ++ int i; + +- ret = ioctl(s->fd, VIDIOC_G_FMT, &ctx->format); +- if (ret) +- av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_G_FMT failed\n", ctx->name); ++ av_assert0(ctx->bufrefs == NULL); + + memset(&req, 0, sizeof(req)); +- req.count = ctx->num_buffers; +- req.memory = V4L2_MEMORY_MMAP; ++ req.count = req_buffers; ++ req.memory = mem; + req.type = ctx->type; +- ret = ioctl(s->fd, VIDIOC_REQBUFS, &req); +- if (ret < 0) { +- av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_REQBUFS failed: %s\n", ctx->name, strerror(errno)); +- return AVERROR(errno); ++ while ((ret = ioctl(s->fd, VIDIOC_REQBUFS, &req)) == -1) { ++ if (errno != EINTR) { ++ ret = AVERROR(errno); ++ av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_REQBUFS failed: %s\n", ctx->name, av_err2str(ret)); ++ return ret; ++ } + } + + ctx->num_buffers = req.count; +- ctx->buffers = av_mallocz(ctx->num_buffers * sizeof(V4L2Buffer)); +- if (!ctx->buffers) { ++ ctx->bufrefs = av_mallocz(ctx->num_buffers * sizeof(*ctx->bufrefs)); ++ if (!ctx->bufrefs) { + av_log(logger(ctx), AV_LOG_ERROR, "%s malloc enomem\n", ctx->name); +- return AVERROR(ENOMEM); ++ goto fail_release; + } + +- for (i = 0; i < req.count; i++) { +- ctx->buffers[i].context = ctx; +- ret = ff_v4l2_buffer_initialize(&ctx->buffers[i], i); +- if (ret < 0) { ++ ctx->wl_master = ff_weak_link_new(ctx); ++ if (!ctx->wl_master) { ++ ret = AVERROR(ENOMEM); ++ goto fail_release; ++ } ++ ++ for (i = 0; i < ctx->num_buffers; i++) { ++ ret = ff_v4l2_buffer_initialize(&ctx->bufrefs[i], i, ctx, mem); ++ if (ret) { + av_log(logger(ctx), AV_LOG_ERROR, "%s buffer[%d] initialization (%s)\n", ctx->name, i, av_err2str(ret)); +- goto error; ++ goto fail_release; + } + } + + av_log(logger(ctx), AV_LOG_DEBUG, "%s: %s %02d buffers initialized: %04ux%04u, sizeimage %08u, bytesperline %08u\n", ctx->name, + V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? av_fourcc2str(ctx->format.fmt.pix_mp.pixelformat) : av_fourcc2str(ctx->format.fmt.pix.pixelformat), + req.count, +- v4l2_get_width(&ctx->format), +- v4l2_get_height(&ctx->format), ++ ff_v4l2_get_format_width(&ctx->format), ++ ff_v4l2_get_format_height(&ctx->format), + V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? ctx->format.fmt.pix_mp.plane_fmt[0].sizeimage : ctx->format.fmt.pix.sizeimage, + V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? ctx->format.fmt.pix_mp.plane_fmt[0].bytesperline : ctx->format.fmt.pix.bytesperline); + + return 0; + +-error: ++fail_release: + v4l2_release_buffers(ctx); ++ av_freep(&ctx->bufrefs); ++ return ret; ++} ++ ++int ff_v4l2_context_frames_set(V4L2Context *const ctx) ++{ ++ AVHWFramesContext *hwframes; ++ V4L2m2mContext * const s = ctx_to_m2mctx(ctx); ++ const int w = ctx->width != 0 ? ctx->width : s->avctx->width; ++ const int h = ctx->height != 0 ? ctx->height : s->avctx->height; ++ int ret; ++ ++ if (ctx->frames_ref != NULL) { ++ const AVHWFramesContext * const hwf = (AVHWFramesContext*)ctx->frames_ref->data; ++ if (hwf->sw_format == ctx->av_pix_fmt && hwf->width == w && hwf->height == h) ++ return 0; ++ av_buffer_unref(&ctx->frames_ref); ++ } ++ ++ ctx->frames_ref = av_hwframe_ctx_alloc(s->device_ref); ++ if (!ctx->frames_ref) ++ return AVERROR(ENOMEM); ++ ++ hwframes = (AVHWFramesContext*)ctx->frames_ref->data; ++ hwframes->format = AV_PIX_FMT_DRM_PRIME; ++ hwframes->sw_format = ctx->av_pix_fmt; ++ hwframes->width = w; ++ hwframes->height = h; ++ ret = av_hwframe_ctx_init(ctx->frames_ref); ++ if (ret < 0) { ++ av_log(s->avctx, AV_LOG_ERROR, "Failed to create hwframes context: %s\n", av_err2str(ret)); ++ av_buffer_unref(&ctx->frames_ref); ++ return ret; ++ } ++ ++ av_log(s->avctx, AV_LOG_DEBUG, "%s: HWFramesContext set to %s, %dx%d\n", __func__, ++ av_get_pix_fmt_name(ctx->av_pix_fmt), w, h); ++ return 0; ++} ++ ++int ff_v4l2_context_init(V4L2Context* ctx) ++{ ++ struct v4l2_queryctrl qctrl; ++ V4L2m2mContext * const s = ctx_to_m2mctx(ctx); ++ int ret; ++ ++ // It is not valid to reinit a context without a previous release ++ av_assert0(ctx->bufrefs == NULL); ++ ++ if (!v4l2_type_supported(ctx)) { ++ av_log(logger(ctx), AV_LOG_ERROR, "type %i not supported\n", ctx->type); ++ return AVERROR_PATCHWELCOME; ++ } ++ ++ ff_mutex_init(&ctx->lock, NULL); ++ pthread_cond_init(&ctx->cond, NULL); ++ atomic_init(&ctx->q_count, 0); ++ ++ ret = ioctl(s->fd, VIDIOC_G_FMT, &ctx->format); ++ if (ret) { ++ ret = AVERROR(errno); ++ av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_G_FMT failed: %s\n", ctx->name, av_err2str(ret)); ++ goto fail_unlock; ++ } ++ ++ memset(&qctrl, 0, sizeof(qctrl)); ++ qctrl.id = V4L2_CID_MIN_BUFFERS_FOR_OUTPUT; ++ if (ioctl(s->fd, VIDIOC_QUERYCTRL, &qctrl) != 0) { ++ ret = AVERROR(errno); ++ if (ret != AVERROR(EINVAL)) { ++ av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_QUERCTRL failed: %s\n", ctx->name, av_err2str(ret)); ++ goto fail_unlock; ++ } ++ // Control unsupported - set default if wanted ++ if (ctx->num_buffers < 2) ++ ctx->num_buffers = 4; ++ } ++ else { ++ if (ctx->num_buffers < 2) ++ ctx->num_buffers = qctrl.minimum + 2; ++ ctx->num_buffers = av_clip(ctx->num_buffers, qctrl.minimum, qctrl.maximum); ++ } + +- av_freep(&ctx->buffers); ++ ret = create_buffers(ctx, ctx->num_buffers, ctx->buf_mem); ++ if (ret < 0) ++ goto fail_unlock; ++ ++ return 0; + ++fail_unlock: ++ ff_mutex_destroy(&ctx->lock); + return ret; + } +--- a/libavcodec/v4l2_context.h ++++ b/libavcodec/v4l2_context.h +@@ -32,6 +32,8 @@ + #include "libavutil/rational.h" + #include "codec_id.h" + #include "packet.h" ++#include "libavutil/buffer.h" ++#include "libavutil/thread.h" + #include "v4l2_buffers.h" + + typedef struct V4L2Context { +@@ -71,11 +73,18 @@ typedef struct V4L2Context { + */ + int width, height; + AVRational sample_aspect_ratio; ++ struct v4l2_rect selection; + + /** +- * Indexed array of V4L2Buffers ++ * If the default size of buffer is less than this then try to ++ * set to this. + */ +- V4L2Buffer *buffers; ++ uint32_t min_buf_size; ++ ++ /** ++ * Indexed array of pointers to V4L2Buffers ++ */ ++ AVBufferRef **bufrefs; + + /** + * Readonly after init. +@@ -83,16 +92,38 @@ typedef struct V4L2Context { + int num_buffers; + + /** ++ * Buffer memory type V4L2_MEMORY_MMAP or V4L2_MEMORY_DMABUF ++ */ ++ enum v4l2_memory buf_mem; ++ ++ /** + * Whether the stream has been started (VIDIOC_STREAMON has been sent). + */ + int streamon; + ++ /* 1st buffer after stream on */ ++ int first_buf; ++ + /** + * Either no more buffers available or an unrecoverable error was notified + * by the V4L2 kernel driver: once set the context has to be exited. + */ + int done; + ++ int flag_last; ++ ++ /** ++ * If NZ then when Qing frame/pkt use this rather than the ++ * "real" PTS ++ */ ++ uint64_t track_ts; ++ ++ AVBufferRef *frames_ref; ++ atomic_int q_count; ++ struct ff_weak_link_master *wl_master; ++ ++ AVMutex lock; ++ pthread_cond_t cond; + } V4L2Context; + + /** +@@ -104,6 +135,14 @@ typedef struct V4L2Context { + int ff_v4l2_context_init(V4L2Context* ctx); + + /** ++ * (re)set the hwframecontext from the current v4l2 context ++ * ++ * @param[in] ctx A pointer to a V4L2Context. See V4L2Context description for required variables. ++ * @return 0 in case of success, a negative value representing the error otherwise. ++ */ ++int ff_v4l2_context_frames_set(V4L2Context *const ctx); ++ ++/** + * Sets the V4L2Context format in the v4l2 driver. + * + * @param[in] ctx A pointer to a V4L2Context. See V4L2Context description for required variables. +@@ -121,6 +160,19 @@ int ff_v4l2_context_set_format(V4L2Conte + int ff_v4l2_context_get_format(V4L2Context* ctx, int probe); + + /** ++ * Get the list of drm fourcc pixel formats for this context ++ * ++ * @param[in] ctx A pointer to a V4L2Context. See V4L2Context ++ * description for required variables. ++ * @param[in] pN A pointer to receive the number of formats ++ * found. May be NULL if not wanted. ++ * @return Pointer to malloced list of zero terminated formats, ++ * NULL if none or error. As list is malloced it must be ++ * freed. ++ */ ++uint32_t * ff_v4l2_context_enum_drm_formats(V4L2Context *ctx, unsigned int *pN); ++ ++/** + * Releases a V4L2Context. + * + * @param[in] ctx A pointer to a V4L2Context. +@@ -148,7 +200,7 @@ int ff_v4l2_context_set_status(V4L2Conte + * @param[inout] pkt The AVPacket to dequeue to. + * @return 0 in case of success, AVERROR(EAGAIN) if no buffer was ready, another negative error in case of error. + */ +-int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt); ++int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt, int timeout); + + /** + * Dequeues a buffer from a V4L2Context to an AVFrame. +@@ -157,7 +209,10 @@ int ff_v4l2_context_dequeue_packet(V4L2C + * @param[in] ctx The V4L2Context to dequeue from. + * @param[inout] f The AVFrame to dequeue to. + * @param[in] timeout The timeout for dequeue (-1 to block, 0 to return immediately, or milliseconds) ++ * + * @return 0 in case of success, AVERROR(EAGAIN) if no buffer was ready, another negative error in case of error. ++ * AVERROR(ENOSPC) if no buffer availible to put ++ * the frame in + */ + int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* f, int timeout); + +@@ -171,7 +226,7 @@ int ff_v4l2_context_dequeue_frame(V4L2Co + * @param[in] pkt A pointer to an AVPacket. + * @return 0 in case of success, a negative error otherwise. + */ +-int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt); ++int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt, const void * ext_data, size_t ext_size); + + /** + * Enqueues a buffer to a V4L2Context from an AVFrame +@@ -184,4 +239,28 @@ int ff_v4l2_context_enqueue_packet(V4L2C + */ + int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* f); + ++/** ++ * Dequeue all buffers on this queue ++ * ++ * Used to recycle output buffers ++ * ++ * @param[in] ctx The V4L2Context to dequeue from. ++ * @param[in] timeout1 A timeout on dequeuing the 1st buffer, ++ * all others have a timeout of zero ++ * @return AVERROR(EAGAIN) if timeout1 non-zero then the return ++ * of the first dequeue operation, 0 otherwise. ++ */ ++int ff_v4l2_dq_all(V4L2Context *const ctx, int timeout1); ++ ++/** ++ * Returns the number of buffers currently queued ++ * ++ * @param[in] ctx The V4L2Context to evaluate ++ */ ++static inline int ++ff_v4l2_context_q_count(const V4L2Context* const ctx) ++{ ++ return atomic_load(&ctx->q_count); ++} ++ + #endif // AVCODEC_V4L2_CONTEXT_H +--- a/libavcodec/v4l2_fmt.c ++++ b/libavcodec/v4l2_fmt.c +@@ -42,6 +42,14 @@ static const struct fmt_conversion { + { AV_FMT(RGB24), AV_CODEC(RAWVIDEO), V4L2_FMT(RGB24) }, + { AV_FMT(BGR0), AV_CODEC(RAWVIDEO), V4L2_FMT(BGR32) }, + { AV_FMT(0RGB), AV_CODEC(RAWVIDEO), V4L2_FMT(RGB32) }, ++ { AV_FMT(BGR0), AV_CODEC(RAWVIDEO), V4L2_FMT(BGRX32) }, ++ { AV_FMT(RGB0), AV_CODEC(RAWVIDEO), V4L2_FMT(RGBX32) }, ++ { AV_FMT(0BGR), AV_CODEC(RAWVIDEO), V4L2_FMT(XBGR32) }, ++ { AV_FMT(0RGB), AV_CODEC(RAWVIDEO), V4L2_FMT(XRGB32) }, ++ { AV_FMT(BGRA), AV_CODEC(RAWVIDEO), V4L2_FMT(BGRA32) }, ++ { AV_FMT(RGBA), AV_CODEC(RAWVIDEO), V4L2_FMT(RGBA32) }, ++ { AV_FMT(ABGR), AV_CODEC(RAWVIDEO), V4L2_FMT(ABGR32) }, ++ { AV_FMT(ARGB), AV_CODEC(RAWVIDEO), V4L2_FMT(ARGB32) }, + { AV_FMT(GRAY8), AV_CODEC(RAWVIDEO), V4L2_FMT(GREY) }, + { AV_FMT(YUV420P), AV_CODEC(RAWVIDEO), V4L2_FMT(YUV420) }, + { AV_FMT(YUYV422), AV_CODEC(RAWVIDEO), V4L2_FMT(YUYV) }, +--- a/libavcodec/v4l2_m2m.c ++++ b/libavcodec/v4l2_m2m.c +@@ -34,6 +34,15 @@ + #include "v4l2_context.h" + #include "v4l2_fmt.h" + #include "v4l2_m2m.h" ++#include "v4l2_req_dmabufs.h" ++ ++static void ++xlat_init(xlat_track_t * const x) ++{ ++ memset(x, 0, sizeof(*x)); ++ x->last_pts = AV_NOPTS_VALUE; ++} ++ + + static inline int v4l2_splane_video(struct v4l2_capability *cap) + { +@@ -67,7 +76,9 @@ static int v4l2_prepare_contexts(V4L2m2m + + s->capture.done = s->output.done = 0; + s->capture.name = "capture"; ++ s->capture.buf_mem = s->db_ctl != NULL ? V4L2_MEMORY_DMABUF : V4L2_MEMORY_MMAP; + s->output.name = "output"; ++ s->output.buf_mem = s->input_drm ? V4L2_MEMORY_DMABUF : V4L2_MEMORY_MMAP; + atomic_init(&s->refcount, 0); + sem_init(&s->refsync, 0, 0); + +@@ -84,18 +95,58 @@ static int v4l2_prepare_contexts(V4L2m2m + if (v4l2_mplane_video(&cap)) { + s->capture.type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE; + s->output.type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE; ++ s->output.format.type = s->output.type; + return 0; + } + + if (v4l2_splane_video(&cap)) { + s->capture.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + s->output.type = V4L2_BUF_TYPE_VIDEO_OUTPUT; ++ s->output.format.type = s->output.type; + return 0; + } + + return AVERROR(EINVAL); + } + ++static int check_size(AVCodecContext * const avctx, V4L2m2mContext * const s) ++{ ++ struct v4l2_format fmt = {.type = s->output.type}; ++ int rv; ++ uint32_t pixfmt = ff_v4l2_format_avfmt_to_v4l2(avctx->pix_fmt); ++ unsigned int w; ++ unsigned int h; ++ ++ if (V4L2_TYPE_IS_MULTIPLANAR(fmt.type)) { ++ fmt.fmt.pix_mp.pixelformat = pixfmt; ++ fmt.fmt.pix_mp.width = avctx->width; ++ fmt.fmt.pix_mp.height = avctx->height; ++ } ++ else { ++ fmt.fmt.pix.pixelformat = pixfmt; ++ fmt.fmt.pix.width = avctx->width; ++ fmt.fmt.pix.height = avctx->height; ++ } ++ ++ rv = ioctl(s->fd, VIDIOC_TRY_FMT, &fmt); ++ ++ if (rv != 0) { ++ rv = AVERROR(errno); ++ av_log(avctx, AV_LOG_ERROR, "%s: Tryfmt failed: %s\n", __func__, av_err2str(rv)); ++ return rv; ++ } ++ ++ w = ff_v4l2_get_format_width(&fmt); ++ h = ff_v4l2_get_format_height(&fmt); ++ ++ if (w < avctx->width || h < avctx->height) { ++ av_log(avctx, AV_LOG_WARNING, "%s: Size check failed: asked for %dx%d, got: %dx%d\n", __func__, avctx->width, avctx->height, w, h); ++ return AVERROR(EINVAL); ++ } ++ ++ return 0; ++} ++ + static int v4l2_probe_driver(V4L2m2mContext *s) + { + void *log_ctx = s->avctx; +@@ -115,6 +166,11 @@ static int v4l2_probe_driver(V4L2m2mCont + goto done; + } + ++ // If being given frames (encode) check that V4L2 can cope with the size ++ if (s->output.av_codec_id == AV_CODEC_ID_RAWVIDEO && ++ (ret = check_size(s->avctx, s)) != 0) ++ goto done; ++ + ret = ff_v4l2_context_get_format(&s->capture, 1); + if (ret) { + av_log(log_ctx, AV_LOG_DEBUG, "v4l2 capture format not supported\n"); +@@ -214,13 +270,7 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mCont + av_log(log_ctx, AV_LOG_ERROR, "capture VIDIOC_STREAMOFF\n"); + + /* 2. unmap the capture buffers (v4l2 and ffmpeg): +- * we must wait for all references to be released before being allowed +- * to queue new buffers. + */ +- av_log(log_ctx, AV_LOG_DEBUG, "waiting for user to release AVBufferRefs\n"); +- if (atomic_load(&s->refcount)) +- while(sem_wait(&s->refsync) == -1 && errno == EINTR); +- + ff_v4l2_context_release(&s->capture); + + /* 3. get the new capture format */ +@@ -239,7 +289,6 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mCont + + /* 5. complete reinit */ + s->draining = 0; +- s->reinit = 0; + + return 0; + } +@@ -256,6 +305,9 @@ static void v4l2_m2m_destroy_context(voi + av_frame_unref(s->frame); + av_frame_free(&s->frame); + av_packet_unref(&s->buf_pkt); ++ av_freep(&s->extdata_data); ++ ++ av_log(s->avctx, AV_LOG_DEBUG, "V4L2 Context destroyed\n"); + + av_free(s); + } +@@ -268,6 +320,11 @@ int ff_v4l2_m2m_codec_end(V4L2m2mPriv *p + if (!s) + return 0; + ++ av_log(s->avctx, AV_LOG_DEBUG, "V4L2 Codec end\n"); ++ ++ if (s->avctx && av_codec_is_decoder(s->avctx->codec)) ++ av_packet_unref(&s->buf_pkt); ++ + if (s->fd >= 0) { + ret = ff_v4l2_context_set_status(&s->output, VIDIOC_STREAMOFF); + if (ret) +@@ -279,8 +336,20 @@ int ff_v4l2_m2m_codec_end(V4L2m2mPriv *p + } + + ff_v4l2_context_release(&s->output); ++ av_buffer_unref(&s->device_ref); ++ ++ dmabufs_ctl_unref(&s->db_ctl); ++ ++ if (s->fd != -1) { ++ close(s->fd); ++ s->fd = -1; ++ } + + s->self_ref = NULL; ++ // This is only called on avctx close so after this point we don't have that ++ // Crash sooner if we find we are using it (can still log with avctx = NULL) ++ s->avctx = NULL; ++ priv->context = NULL; + av_buffer_unref(&priv->context_ref); + + return 0; +@@ -324,35 +393,38 @@ int ff_v4l2_m2m_codec_init(V4L2m2mPriv * + return v4l2_configure_contexts(s); + } + +-int ff_v4l2_m2m_create_context(V4L2m2mPriv *priv, V4L2m2mContext **s) ++int ff_v4l2_m2m_create_context(V4L2m2mPriv *priv, V4L2m2mContext **pps) + { +- *s = av_mallocz(sizeof(V4L2m2mContext)); +- if (!*s) ++ V4L2m2mContext * const s = av_mallocz(sizeof(V4L2m2mContext)); ++ ++ *pps = NULL; ++ if (!s) + return AVERROR(ENOMEM); + +- priv->context_ref = av_buffer_create((uint8_t *) *s, sizeof(V4L2m2mContext), ++ priv->context_ref = av_buffer_create((uint8_t *)s, sizeof(*s), + &v4l2_m2m_destroy_context, NULL, 0); + if (!priv->context_ref) { +- av_freep(s); ++ av_free(s); + return AVERROR(ENOMEM); + } + + /* assign the context */ +- priv->context = *s; +- (*s)->priv = priv; ++ priv->context = s; ++ s->priv = priv; + + /* populate it */ +- priv->context->capture.num_buffers = priv->num_capture_buffers; +- priv->context->output.num_buffers = priv->num_output_buffers; +- priv->context->self_ref = priv->context_ref; +- priv->context->fd = -1; ++ s->capture.num_buffers = priv->num_capture_buffers; ++ s->output.num_buffers = priv->num_output_buffers; ++ s->self_ref = priv->context_ref; ++ s->fd = -1; ++ xlat_init(&s->xlat); + + priv->context->frame = av_frame_alloc(); + if (!priv->context->frame) { + av_buffer_unref(&priv->context_ref); +- *s = NULL; /* freed when unreferencing context_ref */ + return AVERROR(ENOMEM); + } + ++ *pps = s; + return 0; + } +--- a/libavcodec/v4l2_m2m.h ++++ b/libavcodec/v4l2_m2m.h +@@ -30,6 +30,7 @@ + #include <linux/videodev2.h> + + #include "libavcodec/avcodec.h" ++#include "libavutil/pixfmt.h" + #include "v4l2_context.h" + + #define container_of(ptr, type, member) ({ \ +@@ -38,7 +39,39 @@ + + #define V4L_M2M_DEFAULT_OPTS \ + { "num_output_buffers", "Number of buffers in the output context",\ +- OFFSET(num_output_buffers), AV_OPT_TYPE_INT, { .i64 = 16 }, 6, INT_MAX, FLAGS } ++ OFFSET(num_output_buffers), AV_OPT_TYPE_INT, { .i64 = 16 }, 2, INT_MAX, FLAGS } ++ ++#define FF_V4L2_M2M_TRACK_SIZE 128 ++typedef struct V4L2m2mTrackEl { ++ int discard; // If we see this buffer its been flushed, so discard ++ int pending; ++ int pkt_size; ++ int64_t pts; ++ int64_t dts; ++ int64_t reordered_opaque; ++ int64_t pkt_pos; ++ int64_t pkt_duration; ++ int64_t track_pts; ++} V4L2m2mTrackEl; ++ ++typedef struct pts_stats_s ++{ ++ void * logctx; ++ const char * name; // For debug ++ unsigned int last_count; ++ unsigned int last_interval; ++ int64_t last_pts; ++ int64_t guess; ++} pts_stats_t; ++ ++typedef struct xlat_track_s { ++ unsigned int track_no; ++ int64_t last_pts; // Last valid PTS decoded ++ int64_t last_opaque; ++ V4L2m2mTrackEl track_els[FF_V4L2_M2M_TRACK_SIZE]; ++} xlat_track_t; ++ ++struct dmabufs_ctl; + + typedef struct V4L2m2mContext { + char devname[PATH_MAX]; +@@ -52,10 +85,10 @@ typedef struct V4L2m2mContext { + AVCodecContext *avctx; + sem_t refsync; + atomic_uint refcount; +- int reinit; + + /* null frame/packet received */ + int draining; ++ int running; + AVPacket buf_pkt; + + /* Reference to a frame. Only used during encoding */ +@@ -66,6 +99,36 @@ typedef struct V4L2m2mContext { + + /* reference back to V4L2m2mPriv */ + void *priv; ++ ++ AVBufferRef *device_ref; ++ ++ /* generate DRM frames */ ++ int output_drm; ++ ++ /* input frames are drmprime */ ++ int input_drm; ++ ++ /* Frame tracking */ ++ xlat_track_t xlat; ++ ++ pts_stats_t pts_stat; ++ ++ /* req pkt */ ++ int req_pkt; ++ int reorder_size; ++ ++ /* Ext data sent */ ++ int extdata_sent; ++ /* Ext data sent in packet - overrides ctx */ ++ void * extdata_data; ++ size_t extdata_size; ++ ++#define FF_V4L2_QUIRK_REINIT_ALWAYS 1 ++#define FF_V4L2_QUIRK_ENUM_FRAMESIZES_BROKEN 2 ++ /* Quirks */ ++ unsigned int quirks; ++ ++ struct dmabufs_ctl * db_ctl; + } V4L2m2mContext; + + typedef struct V4L2m2mPriv { +@@ -76,6 +139,8 @@ typedef struct V4L2m2mPriv { + + int num_output_buffers; + int num_capture_buffers; ++ const char * dmabuf_alloc; ++ enum AVPixelFormat pix_fmt; + } V4L2m2mPriv; + + /** +@@ -129,4 +194,26 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mCont + */ + int ff_v4l2_m2m_codec_full_reinit(V4L2m2mContext *ctx); + ++ ++static inline unsigned int ff_v4l2_get_format_width(const struct v4l2_format * const fmt) ++{ ++ return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.width : fmt->fmt.pix.width; ++} ++ ++static inline unsigned int ff_v4l2_get_format_height(const struct v4l2_format * const fmt) ++{ ++ return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.height : fmt->fmt.pix.height; ++} ++ ++static inline uint32_t ff_v4l2_get_format_pixelformat(const struct v4l2_format * const fmt) ++{ ++ return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.pixelformat : fmt->fmt.pix.pixelformat; ++} ++ ++static inline int ff_v4l2_ctx_eos(const V4L2Context * const ctx) ++{ ++ return ctx->flag_last; ++} ++ ++ + #endif /* AVCODEC_V4L2_M2M_H */ +--- a/libavcodec/v4l2_m2m_dec.c ++++ b/libavcodec/v4l2_m2m_dec.c +@@ -21,8 +21,14 @@ + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + ++#include "config_components.h" ++ + #include <linux/videodev2.h> + #include <sys/ioctl.h> ++ ++#include "libavutil/avassert.h" ++#include "libavutil/hwcontext.h" ++#include "libavutil/hwcontext_drm.h" + #include "libavutil/pixfmt.h" + #include "libavutil/pixdesc.h" + #include "libavutil/opt.h" +@@ -30,75 +36,279 @@ + #include "codec_internal.h" + #include "libavcodec/decode.h" + ++#include "libavcodec/hwaccels.h" ++#include "libavcodec/internal.h" ++#include "libavcodec/hwconfig.h" ++ + #include "v4l2_context.h" + #include "v4l2_m2m.h" + #include "v4l2_fmt.h" ++#include "v4l2_req_dmabufs.h" + +-static int v4l2_try_start(AVCodecContext *avctx) ++#if CONFIG_H264_DECODER ++#include "h264_parse.h" ++#endif ++#if CONFIG_HEVC_DECODER ++#include "hevc_parse.h" ++#endif ++ ++// Pick 64 for max last count - that is >1sec at 60fps ++#define STATS_LAST_COUNT_MAX 64 ++#define STATS_INTERVAL_MAX (1 << 30) ++ ++#ifndef FF_API_BUFFER_SIZE_T ++#define FF_API_BUFFER_SIZE_T 1 ++#endif ++ ++#define DUMP_FAILED_EXTRADATA 0 ++ ++#if DUMP_FAILED_EXTRADATA ++static inline char hex1(unsigned int x) + { +- V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context; +- V4L2Context *const capture = &s->capture; +- V4L2Context *const output = &s->output; +- struct v4l2_selection selection = { 0 }; +- int ret; ++ x &= 0xf; ++ return x <= 9 ? '0' + x : 'a' + x - 10; ++} + +- /* 1. start the output process */ +- if (!output->streamon) { +- ret = ff_v4l2_context_set_status(output, VIDIOC_STREAMON); +- if (ret < 0) { +- av_log(avctx, AV_LOG_DEBUG, "VIDIOC_STREAMON on output context\n"); +- return ret; +- } ++static inline char * hex2(char * s, unsigned int x) ++{ ++ *s++ = hex1(x >> 4); ++ *s++ = hex1(x); ++ return s; ++} ++ ++static inline char * hex4(char * s, unsigned int x) ++{ ++ s = hex2(s, x >> 8); ++ s = hex2(s, x); ++ return s; ++} ++ ++static inline char * dash2(char * s) ++{ ++ *s++ = '-'; ++ *s++ = '-'; ++ return s; ++} ++ ++static void ++data16(char * s, const unsigned int offset, const uint8_t * m, const size_t len) ++{ ++ size_t i; ++ s = hex4(s, offset); ++ m += offset; ++ for (i = 0; i != 8; ++i) { ++ *s++ = ' '; ++ s = len > i + offset ? hex2(s, *m++) : dash2(s); + } ++ *s++ = ' '; ++ *s++ = ':'; ++ for (; i != 16; ++i) { ++ *s++ = ' '; ++ s = len > i + offset ? hex2(s, *m++) : dash2(s); ++ } ++ *s++ = 0; ++} + +- if (capture->streamon) +- return 0; ++static void ++log_dump(void * logctx, int lvl, const void * const data, const size_t len) ++{ ++ size_t i; ++ for (i = 0; i < len; i += 16) { ++ char buf[80]; ++ data16(buf, i, data, len); ++ av_log(logctx, lvl, "%s\n", buf); ++ } ++} ++#endif + +- /* 2. get the capture format */ +- capture->format.type = capture->type; +- ret = ioctl(s->fd, VIDIOC_G_FMT, &capture->format); +- if (ret) { +- av_log(avctx, AV_LOG_WARNING, "VIDIOC_G_FMT ioctl\n"); +- return ret; ++static unsigned int pts_stats_interval(const pts_stats_t * const stats) ++{ ++ return stats->last_interval; ++} ++ ++static int64_t pts_stats_guess(const pts_stats_t * const stats, const int fail_bad_guess) ++{ ++ if (stats->last_count <= 1) ++ return stats->last_pts; ++ if (stats->last_pts == AV_NOPTS_VALUE || ++ fail_bad_guess && (stats->last_interval == 0 || ++ stats->last_count >= STATS_LAST_COUNT_MAX)) ++ return AV_NOPTS_VALUE; ++ return stats->last_pts + (int64_t)(stats->last_count - 1) * (int64_t)stats->last_interval; ++} ++ ++static void pts_stats_add(pts_stats_t * const stats, int64_t pts) ++{ ++ if (pts == AV_NOPTS_VALUE || pts == stats->last_pts) { ++ if (stats->last_count < STATS_LAST_COUNT_MAX) ++ ++stats->last_count; ++ return; + } + +- /* 2.1 update the AVCodecContext */ +- avctx->pix_fmt = ff_v4l2_format_v4l2_to_avfmt(capture->format.fmt.pix_mp.pixelformat, AV_CODEC_ID_RAWVIDEO); +- capture->av_pix_fmt = avctx->pix_fmt; ++ if (stats->last_pts != AV_NOPTS_VALUE) { ++ const int64_t interval = pts - stats->last_pts; + +- /* 3. set the crop parameters */ +- selection.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; +- selection.r.height = avctx->coded_height; +- selection.r.width = avctx->coded_width; +- ret = ioctl(s->fd, VIDIOC_S_SELECTION, &selection); +- if (!ret) { +- ret = ioctl(s->fd, VIDIOC_G_SELECTION, &selection); +- if (ret) { +- av_log(avctx, AV_LOG_WARNING, "VIDIOC_G_SELECTION ioctl\n"); +- } else { +- av_log(avctx, AV_LOG_DEBUG, "crop output %dx%d\n", selection.r.width, selection.r.height); +- /* update the size of the resulting frame */ +- capture->height = selection.r.height; +- capture->width = selection.r.width; ++ if (interval < 0 || interval >= STATS_INTERVAL_MAX || ++ stats->last_count >= STATS_LAST_COUNT_MAX) { ++ if (stats->last_interval != 0) ++ av_log(stats->logctx, AV_LOG_DEBUG, "%s: %s: Bad interval: %" PRId64 "/%d\n", ++ __func__, stats->name, interval, stats->last_count); ++ stats->last_interval = 0; ++ } ++ else { ++ const int64_t frame_time = interval / (int64_t)stats->last_count; ++ ++ if (frame_time != stats->last_interval) ++ av_log(stats->logctx, AV_LOG_DEBUG, "%s: %s: New interval: %u->%" PRId64 "/%d=%" PRId64 "\n", ++ __func__, stats->name, stats->last_interval, interval, stats->last_count, frame_time); ++ stats->last_interval = frame_time; + } + } + +- /* 4. init the capture context now that we have the capture format */ +- if (!capture->buffers) { +- ret = ff_v4l2_context_init(capture); +- if (ret) { +- av_log(avctx, AV_LOG_ERROR, "can't request capture buffers\n"); +- return AVERROR(ENOMEM); ++ stats->last_pts = pts; ++ stats->last_count = 1; ++} ++ ++static void pts_stats_init(pts_stats_t * const stats, void * logctx, const char * name) ++{ ++ *stats = (pts_stats_t){ ++ .logctx = logctx, ++ .name = name, ++ .last_count = 1, ++ .last_interval = 0, ++ .last_pts = AV_NOPTS_VALUE ++ }; ++} ++ ++// If abdata == NULL then this just counts space required ++// Unpacks avcC if detected ++static int ++h264_xd_copy(const uint8_t * const extradata, const int extrasize, uint8_t * abdata) ++{ ++ const uint8_t * const xdend = extradata + extrasize; ++ const uint8_t * p = extradata; ++ uint8_t * d = abdata; ++ unsigned int n; ++ unsigned int len; ++ const unsigned int hdrlen = 4; ++ unsigned int need_pps = 1; ++ ++ if (extrasize < 8) ++ return AVERROR(EINVAL); ++ ++ if (p[0] == 0 && p[1] == 0) { ++ // Assume a couple of leading zeros are good enough to indicate NAL ++ if (abdata) ++ memcpy(d, p, extrasize); ++ return extrasize; ++ } ++ ++ // avcC starts with a 1 ++ if (p[0] != 1) ++ return AVERROR(EINVAL); ++ ++ p += 5; ++ n = *p++ & 0x1f; ++ ++doxps: ++ while (n--) { ++ if (xdend - p < 2) ++ return AVERROR(EINVAL); ++ len = (p[0] << 8) | p[1]; ++ p += 2; ++ if (xdend - p < (ptrdiff_t)len) ++ return AVERROR(EINVAL); ++ if (abdata) { ++ d[0] = 0; ++ d[1] = 0; ++ d[2] = 0; ++ d[3] = 1; ++ memcpy(d + 4, p, len); + } ++ d += len + hdrlen; ++ p += len; ++ } ++ if (need_pps) { ++ need_pps = 0; ++ if (p >= xdend) ++ return AVERROR(EINVAL); ++ n = *p++; ++ goto doxps; + } + +- /* 5. start the capture process */ +- ret = ff_v4l2_context_set_status(capture, VIDIOC_STREAMON); +- if (ret) { +- av_log(avctx, AV_LOG_DEBUG, "VIDIOC_STREAMON, on capture context\n"); ++ return d - abdata; ++} ++ ++static int ++copy_extradata(AVCodecContext * const avctx, ++ const void * const src_data, const int src_len, ++ void ** const pdst_data, size_t * const pdst_len) ++{ ++ int len; ++ ++ *pdst_len = 0; ++ av_freep(pdst_data); ++ ++ if (avctx->codec_id == AV_CODEC_ID_H264) ++ len = h264_xd_copy(src_data, src_len, NULL); ++ else ++ len = src_len < 0 ? AVERROR(EINVAL) : src_len; ++ ++ // Zero length is OK but we want to stop - -ve is error val ++ if (len <= 0) ++ return len; ++ ++ if ((*pdst_data = av_malloc(len + AV_INPUT_BUFFER_PADDING_SIZE)) == NULL) ++ return AVERROR(ENOMEM); ++ ++ if (avctx->codec_id == AV_CODEC_ID_H264) ++ h264_xd_copy(src_data, src_len, *pdst_data); ++ else ++ memcpy(*pdst_data, src_data, len); ++ *pdst_len = len; ++ ++ return 0; ++} ++ ++ ++ ++static int check_output_streamon(AVCodecContext *const avctx, V4L2m2mContext *const s) ++{ ++ int ret; ++ struct v4l2_decoder_cmd cmd = { ++ .cmd = V4L2_DEC_CMD_START, ++ .flags = 0, ++ }; ++ ++ if (s->output.streamon) ++ return 0; ++ ++ ret = ff_v4l2_context_set_status(&s->output, VIDIOC_STREAMON); ++ if (ret != 0) { ++ av_log(avctx, AV_LOG_ERROR, "VIDIOC_STREAMON on output context: %s\n", av_err2str(ret)); + return ret; + } + ++ // STREAMON should do implicit START so this just for those that don't. ++ // It is optional so don't worry if it fails ++ if (ioctl(s->fd, VIDIOC_DECODER_CMD, &cmd) < 0) { ++ ret = AVERROR(errno); ++ av_log(avctx, AV_LOG_WARNING, "VIDIOC_DECODER_CMD start error: %s\n", av_err2str(ret)); ++ } ++ else { ++ av_log(avctx, AV_LOG_TRACE, "VIDIOC_DECODER_CMD start OK\n"); ++ } ++ return 0; ++} ++ ++static int v4l2_try_start(AVCodecContext *avctx) ++{ ++ V4L2m2mContext * const s = ((V4L2m2mPriv*)avctx->priv_data)->context; ++ int ret; ++ ++ /* 1. start the output process */ ++ if ((ret = check_output_streamon(avctx, s)) != 0) ++ return ret; + return 0; + } + +@@ -133,51 +343,823 @@ static int v4l2_prepare_decoder(V4L2m2mC + return 0; + } + +-static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) ++static void ++set_best_effort_pts(AVCodecContext *const avctx, ++ pts_stats_t * const ps, ++ AVFrame *const frame) ++{ ++ pts_stats_add(ps, frame->pts); ++ ++ frame->best_effort_timestamp = pts_stats_guess(ps, 1); ++ // If we can't guess from just PTS - try DTS ++ if (frame->best_effort_timestamp == AV_NOPTS_VALUE) ++ frame->best_effort_timestamp = frame->pkt_dts; ++ ++ // We can't emulate what s/w does in a useful manner and using the ++ // "correct" answer seems to just confuse things. ++ frame->pkt_dts = frame->pts; ++ av_log(avctx, AV_LOG_TRACE, "Out PTS=%" PRId64 "/%"PRId64", DTS=%" PRId64 "\n", ++ frame->pts, frame->best_effort_timestamp, frame->pkt_dts); ++} ++ ++static void ++xlat_flush(xlat_track_t * const x) ++{ ++ unsigned int i; ++ // Do not reset track_no - this ensures that any frames left in the decoder ++ // that turn up later get discarded. ++ ++ x->last_pts = AV_NOPTS_VALUE; ++ x->last_opaque = 0; ++ for (i = 0; i != FF_V4L2_M2M_TRACK_SIZE; ++i) { ++ x->track_els[i].pending = 0; ++ x->track_els[i].discard = 1; ++ } ++} ++ ++static void ++xlat_init(xlat_track_t * const x) ++{ ++ memset(x, 0, sizeof(*x)); ++ xlat_flush(x); ++} ++ ++static int ++xlat_pending(const V4L2m2mContext * const s) ++{ ++ const xlat_track_t *const x = &s->xlat; ++ unsigned int n = x->track_no % FF_V4L2_M2M_TRACK_SIZE; ++ int i; ++ const int64_t now = pts_stats_guess(&s->pts_stat, 0); ++ int64_t first_dts = AV_NOPTS_VALUE; ++ int no_dts_count = 0; ++ unsigned int interval = pts_stats_interval(&s->pts_stat); ++ ++ for (i = 0; i < FF_V4L2_M2M_TRACK_SIZE; ++i, n = (n - 1) & (FF_V4L2_M2M_TRACK_SIZE - 1)) { ++ const V4L2m2mTrackEl * const t = x->track_els + n; ++ ++ if (first_dts == AV_NOPTS_VALUE) ++ if (t->dts == AV_NOPTS_VALUE) ++ ++no_dts_count; ++ else ++ first_dts = t->dts; ++ ++ // Discard only set on never-set or flushed entries ++ // So if we get here we've never successfully decoded a frame so allow ++ // more frames into the buffer before stalling ++ if (t->discard) ++ return i - 16; ++ ++ // If we've got this frame out then everything before this point ++ // must have entered the decoder ++ if (!t->pending) ++ break; ++ ++ // If we've never seen a pts all we can do is count frames ++ if (now == AV_NOPTS_VALUE) ++ continue; ++ ++ if (t->dts != AV_NOPTS_VALUE && now >= t->dts) ++ break; ++ } ++ ++ if (first_dts != AV_NOPTS_VALUE && now != AV_NOPTS_VALUE && interval != 0 && s->reorder_size != 0) { ++ const int iframes = (first_dts - now) / (int)interval; ++ const int t = iframes - s->reorder_size + no_dts_count; ++ ++// av_log(s->avctx, AV_LOG_DEBUG, "Last:%"PRId64", Now:%"PRId64", First:%"PRId64", delta=%"PRId64", frames=%d, nodts=%d\n", ++// x->last_dts, now, first_dts, first_dts - now, iframes, no_dts_count); ++ ++ if (iframes > 0 && iframes < 64 && t < i) { ++ return t; ++ } ++ } ++ ++ return i; ++} ++ ++static inline int stream_started(const V4L2m2mContext * const s) { ++ return s->output.streamon; ++} ++ ++#define NQ_OK 0 ++#define NQ_Q_FULL 1 ++#define NQ_SRC_EMPTY 2 ++#define NQ_NONE 3 ++#define NQ_DRAINING 4 ++#define NQ_DEAD 5 ++ ++#define TRY_DQ(nq_status) ((nq_status) >= NQ_OK && (nq_status) <= NQ_DRAINING) ++#define RETRY_NQ(nq_status) ((nq_status) == NQ_Q_FULL || (nq_status) == NQ_NONE) ++ ++// do_not_get If true then no new packet will be got but status will ++// be set appropriately ++ ++// AVERROR_EOF Flushing an already flushed stream ++// -ve Error (all errors except EOF are unexpected) ++// NQ_OK (0) OK ++// NQ_Q_FULL Dst full (retry if we think V4L2 Q has space now) ++// NQ_SRC_EMPTY Src empty (do not retry) ++// NQ_NONE Enqueue not attempted ++// NQ_DRAINING At EOS, dQ dest until EOS there too ++// NQ_DEAD Not running (do not retry, do not attempt capture dQ) ++ ++static int try_enqueue_src(AVCodecContext * const avctx, V4L2m2mContext * const s, const int do_not_get) + { +- V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context; +- V4L2Context *const capture = &s->capture; +- V4L2Context *const output = &s->output; + int ret; + +- if (!s->buf_pkt.size) { +- ret = ff_decode_get_packet(avctx, &s->buf_pkt); ++ // If we don't already have a coded packet - get a new one ++ // We will already have a coded pkt if the output Q was full last time we ++ // tried to Q it ++ if (!s->buf_pkt.size && !do_not_get) { ++ unsigned int i; ++ ++ for (i = 0; i < 256; ++i) { ++ uint8_t * side_data; ++ size_t side_size; ++ ++ ret = ff_decode_get_packet(avctx, &s->buf_pkt); ++ if (ret != 0) ++ break; ++ ++ // New extradata is the only side-data we undertand ++ side_data = av_packet_get_side_data(&s->buf_pkt, AV_PKT_DATA_NEW_EXTRADATA, &side_size); ++ if (side_data) { ++ av_log(avctx, AV_LOG_DEBUG, "New extradata\n"); ++ if ((ret = copy_extradata(avctx, side_data, (int)side_size, &s->extdata_data, &s->extdata_size)) < 0) ++ av_log(avctx, AV_LOG_WARNING, "Failed to copy new extra data: %s\n", av_err2str(ret)); ++ s->extdata_sent = 0; ++ } ++ ++ if (s->buf_pkt.size != 0) ++ break; ++ ++ if (s->buf_pkt.side_data_elems == 0) { ++ av_log(avctx, AV_LOG_WARNING, "Empty pkt from ff_decode_get_packet - treating as EOF\n"); ++ ret = AVERROR_EOF; ++ break; ++ } ++ ++ // Retry a side-data only pkt ++ } ++ // If i >= 256 something has gone wrong ++ if (i >= 256) { ++ av_log(avctx, AV_LOG_ERROR, "Too many side-data only packets\n"); ++ return AVERROR(EIO); ++ } ++ ++ if (ret == AVERROR(EAGAIN)) { ++ if (!stream_started(s)) { ++ av_log(avctx, AV_LOG_TRACE, "%s: receive_frame before 1st coded packet\n", __func__); ++ return NQ_DEAD; ++ } ++ return NQ_SRC_EMPTY; ++ } ++ ++ if (ret == AVERROR_EOF) { ++ // EOF - enter drain mode ++ av_log(avctx, AV_LOG_TRACE, "--- EOS req: ret=%d, size=%d, started=%d, drain=%d\n", ++ ret, s->buf_pkt.size, stream_started(s), s->draining); ++ if (!stream_started(s)) { ++ av_log(avctx, AV_LOG_DEBUG, "EOS on flushed stream\n"); ++ s->draining = 1; ++ s->capture.done = 1; ++ return AVERROR_EOF; ++ } ++ ++ if (!s->draining) { ++ // Calling enqueue with an empty pkt starts drain ++ av_assert0(s->buf_pkt.size == 0); ++ ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, NULL, 0); ++ if (ret) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to start drain: ret=%d\n", ret); ++ return ret; ++ } ++ } ++ return NQ_DRAINING; ++ } ++ + if (ret < 0) { +- if (ret == AVERROR(EAGAIN)) +- return ff_v4l2_context_dequeue_frame(capture, frame, 0); +- else if (ret != AVERROR_EOF) +- return ret; ++ av_log(avctx, AV_LOG_ERROR, "Failed to get coded packet: err=%d\n", ret); ++ return ret; + } + } + +- if (s->draining) +- goto dequeue; ++ if (s->draining) { ++ if (s->buf_pkt.size) { ++ av_log(avctx, AV_LOG_WARNING, "Unexpected input whilst draining\n"); ++ av_packet_unref(&s->buf_pkt); ++ } ++ return NQ_DRAINING; ++ } ++ ++ if (!s->buf_pkt.size) ++ return NQ_NONE; + +- ret = ff_v4l2_context_enqueue_packet(output, &s->buf_pkt); +- if (ret < 0 && ret != AVERROR(EAGAIN)) +- goto fail; ++ if ((ret = check_output_streamon(avctx, s)) != 0) ++ return ret; + +- /* if EAGAIN don't unref packet and try to enqueue in the next iteration */ +- if (ret != AVERROR(EAGAIN)) ++ if (s->extdata_sent) ++ ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, NULL, 0); ++ else ++ ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, s->extdata_data, s->extdata_size); ++ ++ if (ret == AVERROR(EAGAIN)) { ++ // Out of input buffers - keep packet ++ ret = NQ_Q_FULL; ++ } ++ else { ++ // In all other cases we are done with this packet + av_packet_unref(&s->buf_pkt); ++ s->extdata_sent = 1; + +- if (!s->draining) { +- ret = v4l2_try_start(avctx); + if (ret) { +- /* cant recover */ +- if (ret != AVERROR(ENOMEM)) +- ret = 0; +- goto fail; ++ av_log(avctx, AV_LOG_ERROR, "Packet enqueue failure: err=%d\n", ret); ++ return ret; ++ } ++ } ++ ++ // Start if we haven't ++ { ++ const int ret2 = v4l2_try_start(avctx); ++ if (ret2) { ++ av_log(avctx, AV_LOG_DEBUG, "Start failure: err=%d\n", ret2); ++ ret = (ret2 == AVERROR(ENOMEM)) ? ret2 : NQ_DEAD; + } + } + +-dequeue: +- return ff_v4l2_context_dequeue_frame(capture, frame, -1); +-fail: +- av_packet_unref(&s->buf_pkt); + return ret; + } + ++static int qbuf_wait(AVCodecContext * const avctx, V4L2Context * const ctx) ++{ ++ int rv = 0; ++ ++ ff_mutex_lock(&ctx->lock); ++ ++ while (atomic_load(&ctx->q_count) == 0 && ctx->streamon) { ++ if (pthread_cond_wait(&ctx->cond, &ctx->lock) != 0) { ++ rv = AVERROR(errno); ++ av_log(avctx, AV_LOG_ERROR, "Cond wait failure: %s\n", av_err2str(rv)); ++ break; ++ } ++ } ++ ++ ff_mutex_unlock(&ctx->lock); ++ return rv; ++} ++ ++static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) ++{ ++ V4L2m2mContext *const s = ((V4L2m2mPriv*)avctx->priv_data)->context; ++ int src_rv = -1; ++ int dst_rv = 1; // Non-zero (done), non-negative (error) number ++ unsigned int i = 0; ++ ++ do { ++ const int pending = xlat_pending(s); ++ const int prefer_dq = (pending > 4); ++ const int last_src_rv = src_rv; ++ ++ av_log(avctx, AV_LOG_TRACE, "Pending=%d, src_rv=%d, req_pkt=%d\n", pending, src_rv, s->req_pkt); ++ ++ // Enqueue another pkt for decode if ++ // (a) We don't have a lot of stuff in the buffer already OR ++ // (b) ... we (think we) do but we've failed to get a frame already OR ++ // (c) We've dequeued a lot of frames without asking for input ++ src_rv = try_enqueue_src(avctx, s, !(!prefer_dq || i != 0 || s->req_pkt > 2)); ++ ++ // If we got a frame last time or we've already tried to get a frame and ++ // we have nothing to enqueue then return now. rv will be AVERROR(EAGAIN) ++ // indicating that we want more input. ++ // This should mean that once decode starts we enter a stable state where ++ // we alternately ask for input and produce output ++ if ((i != 0 || s->req_pkt) && src_rv == NQ_SRC_EMPTY) ++ break; ++ ++ if (src_rv == NQ_Q_FULL && last_src_rv == NQ_Q_FULL) { ++ av_log(avctx, AV_LOG_WARNING, "Poll thinks src Q has space; none found\n"); ++ break; ++ } ++ ++ // Try to get a new frame if ++ // (a) we haven't already got one AND ++ // (b) enqueue returned a status indicating that decode should be attempted ++ if (dst_rv != 0 && TRY_DQ(src_rv)) { ++ // Pick a timeout depending on state ++ // The pending count isn't completely reliable so it is good enough ++ // hint that we want a frame but not good enough to require it in ++ // all cases; however if it has got > 31 that exceeds its margin of ++ // error so require a frame to prevent ridiculous levels of latency ++ const int t = ++ src_rv == NQ_Q_FULL ? -1 : ++ src_rv == NQ_DRAINING ? 300 : ++ prefer_dq ? (s->running && pending > 31 ? 100 : 5) : 0; ++ ++ // Dequeue frame will unref any previous contents of frame ++ // if it returns success so we don't need an explicit unref ++ // when discarding ++ // This returns AVERROR(EAGAIN) on timeout or if ++ // there is room in the input Q and timeout == -1 ++ dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, t); ++ ++ // Failure due to no buffer in Q? ++ if (dst_rv == AVERROR(ENOSPC)) { ++ // Wait & retry ++ if ((dst_rv = qbuf_wait(avctx, &s->capture)) == 0) { ++ dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, t); ++ } ++ } ++ ++ if (dst_rv == 0) { ++ set_best_effort_pts(avctx, &s->pts_stat, frame); ++ if (!s->running) { ++ s->running = 1; ++ av_log(avctx, AV_LOG_VERBOSE, "Decode running\n"); ++ } ++ } ++ ++ if (dst_rv == AVERROR(EAGAIN) && src_rv == NQ_DRAINING) { ++ av_log(avctx, AV_LOG_WARNING, "Timeout in drain - assume EOF"); ++ dst_rv = AVERROR_EOF; ++ s->capture.done = 1; ++ } ++ else if (dst_rv == AVERROR_EOF && (s->draining || s->capture.done)) ++ av_log(avctx, AV_LOG_DEBUG, "Dequeue EOF: draining=%d, cap.done=%d\n", ++ s->draining, s->capture.done); ++ else if (dst_rv && dst_rv != AVERROR(EAGAIN)) ++ av_log(avctx, AV_LOG_ERROR, "Packet dequeue failure: draining=%d, cap.done=%d, err=%d\n", ++ s->draining, s->capture.done, dst_rv); ++ } ++ ++ ++i; ++ if (i >= 256) { ++ av_log(avctx, AV_LOG_ERROR, "Unexpectedly large retry count: %d\n", i); ++ src_rv = AVERROR(EIO); ++ } ++ ++ // Continue trying to enqueue packets if either ++ // (a) we succeeded last time OR ++ // (b) we didn't ret a frame and we can retry the input ++ } while (src_rv == NQ_OK || (dst_rv == AVERROR(EAGAIN) && RETRY_NQ(src_rv))); ++ ++ // Ensure that the frame contains nothing if we aren't returning a frame ++ // (might happen when discarding) ++ if (dst_rv) ++ av_frame_unref(frame); ++ ++ // If we got a frame this time ask for a pkt next time ++ s->req_pkt = (dst_rv == 0) ? s->req_pkt + 1 : 0; ++ ++#if 0 ++ if (dst_rv == 0) ++ { ++ static int z = 0; ++ if (++z > 50) { ++ av_log(avctx, AV_LOG_ERROR, "Streamoff and die?\n"); ++ ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMOFF); ++ return -1; ++ } ++ } ++#endif ++ ++ return dst_rv == 0 ? 0 : ++ src_rv < 0 ? src_rv : ++ dst_rv < 0 ? dst_rv : ++ AVERROR(EAGAIN); ++} ++ ++#if 0 ++#include <time.h> ++static int64_t us_time(void) ++{ ++ struct timespec ts; ++ clock_gettime(CLOCK_MONOTONIC, &ts); ++ return (int64_t)ts.tv_sec * 1000000 + ts.tv_nsec / 1000; ++} ++ ++static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) ++{ ++ int ret; ++ const int64_t now = us_time(); ++ int64_t done; ++ av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__); ++ ret = v4l2_receive_frame2(avctx, frame); ++ done = us_time(); ++ av_log(avctx, AV_LOG_TRACE, ">>> %s: rx time=%" PRId64 ", rv=%d\n", __func__, done - now, ret); ++ return ret; ++} ++#endif ++ ++static uint32_t ++avprofile_to_v4l2(const enum AVCodecID codec_id, const int avprofile) ++{ ++ switch (codec_id) { ++ case AV_CODEC_ID_H264: ++ switch (avprofile) { ++ case FF_PROFILE_H264_BASELINE: ++ return V4L2_MPEG_VIDEO_H264_PROFILE_BASELINE; ++ case FF_PROFILE_H264_CONSTRAINED_BASELINE: ++ return V4L2_MPEG_VIDEO_H264_PROFILE_CONSTRAINED_BASELINE; ++ case FF_PROFILE_H264_MAIN: ++ return V4L2_MPEG_VIDEO_H264_PROFILE_MAIN; ++ case FF_PROFILE_H264_EXTENDED: ++ return V4L2_MPEG_VIDEO_H264_PROFILE_EXTENDED; ++ case FF_PROFILE_H264_HIGH: ++ return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH; ++ case FF_PROFILE_H264_HIGH_10: ++ return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_10; ++ case FF_PROFILE_H264_HIGH_10_INTRA: ++ return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_10_INTRA; ++ case FF_PROFILE_H264_MULTIVIEW_HIGH: ++ case FF_PROFILE_H264_HIGH_422: ++ return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_422; ++ case FF_PROFILE_H264_HIGH_422_INTRA: ++ return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_422_INTRA; ++ case FF_PROFILE_H264_STEREO_HIGH: ++ return V4L2_MPEG_VIDEO_H264_PROFILE_STEREO_HIGH; ++ case FF_PROFILE_H264_HIGH_444_PREDICTIVE: ++ return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_444_PREDICTIVE; ++ case FF_PROFILE_H264_HIGH_444_INTRA: ++ return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_444_INTRA; ++ case FF_PROFILE_H264_CAVLC_444: ++ return V4L2_MPEG_VIDEO_H264_PROFILE_CAVLC_444_INTRA; ++ case FF_PROFILE_H264_HIGH_444: ++ default: ++ break; ++// V4L2_MPEG_VIDEO_H264_PROFILE_SCALABLE_BASELINE = 12, ++// V4L2_MPEG_VIDEO_H264_PROFILE_SCALABLE_HIGH = 13, ++// V4L2_MPEG_VIDEO_H264_PROFILE_SCALABLE_HIGH_INTRA = 14, ++// V4L2_MPEG_VIDEO_H264_PROFILE_MULTIVIEW_HIGH = 16, ++// V4L2_MPEG_VIDEO_H264_PROFILE_CONSTRAINED_HIGH = 17, ++ } ++ break; ++ case AV_CODEC_ID_MPEG2VIDEO: ++ case AV_CODEC_ID_MPEG4: ++ case AV_CODEC_ID_VC1: ++ case AV_CODEC_ID_VP8: ++ case AV_CODEC_ID_VP9: ++ case AV_CODEC_ID_AV1: ++ // Most profiles are a simple number that matches the V4L2 enum ++ return avprofile; ++ default: ++ break; ++ } ++ return ~(uint32_t)0; ++} ++ ++// This check mirrors Chrome's profile check by testing to see if the profile ++// exists as a possible value for the V4L2 profile control ++static int ++check_profile(AVCodecContext *const avctx, V4L2m2mContext *const s) ++{ ++ struct v4l2_queryctrl query_ctrl; ++ struct v4l2_querymenu query_menu; ++ uint32_t profile_id; ++ ++ // An unset profile is almost certainly zero or -99 - do not reject ++ if (avctx->profile <= 0) { ++ av_log(avctx, AV_LOG_VERBOSE, "Profile %d <= 0 - check skipped\n", avctx->profile); ++ return 0; ++ } ++ ++ memset(&query_ctrl, 0, sizeof(query_ctrl)); ++ switch (avctx->codec_id) { ++ case AV_CODEC_ID_MPEG2VIDEO: ++ profile_id = V4L2_CID_MPEG_VIDEO_MPEG2_PROFILE; ++ break; ++ case AV_CODEC_ID_MPEG4: ++ profile_id = V4L2_CID_MPEG_VIDEO_MPEG4_PROFILE; ++ break; ++ case AV_CODEC_ID_H264: ++ profile_id = V4L2_CID_MPEG_VIDEO_H264_PROFILE; ++ break; ++ case AV_CODEC_ID_VP8: ++ profile_id = V4L2_CID_MPEG_VIDEO_VP8_PROFILE; ++ break; ++ case AV_CODEC_ID_VP9: ++ profile_id = V4L2_CID_MPEG_VIDEO_VP9_PROFILE; ++ break; ++#ifdef V4L2_CID_MPEG_VIDEO_AV1_PROFILE ++ case AV_CODEC_ID_AV1: ++ profile_id = V4L2_CID_MPEG_VIDEO_AV1_PROFILE; ++ break; ++#endif ++ default: ++ av_log(avctx, AV_LOG_VERBOSE, "Can't map profile for codec id %d; profile check skipped\n", avctx->codec_id); ++ return 0; ++ } ++ ++ query_ctrl = (struct v4l2_queryctrl){.id = profile_id}; ++ if (ioctl(s->fd, VIDIOC_QUERYCTRL, &query_ctrl) != 0) { ++ av_log(avctx, AV_LOG_VERBOSE, "Query profile ctrl (%#x) not supported: assume OK\n", query_ctrl.id); ++ } ++ else { ++ av_log(avctx, AV_LOG_DEBUG, "%s: Control supported: %#x\n", __func__, query_ctrl.id); ++ ++ query_menu = (struct v4l2_querymenu){ ++ .id = query_ctrl.id, ++ .index = avprofile_to_v4l2(avctx->codec_id, avctx->profile), ++ }; ++ ++ if (query_menu.index > query_ctrl.maximum || ++ query_menu.index < query_ctrl.minimum || ++ ioctl(s->fd, VIDIOC_QUERYMENU, &query_menu) != 0) { ++ return AVERROR(ENOENT); ++ } ++ } ++ ++ return 0; ++}; ++ ++static int ++check_size(AVCodecContext * const avctx, V4L2m2mContext * const s, const uint32_t fcc) ++{ ++ unsigned int i; ++ const uint32_t w = avctx->coded_width; ++ const uint32_t h = avctx->coded_height; ++ ++ if (w == 0 || h == 0 || fcc == 0) { ++ av_log(avctx, AV_LOG_TRACE, "%s: Size %dx%d or fcc %s empty\n", __func__, w, h, av_fourcc2str(fcc)); ++ return 0; ++ } ++ if ((s->quirks & FF_V4L2_QUIRK_ENUM_FRAMESIZES_BROKEN) != 0) { ++ av_log(avctx, AV_LOG_TRACE, "%s: Skipped (quirk): Size %dx%d, fcc %s\n", __func__, w, h, av_fourcc2str(fcc)); ++ return 0; ++ } ++ ++ for (i = 0;; ++i) { ++ struct v4l2_frmsizeenum fs = { ++ .index = i, ++ .pixel_format = fcc, ++ }; ++ ++ while (ioctl(s->fd, VIDIOC_ENUM_FRAMESIZES, &fs) != 0) { ++ const int err = AVERROR(errno); ++ if (err == AVERROR(EINTR)) ++ continue; ++ if (i == 0 && err == AVERROR(ENOTTY)) { ++ av_log(avctx, AV_LOG_DEBUG, "Framesize enum not supported\n"); ++ return 0; ++ } ++ if (err != AVERROR(EINVAL)) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to enum framesizes: %s", av_err2str(err)); ++ return err; ++ } ++ av_log(avctx, AV_LOG_WARNING, "Failed to find Size=%dx%d, fmt=%s in %u frame size enums\n", ++ w, h, av_fourcc2str(fcc), i); ++ return err; ++ } ++ ++ switch (fs.type) { ++ case V4L2_FRMSIZE_TYPE_DISCRETE: ++ av_log(avctx, AV_LOG_TRACE, "%s[%d]: Discrete: %dx%d\n", __func__, i, ++ fs.discrete.width,fs.discrete.height); ++ if (w == fs.discrete.width && h == fs.discrete.height) ++ return 0; ++ break; ++ case V4L2_FRMSIZE_TYPE_STEPWISE: ++ av_log(avctx, AV_LOG_TRACE, "%s[%d]: Stepwise: Min: %dx%d Max: %dx%d, Step: %dx%d\n", __func__, i, ++ fs.stepwise.min_width, fs.stepwise.min_height, ++ fs.stepwise.max_width, fs.stepwise.max_height, ++ fs.stepwise.step_width,fs.stepwise.step_height); ++ if (w >= fs.stepwise.min_width && w <= fs.stepwise.max_width && ++ h >= fs.stepwise.min_height && h <= fs.stepwise.max_height && ++ (w - fs.stepwise.min_width) % fs.stepwise.step_width == 0 && ++ (h - fs.stepwise.min_height) % fs.stepwise.step_height == 0) ++ return 0; ++ break; ++ case V4L2_FRMSIZE_TYPE_CONTINUOUS: ++ av_log(avctx, AV_LOG_TRACE, "%s[%d]: Continuous: Min: %dx%d Max: %dx%d, Step: %dx%d\n", __func__, i, ++ fs.stepwise.min_width, fs.stepwise.min_height, ++ fs.stepwise.max_width, fs.stepwise.max_height, ++ fs.stepwise.step_width,fs.stepwise.step_height); ++ if (w >= fs.stepwise.min_width && w <= fs.stepwise.max_width && ++ h >= fs.stepwise.min_height && h <= fs.stepwise.max_height) ++ return 0; ++ break; ++ default: ++ av_log(avctx, AV_LOG_ERROR, "Unexpected framesize enum: %d", fs.type); ++ return AVERROR(EINVAL); ++ } ++ } ++} ++ ++static int ++get_quirks(AVCodecContext * const avctx, V4L2m2mContext * const s) ++{ ++ struct v4l2_capability cap; ++ ++ memset(&cap, 0, sizeof(cap)); ++ while (ioctl(s->fd, VIDIOC_QUERYCAP, &cap) != 0) { ++ int err = errno; ++ if (err == EINTR) ++ continue; ++ av_log(avctx, AV_LOG_ERROR, "V4L2: Failed to get capabilities: %s\n", strerror(err)); ++ return AVERROR(err); ++ } ++ ++ // Could be made table driven if we have a few more but right now there ++ // seems no point ++ ++ // Meson (amlogic) always gives a resolution changed event after output ++ // streamon and userspace must (re)allocate capture buffers and streamon ++ // capture to clear the event even if the capture buffers were the right ++ // size in the first place. ++ if (strcmp(cap.driver, "meson-vdec") == 0) ++ s->quirks |= FF_V4L2_QUIRK_REINIT_ALWAYS | FF_V4L2_QUIRK_ENUM_FRAMESIZES_BROKEN; ++ ++ av_log(avctx, AV_LOG_DEBUG, "Driver '%s': Quirks=%#x\n", cap.driver, s->quirks); ++ return 0; ++} ++ ++// This heuristic is for H264 but use for everything ++static uint32_t max_coded_size(const AVCodecContext * const avctx) ++{ ++ uint32_t wxh = avctx->coded_width * avctx->coded_height; ++ uint32_t size; ++ ++ size = wxh * 3 / 2; ++ // H.264 Annex A table A-1 gives minCR which is either 2 or 4 ++ // unfortunately that doesn't yield an actually useful limit ++ // and it should be noted that frame 0 is special cased to allow ++ // a bigger number which really isn't helpful for us. So just pick ++ // frame_size / 2 ++ size /= 2; ++ // Add 64k to allow for any overheads and/or encoder hopefulness ++ // with small WxH ++ return size + (1 << 16); ++} ++ ++static void ++parse_extradata(AVCodecContext * const avctx, V4L2m2mContext * const s) ++{ ++ s->reorder_size = 0; ++ ++ if (!avctx->extradata || !avctx->extradata_size) ++ return; ++ ++ switch (avctx->codec_id) { ++#if CONFIG_H264_DECODER ++ case AV_CODEC_ID_H264: ++ { ++ H264ParamSets ps; ++ int is_avc = 0; ++ int nal_length_size = 0; ++ int ret; ++ ++ memset(&ps, 0, sizeof(ps)); ++ ++ ret = ff_h264_decode_extradata(avctx->extradata, avctx->extradata_size, ++ &ps, &is_avc, &nal_length_size, ++ avctx->err_recognition, avctx); ++ if (ret > 0) { ++ const SPS * sps = NULL; ++ unsigned int i; ++ for (i = 0; i != MAX_SPS_COUNT; ++i) { ++ if (ps.sps_list[i]) { ++ sps = (const SPS *)ps.sps_list[i]->data; ++ break; ++ } ++ } ++ if (sps) { ++ avctx->profile = ff_h264_get_profile(sps); ++ avctx->level = sps->level_idc; ++ s->reorder_size = sps->num_reorder_frames; ++ } ++ } ++ ff_h264_ps_uninit(&ps); ++ break; ++ } ++#endif ++#if CONFIG_HEVC_DECODER ++ case AV_CODEC_ID_HEVC: ++ { ++ HEVCParamSets ps; ++ HEVCSEI sei; ++ int is_nalff = 0; ++ int nal_length_size = 0; ++ int ret; ++ ++ memset(&ps, 0, sizeof(ps)); ++ memset(&sei, 0, sizeof(sei)); ++ ++ ret = ff_hevc_decode_extradata(avctx->extradata, avctx->extradata_size, ++ &ps, &sei, &is_nalff, &nal_length_size, ++ avctx->err_recognition, 0, avctx); ++ if (ret > 0) { ++ const HEVCSPS * sps = NULL; ++ unsigned int i; ++ for (i = 0; i != HEVC_MAX_SPS_COUNT; ++i) { ++ if (ps.sps_list[i]) { ++ sps = (const HEVCSPS *)ps.sps_list[i]->data; ++ break; ++ } ++ } ++ if (sps) { ++ avctx->profile = sps->ptl.general_ptl.profile_idc; ++ avctx->level = sps->ptl.general_ptl.level_idc; ++ s->reorder_size = sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering; ++ } ++ } ++ ff_hevc_ps_uninit(&ps); ++ ff_hevc_reset_sei(&sei); ++ break; ++ } ++#endif ++ default: ++ break; ++ } ++} ++ ++static int ++choose_capture_format(AVCodecContext * const avctx, V4L2m2mContext * const s) ++{ ++ const V4L2m2mPriv * const priv = avctx->priv_data; ++ unsigned int fmts_n; ++ uint32_t *fmts = ff_v4l2_context_enum_drm_formats(&s->capture, &fmts_n); ++ enum AVPixelFormat *fmts2 = NULL; ++ enum AVPixelFormat gf_pix_fmt; ++ unsigned int i; ++ unsigned int n = 0; ++ unsigned int pref_n = 1; ++ int rv = AVERROR(ENOENT); ++ ++ if (!fmts) ++ return AVERROR(ENOENT); ++ ++ if ((fmts2 = av_malloc(sizeof(*fmts2) * (fmts_n + 3))) == NULL) { ++ rv = AVERROR(ENOMEM); ++ goto error; ++ } ++ ++ // Filter for formats that are supported by ffmpeg and ++ // can accomodate the stream size ++ fmts2[n++] = AV_PIX_FMT_DRM_PRIME; ++ for (i = 0; i != fmts_n; ++i) { ++ const enum AVPixelFormat f = ff_v4l2_format_v4l2_to_avfmt(fmts[i], AV_CODEC_ID_RAWVIDEO); ++ av_log(avctx, AV_LOG_TRACE, "VLC pix %s -> %s\n", av_fourcc2str(fmts[i]), av_get_pix_fmt_name(f)); ++ if (f == AV_PIX_FMT_NONE) ++ continue; ++ ++ if (check_size(avctx, s, fmts[i]) != 0) ++ continue; ++ ++ if (f == priv->pix_fmt) ++ pref_n = n; ++ fmts2[n++] = f; ++ } ++ ++ if (n < 2) { ++ av_log(avctx, AV_LOG_DEBUG, "%s: No usable formats found\n", __func__); ++ goto error; ++ } ++ ++ if (n != 2) { ++ // ffmpeg.c really only expects one s/w format. It thinks that the ++ // last format in the list is the s/w format of the h/w format but ++ // also chooses the first non-h/w format as the preferred s/w format. ++ // The only way of reconciling this is to dup our preferred format into ++ // both last & first place :-( ++ const enum AVPixelFormat t = fmts2[pref_n]; ++ fmts2[pref_n] = fmts2[1]; ++ fmts2[1] = t; ++ fmts2[n++] = t; ++ } ++ ++ fmts2[n] = AV_PIX_FMT_NONE; ++ ++ gf_pix_fmt = ff_get_format(avctx, fmts2); ++ av_log(avctx, AV_LOG_DEBUG, "avctx requested=%d (%s) %dx%d; get_format requested=%d (%s)\n", ++ avctx->pix_fmt, av_get_pix_fmt_name(avctx->pix_fmt), ++ avctx->coded_width, avctx->coded_height, ++ gf_pix_fmt, av_get_pix_fmt_name(gf_pix_fmt)); ++ ++ if (gf_pix_fmt == AV_PIX_FMT_NONE) ++ goto error; ++ ++ if (gf_pix_fmt == AV_PIX_FMT_DRM_PRIME || avctx->pix_fmt == AV_PIX_FMT_DRM_PRIME) { ++ avctx->pix_fmt = AV_PIX_FMT_DRM_PRIME; ++ s->capture.av_pix_fmt = avctx->sw_pix_fmt; ++ s->output_drm = 1; ++ } ++ else { ++ avctx->pix_fmt = gf_pix_fmt; ++ s->capture.av_pix_fmt = gf_pix_fmt; ++ s->output_drm = 0; ++ } ++ ++ // Get format converts capture.av_pix_fmt back into a V4L2 format in the context ++ if ((rv = ff_v4l2_context_get_format(&s->capture, 0)) != 0) ++ goto error; ++ rv = ff_v4l2_context_set_format(&s->capture); ++ ++error: ++ av_free(fmts2); ++ av_free(fmts); ++ return rv; ++} ++ + static av_cold int v4l2_decode_init(AVCodecContext *avctx) + { + V4L2Context *capture, *output; +@@ -185,10 +1167,27 @@ static av_cold int v4l2_decode_init(AVCo + V4L2m2mPriv *priv = avctx->priv_data; + int ret; + ++ av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__); ++ ++ if (avctx->codec_id == AV_CODEC_ID_H264) { ++ if (avctx->ticks_per_frame == 1) { ++ if(avctx->time_base.den < INT_MAX/2) { ++ avctx->time_base.den *= 2; ++ } else ++ avctx->time_base.num /= 2; ++ } ++ avctx->ticks_per_frame = 2; ++ } ++ + ret = ff_v4l2_m2m_create_context(priv, &s); + if (ret < 0) + return ret; + ++ parse_extradata(avctx, s); ++ ++ xlat_init(&s->xlat); ++ pts_stats_init(&s->pts_stat, avctx, "decoder"); ++ + capture = &s->capture; + output = &s->output; + +@@ -196,14 +1195,45 @@ static av_cold int v4l2_decode_init(AVCo + * by the v4l2 driver; this event will trigger a full pipeline reconfig and + * the proper values will be retrieved from the kernel driver. + */ +- output->height = capture->height = avctx->coded_height; +- output->width = capture->width = avctx->coded_width; ++// output->height = capture->height = avctx->coded_height; ++// output->width = capture->width = avctx->coded_width; ++ output->height = capture->height = 0; ++ output->width = capture->width = 0; + + output->av_codec_id = avctx->codec_id; + output->av_pix_fmt = AV_PIX_FMT_NONE; ++ output->min_buf_size = max_coded_size(avctx); + + capture->av_codec_id = AV_CODEC_ID_RAWVIDEO; + capture->av_pix_fmt = avctx->pix_fmt; ++ capture->min_buf_size = 0; ++ ++ capture->av_pix_fmt = AV_PIX_FMT_NONE; ++ s->output_drm = 0; ++ ++ s->db_ctl = NULL; ++ if (priv->dmabuf_alloc != NULL && strcmp(priv->dmabuf_alloc, "v4l2") != 0) { ++ if (strcmp(priv->dmabuf_alloc, "cma") == 0) ++ s->db_ctl = dmabufs_ctl_new(); ++ else { ++ av_log(avctx, AV_LOG_ERROR, "Unknown dmabuf alloc method: '%s'\n", priv->dmabuf_alloc); ++ return AVERROR(EINVAL); ++ } ++ if (!s->db_ctl) { ++ av_log(avctx, AV_LOG_ERROR, "Can't open dmabuf provider '%s'\n", priv->dmabuf_alloc); ++ return AVERROR(ENOMEM); ++ } ++ } ++ ++ s->device_ref = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_DRM); ++ if (!s->device_ref) { ++ ret = AVERROR(ENOMEM); ++ return ret; ++ } ++ ++ ret = av_hwdevice_ctx_init(s->device_ref); ++ if (ret < 0) ++ return ret; + + s->avctx = avctx; + ret = ff_v4l2_m2m_codec_init(priv); +@@ -212,12 +1242,90 @@ static av_cold int v4l2_decode_init(AVCo + return ret; + } + +- return v4l2_prepare_decoder(s); ++ if (avctx->extradata && ++ (ret = copy_extradata(avctx, avctx->extradata, avctx->extradata_size, &s->extdata_data, &s->extdata_size)) != 0) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to copy extradata from context: %s\n", av_err2str(ret)); ++#if DUMP_FAILED_EXTRADATA ++ log_dump(avctx, AV_LOG_INFO, avctx->extradata, avctx->extradata_size); ++#endif ++ return ret; ++ } ++ ++ if ((ret = get_quirks(avctx, s)) != 0) ++ return ret; ++ ++ if ((ret = check_profile(avctx, s)) != 0) { ++ av_log(avctx, AV_LOG_WARNING, "Profile %d not supported by decode\n", avctx->profile); ++ return ret; ++ } ++ ++ // Size check done as part of format filtering ++ if ((ret = choose_capture_format(avctx, s)) != 0) ++ return ret; ++ ++ if ((ret = v4l2_prepare_decoder(s)) < 0) ++ return ret; ++ ++ return 0; + } + + static av_cold int v4l2_decode_close(AVCodecContext *avctx) + { +- return ff_v4l2_m2m_codec_end(avctx->priv_data); ++ int rv; ++ av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__); ++ rv = ff_v4l2_m2m_codec_end(avctx->priv_data); ++ av_log(avctx, AV_LOG_TRACE, ">>> %s: rv=%d\n", __func__, rv); ++ return rv; ++} ++ ++static void v4l2_decode_flush(AVCodecContext *avctx) ++{ ++ // An alternatve and more drastic form of flush is to simply do this: ++ // v4l2_decode_close(avctx); ++ // v4l2_decode_init(avctx); ++ // The downside is that this keeps a decoder open until all the frames ++ // associated with it have been returned. This is a bit wasteful on ++ // possibly limited h/w resources and fails on a Pi for this reason unless ++ // more GPU mem is allocated than is the default. ++ ++ V4L2m2mPriv * const priv = avctx->priv_data; ++ V4L2m2mContext * const s = priv->context; ++ V4L2Context * const output = &s->output; ++ V4L2Context * const capture = &s->capture; ++ ++ av_log(avctx, AV_LOG_TRACE, "<<< %s: streamon=%d\n", __func__, output->streamon); ++ ++ // Reflushing everything is benign, quick and avoids having to worry about ++ // states like EOS processing so don't try to optimize out (having got it ++ // wrong once) ++ ++ ff_v4l2_context_set_status(output, VIDIOC_STREAMOFF); ++ ++ // Clear any buffered input packet ++ av_packet_unref(&s->buf_pkt); ++ ++ // Clear a pending EOS ++ if (ff_v4l2_ctx_eos(capture)) { ++ // Arguably we could delay this but this is easy and doesn't require ++ // thought or extra vars ++ ff_v4l2_context_set_status(capture, VIDIOC_STREAMOFF); ++ ff_v4l2_context_set_status(capture, VIDIOC_STREAMON); ++ } ++ ++ // V4L2 makes no guarantees about whether decoded frames are flushed or not ++ // so mark all frames we are tracking to be discarded if they appear ++ xlat_flush(&s->xlat); ++ ++ // resend extradata ++ s->extdata_sent = 0; ++ // clear status vars ++ s->running = 0; ++ s->draining = 0; ++ output->done = 0; ++ capture->done = 0; ++ ++ // Stream on will occur when we actually submit a new frame ++ av_log(avctx, AV_LOG_TRACE, ">>> %s\n", __func__); + } + + #define OFFSET(x) offsetof(V4L2m2mPriv, x) +@@ -226,10 +1334,17 @@ static av_cold int v4l2_decode_close(AVC + static const AVOption options[] = { + V4L_M2M_DEFAULT_OPTS, + { "num_capture_buffers", "Number of buffers in the capture context", +- OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 20}, 20, INT_MAX, FLAGS }, ++ OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 20}, 2, INT_MAX, FLAGS }, ++ { "pixel_format", "Pixel format to be used by the decoder", OFFSET(pix_fmt), AV_OPT_TYPE_PIXEL_FMT, {.i64 = AV_PIX_FMT_NONE}, AV_PIX_FMT_NONE, AV_PIX_FMT_NB, FLAGS }, ++ { "dmabuf_alloc", "Dmabuf alloc method", OFFSET(dmabuf_alloc), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS }, + { NULL}, + }; + ++static const AVCodecHWConfigInternal *v4l2_m2m_hw_configs[] = { ++ HW_CONFIG_INTERNAL(DRM_PRIME), ++ NULL ++}; ++ + #define M2MDEC_CLASS(NAME) \ + static const AVClass v4l2_m2m_ ## NAME ## _dec_class = { \ + .class_name = #NAME "_v4l2m2m_decoder", \ +@@ -250,10 +1365,16 @@ static const AVOption options[] = { + .init = v4l2_decode_init, \ + FF_CODEC_RECEIVE_FRAME_CB(v4l2_receive_frame), \ + .close = v4l2_decode_close, \ ++ .flush = v4l2_decode_flush, \ + .bsfs = bsf_name, \ + .p.capabilities = AV_CODEC_CAP_HARDWARE | AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING, \ + .caps_internal = FF_CODEC_CAP_SETS_PKT_DTS | FF_CODEC_CAP_INIT_CLEANUP, \ + .p.wrapper_name = "v4l2m2m", \ ++ .p.pix_fmts = (const enum AVPixelFormat[]) { AV_PIX_FMT_DRM_PRIME, \ ++ AV_PIX_FMT_NV12, \ ++ AV_PIX_FMT_YUV420P, \ ++ AV_PIX_FMT_NONE}, \ ++ .hw_configs = v4l2_m2m_hw_configs, \ + } + + M2MDEC(h264, "H.264", AV_CODEC_ID_H264, "h264_mp4toannexb"); +--- a/libavcodec/v4l2_m2m_enc.c ++++ b/libavcodec/v4l2_m2m_enc.c +@@ -24,6 +24,8 @@ + #include <linux/videodev2.h> + #include <sys/ioctl.h> + #include <search.h> ++#include <drm_fourcc.h> ++ + #include "encode.h" + #include "libavcodec/avcodec.h" + #include "libavutil/pixdesc.h" +@@ -38,6 +40,34 @@ + #define MPEG_CID(x) V4L2_CID_MPEG_VIDEO_##x + #define MPEG_VIDEO(x) V4L2_MPEG_VIDEO_##x + ++// P030 should be defined in drm_fourcc.h and hopefully will be sometime ++// in the future but until then... ++#ifndef DRM_FORMAT_P030 ++#define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0') ++#endif ++ ++#ifndef DRM_FORMAT_NV15 ++#define DRM_FORMAT_NV15 fourcc_code('N', 'V', '1', '5') ++#endif ++ ++#ifndef DRM_FORMAT_NV20 ++#define DRM_FORMAT_NV20 fourcc_code('N', 'V', '2', '0') ++#endif ++ ++#ifndef V4L2_CID_CODEC_BASE ++#define V4L2_CID_CODEC_BASE V4L2_CID_MPEG_BASE ++#endif ++ ++// V4L2_PIX_FMT_NV12_10_COL128 and V4L2_PIX_FMT_NV12_COL128 should be defined ++// in videodev2.h hopefully will be sometime in the future but until then... ++#ifndef V4L2_PIX_FMT_NV12_10_COL128 ++#define V4L2_PIX_FMT_NV12_10_COL128 v4l2_fourcc('N', 'C', '3', '0') ++#endif ++ ++#ifndef V4L2_PIX_FMT_NV12_COL128 ++#define V4L2_PIX_FMT_NV12_COL128 v4l2_fourcc('N', 'C', '1', '2') /* 12 Y/CbCr 4:2:0 128 pixel wide column */ ++#endif ++ + static inline void v4l2_set_timeperframe(V4L2m2mContext *s, unsigned int num, unsigned int den) + { + struct v4l2_streamparm parm = { 0 }; +@@ -148,15 +178,14 @@ static inline int v4l2_mpeg4_profile_fro + static int v4l2_check_b_frame_support(V4L2m2mContext *s) + { + if (s->avctx->max_b_frames) +- av_log(s->avctx, AV_LOG_WARNING, "Encoder does not support b-frames yet\n"); ++ av_log(s->avctx, AV_LOG_WARNING, "Encoder does not support %d b-frames yet\n", s->avctx->max_b_frames); + +- v4l2_set_ext_ctrl(s, MPEG_CID(B_FRAMES), 0, "number of B-frames", 0); ++ v4l2_set_ext_ctrl(s, MPEG_CID(B_FRAMES), s->avctx->max_b_frames, "number of B-frames", 1); + v4l2_get_ext_ctrl(s, MPEG_CID(B_FRAMES), &s->avctx->max_b_frames, "number of B-frames", 0); + if (s->avctx->max_b_frames == 0) + return 0; + + avpriv_report_missing_feature(s->avctx, "DTS/PTS calculation for V4L2 encoding"); +- + return AVERROR_PATCHWELCOME; + } + +@@ -271,17 +300,208 @@ static int v4l2_prepare_encoder(V4L2m2mC + return 0; + } + ++static int avdrm_to_v4l2(struct v4l2_format * const format, const AVFrame * const frame) ++{ ++ const AVDRMFrameDescriptor *const src = (const AVDRMFrameDescriptor *)frame->data[0]; ++ ++ const uint32_t drm_fmt = src->layers[0].format; ++ // Treat INVALID as LINEAR ++ const uint64_t mod = src->objects[0].format_modifier == DRM_FORMAT_MOD_INVALID ? ++ DRM_FORMAT_MOD_LINEAR : src->objects[0].format_modifier; ++ uint32_t pix_fmt = 0; ++ uint32_t w = 0; ++ uint32_t h = 0; ++ uint32_t bpl = src->layers[0].planes[0].pitch; ++ ++ // We really don't expect multiple layers ++ // All formats that we currently cope with are single object ++ ++ if (src->nb_layers != 1 || src->nb_objects != 1) ++ return AVERROR(EINVAL); ++ ++ switch (drm_fmt) { ++ case DRM_FORMAT_YUV420: ++ if (mod == DRM_FORMAT_MOD_LINEAR) { ++ if (src->layers[0].nb_planes != 3) ++ break; ++ pix_fmt = V4L2_PIX_FMT_YUV420; ++ h = src->layers[0].planes[1].offset / bpl; ++ w = bpl; ++ } ++ break; ++ ++ case DRM_FORMAT_NV12: ++ if (mod == DRM_FORMAT_MOD_LINEAR) { ++ if (src->layers[0].nb_planes != 2) ++ break; ++ pix_fmt = V4L2_PIX_FMT_NV12; ++ h = src->layers[0].planes[1].offset / bpl; ++ w = bpl; ++ } ++ else if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) { ++ if (src->layers[0].nb_planes != 2) ++ break; ++ pix_fmt = V4L2_PIX_FMT_NV12_COL128; ++ w = bpl; ++ h = src->layers[0].planes[1].offset / 128; ++ bpl = fourcc_mod_broadcom_param(mod); ++ } ++ break; ++ ++ case DRM_FORMAT_P030: ++ if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) { ++ if (src->layers[0].nb_planes != 2) ++ break; ++ pix_fmt = V4L2_PIX_FMT_NV12_10_COL128; ++ w = bpl / 2; // Matching lie to how we construct this ++ h = src->layers[0].planes[1].offset / 128; ++ bpl = fourcc_mod_broadcom_param(mod); ++ } ++ break; ++ ++ default: ++ break; ++ } ++ ++ if (!pix_fmt) ++ return AVERROR(EINVAL); ++ ++ if (V4L2_TYPE_IS_MULTIPLANAR(format->type)) { ++ struct v4l2_pix_format_mplane *const pix = &format->fmt.pix_mp; ++ ++ pix->width = w; ++ pix->height = h; ++ pix->pixelformat = pix_fmt; ++ pix->plane_fmt[0].bytesperline = bpl; ++ pix->num_planes = 1; ++ } ++ else { ++ struct v4l2_pix_format *const pix = &format->fmt.pix; ++ ++ pix->width = w; ++ pix->height = h; ++ pix->pixelformat = pix_fmt; ++ pix->bytesperline = bpl; ++ } ++ ++ return 0; ++} ++ ++// Do we have similar enough formats to be usable? ++static int fmt_eq(const struct v4l2_format * const a, const struct v4l2_format * const b) ++{ ++ if (a->type != b->type) ++ return 0; ++ ++ if (V4L2_TYPE_IS_MULTIPLANAR(a->type)) { ++ const struct v4l2_pix_format_mplane *const pa = &a->fmt.pix_mp; ++ const struct v4l2_pix_format_mplane *const pb = &b->fmt.pix_mp; ++ unsigned int i; ++ if (pa->pixelformat != pb->pixelformat || ++ pa->num_planes != pb->num_planes) ++ return 0; ++ for (i = 0; i != pa->num_planes; ++i) { ++ if (pa->plane_fmt[i].bytesperline != pb->plane_fmt[i].bytesperline) ++ return 0; ++ } ++ } ++ else { ++ const struct v4l2_pix_format *const pa = &a->fmt.pix; ++ const struct v4l2_pix_format *const pb = &b->fmt.pix; ++ if (pa->pixelformat != pb->pixelformat || ++ pa->bytesperline != pb->bytesperline) ++ return 0; ++ } ++ return 1; ++} ++ ++static inline int q_full(const V4L2Context *const output) ++{ ++ return ff_v4l2_context_q_count(output) == output->num_buffers; ++} ++ + static int v4l2_send_frame(AVCodecContext *avctx, const AVFrame *frame) + { + V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context; + V4L2Context *const output = &s->output; ++ int rv; ++ const int needs_slot = q_full(output); ++ ++ av_log(avctx, AV_LOG_TRACE, "<<< %s; needs_slot=%d\n", __func__, needs_slot); ++ ++ // Signal EOF if needed (doesn't need q slot) ++ if (!frame) { ++ av_log(avctx, AV_LOG_TRACE, "--- %s: EOS\n", __func__); ++ return ff_v4l2_context_enqueue_frame(output, frame); ++ } ++ ++ if ((rv = ff_v4l2_dq_all(output, needs_slot? 500 : 0)) != 0) { ++ // We should be able to return AVERROR(EAGAIN) to indicate buffer ++ // exhaustion, but ffmpeg currently treats that as fatal. ++ av_log(avctx, AV_LOG_WARNING, "Failed to get buffer for src frame: %s\n", av_err2str(rv)); ++ return rv; ++ } ++ ++ if (s->input_drm && !output->streamon) { ++ struct v4l2_format req_format = {.type = output->format.type}; ++ ++ // Set format when we first get a buffer ++ if ((rv = avdrm_to_v4l2(&req_format, frame)) != 0) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to get V4L2 format from DRM_PRIME frame\n"); ++ return rv; ++ } ++ ++ ff_v4l2_context_release(output); ++ ++ output->format = req_format; ++ ++ if ((rv = ff_v4l2_context_set_format(output)) != 0) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to set V4L2 format\n"); ++ return rv; ++ } ++ ++ if (!fmt_eq(&req_format, &output->format)) { ++ av_log(avctx, AV_LOG_ERROR, "Format mismatch after setup\n"); ++ return AVERROR(EINVAL); ++ } ++ ++ output->selection.top = frame->crop_top; ++ output->selection.left = frame->crop_left; ++ output->selection.width = av_frame_cropped_width(frame); ++ output->selection.height = av_frame_cropped_height(frame); ++ ++ if ((rv = ff_v4l2_context_init(output)) != 0) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to (re)init context\n"); ++ return rv; ++ } ++ ++ { ++ struct v4l2_selection selection = { ++ .type = V4L2_BUF_TYPE_VIDEO_OUTPUT, ++ .target = V4L2_SEL_TGT_CROP, ++ .r = output->selection ++ }; ++ if (ioctl(s->fd, VIDIOC_S_SELECTION, &selection) != 0) { ++ av_log(avctx, AV_LOG_WARNING, "S_SELECTION (CROP) %dx%d @ %d,%d failed: %s\n", ++ selection.r.width, selection.r.height, selection.r.left, selection.r.top, ++ av_err2str(AVERROR(errno))); ++ } ++ av_log(avctx, AV_LOG_TRACE, "S_SELECTION (CROP) %dx%d @ %d,%d OK\n", ++ selection.r.width, selection.r.height, selection.r.left, selection.r.top); ++ } ++ } + + #ifdef V4L2_CID_MPEG_VIDEO_FORCE_KEY_FRAME +- if (frame && frame->pict_type == AV_PICTURE_TYPE_I) ++ if (frame->pict_type == AV_PICTURE_TYPE_I) + v4l2_set_ext_ctrl(s, MPEG_CID(FORCE_KEY_FRAME), 0, "force key frame", 1); + #endif + +- return ff_v4l2_context_enqueue_frame(output, frame); ++ rv = ff_v4l2_context_enqueue_frame(output, frame); ++ if (rv) { ++ av_log(avctx, AV_LOG_ERROR, "Enqueue frame failed: %s\n", av_err2str(rv)); ++ } ++ ++ return rv; + } + + static int v4l2_receive_packet(AVCodecContext *avctx, AVPacket *avpkt) +@@ -292,6 +512,11 @@ static int v4l2_receive_packet(AVCodecCo + AVFrame *frame = s->frame; + int ret; + ++ av_log(avctx, AV_LOG_TRACE, "<<< %s: qlen out %d cap %d\n", __func__, ++ ff_v4l2_context_q_count(output), ff_v4l2_context_q_count(capture)); ++ ++ ff_v4l2_dq_all(output, 0); ++ + if (s->draining) + goto dequeue; + +@@ -328,7 +553,115 @@ static int v4l2_receive_packet(AVCodecCo + } + + dequeue: +- return ff_v4l2_context_dequeue_packet(capture, avpkt); ++ // Dequeue a frame ++ for (;;) { ++ int t = q_full(output) ? -1 : s->draining ? 300 : 0; ++ int rv2; ++ ++ // If output is full wait for either a packet or output to become not full ++ ret = ff_v4l2_context_dequeue_packet(capture, avpkt, t); ++ ++ // If output was full retry packet dequeue ++ t = (ret != AVERROR(EAGAIN) || t != -1) ? 0 : 300; ++ rv2 = ff_v4l2_dq_all(output, t); ++ if (t == 0 || rv2 != 0) ++ break; ++ } ++ if (ret) ++ return (s->draining && ret == AVERROR(EAGAIN)) ? AVERROR_EOF : ret; ++ ++ if (capture->first_buf == 1) { ++ uint8_t * data; ++ const int len = avpkt->size; ++ ++ // 1st buffer after streamon should be SPS/PPS ++ capture->first_buf = 2; ++ ++ // Clear both possible stores so there is no chance of confusion ++ av_freep(&s->extdata_data); ++ s->extdata_size = 0; ++ av_freep(&avctx->extradata); ++ avctx->extradata_size = 0; ++ ++ if ((data = av_malloc(len + AV_INPUT_BUFFER_PADDING_SIZE)) == NULL) ++ goto fail_no_mem; ++ ++ memcpy(data, avpkt->data, len); ++ av_packet_unref(avpkt); ++ ++ // We need to copy the header, but keep local if not global ++ if ((avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) != 0) { ++ avctx->extradata = data; ++ avctx->extradata_size = len; ++ } ++ else { ++ s->extdata_data = data; ++ s->extdata_size = len; ++ } ++ ++ ret = ff_v4l2_context_dequeue_packet(capture, avpkt, 0); ++ ff_v4l2_dq_all(output, 0); ++ if (ret) ++ return ret; ++ } ++ ++ // First frame must be key so mark as such even if encoder forgot ++ if (capture->first_buf == 2) { ++ avpkt->flags |= AV_PKT_FLAG_KEY; ++ ++ // Add any extradata to the 1st packet we emit as we cannot create it at init ++ if (avctx->extradata_size > 0 && avctx->extradata) { ++ void * const side = av_packet_new_side_data(avpkt, ++ AV_PKT_DATA_NEW_EXTRADATA, ++ avctx->extradata_size); ++ if (!side) ++ goto fail_no_mem; ++ ++ memcpy(side, avctx->extradata, avctx->extradata_size); ++ } ++ } ++ ++ // Add SPS/PPS to the start of every key frame if non-global headers ++ if ((avpkt->flags & AV_PKT_FLAG_KEY) != 0 && s->extdata_size != 0) { ++ const size_t newlen = s->extdata_size + avpkt->size; ++ AVBufferRef * const buf = av_buffer_alloc(newlen + AV_INPUT_BUFFER_PADDING_SIZE); ++ ++ if (buf == NULL) ++ goto fail_no_mem; ++ ++ memcpy(buf->data, s->extdata_data, s->extdata_size); ++ memcpy(buf->data + s->extdata_size, avpkt->data, avpkt->size); ++ ++ av_buffer_unref(&avpkt->buf); ++ avpkt->buf = buf; ++ avpkt->data = buf->data; ++ avpkt->size = newlen; ++ } ++ else if (ff_v4l2_context_q_count(capture) < 2) { ++ // Avoid running out of capture buffers ++ // In most cases the buffers will be returned quickly in which case ++ // we don't copy and can use the v4l2 buffers directly but sometimes ++ // ffmpeg seems to hold onto all of them for a long time (.mkv ++ // creation?) so avoid deadlock in those cases. ++ AVBufferRef * const buf = av_buffer_alloc(avpkt->size + AV_INPUT_BUFFER_PADDING_SIZE); ++ if (buf == NULL) ++ goto fail_no_mem; ++ ++ memcpy(buf->data, avpkt->data, avpkt->size); ++ av_buffer_unref(&avpkt->buf); // Will recycle the V4L2 buffer ++ ++ avpkt->buf = buf; ++ avpkt->data = buf->data; ++ } ++ ++ capture->first_buf = 0; ++ return 0; ++ ++fail_no_mem: ++ av_log(avctx, AV_LOG_ERROR, "Rx pkt failed: No memory\n"); ++ ret = AVERROR(ENOMEM); ++ av_packet_unref(avpkt); ++ return ret; + } + + static av_cold int v4l2_encode_init(AVCodecContext *avctx) +@@ -340,6 +673,8 @@ static av_cold int v4l2_encode_init(AVCo + uint32_t v4l2_fmt_output; + int ret; + ++ av_log(avctx, AV_LOG_INFO, " <<< %s: fmt=%d/%d\n", __func__, avctx->pix_fmt, avctx->sw_pix_fmt); ++ + ret = ff_v4l2_m2m_create_context(priv, &s); + if (ret < 0) + return ret; +@@ -347,13 +682,17 @@ static av_cold int v4l2_encode_init(AVCo + capture = &s->capture; + output = &s->output; + ++ s->input_drm = (avctx->pix_fmt == AV_PIX_FMT_DRM_PRIME); ++ + /* common settings output/capture */ + output->height = capture->height = avctx->height; + output->width = capture->width = avctx->width; + + /* output context */ + output->av_codec_id = AV_CODEC_ID_RAWVIDEO; +- output->av_pix_fmt = avctx->pix_fmt; ++ output->av_pix_fmt = !s->input_drm ? avctx->pix_fmt : ++ avctx->sw_pix_fmt != AV_PIX_FMT_NONE ? avctx->sw_pix_fmt : ++ AV_PIX_FMT_YUV420P; + + /* capture context */ + capture->av_codec_id = avctx->codec_id; +@@ -372,7 +711,7 @@ static av_cold int v4l2_encode_init(AVCo + v4l2_fmt_output = output->format.fmt.pix.pixelformat; + + pix_fmt_output = ff_v4l2_format_v4l2_to_avfmt(v4l2_fmt_output, AV_CODEC_ID_RAWVIDEO); +- if (pix_fmt_output != avctx->pix_fmt) { ++ if (!s->input_drm && pix_fmt_output != avctx->pix_fmt) { + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt_output); + av_log(avctx, AV_LOG_ERROR, "Encoder requires %s pixel format.\n", desc->name); + return AVERROR(EINVAL); +@@ -390,9 +729,10 @@ static av_cold int v4l2_encode_close(AVC + #define FLAGS AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM + + #define V4L_M2M_CAPTURE_OPTS \ +- V4L_M2M_DEFAULT_OPTS,\ ++ { "num_output_buffers", "Number of buffers in the output context",\ ++ OFFSET(num_output_buffers), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, FLAGS },\ + { "num_capture_buffers", "Number of buffers in the capture context", \ +- OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 4 }, 4, INT_MAX, FLAGS } ++ OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 8 }, 8, INT_MAX, FLAGS } + + static const AVOption mpeg4_options[] = { + V4L_M2M_CAPTURE_OPTS, +--- /dev/null ++++ b/libavcodec/v4l2_req_decode_q.c +@@ -0,0 +1,84 @@ ++#include <memory.h> ++#include <semaphore.h> ++#include <pthread.h> ++ ++#include "v4l2_req_decode_q.h" ++ ++int decode_q_in_q(const req_decode_ent * const d) ++{ ++ return d->in_q; ++} ++ ++void decode_q_add(req_decode_q * const q, req_decode_ent * const d) ++{ ++ pthread_mutex_lock(&q->q_lock); ++ if (!q->head) { ++ q->head = d; ++ q->tail = d; ++ d->prev = NULL; ++ } ++ else { ++ q->tail->next = d; ++ d->prev = q->tail; ++ q->tail = d; ++ } ++ d->next = NULL; ++ d->in_q = 1; ++ pthread_mutex_unlock(&q->q_lock); ++} ++ ++// Remove entry from Q - if head wake-up anything that was waiting ++void decode_q_remove(req_decode_q * const q, req_decode_ent * const d) ++{ ++ int try_signal = 0; ++ ++ if (!d->in_q) ++ return; ++ ++ pthread_mutex_lock(&q->q_lock); ++ if (d->prev) ++ d->prev->next = d->next; ++ else { ++ try_signal = 1; // Only need to signal if we were head ++ q->head = d->next; ++ } ++ ++ if (d->next) ++ d->next->prev = d->prev; ++ else ++ q->tail = d->prev; ++ ++ // Not strictly needed but makes debug easier ++ d->next = NULL; ++ d->prev = NULL; ++ d->in_q = 0; ++ pthread_mutex_unlock(&q->q_lock); ++ ++ if (try_signal) ++ pthread_cond_broadcast(&q->q_cond); ++} ++ ++void decode_q_wait(req_decode_q * const q, req_decode_ent * const d) ++{ ++ pthread_mutex_lock(&q->q_lock); ++ ++ while (q->head != d) ++ pthread_cond_wait(&q->q_cond, &q->q_lock); ++ ++ pthread_mutex_unlock(&q->q_lock); ++} ++ ++void decode_q_uninit(req_decode_q * const q) ++{ ++ pthread_mutex_destroy(&q->q_lock); ++ pthread_cond_destroy(&q->q_cond); ++} ++ ++void decode_q_init(req_decode_q * const q) ++{ ++ memset(q, 0, sizeof(*q)); ++ pthread_mutex_init(&q->q_lock, NULL); ++ pthread_cond_init(&q->q_cond, NULL); ++} ++ ++ +--- /dev/null ++++ b/libavcodec/v4l2_req_decode_q.h +@@ -0,0 +1,27 @@ ++#ifndef AVCODEC_V4L2_REQ_DECODE_Q_H ++#define AVCODEC_V4L2_REQ_DECODE_Q_H ++ ++#include <pthread.h> ++ ++typedef struct req_decode_ent { ++ struct req_decode_ent * next; ++ struct req_decode_ent * prev; ++ int in_q; ++} req_decode_ent; ++ ++typedef struct req_decode_q { ++ pthread_mutex_t q_lock; ++ pthread_cond_t q_cond; ++ req_decode_ent * head; ++ req_decode_ent * tail; ++} req_decode_q; ++ ++int decode_q_in_q(const req_decode_ent * const d); ++void decode_q_add(req_decode_q * const q, req_decode_ent * const d); ++void decode_q_remove(req_decode_q * const q, req_decode_ent * const d); ++void decode_q_wait(req_decode_q * const q, req_decode_ent * const d); ++void decode_q_uninit(req_decode_q * const q); ++void decode_q_init(req_decode_q * const q); ++ ++#endif ++ +--- /dev/null ++++ b/libavcodec/v4l2_req_devscan.c +@@ -0,0 +1,451 @@ ++#include <errno.h> ++#include <fcntl.h> ++#include <libudev.h> ++#include <stdlib.h> ++#include <string.h> ++#include <unistd.h> ++ ++#include <sys/ioctl.h> ++#include <sys/sysmacros.h> ++ ++#include <linux/media.h> ++#include <linux/videodev2.h> ++ ++#include "v4l2_req_devscan.h" ++#include "v4l2_req_utils.h" ++ ++struct decdev { ++ enum v4l2_buf_type src_type; ++ uint32_t src_fmt_v4l2; ++ const char * vname; ++ const char * mname; ++}; ++ ++struct devscan { ++ struct decdev env; ++ unsigned int dev_size; ++ unsigned int dev_count; ++ struct decdev *devs; ++}; ++ ++static int video_src_pixfmt_supported(uint32_t fmt) ++{ ++ return 1; ++} ++ ++static void v4l2_setup_format(struct v4l2_format *format, unsigned int type, ++ unsigned int width, unsigned int height, ++ unsigned int pixelformat) ++{ ++ unsigned int sizeimage; ++ ++ memset(format, 0, sizeof(*format)); ++ format->type = type; ++ ++ sizeimage = V4L2_TYPE_IS_OUTPUT(type) ? 4 * 1024 * 1024 : 0; ++ ++ if (V4L2_TYPE_IS_MULTIPLANAR(type)) { ++ format->fmt.pix_mp.width = width; ++ format->fmt.pix_mp.height = height; ++ format->fmt.pix_mp.plane_fmt[0].sizeimage = sizeimage; ++ format->fmt.pix_mp.pixelformat = pixelformat; ++ } else { ++ format->fmt.pix.width = width; ++ format->fmt.pix.height = height; ++ format->fmt.pix.sizeimage = sizeimage; ++ format->fmt.pix.pixelformat = pixelformat; ++ } ++} ++ ++static int v4l2_set_format(int video_fd, unsigned int type, unsigned int pixelformat, ++ unsigned int width, unsigned int height) ++{ ++ struct v4l2_format format; ++ ++ v4l2_setup_format(&format, type, width, height, pixelformat); ++ ++ return ioctl(video_fd, VIDIOC_S_FMT, &format) ? -errno : 0; ++} ++ ++static int v4l2_query_capabilities(int video_fd, unsigned int *capabilities) ++{ ++ struct v4l2_capability capability = { 0 }; ++ int rc; ++ ++ rc = ioctl(video_fd, VIDIOC_QUERYCAP, &capability); ++ if (rc < 0) ++ return -errno; ++ ++ if (capabilities != NULL) { ++ if ((capability.capabilities & V4L2_CAP_DEVICE_CAPS) != 0) ++ *capabilities = capability.device_caps; ++ else ++ *capabilities = capability.capabilities; ++ } ++ ++ return 0; ++} ++ ++static int devscan_add(struct devscan *const scan, ++ enum v4l2_buf_type src_type, ++ uint32_t src_fmt_v4l2, ++ const char * vname, ++ const char * mname) ++{ ++ struct decdev *d; ++ ++ if (scan->dev_size <= scan->dev_count) { ++ unsigned int n = !scan->dev_size ? 4 : scan->dev_size * 2; ++ d = realloc(scan->devs, n * sizeof(*d)); ++ if (!d) ++ return -ENOMEM; ++ scan->devs = d; ++ scan->dev_size = n; ++ } ++ ++ d = scan->devs + scan->dev_count; ++ d->src_type = src_type; ++ d->src_fmt_v4l2 = src_fmt_v4l2; ++ d->vname = strdup(vname); ++ if (!d->vname) ++ return -ENOMEM; ++ d->mname = strdup(mname); ++ if (!d->mname) { ++ free((char *)d->vname); ++ return -ENOMEM; ++ } ++ ++scan->dev_count; ++ return 0; ++} ++ ++void devscan_delete(struct devscan **const pScan) ++{ ++ unsigned int i; ++ struct devscan * const scan = *pScan; ++ ++ if (!scan) ++ return; ++ *pScan = NULL; ++ ++ for (i = 0; i < scan->dev_count; ++i) { ++ free((char*)scan->devs[i].mname); ++ free((char*)scan->devs[i].vname); ++ } ++ free(scan->devs); ++ free(scan); ++} ++ ++#define REQ_BUF_CAPS (\ ++ V4L2_BUF_CAP_SUPPORTS_DMABUF |\ ++ V4L2_BUF_CAP_SUPPORTS_REQUESTS |\ ++ V4L2_BUF_CAP_SUPPORTS_M2M_HOLD_CAPTURE_BUF) ++ ++static void probe_formats(void * const dc, ++ struct devscan *const scan, ++ const int fd, ++ const unsigned int type_v4l2, ++ const char *const mpath, ++ const char *const vpath) ++{ ++ unsigned int i; ++ for (i = 0;; ++i) { ++ struct v4l2_fmtdesc fmtdesc = { ++ .index = i, ++ .type = type_v4l2 ++ }; ++ struct v4l2_requestbuffers rbufs = { ++ .count = 0, ++ .type = type_v4l2, ++ .memory = V4L2_MEMORY_MMAP ++ }; ++ while (ioctl(fd, VIDIOC_ENUM_FMT, &fmtdesc)) { ++ if (errno == EINTR) ++ continue; ++ if (errno != EINVAL) ++ request_err(dc, "Enum[%d] failed for type=%d\n", i, type_v4l2); ++ return; ++ } ++ if (!video_src_pixfmt_supported(fmtdesc.pixelformat)) ++ continue; ++ ++ if (v4l2_set_format(fd, type_v4l2, fmtdesc.pixelformat, 720, 480)) { ++ request_debug(dc, "Set failed for type=%d, pf=%.4s\n", type_v4l2, (char*)&fmtdesc.pixelformat); ++ continue; ++ } ++ ++ while (ioctl(fd, VIDIOC_REQBUFS, &rbufs)) { ++ if (errno != EINTR) { ++ request_debug(dc, "%s: Reqbufs failed\n", vpath); ++ continue; ++ } ++ } ++ ++ if ((rbufs.capabilities & REQ_BUF_CAPS) != REQ_BUF_CAPS) { ++ request_debug(dc, "%s: Buf caps %#x insufficient\n", vpath, rbufs.capabilities); ++ continue; ++ } ++ ++ request_debug(dc, "Adding: %s,%s pix=%#x, type=%d\n", ++ mpath, vpath, fmtdesc.pixelformat, type_v4l2); ++ devscan_add(scan, type_v4l2, fmtdesc.pixelformat, vpath, mpath); ++ } ++} ++ ++ ++static int probe_video_device(void * const dc, ++ struct udev_device *const device, ++ struct devscan *const scan, ++ const char *const mpath) ++{ ++ int ret; ++ unsigned int capabilities = 0; ++ int video_fd = -1; ++ ++ const char *path = udev_device_get_devnode(device); ++ if (!path) { ++ request_err(dc, "%s: get video device devnode failed\n", __func__); ++ ret = -EINVAL; ++ goto fail; ++ } ++ ++ video_fd = open(path, O_RDWR, 0); ++ if (video_fd == -1) { ++ ret = -errno; ++ request_err(dc, "%s: opening %s failed, %s (%d)\n", __func__, path, strerror(errno), errno); ++ goto fail; ++ } ++ ++ ret = v4l2_query_capabilities(video_fd, &capabilities); ++ if (ret < 0) { ++ request_err(dc, "%s: get video capability failed, %s (%d)\n", __func__, strerror(-ret), -ret); ++ goto fail; ++ } ++ ++ request_debug(dc, "%s: path=%s capabilities=%#x\n", __func__, path, capabilities); ++ ++ if (!(capabilities & V4L2_CAP_STREAMING)) { ++ request_debug(dc, "%s: missing required streaming capability\n", __func__); ++ ret = -EINVAL; ++ goto fail; ++ } ++ ++ if (!(capabilities & (V4L2_CAP_VIDEO_M2M_MPLANE | V4L2_CAP_VIDEO_M2M))) { ++ request_debug(dc, "%s: missing required mem2mem capability\n", __func__); ++ ret = -EINVAL; ++ goto fail; ++ } ++ ++ /* Should check capture formats too... */ ++ if ((capabilities & V4L2_CAP_VIDEO_M2M) != 0) ++ probe_formats(dc, scan, video_fd, V4L2_BUF_TYPE_VIDEO_OUTPUT, mpath, path); ++ if ((capabilities & V4L2_CAP_VIDEO_M2M_MPLANE) != 0) ++ probe_formats(dc, scan, video_fd, V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE, mpath, path); ++ ++ close(video_fd); ++ return 0; ++ ++fail: ++ if (video_fd >= 0) ++ close(video_fd); ++ return ret; ++} ++ ++static int probe_media_device(void * const dc, ++ struct udev_device *const device, ++ struct devscan *const scan) ++{ ++ int ret; ++ int rv; ++ struct media_device_info device_info = { 0 }; ++ struct media_v2_topology topology = { 0 }; ++ struct media_v2_interface *interfaces = NULL; ++ struct udev *udev = udev_device_get_udev(device); ++ struct udev_device *video_device; ++ dev_t devnum; ++ int media_fd = -1; ++ ++ const char *path = udev_device_get_devnode(device); ++ if (!path) { ++ request_err(dc, "%s: get media device devnode failed\n", __func__); ++ ret = -EINVAL; ++ goto fail; ++ } ++ ++ media_fd = open(path, O_RDWR, 0); ++ if (media_fd < 0) { ++ ret = -errno; ++ request_err(dc, "%s: opening %s failed, %s (%d)\n", __func__, path, strerror(-ret), -ret); ++ goto fail; ++ } ++ ++ rv = ioctl(media_fd, MEDIA_IOC_DEVICE_INFO, &device_info); ++ if (rv < 0) { ++ ret = -errno; ++ request_err(dc, "%s: get media device info failed, %s (%d)\n", __func__, strerror(-ret), -ret); ++ goto fail; ++ } ++ ++ rv = ioctl(media_fd, MEDIA_IOC_G_TOPOLOGY, &topology); ++ if (rv < 0) { ++ ret = -errno; ++ request_err(dc, "%s: get media topology failed, %s (%d)\n", __func__, strerror(-ret), -ret); ++ goto fail; ++ } ++ ++ if (topology.num_interfaces <= 0) { ++ request_err(dc, "%s: media device has no interfaces\n", __func__); ++ ret = -EINVAL; ++ goto fail; ++ } ++ ++ interfaces = calloc(topology.num_interfaces, sizeof(*interfaces)); ++ if (!interfaces) { ++ request_err(dc, "%s: allocating media interface struct failed\n", __func__); ++ ret = -ENOMEM; ++ goto fail; ++ } ++ ++ topology.ptr_interfaces = (__u64)(uintptr_t)interfaces; ++ rv = ioctl(media_fd, MEDIA_IOC_G_TOPOLOGY, &topology); ++ if (rv < 0) { ++ ret = -errno; ++ request_err(dc, "%s: get media topology failed, %s (%d)\n", __func__, strerror(-ret), -ret); ++ goto fail; ++ } ++ ++ for (int i = 0; i < topology.num_interfaces; i++) { ++ if (interfaces[i].intf_type != MEDIA_INTF_T_V4L_VIDEO) ++ continue; ++ ++ devnum = makedev(interfaces[i].devnode.major, interfaces[i].devnode.minor); ++ video_device = udev_device_new_from_devnum(udev, 'c', devnum); ++ if (!video_device) { ++ ret = -errno; ++ request_err(dc, "%s: video_device[%d]=%p\n", __func__, i, video_device); ++ continue; ++ } ++ ++ ret = probe_video_device(dc, video_device, scan, path); ++ udev_device_unref(video_device); ++ ++ if (ret != 0) ++ goto fail; ++ } ++ ++fail: ++ free(interfaces); ++ if (media_fd != -1) ++ close(media_fd); ++ return ret; ++} ++ ++const char *decdev_media_path(const struct decdev *const dev) ++{ ++ return !dev ? NULL : dev->mname; ++} ++ ++const char *decdev_video_path(const struct decdev *const dev) ++{ ++ return !dev ? NULL : dev->vname; ++} ++ ++enum v4l2_buf_type decdev_src_type(const struct decdev *const dev) ++{ ++ return !dev ? 0 : dev->src_type; ++} ++ ++uint32_t decdev_src_pixelformat(const struct decdev *const dev) ++{ ++ return !dev ? 0 : dev->src_fmt_v4l2; ++} ++ ++ ++const struct decdev *devscan_find(struct devscan *const scan, ++ const uint32_t src_fmt_v4l2) ++{ ++ unsigned int i; ++ ++ if (scan->env.mname && scan->env.vname) ++ return &scan->env; ++ ++ if (!src_fmt_v4l2) ++ return scan->dev_count ? scan->devs + 0 : NULL; ++ ++ for (i = 0; i != scan->dev_count; ++i) { ++ if (scan->devs[i].src_fmt_v4l2 == src_fmt_v4l2) ++ return scan->devs + i; ++ } ++ return NULL; ++} ++ ++int devscan_build(void * const dc, struct devscan **pscan) ++{ ++ int ret; ++ struct udev *udev; ++ struct udev_enumerate *enumerate; ++ struct udev_list_entry *devices; ++ struct udev_list_entry *entry; ++ struct udev_device *device; ++ struct devscan * scan; ++ ++ *pscan = NULL; ++ ++ scan = calloc(1, sizeof(*scan)); ++ if (!scan) { ++ ret = -ENOMEM; ++ goto fail; ++ } ++ ++ scan->env.mname = getenv("LIBVA_V4L2_REQUEST_MEDIA_PATH"); ++ scan->env.vname = getenv("LIBVA_V4L2_REQUEST_VIDEO_PATH"); ++ if (scan->env.mname && scan->env.vname) { ++ request_info(dc, "Media/video device env overrides found: %s,%s\n", ++ scan->env.mname, scan->env.vname); ++ *pscan = scan; ++ return 0; ++ } ++ ++ udev = udev_new(); ++ if (!udev) { ++ request_err(dc, "%s: allocating udev context failed\n", __func__); ++ ret = -ENOMEM; ++ goto fail; ++ } ++ ++ enumerate = udev_enumerate_new(udev); ++ if (!enumerate) { ++ request_err(dc, "%s: allocating udev enumerator failed\n", __func__); ++ ret = -ENOMEM; ++ goto fail; ++ } ++ ++ udev_enumerate_add_match_subsystem(enumerate, "media"); ++ udev_enumerate_scan_devices(enumerate); ++ ++ devices = udev_enumerate_get_list_entry(enumerate); ++ udev_list_entry_foreach(entry, devices) { ++ const char *path = udev_list_entry_get_name(entry); ++ if (!path) ++ continue; ++ ++ device = udev_device_new_from_syspath(udev, path); ++ if (!device) ++ continue; ++ ++ probe_media_device(dc, device, scan); ++ udev_device_unref(device); ++ } ++ ++ udev_enumerate_unref(enumerate); ++ udev_unref(udev); ++ ++ *pscan = scan; ++ return 0; ++ ++fail: ++ if (udev) ++ udev_unref(udev); ++ devscan_delete(&scan); ++ return ret; ++} ++ +--- /dev/null ++++ b/libavcodec/v4l2_req_devscan.h +@@ -0,0 +1,23 @@ ++#ifndef _DEVSCAN_H_ ++#define _DEVSCAN_H_ ++ ++#include <stdint.h> ++ ++struct devscan; ++struct decdev; ++enum v4l2_buf_type; ++ ++/* These return pointers to data in the devscan structure and so are vaild ++ * for the lifetime of that ++ */ ++const char *decdev_media_path(const struct decdev *const dev); ++const char *decdev_video_path(const struct decdev *const dev); ++enum v4l2_buf_type decdev_src_type(const struct decdev *const dev); ++uint32_t decdev_src_pixelformat(const struct decdev *const dev); ++ ++const struct decdev *devscan_find(struct devscan *const scan, const uint32_t src_fmt_v4l2); ++ ++int devscan_build(void * const dc, struct devscan **pscan); ++void devscan_delete(struct devscan **const pScan); ++ ++#endif +--- /dev/null ++++ b/libavcodec/v4l2_req_dmabufs.c +@@ -0,0 +1,409 @@ ++#include <stdatomic.h> ++#include <stdio.h> ++#include <stdlib.h> ++#include <unistd.h> ++#include <inttypes.h> ++#include <fcntl.h> ++#include <errno.h> ++#include <string.h> ++#include <sys/ioctl.h> ++#include <sys/mman.h> ++#include <linux/mman.h> ++#include <linux/dma-buf.h> ++#include <linux/dma-heap.h> ++ ++#include "v4l2_req_dmabufs.h" ++#include "v4l2_req_utils.h" ++ ++#define TRACE_ALLOC 0 ++ ++#ifndef __O_CLOEXEC ++#define __O_CLOEXEC 0 ++#endif ++ ++struct dmabufs_ctl; ++struct dmabuf_h; ++ ++struct dmabuf_fns { ++ int (*buf_alloc)(struct dmabufs_ctl * dbsc, struct dmabuf_h * dh, size_t size); ++ void (*buf_free)(struct dmabuf_h * dh); ++ int (*ctl_new)(struct dmabufs_ctl * dbsc); ++ void (*ctl_free)(struct dmabufs_ctl * dbsc); ++}; ++ ++struct dmabufs_ctl { ++ atomic_int ref_count; ++ int fd; ++ size_t page_size; ++ void * v; ++ const struct dmabuf_fns * fns; ++}; ++ ++struct dmabuf_h { ++ int fd; ++ size_t size; ++ size_t len; ++ void * mapptr; ++ void * v; ++ const struct dmabuf_fns * fns; ++}; ++ ++#if TRACE_ALLOC ++static unsigned int total_bufs = 0; ++static size_t total_size = 0; ++#endif ++ ++struct dmabuf_h * dmabuf_import_mmap(void * mapptr, size_t size) ++{ ++ struct dmabuf_h *dh; ++ ++ if (mapptr == MAP_FAILED) ++ return NULL; ++ ++ dh = malloc(sizeof(*dh)); ++ if (!dh) ++ return NULL; ++ ++ *dh = (struct dmabuf_h) { ++ .fd = -1, ++ .size = size, ++ .mapptr = mapptr ++ }; ++ ++ return dh; ++} ++ ++struct dmabuf_h * dmabuf_import(int fd, size_t size) ++{ ++ struct dmabuf_h *dh; ++ ++ fd = dup(fd); ++ if (fd < 0 || size == 0) ++ return NULL; ++ ++ dh = malloc(sizeof(*dh)); ++ if (!dh) { ++ close(fd); ++ return NULL; ++ } ++ ++ *dh = (struct dmabuf_h) { ++ .fd = fd, ++ .size = size, ++ .mapptr = MAP_FAILED ++ }; ++ ++#if TRACE_ALLOC ++ ++total_bufs; ++ total_size += dh->size; ++ request_log("%s: Import: %zd, total=%zd, bufs=%d\n", __func__, dh->size, total_size, total_bufs); ++#endif ++ ++ return dh; ++} ++ ++struct dmabuf_h * dmabuf_realloc(struct dmabufs_ctl * dbsc, struct dmabuf_h * old, size_t size) ++{ ++ struct dmabuf_h * dh; ++ if (old != NULL) { ++ if (old->size >= size) { ++ return old; ++ } ++ dmabuf_free(old); ++ } ++ ++ if (size == 0 || ++ (dh = malloc(sizeof(*dh))) == NULL) ++ return NULL; ++ ++ *dh = (struct dmabuf_h){ ++ .fd = -1, ++ .mapptr = MAP_FAILED, ++ .fns = dbsc->fns ++ }; ++ ++ if (dh->fns->buf_alloc(dbsc, dh, size) != 0) ++ goto fail; ++ ++ ++#if TRACE_ALLOC ++ ++total_bufs; ++ total_size += dh->size; ++ request_log("%s: Alloc: %zd, total=%zd, bufs=%d\n", __func__, dh->size, total_size, total_bufs); ++#endif ++ ++ return dh; ++ ++fail: ++ free(dh); ++ return NULL; ++} ++ ++int dmabuf_sync(struct dmabuf_h * const dh, unsigned int flags) ++{ ++ struct dma_buf_sync sync = { ++ .flags = flags ++ }; ++ if (dh->fd == -1) ++ return 0; ++ while (ioctl(dh->fd, DMA_BUF_IOCTL_SYNC, &sync) == -1) { ++ const int err = errno; ++ if (errno == EINTR) ++ continue; ++ request_log("%s: ioctl failed: flags=%#x\n", __func__, flags); ++ return -err; ++ } ++ return 0; ++} ++ ++int dmabuf_write_start(struct dmabuf_h * const dh) ++{ ++ return dmabuf_sync(dh, DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE); ++} ++ ++int dmabuf_write_end(struct dmabuf_h * const dh) ++{ ++ return dmabuf_sync(dh, DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE); ++} ++ ++int dmabuf_read_start(struct dmabuf_h * const dh) ++{ ++ if (!dmabuf_map(dh)) ++ return -1; ++ return dmabuf_sync(dh, DMA_BUF_SYNC_START | DMA_BUF_SYNC_READ); ++} ++ ++int dmabuf_read_end(struct dmabuf_h * const dh) ++{ ++ return dmabuf_sync(dh, DMA_BUF_SYNC_END | DMA_BUF_SYNC_READ); ++} ++ ++ ++void * dmabuf_map(struct dmabuf_h * const dh) ++{ ++ if (!dh) ++ return NULL; ++ if (dh->mapptr != MAP_FAILED) ++ return dh->mapptr; ++ dh->mapptr = mmap(NULL, dh->size, ++ PROT_READ | PROT_WRITE, ++ MAP_SHARED | MAP_POPULATE, ++ dh->fd, 0); ++ if (dh->mapptr == MAP_FAILED) { ++ request_log("%s: Map failed\n", __func__); ++ return NULL; ++ } ++ return dh->mapptr; ++} ++ ++int dmabuf_fd(const struct dmabuf_h * const dh) ++{ ++ if (!dh) ++ return -1; ++ return dh->fd; ++} ++ ++size_t dmabuf_size(const struct dmabuf_h * const dh) ++{ ++ if (!dh) ++ return 0; ++ return dh->size; ++} ++ ++size_t dmabuf_len(const struct dmabuf_h * const dh) ++{ ++ if (!dh) ++ return 0; ++ return dh->len; ++} ++ ++void dmabuf_len_set(struct dmabuf_h * const dh, const size_t len) ++{ ++ dh->len = len; ++} ++ ++void dmabuf_free(struct dmabuf_h * dh) ++{ ++ if (!dh) ++ return; ++ ++#if TRACE_ALLOC ++ --total_bufs; ++ total_size -= dh->size; ++ request_log("%s: Free: %zd, total=%zd, bufs=%d\n", __func__, dh->size, total_size, total_bufs); ++#endif ++ ++ if (dh->fns != NULL && dh->fns->buf_free) ++ dh->fns->buf_free(dh); ++ ++ if (dh->mapptr != MAP_FAILED && dh->mapptr != NULL) ++ munmap(dh->mapptr, dh->size); ++ if (dh->fd != -1) ++ while (close(dh->fd) == -1 && errno == EINTR) ++ /* loop */; ++ free(dh); ++} ++ ++static struct dmabufs_ctl * dmabufs_ctl_new2(const struct dmabuf_fns * const fns) ++{ ++ struct dmabufs_ctl * dbsc = calloc(1, sizeof(*dbsc)); ++ ++ if (!dbsc) ++ return NULL; ++ ++ dbsc->fd = -1; ++ dbsc->fns = fns; ++ dbsc->page_size = (size_t)sysconf(_SC_PAGE_SIZE); ++ ++ if (fns->ctl_new(dbsc) != 0) ++ goto fail; ++ ++ return dbsc; ++ ++fail: ++ free(dbsc); ++ return NULL; ++} ++ ++static void dmabufs_ctl_free(struct dmabufs_ctl * const dbsc) ++{ ++ request_debug(NULL, "Free dmabuf ctl\n"); ++ ++ dbsc->fns->ctl_free(dbsc); ++ ++ free(dbsc); ++} ++ ++void dmabufs_ctl_unref(struct dmabufs_ctl ** const pDbsc) ++{ ++ struct dmabufs_ctl * const dbsc = *pDbsc; ++ ++ if (!dbsc) ++ return; ++ *pDbsc = NULL; ++ ++ if (atomic_fetch_sub(&dbsc->ref_count, 1) != 0) ++ return; ++ ++ dmabufs_ctl_free(dbsc); ++} ++ ++struct dmabufs_ctl * dmabufs_ctl_ref(struct dmabufs_ctl * const dbsc) ++{ ++ atomic_fetch_add(&dbsc->ref_count, 1); ++ return dbsc; ++} ++ ++//----------------------------------------------------------------------------- ++// ++// Alloc dmabuf via CMA ++ ++static int ctl_cma_new2(struct dmabufs_ctl * dbsc, const char * const * names) ++{ ++ for (; *names != NULL; ++names) ++ { ++ while ((dbsc->fd = open(*names, O_RDWR | __O_CLOEXEC)) == -1 && ++ errno == EINTR) ++ /* Loop */; ++ if (dbsc->fd != -1) ++ { ++ request_debug(NULL, "%s: Using dma_heap device %s\n", __func__, *names); ++ return 0; ++ } ++ request_debug(NULL, "%s: Not using dma_heap device %s: %s\n", __func__, *names, strerror(errno)); ++ } ++ request_log("Unable to open any dma_heap device\n"); ++ return -1; ++} ++ ++static int ctl_cma_new(struct dmabufs_ctl * dbsc) ++{ ++ static const char * const names[] = { ++ "/dev/dma_heap/linux,cma", ++ "/dev/dma_heap/reserved", ++ NULL ++ }; ++ ++ return ctl_cma_new2(dbsc, names); ++} ++ ++static void ctl_cma_free(struct dmabufs_ctl * dbsc) ++{ ++ if (dbsc->fd != -1) ++ while (close(dbsc->fd) == -1 && errno == EINTR) ++ /* loop */; ++} ++ ++static int buf_cma_alloc(struct dmabufs_ctl * const dbsc, struct dmabuf_h * dh, size_t size) ++{ ++ struct dma_heap_allocation_data data = { ++ .len = (size + dbsc->page_size - 1) & ~(dbsc->page_size - 1), ++ .fd = 0, ++ .fd_flags = O_RDWR, ++ .heap_flags = 0 ++ }; ++ ++ while (ioctl(dbsc->fd, DMA_HEAP_IOCTL_ALLOC, &data)) { ++ int err = errno; ++ request_log("Failed to alloc %" PRIu64 " from dma-heap(fd=%d): %d (%s)\n", ++ (uint64_t)data.len, ++ dbsc->fd, ++ err, ++ strerror(err)); ++ if (err == EINTR) ++ continue; ++ return -err; ++ } ++ ++ dh->fd = data.fd; ++ dh->size = (size_t)data.len; ++ ++// fprintf(stderr, "%s: size=%#zx, ftell=%#zx\n", __func__, ++// dh->size, (size_t)lseek(dh->fd, 0, SEEK_END)); ++ ++ return 0; ++} ++ ++static void buf_cma_free(struct dmabuf_h * dh) ++{ ++ // Nothing needed ++} ++ ++static const struct dmabuf_fns dmabuf_cma_fns = { ++ .buf_alloc = buf_cma_alloc, ++ .buf_free = buf_cma_free, ++ .ctl_new = ctl_cma_new, ++ .ctl_free = ctl_cma_free, ++}; ++ ++struct dmabufs_ctl * dmabufs_ctl_new(void) ++{ ++ request_debug(NULL, "Dmabufs using CMA\n"); ++ return dmabufs_ctl_new2(&dmabuf_cma_fns); ++} ++ ++static int ctl_cma_new_vidbuf_cached(struct dmabufs_ctl * dbsc) ++{ ++ static const char * const names[] = { ++ "/dev/dma_heap/vidbuf_cached", ++ "/dev/dma_heap/linux,cma", ++ "/dev/dma_heap/reserved", ++ NULL ++ }; ++ ++ return ctl_cma_new2(dbsc, names); ++} ++ ++static const struct dmabuf_fns dmabuf_vidbuf_cached_fns = { ++ .buf_alloc = buf_cma_alloc, ++ .buf_free = buf_cma_free, ++ .ctl_new = ctl_cma_new_vidbuf_cached, ++ .ctl_free = ctl_cma_free, ++}; ++ ++struct dmabufs_ctl * dmabufs_ctl_new_vidbuf_cached(void) ++{ ++ request_debug(NULL, "Dmabufs using Vidbuf\n"); ++ return dmabufs_ctl_new2(&dmabuf_vidbuf_cached_fns); ++} ++ +--- /dev/null ++++ b/libavcodec/v4l2_req_dmabufs.h +@@ -0,0 +1,45 @@ ++#ifndef DMABUFS_H ++#define DMABUFS_H ++ ++#include <stddef.h> ++ ++struct dmabufs_ctl; ++struct dmabuf_h; ++ ++struct dmabufs_ctl * dmabufs_ctl_new(void); ++struct dmabufs_ctl * dmabufs_ctl_new_vidbuf_cached(void); ++void dmabufs_ctl_unref(struct dmabufs_ctl ** const pdbsc); ++struct dmabufs_ctl * dmabufs_ctl_ref(struct dmabufs_ctl * const dbsc); ++ ++// Need not preserve old contents ++// On NULL return old buffer is freed ++struct dmabuf_h * dmabuf_realloc(struct dmabufs_ctl * dbsc, struct dmabuf_h *, size_t size); ++ ++static inline struct dmabuf_h * dmabuf_alloc(struct dmabufs_ctl * dbsc, size_t size) { ++ return dmabuf_realloc(dbsc, NULL, size); ++} ++/* Create from existing fd - dups(fd) */ ++struct dmabuf_h * dmabuf_import(int fd, size_t size); ++/* Import an MMAP - return NULL if mapptr = MAP_FAIL */ ++struct dmabuf_h * dmabuf_import_mmap(void * mapptr, size_t size); ++ ++void * dmabuf_map(struct dmabuf_h * const dh); ++ ++/* flags from linux/dmabuf.h DMA_BUF_SYNC_xxx */ ++int dmabuf_sync(struct dmabuf_h * const dh, unsigned int flags); ++ ++int dmabuf_write_start(struct dmabuf_h * const dh); ++int dmabuf_write_end(struct dmabuf_h * const dh); ++int dmabuf_read_start(struct dmabuf_h * const dh); ++int dmabuf_read_end(struct dmabuf_h * const dh); ++ ++int dmabuf_fd(const struct dmabuf_h * const dh); ++/* Allocated size */ ++size_t dmabuf_size(const struct dmabuf_h * const dh); ++/* Bytes in use */ ++size_t dmabuf_len(const struct dmabuf_h * const dh); ++/* Set bytes in use */ ++void dmabuf_len_set(struct dmabuf_h * const dh, const size_t len); ++void dmabuf_free(struct dmabuf_h * dh); ++ ++#endif +--- /dev/null ++++ b/libavcodec/v4l2_req_hevc_v1.c +@@ -0,0 +1,3 @@ ++#define HEVC_CTRLS_VERSION 1 ++#include "v4l2_req_hevc_vx.c" ++ +--- /dev/null ++++ b/libavcodec/v4l2_req_hevc_v2.c +@@ -0,0 +1,3 @@ ++#define HEVC_CTRLS_VERSION 2 ++#include "v4l2_req_hevc_vx.c" ++ +--- /dev/null ++++ b/libavcodec/v4l2_req_hevc_v3.c +@@ -0,0 +1,3 @@ ++#define HEVC_CTRLS_VERSION 3 ++#include "v4l2_req_hevc_vx.c" ++ +--- /dev/null ++++ b/libavcodec/v4l2_req_hevc_v4.c +@@ -0,0 +1,3 @@ ++#define HEVC_CTRLS_VERSION 4 ++#include "v4l2_req_hevc_vx.c" ++ +--- /dev/null ++++ b/libavcodec/v4l2_req_hevc_vx.c +@@ -0,0 +1,1362 @@ ++// File included by v4l2_req_hevc_v* - not compiled on its own ++ ++#include "decode.h" ++#include "hevcdec.h" ++#include "hwconfig.h" ++#include "internal.h" ++#include "thread.h" ++ ++#if HEVC_CTRLS_VERSION == 1 ++#include "hevc-ctrls-v1.h" ++ ++// Fixup renamed entries ++#define V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT ++ ++#elif HEVC_CTRLS_VERSION == 2 ++#include "hevc-ctrls-v2.h" ++#elif HEVC_CTRLS_VERSION == 3 ++#include "hevc-ctrls-v3.h" ++#elif HEVC_CTRLS_VERSION == 4 ++#include <linux/v4l2-controls.h> ++#if !defined(V4L2_CID_STATELESS_HEVC_SPS) ++#include "hevc-ctrls-v4.h" ++#endif ++#else ++#error Unknown HEVC_CTRLS_VERSION ++#endif ++ ++#ifndef V4L2_CID_STATELESS_HEVC_SPS ++#define V4L2_CID_STATELESS_HEVC_SPS V4L2_CID_MPEG_VIDEO_HEVC_SPS ++#define V4L2_CID_STATELESS_HEVC_PPS V4L2_CID_MPEG_VIDEO_HEVC_PPS ++#define V4L2_CID_STATELESS_HEVC_SLICE_PARAMS V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS ++#define V4L2_CID_STATELESS_HEVC_SCALING_MATRIX V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX ++#define V4L2_CID_STATELESS_HEVC_DECODE_PARAMS V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS ++#define V4L2_CID_STATELESS_HEVC_DECODE_MODE V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE ++#define V4L2_CID_STATELESS_HEVC_START_CODE V4L2_CID_MPEG_VIDEO_HEVC_START_CODE ++ ++#define V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED ++#define V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED ++#define V4L2_STATELESS_HEVC_START_CODE_NONE V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE ++#define V4L2_STATELESS_HEVC_START_CODE_ANNEX_B V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B ++#endif ++ ++#include "v4l2_request_hevc.h" ++ ++#include "libavutil/hwcontext_drm.h" ++ ++#include <semaphore.h> ++#include <pthread.h> ++ ++#include "v4l2_req_devscan.h" ++#include "v4l2_req_dmabufs.h" ++#include "v4l2_req_pollqueue.h" ++#include "v4l2_req_media.h" ++#include "v4l2_req_utils.h" ++ ++// Attached to buf[0] in frame ++// Pooled in hwcontext so generally create once - 1/frame ++typedef struct V4L2MediaReqDescriptor { ++ AVDRMFrameDescriptor drm; ++ ++ // Media ++ uint64_t timestamp; ++ struct qent_dst * qe_dst; ++ ++ // Decode only - should be NULL by the time we emit the frame ++ struct req_decode_ent decode_ent; ++ ++ struct media_request *req; ++ struct qent_src *qe_src; ++ ++#if HEVC_CTRLS_VERSION >= 2 ++ struct v4l2_ctrl_hevc_decode_params dec; ++#endif ++ ++ size_t num_slices; ++ size_t alloced_slices; ++ struct v4l2_ctrl_hevc_slice_params * slice_params; ++ struct slice_info * slices; ++ ++ size_t num_offsets; ++ size_t alloced_offsets; ++ uint32_t *offsets; ++ ++} V4L2MediaReqDescriptor; ++ ++struct slice_info { ++ const uint8_t * ptr; ++ size_t len; // bytes ++ size_t n_offsets; ++}; ++ ++// Handy container for accumulating controls before setting ++struct req_controls { ++ int has_scaling; ++ struct timeval tv; ++ struct v4l2_ctrl_hevc_sps sps; ++ struct v4l2_ctrl_hevc_pps pps; ++ struct v4l2_ctrl_hevc_scaling_matrix scaling_matrix; ++}; ++ ++//static uint8_t nalu_slice_start_code[] = { 0x00, 0x00, 0x01 }; ++ ++ ++// Get an FFmpeg format from the v4l2 format ++static enum AVPixelFormat pixel_format_from_format(const struct v4l2_format *const format) ++{ ++ switch (V4L2_TYPE_IS_MULTIPLANAR(format->type) ? ++ format->fmt.pix_mp.pixelformat : format->fmt.pix.pixelformat) { ++ case V4L2_PIX_FMT_YUV420: ++ return AV_PIX_FMT_YUV420P; ++ case V4L2_PIX_FMT_NV12: ++ return AV_PIX_FMT_NV12; ++#if CONFIG_SAND ++ case V4L2_PIX_FMT_NV12_COL128: ++ return AV_PIX_FMT_RPI4_8; ++ case V4L2_PIX_FMT_NV12_10_COL128: ++ return AV_PIX_FMT_RPI4_10; ++#endif ++ default: ++ break; ++ } ++ return AV_PIX_FMT_NONE; ++} ++ ++static inline uint64_t frame_capture_dpb(const AVFrame * const frame) ++{ ++ const V4L2MediaReqDescriptor *const rd = (V4L2MediaReqDescriptor *)frame->data[0]; ++ return rd->timestamp; ++} ++ ++static inline void frame_set_capture_dpb(AVFrame * const frame, const uint64_t dpb_stamp) ++{ ++ V4L2MediaReqDescriptor *const rd = (V4L2MediaReqDescriptor *)frame->data[0]; ++ rd->timestamp = dpb_stamp; ++} ++ ++static void fill_pred_table(const HEVCContext *h, struct v4l2_hevc_pred_weight_table *table) ++{ ++ int32_t luma_weight_denom, chroma_weight_denom; ++ const SliceHeader *sh = &h->sh; ++ ++ if (sh->slice_type == HEVC_SLICE_I || ++ (sh->slice_type == HEVC_SLICE_P && !h->ps.pps->weighted_pred_flag) || ++ (sh->slice_type == HEVC_SLICE_B && !h->ps.pps->weighted_bipred_flag)) ++ return; ++ ++ table->luma_log2_weight_denom = sh->luma_log2_weight_denom; ++ ++ if (h->ps.sps->chroma_format_idc) ++ table->delta_chroma_log2_weight_denom = sh->chroma_log2_weight_denom - sh->luma_log2_weight_denom; ++ ++ luma_weight_denom = (1 << sh->luma_log2_weight_denom); ++ chroma_weight_denom = (1 << sh->chroma_log2_weight_denom); ++ ++ for (int i = 0; i < 15 && i < sh->nb_refs[L0]; i++) { ++ table->delta_luma_weight_l0[i] = sh->luma_weight_l0[i] - luma_weight_denom; ++ table->luma_offset_l0[i] = sh->luma_offset_l0[i]; ++ table->delta_chroma_weight_l0[i][0] = sh->chroma_weight_l0[i][0] - chroma_weight_denom; ++ table->delta_chroma_weight_l0[i][1] = sh->chroma_weight_l0[i][1] - chroma_weight_denom; ++ table->chroma_offset_l0[i][0] = sh->chroma_offset_l0[i][0]; ++ table->chroma_offset_l0[i][1] = sh->chroma_offset_l0[i][1]; ++ } ++ ++ if (sh->slice_type != HEVC_SLICE_B) ++ return; ++ ++ for (int i = 0; i < 15 && i < sh->nb_refs[L1]; i++) { ++ table->delta_luma_weight_l1[i] = sh->luma_weight_l1[i] - luma_weight_denom; ++ table->luma_offset_l1[i] = sh->luma_offset_l1[i]; ++ table->delta_chroma_weight_l1[i][0] = sh->chroma_weight_l1[i][0] - chroma_weight_denom; ++ table->delta_chroma_weight_l1[i][1] = sh->chroma_weight_l1[i][1] - chroma_weight_denom; ++ table->chroma_offset_l1[i][0] = sh->chroma_offset_l1[i][0]; ++ table->chroma_offset_l1[i][1] = sh->chroma_offset_l1[i][1]; ++ } ++} ++ ++#if HEVC_CTRLS_VERSION <= 2 ++static int find_frame_rps_type(const HEVCContext *h, uint64_t timestamp) ++{ ++ const HEVCFrame *frame; ++ int i; ++ ++ for (i = 0; i < h->rps[ST_CURR_BEF].nb_refs; i++) { ++ frame = h->rps[ST_CURR_BEF].ref[i]; ++ if (frame && timestamp == frame_capture_dpb(frame->frame)) ++ return V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_BEFORE; ++ } ++ ++ for (i = 0; i < h->rps[ST_CURR_AFT].nb_refs; i++) { ++ frame = h->rps[ST_CURR_AFT].ref[i]; ++ if (frame && timestamp == frame_capture_dpb(frame->frame)) ++ return V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_AFTER; ++ } ++ ++ for (i = 0; i < h->rps[LT_CURR].nb_refs; i++) { ++ frame = h->rps[LT_CURR].ref[i]; ++ if (frame && timestamp == frame_capture_dpb(frame->frame)) ++ return V4L2_HEVC_DPB_ENTRY_RPS_LT_CURR; ++ } ++ ++ return 0; ++} ++#endif ++ ++static unsigned int ++get_ref_pic_index(const HEVCContext *h, const HEVCFrame *frame, ++ const struct v4l2_hevc_dpb_entry * const entries, ++ const unsigned int num_entries) ++{ ++ uint64_t timestamp; ++ ++ if (!frame) ++ return 0; ++ ++ timestamp = frame_capture_dpb(frame->frame); ++ ++ for (unsigned int i = 0; i < num_entries; i++) { ++ if (entries[i].timestamp == timestamp) ++ return i; ++ } ++ ++ return 0; ++} ++ ++static const uint8_t * ptr_from_index(const uint8_t * b, unsigned int idx) ++{ ++ unsigned int z = 0; ++ while (idx--) { ++ if (*b++ == 0) { ++ ++z; ++ if (z >= 2 && *b == 3) { ++ ++b; ++ z = 0; ++ } ++ } ++ else { ++ z = 0; ++ } ++ } ++ return b; ++} ++ ++static int slice_add(V4L2MediaReqDescriptor * const rd) ++{ ++ if (rd->num_slices >= rd->alloced_slices) { ++ struct v4l2_ctrl_hevc_slice_params * p2; ++ struct slice_info * s2; ++ size_t n2 = rd->alloced_slices == 0 ? 8 : rd->alloced_slices * 2; ++ ++ p2 = av_realloc_array(rd->slice_params, n2, sizeof(*p2)); ++ if (p2 == NULL) ++ return AVERROR(ENOMEM); ++ rd->slice_params = p2; ++ ++ s2 = av_realloc_array(rd->slices, n2, sizeof(*s2)); ++ if (s2 == NULL) ++ return AVERROR(ENOMEM); ++ rd->slices = s2; ++ ++ rd->alloced_slices = n2; ++ } ++ ++rd->num_slices; ++ return 0; ++} ++ ++static int offsets_add(V4L2MediaReqDescriptor *const rd, const size_t n, const unsigned * const offsets) ++{ ++ if (rd->num_offsets + n > rd->alloced_offsets) { ++ size_t n2 = rd->alloced_slices == 0 ? 128 : rd->alloced_slices * 2; ++ void * p2; ++ while (rd->num_offsets + n > n2) ++ n2 *= 2; ++ if ((p2 = av_realloc_array(rd->offsets, n2, sizeof(*rd->offsets))) == NULL) ++ return AVERROR(ENOMEM); ++ rd->offsets = p2; ++ rd->alloced_offsets = n2; ++ } ++ for (size_t i = 0; i != n; ++i) ++ rd->offsets[rd->num_offsets++] = offsets[i] - 1; ++ return 0; ++} ++ ++static unsigned int ++fill_dpb_entries(const HEVCContext * const h, struct v4l2_hevc_dpb_entry * const entries) ++{ ++ unsigned int i; ++ unsigned int n = 0; ++ const HEVCFrame * const pic = h->ref; ++ ++ for (i = 0; i < FF_ARRAY_ELEMS(h->DPB); i++) { ++ const HEVCFrame * const frame = &h->DPB[i]; ++ if (frame != pic && (frame->flags & (HEVC_FRAME_FLAG_LONG_REF | HEVC_FRAME_FLAG_SHORT_REF))) { ++ struct v4l2_hevc_dpb_entry * const entry = entries + n++; ++ ++ entry->timestamp = frame_capture_dpb(frame->frame); ++#if HEVC_CTRLS_VERSION <= 2 ++ entry->rps = find_frame_rps_type(h, entry->timestamp); ++#else ++ entry->flags = (frame->flags & HEVC_FRAME_FLAG_LONG_REF) == 0 ? 0 : ++ V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE; ++#endif ++ entry->field_pic = frame->frame->interlaced_frame; ++ ++#if HEVC_CTRLS_VERSION <= 3 ++ /* TODO: Interleaved: Get the POC for each field. */ ++ entry->pic_order_cnt[0] = frame->poc; ++ entry->pic_order_cnt[1] = frame->poc; ++#else ++ entry->pic_order_cnt_val = frame->poc; ++#endif ++ } ++ } ++ return n; ++} ++ ++static void fill_slice_params(const HEVCContext * const h, ++#if HEVC_CTRLS_VERSION >= 2 ++ const struct v4l2_ctrl_hevc_decode_params * const dec, ++#endif ++ struct v4l2_ctrl_hevc_slice_params *slice_params, ++ uint32_t bit_size, uint32_t bit_offset) ++{ ++ const SliceHeader * const sh = &h->sh; ++#if HEVC_CTRLS_VERSION >= 2 ++ const struct v4l2_hevc_dpb_entry *const dpb = dec->dpb; ++ const unsigned int dpb_n = dec->num_active_dpb_entries; ++#else ++ struct v4l2_hevc_dpb_entry *const dpb = slice_params->dpb; ++ unsigned int dpb_n; ++#endif ++ unsigned int i; ++ RefPicList *rpl; ++ ++ *slice_params = (struct v4l2_ctrl_hevc_slice_params) { ++ .bit_size = bit_size, ++#if HEVC_CTRLS_VERSION <= 3 ++ .data_bit_offset = bit_offset, ++#else ++ .data_byte_offset = bit_offset / 8 + 1, ++#endif ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ ++ .slice_segment_addr = sh->slice_segment_addr, ++ ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: NAL unit header */ ++ .nal_unit_type = h->nal_unit_type, ++ .nuh_temporal_id_plus1 = h->temporal_id + 1, ++ ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ ++ .slice_type = sh->slice_type, ++ .colour_plane_id = sh->colour_plane_id, ++ .slice_pic_order_cnt = h->ref->poc, ++ .num_ref_idx_l0_active_minus1 = sh->nb_refs[L0] ? sh->nb_refs[L0] - 1 : 0, ++ .num_ref_idx_l1_active_minus1 = sh->nb_refs[L1] ? sh->nb_refs[L1] - 1 : 0, ++ .collocated_ref_idx = sh->slice_temporal_mvp_enabled_flag ? sh->collocated_ref_idx : 0, ++ .five_minus_max_num_merge_cand = sh->slice_type == HEVC_SLICE_I ? 0 : 5 - sh->max_num_merge_cand, ++ .slice_qp_delta = sh->slice_qp_delta, ++ .slice_cb_qp_offset = sh->slice_cb_qp_offset, ++ .slice_cr_qp_offset = sh->slice_cr_qp_offset, ++ .slice_act_y_qp_offset = 0, ++ .slice_act_cb_qp_offset = 0, ++ .slice_act_cr_qp_offset = 0, ++ .slice_beta_offset_div2 = sh->beta_offset / 2, ++ .slice_tc_offset_div2 = sh->tc_offset / 2, ++ ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture timing SEI message */ ++ .pic_struct = h->sei.picture_timing.picture_struct, ++ ++#if HEVC_CTRLS_VERSION < 2 ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ ++ .num_rps_poc_st_curr_before = h->rps[ST_CURR_BEF].nb_refs, ++ .num_rps_poc_st_curr_after = h->rps[ST_CURR_AFT].nb_refs, ++ .num_rps_poc_lt_curr = h->rps[LT_CURR].nb_refs, ++#endif ++ }; ++ ++ if (sh->slice_sample_adaptive_offset_flag[0]) ++ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA; ++ ++ if (sh->slice_sample_adaptive_offset_flag[1]) ++ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA; ++ ++ if (sh->slice_temporal_mvp_enabled_flag) ++ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED; ++ ++ if (sh->mvd_l1_zero_flag) ++ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO; ++ ++ if (sh->cabac_init_flag) ++ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT; ++ ++ if (sh->collocated_list == L0) ++ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0; ++ ++ if (sh->disable_deblocking_filter_flag) ++ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED; ++ ++ if (sh->slice_loop_filter_across_slices_enabled_flag) ++ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED; ++ ++ if (sh->dependent_slice_segment_flag) ++ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT; ++ ++#if HEVC_CTRLS_VERSION < 2 ++ dpb_n = fill_dpb_entries(h, dpb); ++ slice_params->num_active_dpb_entries = dpb_n; ++#endif ++ ++ if (sh->slice_type != HEVC_SLICE_I) { ++ rpl = &h->ref->refPicList[0]; ++ for (i = 0; i < rpl->nb_refs; i++) ++ slice_params->ref_idx_l0[i] = get_ref_pic_index(h, rpl->ref[i], dpb, dpb_n); ++ } ++ ++ if (sh->slice_type == HEVC_SLICE_B) { ++ rpl = &h->ref->refPicList[1]; ++ for (i = 0; i < rpl->nb_refs; i++) ++ slice_params->ref_idx_l1[i] = get_ref_pic_index(h, rpl->ref[i], dpb, dpb_n); ++ } ++ ++ fill_pred_table(h, &slice_params->pred_weight_table); ++ ++ slice_params->num_entry_point_offsets = sh->num_entry_point_offsets; ++#if HEVC_CTRLS_VERSION <= 3 ++ if (slice_params->num_entry_point_offsets > 256) { ++ slice_params->num_entry_point_offsets = 256; ++ av_log(NULL, AV_LOG_ERROR, "%s: Currently only 256 entry points are supported, but slice has %d entry points.\n", __func__, sh->num_entry_point_offsets); ++ } ++ ++ for (i = 0; i < slice_params->num_entry_point_offsets; i++) ++ slice_params->entry_point_offset_minus1[i] = sh->entry_point_offset[i] - 1; ++#endif ++} ++ ++#if HEVC_CTRLS_VERSION >= 2 ++static void ++fill_decode_params(const HEVCContext * const h, ++ struct v4l2_ctrl_hevc_decode_params * const dec) ++{ ++ unsigned int i; ++ ++ *dec = (struct v4l2_ctrl_hevc_decode_params){ ++ .pic_order_cnt_val = h->poc, ++ .num_poc_st_curr_before = h->rps[ST_CURR_BEF].nb_refs, ++ .num_poc_st_curr_after = h->rps[ST_CURR_AFT].nb_refs, ++ .num_poc_lt_curr = h->rps[LT_CURR].nb_refs, ++ }; ++ ++ dec->num_active_dpb_entries = fill_dpb_entries(h, dec->dpb); ++ ++ // The docn does seem to ask that we fit our 32 bit signed POC into ++ // a U8 so... (To be fair 16 bits would be enough) ++ // Luckily we (Pi) don't use these fields ++ for (i = 0; i != h->rps[ST_CURR_BEF].nb_refs; ++i) ++ dec->poc_st_curr_before[i] = h->rps[ST_CURR_BEF].ref[i]->poc; ++ for (i = 0; i != h->rps[ST_CURR_AFT].nb_refs; ++i) ++ dec->poc_st_curr_after[i] = h->rps[ST_CURR_AFT].ref[i]->poc; ++ for (i = 0; i != h->rps[LT_CURR].nb_refs; ++i) ++ dec->poc_lt_curr[i] = h->rps[LT_CURR].ref[i]->poc; ++ ++ if (IS_IRAP(h)) ++ dec->flags |= V4L2_HEVC_DECODE_PARAM_FLAG_IRAP_PIC; ++ if (IS_IDR(h)) ++ dec->flags |= V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC; ++ if (h->sh.no_output_of_prior_pics_flag) ++ dec->flags |= V4L2_HEVC_DECODE_PARAM_FLAG_NO_OUTPUT_OF_PRIOR; ++ ++} ++#endif ++ ++static void fill_sps(struct v4l2_ctrl_hevc_sps *ctrl, const HEVCSPS *sps) ++{ ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Sequence parameter set */ ++ *ctrl = (struct v4l2_ctrl_hevc_sps) { ++ .chroma_format_idc = sps->chroma_format_idc, ++ .pic_width_in_luma_samples = sps->width, ++ .pic_height_in_luma_samples = sps->height, ++ .bit_depth_luma_minus8 = sps->bit_depth - 8, ++ .bit_depth_chroma_minus8 = sps->bit_depth - 8, ++ .log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_poc_lsb - 4, ++ .sps_max_dec_pic_buffering_minus1 = sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering - 1, ++ .sps_max_num_reorder_pics = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics, ++ .sps_max_latency_increase_plus1 = sps->temporal_layer[sps->max_sub_layers - 1].max_latency_increase + 1, ++ .log2_min_luma_coding_block_size_minus3 = sps->log2_min_cb_size - 3, ++ .log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_coding_block_size, ++ .log2_min_luma_transform_block_size_minus2 = sps->log2_min_tb_size - 2, ++ .log2_diff_max_min_luma_transform_block_size = sps->log2_max_trafo_size - sps->log2_min_tb_size, ++ .max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter, ++ .max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra, ++ .pcm_sample_bit_depth_luma_minus1 = sps->pcm.bit_depth - 1, ++ .pcm_sample_bit_depth_chroma_minus1 = sps->pcm.bit_depth_chroma - 1, ++ .log2_min_pcm_luma_coding_block_size_minus3 = sps->pcm.log2_min_pcm_cb_size - 3, ++ .log2_diff_max_min_pcm_luma_coding_block_size = sps->pcm.log2_max_pcm_cb_size - sps->pcm.log2_min_pcm_cb_size, ++ .num_short_term_ref_pic_sets = sps->nb_st_rps, ++ .num_long_term_ref_pics_sps = sps->num_long_term_ref_pics_sps, ++ .chroma_format_idc = sps->chroma_format_idc, ++ .sps_max_sub_layers_minus1 = sps->max_sub_layers - 1, ++ }; ++ ++ if (sps->separate_colour_plane_flag) ++ ctrl->flags |= V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE; ++ ++ if (sps->scaling_list_enable_flag) ++ ctrl->flags |= V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED; ++ ++ if (sps->amp_enabled_flag) ++ ctrl->flags |= V4L2_HEVC_SPS_FLAG_AMP_ENABLED; ++ ++ if (sps->sao_enabled) ++ ctrl->flags |= V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET; ++ ++ if (sps->pcm_enabled_flag) ++ ctrl->flags |= V4L2_HEVC_SPS_FLAG_PCM_ENABLED; ++ ++ if (sps->pcm.loop_filter_disable_flag) ++ ctrl->flags |= V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED; ++ ++ if (sps->long_term_ref_pics_present_flag) ++ ctrl->flags |= V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT; ++ ++ if (sps->sps_temporal_mvp_enabled_flag) ++ ctrl->flags |= V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED; ++ ++ if (sps->sps_strong_intra_smoothing_enable_flag) ++ ctrl->flags |= V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED; ++} ++ ++static void fill_scaling_matrix(const ScalingList * const sl, ++ struct v4l2_ctrl_hevc_scaling_matrix * const sm) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < 6; i++) { ++ unsigned int j; ++ ++ for (j = 0; j < 16; j++) ++ sm->scaling_list_4x4[i][j] = sl->sl[0][i][j]; ++ for (j = 0; j < 64; j++) { ++ sm->scaling_list_8x8[i][j] = sl->sl[1][i][j]; ++ sm->scaling_list_16x16[i][j] = sl->sl[2][i][j]; ++ if (i < 2) ++ sm->scaling_list_32x32[i][j] = sl->sl[3][i * 3][j]; ++ } ++ sm->scaling_list_dc_coef_16x16[i] = sl->sl_dc[0][i]; ++ if (i < 2) ++ sm->scaling_list_dc_coef_32x32[i] = sl->sl_dc[1][i * 3]; ++ } ++} ++ ++static void fill_pps(struct v4l2_ctrl_hevc_pps * const ctrl, const HEVCPPS * const pps) ++{ ++ uint64_t flags = 0; ++ ++ if (pps->dependent_slice_segments_enabled_flag) ++ flags |= V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED; ++ ++ if (pps->output_flag_present_flag) ++ flags |= V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT; ++ ++ if (pps->sign_data_hiding_flag) ++ flags |= V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED; ++ ++ if (pps->cabac_init_present_flag) ++ flags |= V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT; ++ ++ if (pps->constrained_intra_pred_flag) ++ flags |= V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED; ++ ++ if (pps->transform_skip_enabled_flag) ++ flags |= V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED; ++ ++ if (pps->cu_qp_delta_enabled_flag) ++ flags |= V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED; ++ ++ if (pps->pic_slice_level_chroma_qp_offsets_present_flag) ++ flags |= V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT; ++ ++ if (pps->weighted_pred_flag) ++ flags |= V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED; ++ ++ if (pps->weighted_bipred_flag) ++ flags |= V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED; ++ ++ if (pps->transquant_bypass_enable_flag) ++ flags |= V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED; ++ ++ if (pps->tiles_enabled_flag) ++ flags |= V4L2_HEVC_PPS_FLAG_TILES_ENABLED; ++ ++ if (pps->entropy_coding_sync_enabled_flag) ++ flags |= V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED; ++ ++ if (pps->loop_filter_across_tiles_enabled_flag) ++ flags |= V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED; ++ ++ if (pps->seq_loop_filter_across_slices_enabled_flag) ++ flags |= V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED; ++ ++ if (pps->deblocking_filter_override_enabled_flag) ++ flags |= V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED; ++ ++ if (pps->disable_dbf) ++ flags |= V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER; ++ ++ if (pps->lists_modification_present_flag) ++ flags |= V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT; ++ ++ if (pps->slice_header_extension_present_flag) ++ flags |= V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT; ++ ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture parameter set */ ++ *ctrl = (struct v4l2_ctrl_hevc_pps) { ++ .num_extra_slice_header_bits = pps->num_extra_slice_header_bits, ++ .init_qp_minus26 = pps->pic_init_qp_minus26, ++ .diff_cu_qp_delta_depth = pps->diff_cu_qp_delta_depth, ++ .pps_cb_qp_offset = pps->cb_qp_offset, ++ .pps_cr_qp_offset = pps->cr_qp_offset, ++ .pps_beta_offset_div2 = pps->beta_offset / 2, ++ .pps_tc_offset_div2 = pps->tc_offset / 2, ++ .log2_parallel_merge_level_minus2 = pps->log2_parallel_merge_level - 2, ++ .flags = flags ++ }; ++ ++ ++ if (pps->tiles_enabled_flag) { ++ ctrl->num_tile_columns_minus1 = pps->num_tile_columns - 1; ++ ctrl->num_tile_rows_minus1 = pps->num_tile_rows - 1; ++ ++ for (int i = 0; i < pps->num_tile_columns; i++) ++ ctrl->column_width_minus1[i] = pps->column_width[i] - 1; ++ ++ for (int i = 0; i < pps->num_tile_rows; i++) ++ ctrl->row_height_minus1[i] = pps->row_height[i] - 1; ++ } ++} ++ ++// Called before finally returning the frame to the user ++// Set corrupt flag here as this is actually the frame structure that ++// is going to the user (in MT land each thread has its own pool) ++static int frame_post_process(void *logctx, AVFrame *frame) ++{ ++ V4L2MediaReqDescriptor *rd = (V4L2MediaReqDescriptor*)frame->data[0]; ++ ++// av_log(NULL, AV_LOG_INFO, "%s\n", __func__); ++ frame->flags &= ~AV_FRAME_FLAG_CORRUPT; ++ if (rd->qe_dst) { ++ MediaBufsStatus stat = qent_dst_wait(rd->qe_dst); ++ if (stat != MEDIABUFS_STATUS_SUCCESS) { ++ av_log(logctx, AV_LOG_ERROR, "%s: Decode fail\n", __func__); ++ frame->flags |= AV_FRAME_FLAG_CORRUPT; ++ } ++ } ++ ++ return 0; ++} ++ ++static inline struct timeval cvt_dpb_to_tv(uint64_t t) ++{ ++ t /= 1000; ++ return (struct timeval){ ++ .tv_usec = t % 1000000, ++ .tv_sec = t / 1000000 ++ }; ++} ++ ++static inline uint64_t cvt_timestamp_to_dpb(const unsigned int t) ++{ ++ return (uint64_t)t * 1000; ++} ++ ++static int v4l2_request_hevc_start_frame(AVCodecContext *avctx, ++ av_unused const uint8_t *buffer, ++ av_unused uint32_t size) ++{ ++ const HEVCContext *h = avctx->priv_data; ++ V4L2MediaReqDescriptor *const rd = (V4L2MediaReqDescriptor *)h->ref->frame->data[0]; ++ V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; ++ ++// av_log(NULL, AV_LOG_INFO, "%s\n", __func__); ++ decode_q_add(&ctx->decode_q, &rd->decode_ent); ++ ++ rd->num_slices = 0; ++ ctx->timestamp++; ++ rd->timestamp = cvt_timestamp_to_dpb(ctx->timestamp); ++ ++ { ++ FrameDecodeData * const fdd = (FrameDecodeData*)h->ref->frame->private_ref->data; ++ fdd->post_process = frame_post_process; ++ } ++ ++ // qe_dst needs to be bound to the data buffer and only returned when that is ++ if (!rd->qe_dst) ++ { ++ if ((rd->qe_dst = mediabufs_dst_qent_alloc(ctx->mbufs, ctx->dbufs)) == NULL) { ++ av_log(avctx, AV_LOG_ERROR, "%s: Failed to get dst buffer\n", __func__); ++ return AVERROR(ENOMEM); ++ } ++ } ++ ++ ff_thread_finish_setup(avctx); // Allow next thread to enter rpi_hevc_start_frame ++ ++ return 0; ++} ++ ++// Object fd & size will be zapped by this & need setting later ++static int drm_from_format(AVDRMFrameDescriptor * const desc, const struct v4l2_format * const format) ++{ ++ AVDRMLayerDescriptor *layer = &desc->layers[0]; ++ unsigned int width; ++ unsigned int height; ++ unsigned int bpl; ++ uint32_t pixelformat; ++ ++ if (V4L2_TYPE_IS_MULTIPLANAR(format->type)) { ++ width = format->fmt.pix_mp.width; ++ height = format->fmt.pix_mp.height; ++ pixelformat = format->fmt.pix_mp.pixelformat; ++ bpl = format->fmt.pix_mp.plane_fmt[0].bytesperline; ++ } ++ else { ++ width = format->fmt.pix.width; ++ height = format->fmt.pix.height; ++ pixelformat = format->fmt.pix.pixelformat; ++ bpl = format->fmt.pix.bytesperline; ++ } ++ ++ switch (pixelformat) { ++ case V4L2_PIX_FMT_NV12: ++ layer->format = DRM_FORMAT_NV12; ++ desc->objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR; ++ break; ++#if CONFIG_SAND ++ case V4L2_PIX_FMT_NV12_COL128: ++ layer->format = DRM_FORMAT_NV12; ++ desc->objects[0].format_modifier = DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(bpl); ++ break; ++ case V4L2_PIX_FMT_NV12_10_COL128: ++ layer->format = DRM_FORMAT_P030; ++ desc->objects[0].format_modifier = DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(bpl); ++ break; ++#endif ++#ifdef DRM_FORMAT_MOD_ALLWINNER_TILED ++ case V4L2_PIX_FMT_SUNXI_TILED_NV12: ++ layer->format = DRM_FORMAT_NV12; ++ desc->objects[0].format_modifier = DRM_FORMAT_MOD_ALLWINNER_TILED; ++ break; ++#endif ++#if defined(V4L2_PIX_FMT_NV15) && defined(DRM_FORMAT_NV15) ++ case V4L2_PIX_FMT_NV15: ++ layer->format = DRM_FORMAT_NV15; ++ desc->objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR; ++ break; ++#endif ++ case V4L2_PIX_FMT_NV16: ++ layer->format = DRM_FORMAT_NV16; ++ desc->objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR; ++ break; ++#if defined(V4L2_PIX_FMT_NV20) && defined(DRM_FORMAT_NV20) ++ case V4L2_PIX_FMT_NV20: ++ layer->format = DRM_FORMAT_NV20; ++ desc->objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR; ++ break; ++#endif ++ default: ++ return -1; ++ } ++ ++ desc->nb_objects = 1; ++ desc->objects[0].fd = -1; ++ desc->objects[0].size = 0; ++ ++ desc->nb_layers = 1; ++ layer->nb_planes = 2; ++ ++ layer->planes[0].object_index = 0; ++ layer->planes[0].offset = 0; ++ layer->planes[0].pitch = bpl; ++#if CONFIG_SAND ++ if (pixelformat == V4L2_PIX_FMT_NV12_COL128) { ++ layer->planes[1].object_index = 0; ++ layer->planes[1].offset = height * 128; ++ layer->planes[0].pitch = width; ++ layer->planes[1].pitch = width; ++ } ++ else if (pixelformat == V4L2_PIX_FMT_NV12_10_COL128) { ++ layer->planes[1].object_index = 0; ++ layer->planes[1].offset = height * 128; ++ layer->planes[0].pitch = width * 2; // Lies but it keeps DRM import happy ++ layer->planes[1].pitch = width * 2; ++ } ++ else ++#endif ++ { ++ layer->planes[1].object_index = 0; ++ layer->planes[1].offset = layer->planes[0].pitch * height; ++ layer->planes[1].pitch = layer->planes[0].pitch; ++ } ++ ++ return 0; ++} ++ ++static int ++set_req_ctls(V4L2RequestContextHEVC *ctx, struct media_request * const mreq, ++ struct req_controls *const controls, ++#if HEVC_CTRLS_VERSION >= 2 ++ struct v4l2_ctrl_hevc_decode_params * const dec, ++#endif ++ struct v4l2_ctrl_hevc_slice_params * const slices, const unsigned int slice_count, ++ void * const offsets, const size_t offset_count) ++{ ++ int rv; ++#if HEVC_CTRLS_VERSION >= 2 ++ unsigned int n = 3; ++#else ++ unsigned int n = 2; ++#endif ++ ++ struct v4l2_ext_control control[6] = { ++ { ++ .id = V4L2_CID_STATELESS_HEVC_SPS, ++ .ptr = &controls->sps, ++ .size = sizeof(controls->sps), ++ }, ++ { ++ .id = V4L2_CID_STATELESS_HEVC_PPS, ++ .ptr = &controls->pps, ++ .size = sizeof(controls->pps), ++ }, ++#if HEVC_CTRLS_VERSION >= 2 ++ { ++ .id = V4L2_CID_STATELESS_HEVC_DECODE_PARAMS, ++ .ptr = dec, ++ .size = sizeof(*dec), ++ }, ++#endif ++ }; ++ ++ if (slices) ++ control[n++] = (struct v4l2_ext_control) { ++ .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS, ++ .ptr = slices, ++ .size = sizeof(*slices) * slice_count, ++ }; ++ ++ if (controls->has_scaling) ++ control[n++] = (struct v4l2_ext_control) { ++ .id = V4L2_CID_STATELESS_HEVC_SCALING_MATRIX, ++ .ptr = &controls->scaling_matrix, ++ .size = sizeof(controls->scaling_matrix), ++ }; ++ ++#if HEVC_CTRLS_VERSION >= 4 ++ if (offsets) ++ control[n++] = (struct v4l2_ext_control) { ++ .id = V4L2_CID_STATELESS_HEVC_ENTRY_POINT_OFFSETS, ++ .ptr = offsets, ++ .size = sizeof(((struct V4L2MediaReqDescriptor *)0)->offsets[0]) * offset_count, ++ }; ++#endif ++ ++ rv = mediabufs_ctl_set_ext_ctrls(ctx->mbufs, mreq, control, n); ++ ++ return rv; ++} ++ ++// This only works because we started out from a single coded frame buffer ++// that will remain intact until after end_frame ++static int v4l2_request_hevc_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size) ++{ ++ const HEVCContext * const h = avctx->priv_data; ++ V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; ++ V4L2MediaReqDescriptor * const rd = (V4L2MediaReqDescriptor*)h->ref->frame->data[0]; ++ int bcount = get_bits_count(&h->HEVClc->gb); ++ uint32_t boff = (ptr_from_index(buffer, bcount/8 + 1) - (buffer + bcount/8 + 1)) * 8 + bcount; ++ ++ const unsigned int n = rd->num_slices; ++ const unsigned int block_start = (n / ctx->max_slices) * ctx->max_slices; ++ ++ int rv; ++ struct slice_info * si; ++ ++ // This looks dodgy but we know that FFmpeg has parsed this from a buffer ++ // that contains the entire frame including the start code ++ if (ctx->start_code == V4L2_STATELESS_HEVC_START_CODE_ANNEX_B) { ++ buffer -= 3; ++ size += 3; ++ boff += 24; ++ if (buffer[0] != 0 || buffer[1] != 0 || buffer[2] != 1) { ++ av_log(avctx, AV_LOG_ERROR, "Start code requested but missing %02x:%02x:%02x\n", ++ buffer[0], buffer[1], buffer[2]); ++ } ++ } ++ ++ if (ctx->decode_mode == V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED) { ++ if (rd->slices == NULL) { ++ if ((rd->slices = av_mallocz(sizeof(*rd->slices))) == NULL) ++ return AVERROR(ENOMEM); ++ rd->slices->ptr = buffer; ++ rd->num_slices = 1; ++ } ++ rd->slices->len = buffer - rd->slices->ptr + size; ++ return 0; ++ } ++ ++ if ((rv = slice_add(rd)) != 0) ++ return rv; ++ ++ si = rd->slices + n; ++ si->ptr = buffer; ++ si->len = size; ++ si->n_offsets = rd->num_offsets; ++ ++ if (n != block_start) { ++ struct slice_info *const si0 = rd->slices + block_start; ++ const size_t offset = (buffer - si0->ptr); ++ boff += offset * 8; ++ size += offset; ++ si0->len = si->len + offset; ++ } ++ ++#if HEVC_CTRLS_VERSION >= 2 ++ if (n == 0) ++ fill_decode_params(h, &rd->dec); ++ fill_slice_params(h, &rd->dec, rd->slice_params + n, size * 8, boff); ++#else ++ fill_slice_params(h, rd->slice_params + n, size * 8, boff); ++#endif ++ if (ctx->max_offsets != 0 && ++ (rv = offsets_add(rd, h->sh.num_entry_point_offsets, h->sh.entry_point_offset)) != 0) ++ return rv; ++ ++ return 0; ++} ++ ++static void v4l2_request_hevc_abort_frame(AVCodecContext * const avctx) ++{ ++ const HEVCContext * const h = avctx->priv_data; ++ if (h->ref != NULL) { ++ V4L2MediaReqDescriptor *const rd = (V4L2MediaReqDescriptor *)h->ref->frame->data[0]; ++ V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; ++ ++ media_request_abort(&rd->req); ++ mediabufs_src_qent_abort(ctx->mbufs, &rd->qe_src); ++ ++ decode_q_remove(&ctx->decode_q, &rd->decode_ent); ++ } ++} ++ ++static int send_slice(AVCodecContext * const avctx, ++ V4L2MediaReqDescriptor * const rd, ++ struct req_controls *const controls, ++ const unsigned int i, const unsigned int j) ++{ ++ V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; ++ ++ const int is_last = (j == rd->num_slices); ++ struct slice_info *const si = rd->slices + i; ++ struct media_request * req = NULL; ++ struct qent_src * src = NULL; ++ MediaBufsStatus stat; ++ void * offsets = rd->offsets + rd->slices[i].n_offsets; ++ size_t n_offsets = (is_last ? rd->num_offsets : rd->slices[j].n_offsets) - rd->slices[i].n_offsets; ++ ++ if ((req = media_request_get(ctx->mpool)) == NULL) { ++ av_log(avctx, AV_LOG_ERROR, "%s: Failed to alloc media request\n", __func__); ++ return AVERROR(ENOMEM); ++ } ++ ++ if (set_req_ctls(ctx, req, ++ controls, ++#if HEVC_CTRLS_VERSION >= 2 ++ &rd->dec, ++#endif ++ rd->slice_params + i, j - i, ++ offsets, n_offsets)) { ++ av_log(avctx, AV_LOG_ERROR, "%s: Failed to set req ctls\n", __func__); ++ goto fail1; ++ } ++ ++ if ((src = mediabufs_src_qent_get(ctx->mbufs)) == NULL) { ++ av_log(avctx, AV_LOG_ERROR, "%s: Failed to get src buffer\n", __func__); ++ goto fail1; ++ } ++ ++ if (qent_src_data_copy(src, 0, si->ptr, si->len, ctx->dbufs) != 0) { ++ av_log(avctx, AV_LOG_ERROR, "%s: Failed data copy\n", __func__); ++ goto fail2; ++ } ++ ++ if (qent_src_params_set(src, &controls->tv)) { ++ av_log(avctx, AV_LOG_ERROR, "%s: Failed src param set\n", __func__); ++ goto fail2; ++ } ++ ++ stat = mediabufs_start_request(ctx->mbufs, &req, &src, ++ i == 0 ? rd->qe_dst : NULL, ++ is_last); ++ ++ if (stat != MEDIABUFS_STATUS_SUCCESS) { ++ av_log(avctx, AV_LOG_ERROR, "%s: Failed to start request\n", __func__); ++ return AVERROR_UNKNOWN; ++ } ++ return 0; ++ ++fail2: ++ mediabufs_src_qent_abort(ctx->mbufs, &src); ++fail1: ++ media_request_abort(&req); ++ return AVERROR_UNKNOWN; ++} ++ ++static int v4l2_request_hevc_end_frame(AVCodecContext *avctx) ++{ ++ const HEVCContext * const h = avctx->priv_data; ++ V4L2MediaReqDescriptor *rd = (V4L2MediaReqDescriptor*)h->ref->frame->data[0]; ++ V4L2RequestContextHEVC *ctx = avctx->internal->hwaccel_priv_data; ++ struct req_controls rc; ++ unsigned int i; ++ int rv; ++ ++ // It is possible, though maybe a bug, to get an end_frame without ++ // a previous start_frame. If we do then give up. ++ if (!decode_q_in_q(&rd->decode_ent)) { ++ av_log(avctx, AV_LOG_DEBUG, "%s: Frame not in decode Q\n", __func__); ++ return AVERROR_INVALIDDATA; ++ } ++ ++ { ++ const ScalingList *sl = h->ps.pps->scaling_list_data_present_flag ? ++ &h->ps.pps->scaling_list : ++ h->ps.sps->scaling_list_enable_flag ? ++ &h->ps.sps->scaling_list : NULL; ++ ++ ++ memset(&rc, 0, sizeof(rc)); ++ rc.tv = cvt_dpb_to_tv(rd->timestamp); ++ fill_sps(&rc.sps, h->ps.sps); ++ fill_pps(&rc.pps, h->ps.pps); ++ if (sl) { ++ rc.has_scaling = 1; ++ fill_scaling_matrix(sl, &rc.scaling_matrix); ++ } ++ } ++ ++ decode_q_wait(&ctx->decode_q, &rd->decode_ent); ++ ++ // qe_dst needs to be bound to the data buffer and only returned when that is ++ // Alloc almost certainly wants to be serialised if there is any chance of blocking ++ // so we get the next frame to be free in the thread that needs it for decode first. ++ // ++ // In our current world this probably isn't a concern but put it here anyway ++ if (!rd->qe_dst) ++ { ++ if ((rd->qe_dst = mediabufs_dst_qent_alloc(ctx->mbufs, ctx->dbufs)) == NULL) { ++ av_log(avctx, AV_LOG_ERROR, "%s: Failed to get dst buffer\n", __func__); ++ rv = AVERROR(ENOMEM); ++ goto fail; ++ } ++ } ++ ++ // Send as slices ++ for (i = 0; i < rd->num_slices; i += ctx->max_slices) { ++ const unsigned int e = FFMIN(rd->num_slices, i + ctx->max_slices); ++ if ((rv = send_slice(avctx, rd, &rc, i, e)) != 0) ++ goto fail; ++ } ++ ++ // Set the drm_prime desriptor ++ drm_from_format(&rd->drm, mediabufs_dst_fmt(ctx->mbufs)); ++ rd->drm.objects[0].fd = dmabuf_fd(qent_dst_dmabuf(rd->qe_dst, 0)); ++ rd->drm.objects[0].size = dmabuf_size(qent_dst_dmabuf(rd->qe_dst, 0)); ++ ++ decode_q_remove(&ctx->decode_q, &rd->decode_ent); ++ return 0; ++ ++fail: ++ decode_q_remove(&ctx->decode_q, &rd->decode_ent); ++ return rv; ++} ++ ++static inline int ++ctrl_valid(const struct v4l2_query_ext_ctrl * const c, const int64_t v) ++{ ++ return v >= c->minimum && v <= c->maximum; ++} ++ ++// Initial check & init ++static int ++probe(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx) ++{ ++ const HEVCContext *h = avctx->priv_data; ++ const HEVCSPS * const sps = h->ps.sps; ++ struct v4l2_ctrl_hevc_sps ctrl_sps; ++ unsigned int i; ++ ++ // Check for var slice array ++ struct v4l2_query_ext_ctrl qc[] = { ++ { .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS }, ++ { .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, }, ++ { .id = V4L2_CID_STATELESS_HEVC_SPS }, ++ { .id = V4L2_CID_STATELESS_HEVC_PPS }, ++ { .id = V4L2_CID_STATELESS_HEVC_SCALING_MATRIX }, ++#if HEVC_CTRLS_VERSION >= 2 ++ { .id = V4L2_CID_STATELESS_HEVC_DECODE_PARAMS }, ++#endif ++ }; ++ // Order & size must match! ++ static const size_t ctrl_sizes[] = { ++ sizeof(struct v4l2_ctrl_hevc_slice_params), ++ sizeof(int32_t), ++ sizeof(struct v4l2_ctrl_hevc_sps), ++ sizeof(struct v4l2_ctrl_hevc_pps), ++ sizeof(struct v4l2_ctrl_hevc_scaling_matrix), ++#if HEVC_CTRLS_VERSION >= 2 ++ sizeof(struct v4l2_ctrl_hevc_decode_params), ++#endif ++ }; ++ const unsigned int noof_ctrls = FF_ARRAY_ELEMS(qc); ++ ++#if HEVC_CTRLS_VERSION == 2 ++ if (mediabufs_ctl_driver_version(ctx->mbufs) >= MEDIABUFS_DRIVER_VERSION(5, 18, 0)) ++ return AVERROR(EINVAL); ++#elif HEVC_CTRLS_VERSION == 3 ++ if (mediabufs_ctl_driver_version(ctx->mbufs) < MEDIABUFS_DRIVER_VERSION(5, 18, 0)) ++ return AVERROR(EINVAL); ++#endif ++ ++ mediabufs_ctl_query_ext_ctrls(ctx->mbufs, qc, noof_ctrls); ++ i = 0; ++#if HEVC_CTRLS_VERSION >= 4 ++ // Skip slice check if no slice mode ++ if (qc[1].type != 0 && !ctrl_valid(qc + 1, V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED)) ++ i = 1; ++#else ++ // Fail frame mode silently for anything prior to V4 ++ if (qc[1].type == 0 || !ctrl_valid(qc + 1, V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED)) ++ return AVERROR(EINVAL); ++#endif ++ for (; i != noof_ctrls; ++i) { ++ if (qc[i].type == 0) { ++ av_log(avctx, AV_LOG_DEBUG, "Probed V%d control %#x missing\n", HEVC_CTRLS_VERSION, qc[i].id); ++ return AVERROR(EINVAL); ++ } ++ if (ctrl_sizes[i] != (size_t)qc[i].elem_size) { ++ av_log(avctx, AV_LOG_DEBUG, "Probed V%d control %d size mismatch %zu != %zu\n", ++ HEVC_CTRLS_VERSION, i, ctrl_sizes[i], (size_t)qc[i].elem_size); ++ return AVERROR(EINVAL); ++ } ++ } ++ ++ fill_sps(&ctrl_sps, sps); ++ ++ if (mediabufs_set_ext_ctrl(ctx->mbufs, NULL, V4L2_CID_STATELESS_HEVC_SPS, &ctrl_sps, sizeof(ctrl_sps))) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to set initial SPS\n"); ++ return AVERROR(EINVAL); ++ } ++ ++ return 0; ++} ++ ++// Final init ++static int ++set_controls(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx) ++{ ++ int ret; ++ ++ struct v4l2_query_ext_ctrl querys[] = { ++ { .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, }, ++ { .id = V4L2_CID_STATELESS_HEVC_START_CODE, }, ++ { .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS, }, ++#if HEVC_CTRLS_VERSION >= 4 ++ { .id = V4L2_CID_STATELESS_HEVC_ENTRY_POINT_OFFSETS, }, ++#endif ++ }; ++ ++ struct v4l2_ext_control ctrls[] = { ++ { .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, }, ++ { .id = V4L2_CID_STATELESS_HEVC_START_CODE, }, ++ }; ++ ++ mediabufs_ctl_query_ext_ctrls(ctx->mbufs, querys, FF_ARRAY_ELEMS(querys)); ++ ++ ctx->max_slices = (!(querys[2].flags & V4L2_CTRL_FLAG_DYNAMIC_ARRAY) || ++ querys[2].nr_of_dims != 1 || querys[2].dims[0] == 0) ? ++ 1 : querys[2].dims[0]; ++ av_log(avctx, AV_LOG_DEBUG, "%s: Max slices %d\n", __func__, ctx->max_slices); ++ ++#if HEVC_CTRLS_VERSION >= 4 ++ ctx->max_offsets = (querys[3].type == 0 || querys[3].nr_of_dims != 1) ? ++ 0 : querys[3].dims[0]; ++ av_log(avctx, AV_LOG_DEBUG, "%s: Entry point offsets %d\n", __func__, ctx->max_offsets); ++#else ++ ctx->max_offsets = 0; ++#endif ++ ++ if (querys[0].default_value == V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED || ++ querys[0].default_value == V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED) ++ ctx->decode_mode = querys[0].default_value; ++ else if (ctrl_valid(querys + 0, V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED)) ++ ctx->decode_mode = V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED; ++ else if (ctrl_valid(querys + 0, V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED)) ++ ctx->decode_mode = V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED; ++ else { ++ av_log(avctx, AV_LOG_ERROR, "%s: unsupported decode mode\n", __func__); ++ return AVERROR(EINVAL); ++ } ++ ++ if (querys[1].default_value == V4L2_STATELESS_HEVC_START_CODE_NONE || ++ querys[1].default_value == V4L2_STATELESS_HEVC_START_CODE_ANNEX_B) ++ ctx->start_code = querys[1].default_value; ++ else if (ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_ANNEX_B)) ++ ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_ANNEX_B; ++ else if (ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_NONE)) ++ ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_NONE; ++ else { ++ av_log(avctx, AV_LOG_ERROR, "%s: unsupported start code\n", __func__); ++ return AVERROR(EINVAL); ++ } ++ ++ // If we are in slice mode & START_CODE_NONE supported then pick that ++ // as it doesn't require the slightly dodgy look backwards in our raw buffer ++ if (ctx->decode_mode == V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED && ++ ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_NONE)) ++ ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_NONE; ++ ++ ctrls[0].value = ctx->decode_mode; ++ ctrls[1].value = ctx->start_code; ++ ++ ret = mediabufs_ctl_set_ext_ctrls(ctx->mbufs, NULL, ctrls, FF_ARRAY_ELEMS(ctrls)); ++ return !ret ? 0 : AVERROR(-ret); ++} ++ ++static void v4l2_req_frame_free(void *opaque, uint8_t *data) ++{ ++ AVCodecContext *avctx = opaque; ++ V4L2MediaReqDescriptor * const rd = (V4L2MediaReqDescriptor*)data; ++ ++ av_log(NULL, AV_LOG_DEBUG, "%s: avctx=%p data=%p\n", __func__, avctx, data); ++ ++ qent_dst_unref(&rd->qe_dst); ++ ++ // We don't expect req or qe_src to be set ++ if (rd->req || rd->qe_src) ++ av_log(NULL, AV_LOG_ERROR, "%s: qe_src %p or req %p not NULL\n", __func__, rd->req, rd->qe_src); ++ ++ av_freep(&rd->slices); ++ av_freep(&rd->slice_params); ++ av_freep(&rd->offsets); ++ ++ av_free(rd); ++} ++ ++static AVBufferRef *v4l2_req_frame_alloc(void *opaque, int size) ++{ ++ AVCodecContext *avctx = opaque; ++// V4L2RequestContextHEVC *ctx = avctx->internal->hwaccel_priv_data; ++// V4L2MediaReqDescriptor *req; ++ AVBufferRef *ref; ++ uint8_t *data; ++// int ret; ++ ++ data = av_mallocz(size); ++ if (!data) ++ return NULL; ++ ++ av_log(avctx, AV_LOG_DEBUG, "%s: avctx=%p size=%d data=%p\n", __func__, avctx, size, data); ++ ref = av_buffer_create(data, size, v4l2_req_frame_free, avctx, 0); ++ if (!ref) { ++ av_freep(&data); ++ return NULL; ++ } ++ return ref; ++} ++ ++#if 0 ++static void v4l2_req_pool_free(void *opaque) ++{ ++ av_log(NULL, AV_LOG_DEBUG, "%s: opaque=%p\n", __func__, opaque); ++} ++ ++static void v4l2_req_hwframe_ctx_free(AVHWFramesContext *hwfc) ++{ ++ av_log(NULL, AV_LOG_DEBUG, "%s: hwfc=%p pool=%p\n", __func__, hwfc, hwfc->pool); ++ ++ av_buffer_pool_uninit(&hwfc->pool); ++} ++#endif ++ ++static int frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx) ++{ ++ V4L2RequestContextHEVC *ctx = avctx->internal->hwaccel_priv_data; ++ AVHWFramesContext *hwfc = (AVHWFramesContext*)hw_frames_ctx->data; ++ const struct v4l2_format *vfmt = mediabufs_dst_fmt(ctx->mbufs); ++ ++ hwfc->format = AV_PIX_FMT_DRM_PRIME; ++ hwfc->sw_format = pixel_format_from_format(vfmt); ++ if (V4L2_TYPE_IS_MULTIPLANAR(vfmt->type)) { ++ hwfc->width = vfmt->fmt.pix_mp.width; ++ hwfc->height = vfmt->fmt.pix_mp.height; ++ } else { ++ hwfc->width = vfmt->fmt.pix.width; ++ hwfc->height = vfmt->fmt.pix.height; ++ } ++#if 0 ++ hwfc->pool = av_buffer_pool_init2(sizeof(V4L2MediaReqDescriptor), avctx, v4l2_req_frame_alloc, v4l2_req_pool_free); ++ if (!hwfc->pool) ++ return AVERROR(ENOMEM); ++ ++ hwfc->free = v4l2_req_hwframe_ctx_free; ++ ++ hwfc->initial_pool_size = 1; ++ ++ switch (avctx->codec_id) { ++ case AV_CODEC_ID_VP9: ++ hwfc->initial_pool_size += 8; ++ break; ++ case AV_CODEC_ID_VP8: ++ hwfc->initial_pool_size += 3; ++ break; ++ default: ++ hwfc->initial_pool_size += 2; ++ } ++#endif ++ av_log(avctx, AV_LOG_DEBUG, "%s: avctx=%p ctx=%p hw_frames_ctx=%p hwfc=%p pool=%p width=%d height=%d initial_pool_size=%d\n", __func__, avctx, ctx, hw_frames_ctx, hwfc, hwfc->pool, hwfc->width, hwfc->height, hwfc->initial_pool_size); ++ ++ return 0; ++} ++ ++static int alloc_frame(AVCodecContext * avctx, AVFrame *frame) ++{ ++ int rv; ++ ++ frame->buf[0] = v4l2_req_frame_alloc(avctx, sizeof(V4L2MediaReqDescriptor)); ++ if (!frame->buf[0]) ++ return AVERROR(ENOMEM); ++ ++ frame->data[0] = frame->buf[0]->data; ++ ++ frame->hw_frames_ctx = av_buffer_ref(avctx->hw_frames_ctx); ++ ++ if ((rv = ff_attach_decode_data(frame)) != 0) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to attach decode data to frame\n"); ++ av_frame_unref(frame); ++ return rv; ++ } ++ ++ return 0; ++} ++ ++const v4l2_req_decode_fns V(ff_v4l2_req_hevc) = { ++ .src_pix_fmt_v4l2 = V4L2_PIX_FMT_HEVC_SLICE, ++ .name = "V4L2 HEVC stateless V" STR(HEVC_CTRLS_VERSION), ++ .probe = probe, ++ .set_controls = set_controls, ++ ++ .start_frame = v4l2_request_hevc_start_frame, ++ .decode_slice = v4l2_request_hevc_decode_slice, ++ .end_frame = v4l2_request_hevc_end_frame, ++ .abort_frame = v4l2_request_hevc_abort_frame, ++ .frame_params = frame_params, ++ .alloc_frame = alloc_frame, ++}; ++ +--- /dev/null ++++ b/libavcodec/v4l2_req_media.c +@@ -0,0 +1,1808 @@ ++/* ++ * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com> ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the ++ * "Software"), to deal in the Software without restriction, including ++ * without limitation the rights to use, copy, modify, merge, publish, ++ * distribute, sub license, and/or sell copies of the Software, and to ++ * permit persons to whom the Software is furnished to do so, subject to ++ * the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the ++ * next paragraph) shall be included in all copies or substantial portions ++ * of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS ++ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. ++ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ++ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ++ */ ++ ++#include <errno.h> ++#include <fcntl.h> ++#include <poll.h> ++#include <pthread.h> ++#include <semaphore.h> ++#include <stdatomic.h> ++#include <stdbool.h> ++#include <stdlib.h> ++#include <string.h> ++#include <unistd.h> ++#include <linux/media.h> ++#include <linux/mman.h> ++#include <sys/ioctl.h> ++#include <sys/select.h> ++#include <sys/ioctl.h> ++#include <sys/mman.h> ++ ++#include <linux/videodev2.h> ++ ++#include "v4l2_req_dmabufs.h" ++#include "v4l2_req_media.h" ++#include "v4l2_req_pollqueue.h" ++#include "v4l2_req_utils.h" ++#include "weak_link.h" ++ ++ ++/* floor(log2(x)) */ ++static unsigned int log2_size(size_t x) ++{ ++ unsigned int n = 0; ++ ++ if (x & ~0xffff) { ++ n += 16; ++ x >>= 16; ++ } ++ if (x & ~0xff) { ++ n += 8; ++ x >>= 8; ++ } ++ if (x & ~0xf) { ++ n += 4; ++ x >>= 4; ++ } ++ if (x & ~3) { ++ n += 2; ++ x >>= 2; ++ } ++ return (x & ~1) ? n + 1 : n; ++} ++ ++static size_t round_up_size(const size_t x) ++{ ++ /* Admit no size < 256 */ ++ const unsigned int n = x < 256 ? 8 : log2_size(x) - 1; ++ ++ return x >= (3 << n) ? 4 << n : (3 << n); ++} ++ ++struct media_request; ++ ++struct media_pool { ++ int fd; ++ sem_t sem; ++ pthread_mutex_t lock; ++ unsigned int pool_n; ++ struct media_request * pool_reqs; ++ struct media_request * free_reqs; ++ struct pollqueue * pq; ++}; ++ ++struct media_request { ++ struct media_request * next; ++ struct media_pool * mp; ++ int fd; ++ struct polltask * pt; ++}; ++ ++static inline enum v4l2_memory ++mediabufs_memory_to_v4l2(const enum mediabufs_memory m) ++{ ++ return (enum v4l2_memory)m; ++} ++ ++const char * ++mediabufs_memory_name(const enum mediabufs_memory m) ++{ ++ switch (m) { ++ case MEDIABUFS_MEMORY_UNSET: ++ return "Unset"; ++ case MEDIABUFS_MEMORY_MMAP: ++ return "MMap"; ++ case MEDIABUFS_MEMORY_USERPTR: ++ return "UserPtr"; ++ case MEDIABUFS_MEMORY_OVERLAY: ++ return "Overlay"; ++ case MEDIABUFS_MEMORY_DMABUF: ++ return "DMABuf"; ++ default: ++ break; ++ } ++ return "Unknown"; ++} ++ ++ ++static inline int do_trywait(sem_t *const sem) ++{ ++ while (sem_trywait(sem)) { ++ if (errno != EINTR) ++ return -errno; ++ } ++ return 0; ++} ++ ++static inline int do_wait(sem_t *const sem) ++{ ++ while (sem_wait(sem)) { ++ if (errno != EINTR) ++ return -errno; ++ } ++ return 0; ++} ++ ++static int request_buffers(int video_fd, unsigned int type, ++ enum mediabufs_memory memory, unsigned int buffers_count) ++{ ++ struct v4l2_requestbuffers buffers; ++ int rc; ++ ++ memset(&buffers, 0, sizeof(buffers)); ++ buffers.type = type; ++ buffers.memory = mediabufs_memory_to_v4l2(memory); ++ buffers.count = buffers_count; ++ ++ rc = ioctl(video_fd, VIDIOC_REQBUFS, &buffers); ++ if (rc < 0) { ++ rc = -errno; ++ request_log("Unable to request %d type %d buffers: %s\n", buffers_count, type, strerror(-rc)); ++ return rc; ++ } ++ ++ return 0; ++} ++ ++ ++static int set_stream(int video_fd, unsigned int type, bool enable) ++{ ++ enum v4l2_buf_type buf_type = type; ++ int rc; ++ ++ rc = ioctl(video_fd, enable ? VIDIOC_STREAMON : VIDIOC_STREAMOFF, ++ &buf_type); ++ if (rc < 0) { ++ rc = -errno; ++ request_log("Unable to %sable stream: %s\n", ++ enable ? "en" : "dis", strerror(-rc)); ++ return rc; ++ } ++ ++ return 0; ++} ++ ++ ++ ++struct media_request * media_request_get(struct media_pool * const mp) ++{ ++ struct media_request *req = NULL; ++ ++ /* Timeout handled by poll code */ ++ if (do_wait(&mp->sem)) ++ return NULL; ++ ++ pthread_mutex_lock(&mp->lock); ++ req = mp->free_reqs; ++ if (req) { ++ mp->free_reqs = req->next; ++ req->next = NULL; ++ } ++ pthread_mutex_unlock(&mp->lock); ++ return req; ++} ++ ++int media_request_fd(const struct media_request * const req) ++{ ++ return req->fd; ++} ++ ++int media_request_start(struct media_request * const req) ++{ ++ while (ioctl(req->fd, MEDIA_REQUEST_IOC_QUEUE, NULL) == -1) ++ { ++ const int err = errno; ++ if (err == EINTR) ++ continue; ++ request_log("%s: Failed to Q media: (%d) %s\n", __func__, err, strerror(err)); ++ return -err; ++ } ++ ++ pollqueue_add_task(req->pt, 2000); ++ return 0; ++} ++ ++static void media_request_done(void *v, short revents) ++{ ++ struct media_request *const req = v; ++ struct media_pool *const mp = req->mp; ++ ++ /* ** Not sure what to do about timeout */ ++ ++ if (ioctl(req->fd, MEDIA_REQUEST_IOC_REINIT, NULL) < 0) ++ request_log("Unable to reinit media request: %s\n", ++ strerror(errno)); ++ ++ pthread_mutex_lock(&mp->lock); ++ req->next = mp->free_reqs; ++ mp->free_reqs = req; ++ pthread_mutex_unlock(&mp->lock); ++ sem_post(&mp->sem); ++} ++ ++int media_request_abort(struct media_request ** const preq) ++{ ++ struct media_request * const req = *preq; ++ ++ if (req == NULL) ++ return 0; ++ *preq = NULL; ++ ++ media_request_done(req, 0); ++ return 0; ++} ++ ++static void free_req_pool(struct media_request * const pool, const unsigned int n) ++{ ++ unsigned int i; ++ for (i = 0; i != n; ++i) { ++ struct media_request * const req = pool + i; ++ if (req->pt) ++ polltask_delete(&req->pt); ++ if (req->fd != -1) ++ close(req->fd); ++ } ++ free(pool); ++} ++ ++struct media_pool * media_pool_new(const char * const media_path, ++ struct pollqueue * const pq, ++ const unsigned int n) ++{ ++ struct media_pool * const mp = calloc(1, sizeof(*mp)); ++ unsigned int i; ++ ++ if (!mp) ++ goto fail0; ++ ++ mp->pq = pq; ++ pthread_mutex_init(&mp->lock, NULL); ++ mp->fd = open(media_path, O_RDWR | O_NONBLOCK); ++ if (mp->fd == -1) { ++ request_log("Failed to open '%s': %s\n", media_path, strerror(errno)); ++ goto fail1; ++ } ++ ++ if ((mp->pool_reqs = calloc(n, sizeof(*mp->pool_reqs))) == NULL) ++ goto fail3; ++ mp->pool_n = n; ++ for (i = 0; i != n; ++i) { ++ mp->pool_reqs[i].mp = mp; ++ mp->pool_reqs[i].fd = -1; ++ } ++ ++ for (i = 0; i != n; ++i) { ++ struct media_request * const req = mp->pool_reqs + i; ++ ++ if (ioctl(mp->fd, MEDIA_IOC_REQUEST_ALLOC, &req->fd) == -1) { ++ request_log("Failed to alloc request %d: %s\n", i, strerror(errno)); ++ goto fail4; ++ } ++ ++ req->pt = polltask_new(pq, req->fd, POLLPRI, media_request_done, req); ++ if (!req->pt) ++ goto fail4; ++ ++ req->next = mp->free_reqs, ++ mp->free_reqs = req; ++ } ++ ++ sem_init(&mp->sem, 0, n); ++ ++ return mp; ++ ++fail4: ++ free_req_pool(mp->pool_reqs, mp->pool_n); ++fail3: ++ close(mp->fd); ++ pthread_mutex_destroy(&mp->lock); ++fail1: ++ free(mp); ++fail0: ++ return NULL; ++} ++ ++void media_pool_delete(struct media_pool ** pMp) ++{ ++ struct media_pool * const mp = *pMp; ++ ++ if (!mp) ++ return; ++ *pMp = NULL; ++ ++ free_req_pool(mp->pool_reqs, mp->pool_n); ++ close(mp->fd); ++ sem_destroy(&mp->sem); ++ pthread_mutex_destroy(&mp->lock); ++ free(mp); ++} ++ ++ ++#define INDEX_UNSET (~(uint32_t)0) ++ ++enum qent_status { ++ QENT_NEW = 0, // Initial state - shouldn't last ++ QENT_FREE, // On free chain ++ QENT_PENDING, // User has ent ++ QENT_WAITING, // On inuse ++ QENT_DONE, // Frame rx ++ QENT_ERROR, // Error ++ QENT_IMPORT ++}; ++ ++struct qent_base { ++ atomic_int ref_count; ++ struct qent_base *next; ++ struct qent_base *prev; ++ enum qent_status status; ++ enum mediabufs_memory memtype; ++ uint32_t index; ++ struct dmabuf_h *dh[VIDEO_MAX_PLANES]; ++ struct timeval timestamp; ++}; ++ ++struct qent_src { ++ struct qent_base base; ++ int fixed_size; ++}; ++ ++struct qent_dst { ++ struct qent_base base; ++ bool waiting; ++ pthread_mutex_t lock; ++ pthread_cond_t cond; ++ struct ff_weak_link_client * mbc_wl; ++}; ++ ++struct qe_list_head { ++ struct qent_base *head; ++ struct qent_base *tail; ++}; ++ ++struct buf_pool { ++ enum mediabufs_memory memtype; ++ pthread_mutex_t lock; ++ sem_t free_sem; ++ struct qe_list_head free; ++ struct qe_list_head inuse; ++}; ++ ++ ++static inline struct qent_dst *base_to_dst(struct qent_base *be) ++{ ++ return (struct qent_dst *)be; ++} ++ ++static inline struct qent_src *base_to_src(struct qent_base *be) ++{ ++ return (struct qent_src *)be; ++} ++ ++ ++#define QENT_BASE_INITIALIZER(mtype) {\ ++ .ref_count = ATOMIC_VAR_INIT(0),\ ++ .status = QENT_NEW,\ ++ .memtype = (mtype),\ ++ .index = INDEX_UNSET\ ++} ++ ++static void qe_base_uninit(struct qent_base *const be) ++{ ++ unsigned int i; ++ for (i = 0; i != VIDEO_MAX_PLANES; ++i) { ++ dmabuf_free(be->dh[i]); ++ be->dh[i] = NULL; ++ } ++} ++ ++static void qe_src_free(struct qent_src *const be_src) ++{ ++ if (!be_src) ++ return; ++ qe_base_uninit(&be_src->base); ++ free(be_src); ++} ++ ++static struct qent_src * qe_src_new(enum mediabufs_memory mtype) ++{ ++ struct qent_src *const be_src = malloc(sizeof(*be_src)); ++ if (!be_src) ++ return NULL; ++ *be_src = (struct qent_src){ ++ .base = QENT_BASE_INITIALIZER(mtype) ++ }; ++ return be_src; ++} ++ ++static void qe_dst_free(struct qent_dst *const be_dst) ++{ ++ if (!be_dst) ++ return; ++ ++ ff_weak_link_unref(&be_dst->mbc_wl); ++ pthread_cond_destroy(&be_dst->cond); ++ pthread_mutex_destroy(&be_dst->lock); ++ qe_base_uninit(&be_dst->base); ++ free(be_dst); ++} ++ ++static struct qent_dst* qe_dst_new(struct ff_weak_link_master * const wl, const enum mediabufs_memory memtype) ++{ ++ struct qent_dst *const be_dst = malloc(sizeof(*be_dst)); ++ if (!be_dst) ++ return NULL; ++ *be_dst = (struct qent_dst){ ++ .base = QENT_BASE_INITIALIZER(memtype), ++ .lock = PTHREAD_MUTEX_INITIALIZER, ++ .cond = PTHREAD_COND_INITIALIZER, ++ .mbc_wl = ff_weak_link_ref(wl) ++ }; ++ return be_dst; ++} ++ ++static void ql_add_tail(struct qe_list_head * const ql, struct qent_base * be) ++{ ++ if (ql->tail) ++ ql->tail->next = be; ++ else ++ ql->head = be; ++ be->prev = ql->tail; ++ be->next = NULL; ++ ql->tail = be; ++} ++ ++static struct qent_base * ql_extract(struct qe_list_head * const ql, struct qent_base * be) ++{ ++ if (!be) ++ return NULL; ++ ++ if (be->next) ++ be->next->prev = be->prev; ++ else ++ ql->tail = be->prev; ++ if (be->prev) ++ be->prev->next = be->next; ++ else ++ ql->head = be->next; ++ be->next = NULL; ++ be->prev = NULL; ++ return be; ++} ++ ++ ++static void bq_put_free(struct buf_pool *const bp, struct qent_base * be) ++{ ++ ql_add_tail(&bp->free, be); ++} ++ ++static struct qent_base * bq_get_free(struct buf_pool *const bp) ++{ ++ return ql_extract(&bp->free, bp->free.head); ++} ++ ++static struct qent_base * bq_extract_inuse(struct buf_pool *const bp, struct qent_base *const be) ++{ ++ return ql_extract(&bp->inuse, be); ++} ++ ++static struct qent_base * bq_get_inuse(struct buf_pool *const bp) ++{ ++ return ql_extract(&bp->inuse, bp->inuse.head); ++} ++ ++static void bq_free_all_free_src(struct buf_pool *const bp) ++{ ++ struct qent_base *be; ++ while ((be = bq_get_free(bp)) != NULL) ++ qe_src_free(base_to_src(be)); ++} ++ ++static void bq_free_all_inuse_src(struct buf_pool *const bp) ++{ ++ struct qent_base *be; ++ while ((be = bq_get_inuse(bp)) != NULL) ++ qe_src_free(base_to_src(be)); ++} ++ ++static void bq_free_all_free_dst(struct buf_pool *const bp) ++{ ++ struct qent_base *be; ++ while ((be = bq_get_free(bp)) != NULL) ++ qe_dst_free(base_to_dst(be)); ++} ++ ++static void queue_put_free(struct buf_pool *const bp, struct qent_base *be) ++{ ++ unsigned int i; ++ ++ pthread_mutex_lock(&bp->lock); ++ /* Clear out state vars */ ++ be->timestamp.tv_sec = 0; ++ be->timestamp.tv_usec = 0; ++ be->status = QENT_FREE; ++ for (i = 0; i < VIDEO_MAX_PLANES && be->dh[i]; ++i) ++ dmabuf_len_set(be->dh[i], 0); ++ bq_put_free(bp, be); ++ pthread_mutex_unlock(&bp->lock); ++ sem_post(&bp->free_sem); ++} ++ ++static bool queue_is_inuse(const struct buf_pool *const bp) ++{ ++ return bp->inuse.tail != NULL; ++} ++ ++static void queue_put_inuse(struct buf_pool *const bp, struct qent_base *be) ++{ ++ if (!be) ++ return; ++ pthread_mutex_lock(&bp->lock); ++ ql_add_tail(&bp->inuse, be); ++ be->status = QENT_WAITING; ++ pthread_mutex_unlock(&bp->lock); ++} ++ ++static struct qent_base *queue_get_free(struct buf_pool *const bp) ++{ ++ struct qent_base *buf; ++ ++ if (do_wait(&bp->free_sem)) ++ return NULL; ++ pthread_mutex_lock(&bp->lock); ++ buf = bq_get_free(bp); ++ pthread_mutex_unlock(&bp->lock); ++ return buf; ++} ++ ++static struct qent_base *queue_tryget_free(struct buf_pool *const bp) ++{ ++ struct qent_base *buf; ++ ++ if (do_trywait(&bp->free_sem)) ++ return NULL; ++ pthread_mutex_lock(&bp->lock); ++ buf = bq_get_free(bp); ++ pthread_mutex_unlock(&bp->lock); ++ return buf; ++} ++ ++static struct qent_base * queue_find_extract_index(struct buf_pool *const bp, const unsigned int index) ++{ ++ struct qent_base *be; ++ ++ pthread_mutex_lock(&bp->lock); ++ /* Expect 1st in Q, but allow anywhere */ ++ for (be = bp->inuse.head; be; be = be->next) { ++ if (be->index == index) { ++ bq_extract_inuse(bp, be); ++ break; ++ } ++ } ++ pthread_mutex_unlock(&bp->lock); ++ ++ return be; ++} ++ ++static void queue_delete(struct buf_pool *const bp) ++{ ++ sem_destroy(&bp->free_sem); ++ pthread_mutex_destroy(&bp->lock); ++ free(bp); ++} ++ ++static struct buf_pool* queue_new(const int vfd) ++{ ++ struct buf_pool *bp = calloc(1, sizeof(*bp)); ++ if (!bp) ++ return NULL; ++ pthread_mutex_init(&bp->lock, NULL); ++ sem_init(&bp->free_sem, 0, 0); ++ return bp; ++} ++ ++ ++struct mediabufs_ctl { ++ atomic_int ref_count; /* 0 is single ref for easier atomics */ ++ void * dc; ++ int vfd; ++ bool stream_on; ++ bool polling; ++ bool dst_fixed; // Dst Q is fixed size ++ pthread_mutex_t lock; ++ struct buf_pool * src; ++ struct buf_pool * dst; ++ struct polltask * pt; ++ struct pollqueue * pq; ++ struct ff_weak_link_master * this_wlm; ++ ++ enum mediabufs_memory src_memtype; ++ enum mediabufs_memory dst_memtype; ++ struct v4l2_format src_fmt; ++ struct v4l2_format dst_fmt; ++ struct v4l2_capability capability; ++}; ++ ++static int qe_v4l2_queue(struct qent_base *const be, ++ const int vfd, struct media_request *const mreq, ++ const struct v4l2_format *const fmt, ++ const bool is_dst, const bool hold_flag) ++{ ++ struct v4l2_buffer buffer = { ++ .type = fmt->type, ++ .memory = mediabufs_memory_to_v4l2(be->memtype), ++ .index = be->index ++ }; ++ struct v4l2_plane planes[VIDEO_MAX_PLANES] = {{0}}; ++ ++ if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) { ++ unsigned int i; ++ for (i = 0; i < VIDEO_MAX_PLANES && be->dh[i]; ++i) { ++ if (is_dst) ++ dmabuf_len_set(be->dh[i], 0); ++ ++ /* *** Really need a pixdesc rather than a format so we can fill in data_offset */ ++ planes[i].length = dmabuf_size(be->dh[i]); ++ planes[i].bytesused = dmabuf_len(be->dh[i]); ++ if (be->memtype == MEDIABUFS_MEMORY_DMABUF) ++ planes[i].m.fd = dmabuf_fd(be->dh[i]); ++ else ++ planes[i].m.mem_offset = 0; ++ } ++ buffer.m.planes = planes; ++ buffer.length = i; ++ } ++ else { ++ if (is_dst) ++ dmabuf_len_set(be->dh[0], 0); ++ ++ buffer.bytesused = dmabuf_len(be->dh[0]); ++ buffer.length = dmabuf_size(be->dh[0]); ++ if (be->memtype == MEDIABUFS_MEMORY_DMABUF) ++ buffer.m.fd = dmabuf_fd(be->dh[0]); ++ else ++ buffer.m.offset = 0; ++ } ++ ++ if (!is_dst && mreq) { ++ buffer.flags |= V4L2_BUF_FLAG_REQUEST_FD; ++ buffer.request_fd = media_request_fd(mreq); ++ if (hold_flag) ++ buffer.flags |= V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF; ++ } ++ ++ if (is_dst) ++ be->timestamp = (struct timeval){0,0}; ++ ++ buffer.timestamp = be->timestamp; ++ ++ while (ioctl(vfd, VIDIOC_QBUF, &buffer)) { ++ const int err = errno; ++ if (err != EINTR) { ++ request_log("%s: Failed to Q buffer: err=%d (%s)\n", __func__, err, strerror(err)); ++ return -err; ++ } ++ } ++ return 0; ++} ++ ++static struct qent_base * qe_dequeue(struct buf_pool *const bp, ++ const int vfd, ++ const struct v4l2_format * const f) ++{ ++ struct qent_base *be; ++ int rc; ++ const bool mp = V4L2_TYPE_IS_MULTIPLANAR(f->type); ++ struct v4l2_plane planes[VIDEO_MAX_PLANES] = {{0}}; ++ struct v4l2_buffer buffer = { ++ .type = f->type, ++ .memory = mediabufs_memory_to_v4l2(bp->memtype) ++ }; ++ if (mp) { ++ buffer.length = f->fmt.pix_mp.num_planes; ++ buffer.m.planes = planes; ++ } ++ ++ while ((rc = ioctl(vfd, VIDIOC_DQBUF, &buffer)) != 0 && ++ errno == EINTR) ++ /* Loop */; ++ if (rc) { ++ request_log("Error DQing buffer type %d: %s\n", f->type, strerror(errno)); ++ return NULL; ++ } ++ ++ be = queue_find_extract_index(bp, buffer.index); ++ if (!be) { ++ request_log("Failed to find index %d in Q\n", buffer.index); ++ return NULL; ++ } ++ ++ if (mp) { ++ unsigned int i; ++ for (i = 0; i != buffer.length; ++i) ++ dmabuf_len_set(be->dh[i], V4L2_TYPE_IS_CAPTURE(f->type) ? planes[i].bytesused : 0); ++ } ++ else ++ dmabuf_len_set(be->dh[0], V4L2_TYPE_IS_CAPTURE(f->type) ? buffer.length : 0); ++ ++ be->timestamp = buffer.timestamp; ++ be->status = (buffer.flags & V4L2_BUF_FLAG_ERROR) ? QENT_ERROR : QENT_DONE; ++ return be; ++} ++ ++static void qe_dst_done(struct qent_dst * dst_be) ++{ ++ pthread_mutex_lock(&dst_be->lock); ++ dst_be->waiting = false; ++ pthread_cond_broadcast(&dst_be->cond); ++ pthread_mutex_unlock(&dst_be->lock); ++ ++ qent_dst_unref(&dst_be); ++} ++ ++static bool qe_dst_waiting(struct qent_dst *const dst_be) ++{ ++ bool waiting; ++ pthread_mutex_lock(&dst_be->lock); ++ waiting = dst_be->waiting; ++ dst_be->waiting = true; ++ pthread_mutex_unlock(&dst_be->lock); ++ return waiting; ++} ++ ++ ++static bool mediabufs_wants_poll(const struct mediabufs_ctl *const mbc) ++{ ++ return queue_is_inuse(mbc->src) || queue_is_inuse(mbc->dst); ++} ++ ++static void mediabufs_poll_cb(void * v, short revents) ++{ ++ struct mediabufs_ctl *mbc = v; ++ struct qent_src *src_be = NULL; ++ struct qent_dst *dst_be = NULL; ++ ++ if (!revents) ++ request_err(mbc->dc, "%s: Timeout\n", __func__); ++ ++ pthread_mutex_lock(&mbc->lock); ++ mbc->polling = false; ++ ++ if ((revents & POLLOUT) != 0) ++ src_be = base_to_src(qe_dequeue(mbc->src, mbc->vfd, &mbc->src_fmt)); ++ if ((revents & POLLIN) != 0) ++ dst_be = base_to_dst(qe_dequeue(mbc->dst, mbc->vfd, &mbc->dst_fmt)); ++ ++ /* Reschedule */ ++ if (mediabufs_wants_poll(mbc)) { ++ mbc->polling = true; ++ pollqueue_add_task(mbc->pt, 2000); ++ } ++ pthread_mutex_unlock(&mbc->lock); ++ ++ if (src_be) ++ queue_put_free(mbc->src, &src_be->base); ++ if (dst_be) ++ qe_dst_done(dst_be); ++} ++ ++int qent_src_params_set(struct qent_src *const be_src, const struct timeval * timestamp) ++{ ++ struct qent_base *const be = &be_src->base; ++ ++ be->timestamp = *timestamp; ++ return 0; ++} ++ ++struct timeval qent_dst_timestamp_get(const struct qent_dst *const be_dst) ++{ ++ return be_dst->base.timestamp; ++} ++ ++static int qent_base_realloc(struct qent_base *const be, const size_t len, struct dmabufs_ctl * dbsc) ++{ ++ if (!be->dh[0] || len > dmabuf_size(be->dh[0])) { ++ size_t newsize = round_up_size(len); ++ request_log("%s: Overrun %zd > %zd; trying %zd\n", __func__, len, dmabuf_size(be->dh[0]), newsize); ++ if (!dbsc) { ++ request_log("%s: No dmbabuf_ctrl for realloc\n", __func__); ++ return -ENOMEM; ++ } ++ if ((be->dh[0] = dmabuf_realloc(dbsc, be->dh[0], newsize)) == NULL) { ++ request_log("%s: Realloc %zd failed\n", __func__, newsize); ++ return -ENOMEM; ++ } ++ } ++ return 0; ++} ++ ++int qent_src_alloc(struct qent_src *const be_src, const size_t len, struct dmabufs_ctl * dbsc) ++{ ++ struct qent_base *const be = &be_src->base; ++ return qent_base_realloc(be, len, dbsc); ++} ++ ++ ++int qent_src_data_copy(struct qent_src *const be_src, const size_t offset, const void *const src, const size_t len, struct dmabufs_ctl * dbsc) ++{ ++ void * dst; ++ struct qent_base *const be = &be_src->base; ++ int rv; ++ ++ // Realloc doesn't copy so don't alloc if offset != 0 ++ if ((rv = qent_base_realloc(be, offset + len, ++ be_src->fixed_size || offset ? NULL : dbsc)) != 0) ++ return rv; ++ ++ dmabuf_write_start(be->dh[0]); ++ dst = dmabuf_map(be->dh[0]); ++ if (!dst) ++ return -1; ++ memcpy((char*)dst + offset, src, len); ++ dmabuf_len_set(be->dh[0], len); ++ dmabuf_write_end(be->dh[0]); ++ return 0; ++} ++ ++const struct dmabuf_h * qent_dst_dmabuf(const struct qent_dst *const be_dst, unsigned int plane) ++{ ++ const struct qent_base *const be = &be_dst->base; ++ ++ return (plane >= sizeof(be->dh)/sizeof(be->dh[0])) ? NULL : be->dh[plane]; ++} ++ ++int qent_dst_dup_fd(const struct qent_dst *const be_dst, unsigned int plane) ++{ ++ return dup(dmabuf_fd(qent_dst_dmabuf(be_dst, plane))); ++} ++ ++MediaBufsStatus mediabufs_start_request(struct mediabufs_ctl *const mbc, ++ struct media_request **const pmreq, ++ struct qent_src **const psrc_be, ++ struct qent_dst *const dst_be, ++ const bool is_final) ++{ ++ struct media_request * mreq = *pmreq; ++ struct qent_src *const src_be = *psrc_be; ++ ++ // Req & src are always both "consumed" ++ *pmreq = NULL; ++ *psrc_be = NULL; ++ ++ pthread_mutex_lock(&mbc->lock); ++ ++ if (!src_be) ++ goto fail1; ++ ++ if (dst_be) { ++ if (qe_dst_waiting(dst_be)) { ++ request_info(mbc->dc, "Request buffer already waiting on start\n"); ++ goto fail1; ++ } ++ dst_be->base.timestamp = (struct timeval){0,0}; ++ if (qe_v4l2_queue(&dst_be->base, mbc->vfd, NULL, &mbc->dst_fmt, true, false)) ++ goto fail1; ++ ++ qent_dst_ref(dst_be); ++ queue_put_inuse(mbc->dst, &dst_be->base); ++ } ++ ++ if (qe_v4l2_queue(&src_be->base, mbc->vfd, mreq, &mbc->src_fmt, false, !is_final)) ++ goto fail1; ++ queue_put_inuse(mbc->src, &src_be->base); ++ ++ if (!mbc->polling && mediabufs_wants_poll(mbc)) { ++ mbc->polling = true; ++ pollqueue_add_task(mbc->pt, 2000); ++ } ++ pthread_mutex_unlock(&mbc->lock); ++ ++ if (media_request_start(mreq)) ++ return MEDIABUFS_ERROR_OPERATION_FAILED; ++ ++ return MEDIABUFS_STATUS_SUCCESS; ++ ++fail1: ++ media_request_abort(&mreq); ++ if (src_be) ++ queue_put_free(mbc->src, &src_be->base); ++ ++// *** TODO: If src Q fails this doesnt unwind properly - separate dst Q from src Q ++ if (dst_be) { ++ dst_be->base.status = QENT_ERROR; ++ qe_dst_done(dst_be); ++ } ++ pthread_mutex_unlock(&mbc->lock); ++ return MEDIABUFS_ERROR_OPERATION_FAILED; ++} ++ ++ ++static int qe_alloc_from_fmt(struct qent_base *const be, ++ struct dmabufs_ctl *const dbsc, ++ const struct v4l2_format *const fmt) ++{ ++ if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) { ++ unsigned int i; ++ for (i = 0; i != fmt->fmt.pix_mp.num_planes; ++i) { ++ be->dh[i] = dmabuf_realloc(dbsc, be->dh[i], ++ fmt->fmt.pix_mp.plane_fmt[i].sizeimage); ++ /* On failure tidy up and die */ ++ if (!be->dh[i]) { ++ while (i--) { ++ dmabuf_free(be->dh[i]); ++ be->dh[i] = NULL; ++ } ++ return -1; ++ } ++ } ++ } ++ else { ++// be->dh[0] = dmabuf_alloc(dbsc, fmt->fmt.pix.sizeimage); ++ size_t size = fmt->fmt.pix.sizeimage; ++ be->dh[0] = dmabuf_realloc(dbsc, be->dh[0], size); ++ if (!be->dh[0]) ++ return -1; ++ } ++ return 0; ++} ++ ++static MediaBufsStatus fmt_set(struct v4l2_format *const fmt, const int fd, ++ const enum v4l2_buf_type buftype, ++ uint32_t pixfmt, ++ const unsigned int width, const unsigned int height, ++ const size_t bufsize) ++{ ++ *fmt = (struct v4l2_format){.type = buftype}; ++ ++ if (V4L2_TYPE_IS_MULTIPLANAR(buftype)) { ++ fmt->fmt.pix_mp.width = width; ++ fmt->fmt.pix_mp.height = height; ++ fmt->fmt.pix_mp.pixelformat = pixfmt; ++ if (bufsize) { ++ fmt->fmt.pix_mp.num_planes = 1; ++ fmt->fmt.pix_mp.plane_fmt[0].sizeimage = bufsize; ++ } ++ } ++ else { ++ fmt->fmt.pix.width = width; ++ fmt->fmt.pix.height = height; ++ fmt->fmt.pix.pixelformat = pixfmt; ++ fmt->fmt.pix.sizeimage = bufsize; ++ } ++ ++ while (ioctl(fd, VIDIOC_S_FMT, fmt)) ++ if (errno != EINTR) ++ return MEDIABUFS_ERROR_OPERATION_FAILED; ++ ++ // Treat anything where we don't get at least what we asked for as a fail ++ if (V4L2_TYPE_IS_MULTIPLANAR(buftype)) { ++ if (fmt->fmt.pix_mp.width < width || ++ fmt->fmt.pix_mp.height < height || ++ fmt->fmt.pix_mp.pixelformat != pixfmt) { ++ return MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE; ++ } ++ } ++ else { ++ if (fmt->fmt.pix.width < width || ++ fmt->fmt.pix.height < height || ++ fmt->fmt.pix.pixelformat != pixfmt) { ++ return MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE; ++ } ++ } ++ ++ return MEDIABUFS_STATUS_SUCCESS; ++} ++ ++static MediaBufsStatus find_fmt_flags(struct v4l2_format *const fmt, ++ const int fd, ++ const unsigned int type_v4l2, ++ const uint32_t flags_must, ++ const uint32_t flags_not, ++ const unsigned int width, ++ const unsigned int height, ++ mediabufs_dst_fmt_accept_fn *const accept_fn, ++ void *const accept_v) ++{ ++ unsigned int i; ++ ++ for (i = 0;; ++i) { ++ struct v4l2_fmtdesc fmtdesc = { ++ .index = i, ++ .type = type_v4l2 ++ }; ++ while (ioctl(fd, VIDIOC_ENUM_FMT, &fmtdesc)) { ++ if (errno != EINTR) ++ return MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE; ++ } ++ if ((fmtdesc.flags & flags_must) != flags_must || ++ (fmtdesc.flags & flags_not)) ++ continue; ++ if (!accept_fn(accept_v, &fmtdesc)) ++ continue; ++ ++ if (fmt_set(fmt, fd, fmtdesc.type, fmtdesc.pixelformat, ++ width, height, 0) == MEDIABUFS_STATUS_SUCCESS) ++ return MEDIABUFS_STATUS_SUCCESS; ++ } ++ return 0; ++} ++ ++ ++/* Wait for qent done */ ++ ++MediaBufsStatus qent_dst_wait(struct qent_dst *const be_dst) ++{ ++ struct qent_base *const be = &be_dst->base; ++ enum qent_status estat; ++ ++ pthread_mutex_lock(&be_dst->lock); ++ while (be_dst->waiting && ++ !pthread_cond_wait(&be_dst->cond, &be_dst->lock)) ++ /* Loop */; ++ estat = be->status; ++ pthread_mutex_unlock(&be_dst->lock); ++ ++ return estat == QENT_DONE ? MEDIABUFS_STATUS_SUCCESS : ++ estat == QENT_ERROR ? MEDIABUFS_ERROR_DECODING_ERROR : ++ MEDIABUFS_ERROR_OPERATION_FAILED; ++} ++ ++const uint8_t * qent_dst_data(struct qent_dst *const be_dst, unsigned int buf_no) ++{ ++ struct qent_base *const be = &be_dst->base; ++ return dmabuf_map(be->dh[buf_no]); ++} ++ ++MediaBufsStatus qent_dst_read_start(struct qent_dst *const be_dst) ++{ ++ struct qent_base *const be = &be_dst->base; ++ unsigned int i; ++ for (i = 0; i != VIDEO_MAX_PLANES && be->dh[i]; ++i) { ++ if (dmabuf_read_start(be->dh[i])) { ++ while (i--) ++ dmabuf_read_end(be->dh[i]); ++ return MEDIABUFS_ERROR_ALLOCATION_FAILED; ++ } ++ } ++ return MEDIABUFS_STATUS_SUCCESS; ++} ++ ++MediaBufsStatus qent_dst_read_stop(struct qent_dst *const be_dst) ++{ ++ struct qent_base *const be = &be_dst->base; ++ unsigned int i; ++ MediaBufsStatus status = MEDIABUFS_STATUS_SUCCESS; ++ ++ for (i = 0; i != VIDEO_MAX_PLANES && be->dh[i]; ++i) { ++ if (dmabuf_read_end(be->dh[i])) ++ status = MEDIABUFS_ERROR_OPERATION_FAILED; ++ } ++ return status; ++} ++ ++struct qent_dst * qent_dst_ref(struct qent_dst * const be_dst) ++{ ++ if (be_dst) ++ atomic_fetch_add(&be_dst->base.ref_count, 1); ++ return be_dst; ++} ++ ++void qent_dst_unref(struct qent_dst ** const pbe_dst) ++{ ++ struct qent_dst * const be_dst = *pbe_dst; ++ struct mediabufs_ctl * mbc; ++ if (!be_dst) ++ return; ++ *pbe_dst = NULL; ++ ++ if (atomic_fetch_sub(&be_dst->base.ref_count, 1) != 0) ++ return; ++ ++ if ((mbc = ff_weak_link_lock(&be_dst->mbc_wl)) != NULL) { ++ queue_put_free(mbc->dst, &be_dst->base); ++ ff_weak_link_unlock(be_dst->mbc_wl); ++ } ++ else { ++ qe_dst_free(be_dst); ++ } ++} ++ ++MediaBufsStatus qent_dst_import_fd(struct qent_dst *const be_dst, ++ unsigned int plane, ++ int fd, size_t size) ++{ ++ struct qent_base *const be = &be_dst->base; ++ struct dmabuf_h * dh; ++ ++ if (be->status != QENT_IMPORT || be->dh[plane]) ++ return MEDIABUFS_ERROR_OPERATION_FAILED; ++ ++ dh = dmabuf_import(fd, size); ++ if (!dh) ++ return MEDIABUFS_ERROR_ALLOCATION_FAILED; ++ ++ be->dh[plane] = dh; ++ return MEDIABUFS_STATUS_SUCCESS; ++} ++ ++// Returns noof buffers created, -ve for error ++static int create_dst_bufs(struct mediabufs_ctl *const mbc, unsigned int n, struct qent_dst * const qes[]) ++{ ++ unsigned int i; ++ ++ struct v4l2_create_buffers cbuf = { ++ .count = n, ++ .memory = mediabufs_memory_to_v4l2(mbc->dst->memtype), ++ .format = mbc->dst_fmt, ++ }; ++ ++ while (ioctl(mbc->vfd, VIDIOC_CREATE_BUFS, &cbuf)) { ++ const int err = -errno; ++ if (err != EINTR) { ++ request_err(mbc->dc, "%s: Failed to create V4L2 buffer\n", __func__); ++ return -err; ++ } ++ } ++ ++ if (cbuf.count != n) ++ request_warn(mbc->dc, "%s: Created %d of %d V4L2 buffers requested\n", __func__, cbuf.count, n); ++ ++ for (i = 0; i != cbuf.count; ++i) ++ qes[i]->base.index = cbuf.index + i; ++ ++ return cbuf.count; ++} ++ ++static MediaBufsStatus ++qe_import_from_buf(struct mediabufs_ctl *const mbc, struct qent_base * const be, const struct v4l2_format *const fmt, ++ const unsigned int n, const bool x_dmabuf) ++{ ++ struct v4l2_buffer buf = { ++ .index = n, ++ .type = fmt->type, ++ }; ++ struct v4l2_plane planes[VIDEO_MAX_PLANES]; ++ int ret; ++ ++ if (be->dh[0]) ++ return 0; ++ ++ if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) { ++ memset(planes, 0, sizeof(planes)); ++ buf.m.planes = planes; ++ buf.length = VIDEO_MAX_PLANES; ++ } ++ ++ if ((ret = ioctl(mbc->vfd, VIDIOC_QUERYBUF, &buf)) != 0) { ++ request_err(mbc->dc, "VIDIOC_QUERYBUF failed"); ++ return MEDIABUFS_ERROR_OPERATION_FAILED; ++ } ++ ++ if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) ++ { ++ unsigned int i; ++ for (i = 0; i != buf.length; ++i) { ++ if (x_dmabuf) { ++ struct v4l2_exportbuffer xbuf = { ++ .type = buf.type, ++ .index = buf.index, ++ .plane = i, ++ .flags = O_RDWR, // *** Arguably O_RDONLY would be fine ++ }; ++ if (ioctl(mbc->vfd, VIDIOC_EXPBUF, &xbuf) == 0) { ++ be->dh[i] = dmabuf_import(xbuf.fd, planes[i].length); ++ close(xbuf.fd); // dmabuf_import dups the fd so close this one ++ } ++ } ++ else { ++ be->dh[i] = dmabuf_import_mmap( ++ mmap(NULL, planes[i].length, ++ PROT_READ | PROT_WRITE, ++ MAP_SHARED | MAP_POPULATE, ++ mbc->vfd, planes[i].m.mem_offset), ++ planes[i].length); ++ } ++ /* On failure tidy up and die */ ++ if (!be->dh[i]) { ++ while (i--) { ++ dmabuf_free(be->dh[i]); ++ be->dh[i] = NULL; ++ } ++ return MEDIABUFS_ERROR_OPERATION_FAILED; ++ } ++ } ++ } ++ else ++ { ++ if (x_dmabuf) { ++ struct v4l2_exportbuffer xbuf = { ++ .type = buf.type, ++ .index = buf.index, ++ .flags = O_RDWR, // *** Arguably O_RDONLY would be fine ++ }; ++ if (ioctl(mbc->vfd, VIDIOC_EXPBUF, &xbuf) == 0) ++ be->dh[0] = dmabuf_import(xbuf.fd, buf.length); ++ } ++ else { ++ be->dh[0] = dmabuf_import_mmap( ++ mmap(NULL, buf.length, ++ PROT_READ | PROT_WRITE, ++ MAP_SHARED | MAP_POPULATE, ++ mbc->vfd, buf.m.offset), ++ buf.length); ++ } ++ /* On failure tidy up and die */ ++ if (!be->dh[0]) { ++ return MEDIABUFS_ERROR_OPERATION_FAILED; ++ } ++ } ++ ++ return 0; ++} ++ ++struct qent_dst* mediabufs_dst_qent_alloc(struct mediabufs_ctl *const mbc, struct dmabufs_ctl *const dbsc) ++{ ++ struct qent_dst * be_dst; ++ ++ if (mbc == NULL) { ++ be_dst = qe_dst_new(NULL, MEDIABUFS_MEMORY_DMABUF); ++ if (be_dst) ++ be_dst->base.status = QENT_IMPORT; ++ return be_dst; ++ } ++ ++ if (mbc->dst_fixed) { ++ be_dst = base_to_dst(queue_get_free(mbc->dst)); ++ if (!be_dst) ++ return NULL; ++ } ++ else { ++ be_dst = base_to_dst(queue_tryget_free(mbc->dst)); ++ if (!be_dst) { ++ be_dst = qe_dst_new(mbc->this_wlm, mbc->dst->memtype); ++ if (!be_dst) ++ return NULL; ++ ++ if (create_dst_bufs(mbc, 1, &be_dst) != 1) { ++ qe_dst_free(be_dst); ++ return NULL; ++ } ++ } ++ } ++ ++ if (mbc->dst->memtype == MEDIABUFS_MEMORY_MMAP) { ++ if (qe_import_from_buf(mbc, &be_dst->base, &mbc->dst_fmt, be_dst->base.index, true)) { ++ request_err(mbc->dc, "Failed to export as dmabuf\n"); ++ queue_put_free(mbc->dst, &be_dst->base); ++ return NULL; ++ } ++ } ++ else { ++ if (qe_alloc_from_fmt(&be_dst->base, dbsc, &mbc->dst_fmt)) { ++ /* Given how create buf works we can't uncreate it on alloc failure ++ * all we can do is put it on the free Q ++ */ ++ queue_put_free(mbc->dst, &be_dst->base); ++ return NULL; ++ } ++ } ++ ++ be_dst->base.status = QENT_PENDING; ++ atomic_store(&be_dst->base.ref_count, 0); ++ return be_dst; ++} ++ ++const struct v4l2_format *mediabufs_dst_fmt(struct mediabufs_ctl *const mbc) ++{ ++ return &mbc->dst_fmt; ++} ++ ++MediaBufsStatus mediabufs_dst_fmt_set(struct mediabufs_ctl *const mbc, ++ const unsigned int width, ++ const unsigned int height, ++ mediabufs_dst_fmt_accept_fn *const accept_fn, ++ void *const accept_v) ++{ ++ MediaBufsStatus status; ++ unsigned int i; ++ const enum v4l2_buf_type buf_type = mbc->dst_fmt.type; ++ static const struct { ++ unsigned int flags_must; ++ unsigned int flags_not; ++ } trys[] = { ++ {0, V4L2_FMT_FLAG_EMULATED}, ++ {V4L2_FMT_FLAG_EMULATED, 0}, ++ }; ++ for (i = 0; i != sizeof(trys)/sizeof(trys[0]); ++i) { ++ status = find_fmt_flags(&mbc->dst_fmt, mbc->vfd, ++ buf_type, ++ trys[i].flags_must, ++ trys[i].flags_not, ++ width, height, accept_fn, accept_v); ++ if (status != MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE) ++ return status; ++ } ++ ++ if (status != MEDIABUFS_STATUS_SUCCESS) ++ return status; ++ ++ /* Try to create a buffer - don't alloc */ ++ return status; ++} ++ ++// ** This is a mess if we get partial alloc but without any way to remove ++// individual V4L2 Q members we are somewhat stuffed ++MediaBufsStatus mediabufs_dst_slots_create(struct mediabufs_ctl *const mbc, const unsigned int n, const bool fixed, const enum mediabufs_memory memtype) ++{ ++ unsigned int i; ++ int a = 0; ++ unsigned int qc; ++ struct qent_dst * qes[32]; ++ ++ if (n > 32) ++ return MEDIABUFS_ERROR_ALLOCATION_FAILED; ++ ++ mbc->dst->memtype = memtype; ++ ++ // Create qents first as it is hard to get rid of the V4L2 buffers on error ++ for (qc = 0; qc != n; ++qc) ++ { ++ if ((qes[qc] = qe_dst_new(mbc->this_wlm, mbc->dst->memtype)) == NULL) ++ goto fail; ++ } ++ ++ if ((a = create_dst_bufs(mbc, n, qes)) < 0) ++ goto fail; ++ ++ for (i = 0; i != a; ++i) ++ queue_put_free(mbc->dst, &qes[i]->base); ++ ++ if (a != n) ++ goto fail; ++ ++ mbc->dst_fixed = fixed; ++ return MEDIABUFS_STATUS_SUCCESS; ++ ++fail: ++ for (i = (a < 0 ? 0 : a); i != qc; ++i) ++ qe_dst_free(qes[i]); ++ ++ return MEDIABUFS_ERROR_ALLOCATION_FAILED; ++} ++ ++struct qent_src *mediabufs_src_qent_get(struct mediabufs_ctl *const mbc) ++{ ++ struct qent_base * buf = queue_get_free(mbc->src); ++ buf->status = QENT_PENDING; ++ return base_to_src(buf); ++} ++ ++void mediabufs_src_qent_abort(struct mediabufs_ctl *const mbc, struct qent_src **const pqe_src) ++{ ++ struct qent_src *const qe_src = *pqe_src; ++ if (!qe_src) ++ return; ++ *pqe_src = NULL; ++ queue_put_free(mbc->src, &qe_src->base); ++} ++ ++static MediaBufsStatus ++chk_memory_type(struct mediabufs_ctl *const mbc, ++ const struct v4l2_format * const f, ++ const enum mediabufs_memory m) ++{ ++ struct v4l2_create_buffers cbuf = { ++ .count = 0, ++ .memory = V4L2_MEMORY_MMAP, ++ .format = *f ++ }; ++ ++ if (ioctl(mbc->vfd, VIDIOC_CREATE_BUFS, &cbuf) != 0) ++ return MEDIABUFS_ERROR_OPERATION_FAILED; ++ ++ switch (m) { ++ case MEDIABUFS_MEMORY_DMABUF: ++ // 0 = Unknown but assume not in that case ++ if ((cbuf.capabilities & V4L2_BUF_CAP_SUPPORTS_DMABUF) == 0) ++ return MEDIABUFS_ERROR_UNSUPPORTED_MEMORY; ++ break; ++ case MEDIABUFS_MEMORY_MMAP: ++ break; ++ default: ++ return MEDIABUFS_ERROR_UNSUPPORTED_MEMORY; ++ } ++ ++ return MEDIABUFS_STATUS_SUCCESS; ++} ++ ++MediaBufsStatus ++mediabufs_src_chk_memtype(struct mediabufs_ctl *const mbc, const enum mediabufs_memory memtype) ++{ ++ return chk_memory_type(mbc, &mbc->src_fmt, memtype); ++} ++ ++MediaBufsStatus ++mediabufs_dst_chk_memtype(struct mediabufs_ctl *const mbc, const enum mediabufs_memory memtype) ++{ ++ return chk_memory_type(mbc, &mbc->dst_fmt, memtype); ++} ++ ++/* src format must have been set up before this */ ++MediaBufsStatus mediabufs_src_pool_create(struct mediabufs_ctl *const mbc, ++ struct dmabufs_ctl * const dbsc, ++ unsigned int n, const enum mediabufs_memory memtype) ++{ ++ unsigned int i; ++ struct v4l2_requestbuffers req = { ++ .count = n, ++ .type = mbc->src_fmt.type, ++ .memory = mediabufs_memory_to_v4l2(memtype) ++ }; ++ ++ bq_free_all_free_src(mbc->src); ++ ++ while (ioctl(mbc->vfd, VIDIOC_REQBUFS, &req) == -1) { ++ if (errno != EINTR) { ++ request_err(mbc->dc, "%s: Failed to request src bufs\n", __func__); ++ return MEDIABUFS_ERROR_OPERATION_FAILED; ++ } ++ } ++ ++ if (n > req.count) { ++ request_info(mbc->dc, "Only allocated %d of %d src buffers requested\n", req.count, n); ++ n = req.count; ++ } ++ ++ for (i = 0; i != n; ++i) { ++ struct qent_src *const be_src = qe_src_new(memtype); ++ if (!be_src) { ++ request_err(mbc->dc, "Failed to create src be %d\n", i); ++ goto fail; ++ } ++ switch (memtype) { ++ case MEDIABUFS_MEMORY_MMAP: ++ if (qe_import_from_buf(mbc, &be_src->base, &mbc->src_fmt, i, false)) { ++ qe_src_free(be_src); ++ goto fail; ++ } ++ be_src->fixed_size = 1; ++ break; ++ case MEDIABUFS_MEMORY_DMABUF: ++ if (qe_alloc_from_fmt(&be_src->base, dbsc, &mbc->src_fmt)) { ++ qe_src_free(be_src); ++ goto fail; ++ } ++ be_src->fixed_size = !mediabufs_src_resizable(mbc); ++ break; ++ default: ++ request_err(mbc->dc, "Unexpected memorty type\n"); ++ goto fail; ++ } ++ be_src->base.index = i; ++ ++ queue_put_free(mbc->src, &be_src->base); ++ } ++ ++ mbc->src->memtype = memtype; ++ return MEDIABUFS_STATUS_SUCCESS; ++ ++fail: ++ bq_free_all_free_src(mbc->src); ++ req.count = 0; ++ while (ioctl(mbc->vfd, VIDIOC_REQBUFS, &req) == -1 && ++ errno == EINTR) ++ /* Loop */; ++ ++ return MEDIABUFS_ERROR_OPERATION_FAILED; ++} ++ ++ ++ ++/* ++ * Set stuff order: ++ * Set src fmt ++ * Set parameters (sps) on vfd ++ * Negotiate dst format (dst_fmt_set) ++ * Create src buffers ++ * Alloc a dst buffer or Create dst slots ++*/ ++MediaBufsStatus mediabufs_stream_on(struct mediabufs_ctl *const mbc) ++{ ++ if (mbc->stream_on) ++ return MEDIABUFS_STATUS_SUCCESS; ++ ++ if (set_stream(mbc->vfd, mbc->src_fmt.type, true) < 0) { ++ request_log("Failed to set stream on src type %d\n", mbc->src_fmt.type); ++ return MEDIABUFS_ERROR_OPERATION_FAILED; ++ } ++ ++ if (set_stream(mbc->vfd, mbc->dst_fmt.type, true) < 0) { ++ request_log("Failed to set stream on dst type %d\n", mbc->dst_fmt.type); ++ set_stream(mbc->vfd, mbc->src_fmt.type, false); ++ return MEDIABUFS_ERROR_OPERATION_FAILED; ++ } ++ ++ mbc->stream_on = true; ++ return MEDIABUFS_STATUS_SUCCESS; ++} ++ ++MediaBufsStatus mediabufs_stream_off(struct mediabufs_ctl *const mbc) ++{ ++ MediaBufsStatus status = MEDIABUFS_STATUS_SUCCESS; ++ ++ if (!mbc->stream_on) ++ return MEDIABUFS_STATUS_SUCCESS; ++ ++ if (set_stream(mbc->vfd, mbc->dst_fmt.type, false) < 0) { ++ request_log("Failed to set stream off dst type %d\n", mbc->dst_fmt.type); ++ status = MEDIABUFS_ERROR_OPERATION_FAILED; ++ } ++ ++ if (set_stream(mbc->vfd, mbc->src_fmt.type, false) < 0) { ++ request_log("Failed to set stream off src type %d\n", mbc->src_fmt.type); ++ status = MEDIABUFS_ERROR_OPERATION_FAILED; ++ } ++ ++ mbc->stream_on = false; ++ return status; ++} ++ ++int mediabufs_ctl_set_ext_ctrls(struct mediabufs_ctl * mbc, struct media_request * const mreq, struct v4l2_ext_control control_array[], unsigned int n) ++{ ++ struct v4l2_ext_controls controls = { ++ .controls = control_array, ++ .count = n ++ }; ++ ++ if (mreq) { ++ controls.which = V4L2_CTRL_WHICH_REQUEST_VAL; ++ controls.request_fd = media_request_fd(mreq); ++ } ++ ++ while (ioctl(mbc->vfd, VIDIOC_S_EXT_CTRLS, &controls)) ++ { ++ const int err = errno; ++ if (err != EINTR) { ++ request_err(mbc->dc, "Unable to set controls: %s\n", strerror(err)); ++ return -err; ++ } ++ } ++ ++ return 0; ++} ++ ++MediaBufsStatus mediabufs_set_ext_ctrl(struct mediabufs_ctl *const mbc, ++ struct media_request * const mreq, ++ unsigned int id, void *data, ++ unsigned int size) ++{ ++ struct v4l2_ext_control control = { ++ .id = id, ++ .ptr = data, ++ .size = size ++ }; ++ ++ int rv = mediabufs_ctl_set_ext_ctrls(mbc, mreq, &control, 1); ++ return !rv ? MEDIABUFS_STATUS_SUCCESS : MEDIABUFS_ERROR_OPERATION_FAILED; ++} ++ ++MediaBufsStatus mediabufs_src_fmt_set(struct mediabufs_ctl *const mbc, ++ enum v4l2_buf_type buf_type, ++ const uint32_t pixfmt, ++ const uint32_t width, const uint32_t height, ++ const size_t bufsize) ++{ ++ MediaBufsStatus rv = fmt_set(&mbc->src_fmt, mbc->vfd, buf_type, pixfmt, width, height, bufsize); ++ if (rv != MEDIABUFS_STATUS_SUCCESS) ++ request_err(mbc->dc, "Failed to set src buftype %d, format %#x %dx%d\n", buf_type, pixfmt, width, height); ++ ++ return rv; ++} ++ ++int mediabufs_ctl_query_ext_ctrls(struct mediabufs_ctl * mbc, struct v4l2_query_ext_ctrl ctrls[], unsigned int n) ++{ ++ int rv = 0; ++ while (n--) { ++ while (ioctl(mbc->vfd, VIDIOC_QUERY_EXT_CTRL, ctrls)) { ++ const int err = errno; ++ if (err != EINTR) { ++ // Often used for probing - errors are to be expected ++ request_debug(mbc->dc, "Failed to query ext id=%#x, err=%d\n", ctrls->id, err); ++ ctrls->type = 0; // 0 is invalid ++ rv = -err; ++ break; ++ } ++ } ++ ++ctrls; ++ } ++ return rv; ++} ++ ++int mediabufs_src_resizable(const struct mediabufs_ctl *const mbc) ++{ ++#if 1 ++ return 0; ++#else ++ // Single planar OUTPUT can only take exact size buffers ++ // Multiplanar will take larger than negotiated ++ return V4L2_TYPE_IS_MULTIPLANAR(mbc->src_fmt.type); ++#endif ++} ++ ++static void mediabufs_ctl_delete(struct mediabufs_ctl *const mbc) ++{ ++ if (!mbc) ++ return; ++ ++ // Break the weak link first ++ ff_weak_link_break(&mbc->this_wlm); ++ ++ polltask_delete(&mbc->pt); ++ ++ mediabufs_stream_off(mbc); ++ ++ // Empty v4l2 buffer stash ++ request_buffers(mbc->vfd, mbc->src_fmt.type, V4L2_MEMORY_MMAP, 0); ++ request_buffers(mbc->vfd, mbc->dst_fmt.type, V4L2_MEMORY_MMAP, 0); ++ ++ bq_free_all_free_src(mbc->src); ++ bq_free_all_inuse_src(mbc->src); ++ bq_free_all_free_dst(mbc->dst); ++ ++ { ++ struct qent_dst *dst_be; ++ while ((dst_be = base_to_dst(bq_get_inuse(mbc->dst))) != NULL) { ++ dst_be->base.timestamp = (struct timeval){0}; ++ dst_be->base.status = QENT_ERROR; ++ qe_dst_done(dst_be); ++ } ++ } ++ ++ queue_delete(mbc->dst); ++ queue_delete(mbc->src); ++ close(mbc->vfd); ++ pthread_mutex_destroy(&mbc->lock); ++ ++ free(mbc); ++} ++ ++struct mediabufs_ctl * mediabufs_ctl_ref(struct mediabufs_ctl *const mbc) ++{ ++ atomic_fetch_add(&mbc->ref_count, 1); ++ return mbc; ++} ++ ++void mediabufs_ctl_unref(struct mediabufs_ctl **const pmbc) ++{ ++ struct mediabufs_ctl *const mbc = *pmbc; ++ int n; ++ ++ if (!mbc) ++ return; ++ *pmbc = NULL; ++ n = atomic_fetch_sub(&mbc->ref_count, 1); ++ if (n) ++ return; ++ mediabufs_ctl_delete(mbc); ++} ++ ++unsigned int mediabufs_ctl_driver_version(struct mediabufs_ctl *const mbc) ++{ ++ return mbc->capability.version; ++} ++ ++static int set_capabilities(struct mediabufs_ctl *const mbc) ++{ ++ uint32_t caps; ++ ++ if (ioctl(mbc->vfd, VIDIOC_QUERYCAP, &mbc->capability)) { ++ int err = errno; ++ request_err(mbc->dc, "Failed to get capabilities: %s\n", strerror(err)); ++ return -err; ++ } ++ ++ caps = (mbc->capability.capabilities & V4L2_CAP_DEVICE_CAPS) != 0 ? ++ mbc->capability.device_caps : ++ mbc->capability.capabilities; ++ ++ if ((caps & V4L2_CAP_VIDEO_M2M_MPLANE) != 0) { ++ mbc->src_fmt.type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE; ++ mbc->dst_fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE; ++ } ++ else if ((caps & V4L2_CAP_VIDEO_M2M) != 0) { ++ mbc->src_fmt.type = V4L2_BUF_TYPE_VIDEO_OUTPUT; ++ mbc->dst_fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; ++ } ++ else { ++ request_err(mbc->dc, "No M2M capabilities (%#x)\n", caps); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++/* One of these per context */ ++struct mediabufs_ctl * mediabufs_ctl_new(void * const dc, const char * vpath, struct pollqueue *const pq) ++{ ++ struct mediabufs_ctl *const mbc = calloc(1, sizeof(*mbc)); ++ ++ if (!mbc) ++ return NULL; ++ ++ mbc->dc = dc; ++ // Default mono planar ++ mbc->pq = pq; ++ pthread_mutex_init(&mbc->lock, NULL); ++ ++ /* Pick a default - could we scan for this? */ ++ if (vpath == NULL) ++ vpath = "/dev/media0"; ++ ++ while ((mbc->vfd = open(vpath, O_RDWR)) == -1) ++ { ++ const int err = errno; ++ if (err != EINTR) { ++ request_err(dc, "Failed to open video dev '%s': %s\n", vpath, strerror(err)); ++ goto fail0; ++ } ++ } ++ ++ if (set_capabilities(mbc)) { ++ request_err(dc, "Bad capabilities for video dev '%s'\n", vpath); ++ goto fail1; ++ } ++ ++ mbc->src = queue_new(mbc->vfd); ++ if (!mbc->src) ++ goto fail1; ++ mbc->dst = queue_new(mbc->vfd); ++ if (!mbc->dst) ++ goto fail2; ++ mbc->pt = polltask_new(pq, mbc->vfd, POLLIN | POLLOUT, mediabufs_poll_cb, mbc); ++ if (!mbc->pt) ++ goto fail3; ++ mbc->this_wlm = ff_weak_link_new(mbc); ++ if (!mbc->this_wlm) ++ goto fail4; ++ ++ /* Cannot add polltask now - polling with nothing pending ++ * generates infinite error polls ++ */ ++ return mbc; ++ ++fail4: ++ polltask_delete(&mbc->pt); ++fail3: ++ queue_delete(mbc->dst); ++fail2: ++ queue_delete(mbc->src); ++fail1: ++ close(mbc->vfd); ++fail0: ++ free(mbc); ++ request_info(dc, "%s: FAILED\n", __func__); ++ return NULL; ++} ++ ++ ++ +--- /dev/null ++++ b/libavcodec/v4l2_req_media.h +@@ -0,0 +1,171 @@ ++/* ++e.h ++* ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the ++ * "Software"), to deal in the Software without restriction, including ++ * without limitation the rights to use, copy, modify, merge, publish, ++ * distribute, sub license, and/or sell copies of the Software, and to ++ * permit persons to whom the Software is furnished to do so, subject to ++ * the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the ++ * next paragraph) shall be included in all copies or substantial portions ++ * of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS ++ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. ++ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ++ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ++ */ ++ ++#ifndef _MEDIA_H_ ++#define _MEDIA_H_ ++ ++#include <stdbool.h> ++#include <stdint.h> ++ ++struct v4l2_format; ++struct v4l2_fmtdesc; ++struct v4l2_query_ext_ctrl; ++ ++struct pollqueue; ++struct media_request; ++struct media_pool; ++ ++typedef enum media_buf_status { ++ MEDIABUFS_STATUS_SUCCESS = 0, ++ MEDIABUFS_ERROR_OPERATION_FAILED, ++ MEDIABUFS_ERROR_DECODING_ERROR, ++ MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE, ++ MEDIABUFS_ERROR_UNSUPPORTED_RT_FORMAT, ++ MEDIABUFS_ERROR_ALLOCATION_FAILED, ++ MEDIABUFS_ERROR_UNSUPPORTED_MEMORY, ++} MediaBufsStatus; ++ ++struct media_pool * media_pool_new(const char * const media_path, ++ struct pollqueue * const pq, ++ const unsigned int n); ++void media_pool_delete(struct media_pool ** pmp); ++ ++// Obtain a media request ++// Will block if none availible - has a 2sec timeout ++struct media_request * media_request_get(struct media_pool * const mp); ++int media_request_fd(const struct media_request * const req); ++ ++// Start this request ++// Request structure is returned to pool once done ++int media_request_start(struct media_request * const req); ++ ++// Return an *unstarted* media_request to the pool ++// May later be upgraded to allow for aborting a started req ++int media_request_abort(struct media_request ** const preq); ++ ++ ++struct mediabufs_ctl; ++struct qent_src; ++struct qent_dst; ++struct dmabuf_h; ++struct dmabufs_ctl; ++ ++// 1-1 mammping to V4L2 type - just defined separetely to avoid some include versioning difficulties ++enum mediabufs_memory { ++ MEDIABUFS_MEMORY_UNSET = 0, ++ MEDIABUFS_MEMORY_MMAP = 1, ++ MEDIABUFS_MEMORY_USERPTR = 2, ++ MEDIABUFS_MEMORY_OVERLAY = 3, ++ MEDIABUFS_MEMORY_DMABUF = 4, ++}; ++ ++int qent_src_params_set(struct qent_src *const be, const struct timeval * timestamp); ++struct timeval qent_dst_timestamp_get(const struct qent_dst *const be_dst); ++ ++// prealloc ++int qent_src_alloc(struct qent_src *const be_src, const size_t len, struct dmabufs_ctl * dbsc); ++// dbsc may be NULL if realloc not required ++int qent_src_data_copy(struct qent_src *const be_src, const size_t offset, const void *const src, const size_t len, struct dmabufs_ctl * dbsc); ++const struct dmabuf_h * qent_dst_dmabuf(const struct qent_dst *const be, unsigned int plane); ++int qent_dst_dup_fd(const struct qent_dst *const be, unsigned int plane); ++MediaBufsStatus qent_dst_wait(struct qent_dst *const be); ++void qent_dst_delete(struct qent_dst *const be); ++// Returns a qent_dst to its mbc free Q or deletes it if the mbc is dead ++void qent_dst_unref(struct qent_dst ** const pbe_dst); ++struct qent_dst * qent_dst_ref(struct qent_dst * const be_dst); ++ ++const uint8_t * qent_dst_data(struct qent_dst *const be, unsigned int buf_no); ++MediaBufsStatus qent_dst_read_start(struct qent_dst *const be); ++MediaBufsStatus qent_dst_read_stop(struct qent_dst *const be); ++/* Import an fd unattached to any mediabuf */ ++MediaBufsStatus qent_dst_import_fd(struct qent_dst *const be_dst, ++ unsigned int plane, ++ int fd, size_t size); ++ ++const char * mediabufs_memory_name(const enum mediabufs_memory m); ++ ++MediaBufsStatus mediabufs_start_request(struct mediabufs_ctl *const mbc, ++ struct media_request **const pmreq, ++ struct qent_src **const psrc_be, ++ struct qent_dst *const dst_be, ++ const bool is_final); ++// Get / alloc a dst buffer & associate with a slot ++// If the dst pool is empty then behaviour depends on the fixed flag passed to ++// dst_slots_create. Default is !fixed = unlimited alloc ++struct qent_dst* mediabufs_dst_qent_alloc(struct mediabufs_ctl *const mbc, ++ struct dmabufs_ctl *const dbsc); ++// Create dst slots without alloc ++// If fixed true then qent_alloc will only get slots from this pool and will ++// block until a qent has been unrefed ++MediaBufsStatus mediabufs_dst_slots_create(struct mediabufs_ctl *const mbc, const unsigned int n, const bool fixed, const enum mediabufs_memory memtype); ++ ++MediaBufsStatus mediabufs_stream_on(struct mediabufs_ctl *const mbc); ++MediaBufsStatus mediabufs_stream_off(struct mediabufs_ctl *const mbc); ++const struct v4l2_format *mediabufs_dst_fmt(struct mediabufs_ctl *const mbc); ++ ++typedef int mediabufs_dst_fmt_accept_fn(void * v, const struct v4l2_fmtdesc *fmtdesc); ++ ++MediaBufsStatus mediabufs_dst_fmt_set(struct mediabufs_ctl *const mbc, ++ const unsigned int width, ++ const unsigned int height, ++ mediabufs_dst_fmt_accept_fn *const accept_fn, ++ void *const accept_v); ++struct qent_src *mediabufs_src_qent_get(struct mediabufs_ctl *const mbc); ++void mediabufs_src_qent_abort(struct mediabufs_ctl *const mbc, struct qent_src **const pqe_src); ++ ++int mediabufs_ctl_set_ext_ctrls(struct mediabufs_ctl * mbc, struct media_request * const mreq, ++ struct v4l2_ext_control control_array[], unsigned int n); ++MediaBufsStatus mediabufs_set_ext_ctrl(struct mediabufs_ctl *const mbc, ++ struct media_request * const mreq, ++ unsigned int id, void *data, ++ unsigned int size); ++int mediabufs_ctl_query_ext_ctrls(struct mediabufs_ctl * mbc, struct v4l2_query_ext_ctrl ctrls[], unsigned int n); ++ ++int mediabufs_src_resizable(const struct mediabufs_ctl *const mbc); ++ ++MediaBufsStatus mediabufs_src_fmt_set(struct mediabufs_ctl *const mbc, ++ enum v4l2_buf_type buf_type, ++ const uint32_t pixfmt, ++ const uint32_t width, const uint32_t height, ++ const size_t bufsize); ++ ++MediaBufsStatus mediabufs_src_pool_create(struct mediabufs_ctl *const rw, ++ struct dmabufs_ctl * const dbsc, ++ unsigned int n, ++ const enum mediabufs_memory memtype); ++ ++// Want to have appropriate formats set first ++MediaBufsStatus mediabufs_src_chk_memtype(struct mediabufs_ctl *const mbc, const enum mediabufs_memory memtype); ++MediaBufsStatus mediabufs_dst_chk_memtype(struct mediabufs_ctl *const mbc, const enum mediabufs_memory memtype); ++ ++#define MEDIABUFS_DRIVER_VERSION(a, b, c) (((a) << 16) | ((b) << 8) | (c)) ++unsigned int mediabufs_ctl_driver_version(struct mediabufs_ctl *const mbc); ++ ++struct mediabufs_ctl * mediabufs_ctl_new(void * const dc, ++ const char *vpath, struct pollqueue *const pq); ++void mediabufs_ctl_unref(struct mediabufs_ctl **const pmbc); ++struct mediabufs_ctl * mediabufs_ctl_ref(struct mediabufs_ctl *const mbc); ++ ++ ++#endif +--- /dev/null ++++ b/libavcodec/v4l2_req_pollqueue.c +@@ -0,0 +1,361 @@ ++#include <errno.h> ++#include <limits.h> ++#include <poll.h> ++#include <pthread.h> ++#include <semaphore.h> ++#include <stdatomic.h> ++#include <stdbool.h> ++#include <stdlib.h> ++#include <stdint.h> ++#include <stdio.h> ++#include <string.h> ++#include <unistd.h> ++#include <sys/eventfd.h> ++ ++#include "v4l2_req_pollqueue.h" ++#include "v4l2_req_utils.h" ++ ++ ++struct pollqueue; ++ ++enum polltask_state { ++ POLLTASK_UNQUEUED = 0, ++ POLLTASK_QUEUED, ++ POLLTASK_RUNNING, ++ POLLTASK_Q_KILL, ++ POLLTASK_RUN_KILL, ++}; ++ ++struct polltask { ++ struct polltask *next; ++ struct polltask *prev; ++ struct pollqueue *q; ++ enum polltask_state state; ++ ++ int fd; ++ short events; ++ ++ void (*fn)(void *v, short revents); ++ void * v; ++ ++ uint64_t timeout; /* CLOCK_MONOTONIC time, 0 => never */ ++ sem_t kill_sem; ++}; ++ ++struct pollqueue { ++ atomic_int ref_count; ++ pthread_mutex_t lock; ++ ++ struct polltask *head; ++ struct polltask *tail; ++ ++ bool kill; ++ bool no_prod; ++ int prod_fd; ++ struct polltask *prod_pt; ++ pthread_t worker; ++}; ++ ++struct polltask *polltask_new(struct pollqueue *const pq, ++ const int fd, const short events, ++ void (*const fn)(void *v, short revents), ++ void *const v) ++{ ++ struct polltask *pt; ++ ++ if (!events) ++ return NULL; ++ ++ pt = malloc(sizeof(*pt)); ++ if (!pt) ++ return NULL; ++ ++ *pt = (struct polltask){ ++ .next = NULL, ++ .prev = NULL, ++ .q = pollqueue_ref(pq), ++ .fd = fd, ++ .events = events, ++ .fn = fn, ++ .v = v ++ }; ++ ++ sem_init(&pt->kill_sem, 0, 0); ++ ++ return pt; ++} ++ ++static void pollqueue_rem_task(struct pollqueue *const pq, struct polltask *const pt) ++{ ++ if (pt->prev) ++ pt->prev->next = pt->next; ++ else ++ pq->head = pt->next; ++ if (pt->next) ++ pt->next->prev = pt->prev; ++ else ++ pq->tail = pt->prev; ++ pt->next = NULL; ++ pt->prev = NULL; ++} ++ ++static void polltask_free(struct polltask * const pt) ++{ ++ sem_destroy(&pt->kill_sem); ++ free(pt); ++} ++ ++static int pollqueue_prod(const struct pollqueue *const pq) ++{ ++ static const uint64_t one = 1; ++ return write(pq->prod_fd, &one, sizeof(one)); ++} ++ ++void polltask_delete(struct polltask **const ppt) ++{ ++ struct polltask *const pt = *ppt; ++ struct pollqueue * pq; ++ enum polltask_state state; ++ bool prodme; ++ ++ if (!pt) ++ return; ++ ++ pq = pt->q; ++ pthread_mutex_lock(&pq->lock); ++ state = pt->state; ++ pt->state = (state == POLLTASK_RUNNING) ? POLLTASK_RUN_KILL : POLLTASK_Q_KILL; ++ prodme = !pq->no_prod; ++ pthread_mutex_unlock(&pq->lock); ++ ++ if (state != POLLTASK_UNQUEUED) { ++ if (prodme) ++ pollqueue_prod(pq); ++ while (sem_wait(&pt->kill_sem) && errno == EINTR) ++ /* loop */; ++ } ++ ++ // Leave zapping the ref until we have DQed the PT as might well be ++ // legitimately used in it ++ *ppt = NULL; ++ polltask_free(pt); ++ pollqueue_unref(&pq); ++} ++ ++static uint64_t pollqueue_now(int timeout) ++{ ++ struct timespec now; ++ uint64_t now_ms; ++ ++ if (clock_gettime(CLOCK_MONOTONIC, &now)) ++ return 0; ++ now_ms = (now.tv_nsec / 1000000) + (uint64_t)now.tv_sec * 1000 + timeout; ++ return now_ms ? now_ms : (uint64_t)1; ++} ++ ++void pollqueue_add_task(struct polltask *const pt, const int timeout) ++{ ++ bool prodme = false; ++ struct pollqueue * const pq = pt->q; ++ ++ pthread_mutex_lock(&pq->lock); ++ if (pt->state != POLLTASK_Q_KILL && pt->state != POLLTASK_RUN_KILL) { ++ if (pq->tail) ++ pq->tail->next = pt; ++ else ++ pq->head = pt; ++ pt->prev = pq->tail; ++ pt->next = NULL; ++ pt->state = POLLTASK_QUEUED; ++ pt->timeout = timeout < 0 ? 0 : pollqueue_now(timeout); ++ pq->tail = pt; ++ prodme = !pq->no_prod; ++ } ++ pthread_mutex_unlock(&pq->lock); ++ if (prodme) ++ pollqueue_prod(pq); ++} ++ ++static void *poll_thread(void *v) ++{ ++ struct pollqueue *const pq = v; ++ struct pollfd *a = NULL; ++ size_t asize = 0; ++ ++ pthread_mutex_lock(&pq->lock); ++ do { ++ unsigned int i; ++ unsigned int n = 0; ++ struct polltask *pt; ++ struct polltask *pt_next; ++ uint64_t now = pollqueue_now(0); ++ int timeout = -1; ++ int rv; ++ ++ for (pt = pq->head; pt; pt = pt_next) { ++ int64_t t; ++ ++ pt_next = pt->next; ++ ++ if (pt->state == POLLTASK_Q_KILL) { ++ pollqueue_rem_task(pq, pt); ++ sem_post(&pt->kill_sem); ++ continue; ++ } ++ ++ if (n >= asize) { ++ asize = asize ? asize * 2 : 4; ++ a = realloc(a, asize * sizeof(*a)); ++ if (!a) { ++ request_log("Failed to realloc poll array to %zd\n", asize); ++ goto fail_locked; ++ } ++ } ++ ++ a[n++] = (struct pollfd){ ++ .fd = pt->fd, ++ .events = pt->events ++ }; ++ ++ t = (int64_t)(pt->timeout - now); ++ if (pt->timeout && t < INT_MAX && ++ (timeout < 0 || (int)t < timeout)) ++ timeout = (t < 0) ? 0 : (int)t; ++ } ++ pthread_mutex_unlock(&pq->lock); ++ ++ if ((rv = poll(a, n, timeout)) == -1) { ++ if (errno != EINTR) { ++ request_log("Poll error: %s\n", strerror(errno)); ++ goto fail_unlocked; ++ } ++ } ++ ++ pthread_mutex_lock(&pq->lock); ++ now = pollqueue_now(0); ++ ++ /* Prodding in this loop is pointless and might lead to ++ * infinite looping ++ */ ++ pq->no_prod = true; ++ for (i = 0, pt = pq->head; i < n; ++i, pt = pt_next) { ++ pt_next = pt->next; ++ ++ /* Pending? */ ++ if (a[i].revents || ++ (pt->timeout && (int64_t)(now - pt->timeout) >= 0)) { ++ pollqueue_rem_task(pq, pt); ++ if (pt->state == POLLTASK_QUEUED) ++ pt->state = POLLTASK_RUNNING; ++ if (pt->state == POLLTASK_Q_KILL) ++ pt->state = POLLTASK_RUN_KILL; ++ pthread_mutex_unlock(&pq->lock); ++ ++ /* This can add new entries to the Q but as ++ * those are added to the tail our existing ++ * chain remains intact ++ */ ++ pt->fn(pt->v, a[i].revents); ++ ++ pthread_mutex_lock(&pq->lock); ++ if (pt->state == POLLTASK_RUNNING) ++ pt->state = POLLTASK_UNQUEUED; ++ if (pt->state == POLLTASK_RUN_KILL) ++ sem_post(&pt->kill_sem); ++ } ++ } ++ pq->no_prod = false; ++ ++ } while (!pq->kill); ++ ++fail_locked: ++ pthread_mutex_unlock(&pq->lock); ++fail_unlocked: ++ free(a); ++ return NULL; ++} ++ ++static void prod_fn(void *v, short revents) ++{ ++ struct pollqueue *const pq = v; ++ char buf[8]; ++ if (revents) ++ read(pq->prod_fd, buf, 8); ++ if (!pq->kill) ++ pollqueue_add_task(pq->prod_pt, -1); ++} ++ ++struct pollqueue * pollqueue_new(void) ++{ ++ struct pollqueue *pq = malloc(sizeof(*pq)); ++ if (!pq) ++ return NULL; ++ *pq = (struct pollqueue){ ++ .ref_count = ATOMIC_VAR_INIT(0), ++ .lock = PTHREAD_MUTEX_INITIALIZER, ++ .head = NULL, ++ .tail = NULL, ++ .kill = false, ++ .prod_fd = -1 ++ }; ++ ++ pq->prod_fd = eventfd(0, EFD_NONBLOCK); ++ if (pq->prod_fd == 1) ++ goto fail1; ++ pq->prod_pt = polltask_new(pq, pq->prod_fd, POLLIN, prod_fn, pq); ++ if (!pq->prod_pt) ++ goto fail2; ++ pollqueue_add_task(pq->prod_pt, -1); ++ if (pthread_create(&pq->worker, NULL, poll_thread, pq)) ++ goto fail3; ++ // Reset ref count which will have been inced by the add_task ++ atomic_store(&pq->ref_count, 0); ++ return pq; ++ ++fail3: ++ polltask_free(pq->prod_pt); ++fail2: ++ close(pq->prod_fd); ++fail1: ++ free(pq); ++ return NULL; ++} ++ ++static void pollqueue_free(struct pollqueue *const pq) ++{ ++ void *rv; ++ ++ pthread_mutex_lock(&pq->lock); ++ pq->kill = true; ++ pollqueue_prod(pq); ++ pthread_mutex_unlock(&pq->lock); ++ ++ pthread_join(pq->worker, &rv); ++ polltask_free(pq->prod_pt); ++ pthread_mutex_destroy(&pq->lock); ++ close(pq->prod_fd); ++ free(pq); ++} ++ ++struct pollqueue * pollqueue_ref(struct pollqueue *const pq) ++{ ++ atomic_fetch_add(&pq->ref_count, 1); ++ return pq; ++} ++ ++void pollqueue_unref(struct pollqueue **const ppq) ++{ ++ struct pollqueue * const pq = *ppq; ++ ++ if (!pq) ++ return; ++ *ppq = NULL; ++ ++ if (atomic_fetch_sub(&pq->ref_count, 1) != 0) ++ return; ++ ++ pollqueue_free(pq); ++} ++ ++ ++ +--- /dev/null ++++ b/libavcodec/v4l2_req_pollqueue.h +@@ -0,0 +1,18 @@ ++#ifndef POLLQUEUE_H_ ++#define POLLQUEUE_H_ ++ ++struct polltask; ++struct pollqueue; ++ ++struct polltask *polltask_new(struct pollqueue *const pq, ++ const int fd, const short events, ++ void (*const fn)(void *v, short revents), ++ void *const v); ++void polltask_delete(struct polltask **const ppt); ++ ++void pollqueue_add_task(struct polltask *const pt, const int timeout); ++struct pollqueue * pollqueue_new(void); ++void pollqueue_unref(struct pollqueue **const ppq); ++struct pollqueue * pollqueue_ref(struct pollqueue *const pq); ++ ++#endif /* POLLQUEUE_H_ */ +--- /dev/null ++++ b/libavcodec/v4l2_req_utils.h +@@ -0,0 +1,27 @@ ++#ifndef AVCODEC_V4L2_REQ_UTILS_H ++#define AVCODEC_V4L2_REQ_UTILS_H ++ ++#include <stdint.h> ++#include "libavutil/log.h" ++ ++#define request_log(...) av_log(NULL, AV_LOG_INFO, __VA_ARGS__) ++ ++#define request_err(_ctx, ...) av_log(_ctx, AV_LOG_ERROR, __VA_ARGS__) ++#define request_warn(_ctx, ...) av_log(_ctx, AV_LOG_WARNING, __VA_ARGS__) ++#define request_info(_ctx, ...) av_log(_ctx, AV_LOG_INFO, __VA_ARGS__) ++#define request_debug(_ctx, ...) av_log(_ctx, AV_LOG_DEBUG, __VA_ARGS__) ++ ++static inline char safechar(char c) { ++ return c > 0x20 && c < 0x7f ? c : '.'; ++} ++ ++static inline const char * strfourcc(char tbuf[5], uint32_t fcc) { ++ tbuf[0] = safechar((fcc >> 0) & 0xff); ++ tbuf[1] = safechar((fcc >> 8) & 0xff); ++ tbuf[2] = safechar((fcc >> 16) & 0xff); ++ tbuf[3] = safechar((fcc >> 24) & 0xff); ++ tbuf[4] = '\0'; ++ return tbuf; ++} ++ ++#endif +--- /dev/null ++++ b/libavcodec/v4l2_request_hevc.c +@@ -0,0 +1,351 @@ ++/* ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++ ++#include "config.h" ++#include "decode.h" ++#include "hevcdec.h" ++#include "hwconfig.h" ++#include "internal.h" ++ ++#include "v4l2_request_hevc.h" ++ ++#include "libavutil/hwcontext_drm.h" ++#include "libavutil/pixdesc.h" ++ ++#include "v4l2_req_devscan.h" ++#include "v4l2_req_dmabufs.h" ++#include "v4l2_req_pollqueue.h" ++#include "v4l2_req_media.h" ++#include "v4l2_req_utils.h" ++ ++static size_t bit_buf_size(unsigned int w, unsigned int h, unsigned int bits_minus8) ++{ ++ const size_t wxh = w * h; ++ size_t bits_alloc; ++ ++ /* Annex A gives a min compression of 2 @ lvl 3.1 ++ * (wxh <= 983040) and min 4 thereafter but avoid ++ * the odity of 983041 having a lower limit than ++ * 983040. ++ * Multiply by 3/2 for 4:2:0 ++ */ ++ bits_alloc = wxh < 983040 ? wxh * 3 / 4 : ++ wxh < 983040 * 2 ? 983040 * 3 / 4 : ++ wxh * 3 / 8; ++ /* Allow for bit depth */ ++ bits_alloc += (bits_alloc * bits_minus8) / 8; ++ /* Add a few bytes (16k) for overhead */ ++ bits_alloc += 0x4000; ++ return bits_alloc; ++} ++ ++static int v4l2_req_hevc_start_frame(AVCodecContext *avctx, ++ av_unused const uint8_t *buffer, ++ av_unused uint32_t size) ++{ ++ const V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; ++ return ctx->fns->start_frame(avctx, buffer, size); ++} ++ ++static int v4l2_req_hevc_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size) ++{ ++ V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; ++ return ctx->fns->decode_slice(avctx, buffer, size); ++} ++ ++static int v4l2_req_hevc_end_frame(AVCodecContext *avctx) ++{ ++ V4L2RequestContextHEVC *ctx = avctx->internal->hwaccel_priv_data; ++ return ctx->fns->end_frame(avctx); ++} ++ ++static void v4l2_req_hevc_abort_frame(AVCodecContext * const avctx) ++{ ++ V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; ++ ctx->fns->abort_frame(avctx); ++} ++ ++static int v4l2_req_hevc_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx) ++{ ++ V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; ++ return ctx->fns->frame_params(avctx, hw_frames_ctx); ++} ++ ++static int v4l2_req_hevc_alloc_frame(AVCodecContext * avctx, AVFrame *frame) ++{ ++ V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; ++ return ctx->fns->alloc_frame(avctx, frame); ++} ++ ++ ++static int v4l2_request_hevc_uninit(AVCodecContext *avctx) ++{ ++ V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; ++ ++ av_log(avctx, AV_LOG_DEBUG, "<<< %s\n", __func__); ++ ++ decode_q_wait(&ctx->decode_q, NULL); // Wait for all other threads to be out of decode ++ ++ mediabufs_ctl_unref(&ctx->mbufs); ++ media_pool_delete(&ctx->mpool); ++ pollqueue_unref(&ctx->pq); ++ dmabufs_ctl_unref(&ctx->dbufs); ++ devscan_delete(&ctx->devscan); ++ ++ decode_q_uninit(&ctx->decode_q); ++ ++// if (avctx->hw_frames_ctx) { ++// AVHWFramesContext *hwfc = (AVHWFramesContext*)avctx->hw_frames_ctx->data; ++// av_buffer_pool_flush(hwfc->pool); ++// } ++ return 0; ++} ++ ++static int dst_fmt_accept_cb(void * v, const struct v4l2_fmtdesc *fmtdesc) ++{ ++ AVCodecContext *const avctx = v; ++ const HEVCContext *const h = avctx->priv_data; ++ ++ if (h->ps.sps->bit_depth == 8) { ++ if (fmtdesc->pixelformat == V4L2_PIX_FMT_NV12_COL128 || ++ fmtdesc->pixelformat == V4L2_PIX_FMT_NV12) { ++ return 1; ++ } ++ } ++ else if (h->ps.sps->bit_depth == 10) { ++ if (fmtdesc->pixelformat == V4L2_PIX_FMT_NV12_10_COL128) { ++ return 1; ++ } ++ } ++ return 0; ++} ++ ++static int v4l2_request_hevc_init(AVCodecContext *avctx) ++{ ++ const HEVCContext *h = avctx->priv_data; ++ V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; ++ const HEVCSPS * const sps = h->ps.sps; ++ int ret; ++ const struct decdev * decdev; ++ const uint32_t src_pix_fmt = V2(ff_v4l2_req_hevc, 4).src_pix_fmt_v4l2; // Assuming constant for all APIs but avoiding V4L2 includes ++ size_t src_size; ++ enum mediabufs_memory src_memtype; ++ enum mediabufs_memory dst_memtype; ++ ++ av_log(avctx, AV_LOG_DEBUG, "<<< %s\n", __func__); ++ ++ // Give up immediately if this is something that we have no code to deal with ++ if (h->ps.sps->chroma_format_idc != 1) { ++ av_log(avctx, AV_LOG_WARNING, "chroma_format_idc(%d) != 1: Not implemented\n", h->ps.sps->chroma_format_idc); ++ return AVERROR_PATCHWELCOME; ++ } ++ if (!(h->ps.sps->bit_depth == 10 || h->ps.sps->bit_depth == 8) || ++ h->ps.sps->bit_depth != h->ps.sps->bit_depth_chroma) { ++ av_log(avctx, AV_LOG_WARNING, "Bit depth Y:%d C:%d: Not implemented\n", h->ps.sps->bit_depth, h->ps.sps->bit_depth_chroma); ++ return AVERROR_PATCHWELCOME; ++ } ++ ++ if ((ret = devscan_build(avctx, &ctx->devscan)) != 0) { ++ av_log(avctx, AV_LOG_WARNING, "Failed to find any V4L2 devices\n"); ++ return (AVERROR(-ret)); ++ } ++ ret = AVERROR(ENOMEM); // Assume mem fail by default for these ++ ++ if ((decdev = devscan_find(ctx->devscan, src_pix_fmt)) == NULL) ++ { ++ av_log(avctx, AV_LOG_WARNING, "Failed to find a V4L2 device for H265\n"); ++ ret = AVERROR(ENODEV); ++ goto fail0; ++ } ++ av_log(avctx, AV_LOG_DEBUG, "Trying V4L2 devices: %s,%s\n", ++ decdev_media_path(decdev), decdev_video_path(decdev)); ++ ++ if ((ctx->pq = pollqueue_new()) == NULL) { ++ av_log(avctx, AV_LOG_ERROR, "Unable to create pollqueue\n"); ++ goto fail1; ++ } ++ ++ if ((ctx->mpool = media_pool_new(decdev_media_path(decdev), ctx->pq, 4)) == NULL) { ++ av_log(avctx, AV_LOG_ERROR, "Unable to create media pool\n"); ++ goto fail2; ++ } ++ ++ if ((ctx->mbufs = mediabufs_ctl_new(avctx, decdev_video_path(decdev), ctx->pq)) == NULL) { ++ av_log(avctx, AV_LOG_ERROR, "Unable to create media controls\n"); ++ goto fail3; ++ } ++ ++ // Version test for functional Pi5 HEVC iommu. ++ // rpivid kernel patch was merged in 6.1.57 ++ // *** Remove when it is unlikely that there are any broken kernels left ++ if (mediabufs_ctl_driver_version(ctx->mbufs) >= MEDIABUFS_DRIVER_VERSION(6,1,57)) ++ ctx->dbufs = dmabufs_ctl_new_vidbuf_cached(); ++ else ++ ctx->dbufs = dmabufs_ctl_new(); ++ ++ if (ctx->dbufs == NULL) { ++ av_log(avctx, AV_LOG_DEBUG, "Unable to open dmabufs - try mmap buffers\n"); ++ src_memtype = MEDIABUFS_MEMORY_MMAP; ++ dst_memtype = MEDIABUFS_MEMORY_MMAP; ++ } ++ else { ++ av_log(avctx, AV_LOG_DEBUG, "Dmabufs opened - try dmabuf buffers\n"); ++ src_memtype = MEDIABUFS_MEMORY_DMABUF; ++ dst_memtype = MEDIABUFS_MEMORY_DMABUF; ++ } ++ ++ // Ask for an initial bitbuf size of max size / 4 ++ // We will realloc if we need more ++ // Must use sps->h/w as avctx contains cropped size ++retry_src_memtype: ++ src_size = bit_buf_size(sps->width, sps->height, sps->bit_depth - 8); ++ if (src_memtype == MEDIABUFS_MEMORY_DMABUF && mediabufs_src_resizable(ctx->mbufs)) ++ src_size /= 4; ++ // Kludge for conformance tests which break Annex A limits ++ else if (src_size < 0x40000) ++ src_size = 0x40000; ++ ++ if (mediabufs_src_fmt_set(ctx->mbufs, decdev_src_type(decdev), src_pix_fmt, ++ sps->width, sps->height, src_size)) { ++ char tbuf1[5]; ++ av_log(avctx, AV_LOG_ERROR, "Failed to set source format: %s %dx%d\n", strfourcc(tbuf1, src_pix_fmt), sps->width, sps->height); ++ goto fail4; ++ } ++ ++ if (mediabufs_src_chk_memtype(ctx->mbufs, src_memtype)) { ++ if (src_memtype == MEDIABUFS_MEMORY_DMABUF) { ++ src_memtype = MEDIABUFS_MEMORY_MMAP; ++ goto retry_src_memtype; ++ } ++ av_log(avctx, AV_LOG_ERROR, "Failed to get src memory type\n"); ++ goto fail4; ++ } ++ ++ if (V2(ff_v4l2_req_hevc, 4).probe(avctx, ctx) == 0) ++ ctx->fns = &V2(ff_v4l2_req_hevc, 4); ++#if CONFIG_V4L2_REQ_HEVC_VX ++ else if (V2(ff_v4l2_req_hevc, 3).probe(avctx, ctx) == 0) ++ ctx->fns = &V2(ff_v4l2_req_hevc, 3); ++ else if (V2(ff_v4l2_req_hevc, 2).probe(avctx, ctx) == 0) ++ ctx->fns = &V2(ff_v4l2_req_hevc, 2); ++ else if (V2(ff_v4l2_req_hevc, 1).probe(avctx, ctx) == 0) ++ ctx->fns = &V2(ff_v4l2_req_hevc, 1); ++#endif ++ else { ++ av_log(avctx, AV_LOG_ERROR, "No HEVC version probed successfully\n"); ++ ret = AVERROR(EINVAL); ++ goto fail4; ++ } ++ ++ av_log(avctx, AV_LOG_DEBUG, "%s probed successfully: driver v %#x\n", ++ ctx->fns->name, mediabufs_ctl_driver_version(ctx->mbufs)); ++ ++ if (mediabufs_dst_fmt_set(ctx->mbufs, sps->width, sps->height, dst_fmt_accept_cb, avctx)) { ++ char tbuf1[5]; ++ av_log(avctx, AV_LOG_ERROR, "Failed to set destination format: %s %dx%d\n", strfourcc(tbuf1, src_pix_fmt), sps->width, sps->height); ++ goto fail4; ++ } ++ ++ if (mediabufs_src_pool_create(ctx->mbufs, ctx->dbufs, 6, src_memtype)) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to create source pool\n"); ++ goto fail4; ++ } ++ ++ { ++ unsigned int dst_slots = sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering + ++ avctx->thread_count + (avctx->extra_hw_frames > 0 ? avctx->extra_hw_frames : 6); ++ av_log(avctx, AV_LOG_DEBUG, "Slots=%d: Reordering=%d, threads=%d, hw+=%d\n", dst_slots, ++ sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering, ++ avctx->thread_count, avctx->extra_hw_frames); ++ ++ if (mediabufs_dst_chk_memtype(ctx->mbufs, dst_memtype)) { ++ if (dst_memtype != MEDIABUFS_MEMORY_DMABUF) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to get dst memory type\n"); ++ goto fail4; ++ } ++ av_log(avctx, AV_LOG_DEBUG, "Dst DMABUF not supported - trying mmap\n"); ++ dst_memtype = MEDIABUFS_MEMORY_MMAP; ++ } ++ ++ // extra_hw_frames is -1 if unset ++ if (mediabufs_dst_slots_create(ctx->mbufs, dst_slots, (avctx->extra_hw_frames > 0), dst_memtype)) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to create destination slots\n"); ++ goto fail4; ++ } ++ } ++ ++ if (mediabufs_stream_on(ctx->mbufs)) { ++ av_log(avctx, AV_LOG_ERROR, "Failed stream on\n"); ++ goto fail4; ++ } ++ ++ if ((ret = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_DRM)) != 0) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to create frame ctx\n"); ++ goto fail4; ++ } ++ ++ if ((ret = ctx->fns->set_controls(avctx, ctx)) != 0) { ++ av_log(avctx, AV_LOG_ERROR, "Failed set controls\n"); ++ goto fail5; ++ } ++ ++ decode_q_init(&ctx->decode_q); ++ ++ // Set our s/w format ++ avctx->sw_pix_fmt = ((AVHWFramesContext *)avctx->hw_frames_ctx->data)->sw_format; ++ ++ av_log(avctx, AV_LOG_INFO, "Hwaccel %s; devices: %s,%s; buffers: src %s, dst %s; swfmt=%s\n", ++ ctx->fns->name, ++ decdev_media_path(decdev), decdev_video_path(decdev), ++ mediabufs_memory_name(src_memtype), mediabufs_memory_name(dst_memtype), ++ av_get_pix_fmt_name(avctx->sw_pix_fmt)); ++ ++ return 0; ++ ++fail5: ++ av_buffer_unref(&avctx->hw_frames_ctx); ++fail4: ++ mediabufs_ctl_unref(&ctx->mbufs); ++fail3: ++ media_pool_delete(&ctx->mpool); ++fail2: ++ pollqueue_unref(&ctx->pq); ++fail1: ++ dmabufs_ctl_unref(&ctx->dbufs); ++fail0: ++ devscan_delete(&ctx->devscan); ++ return ret; ++} ++ ++const AVHWAccel ff_hevc_v4l2request_hwaccel = { ++ .name = "hevc_v4l2request", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .id = AV_CODEC_ID_HEVC, ++ .pix_fmt = AV_PIX_FMT_DRM_PRIME, ++ .alloc_frame = v4l2_req_hevc_alloc_frame, ++ .start_frame = v4l2_req_hevc_start_frame, ++ .decode_slice = v4l2_req_hevc_decode_slice, ++ .end_frame = v4l2_req_hevc_end_frame, ++ .abort_frame = v4l2_req_hevc_abort_frame, ++ .init = v4l2_request_hevc_init, ++ .uninit = v4l2_request_hevc_uninit, ++ .priv_data_size = sizeof(V4L2RequestContextHEVC), ++ .frame_params = v4l2_req_hevc_frame_params, ++ .caps_internal = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_MT_SAFE, ++}; +--- /dev/null ++++ b/libavcodec/v4l2_request_hevc.h +@@ -0,0 +1,102 @@ ++#ifndef AVCODEC_V4L2_REQUEST_HEVC_H ++#define AVCODEC_V4L2_REQUEST_HEVC_H ++ ++#include <stdint.h> ++#include <drm_fourcc.h> ++#include "v4l2_req_decode_q.h" ++ ++#ifndef DRM_FORMAT_NV15 ++#define DRM_FORMAT_NV15 fourcc_code('N', 'V', '1', '5') ++#endif ++ ++#ifndef DRM_FORMAT_NV20 ++#define DRM_FORMAT_NV20 fourcc_code('N', 'V', '2', '0') ++#endif ++ ++// P030 should be defined in drm_fourcc.h and hopefully will be sometime ++// in the future but until then... ++#ifndef DRM_FORMAT_P030 ++#define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0') ++#endif ++ ++#ifndef DRM_FORMAT_NV15 ++#define DRM_FORMAT_NV15 fourcc_code('N', 'V', '1', '5') ++#endif ++ ++#ifndef DRM_FORMAT_NV20 ++#define DRM_FORMAT_NV20 fourcc_code('N', 'V', '2', '0') ++#endif ++ ++#include <linux/videodev2.h> ++#ifndef V4L2_CID_CODEC_BASE ++#define V4L2_CID_CODEC_BASE V4L2_CID_MPEG_BASE ++#endif ++ ++// V4L2_PIX_FMT_NV12_10_COL128 and V4L2_PIX_FMT_NV12_COL128 should be defined ++// in drm_fourcc.h hopefully will be sometime in the future but until then... ++#ifndef V4L2_PIX_FMT_NV12_10_COL128 ++#define V4L2_PIX_FMT_NV12_10_COL128 v4l2_fourcc('N', 'C', '3', '0') ++#endif ++ ++#ifndef V4L2_PIX_FMT_NV12_COL128 ++#define V4L2_PIX_FMT_NV12_COL128 v4l2_fourcc('N', 'C', '1', '2') /* 12 Y/CbCr 4:2:0 128 pixel wide column */ ++#endif ++ ++#ifndef V4L2_CTRL_FLAG_DYNAMIC_ARRAY ++#define V4L2_CTRL_FLAG_DYNAMIC_ARRAY 0x0800 ++#endif ++ ++#define VCAT(name, version) name##_v##version ++#define V2(n,v) VCAT(n, v) ++#define V(n) V2(n, HEVC_CTRLS_VERSION) ++ ++#define S2(x) #x ++#define STR(x) S2(x) ++ ++// 1 per decoder ++struct v4l2_req_decode_fns; ++ ++typedef struct V4L2RequestContextHEVC { ++// V4L2RequestContext base; ++ const struct v4l2_req_decode_fns * fns; ++ ++ unsigned int timestamp; // ?? maybe uint64_t ++ ++ int decode_mode; ++ int start_code; ++ unsigned int max_slices; // 0 => not wanted (frame mode) ++ unsigned int max_offsets; // 0 => not wanted ++ ++ req_decode_q decode_q; ++ ++ struct devscan *devscan; ++ struct dmabufs_ctl *dbufs; ++ struct pollqueue *pq; ++ struct media_pool * mpool; ++ struct mediabufs_ctl *mbufs; ++} V4L2RequestContextHEVC; ++ ++typedef struct v4l2_req_decode_fns { ++ int src_pix_fmt_v4l2; ++ const char * name; ++ ++ // Init setup ++ int (*probe)(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx); ++ int (*set_controls)(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx); ++ ++ // Passthrough of hwaccel fns ++ int (*start_frame)(AVCodecContext *avctx, const uint8_t *buf, uint32_t buf_size); ++ int (*decode_slice)(AVCodecContext *avctx, const uint8_t *buf, uint32_t buf_size); ++ int (*end_frame)(AVCodecContext *avctx); ++ void (*abort_frame)(AVCodecContext *avctx); ++ int (*frame_params)(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx); ++ int (*alloc_frame)(AVCodecContext * avctx, AVFrame *frame); ++} v4l2_req_decode_fns; ++ ++ ++extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 1); ++extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 2); ++extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 3); ++extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 4); ++ ++#endif +--- /dev/null ++++ b/libavcodec/weak_link.c +@@ -0,0 +1,103 @@ ++#include <stdlib.h> ++#include <pthread.h> ++#include <stdatomic.h> ++#include "weak_link.h" ++ ++struct ff_weak_link_master { ++ atomic_int ref_count; /* 0 is single ref for easier atomics */ ++ pthread_rwlock_t lock; ++ void * ptr; ++}; ++ ++static inline struct ff_weak_link_master * weak_link_x(struct ff_weak_link_client * c) ++{ ++ return (struct ff_weak_link_master *)c; ++} ++ ++struct ff_weak_link_master * ff_weak_link_new(void * p) ++{ ++ struct ff_weak_link_master * w = malloc(sizeof(*w)); ++ if (!w) ++ return NULL; ++ atomic_init(&w->ref_count, 0); ++ w->ptr = p; ++ if (pthread_rwlock_init(&w->lock, NULL)) { ++ free(w); ++ return NULL; ++ } ++ return w; ++} ++ ++static void weak_link_do_unref(struct ff_weak_link_master * const w) ++{ ++ int n = atomic_fetch_sub(&w->ref_count, 1); ++ if (n) ++ return; ++ ++ pthread_rwlock_destroy(&w->lock); ++ free(w); ++} ++ ++// Unref & break link ++void ff_weak_link_break(struct ff_weak_link_master ** ppLink) ++{ ++ struct ff_weak_link_master * const w = *ppLink; ++ if (!w) ++ return; ++ ++ *ppLink = NULL; ++ pthread_rwlock_wrlock(&w->lock); ++ w->ptr = NULL; ++ pthread_rwlock_unlock(&w->lock); ++ ++ weak_link_do_unref(w); ++} ++ ++struct ff_weak_link_client* ff_weak_link_ref(struct ff_weak_link_master * w) ++{ ++ if (!w) ++ return NULL; ++ atomic_fetch_add(&w->ref_count, 1); ++ return (struct ff_weak_link_client*)w; ++} ++ ++void ff_weak_link_unref(struct ff_weak_link_client ** ppLink) ++{ ++ struct ff_weak_link_master * const w = weak_link_x(*ppLink); ++ if (!w) ++ return; ++ ++ *ppLink = NULL; ++ weak_link_do_unref(w); ++} ++ ++void * ff_weak_link_lock(struct ff_weak_link_client ** ppLink) ++{ ++ struct ff_weak_link_master * const w = weak_link_x(*ppLink); ++ ++ if (!w) ++ return NULL; ++ ++ if (pthread_rwlock_rdlock(&w->lock)) ++ goto broken; ++ ++ if (w->ptr) ++ return w->ptr; ++ ++ pthread_rwlock_unlock(&w->lock); ++ ++broken: ++ *ppLink = NULL; ++ weak_link_do_unref(w); ++ return NULL; ++} ++ ++// Ignores a NULL c (so can be on the return path of both broken & live links) ++void ff_weak_link_unlock(struct ff_weak_link_client * c) ++{ ++ struct ff_weak_link_master * const w = weak_link_x(c); ++ if (w) ++ pthread_rwlock_unlock(&w->lock); ++} ++ ++ +--- /dev/null ++++ b/libavcodec/weak_link.h +@@ -0,0 +1,23 @@ ++struct ff_weak_link_master; ++struct ff_weak_link_client; ++ ++struct ff_weak_link_master * ff_weak_link_new(void * p); ++void ff_weak_link_break(struct ff_weak_link_master ** ppLink); ++ ++struct ff_weak_link_client* ff_weak_link_ref(struct ff_weak_link_master * w); ++void ff_weak_link_unref(struct ff_weak_link_client ** ppLink); ++ ++// Returns NULL if link broken - in this case it will also zap ++// *ppLink and unref the weak_link. ++// Returns NULL if *ppLink is NULL (so a link once broken stays broken) ++// ++// The above does mean that there is a race if this is called simultainiously ++// by two threads using the same weak_link_client (so don't do that) ++void * ff_weak_link_lock(struct ff_weak_link_client ** ppLink); ++void ff_weak_link_unlock(struct ff_weak_link_client * c); ++ ++ ++ ++ ++ ++ +--- a/libavdevice/Makefile ++++ b/libavdevice/Makefile +@@ -48,6 +48,8 @@ OBJS-$(CONFIG_SNDIO_OUTDEV) + OBJS-$(CONFIG_V4L2_INDEV) += v4l2.o v4l2-common.o timefilter.o + OBJS-$(CONFIG_V4L2_OUTDEV) += v4l2enc.o v4l2-common.o + OBJS-$(CONFIG_VFWCAP_INDEV) += vfwcap.o ++OBJS-$(CONFIG_VOUT_DRM_OUTDEV) += drm_vout.o ++OBJS-$(CONFIG_VOUT_EGL_OUTDEV) += egl_vout.o + OBJS-$(CONFIG_XCBGRAB_INDEV) += xcbgrab.o + OBJS-$(CONFIG_XV_OUTDEV) += xv.o + +--- a/libavdevice/alldevices.c ++++ b/libavdevice/alldevices.c +@@ -51,6 +51,8 @@ extern const AVOutputFormat ff_sndio_mux + extern const AVInputFormat ff_v4l2_demuxer; + extern const AVOutputFormat ff_v4l2_muxer; + extern const AVInputFormat ff_vfwcap_demuxer; ++extern const AVOutputFormat ff_vout_drm_muxer; ++extern const AVOutputFormat ff_vout_egl_muxer; + extern const AVInputFormat ff_xcbgrab_demuxer; + extern const AVOutputFormat ff_xv_muxer; + +--- /dev/null ++++ b/libavdevice/drm_vout.c +@@ -0,0 +1,680 @@ ++/* ++ * Copyright (c) 2020 John Cox for Raspberry Pi Trading ++ * ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++ ++// *** This module is a work in progress and its utility is strictly ++// limited to testing. ++ ++#include "libavutil/opt.h" ++#include "libavutil/pixdesc.h" ++#include "libavutil/hwcontext_drm.h" ++#include "libavformat/internal.h" ++#include "avdevice.h" ++ ++#include "pthread.h" ++#include <semaphore.h> ++#include <unistd.h> ++ ++#include <xf86drm.h> ++#include <xf86drmMode.h> ++#include <drm_fourcc.h> ++ ++#define TRACE_ALL 0 ++ ++#define DRM_MODULE "vc4" ++ ++#define ERRSTR strerror(errno) ++ ++struct drm_setup { ++ int conId; ++ uint32_t crtcId; ++ int crtcIdx; ++ uint32_t planeId; ++ unsigned int out_fourcc; ++ struct { ++ int x, y, width, height; ++ } compose; ++}; ++ ++typedef struct drm_aux_s { ++ unsigned int fb_handle; ++ uint32_t bo_handles[AV_DRM_MAX_PLANES]; ++ AVFrame * frame; ++} drm_aux_t; ++ ++// Aux size should only need to be 2, but on a few streams (Hobbit) under FKMS ++// we get initial flicker probably due to dodgy drm timing ++#define AUX_SIZE 3 ++typedef struct drm_display_env_s ++{ ++ AVClass *class; ++ ++ int drm_fd; ++ uint32_t con_id; ++ struct drm_setup setup; ++ enum AVPixelFormat avfmt; ++ ++ int show_all; ++ const char * drm_module; ++ ++ unsigned int ano; ++ drm_aux_t aux[AUX_SIZE]; ++ ++ pthread_t q_thread; ++ sem_t q_sem_in; ++ sem_t q_sem_out; ++ int q_terminate; ++ AVFrame * q_next; ++ ++} drm_display_env_t; ++ ++ ++static int drm_vout_write_trailer(AVFormatContext *s) ++{ ++#if TRACE_ALL ++ av_log(s, AV_LOG_DEBUG, "%s\n", __func__); ++#endif ++ ++ return 0; ++} ++ ++static int drm_vout_write_header(AVFormatContext *s) ++{ ++ const AVCodecParameters * const par = s->streams[0]->codecpar; ++ ++#if TRACE_ALL ++ av_log(s, AV_LOG_DEBUG, "%s\n", __func__); ++#endif ++ if ( s->nb_streams > 1 ++ || par->codec_type != AVMEDIA_TYPE_VIDEO ++ || par->codec_id != AV_CODEC_ID_WRAPPED_AVFRAME) { ++ av_log(s, AV_LOG_ERROR, "Only supports one wrapped avframe stream\n"); ++ return AVERROR(EINVAL); ++ } ++ ++ return 0; ++} ++ ++static int find_plane(struct AVFormatContext * const avctx, ++ const int drmfd, const int crtcidx, const uint32_t format, ++ uint32_t * const pplane_id) ++{ ++ drmModePlaneResPtr planes; ++ drmModePlanePtr plane; ++ drmModeObjectPropertiesPtr props = NULL; ++ drmModePropertyPtr prop = NULL; ++ unsigned int i; ++ unsigned int j; ++ int ret = -1; ++ ++ planes = drmModeGetPlaneResources(drmfd); ++ if (!planes) ++ { ++ av_log(avctx, AV_LOG_WARNING, "drmModeGetPlaneResources failed: %s\n", ERRSTR); ++ return -1; ++ } ++ ++ for (i = 0; i < planes->count_planes; ++i) { ++ plane = drmModeGetPlane(drmfd, planes->planes[i]); ++ if (!planes) ++ { ++ av_log(avctx, AV_LOG_WARNING, "drmModeGetPlane failed: %s\n", ERRSTR); ++ break; ++ } ++ ++ if (!(plane->possible_crtcs & (1 << crtcidx))) { ++ drmModeFreePlane(plane); ++ continue; ++ } ++ ++ for (j = 0; j < plane->count_formats; ++j) { ++ if (plane->formats[j] == format) ++ break; ++ } ++ ++ if (j == plane->count_formats) { ++ drmModeFreePlane(plane); ++ continue; ++ } ++ ++ *pplane_id = plane->plane_id; ++ drmModeFreePlane(plane); ++ break; ++ } ++ ++ if (i == planes->count_planes) { ++ ret = -1; ++ goto fail; ++ } ++ ++ props = drmModeObjectGetProperties(drmfd, *pplane_id, DRM_MODE_OBJECT_PLANE); ++ if (!props) ++ goto fail; ++ for (i = 0; i != props->count_props; ++i) { ++ if (prop) ++ drmModeFreeProperty(prop); ++ prop = drmModeGetProperty(drmfd, props->props[i]); ++ if (!prop) ++ goto fail; ++ if (strcmp("zpos", prop->name) == 0) { ++ if (drmModeObjectSetProperty(drmfd, *pplane_id, DRM_MODE_OBJECT_PLANE, props->props[i], prop->values[1]) == 0) ++ av_log(avctx, AV_LOG_DEBUG, "ZPOS set to %d\n", (int)prop->values[1]); ++ else ++ av_log(avctx, AV_LOG_WARNING, "Failed to set ZPOS on DRM plane\n"); ++ break; ++ } ++ } ++ ++ ret = 0; ++fail: ++ if (props) ++ drmModeFreeObjectProperties(props); ++ if (prop) ++ drmModeFreeProperty(prop); ++ drmModeFreePlaneResources(planes); ++ return ret; ++} ++ ++static void da_uninit(drm_display_env_t * const de, drm_aux_t * da) ++{ ++ if (da->fb_handle != 0) { ++ drmModeRmFB(de->drm_fd, da->fb_handle); ++ da->fb_handle = 0; ++ } ++ ++ for (unsigned int i = 0; i != AV_DRM_MAX_PLANES; ++i) { ++ if (da->bo_handles[i]) { ++ struct drm_gem_close gem_close = {.handle = da->bo_handles[i]}; ++ drmIoctl(de->drm_fd, DRM_IOCTL_GEM_CLOSE, &gem_close); ++ da->bo_handles[i] = 0; ++ } ++ } ++ av_frame_free(&da->frame); ++} ++ ++static int do_display(AVFormatContext * const s, drm_display_env_t * const de, AVFrame * frame) ++{ ++ const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor*)frame->data[0]; ++ drm_aux_t * da = de->aux + de->ano; ++ const uint32_t format = desc->layers[0].format; ++ int ret = 0; ++ ++#if TRACE_ALL ++ av_log(s, AV_LOG_DEBUG, "<<< %s: fd=%d\n", __func__, desc->objects[0].fd); ++#endif ++ ++ if (de->setup.out_fourcc != format) { ++ if (find_plane(s, de->drm_fd, de->setup.crtcIdx, format, &de->setup.planeId)) { ++ av_frame_free(&frame); ++ av_log(s, AV_LOG_WARNING, "No plane for format: %#x\n", format); ++ return -1; ++ } ++ de->setup.out_fourcc = format; ++ } ++ ++ { ++ drmVBlank vbl = { ++ .request = { ++ .type = DRM_VBLANK_RELATIVE, ++ .sequence = 0 ++ } ++ }; ++ ++ while (drmWaitVBlank(de->drm_fd, &vbl)) { ++ if (errno != EINTR) { ++// av_log(s, AV_LOG_WARNING, "drmWaitVBlank failed: %s\n", ERRSTR); ++ break; ++ } ++ } ++ } ++ ++ da_uninit(de, da); ++ ++ { ++ uint32_t pitches[4] = {0}; ++ uint32_t offsets[4] = {0}; ++ uint64_t modifiers[4] = {0}; ++ uint32_t bo_handles[4] = {0}; ++ int has_mods = 0; ++ int i, j, n; ++ ++ da->frame = frame; ++ ++ for (i = 0; i < desc->nb_objects; ++i) { ++ if (drmPrimeFDToHandle(de->drm_fd, desc->objects[i].fd, da->bo_handles + i) != 0) { ++ av_log(s, AV_LOG_WARNING, "drmPrimeFDToHandle[%d](%d) failed: %s\n", i, desc->objects[i].fd, ERRSTR); ++ return -1; ++ } ++ if (desc->objects[i].format_modifier != DRM_FORMAT_MOD_LINEAR && ++ desc->objects[i].format_modifier != DRM_FORMAT_MOD_INVALID) ++ has_mods = 1; ++ } ++ ++ n = 0; ++ for (i = 0; i < desc->nb_layers; ++i) { ++ for (j = 0; j < desc->layers[i].nb_planes; ++j) { ++ const AVDRMPlaneDescriptor * const p = desc->layers[i].planes + j; ++ const AVDRMObjectDescriptor * const obj = desc->objects + p->object_index; ++ pitches[n] = p->pitch; ++ offsets[n] = p->offset; ++ modifiers[n] = obj->format_modifier; ++ bo_handles[n] = da->bo_handles[p->object_index]; ++ ++n; ++ } ++ } ++ ++#if 1 && TRACE_ALL ++ av_log(s, AV_LOG_DEBUG, "%dx%d, fmt: %x, boh=%d,%d,%d,%d, pitch=%d,%d,%d,%d," ++ " offset=%d,%d,%d,%d, mod=%llx,%llx,%llx,%llx\n", ++ av_frame_cropped_width(frame), ++ av_frame_cropped_height(frame), ++ desc->layers[0].format, ++ bo_handles[0], ++ bo_handles[1], ++ bo_handles[2], ++ bo_handles[3], ++ pitches[0], ++ pitches[1], ++ pitches[2], ++ pitches[3], ++ offsets[0], ++ offsets[1], ++ offsets[2], ++ offsets[3], ++ (long long)modifiers[0], ++ (long long)modifiers[1], ++ (long long)modifiers[2], ++ (long long)modifiers[3] ++ ); ++#endif ++ ++ if (drmModeAddFB2WithModifiers(de->drm_fd, ++ av_frame_cropped_width(frame), ++ av_frame_cropped_height(frame), ++ desc->layers[0].format, bo_handles, ++ pitches, offsets, ++ has_mods ? modifiers : NULL, ++ &da->fb_handle, ++ has_mods ? DRM_MODE_FB_MODIFIERS : 0) != 0) { ++ av_log(s, AV_LOG_WARNING, "drmModeAddFB2WithModifiers failed: %s\n", ERRSTR); ++ return -1; ++ } ++ } ++ ++ ret = drmModeSetPlane(de->drm_fd, de->setup.planeId, de->setup.crtcId, ++ da->fb_handle, 0, ++ de->setup.compose.x, de->setup.compose.y, ++ de->setup.compose.width, ++ de->setup.compose.height, ++ 0, 0, ++ av_frame_cropped_width(frame) << 16, ++ av_frame_cropped_height(frame) << 16); ++ ++ if (ret != 0) { ++ av_log(s, AV_LOG_WARNING, "drmModeSetPlane failed: %s\n", ERRSTR); ++ } ++ ++ de->ano = de->ano + 1 >= AUX_SIZE ? 0 : de->ano + 1; ++ ++ return ret; ++} ++ ++static int do_sem_wait(sem_t * const sem, const int nowait) ++{ ++ while (nowait ? sem_trywait(sem) : sem_wait(sem)) { ++ if (errno != EINTR) ++ return -errno; ++ } ++ return 0; ++} ++ ++static void * display_thread(void * v) ++{ ++ AVFormatContext * const s = v; ++ drm_display_env_t * const de = s->priv_data; ++ int i; ++ ++#if TRACE_ALL ++ av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__); ++#endif ++ ++ sem_post(&de->q_sem_out); ++ ++ for (;;) { ++ AVFrame * frame; ++ ++ do_sem_wait(&de->q_sem_in, 0); ++ ++ if (de->q_terminate) ++ break; ++ ++ frame = de->q_next; ++ de->q_next = NULL; ++ sem_post(&de->q_sem_out); ++ ++ do_display(s, de, frame); ++ } ++ ++#if TRACE_ALL ++ av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__); ++#endif ++ ++ for (i = 0; i != AUX_SIZE; ++i) ++ da_uninit(de, de->aux + i); ++ ++ av_frame_free(&de->q_next); ++ ++ return NULL; ++} ++ ++static int drm_vout_write_packet(AVFormatContext *s, AVPacket *pkt) ++{ ++ const AVFrame * const src_frame = (AVFrame *)pkt->data; ++ AVFrame * frame; ++ drm_display_env_t * const de = s->priv_data; ++ int ret; ++ ++#if TRACE_ALL ++ av_log(s, AV_LOG_DEBUG, "%s\n", __func__); ++#endif ++ ++ if ((src_frame->flags & AV_FRAME_FLAG_CORRUPT) != 0) { ++ av_log(s, AV_LOG_WARNING, "Discard corrupt frame: fmt=%d, ts=%" PRId64 "\n", src_frame->format, src_frame->pts); ++ return 0; ++ } ++ ++ if (src_frame->format == AV_PIX_FMT_DRM_PRIME) { ++ frame = av_frame_alloc(); ++ av_frame_ref(frame, src_frame); ++ } ++ else if (src_frame->format == AV_PIX_FMT_VAAPI) { ++ frame = av_frame_alloc(); ++ frame->format = AV_PIX_FMT_DRM_PRIME; ++ if (av_hwframe_map(frame, src_frame, 0) != 0) ++ { ++ av_log(s, AV_LOG_WARNING, "Failed to map frame (format=%d) to DRM_PRiME\n", src_frame->format); ++ av_frame_free(&frame); ++ return AVERROR(EINVAL); ++ } ++ } ++ else { ++ av_log(s, AV_LOG_WARNING, "Frame (format=%d) not DRM_PRiME\n", src_frame->format); ++ return AVERROR(EINVAL); ++ } ++ ++ ret = do_sem_wait(&de->q_sem_out, !de->show_all); ++ if (ret) { ++ av_frame_free(&frame); ++ } ++ else { ++ de->q_next = frame; ++ sem_post(&de->q_sem_in); ++ } ++ ++ return 0; ++} ++ ++static int drm_vout_write_frame(AVFormatContext *s, int stream_index, AVFrame **ppframe, ++ unsigned flags) ++{ ++ av_log(s, AV_LOG_ERROR, "%s: NIF: idx=%d, flags=%#x\n", __func__, stream_index, flags); ++ return AVERROR_PATCHWELCOME; ++} ++ ++static int drm_vout_control_message(AVFormatContext *s, int type, void *data, size_t data_size) ++{ ++#if TRACE_ALL ++ av_log(s, AV_LOG_DEBUG, "%s: %d\n", __func__, type); ++#endif ++ switch(type) { ++ case AV_APP_TO_DEV_WINDOW_REPAINT: ++ return 0; ++ default: ++ break; ++ } ++ return AVERROR(ENOSYS); ++} ++ ++static int find_crtc(struct AVFormatContext * const avctx, int drmfd, struct drm_setup *s, uint32_t * const pConId) ++{ ++ int ret = -1; ++ int i; ++ drmModeRes *res = drmModeGetResources(drmfd); ++ drmModeConnector *c; ++ ++ if(!res) ++ { ++ printf( "drmModeGetResources failed: %s\n", ERRSTR); ++ return -1; ++ } ++ ++ if (res->count_crtcs <= 0) ++ { ++ printf( "drm: no crts\n"); ++ goto fail_res; ++ } ++ ++ if (!s->conId) { ++ fprintf(stderr, ++ "No connector ID specified. Choosing default from list:\n"); ++ ++ for (i = 0; i < res->count_connectors; i++) { ++ drmModeConnector *con = ++ drmModeGetConnector(drmfd, res->connectors[i]); ++ drmModeEncoder *enc = NULL; ++ drmModeCrtc *crtc = NULL; ++ ++ if (con->encoder_id) { ++ enc = drmModeGetEncoder(drmfd, con->encoder_id); ++ if (enc->crtc_id) { ++ crtc = drmModeGetCrtc(drmfd, enc->crtc_id); ++ } ++ } ++ ++ if (!s->conId && crtc) { ++ s->conId = con->connector_id; ++ s->crtcId = crtc->crtc_id; ++ } ++ ++ av_log(avctx, AV_LOG_DEBUG, "Connector %d (crtc %d): type %d, %dx%d%s\n", ++ con->connector_id, ++ crtc ? crtc->crtc_id : 0, ++ con->connector_type, ++ crtc ? crtc->width : 0, ++ crtc ? crtc->height : 0, ++ (s->conId == (int)con->connector_id ? ++ " (chosen)" : "")); ++ ++ if (crtc) ++ drmModeFreeCrtc(crtc); ++ if (enc) ++ drmModeFreeEncoder(enc); ++ if (con) ++ drmModeFreeConnector(con); ++ } ++ ++ if (!s->conId) { ++ av_log(avctx, AV_LOG_ERROR, ++ "No suitable enabled connector found.\n"); ++ return -1;; ++ } ++ } ++ ++ s->crtcIdx = -1; ++ ++ for (i = 0; i < res->count_crtcs; ++i) { ++ if (s->crtcId == res->crtcs[i]) { ++ s->crtcIdx = i; ++ break; ++ } ++ } ++ ++ if (s->crtcIdx == -1) ++ { ++ av_log(avctx, AV_LOG_WARNING, "drm: CRTC %u not found\n", s->crtcId); ++ goto fail_res; ++ } ++ ++ if (res->count_connectors <= 0) ++ { ++ av_log(avctx, AV_LOG_WARNING, "drm: no connectors\n"); ++ goto fail_res; ++ } ++ ++ c = drmModeGetConnector(drmfd, s->conId); ++ if (!c) ++ { ++ av_log(avctx, AV_LOG_WARNING, "drmModeGetConnector failed: %s\n", ERRSTR); ++ goto fail_res; ++ } ++ ++ if (!c->count_modes) ++ { ++ av_log(avctx, AV_LOG_WARNING, "connector supports no mode\n"); ++ goto fail_conn; ++ } ++ ++ { ++ drmModeCrtc *crtc = drmModeGetCrtc(drmfd, s->crtcId); ++ s->compose.x = crtc->x; ++ s->compose.y = crtc->y; ++ s->compose.width = crtc->width; ++ s->compose.height = crtc->height; ++ drmModeFreeCrtc(crtc); ++ } ++ ++ if (pConId) ++ *pConId = c->connector_id; ++ ret = 0; ++ ++fail_conn: ++ drmModeFreeConnector(c); ++ ++fail_res: ++ drmModeFreeResources(res); ++ ++ return ret; ++} ++ ++// deinit is called if init fails so no need to clean up explicity here ++static int drm_vout_init(struct AVFormatContext * s) ++{ ++ drm_display_env_t * const de = s->priv_data; ++ int rv; ++ ++ av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__); ++ ++ de->drm_fd = -1; ++ de->con_id = 0; ++ de->setup = (struct drm_setup){0}; ++ de->q_terminate = 0; ++ ++ if ((de->drm_fd = drmOpen(de->drm_module, NULL)) < 0) ++ { ++ rv = AVERROR(errno); ++ av_log(s, AV_LOG_ERROR, "Failed to drmOpen %s: %s\n", de->drm_module, av_err2str(rv)); ++ return rv; ++ } ++ ++ if (find_crtc(s, de->drm_fd, &de->setup, &de->con_id) != 0) ++ { ++ av_log(s, AV_LOG_ERROR, "failed to find valid mode\n"); ++ rv = AVERROR(EINVAL); ++ goto fail_close; ++ } ++ ++ sem_init(&de->q_sem_in, 0, 0); ++ sem_init(&de->q_sem_out, 0, 0); ++ if (pthread_create(&de->q_thread, NULL, display_thread, s)) { ++ rv = AVERROR(errno); ++ av_log(s, AV_LOG_ERROR, "Failed to create display thread: %s\n", av_err2str(rv)); ++ goto fail_close; ++ } ++ ++ av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__); ++ ++ return 0; ++ ++fail_close: ++ close(de->drm_fd); ++ de->drm_fd = -1; ++ av_log(s, AV_LOG_DEBUG, ">>> %s: FAIL\n", __func__); ++ ++ return rv; ++} ++ ++static void drm_vout_deinit(struct AVFormatContext * s) ++{ ++ drm_display_env_t * const de = s->priv_data; ++ ++ av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__); ++ ++ de->q_terminate = 1; ++ sem_post(&de->q_sem_in); ++ pthread_join(de->q_thread, NULL); ++ sem_destroy(&de->q_sem_in); ++ sem_destroy(&de->q_sem_out); ++ ++ for (unsigned int i = 0; i != AUX_SIZE; ++i) ++ da_uninit(de, de->aux + i); ++ ++ av_frame_free(&de->q_next); ++ ++ if (de->drm_fd >= 0) { ++ close(de->drm_fd); ++ de->drm_fd = -1; ++ } ++ ++ av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__); ++} ++ ++ ++#define OFFSET(x) offsetof(drm_display_env_t, x) ++static const AVOption options[] = { ++ { "show_all", "show all frames", OFFSET(show_all), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM }, ++ { "drm_module", "drm_module name to use, default=" DRM_MODULE, OFFSET(drm_module), AV_OPT_TYPE_STRING, { .str = DRM_MODULE }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM }, ++ { NULL } ++}; ++ ++static const AVClass drm_vout_class = { ++ .class_name = "drm vid outdev", ++ .item_name = av_default_item_name, ++ .option = options, ++ .version = LIBAVUTIL_VERSION_INT, ++ .category = AV_CLASS_CATEGORY_DEVICE_VIDEO_OUTPUT, ++}; ++ ++AVOutputFormat ff_vout_drm_muxer = { ++ .name = "vout_drm", ++ .long_name = NULL_IF_CONFIG_SMALL("Drm video output device"), ++ .priv_data_size = sizeof(drm_display_env_t), ++ .audio_codec = AV_CODEC_ID_NONE, ++ .video_codec = AV_CODEC_ID_WRAPPED_AVFRAME, ++ .write_header = drm_vout_write_header, ++ .write_packet = drm_vout_write_packet, ++ .write_uncoded_frame = drm_vout_write_frame, ++ .write_trailer = drm_vout_write_trailer, ++ .control_message = drm_vout_control_message, ++ .flags = AVFMT_NOFILE | AVFMT_VARIABLE_FPS | AVFMT_NOTIMESTAMPS, ++ .priv_class = &drm_vout_class, ++ .init = drm_vout_init, ++ .deinit = drm_vout_deinit, ++}; ++ +--- /dev/null ++++ b/libavdevice/egl_vout.c +@@ -0,0 +1,781 @@ ++/* ++ * Copyright (c) 2020 John Cox for Raspberry Pi Trading ++ * ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++ ++// *** This module is a work in progress and its utility is strictly ++// limited to testing. ++// Amongst other issues it doesn't wait for the pic to be displayed before ++// returning the buffer so flikering does occur. ++ ++#include <epoxy/gl.h> ++#include <epoxy/egl.h> ++ ++#include "libavutil/opt.h" ++#include "libavutil/avassert.h" ++#include "libavutil/pixdesc.h" ++#include "libavutil/imgutils.h" ++#include "libavutil/hwcontext_drm.h" ++#include "libavformat/internal.h" ++#include "avdevice.h" ++ ++#include "pthread.h" ++#include <semaphore.h> ++#include <stdatomic.h> ++#include <unistd.h> ++ ++#include <X11/Xlib.h> ++#include <X11/Xutil.h> ++ ++#include "libavutil/rpi_sand_fns.h" ++ ++#define TRACE_ALL 0 ++ ++struct egl_setup { ++ int conId; ++ ++ Display *dpy; ++ EGLDisplay egl_dpy; ++ EGLContext ctx; ++ EGLSurface surf; ++ Window win; ++ ++ uint32_t crtcId; ++ int crtcIdx; ++ uint32_t planeId; ++ struct { ++ int x, y, width, height; ++ } compose; ++}; ++ ++typedef struct egl_aux_s { ++ int fd; ++ GLuint texture; ++ ++} egl_aux_t; ++ ++typedef struct egl_display_env_s { ++ AVClass *class; ++ ++ struct egl_setup setup; ++ enum AVPixelFormat avfmt; ++ ++ int show_all; ++ int window_width, window_height; ++ int window_x, window_y; ++ int fullscreen; ++ ++ egl_aux_t aux[32]; ++ ++ pthread_t q_thread; ++ pthread_mutex_t q_lock; ++ sem_t display_start_sem; ++ sem_t q_sem; ++ int q_terminate; ++ AVFrame *q_this; ++ AVFrame *q_next; ++ ++} egl_display_env_t; ++ ++ ++/** ++ * Remove window border/decorations. ++ */ ++static void ++no_border(Display *dpy, Window w) ++{ ++ static const unsigned MWM_HINTS_DECORATIONS = (1 << 1); ++ static const int PROP_MOTIF_WM_HINTS_ELEMENTS = 5; ++ ++ typedef struct { ++ unsigned long flags; ++ unsigned long functions; ++ unsigned long decorations; ++ long inputMode; ++ unsigned long status; ++ } PropMotifWmHints; ++ ++ PropMotifWmHints motif_hints; ++ Atom prop, proptype; ++ unsigned long flags = 0; ++ ++ /* setup the property */ ++ motif_hints.flags = MWM_HINTS_DECORATIONS; ++ motif_hints.decorations = flags; ++ ++ /* get the atom for the property */ ++ prop = XInternAtom(dpy, "_MOTIF_WM_HINTS", True); ++ if (!prop) { ++ /* something went wrong! */ ++ return; ++ } ++ ++ /* not sure this is correct, seems to work, XA_WM_HINTS didn't work */ ++ proptype = prop; ++ ++ XChangeProperty(dpy, w, /* display, window */ ++ prop, proptype, /* property, type */ ++ 32, /* format: 32-bit datums */ ++ PropModeReplace, /* mode */ ++ (unsigned char *)&motif_hints, /* data */ ++ PROP_MOTIF_WM_HINTS_ELEMENTS /* nelements */ ++ ); ++} ++ ++ ++/* ++ * Create an RGB, double-buffered window. ++ * Return the window and context handles. ++ */ ++static int ++make_window(struct AVFormatContext *const s, ++ egl_display_env_t *const de, ++ Display *dpy, EGLDisplay egl_dpy, const char *name, ++ Window *winRet, EGLContext *ctxRet, EGLSurface *surfRet) ++{ ++ int scrnum = DefaultScreen(dpy); ++ XSetWindowAttributes attr; ++ unsigned long mask; ++ Window root = RootWindow(dpy, scrnum); ++ Window win; ++ EGLContext ctx; ++ const int fullscreen = de->fullscreen; ++ EGLConfig config; ++ int x = de->window_x; ++ int y = de->window_y; ++ int width = de->window_width ? de->window_width : 1280; ++ int height = de->window_height ? de->window_height : 720; ++ ++ ++ if (fullscreen) { ++ int scrnum = DefaultScreen(dpy); ++ ++ x = 0; y = 0; ++ width = DisplayWidth(dpy, scrnum); ++ height = DisplayHeight(dpy, scrnum); ++ } ++ ++ { ++ EGLint num_configs; ++ static const EGLint attribs[] = { ++ EGL_RED_SIZE, 1, ++ EGL_GREEN_SIZE, 1, ++ EGL_BLUE_SIZE, 1, ++ EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT, ++ EGL_NONE ++ }; ++ ++ if (!eglChooseConfig(egl_dpy, attribs, &config, 1, &num_configs)) { ++ av_log(s, AV_LOG_ERROR, "Error: couldn't get an EGL visual config\n"); ++ return -1; ++ } ++ } ++ ++ { ++ EGLint vid; ++ if (!eglGetConfigAttrib(egl_dpy, config, EGL_NATIVE_VISUAL_ID, &vid)) { ++ av_log(s, AV_LOG_ERROR, "Error: eglGetConfigAttrib() failed\n"); ++ return -1; ++ } ++ ++ { ++ XVisualInfo visTemplate = { ++ .visualid = vid, ++ }; ++ int num_visuals; ++ XVisualInfo *visinfo = XGetVisualInfo(dpy, VisualIDMask, ++ &visTemplate, &num_visuals); ++ ++ /* window attributes */ ++ attr.background_pixel = 0; ++ attr.border_pixel = 0; ++ attr.colormap = XCreateColormap(dpy, root, visinfo->visual, AllocNone); ++ attr.event_mask = StructureNotifyMask | ExposureMask | KeyPressMask; ++ /* XXX this is a bad way to get a borderless window! */ ++ mask = CWBackPixel | CWBorderPixel | CWColormap | CWEventMask; ++ ++ win = XCreateWindow(dpy, root, x, y, width, height, ++ 0, visinfo->depth, InputOutput, ++ visinfo->visual, mask, &attr); ++ XFree(visinfo); ++ } ++ } ++ ++ if (fullscreen) ++ no_border(dpy, win); ++ ++ /* set hints and properties */ ++ { ++ XSizeHints sizehints; ++ sizehints.x = x; ++ sizehints.y = y; ++ sizehints.width = width; ++ sizehints.height = height; ++ sizehints.flags = USSize | USPosition; ++ XSetNormalHints(dpy, win, &sizehints); ++ XSetStandardProperties(dpy, win, name, name, ++ None, (char **)NULL, 0, &sizehints); ++ } ++ ++ eglBindAPI(EGL_OPENGL_ES_API); ++ ++ { ++ static const EGLint ctx_attribs[] = { ++ EGL_CONTEXT_CLIENT_VERSION, 2, ++ EGL_NONE ++ }; ++ ctx = eglCreateContext(egl_dpy, config, EGL_NO_CONTEXT, ctx_attribs); ++ if (!ctx) { ++ av_log(s, AV_LOG_ERROR, "Error: eglCreateContext failed\n"); ++ return -1; ++ } ++ } ++ ++ ++ XMapWindow(dpy, win); ++ ++ { ++ EGLSurface surf = eglCreateWindowSurface(egl_dpy, config, (EGLNativeWindowType)win, NULL); ++ if (!surf) { ++ av_log(s, AV_LOG_ERROR, "Error: eglCreateWindowSurface failed\n"); ++ return -1; ++ } ++ ++ if (!eglMakeCurrent(egl_dpy, surf, surf, ctx)) { ++ av_log(s, AV_LOG_ERROR, "Error: eglCreateContext failed\n"); ++ return -1; ++ } ++ ++ *winRet = win; ++ *ctxRet = ctx; ++ *surfRet = surf; ++ } ++ ++ return 0; ++} ++ ++static GLint ++compile_shader(struct AVFormatContext *const avctx, GLenum target, const char *source) ++{ ++ GLuint s = glCreateShader(target); ++ ++ if (s == 0) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to create shader\n"); ++ return 0; ++ } ++ ++ glShaderSource(s, 1, (const GLchar **)&source, NULL); ++ glCompileShader(s); ++ ++ { ++ GLint ok; ++ glGetShaderiv(s, GL_COMPILE_STATUS, &ok); ++ ++ if (!ok) { ++ GLchar *info; ++ GLint size; ++ ++ glGetShaderiv(s, GL_INFO_LOG_LENGTH, &size); ++ info = malloc(size); ++ ++ glGetShaderInfoLog(s, size, NULL, info); ++ av_log(avctx, AV_LOG_ERROR, "Failed to compile shader: %ssource:\n%s\n", info, source); ++ ++ return 0; ++ } ++ } ++ ++ return s; ++} ++ ++static GLuint link_program(struct AVFormatContext *const s, GLint vs, GLint fs) ++{ ++ GLuint prog = glCreateProgram(); ++ ++ if (prog == 0) { ++ av_log(s, AV_LOG_ERROR, "Failed to create program\n"); ++ return 0; ++ } ++ ++ glAttachShader(prog, vs); ++ glAttachShader(prog, fs); ++ glLinkProgram(prog); ++ ++ { ++ GLint ok; ++ glGetProgramiv(prog, GL_LINK_STATUS, &ok); ++ if (!ok) { ++ /* Some drivers return a size of 1 for an empty log. This is the size ++ * of a log that contains only a terminating NUL character. ++ */ ++ GLint size; ++ GLchar *info = NULL; ++ glGetProgramiv(prog, GL_INFO_LOG_LENGTH, &size); ++ if (size > 1) { ++ info = malloc(size); ++ glGetProgramInfoLog(prog, size, NULL, info); ++ } ++ ++ av_log(s, AV_LOG_ERROR, "Failed to link: %s\n", ++ (info != NULL) ? info : "<empty log>"); ++ return 0; ++ } ++ } ++ ++ return prog; ++} ++ ++static int ++gl_setup(struct AVFormatContext *const s) ++{ ++ const char *vs = ++ "attribute vec4 pos;\n" ++ "varying vec2 texcoord;\n" ++ "\n" ++ "void main() {\n" ++ " gl_Position = pos;\n" ++ " texcoord.x = (pos.x + 1.0) / 2.0;\n" ++ " texcoord.y = (-pos.y + 1.0) / 2.0;\n" ++ "}\n"; ++ const char *fs = ++ "#extension GL_OES_EGL_image_external : enable\n" ++ "precision mediump float;\n" ++ "uniform samplerExternalOES s;\n" ++ "varying vec2 texcoord;\n" ++ "void main() {\n" ++ " gl_FragColor = texture2D(s, texcoord);\n" ++ "}\n"; ++ ++ GLuint vs_s; ++ GLuint fs_s; ++ GLuint prog; ++ ++ if (!(vs_s = compile_shader(s, GL_VERTEX_SHADER, vs)) || ++ !(fs_s = compile_shader(s, GL_FRAGMENT_SHADER, fs)) || ++ !(prog = link_program(s, vs_s, fs_s))) ++ return -1; ++ ++ glUseProgram(prog); ++ ++ { ++ static const float verts[] = { ++ -1, -1, ++ 1, -1, ++ 1, 1, ++ -1, 1, ++ }; ++ glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 0, verts); ++ } ++ ++ glEnableVertexAttribArray(0); ++ return 0; ++} ++ ++static int egl_vout_write_trailer(AVFormatContext *s) ++{ ++#if TRACE_ALL ++ av_log(s, AV_LOG_INFO, "%s\n", __func__); ++#endif ++ ++ return 0; ++} ++ ++static int egl_vout_write_header(AVFormatContext *s) ++{ ++ const AVCodecParameters *const par = s->streams[0]->codecpar; ++ ++#if TRACE_ALL ++ av_log(s, AV_LOG_INFO, "%s\n", __func__); ++#endif ++ if (s->nb_streams > 1 ++ || par->codec_type != AVMEDIA_TYPE_VIDEO ++ || par->codec_id != AV_CODEC_ID_WRAPPED_AVFRAME) { ++ av_log(s, AV_LOG_ERROR, "Only supports one wrapped avframe stream\n"); ++ return AVERROR(EINVAL); ++ } ++ ++ return 0; ++} ++ ++ ++static int do_display(AVFormatContext *const s, egl_display_env_t *const de, AVFrame *const frame) ++{ ++ const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)frame->data[0]; ++ egl_aux_t *da = NULL; ++ unsigned int i; ++ ++#if TRACE_ALL ++ av_log(s, AV_LOG_INFO, "<<< %s\n", __func__); ++#endif ++ ++ for (i = 0; i != 32; ++i) { ++ if (de->aux[i].fd == -1 || de->aux[i].fd == desc->objects[0].fd) { ++ da = de->aux + i; ++ break; ++ } ++ } ++ ++ if (da == NULL) { ++ av_log(s, AV_LOG_INFO, "%s: Out of handles\n", __func__); ++ return AVERROR(EINVAL); ++ } ++ ++ if (da->texture == 0) { ++ EGLint attribs[50]; ++ EGLint *a = attribs; ++ int i, j; ++ static const EGLint anames[] = { ++ EGL_DMA_BUF_PLANE0_FD_EXT, ++ EGL_DMA_BUF_PLANE0_OFFSET_EXT, ++ EGL_DMA_BUF_PLANE0_PITCH_EXT, ++ EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT, ++ EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT, ++ EGL_DMA_BUF_PLANE1_FD_EXT, ++ EGL_DMA_BUF_PLANE1_OFFSET_EXT, ++ EGL_DMA_BUF_PLANE1_PITCH_EXT, ++ EGL_DMA_BUF_PLANE1_MODIFIER_LO_EXT, ++ EGL_DMA_BUF_PLANE1_MODIFIER_HI_EXT, ++ EGL_DMA_BUF_PLANE2_FD_EXT, ++ EGL_DMA_BUF_PLANE2_OFFSET_EXT, ++ EGL_DMA_BUF_PLANE2_PITCH_EXT, ++ EGL_DMA_BUF_PLANE2_MODIFIER_LO_EXT, ++ EGL_DMA_BUF_PLANE2_MODIFIER_HI_EXT, ++ }; ++ const EGLint *b = anames; ++ ++ *a++ = EGL_WIDTH; ++ *a++ = av_frame_cropped_width(frame); ++ *a++ = EGL_HEIGHT; ++ *a++ = av_frame_cropped_height(frame); ++ *a++ = EGL_LINUX_DRM_FOURCC_EXT; ++ *a++ = desc->layers[0].format; ++ ++ for (i = 0; i < desc->nb_layers; ++i) { ++ for (j = 0; j < desc->layers[i].nb_planes; ++j) { ++ const AVDRMPlaneDescriptor *const p = desc->layers[i].planes + j; ++ const AVDRMObjectDescriptor *const obj = desc->objects + p->object_index; ++ *a++ = *b++; ++ *a++ = obj->fd; ++ *a++ = *b++; ++ *a++ = p->offset; ++ *a++ = *b++; ++ *a++ = p->pitch; ++ if (obj->format_modifier == 0) { ++ b += 2; ++ } ++ else { ++ *a++ = *b++; ++ *a++ = (EGLint)(obj->format_modifier & 0xFFFFFFFF); ++ *a++ = *b++; ++ *a++ = (EGLint)(obj->format_modifier >> 32); ++ } ++ } ++ } ++ ++ *a = EGL_NONE; ++ ++#if TRACE_ALL ++ for (a = attribs, i = 0; *a != EGL_NONE; a += 2, ++i) { ++ av_log(s, AV_LOG_INFO, "[%2d] %4x: %d\n", i, a[0], a[1]); ++ } ++#endif ++ { ++ const EGLImage image = eglCreateImageKHR(de->setup.egl_dpy, ++ EGL_NO_CONTEXT, ++ EGL_LINUX_DMA_BUF_EXT, ++ NULL, attribs); ++ if (!image) { ++ av_log(s, AV_LOG_ERROR, "Failed to import fd %d\n", desc->objects[0].fd); ++ return -1; ++ } ++ ++ glGenTextures(1, &da->texture); ++ glBindTexture(GL_TEXTURE_EXTERNAL_OES, da->texture); ++ glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MIN_FILTER, GL_LINEAR); ++ glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MAG_FILTER, GL_LINEAR); ++ glEGLImageTargetTexture2DOES(GL_TEXTURE_EXTERNAL_OES, image); ++ ++ eglDestroyImageKHR(de->setup.egl_dpy, image); ++ } ++ ++ da->fd = desc->objects[0].fd; ++ } ++ ++ glClearColor(0.5, 0.5, 0.5, 0.5); ++ glClear(GL_COLOR_BUFFER_BIT); ++ ++ glBindTexture(GL_TEXTURE_EXTERNAL_OES, da->texture); ++ glDrawArrays(GL_TRIANGLE_FAN, 0, 4); ++ eglSwapBuffers(de->setup.egl_dpy, de->setup.surf); ++ ++ glDeleteTextures(1, &da->texture); ++ da->texture = 0; ++ da->fd = -1; ++ ++ return 0; ++} ++ ++static void* display_thread(void *v) ++{ ++ AVFormatContext *const s = v; ++ egl_display_env_t *const de = s->priv_data; ++ ++#if TRACE_ALL ++ av_log(s, AV_LOG_INFO, "<<< %s\n", __func__); ++#endif ++ { ++ EGLint egl_major, egl_minor; ++ ++ de->setup.dpy = XOpenDisplay(NULL); ++ if (!de->setup.dpy) { ++ av_log(s, AV_LOG_ERROR, "Couldn't open X display\n"); ++ goto fail; ++ } ++ ++ de->setup.egl_dpy = eglGetDisplay(de->setup.dpy); ++ if (!de->setup.egl_dpy) { ++ av_log(s, AV_LOG_ERROR, "eglGetDisplay() failed\n"); ++ goto fail; ++ } ++ ++ if (!eglInitialize(de->setup.egl_dpy, &egl_major, &egl_minor)) { ++ av_log(s, AV_LOG_ERROR, "Error: eglInitialize() failed\n"); ++ goto fail; ++ } ++ ++ av_log(s, AV_LOG_INFO, "EGL version %d.%d\n", egl_major, egl_minor); ++ ++ if (!epoxy_has_egl_extension(de->setup.egl_dpy, "EGL_KHR_image_base")) { ++ av_log(s, AV_LOG_ERROR, "Missing EGL KHR image extension\n"); ++ goto fail; ++ } ++ } ++ ++ if (!de->window_width || !de->window_height) { ++ de->window_width = 1280; ++ de->window_height = 720; ++ } ++ if (make_window(s, de, de->setup.dpy, de->setup.egl_dpy, "ffmpeg-vout", ++ &de->setup.win, &de->setup.ctx, &de->setup.surf)) { ++ av_log(s, AV_LOG_ERROR, "%s: make_window failed\n", __func__); ++ goto fail; ++ } ++ ++ if (gl_setup(s)) { ++ av_log(s, AV_LOG_ERROR, "%s: gl_setup failed\n", __func__); ++ goto fail; ++ } ++ ++#if TRACE_ALL ++ av_log(s, AV_LOG_INFO, "--- %s: Start done\n", __func__); ++#endif ++ sem_post(&de->display_start_sem); ++ ++ for (;;) { ++ AVFrame *frame; ++ ++ while (sem_wait(&de->q_sem) != 0) { ++ av_assert0(errno == EINTR); ++ } ++ ++ if (de->q_terminate) ++ break; ++ ++ pthread_mutex_lock(&de->q_lock); ++ frame = de->q_next; ++ de->q_next = NULL; ++ pthread_mutex_unlock(&de->q_lock); ++ ++ do_display(s, de, frame); ++ ++ av_frame_free(&de->q_this); ++ de->q_this = frame; ++ } ++ ++#if TRACE_ALL ++ av_log(s, AV_LOG_INFO, ">>> %s\n", __func__); ++#endif ++ ++ return NULL; ++ ++fail: ++#if TRACE_ALL ++ av_log(s, AV_LOG_INFO, ">>> %s: FAIL\n", __func__); ++#endif ++ de->q_terminate = 1; ++ sem_post(&de->display_start_sem); ++ ++ return NULL; ++} ++ ++static int egl_vout_write_packet(AVFormatContext *s, AVPacket *pkt) ++{ ++ const AVFrame *const src_frame = (AVFrame *)pkt->data; ++ AVFrame *frame; ++ egl_display_env_t *const de = s->priv_data; ++ ++#if TRACE_ALL ++ av_log(s, AV_LOG_INFO, "%s\n", __func__); ++#endif ++ ++ if (src_frame->format == AV_PIX_FMT_DRM_PRIME) { ++ frame = av_frame_alloc(); ++ av_frame_ref(frame, src_frame); ++ } ++ else if (src_frame->format == AV_PIX_FMT_VAAPI) { ++ frame = av_frame_alloc(); ++ frame->format = AV_PIX_FMT_DRM_PRIME; ++ if (av_hwframe_map(frame, src_frame, 0) != 0) { ++ av_log(s, AV_LOG_WARNING, "Failed to map frame (format=%d) to DRM_PRiME\n", src_frame->format); ++ av_frame_free(&frame); ++ return AVERROR(EINVAL); ++ } ++ } ++ else { ++ av_log(s, AV_LOG_WARNING, "Frame (format=%d) not DRM_PRiME\n", src_frame->format); ++ return AVERROR(EINVAL); ++ } ++ ++ // Really hacky sync ++ while (de->show_all && de->q_next) { ++ usleep(3000); ++ } ++ ++ pthread_mutex_lock(&de->q_lock); ++ { ++ AVFrame *const t = de->q_next; ++ de->q_next = frame; ++ frame = t; ++ } ++ pthread_mutex_unlock(&de->q_lock); ++ ++ if (frame == NULL) ++ sem_post(&de->q_sem); ++ else ++ av_frame_free(&frame); ++ ++ return 0; ++} ++ ++static int egl_vout_write_frame(AVFormatContext *s, int stream_index, AVFrame **ppframe, ++ unsigned flags) ++{ ++ av_log(s, AV_LOG_ERROR, "%s: NIF: idx=%d, flags=%#x\n", __func__, stream_index, flags); ++ return AVERROR_PATCHWELCOME; ++} ++ ++static int egl_vout_control_message(AVFormatContext *s, int type, void *data, size_t data_size) ++{ ++#if TRACE_ALL ++ av_log(s, AV_LOG_INFO, "%s: %d\n", __func__, type); ++#endif ++ switch (type) { ++ case AV_APP_TO_DEV_WINDOW_REPAINT: ++ return 0; ++ default: ++ break; ++ } ++ return AVERROR(ENOSYS); ++} ++ ++// deinit is called if init fails so no need to clean up explicity here ++static int egl_vout_init(struct AVFormatContext *s) ++{ ++ egl_display_env_t *const de = s->priv_data; ++ unsigned int i; ++ ++ av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__); ++ ++ de->setup = (struct egl_setup) { 0 }; ++ ++ for (i = 0; i != 32; ++i) { ++ de->aux[i].fd = -1; ++ } ++ ++ de->q_terminate = 0; ++ pthread_mutex_init(&de->q_lock, NULL); ++ sem_init(&de->q_sem, 0, 0); ++ sem_init(&de->display_start_sem, 0, 0); ++ av_assert0(pthread_create(&de->q_thread, NULL, display_thread, s) == 0); ++ ++ sem_wait(&de->display_start_sem); ++ if (de->q_terminate) { ++ av_log(s, AV_LOG_ERROR, "%s: Display startup failure\n", __func__); ++ return -1; ++ } ++ ++ av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__); ++ ++ return 0; ++} ++ ++static void egl_vout_deinit(struct AVFormatContext *s) ++{ ++ egl_display_env_t *const de = s->priv_data; ++ ++ av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__); ++ ++ de->q_terminate = 1; ++ sem_post(&de->q_sem); ++ pthread_join(de->q_thread, NULL); ++ sem_destroy(&de->q_sem); ++ pthread_mutex_destroy(&de->q_lock); ++ ++ av_frame_free(&de->q_next); ++ av_frame_free(&de->q_this); ++ ++ av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__); ++} ++ ++#define OFFSET(x) offsetof(egl_display_env_t, x) ++static const AVOption options[] = { ++ { "show_all", "show all frames", OFFSET(show_all), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM }, ++ { "window_size", "set window forced size", OFFSET(window_width), AV_OPT_TYPE_IMAGE_SIZE, { .str = NULL }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM }, ++ { "window_x", "set window x offset", OFFSET(window_x), AV_OPT_TYPE_INT, { .i64 = 0 }, -INT_MAX, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM }, ++ { "window_y", "set window y offset", OFFSET(window_y), AV_OPT_TYPE_INT, { .i64 = 0 }, -INT_MAX, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM }, ++ { "fullscreen", "set fullscreen display", OFFSET(fullscreen), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM }, ++ { NULL } ++ ++}; ++ ++static const AVClass egl_vout_class = { ++ .class_name = "egl vid outdev", ++ .item_name = av_default_item_name, ++ .option = options, ++ .version = LIBAVUTIL_VERSION_INT, ++ .category = AV_CLASS_CATEGORY_DEVICE_VIDEO_OUTPUT, ++}; ++ ++AVOutputFormat ff_vout_egl_muxer = { ++ .name = "vout_egl", ++ .long_name = NULL_IF_CONFIG_SMALL("Egl video output device"), ++ .priv_data_size = sizeof(egl_display_env_t), ++ .audio_codec = AV_CODEC_ID_NONE, ++ .video_codec = AV_CODEC_ID_WRAPPED_AVFRAME, ++ .write_header = egl_vout_write_header, ++ .write_packet = egl_vout_write_packet, ++ .write_uncoded_frame = egl_vout_write_frame, ++ .write_trailer = egl_vout_write_trailer, ++ .control_message = egl_vout_control_message, ++ .flags = AVFMT_NOFILE | AVFMT_VARIABLE_FPS | AVFMT_NOTIMESTAMPS, ++ .priv_class = &egl_vout_class, ++ .init = egl_vout_init, ++ .deinit = egl_vout_deinit, ++}; ++ +--- a/libavfilter/Makefile ++++ b/libavfilter/Makefile +@@ -254,6 +254,7 @@ OBJS-$(CONFIG_DEFLATE_FILTER) + OBJS-$(CONFIG_DEFLICKER_FILTER) += vf_deflicker.o + OBJS-$(CONFIG_DEINTERLACE_QSV_FILTER) += vf_deinterlace_qsv.o + OBJS-$(CONFIG_DEINTERLACE_VAAPI_FILTER) += vf_deinterlace_vaapi.o vaapi_vpp.o ++OBJS-$(CONFIG_DEINTERLACE_V4L2M2M_FILTER) += vf_deinterlace_v4l2m2m.o + OBJS-$(CONFIG_DEJUDDER_FILTER) += vf_dejudder.o + OBJS-$(CONFIG_DELOGO_FILTER) += vf_delogo.o + OBJS-$(CONFIG_DENOISE_VAAPI_FILTER) += vf_misc_vaapi.o vaapi_vpp.o +@@ -509,6 +510,7 @@ OBJS-$(CONFIG_TRANSPOSE_VAAPI_FILTER) + OBJS-$(CONFIG_TRANSPOSE_VULKAN_FILTER) += vf_transpose_vulkan.o vulkan.o vulkan_filter.o + OBJS-$(CONFIG_TRIM_FILTER) += trim.o + OBJS-$(CONFIG_UNPREMULTIPLY_FILTER) += vf_premultiply.o framesync.o ++OBJS-$(CONFIG_UNSAND_FILTER) += vf_unsand.o + OBJS-$(CONFIG_UNSHARP_FILTER) += vf_unsharp.o + OBJS-$(CONFIG_UNSHARP_OPENCL_FILTER) += vf_unsharp_opencl.o opencl.o \ + opencl/unsharp.o +--- a/libavfilter/aarch64/Makefile ++++ b/libavfilter/aarch64/Makefile +@@ -1,3 +1,5 @@ ++OBJS-$(CONFIG_BWDIF_FILTER) += aarch64/vf_bwdif_init_aarch64.o + OBJS-$(CONFIG_NLMEANS_FILTER) += aarch64/vf_nlmeans_init.o + ++NEON-OBJS-$(CONFIG_BWDIF_FILTER) += aarch64/vf_bwdif_neon.o + NEON-OBJS-$(CONFIG_NLMEANS_FILTER) += aarch64/vf_nlmeans_neon.o +--- /dev/null ++++ b/libavfilter/aarch64/vf_bwdif_init_aarch64.c +@@ -0,0 +1,125 @@ ++/* ++ * bwdif aarch64 NEON optimisations ++ * ++ * Copyright (c) 2023 John Cox <jc@kynesim.co.uk> ++ * ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#include "libavutil/common.h" ++#include "libavfilter/bwdif.h" ++#include "libavutil/aarch64/cpu.h" ++ ++void ff_bwdif_filter_edge_neon(void *dst1, void *prev1, void *cur1, void *next1, ++ int w, int prefs, int mrefs, int prefs2, int mrefs2, ++ int parity, int clip_max, int spat); ++ ++void ff_bwdif_filter_intra_neon(void *dst1, void *cur1, int w, int prefs, int mrefs, ++ int prefs3, int mrefs3, int parity, int clip_max); ++ ++void ff_bwdif_filter_line_neon(void *dst1, void *prev1, void *cur1, void *next1, ++ int w, int prefs, int mrefs, int prefs2, int mrefs2, ++ int prefs3, int mrefs3, int prefs4, int mrefs4, ++ int parity, int clip_max); ++ ++void ff_bwdif_filter_line3_neon(void * dst1, int d_stride, ++ const void * prev1, const void * cur1, const void * next1, int s_stride, ++ int w, int parity, int clip_max); ++ ++ ++static void filter_line3_helper(void * dst1, int d_stride, ++ const void * prev1, const void * cur1, const void * next1, int s_stride, ++ int w, int parity, int clip_max) ++{ ++ // Asm works on 16 byte chunks ++ // If w is a multiple of 16 then all is good - if not then if width rounded ++ // up to nearest 16 will fit in both src & dst strides then allow the asm ++ // to write over the padding bytes as that is almost certainly faster than ++ // having to invoke the C version to clean up the tail. ++ const int w1 = FFALIGN(w, 16); ++ const int w0 = clip_max != 255 ? 0 : ++ d_stride <= w1 && s_stride <= w1 ? w : w & ~15; ++ ++ ff_bwdif_filter_line3_neon(dst1, d_stride, ++ prev1, cur1, next1, s_stride, ++ w0, parity, clip_max); ++ ++ if (w0 < w) ++ ff_bwdif_filter_line3_c((char *)dst1 + w0, d_stride, ++ (const char *)prev1 + w0, (const char *)cur1 + w0, (const char *)next1 + w0, s_stride, ++ w - w0, parity, clip_max); ++} ++ ++static void filter_line_helper(void *dst1, void *prev1, void *cur1, void *next1, ++ int w, int prefs, int mrefs, int prefs2, int mrefs2, ++ int prefs3, int mrefs3, int prefs4, int mrefs4, ++ int parity, int clip_max) ++{ ++ const int w0 = clip_max != 255 ? 0 : w & ~15; ++ ++ ff_bwdif_filter_line_neon(dst1, prev1, cur1, next1, ++ w0, prefs, mrefs, prefs2, mrefs2, prefs3, mrefs3, prefs4, mrefs4, parity, clip_max); ++ ++ if (w0 < w) ++ ff_bwdif_filter_line_c((char *)dst1 + w0, (char *)prev1 + w0, (char *)cur1 + w0, (char *)next1 + w0, ++ w - w0, prefs, mrefs, prefs2, mrefs2, prefs3, mrefs3, prefs4, mrefs4, parity, clip_max); ++} ++ ++static void filter_edge_helper(void *dst1, void *prev1, void *cur1, void *next1, ++ int w, int prefs, int mrefs, int prefs2, int mrefs2, ++ int parity, int clip_max, int spat) ++{ ++ const int w0 = clip_max != 255 ? 0 : w & ~15; ++ ++ ff_bwdif_filter_edge_neon(dst1, prev1, cur1, next1, w0, prefs, mrefs, prefs2, mrefs2, ++ parity, clip_max, spat); ++ ++ if (w0 < w) ++ ff_bwdif_filter_edge_c((char *)dst1 + w0, (char *)prev1 + w0, (char *)cur1 + w0, (char *)next1 + w0, ++ w - w0, prefs, mrefs, prefs2, mrefs2, ++ parity, clip_max, spat); ++} ++ ++static void filter_intra_helper(void *dst1, void *cur1, int w, int prefs, int mrefs, ++ int prefs3, int mrefs3, int parity, int clip_max) ++{ ++ const int w0 = clip_max != 255 ? 0 : w & ~15; ++ ++ ff_bwdif_filter_intra_neon(dst1, cur1, w0, prefs, mrefs, prefs3, mrefs3, parity, clip_max); ++ ++ if (w0 < w) ++ ff_bwdif_filter_intra_c((char *)dst1 + w0, (char *)cur1 + w0, ++ w - w0, prefs, mrefs, prefs3, mrefs3, parity, clip_max); ++} ++ ++void ++ff_bwdif_init_aarch64(BWDIFContext *s, int bit_depth) ++{ ++ const int cpu_flags = av_get_cpu_flags(); ++ ++ if (bit_depth != 8) ++ return; ++ ++ if (!have_neon(cpu_flags)) ++ return; ++ ++ s->filter_intra = filter_intra_helper; ++ s->filter_line = filter_line_helper; ++ s->filter_edge = filter_edge_helper; ++ s->filter_line3 = filter_line3_helper; ++} ++ +--- /dev/null ++++ b/libavfilter/aarch64/vf_bwdif_neon.S +@@ -0,0 +1,788 @@ ++/* ++ * bwdif aarch64 NEON optimisations ++ * ++ * Copyright (c) 2023 John Cox <jc@kynesim.co.uk> ++ * ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++ ++#include "libavutil/aarch64/asm.S" ++ ++// Space taken on the stack by an int (32-bit) ++#ifdef __APPLE__ ++.set SP_INT, 4 ++#else ++.set SP_INT, 8 ++#endif ++ ++.macro SQSHRUNN b, s0, s1, s2, s3, n ++ sqshrun \s0\().4h, \s0\().4s, #\n - 8 ++ sqshrun2 \s0\().8h, \s1\().4s, #\n - 8 ++ sqshrun \s1\().4h, \s2\().4s, #\n - 8 ++ sqshrun2 \s1\().8h, \s3\().4s, #\n - 8 ++ uzp2 \b\().16b, \s0\().16b, \s1\().16b ++.endm ++ ++.macro SMULL4K a0, a1, a2, a3, s0, s1, k ++ smull \a0\().4s, \s0\().4h, \k ++ smull2 \a1\().4s, \s0\().8h, \k ++ smull \a2\().4s, \s1\().4h, \k ++ smull2 \a3\().4s, \s1\().8h, \k ++.endm ++ ++.macro UMULL4K a0, a1, a2, a3, s0, s1, k ++ umull \a0\().4s, \s0\().4h, \k ++ umull2 \a1\().4s, \s0\().8h, \k ++ umull \a2\().4s, \s1\().4h, \k ++ umull2 \a3\().4s, \s1\().8h, \k ++.endm ++ ++.macro UMLAL4K a0, a1, a2, a3, s0, s1, k ++ umlal \a0\().4s, \s0\().4h, \k ++ umlal2 \a1\().4s, \s0\().8h, \k ++ umlal \a2\().4s, \s1\().4h, \k ++ umlal2 \a3\().4s, \s1\().8h, \k ++.endm ++ ++.macro UMLSL4K a0, a1, a2, a3, s0, s1, k ++ umlsl \a0\().4s, \s0\().4h, \k ++ umlsl2 \a1\().4s, \s0\().8h, \k ++ umlsl \a2\().4s, \s1\().4h, \k ++ umlsl2 \a3\().4s, \s1\().8h, \k ++.endm ++ ++// int b = m2s1 - m1; ++// int f = p2s1 - p1; ++// int dc = c0s1 - m1; ++// int de = c0s1 - p1; ++// int sp_max = FFMIN(p1 - c0s1, m1 - c0s1); ++// sp_max = FFMIN(sp_max, FFMAX(-b,-f)); ++// int sp_min = FFMIN(c0s1 - p1, c0s1 - m1); ++// sp_min = FFMIN(sp_min, FFMAX(b,f)); ++// diff = diff == 0 ? 0 : FFMAX3(diff, sp_min, sp_max); ++.macro SPAT_CHECK diff, m2s1, m1, c0s1, p1, p2s1, t0, t1, t2, t3 ++ uqsub \t0\().16b, \p1\().16b, \c0s1\().16b ++ uqsub \t2\().16b, \m1\().16b, \c0s1\().16b ++ umin \t2\().16b, \t0\().16b, \t2\().16b ++ ++ uqsub \t1\().16b, \m1\().16b, \m2s1\().16b ++ uqsub \t3\().16b, \p1\().16b, \p2s1\().16b ++ umax \t3\().16b, \t3\().16b, \t1\().16b ++ umin \t3\().16b, \t3\().16b, \t2\().16b ++ ++ uqsub \t0\().16b, \c0s1\().16b, \p1\().16b ++ uqsub \t2\().16b, \c0s1\().16b, \m1\().16b ++ umin \t2\().16b, \t0\().16b, \t2\().16b ++ ++ uqsub \t1\().16b, \m2s1\().16b, \m1\().16b ++ uqsub \t0\().16b, \p2s1\().16b, \p1\().16b ++ umax \t0\().16b, \t0\().16b, \t1\().16b ++ umin \t2\().16b, \t2\().16b, \t0\().16b ++ ++ cmeq \t1\().16b, \diff\().16b, #0 ++ umax \diff\().16b, \diff\().16b, \t3\().16b ++ umax \diff\().16b, \diff\().16b, \t2\().16b ++ bic \diff\().16b, \diff\().16b, \t1\().16b ++.endm ++ ++// i0 = s0; ++// if (i0 > d0 + diff0) ++// i0 = d0 + diff0; ++// else if (i0 < d0 - diff0) ++// i0 = d0 - diff0; ++// ++// i0 = s0 is safe ++.macro DIFF_CLIP i0, s0, d0, diff, t0, t1 ++ uqadd \t0\().16b, \d0\().16b, \diff\().16b ++ uqsub \t1\().16b, \d0\().16b, \diff\().16b ++ umin \i0\().16b, \s0\().16b, \t0\().16b ++ umax \i0\().16b, \i0\().16b, \t1\().16b ++.endm ++ ++// i0 = FFABS(m1 - p1) > td0 ? i1 : i2; ++// DIFF_CLIP ++// ++// i0 = i1 is safe ++.macro INTERPOL i0, i1, i2, m1, d0, p1, td0, diff, t0, t1, t2 ++ uabd \t0\().16b, \m1\().16b, \p1\().16b ++ cmhi \t0\().16b, \t0\().16b, \td0\().16b ++ bsl \t0\().16b, \i1\().16b, \i2\().16b ++ DIFF_CLIP \i0, \t0, \d0, \diff, \t1, \t2 ++.endm ++ ++.macro PUSH_VREGS ++ stp d8, d9, [sp, #-64]! ++ stp d10, d11, [sp, #16] ++ stp d12, d13, [sp, #32] ++ stp d14, d15, [sp, #48] ++.endm ++ ++.macro POP_VREGS ++ ldp d14, d15, [sp, #48] ++ ldp d12, d13, [sp, #32] ++ ldp d10, d11, [sp, #16] ++ ldp d8, d9, [sp], #64 ++.endm ++ ++.macro LDR_COEFFS d, t0 ++ movrel \t0, coeffs, 0 ++ ld1 {\d\().8h}, [\t0] ++.endm ++ ++// static const uint16_t coef_lf[2] = { 4309, 213 }; ++// static const uint16_t coef_hf[3] = { 5570, 3801, 1016 }; ++// static const uint16_t coef_sp[2] = { 5077, 981 }; ++ ++const coeffs, align=4 // align 4 means align on 2^4 boundry ++ .hword 4309 * 4, 213 * 4 // lf[0]*4 = v0.h[0] ++ .hword 5570, 3801, 1016, -3801 // hf[0] = v0.h[2], -hf[1] = v0.h[5] ++ .hword 5077, 981 // sp[0] = v0.h[6] ++endconst ++ ++// =========================================================================== ++// ++// void ff_bwdif_filter_line3_neon( ++// void * dst1, // x0 ++// int d_stride, // w1 ++// const void * prev1, // x2 ++// const void * cur1, // x3 ++// const void * next1, // x4 ++// int s_stride, // w5 ++// int w, // w6 ++// int parity, // w7 ++// int clip_max); // [sp, #0] (Ignored) ++ ++function ff_bwdif_filter_line3_neon, export=1 ++ // Sanity check w ++ cmp w6, #0 ++ ble 99f ++ ++ LDR_COEFFS v0, x17 ++ ++// #define prev2 cur ++// const uint8_t * restrict next2 = parity ? prev : next; ++ cmp w7, #0 ++ csel x17, x2, x4, ne ++ ++ // We want all the V registers - save all the ones we must ++ PUSH_VREGS ++ ++ // Some rearrangement of initial values for nice layout of refs in regs ++ mov w10, w6 // w10 = loop count ++ neg w9, w5 // w9 = mref ++ lsl w8, w9, #1 // w8 = mref2 ++ add w7, w9, w9, LSL #1 // w7 = mref3 ++ lsl w6, w9, #2 // w6 = mref4 ++ mov w11, w5 // w11 = pref ++ lsl w12, w5, #1 // w12 = pref2 ++ add w13, w5, w5, LSL #1 // w13 = pref3 ++ lsl w14, w5, #2 // w14 = pref4 ++ add w15, w5, w5, LSL #2 // w15 = pref5 ++ add w16, w14, w12 // w16 = pref6 ++ ++ lsl w5, w1, #1 // w5 = d_stride * 2 ++ ++// for (x = 0; x < w; x++) { ++// int diff0, diff2; ++// int d0, d2; ++// int temporal_diff0, temporal_diff2; ++// ++// int i1, i2; ++// int j1, j2; ++// int p6, p5, p4, p3, p2, p1, c0, m1, m2, m3, m4; ++ ++10: ++// c0 = prev2[0] + next2[0]; // c0 = v20, v21 ++// d0 = c0 >> 1; // d0 = v10 ++// temporal_diff0 = FFABS(prev2[0] - next2[0]); // td0 = v11 ++ ldr q31, [x3] ++ ldr q21, [x17] ++ uhadd v10.16b, v31.16b, v21.16b ++ uabd v11.16b, v31.16b, v21.16b ++ uaddl v20.8h, v21.8b, v31.8b ++ uaddl2 v21.8h, v21.16b, v31.16b ++ ++ ldr q31, [x3, w6, sxtw] ++ ldr q23, [x17, w6, sxtw] ++ ++// i1 = coef_hf[0] * c0; // i1 = v2-v5 ++ UMULL4K v2, v3, v4, v5, v20, v21, v0.h[2] ++ ++ ldr q30, [x3, w14, sxtw] ++ ldr q25, [x17, w14, sxtw] ++ ++// m4 = prev2[mrefs4] + next2[mrefs4]; // m4 = v22,v23 ++ uaddl v22.8h, v23.8b, v31.8b ++ uaddl2 v23.8h, v23.16b, v31.16b ++ ++// p4 = prev2[prefs4] + next2[prefs4]; // p4 = v24,v25, (p4 >> 1) = v12 ++ uhadd v12.16b, v25.16b, v30.16b ++ uaddl v24.8h, v25.8b, v30.8b ++ uaddl2 v25.8h, v25.16b, v30.16b ++ ++// j1 = -coef_hf[1] * (c0 + p4); // j1 = v6-v9 (-c0:v20,v21) ++ add v20.8h, v20.8h, v24.8h ++ add v21.8h, v21.8h, v25.8h ++ SMULL4K v6, v7, v8, v9, v20, v21, v0.h[5] ++ ++// m3 = cur[mrefs3]; // m3 = v20 ++ ldr q20, [x3, w7, sxtw] ++ ++// p3 = cur[prefs3]; // p3 = v21 ++ ldr q21, [x3, w13, sxtw] ++ ++// i1 += coef_hf[2] * (m4 + p4); // (-m4:v22,v23) (-p4:v24,v25) ++ add v22.8h, v22.8h, v24.8h ++ add v23.8h, v23.8h, v25.8h ++ UMLAL4K v2, v3, v4, v5, v22, v23, v0.h[4] ++ ++ ldr q29, [x3, w8, sxtw] ++ ldr q23, [x17, w8, sxtw] ++ ++// i1 -= coef_lf[1] * 4 * (m3 + p3); // - ++ uaddl v30.8h, v20.8b, v21.8b ++ uaddl2 v31.8h, v20.16b, v21.16b ++ ++ ldr q28, [x3, w16, sxtw] ++ ldr q25, [x17, w16, sxtw] ++ ++ UMLSL4K v2, v3, v4, v5, v30, v31, v0.h[1] ++ ++// m2 = prev2[mrefs2] + next2[mrefs2]; // m2 = v22,v23, (m2 >> 1) = v13 ++ uhadd v13.16b, v23.16b, v29.16b ++ uaddl v22.8h, v23.8b, v29.8b ++ uaddl2 v23.8h, v23.16b, v29.16b ++ ++ ldr q31, [x3, w12, sxtw] ++ ldr q27, [x17, w12, sxtw] ++ ++// p6 = prev2[prefs6] + next2[prefs6]; // p6 = v24,v25 ++ uaddl v24.8h, v25.8b, v28.8b ++ uaddl2 v25.8h, v25.16b, v28.16b ++ ++// j1 += coef_hf[2] * (m2 + p6); // (-p6:v24,v25) ++ add v24.8h, v24.8h, v22.8h ++ add v25.8h, v25.8h, v23.8h ++ UMLAL4K v6, v7, v8, v9, v24, v25, v0.h[4] ++ ++// m1 = cur[mrefs]; // m1 = v24 ++ ldr q24, [x3, w9, sxtw] ++ ++// p5 = cur[prefs5]; // p5 = v25 ++ ldr q25, [x3, w15, sxtw] ++ ++// p2 = prev2[prefs2] + next2[prefs2]; // p2 = v26, v27 ++// temporal_diff2 = FFABS(prev2[prefs2] - next2[prefs2]); // td2 = v14 ++// d2 = p2 >> 1; // d2 = v15 ++ uabd v14.16b, v31.16b, v27.16b ++ uhadd v15.16b, v31.16b, v27.16b ++ uaddl v26.8h, v27.8b, v31.8b ++ uaddl2 v27.8h, v27.16b, v31.16b ++ ++// j1 += coef_hf[0] * p2; // - ++ UMLAL4K v6, v7, v8, v9, v26, v27, v0.h[2] ++ ++// i1 -= coef_hf[1] * (m2 + p2); // (-m2:v22,v23*) (-p2:v26*,v27*) ++ add v22.8h, v22.8h, v26.8h ++ add v23.8h, v23.8h, v27.8h ++ UMLSL4K v2, v3, v4, v5, v22, v23, v0.h[3] ++ ++// p1 = cur[prefs]; // p1 = v22 ++ ldr q22, [x3, w11, sxtw] ++ ++// j1 -= coef_lf[1] * 4 * (m1 + p5); // - ++ uaddl v26.8h, v24.8b, v25.8b ++ uaddl2 v27.8h, v24.16b, v25.16b ++ UMLSL4K v6, v7, v8, v9, v26, v27, v0.h[1] ++ ++// j2 = (coef_sp[0] * (p1 + p3) - coef_sp[1] * (m1 + p5)) >> 13; // (-p5:v25*) j2=v16 ++ uaddl v18.8h, v22.8b, v21.8b ++ uaddl2 v19.8h, v22.16b, v21.16b ++ UMULL4K v28, v29, v30, v31, v18, v19, v0.h[6] ++ ++ uaddl v18.8h, v24.8b, v25.8b ++ uaddl2 v19.8h, v24.16b, v25.16b ++ UMLSL4K v28, v29, v30, v31, v18, v19, v0.h[7] ++ ++ SQSHRUNN v16, v28, v29, v30, v31, 13 ++ ++// i2 = (coef_sp[0] * (m1 + p1) - coef_sp[1] * (m3 + p3)) >> 13; // (-m3:v20*) i2=v17 ++ uaddl v18.8h, v22.8b, v24.8b ++ uaddl2 v19.8h, v22.16b, v24.16b ++ UMULL4K v28, v29, v30, v31, v18, v19, v0.h[6] ++ ++ uaddl v18.8h, v20.8b, v21.8b ++ uaddl2 v19.8h, v20.16b, v21.16b ++ UMLSL4K v28, v29, v30, v31, v18, v19, v0.h[7] ++ ++ SQSHRUNN v17, v28, v29, v30, v31, 13 ++ ++// i1 += coef_lf[0] * 4 * (m1 + p1); // p1 = v22, m1 = v24 ++ uaddl v26.8h, v24.8b, v22.8b ++ uaddl2 v27.8h, v24.16b, v22.16b ++ UMLAL4K v2, v3, v4, v5, v26, v27, v0.h[0] ++ ++ ldr q31, [x2, w9, sxtw] ++ ldr q29, [x4, w9, sxtw] ++ ++// j1 += coef_lf[0] * 4 * (p1 + p3); // p1 = v22, p3 = v21 ++ uaddl v26.8h, v21.8b, v22.8b ++ uaddl2 v27.8h, v21.16b, v22.16b ++ UMLAL4K v6, v7, v8, v9, v26, v27, v0.h[0] ++ ++ ldr q30, [x2, w11, sxtw] ++ ldr q28, [x4, w11, sxtw] ++ ++// i1 >>= 15; // i1 = v2, -v3, -v4*, -v5* ++ SQSHRUNN v2, v2, v3, v4, v5, 15 ++ ++// j1 >>= 15; // j1 = v3, -v6*, -v7*, -v8*, -v9* ++ SQSHRUNN v3, v6, v7, v8, v9, 15 ++ ++// { ++// int t1 =(FFABS(prev[mrefs] - m1) + FFABS(prev[prefs] - p1)) >> 1; ++// int t2 =(FFABS(next[mrefs] - m1) + FFABS(next[prefs] - p1)) >> 1; ++ uabd v30.16b, v22.16b, v30.16b ++ uabd v31.16b, v24.16b, v31.16b ++ uabd v28.16b, v22.16b, v28.16b ++ uabd v29.16b, v24.16b, v29.16b ++ uhadd v31.16b, v31.16b, v30.16b ++ uhadd v29.16b, v29.16b, v28.16b ++ ++ ldr q27, [x2, w13, sxtw] ++ ldr q26, [x4, w13, sxtw] ++ ++// diff0 = FFMAX3(temporal_diff0 >> 1, t1, t2); // diff0=v18 ++ ushr v18.16b, v11.16b, #1 ++ umax v18.16b, v18.16b, v31.16b ++ umax v18.16b, v18.16b, v29.16b ++// } // v28, v30 preserved for next block ++// { // tdiff2 = v14 ++// int t1 =(FFABS(prev[prefs] - p1) + FFABS(prev[prefs3] - p3)) >> 1; ++// int t2 =(FFABS(next[prefs] - p1) + FFABS(next[prefs3] - p3)) >> 1; ++ uabd v31.16b, v21.16b, v27.16b ++ uabd v29.16b, v21.16b, v26.16b ++ uhadd v31.16b, v31.16b, v30.16b ++ uhadd v29.16b, v29.16b, v28.16b ++ ++// diff2 = FFMAX3(temporal_diff2 >> 1, t1, t2); // diff2=v19 ++ ushr v19.16b, v14.16b, #1 ++ umax v19.16b, v19.16b, v31.16b ++ umax v19.16b, v19.16b, v29.16b ++// } ++ ++ // diff0 = v18, (m2 >> 1) = v13, m1 = v24, d0 = v10, p1 = v22, d2 = v15 ++ SPAT_CHECK v18, v13, v24, v10, v22, v15, v31, v30, v29, v28 ++ ++ // diff2 = v19, d0 = v10, p1 = v22, d2 = v15, p3 = v21, (p4 >> 1) = v12 ++ SPAT_CHECK v19, v10, v22, v15, v21, v12, v31, v30, v29, v28 ++ ++ // j1 = v3, j2 = v16, p1 = v22, d2 = v15, p3 = v21, td2 = v14, diff2 = v19 ++ INTERPOL v3, v3, v16, v22, v15, v21, v14, v19, v31, v30, v29 ++ ++// dst[d_stride * 2] = av_clip_uint8(interpol); ++ str q3, [x0, w5, sxtw] ++ ++// dst[d_stride] = p1; ++ str q22, [x0, w1, sxtw] ++ ++ // i1 = v2, i2 = v17, m1 = v24, d0 = v10, p1 = v22, td2 = v11, diff2 = v18 ++ INTERPOL v2, v2, v17, v24, v10, v22, v11, v18, v31, v30, v29 ++ ++// dst[0] = av_clip_uint8(interpol); ++ str q2, [x0], #16 ++// } ++// ++// dst++; ++// cur++; ++// prev++; ++// prev2++; ++// next++; ++// } ++ subs w10, w10, #16 ++ add x2, x2, #16 ++ add x3, x3, #16 ++ add x4, x4, #16 ++ add x17, x17, #16 ++ bgt 10b ++ ++ POP_VREGS ++99: ++ ret ++endfunc ++ ++// =========================================================================== ++// ++// void filter_line( ++// void *dst1, // x0 ++// void *prev1, // x1 ++// void *cur1, // x2 ++// void *next1, // x3 ++// int w, // w4 ++// int prefs, // w5 ++// int mrefs, // w6 ++// int prefs2, // w7 ++// int mrefs2, // [sp, #0] ++// int prefs3, // [sp, #SP_INT] ++// int mrefs3, // [sp, #SP_INT*2] ++// int prefs4, // [sp, #SP_INT*3] ++// int mrefs4, // [sp, #SP_INT*4] ++// int parity, // [sp, #SP_INT*5] ++// int clip_max) // [sp, #SP_INT*6] ++ ++function ff_bwdif_filter_line_neon, export=1 ++ // Sanity check w ++ cmp w4, #0 ++ ble 99f ++ ++ // Rearrange regs to be the same as line3 for ease of debug! ++ mov w10, w4 // w10 = loop count ++ mov w9, w6 // w9 = mref ++ mov w12, w7 // w12 = pref2 ++ mov w11, w5 // w11 = pref ++ ldr w8, [sp, #0] // w8 = mref2 ++ ldr w7, [sp, #SP_INT*2] // w7 = mref3 ++ ldr w6, [sp, #SP_INT*4] // w6 = mref4 ++ ldr w13, [sp, #SP_INT] // w13 = pref3 ++ ldr w14, [sp, #SP_INT*3] // w14 = pref4 ++ ++ mov x4, x3 ++ mov x3, x2 ++ mov x2, x1 ++ ++ LDR_COEFFS v0, x17 ++ ++// #define prev2 cur ++// const uint8_t * restrict next2 = parity ? prev : next; ++ ldr w17, [sp, #SP_INT*5] // parity ++ cmp w17, #0 ++ csel x17, x2, x4, ne ++ ++ PUSH_VREGS ++ ++// for (x = 0; x < w; x++) { ++// int diff0, diff2; ++// int d0, d2; ++// int temporal_diff0, temporal_diff2; ++// ++// int i1, i2; ++// int j1, j2; ++// int p6, p5, p4, p3, p2, p1, c0, m1, m2, m3, m4; ++ ++10: ++// c0 = prev2[0] + next2[0]; // c0 = v20, v21 ++// d0 = c0 >> 1; // d0 = v10 ++// temporal_diff0 = FFABS(prev2[0] - next2[0]); // td0 = v11 ++ ldr q31, [x3] ++ ldr q21, [x17] ++ uhadd v10.16b, v31.16b, v21.16b ++ uabd v11.16b, v31.16b, v21.16b ++ uaddl v20.8h, v21.8b, v31.8b ++ uaddl2 v21.8h, v21.16b, v31.16b ++ ++ ldr q31, [x3, w6, sxtw] ++ ldr q23, [x17, w6, sxtw] ++ ++// i1 = coef_hf[0] * c0; // i1 = v2-v5 ++ UMULL4K v2, v3, v4, v5, v20, v21, v0.h[2] ++ ++ ldr q30, [x3, w14, sxtw] ++ ldr q25, [x17, w14, sxtw] ++ ++// m4 = prev2[mrefs4] + next2[mrefs4]; // m4 = v22,v23 ++ uaddl v22.8h, v23.8b, v31.8b ++ uaddl2 v23.8h, v23.16b, v31.16b ++ ++// p4 = prev2[prefs4] + next2[prefs4]; // p4 = v24,v25, (p4 >> 1) = v12 ++ uhadd v12.16b, v25.16b, v30.16b ++ uaddl v24.8h, v25.8b, v30.8b ++ uaddl2 v25.8h, v25.16b, v30.16b ++ ++// m3 = cur[mrefs3]; // m3 = v20 ++ ldr q20, [x3, w7, sxtw] ++ ++// p3 = cur[prefs3]; // p3 = v21 ++ ldr q21, [x3, w13, sxtw] ++ ++// i1 += coef_hf[2] * (m4 + p4); // (-m4:v22,v23) (-p4:v24,v25) ++ add v22.8h, v22.8h, v24.8h ++ add v23.8h, v23.8h, v25.8h ++ UMLAL4K v2, v3, v4, v5, v22, v23, v0.h[4] ++ ++ ldr q29, [x3, w8, sxtw] ++ ldr q23, [x17, w8, sxtw] ++ ++// i1 -= coef_lf[1] * 4 * (m3 + p3); // - ++ uaddl v30.8h, v20.8b, v21.8b ++ uaddl2 v31.8h, v20.16b, v21.16b ++ ++ UMLSL4K v2, v3, v4, v5, v30, v31, v0.h[1] ++ ++ ldr q31, [x3, w12, sxtw] ++ ldr q27, [x17, w12, sxtw] ++ ++// m2 = prev2[mrefs2] + next2[mrefs2]; // m2 = v22,v23, (m2 >> 1) = v13 ++ uhadd v13.16b, v23.16b, v29.16b ++ uaddl v22.8h, v23.8b, v29.8b ++ uaddl2 v23.8h, v23.16b, v29.16b ++ ++// m1 = cur[mrefs]; // m1 = v24 ++ ldr q24, [x3, w9, sxtw] ++ ++// p2 = prev2[prefs2] + next2[prefs2]; // p2 = v26, v27 ++// temporal_diff2 = FFABS(prev2[prefs2] - next2[prefs2]); // td2 = v14 ++// d2 = p2 >> 1; // d2 = v15 ++ uabd v14.16b, v31.16b, v27.16b ++ uhadd v15.16b, v31.16b, v27.16b ++ uaddl v26.8h, v27.8b, v31.8b ++ uaddl2 v27.8h, v27.16b, v31.16b ++ ++// i1 -= coef_hf[1] * (m2 + p2); // (-m2:v22,v23*) (-p2:v26*,v27*) ++ add v22.8h, v22.8h, v26.8h ++ add v23.8h, v23.8h, v27.8h ++ UMLSL4K v2, v3, v4, v5, v22, v23, v0.h[3] ++ ++// p1 = cur[prefs]; // p1 = v22 ++ ldr q22, [x3, w11, sxtw] ++ ++// i2 = (coef_sp[0] * (m1 + p1) - coef_sp[1] * (m3 + p3)) >> 13; // (-m3:v20*) i2=v17 ++ uaddl v18.8h, v22.8b, v24.8b ++ uaddl2 v19.8h, v22.16b, v24.16b ++ UMULL4K v28, v29, v30, v31, v18, v19, v0.h[6] ++ ++ uaddl v18.8h, v20.8b, v21.8b ++ uaddl2 v19.8h, v20.16b, v21.16b ++ UMLSL4K v28, v29, v30, v31, v18, v19, v0.h[7] ++ ++ SQSHRUNN v17, v28, v29, v30, v31, 13 ++ ++// i1 += coef_lf[0] * 4 * (m1 + p1); // p1 = v22, m1 = v24 ++ uaddl v26.8h, v24.8b, v22.8b ++ uaddl2 v27.8h, v24.16b, v22.16b ++ UMLAL4K v2, v3, v4, v5, v26, v27, v0.h[0] ++ ++ ldr q31, [x2, w9, sxtw] ++ ldr q29, [x4, w9, sxtw] ++ ++ ldr q30, [x2, w11, sxtw] ++ ldr q28, [x4, w11, sxtw] ++ ++// i1 >>= 15; // i1 = v2, -v3, -v4*, -v5* ++ SQSHRUNN v2, v2, v3, v4, v5, 15 ++ ++// { ++// int t1 =(FFABS(prev[mrefs] - m1) + FFABS(prev[prefs] - p1)) >> 1; ++// int t2 =(FFABS(next[mrefs] - m1) + FFABS(next[prefs] - p1)) >> 1; ++ uabd v30.16b, v22.16b, v30.16b ++ uabd v31.16b, v24.16b, v31.16b ++ uabd v28.16b, v22.16b, v28.16b ++ uabd v29.16b, v24.16b, v29.16b ++ uhadd v31.16b, v31.16b, v30.16b ++ uhadd v29.16b, v29.16b, v28.16b ++ ++// diff0 = FFMAX3(temporal_diff0 >> 1, t1, t2); // diff0=v18 ++ ushr v18.16b, v11.16b, #1 ++ umax v18.16b, v18.16b, v31.16b ++ umax v18.16b, v18.16b, v29.16b ++ ++ // diff0 = v18, (m2 >> 1) = v13, m1 = v24, d0 = v10, p1 = v22, d2 = v15 ++ SPAT_CHECK v18, v13, v24, v10, v22, v15, v31, v30, v29, v28 ++ ++ // i1 = v2, i2 = v17, m1 = v24, d0 = v10, p1 = v22, td2 = v11, diff2 = v18 ++ INTERPOL v2, v2, v17, v24, v10, v22, v11, v18, v31, v30, v29 ++ ++// dst[0] = av_clip_uint8(interpol); ++ str q2, [x0], #16 ++// } ++// ++// dst++; ++// cur++; ++// prev++; ++// prev2++; ++// next++; ++// } ++ ++ subs w10, w10, #16 ++ add x2, x2, #16 ++ add x3, x3, #16 ++ add x4, x4, #16 ++ add x17, x17, #16 ++ bgt 10b ++ ++ POP_VREGS ++99: ++ ret ++endfunc ++ ++// ============================================================================ ++// ++// void ff_bwdif_filter_edge_neon( ++// void *dst1, // x0 ++// void *prev1, // x1 ++// void *cur1, // x2 ++// void *next1, // x3 ++// int w, // w4 ++// int prefs, // w5 ++// int mrefs, // w6 ++// int prefs2, // w7 ++// int mrefs2, // [sp, #0] ++// int parity, // [sp, #SP_INT] ++// int clip_max, // [sp, #SP_INT*2] unused ++// int spat); // [sp, #SP_INT*3] ++ ++function ff_bwdif_filter_edge_neon, export=1 ++ // Sanity check w ++ cmp w4, #0 ++ ble 99f ++ ++// #define prev2 cur ++// const uint8_t * restrict next2 = parity ? prev : next; ++ ++ ldr w8, [sp, #0] // mrefs2 ++ ++ ldr w17, [sp, #SP_INT] // parity ++ ldr w16, [sp, #SP_INT*3] // spat ++ cmp w17, #0 ++ csel x17, x1, x3, ne ++ ++// for (x = 0; x < w; x++) { ++ ++10: ++// int m1 = cur[mrefs]; ++// int d = (prev2[0] + next2[0]) >> 1; ++// int p1 = cur[prefs]; ++// int temporal_diff0 = FFABS(prev2[0] - next2[0]); ++// int temporal_diff1 =(FFABS(prev[mrefs] - m1) + FFABS(prev[prefs] - p1)) >> 1; ++// int temporal_diff2 =(FFABS(next[mrefs] - m1) + FFABS(next[prefs] - p1)) >> 1; ++// int diff = FFMAX3(temporal_diff0 >> 1, temporal_diff1, temporal_diff2); ++ ldr q31, [x2] ++ ldr q21, [x17] ++ uhadd v16.16b, v31.16b, v21.16b // d0 = v16 ++ uabd v17.16b, v31.16b, v21.16b // td0 = v17 ++ ldr q24, [x2, w6, sxtw] // m1 = v24 ++ ldr q22, [x2, w5, sxtw] // p1 = v22 ++ ++ ldr q0, [x1, w6, sxtw] // prev[mrefs] ++ ldr q2, [x1, w5, sxtw] // prev[prefs] ++ ldr q1, [x3, w6, sxtw] // next[mrefs] ++ ldr q3, [x3, w5, sxtw] // next[prefs] ++ ++ ushr v29.16b, v17.16b, #1 ++ ++ uabd v31.16b, v0.16b, v24.16b ++ uabd v30.16b, v2.16b, v22.16b ++ uhadd v0.16b, v31.16b, v30.16b // td1 = q0 ++ ++ uabd v31.16b, v1.16b, v24.16b ++ uabd v30.16b, v3.16b, v22.16b ++ uhadd v1.16b, v31.16b, v30.16b // td2 = q1 ++ ++ umax v0.16b, v0.16b, v29.16b ++ umax v0.16b, v0.16b, v1.16b // diff = v0 ++ ++// if (spat) { ++// SPAT_CHECK() ++// } ++// i0 = (m1 + p1) >> 1; ++ cbz w16, 1f ++ ++ ldr q31, [x2, w8, sxtw] ++ ldr q18, [x17, w8, sxtw] ++ ldr q30, [x2, w7, sxtw] ++ ldr q19, [x17, w7, sxtw] ++ uhadd v18.16b, v18.16b, v31.16b ++ uhadd v19.16b, v19.16b, v30.16b ++ ++ SPAT_CHECK v0, v18, v24, v16, v22, v19, v31, v30, v29, v28 ++ ++1: ++ uhadd v2.16b, v22.16b, v24.16b ++ ++ // i0 = v2, s0 = v2, d0 = v16, diff = v0, t0 = v31, t1 = v30 ++ DIFF_CLIP v2, v2, v16, v0, v31, v30 ++ ++// dst[0] = av_clip(interpol, 0, clip_max); ++ str q2, [x0], #16 ++ ++// dst++; ++// cur++; ++// } ++ subs w4, w4, #16 ++ add x1, x1, #16 ++ add x2, x2, #16 ++ add x3, x3, #16 ++ add x17, x17, #16 ++ bgt 10b ++ ++99: ++ ret ++endfunc ++ ++// ============================================================================ ++// ++// void ff_bwdif_filter_intra_neon( ++// void *dst1, // x0 ++// void *cur1, // x1 ++// int w, // w2 ++// int prefs, // w3 ++// int mrefs, // w4 ++// int prefs3, // w5 ++// int mrefs3, // w6 ++// int parity, // w7 unused ++// int clip_max) // [sp, #0] unused ++ ++function ff_bwdif_filter_intra_neon, export=1 ++ cmp w2, #0 ++ ble 99f ++ ++ LDR_COEFFS v0, x17 ++ ++// for (x = 0; x < w; x++) { ++10: ++ ++// interpol = (coef_sp[0] * (cur[mrefs] + cur[prefs]) - coef_sp[1] * (cur[mrefs3] + cur[prefs3])) >> 13; ++ ldr q31, [x1, w4, sxtw] ++ ldr q30, [x1, w3, sxtw] ++ ldr q29, [x1, w6, sxtw] ++ ldr q28, [x1, w5, sxtw] ++ ++ uaddl v20.8h, v31.8b, v30.8b ++ uaddl2 v21.8h, v31.16b, v30.16b ++ ++ UMULL4K v2, v3, v4, v5, v20, v21, v0.h[6] ++ ++ uaddl v20.8h, v29.8b, v28.8b ++ uaddl2 v21.8h, v29.16b, v28.16b ++ ++ UMLSL4K v2, v3, v4, v5, v20, v21, v0.h[7] ++ ++// dst[0] = av_clip(interpol, 0, clip_max); ++ SQSHRUNN v2, v2, v3, v4, v5, 13 ++ str q2, [x0], #16 ++ ++// dst++; ++// cur++; ++// } ++ ++ subs w2, w2, #16 ++ add x1, x1, #16 ++ bgt 10b ++ ++99: ++ ret ++endfunc +--- a/libavfilter/allfilters.c ++++ b/libavfilter/allfilters.c +@@ -242,6 +242,7 @@ extern const AVFilter ff_vf_derain; + extern const AVFilter ff_vf_deshake; + extern const AVFilter ff_vf_deshake_opencl; + extern const AVFilter ff_vf_despill; ++extern const AVFilter ff_vf_deinterlace_v4l2m2m; + extern const AVFilter ff_vf_detelecine; + extern const AVFilter ff_vf_dilation; + extern const AVFilter ff_vf_dilation_opencl; +@@ -414,6 +415,7 @@ extern const AVFilter ff_vf_scale; + extern const AVFilter ff_vf_scale_cuda; + extern const AVFilter ff_vf_scale_npp; + extern const AVFilter ff_vf_scale_qsv; ++extern const AVFilter ff_vf_scale_v4l2m2m; + extern const AVFilter ff_vf_scale_vaapi; + extern const AVFilter ff_vf_scale_vulkan; + extern const AVFilter ff_vf_scale2ref; +@@ -483,6 +485,7 @@ extern const AVFilter ff_vf_trim; + extern const AVFilter ff_vf_unpremultiply; + extern const AVFilter ff_vf_unsharp; + extern const AVFilter ff_vf_unsharp_opencl; ++extern const AVFilter ff_vf_unsand; + extern const AVFilter ff_vf_untile; + extern const AVFilter ff_vf_uspp; + extern const AVFilter ff_vf_v360; +--- a/libavfilter/buffersink.c ++++ b/libavfilter/buffersink.c +@@ -62,6 +62,11 @@ typedef struct BufferSinkContext { + int sample_rates_size; + + AVFrame *peeked_frame; ++ ++ union { ++ av_buffersink_alloc_video_frame * video; ++ } alloc_cb; ++ void * alloc_v; + } BufferSinkContext; + + #define NB_ITEMS(list) (list ## _size / sizeof(*list)) +@@ -154,6 +159,22 @@ int attribute_align_arg av_buffersink_ge + return get_frame_internal(ctx, frame, 0, nb_samples); + } + ++static AVFrame * alloc_video_buffer(AVFilterLink *link, int w, int h) ++{ ++ AVFilterContext * const ctx = link->dst; ++ BufferSinkContext * const bs = ctx->priv; ++ return bs->alloc_cb.video ? bs->alloc_cb.video(ctx, bs->alloc_v, w, h) : ++ ff_default_get_video_buffer(link, w, h); ++} ++ ++int av_buffersink_set_alloc_video_frame(AVFilterContext *ctx, av_buffersink_alloc_video_frame * cb, void * v) ++{ ++ BufferSinkContext * const bs = ctx->priv; ++ bs->alloc_cb.video = cb; ++ bs->alloc_v = v; ++ return 0; ++} ++ + #if FF_API_BUFFERSINK_ALLOC + AVBufferSinkParams *av_buffersink_params_alloc(void) + { +@@ -403,6 +424,7 @@ static const AVFilterPad avfilter_vsink_ + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, ++ .get_buffer = {.video = alloc_video_buffer}, + }, + }; + +--- a/libavfilter/buffersink.h ++++ b/libavfilter/buffersink.h +@@ -202,6 +202,9 @@ int av_buffersink_get_frame(AVFilterCont + */ + int av_buffersink_get_samples(AVFilterContext *ctx, AVFrame *frame, int nb_samples); + ++typedef AVFrame * av_buffersink_alloc_video_frame(AVFilterContext * ctx, void * v, int w, int h); ++int av_buffersink_set_alloc_video_frame(AVFilterContext *ctx, av_buffersink_alloc_video_frame * cb, void * v); ++ + /** + * @} + */ +--- a/libavfilter/buffersrc.c ++++ b/libavfilter/buffersrc.c +@@ -204,7 +204,7 @@ FF_ENABLE_DEPRECATION_WARNINGS + + switch (ctx->outputs[0]->type) { + case AVMEDIA_TYPE_VIDEO: +- CHECK_VIDEO_PARAM_CHANGE(ctx, s, frame->width, frame->height, ++ CHECK_VIDEO_PARAM_CHANGE(ctx, s, av_frame_cropped_width(frame), av_frame_cropped_height(frame), + frame->format, frame->pts); + break; + case AVMEDIA_TYPE_AUDIO: +--- a/libavfilter/bwdif.h ++++ b/libavfilter/bwdif.h +@@ -35,8 +35,29 @@ typedef struct BWDIFContext { + void (*filter_edge)(void *dst, void *prev, void *cur, void *next, + int w, int prefs, int mrefs, int prefs2, int mrefs2, + int parity, int clip_max, int spat); ++ void (*filter_line3)(void *dst, int dstride, ++ const void *prev, const void *cur, const void *next, int prefs, ++ int w, int parity, int clip_max); + } BWDIFContext; + +-void ff_bwdif_init_x86(BWDIFContext *bwdif); ++void ff_bwdif_init_filter_line(BWDIFContext *bwdif, int bit_depth); ++void ff_bwdif_init_x86(BWDIFContext *bwdif, int bit_depth); ++void ff_bwdif_init_aarch64(BWDIFContext *bwdif, int bit_depth); ++ ++void ff_bwdif_filter_edge_c(void *dst1, void *prev1, void *cur1, void *next1, ++ int w, int prefs, int mrefs, int prefs2, int mrefs2, ++ int parity, int clip_max, int spat); ++ ++void ff_bwdif_filter_intra_c(void *dst1, void *cur1, int w, int prefs, int mrefs, ++ int prefs3, int mrefs3, int parity, int clip_max); ++ ++void ff_bwdif_filter_line_c(void *dst1, void *prev1, void *cur1, void *next1, ++ int w, int prefs, int mrefs, int prefs2, int mrefs2, ++ int prefs3, int mrefs3, int prefs4, int mrefs4, ++ int parity, int clip_max); ++ ++void ff_bwdif_filter_line3_c(void * dst1, int d_stride, ++ const void * prev1, const void * cur1, const void * next1, int s_stride, ++ int w, int parity, int clip_max); + + #endif /* AVFILTER_BWDIF_H */ +--- a/libavfilter/vf_bwdif.c ++++ b/libavfilter/vf_bwdif.c +@@ -122,8 +122,8 @@ typedef struct ThreadData { + next2++; \ + } + +-static void filter_intra(void *dst1, void *cur1, int w, int prefs, int mrefs, +- int prefs3, int mrefs3, int parity, int clip_max) ++void ff_bwdif_filter_intra_c(void *dst1, void *cur1, int w, int prefs, int mrefs, ++ int prefs3, int mrefs3, int parity, int clip_max) + { + uint8_t *dst = dst1; + uint8_t *cur = cur1; +@@ -132,10 +132,10 @@ static void filter_intra(void *dst1, voi + FILTER_INTRA() + } + +-static void filter_line_c(void *dst1, void *prev1, void *cur1, void *next1, +- int w, int prefs, int mrefs, int prefs2, int mrefs2, +- int prefs3, int mrefs3, int prefs4, int mrefs4, +- int parity, int clip_max) ++void ff_bwdif_filter_line_c(void *dst1, void *prev1, void *cur1, void *next1, ++ int w, int prefs, int mrefs, int prefs2, int mrefs2, ++ int prefs3, int mrefs3, int prefs4, int mrefs4, ++ int parity, int clip_max) + { + uint8_t *dst = dst1; + uint8_t *prev = prev1; +@@ -150,9 +150,34 @@ static void filter_line_c(void *dst1, vo + FILTER2() + } + +-static void filter_edge(void *dst1, void *prev1, void *cur1, void *next1, +- int w, int prefs, int mrefs, int prefs2, int mrefs2, +- int parity, int clip_max, int spat) ++#define NEXT_LINE()\ ++ dst += d_stride; \ ++ prev += prefs; \ ++ cur += prefs; \ ++ next += prefs; ++ ++void ff_bwdif_filter_line3_c(void * dst1, int d_stride, ++ const void * prev1, const void * cur1, const void * next1, int s_stride, ++ int w, int parity, int clip_max) ++{ ++ const int prefs = s_stride; ++ uint8_t * dst = dst1; ++ const uint8_t * prev = prev1; ++ const uint8_t * cur = cur1; ++ const uint8_t * next = next1; ++ ++ ff_bwdif_filter_line_c(dst, (void*)prev, (void*)cur, (void*)next, w, ++ prefs, -prefs, prefs * 2, - prefs * 2, prefs * 3, -prefs * 3, prefs * 4, -prefs * 4, parity, clip_max); ++ NEXT_LINE(); ++ memcpy(dst, cur, w); ++ NEXT_LINE(); ++ ff_bwdif_filter_line_c(dst, (void*)prev, (void*)cur, (void*)next, w, ++ prefs, -prefs, prefs * 2, - prefs * 2, prefs * 3, -prefs * 3, prefs * 4, -prefs * 4, parity, clip_max); ++} ++ ++void ff_bwdif_filter_edge_c(void *dst1, void *prev1, void *cur1, void *next1, ++ int w, int prefs, int mrefs, int prefs2, int mrefs2, ++ int parity, int clip_max, int spat) + { + uint8_t *dst = dst1; + uint8_t *prev = prev1; +@@ -212,6 +237,13 @@ static void filter_edge_16bit(void *dst1 + FILTER2() + } + ++// Round job start line down to multiple of 4 so that if filter_line3 exists ++// and the frame is a multiple of 4 high then filter_line will never be called ++static inline int job_start(const int jobnr, const int nb_jobs, const int h) ++{ ++ return jobnr >= nb_jobs ? h : ((h * jobnr) / nb_jobs) & ~3; ++} ++ + static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) + { + BWDIFContext *s = ctx->priv; +@@ -221,8 +253,8 @@ static int filter_slice(AVFilterContext + int clip_max = (1 << (yadif->csp->comp[td->plane].depth)) - 1; + int df = (yadif->csp->comp[td->plane].depth + 7) / 8; + int refs = linesize / df; +- int slice_start = (td->h * jobnr ) / nb_jobs; +- int slice_end = (td->h * (jobnr+1)) / nb_jobs; ++ int slice_start = job_start(jobnr, nb_jobs, td->h); ++ int slice_end = job_start(jobnr + 1, nb_jobs, td->h); + int y; + + for (y = slice_start; y < slice_end; y++) { +@@ -244,6 +276,11 @@ static int filter_slice(AVFilterContext + refs << 1, -(refs << 1), + td->parity ^ td->tff, clip_max, + (y < 2) || ((y + 3) > td->h) ? 0 : 1); ++ } else if (s->filter_line3 && y + 2 < slice_end && y + 6 < td->h) { ++ s->filter_line3(dst, td->frame->linesize[td->plane], ++ prev, cur, next, linesize, td->w, ++ td->parity ^ td->tff, clip_max); ++ y += 2; + } else { + s->filter_line(dst, prev, cur, next, td->w, + refs, -refs, refs << 1, -(refs << 1), +@@ -265,22 +302,31 @@ static void filter(AVFilterContext *ctx, + YADIFContext *yadif = &bwdif->yadif; + ThreadData td = { .frame = dstpic, .parity = parity, .tff = tff }; + int i; ++ int last_plane = -1; + + for (i = 0; i < yadif->csp->nb_components; i++) { + int w = dstpic->width; + int h = dstpic->height; ++ const AVComponentDescriptor * const comp = yadif->csp->comp + i; ++ ++ // If the last plane was the same as this plane assume we've dealt ++ // with all the pels already ++ if (last_plane == comp->plane) ++ continue; ++ last_plane = comp->plane; + + if (i == 1 || i == 2) { + w = AV_CEIL_RSHIFT(w, yadif->csp->log2_chroma_w); + h = AV_CEIL_RSHIFT(h, yadif->csp->log2_chroma_h); + } + +- td.w = w; +- td.h = h; +- td.plane = i; ++ // comp step is in bytes but td.w is in pels ++ td.w = w * comp->step / ((comp->depth + 7) / 8); ++ td.h = h; ++ td.plane = comp->plane; + + ff_filter_execute(ctx, filter_slice, &td, NULL, +- FFMIN(h, ff_filter_get_nb_threads(ctx))); ++ FFMIN((h+3)/4, ff_filter_get_nb_threads(ctx))); + } + if (yadif->current_field == YADIF_FIELD_END) { + yadif->current_field = YADIF_FIELD_NORMAL; +@@ -313,6 +359,7 @@ static const enum AVPixelFormat pix_fmts + AV_PIX_FMT_YUVA420P9, AV_PIX_FMT_YUVA422P9, AV_PIX_FMT_YUVA444P9, + AV_PIX_FMT_YUVA420P10, AV_PIX_FMT_YUVA422P10, AV_PIX_FMT_YUVA444P10, + AV_PIX_FMT_YUVA420P16, AV_PIX_FMT_YUVA422P16, AV_PIX_FMT_YUVA444P16, ++ AV_PIX_FMT_NV12, + AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10, + AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16, + AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRAP16, +@@ -340,21 +387,29 @@ static int config_props(AVFilterLink *li + + yadif->csp = av_pix_fmt_desc_get(link->format); + yadif->filter = filter; +- if (yadif->csp->comp[0].depth > 8) { ++ ff_bwdif_init_filter_line(s, yadif->csp->comp[0].depth); ++ ++ return 0; ++} ++ ++av_cold void ff_bwdif_init_filter_line(BWDIFContext *s, int bit_depth) ++{ ++ s->filter_line3 = 0; ++ if (bit_depth > 8) { + s->filter_intra = filter_intra_16bit; + s->filter_line = filter_line_c_16bit; + s->filter_edge = filter_edge_16bit; + } else { +- s->filter_intra = filter_intra; +- s->filter_line = filter_line_c; +- s->filter_edge = filter_edge; ++ s->filter_intra = ff_bwdif_filter_intra_c; ++ s->filter_line = ff_bwdif_filter_line_c; ++ s->filter_edge = ff_bwdif_filter_edge_c; + } + + #if ARCH_X86 +- ff_bwdif_init_x86(s); ++ ff_bwdif_init_x86(s, bit_depth); ++#elif ARCH_AARCH64 ++ ff_bwdif_init_aarch64(s, bit_depth); + #endif +- +- return 0; + } + + +--- /dev/null ++++ b/libavfilter/vf_deinterlace_v4l2m2m.c +@@ -0,0 +1,2102 @@ ++/* ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/** ++ * @file ++ * deinterlace video filter - V4L2 M2M ++ */ ++ ++#include <drm_fourcc.h> ++ ++#include <linux/videodev2.h> ++ ++#include <dirent.h> ++#include <fcntl.h> ++#include <poll.h> ++#include <stdatomic.h> ++#include <stdio.h> ++#include <string.h> ++#include <sys/ioctl.h> ++#include <sys/mman.h> ++#include <unistd.h> ++ ++#include "config.h" ++ ++#include "libavutil/avassert.h" ++#include "libavutil/avstring.h" ++#include "libavutil/common.h" ++#include "libavutil/hwcontext.h" ++#include "libavutil/hwcontext_drm.h" ++#include "libavutil/internal.h" ++#include "libavutil/mathematics.h" ++#include "libavutil/opt.h" ++#include "libavutil/pixdesc.h" ++#include "libavutil/time.h" ++ ++#define FF_INTERNAL_FIELDS 1 ++#include "framequeue.h" ++#include "filters.h" ++#include "avfilter.h" ++#include "formats.h" ++#include "internal.h" ++#include "scale_eval.h" ++#include "video.h" ++ ++#ifndef DRM_FORMAT_P030 ++#define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0') /* 2x2 subsampled Cr:Cb plane 10 bits per channel packed */ ++#endif ++ ++// V4L2_PIX_FMT_NV12_10_COL128 and V4L2_PIX_FMT_NV12_COL128 should be defined ++// in drm_fourcc.h hopefully will be sometime in the future but until then... ++#ifndef V4L2_PIX_FMT_NV12_10_COL128 ++#define V4L2_PIX_FMT_NV12_10_COL128 v4l2_fourcc('N', 'C', '3', '0') ++#endif ++ ++#ifndef V4L2_PIX_FMT_NV12_COL128 ++#define V4L2_PIX_FMT_NV12_COL128 v4l2_fourcc('N', 'C', '1', '2') /* 12 Y/CbCr 4:2:0 128 pixel wide column */ ++#endif ++ ++typedef struct V4L2Queue V4L2Queue; ++typedef struct DeintV4L2M2MContextShared DeintV4L2M2MContextShared; ++ ++typedef enum filter_type_v4l2_e ++{ ++ FILTER_V4L2_DEINTERLACE = 1, ++ FILTER_V4L2_SCALE, ++} filter_type_v4l2_t; ++ ++typedef struct V4L2Buffer { ++ int enqueued; ++ int reenqueue; ++ struct v4l2_buffer buffer; ++ AVFrame frame; ++ struct v4l2_plane planes[VIDEO_MAX_PLANES]; ++ int num_planes; ++ AVDRMFrameDescriptor drm_frame; ++ V4L2Queue *q; ++} V4L2Buffer; ++ ++typedef struct V4L2Queue { ++ struct v4l2_format format; ++ struct v4l2_selection sel; ++ int eos; ++ int num_buffers; ++ V4L2Buffer *buffers; ++ const char * name; ++ DeintV4L2M2MContextShared *ctx; ++} V4L2Queue; ++ ++typedef struct pts_stats_s ++{ ++ void * logctx; ++ const char * name; // For debug ++ unsigned int last_count; ++ unsigned int last_interval; ++ int64_t last_pts; ++} pts_stats_t; ++ ++#define PTS_TRACK_SIZE 32 ++typedef struct pts_track_el_s ++{ ++ uint32_t n; ++ unsigned int interval; ++ AVFrame * props; ++} pts_track_el_t; ++ ++typedef struct pts_track_s ++{ ++ uint32_t n; ++ uint32_t last_n; ++ int got_2; ++ void * logctx; ++ pts_stats_t stats; ++ pts_track_el_t a[PTS_TRACK_SIZE]; ++} pts_track_t; ++ ++typedef enum drain_state_e ++{ ++ DRAIN_NONE = 0, // Not draining ++ DRAIN_TIMEOUT, // Drain until normal timeout setup yields no frame ++ DRAIN_LAST, // Drain with long timeout last_frame in received on output expected ++ DRAIN_EOS, // Drain with long timeout EOS expected ++ DRAIN_DONE // Drained ++} drain_state_t; ++ ++typedef struct DeintV4L2M2MContextShared { ++ void * logctx; // For logging - will be NULL when done ++ filter_type_v4l2_t filter_type; ++ ++ int fd; ++ int done; // fd closed - awating all refs dropped ++ int width; ++ int height; ++ ++ int drain; // EOS received (inlink status) ++ drain_state_t drain_state; ++ int64_t drain_pts; // PTS associated with inline status ++ ++ unsigned int frames_rx; ++ unsigned int frames_tx; ++ ++ // from options ++ int output_width; ++ int output_height; ++ enum AVPixelFormat output_format; ++ ++ int has_enc_stop; ++ // We expect to get exactly the same number of frames out as we put in ++ // We can drain by matching input to output ++ int one_to_one; ++ ++ int orig_width; ++ int orig_height; ++ atomic_uint refcount; ++ ++ AVBufferRef *hw_frames_ctx; ++ ++ unsigned int field_order; ++ ++ pts_track_t track; ++ ++ V4L2Queue output; ++ V4L2Queue capture; ++} DeintV4L2M2MContextShared; ++ ++typedef struct DeintV4L2M2MContext { ++ const AVClass *class; ++ ++ DeintV4L2M2MContextShared *shared; ++ ++ char * w_expr; ++ char * h_expr; ++ char * output_format_string;; ++ ++ int force_original_aspect_ratio; ++ int force_divisible_by; ++ ++ char *colour_primaries_string; ++ char *colour_transfer_string; ++ char *colour_matrix_string; ++ int colour_range; ++ char *chroma_location_string; ++ ++ enum AVColorPrimaries colour_primaries; ++ enum AVColorTransferCharacteristic colour_transfer; ++ enum AVColorSpace colour_matrix; ++ enum AVChromaLocation chroma_location; ++} DeintV4L2M2MContext; ++ ++ ++static inline int drain_frame_expected(const drain_state_t d) ++{ ++ return d == DRAIN_EOS || d == DRAIN_LAST; ++} ++ ++// These just list the ones we know we can cope with ++static uint32_t ++fmt_av_to_v4l2(const enum AVPixelFormat avfmt) ++{ ++ switch (avfmt) { ++ case AV_PIX_FMT_YUV420P: ++ return V4L2_PIX_FMT_YUV420; ++ case AV_PIX_FMT_NV12: ++ return V4L2_PIX_FMT_NV12; ++#if CONFIG_SAND ++ case AV_PIX_FMT_RPI4_8: ++ case AV_PIX_FMT_SAND128: ++ return V4L2_PIX_FMT_NV12_COL128; ++#endif ++ default: ++ break; ++ } ++ return 0; ++} ++ ++static enum AVPixelFormat ++fmt_v4l2_to_av(const uint32_t pixfmt) ++{ ++ switch (pixfmt) { ++ case V4L2_PIX_FMT_YUV420: ++ return AV_PIX_FMT_YUV420P; ++ case V4L2_PIX_FMT_NV12: ++ return AV_PIX_FMT_NV12; ++#if CONFIG_SAND ++ case V4L2_PIX_FMT_NV12_COL128: ++ return AV_PIX_FMT_RPI4_8; ++#endif ++ default: ++ break; ++ } ++ return AV_PIX_FMT_NONE; ++} ++ ++static unsigned int pts_stats_interval(const pts_stats_t * const stats) ++{ ++ return stats->last_interval; ++} ++ ++// Pick 64 for max last count - that is >1sec at 60fps ++#define STATS_LAST_COUNT_MAX 64 ++#define STATS_INTERVAL_MAX (1 << 30) ++static void pts_stats_add(pts_stats_t * const stats, int64_t pts) ++{ ++ if (pts == AV_NOPTS_VALUE || pts == stats->last_pts) { ++ if (stats->last_count < STATS_LAST_COUNT_MAX) ++ ++stats->last_count; ++ return; ++ } ++ ++ if (stats->last_pts != AV_NOPTS_VALUE) { ++ const int64_t interval = pts - stats->last_pts; ++ ++ if (interval < 0 || interval >= STATS_INTERVAL_MAX || ++ stats->last_count >= STATS_LAST_COUNT_MAX) { ++ if (stats->last_interval != 0) ++ av_log(stats->logctx, AV_LOG_DEBUG, "%s: %s: Bad interval: %" PRId64 "/%d\n", ++ __func__, stats->name, interval, stats->last_count); ++ stats->last_interval = 0; ++ } ++ else { ++ const int64_t frame_time = interval / (int64_t)stats->last_count; ++ ++ if (frame_time != stats->last_interval) ++ av_log(stats->logctx, AV_LOG_DEBUG, "%s: %s: New interval: %u->%" PRId64 "/%d=%" PRId64 "\n", ++ __func__, stats->name, stats->last_interval, interval, stats->last_count, frame_time); ++ stats->last_interval = frame_time; ++ } ++ } ++ ++ stats->last_pts = pts; ++ stats->last_count = 1; ++} ++ ++static void pts_stats_init(pts_stats_t * const stats, void * logctx, const char * name) ++{ ++ *stats = (pts_stats_t){ ++ .logctx = logctx, ++ .name = name, ++ .last_count = 1, ++ .last_interval = 0, ++ .last_pts = AV_NOPTS_VALUE ++ }; ++} ++ ++static inline uint32_t pts_track_next_n(pts_track_t * const trk) ++{ ++ if (++trk->n == 0) ++ trk->n = 1; ++ return trk->n; ++} ++ ++static int pts_track_get_frame(pts_track_t * const trk, const struct timeval tv, AVFrame * const dst) ++{ ++ uint32_t n = (uint32_t)(tv.tv_usec / 2 + tv.tv_sec * 500000); ++ pts_track_el_t * t; ++ ++ // As a first guess assume that n==0 means last frame ++ if (n == 0) { ++ n = trk->last_n; ++ if (n == 0) ++ goto fail; ++ } ++ ++ t = trk->a + (n & (PTS_TRACK_SIZE - 1)); ++ ++ if (t->n != n) { ++ av_log(trk->logctx, AV_LOG_ERROR, "%s: track failure: got %u, expected %u\n", __func__, n, trk->n); ++ goto fail; ++ } ++ ++ // 1st frame is simple - just believe it ++ if (n != trk->last_n) { ++ trk->last_n = n; ++ trk->got_2 = 0; ++ return av_frame_copy_props(dst, t->props); ++ } ++ ++ // Only believe in a single interpolated frame ++ if (trk->got_2) ++ goto fail; ++ trk->got_2 = 1; ++ ++ av_frame_copy_props(dst, t->props); ++ ++ ++ // If we can't guess - don't ++ if (t->interval == 0) { ++ dst->best_effort_timestamp = AV_NOPTS_VALUE; ++ dst->pts = AV_NOPTS_VALUE; ++ dst->pkt_dts = AV_NOPTS_VALUE; ++ } ++ else { ++ if (dst->best_effort_timestamp != AV_NOPTS_VALUE) ++ dst->best_effort_timestamp += t->interval / 2; ++ if (dst->pts != AV_NOPTS_VALUE) ++ dst->pts += t->interval / 2; ++ if (dst->pkt_dts != AV_NOPTS_VALUE) ++ dst->pkt_dts += t->interval / 2; ++ } ++ ++ return 0; ++ ++fail: ++ trk->last_n = 0; ++ trk->got_2 = 0; ++ dst->pts = AV_NOPTS_VALUE; ++ dst->pkt_dts = AV_NOPTS_VALUE; ++ return 0; ++} ++ ++// We are only ever expecting in-order frames so nothing more clever is required ++static unsigned int ++pts_track_count(const pts_track_t * const trk) ++{ ++ return (trk->n - trk->last_n) & (PTS_TRACK_SIZE - 1); ++} ++ ++static struct timeval pts_track_add_frame(pts_track_t * const trk, const AVFrame * const src) ++{ ++ const uint32_t n = pts_track_next_n(trk); ++ pts_track_el_t * const t = trk->a + (n & (PTS_TRACK_SIZE - 1)); ++ ++ pts_stats_add(&trk->stats, src->pts); ++ ++ t->n = n; ++ t->interval = pts_stats_interval(&trk->stats); // guess that next interval is the same as the last ++ av_frame_unref(t->props); ++ av_frame_copy_props(t->props, src); ++ ++ // We now know what the previous interval was, rather than having to guess, ++ // so set it. There is a better than decent chance that this is before ++ // we use it. ++ if (t->interval != 0) { ++ pts_track_el_t * const prev_t = trk->a + ((n - 1) & (PTS_TRACK_SIZE - 1)); ++ prev_t->interval = t->interval; ++ } ++ ++ // In case deinterlace interpolates frames use every other usec ++ return (struct timeval){.tv_sec = n / 500000, .tv_usec = (n % 500000) * 2}; ++} ++ ++static void pts_track_uninit(pts_track_t * const trk) ++{ ++ unsigned int i; ++ for (i = 0; i != PTS_TRACK_SIZE; ++i) { ++ trk->a[i].n = 0; ++ av_frame_free(&trk->a[i].props); ++ } ++} ++ ++static int pts_track_init(pts_track_t * const trk, void *logctx) ++{ ++ unsigned int i; ++ trk->n = 1; ++ pts_stats_init(&trk->stats, logctx, "track"); ++ for (i = 0; i != PTS_TRACK_SIZE; ++i) { ++ trk->a[i].n = 0; ++ if ((trk->a[i].props = av_frame_alloc()) == NULL) { ++ pts_track_uninit(trk); ++ return AVERROR(ENOMEM); ++ } ++ } ++ return 0; ++} ++ ++static inline uint32_t ++fmt_bpl(const struct v4l2_format * const fmt, const unsigned int plane_n) ++{ ++ return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.plane_fmt[plane_n].bytesperline : fmt->fmt.pix.bytesperline; ++} ++ ++static inline uint32_t ++fmt_height(const struct v4l2_format * const fmt) ++{ ++ return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.height : fmt->fmt.pix.height; ++} ++ ++static inline uint32_t ++fmt_width(const struct v4l2_format * const fmt) ++{ ++ return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.width : fmt->fmt.pix.width; ++} ++ ++static inline uint32_t ++fmt_pixelformat(const struct v4l2_format * const fmt) ++{ ++ return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.pixelformat : fmt->fmt.pix.pixelformat; ++} ++ ++static inline uint32_t ++buf_bytesused0(const struct v4l2_buffer * const buf) ++{ ++ return V4L2_TYPE_IS_MULTIPLANAR(buf->type) ? buf->m.planes[0].bytesused : buf->bytesused; ++} ++ ++static void ++init_format(V4L2Queue * const q, const uint32_t format_type) ++{ ++ memset(&q->format, 0, sizeof(q->format)); ++ memset(&q->sel, 0, sizeof(q->sel)); ++ q->format.type = format_type; ++ q->sel.type = format_type; ++} ++ ++static int deint_v4l2m2m_prepare_context(DeintV4L2M2MContextShared *ctx) ++{ ++ struct v4l2_capability cap; ++ int ret; ++ ++ memset(&cap, 0, sizeof(cap)); ++ ret = ioctl(ctx->fd, VIDIOC_QUERYCAP, &cap); ++ if (ret < 0) ++ return ret; ++ ++ if (ctx->filter_type == FILTER_V4L2_SCALE && ++ strcmp("bcm2835-codec-isp", cap.card) != 0) ++ { ++ av_log(ctx->logctx, AV_LOG_DEBUG, "Not ISP\n"); ++ return AVERROR(EINVAL); ++ } ++ ++ if (!(cap.capabilities & V4L2_CAP_STREAMING)) { ++ av_log(ctx->logctx, AV_LOG_DEBUG, "No streaming\n"); ++ return AVERROR(EINVAL); ++ } ++ ++ if (cap.capabilities & V4L2_CAP_VIDEO_M2M_MPLANE) { ++ init_format(&ctx->capture, V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE); ++ init_format(&ctx->output, V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE); ++ } ++ else if (cap.capabilities & V4L2_CAP_VIDEO_M2M) { ++ init_format(&ctx->capture, V4L2_BUF_TYPE_VIDEO_CAPTURE); ++ init_format(&ctx->output, V4L2_BUF_TYPE_VIDEO_OUTPUT); ++ } ++ else { ++ av_log(ctx->logctx, AV_LOG_DEBUG, "Not M2M\n"); ++ return AVERROR(EINVAL); ++ } ++ ++ return 0; ++} ++ ++// Just use for probe - doesn't modify q format ++static int deint_v4l2m2m_try_format(V4L2Queue *queue, const uint32_t width, const uint32_t height, const enum AVPixelFormat avfmt) ++{ ++ struct v4l2_format fmt = {.type = queue->format.type}; ++ DeintV4L2M2MContextShared *ctx = queue->ctx; ++ int ret, field; ++ // Pick YUV to test with if not otherwise specified ++ uint32_t pixelformat = avfmt == AV_PIX_FMT_NONE ? V4L2_PIX_FMT_YUV420 : fmt_av_to_v4l2(avfmt); ++ enum AVPixelFormat r_avfmt; ++ ++ ++ ret = ioctl(ctx->fd, VIDIOC_G_FMT, &fmt); ++ if (ret) ++ av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_G_FMT failed: %d\n", ret); ++ ++ if (ctx->filter_type == FILTER_V4L2_DEINTERLACE && V4L2_TYPE_IS_OUTPUT(fmt.type)) ++ field = V4L2_FIELD_INTERLACED_TB; ++ else ++ field = V4L2_FIELD_NONE; ++ ++ if (V4L2_TYPE_IS_MULTIPLANAR(fmt.type)) { ++ fmt.fmt.pix_mp.pixelformat = pixelformat; ++ fmt.fmt.pix_mp.field = field; ++ fmt.fmt.pix_mp.width = width; ++ fmt.fmt.pix_mp.height = height; ++ } else { ++ fmt.fmt.pix.pixelformat = pixelformat; ++ fmt.fmt.pix.field = field; ++ fmt.fmt.pix.width = width; ++ fmt.fmt.pix.height = height; ++ } ++ ++ av_log(ctx->logctx, AV_LOG_TRACE, "%s: Trying format for type %d, wxh: %dx%d, fmt: %08x, size %u bpl %u pre\n", __func__, ++ fmt.type, fmt.fmt.pix_mp.width, fmt.fmt.pix_mp.height, ++ fmt.fmt.pix_mp.pixelformat, ++ fmt.fmt.pix_mp.plane_fmt[0].sizeimage, fmt.fmt.pix_mp.plane_fmt[0].bytesperline); ++ ++ ret = ioctl(ctx->fd, VIDIOC_TRY_FMT, &fmt); ++ if (ret) ++ return AVERROR(EINVAL); ++ ++ av_log(ctx->logctx, AV_LOG_TRACE, "%s: Trying format for type %d, wxh: %dx%d, fmt: %08x, size %u bpl %u post\n", __func__, ++ fmt.type, fmt.fmt.pix_mp.width, fmt.fmt.pix_mp.height, ++ fmt.fmt.pix_mp.pixelformat, ++ fmt.fmt.pix_mp.plane_fmt[0].sizeimage, fmt.fmt.pix_mp.plane_fmt[0].bytesperline); ++ ++ r_avfmt = fmt_v4l2_to_av(fmt_pixelformat(&fmt)); ++ if (r_avfmt != avfmt && avfmt != AV_PIX_FMT_NONE) { ++ av_log(ctx->logctx, AV_LOG_DEBUG, "Unable to set format %s on %s port\n", av_get_pix_fmt_name(avfmt), V4L2_TYPE_IS_CAPTURE(fmt.type) ? "dest" : "src"); ++ return AVERROR(EINVAL); ++ } ++ if (r_avfmt == AV_PIX_FMT_NONE) { ++ av_log(ctx->logctx, AV_LOG_DEBUG, "No supported format on %s port\n", V4L2_TYPE_IS_CAPTURE(fmt.type) ? "dest" : "src"); ++ return AVERROR(EINVAL); ++ } ++ ++ if (V4L2_TYPE_IS_MULTIPLANAR(fmt.type)) { ++ if (fmt.fmt.pix_mp.field != field) { ++ av_log(ctx->logctx, AV_LOG_DEBUG, "format not supported for type %d\n", fmt.type); ++ ++ return AVERROR(EINVAL); ++ } ++ } else { ++ if (fmt.fmt.pix.field != field) { ++ av_log(ctx->logctx, AV_LOG_DEBUG, "format not supported for type %d\n", fmt.type); ++ ++ return AVERROR(EINVAL); ++ } ++ } ++ ++ return 0; ++} ++ ++static int ++do_s_fmt(V4L2Queue * const q) ++{ ++ DeintV4L2M2MContextShared * const ctx = q->ctx; ++ const uint32_t pixelformat = fmt_pixelformat(&q->format); ++ int ret; ++ ++ ret = ioctl(ctx->fd, VIDIOC_S_FMT, &q->format); ++ if (ret) { ++ ret = AVERROR(errno); ++ av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_S_FMT failed: %s\n", av_err2str(ret)); ++ return ret; ++ } ++ ++ if (pixelformat != fmt_pixelformat(&q->format)) { ++ av_log(ctx->logctx, AV_LOG_ERROR, "Format not supported: %s; S_FMT returned %s\n", av_fourcc2str(pixelformat), av_fourcc2str(fmt_pixelformat(&q->format))); ++ return AVERROR(EINVAL); ++ } ++ ++ q->sel.target = V4L2_TYPE_IS_OUTPUT(q->sel.type) ? V4L2_SEL_TGT_CROP : V4L2_SEL_TGT_COMPOSE, ++ q->sel.flags = V4L2_TYPE_IS_OUTPUT(q->sel.type) ? V4L2_SEL_FLAG_LE : V4L2_SEL_FLAG_GE; ++ ++ ret = ioctl(ctx->fd, VIDIOC_S_SELECTION, &q->sel); ++ if (ret) { ++ ret = AVERROR(errno); ++ av_log(ctx->logctx, AV_LOG_WARNING, "VIDIOC_S_SELECTION failed: %s\n", av_err2str(ret)); ++ } ++ ++ return 0; ++} ++ ++static void ++set_fmt_color(struct v4l2_format *const fmt, ++ const enum AVColorPrimaries avcp, ++ const enum AVColorSpace avcs, ++ const enum AVColorTransferCharacteristic avxc) ++{ ++ enum v4l2_ycbcr_encoding ycbcr = V4L2_YCBCR_ENC_DEFAULT; ++ enum v4l2_colorspace cs = V4L2_COLORSPACE_DEFAULT; ++ enum v4l2_xfer_func xfer = V4L2_XFER_FUNC_DEFAULT; ++ ++ switch (avcp) { ++ case AVCOL_PRI_BT709: ++ cs = V4L2_COLORSPACE_REC709; ++ ycbcr = V4L2_YCBCR_ENC_709; ++ break; ++ case AVCOL_PRI_BT470M: ++ cs = V4L2_COLORSPACE_470_SYSTEM_M; ++ ycbcr = V4L2_YCBCR_ENC_601; ++ break; ++ case AVCOL_PRI_BT470BG: ++ cs = V4L2_COLORSPACE_470_SYSTEM_BG; ++ break; ++ case AVCOL_PRI_SMPTE170M: ++ cs = V4L2_COLORSPACE_SMPTE170M; ++ break; ++ case AVCOL_PRI_SMPTE240M: ++ cs = V4L2_COLORSPACE_SMPTE240M; ++ break; ++ case AVCOL_PRI_BT2020: ++ cs = V4L2_COLORSPACE_BT2020; ++ break; ++ case AVCOL_PRI_SMPTE428: ++ case AVCOL_PRI_SMPTE431: ++ case AVCOL_PRI_SMPTE432: ++ case AVCOL_PRI_EBU3213: ++ case AVCOL_PRI_RESERVED: ++ case AVCOL_PRI_FILM: ++ case AVCOL_PRI_UNSPECIFIED: ++ default: ++ break; ++ } ++ ++ switch (avcs) { ++ case AVCOL_SPC_RGB: ++ cs = V4L2_COLORSPACE_SRGB; ++ break; ++ case AVCOL_SPC_BT709: ++ cs = V4L2_COLORSPACE_REC709; ++ break; ++ case AVCOL_SPC_FCC: ++ cs = V4L2_COLORSPACE_470_SYSTEM_M; ++ break; ++ case AVCOL_SPC_BT470BG: ++ cs = V4L2_COLORSPACE_470_SYSTEM_BG; ++ break; ++ case AVCOL_SPC_SMPTE170M: ++ cs = V4L2_COLORSPACE_SMPTE170M; ++ break; ++ case AVCOL_SPC_SMPTE240M: ++ cs = V4L2_COLORSPACE_SMPTE240M; ++ break; ++ case AVCOL_SPC_BT2020_CL: ++ cs = V4L2_COLORSPACE_BT2020; ++ ycbcr = V4L2_YCBCR_ENC_BT2020_CONST_LUM; ++ break; ++ case AVCOL_SPC_BT2020_NCL: ++ cs = V4L2_COLORSPACE_BT2020; ++ break; ++ default: ++ break; ++ } ++ ++ switch (xfer) { ++ case AVCOL_TRC_BT709: ++ xfer = V4L2_XFER_FUNC_709; ++ break; ++ case AVCOL_TRC_IEC61966_2_1: ++ xfer = V4L2_XFER_FUNC_SRGB; ++ break; ++ case AVCOL_TRC_SMPTE240M: ++ xfer = V4L2_XFER_FUNC_SMPTE240M; ++ break; ++ case AVCOL_TRC_SMPTE2084: ++ xfer = V4L2_XFER_FUNC_SMPTE2084; ++ break; ++ default: ++ break; ++ } ++ ++ if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) { ++ fmt->fmt.pix_mp.colorspace = cs; ++ fmt->fmt.pix_mp.ycbcr_enc = ycbcr; ++ fmt->fmt.pix_mp.xfer_func = xfer; ++ } else { ++ fmt->fmt.pix.colorspace = cs; ++ fmt->fmt.pix.ycbcr_enc = ycbcr; ++ fmt->fmt.pix.xfer_func = xfer; ++ } ++} ++ ++static void ++set_fmt_color_range(struct v4l2_format *const fmt, const enum AVColorRange avcr) ++{ ++ const enum v4l2_quantization q = ++ avcr == AVCOL_RANGE_MPEG ? V4L2_QUANTIZATION_LIM_RANGE : ++ avcr == AVCOL_RANGE_JPEG ? V4L2_QUANTIZATION_FULL_RANGE : ++ V4L2_QUANTIZATION_DEFAULT; ++ ++ if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) { ++ fmt->fmt.pix_mp.quantization = q; ++ } else { ++ fmt->fmt.pix.quantization = q; ++ } ++} ++ ++static enum AVColorPrimaries get_color_primaries(const struct v4l2_format *const fmt) ++{ ++ enum v4l2_ycbcr_encoding ycbcr; ++ enum v4l2_colorspace cs; ++ ++ cs = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? ++ fmt->fmt.pix_mp.colorspace : ++ fmt->fmt.pix.colorspace; ++ ++ ycbcr = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? ++ fmt->fmt.pix_mp.ycbcr_enc: ++ fmt->fmt.pix.ycbcr_enc; ++ ++ switch(ycbcr) { ++ case V4L2_YCBCR_ENC_XV709: ++ case V4L2_YCBCR_ENC_709: return AVCOL_PRI_BT709; ++ case V4L2_YCBCR_ENC_XV601: ++ case V4L2_YCBCR_ENC_601:return AVCOL_PRI_BT470M; ++ default: ++ break; ++ } ++ ++ switch(cs) { ++ case V4L2_COLORSPACE_470_SYSTEM_BG: return AVCOL_PRI_BT470BG; ++ case V4L2_COLORSPACE_SMPTE170M: return AVCOL_PRI_SMPTE170M; ++ case V4L2_COLORSPACE_SMPTE240M: return AVCOL_PRI_SMPTE240M; ++ case V4L2_COLORSPACE_BT2020: return AVCOL_PRI_BT2020; ++ default: ++ break; ++ } ++ ++ return AVCOL_PRI_UNSPECIFIED; ++} ++ ++static enum AVColorSpace get_color_space(const struct v4l2_format *const fmt) ++{ ++ enum v4l2_ycbcr_encoding ycbcr; ++ enum v4l2_colorspace cs; ++ ++ cs = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? ++ fmt->fmt.pix_mp.colorspace : ++ fmt->fmt.pix.colorspace; ++ ++ ycbcr = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? ++ fmt->fmt.pix_mp.ycbcr_enc: ++ fmt->fmt.pix.ycbcr_enc; ++ ++ switch(cs) { ++ case V4L2_COLORSPACE_SRGB: return AVCOL_SPC_RGB; ++ case V4L2_COLORSPACE_REC709: return AVCOL_SPC_BT709; ++ case V4L2_COLORSPACE_470_SYSTEM_M: return AVCOL_SPC_FCC; ++ case V4L2_COLORSPACE_470_SYSTEM_BG: return AVCOL_SPC_BT470BG; ++ case V4L2_COLORSPACE_SMPTE170M: return AVCOL_SPC_SMPTE170M; ++ case V4L2_COLORSPACE_SMPTE240M: return AVCOL_SPC_SMPTE240M; ++ case V4L2_COLORSPACE_BT2020: ++ if (ycbcr == V4L2_YCBCR_ENC_BT2020_CONST_LUM) ++ return AVCOL_SPC_BT2020_CL; ++ else ++ return AVCOL_SPC_BT2020_NCL; ++ default: ++ break; ++ } ++ ++ return AVCOL_SPC_UNSPECIFIED; ++} ++ ++static enum AVColorTransferCharacteristic get_color_trc(const struct v4l2_format *const fmt) ++{ ++ enum v4l2_ycbcr_encoding ycbcr; ++ enum v4l2_xfer_func xfer; ++ enum v4l2_colorspace cs; ++ ++ cs = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? ++ fmt->fmt.pix_mp.colorspace : ++ fmt->fmt.pix.colorspace; ++ ++ ycbcr = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? ++ fmt->fmt.pix_mp.ycbcr_enc: ++ fmt->fmt.pix.ycbcr_enc; ++ ++ xfer = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? ++ fmt->fmt.pix_mp.xfer_func: ++ fmt->fmt.pix.xfer_func; ++ ++ switch (xfer) { ++ case V4L2_XFER_FUNC_709: return AVCOL_TRC_BT709; ++ case V4L2_XFER_FUNC_SRGB: return AVCOL_TRC_IEC61966_2_1; ++ default: ++ break; ++ } ++ ++ switch (cs) { ++ case V4L2_COLORSPACE_470_SYSTEM_M: return AVCOL_TRC_GAMMA22; ++ case V4L2_COLORSPACE_470_SYSTEM_BG: return AVCOL_TRC_GAMMA28; ++ case V4L2_COLORSPACE_SMPTE170M: return AVCOL_TRC_SMPTE170M; ++ case V4L2_COLORSPACE_SMPTE240M: return AVCOL_TRC_SMPTE240M; ++ default: ++ break; ++ } ++ ++ switch (ycbcr) { ++ case V4L2_YCBCR_ENC_XV709: ++ case V4L2_YCBCR_ENC_XV601: return AVCOL_TRC_BT1361_ECG; ++ default: ++ break; ++ } ++ ++ return AVCOL_TRC_UNSPECIFIED; ++} ++ ++static enum AVColorRange get_color_range(const struct v4l2_format *const fmt) ++{ ++ enum v4l2_quantization qt; ++ ++ qt = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? ++ fmt->fmt.pix_mp.quantization : ++ fmt->fmt.pix.quantization; ++ ++ switch (qt) { ++ case V4L2_QUANTIZATION_LIM_RANGE: return AVCOL_RANGE_MPEG; ++ case V4L2_QUANTIZATION_FULL_RANGE: return AVCOL_RANGE_JPEG; ++ default: ++ break; ++ } ++ ++ return AVCOL_RANGE_UNSPECIFIED; ++} ++ ++static int set_src_fmt(V4L2Queue * const q, const AVFrame * const frame) ++{ ++ struct v4l2_format *const format = &q->format; ++ const AVDRMFrameDescriptor *const src = (const AVDRMFrameDescriptor *)frame->data[0]; ++ ++ const uint32_t drm_fmt = src->layers[0].format; ++ // Treat INVALID as LINEAR ++ const uint64_t mod = src->objects[0].format_modifier == DRM_FORMAT_MOD_INVALID ? ++ DRM_FORMAT_MOD_LINEAR : src->objects[0].format_modifier; ++ uint32_t pix_fmt = 0; ++ uint32_t w = 0; ++ uint32_t h = 0; ++ uint32_t bpl = src->layers[0].planes[0].pitch; ++ ++ // We really don't expect multiple layers ++ // All formats that we currently cope with are single object ++ ++ if (src->nb_layers != 1 || src->nb_objects != 1) ++ return AVERROR(EINVAL); ++ ++ switch (drm_fmt) { ++ case DRM_FORMAT_YUV420: ++ if (mod == DRM_FORMAT_MOD_LINEAR) { ++ if (src->layers[0].nb_planes != 3) ++ break; ++ pix_fmt = V4L2_PIX_FMT_YUV420; ++ h = src->layers[0].planes[1].offset / bpl; ++ w = bpl; ++ } ++ break; ++ ++ case DRM_FORMAT_NV12: ++ if (mod == DRM_FORMAT_MOD_LINEAR) { ++ if (src->layers[0].nb_planes != 2) ++ break; ++ pix_fmt = V4L2_PIX_FMT_NV12; ++ h = src->layers[0].planes[1].offset / bpl; ++ w = bpl; ++ } ++#if CONFIG_SAND ++ else if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) { ++ if (src->layers[0].nb_planes != 2) ++ break; ++ pix_fmt = V4L2_PIX_FMT_NV12_COL128; ++ w = bpl; ++ h = src->layers[0].planes[1].offset / 128; ++ bpl = fourcc_mod_broadcom_param(mod); ++ } ++#endif ++ break; ++ ++ case DRM_FORMAT_P030: ++#if CONFIG_SAND ++ if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) { ++ if (src->layers[0].nb_planes != 2) ++ break; ++ pix_fmt = V4L2_PIX_FMT_NV12_10_COL128; ++ w = bpl / 2; // Matching lie to how we construct this ++ h = src->layers[0].planes[1].offset / 128; ++ bpl = fourcc_mod_broadcom_param(mod); ++ } ++#endif ++ break; ++ ++ default: ++ break; ++ } ++ ++ if (!pix_fmt) ++ return AVERROR(EINVAL); ++ ++ if (V4L2_TYPE_IS_MULTIPLANAR(format->type)) { ++ struct v4l2_pix_format_mplane *const pix = &format->fmt.pix_mp; ++ ++ pix->width = w; ++ pix->height = h; ++ pix->pixelformat = pix_fmt; ++ pix->plane_fmt[0].bytesperline = bpl; ++ pix->num_planes = 1; ++ } ++ else { ++ struct v4l2_pix_format *const pix = &format->fmt.pix; ++ ++ pix->width = w; ++ pix->height = h; ++ pix->pixelformat = pix_fmt; ++ pix->bytesperline = bpl; ++ } ++ ++ set_fmt_color(format, frame->color_primaries, frame->colorspace, frame->color_trc); ++ set_fmt_color_range(format, frame->color_range); ++ ++ q->sel.r.width = frame->width - (frame->crop_left + frame->crop_right); ++ q->sel.r.height = frame->height - (frame->crop_top + frame->crop_bottom); ++ q->sel.r.left = frame->crop_left; ++ q->sel.r.top = frame->crop_top; ++ ++ return 0; ++} ++ ++ ++static int set_dst_format(DeintV4L2M2MContext * const priv, V4L2Queue *queue, uint32_t pixelformat, uint32_t field, int width, int height) ++{ ++ struct v4l2_format * const fmt = &queue->format; ++ struct v4l2_selection *const sel = &queue->sel; ++ ++ memset(&fmt->fmt, 0, sizeof(fmt->fmt)); ++ ++ // Align w/h to 16 here in case there are alignment requirements at the next ++ // stage of the filter chain (also RPi deinterlace setup is bust and this ++ // fixes it) ++ if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) { ++ fmt->fmt.pix_mp.pixelformat = pixelformat; ++ fmt->fmt.pix_mp.field = field; ++ fmt->fmt.pix_mp.width = FFALIGN(width, 16); ++ fmt->fmt.pix_mp.height = FFALIGN(height, 16); ++ } else { ++ fmt->fmt.pix.pixelformat = pixelformat; ++ fmt->fmt.pix.field = field; ++ fmt->fmt.pix.width = FFALIGN(width, 16); ++ fmt->fmt.pix.height = FFALIGN(height, 16); ++ } ++ ++ set_fmt_color(fmt, priv->colour_primaries, priv->colour_matrix, priv->colour_transfer); ++ set_fmt_color_range(fmt, priv->colour_range); ++ ++ sel->r.width = width; ++ sel->r.height = height; ++ sel->r.left = 0; ++ sel->r.top = 0; ++ ++ return do_s_fmt(queue); ++} ++ ++static int deint_v4l2m2m_probe_device(DeintV4L2M2MContextShared *ctx, char *node) ++{ ++ int ret; ++ ++ ctx->fd = open(node, O_RDWR | O_NONBLOCK, 0); ++ if (ctx->fd < 0) ++ return AVERROR(errno); ++ ++ ret = deint_v4l2m2m_prepare_context(ctx); ++ if (ret) { ++ av_log(ctx->logctx, AV_LOG_DEBUG, "Failed to prepare context\n"); ++ goto fail; ++ } ++ ++ ret = deint_v4l2m2m_try_format(&ctx->capture, ctx->output_width, ctx->output_height, ctx->output_format); ++ if (ret) { ++ av_log(ctx->logctx, AV_LOG_DEBUG, "Failed to try dst format\n"); ++ goto fail; ++ } ++ ++ ret = deint_v4l2m2m_try_format(&ctx->output, ctx->width, ctx->height, AV_PIX_FMT_NONE); ++ if (ret) { ++ av_log(ctx->logctx, AV_LOG_DEBUG, "Failed to try src format\n"); ++ goto fail; ++ } ++ ++ return 0; ++ ++fail: ++ close(ctx->fd); ++ ctx->fd = -1; ++ ++ return ret; ++} ++ ++static int deint_v4l2m2m_find_device(DeintV4L2M2MContextShared *ctx) ++{ ++ int ret = AVERROR(EINVAL); ++ struct dirent *entry; ++ char node[PATH_MAX]; ++ DIR *dirp; ++ ++ dirp = opendir("/dev"); ++ if (!dirp) ++ return AVERROR(errno); ++ ++ for (entry = readdir(dirp); entry; entry = readdir(dirp)) { ++ ++ if (strncmp(entry->d_name, "video", 5)) ++ continue; ++ ++ snprintf(node, sizeof(node), "/dev/%s", entry->d_name); ++ av_log(ctx->logctx, AV_LOG_DEBUG, "probing device %s\n", node); ++ ret = deint_v4l2m2m_probe_device(ctx, node); ++ if (!ret) ++ break; ++ } ++ ++ closedir(dirp); ++ ++ if (ret) { ++ av_log(ctx->logctx, AV_LOG_ERROR, "Could not find a valid device\n"); ++ ctx->fd = -1; ++ ++ return ret; ++ } ++ ++ av_log(ctx->logctx, AV_LOG_INFO, "Using device %s\n", node); ++ ++ return 0; ++} ++ ++static int deint_v4l2m2m_enqueue_buffer(V4L2Buffer *buf) ++{ ++ int ret; ++ ++ ret = ioctl(buf->q->ctx->fd, VIDIOC_QBUF, &buf->buffer); ++ if (ret < 0) ++ return AVERROR(errno); ++ ++ buf->enqueued = 1; ++ ++ return 0; ++} ++ ++static void ++drm_frame_init(AVDRMFrameDescriptor * const d) ++{ ++ unsigned int i; ++ for (i = 0; i != AV_DRM_MAX_PLANES; ++i) { ++ d->objects[i].fd = -1; ++ } ++} ++ ++static void ++drm_frame_uninit(AVDRMFrameDescriptor * const d) ++{ ++ unsigned int i; ++ for (i = 0; i != d->nb_objects; ++i) { ++ if (d->objects[i].fd != -1) { ++ close(d->objects[i].fd); ++ d->objects[i].fd = -1; ++ } ++ } ++} ++ ++static void ++avbufs_delete(V4L2Buffer** ppavbufs, const unsigned int n) ++{ ++ unsigned int i; ++ V4L2Buffer* const avbufs = *ppavbufs; ++ ++ if (avbufs == NULL) ++ return; ++ *ppavbufs = NULL; ++ ++ for (i = 0; i != n; ++i) { ++ V4L2Buffer* const avbuf = avbufs + i; ++ drm_frame_uninit(&avbuf->drm_frame); ++ } ++ ++ av_free(avbufs); ++} ++ ++static int v4l2_buffer_export_drm(V4L2Queue * const q, V4L2Buffer * const avbuf) ++{ ++ struct v4l2_exportbuffer expbuf; ++ int i, ret; ++ uint64_t mod = DRM_FORMAT_MOD_LINEAR; ++ ++ AVDRMFrameDescriptor * const drm_desc = &avbuf->drm_frame; ++ AVDRMLayerDescriptor * const layer = &drm_desc->layers[0]; ++ const struct v4l2_format *const fmt = &q->format; ++ const uint32_t height = fmt_height(fmt); ++ ptrdiff_t bpl0; ++ ++ /* fill the DRM frame descriptor */ ++ drm_desc->nb_layers = 1; ++ layer->nb_planes = avbuf->num_planes; ++ ++ for (int i = 0; i < avbuf->num_planes; i++) { ++ layer->planes[i].object_index = i; ++ layer->planes[i].offset = 0; ++ layer->planes[i].pitch = fmt_bpl(fmt, i); ++ } ++ bpl0 = layer->planes[0].pitch; ++ ++ switch (fmt_pixelformat(fmt)) { ++#if CONFIG_SAND ++ case V4L2_PIX_FMT_NV12_COL128: ++ mod = DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(bpl0); ++ layer->format = V4L2_PIX_FMT_NV12; ++ ++ if (avbuf->num_planes > 1) ++ break; ++ ++ layer->nb_planes = 2; ++ layer->planes[1].object_index = 0; ++ layer->planes[1].offset = height * 128; ++ layer->planes[0].pitch = fmt_width(fmt); ++ layer->planes[1].pitch = layer->planes[0].pitch; ++ break; ++#endif ++ ++ case DRM_FORMAT_NV12: ++ layer->format = V4L2_PIX_FMT_NV12; ++ ++ if (avbuf->num_planes > 1) ++ break; ++ ++ layer->nb_planes = 2; ++ layer->planes[1].object_index = 0; ++ layer->planes[1].offset = bpl0 * height; ++ layer->planes[1].pitch = bpl0; ++ break; ++ ++ case V4L2_PIX_FMT_YUV420: ++ layer->format = DRM_FORMAT_YUV420; ++ ++ if (avbuf->num_planes > 1) ++ break; ++ ++ layer->nb_planes = 3; ++ layer->planes[1].object_index = 0; ++ layer->planes[1].offset = bpl0 * height; ++ layer->planes[1].pitch = bpl0 / 2; ++ layer->planes[2].object_index = 0; ++ layer->planes[2].offset = layer->planes[1].offset + ((bpl0 * height) / 4); ++ layer->planes[2].pitch = bpl0 / 2; ++ break; ++ ++ default: ++ drm_desc->nb_layers = 0; ++ return AVERROR(EINVAL); ++ } ++ ++ drm_desc->nb_objects = 0; ++ for (i = 0; i < avbuf->num_planes; i++) { ++ memset(&expbuf, 0, sizeof(expbuf)); ++ ++ expbuf.index = avbuf->buffer.index; ++ expbuf.type = avbuf->buffer.type; ++ expbuf.plane = i; ++ ++ ret = ioctl(avbuf->q->ctx->fd, VIDIOC_EXPBUF, &expbuf); ++ if (ret < 0) ++ return AVERROR(errno); ++ ++ drm_desc->objects[i].size = V4L2_TYPE_IS_MULTIPLANAR(avbuf->buffer.type) ? ++ avbuf->buffer.m.planes[i].length : avbuf->buffer.length; ++ drm_desc->objects[i].fd = expbuf.fd; ++ drm_desc->objects[i].format_modifier = mod; ++ drm_desc->nb_objects = i + 1; ++ } ++ ++ return 0; ++} ++ ++static int deint_v4l2m2m_allocate_buffers(V4L2Queue *queue) ++{ ++ struct v4l2_format *fmt = &queue->format; ++ DeintV4L2M2MContextShared *ctx = queue->ctx; ++ struct v4l2_requestbuffers req; ++ int ret, i, multiplanar; ++ uint32_t memory; ++ ++ memory = V4L2_TYPE_IS_OUTPUT(fmt->type) ? ++ V4L2_MEMORY_DMABUF : V4L2_MEMORY_MMAP; ++ ++ multiplanar = V4L2_TYPE_IS_MULTIPLANAR(fmt->type); ++ ++ memset(&req, 0, sizeof(req)); ++ req.count = queue->num_buffers; ++ req.memory = memory; ++ req.type = fmt->type; ++ ++ ret = ioctl(ctx->fd, VIDIOC_REQBUFS, &req); ++ if (ret < 0) { ++ av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_REQBUFS failed: %s\n", strerror(errno)); ++ ++ return AVERROR(errno); ++ } ++ ++ queue->num_buffers = req.count; ++ queue->buffers = av_mallocz(queue->num_buffers * sizeof(V4L2Buffer)); ++ if (!queue->buffers) { ++ av_log(ctx->logctx, AV_LOG_ERROR, "malloc enomem\n"); ++ ++ return AVERROR(ENOMEM); ++ } ++ ++ for (i = 0; i < queue->num_buffers; i++) { ++ V4L2Buffer * const buf = &queue->buffers[i]; ++ ++ buf->enqueued = 0; ++ buf->q = queue; ++ ++ buf->buffer.type = fmt->type; ++ buf->buffer.memory = memory; ++ buf->buffer.index = i; ++ ++ if (multiplanar) { ++ buf->buffer.length = VIDEO_MAX_PLANES; ++ buf->buffer.m.planes = buf->planes; ++ } ++ ++ drm_frame_init(&buf->drm_frame); ++ } ++ ++ for (i = 0; i < queue->num_buffers; i++) { ++ V4L2Buffer * const buf = &queue->buffers[i]; ++ ++ ret = ioctl(ctx->fd, VIDIOC_QUERYBUF, &buf->buffer); ++ if (ret < 0) { ++ ret = AVERROR(errno); ++ ++ goto fail; ++ } ++ ++ buf->num_planes = multiplanar ? buf->buffer.length : 1; ++ ++ if (!V4L2_TYPE_IS_OUTPUT(fmt->type)) { ++ ret = deint_v4l2m2m_enqueue_buffer(buf); ++ if (ret) ++ goto fail; ++ ++ ret = v4l2_buffer_export_drm(queue, buf); ++ if (ret) ++ goto fail; ++ } ++ } ++ ++ return 0; ++ ++fail: ++ avbufs_delete(&queue->buffers, queue->num_buffers); ++ queue->num_buffers = 0; ++ return ret; ++} ++ ++static int deint_v4l2m2m_streamon(V4L2Queue *queue) ++{ ++ DeintV4L2M2MContextShared * const ctx = queue->ctx; ++ int type = queue->format.type; ++ int ret; ++ ++ ret = ioctl(ctx->fd, VIDIOC_STREAMON, &type); ++ av_log(ctx->logctx, AV_LOG_DEBUG, "%s: type:%d ret:%d errno:%d\n", __func__, type, ret, AVERROR(errno)); ++ if (ret < 0) ++ return AVERROR(errno); ++ ++ return 0; ++} ++ ++static int deint_v4l2m2m_streamoff(V4L2Queue *queue) ++{ ++ DeintV4L2M2MContextShared * const ctx = queue->ctx; ++ int type = queue->format.type; ++ int ret; ++ ++ ret = ioctl(ctx->fd, VIDIOC_STREAMOFF, &type); ++ av_log(ctx->logctx, AV_LOG_DEBUG, "%s: type:%d ret:%d errno:%d\n", __func__, type, ret, AVERROR(errno)); ++ if (ret < 0) ++ return AVERROR(errno); ++ ++ return 0; ++} ++ ++// timeout in ms ++static V4L2Buffer* deint_v4l2m2m_dequeue_buffer(V4L2Queue *queue, int timeout) ++{ ++ struct v4l2_plane planes[VIDEO_MAX_PLANES]; ++ DeintV4L2M2MContextShared *ctx = queue->ctx; ++ struct v4l2_buffer buf = { 0 }; ++ V4L2Buffer* avbuf = NULL; ++ struct pollfd pfd; ++ short events; ++ int ret; ++ ++ if (V4L2_TYPE_IS_OUTPUT(queue->format.type)) ++ events = POLLOUT | POLLWRNORM; ++ else ++ events = POLLIN | POLLRDNORM; ++ ++ pfd.events = events; ++ pfd.fd = ctx->fd; ++ ++ for (;;) { ++ ret = poll(&pfd, 1, timeout); ++ if (ret > 0) ++ break; ++ if (errno == EINTR) ++ continue; ++ return NULL; ++ } ++ ++ if (pfd.revents & POLLERR) ++ return NULL; ++ ++ if (pfd.revents & events) { ++ memset(&buf, 0, sizeof(buf)); ++ buf.memory = V4L2_MEMORY_MMAP; ++ buf.type = queue->format.type; ++ if (V4L2_TYPE_IS_MULTIPLANAR(queue->format.type)) { ++ memset(planes, 0, sizeof(planes)); ++ buf.length = VIDEO_MAX_PLANES; ++ buf.m.planes = planes; ++ } ++ ++ ret = ioctl(ctx->fd, VIDIOC_DQBUF, &buf); ++ if (ret) { ++ if (errno != EAGAIN) ++ av_log(ctx->logctx, AV_LOG_DEBUG, "VIDIOC_DQBUF, errno (%s)\n", ++ av_err2str(AVERROR(errno))); ++ return NULL; ++ } ++ ++ avbuf = &queue->buffers[buf.index]; ++ avbuf->enqueued = 0; ++ avbuf->buffer = buf; ++ if (V4L2_TYPE_IS_MULTIPLANAR(queue->format.type)) { ++ memcpy(avbuf->planes, planes, sizeof(planes)); ++ avbuf->buffer.m.planes = avbuf->planes; ++ } ++ return avbuf; ++ } ++ ++ return NULL; ++} ++ ++static V4L2Buffer *deint_v4l2m2m_find_free_buf(V4L2Queue *queue) ++{ ++ int i; ++ V4L2Buffer *buf = NULL; ++ ++ for (i = 0; i < queue->num_buffers; i++) ++ if (!queue->buffers[i].enqueued) { ++ buf = &queue->buffers[i]; ++ break; ++ } ++ return buf; ++} ++ ++static void deint_v4l2m2m_unref_queued(V4L2Queue *queue) ++{ ++ int i; ++ V4L2Buffer *buf = NULL; ++ ++ if (!queue || !queue->buffers) ++ return; ++ for (i = 0; i < queue->num_buffers; i++) { ++ buf = &queue->buffers[i]; ++ if (queue->buffers[i].enqueued) ++ av_frame_unref(&buf->frame); ++ } ++} ++ ++static void recycle_q(V4L2Queue * const queue) ++{ ++ V4L2Buffer* avbuf; ++ while (avbuf = deint_v4l2m2m_dequeue_buffer(queue, 0), avbuf) { ++ av_frame_unref(&avbuf->frame); ++ } ++} ++ ++static int count_enqueued(V4L2Queue *queue) ++{ ++ int i; ++ int n = 0; ++ ++ if (queue->buffers == NULL) ++ return 0; ++ ++ for (i = 0; i < queue->num_buffers; i++) ++ if (queue->buffers[i].enqueued) ++ ++n; ++ return n; ++} ++ ++static int deint_v4l2m2m_enqueue_frame(V4L2Queue * const queue, AVFrame * const frame) ++{ ++ DeintV4L2M2MContextShared *const ctx = queue->ctx; ++ AVDRMFrameDescriptor *drm_desc = (AVDRMFrameDescriptor *)frame->data[0]; ++ V4L2Buffer *buf; ++ int i; ++ ++ if (V4L2_TYPE_IS_OUTPUT(queue->format.type)) ++ recycle_q(queue); ++ ++ buf = deint_v4l2m2m_find_free_buf(queue); ++ if (!buf) { ++ av_log(ctx->logctx, AV_LOG_ERROR, "%s: error %d finding free buf\n", __func__, 0); ++ return AVERROR(EAGAIN); ++ } ++ if (V4L2_TYPE_IS_MULTIPLANAR(buf->buffer.type)) ++ for (i = 0; i < drm_desc->nb_objects; i++) ++ buf->buffer.m.planes[i].m.fd = drm_desc->objects[i].fd; ++ else ++ buf->buffer.m.fd = drm_desc->objects[0].fd; ++ ++ buf->buffer.field = !frame->interlaced_frame ? V4L2_FIELD_NONE : ++ frame->top_field_first ? V4L2_FIELD_INTERLACED_TB : ++ V4L2_FIELD_INTERLACED_BT; ++ ++ if (ctx->field_order != buf->buffer.field) { ++ av_log(ctx->logctx, AV_LOG_DEBUG, "%s: Field changed: %d->%d\n", __func__, ctx->field_order, buf->buffer.field); ++ ctx->field_order = buf->buffer.field; ++ } ++ ++ buf->buffer.timestamp = pts_track_add_frame(&ctx->track, frame); ++ ++ buf->drm_frame.objects[0].fd = drm_desc->objects[0].fd; ++ ++ av_frame_move_ref(&buf->frame, frame); ++ ++ return deint_v4l2m2m_enqueue_buffer(buf); ++} ++ ++static void deint_v4l2m2m_destroy_context(DeintV4L2M2MContextShared *ctx) ++{ ++ if (atomic_fetch_sub(&ctx->refcount, 1) == 1) { ++ V4L2Queue *capture = &ctx->capture; ++ V4L2Queue *output = &ctx->output; ++ ++ av_log(NULL, AV_LOG_DEBUG, "%s - destroying context\n", __func__); ++ ++ if (ctx->fd >= 0) { ++ deint_v4l2m2m_streamoff(capture); ++ deint_v4l2m2m_streamoff(output); ++ } ++ ++ avbufs_delete(&capture->buffers, capture->num_buffers); ++ ++ deint_v4l2m2m_unref_queued(output); ++ ++ av_buffer_unref(&ctx->hw_frames_ctx); ++ ++ if (capture->buffers) ++ av_free(capture->buffers); ++ ++ if (output->buffers) ++ av_free(output->buffers); ++ ++ if (ctx->fd >= 0) { ++ close(ctx->fd); ++ ctx->fd = -1; ++ } ++ ++ av_free(ctx); ++ } ++} ++ ++static void v4l2_free_buffer(void *opaque, uint8_t *unused) ++{ ++ V4L2Buffer *buf = opaque; ++ DeintV4L2M2MContextShared *ctx = buf->q->ctx; ++ ++ if (!ctx->done) ++ deint_v4l2m2m_enqueue_buffer(buf); ++ ++ deint_v4l2m2m_destroy_context(ctx); ++} ++ ++// timeout in ms ++static int deint_v4l2m2m_dequeue_frame(V4L2Queue *queue, AVFrame* frame, int timeout) ++{ ++ DeintV4L2M2MContextShared *ctx = queue->ctx; ++ V4L2Buffer* avbuf; ++ enum AVColorPrimaries color_primaries; ++ enum AVColorSpace colorspace; ++ enum AVColorTransferCharacteristic color_trc; ++ enum AVColorRange color_range; ++ ++ av_log(ctx->logctx, AV_LOG_TRACE, "<<< %s\n", __func__); ++ ++ if (queue->eos) { ++ av_log(ctx->logctx, AV_LOG_TRACE, ">>> %s: EOS\n", __func__); ++ return AVERROR_EOF; ++ } ++ ++ avbuf = deint_v4l2m2m_dequeue_buffer(queue, timeout); ++ if (!avbuf) { ++ av_log(ctx->logctx, AV_LOG_DEBUG, "%s: No buffer to dequeue (timeout=%d)\n", __func__, timeout); ++ return AVERROR(EAGAIN); ++ } ++ ++ if (V4L2_TYPE_IS_CAPTURE(avbuf->buffer.type)) { ++ if ((avbuf->buffer.flags & V4L2_BUF_FLAG_LAST) != 0) ++ queue->eos = 1; ++ if (buf_bytesused0(&avbuf->buffer) == 0) ++ return queue->eos ? AVERROR_EOF : AVERROR(EINVAL); ++ } ++ ++ // Fill in PTS and anciliary info from src frame ++ pts_track_get_frame(&ctx->track, avbuf->buffer.timestamp, frame); ++ ++ frame->buf[0] = av_buffer_create((uint8_t *) &avbuf->drm_frame, ++ sizeof(avbuf->drm_frame), v4l2_free_buffer, ++ avbuf, AV_BUFFER_FLAG_READONLY); ++ if (!frame->buf[0]) { ++ av_log(ctx->logctx, AV_LOG_ERROR, "%s: error %d creating buffer\n", __func__, 0); ++ return AVERROR(ENOMEM); ++ } ++ ++ atomic_fetch_add(&ctx->refcount, 1); ++ ++ frame->data[0] = (uint8_t *)&avbuf->drm_frame; ++ frame->format = AV_PIX_FMT_DRM_PRIME; ++ if (ctx->hw_frames_ctx) ++ frame->hw_frames_ctx = av_buffer_ref(ctx->hw_frames_ctx); ++ frame->height = ctx->output_height; ++ frame->width = ctx->output_width; ++ ++ color_primaries = get_color_primaries(&ctx->capture.format); ++ colorspace = get_color_space(&ctx->capture.format); ++ color_trc = get_color_trc(&ctx->capture.format); ++ color_range = get_color_range(&ctx->capture.format); ++ ++ // If the color parameters are unspecified by V4L2 then leave alone as they ++ // will have been copied from src ++ if (color_primaries != AVCOL_PRI_UNSPECIFIED) ++ frame->color_primaries = color_primaries; ++ if (colorspace != AVCOL_SPC_UNSPECIFIED) ++ frame->colorspace = colorspace; ++ if (color_trc != AVCOL_TRC_UNSPECIFIED) ++ frame->color_trc = color_trc; ++ if (color_range != AVCOL_RANGE_UNSPECIFIED) ++ frame->color_range = color_range; ++ ++ if (ctx->filter_type == FILTER_V4L2_DEINTERLACE) { ++ // Not interlaced now ++ frame->interlaced_frame = 0; // *** Fill in from dst buffer? ++ frame->top_field_first = 0; ++ // Pkt duration halved ++ frame->pkt_duration /= 2; ++ } ++ ++ if (avbuf->buffer.flags & V4L2_BUF_FLAG_ERROR) { ++ av_log(ctx->logctx, AV_LOG_ERROR, "driver decode error\n"); ++ frame->decode_error_flags |= FF_DECODE_ERROR_INVALID_BITSTREAM; ++ } ++ ++ av_log(ctx->logctx, AV_LOG_TRACE, ">>> %s: PTS=%"PRId64"\n", __func__, frame->pts); ++ return 0; ++} ++ ++static int deint_v4l2m2m_config_props(AVFilterLink *outlink) ++{ ++ AVFilterLink *inlink = outlink->src->inputs[0]; ++ AVFilterContext *avctx = outlink->src; ++ DeintV4L2M2MContext *priv = avctx->priv; ++ DeintV4L2M2MContextShared *ctx = priv->shared; ++ int ret; ++ ++ ctx->height = avctx->inputs[0]->h; ++ ctx->width = avctx->inputs[0]->w; ++ ++ if (ctx->filter_type == FILTER_V4L2_SCALE) { ++ if ((ret = ff_scale_eval_dimensions(priv, ++ priv->w_expr, priv->h_expr, ++ inlink, outlink, ++ &ctx->output_width, &ctx->output_height)) < 0) ++ return ret; ++ ++ ff_scale_adjust_dimensions(inlink, &ctx->output_width, &ctx->output_height, ++ priv->force_original_aspect_ratio, priv->force_divisible_by); ++ } ++ else { ++ ctx->output_width = ctx->width; ++ ctx->output_height = ctx->height; ++ } ++ ++ av_log(priv, AV_LOG_DEBUG, "%s: %dx%d->%dx%d FR: %d/%d->%d/%d\n", __func__, ++ ctx->width, ctx->height, ctx->output_width, ctx->output_height, ++ inlink->frame_rate.num, inlink->frame_rate.den, outlink->frame_rate.num, outlink->frame_rate.den); ++ ++ outlink->time_base = inlink->time_base; ++ outlink->w = ctx->output_width; ++ outlink->h = ctx->output_height; ++ outlink->format = inlink->format; ++ if (ctx->filter_type == FILTER_V4L2_DEINTERLACE && inlink->frame_rate.den != 0) ++ outlink->frame_rate = (AVRational){inlink->frame_rate.num * 2, inlink->frame_rate.den}; ++ ++ if (inlink->sample_aspect_ratio.num) ++ outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h * inlink->w, outlink->w * inlink->h}, inlink->sample_aspect_ratio); ++ else ++ outlink->sample_aspect_ratio = inlink->sample_aspect_ratio; ++ ++ ret = deint_v4l2m2m_find_device(ctx); ++ if (ret) ++ return ret; ++ ++ if (inlink->hw_frames_ctx) { ++ ctx->hw_frames_ctx = av_buffer_ref(inlink->hw_frames_ctx); ++ if (!ctx->hw_frames_ctx) ++ return AVERROR(ENOMEM); ++ } ++ return 0; ++} ++ ++static uint32_t desc_pixelformat(const AVDRMFrameDescriptor * const drm_desc) ++{ ++ const uint64_t mod = drm_desc->objects[0].format_modifier; ++ const int is_linear = (mod == DRM_FORMAT_MOD_LINEAR || mod == DRM_FORMAT_MOD_INVALID); ++ ++ // Only currently support single object things ++ if (drm_desc->nb_objects != 1) ++ return 0; ++ ++ switch (drm_desc->layers[0].format) { ++ case DRM_FORMAT_YUV420: ++ return is_linear ? V4L2_PIX_FMT_YUV420 : 0; ++ case DRM_FORMAT_NV12: ++ return is_linear ? V4L2_PIX_FMT_NV12 : ++#if CONFIG_SAND ++ fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128 ? V4L2_PIX_FMT_NV12_COL128 : ++#endif ++ 0; ++ default: ++ break; ++ } ++ return 0; ++} ++ ++static int deint_v4l2m2m_filter_frame(AVFilterLink *link, AVFrame *in) ++{ ++ AVFilterContext *avctx = link->dst; ++ DeintV4L2M2MContext *priv = avctx->priv; ++ DeintV4L2M2MContextShared *ctx = priv->shared; ++ V4L2Queue *capture = &ctx->capture; ++ V4L2Queue *output = &ctx->output; ++ int ret; ++ ++ av_log(priv, AV_LOG_DEBUG, "<<< %s: input pts: %"PRId64" dts: %"PRId64" field :%d interlaced: %d aspect:%d/%d\n", ++ __func__, in->pts, in->pkt_dts, in->top_field_first, in->interlaced_frame, in->sample_aspect_ratio.num, in->sample_aspect_ratio.den); ++ av_log(priv, AV_LOG_DEBUG, "--- %s: in status in %d/ot %d; out status in %d/out %d\n", __func__, ++ avctx->inputs[0]->status_in, avctx->inputs[0]->status_out, avctx->outputs[0]->status_in, avctx->outputs[0]->status_out); ++ ++ if (ctx->field_order == V4L2_FIELD_ANY) { ++ const AVDRMFrameDescriptor * const drm_desc = (AVDRMFrameDescriptor *)in->data[0]; ++ uint32_t pixelformat = desc_pixelformat(drm_desc); ++ ++ if (pixelformat == 0) { ++ av_log(avctx, AV_LOG_ERROR, "Unsupported DRM format %s in %d objects, modifier %#" PRIx64 "\n", ++ av_fourcc2str(drm_desc->layers[0].format), ++ drm_desc->nb_objects, drm_desc->objects[0].format_modifier); ++ return AVERROR(EINVAL); ++ } ++ ++ ctx->orig_width = drm_desc->layers[0].planes[0].pitch; ++ ctx->orig_height = drm_desc->layers[0].planes[1].offset / ctx->orig_width; ++ ++ av_log(priv, AV_LOG_DEBUG, "%s: %dx%d (%td,%td)\n", __func__, ctx->width, ctx->height, ++ drm_desc->layers[0].planes[0].pitch, drm_desc->layers[0].planes[1].offset); ++ ++ if ((ret = set_src_fmt(output, in)) != 0) { ++ av_log(avctx, AV_LOG_WARNING, "Unknown input DRM format: %s mod: %#" PRIx64 "\n", ++ av_fourcc2str(drm_desc->layers[0].format), drm_desc->objects[0].format_modifier); ++ return ret; ++ } ++ ++ ret = do_s_fmt(output); ++ if (ret) { ++ av_log(avctx, AV_LOG_WARNING, "Failed to set source format\n"); ++ return ret; ++ } ++ ++ if (ctx->output_format != AV_PIX_FMT_NONE) ++ pixelformat = fmt_av_to_v4l2(ctx->output_format); ++ ret = set_dst_format(priv, capture, pixelformat, V4L2_FIELD_NONE, ctx->output_width, ctx->output_height); ++ if (ret) { ++ av_log(avctx, AV_LOG_WARNING, "Failed to set destination format\n"); ++ return ret; ++ } ++ ++ ret = deint_v4l2m2m_allocate_buffers(capture); ++ if (ret) { ++ av_log(avctx, AV_LOG_WARNING, "Failed to allocate destination buffers\n"); ++ return ret; ++ } ++ ++ ret = deint_v4l2m2m_streamon(capture); ++ if (ret) { ++ av_log(avctx, AV_LOG_WARNING, "Failed set destination streamon: %s\n", av_err2str(ret)); ++ return ret; ++ } ++ ++ ret = deint_v4l2m2m_allocate_buffers(output); ++ if (ret) { ++ av_log(avctx, AV_LOG_WARNING, "Failed to allocate src buffers\n"); ++ return ret; ++ } ++ ++ ret = deint_v4l2m2m_streamon(output); ++ if (ret) { ++ av_log(avctx, AV_LOG_WARNING, "Failed set src streamon: %s\n", av_err2str(ret)); ++ return ret; ++ } ++ ++ if (in->top_field_first) ++ ctx->field_order = V4L2_FIELD_INTERLACED_TB; ++ else ++ ctx->field_order = V4L2_FIELD_INTERLACED_BT; ++ ++ { ++ struct v4l2_encoder_cmd ecmd = { ++ .cmd = V4L2_ENC_CMD_STOP ++ }; ++ ctx->has_enc_stop = 0; ++ if (ioctl(ctx->fd, VIDIOC_TRY_ENCODER_CMD, &ecmd) == 0) { ++ av_log(ctx->logctx, AV_LOG_DEBUG, "Test encode stop succeeded\n"); ++ ctx->has_enc_stop = 1; ++ } ++ else { ++ av_log(ctx->logctx, AV_LOG_DEBUG, "Test encode stop fail: %s\n", av_err2str(AVERROR(errno))); ++ } ++ ++ } ++ } ++ ++ ret = deint_v4l2m2m_enqueue_frame(output, in); ++ ++ av_log(priv, AV_LOG_TRACE, ">>> %s: %s\n", __func__, av_err2str(ret)); ++ return ret; ++} ++ ++static int ++ack_inlink(AVFilterContext * const avctx, DeintV4L2M2MContextShared *const s, ++ AVFilterLink * const inlink) ++{ ++ int instatus; ++ int64_t inpts; ++ ++ if (ff_inlink_acknowledge_status(inlink, &instatus, &inpts) <= 0) ++ return 0; ++ ++ s->drain = instatus; ++ s->drain_pts = inpts; ++ s->drain_state = DRAIN_TIMEOUT; ++ ++ if (s->field_order == V4L2_FIELD_ANY) { // Not yet started ++ s->drain_state = DRAIN_DONE; ++ } ++ else if (s->one_to_one) { ++ s->drain_state = DRAIN_LAST; ++ } ++ else if (s->has_enc_stop) { ++ struct v4l2_encoder_cmd ecmd = { ++ .cmd = V4L2_ENC_CMD_STOP ++ }; ++ if (ioctl(s->fd, VIDIOC_ENCODER_CMD, &ecmd) == 0) { ++ av_log(avctx->priv, AV_LOG_DEBUG, "Do Encode stop\n"); ++ s->drain_state = DRAIN_EOS; ++ } ++ else { ++ av_log(avctx->priv, AV_LOG_WARNING, "Encode stop fail: %s\n", av_err2str(AVERROR(errno))); ++ } ++ } ++ return 1; ++} ++ ++static int deint_v4l2m2m_activate(AVFilterContext *avctx) ++{ ++ DeintV4L2M2MContext * const priv = avctx->priv; ++ DeintV4L2M2MContextShared *const s = priv->shared; ++ AVFilterLink * const outlink = avctx->outputs[0]; ++ AVFilterLink * const inlink = avctx->inputs[0]; ++ int n = 0; ++ int cn = 99; ++ int did_something = 0; ++ ++ av_log(priv, AV_LOG_TRACE, "<<< %s\n", __func__); ++ ++ FF_FILTER_FORWARD_STATUS_BACK_ALL(outlink, avctx); ++ ++ ack_inlink(avctx, s, inlink); ++ ++ if (s->field_order != V4L2_FIELD_ANY) // Can't DQ if no setup! ++ { ++ AVFrame * frame = av_frame_alloc(); ++ int rv; ++ ++ recycle_q(&s->output); ++ n = count_enqueued(&s->output); ++ ++ if (frame == NULL) { ++ av_log(priv, AV_LOG_ERROR, "%s: error allocating frame\n", __func__); ++ return AVERROR(ENOMEM); ++ } ++ ++ rv = deint_v4l2m2m_dequeue_frame(&s->capture, frame, ++ drain_frame_expected(s->drain_state) || n > 4 ? 300 : 0); ++ if (rv != 0) { ++ av_frame_free(&frame); ++ if (rv == AVERROR_EOF) { ++ av_log(priv, AV_LOG_DEBUG, "%s: --- DQ EOF\n", __func__); ++ s->drain_state = DRAIN_DONE; ++ } ++ else if (rv == AVERROR(EAGAIN)) { ++ if (s->drain_state != DRAIN_NONE) { ++ av_log(priv, AV_LOG_DEBUG, "%s: --- DQ empty - drain done\n", __func__); ++ s->drain_state = DRAIN_DONE; ++ } ++ } ++ else { ++ av_log(priv, AV_LOG_ERROR, ">>> %s: DQ fail: %s\n", __func__, av_err2str(rv)); ++ return rv; ++ } ++ } ++ else { ++ frame->interlaced_frame = 0; ++ // frame is always consumed by filter_frame - even on error despite ++ // a somewhat confusing comment in the header ++ rv = ff_filter_frame(outlink, frame); ++ ++s->frames_tx; ++ ++ av_log(priv, AV_LOG_TRACE, "%s: Filtered: %s\n", __func__, av_err2str(rv)); ++ did_something = 1; ++ ++ if (s->drain_state != DRAIN_NONE && pts_track_count(&s->track) == 0) { ++ av_log(priv, AV_LOG_DEBUG, "%s: --- DQ last - drain done\n", __func__); ++ s->drain_state = DRAIN_DONE; ++ } ++ } ++ ++ cn = count_enqueued(&s->capture); ++ } ++ ++ if (s->drain_state == DRAIN_DONE) { ++ ff_outlink_set_status(outlink, s->drain, s->drain_pts); ++ av_log(priv, AV_LOG_TRACE, ">>> %s: Status done: %s\n", __func__, av_err2str(s->drain)); ++ return 0; ++ } ++ ++ recycle_q(&s->output); ++ n = count_enqueued(&s->output); ++ ++ while (n < 6 && !s->drain) { ++ AVFrame * frame; ++ int rv; ++ ++ if ((rv = ff_inlink_consume_frame(inlink, &frame)) < 0) { ++ av_log(priv, AV_LOG_ERROR, "%s: consume in failed: %s\n", __func__, av_err2str(rv)); ++ return rv; ++ } ++ ++ if (frame == NULL) { ++ av_log(priv, AV_LOG_TRACE, "%s: No frame\n", __func__); ++ if (!ack_inlink(avctx, s, inlink)) { ++ ff_inlink_request_frame(inlink); ++ av_log(priv, AV_LOG_TRACE, "%s: req frame\n", __func__); ++ } ++ break; ++ } ++ ++s->frames_rx; ++ ++ rv = deint_v4l2m2m_filter_frame(inlink, frame); ++ av_frame_free(&frame); ++ ++ if (rv != 0) ++ return rv; ++ ++ av_log(priv, AV_LOG_TRACE, "%s: Q frame\n", __func__); ++ did_something = 1; ++ ++n; ++ } ++ ++ if ((n > 4 || s->drain) && ff_outlink_frame_wanted(outlink)) { ++ ff_filter_set_ready(avctx, 1); ++ did_something = 1; ++ av_log(priv, AV_LOG_TRACE, "%s: ready\n", __func__); ++ } ++ ++ av_log(priv, AV_LOG_TRACE, ">>> %s: OK (n=%d, cn=%d)\n", __func__, n, cn); ++ return did_something ? 0 : FFERROR_NOT_READY; ++} ++ ++static av_cold int common_v4l2m2m_init(AVFilterContext * const avctx, const filter_type_v4l2_t filter_type) ++{ ++ DeintV4L2M2MContext * const priv = avctx->priv; ++ DeintV4L2M2MContextShared * const ctx = av_mallocz(sizeof(DeintV4L2M2MContextShared)); ++ ++ if (!ctx) { ++ av_log(priv, AV_LOG_ERROR, "%s: error %d allocating context\n", __func__, 0); ++ return AVERROR(ENOMEM); ++ } ++ priv->shared = ctx; ++ ctx->logctx = priv; ++ ctx->filter_type = filter_type; ++ ctx->fd = -1; ++ ctx->output.ctx = ctx; ++ ctx->output.num_buffers = 8; ++ ctx->output.name = "OUTPUT"; ++ ctx->capture.ctx = ctx; ++ ctx->capture.num_buffers = 12; ++ ctx->capture.name = "CAPTURE"; ++ ctx->done = 0; ++ ctx->field_order = V4L2_FIELD_ANY; ++ ++ pts_track_init(&ctx->track, priv); ++ ++ atomic_init(&ctx->refcount, 1); ++ ++ if (priv->output_format_string) { ++ ctx->output_format = av_get_pix_fmt(priv->output_format_string); ++ if (ctx->output_format == AV_PIX_FMT_NONE) { ++ av_log(avctx, AV_LOG_ERROR, "Invalid ffmpeg output format '%s'.\n", priv->output_format_string); ++ return AVERROR(EINVAL); ++ } ++ if (fmt_av_to_v4l2(ctx->output_format) == 0) { ++ av_log(avctx, AV_LOG_ERROR, "Unsupported output format for V4L2: %s.\n", av_get_pix_fmt_name(ctx->output_format)); ++ return AVERROR(EINVAL); ++ } ++ } else { ++ // Use the input format once that is configured. ++ ctx->output_format = AV_PIX_FMT_NONE; ++ } ++ ++#define STRING_OPTION(var_name, func_name, default_value) do { \ ++ if (priv->var_name ## _string) { \ ++ int var = av_ ## func_name ## _from_name(priv->var_name ## _string); \ ++ if (var < 0) { \ ++ av_log(avctx, AV_LOG_ERROR, "Invalid %s.\n", #var_name); \ ++ return AVERROR(EINVAL); \ ++ } \ ++ priv->var_name = var; \ ++ } else { \ ++ priv->var_name = default_value; \ ++ } \ ++ } while (0) ++ ++ STRING_OPTION(colour_primaries, color_primaries, AVCOL_PRI_UNSPECIFIED); ++ STRING_OPTION(colour_transfer, color_transfer, AVCOL_TRC_UNSPECIFIED); ++ STRING_OPTION(colour_matrix, color_space, AVCOL_SPC_UNSPECIFIED); ++ STRING_OPTION(chroma_location, chroma_location, AVCHROMA_LOC_UNSPECIFIED); ++ ++ return 0; ++} ++ ++static av_cold int deint_v4l2m2m_init(AVFilterContext *avctx) ++{ ++ return common_v4l2m2m_init(avctx, FILTER_V4L2_DEINTERLACE); ++} ++ ++static av_cold int scale_v4l2m2m_init(AVFilterContext *avctx) ++{ ++ int rv; ++ DeintV4L2M2MContext * priv; ++ DeintV4L2M2MContextShared * ctx; ++ ++ if ((rv = common_v4l2m2m_init(avctx, FILTER_V4L2_SCALE)) != 0) ++ return rv; ++ ++ priv = avctx->priv; ++ ctx = priv->shared; ++ ++ ctx->one_to_one = 1; ++ return 0; ++} ++ ++static void deint_v4l2m2m_uninit(AVFilterContext *avctx) ++{ ++ DeintV4L2M2MContext *priv = avctx->priv; ++ DeintV4L2M2MContextShared *ctx = priv->shared; ++ ++ av_log(priv, AV_LOG_VERBOSE, "Frames Rx: %u, Frames Tx: %u\n", ++ ctx->frames_rx, ctx->frames_tx); ++ ctx->done = 1; ++ ctx->logctx = NULL; // Log to NULL works, log to missing crashes ++ pts_track_uninit(&ctx->track); ++ deint_v4l2m2m_destroy_context(ctx); ++} ++ ++static const AVOption deinterlace_v4l2m2m_options[] = { ++ { NULL }, ++}; ++ ++AVFILTER_DEFINE_CLASS(deinterlace_v4l2m2m); ++ ++#define OFFSET(x) offsetof(DeintV4L2M2MContext, x) ++#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM) ++ ++static const AVOption scale_v4l2m2m_options[] = { ++ { "w", "Output video width", ++ OFFSET(w_expr), AV_OPT_TYPE_STRING, {.str = "iw"}, .flags = FLAGS }, ++ { "h", "Output video height", ++ OFFSET(h_expr), AV_OPT_TYPE_STRING, {.str = "ih"}, .flags = FLAGS }, ++ { "format", "Output video format (software format of hardware frames)", ++ OFFSET(output_format_string), AV_OPT_TYPE_STRING, .flags = FLAGS }, ++ // These colour properties match the ones of the same name in vf_scale. ++ { "out_color_matrix", "Output colour matrix coefficient set", ++ OFFSET(colour_matrix_string), AV_OPT_TYPE_STRING, { .str = NULL }, .flags = FLAGS }, ++ { "out_range", "Output colour range", ++ OFFSET(colour_range), AV_OPT_TYPE_INT, { .i64 = AVCOL_RANGE_UNSPECIFIED }, ++ AVCOL_RANGE_UNSPECIFIED, AVCOL_RANGE_JPEG, FLAGS, "range" }, ++ { "full", "Full range", ++ 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range" }, ++ { "limited", "Limited range", ++ 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range" }, ++ { "jpeg", "Full range", ++ 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range" }, ++ { "mpeg", "Limited range", ++ 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range" }, ++ { "tv", "Limited range", ++ 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range" }, ++ { "pc", "Full range", ++ 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range" }, ++ // These colour properties match the ones in the VAAPI scaler ++ { "out_color_primaries", "Output colour primaries", ++ OFFSET(colour_primaries_string), AV_OPT_TYPE_STRING, ++ { .str = NULL }, .flags = FLAGS }, ++ { "out_color_transfer", "Output colour transfer characteristics", ++ OFFSET(colour_transfer_string), AV_OPT_TYPE_STRING, ++ { .str = NULL }, .flags = FLAGS }, ++ { "out_chroma_location", "Output chroma sample location", ++ OFFSET(chroma_location_string), AV_OPT_TYPE_STRING, ++ { .str = NULL }, .flags = FLAGS }, ++ { "force_original_aspect_ratio", "decrease or increase w/h if necessary to keep the original AR", OFFSET(force_original_aspect_ratio), AV_OPT_TYPE_INT, { .i64 = 0}, 0, 2, FLAGS, "force_oar" }, ++ { "force_divisible_by", "enforce that the output resolution is divisible by a defined integer when force_original_aspect_ratio is used", OFFSET(force_divisible_by), AV_OPT_TYPE_INT, { .i64 = 1}, 1, 256, FLAGS }, ++ { NULL }, ++}; ++ ++AVFILTER_DEFINE_CLASS(scale_v4l2m2m); ++ ++static const AVFilterPad deint_v4l2m2m_inputs[] = { ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO, ++ }, ++}; ++ ++static const AVFilterPad deint_v4l2m2m_outputs[] = { ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .config_props = deint_v4l2m2m_config_props, ++ }, ++}; ++ ++AVFilter ff_vf_deinterlace_v4l2m2m = { ++ .name = "deinterlace_v4l2m2m", ++ .description = NULL_IF_CONFIG_SMALL("V4L2 M2M deinterlacer"), ++ .priv_size = sizeof(DeintV4L2M2MContext), ++ .init = &deint_v4l2m2m_init, ++ .uninit = &deint_v4l2m2m_uninit, ++ FILTER_INPUTS(deint_v4l2m2m_inputs), ++ FILTER_OUTPUTS(deint_v4l2m2m_outputs), ++ FILTER_SINGLE_SAMPLEFMT(AV_PIX_FMT_DRM_PRIME), ++ .priv_class = &deinterlace_v4l2m2m_class, ++ .activate = deint_v4l2m2m_activate, ++}; ++ ++AVFilter ff_vf_scale_v4l2m2m = { ++ .name = "scale_v4l2m2m", ++ .description = NULL_IF_CONFIG_SMALL("V4L2 M2M scaler"), ++ .priv_size = sizeof(DeintV4L2M2MContext), ++ .init = &scale_v4l2m2m_init, ++ .uninit = &deint_v4l2m2m_uninit, ++ FILTER_INPUTS(deint_v4l2m2m_inputs), ++ FILTER_OUTPUTS(deint_v4l2m2m_outputs), ++ FILTER_SINGLE_SAMPLEFMT(AV_PIX_FMT_DRM_PRIME), ++ .priv_class = &scale_v4l2m2m_class, ++ .activate = deint_v4l2m2m_activate, ++}; ++ +--- /dev/null ++++ b/libavfilter/vf_unsand.c +@@ -0,0 +1,228 @@ ++/* ++ * Copyright (c) 2007 Bobby Bingham ++ * ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/** ++ * @file ++ * format and noformat video filters ++ */ ++ ++#include <string.h> ++ ++#include "libavutil/internal.h" ++#include "libavutil/mem.h" ++#include "libavutil/pixdesc.h" ++#include "libavutil/opt.h" ++#include "libavutil/rpi_sand_fns.h" ++ ++#include "avfilter.h" ++#include "formats.h" ++#include "internal.h" ++#include "video.h" ++ ++typedef struct UnsandContext { ++ const AVClass *class; ++} UnsandContext; ++ ++static av_cold void uninit(AVFilterContext *ctx) ++{ ++// UnsandContext *s = ctx->priv; ++} ++ ++static av_cold int init(AVFilterContext *ctx) ++{ ++// UnsandContext *s = ctx->priv; ++ ++ return 0; ++} ++ ++ ++static int filter_frame(AVFilterLink *link, AVFrame *in) ++{ ++ AVFilterLink * const outlink = link->dst->outputs[0]; ++ AVFrame *out = NULL; ++ int rv = 0; ++ ++ if (outlink->format == in->format) { ++ // If nothing to do then do nothing ++ out = in; ++ } ++ else ++ { ++ if ((out = ff_get_video_buffer(outlink, av_frame_cropped_width(in), av_frame_cropped_height(in))) == NULL) ++ { ++ rv = AVERROR(ENOMEM); ++ goto fail; ++ } ++ if (av_rpi_sand_to_planar_frame(out, in) != 0) ++ { ++ rv = -1; ++ goto fail; ++ } ++ ++ av_frame_free(&in); ++ } ++ ++ return ff_filter_frame(outlink, out); ++ ++fail: ++ av_frame_free(&out); ++ av_frame_free(&in); ++ return rv; ++} ++ ++#if 0 ++static void dump_fmts(const AVFilterFormats * fmts) ++{ ++ int i; ++ if (fmts== NULL) { ++ printf("NULL\n"); ++ return; ++ } ++ for (i = 0; i < fmts->nb_formats; ++i) { ++ printf(" %d", fmts->formats[i]); ++ } ++ printf("\n"); ++} ++#endif ++ ++static int query_formats(AVFilterContext *ctx) ++{ ++// UnsandContext *s = ctx->priv; ++ int ret; ++ ++ // If we aren't connected at both ends then just do nothing ++ if (ctx->inputs[0] == NULL || ctx->outputs[0] == NULL) ++ return 0; ++ ++ // Our output formats depend on our input formats and we can't/don't ++ // want to convert between bit depths so we need to wait for the source ++ // to have an opinion before we do ++ if (ctx->inputs[0]->incfg.formats == NULL) ++ return AVERROR(EAGAIN); ++ ++ // Accept anything ++ if (ctx->inputs[0]->outcfg.formats == NULL && ++ (ret = ff_formats_ref(ctx->inputs[0]->incfg.formats, &ctx->inputs[0]->outcfg.formats)) < 0) ++ return ret; ++ ++ // Filter out sand formats ++ ++ // Generate a container if we don't already have one ++ if (ctx->outputs[0]->incfg.formats == NULL) ++ { ++ // Somewhat rubbish way of ensuring we have a good structure ++ const static enum AVPixelFormat out_fmts[] = {AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE}; ++ AVFilterFormats *formats = ff_make_format_list(out_fmts); ++ ++ if (formats == NULL) ++ return AVERROR(ENOMEM); ++ if ((ret = ff_formats_ref(formats, &ctx->outputs[0]->incfg.formats)) < 0) ++ return ret; ++ } ++ ++ // Replace old format list with new filtered list derived from what our ++ // input says it can do ++ { ++ const AVFilterFormats * const src_ff = ctx->inputs[0]->outcfg.formats; ++ AVFilterFormats * const dst_ff = ctx->outputs[0]->incfg.formats; ++ enum AVPixelFormat *dst_fmts = av_malloc(sizeof(enum AVPixelFormat) * src_ff->nb_formats); ++ int i; ++ int n = 0; ++ int seen_420p = 0; ++ int seen_420p10 = 0; ++ ++ for (i = 0; i < src_ff->nb_formats; ++i) { ++ const enum AVPixelFormat f = src_ff->formats[i]; ++ ++ switch (f){ ++ case AV_PIX_FMT_YUV420P: ++ case AV_PIX_FMT_SAND128: ++ case AV_PIX_FMT_RPI4_8: ++ if (!seen_420p) { ++ seen_420p = 1; ++ dst_fmts[n++] = AV_PIX_FMT_YUV420P; ++ } ++ break; ++ case AV_PIX_FMT_SAND64_10: ++ case AV_PIX_FMT_YUV420P10: ++ case AV_PIX_FMT_RPI4_10: ++ if (!seen_420p10) { ++ seen_420p10 = 1; ++ dst_fmts[n++] = AV_PIX_FMT_YUV420P10; ++ } ++ break; ++ default: ++ dst_fmts[n++] = f; ++ break; ++ } ++ } ++ ++ av_freep(&dst_ff->formats); ++ dst_ff->formats = dst_fmts; ++ dst_ff->nb_formats = n; ++ } ++ ++// printf("Unsand: %s calc: ", __func__); ++// dump_fmts(ctx->outputs[0]->incfg.formats); ++ ++ return 0; ++} ++ ++ ++#define OFFSET(x) offsetof(UnsandContext, x) ++static const AVOption unsand_options[] = { ++ { NULL } ++}; ++ ++ ++AVFILTER_DEFINE_CLASS(unsand); ++ ++static const AVFilterPad avfilter_vf_unsand_inputs[] = { ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .filter_frame = filter_frame, ++ }, ++ { NULL } ++}; ++ ++static const AVFilterPad avfilter_vf_unsand_outputs[] = { ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO ++ }, ++}; ++ ++AVFilter ff_vf_unsand = { ++ .name = "unsand", ++ .description = NULL_IF_CONFIG_SMALL("Convert sand pix fmt to yuv"), ++ ++ .init = init, ++ .uninit = uninit, ++ ++ FILTER_QUERY_FUNC(query_formats), ++ ++ .priv_size = sizeof(UnsandContext), ++ .priv_class = &unsand_class, ++ ++ FILTER_INPUTS(avfilter_vf_unsand_inputs), ++ FILTER_OUTPUTS(avfilter_vf_unsand_outputs), ++}; ++ +--- a/libavfilter/x86/vf_bwdif_init.c ++++ b/libavfilter/x86/vf_bwdif_init.c +@@ -42,11 +42,9 @@ void ff_bwdif_filter_line_12bit_ssse3(vo + int mrefs2, int prefs3, int mrefs3, int prefs4, + int mrefs4, int parity, int clip_max); + +-av_cold void ff_bwdif_init_x86(BWDIFContext *bwdif) ++av_cold void ff_bwdif_init_x86(BWDIFContext *bwdif, int bit_depth) + { +- YADIFContext *yadif = &bwdif->yadif; + int cpu_flags = av_get_cpu_flags(); +- int bit_depth = (!yadif->csp) ? 8 : yadif->csp->comp[0].depth; + + if (bit_depth <= 8) { + if (EXTERNAL_SSE2(cpu_flags)) +--- a/libavformat/matroskaenc.c ++++ b/libavformat/matroskaenc.c +@@ -75,6 +75,10 @@ + + #define IS_WEBM(mkv) (CONFIG_WEBM_MUXER && CONFIG_MATROSKA_MUXER ? \ + ((mkv)->mode == MODE_WEBM) : CONFIG_WEBM_MUXER) ++ ++/* Reserved size for H264 headers if not extant at init time */ ++#define MAX_H264_HEADER_SIZE 1024 ++ + #define IS_SEEKABLE(pb, mkv) (((pb)->seekable & AVIO_SEEKABLE_NORMAL) && \ + !(mkv)->is_live) + +@@ -1119,8 +1123,12 @@ static int mkv_assemble_native_codecpriv + case AV_CODEC_ID_WAVPACK: + return put_wv_codecpriv(dyn_cp, extradata, extradata_size); + case AV_CODEC_ID_H264: +- return ff_isom_write_avcc(dyn_cp, extradata, +- extradata_size); ++ if (extradata_size) ++ return ff_isom_write_avcc(dyn_cp, extradata, ++ extradata_size); ++ else ++ *size_to_reserve = MAX_H264_HEADER_SIZE; ++ break; + case AV_CODEC_ID_HEVC: + return ff_isom_write_hvcc(dyn_cp, extradata, + extradata_size, 0); +@@ -2726,8 +2734,8 @@ static int mkv_check_new_extra_data(AVFo + } + break; + #endif +- // FIXME: Remove the following once libaom starts propagating proper extradata during init() +- // See https://bugs.chromium.org/p/aomedia/issues/detail?id=2208 ++ // FIXME: Remove the following once libaom starts propagating extradata during init() ++ // See https://bugs.chromium.org/p/aomedia/issues/detail?id=2012 + case AV_CODEC_ID_AV1: + if (side_data_size && mkv->track.bc && !par->extradata_size) { + // If the reserved space doesn't suffice, only write +@@ -2739,6 +2747,16 @@ static int mkv_check_new_extra_data(AVFo + } else if (!par->extradata_size) + return AVERROR_INVALIDDATA; + break; ++ // H264 V4L2 has a similar issue ++ case AV_CODEC_ID_H264: ++ if (side_data_size && mkv->track.bc && !par->extradata_size) { ++ ret = mkv_update_codecprivate(s, mkv, side_data, side_data_size, ++ par, mkv->track.bc, track, 0); ++ if (ret < 0) ++ return ret; ++ } else if (!par->extradata_size) ++ return AVERROR_INVALIDDATA; ++ break; + default: + if (side_data_size) + av_log(s, AV_LOG_DEBUG, "Ignoring new extradata in a packet for stream %d.\n", pkt->stream_index); +@@ -3171,9 +3189,15 @@ static int mkv_init(struct AVFormatConte + track->reformat = mkv_reformat_wavpack; + break; + case AV_CODEC_ID_H264: ++ // Default to reformat if no extradata as the only current ++ // encoder which does this is v4l2m2m which needs reformat ++ if (par->extradata_size == 0 || ++ (par->extradata_size > 3 && ++ (AV_RB24(par->extradata) == 1 || AV_RB32(par->extradata) == 1))) ++ track->reformat = mkv_reformat_h2645; ++ break; + case AV_CODEC_ID_HEVC: +- if ((par->codec_id == AV_CODEC_ID_H264 && par->extradata_size > 0 || +- par->codec_id == AV_CODEC_ID_HEVC && par->extradata_size > 6) && ++ if (par->extradata_size > 6 && + (AV_RB24(par->extradata) == 1 || AV_RB32(par->extradata) == 1)) + track->reformat = mkv_reformat_h2645; + break; +--- a/libavformat/movenc.c ++++ b/libavformat/movenc.c +@@ -6318,6 +6318,7 @@ static int mov_write_single_packet(AVFor + if (trk->par->codec_id == AV_CODEC_ID_MP4ALS || + trk->par->codec_id == AV_CODEC_ID_AAC || + trk->par->codec_id == AV_CODEC_ID_AV1 || ++ trk->par->codec_id == AV_CODEC_ID_H264 || + trk->par->codec_id == AV_CODEC_ID_FLAC) { + size_t side_size; + uint8_t *side = av_packet_get_side_data(pkt, AV_PKT_DATA_NEW_EXTRADATA, &side_size); +--- a/libavformat/rtpenc.c ++++ b/libavformat/rtpenc.c +@@ -19,6 +19,7 @@ + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + ++#include "avc.h" + #include "avformat.h" + #include "mpegts.h" + #include "internal.h" +@@ -584,8 +585,25 @@ static int rtp_write_packet(AVFormatCont + ff_rtp_send_vc2hq(s1, pkt->data, size, st->codecpar->field_order != AV_FIELD_PROGRESSIVE ? 1 : 0); + break; + case AV_CODEC_ID_H264: ++ { ++ uint8_t *side_data; ++ size_t side_data_size = 0; ++ ++ side_data = av_packet_get_side_data(pkt, AV_PKT_DATA_NEW_EXTRADATA, ++ &side_data_size); ++ ++ if (side_data_size != 0) { ++ int ps_size = side_data_size; ++ uint8_t * ps_buf = NULL; ++ ++ ff_avc_write_annexb_extradata(side_data, &ps_buf, &ps_size); ++ av_log(s1, AV_LOG_TRACE, "H264: write side data=%d\n", ps_size); ++ ff_rtp_send_h264_hevc(s1, ps_buf ? ps_buf : side_data, ps_size); ++ av_free(ps_buf); ++ } + ff_rtp_send_h264_hevc(s1, pkt->data, size); + break; ++ } + case AV_CODEC_ID_H261: + ff_rtp_send_h261(s1, pkt->data, size); + break; +--- a/libavutil/Makefile ++++ b/libavutil/Makefile +@@ -72,6 +72,7 @@ HEADERS = adler32.h + rational.h \ + replaygain.h \ + ripemd.h \ ++ rpi_sand_fns.h \ + samplefmt.h \ + sha.h \ + sha512.h \ +@@ -191,6 +192,7 @@ OBJS-$(CONFIG_MACOS_KPERF) + OBJS-$(CONFIG_MEDIACODEC) += hwcontext_mediacodec.o + OBJS-$(CONFIG_OPENCL) += hwcontext_opencl.o + OBJS-$(CONFIG_QSV) += hwcontext_qsv.o ++OBJS-$(CONFIG_SAND) += rpi_sand_fns.o + OBJS-$(CONFIG_VAAPI) += hwcontext_vaapi.o + OBJS-$(CONFIG_VIDEOTOOLBOX) += hwcontext_videotoolbox.o + OBJS-$(CONFIG_VDPAU) += hwcontext_vdpau.o +@@ -211,6 +213,7 @@ SKIPHEADERS-$(CONFIG_D3D11VA) + + SKIPHEADERS-$(CONFIG_DXVA2) += hwcontext_dxva2.h + SKIPHEADERS-$(CONFIG_QSV) += hwcontext_qsv.h + SKIPHEADERS-$(CONFIG_OPENCL) += hwcontext_opencl.h ++SKIPHEADERS-$(CONFIG-RPI) += rpi_sand_fn_pw.h + SKIPHEADERS-$(CONFIG_VAAPI) += hwcontext_vaapi.h + SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX) += hwcontext_videotoolbox.h + SKIPHEADERS-$(CONFIG_VDPAU) += hwcontext_vdpau.h +--- a/libavutil/aarch64/Makefile ++++ b/libavutil/aarch64/Makefile +@@ -1,4 +1,6 @@ + OBJS += aarch64/cpu.o \ + aarch64/float_dsp_init.o \ + +-NEON-OBJS += aarch64/float_dsp_neon.o ++NEON-OBJS += aarch64/float_dsp_neon.o \ ++ aarch64/rpi_sand_neon.o \ ++ +--- /dev/null ++++ b/libavutil/aarch64/rpi_sand_neon.S +@@ -0,0 +1,672 @@ ++/* ++Copyright (c) 2021 Michael Eiler ++ ++Redistribution and use in source and binary forms, with or without ++modification, are permitted provided that the following conditions are met: ++ * Redistributions of source code must retain the above copyright ++ notice, this list of conditions and the following disclaimer. ++ * Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in the ++ documentation and/or other materials provided with the distribution. ++ * Neither the name of the copyright holder nor the ++ names of its contributors may be used to endorse or promote products ++ derived from this software without specific prior written permission. ++ ++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED ++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE ++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY ++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; ++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ ++Authors: Michael Eiler <eiler.mike@gmail.com> ++*/ ++ ++#include "asm.S" ++ ++// void ff_rpi_sand8_lines_to_planar_y8( ++// uint8_t * dest, : x0 ++// unsigned int dst_stride, : w1 ++// const uint8_t * src, : x2 ++// unsigned int src_stride1, : w3, always 128 ++// unsigned int src_stride2, : w4 ++// unsigned int _x, : w5 ++// unsigned int y, : w6 ++// unsigned int _w, : w7 ++// unsigned int h); : [sp, #0] ++ ++function ff_rpi_sand8_lines_to_planar_y8, export=1 ++ // w15 contains the number of rows we need to process ++ ldr w15, [sp, #0] ++ ++ // w8 will contain the number of blocks per row ++ // w8 = floor(_w/stride1) ++ // stride1 is assumed to always be 128 ++ mov w8, w1 ++ lsr w8, w8, #7 ++ ++ // in case the width of the image is not a multiple of 128, there will ++ // be an incomplete block at the end of every row ++ // w9 contains the number of pixels stored within this block ++ // w9 = _w - w8 * 128 ++ lsl w9, w8, #7 ++ sub w9, w7, w9 ++ ++ // this is the value we have to add to the src pointer after reading a complete block ++ // it will move the address to the start of the next block ++ // w10 = stride2 * stride1 - stride1 ++ mov w10, w4 ++ lsl w10, w10, #7 ++ sub w10, w10, #128 ++ ++ // w11 is the row offset, meaning the start offset of the first block of every collumn ++ // this will be increased with stride1 within every iteration of the row_loop ++ eor w11, w11, w11 ++ ++ // w12 = 0, processed row count ++ eor w12, w12, w12 ++row_loop: ++ // start of the first block within the current row ++ // x13 = row offset + src ++ mov x13, x2 ++ add x13, x13, x11 ++ ++ // w14 = 0, processed block count ++ eor w14, w14, w14 ++ ++ cmp w8, #0 ++ beq no_main_y8 ++ ++block_loop: ++ // copy 128 bytes (a full block) into the vector registers v0-v7 and increase the src address by 128 ++ // fortunately these aren't callee saved ones, meaning we don't need to backup them ++ ld1 { v0.16b, v1.16b, v2.16b, v3.16b}, [x13], #64 ++ ld1 { v4.16b, v5.16b, v6.16b, v7.16b}, [x13], #64 ++ ++ // write these registers back to the destination vector and increase the dst address by 128 ++ st1 { v0.16b, v1.16b, v2.16b, v3.16b }, [x0], #64 ++ st1 { v4.16b, v5.16b, v6.16b, v7.16b }, [x0], #64 ++ ++ // move the source register to the beginning of the next block (x13 = src + block offset) ++ add x13, x13, x10 ++ // increase the block counter ++ add w14, w14, #1 ++ ++ // continue with the block_loop if we haven't copied all full blocks yet ++ cmp w8, w14 ++ bgt block_loop ++ ++ // handle the last block at the end of each row ++ // at most 127 byte values copied from src to dst ++no_main_y8: ++ eor w5, w5, w5 // i = 0 ++incomplete_block_loop_y8: ++ cmp w5, w9 ++ bge incomplete_block_loop_end_y8 ++ ++ ldrb w6, [x13] ++ strb w6, [x0] ++ add x13, x13, #1 ++ add x0, x0, #1 ++ ++ add w5, w5, #1 ++ b incomplete_block_loop_y8 ++incomplete_block_loop_end_y8: ++ ++ ++ // increase the row offset by 128 (stride1) ++ add w11, w11, #128 ++ // increment the row counter ++ add w12, w12, #1 ++ ++ // process the next row if we haven't finished yet ++ cmp w15, w12 ++ bgt row_loop ++ ++ ret ++endfunc ++ ++ ++ ++// void ff_rpi_sand8_lines_to_planar_c8( ++// uint8_t * dst_u, : x0 ++// unsigned int dst_stride_u, : w1 == width ++// uint8_t * dst_v, : x2 ++// unsigned int dst_stride_v, : w3 == width ++// const uint8_t * src, : x4 ++// unsigned int stride1, : w5 == 128 ++// unsigned int stride2, : w6 ++// unsigned int _x, : w7 ++// unsigned int y, : [sp, #0] ++// unsigned int _w, : [sp, #8] ++// unsigned int h); : [sp, #16] ++ ++function ff_rpi_sand8_lines_to_planar_c8, export=1 ++ // w7 = width ++ ldr w7, [sp, #8] ++ ++ // w15 contains the number of rows we need to process ++ // counts down ++ ldr w15, [sp, #16] ++ ++ // number of full blocks, w8 = _w / (stride1 >> 1) == _w / 64 == _w >> 6 ++ mov w8, w7 ++ lsr w8, w8, #6 ++ ++ // number of pixels in block at the end of every row ++ // w9 = _w - (w8 * 64) ++ lsl w9, w8, #6 ++ sub w9, w7, w9 ++ ++ // Skip at the end of the line to account for stride ++ sub w12, w1, w7 ++ ++ // address delta to the beginning of the next block ++ // w10 = (stride2 * stride1 - stride1) = stride2 * 128 - 128 ++ lsl w10, w6, #7 ++ sub w10, w10, #128 ++ ++ // w11 = row address start offset = 0 ++ eor w11, w11, w11 ++ ++row_loop_c8: ++ // start of the first block within the current row ++ // x13 = row offset + src ++ mov x13, x4 ++ add x13, x13, x11 ++ ++ // w14 = 0, processed block count ++ eor w14, w14, w14 ++ ++ cmp w8, #0 ++ beq no_main_c8 ++ ++block_loop_c8: ++ // load the full block -> 128 bytes, the block contains 64 interleaved U and V values ++ ld2 { v0.16b, v1.16b }, [x13], #32 ++ ld2 { v2.16b, v3.16b }, [x13], #32 ++ ld2 { v4.16b, v5.16b }, [x13], #32 ++ ld2 { v6.16b, v7.16b }, [x13], #32 ++ ++ // swap register so that we can write them out with a single instruction ++ mov v16.16b, v1.16b ++ mov v17.16b, v3.16b ++ mov v18.16b, v5.16b ++ mov v1.16b, v2.16b ++ mov v2.16b, v4.16b ++ mov v3.16b, v6.16b ++ mov v4.16b, v16.16b ++ mov v5.16b, v17.16b ++ mov v6.16b, v18.16b ++ ++ st1 { v0.16b, v1.16b, v2.16b, v3.16b }, [x0], #64 ++ st1 { v4.16b, v5.16b, v6.16b, v7.16b }, [x2], #64 ++ ++ // increment row counter and move src to the beginning of the next block ++ add w14, w14, #1 ++ add x13, x13, x10 ++ ++ // jump to block_loop_c8 iff the block count is smaller than the number of full blocks ++ cmp w8, w14 ++ bgt block_loop_c8 ++ ++no_main_c8: ++ // handle incomplete block at the end of every row ++ eor w5, w5, w5 // point counter, this might be ++incomplete_block_loop_c8: ++ cmp w5, w9 ++ bge incomplete_block_loop_end_c8 ++ ++ ldrb w1, [x13] ++ strb w1, [x0] ++ add x13, x13, #1 ++ ++ ldrb w1, [x13] ++ strb w1, [x2] ++ add x13, x13, #1 ++ ++ add x0, x0, #1 ++ add x2, x2, #1 ++ ++ add w5, w5, #1 ++ b incomplete_block_loop_c8 ++incomplete_block_loop_end_c8: ++ ++ // increase row_offset by stride1 ++ add w11, w11, #128 ++ add x0, x0, w12, sxtw ++ add x2, x2, w12, sxtw ++ ++ // jump to row_Loop_c8 iff the row count is small than the height ++ subs w15, w15, #1 ++ bgt row_loop_c8 ++ ++ ret ++endfunc ++ ++// Unzip chroma ++// ++// On entry: ++// a0 = V0, U2, ... ++// a1 = U0, V1, ... ++// a2 = U1, V2, ... ++// b0 = V8, U10, ... ++// b1 = U8, V9, ... ++// b2 = U9, V10, ... ++// ++// On exit: ++// d0 = U0, U3, ... ++// ... ++// a0 = V0, V3, .. ++// ... ++// ++// Reg order for USAND is a1, a0, a2 (i.e. swap natural order of 1st 2 dest regs) ++ ++.macro UZPH_C d0, d1, d2, a0, a1, a2, b0, b1, b2 ++ uzp1 \d0\().8h, \a1\().8h, \b1\().8h ++ uzp1 \d1\().8h, \a2\().8h, \b2\().8h ++ uzp2 \d2\().8h, \a0\().8h, \b0\().8h ++ ++ uzp1 \a0\().8h, \a0\().8h, \b0\().8h ++ uzp2 \a1\().8h, \a1\().8h, \b1\().8h ++ uzp2 \a2\().8h, \a2\().8h, \b2\().8h ++.endm ++ ++// SAND30 -> 10bit ++.macro USAND10 d0, d1, d2, a0, a1 ++ shrn \d2\().4h, \a0\().4s, #14 ++ shrn \d1\().4h, \a0\().4s, #10 ++ ++ shrn2 \d2\().8h, \a1\().4s, #14 ++ shrn2 \d1\().8h, \a1\().4s, #10 ++ uzp1 \d0\().8h, \a0\().8h, \a1\().8h ++ ++ ushr \d2\().8h, \d2\().8h, #6 ++ bic \d0\().8h, #0xfc, lsl #8 ++ bic \d1\().8h, #0xfc, lsl #8 ++.endm ++ ++// SAND30 -> 8bit ++.macro USAND8 d0, d1, d2, a0, a1, a2, a3, t0, t1, t2 ++ shrn \d1\().4h, \a0\().4s, #12 ++ shrn2 \d1\().8h, \a1\().4s, #12 ++ uzp1 \d0\().8h, \a0\().8h, \a1\().8h ++ uzp2 \d2\().8h, \a0\().8h, \a1\().8h ++ ++ shrn \t1\().4h, \a2\().4s, #12 ++ shrn2 \t1\().8h, \a3\().4s, #12 ++ uzp1 \t0\().8h, \a2\().8h, \a3\().8h ++ uzp2 \t2\().8h, \a2\().8h, \a3\().8h ++ ++ shrn \d0\().8b, \d0\().8h, #2 ++ shrn2 \d0\().16b, \t0\().8h, #2 ++ shrn \d2\().8b, \d2\().8h, #6 ++ shrn2 \d2\().16b, \t2\().8h, #6 ++ uzp1 \d1\().16b, \d1\().16b, \t1\().16b ++.endm ++ ++ ++// void ff_rpi_sand30_lines_to_planar_c16( ++// uint8_t * dst_u, // [x0] ++// unsigned int dst_stride_u, // [w1] ++// uint8_t * dst_v, // [x2] ++// unsigned int dst_stride_v, // [w3] ++// const uint8_t * src, // [x4] ++// unsigned int stride1, // [w5] 128 ++// unsigned int stride2, // [w6] ++// unsigned int _x, // [w7] 0 ++// unsigned int y, // [sp, #0] ++// unsigned int _w, // [sp, #8] w9 ++// unsigned int h); // [sp, #16] w10 ++ ++function ff_rpi_sand30_lines_to_planar_c16, export=1 ++ ldr w7, [sp, #0] // y ++ ldr w8, [sp, #8] // _w ++ ldr w10, [sp, #16] // h ++ lsl w6, w6, #7 // Fixup stride2 ++ sub w6, w6, #64 ++ uxtw x6, w6 ++ sub w1, w1, w8, LSL #1 // Fixup chroma strides ++ sub w3, w3, w8, LSL #1 ++ lsl w7, w7, #7 // Add y to src ++ add x4, x4, w7, UXTW ++10: ++ mov w13, #0 ++ mov x5, x4 ++ mov w9, w8 ++1: ++ ld1 {v0.4s-v3.4s}, [x5], #64 ++ ld1 {v4.4s-v7.4s}, [x5], x6 ++ subs w9, w9, #48 ++ ++ USAND10 v17, v16, v18, v0, v1 ++ USAND10 v20, v19, v21, v2, v3 ++ UZPH_C v0, v1, v2, v16, v17, v18, v19, v20, v21 ++ USAND10 v23, v22, v24, v4, v5 ++ USAND10 v26, v25, v27, v6, v7 ++ UZPH_C v4, v5, v6, v22, v23, v24, v25, v26, v27 ++ ++ blt 2f ++ ++ st3 {v0.8h-v2.8h}, [x0], #48 ++ st3 {v4.8h-v6.8h}, [x0], #48 ++ st3 {v16.8h-v18.8h}, [x2], #48 ++ st3 {v22.8h-v24.8h}, [x2], #48 ++ ++ bne 1b ++11: ++ subs w10, w10, #1 ++ add x4, x4, #128 ++ add x0, x0, w1, UXTW ++ add x2, x2, w3, UXTW ++ bne 10b ++99: ++ ret ++ ++// Partial final write ++2: ++ cmp w9, #24-48 ++ blt 1f ++ st3 {v0.8h - v2.8h}, [x0], #48 ++ st3 {v16.8h - v18.8h}, [x2], #48 ++ beq 11b ++ mov v0.16b, v4.16b ++ mov v1.16b, v5.16b ++ sub w9, w9, #24 ++ mov v2.16b, v6.16b ++ mov v16.16b, v22.16b ++ mov v17.16b, v23.16b ++ mov v18.16b, v24.16b ++1: ++ cmp w9, #12-48 ++ blt 1f ++ st3 {v0.4h - v2.4h}, [x0], #24 ++ st3 {v16.4h - v18.4h}, [x2], #24 ++ beq 11b ++ mov v0.d[0], v0.d[1] ++ sub w9, w9, #12 ++ mov v1.d[0], v1.d[1] ++ mov v2.d[0], v2.d[1] ++ mov v16.d[0], v16.d[1] ++ mov v17.d[0], v17.d[1] ++ mov v18.d[0], v18.d[1] ++1: ++ cmp w9, #6-48 ++ blt 1f ++ st3 {v0.h - v2.h}[0], [x0], #6 ++ st3 {v0.h - v2.h}[1], [x0], #6 ++ st3 {v16.h - v18.h}[0], [x2], #6 ++ st3 {v16.h - v18.h}[1], [x2], #6 ++ beq 11b ++ mov v0.s[0], v0.s[1] ++ sub w9, w9, #6 ++ mov v1.s[0], v1.s[1] ++ mov v2.s[0], v2.s[1] ++ mov v16.s[0], v16.s[1] ++ mov v17.s[0], v17.s[1] ++ mov v18.s[0], v18.s[1] ++1: ++ cmp w9, #3-48 ++ blt 1f ++ st3 {v0.h - v2.h}[0], [x0], #6 ++ st3 {v16.h - v18.h}[0], [x2], #6 ++ beq 11b ++ mov v0.h[0], v0.h[1] ++ sub w9, w9, #3 ++ mov v1.h[0], v1.h[1] ++ mov v16.h[0], v16.h[1] ++ mov v17.h[0], v17.h[1] ++1: ++ cmp w9, #2-48 ++ blt 1f ++ st2 {v0.h - v1.h}[0], [x0], #4 ++ st2 {v16.h - v17.h}[0], [x2], #4 ++ b 11b ++1: ++ st1 {v0.h}[0], [x0], #2 ++ st1 {v16.h}[0], [x2], #2 ++ b 11b ++endfunc ++ ++ ++//void ff_rpi_sand30_lines_to_planar_p010( ++// uint8_t * dest, ++// unsigned int dst_stride, ++// const uint8_t * src, ++// unsigned int src_stride1, ++// unsigned int src_stride2, ++// unsigned int _x, ++// unsigned int y, ++// unsigned int _w, ++// unsigned int h); ++ ++// void ff_rpi_sand30_lines_to_planar_y8( ++// uint8_t * dest, : x0 ++// unsigned int dst_stride, : w1 ++// const uint8_t * src, : x2 ++// unsigned int src_stride1, : w3, always 128 ++// unsigned int src_stride2, : w4 ++// unsigned int _x, : w5 ++// unsigned int y, : w6 ++// unsigned int _w, : w7 ++// unsigned int h); : [sp, #0] ++// ++// Assumes that we are starting on a stripe boundary and that overreading ++// within the stripe is OK. However it does respect the dest size for wri ++ ++function ff_rpi_sand30_lines_to_planar_y16, export=1 ++ lsl w4, w4, #7 ++ sub w4, w4, #64 ++ uxtw x4, w4 ++ sub w1, w1, w7, lsl #1 ++ uxtw x6, w6 ++ add x8, x2, x6, lsl #7 ++ ldr w6, [sp, #0] ++ ++10: ++ mov x2, x8 ++ mov w5, w7 ++1: ++ ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x2], #64 ++ ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x2], x4 ++ ++ subs w5, w5, #96 ++ ++ USAND10 v16, v17, v18, v0, v1 ++ USAND10 v19, v20, v21, v2, v3 ++ USAND10 v22, v23, v24, v4, v5 ++ USAND10 v25, v26, v27, v6, v7 ++ ++ blt 2f ++ ++ st3 {v16.8h, v17.8h, v18.8h}, [x0], #48 ++ st3 {v19.8h, v20.8h, v21.8h}, [x0], #48 ++ st3 {v22.8h, v23.8h, v24.8h}, [x0], #48 ++ st3 {v25.8h, v26.8h, v27.8h}, [x0], #48 ++ ++ bne 1b ++ ++11: ++ subs w6, w6, #1 ++ add x0, x0, w1, uxtw ++ add x8, x8, #128 ++ bne 10b ++ ++ ret ++ ++// Partial final write ++2: ++ cmp w5, #48-96 ++ blt 1f ++ st3 {v16.8h, v17.8h, v18.8h}, [x0], #48 ++ st3 {v19.8h, v20.8h, v21.8h}, [x0], #48 ++ beq 11b ++ mov v16.16b, v22.16b ++ mov v17.16b, v23.16b ++ sub w5, w5, #48 ++ mov v18.16b, v24.16b ++ mov v19.16b, v25.16b ++ mov v20.16b, v26.16b ++ mov v21.16b, v27.16b ++1: ++ cmp w5, #24-96 ++ blt 1f ++ st3 {v16.8h, v17.8h, v18.8h}, [x0], #48 ++ beq 11b ++ mov v16.16b, v19.16b ++ mov v17.16b, v20.16b ++ sub w5, w5, #24 ++ mov v18.16b, v21.16b ++1: ++ cmp w5, #12-96 ++ blt 1f ++ st3 {v16.4h, v17.4h, v18.4h}, [x0], #24 ++ beq 11b ++ mov v16.d[0], v16.d[1] ++ sub w5, w5, #12 ++ mov v17.d[0], v17.d[1] ++ mov v18.d[0], v18.d[1] ++1: ++ cmp w5, #6-96 ++ blt 1f ++ st3 {v16.h, v17.h, v18.h}[0], [x0], #6 ++ st3 {v16.h, v17.h, v18.h}[1], [x0], #6 ++ beq 11b ++ mov v16.s[0], v16.s[1] ++ sub w5, w5, #6 ++ mov v17.s[0], v17.s[1] ++ mov v18.s[0], v18.s[1] ++1: ++ cmp w5, #3-96 ++ blt 1f ++ st3 {v16.h, v17.h, v18.h}[0], [x0], #6 ++ beq 11b ++ mov v16.h[0], v16.h[1] ++ sub w5, w5, #3 ++ mov v17.h[0], v17.h[1] ++1: ++ cmp w5, #2-96 ++ blt 1f ++ st2 {v16.h, v17.h}[0], [x0], #4 ++ b 11b ++1: ++ st1 {v16.h}[0], [x0], #2 ++ b 11b ++ ++endfunc ++ ++// void ff_rpi_sand30_lines_to_planar_y8( ++// uint8_t * dest, : x0 ++// unsigned int dst_stride, : w1 ++// const uint8_t * src, : x2 ++// unsigned int src_stride1, : w3, always 128 ++// unsigned int src_stride2, : w4 ++// unsigned int _x, : w5 ++// unsigned int y, : w6 ++// unsigned int _w, : w7 ++// unsigned int h); : [sp, #0] ++// ++// Assumes that we are starting on a stripe boundary and that overreading ++// within the stripe is OK. However it does respect the dest size for wri ++ ++function ff_rpi_sand30_lines_to_planar_y8, export=1 ++ lsl w4, w4, #7 ++ sub w4, w4, #64 ++ uxtw x4, w4 ++ sub w1, w1, w7 ++ uxtw x6, w6 ++ add x8, x2, x6, lsl #7 ++ ldr w6, [sp, #0] ++ ++10: ++ mov x2, x8 ++ mov w5, w7 ++1: ++ ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x2], #64 ++ ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x2], x4 ++ ++ subs w5, w5, #96 ++ ++ // v0, v1 ++ USAND8 v16, v17, v18, v0, v1, v2, v3, v22, v23, v24 ++ USAND8 v19, v20, v21, v4, v5, v6, v7, v22, v23, v24 ++ ++ blt 2f ++ ++ st3 {v16.16b, v17.16b, v18.16b}, [x0], #48 ++ st3 {v19.16b, v20.16b, v21.16b}, [x0], #48 ++ ++ bne 1b ++ ++11: ++ subs w6, w6, #1 ++ add x0, x0, w1, uxtw ++ add x8, x8, #128 ++ bne 10b ++ ++ ret ++ ++// Partial final write ++2: ++ cmp w5, #48-96 ++ blt 1f ++ st3 {v16.16b, v17.16b, v18.16b}, [x0], #48 ++ beq 11b ++ mov v16.16b, v22.16b ++ mov v17.16b, v23.16b ++ sub w5, w5, #48 ++ mov v18.16b, v24.16b ++1: ++ cmp w5, #24-96 ++ blt 1f ++ st3 {v16.8b, v17.8b, v18.8b}, [x0], #24 ++ beq 11b ++ mov v16.d[0], v16.d[1] ++ sub w5, w5, #24 ++ mov v17.d[0], v17.d[1] ++ mov v18.d[0], v18.d[1] ++1: ++ cmp w5, #12-96 ++ blt 1f ++ st3 {v16.b, v17.b, v18.b}[0], [x0], #3 ++ st3 {v16.b, v17.b, v18.b}[1], [x0], #3 ++ st3 {v16.b, v17.b, v18.b}[2], [x0], #3 ++ st3 {v16.b, v17.b, v18.b}[3], [x0], #3 ++ beq 11b ++ mov v16.s[0], v16.s[1] ++ sub w5, w5, #12 ++ mov v17.s[0], v17.s[1] ++ mov v18.s[0], v18.s[1] ++1: ++ cmp w5, #6-96 ++ blt 1f ++ st3 {v16.b, v17.b, v18.b}[0], [x0], #3 ++ st3 {v16.b, v17.b, v18.b}[1], [x0], #3 ++ beq 11b ++ mov v16.h[0], v16.h[1] ++ sub w5, w5, #6 ++ mov v17.h[0], v17.h[1] ++ mov v18.h[0], v18.h[1] ++1: ++ cmp w5, #3-96 ++ blt 1f ++ st3 {v16.b, v17.b, v18.b}[0], [x0], #3 ++ beq 11b ++ mov v16.b[0], v16.b[1] ++ sub w5, w5, #3 ++ mov v17.b[0], v17.b[1] ++1: ++ cmp w5, #2-96 ++ blt 1f ++ st2 {v16.b, v17.b}[0], [x0], #2 ++ b 11b ++1: ++ st1 {v16.b}[0], [x0], #1 ++ b 11b ++ ++endfunc ++ +--- /dev/null ++++ b/libavutil/aarch64/rpi_sand_neon.h +@@ -0,0 +1,59 @@ ++/* ++Copyright (c) 2021 Michael Eiler ++ ++Redistribution and use in source and binary forms, with or without ++modification, are permitted provided that the following conditions are met: ++ * Redistributions of source code must retain the above copyright ++ notice, this list of conditions and the following disclaimer. ++ * Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in the ++ documentation and/or other materials provided with the distribution. ++ * Neither the name of the copyright holder nor the ++ names of its contributors may be used to endorse or promote products ++ derived from this software without specific prior written permission. ++ ++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED ++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE ++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY ++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; ++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ ++Authors: Michael Eiler <eiler.mike@gmail.com> ++*/ ++ ++#pragma once ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++void ff_rpi_sand8_lines_to_planar_y8(uint8_t * dest, unsigned int dst_stride, ++ const uint8_t * src, unsigned int src_stride1, unsigned int src_stride2, ++ unsigned int _x, unsigned int y, unsigned int _w, unsigned int h); ++ ++void ff_rpi_sand8_lines_to_planar_c8(uint8_t * dst_u, unsigned int dst_stride_u, ++ uint8_t * dst_v, unsigned int dst_stride_v, const uint8_t * src, ++ unsigned int stride1, unsigned int stride2, unsigned int _x, unsigned int y, ++ unsigned int _w, unsigned int h); ++ ++void ff_rpi_sand30_lines_to_planar_y16(uint8_t * dest, unsigned int dst_stride, ++ const uint8_t * src, unsigned int src_stride1, unsigned int src_stride2, ++ unsigned int _x, unsigned int y, unsigned int _w, unsigned int h); ++ ++void ff_rpi_sand30_lines_to_planar_c16(uint8_t * dst_u, unsigned int dst_stride_u, ++ uint8_t * dst_v, unsigned int dst_stride_v, const uint8_t * src, unsigned int stride1, ++ unsigned int stride2, unsigned int _x, unsigned int y, unsigned int _w, unsigned int h); ++ ++void ff_rpi_sand30_lines_to_planar_y8(uint8_t * dest, unsigned int dst_stride, ++ const uint8_t * src, unsigned int src_stride1, unsigned int src_stride2, ++ unsigned int _x, unsigned int y, unsigned int _w, unsigned int h); ++ ++#ifdef __cplusplus ++} ++#endif ++ +--- a/libavutil/arm/Makefile ++++ b/libavutil/arm/Makefile +@@ -6,3 +6,4 @@ VFP-OBJS += arm/float_dsp_init_vfp.o + + NEON-OBJS += arm/float_dsp_init_neon.o \ + arm/float_dsp_neon.o \ ++ arm/rpi_sand_neon.o \ +--- /dev/null ++++ b/libavutil/arm/rpi_sand_neon.S +@@ -0,0 +1,925 @@ ++/* ++Copyright (c) 2018 Raspberry Pi (Trading) Ltd. ++All rights reserved. ++ ++Redistribution and use in source and binary forms, with or without ++modification, are permitted provided that the following conditions are met: ++ * Redistributions of source code must retain the above copyright ++ notice, this list of conditions and the following disclaimer. ++ * Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in the ++ documentation and/or other materials provided with the distribution. ++ * Neither the name of the copyright holder nor the ++ names of its contributors may be used to endorse or promote products ++ derived from this software without specific prior written permission. ++ ++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED ++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE ++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY ++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; ++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ ++Authors: John Cox ++*/ ++ ++#include "libavutil/arm/asm.S" ++ ++ ++@ General notes: ++@ Having done some timing on this in sand8->y8 (Pi4) ++@ vst1 (680fps) is a bit faster than vstm (660fps) ++@ vldm (680fps) is noticably faster than vld1 (480fps) ++@ (or it might be that a mix is what is required) ++@ ++@ At least on a Pi4 it is no more expensive to have a single auto-inc register ++@ for dest address than it is to have 2 used alternately (On Pi3 Ben asserted ++@ the latter was better) ++@ ++@ vstm will bus error on unaligned access (so will vldm), vst1 is safe unless ++@ the memory is uncached. ++@ As these are Sand -> planar we can assume that src is going to be aligned but ++@ it is possible that dest isn't (converting to .yuv or other packed format). ++@ Luckily vst1 is faster than vstm :-) so all is well ++@ vst1 has alignment requirements of el size so maybe splitting vst1.32 into 4 ++@ .8 stores would let us do non-word aligned stores into uncached but it ++@ probably isn't worth it. ++ ++ ++ ++ ++@ void ff_rpi_sand128b_stripe_to_8_10( ++@ uint8_t * dest, // [r0] ++@ const uint8_t * src1, // [r1] ++@ const uint8_t * src2, // [r2] ++@ unsigned int lines); // [r3] ++ ++.macro stripe2_to_8, bit_depth ++ vpush {q4-q7} ++1: ++ vldm r1!, {q0-q7} ++ subs r3, #1 ++ vldm r2!, {q8-q15} ++ vqrshrn.u16 d0, q0, #\bit_depth - 8 ++ vqrshrn.u16 d1, q1, #\bit_depth - 8 ++ vqrshrn.u16 d2, q2, #\bit_depth - 8 ++ vqrshrn.u16 d3, q3, #\bit_depth - 8 ++ vqrshrn.u16 d4, q4, #\bit_depth - 8 ++ vqrshrn.u16 d5, q5, #\bit_depth - 8 ++ vqrshrn.u16 d6, q6, #\bit_depth - 8 ++ vqrshrn.u16 d7, q7, #\bit_depth - 8 ++ vqrshrn.u16 d8, q8, #\bit_depth - 8 ++ vqrshrn.u16 d9, q9, #\bit_depth - 8 ++ vqrshrn.u16 d10, q10, #\bit_depth - 8 ++ vqrshrn.u16 d11, q11, #\bit_depth - 8 ++ vqrshrn.u16 d12, q12, #\bit_depth - 8 ++ vqrshrn.u16 d13, q13, #\bit_depth - 8 ++ vqrshrn.u16 d14, q14, #\bit_depth - 8 ++ vqrshrn.u16 d15, q15, #\bit_depth - 8 ++ vstm r0!, {q0-q7} ++ bne 1b ++ vpop {q4-q7} ++ bx lr ++.endm ++ ++function ff_rpi_sand128b_stripe_to_8_10, export=1 ++ stripe2_to_8 10 ++endfunc ++ ++@ void ff_rpi_sand8_lines_to_planar_y8( ++@ uint8_t * dest, // [r0] ++@ unsigned int dst_stride, // [r1] ++@ const uint8_t * src, // [r2] ++@ unsigned int src_stride1, // [r3] Ignored - assumed 128 ++@ unsigned int src_stride2, // [sp, #0] -> r3 ++@ unsigned int _x, // [sp, #4] Ignored - 0 ++@ unsigned int y, // [sp, #8] (r7 in prefix) ++@ unsigned int _w, // [sp, #12] -> r6 (cur r5) ++@ unsigned int h); // [sp, #16] -> r7 ++@ ++@ Assumes that we are starting on a stripe boundary and that overreading ++@ within the stripe is OK. However it does respect the dest size for writing ++ ++function ff_rpi_sand8_lines_to_planar_y8, export=1 ++ push {r4-r8, lr} @ +24 L ++ ldr r3, [sp, #24] ++ ldr r6, [sp, #36] ++ ldr r7, [sp, #32] @ y ++ lsl r3, #7 ++ sub r1, r6 ++ add r8, r2, r7, lsl #7 ++ ldr r7, [sp, #40] ++ ++10: ++ mov r2, r8 ++ add r4, r0, #24 ++ mov r5, r6 ++ mov lr, #0 ++1: ++ vldm r2, {q8-q15} ++ add r2, r3 ++ subs r5, #128 ++ blt 2f ++ vst1.8 {d16, d17, d18, d19}, [r0]! ++ vst1.8 {d20, d21, d22, d23}, [r0]! ++ vst1.8 {d24, d25, d26, d27}, [r0]! ++ vst1.8 {d28, d29, d30, d31}, [r0]! ++ bne 1b ++11: ++ subs r7, #1 ++ add r0, r1 ++ add r8, #128 ++ bne 10b ++ ++ pop {r4-r8, pc} ++ ++@ Partial final write ++2: ++ cmp r5, #64-128 ++ blt 1f ++ vst1.8 {d16, d17, d18, d19}, [r0]! ++ vst1.8 {d20, d21, d22, d23}, [r0]! ++ beq 11b ++ vmov q8, q12 ++ vmov q9, q13 ++ sub r5, #64 ++ vmov q10, q14 ++ vmov q11, q15 ++1: ++ cmp r5, #32-128 ++ blt 1f ++ vst1.8 {d16, d17, d18, d19}, [r0]! ++ beq 11b ++ vmov q8, q10 ++ sub r5, #32 ++ vmov q9, q11 ++1: ++ cmp r5, #16-128 ++ blt 1f ++ vst1.8 {d16, d17}, [r0]! ++ beq 11b ++ sub r5, #16 ++ vmov q8, q9 ++1: ++ cmp r5, #8-128 ++ blt 1f ++ vst1.8 {d16}, [r0]! ++ beq 11b ++ sub r5, #8 ++ vmov d16, d17 ++1: ++ cmp r5, #4-128 ++ blt 1f ++ vst1.32 {d16[0]}, [r0]! ++ beq 11b ++ sub r5, #4 ++ vshr.u64 d16, #32 ++1: ++ cmp r5, #2-128 ++ blt 1f ++ vst1.16 {d16[0]}, [r0]! ++ beq 11b ++ vst1.8 {d16[2]}, [r0]! ++ b 11b ++1: ++ vst1.8 {d16[0]}, [r0]! ++ b 11b ++endfunc ++ ++@ void ff_rpi_sand8_lines_to_planar_c8( ++@ uint8_t * dst_u, // [r0] ++@ unsigned int dst_stride_u, // [r1] ++@ uint8_t * dst_v, // [r2] ++@ unsigned int dst_stride_v, // [r3] ++@ const uint8_t * src, // [sp, #0] -> r4, r5 ++@ unsigned int stride1, // [sp, #4] 128 ++@ unsigned int stride2, // [sp, #8] -> r8 ++@ unsigned int _x, // [sp, #12] 0 ++@ unsigned int y, // [sp, #16] (r7 in prefix) ++@ unsigned int _w, // [sp, #20] -> r12, r6 ++@ unsigned int h); // [sp, #24] -> r7 ++@ ++@ Assumes that we are starting on a stripe boundary and that overreading ++@ within the stripe is OK. However it does respect the dest size for writing ++ ++function ff_rpi_sand8_lines_to_planar_c8, export=1 ++ push {r4-r8, lr} @ +24 ++ ++ ldr r5, [sp, #24] ++ ldr r8, [sp, #32] ++ ldr r7, [sp, #40] ++ ldr r6, [sp, #44] ++ lsl r8, #7 ++ add r5, r5, r7, lsl #7 ++ sub r1, r1, r6 ++ sub r3, r3, r6 ++ ldr r7, [sp, #48] ++ vpush {q4-q7} ++ ++10: ++ mov r4, r5 ++ mov r12, r6 ++1: ++ subs r12, #64 ++ vldm r4, {q0-q7} ++ add r4, r8 ++ it gt ++ vldmgt r4, {q8-q15} ++ add r4, r8 ++ ++ vuzp.8 q0, q1 ++ vuzp.8 q2, q3 ++ vuzp.8 q4, q5 ++ vuzp.8 q6, q7 ++ ++ vuzp.8 q8, q9 ++ vuzp.8 q10, q11 ++ vuzp.8 q12, q13 ++ vuzp.8 q14, q15 ++ subs r12, #64 ++ ++ @ Rearrange regs so we can use vst1 with 4 regs ++ vswp q1, q2 ++ vswp q5, q6 ++ vswp q9, q10 ++ vswp q13, q14 ++ blt 2f ++ ++ vst1.8 {d0, d1, d2, d3 }, [r0]! ++ vst1.8 {d8, d9, d10, d11}, [r0]! ++ vst1.8 {d16, d17, d18, d19}, [r0]! ++ vst1.8 {d24, d25, d26, d27}, [r0]! ++ ++ vst1.8 {d4, d5, d6, d7 }, [r2]! ++ vst1.8 {d12, d13, d14, d15}, [r2]! ++ vst1.8 {d20, d21, d22, d23}, [r2]! ++ vst1.8 {d28, d29, d30, d31}, [r2]! ++ bne 1b ++11: ++ subs r7, #1 ++ add r5, #128 ++ add r0, r1 ++ add r2, r3 ++ bne 10b ++ vpop {q4-q7} ++ pop {r4-r8,pc} ++ ++2: ++ cmp r12, #64-128 ++ blt 1f ++ vst1.8 {d0, d1, d2, d3 }, [r0]! ++ vst1.8 {d8, d9, d10, d11}, [r0]! ++ vst1.8 {d4, d5, d6, d7 }, [r2]! ++ vst1.8 {d12, d13, d14, d15}, [r2]! ++ beq 11b ++ sub r12, #64 ++ vmov q0, q8 ++ vmov q1, q9 ++ vmov q2, q10 ++ vmov q3, q11 ++ vmov q4, q12 ++ vmov q5, q13 ++ vmov q6, q14 ++ vmov q7, q15 ++1: ++ cmp r12, #32-128 ++ blt 1f ++ vst1.8 {d0, d1, d2, d3 }, [r0]! ++ vst1.8 {d4, d5, d6, d7 }, [r2]! ++ beq 11b ++ sub r12, #32 ++ vmov q0, q4 ++ vmov q1, q5 ++ vmov q2, q6 ++ vmov q3, q7 ++1: ++ cmp r12, #16-128 ++ blt 1f ++ vst1.8 {d0, d1 }, [r0]! ++ vst1.8 {d4, d5 }, [r2]! ++ beq 11b ++ sub r12, #16 ++ vmov q0, q1 ++ vmov q2, q3 ++1: ++ cmp r12, #8-128 ++ blt 1f ++ vst1.8 {d0}, [r0]! ++ vst1.8 {d4}, [r2]! ++ beq 11b ++ sub r12, #8 ++ vmov d0, d1 ++ vmov d4, d5 ++1: ++ cmp r12, #4-128 ++ blt 1f ++ vst1.32 {d0[0]}, [r0]! ++ vst1.32 {d4[0]}, [r2]! ++ beq 11b ++ sub r12, #4 ++ vmov s0, s1 ++ vmov s8, s9 ++1: ++ cmp r12, #2-128 ++ blt 1f ++ vst1.16 {d0[0]}, [r0]! ++ vst1.16 {d4[0]}, [r2]! ++ beq 11b ++ vst1.8 {d0[2]}, [r0]! ++ vst1.8 {d4[2]}, [r2]! ++ b 11b ++1: ++ vst1.8 {d0[0]}, [r0]! ++ vst1.8 {d4[0]}, [r2]! ++ b 11b ++endfunc ++ ++ ++ ++@ void ff_rpi_sand30_lines_to_planar_y16( ++@ uint8_t * dest, // [r0] ++@ unsigned int dst_stride, // [r1] ++@ const uint8_t * src, // [r2] ++@ unsigned int src_stride1, // [r3] Ignored - assumed 128 ++@ unsigned int src_stride2, // [sp, #0] -> r3 ++@ unsigned int _x, // [sp, #4] Ignored - 0 ++@ unsigned int y, // [sp, #8] (r7 in prefix) ++@ unsigned int _w, // [sp, #12] -> r6 (cur r5) ++@ unsigned int h); // [sp, #16] -> r7 ++@ ++@ Assumes that we are starting on a stripe boundary and that overreading ++@ within the stripe is OK. However it does respect the dest size for writing ++ ++function ff_rpi_sand30_lines_to_planar_y16, export=1 ++ push {r4-r8, lr} @ +24 ++ ldr r3, [sp, #24] ++ ldr r6, [sp, #36] ++ ldr r7, [sp, #32] @ y ++ mov r12, #48 ++ sub r3, #1 ++ lsl r3, #7 ++ sub r1, r1, r6, lsl #1 ++ add r8, r2, r7, lsl #7 ++ ldr r7, [sp, #40] ++ ++10: ++ mov r2, r8 ++ add r4, r0, #24 ++ mov r5, r6 ++ mov lr, #0 ++1: ++ vldm r2!, {q10-q13} ++ add lr, #64 ++ ++ vshrn.u32 d4 , q10, #14 @ Cannot vshrn.u32 #20! ++ ands lr, #127 ++ vshrn.u32 d2, q10, #10 ++ vmovn.u32 d0, q10 ++ ++ vshrn.u32 d5, q11, #14 ++ it eq ++ addeq r2, r3 ++ vshrn.u32 d3, q11, #10 ++ vmovn.u32 d1, q11 ++ ++ subs r5, #48 ++ vshr.u16 q2, #6 ++ vbic.u16 q0, #0xfc00 ++ vbic.u16 q1, #0xfc00 ++ ++ vshrn.u32 d20, q12, #14 ++ vshrn.u32 d18, q12, #10 ++ vmovn.u32 d16, q12 ++ ++ vshrn.u32 d21, q13, #14 ++ vshrn.u32 d19, q13, #10 ++ vmovn.u32 d17, q13 ++ ++ vshr.u16 q10, #6 ++ vbic.u16 q8, #0xfc00 ++ vbic.u16 q9 , #0xfc00 ++ blt 2f ++ ++ vst3.16 {d0, d2, d4}, [r0], r12 ++ vst3.16 {d1, d3, d5}, [r4], r12 ++ vst3.16 {d16, d18, d20}, [r0], r12 ++ vst3.16 {d17, d19, d21}, [r4], r12 ++ ++ bne 1b ++ ++11: ++ subs r7, #1 ++ add r0, r1 ++ add r8, #128 ++ bne 10b ++ ++ pop {r4-r8, pc} ++ ++@ Partial final write ++2: ++ cmp r5, #24-48 ++ blt 1f ++ vst3.16 {d0, d2, d4}, [r0], r12 ++ vst3.16 {d1, d3, d5}, [r4] ++ beq 11b ++ vmov q0, q8 ++ sub r5, #24 ++ vmov q1, q9 ++ vmov q2, q10 ++1: ++ cmp r5, #12-48 ++ blt 1f ++ vst3.16 {d0, d2, d4}, [r0]! ++ beq 11b ++ vmov d0, d1 ++ sub r5, #12 ++ vmov d2, d3 ++ vmov d4, d5 ++1: ++ cmp r5, #6-48 ++ add r4, r0, #6 @ avoid [r0]! on sequential instructions ++ blt 1f ++ vst3.16 {d0[0], d2[0], d4[0]}, [r0] ++ vst3.16 {d0[1], d2[1], d4[1]}, [r4] ++ add r0, #12 ++ beq 11b ++ vmov s0, s1 ++ sub r5, #6 ++ vmov s4, s5 ++ vmov s8, s9 ++1: ++ cmp r5, #3-48 ++ blt 1f ++ vst3.16 {d0[0], d2[0], d4[0]}, [r0]! ++ beq 11b ++ sub r5, #3 ++ vshr.u32 d0, #16 ++ vshr.u32 d2, #16 ++1: ++ cmp r5, #2-48 ++ blt 1f ++ vst2.16 {d0[0], d2[0]}, [r0]! ++ b 11b ++1: ++ vst1.16 {d0[0]}, [r0]! ++ b 11b ++ ++endfunc ++ ++ ++@ void ff_rpi_sand30_lines_to_planar_c16( ++@ uint8_t * dst_u, // [r0] ++@ unsigned int dst_stride_u, // [r1] ++@ uint8_t * dst_v, // [r2] ++@ unsigned int dst_stride_v, // [r3] ++@ const uint8_t * src, // [sp, #0] -> r4, r5 ++@ unsigned int stride1, // [sp, #4] 128 ++@ unsigned int stride2, // [sp, #8] -> r8 ++@ unsigned int _x, // [sp, #12] 0 ++@ unsigned int y, // [sp, #16] (r7 in prefix) ++@ unsigned int _w, // [sp, #20] -> r6, r9 ++@ unsigned int h); // [sp, #24] -> r7 ++@ ++@ Assumes that we are starting on a stripe boundary and that overreading ++@ within the stripe is OK. However it does respect the dest size for writing ++ ++function ff_rpi_sand30_lines_to_planar_c16, export=1 ++ push {r4-r10, lr} @ +32 ++ ldr r5, [sp, #32] ++ ldr r8, [sp, #40] ++ ldr r7, [sp, #48] ++ ldr r9, [sp, #52] ++ mov r12, #48 ++ sub r8, #1 ++ lsl r8, #7 ++ add r5, r5, r7, lsl #7 ++ sub r1, r1, r9, lsl #1 ++ sub r3, r3, r9, lsl #1 ++ ldr r7, [sp, #56] ++10: ++ mov lr, #0 ++ mov r4, r5 ++ mov r6, r9 ++1: ++ vldm r4!, {q0-q3} ++ add lr, #64 ++ ++ @ N.B. unpack [0,1,2] -> (reg order) 1, 0, 2 ++ vshrn.u32 d20, q0, #14 ++ vmovn.u32 d18, q0 ++ vshrn.u32 d0, q0, #10 ++ ands lr, #127 ++ ++ vshrn.u32 d21, q1, #14 ++ vmovn.u32 d19, q1 ++ vshrn.u32 d1, q1, #10 ++ ++ vshrn.u32 d22, q2, #10 ++ vmovn.u32 d2, q2 ++ vshrn.u32 d4, q2, #14 ++ ++ add r10, r0, #24 ++ vshrn.u32 d23, q3, #10 ++ vmovn.u32 d3, q3 ++ vshrn.u32 d5, q3, #14 ++ ++ it eq ++ addeq r4, r8 ++ vuzp.16 q0, q11 ++ vuzp.16 q9, q1 ++ vuzp.16 q10, q2 ++ ++ @ q0 V0, V3,.. ++ @ q9 U0, U3... ++ @ q10 U1, U4... ++ @ q11 U2, U5,.. ++ @ q1 V1, V4, ++ @ q2 V2, V5,.. ++ ++ subs r6, #24 ++ vbic.u16 q11, #0xfc00 ++ vbic.u16 q9, #0xfc00 ++ vshr.u16 q10, #6 ++ vshr.u16 q2, #6 ++ vbic.u16 q0, #0xfc00 ++ vbic.u16 q1, #0xfc00 ++ ++ blt 2f ++ ++ vst3.16 {d18, d20, d22}, [r0], r12 ++ vst3.16 {d19, d21, d23}, [r10] ++ add r10, r2, #24 ++ vst3.16 {d0, d2, d4}, [r2], r12 ++ vst3.16 {d1, d3, d5}, [r10] ++ ++ bne 1b ++ ++11: ++ subs r7, #1 ++ add r5, #128 ++ add r0, r1 ++ add r2, r3 ++ bne 10b ++ ++ pop {r4-r10, pc} ++ ++@ Partial final write ++2: ++ cmp r6, #-12 ++ blt 1f ++ vst3.16 {d18, d20, d22}, [r0]! ++ vst3.16 {d0, d2, d4}, [r2]! ++ beq 11b ++ vmov d18, d19 ++ vmov d20, d21 ++ vmov d22, d23 ++ sub r6, #12 ++ vmov d0, d1 ++ vmov d2, d3 ++ vmov d4, d5 ++1: ++ cmp r6, #-18 ++ @ Rezip here as it makes the remaining tail handling easier ++ vzip.16 d0, d18 ++ vzip.16 d2, d20 ++ vzip.16 d4, d22 ++ blt 1f ++ vst3.16 {d0[1], d2[1], d4[1]}, [r0]! ++ vst3.16 {d0[0], d2[0], d4[0]}, [r2]! ++ vst3.16 {d0[3], d2[3], d4[3]}, [r0]! ++ vst3.16 {d0[2], d2[2], d4[2]}, [r2]! ++ beq 11b ++ vmov d0, d18 ++ vmov d2, d20 ++ sub r6, #6 ++ vmov d4, d22 ++1: ++ cmp r6, #-21 ++ blt 1f ++ vst3.16 {d0[1], d2[1], d4[1]}, [r0]! ++ vst3.16 {d0[0], d2[0], d4[0]}, [r2]! ++ beq 11b ++ vmov s4, s5 ++ sub r6, #3 ++ vmov s0, s1 ++1: ++ cmp r6, #-22 ++ blt 1f ++ vst2.16 {d0[1], d2[1]}, [r0]! ++ vst2.16 {d0[0], d2[0]}, [r2]! ++ b 11b ++1: ++ vst1.16 {d0[1]}, [r0]! ++ vst1.16 {d0[0]}, [r2]! ++ b 11b ++ ++endfunc ++ ++@ void ff_rpi_sand30_lines_to_planar_p010( ++@ uint8_t * dest, // [r0] ++@ unsigned int dst_stride, // [r1] ++@ const uint8_t * src, // [r2] ++@ unsigned int src_stride1, // [r3] Ignored - assumed 128 ++@ unsigned int src_stride2, // [sp, #0] -> r3 ++@ unsigned int _x, // [sp, #4] Ignored - 0 ++@ unsigned int y, // [sp, #8] (r7 in prefix) ++@ unsigned int _w, // [sp, #12] -> r6 (cur r5) ++@ unsigned int h); // [sp, #16] -> r7 ++@ ++@ Assumes that we are starting on a stripe boundary and that overreading ++@ within the stripe is OK. However it does respect the dest size for writing ++ ++function ff_rpi_sand30_lines_to_planar_p010, export=1 ++ push {r4-r8, lr} @ +24 ++ ldr r3, [sp, #24] ++ ldr r6, [sp, #36] ++ ldr r7, [sp, #32] @ y ++ mov r12, #48 ++ vmov.u16 q15, #0xffc0 ++ sub r3, #1 ++ lsl r3, #7 ++ sub r1, r1, r6, lsl #1 ++ add r8, r2, r7, lsl #7 ++ ldr r7, [sp, #40] ++ ++10: ++ mov r2, r8 ++ add r4, r0, #24 ++ mov r5, r6 ++ mov lr, #0 ++1: ++ vldm r2!, {q10-q13} ++ add lr, #64 ++ ++ vshl.u32 q14, q10, #6 ++ ands lr, #127 ++ vshrn.u32 d4, q10, #14 ++ vshrn.u32 d2, q10, #4 ++ vmovn.u32 d0, q14 ++ ++ vshl.u32 q14, q11, #6 ++ it eq ++ addeq r2, r3 ++ vshrn.u32 d5, q11, #14 ++ vshrn.u32 d3, q11, #4 ++ vmovn.u32 d1, q14 ++ ++ subs r5, #48 ++ vand q2, q15 ++ vand q1, q15 ++ vand q0, q15 ++ ++ vshl.u32 q14, q12, #6 ++ vshrn.u32 d20, q12, #14 ++ vshrn.u32 d18, q12, #4 ++ vmovn.u32 d16, q14 ++ ++ vshl.u32 q14, q13, #6 ++ vshrn.u32 d21, q13, #14 ++ vshrn.u32 d19, q13, #4 ++ vmovn.u32 d17, q14 ++ ++ vand q10, q15 ++ vand q9, q15 ++ vand q8, q15 ++ blt 2f ++ ++ vst3.16 {d0, d2, d4}, [r0], r12 ++ vst3.16 {d1, d3, d5}, [r4], r12 ++ vst3.16 {d16, d18, d20}, [r0], r12 ++ vst3.16 {d17, d19, d21}, [r4], r12 ++ ++ bne 1b ++ ++11: ++ subs r7, #1 ++ add r0, r1 ++ add r8, #128 ++ bne 10b ++ ++ pop {r4-r8, pc} ++ ++@ Partial final write ++2: ++ cmp r5, #24-48 ++ blt 1f ++ vst3.16 {d0, d2, d4}, [r0], r12 ++ vst3.16 {d1, d3, d5}, [r4] ++ beq 11b ++ vmov q0, q8 ++ sub r5, #24 ++ vmov q1, q9 ++ vmov q2, q10 ++1: ++ cmp r5, #12-48 ++ blt 1f ++ vst3.16 {d0, d2, d4}, [r0]! ++ beq 11b ++ vmov d0, d1 ++ sub r5, #12 ++ vmov d2, d3 ++ vmov d4, d5 ++1: ++ cmp r5, #6-48 ++ add r4, r0, #6 @ avoid [r0]! on sequential instructions ++ blt 1f ++ vst3.16 {d0[0], d2[0], d4[0]}, [r0] ++ vst3.16 {d0[1], d2[1], d4[1]}, [r4] ++ add r0, #12 ++ beq 11b ++ vmov s0, s1 ++ sub r5, #6 ++ vmov s4, s5 ++ vmov s8, s9 ++1: ++ cmp r5, #3-48 ++ blt 1f ++ vst3.16 {d0[0], d2[0], d4[0]}, [r0]! ++ beq 11b ++ sub r5, #3 ++ vshr.u32 d0, #16 ++ vshr.u32 d2, #16 ++1: ++ cmp r5, #2-48 ++ blt 1f ++ vst2.16 {d0[0], d2[0]}, [r0]! ++ b 11b ++1: ++ vst1.16 {d0[0]}, [r0]! ++ b 11b ++ ++endfunc ++ ++ ++@ void ff_rpi_sand30_lines_to_planar_y8( ++@ uint8_t * dest, // [r0] ++@ unsigned int dst_stride, // [r1] ++@ const uint8_t * src, // [r2] ++@ unsigned int src_stride1, // [r3] Ignored - assumed 128 ++@ unsigned int src_stride2, // [sp, #0] -> r3 ++@ unsigned int _x, // [sp, #4] Ignored - 0 ++@ unsigned int y, // [sp, #8] (r7 in prefix) ++@ unsigned int _w, // [sp, #12] -> r6 (cur r5) ++@ unsigned int h); // [sp, #16] -> r7 ++@ ++@ Assumes that we are starting on a stripe boundary and that overreading ++@ within the stripe is OK. However it does respect the dest size for wri ++ ++function ff_rpi_sand30_lines_to_planar_y8, export=1 ++ push {r4-r8, lr} @ +24 ++ ldr r3, [sp, #24] ++ ldr r6, [sp, #36] ++ ldr r7, [sp, #32] @ y ++ mov r12, #48 ++ lsl r3, #7 ++ sub r1, r1, r6 ++ add r8, r2, r7, lsl #7 ++ ldr r7, [sp, #40] ++ ++10: ++ mov r2, r8 ++ add r4, r0, #24 ++ mov r5, r6 ++1: ++ vldm r2, {q8-q15} ++ ++ subs r5, #96 ++ ++ vmovn.u32 d0, q8 ++ vshrn.u32 d2, q8, #12 ++ vshrn.u32 d4, q8, #16 @ Cannot vshrn.u32 #20! ++ ++ add r2, r3 ++ ++ vmovn.u32 d1, q9 ++ vshrn.u32 d3, q9, #12 ++ vshrn.u32 d5, q9, #16 ++ ++ pld [r2, #0] ++ ++ vshrn.u16 d0, q0, #2 ++ vmovn.u16 d1, q1 ++ vshrn.u16 d2, q2, #6 ++ ++ vmovn.u32 d16, q10 ++ vshrn.u32 d18, q10, #12 ++ vshrn.u32 d20, q10, #16 ++ ++ vmovn.u32 d17, q11 ++ vshrn.u32 d19, q11, #12 ++ vshrn.u32 d21, q11, #16 ++ ++ pld [r2, #64] ++ ++ vshrn.u16 d4, q8, #2 ++ vmovn.u16 d5, q9 ++ vshrn.u16 d6, q10, #6 ++ ++ vmovn.u32 d16, q12 ++ vshrn.u32 d18, q12, #12 ++ vshrn.u32 d20, q12, #16 ++ ++ vmovn.u32 d17, q13 ++ vshrn.u32 d19, q13, #12 ++ vshrn.u32 d21, q13, #16 ++ ++ vshrn.u16 d16, q8, #2 ++ vmovn.u16 d17, q9 ++ vshrn.u16 d18, q10, #6 ++ ++ vmovn.u32 d20, q14 ++ vshrn.u32 d22, q14, #12 ++ vshrn.u32 d24, q14, #16 ++ ++ vmovn.u32 d21, q15 ++ vshrn.u32 d23, q15, #12 ++ vshrn.u32 d25, q15, #16 ++ ++ vshrn.u16 d20, q10, #2 ++ vmovn.u16 d21, q11 ++ vshrn.u16 d22, q12, #6 ++ ++ blt 2f ++ ++ vst3.8 {d0, d1, d2}, [r0], r12 ++ vst3.8 {d4, d5, d6}, [r4], r12 ++ vst3.8 {d16, d17, d18}, [r0], r12 ++ vst3.8 {d20, d21, d22}, [r4], r12 ++ ++ bne 1b ++ ++11: ++ subs r7, #1 ++ add r0, r1 ++ add r8, #128 ++ bne 10b ++ ++ pop {r4-r8, pc} ++ ++@ Partial final write ++2: ++ cmp r5, #48-96 ++ blt 1f ++ vst3.8 {d0, d1, d2}, [r0], r12 ++ vst3.8 {d4, d5, d6}, [r4], r12 ++ beq 11b ++ vmov q0, q8 ++ vmov q2, q10 ++ sub r5, #48 ++ vmov d2, d18 ++ vmov d6, d22 ++1: ++ cmp r5, #24-96 ++ blt 1f ++ vst3.8 {d0, d1, d2}, [r0]! ++ beq 11b ++ vmov q0, q2 ++ sub r5, #24 ++ vmov d2, d6 ++1: ++ cmp r5, #12-96 ++ blt 1f ++ vst3.8 {d0[0], d1[0], d2[0]}, [r0]! ++ vst3.8 {d0[1], d1[1], d2[1]}, [r0]! ++ vst3.8 {d0[2], d1[2], d2[2]}, [r0]! ++ vst3.8 {d0[3], d1[3], d2[3]}, [r0]! ++ beq 11b ++ vmov s0, s1 ++ sub r5, #12 ++ vmov s2, s3 ++ vmov s4, s5 ++1: ++ cmp r5, #6-96 ++ blt 1f ++ vst3.8 {d0[0], d1[0], d2[0]}, [r0]! ++ vst3.8 {d0[1], d1[1], d2[1]}, [r0]! ++ add r0, #12 ++ beq 11b ++ vshr.u32 d0, #16 ++ sub r5, #6 ++ vshr.u32 d1, #16 ++ vshr.u32 d2, #16 ++1: ++ cmp r5, #3-96 ++ blt 1f ++ vst3.8 {d0[0], d1[0], d2[0]}, [r0]! ++ beq 11b ++ sub r5, #3 ++ vshr.u32 d0, #8 ++ vshr.u32 d1, #8 ++1: ++ cmp r5, #2-96 ++ blt 1f ++ vst2.8 {d0[0], d1[0]}, [r0]! ++ b 11b ++1: ++ vst1.8 {d0[0]}, [r0]! ++ b 11b ++ ++endfunc ++ ++ +--- /dev/null ++++ b/libavutil/arm/rpi_sand_neon.h +@@ -0,0 +1,110 @@ ++/* ++Copyright (c) 2020 Raspberry Pi (Trading) Ltd. ++All rights reserved. ++ ++Redistribution and use in source and binary forms, with or without ++modification, are permitted provided that the following conditions are met: ++ * Redistributions of source code must retain the above copyright ++ notice, this list of conditions and the following disclaimer. ++ * Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in the ++ documentation and/or other materials provided with the distribution. ++ * Neither the name of the copyright holder nor the ++ names of its contributors may be used to endorse or promote products ++ derived from this software without specific prior written permission. ++ ++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED ++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE ++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY ++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; ++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ ++Authors: John Cox ++*/ ++ ++#ifndef AVUTIL_ARM_SAND_NEON_H ++#define AVUTIL_ARM_SAND_NEON_H ++ ++void ff_rpi_sand128b_stripe_to_8_10( ++ uint8_t * dest, // [r0] ++ const uint8_t * src1, // [r1] ++ const uint8_t * src2, // [r2] ++ unsigned int lines); // [r3] ++ ++void ff_rpi_sand8_lines_to_planar_y8( ++ uint8_t * dest, // [r0] ++ unsigned int dst_stride, // [r1] ++ const uint8_t * src, // [r2] ++ unsigned int src_stride1, // [r3] Ignored - assumed 128 ++ unsigned int src_stride2, // [sp, #0] -> r3 ++ unsigned int _x, // [sp, #4] Ignored - 0 ++ unsigned int y, // [sp, #8] (r7 in prefix) ++ unsigned int _w, // [sp, #12] -> r6 (cur r5) ++ unsigned int h); // [sp, #16] -> r7 ++ ++void ff_rpi_sand8_lines_to_planar_c8( ++ uint8_t * dst_u, // [r0] ++ unsigned int dst_stride_u, // [r1] ++ uint8_t * dst_v, // [r2] ++ unsigned int dst_stride_v, // [r3] ++ const uint8_t * src, // [sp, #0] -> r4, r5 ++ unsigned int stride1, // [sp, #4] 128 ++ unsigned int stride2, // [sp, #8] -> r8 ++ unsigned int _x, // [sp, #12] 0 ++ unsigned int y, // [sp, #16] (r7 in prefix) ++ unsigned int _w, // [sp, #20] -> r12, r6 ++ unsigned int h); // [sp, #24] -> r7 ++ ++void ff_rpi_sand30_lines_to_planar_y16( ++ uint8_t * dest, // [r0] ++ unsigned int dst_stride, // [r1] ++ const uint8_t * src, // [r2] ++ unsigned int src_stride1, // [r3] Ignored - assumed 128 ++ unsigned int src_stride2, // [sp, #0] -> r3 ++ unsigned int _x, // [sp, #4] Ignored - 0 ++ unsigned int y, // [sp, #8] (r7 in prefix) ++ unsigned int _w, // [sp, #12] -> r6 (cur r5) ++ unsigned int h); // [sp, #16] -> r7 ++ ++void ff_rpi_sand30_lines_to_planar_c16( ++ uint8_t * dst_u, // [r0] ++ unsigned int dst_stride_u, // [r1] ++ uint8_t * dst_v, // [r2] ++ unsigned int dst_stride_v, // [r3] ++ const uint8_t * src, // [sp, #0] -> r4, r5 ++ unsigned int stride1, // [sp, #4] 128 ++ unsigned int stride2, // [sp, #8] -> r8 ++ unsigned int _x, // [sp, #12] 0 ++ unsigned int y, // [sp, #16] (r7 in prefix) ++ unsigned int _w, // [sp, #20] -> r6, r9 ++ unsigned int h); // [sp, #24] -> r7 ++ ++void ff_rpi_sand30_lines_to_planar_p010( ++ uint8_t * dest, // [r0] ++ unsigned int dst_stride, // [r1] ++ const uint8_t * src, // [r2] ++ unsigned int src_stride1, // [r3] Ignored - assumed 128 ++ unsigned int src_stride2, // [sp, #0] -> r3 ++ unsigned int _x, // [sp, #4] Ignored - 0 ++ unsigned int y, // [sp, #8] (r7 in prefix) ++ unsigned int _w, // [sp, #12] -> r6 (cur r5) ++ unsigned int h); // [sp, #16] -> r7 ++ ++void ff_rpi_sand30_lines_to_planar_y8( ++ uint8_t * dest, // [r0] ++ unsigned int dst_stride, // [r1] ++ const uint8_t * src, // [r2] ++ unsigned int src_stride1, // [r3] Ignored - assumed 128 ++ unsigned int src_stride2, // [sp, #0] -> r3 ++ unsigned int _x, // [sp, #4] Ignored - 0 ++ unsigned int y, // [sp, #8] (r7 in prefix) ++ unsigned int _w, // [sp, #12] -> r6 (cur r5) ++ unsigned int h); // [sp, #16] -> r7 ++ ++#endif // AVUTIL_ARM_SAND_NEON_H ++ +--- a/libavutil/frame.c ++++ b/libavutil/frame.c +@@ -16,6 +16,8 @@ + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + ++#include "config.h" ++ + #include "channel_layout.h" + #include "avassert.h" + #include "buffer.h" +@@ -27,6 +29,9 @@ + #include "mem.h" + #include "samplefmt.h" + #include "hwcontext.h" ++#if CONFIG_SAND ++#include "rpi_sand_fns.h" ++#endif + + #if FF_API_OLD_CHANNEL_LAYOUT + #define CHECK_CHANNELS_CONSISTENCY(frame) \ +@@ -875,6 +880,12 @@ int av_frame_apply_cropping(AVFrame *fra + (frame->crop_top + frame->crop_bottom) >= frame->height) + return AVERROR(ERANGE); + ++#if CONFIG_SAND ++ // Sand cannot be cropped - do not try ++ if (av_rpi_is_sand_format(frame->format)) ++ return 0; ++#endif ++ + desc = av_pix_fmt_desc_get(frame->format); + if (!desc) + return AVERROR_BUG; +--- a/libavutil/frame.h ++++ b/libavutil/frame.h +@@ -940,6 +940,16 @@ int av_frame_apply_cropping(AVFrame *fra + */ + const char *av_frame_side_data_name(enum AVFrameSideDataType type); + ++ ++static inline int av_frame_cropped_width(const AVFrame * const frame) ++{ ++ return frame->width - (frame->crop_left + frame->crop_right); ++} ++static inline int av_frame_cropped_height(const AVFrame * const frame) ++{ ++ return frame->height - (frame->crop_top + frame->crop_bottom); ++} ++ + /** + * @} + */ +--- a/libavutil/hwcontext_drm.c ++++ b/libavutil/hwcontext_drm.c +@@ -21,6 +21,7 @@ + #include <fcntl.h> + #include <sys/mman.h> + #include <unistd.h> ++#include <sys/ioctl.h> + + /* This was introduced in version 4.6. And may not exist all without an + * optional package. So to prevent a hard dependency on needing the Linux +@@ -31,6 +32,7 @@ + #endif + + #include <drm.h> ++#include <libdrm/drm_fourcc.h> + #include <xf86drm.h> + + #include "avassert.h" +@@ -38,7 +40,9 @@ + #include "hwcontext_drm.h" + #include "hwcontext_internal.h" + #include "imgutils.h" +- ++#if CONFIG_SAND ++#include "libavutil/rpi_sand_fns.h" ++#endif + + static void drm_device_free(AVHWDeviceContext *hwdev) + { +@@ -53,6 +57,11 @@ static int drm_device_create(AVHWDeviceC + AVDRMDeviceContext *hwctx = hwdev->hwctx; + drmVersionPtr version; + ++ if (device == NULL) { ++ hwctx->fd = -1; ++ return 0; ++ } ++ + hwctx->fd = open(device, O_RDWR); + if (hwctx->fd < 0) + return AVERROR(errno); +@@ -139,6 +148,8 @@ static int drm_map_frame(AVHWFramesConte + if (flags & AV_HWFRAME_MAP_WRITE) + mmap_prot |= PROT_WRITE; + ++ if (dst->format == AV_PIX_FMT_NONE) ++ dst->format = hwfc->sw_format; + #if HAVE_LINUX_DMA_BUF_H + if (flags & AV_HWFRAME_MAP_READ) + map->sync_flags |= DMA_BUF_SYNC_READ; +@@ -185,6 +196,23 @@ static int drm_map_frame(AVHWFramesConte + + dst->width = src->width; + dst->height = src->height; ++ dst->crop_top = src->crop_top; ++ dst->crop_bottom = src->crop_bottom; ++ dst->crop_left = src->crop_left; ++ dst->crop_right = src->crop_right; ++ ++#if CONFIG_SAND ++ // Rework for sand frames ++ if (av_rpi_is_sand_frame(dst)) { ++ // As it stands the sand formats hold stride2 in linesize[3] ++ // linesize[0] & [1] contain stride1 which is always 128 for everything we do ++ // * Arguably this should be reworked s.t. stride2 is in linesize[0] & [1] ++ dst->linesize[3] = fourcc_mod_broadcom_param(desc->objects[0].format_modifier); ++ dst->linesize[0] = 128; ++ dst->linesize[1] = 128; ++ // *** Are we sure src->height is actually what we want ??? ++ } ++#endif + + err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src, + &drm_unmap_frame, map); +@@ -206,16 +234,29 @@ static int drm_transfer_get_formats(AVHW + enum AVHWFrameTransferDirection dir, + enum AVPixelFormat **formats) + { +- enum AVPixelFormat *pix_fmts; ++ enum AVPixelFormat *p; + +- pix_fmts = av_malloc_array(2, sizeof(*pix_fmts)); +- if (!pix_fmts) ++ p = *formats = av_malloc_array(3, sizeof(*p)); ++ if (!p) + return AVERROR(ENOMEM); + +- pix_fmts[0] = ctx->sw_format; +- pix_fmts[1] = AV_PIX_FMT_NONE; ++ // **** Offer native sand too ???? ++ *p++ = ++#if CONFIG_SAND ++ ctx->sw_format == AV_PIX_FMT_RPI4_8 || ctx->sw_format == AV_PIX_FMT_SAND128 ? ++ AV_PIX_FMT_YUV420P : ++ ctx->sw_format == AV_PIX_FMT_RPI4_10 ? ++ AV_PIX_FMT_YUV420P10LE : ++#endif ++ ctx->sw_format; ++ ++#if CONFIG_SAND ++ if (ctx->sw_format == AV_PIX_FMT_RPI4_10 || ++ ctx->sw_format == AV_PIX_FMT_RPI4_8 || ctx->sw_format == AV_PIX_FMT_SAND128) ++ *p++ = AV_PIX_FMT_NV12; ++#endif + +- *formats = pix_fmts; ++ *p = AV_PIX_FMT_NONE; + return 0; + } + +@@ -231,18 +272,62 @@ static int drm_transfer_data_from(AVHWFr + map = av_frame_alloc(); + if (!map) + return AVERROR(ENOMEM); +- map->format = dst->format; + ++ // Map to default ++ map->format = AV_PIX_FMT_NONE; + err = drm_map_frame(hwfc, map, src, AV_HWFRAME_MAP_READ); + if (err) + goto fail; + +- map->width = dst->width; +- map->height = dst->height; ++#if 0 ++ av_log(hwfc, AV_LOG_INFO, "%s: src fmt=%d (%d), dst fmt=%d (%d) s=%dx%d l=%d/%d/%d/%d, d=%dx%d l=%d/%d/%d\n", __func__, ++ hwfc->sw_format, AV_PIX_FMT_RPI4_8, dst->format, AV_PIX_FMT_YUV420P10LE, ++ map->width, map->height, ++ map->linesize[0], ++ map->linesize[1], ++ map->linesize[2], ++ map->linesize[3], ++ dst->width, dst->height, ++ dst->linesize[0], ++ dst->linesize[1], ++ dst->linesize[2]); ++#endif ++#if CONFIG_SAND ++ if (av_rpi_is_sand_frame(map)) { ++ // Preserve crop - later ffmpeg code assumes that we have in that it ++ // overwrites any crop that we create with the old values ++ const unsigned int w = FFMIN(dst->width, map->width); ++ const unsigned int h = FFMIN(dst->height, map->height); ++ ++ map->crop_top = 0; ++ map->crop_bottom = 0; ++ map->crop_left = 0; ++ map->crop_right = 0; ++ ++ if (av_rpi_sand_to_planar_frame(dst, map) != 0) ++ { ++ av_log(hwfc, AV_LOG_ERROR, "%s: Incompatible output pixfmt for sand\n", __func__); ++ err = AVERROR(EINVAL); ++ goto fail; ++ } ++ ++ dst->width = w; ++ dst->height = h; ++ } ++ else ++#endif ++ { ++ // Kludge mapped h/w s.t. frame_copy works ++ map->width = dst->width; ++ map->height = dst->height; ++ err = av_frame_copy(dst, map); ++ } + +- err = av_frame_copy(dst, map); + if (err) ++ { ++ av_log(hwfc, AV_LOG_ERROR, "%s: Copy fail\n", __func__); + goto fail; ++ } + + err = 0; + fail: +@@ -257,7 +342,10 @@ static int drm_transfer_data_to(AVHWFram + int err; + + if (src->width > hwfc->width || src->height > hwfc->height) ++ { ++ av_log(hwfc, AV_LOG_ERROR, "%s: H/w mismatch: %d/%d, %d/%d\n", __func__, dst->width, hwfc->width, dst->height, hwfc->height); + return AVERROR(EINVAL); ++ } + + map = av_frame_alloc(); + if (!map) +--- a/libavutil/hwcontext_vulkan.c ++++ b/libavutil/hwcontext_vulkan.c +@@ -57,6 +57,14 @@ + #define CHECK_CU(x) FF_CUDA_CHECK_DL(cuda_cu, cu, x) + #endif + ++// Sometimes missing definitions ++#ifndef VK_EXT_VIDEO_DECODE_H264_EXTENSION_NAME ++#define VK_EXT_VIDEO_DECODE_H264_EXTENSION_NAME "VK_EXT_video_decode_h264" ++#endif ++#ifndef VK_EXT_VIDEO_DECODE_H265_EXTENSION_NAME ++#define VK_EXT_VIDEO_DECODE_H265_EXTENSION_NAME "VK_EXT_video_decode_h265" ++#endif ++ + typedef struct VulkanQueueCtx { + VkFence fence; + VkQueue queue; +--- a/libavutil/pixdesc.c ++++ b/libavutil/pixdesc.c +@@ -2491,6 +2491,50 @@ static const AVPixFmtDescriptor av_pix_f + }, + .flags = AV_PIX_FMT_FLAG_PLANAR, + }, ++ [AV_PIX_FMT_SAND128] = { ++ .name = "sand128", ++ .nb_components = 3, ++ .log2_chroma_w = 1, ++ .log2_chroma_h = 1, ++ .comp = { ++ { 0, 1, 0, 0, 8 }, /* Y */ ++ { 1, 2, 0, 0, 8 }, /* U */ ++ { 1, 2, 1, 0, 8 }, /* V */ ++ }, ++ .flags = 0, ++ }, ++ [AV_PIX_FMT_SAND64_10] = { ++ .name = "sand64_10", ++ .nb_components = 3, ++ .log2_chroma_w = 1, ++ .log2_chroma_h = 1, ++ .comp = { ++ { 0, 2, 0, 0, 10 }, /* Y */ ++ { 1, 4, 0, 0, 10 }, /* U */ ++ { 1, 4, 2, 0, 10 }, /* V */ ++ }, ++ .flags = 0, ++ }, ++ [AV_PIX_FMT_SAND64_16] = { ++ .name = "sand64_16", ++ .nb_components = 3, ++ .log2_chroma_w = 1, ++ .log2_chroma_h = 1, ++ .comp = { ++ { 0, 2, 0, 0, 16 }, /* Y */ ++ { 1, 4, 0, 0, 16 }, /* U */ ++ { 1, 4, 2, 0, 16 }, /* V */ ++ }, ++ .flags = 0, ++ }, ++ [AV_PIX_FMT_RPI4_8] = { ++ .name = "rpi4_8", ++ .flags = AV_PIX_FMT_FLAG_HWACCEL, ++ }, ++ [AV_PIX_FMT_RPI4_10] = { ++ .name = "rpi4_10", ++ .flags = AV_PIX_FMT_FLAG_HWACCEL, ++ }, + }; + + static const char * const color_range_names[] = { +--- a/libavutil/pixfmt.h ++++ b/libavutil/pixfmt.h +@@ -349,6 +349,14 @@ enum AVPixelFormat { + + AV_PIX_FMT_Y210BE, ///< packed YUV 4:2:2 like YUYV422, 20bpp, data in the high bits, big-endian + AV_PIX_FMT_Y210LE, ///< packed YUV 4:2:2 like YUYV422, 20bpp, data in the high bits, little-endian ++// RPI - not on ifdef so can be got at by calling progs ++// #define so code that uses this can know it is there ++#define AVUTIL_HAVE_PIX_FMT_SAND 1 ++ AV_PIX_FMT_SAND128, ///< 4:2:0 8-bit 128x*Y stripe, 64x*UV stripe, then next x stripe, mysterious padding ++ AV_PIX_FMT_SAND64_10, ///< 4:2:0 10-bit 64x*Y stripe, 32x*UV stripe, then next x stripe, mysterious padding ++ AV_PIX_FMT_SAND64_16, ///< 4:2:0 16-bit 64x*Y stripe, 32x*UV stripe, then next x stripe, mysterious padding ++ AV_PIX_FMT_RPI4_8, ++ AV_PIX_FMT_RPI4_10, + + AV_PIX_FMT_X2RGB10LE, ///< packed RGB 10:10:10, 30bpp, (msb)2X 10R 10G 10B(lsb), little-endian, X=unused/undefined + AV_PIX_FMT_X2RGB10BE, ///< packed RGB 10:10:10, 30bpp, (msb)2X 10R 10G 10B(lsb), big-endian, X=unused/undefined +--- /dev/null ++++ b/libavutil/rpi_sand_fn_pw.h +@@ -0,0 +1,227 @@ ++/* ++Copyright (c) 2018 Raspberry Pi (Trading) Ltd. ++All rights reserved. ++ ++Redistribution and use in source and binary forms, with or without ++modification, are permitted provided that the following conditions are met: ++ * Redistributions of source code must retain the above copyright ++ notice, this list of conditions and the following disclaimer. ++ * Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in the ++ documentation and/or other materials provided with the distribution. ++ * Neither the name of the copyright holder nor the ++ names of its contributors may be used to endorse or promote products ++ derived from this software without specific prior written permission. ++ ++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED ++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE ++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY ++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; ++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ ++Authors: John Cox ++*/ ++ ++// * Included twice from rpi_sand_fn with different PW ++ ++#define STRCAT(x,y) x##y ++ ++#if PW == 1 ++#define pixel uint8_t ++#define FUNC(f) STRCAT(f, 8) ++#elif PW == 2 ++#define pixel uint16_t ++#define FUNC(f) STRCAT(f, 16) ++#else ++#error Unexpected PW ++#endif ++ ++// Fetches a single patch - offscreen fixup not done here ++// w <= stride1 ++// unclipped ++void FUNC(av_rpi_sand_to_planar_y)(uint8_t * dst, const unsigned int dst_stride, ++ const uint8_t * src, ++ unsigned int stride1, unsigned int stride2, ++ unsigned int _x, unsigned int y, ++ unsigned int _w, unsigned int h) ++{ ++ const unsigned int x = _x; ++ const unsigned int w = _w; ++ const unsigned int mask = stride1 - 1; ++ ++#if PW == 1 && HAVE_SAND_ASM ++ if (_x == 0) { ++ ff_rpi_sand8_lines_to_planar_y8(dst, dst_stride, ++ src, stride1, stride2, _x, y, _w, h); ++ return; ++ } ++#endif ++ ++ if ((x & ~mask) == ((x + w) & ~mask)) { ++ // All in one sand stripe ++ const uint8_t * p = src + (x & mask) + y * stride1 + (x & ~mask) * stride2; ++ for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p += stride1) { ++ memcpy(dst, p, w); ++ } ++ } ++ else ++ { ++ // Two+ stripe ++ const unsigned int sstride = stride1 * stride2; ++ const uint8_t * p1 = src + (x & mask) + y * stride1 + (x & ~mask) * stride2; ++ const uint8_t * p2 = p1 + sstride - (x & mask); ++ const unsigned int w1 = stride1 - (x & mask); ++ const unsigned int w3 = (x + w) & mask; ++ const unsigned int w2 = w - (w1 + w3); ++ ++ for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p1 += stride1, p2 += stride1) { ++ unsigned int j; ++ const uint8_t * p = p2; ++ uint8_t * d = dst; ++ memcpy(d, p1, w1); ++ d += w1; ++ for (j = 0; j < w2; j += stride1, d += stride1, p += sstride) { ++ memcpy(d, p, stride1); ++ } ++ memcpy(d, p, w3); ++ } ++ } ++} ++ ++// x & w in bytes but not of interleave (i.e. offset = x*2 for U&V) ++ ++void FUNC(av_rpi_sand_to_planar_c)(uint8_t * dst_u, const unsigned int dst_stride_u, ++ uint8_t * dst_v, const unsigned int dst_stride_v, ++ const uint8_t * src, ++ unsigned int stride1, unsigned int stride2, ++ unsigned int _x, unsigned int y, ++ unsigned int _w, unsigned int h) ++{ ++ const unsigned int x = _x * 2; ++ const unsigned int w = _w * 2; ++ const unsigned int mask = stride1 - 1; ++ ++#if PW == 1 && HAVE_SAND_ASM ++ if (_x == 0) { ++ ff_rpi_sand8_lines_to_planar_c8(dst_u, dst_stride_u, dst_v, dst_stride_v, ++ src, stride1, stride2, _x, y, _w, h); ++ return; ++ } ++#endif ++ ++ if ((x & ~mask) == ((x + w) & ~mask)) { ++ // All in one sand stripe ++ const uint8_t * p1 = src + (x & mask) + y * stride1 + (x & ~mask) * stride2; ++ for (unsigned int i = 0; i != h; ++i, dst_u += dst_stride_u, dst_v += dst_stride_v, p1 += stride1) { ++ pixel * du = (pixel *)dst_u; ++ pixel * dv = (pixel *)dst_v; ++ const pixel * p = (const pixel *)p1; ++ for (unsigned int k = 0; k < w; k += 2 * PW) { ++ *du++ = *p++; ++ *dv++ = *p++; ++ } ++ } ++ } ++ else ++ { ++ // Two+ stripe ++ const unsigned int sstride = stride1 * stride2; ++ const unsigned int sstride_p = (sstride - stride1) / PW; ++ ++ const uint8_t * p1 = src + (x & mask) + y * stride1 + (x & ~mask) * stride2; ++ const uint8_t * p2 = p1 + sstride - (x & mask); ++ const unsigned int w1 = stride1 - (x & mask); ++ const unsigned int w3 = (x + w) & mask; ++ const unsigned int w2 = w - (w1 + w3); ++ ++ for (unsigned int i = 0; i != h; ++i, dst_u += dst_stride_u, dst_v += dst_stride_v, p1 += stride1, p2 += stride1) { ++ unsigned int j; ++ const pixel * p = (const pixel *)p1; ++ pixel * du = (pixel *)dst_u; ++ pixel * dv = (pixel *)dst_v; ++ for (unsigned int k = 0; k < w1; k += 2 * PW) { ++ *du++ = *p++; ++ *dv++ = *p++; ++ } ++ for (j = 0, p = (const pixel *)p2; j < w2; j += stride1, p += sstride_p) { ++ for (unsigned int k = 0; k < stride1; k += 2 * PW) { ++ *du++ = *p++; ++ *dv++ = *p++; ++ } ++ } ++ for (unsigned int k = 0; k < w3; k += 2 * PW) { ++ *du++ = *p++; ++ *dv++ = *p++; ++ } ++ } ++ } ++} ++ ++void FUNC(av_rpi_planar_to_sand_c)(uint8_t * dst_c, ++ unsigned int stride1, unsigned int stride2, ++ const uint8_t * src_u, const unsigned int src_stride_u, ++ const uint8_t * src_v, const unsigned int src_stride_v, ++ unsigned int _x, unsigned int y, ++ unsigned int _w, unsigned int h) ++{ ++ const unsigned int x = _x * 2; ++ const unsigned int w = _w * 2; ++ const unsigned int mask = stride1 - 1; ++ if ((x & ~mask) == ((x + w) & ~mask)) { ++ // All in one sand stripe ++ uint8_t * p1 = dst_c + (x & mask) + y * stride1 + (x & ~mask) * stride2; ++ for (unsigned int i = 0; i != h; ++i, src_u += src_stride_u, src_v += src_stride_v, p1 += stride1) { ++ const pixel * su = (const pixel *)src_u; ++ const pixel * sv = (const pixel *)src_v; ++ pixel * p = (pixel *)p1; ++ for (unsigned int k = 0; k < w; k += 2 * PW) { ++ *p++ = *su++; ++ *p++ = *sv++; ++ } ++ } ++ } ++ else ++ { ++ // Two+ stripe ++ const unsigned int sstride = stride1 * stride2; ++ const unsigned int sstride_p = (sstride - stride1) / PW; ++ ++ const uint8_t * p1 = dst_c + (x & mask) + y * stride1 + (x & ~mask) * stride2; ++ const uint8_t * p2 = p1 + sstride - (x & mask); ++ const unsigned int w1 = stride1 - (x & mask); ++ const unsigned int w3 = (x + w) & mask; ++ const unsigned int w2 = w - (w1 + w3); ++ ++ for (unsigned int i = 0; i != h; ++i, src_u += src_stride_u, src_v += src_stride_v, p1 += stride1, p2 += stride1) { ++ unsigned int j; ++ const pixel * su = (const pixel *)src_u; ++ const pixel * sv = (const pixel *)src_v; ++ pixel * p = (pixel *)p1; ++ for (unsigned int k = 0; k < w1; k += 2 * PW) { ++ *p++ = *su++; ++ *p++ = *sv++; ++ } ++ for (j = 0, p = (pixel *)p2; j < w2; j += stride1, p += sstride_p) { ++ for (unsigned int k = 0; k < stride1; k += 2 * PW) { ++ *p++ = *su++; ++ *p++ = *sv++; ++ } ++ } ++ for (unsigned int k = 0; k < w3; k += 2 * PW) { ++ *p++ = *su++; ++ *p++ = *sv++; ++ } ++ } ++ } ++} ++ ++ ++#undef pixel ++#undef STRCAT ++#undef FUNC ++ +--- /dev/null ++++ b/libavutil/rpi_sand_fns.c +@@ -0,0 +1,447 @@ ++/* ++Copyright (c) 2018 Raspberry Pi (Trading) Ltd. ++All rights reserved. ++ ++Redistribution and use in source and binary forms, with or without ++modification, are permitted provided that the following conditions are met: ++ * Redistributions of source code must retain the above copyright ++ notice, this list of conditions and the following disclaimer. ++ * Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in the ++ documentation and/or other materials provided with the distribution. ++ * Neither the name of the copyright holder nor the ++ names of its contributors may be used to endorse or promote products ++ derived from this software without specific prior written permission. ++ ++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED ++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE ++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY ++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; ++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ ++Authors: John Cox ++*/ ++ ++#include "config.h" ++#include <stdint.h> ++#include <string.h> ++#include "rpi_sand_fns.h" ++#include "avassert.h" ++#include "frame.h" ++ ++#if ARCH_ARM && HAVE_NEON ++#include "libavutil/arm/cpu.h" ++#include "libavutil/arm/rpi_sand_neon.h" ++#define HAVE_SAND_ASM 1 ++#elif ARCH_AARCH64 && HAVE_NEON ++#include "libavutil/aarch64/cpu.h" ++#include "libavutil/aarch64/rpi_sand_neon.h" ++#define HAVE_SAND_ASM 1 ++#else ++#define HAVE_SAND_ASM 0 ++#endif ++ ++#define PW 1 ++#include "rpi_sand_fn_pw.h" ++#undef PW ++ ++#define PW 2 ++#include "rpi_sand_fn_pw.h" ++#undef PW ++ ++#if 1 ++// Simple round ++static void cpy16_to_8(uint8_t * dst, const uint8_t * _src, unsigned int n, const unsigned int shr) ++{ ++ const unsigned int rnd = (1 << shr) >> 1; ++ const uint16_t * src = (const uint16_t *)_src; ++ ++ for (; n != 0; --n) { ++ *dst++ = (*src++ + rnd) >> shr; ++ } ++} ++#else ++// Dithered variation ++static void cpy16_to_8(uint8_t * dst, const uint8_t * _src, unsigned int n, const unsigned int shr) ++{ ++ unsigned int rnd = (1 << shr) >> 1; ++ const unsigned int mask = ((1 << shr) - 1); ++ const uint16_t * src = (const uint16_t *)_src; ++ ++ for (; n != 0; --n) { ++ rnd = *src++ + (rnd & mask); ++ *dst++ = rnd >> shr; ++ } ++} ++#endif ++ ++// Fetches a single patch - offscreen fixup not done here ++// w <= stride1 ++// unclipped ++// _x & _w in pixels, strides in bytes ++void av_rpi_sand30_to_planar_y16(uint8_t * dst, const unsigned int dst_stride, ++ const uint8_t * src, ++ unsigned int stride1, unsigned int stride2, ++ unsigned int _x, unsigned int y, ++ unsigned int _w, unsigned int h) ++{ ++ const unsigned int x0 = (_x / 3) * 4; // Byte offset of the word ++ const unsigned int xskip0 = _x - (x0 >> 2) * 3; ++ const unsigned int x1 = ((_x + _w) / 3) * 4; ++ const unsigned int xrem1 = _x + _w - (x1 >> 2) * 3; ++ const unsigned int mask = stride1 - 1; ++ const uint8_t * p0 = src + (x0 & mask) + y * stride1 + (x0 & ~mask) * stride2; ++ const unsigned int slice_inc = ((stride2 - 1) * stride1) >> 2; // RHS of a stripe to LHS of next in words ++ ++#if HAVE_SAND_ASM ++ if (_x == 0 && have_neon(av_get_cpu_flags())) { ++ ff_rpi_sand30_lines_to_planar_y16(dst, dst_stride, src, stride1, stride2, _x, y, _w, h); ++ return; ++ } ++#endif ++ ++ if (x0 == x1) { ++ // ******************* ++ // Partial single word xfer ++ return; ++ } ++ ++ for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p0 += stride1) ++ { ++ unsigned int x = x0; ++ const uint32_t * p = (const uint32_t *)p0; ++ uint16_t * d = (uint16_t *)dst; ++ ++ if (xskip0 != 0) { ++ const uint32_t p3 = *p++; ++ ++ if (xskip0 == 1) ++ *d++ = (p3 >> 10) & 0x3ff; ++ *d++ = (p3 >> 20) & 0x3ff; ++ ++ if (((x += 4) & mask) == 0) ++ p += slice_inc; ++ } ++ ++ while (x != x1) { ++ const uint32_t p3 = *p++; ++ *d++ = p3 & 0x3ff; ++ *d++ = (p3 >> 10) & 0x3ff; ++ *d++ = (p3 >> 20) & 0x3ff; ++ ++ if (((x += 4) & mask) == 0) ++ p += slice_inc; ++ } ++ ++ if (xrem1 != 0) { ++ const uint32_t p3 = *p; ++ ++ *d++ = p3 & 0x3ff; ++ if (xrem1 == 2) ++ *d++ = (p3 >> 10) & 0x3ff; ++ } ++ } ++} ++ ++ ++void av_rpi_sand30_to_planar_c16(uint8_t * dst_u, const unsigned int dst_stride_u, ++ uint8_t * dst_v, const unsigned int dst_stride_v, ++ const uint8_t * src, ++ unsigned int stride1, unsigned int stride2, ++ unsigned int _x, unsigned int y, ++ unsigned int _w, unsigned int h) ++{ ++ const unsigned int x0 = (_x / 3) * 8; // Byte offset of the word ++ const unsigned int xskip0 = _x - (x0 >> 3) * 3; ++ const unsigned int x1 = ((_x + _w) / 3) * 8; ++ const unsigned int xrem1 = _x + _w - (x1 >> 3) * 3; ++ const unsigned int mask = stride1 - 1; ++ const uint8_t * p0 = src + (x0 & mask) + y * stride1 + (x0 & ~mask) * stride2; ++ const unsigned int slice_inc = ((stride2 - 1) * stride1) >> 2; // RHS of a stripe to LHS of next in words ++ ++#if HAVE_SAND_ASM ++ if (_x == 0 && have_neon(av_get_cpu_flags())) { ++ ff_rpi_sand30_lines_to_planar_c16(dst_u, dst_stride_u, dst_v, dst_stride_v, ++ src, stride1, stride2, _x, y, _w, h); ++ return; ++ } ++#endif ++ ++ if (x0 == x1) { ++ // ******************* ++ // Partial single word xfer ++ return; ++ } ++ ++ for (unsigned int i = 0; i != h; ++i, dst_u += dst_stride_u, dst_v += dst_stride_v, p0 += stride1) ++ { ++ unsigned int x = x0; ++ const uint32_t * p = (const uint32_t *)p0; ++ uint16_t * du = (uint16_t *)dst_u; ++ uint16_t * dv = (uint16_t *)dst_v; ++ ++ if (xskip0 != 0) { ++ const uint32_t p3a = *p++; ++ const uint32_t p3b = *p++; ++ ++ if (xskip0 == 1) ++ { ++ *du++ = (p3a >> 20) & 0x3ff; ++ *dv++ = (p3b >> 0) & 0x3ff; ++ } ++ *du++ = (p3b >> 10) & 0x3ff; ++ *dv++ = (p3b >> 20) & 0x3ff; ++ ++ if (((x += 8) & mask) == 0) ++ p += slice_inc; ++ } ++ ++ while (x != x1) { ++ const uint32_t p3a = *p++; ++ const uint32_t p3b = *p++; ++ ++ *du++ = p3a & 0x3ff; ++ *dv++ = (p3a >> 10) & 0x3ff; ++ *du++ = (p3a >> 20) & 0x3ff; ++ *dv++ = p3b & 0x3ff; ++ *du++ = (p3b >> 10) & 0x3ff; ++ *dv++ = (p3b >> 20) & 0x3ff; ++ ++ if (((x += 8) & mask) == 0) ++ p += slice_inc; ++ } ++ ++ if (xrem1 != 0) { ++ const uint32_t p3a = *p++; ++ const uint32_t p3b = *p++; ++ ++ *du++ = p3a & 0x3ff; ++ *dv++ = (p3a >> 10) & 0x3ff; ++ if (xrem1 == 2) ++ { ++ *du++ = (p3a >> 20) & 0x3ff; ++ *dv++ = p3b & 0x3ff; ++ } ++ } ++ } ++} ++ ++// Fetches a single patch - offscreen fixup not done here ++// w <= stride1 ++// single lose bottom 2 bits truncation ++// _x & _w in pixels, strides in bytes ++void av_rpi_sand30_to_planar_y8(uint8_t * dst, const unsigned int dst_stride, ++ const uint8_t * src, ++ unsigned int stride1, unsigned int stride2, ++ unsigned int _x, unsigned int y, ++ unsigned int _w, unsigned int h) ++{ ++ const unsigned int x0 = (_x / 3) * 4; // Byte offset of the word ++ const unsigned int xskip0 = _x - (x0 >> 2) * 3; ++ const unsigned int x1 = ((_x + _w) / 3) * 4; ++ const unsigned int xrem1 = _x + _w - (x1 >> 2) * 3; ++ const unsigned int mask = stride1 - 1; ++ const uint8_t * p0 = src + (x0 & mask) + y * stride1 + (x0 & ~mask) * stride2; ++ const unsigned int slice_inc = ((stride2 - 1) * stride1) >> 2; // RHS of a stripe to LHS of next in words ++ ++#if HAVE_SAND_ASM ++ if (_x == 0) { ++ ff_rpi_sand30_lines_to_planar_y8(dst, dst_stride, src, stride1, stride2, _x, y, _w, h); ++ return; ++ } ++#endif ++ ++ if (x0 == x1) { ++ // ******************* ++ // Partial single word xfer ++ return; ++ } ++ ++ for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p0 += stride1) ++ { ++ unsigned int x = x0; ++ const uint32_t * p = (const uint32_t *)p0; ++ uint8_t * d = dst; ++ ++ if (xskip0 != 0) { ++ const uint32_t p3 = *p++; ++ ++ if (xskip0 == 1) ++ *d++ = (p3 >> 12) & 0xff; ++ *d++ = (p3 >> 22) & 0xff; ++ ++ if (((x += 4) & mask) == 0) ++ p += slice_inc; ++ } ++ ++ while (x != x1) { ++ const uint32_t p3 = *p++; ++ *d++ = (p3 >> 2) & 0xff; ++ *d++ = (p3 >> 12) & 0xff; ++ *d++ = (p3 >> 22) & 0xff; ++ ++ if (((x += 4) & mask) == 0) ++ p += slice_inc; ++ } ++ ++ if (xrem1 != 0) { ++ const uint32_t p3 = *p; ++ ++ *d++ = (p3 >> 2) & 0xff; ++ if (xrem1 == 2) ++ *d++ = (p3 >> 12) & 0xff; ++ } ++ } ++} ++ ++ ++ ++// w/h in pixels ++void av_rpi_sand16_to_sand8(uint8_t * dst, const unsigned int dst_stride1, const unsigned int dst_stride2, ++ const uint8_t * src, const unsigned int src_stride1, const unsigned int src_stride2, ++ unsigned int w, unsigned int h, const unsigned int shr) ++{ ++ const unsigned int n = dst_stride1 / 2; ++ unsigned int j; ++ ++ // This is true for our current layouts ++ av_assert0(dst_stride1 == src_stride1); ++ ++ // As we have the same stride1 for src & dest and src is wider than dest ++ // then if we loop on src we can always write contiguously to dest ++ // We make no effort to copy an exact width - round up to nearest src stripe ++ // as we will always have storage in dest for that ++ ++#if ARCH_ARM && HAVE_NEON ++ if (shr == 3 && src_stride1 == 128) { ++ for (j = 0; j + n < w; j += dst_stride1) { ++ uint8_t * d = dst + j * dst_stride2; ++ const uint8_t * s1 = src + j * 2 * src_stride2; ++ const uint8_t * s2 = s1 + src_stride1 * src_stride2; ++ ++ ff_rpi_sand128b_stripe_to_8_10(d, s1, s2, h); ++ } ++ } ++ else ++#endif ++ { ++ for (j = 0; j + n < w; j += dst_stride1) { ++ uint8_t * d = dst + j * dst_stride2; ++ const uint8_t * s1 = src + j * 2 * src_stride2; ++ const uint8_t * s2 = s1 + src_stride1 * src_stride2; ++ ++ for (unsigned int i = 0; i != h; ++i, s1 += src_stride1, s2 += src_stride1, d += dst_stride1) { ++ cpy16_to_8(d, s1, n, shr); ++ cpy16_to_8(d + n, s2, n, shr); ++ } ++ } ++ } ++ ++ // Fix up a trailing dest half stripe ++ if (j < w) { ++ uint8_t * d = dst + j * dst_stride2; ++ const uint8_t * s1 = src + j * 2 * src_stride2; ++ ++ for (unsigned int i = 0; i != h; ++i, s1 += src_stride1, d += dst_stride1) { ++ cpy16_to_8(d, s1, n, shr); ++ } ++ } ++} ++ ++int av_rpi_sand_to_planar_frame(AVFrame * const dst, const AVFrame * const src) ++{ ++ const int w = av_frame_cropped_width(src); ++ const int h = av_frame_cropped_height(src); ++ const int x = src->crop_left; ++ const int y = src->crop_top; ++ ++ // We will crop as part of the conversion ++ dst->crop_top = 0; ++ dst->crop_left = 0; ++ dst->crop_bottom = 0; ++ dst->crop_right = 0; ++ ++ switch (src->format){ ++ case AV_PIX_FMT_SAND128: ++ case AV_PIX_FMT_RPI4_8: ++ switch (dst->format){ ++ case AV_PIX_FMT_YUV420P: ++ av_rpi_sand_to_planar_y8(dst->data[0], dst->linesize[0], ++ src->data[0], ++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), ++ x, y, w, h); ++ av_rpi_sand_to_planar_c8(dst->data[1], dst->linesize[1], ++ dst->data[2], dst->linesize[2], ++ src->data[1], ++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), ++ x/2, y/2, w/2, h/2); ++ break; ++ case AV_PIX_FMT_NV12: ++ av_rpi_sand_to_planar_y8(dst->data[0], dst->linesize[0], ++ src->data[0], ++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), ++ x, y, w, h); ++ av_rpi_sand_to_planar_y8(dst->data[1], dst->linesize[1], ++ src->data[1], ++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), ++ x/2, y/2, w, h/2); ++ break; ++ default: ++ return -1; ++ } ++ break; ++ case AV_PIX_FMT_SAND64_10: ++ switch (dst->format){ ++ case AV_PIX_FMT_YUV420P10: ++ av_rpi_sand_to_planar_y16(dst->data[0], dst->linesize[0], ++ src->data[0], ++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), ++ x*2, y, w*2, h); ++ av_rpi_sand_to_planar_c16(dst->data[1], dst->linesize[1], ++ dst->data[2], dst->linesize[2], ++ src->data[1], ++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), ++ x, y/2, w, h/2); ++ break; ++ default: ++ return -1; ++ } ++ break; ++ case AV_PIX_FMT_RPI4_10: ++ switch (dst->format){ ++ case AV_PIX_FMT_YUV420P10: ++ av_rpi_sand30_to_planar_y16(dst->data[0], dst->linesize[0], ++ src->data[0], ++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), ++ x, y, w, h); ++ av_rpi_sand30_to_planar_c16(dst->data[1], dst->linesize[1], ++ dst->data[2], dst->linesize[2], ++ src->data[1], ++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), ++ x/2, y/2, w/2, h/2); ++ break; ++ case AV_PIX_FMT_NV12: ++ av_rpi_sand30_to_planar_y8(dst->data[0], dst->linesize[0], ++ src->data[0], ++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), ++ x, y, w, h); ++ av_rpi_sand30_to_planar_y8(dst->data[1], dst->linesize[1], ++ src->data[1], ++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), ++ x/2, y/2, w, h/2); ++ break; ++ default: ++ return -1; ++ } ++ break; ++ default: ++ return -1; ++ } ++ ++ return av_frame_copy_props(dst, src); ++} +--- /dev/null ++++ b/libavutil/rpi_sand_fns.h +@@ -0,0 +1,188 @@ ++/* ++Copyright (c) 2018 Raspberry Pi (Trading) Ltd. ++All rights reserved. ++ ++Redistribution and use in source and binary forms, with or without ++modification, are permitted provided that the following conditions are met: ++ * Redistributions of source code must retain the above copyright ++ notice, this list of conditions and the following disclaimer. ++ * Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in the ++ documentation and/or other materials provided with the distribution. ++ * Neither the name of the copyright holder nor the ++ names of its contributors may be used to endorse or promote products ++ derived from this software without specific prior written permission. ++ ++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED ++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE ++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY ++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; ++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ ++Authors: John Cox ++*/ ++ ++#ifndef AVUTIL_RPI_SAND_FNS ++#define AVUTIL_RPI_SAND_FNS ++ ++#include "libavutil/frame.h" ++ ++// For all these fns _x & _w are measured as coord * PW ++// For the C fns coords are in chroma pels (so luma / 2) ++// Strides are in bytes ++ ++void av_rpi_sand_to_planar_y8(uint8_t * dst, const unsigned int dst_stride, ++ const uint8_t * src, ++ unsigned int stride1, unsigned int stride2, ++ unsigned int _x, unsigned int y, ++ unsigned int _w, unsigned int h); ++void av_rpi_sand_to_planar_y16(uint8_t * dst, const unsigned int dst_stride, ++ const uint8_t * src, ++ unsigned int stride1, unsigned int stride2, ++ unsigned int _x, unsigned int y, ++ unsigned int _w, unsigned int h); ++ ++void av_rpi_sand_to_planar_c8(uint8_t * dst_u, const unsigned int dst_stride_u, ++ uint8_t * dst_v, const unsigned int dst_stride_v, ++ const uint8_t * src, ++ unsigned int stride1, unsigned int stride2, ++ unsigned int _x, unsigned int y, ++ unsigned int _w, unsigned int h); ++void av_rpi_sand_to_planar_c16(uint8_t * dst_u, const unsigned int dst_stride_u, ++ uint8_t * dst_v, const unsigned int dst_stride_v, ++ const uint8_t * src, ++ unsigned int stride1, unsigned int stride2, ++ unsigned int _x, unsigned int y, ++ unsigned int _w, unsigned int h); ++ ++void av_rpi_planar_to_sand_c8(uint8_t * dst_c, ++ unsigned int stride1, unsigned int stride2, ++ const uint8_t * src_u, const unsigned int src_stride_u, ++ const uint8_t * src_v, const unsigned int src_stride_v, ++ unsigned int _x, unsigned int y, ++ unsigned int _w, unsigned int h); ++void av_rpi_planar_to_sand_c16(uint8_t * dst_c, ++ unsigned int stride1, unsigned int stride2, ++ const uint8_t * src_u, const unsigned int src_stride_u, ++ const uint8_t * src_v, const unsigned int src_stride_v, ++ unsigned int _x, unsigned int y, ++ unsigned int _w, unsigned int h); ++ ++void av_rpi_sand30_to_planar_y16(uint8_t * dst, const unsigned int dst_stride, ++ const uint8_t * src, ++ unsigned int stride1, unsigned int stride2, ++ unsigned int _x, unsigned int y, ++ unsigned int _w, unsigned int h); ++void av_rpi_sand30_to_planar_c16(uint8_t * dst_u, const unsigned int dst_stride_u, ++ uint8_t * dst_v, const unsigned int dst_stride_v, ++ const uint8_t * src, ++ unsigned int stride1, unsigned int stride2, ++ unsigned int _x, unsigned int y, ++ unsigned int _w, unsigned int h); ++ ++void av_rpi_sand30_to_planar_y8(uint8_t * dst, const unsigned int dst_stride, ++ const uint8_t * src, ++ unsigned int stride1, unsigned int stride2, ++ unsigned int _x, unsigned int y, ++ unsigned int _w, unsigned int h); ++ ++// w/h in pixels ++void av_rpi_sand16_to_sand8(uint8_t * dst, const unsigned int dst_stride1, const unsigned int dst_stride2, ++ const uint8_t * src, const unsigned int src_stride1, const unsigned int src_stride2, ++ unsigned int w, unsigned int h, const unsigned int shr); ++ ++ ++// dst must contain required pixel format & allocated data buffers ++// Cropping on the src buffer will be honoured and dst crop will be set to zero ++int av_rpi_sand_to_planar_frame(AVFrame * const dst, const AVFrame * const src); ++ ++ ++static inline unsigned int av_rpi_sand_frame_stride1(const AVFrame * const frame) ++{ ++#ifdef RPI_ZC_SAND128_ONLY ++ // If we are sure we only only support 128 byte sand formats replace the ++ // var with a constant which should allow for better optimisation ++ return 128; ++#else ++ return frame->linesize[0]; ++#endif ++} ++ ++static inline unsigned int av_rpi_sand_frame_stride2(const AVFrame * const frame) ++{ ++ return frame->linesize[3]; ++} ++ ++ ++static inline int av_rpi_is_sand_format(const int format) ++{ ++ return (format >= AV_PIX_FMT_SAND128 && format <= AV_PIX_FMT_RPI4_10); ++} ++ ++static inline int av_rpi_is_sand_frame(const AVFrame * const frame) ++{ ++ return av_rpi_is_sand_format(frame->format); ++} ++ ++static inline int av_rpi_is_sand8_frame(const AVFrame * const frame) ++{ ++ return (frame->format == AV_PIX_FMT_SAND128 || frame->format == AV_PIX_FMT_RPI4_8); ++} ++ ++static inline int av_rpi_is_sand16_frame(const AVFrame * const frame) ++{ ++ return (frame->format >= AV_PIX_FMT_SAND64_10 && frame->format <= AV_PIX_FMT_SAND64_16); ++} ++ ++static inline int av_rpi_is_sand30_frame(const AVFrame * const frame) ++{ ++ return (frame->format == AV_PIX_FMT_RPI4_10); ++} ++ ++static inline int av_rpi_sand_frame_xshl(const AVFrame * const frame) ++{ ++ return av_rpi_is_sand8_frame(frame) ? 0 : 1; ++} ++ ++// If x is measured in bytes (not pixels) then this works for sand64_16 as ++// well as sand128 - but in the general case we work that out ++ ++static inline unsigned int av_rpi_sand_frame_off_y(const AVFrame * const frame, const unsigned int x_y, const unsigned int y) ++{ ++ const unsigned int stride1 = av_rpi_sand_frame_stride1(frame); ++ const unsigned int stride2 = av_rpi_sand_frame_stride2(frame); ++ const unsigned int x = x_y << av_rpi_sand_frame_xshl(frame); ++ const unsigned int x1 = x & (stride1 - 1); ++ const unsigned int x2 = x ^ x1; ++ ++ return x1 + stride1 * y + stride2 * x2; ++} ++ ++static inline unsigned int av_rpi_sand_frame_off_c(const AVFrame * const frame, const unsigned int x_c, const unsigned int y_c) ++{ ++ const unsigned int stride1 = av_rpi_sand_frame_stride1(frame); ++ const unsigned int stride2 = av_rpi_sand_frame_stride2(frame); ++ const unsigned int x = x_c << (av_rpi_sand_frame_xshl(frame) + 1); ++ const unsigned int x1 = x & (stride1 - 1); ++ const unsigned int x2 = x ^ x1; ++ ++ return x1 + stride1 * y_c + stride2 * x2; ++} ++ ++static inline uint8_t * av_rpi_sand_frame_pos_y(const AVFrame * const frame, const unsigned int x, const unsigned int y) ++{ ++ return frame->data[0] + av_rpi_sand_frame_off_y(frame, x, y); ++} ++ ++static inline uint8_t * av_rpi_sand_frame_pos_c(const AVFrame * const frame, const unsigned int x, const unsigned int y) ++{ ++ return frame->data[1] + av_rpi_sand_frame_off_c(frame, x, y); ++} ++ ++#endif ++ +--- a/libswscale/aarch64/rgb2rgb.c ++++ b/libswscale/aarch64/rgb2rgb.c +@@ -30,6 +30,12 @@ + void ff_interleave_bytes_neon(const uint8_t *src1, const uint8_t *src2, + uint8_t *dest, int width, int height, + int src1Stride, int src2Stride, int dstStride); ++void ff_bgr24toyv12_aarch64(const uint8_t *src, uint8_t *ydst, uint8_t *udst, ++ uint8_t *vdst, int width, int height, int lumStride, ++ int chromStride, int srcStride, int32_t *rgb2yuv); ++void ff_rgb24toyv12_aarch64(const uint8_t *src, uint8_t *ydst, uint8_t *udst, ++ uint8_t *vdst, int width, int height, int lumStride, ++ int chromStride, int srcStride, int32_t *rgb2yuv); + + av_cold void rgb2rgb_init_aarch64(void) + { +@@ -37,5 +43,7 @@ av_cold void rgb2rgb_init_aarch64(void) + + if (have_neon(cpu_flags)) { + interleaveBytes = ff_interleave_bytes_neon; ++ ff_rgb24toyv12 = ff_rgb24toyv12_aarch64; ++ ff_bgr24toyv12 = ff_bgr24toyv12_aarch64; + } + } +--- a/libswscale/aarch64/rgb2rgb_neon.S ++++ b/libswscale/aarch64/rgb2rgb_neon.S +@@ -77,3 +77,359 @@ function ff_interleave_bytes_neon, expor + 0: + ret + endfunc ++ ++// Expand rgb2 into r0+r1/g0+g1/b0+b1 ++.macro XRGB3Y r0, g0, b0, r1, g1, b1, r2, g2, b2 ++ uxtl \r0\().8h, \r2\().8b ++ uxtl \g0\().8h, \g2\().8b ++ uxtl \b0\().8h, \b2\().8b ++ ++ uxtl2 \r1\().8h, \r2\().16b ++ uxtl2 \g1\().8h, \g2\().16b ++ uxtl2 \b1\().8h, \b2\().16b ++.endm ++ ++// Expand rgb2 into r0+r1/g0+g1/b0+b1 ++// and pick every other el to put back into rgb2 for chroma ++.macro XRGB3YC r0, g0, b0, r1, g1, b1, r2, g2, b2 ++ XRGB3Y \r0, \g0, \b0, \r1, \g1, \b1, \r2, \g2, \b2 ++ ++ bic \r2\().8h, #0xff, LSL #8 ++ bic \g2\().8h, #0xff, LSL #8 ++ bic \b2\().8h, #0xff, LSL #8 ++.endm ++ ++.macro SMLAL3 d0, d1, s0, s1, s2, c0, c1, c2 ++ smull \d0\().4s, \s0\().4h, \c0 ++ smlal \d0\().4s, \s1\().4h, \c1 ++ smlal \d0\().4s, \s2\().4h, \c2 ++ smull2 \d1\().4s, \s0\().8h, \c0 ++ smlal2 \d1\().4s, \s1\().8h, \c1 ++ smlal2 \d1\().4s, \s2\().8h, \c2 ++.endm ++ ++// d0 may be s0 ++// s0, s2 corrupted ++.macro SHRN_Y d0, s0, s1, s2, s3, k128h ++ shrn \s0\().4h, \s0\().4s, #12 ++ shrn2 \s0\().8h, \s1\().4s, #12 ++ add \s0\().8h, \s0\().8h, \k128h\().8h // +128 (>> 3 = 16) ++ sqrshrun \d0\().8b, \s0\().8h, #3 ++ shrn \s2\().4h, \s2\().4s, #12 ++ shrn2 \s2\().8h, \s3\().4s, #12 ++ add \s2\().8h, \s2\().8h, \k128h\().8h ++ sqrshrun2 \d0\().16b, v28.8h, #3 ++.endm ++ ++.macro SHRN_C d0, s0, s1, k128b ++ shrn \s0\().4h, \s0\().4s, #14 ++ shrn2 \s0\().8h, \s1\().4s, #14 ++ sqrshrn \s0\().8b, \s0\().8h, #1 ++ add \d0\().8b, \s0\().8b, \k128b\().8b // +128 ++.endm ++ ++.macro STB2V s0, n, a ++ st1 {\s0\().b}[(\n+0)], [\a], #1 ++ st1 {\s0\().b}[(\n+1)], [\a], #1 ++.endm ++ ++.macro STB4V s0, n, a ++ STB2V \s0, (\n+0), \a ++ STB2V \s0, (\n+2), \a ++.endm ++ ++ ++// void ff_rgb24toyv12_aarch64( ++// const uint8_t *src, // x0 ++// uint8_t *ydst, // x1 ++// uint8_t *udst, // x2 ++// uint8_t *vdst, // x3 ++// int width, // w4 ++// int height, // w5 ++// int lumStride, // w6 ++// int chromStride, // w7 ++// int srcStr, // [sp, #0] ++// int32_t *rgb2yuv); // [sp, #8] ++ ++function ff_rgb24toyv12_aarch64, export=1 ++ ldr x15, [sp, #8] ++ ld3 {v3.s, v4.s, v5.s}[0], [x15], #12 ++ ld3 {v3.s, v4.s, v5.s}[1], [x15], #12 ++ ld3 {v3.s, v4.s, v5.s}[2], [x15] ++ mov v6.16b, v3.16b ++ mov v3.16b, v5.16b ++ mov v5.16b, v6.16b ++ b 99f ++endfunc ++ ++// void ff_bgr24toyv12_aarch64( ++// const uint8_t *src, // x0 ++// uint8_t *ydst, // x1 ++// uint8_t *udst, // x2 ++// uint8_t *vdst, // x3 ++// int width, // w4 ++// int height, // w5 ++// int lumStride, // w6 ++// int chromStride, // w7 ++// int srcStr, // [sp, #0] ++// int32_t *rgb2yuv); // [sp, #8] (including Mac) ++ ++// regs ++// v0-2 Src bytes - reused as chroma src ++// v3-5 Coeffs (packed very inefficiently - could be squashed) ++// v6 128b ++// v7 128h ++// v8-15 Reserved ++// v16-18 Lo Src expanded as H ++// v19 - ++// v20-22 Hi Src expanded as H ++// v23 - ++// v24 U out ++// v25 U tmp ++// v26 Y out ++// v27-29 Y tmp ++// v30 V out ++// v31 V tmp ++ ++function ff_bgr24toyv12_aarch64, export=1 ++ ldr x15, [sp, #8] ++ ld3 {v3.s, v4.s, v5.s}[0], [x15], #12 ++ ld3 {v3.s, v4.s, v5.s}[1], [x15], #12 ++ ld3 {v3.s, v4.s, v5.s}[2], [x15] ++ ++99: ++ ldr w14, [sp, #0] ++ movi v7.8b, #128 ++ uxtl v6.8h, v7.8b ++ // Ensure if nothing to do then we do nothing ++ cmp w4, #0 ++ b.le 90f ++ cmp w5, #0 ++ b.le 90f ++ // If w % 16 != 0 then -16 so we do main loop 1 fewer times with ++ // the remainder done in the tail ++ tst w4, #15 ++ b.eq 1f ++ sub w4, w4, #16 ++1: ++ ++// -------------------- Even line body - YUV ++11: ++ subs w9, w4, #0 ++ mov x10, x0 ++ mov x11, x1 ++ mov x12, x2 ++ mov x13, x3 ++ b.lt 12f ++ ++ ld3 {v0.16b, v1.16b, v2.16b}, [x10], #48 ++ subs w9, w9, #16 ++ b.le 13f ++ ++10: ++ XRGB3YC v16, v17, v18, v20, v21, v22, v0, v1, v2 ++ ++ // Testing shows it is faster to stack the smull/smlal ops together ++ // rather than interleave them between channels and indeed even the ++ // shift/add sections seem happier not interleaved ++ ++ // Y0 ++ SMLAL3 v26, v27, v16, v17, v18, v3.h[0], v4.h[0], v5.h[0] ++ // Y1 ++ SMLAL3 v28, v29, v20, v21, v22, v3.h[0], v4.h[0], v5.h[0] ++ SHRN_Y v26, v26, v27, v28, v29, v6 ++ ++ // U ++ // Vector subscript *2 as we loaded into S but are only using H ++ SMLAL3 v24, v25, v0, v1, v2, v3.h[2], v4.h[2], v5.h[2] ++ ++ // V ++ SMLAL3 v30, v31, v0, v1, v2, v3.h[4], v4.h[4], v5.h[4] ++ ++ ld3 {v0.16b, v1.16b, v2.16b}, [x10], #48 ++ ++ SHRN_C v24, v24, v25, v7 ++ SHRN_C v30, v30, v31, v7 ++ ++ subs w9, w9, #16 ++ ++ st1 {v26.16b}, [x11], #16 ++ st1 {v24.8b}, [x12], #8 ++ st1 {v30.8b}, [x13], #8 ++ ++ b.gt 10b ++ ++// -------------------- Even line tail - YUV ++// If width % 16 == 0 then simply runs once with preloaded RGB ++// If other then deals with preload & then does remaining tail ++ ++13: ++ // Body is simple copy of main loop body minus preload ++ ++ XRGB3YC v16, v17, v18, v20, v21, v22, v0, v1, v2 ++ // Y0 ++ SMLAL3 v26, v27, v16, v17, v18, v3.h[0], v4.h[0], v5.h[0] ++ // Y1 ++ SMLAL3 v28, v29, v20, v21, v22, v3.h[0], v4.h[0], v5.h[0] ++ SHRN_Y v26, v26, v27, v28, v29, v6 ++ // U ++ SMLAL3 v24, v25, v0, v1, v2, v3.h[2], v4.h[2], v5.h[2] ++ // V ++ SMLAL3 v30, v31, v0, v1, v2, v3.h[4], v4.h[4], v5.h[4] ++ ++ cmp w9, #-16 ++ ++ SHRN_C v24, v24, v25, v7 ++ SHRN_C v30, v30, v31, v7 ++ ++ // Here: ++ // w9 == 0 width % 16 == 0, tail done ++ // w9 > -16 1st tail done (16 pels), remainder still to go ++ // w9 == -16 shouldn't happen ++ // w9 > -32 2nd tail done ++ // w9 <= -32 shouldn't happen ++ ++ b.lt 2f ++ st1 {v26.16b}, [x11], #16 ++ st1 {v24.8b}, [x12], #8 ++ st1 {v30.8b}, [x13], #8 ++ cbz w9, 3f ++ ++12: ++ sub w9, w9, #16 ++ ++ tbz w9, #3, 1f ++ ld3 {v0.8b, v1.8b, v2.8b}, [x10], #24 ++1: tbz w9, #2, 1f ++ ld3 {v0.b, v1.b, v2.b}[8], [x10], #3 ++ ld3 {v0.b, v1.b, v2.b}[9], [x10], #3 ++ ld3 {v0.b, v1.b, v2.b}[10], [x10], #3 ++ ld3 {v0.b, v1.b, v2.b}[11], [x10], #3 ++1: tbz w9, #1, 1f ++ ld3 {v0.b, v1.b, v2.b}[12], [x10], #3 ++ ld3 {v0.b, v1.b, v2.b}[13], [x10], #3 ++1: tbz w9, #0, 13b ++ ld3 {v0.b, v1.b, v2.b}[14], [x10], #3 ++ b 13b ++ ++2: ++ tbz w9, #3, 1f ++ st1 {v26.8b}, [x11], #8 ++ STB4V v24, 0, x12 ++ STB4V v30, 0, x13 ++1: tbz w9, #2, 1f ++ STB4V v26 8, x11 ++ STB2V v24, 4, x12 ++ STB2V v30, 4, x13 ++1: tbz w9, #1, 1f ++ STB2V v26, 12, x11 ++ st1 {v24.b}[6], [x12], #1 ++ st1 {v30.b}[6], [x13], #1 ++1: tbz w9, #0, 1f ++ st1 {v26.b}[14], [x11] ++ st1 {v24.b}[7], [x12] ++ st1 {v30.b}[7], [x13] ++1: ++3: ++ ++// -------------------- Odd line body - Y only ++ ++ subs w5, w5, #1 ++ b.eq 90f ++ ++ subs w9, w4, #0 ++ add x0, x0, w14, sxtw ++ add x1, x1, w6, sxtw ++ mov x10, x0 ++ mov x11, x1 ++ b.lt 12f ++ ++ ld3 {v0.16b, v1.16b, v2.16b}, [x10], #48 ++ subs w9, w9, #16 ++ b.le 13f ++ ++10: ++ XRGB3Y v16, v17, v18, v20, v21, v22, v0, v1, v2 ++ // Y0 ++ SMLAL3 v26, v27, v16, v17, v18, v3.h[0], v4.h[0], v5.h[0] ++ // Y1 ++ SMLAL3 v28, v29, v20, v21, v22, v3.h[0], v4.h[0], v5.h[0] ++ ++ ld3 {v0.16b, v1.16b, v2.16b}, [x10], #48 ++ ++ SHRN_Y v26, v26, v27, v28, v29, v6 ++ ++ subs w9, w9, #16 ++ ++ st1 {v26.16b}, [x11], #16 ++ ++ b.gt 10b ++ ++// -------------------- Odd line tail - Y ++// If width % 16 == 0 then simply runs once with preloaded RGB ++// If other then deals with preload & then does remaining tail ++ ++13: ++ // Body is simple copy of main loop body minus preload ++ ++ XRGB3Y v16, v17, v18, v20, v21, v22, v0, v1, v2 ++ // Y0 ++ SMLAL3 v26, v27, v16, v17, v18, v3.h[0], v4.h[0], v5.h[0] ++ // Y1 ++ SMLAL3 v28, v29, v20, v21, v22, v3.h[0], v4.h[0], v5.h[0] ++ ++ cmp w9, #-16 ++ ++ SHRN_Y v26, v26, v27, v28, v29, v6 ++ ++ // Here: ++ // w9 == 0 width % 16 == 0, tail done ++ // w9 > -16 1st tail done (16 pels), remainder still to go ++ // w9 == -16 shouldn't happen ++ // w9 > -32 2nd tail done ++ // w9 <= -32 shouldn't happen ++ ++ b.lt 2f ++ st1 {v26.16b}, [x11], #16 ++ cbz w9, 3f ++ ++12: ++ sub w9, w9, #16 ++ ++ tbz w9, #3, 1f ++ ld3 {v0.8b, v1.8b, v2.8b}, [x10], #24 ++1: tbz w9, #2, 1f ++ ld3 {v0.b, v1.b, v2.b}[8], [x10], #3 ++ ld3 {v0.b, v1.b, v2.b}[9], [x10], #3 ++ ld3 {v0.b, v1.b, v2.b}[10], [x10], #3 ++ ld3 {v0.b, v1.b, v2.b}[11], [x10], #3 ++1: tbz w9, #1, 1f ++ ld3 {v0.b, v1.b, v2.b}[12], [x10], #3 ++ ld3 {v0.b, v1.b, v2.b}[13], [x10], #3 ++1: tbz w9, #0, 13b ++ ld3 {v0.b, v1.b, v2.b}[14], [x10], #3 ++ b 13b ++ ++2: ++ tbz w9, #3, 1f ++ st1 {v26.8b}, [x11], #8 ++1: tbz w9, #2, 1f ++ STB4V v26, 8, x11 ++1: tbz w9, #1, 1f ++ STB2V v26, 12, x11 ++1: tbz w9, #0, 1f ++ st1 {v26.b}[14], [x11] ++1: ++3: ++ ++// ------------------- Loop to start ++ ++ add x0, x0, w14, sxtw ++ add x1, x1, w6, sxtw ++ add x2, x2, w7, sxtw ++ add x3, x3, w7, sxtw ++ subs w5, w5, #1 ++ b.gt 11b ++90: ++ ret ++endfunc +--- a/libswscale/rgb2rgb.c ++++ b/libswscale/rgb2rgb.c +@@ -83,6 +83,31 @@ void (*ff_rgb24toyv12)(const uint8_t *sr + int width, int height, + int lumStride, int chromStride, int srcStride, + int32_t *rgb2yuv); ++void (*ff_bgr24toyv12)(const uint8_t *src, uint8_t *ydst, ++ uint8_t *udst, uint8_t *vdst, ++ int width, int height, ++ int lumStride, int chromStride, int srcStride, ++ int32_t *rgb2yuv); ++void (*ff_rgbxtoyv12)(const uint8_t *src, uint8_t *ydst, ++ uint8_t *udst, uint8_t *vdst, ++ int width, int height, ++ int lumStride, int chromStride, int srcStride, ++ int32_t *rgb2yuv); ++void (*ff_bgrxtoyv12)(const uint8_t *src, uint8_t *ydst, ++ uint8_t *udst, uint8_t *vdst, ++ int width, int height, ++ int lumStride, int chromStride, int srcStride, ++ int32_t *rgb2yuv); ++void (*ff_xrgbtoyv12)(const uint8_t *src, uint8_t *ydst, ++ uint8_t *udst, uint8_t *vdst, ++ int width, int height, ++ int lumStride, int chromStride, int srcStride, ++ int32_t *rgb2yuv); ++void (*ff_xbgrtoyv12)(const uint8_t *src, uint8_t *ydst, ++ uint8_t *udst, uint8_t *vdst, ++ int width, int height, ++ int lumStride, int chromStride, int srcStride, ++ int32_t *rgb2yuv); + void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height, + int srcStride, int dstStride); + void (*interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst, +--- a/libswscale/rgb2rgb.h ++++ b/libswscale/rgb2rgb.h +@@ -79,6 +79,9 @@ void rgb12to15(const uint8_t *src, ui + void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, + uint8_t *vdst, int width, int height, int lumStride, + int chromStride, int srcStride, int32_t *rgb2yuv); ++void ff_bgr24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, ++ uint8_t *vdst, int width, int height, int lumStride, ++ int chromStride, int srcStride, int32_t *rgb2yuv); + + /** + * Height should be a multiple of 2 and width should be a multiple of 16. +@@ -128,6 +131,26 @@ extern void (*ff_rgb24toyv12)(const uint + int width, int height, + int lumStride, int chromStride, int srcStride, + int32_t *rgb2yuv); ++extern void (*ff_bgr24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, ++ int width, int height, ++ int lumStride, int chromStride, int srcStride, ++ int32_t *rgb2yuv); ++extern void (*ff_rgbxtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, ++ int width, int height, ++ int lumStride, int chromStride, int srcStride, ++ int32_t *rgb2yuv); ++extern void (*ff_bgrxtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, ++ int width, int height, ++ int lumStride, int chromStride, int srcStride, ++ int32_t *rgb2yuv); ++extern void (*ff_xrgbtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, ++ int width, int height, ++ int lumStride, int chromStride, int srcStride, ++ int32_t *rgb2yuv); ++extern void (*ff_xbgrtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, ++ int width, int height, ++ int lumStride, int chromStride, int srcStride, ++ int32_t *rgb2yuv); + extern void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height, + int srcStride, int dstStride); + +--- a/libswscale/rgb2rgb_template.c ++++ b/libswscale/rgb2rgb_template.c +@@ -646,13 +646,14 @@ static inline void uyvytoyv12_c(const ui + * others are ignored in the C version. + * FIXME: Write HQ version. + */ +-void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, ++static void rgb24toyv12_x(const uint8_t *src, uint8_t *ydst, uint8_t *udst, + uint8_t *vdst, int width, int height, int lumStride, +- int chromStride, int srcStride, int32_t *rgb2yuv) ++ int chromStride, int srcStride, int32_t *rgb2yuv, ++ const uint8_t x[9]) + { +- int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; +- int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; +- int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; ++ int32_t ry = rgb2yuv[x[0]], gy = rgb2yuv[x[1]], by = rgb2yuv[x[2]]; ++ int32_t ru = rgb2yuv[x[3]], gu = rgb2yuv[x[4]], bu = rgb2yuv[x[5]]; ++ int32_t rv = rgb2yuv[x[6]], gv = rgb2yuv[x[7]], bv = rgb2yuv[x[8]]; + int y; + const int chromWidth = width >> 1; + +@@ -678,6 +679,19 @@ void ff_rgb24toyv12_c(const uint8_t *src + Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; + ydst[2 * i + 1] = Y; + } ++ if ((width & 1) != 0) { ++ unsigned int b = src[6 * i + 0]; ++ unsigned int g = src[6 * i + 1]; ++ unsigned int r = src[6 * i + 2]; ++ ++ unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; ++ unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128; ++ unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128; ++ ++ udst[i] = U; ++ vdst[i] = V; ++ ydst[2 * i] = Y; ++ } + ydst += lumStride; + src += srcStride; + +@@ -700,6 +714,15 @@ void ff_rgb24toyv12_c(const uint8_t *src + Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; + ydst[2 * i + 1] = Y; + } ++ if ((width & 1) != 0) { ++ unsigned int b = src[6 * i + 0]; ++ unsigned int g = src[6 * i + 1]; ++ unsigned int r = src[6 * i + 2]; ++ ++ unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; ++ ++ ydst[2 * i] = Y; ++ } + udst += chromStride; + vdst += chromStride; + ydst += lumStride; +@@ -707,6 +730,147 @@ void ff_rgb24toyv12_c(const uint8_t *src + } + } + ++static const uint8_t x_rgb[9] = { ++ RY_IDX, GY_IDX, BY_IDX, ++ RU_IDX, GU_IDX, BU_IDX, ++ RV_IDX, GV_IDX, BV_IDX, ++}; ++ ++static const uint8_t x_bgr[9] = { ++ BY_IDX, GY_IDX, RY_IDX, ++ BU_IDX, GU_IDX, RU_IDX, ++ BV_IDX, GV_IDX, RV_IDX, ++}; ++ ++void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, ++ uint8_t *vdst, int width, int height, int lumStride, ++ int chromStride, int srcStride, int32_t *rgb2yuv) ++{ ++ rgb24toyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_rgb); ++} ++ ++void ff_bgr24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, ++ uint8_t *vdst, int width, int height, int lumStride, ++ int chromStride, int srcStride, int32_t *rgb2yuv) ++{ ++ rgb24toyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_bgr); ++} ++ ++static void rgbxtoyv12_x(const uint8_t *src, uint8_t *ydst, uint8_t *udst, ++ uint8_t *vdst, int width, int height, int lumStride, ++ int chromStride, int srcStride, int32_t *rgb2yuv, ++ const uint8_t x[9]) ++{ ++ int32_t ry = rgb2yuv[x[0]], gy = rgb2yuv[x[1]], by = rgb2yuv[x[2]]; ++ int32_t ru = rgb2yuv[x[3]], gu = rgb2yuv[x[4]], bu = rgb2yuv[x[5]]; ++ int32_t rv = rgb2yuv[x[6]], gv = rgb2yuv[x[7]], bv = rgb2yuv[x[8]]; ++ int y; ++ const int chromWidth = width >> 1; ++ ++ for (y = 0; y < height; y += 2) { ++ int i; ++ for (i = 0; i < chromWidth; i++) { ++ unsigned int b = src[8 * i + 2]; ++ unsigned int g = src[8 * i + 1]; ++ unsigned int r = src[8 * i + 0]; ++ ++ unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; ++ unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128; ++ unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128; ++ ++ udst[i] = U; ++ vdst[i] = V; ++ ydst[2 * i] = Y; ++ ++ b = src[8 * i + 6]; ++ g = src[8 * i + 5]; ++ r = src[8 * i + 4]; ++ ++ Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; ++ ydst[2 * i + 1] = Y; ++ } ++ if ((width & 1) != 0) { ++ unsigned int b = src[8 * i + 2]; ++ unsigned int g = src[8 * i + 1]; ++ unsigned int r = src[8 * i + 0]; ++ ++ unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; ++ unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128; ++ unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128; ++ ++ udst[i] = U; ++ vdst[i] = V; ++ ydst[2 * i] = Y; ++ } ++ ydst += lumStride; ++ src += srcStride; ++ ++ if (y+1 == height) ++ break; ++ ++ for (i = 0; i < chromWidth; i++) { ++ unsigned int b = src[8 * i + 2]; ++ unsigned int g = src[8 * i + 1]; ++ unsigned int r = src[8 * i + 0]; ++ ++ unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; ++ ++ ydst[2 * i] = Y; ++ ++ b = src[8 * i + 6]; ++ g = src[8 * i + 5]; ++ r = src[8 * i + 4]; ++ ++ Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; ++ ydst[2 * i + 1] = Y; ++ } ++ if ((width & 1) != 0) { ++ unsigned int b = src[8 * i + 2]; ++ unsigned int g = src[8 * i + 1]; ++ unsigned int r = src[8 * i + 0]; ++ ++ unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; ++ ++ ydst[2 * i] = Y; ++ } ++ udst += chromStride; ++ vdst += chromStride; ++ ydst += lumStride; ++ src += srcStride; ++ } ++} ++ ++static void ff_rgbxtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, ++ uint8_t *vdst, int width, int height, int lumStride, ++ int chromStride, int srcStride, int32_t *rgb2yuv) ++{ ++ rgbxtoyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_rgb); ++} ++ ++static void ff_bgrxtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, ++ uint8_t *vdst, int width, int height, int lumStride, ++ int chromStride, int srcStride, int32_t *rgb2yuv) ++{ ++ rgbxtoyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_bgr); ++} ++ ++// As the general code does no SIMD-like ops simply adding 1 to the src address ++// will fix the ignored alpha position ++static void ff_xrgbtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, ++ uint8_t *vdst, int width, int height, int lumStride, ++ int chromStride, int srcStride, int32_t *rgb2yuv) ++{ ++ rgbxtoyv12_x(src + 1, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_rgb); ++} ++ ++static void ff_xbgrtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, ++ uint8_t *vdst, int width, int height, int lumStride, ++ int chromStride, int srcStride, int32_t *rgb2yuv) ++{ ++ rgbxtoyv12_x(src + 1, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_bgr); ++} ++ ++ + static void interleaveBytes_c(const uint8_t *src1, const uint8_t *src2, + uint8_t *dest, int width, int height, + int src1Stride, int src2Stride, int dstStride) +@@ -980,6 +1144,11 @@ static av_cold void rgb2rgb_init_c(void) + yuy2toyv12 = yuy2toyv12_c; + planar2x = planar2x_c; + ff_rgb24toyv12 = ff_rgb24toyv12_c; ++ ff_bgr24toyv12 = ff_bgr24toyv12_c; ++ ff_rgbxtoyv12 = ff_rgbxtoyv12_c; ++ ff_bgrxtoyv12 = ff_bgrxtoyv12_c; ++ ff_xrgbtoyv12 = ff_xrgbtoyv12_c; ++ ff_xbgrtoyv12 = ff_xbgrtoyv12_c; + interleaveBytes = interleaveBytes_c; + deinterleaveBytes = deinterleaveBytes_c; + vu9_to_vu12 = vu9_to_vu12_c; +--- a/libswscale/swscale_unscaled.c ++++ b/libswscale/swscale_unscaled.c +@@ -1654,6 +1654,91 @@ static int bgr24ToYv12Wrapper(SwsContext + return srcSliceH; + } + ++static int rgb24ToYv12Wrapper(SwsContext *c, const uint8_t *src[], ++ int srcStride[], int srcSliceY, int srcSliceH, ++ uint8_t *dst[], int dstStride[]) ++{ ++ ff_bgr24toyv12( ++ src[0], ++ dst[0] + srcSliceY * dstStride[0], ++ dst[1] + (srcSliceY >> 1) * dstStride[1], ++ dst[2] + (srcSliceY >> 1) * dstStride[2], ++ c->srcW, srcSliceH, ++ dstStride[0], dstStride[1], srcStride[0], ++ c->input_rgb2yuv_table); ++ if (dst[3]) ++ fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); ++ return srcSliceH; ++} ++ ++static int bgrxToYv12Wrapper(SwsContext *c, const uint8_t *src[], ++ int srcStride[], int srcSliceY, int srcSliceH, ++ uint8_t *dst[], int dstStride[]) ++{ ++ ff_bgrxtoyv12( ++ src[0], ++ dst[0] + srcSliceY * dstStride[0], ++ dst[1] + (srcSliceY >> 1) * dstStride[1], ++ dst[2] + (srcSliceY >> 1) * dstStride[2], ++ c->srcW, srcSliceH, ++ dstStride[0], dstStride[1], srcStride[0], ++ c->input_rgb2yuv_table); ++ if (dst[3]) ++ fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); ++ return srcSliceH; ++} ++ ++static int rgbxToYv12Wrapper(SwsContext *c, const uint8_t *src[], ++ int srcStride[], int srcSliceY, int srcSliceH, ++ uint8_t *dst[], int dstStride[]) ++{ ++ ff_rgbxtoyv12( ++ src[0], ++ dst[0] + srcSliceY * dstStride[0], ++ dst[1] + (srcSliceY >> 1) * dstStride[1], ++ dst[2] + (srcSliceY >> 1) * dstStride[2], ++ c->srcW, srcSliceH, ++ dstStride[0], dstStride[1], srcStride[0], ++ c->input_rgb2yuv_table); ++ if (dst[3]) ++ fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); ++ return srcSliceH; ++} ++ ++static int xbgrToYv12Wrapper(SwsContext *c, const uint8_t *src[], ++ int srcStride[], int srcSliceY, int srcSliceH, ++ uint8_t *dst[], int dstStride[]) ++{ ++ ff_xbgrtoyv12( ++ src[0], ++ dst[0] + srcSliceY * dstStride[0], ++ dst[1] + (srcSliceY >> 1) * dstStride[1], ++ dst[2] + (srcSliceY >> 1) * dstStride[2], ++ c->srcW, srcSliceH, ++ dstStride[0], dstStride[1], srcStride[0], ++ c->input_rgb2yuv_table); ++ if (dst[3]) ++ fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); ++ return srcSliceH; ++} ++ ++static int xrgbToYv12Wrapper(SwsContext *c, const uint8_t *src[], ++ int srcStride[], int srcSliceY, int srcSliceH, ++ uint8_t *dst[], int dstStride[]) ++{ ++ ff_xrgbtoyv12( ++ src[0], ++ dst[0] + srcSliceY * dstStride[0], ++ dst[1] + (srcSliceY >> 1) * dstStride[1], ++ dst[2] + (srcSliceY >> 1) * dstStride[2], ++ c->srcW, srcSliceH, ++ dstStride[0], dstStride[1], srcStride[0], ++ c->input_rgb2yuv_table); ++ if (dst[3]) ++ fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); ++ return srcSliceH; ++} ++ + static int yvu9ToYv12Wrapper(SwsContext *c, const uint8_t *src[], + int srcStride[], int srcSliceY, int srcSliceH, + uint8_t *dst[], int dstStride[]) +@@ -1977,7 +2062,6 @@ void ff_get_unscaled_swscale(SwsContext + const enum AVPixelFormat dstFormat = c->dstFormat; + const int flags = c->flags; + const int dstH = c->dstH; +- const int dstW = c->dstW; + int needsDither; + + needsDither = isAnyRGB(dstFormat) && +@@ -2035,8 +2119,34 @@ void ff_get_unscaled_swscale(SwsContext + /* bgr24toYV12 */ + if (srcFormat == AV_PIX_FMT_BGR24 && + (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) && +- !(flags & SWS_ACCURATE_RND) && !(dstW&1)) ++ !(flags & SWS_ACCURATE_RND)) + c->convert_unscaled = bgr24ToYv12Wrapper; ++ /* rgb24toYV12 */ ++ if (srcFormat == AV_PIX_FMT_RGB24 && ++ (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) && ++ !(flags & SWS_ACCURATE_RND)) ++ c->convert_unscaled = rgb24ToYv12Wrapper; ++ ++ /* bgrxtoYV12 */ ++ if (((srcFormat == AV_PIX_FMT_BGRA && dstFormat == AV_PIX_FMT_YUV420P) || ++ (srcFormat == AV_PIX_FMT_BGR0 && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) && ++ !(flags & SWS_ACCURATE_RND)) ++ c->convert_unscaled = bgrxToYv12Wrapper; ++ /* rgbx24toYV12 */ ++ if (((srcFormat == AV_PIX_FMT_RGBA && dstFormat == AV_PIX_FMT_YUV420P) || ++ (srcFormat == AV_PIX_FMT_RGB0 && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) && ++ !(flags & SWS_ACCURATE_RND)) ++ c->convert_unscaled = rgbxToYv12Wrapper; ++ /* xbgrtoYV12 */ ++ if (((srcFormat == AV_PIX_FMT_ABGR && dstFormat == AV_PIX_FMT_YUV420P) || ++ (srcFormat == AV_PIX_FMT_0BGR && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) && ++ !(flags & SWS_ACCURATE_RND)) ++ c->convert_unscaled = xbgrToYv12Wrapper; ++ /* xrgb24toYV12 */ ++ if (((srcFormat == AV_PIX_FMT_ARGB && dstFormat == AV_PIX_FMT_YUV420P) || ++ (srcFormat == AV_PIX_FMT_0RGB && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) && ++ !(flags & SWS_ACCURATE_RND)) ++ c->convert_unscaled = xrgbToYv12Wrapper; + + /* RGB/BGR -> RGB/BGR (no dither needed forms) */ + if (isAnyRGB(srcFormat) && isAnyRGB(dstFormat) && findRgbConvFn(c) +--- a/libswscale/tests/swscale.c ++++ b/libswscale/tests/swscale.c +@@ -23,6 +23,7 @@ + #include <string.h> + #include <inttypes.h> + #include <stdarg.h> ++#include <time.h> + + #undef HAVE_AV_CONFIG_H + #include "libavutil/cpu.h" +@@ -78,6 +79,15 @@ struct Results { + uint32_t crc; + }; + ++static int time_rep = 0; ++ ++static uint64_t utime(void) ++{ ++ struct timespec ts; ++ clock_gettime(CLOCK_MONOTONIC, &ts); ++ return ts.tv_nsec / 1000 + (uint64_t)ts.tv_sec * 1000000; ++} ++ + // test by ref -> src -> dst -> out & compare out against ref + // ref & out are YV12 + static int doTest(const uint8_t * const ref[4], int refStride[4], int w, int h, +@@ -174,7 +184,7 @@ static int doTest(const uint8_t * const + goto end; + } + +- printf(" %s %dx%d -> %s %3dx%3d flags=%2d", ++ printf(" %s %4dx%4d -> %s %4dx%4d flags=%2d", + desc_src->name, srcW, srcH, + desc_dst->name, dstW, dstH, + flags); +@@ -182,6 +192,17 @@ static int doTest(const uint8_t * const + + sws_scale(dstContext, (const uint8_t * const*)src, srcStride, 0, srcH, dst, dstStride); + ++ if (time_rep != 0) ++ { ++ const uint64_t now = utime(); ++ uint64_t done; ++ for (i = 1; i != time_rep; ++i) { ++ sws_scale(dstContext, (const uint8_t * const*)src, srcStride, 0, srcH, dst, dstStride); ++ } ++ done = utime(); ++ printf(" T=%7"PRId64"us ", done-now); ++ } ++ + for (i = 0; i < 4 && dstStride[i]; i++) + crc = av_crc(av_crc_get_table(AV_CRC_32_IEEE), crc, dst[i], + dstStride[i] * dstH); +@@ -355,56 +376,78 @@ static int fileTest(const uint8_t * cons + return 0; + } + +-#define W 96 +-#define H 96 +- + int main(int argc, char **argv) + { ++ unsigned int W = 96; ++ unsigned int H = 96; ++ unsigned int W2; ++ unsigned int H2; ++ unsigned int S; + enum AVPixelFormat srcFormat = AV_PIX_FMT_NONE; + enum AVPixelFormat dstFormat = AV_PIX_FMT_NONE; +- uint8_t *rgb_data = av_malloc(W * H * 4); +- const uint8_t * const rgb_src[4] = { rgb_data, NULL, NULL, NULL }; +- int rgb_stride[4] = { 4 * W, 0, 0, 0 }; +- uint8_t *data = av_malloc(4 * W * H); +- const uint8_t * const src[4] = { data, data + W * H, data + W * H * 2, data + W * H * 3 }; +- int stride[4] = { W, W, W, W }; + int x, y; + struct SwsContext *sws; + AVLFG rand; + int res = -1; + int i; + FILE *fp = NULL; +- +- if (!rgb_data || !data) +- return -1; ++ uint8_t *rgb_data; ++ uint8_t * rgb_src[4] = { NULL }; ++ int rgb_stride[4] = { 0 }; ++ uint8_t *data; ++ uint8_t * src[4] = { NULL }; ++ int stride[4] = { 0 }; + + for (i = 1; i < argc; i += 2) { ++ const char * const arg2 = argv[i+1]; ++ + if (argv[i][0] != '-' || i + 1 == argc) + goto bad_option; + if (!strcmp(argv[i], "-ref")) { +- fp = fopen(argv[i + 1], "r"); ++ fp = fopen(arg2, "r"); + if (!fp) { +- fprintf(stderr, "could not open '%s'\n", argv[i + 1]); ++ fprintf(stderr, "could not open '%s'\n", arg2); + goto error; + } + } else if (!strcmp(argv[i], "-cpuflags")) { + unsigned flags = av_get_cpu_flags(); +- int ret = av_parse_cpu_caps(&flags, argv[i + 1]); ++ int ret = av_parse_cpu_caps(&flags, arg2); + if (ret < 0) { +- fprintf(stderr, "invalid cpu flags %s\n", argv[i + 1]); ++ fprintf(stderr, "invalid cpu flags %s\n", arg2); + return ret; + } + av_force_cpu_flags(flags); + } else if (!strcmp(argv[i], "-src")) { +- srcFormat = av_get_pix_fmt(argv[i + 1]); ++ srcFormat = av_get_pix_fmt(arg2); + if (srcFormat == AV_PIX_FMT_NONE) { +- fprintf(stderr, "invalid pixel format %s\n", argv[i + 1]); ++ fprintf(stderr, "invalid pixel format %s\n", arg2); + return -1; + } + } else if (!strcmp(argv[i], "-dst")) { +- dstFormat = av_get_pix_fmt(argv[i + 1]); ++ dstFormat = av_get_pix_fmt(arg2); + if (dstFormat == AV_PIX_FMT_NONE) { +- fprintf(stderr, "invalid pixel format %s\n", argv[i + 1]); ++ fprintf(stderr, "invalid pixel format %s\n", arg2); ++ return -1; ++ } ++ } else if (!strcmp(argv[i], "-w")) { ++ char * p = NULL; ++ W = strtoul(arg2, &p, 0); ++ if (!W || *p) { ++ fprintf(stderr, "bad width %s\n", arg2); ++ return -1; ++ } ++ } else if (!strcmp(argv[i], "-h")) { ++ char * p = NULL; ++ H = strtoul(arg2, &p, 0); ++ if (!H || *p) { ++ fprintf(stderr, "bad height '%s'\n", arg2); ++ return -1; ++ } ++ } else if (!strcmp(argv[i], "-t")) { ++ char * p = NULL; ++ time_rep = (int)strtol(arg2, &p, 0); ++ if (*p) { ++ fprintf(stderr, "bad time repetitions '%s'\n", arg2); + return -1; + } + } else { +@@ -414,15 +457,34 @@ bad_option: + } + } + +- sws = sws_getContext(W / 12, H / 12, AV_PIX_FMT_RGB32, W, H, ++ S = (W + 15) & ~15; ++ rgb_data = av_mallocz(S * H * 4); ++ rgb_src[0] = rgb_data; ++ rgb_stride[0] = 4 * S; ++ data = av_mallocz(4 * S * H); ++ src[0] = data; ++ src[1] = data + S * H; ++ src[2] = data + S * H * 2; ++ src[3] = data + S * H * 3; ++ stride[0] = S; ++ stride[1] = S; ++ stride[2] = S; ++ stride[3] = S; ++ H2 = H < 96 ? 8 : H / 12; ++ W2 = W < 96 ? 8 : W / 12; ++ ++ if (!rgb_data || !data) ++ return -1; ++ ++ sws = sws_getContext(W2, H2, AV_PIX_FMT_RGB32, W, H, + AV_PIX_FMT_YUVA420P, SWS_BILINEAR, NULL, NULL, NULL); + + av_lfg_init(&rand, 1); + + for (y = 0; y < H; y++) + for (x = 0; x < W * 4; x++) +- rgb_data[ x + y * 4 * W] = av_lfg_get(&rand); +- res = sws_scale(sws, rgb_src, rgb_stride, 0, H / 12, (uint8_t * const *) src, stride); ++ rgb_data[ x + y * 4 * S] = av_lfg_get(&rand); ++ res = sws_scale(sws, (const uint8_t * const *)rgb_src, rgb_stride, 0, H2, (uint8_t * const *) src, stride); + if (res < 0 || res != H) { + res = -1; + goto error; +@@ -431,10 +493,10 @@ bad_option: + av_free(rgb_data); + + if(fp) { +- res = fileTest(src, stride, W, H, fp, srcFormat, dstFormat); ++ res = fileTest((const uint8_t * const *)src, stride, W, H, fp, srcFormat, dstFormat); + fclose(fp); + } else { +- selfTest(src, stride, W, H, srcFormat, dstFormat); ++ selfTest((const uint8_t * const *)src, stride, W, H, srcFormat, dstFormat); + res = 0; + } + error: +--- /dev/null ++++ b/pi-util/BUILD.txt +@@ -0,0 +1,67 @@ ++Building Pi FFmpeg ++================== ++ ++Current only building on a Pi is supported. ++This builds ffmpeg the way I've tested it ++ ++Get all dependencies - the current package dependencies are good enough ++ ++$ sudo apt-get build-dep ffmpeg ++ ++Configure using the pi-util/conf_native.sh script ++------------------------------------------------- ++ ++This sets the normal release options and creates an ouutput dir to build into ++The directory name will depend on system and options but will be under out/ ++ ++There are a few choices here ++ --mmal build including the legacy mmal-based decoders and zero-copy code ++ this requires appropriate libraries which currently will exist for ++ armv7 but not arm64 ++ --noshared ++ Build a static image rather than a shared library one. Static is ++ easier for testing as there is no need to worry about library ++ paths being confused and therefore running the wrong code, Shared ++ is what is needed, in most cases, when building for use by other ++ programs. ++ --usr Set install dir to /usr (i.e. system default) rather than in ++ <builddir>/install ++ ++So for a static build ++--------------------- ++ ++$ pi-util/conf_native.sh --noshared ++ ++$ make -j8 -C out/<wherever the script said it was building to> ++ ++You can now run ffmpeg directly from where it was built ++ ++For a shared build ++------------------ ++ ++There are two choices here ++ ++$ pi-util/conf_native.sh ++$ make -j8 -C out/<builddir> install ++ ++This sets the install prefix to <builddir>/install and is probably what you ++want if you don't want to overwrite the system files. ++ ++You can now set LD_LIBRARY_PATH appropriately and run ffmpeg from where it was ++built. You can copy the contents of <build dir>/install to /usr and that mostly ++works. The only downside is that paths in pkgconfig end up being set to the ++install directory in your build directory which may be less than ideal when ++building other packages. ++ ++The alternative if you just want to replace the system libs is: ++ ++$ pi-util/conf_native.sh --usr ++$ make -j8 -C out/<builddir> ++$ sudo pi-util/clean_usr_libs.sh ++$ sudo make -j8 -C out/<builddir> install ++ ++The clean_usr_libs.sh step wipes any existing libs & includes (for all ++architectures) from the system which helps avoid confusion when running other ++progs as you can be sure you're not running old code which is unfortunately ++easy to do otherwise. ++ +--- /dev/null ++++ b/pi-util/NOTES.txt +@@ -0,0 +1,69 @@ ++Notes on the hevc_rpi decoder & associated support code ++------------------------------------------------------- ++ ++There are 3 main parts to the existing code: ++ ++1) The decoder - this is all in libavcodec as rpi_hevc*. ++ ++2) A few filters to deal with Sand frames and a small patch to ++automatically select the sand->i420 converter when required. ++ ++3) A kludge in ffmpeg.c to display the decoded video. This could & should ++be converted into a proper ffmpeg display module. ++ ++ ++Decoder ++------- ++ ++The decoder is a modified version of the existing ffmpeg hevc decoder. ++Generally it is ~100% faster than the existing ffmpeg hevc s/w decoder. ++More complex bitstreams can be up to ~200% faster but particularly easy ++streams can cut its advantage down to ~50%. This means that a Pi3+ can ++display nearly all 8-bit 1080p30 streams and with some overclocking it can ++display most lower bitrate 10-bit 1080p30 streams - this latter case is ++not helped by the requirement to downsample to 8-bit before display on a ++Pi. ++ ++It has had co-processor offload added for inter-pred and large block ++residual transform. Various parts have had optimized ARM NEON assembler ++added and the existing ARM asm sections have been profiled and ++re-optimized for A53. The main C code has been substantially reworked at ++its lower levels in an attempt to optimize it and minimize memory ++bandwidth. To some extent code paths that deal with frame types that it ++doesn't support have been pruned. ++ ++It outputs frames in Broadcom Sand format. This is a somewhat annoying ++layout that doesn't fit into ffmpegs standard frame descriptions. It has ++vertical stripes of 128 horizontal pixels (64 in 10 bit forms) with Y for ++the stripe followed by interleaved U & V, that is then followed by the Y ++for the next stripe, etc. The final stripe is always padded to ++stripe-width. This is used in an attempt to help with cache locality and ++cut down on the number of dram bank switches. It is annoying to use for ++inter-pred with conventional processing but the way the Pi QPU (which is ++used for inter-pred) works means that it has negligible downsides here and ++the improved memory performance exceeds the overhead of the increased ++complexity in the rest of the code. ++ ++Frames must be allocated out of GPU memory (as otherwise they can't be ++accessed by the co-processors). Utility functions (in rpi_zc.c) have been ++written to make this easier. As the frames are already in GPU memory they ++can be displayed by the Pi h/w without any further copying. ++ ++ ++Known non-features ++------------------ ++ ++Frame allocation should probably be done in some other way in order to fit ++into the standard framework better. ++ ++Sand frames are currently declared as software frames, there is an ++argument that they should be hardware frames but they aren't really. ++ ++There must be a better way of auto-selecting the hevc_rpi decoder over the ++normal s/w hevc decoder, but I became confused by the existing h/w ++acceleration framework and what I wanted to do didn't seem to fit in ++neatly. ++ ++Display should be a proper device rather than a kludge in ffmpeg.c ++ ++ +--- /dev/null ++++ b/pi-util/TESTMESA.txt +@@ -0,0 +1,82 @@ ++# Setup & Build instructions for testing Argon30 mesa support (on Pi4)
++
++# These assume that the drm_mmal test for Sand8 has been built on this Pi
++# as build relies on many of the same files
++
++# 1st get everything required to build ffmpeg
++# If sources aren't already enabled on your Pi then enable them
++sudo su
++sed "s/#deb-src/deb-src/" /etc/apt/sources.list > /tmp/sources.list
++sed "s/#deb-src/deb-src/" /etc/apt/sources.list.d/raspi.list > /tmp/raspi.list
++mv /tmp/sources.list /etc/apt/
++mv /tmp/raspi.list /etc/apt/sources.list.d/
++apt update
++
++# Get dependancies
++sudo apt build-dep ffmpeg
++
++sudo apt install meson libepoxy-dev libxcb-dri3-dev libxcb1-dev libx11-dev libx11-xcb-dev libdrm-dev
++
++# Enable H265 V4L2 request decoder
++sudo su
++echo dtoverlay=rpivid-v4l2 >> /boot/config.txt
++# You may also want to add more CMA if you are going to try 4k videos
++# Change the dtoverlay=vc4-fkms-v3d line in config.txt to read
++# dtoverlay=vc4-fkms-v3d,cma-512
++reboot
++# Check it has turned up
++ls -la /dev/video*
++# This should include video19
++# crw-rw----+ 1 root video 81, 7 Aug 4 17:25 /dev/video19
++
++# Currently on the Pi the linux headers from the debian distro don't match
++# the kernel that we ship and we need to update them - hopefully this step
++# will be unneeded in the future
++sudo apt install git bc bison flex libssl-dev make
++git clone --depth=1 https://github.com/raspberrypi/linux --branch rpi-5.10.y
++cd linux
++KERNEL=kernel7l
++make bcm2711_defconfig
++make headers_install
++sudo cp -r usr/include/linux /usr/include
++cd ..
++
++# Config - this builds a staticly linked ffmpeg which is easier for testing
++pi-util/conf_native.sh --noshared
++
++# Build (this is a bit dull)
++# If you want to poke the source the libavdevice/egl_vout.c contains the
++# output code -
++cd out/armv7-static-rel
++
++# Check that you have actually configured V4L2 request
++grep HEVC_V4L2REQUEST config.h
++# You are hoping for
++# #define CONFIG_HEVC_V4L2REQUEST_HWACCEL 1
++# if you get 0 then the config has failed
++
++make -j6
++
++# Grab test streams
++wget http://www.jell.yfish.us/media/jellyfish-3-mbps-hd-h264.mkv
++wget http://www.jell.yfish.us/media/jellyfish-3-mbps-hd-hevc.mkv
++wget http://www.jell.yfish.us/media/jellyfish-3-mbps-hd-hevc-10bit.mkv
++
++# Test i420 output (works currently)
++./ffmpeg -no_cvt_hw -vcodec h264_v4l2m2m -i jellyfish-3-mbps-hd-h264.mkv -f vout_egl -
++
++# Test Sand8 output - doesn't currently work but should once you have
++# Sand8 working in drm_mmal. I can't guarantee that this will work as
++# I can't test this path with a known working format, but the debug looks
++# good. If this doesn't work & drm_mmal does with sand8 then come back to me
++# The "show_all 1" forces vout to display every frame otherwise it drops any
++# frame that would cause it to block
++./ffmpeg -no_cvt_hw -hwaccel drm -vcodec hevc -i jellyfish-3-mbps-hd-hevc.mkv -show_all 1 -f vout_egl -
++
++# Test Sand30 - doesn't currently work
++# (Beware that when FFmpeg errors out it often leaves your teminal window
++# in a state where you need to reset it)
++./ffmpeg -no_cvt_hw -hwaccel drm -vcodec hevc -i jellyfish-3-mbps-hd-hevc-10bit.mkv -f vout_egl -
++
++
++
+--- /dev/null ++++ b/pi-util/clean_usr_libs.sh +@@ -0,0 +1,42 @@ ++set -e ++U=/usr/include/arm-linux-gnueabihf ++rm -rf $U/libavcodec ++rm -rf $U/libavdevice ++rm -rf $U/libavfilter ++rm -rf $U/libavformat ++rm -rf $U/libavutil ++rm -rf $U/libswresample ++rm -rf $U/libswscale ++U=/usr/include/aarch64-linux-gnu ++rm -rf $U/libavcodec ++rm -rf $U/libavdevice ++rm -rf $U/libavfilter ++rm -rf $U/libavformat ++rm -rf $U/libavutil ++rm -rf $U/libswresample ++rm -rf $U/libswscale ++U=/usr/lib/arm-linux-gnueabihf ++rm -f $U/libavcodec.* ++rm -f $U/libavdevice.* ++rm -f $U/libavfilter.* ++rm -f $U/libavformat.* ++rm -f $U/libavutil.* ++rm -f $U/libswresample.* ++rm -f $U/libswscale.* ++U=/usr/lib/arm-linux-gnueabihf/neon/vfp ++rm -f $U/libavcodec.* ++rm -f $U/libavdevice.* ++rm -f $U/libavfilter.* ++rm -f $U/libavformat.* ++rm -f $U/libavutil.* ++rm -f $U/libswresample.* ++rm -f $U/libswscale.* ++U=/usr/lib/aarch64-linux-gnu ++rm -f $U/libavcodec.* ++rm -f $U/libavdevice.* ++rm -f $U/libavfilter.* ++rm -f $U/libavformat.* ++rm -f $U/libavutil.* ++rm -f $U/libswresample.* ++rm -f $U/libswscale.* ++ +--- /dev/null ++++ b/pi-util/conf_arm64_native.sh +@@ -0,0 +1,45 @@ ++echo "Configure for ARM64 native build" ++ ++#RPI_KEEPS="-save-temps=obj" ++ ++SHARED_LIBS="--enable-shared" ++if [ "$1" == "--noshared" ]; then ++ SHARED_LIBS="--disable-shared" ++ echo Static libs ++ OUT=out/arm64-static-rel ++else ++ echo Shared libs ++ OUT=out/arm64-shared-rel ++fi ++ ++mkdir -p $OUT ++cd $OUT ++ ++A=aarch64-linux-gnu ++USR_PREFIX=`pwd`/install ++LIB_PREFIX=$USR_PREFIX/lib/$A ++INC_PREFIX=$USR_PREFIX/include/$A ++ ++../../configure \ ++ --prefix=$USR_PREFIX\ ++ --libdir=$LIB_PREFIX\ ++ --incdir=$INC_PREFIX\ ++ --disable-stripping\ ++ --disable-thumb\ ++ --disable-mmal\ ++ --enable-sand\ ++ --enable-v4l2-request\ ++ --enable-libdrm\ ++ --enable-epoxy\ ++ --enable-libudev\ ++ --enable-vout-drm\ ++ --enable-vout-egl\ ++ $SHARED_LIBS\ ++ --extra-cflags="-ggdb" ++ ++# --enable-decoder=hevc_rpi\ ++# --enable-extra-warnings\ ++# --arch=armv71\ ++ ++# gcc option for getting asm listing ++# -Wa,-ahls +--- /dev/null ++++ b/pi-util/conf_h265.2016.csv +@@ -0,0 +1,195 @@ ++1,HEVC_v1/AMP_A_Samsung_7,AMP_A_Samsung_7.bin,AMP_A_Samsung_7.md5,8
++1,HEVC_v1/AMP_B_Samsung_7,AMP_B_Samsung_7.bin,AMP_B_Samsung_7.md5,8
++1,HEVC_v1/AMP_D_Hisilicon_3,AMP_D_Hisilicon.bit,AMP_D_Hisilicon_3.yuv.md5,8
++1,HEVC_v1/AMP_E_Hisilicon_3,AMP_E_Hisilicon.bit,AMP_E_Hisilicon_3.yuv.md5,8
++1,HEVC_v1/AMP_F_Hisilicon_3,AMP_F_Hisilicon_3.bit,AMP_F_Hisilicon_3.yuv.md5,8
++1,HEVC_v1/AMVP_A_MTK_4,AMVP_A_MTK_4.bit,AMVP_A_MTK_4.md5,8
++1,HEVC_v1/AMVP_B_MTK_4,AMVP_B_MTK_4.bit,AMVP_B_MTK_4.md5,8
++1,HEVC_v1/AMVP_C_Samsung_7,AMVP_C_Samsung_7.bin,AMVP_C_Samsung_7.md5,8
++1,HEVC_v1/BUMPING_A_ericsson_1,BUMPING_A_ericsson_1.bit,BUMPING_A_ericsson_1.md5,8
++1,HEVC_v1/CAINIT_A_SHARP_4,CAINIT_A_SHARP_4.bit,CAINIT_A_SHARP_4.md5,8
++1,HEVC_v1/CAINIT_B_SHARP_4,CAINIT_B_SHARP_4.bit,CAINIT_B_SHARP_4.md5,8
++1,HEVC_v1/CAINIT_C_SHARP_3,CAINIT_C_SHARP_3.bit,CAINIT_C_SHARP_3.md5,8
++1,HEVC_v1/CAINIT_D_SHARP_3,CAINIT_D_SHARP_3.bit,CAINIT_D_SHARP_3.md5,8
++1,HEVC_v1/CAINIT_E_SHARP_3,CAINIT_E_SHARP_3.bit,CAINIT_E_SHARP_3.md5,8
++1,HEVC_v1/CAINIT_F_SHARP_3,CAINIT_F_SHARP_3.bit,CAINIT_F_SHARP_3.md5,8
++1,HEVC_v1/CAINIT_G_SHARP_3,CAINIT_G_SHARP_3.bit,CAINIT_G_SHARP_3.md5,8
++1,HEVC_v1/CAINIT_H_SHARP_3,CAINIT_H_SHARP_3.bit,CAINIT_H_SHARP_3.md5,8
++1,HEVC_v1/CIP_A_Panasonic_3,CIP_A_Panasonic_3.bit,CIP_A_Panasonic_3_yuv.md5,8
++1,HEVC_v1/cip_B_NEC_3,cip_B_NEC_3.bit,cip_B_NEC_3.md5,8
++1,HEVC_v1/CIP_C_Panasonic_2,CIP_C_Panasonic_2.bit,CIP_C_Panasonic_2_yuv.md5,8
++1,HEVC_v1/CONFWIN_A_Sony_1,CONFWIN_A_Sony_1.bit,CONFWIN_A_Sony_1.md5,8
++1,HEVC_v1/DBLK_A_MAIN10_VIXS_4,DBLK_A_MAIN10_VIXS_4.bit,DBLK_A_MAIN10_VIXS_4.md5,10
++1,HEVC_v1/DBLK_A_SONY_3,DBLK_A_SONY_3.bit,DBLK_A_SONY_3.bit.yuv.md5,8
++1,HEVC_v1/DBLK_B_SONY_3,DBLK_B_SONY_3.bit,DBLK_B_SONY_3.bit.yuv.md5,8
++1,HEVC_v1/DBLK_C_SONY_3,DBLK_C_SONY_3.bit,DBLK_C_SONY_3.bit.yuv.md5,8
++1,HEVC_v1/DBLK_D_VIXS_2,DBLK_D_VIXS_2.bit,DBLK_D_VIXS_2_yuv.md5,8
++1,HEVC_v1/DBLK_E_VIXS_2,DBLK_E_VIXS_2.bit,DBLK_E_VIXS_2_yuv.md5,8
++1,HEVC_v1/DBLK_F_VIXS_2,DBLK_F_VIXS_2.bit,DBLK_F_VIXS_2_yuv.md5,8
++1,HEVC_v1/DBLK_G_VIXS_2,DBLK_G_VIXS_2.bit,DBLK_G_VIXS_2_yuv.md5,8
++1,HEVC_v1/DELTAQP_A_BRCM_4,DELTAQP_A_BRCM_4.bit,DELTAQP_A_BRCM_4_yuv.md5,8
++1,HEVC_v1/DELTAQP_B_SONY_3,DELTAQP_B_SONY_3.bit,DELTAQP_B_SONY_3.bit.yuv.md5,8
++1,HEVC_v1/DELTAQP_C_SONY_3,DELTAQP_C_SONY_3.bit,DELTAQP_C_SONY_3.bit.yuv.md5,8
++1,HEVC_v1/DSLICE_A_HHI_5,DSLICE_A_HHI_5.bin,DSLICE_A_HHI_5.md5,8
++1,HEVC_v1/DSLICE_B_HHI_5,DSLICE_B_HHI_5.bin,DSLICE_B_HHI_5.md5,8
++1,HEVC_v1/DSLICE_C_HHI_5,DSLICE_C_HHI_5.bin,DSLICE_C_HHI_5.md5,8
++1,HEVC_v1/ENTP_A_QUALCOMM_1,ENTP_A_Qualcomm_1.bit,ENTP_A_Qualcomm_1.md5,8
++1,HEVC_v1/ENTP_B_Qualcomm_1,ENTP_B_Qualcomm_1.bit,ENTP_B_Qualcomm_1.md5,8
++1,HEVC_v1/ENTP_C_Qualcomm_1,ENTP_C_Qualcomm_1.bit,ENTP_C_Qualcomm_1.md5,8
++1,HEVC_v1/EXT_A_ericsson_4,EXT_A_ericsson_4.bit,EXT_A_ericsson_4.md5,8
++1,HEVC_v1/FILLER_A_Sony_1,FILLER_A_Sony_1.bit,FILLER_A_Sony_1.md5,8
++1,HEVC_v1/HRD_A_Fujitsu_3,HRD_A_Fujitsu_3.bin,HRD_A_Fujitsu_3.md5,8
++1,HEVC_v1/INITQP_A_Sony_1,INITQP_A_Sony_1.bit,INITQP_A_Sony_1.md5,8
++1,HEVC_v1/INITQP_B_Main10_Sony_1,INITQP_B_Main10_Sony_1.bit,INITQP_B_Main10_Sony_1.md5,10
++1,HEVC_v1/ipcm_A_NEC_3,ipcm_A_NEC_3.bit,ipcm_A_NEC_3.md5,8
++1,HEVC_v1/ipcm_B_NEC_3,ipcm_B_NEC_3.bit,ipcm_B_NEC_3.md5,8
++1,HEVC_v1/ipcm_C_NEC_3,ipcm_C_NEC_3.bit,ipcm_C_NEC_3.md5,8
++1,HEVC_v1/ipcm_D_NEC_3,ipcm_D_NEC_3.bit,ipcm_D_NEC_3.md5,8
++1,HEVC_v1/ipcm_E_NEC_2,ipcm_E_NEC_2.bit,ipcm_E_NEC_2.md5,8
++1,HEVC_v1/IPRED_A_docomo_2,IPRED_A_docomo_2.bit,IPRED_A_docomo_2.md5,8
++1,HEVC_v1/IPRED_B_Nokia_3,IPRED_B_Nokia_3.bit,IPRED_B_Nokia_3_yuv.md5,8
++1,HEVC_v1/IPRED_C_Mitsubishi_3,IPRED_C_Mitsubishi_3.bit,IPRED_C_Mitsubishi_3_yuv.md5,8
++1,HEVC_v1/LS_A_Orange_2,LS_A_Orange_2.bit,LS_A_Orange_2_yuv.md5,8
++1,HEVC_v1/LS_B_Orange_4,LS_B_Orange_4.bit,LS_B_Orange_4_yuv.md5,8
++1,HEVC_v1/LTRPSPS_A_Qualcomm_1,LTRPSPS_A_Qualcomm_1.bit,LTRPSPS_A_Qualcomm_1.md5,8
++1,HEVC_v1/MAXBINS_A_TI_5,MAXBINS_A_TI_5.bit,MAXBINS_A_TI_5_yuv.md5,8
++1,HEVC_v1/MAXBINS_B_TI_5,MAXBINS_B_TI_5.bit,MAXBINS_B_TI_5_yuv.md5,8
++1,HEVC_v1/MAXBINS_C_TI_5,MAXBINS_C_TI_5.bit,MAXBINS_C_TI_5_yuv.md5,8
++1,HEVC_v1/MERGE_A_TI_3,MERGE_A_TI_3.bit,MERGE_A_TI_3.md5,8
++1,HEVC_v1/MERGE_B_TI_3,MERGE_B_TI_3.bit,MERGE_B_TI_3.md5,8
++1,HEVC_v1/MERGE_C_TI_3,MERGE_C_TI_3.bit,MERGE_C_TI_3.md5,8
++1,HEVC_v1/MERGE_D_TI_3,MERGE_D_TI_3.bit,MERGE_D_TI_3.md5,8
++1,HEVC_v1/MERGE_E_TI_3,MERGE_E_TI_3.bit,MERGE_E_TI_3.md5,8
++1,HEVC_v1/MERGE_F_MTK_4,MERGE_F_MTK_4.bit,MERGE_F_MTK_4.md5,8
++1,HEVC_v1/MERGE_G_HHI_4,MERGE_G_HHI_4.bit,MERGE_G_HHI_4.md5,8
++1,HEVC_v1/MVCLIP_A_qualcomm_3,MVCLIP_A_qualcomm_3.bit,MVCLIP_A_qualcomm_3.yuv.md5,8
++1,HEVC_v1/MVDL1ZERO_A_docomo_4,MVDL1ZERO_A_docomo_4.bit,MVDL1ZERO_A_docomo_4.md5,8
++1,HEVC_v1/MVEDGE_A_qualcomm_3,MVEDGE_A_qualcomm_3.bit,MVEDGE_A_qualcomm_3.yuv.md5,8
++1,HEVC_v1/NoOutPrior_A_Qualcomm_1,NoOutPrior_A_Qualcomm_1.bit,NoOutPrior_A_Qualcomm_1.md5,8
++1,HEVC_v1/NoOutPrior_B_Qualcomm_1,NoOutPrior_B_Qualcomm_1.bit,NoOutPrior_B_Qualcomm_1.md5,8
++1,HEVC_v1/NUT_A_ericsson_5,NUT_A_ericsson_5.bit,NUT_A_ericsson_5.md5,8
++1,HEVC_v1/OPFLAG_A_Qualcomm_1,OPFLAG_A_Qualcomm_1.bit,OPFLAG_A_Qualcomm_1.md5,8
++1,HEVC_v1/OPFLAG_B_Qualcomm_1,OPFLAG_B_Qualcomm_1.bit,OPFLAG_B_Qualcomm_1.md5,8
++1,HEVC_v1/OPFLAG_C_Qualcomm_1,OPFLAG_C_Qualcomm_1.bit,OPFLAG_C_Qualcomm_1.md5,8
++1,HEVC_v1/PICSIZE_A_Bossen_1,PICSIZE_A_Bossen_1.bin,PICSIZE_A_Bossen_1.md5,8
++1,HEVC_v1/PICSIZE_B_Bossen_1,PICSIZE_B_Bossen_1.bin,PICSIZE_B_Bossen_1.md5,8
++1,HEVC_v1/PICSIZE_C_Bossen_1,PICSIZE_C_Bossen_1.bin,PICSIZE_C_Bossen_1.md5,8
++1,HEVC_v1/PICSIZE_D_Bossen_1,PICSIZE_D_Bossen_1.bin,PICSIZE_D_Bossen_1.md5,8
++1,HEVC_v1/PMERGE_A_TI_3,PMERGE_A_TI_3.bit,PMERGE_A_TI_3.md5,8
++1,HEVC_v1/PMERGE_B_TI_3,PMERGE_B_TI_3.bit,PMERGE_B_TI_3.md5,8
++1,HEVC_v1/PMERGE_C_TI_3,PMERGE_C_TI_3.bit,PMERGE_C_TI_3.md5,8
++1,HEVC_v1/PMERGE_D_TI_3,PMERGE_D_TI_3.bit,PMERGE_D_TI_3.md5,8
++1,HEVC_v1/PMERGE_E_TI_3,PMERGE_E_TI_3.bit,PMERGE_E_TI_3.md5,8
++1,HEVC_v1/POC_A_Bossen_3,POC_A_Bossen_3.bin,POC_A_Bossen_3.md5,8
++1,HEVC_v1/PPS_A_qualcomm_7,PPS_A_qualcomm_7.bit,PPS_A_qualcomm_7.yuv.md5,8
++1,HEVC_v1/PS_B_VIDYO_3,PS_B_VIDYO_3.bit,PS_B_VIDYO_3_yuv.md5,8
++1,HEVC_v1/RAP_A_docomo_6,RAP_A_docomo_6.bit,RAP_A_docomo_6.md5,8
++1,HEVC_v1/RAP_B_Bossen_2,RAP_B_Bossen_2.bit,RAP_B_Bossen_2.md5,8
++1,HEVC_v1/RPLM_A_qualcomm_4,RPLM_A_qualcomm_4.bit,RPLM_A_qualcomm_4.yuv.md5,8
++1,HEVC_v1/RPLM_B_qualcomm_4,RPLM_B_qualcomm_4.bit,RPLM_B_qualcomm_4.yuv.md5,8
++1,HEVC_v1/RPS_A_docomo_5,RPS_A_docomo_5.bit,RPS_A_docomo_5.md5,8
++1,HEVC_v1/RPS_B_qualcomm_5,RPS_B_qualcomm_5.bit,RPS_B_qualcomm_5.yuv.md5,8
++1,HEVC_v1/RPS_C_ericsson_5,RPS_C_ericsson_5.bit,RPS_C_ericsson_5.md5,8
++1,HEVC_v1/RPS_D_ericsson_6,RPS_D_ericsson_6.bit,RPS_D_ericsson_6.md5,8
++1,HEVC_v1/RPS_E_qualcomm_5,RPS_E_qualcomm_5.bit,RPS_E_qualcomm_5.yuv.md5,8
++1,HEVC_v1/RPS_F_docomo_2,RPS_F_docomo_2.bit,RPS_F_docomo_2.md5,8
++1,HEVC_v1/RQT_A_HHI_4,RQT_A_HHI_4.bit,RQT_A_HHI_4.md5,8
++1,HEVC_v1/RQT_B_HHI_4,RQT_B_HHI_4.bit,RQT_B_HHI_4.md5,8
++1,HEVC_v1/RQT_C_HHI_4,RQT_C_HHI_4.bit,RQT_C_HHI_4.md5,8
++1,HEVC_v1/RQT_D_HHI_4,RQT_D_HHI_4.bit,RQT_D_HHI_4.md5,8
++1,HEVC_v1/RQT_E_HHI_4,RQT_E_HHI_4.bit,RQT_E_HHI_4.md5,8
++1,HEVC_v1/RQT_F_HHI_4,RQT_F_HHI_4.bit,RQT_F_HHI_4.md5,8
++1,HEVC_v1/RQT_G_HHI_4,RQT_G_HHI_4.bit,RQT_G_HHI_4.md5,8
++1,HEVC_v1/SAO_A_MediaTek_4,SAO_A_MediaTek_4.bit,SAO_A_MediaTek_4.md5,8
++1,HEVC_v1/SAO_B_MediaTek_5,SAO_B_MediaTek_5.bit,SAO_B_MediaTek_5.md5,8
++1,HEVC_v1/SAO_C_Samsung_5,SAO_C_Samsung_5.bin,SAO_C_Samsung_5.md5,8
++1,HEVC_v1/SAO_D_Samsung_5,SAO_D_Samsung_5.bin,SAO_D_Samsung_5.md5,8
++1,HEVC_v1/SAO_E_Canon_4,SAO_E_Canon_4.bit,SAO_E_Canon_4.md5,8
++1,HEVC_v1/SAO_F_Canon_3,SAO_F_Canon_3.bit,SAO_F_Canon_3.md5,8
++1,HEVC_v1/SAO_G_Canon_3,SAO_G_Canon_3.bit,SAO_G_Canon_3.md5,8
++1,HEVC_v1/SAO_H_Parabola_1,SAO_H_Parabola_1.bit,SAO_H_Parabola_1.md5,8
++1,HEVC_v1/SAODBLK_A_MainConcept_4,SAODBLK_A_MainConcept_4.bin,SAODBLK_A_MainConcept_4_md5.txt,8
++1,HEVC_v1/SAODBLK_B_MainConcept_4,SAODBLK_B_MainConcept_4.bin,SAODBLK_B_MainConcept_4_md5.txt,8
++1,HEVC_v1/SDH_A_Orange_4,SDH_A_Orange_4.bit,SDH_A_Orange_4_yuv.md5,8
++1,HEVC_v1/SLICES_A_Rovi_3,SLICES_A_Rovi_3.bin,SLICES_A_Rovi_3.md5,8
++1,HEVC_v1/SLIST_A_Sony_5,SLIST_A_Sony_5.bin,SLIST_A_Sony_5_yuv.md5,8
++1,HEVC_v1/SLIST_B_Sony_9,SLIST_B_Sony_9.bin,SLIST_B_Sony_9_yuv.md5,8
++1,HEVC_v1/SLIST_C_Sony_4,SLIST_C_Sony_4.bin,SLIST_C_Sony_4_yuv.md5,8
++1,HEVC_v1/SLIST_D_Sony_9,str.bin,SLIST_D_Sony_9_yuv.md5,8
++1,HEVC_v1/SLPPLP_A_VIDYO_2,SLPPLP_A_VIDYO_2.bit,SLPPLP_A_VIDYO_2_yuv.md5,8
++1,HEVC_v1/STRUCT_A_Samsung_7,STRUCT_A_Samsung_7.bin,STRUCT_A_Samsung_7.md5,8
++1,HEVC_v1/STRUCT_B_Samsung_7,STRUCT_B_Samsung_7.bin,STRUCT_B_Samsung_7.md5,8
++1,HEVC_v1/TILES_A_Cisco_2,TILES_A_Cisco_2.bin,TILES_A_Cisco_2_yuv.md5,8
++1,HEVC_v1/TILES_B_Cisco_1,TILES_B_Cisco_1.bin,TILES_B_Cisco_1_yuv.md5,8
++1,HEVC_v1/TMVP_A_MS_3,TMVP_A_MS_3.bit,TMVP_A_MS_3.yuv.md5,8
++1,HEVC_v1/TSCL_A_VIDYO_5,TSCL_A_VIDYO_5.bit,TSCL_A_VIDYO_5_yuv.md5,8
++1,HEVC_v1/TSCL_B_VIDYO_4,TSCL_B_VIDYO_4.bit,TSCL_B_VIDYO_4_yuv.md5,8
++1,HEVC_v1/TSKIP_A_MS_3,TSKIP_A_MS_3.bit,TSKIP_A_MS_3.yuv.md5,8
++3,HEVC_v1/TSUNEQBD_A_MAIN10_Technicolor_2,TSUNEQBD_A_MAIN10_Technicolor_2.bit,TSUNEQBD_A_MAIN10_Technicolor_2_yuv.md5, # unequal bit depth,10
++1,HEVC_v1/TUSIZE_A_Samsung_1,TUSIZE_A_Samsung_1.bin,TUSIZE_A_Samsung_1.md5,8
++1,HEVC_v1/VPSID_A_VIDYO_2,VPSID_A_VIDYO_2.bit,VPSID_A_VIDYO_2_yuv.md5,8
++3,HEVC_v1/VPSSPSPPS_A_MainConcept_1,VPSSPSPPS_A_MainConcept_1.bin,VPSSPSPPS_A_MainConcept_1_md5.txt, # ???,8
++1,HEVC_v1/WP_A_MAIN10_Toshiba_3,WP_A_MAIN10_Toshiba_3.bit,WP_A_MAIN10_Toshiba_3_yuv.md5,10
++1,HEVC_v1/WP_A_Toshiba_3,WP_A_Toshiba_3.bit,WP_A_Toshiba_3_yuv.md5,8
++1,HEVC_v1/WP_B_Toshiba_3,WP_B_Toshiba_3.bit,WP_B_Toshiba_3_yuv.md5,8
++1,HEVC_v1/WP_MAIN10_B_Toshiba_3,WP_MAIN10_B_Toshiba_3.bit,WP_MAIN10_B_Toshiba_3_yuv.md5,10
++1,HEVC_v1/WPP_A_ericsson_MAIN10_2,WPP_A_ericsson_MAIN10_2.bit,WPP_A_ericsson_MAIN10_yuv.md5,10
++1,HEVC_v1/WPP_A_ericsson_MAIN_2,WPP_A_ericsson_MAIN_2.bit,WPP_A_ericsson_MAIN_2_yuv.md5,8
++1,HEVC_v1/WPP_B_ericsson_MAIN10_2,WPP_B_ericsson_MAIN10_2.bit,WPP_B_ericsson_MAIN10_yuv.md5,10
++1,HEVC_v1/WPP_B_ericsson_MAIN_2,WPP_B_ericsson_MAIN_2.bit,WPP_B_ericsson_MAIN_2_yuv.md5,8
++1,HEVC_v1/WPP_C_ericsson_MAIN10_2,WPP_C_ericsson_MAIN10_2.bit,WPP_C_ericsson_MAIN10_yuv.md5,10
++1,HEVC_v1/WPP_C_ericsson_MAIN_2,WPP_C_ericsson_MAIN_2.bit,WPP_C_ericsson_MAIN_2_yuv.md5,8
++1,HEVC_v1/WPP_D_ericsson_MAIN10_2,WPP_D_ericsson_MAIN10_2.bit,WPP_D_ericsson_MAIN10_yuv.md5,10
++1,HEVC_v1/WPP_D_ericsson_MAIN_2,WPP_D_ericsson_MAIN_2.bit,WPP_D_ericsson_MAIN_2_yuv.md5,8
++1,HEVC_v1/WPP_E_ericsson_MAIN10_2,WPP_E_ericsson_MAIN10_2.bit,WPP_E_ericsson_MAIN10_yuv.md5,10
++1,HEVC_v1/WPP_E_ericsson_MAIN_2,WPP_E_ericsson_MAIN_2.bit,WPP_E_ericsson_MAIN_2_yuv.md5,8
++1,HEVC_v1/WPP_F_ericsson_MAIN10_2,WPP_F_ericsson_MAIN10_2.bit,WPP_F_ericsson_MAIN10_yuv.md5,10
++1,HEVC_v1/WPP_F_ericsson_MAIN_2,WPP_F_ericsson_MAIN_2.bit,WPP_F_ericsson_MAIN_2_yuv.md5,8
++1,RExt/ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_2,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_2.bit,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_yuv_2.md5,0
++0,RExt/Bitdepth_A_RExt_Sony_1,Bitdepth_A_RExt_Sony_1.bin,md5sum.txt,8
++0,RExt/Bitdepth_B_RExt_Sony_1,Bitdepth_B_RExt_Sony_1.bin,md5sum.txt,8
++0,RExt/CCP_10bit_RExt_QCOM,CCP_10bit_RExt_QCOM.bin,CCP_10bit_RExt_QCOM_md5sum.txt,10
++0,RExt/CCP_12bit_RExt_QCOM,CCP_12bit_RExt_QCOM.bin,CCP_12bit_RExt_QCOM_md5sum.txt,8
++0,RExt/CCP_8bit_RExt_QCOM,CCP_8bit_RExt_QCOM.bin,CCP_8bit_RExt_QCOM_md5sum.txt,8
++1,RExt/ExplicitRdpcm_A_BBC_1,ExplicitRdpcm_A_BBC_1.bit,md5sum.txt,0
++0,RExt/ExplicitRdpcm_B_BBC_2,ExplicitRdpcm_B_BBC_1.bit,md5sum.txt,8
++0,RExt/EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_10BIT_RExt_Sony_1,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_10BIT_RExt_Sony_1.bit,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_10BIT_RExt_Sony_1.md5,10
++0,RExt/EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_12BIT_RExt_Sony_1,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_12BIT_RExt_Sony_1.bit,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_12BIT_RExt_Sony_1.md5,8
++0,RExt/EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_16BIT_RExt_Sony_1,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_16BIT_RExt_Sony_1.bit,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_16BIT_RExt_Sony_1.md5,8
++0,RExt/EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_8BIT_RExt_Sony_1,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_8BIT_RExt_Sony_1.bit,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_8BIT_RExt_Sony_1.md5,8
++0,RExt/EXTPREC_MAIN_444_16_INTRA_10BIT_RExt_Sony_1,EXTPREC_MAIN_444_16_INTRA_10BIT_RExt_Sony_1.bit,EXTPREC_MAIN_444_16_INTRA_10BIT_RExt_Sony_1.md5,10
++0,RExt/EXTPREC_MAIN_444_16_INTRA_12BIT_RExt_Sony_1,EXTPREC_MAIN_444_16_INTRA_12BIT_RExt_Sony_1.bit,EXTPREC_MAIN_444_16_INTRA_12BIT_RExt_Sony_1.md5,8
++0,RExt/EXTPREC_MAIN_444_16_INTRA_16BIT_RExt_Sony_1,EXTPREC_MAIN_444_16_INTRA_16BIT_RExt_Sony_1.bit,EXTPREC_MAIN_444_16_INTRA_16BIT_RExt_Sony_1.md5,8
++0,RExt/EXTPREC_MAIN_444_16_INTRA_8BIT_RExt_Sony_1,EXTPREC_MAIN_444_16_INTRA_8BIT_RExt_Sony_1.bit,EXTPREC_MAIN_444_16_INTRA_8BIT_RExt_Sony_1.md5,8
++1,RExt/GENERAL_10b_420_RExt_Sony_1,GENERAL_10b_420_RExt_Sony_1.bit,GENERAL_10b_420_RExt_Sony_1.md5,10
++1,RExt/GENERAL_10b_422_RExt_Sony_1,GENERAL_10b_422_RExt_Sony_1.bit,GENERAL_10b_422_RExt_Sony_1.md5,0
++1,RExt/GENERAL_10b_444_RExt_Sony_2,GENERAL_10b_444_RExt_Sony_2.bit,GENERAL_10b_444_RExt_Sony_2.md5,0
++1,RExt/GENERAL_12b_400_RExt_Sony_1,GENERAL_12b_400_RExt_Sony_1.bit,GENERAL_12b_400_RExt_Sony_1.md5,0
++1,RExt/GENERAL_12b_420_RExt_Sony_1,GENERAL_12b_420_RExt_Sony_1.bit,GENERAL_12b_420_RExt_Sony_1.md5,0
++1,RExt/GENERAL_12b_422_RExt_Sony_1,GENERAL_12b_422_RExt_Sony_1.bit,GENERAL_12b_422_RExt_Sony_1.md5,0
++1,RExt/GENERAL_12b_444_RExt_Sony_2,GENERAL_12b_444_RExt_Sony_2.bit,GENERAL_12b_444_RExt_Sony_2.md5,0
++0,RExt/GENERAL_16b_400_RExt_Sony_1,GENERAL_16b_400_RExt_Sony_1.bit,GENERAL_16b_400_RExt_Sony_1.md5,0
++0,RExt/GENERAL_16b_444_highThroughput_RExt_Sony_2,GENERAL_16b_444_highThroughput_RExt_Sony_2.bit,GENERAL_16b_444_highThroughput_RExt_Sony_2.md5,8
++0,RExt/GENERAL_16b_444_RExt_Sony_2,GENERAL_16b_444_RExt_Sony_2.bit,GENERAL_16b_444_RExt_Sony_2.md5,8
++1,RExt/GENERAL_8b_400_RExt_Sony_1,GENERAL_8b_400_RExt_Sony_1.bit,GENERAL_8b_400_RExt_Sony_1.md5,0
++1,RExt/GENERAL_8b_420_RExt_Sony_1,GENERAL_8b_420_RExt_Sony_1.bit,GENERAL_8b_420_RExt_Sony_1.md5,8
++1,RExt/GENERAL_8b_444_RExt_Sony_2,GENERAL_8b_444_RExt_Sony_2.bit,GENERAL_8b_444_RExt_Sony_2.md5,0
++1,RExt/IPCM_A_RExt_NEC_2,IPCM_A_RExt_NEC_2.bit,IPCM_A_RExt_NEC_2_yuv.md5,0
++1,RExt/IPCM_B_RExt_NEC,IPCM_B_RExt_NEC.bit,IPCM_B_RExt_NEC_yuv.md5,0
++1,RExt/Main_422_10_A_RExt_Sony_2,Main_422_10_A_RExt_Sony_2.bin,md5sum.txt,0
++1,RExt/Main_422_10_B_RExt_Sony_2,Main_422_10_B_RExt_Sony_2.bin,md5sum.txt,0
++1,RExt/PERSIST_RPARAM_A_RExt_Sony_3,PERSIST_RPARAM_A_RExt_Sony_3.bit,PERSIST_RPARAM_A_RExt_Sony_3.md5,0
++1,RExt/QMATRIX_A_RExt_Sony_1,QMATRIX_A_RExt_Sony_1.bit,QMATRIX_A_RExt_Sony_1.md5,0
++0,RExt/SAO_A_RExt_MediaTek_1,SAO_A_RExt_MediaTek_1.bit,SAO_A_RExt_MediaTek_1.md5, # Runs out of memory - could be fixed,8
++0,RExt/TSCTX_10bit_I_RExt_SHARP_1,TSCTX_10bit_I_RExt_SHARP_1.bin,TSCTX_10bit_I_RExt_SHARP_1.md5,10
++0,RExt/TSCTX_10bit_RExt_SHARP_1,TSCTX_10bit_RExt_SHARP_1.bin,TSCTX_10bit_RExt_SHARP_1.md5,10
++0,RExt/TSCTX_12bit_I_RExt_SHARP_1,TSCTX_12bit_I_RExt_SHARP_1.bin,TSCTX_12bit_I_RExt_SHARP_1.md5,8
++0,RExt/TSCTX_12bit_RExt_SHARP_1,TSCTX_12bit_RExt_SHARP_1.bin,TSCTX_12bit_RExt_SHARP_1.md5,8
++0,RExt/TSCTX_8bit_I_RExt_SHARP_1,TSCTX_8bit_I_RExt_SHARP_1.bin,TSCTX_8bit_I_RExt_SHARP_1.md5,8
++0,RExt/TSCTX_8bit_RExt_SHARP_1,TSCTX_8bit_RExt_SHARP_1.bin,TSCTX_8bit_RExt_SHARP_1.md5,8
++0,RExt/WAVETILES_RExt_Sony_2,WAVETILES_RExt_Sony_2.bit,WAVETILES_RExt_Sony_2.md5,8
++1,local/sao_cu16_mobile_344x280,sao_cu16_mobile_344x280.265,sao_cu16_mobile_344x280.md5,8
++1,local/dblk_cu16_mobile_344x280,dblk_cu16_mobile_344x280.265,dblk_cu16_mobile_344x280.md5,8
++1,local/dblksao_cu16_mobile_344x280,dblksao_cu16_mobile_344x280.265,dblksao_cu16_mobile_344x280.md5,8
++1,local/dblk_pu32_horses_832x448,dblk_pu32_horses_832x448.265,dblk_pu32_horses_832x448.md5,8
++1,local/intra_pred_21_laps,intra_pred_21_laps.265,intra_pred_21_laps.md5,8
+--- /dev/null ++++ b/pi-util/conf_h265.2016_HEVC_v1.csv +@@ -0,0 +1,147 @@ ++1,AMP_A_Samsung_7,AMP_A_Samsung_7.bin,AMP_A_Samsung_7.md5
++1,AMP_B_Samsung_7,AMP_B_Samsung_7.bin,AMP_B_Samsung_7.md5
++1,AMP_D_Hisilicon_3,AMP_D_Hisilicon.bit,AMP_D_Hisilicon_3.yuv.md5
++1,AMP_E_Hisilicon_3,AMP_E_Hisilicon.bit,AMP_E_Hisilicon_3.yuv.md5
++1,AMP_F_Hisilicon_3,AMP_F_Hisilicon_3.bit,AMP_F_Hisilicon_3.yuv.md5
++1,AMVP_A_MTK_4,AMVP_A_MTK_4.bit,AMVP_A_MTK_4.md5
++1,AMVP_B_MTK_4,AMVP_B_MTK_4.bit,AMVP_B_MTK_4.md5
++1,AMVP_C_Samsung_7,AMVP_C_Samsung_7.bin,AMVP_C_Samsung_7.md5
++1,BUMPING_A_ericsson_1,BUMPING_A_ericsson_1.bit,BUMPING_A_ericsson_1.md5
++1,CAINIT_A_SHARP_4,CAINIT_A_SHARP_4.bit,CAINIT_A_SHARP_4.md5
++1,CAINIT_B_SHARP_4,CAINIT_B_SHARP_4.bit,CAINIT_B_SHARP_4.md5
++1,CAINIT_C_SHARP_3,CAINIT_C_SHARP_3.bit,CAINIT_C_SHARP_3.md5
++1,CAINIT_D_SHARP_3,CAINIT_D_SHARP_3.bit,CAINIT_D_SHARP_3.md5
++1,CAINIT_E_SHARP_3,CAINIT_E_SHARP_3.bit,CAINIT_E_SHARP_3.md5
++1,CAINIT_F_SHARP_3,CAINIT_F_SHARP_3.bit,CAINIT_F_SHARP_3.md5
++1,CAINIT_G_SHARP_3,CAINIT_G_SHARP_3.bit,CAINIT_G_SHARP_3.md5
++1,CAINIT_H_SHARP_3,CAINIT_H_SHARP_3.bit,CAINIT_H_SHARP_3.md5
++1,CIP_A_Panasonic_3,CIP_A_Panasonic_3.bit,CIP_A_Panasonic_3_yuv.md5
++1,cip_B_NEC_3,cip_B_NEC_3.bit,cip_B_NEC_3.md5
++1,CIP_C_Panasonic_2,CIP_C_Panasonic_2.bit,CIP_C_Panasonic_2_yuv.md5
++1,CONFWIN_A_Sony_1,CONFWIN_A_Sony_1.bit,CONFWIN_A_Sony_1.md5
++1,DBLK_A_MAIN10_VIXS_4,DBLK_A_MAIN10_VIXS_4.bit,DBLK_A_MAIN10_VIXS_4.md5
++1,DBLK_A_SONY_3,DBLK_A_SONY_3.bit,DBLK_A_SONY_3.bit.yuv.md5
++1,DBLK_B_SONY_3,DBLK_B_SONY_3.bit,DBLK_B_SONY_3.bit.yuv.md5
++1,DBLK_C_SONY_3,DBLK_C_SONY_3.bit,DBLK_C_SONY_3.bit.yuv.md5
++1,DBLK_D_VIXS_2,DBLK_D_VIXS_2.bit,DBLK_D_VIXS_2_yuv.md5
++1,DBLK_E_VIXS_2,DBLK_E_VIXS_2.bit,DBLK_E_VIXS_2_yuv.md5
++1,DBLK_F_VIXS_2,DBLK_F_VIXS_2.bit,DBLK_F_VIXS_2_yuv.md5
++1,DBLK_G_VIXS_2,DBLK_G_VIXS_2.bit,DBLK_G_VIXS_2_yuv.md5
++1,DELTAQP_A_BRCM_4,DELTAQP_A_BRCM_4.bit,DELTAQP_A_BRCM_4_yuv.md5
++1,DELTAQP_B_SONY_3,DELTAQP_B_SONY_3.bit,DELTAQP_B_SONY_3.bit.yuv.md5
++1,DELTAQP_C_SONY_3,DELTAQP_C_SONY_3.bit,DELTAQP_C_SONY_3.bit.yuv.md5
++1,DSLICE_A_HHI_5,DSLICE_A_HHI_5.bin,DSLICE_A_HHI_5.md5
++1,DSLICE_B_HHI_5,DSLICE_B_HHI_5.bin,DSLICE_B_HHI_5.md5
++1,DSLICE_C_HHI_5,DSLICE_C_HHI_5.bin,DSLICE_C_HHI_5.md5
++1,ENTP_A_QUALCOMM_1,ENTP_A_Qualcomm_1.bit,ENTP_A_Qualcomm_1.md5
++1,ENTP_B_Qualcomm_1,ENTP_B_Qualcomm_1.bit,ENTP_B_Qualcomm_1.md5
++1,ENTP_C_Qualcomm_1,ENTP_C_Qualcomm_1.bit,ENTP_C_Qualcomm_1.md5
++1,EXT_A_ericsson_4,EXT_A_ericsson_4.bit,EXT_A_ericsson_4.md5
++1,FILLER_A_Sony_1,FILLER_A_Sony_1.bit,FILLER_A_Sony_1.md5
++1,HRD_A_Fujitsu_3,HRD_A_Fujitsu_3.bin,HRD_A_Fujitsu_3.md5
++1,INITQP_A_Sony_1,INITQP_A_Sony_1.bit,INITQP_A_Sony_1.md5
++1,INITQP_B_Main10_Sony_1,INITQP_B_Main10_Sony_1.bit,INITQP_B_Main10_Sony_1.md5
++1,ipcm_A_NEC_3,ipcm_A_NEC_3.bit,ipcm_A_NEC_3.md5
++1,ipcm_B_NEC_3,ipcm_B_NEC_3.bit,ipcm_B_NEC_3.md5
++1,ipcm_C_NEC_3,ipcm_C_NEC_3.bit,ipcm_C_NEC_3.md5
++1,ipcm_D_NEC_3,ipcm_D_NEC_3.bit,ipcm_D_NEC_3.md5
++1,ipcm_E_NEC_2,ipcm_E_NEC_2.bit,ipcm_E_NEC_2.md5
++1,IPRED_A_docomo_2,IPRED_A_docomo_2.bit,IPRED_A_docomo_2.md5
++1,IPRED_B_Nokia_3,IPRED_B_Nokia_3.bit,IPRED_B_Nokia_3_yuv.md5
++1,IPRED_C_Mitsubishi_3,IPRED_C_Mitsubishi_3.bit,IPRED_C_Mitsubishi_3_yuv.md5
++1,LS_A_Orange_2,LS_A_Orange_2.bit,LS_A_Orange_2_yuv.md5
++1,LS_B_Orange_4,LS_B_Orange_4.bit,LS_B_Orange_4_yuv.md5
++1,LTRPSPS_A_Qualcomm_1,LTRPSPS_A_Qualcomm_1.bit,LTRPSPS_A_Qualcomm_1.md5
++1,MAXBINS_A_TI_5,MAXBINS_A_TI_5.bit,MAXBINS_A_TI_5_yuv.md5
++1,MAXBINS_B_TI_5,MAXBINS_B_TI_5.bit,MAXBINS_B_TI_5_yuv.md5
++1,MAXBINS_C_TI_5,MAXBINS_C_TI_5.bit,MAXBINS_C_TI_5_yuv.md5
++1,MERGE_A_TI_3,MERGE_A_TI_3.bit,MERGE_A_TI_3.md5
++1,MERGE_B_TI_3,MERGE_B_TI_3.bit,MERGE_B_TI_3.md5
++1,MERGE_C_TI_3,MERGE_C_TI_3.bit,MERGE_C_TI_3.md5
++1,MERGE_D_TI_3,MERGE_D_TI_3.bit,MERGE_D_TI_3.md5
++1,MERGE_E_TI_3,MERGE_E_TI_3.bit,MERGE_E_TI_3.md5
++1,MERGE_F_MTK_4,MERGE_F_MTK_4.bit,MERGE_F_MTK_4.md5
++1,MERGE_G_HHI_4,MERGE_G_HHI_4.bit,MERGE_G_HHI_4.md5
++1,MVCLIP_A_qualcomm_3,MVCLIP_A_qualcomm_3.bit,MVCLIP_A_qualcomm_3.yuv.md5
++1,MVDL1ZERO_A_docomo_4,MVDL1ZERO_A_docomo_4.bit,MVDL1ZERO_A_docomo_4.md5
++1,MVEDGE_A_qualcomm_3,MVEDGE_A_qualcomm_3.bit,MVEDGE_A_qualcomm_3.yuv.md5
++1,NoOutPrior_A_Qualcomm_1,NoOutPrior_A_Qualcomm_1.bit,NoOutPrior_A_Qualcomm_1.md5
++1,NoOutPrior_B_Qualcomm_1,NoOutPrior_B_Qualcomm_1.bit,NoOutPrior_B_Qualcomm_1.md5
++1,NUT_A_ericsson_5,NUT_A_ericsson_5.bit,NUT_A_ericsson_5.md5
++1,OPFLAG_A_Qualcomm_1,OPFLAG_A_Qualcomm_1.bit,OPFLAG_A_Qualcomm_1.md5
++1,OPFLAG_B_Qualcomm_1,OPFLAG_B_Qualcomm_1.bit,OPFLAG_B_Qualcomm_1.md5
++1,OPFLAG_C_Qualcomm_1,OPFLAG_C_Qualcomm_1.bit,OPFLAG_C_Qualcomm_1.md5
++1,PICSIZE_A_Bossen_1,PICSIZE_A_Bossen_1.bin,PICSIZE_A_Bossen_1.md5
++1,PICSIZE_B_Bossen_1,PICSIZE_B_Bossen_1.bin,PICSIZE_B_Bossen_1.md5
++1,PICSIZE_C_Bossen_1,PICSIZE_C_Bossen_1.bin,PICSIZE_C_Bossen_1.md5
++1,PICSIZE_D_Bossen_1,PICSIZE_D_Bossen_1.bin,PICSIZE_D_Bossen_1.md5
++1,PMERGE_A_TI_3,PMERGE_A_TI_3.bit,PMERGE_A_TI_3.md5
++1,PMERGE_B_TI_3,PMERGE_B_TI_3.bit,PMERGE_B_TI_3.md5
++1,PMERGE_C_TI_3,PMERGE_C_TI_3.bit,PMERGE_C_TI_3.md5
++1,PMERGE_D_TI_3,PMERGE_D_TI_3.bit,PMERGE_D_TI_3.md5
++1,PMERGE_E_TI_3,PMERGE_E_TI_3.bit,PMERGE_E_TI_3.md5
++1,POC_A_Bossen_3,POC_A_Bossen_3.bin,POC_A_Bossen_3.md5
++1,PPS_A_qualcomm_7,PPS_A_qualcomm_7.bit,PPS_A_qualcomm_7.yuv.md5
++1,PS_B_VIDYO_3,PS_B_VIDYO_3.bit,PS_B_VIDYO_3_yuv.md5
++1,RAP_A_docomo_6,RAP_A_docomo_6.bit,RAP_A_docomo_6.md5
++1,RAP_B_Bossen_2,RAP_B_Bossen_2.bit,RAP_B_Bossen_2.md5
++1,RPLM_A_qualcomm_4,RPLM_A_qualcomm_4.bit,RPLM_A_qualcomm_4.yuv.md5
++1,RPLM_B_qualcomm_4,RPLM_B_qualcomm_4.bit,RPLM_B_qualcomm_4.yuv.md5
++1,RPS_A_docomo_5,RPS_A_docomo_5.bit,RPS_A_docomo_5.md5
++1,RPS_B_qualcomm_5,RPS_B_qualcomm_5.bit,RPS_B_qualcomm_5.yuv.md5
++1,RPS_C_ericsson_5,RPS_C_ericsson_5.bit,RPS_C_ericsson_5.md5
++1,RPS_D_ericsson_6,RPS_D_ericsson_6.bit,RPS_D_ericsson_6.md5
++1,RPS_E_qualcomm_5,RPS_E_qualcomm_5.bit,RPS_E_qualcomm_5.yuv.md5
++1,RPS_F_docomo_2,RPS_F_docomo_2.bit,RPS_F_docomo_2.md5
++1,RQT_A_HHI_4,RQT_A_HHI_4.bit,RQT_A_HHI_4.md5
++1,RQT_B_HHI_4,RQT_B_HHI_4.bit,RQT_B_HHI_4.md5
++1,RQT_C_HHI_4,RQT_C_HHI_4.bit,RQT_C_HHI_4.md5
++1,RQT_D_HHI_4,RQT_D_HHI_4.bit,RQT_D_HHI_4.md5
++1,RQT_E_HHI_4,RQT_E_HHI_4.bit,RQT_E_HHI_4.md5
++1,RQT_F_HHI_4,RQT_F_HHI_4.bit,RQT_F_HHI_4.md5
++1,RQT_G_HHI_4,RQT_G_HHI_4.bit,RQT_G_HHI_4.md5
++1,SAO_A_MediaTek_4,SAO_A_MediaTek_4.bit,SAO_A_MediaTek_4.md5
++1,SAO_B_MediaTek_5,SAO_B_MediaTek_5.bit,SAO_B_MediaTek_5.md5
++1,SAO_C_Samsung_5,SAO_C_Samsung_5.bin,SAO_C_Samsung_5.md5
++1,SAO_D_Samsung_5,SAO_D_Samsung_5.bin,SAO_D_Samsung_5.md5
++1,SAO_E_Canon_4,SAO_E_Canon_4.bit,SAO_E_Canon_4.md5
++1,SAO_F_Canon_3,SAO_F_Canon_3.bit,SAO_F_Canon_3.md5
++1,SAO_G_Canon_3,SAO_G_Canon_3.bit,SAO_G_Canon_3.md5
++1,SAO_H_Parabola_1,SAO_H_Parabola_1.bit,SAO_H_Parabola_1.md5
++2,SAODBLK_A_MainConcept_4,SAODBLK_A_MainConcept_4.bin,SAODBLK_A_MainConcept_4_md5.txt
++2,SAODBLK_B_MainConcept_4,SAODBLK_B_MainConcept_4.bin,SAODBLK_B_MainConcept_4_md5.txt
++1,SDH_A_Orange_4,SDH_A_Orange_4.bit,SDH_A_Orange_4_yuv.md5
++1,SLICES_A_Rovi_3,SLICES_A_Rovi_3.bin,SLICES_A_Rovi_3.md5
++1,SLIST_A_Sony_5,SLIST_A_Sony_5.bin,SLIST_A_Sony_5_yuv.md5
++1,SLIST_B_Sony_9,SLIST_B_Sony_9.bin,SLIST_B_Sony_9_yuv.md5
++1,SLIST_C_Sony_4,SLIST_C_Sony_4.bin,SLIST_C_Sony_4_yuv.md5
++1,SLIST_D_Sony_9,str.bin,SLIST_D_Sony_9_yuv.md5
++1,SLPPLP_A_VIDYO_2,SLPPLP_A_VIDYO_2.bit,SLPPLP_A_VIDYO_2_yuv.md5
++1,STRUCT_A_Samsung_7,STRUCT_A_Samsung_7.bin,STRUCT_A_Samsung_7.md5
++1,STRUCT_B_Samsung_7,STRUCT_B_Samsung_7.bin,STRUCT_B_Samsung_7.md5
++1,TILES_A_Cisco_2,TILES_A_Cisco_2.bin,TILES_A_Cisco_2_yuv.md5
++1,TILES_B_Cisco_1,TILES_B_Cisco_1.bin,TILES_B_Cisco_1_yuv.md5
++1,TMVP_A_MS_3,TMVP_A_MS_3.bit,TMVP_A_MS_3.yuv.md5
++1,TSCL_A_VIDYO_5,TSCL_A_VIDYO_5.bit,TSCL_A_VIDYO_5_yuv.md5
++1,TSCL_B_VIDYO_4,TSCL_B_VIDYO_4.bit,TSCL_B_VIDYO_4_yuv.md5
++1,TSKIP_A_MS_3,TSKIP_A_MS_3.bit,TSKIP_A_MS_3.yuv.md5
++3,TSUNEQBD_A_MAIN10_Technicolor_2,TSUNEQBD_A_MAIN10_Technicolor_2.bit,TSUNEQBD_A_MAIN10_Technicolor_2_yuv.md5, # unequal bit depth
++1,TUSIZE_A_Samsung_1,TUSIZE_A_Samsung_1.bin,TUSIZE_A_Samsung_1.md5
++1,VPSID_A_VIDYO_2,VPSID_A_VIDYO_2.bit,VPSID_A_VIDYO_2_yuv.md5
++3,VPSSPSPPS_A_MainConcept_1,VPSSPSPPS_A_MainConcept_1.bin,VPSSPSPPS_A_MainConcept_1_md5.txt, # ???
++1,WP_A_MAIN10_Toshiba_3,WP_A_MAIN10_Toshiba_3.bit,WP_A_MAIN10_Toshiba_3_yuv.md5
++1,WP_A_Toshiba_3,WP_A_Toshiba_3.bit,WP_A_Toshiba_3_yuv.md5
++1,WP_B_Toshiba_3,WP_B_Toshiba_3.bit,WP_B_Toshiba_3_yuv.md5
++1,WP_MAIN10_B_Toshiba_3,WP_MAIN10_B_Toshiba_3.bit,WP_MAIN10_B_Toshiba_3_yuv.md5
++1,WPP_A_ericsson_MAIN10_2,WPP_A_ericsson_MAIN10_2.bit,WPP_A_ericsson_MAIN10_yuv.md5
++1,WPP_A_ericsson_MAIN_2,WPP_A_ericsson_MAIN_2.bit,WPP_A_ericsson_MAIN_2_yuv.md5
++1,WPP_B_ericsson_MAIN10_2,WPP_B_ericsson_MAIN10_2.bit,WPP_B_ericsson_MAIN10_yuv.md5
++1,WPP_B_ericsson_MAIN_2,WPP_B_ericsson_MAIN_2.bit,WPP_B_ericsson_MAIN_2_yuv.md5
++1,WPP_C_ericsson_MAIN10_2,WPP_C_ericsson_MAIN10_2.bit,WPP_C_ericsson_MAIN10_yuv.md5
++1,WPP_C_ericsson_MAIN_2,WPP_C_ericsson_MAIN_2.bit,WPP_C_ericsson_MAIN_2_yuv.md5
++1,WPP_D_ericsson_MAIN10_2,WPP_D_ericsson_MAIN10_2.bit,WPP_D_ericsson_MAIN10_yuv.md5
++1,WPP_D_ericsson_MAIN_2,WPP_D_ericsson_MAIN_2.bit,WPP_D_ericsson_MAIN_2_yuv.md5
++1,WPP_E_ericsson_MAIN10_2,WPP_E_ericsson_MAIN10_2.bit,WPP_E_ericsson_MAIN10_yuv.md5
++1,WPP_E_ericsson_MAIN_2,WPP_E_ericsson_MAIN_2.bit,WPP_E_ericsson_MAIN_2_yuv.md5
++1,WPP_F_ericsson_MAIN10_2,WPP_F_ericsson_MAIN10_2.bit,WPP_F_ericsson_MAIN10_yuv.md5
++1,WPP_F_ericsson_MAIN_2,WPP_F_ericsson_MAIN_2.bit,WPP_F_ericsson_MAIN_2_yuv.md5
+--- /dev/null ++++ b/pi-util/conf_h265.csv +@@ -0,0 +1,144 @@ ++1,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_1,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_1.bit,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_1.md5
++1,AMP_A_Samsung_6,AMP_A_Samsung_6.bin,AMP_A_Samsung_6.md5
++1,AMP_B_Samsung_6,AMP_B_Samsung_6.bin,AMP_B_Samsung_6.md5
++1,AMP_D_Hisilicon_3,AMP_D_Hisilicon.bit,AMP_D_Hisilicon_3.yuv.md5
++1,AMP_E_Hisilicon_3,AMP_E_Hisilicon.bit,AMP_E_Hisilicon_3.yuv.md5
++1,AMP_F_Hisilicon_3,AMP_F_Hisilicon_3.bit,AMP_F_Hisilicon_3.yuv.md5
++1,AMVP_A_MTK_4,AMVP_A_MTK_4.bit,AMVP_A_MTK_4.md5
++1,AMVP_B_MTK_4,AMVP_B_MTK_4.bit,AMVP_B_MTK_4.md5
++1,AMVP_C_Samsung_6,AMVP_C_Samsung_6.bin,AMVP_C_Samsung_6.md5
++1,BUMPING_A_ericsson_1,BUMPING_A_ericsson_1.bit,BUMPING_A_ericsson_1.md5
++1,CAINIT_A_SHARP_4,CAINIT_A_SHARP_4.bit,CAINIT_A_SHARP_4.md5
++1,CAINIT_B_SHARP_4,CAINIT_B_SHARP_4.bit,CAINIT_B_SHARP_4.md5
++1,CAINIT_C_SHARP_3,CAINIT_C_SHARP_3.bit,CAINIT_C_SHARP_3.md5
++1,CAINIT_D_SHARP_3,CAINIT_D_SHARP_3.bit,CAINIT_D_SHARP_3.md5
++1,CAINIT_E_SHARP_3,CAINIT_E_SHARP_3.bit,CAINIT_E_SHARP_3.md5
++1,CAINIT_F_SHARP_3,CAINIT_F_SHARP_3.bit,CAINIT_F_SHARP_3.md5
++1,CAINIT_G_SHARP_3,CAINIT_G_SHARP_3.bit,CAINIT_G_SHARP_3.md5
++1,CAINIT_H_SHARP_3,CAINIT_H_SHARP_3.bit,CAINIT_H_SHARP_3.md5
++1,CIP_A_Panasonic_3,CIP_A_Panasonic_3.bit,CIP_A_Panasonic_3_yuv.md5
++1,cip_B_NEC_3,cip_B_NEC_3.bit,cip_B_NEC_3.md5
++1,CIP_C_Panasonic_2,CIP_C_Panasonic_2.bit,CIP_C_Panasonic_2_yuv.md5
++1,CONFWIN_A_Sony_1,CONFWIN_A_Sony_1.bit,CONFWIN_A_Sony_1.md5
++1,DBLK_A_MAIN10_VIXS_3,DBLK_A_MAIN10_VIXS_3.bit,DBLK_A_MAIN10_VIXS_3.md5
++1,DBLK_A_SONY_3,DBLK_A_SONY_3.bit,DBLK_A_SONY_3.bit.yuv.md5
++1,DBLK_B_SONY_3,DBLK_B_SONY_3.bit,DBLK_B_SONY_3.bit.yuv.md5
++1,DBLK_C_SONY_3,DBLK_C_SONY_3.bit,DBLK_C_SONY_3.bit.yuv.md5
++1,DBLK_D_VIXS_2,DBLK_D_VIXS_2.bit,DBLK_D_VIXS_2_yuv.md5
++1,DBLK_E_VIXS_2,DBLK_E_VIXS_2.bit,DBLK_E_VIXS_2_yuv.md5
++1,DBLK_F_VIXS_2,DBLK_F_VIXS_2.bit,DBLK_F_VIXS_2_yuv.md5
++1,DBLK_G_VIXS_2,DBLK_G_VIXS_2.bit,DBLK_G_VIXS_2_yuv.md5
++1,DELTAQP_A_BRCM_4,DELTAQP_A_BRCM_4.bit,DELTAQP_A_BRCM_4_yuv.md5
++1,DELTAQP_B_SONY_3,DELTAQP_B_SONY_3.bit,DELTAQP_B_SONY_3.bit.yuv.md5
++1,DELTAQP_C_SONY_3,DELTAQP_C_SONY_3.bit,DELTAQP_C_SONY_3.bit.yuv.md5
++1,DSLICE_A_HHI_5,DSLICE_A_HHI_5.bin,DSLICE_A_HHI_5.md5
++1,DSLICE_B_HHI_5,DSLICE_B_HHI_5.bin,DSLICE_B_HHI_5.md5
++1,DSLICE_C_HHI_5,DSLICE_C_HHI_5.bin,DSLICE_C_HHI_5.md5
++1,ENTP_A_QUALCOMM_1,ENTP_A_Qualcomm_1.bit,ENTP_A_Qualcomm_1.md5
++1,ENTP_B_Qualcomm_1,ENTP_B_Qualcomm_1.bit,ENTP_B_Qualcomm_1.md5
++1,ENTP_C_Qualcomm_1,ENTP_C_Qualcomm_1.bit,ENTP_C_Qualcomm_1.md5
++1,EXT_A_ericsson_4,EXT_A_ericsson_4.bit,EXT_A_ericsson_4.md5
++1,FILLER_A_Sony_1,FILLER_A_Sony_1.bit,FILLER_A_Sony_1.md5
++1,HRD_A_Fujitsu_3,HRD_A_Fujitsu_3.bin,HRD_A_Fujitsu_3.md5
++1,INITQP_A_Sony_1,INITQP_A_Sony_1.bit,INITQP_A_Sony_1.md5
++1,INITQP_B_Main10_Sony_1,INITQP_B_Main10_Sony_1.bit,INITQP_B_Main10_Sony_1.md5
++1,ipcm_A_NEC_3,ipcm_A_NEC_3.bit,ipcm_A_NEC_3.md5
++1,ipcm_B_NEC_3,ipcm_B_NEC_3.bit,ipcm_B_NEC_3.md5
++1,ipcm_C_NEC_3,ipcm_C_NEC_3.bit,ipcm_C_NEC_3.md5
++1,ipcm_D_NEC_3,ipcm_D_NEC_3.bit,ipcm_D_NEC_3.md5
++1,ipcm_E_NEC_2,ipcm_E_NEC_2.bit,ipcm_E_NEC_2.md5
++1,IPRED_A_docomo_2,IPRED_A_docomo_2.bit,IPRED_A_docomo_2.md5
++1,IPRED_B_Nokia_3,IPRED_B_Nokia_3.bit,IPRED_B_Nokia_3_yuv.md5
++1,IPRED_C_Mitsubishi_3,IPRED_C_Mitsubishi_3.bit,IPRED_C_Mitsubishi_3_yuv.md5
++1,LS_A_Orange_2,LS_A_Orange_2.bit,LS_A_Orange_2_yuv.md5
++1,LS_B_Orange_4,LS_B_Orange_4.bit,LS_B_Orange_4_yuv.md5
++1,LTRPSPS_A_Qualcomm_1,LTRPSPS_A_Qualcomm_1.bit,LTRPSPS_A_Qualcomm_1.md5
++1,MAXBINS_A_TI_4,MAXBINS_A_TI_4.bit,MAXBINS_A_TI_4.md5
++1,MAXBINS_B_TI_4,MAXBINS_B_TI_4.bit,MAXBINS_B_TI_4.md5
++1,MAXBINS_C_TI_4,MAXBINS_C_TI_4.bit,MAXBINS_C_TI_4.md5
++1,MERGE_A_TI_3,MERGE_A_TI_3.bit,MERGE_A_TI_3.md5
++1,MERGE_B_TI_3,MERGE_B_TI_3.bit,MERGE_B_TI_3.md5
++1,MERGE_C_TI_3,MERGE_C_TI_3.bit,MERGE_C_TI_3.md5
++1,MERGE_D_TI_3,MERGE_D_TI_3.bit,MERGE_D_TI_3.md5
++1,MERGE_E_TI_3,MERGE_E_TI_3.bit,MERGE_E_TI_3.md5
++1,MERGE_F_MTK_4,MERGE_F_MTK_4.bit,MERGE_F_MTK_4.md5
++1,MERGE_G_HHI_4,MERGE_G_HHI_4.bit,MERGE_G_HHI_4.md5
++1,MVCLIP_A_qualcomm_3,MVCLIP_A_qualcomm_3.bit,MVCLIP_A_qualcomm_3.yuv.md5
++1,MVDL1ZERO_A_docomo_4,MVDL1ZERO_A_docomo_4.bit,MVDL1ZERO_A_docomo_4.md5
++1,MVEDGE_A_qualcomm_3,MVEDGE_A_qualcomm_3.bit,MVEDGE_A_qualcomm_3.yuv.md5
++1,NoOutPrior_A_Qualcomm_1,NoOutPrior_A_Qualcomm_1.bit,NoOutPrior_A_Qualcomm_1.md5
++1,NoOutPrior_B_Qualcomm_1,NoOutPrior_B_Qualcomm_1.bit,NoOutPrior_B_Qualcomm_1.md5
++1,NUT_A_ericsson_5,NUT_A_ericsson_5.bit,NUT_A_ericsson_5.md5
++1,OPFLAG_A_Qualcomm_1,OPFLAG_A_Qualcomm_1.bit,OPFLAG_A_Qualcomm_1.md5
++1,OPFLAG_B_Qualcomm_1,OPFLAG_B_Qualcomm_1.bit,OPFLAG_B_Qualcomm_1.md5
++1,OPFLAG_C_Qualcomm_1,OPFLAG_C_Qualcomm_1.bit,OPFLAG_C_Qualcomm_1.md5
++1,PICSIZE_A_Bossen_1,PICSIZE_A_Bossen_1.bin,PICSIZE_A_Bossen_1.md5
++1,PICSIZE_B_Bossen_1,PICSIZE_B_Bossen_1.bin,PICSIZE_B_Bossen_1.md5
++1,PICSIZE_C_Bossen_1,PICSIZE_C_Bossen_1.bin,PICSIZE_C_Bossen_1.md5
++1,PICSIZE_D_Bossen_1,PICSIZE_D_Bossen_1.bin,PICSIZE_D_Bossen_1.md5
++1,PMERGE_A_TI_3,PMERGE_A_TI_3.bit,PMERGE_A_TI_3.md5
++1,PMERGE_B_TI_3,PMERGE_B_TI_3.bit,PMERGE_B_TI_3.md5
++1,PMERGE_C_TI_3,PMERGE_C_TI_3.bit,PMERGE_C_TI_3.md5
++1,PMERGE_D_TI_3,PMERGE_D_TI_3.bit,PMERGE_D_TI_3.md5
++1,PMERGE_E_TI_3,PMERGE_E_TI_3.bit,PMERGE_E_TI_3.md5
++1,POC_A_Bossen_3,POC_A_Bossen_3.bin,POC_A_Bossen_3.md5
++1,PPS_A_qualcomm_7,PPS_A_qualcomm_7.bit,PPS_A_qualcomm_7.yuv.md5
++1,PS_B_VIDYO_3,PS_B_VIDYO_3.bit,PS_B_VIDYO_3_yuv.md5
++1,RAP_A_docomo_6,RAP_A_docomo_6.bit,RAP_A_docomo_6.md5
++1,RAP_B_Bossen_2,RAP_B_Bossen_2.bit,RAP_B_Bossen_2.md5
++1,RPLM_A_qualcomm_4,RPLM_A_qualcomm_4.bit,RPLM_A_qualcomm_4.yuv.md5
++1,RPLM_B_qualcomm_4,RPLM_B_qualcomm_4.bit,RPLM_B_qualcomm_4.yuv.md5
++1,RPS_A_docomo_5,RPS_A_docomo_5.bit,RPS_A_docomo_5.md5
++1,RPS_B_qualcomm_5,RPS_B_qualcomm_5.bit,RPS_B_qualcomm_5.yuv.md5
++1,RPS_C_ericsson_5,RPS_C_ericsson_5.bit,RPS_C_ericsson_5.md5
++1,RPS_D_ericsson_6,RPS_D_ericsson_6.bit,RPS_D_ericsson_6.md5
++1,RPS_E_qualcomm_5,RPS_E_qualcomm_5.bit,RPS_E_qualcomm_5.yuv.md5
++1,RPS_F_docomo_2,RPS_F_docomo_2.bit,RPS_F_docomo_2.md5
++1,RQT_A_HHI_4,RQT_A_HHI_4.bit,RQT_A_HHI_4.md5
++1,RQT_B_HHI_4,RQT_B_HHI_4.bit,RQT_B_HHI_4.md5
++1,RQT_C_HHI_4,RQT_C_HHI_4.bit,RQT_C_HHI_4.md5
++1,RQT_D_HHI_4,RQT_D_HHI_4.bit,RQT_D_HHI_4.md5
++1,RQT_E_HHI_4,RQT_E_HHI_4.bit,RQT_E_HHI_4.md5
++1,RQT_F_HHI_4,RQT_F_HHI_4.bit,RQT_F_HHI_4.md5
++1,RQT_G_HHI_4,RQT_G_HHI_4.bit,RQT_G_HHI_4.md5
++1,SAO_A_MediaTek_4,SAO_A_MediaTek_4.bit,SAO_A_MediaTek_4.md5
++1,SAO_B_MediaTek_5,SAO_B_MediaTek_5.bit,SAO_B_MediaTek_5.md5
++1,SAO_C_Samsung_5,SAO_C_Samsung_5.bin,SAO_C_Samsung_5.md5
++1,SAO_D_Samsung_5,SAO_D_Samsung_5.bin,SAO_D_Samsung_5.md5
++1,SAO_E_Canon_4,SAO_E_Canon_4.bit,SAO_E_Canon_4.md5
++1,SAO_F_Canon_3,SAO_F_Canon_3.bit,SAO_F_Canon_3.md5
++1,SAO_G_Canon_3,SAO_G_Canon_3.bit,SAO_G_Canon_3.md5
++1,SDH_A_Orange_4,SDH_A_Orange_4.bit,SDH_A_Orange_4_yuv.md5
++1,SLICES_A_Rovi_3,SLICES_A_Rovi_3.bin,SLICES_A_Rovi_3.md5
++1,SLIST_A_Sony_4,str.bin,SLIST_A_Sony_4_yuv.md5
++1,SLIST_B_Sony_8,str.bin,SLIST_B_Sony_8_yuv.md5
++1,SLIST_C_Sony_3,str.bin,SLIST_C_Sony_3_yuv.md5
++1,SLIST_D_Sony_9,str.bin,SLIST_D_Sony_9_yuv.md5
++1,SLPPLP_A_VIDYO_2,SLPPLP_A_VIDYO_2.bit,SLPPLP_A_VIDYO_2_yuv.md5
++1,STRUCT_A_Samsung_6,STRUCT_A_Samsung_6.bin,STRUCT_A_Samsung_6.md5
++1,STRUCT_B_Samsung_6,STRUCT_B_Samsung_6.bin,STRUCT_B_Samsung_6.md5
++1,TILES_A_Cisco_2,TILES_A_Cisco_2.bin,TILES_A_Cisco_2_yuv.md5
++1,TILES_B_Cisco_1,TILES_B_Cisco_1.bin,TILES_B_Cisco_1_yuv.md5
++1,TMVP_A_MS_3,TMVP_A_MS_3.bit,TMVP_A_MS_3.yuv.md5
++1,TSCL_A_VIDYO_5,TSCL_A_VIDYO_5.bit,TSCL_A_VIDYO_5_yuv.md5
++1,TSCL_B_VIDYO_4,TSCL_B_VIDYO_4.bit,TSCL_B_VIDYO_4_yuv.md5
++1,TSKIP_A_MS_3,TSKIP_A_MS_3.bit,TSKIP_A_MS_3.yuv.md5
++0,TSUNEQBD_A_MAIN10_Technicolor_2,TSUNEQBD_A_MAIN10_Technicolor_2.bit,TSUNEQBD_A_MAIN10_Technicolor_2_yuv.md5, # Y/C bit depth unmatched
++1,TUSIZE_A_Samsung_1,TUSIZE_A_Samsung_1.bin,TUSIZE_A_Samsung_1.md5
++1,VPSID_A_VIDYO_2,VPSID_A_VIDYO_2.bit,VPSID_A_VIDYO_2_yuv.md5
++1,WP_A_MAIN10_Toshiba_3,WP_A_MAIN10_Toshiba_3.bit,WP_A_MAIN10_Toshiba_3_yuv.md5
++1,WP_A_Toshiba_3,WP_A_Toshiba_3.bit,WP_A_Toshiba_3_yuv.md5
++1,WP_B_Toshiba_3,WP_B_Toshiba_3.bit,WP_B_Toshiba_3_yuv.md5
++1,WP_MAIN10_B_Toshiba_3,WP_MAIN10_B_Toshiba_3.bit,WP_MAIN10_B_Toshiba_3_yuv.md5
++1,WPP_A_ericsson_MAIN10_2,WPP_A_ericsson_MAIN10_2.bit,WPP_A_ericsson_MAIN10_yuv.md5
++1,WPP_A_ericsson_MAIN_2,WPP_A_ericsson_MAIN_2.bit,WPP_A_ericsson_MAIN_2_yuv.md5
++1,WPP_B_ericsson_MAIN10_2,WPP_B_ericsson_MAIN10_2.bit,WPP_B_ericsson_MAIN10_yuv.md5
++1,WPP_B_ericsson_MAIN_2,WPP_B_ericsson_MAIN_2.bit,WPP_B_ericsson_MAIN_2_yuv.md5
++1,WPP_C_ericsson_MAIN10_2,WPP_C_ericsson_MAIN10_2.bit,WPP_C_ericsson_MAIN10_yuv.md5
++1,WPP_C_ericsson_MAIN_2,WPP_C_ericsson_MAIN_2.bit,WPP_C_ericsson_MAIN_2_yuv.md5
++1,WPP_D_ericsson_MAIN10_2,WPP_D_ericsson_MAIN10_2.bit,WPP_D_ericsson_MAIN10_yuv.md5
++1,WPP_D_ericsson_MAIN_2,WPP_D_ericsson_MAIN_2.bit,WPP_D_ericsson_MAIN_2_yuv.md5
++1,WPP_E_ericsson_MAIN10_2,WPP_E_ericsson_MAIN10_2.bit,WPP_E_ericsson_MAIN10_yuv.md5
++1,WPP_E_ericsson_MAIN_2,WPP_E_ericsson_MAIN_2.bit,WPP_E_ericsson_MAIN_2_yuv.md5
++1,WPP_F_ericsson_MAIN10_2,WPP_F_ericsson_MAIN10_2.bit,WPP_F_ericsson_MAIN10_yuv.md5
++1,WPP_F_ericsson_MAIN_2,WPP_F_ericsson_MAIN_2.bit,WPP_F_ericsson_MAIN_2_yuv.md5
+--- /dev/null ++++ b/pi-util/conf_native.sh +@@ -0,0 +1,135 @@ ++echo "Configure for native build" ++ ++FFSRC=`pwd` ++MC=`dpkg --print-architecture` ++BUILDBASE=$FFSRC/out ++ ++#RPI_KEEPS="-save-temps=obj" ++RPI_KEEPS="" ++ ++NOSHARED= ++MMAL= ++USR_PREFIX= ++TOOLCHAIN= ++R=rel ++ ++while [ "$1" != "" ] ; do ++ case $1 in ++ --noshared) ++ NOSHARED=1 ++ ;; ++ --mmal) ++ MMAL=1 ++ ;; ++ --usr) ++ USR_PREFIX=/usr ++ ;; ++ --tsan) ++ TOOLCHAIN="--toolchain=gcc-tsan" ++ R=tsan ++ ;; ++ *) ++ echo "Usage $0: [--noshared] [--mmal] [--usr]" ++ echo " noshared Build static libs and executable - good for testing" ++ echo " mmal Build mmal decoders" ++ echo " usr Set install prefix to /usr [default=<build-dir>/install]" ++ exit 1 ++ ;; ++ esac ++ shift ++done ++ ++ ++MCOPTS= ++RPI_INCLUDES= ++RPI_LIBDIRS= ++RPI_DEFINES= ++RPI_EXTRALIBS= ++ ++# uname -m gives kernel type which may not have the same ++# 32/64bitness as userspace :-( getconf shoudl provide the answer ++# but use uname to check we are on the right processor ++MC=`uname -m` ++LB=`getconf LONG_BIT` ++if [ "$MC" == "armv7l" ] || [ "$MC" == "aarch64" ]; then ++ if [ "$LB" == "32" ]; then ++ echo "M/C armv7" ++ A=arm-linux-gnueabihf ++ B=armv7 ++ MCOPTS="--arch=armv6t2 --cpu=cortex-a7" ++ RPI_DEFINES=-mfpu=neon-vfpv4 ++ elif [ "$LB" == "64" ]; then ++ echo "M/C aarch64" ++ A=aarch64-linux-gnu ++ B=arm64 ++ else ++ echo "Unknown LONG_BIT name: $LB" ++ exit 1 ++ fi ++else ++ echo "Unknown machine name: $MC" ++ exit 1 ++fi ++ ++if [ $MMAL ]; then ++ RPI_OPT_VC=/opt/vc ++ RPI_INCLUDES="-I$RPI_OPT_VC/include -I$RPI_OPT_VC/include/interface/vcos/pthreads -I$RPI_OPT_VC/include/interface/vmcs_host/linux" ++ RPI_LIBDIRS="-L$RPI_OPT_VC/lib" ++ RPI_DEFINES="$RPI_DEFINES -D__VCCOREVER__=0x4000000" ++ RPI_EXTRALIBS="-Wl,--start-group -lbcm_host -lmmal -lmmal_util -lmmal_core -lvcos -lvcsm -lvchostif -lvchiq_arm -Wl,--end-group" ++ RPIOPTS="--enable-mmal" ++else ++ RPIOPTS="--disable-mmal" ++fi ++ ++C=`lsb_release -sc` ++V=`cat RELEASE` ++ ++SHARED_LIBS="--enable-shared" ++if [ $NOSHARED ]; then ++ SHARED_LIBS="--disable-shared" ++ OUT=$BUILDBASE/$B-$C-$V-static-$R ++ echo Static libs ++else ++ echo Shared libs ++ OUT=$BUILDBASE/$B-$C-$V-shared-$R ++fi ++ ++if [ ! $USR_PREFIX ]; then ++ USR_PREFIX=$OUT/install ++fi ++LIB_PREFIX=$USR_PREFIX/lib/$A ++INC_PREFIX=$USR_PREFIX/include/$A ++ ++echo Destination directory: $OUT ++mkdir -p $OUT ++# Nothing under here need worry git - including this .gitignore! ++echo "**" > $BUILDBASE/.gitignore ++cd $OUT ++ ++$FFSRC/configure \ ++ --prefix=$USR_PREFIX\ ++ --libdir=$LIB_PREFIX\ ++ --incdir=$INC_PREFIX\ ++ $MCOPTS\ ++ $TOOLCHAIN\ ++ --disable-stripping\ ++ --disable-thumb\ ++ --enable-sand\ ++ --enable-v4l2-request\ ++ --enable-libdrm\ ++ --enable-vout-egl\ ++ --enable-vout-drm\ ++ --enable-gpl\ ++ $SHARED_LIBS\ ++ $RPIOPTS\ ++ --extra-cflags="-ggdb $RPI_KEEPS $RPI_DEFINES $RPI_INCLUDES"\ ++ --extra-cxxflags="$RPI_DEFINES $RPI_INCLUDES"\ ++ --extra-ldflags="$RPI_LIBDIRS"\ ++ --extra-libs="$RPI_EXTRALIBS"\ ++ --extra-version="rpi" ++ ++echo "Configured into $OUT" ++ ++# gcc option for getting asm listing ++# -Wa,-ahls +--- /dev/null ++++ b/pi-util/ffconf.py +@@ -0,0 +1,215 @@ ++#!/usr/bin/env python3 ++ ++import string ++import os ++import subprocess ++import re ++import argparse ++import sys ++import csv ++from stat import * ++ ++CODEC_HEVC_RPI = 1 ++HWACCEL_RPI = 2 ++HWACCEL_DRM = 3 ++HWACCEL_VAAPI = 4 ++ ++def testone(fileroot, srcname, es_file, md5_file, pix, dectype, vcodec, ffmpeg_exec): ++ hwaccel = "" ++ if dectype == HWACCEL_RPI: ++ hwaccel = "rpi" ++ elif dectype == HWACCEL_DRM: ++ hwaccel = "drm" ++ elif dectype == HWACCEL_VAAPI: ++ hwaccel = "vaapi" ++ ++ pix_fmt = [] ++ if pix == "8": ++ pix_fmt = ["-pix_fmt", "yuv420p"] ++ elif pix == "10": ++ pix_fmt = ["-pix_fmt", "yuv420p10le"] ++ elif pix == "12": ++ pix_fmt = ["-pix_fmt", "yuv420p12le"] ++ ++ tmp_root = "/tmp" ++ ++ names = srcname.split('/') ++ while len(names) > 1: ++ tmp_root = os.path.join(tmp_root, names[0]) ++ del names[0] ++ name = names[0] ++ ++ if not os.path.exists(tmp_root): ++ os.makedirs(tmp_root) ++ ++ dec_file = os.path.join(tmp_root, name + ".dec.md5") ++ try: ++ os.remove(dec_file) ++ except: ++ pass ++ ++ flog = open(os.path.join(tmp_root, name + ".log"), "wt") ++ ++ ffargs = [ffmpeg_exec, "-flags", "unaligned", "-hwaccel", hwaccel, "-vcodec", "hevc", "-i", os.path.join(fileroot, es_file)] + pix_fmt + ["-f", "md5", dec_file] ++ ++ # Unaligned needed for cropping conformance ++ if hwaccel: ++ rstr = subprocess.call(ffargs, stdout=flog, stderr=subprocess.STDOUT) ++ else: ++ rstr = subprocess.call( ++ [ffmpeg_exec, "-flags", "unaligned", "-vcodec", vcodec, "-i", os.path.join(fileroot, es_file), "-f", "md5", dec_file], ++ stdout=flog, stderr=subprocess.STDOUT) ++ ++ try: ++ m1 = None ++ m2 = None ++ with open(os.path.join(fileroot, md5_file)) as f: ++ for line in f: ++ m1 = re.search("[0-9a-f]{32}", line.lower()) ++ if m1: ++ break ++ ++ with open(dec_file) as f: ++ m2 = re.search("[0-9a-f]{32}", f.readline()) ++ except: ++ pass ++ ++ if m1 and m2 and m1.group() == m2.group(): ++ print("Match: " + m1.group(), file=flog) ++ rv = 0 ++ elif not m1: ++ print("****** Cannot find m1", file=flog) ++ rv = 3 ++ elif not m2: ++ print("****** Cannot find m2", file=flog) ++ rv = 2 ++ else: ++ print("****** Mismatch: " + m1.group() + " != " + m2.group(), file=flog) ++ rv = 1 ++ flog.close() ++ return rv ++ ++def scandir(root): ++ aconf = [] ++ ents = os.listdir(root) ++ ents.sort(key=str.lower) ++ for name in ents: ++ test_path = os.path.join(root, name) ++ if S_ISDIR(os.stat(test_path).st_mode): ++ files = os.listdir(test_path) ++ es_file = "?" ++ md5_file = "?" ++ for f in files: ++ (base, ext) = os.path.splitext(f) ++ if base[0] == '.': ++ pass ++ elif ext == ".bit" or ext == ".bin": ++ es_file = f ++ elif ext == ".md5" or (ext == ".txt" and (base[-4:] == "_md5" or base[-6:] == "md5sum")): ++ if md5_file == "?": ++ md5_file = f ++ elif base[-3:] == "yuv": ++ md5_file = f ++ aconf.append((1, name, es_file, md5_file)) ++ return aconf ++ ++def runtest(name, tests): ++ if not tests: ++ return True ++ for t in tests: ++ if name[0:len(t)] == t or name.find("/" + t) != -1: ++ return True ++ return False ++ ++def doconf(csva, tests, test_root, vcodec, dectype, ffmpeg_exec): ++ unx_failures = [] ++ unx_success = [] ++ failures = 0 ++ successes = 0 ++ for a in csva: ++ exp_test = int(a[0]) ++ if (exp_test and runtest(a[1], tests)): ++ name = a[1] ++ print ("==== ", name, end="") ++ sys.stdout.flush() ++ ++ rv = testone(os.path.join(test_root, name), name, a[2], a[3], a[4], dectype=dectype, vcodec=vcodec, ffmpeg_exec=ffmpeg_exec) ++ if (rv == 0): ++ successes += 1 ++ else: ++ failures += 1 ++ ++ if (rv == 0): ++ if exp_test == 2: ++ print(": * OK *") ++ unx_success.append(name) ++ else: ++ print(": ok") ++ elif exp_test == 2 and rv == 1: ++ print(": fail") ++ elif exp_test == 3 and rv == 2: ++ # Call an expected "crash" an abort ++ print(": abort") ++ else: ++ unx_failures.append(name) ++ if rv == 1: ++ print(": * FAIL *") ++ elif (rv == 2) : ++ print(": * CRASH *") ++ elif (rv == 3) : ++ print(": * MD5 MISSING *") ++ else : ++ print(": * BANG *") ++ ++ if unx_failures or unx_success: ++ print("Unexpected Failures:", unx_failures) ++ print("Unexpected Success: ", unx_success) ++ else: ++ print("All tests normal:", successes, "ok,", failures, "failed") ++ ++ ++class ConfCSVDialect(csv.Dialect): ++ delimiter = ',' ++ doublequote = True ++ lineterminator = '\n' ++ quotechar='"' ++ quoting = csv.QUOTE_MINIMAL ++ skipinitialspace = True ++ strict = True ++ ++if __name__ == '__main__': ++ ++ argp = argparse.ArgumentParser(description="FFmpeg h265 conformance tester") ++ argp.add_argument("tests", nargs='*') ++ argp.add_argument("--pi4", action='store_true', help="Force pi4 cmd line") ++ argp.add_argument("--drm", action='store_true', help="Force v4l2 drm cmd line") ++ argp.add_argument("--vaapi", action='store_true', help="Force vaapi cmd line") ++ argp.add_argument("--test_root", default="/opt/conform/h265.2016", help="Root dir for test") ++ argp.add_argument("--csvgen", action='store_true', help="Generate CSV file for dir") ++ argp.add_argument("--csv", default="pi-util/conf_h265.2016.csv", help="CSV filename") ++ argp.add_argument("--vcodec", default="hevc_rpi", help="vcodec name to use") ++ argp.add_argument("--ffmpeg", default="./ffmpeg", help="ffmpeg exec name") ++ args = argp.parse_args() ++ ++ if args.csvgen: ++ csv.writer(sys.stdout).writerows(scandir(args.test_root)) ++ exit(0) ++ ++ with open(args.csv, 'rt') as csvfile: ++ csva = [a for a in csv.reader(csvfile, ConfCSVDialect())] ++ ++ dectype = CODEC_HEVC_RPI ++ if os.path.exists("/dev/rpivid-hevcmem"): ++ dectype = HWACCEL_RPI ++ if args.drm or os.path.exists("/sys/module/rpivid_hevc"): ++ dectype = HWACCEL_DRM ++ ++ if args.pi4: ++ dectype = HWACCEL_RPI ++ elif args.drm: ++ dectype = HWACCEL_DRM ++ elif args.vaapi: ++ dectype = HWACCEL_VAAPI ++ ++ doconf(csva, args.tests, args.test_root, args.vcodec, dectype, args.ffmpeg) ++ +--- /dev/null ++++ b/pi-util/ffperf.py +@@ -0,0 +1,128 @@ ++#!/usr/bin/env python3 ++ ++import time ++import string ++import os ++import tempfile ++import subprocess ++import re ++import argparse ++import sys ++import csv ++from stat import * ++ ++class tstats: ++ close_threshold = 0.01 ++ ++ def __init__(self, stats_dict=None): ++ if stats_dict != None: ++ self.name = stats_dict["name"] ++ self.elapsed = float(stats_dict["elapsed"]) ++ self.user = float(stats_dict["user"]) ++ self.sys = float(stats_dict["sys"]) ++ ++ def times_str(self): ++ ctime = self.sys + self.user ++ return "time=%6.2f, cpu=%6.2f (%4.2f%%)" % (self.elapsed, ctime, (ctime * 100.0) / self.elapsed) ++ ++ def dict(self): ++ return {"name":self.name, "elapsed":self.elapsed, "user":self.user, "sys":self.sys} ++ ++ def is_close(self, other): ++ return abs(self.elapsed - other.elapsed) / self.elapsed < self.close_threshold ++ ++ def __lt__(self, other): ++ return self.elapsed < other.elapsed ++ def __gt__(self, other): ++ return self.elapsed > other.elapsed ++ ++ def time_file(name, prefix, ffmpeg="./ffmpeg"): ++ stats = tstats() ++ stats.name = name ++ start_time = time.clock_gettime(time.CLOCK_MONOTONIC); ++ cproc = subprocess.Popen([ffmpeg, "-no_cvt_hw", ++ "-vcodec", "hevc_rpi", ++ "-t", "30", "-i", prefix + name, ++ "-f", "vout_rpi", os.devnull], bufsize=-1, stdout=flog, stderr=flog); ++ pinfo = os.wait4(cproc.pid, 0) ++ end_time = time.clock_gettime(time.CLOCK_MONOTONIC); ++ stats.elapsed = end_time - start_time ++ stats.user = pinfo[2].ru_utime ++ stats.sys = pinfo[2].ru_stime ++ return stats ++ ++ ++def common_prefix(s1, s2): ++ for i in range(min(len(s1),len(s2))): ++ if s1[i] != s2[i]: ++ return s1[:i] ++ return s1[:i+1] ++ ++def main(): ++ global flog ++ ++ argp = argparse.ArgumentParser(description="FFmpeg performance tester", epilog=""" ++To blank the screen before starting use "xdg-screensaver activate" ++(For some reason this doesn't seem to work from within python). ++""") ++ ++ argp.add_argument("streams", nargs='*') ++ argp.add_argument("--csv_out", default="ffperf_out.csv", help="CSV output filename") ++ argp.add_argument("--csv_in", help="CSV input filename") ++ argp.add_argument("--prefix", help="Filename prefix (include terminal '/' if a directory).") ++ argp.add_argument("--repeat", default=3, type=int, help="Run repeat count") ++ argp.add_argument("--ffmpeg", default="./ffmpeg", help="FFmpeg executable") ++ ++ args = argp.parse_args() ++ ++ csv_out = csv.DictWriter(open(args.csv_out, 'w', newline=''), ["name", "elapsed", "user", "sys"]) ++ csv_out.writeheader() ++ ++ stats_in = {} ++ if args.csv_in != None: ++ with open(args.csv_in, 'r', newline='') as f_in: ++ stats_in = {x["name"]:tstats(x) for x in csv.DictReader(f_in)} ++ ++ flog = open(os.path.join(tempfile.gettempdir(), "ffperf.log"), "wt") ++ ++ streams = args.streams ++ if not streams: ++ if not stats_in: ++ print ("No source streams specified") ++ return 1 ++ prefix = "" if args.prefix == None else args.prefix ++ streams = [k for k in stats_in] ++ elif args.prefix != None: ++ prefix = args.prefix ++ else: ++ prefix = streams[0] ++ for f in streams[1:]: ++ prefix = common_prefix(prefix, f) ++ pp = prefix.rpartition(os.sep) ++ prefix = pp[0] + pp[1] ++ streams = [s[len(prefix):] for s in streams] ++ ++ for f in sorted(streams, key=lambda x : "~" * x.count(os.sep) + x.lower()): ++ print ("====", f) ++ ++ t0 = tstats({"name":f, "elapsed":999, "user":999, "sys":999}) ++ for i in range(args.repeat): ++ t = tstats.time_file(f, prefix, args.ffmpeg) ++ print ("...", t.times_str()) ++ if t0 > t: ++ t0 = t ++ ++ if t0.name in stats_in: ++ pstat = stats_in[t0.name] ++ print("---" if pstat.is_close(t0) else "<<<" if t0 < pstat else ">>>", pstat.times_str()) ++ ++ csv_out.writerow(t0.dict()) ++ ++ print () ++ ++ return 0 ++ ++ ++if __name__ == '__main__': ++ exit(main()) ++ +--- /dev/null ++++ b/pi-util/genpatch.sh +@@ -0,0 +1,35 @@ ++set -e ++ ++NOPATCH= ++if [ "$1" == "--notag" ]; then ++ shift ++ NOPATCH=1 ++fi ++ ++if [ "$1" == "" ]; then ++ echo Usage: $0 [--notag] \<patch_tag\> ++ echo e.g.: $0 mmal_4 ++ exit 1 ++fi ++ ++VERSION=`cat RELEASE` ++if [ "$VERSION" == "" ]; then ++ echo Can\'t find version RELEASE ++ exit 1 ++fi ++ ++PATCHFILE=../ffmpeg-$VERSION-$1.patch ++ ++if [ $NOPATCH ]; then ++ echo Not tagged ++else ++ # Only continue if we are all comitted ++ git diff --name-status --exit-code ++ ++ PATCHTAG=pi/$VERSION/$1 ++ echo Tagging: $PATCHTAG ++ ++ git tag $PATCHTAG ++fi ++echo Generating patch: $PATCHFILE ++git diff n$VERSION -- > $PATCHFILE +--- /dev/null ++++ b/pi-util/make_array.py +@@ -0,0 +1,23 @@ ++#!/usr/bin/env python ++ ++# Usage ++# make_array file.bin ++# Produces file.h with array of bytes. ++# ++import sys ++for file in sys.argv[1:]: ++ prefix,suffix = file.split('.') ++ assert suffix=='bin' ++ name=prefix.split('/')[-1] ++ print 'Converting',file ++ with open(prefix+'.h','wb') as out: ++ print >>out, 'static const unsigned char',name,'[] = {' ++ with open(file,'rb') as fd: ++ i = 0 ++ for byte in fd.read(): ++ print >>out, '0x%02x, ' % ord(byte), ++ i = i + 1 ++ if i % 8 == 0: ++ print >>out, ' // %04x' % (i - 8) ++ print >>out,'};' ++ +--- /dev/null ++++ b/pi-util/mkinst.sh +@@ -0,0 +1,5 @@ ++set -e ++ ++make install ++ ++cp -r install/* ../vlc/sysroot/raspian_stretch_pi1-sysroot/usr +--- /dev/null ++++ b/pi-util/patkodi.sh +@@ -0,0 +1,9 @@ ++set -e ++KODIBASE=/home/jc/rpi/kodi/xbmc ++JOBS=-j20 ++make $JOBS ++git diff xbmc/release/4.3-kodi > $KODIBASE/tools/depends/target/ffmpeg/pfcd_hevc_optimisations.patch ++make -C $KODIBASE/tools/depends/target/ffmpeg $JOBS ++make -C $KODIBASE/build install ++ ++ +--- /dev/null ++++ b/pi-util/perfcmp.py +@@ -0,0 +1,101 @@ ++#!/usr/bin/env python3 ++ ++import time ++import string ++import os ++import tempfile ++import subprocess ++import re ++import argparse ++import sys ++import csv ++from stat import * ++ ++class tstats: ++ close_threshold = 0.01 ++ ++ def __init__(self, stats_dict=None): ++ if stats_dict != None: ++ self.name = stats_dict["name"] ++ self.elapsed = float(stats_dict["elapsed"]) ++ self.user = float(stats_dict["user"]) ++ self.sys = float(stats_dict["sys"]) ++ ++ def times_str(self): ++ ctime = self.sys + self.user ++ return "time=%6.2f, cpu=%6.2f (%4.2f%%)" % (self.elapsed, ctime, (ctime * 100.0) / self.elapsed) ++ ++ def dict(self): ++ return {"name":self.name, "elapsed":self.elapsed, "user":self.user, "sys":self.sys} ++ ++ def is_close(self, other): ++ return abs(self.elapsed - other.elapsed) / self.elapsed < self.close_threshold ++ ++ def __lt__(self, other): ++ return self.elapsed < other.elapsed ++ def __gt__(self, other): ++ return self.elapsed > other.elapsed ++ ++ def time_file(name, prefix): ++ stats = tstats() ++ stats.name = name ++ start_time = time.clock_gettime(time.CLOCK_MONOTONIC); ++ cproc = subprocess.Popen(["./ffmpeg", "-t", "30", "-i", prefix + name, ++ "-f", "null", os.devnull], bufsize=-1, stdout=flog, stderr=flog); ++ pinfo = os.wait4(cproc.pid, 0) ++ end_time = time.clock_gettime(time.CLOCK_MONOTONIC); ++ stats.elapsed = end_time - start_time ++ stats.user = pinfo[2].ru_utime ++ stats.sys = pinfo[2].ru_stime ++ return stats ++ ++ ++def common_prefix(s1, s2): ++ for i in range(min(len(s1),len(s2))): ++ if s1[i] != s2[i]: ++ return s1[:i] ++ return s1[:i+1] ++ ++def main(): ++ argp = argparse.ArgumentParser(description="FFmpeg performance compare") ++ ++ argp.add_argument("stream0", help="CSV to compare") ++ argp.add_argument("stream1", nargs='?', default="ffperf_out.csv", help="CSV to compare") ++ ++ args = argp.parse_args() ++ ++ with open(args.stream0, 'r', newline='') as f_in: ++ stats0 = {x["name"]:tstats(x) for x in csv.DictReader(f_in)} ++ with open(args.stream1, 'r', newline='') as f_in: ++ stats1 = {x["name"]:tstats(x) for x in csv.DictReader(f_in)} ++ ++ print (args.stream0, "<<-->>", args.stream1) ++ print () ++ ++ for f in sorted(stats0.keys() | stats1.keys(), key=lambda x : "~" * x.count(os.sep) + x.lower()): ++ if not (f in stats0) : ++ print (" XX :", f) ++ continue ++ if not (f in stats1) : ++ print (" XX :", f) ++ continue ++ ++ s0 = stats0[f] ++ s1 = stats1[f] ++ ++ pcent = ((s0.elapsed - s1.elapsed) / s0.elapsed) * 100.0 ++ thresh = 0.3 ++ tc = 6 ++ ++ nchar = min(tc - 1, int(abs(pcent) / thresh)) ++ cc = " -- " if nchar == 0 else "<" * nchar + " " * (tc - nchar) if pcent < 0 else " " * (tc - nchar) + ">" * nchar ++ ++ print ("%6.2f %s%6.2f (%+5.2f) : %s" % ++ (s0.elapsed, cc, s1.elapsed, pcent, f)) ++ ++ return 0 ++ ++ ++if __name__ == '__main__': ++ exit(main()) ++ +--- /dev/null ++++ b/pi-util/qem.sh +@@ -0,0 +1,9 @@ ++TARGET_DIR=../src/eupton_vc4dev_2012a/software/vc4/DEV/applications/tutorials/user_shader_example_tex ++QASM=python\ ../local/bin/qasm.py ++SRC_FILE=libavcodec/rpi_hevc_shader.qasm ++DST_BASE=shader ++ ++cp libavcodec/rpi_hevc_shader_cmd.h $TARGET_DIR ++$QASM -mc_c:$DST_BASE,$DST_BASE,$DST_BASE $SRC_FILE > $TARGET_DIR/$DST_BASE.c ++$QASM -mc_h:$DST_BASE,$DST_BASE,$DST_BASE $SRC_FILE > $TARGET_DIR/$DST_BASE.h ++ +--- /dev/null ++++ b/pi-util/testfilt.py +@@ -0,0 +1,83 @@ ++#!/usr/bin/env python3 ++ ++import string ++import os ++import subprocess ++import re ++import argparse ++import sys ++import csv ++from stat import * ++ ++class validator: ++ def __init__(self): ++ self.ok = False ++ ++ def isok(self): ++ return self.ok ++ ++ def setok(self): ++ self.ok = True ++ ++class valid_regex(validator): ++ def __init__(self, regex): ++ super().__init__() ++ self.regex = re.compile(regex) ++ ++ def scanline(self, line): ++ if self.isok() or self.regex.search(line): ++ self.setok() ++ ++ ++def validate(validators, flog): ++ for line in flog: ++ for v in validators: ++ v.scanline(line) ++ ++ ok = True ++ for v in validators: ++ if not v.isok(): ++ ok = False ++ # complain ++ print("Test failed") ++ ++ if ok: ++ print("OK") ++ return ok ++ ++def runtest(name, ffmpeg, args, suffix, validators): ++ log_root = os.path.join("/tmp", "testfilt", name) ++ ofilename = os.path.join(log_root, name + suffix) ++ ++ if not os.path.exists(log_root): ++ os.makedirs(log_root) ++ ++ try: ++ os.remove(ofilename) ++ except: ++ pass ++ ++ flog = open(os.path.join(log_root, name + ".log"), "wb") ++ ffargs = [ffmpeg] + args + [ofilename] ++ ++ subprocess.call(ffargs, stdout=flog, stderr=subprocess.STDOUT, text=False) ++ flog.close ++ ++ flog = open(os.path.join(log_root, name + ".log"), "rt") ++ return validate(validators, flog) ++ ++def sayok(log_root, flog): ++ print("Woohoo") ++ return True ++ ++if __name__ == '__main__': ++ ++ argp = argparse.ArgumentParser(description="FFmpeg filter tester") ++ argp.add_argument("--ffmpeg", default="./ffmpeg", help="ffmpeg exec name") ++ args = argp.parse_args() ++ ++ runtest("ATest", args.ffmpeg, ["-v", "verbose", "-no_cvt_hw", "-an", "-c:v", "h264_v4l2m2m", "-i", ++ "/home/johncox/server/TestMedia/Sony/jellyfish-10-mbps-hd-h264.mkv", ++# "/home/jc/rpi/streams/jellyfish-3-mbps-hd-h264.mkv", ++ "-c:v", "h264_v4l2m2m", "-b:v", "2M"], ".mkv", ++ [valid_regex(r'Output stream #0:0 \(video\): 900 frames encoded; 900 packets muxed')]) +--- /dev/null ++++ b/pi-util/v3dusage.py +@@ -0,0 +1,128 @@ ++#!/usr/bin/env python ++ ++import sys ++import argparse ++import re ++ ++def do_logparse(logname): ++ ++ rmatch = re.compile(r'^([0-9]+\.[0-9]{3}): (done )?((vpu0)|(vpu1)|(qpu1)) ([A-Z_]+) cb:([0-9a-f]+) ') ++ rqcycle = re.compile(r'^([0-9]+\.[0-9]{3}): v3d: QPU Total clock cycles for all QPUs doing vertex/coordinate shading +([0-9]+)$') ++ rqtscycle = re.compile(r'^([0-9]+\.[0-9]{3}): v3d: QPU Total clock cycles for all QPUs stalled waiting for TMUs +([0-9]+)$') ++ rl2hits = re.compile(r'^([0-9]+\.[0-9]{3}): v3d: L2C Total Level 2 cache ([a-z]+) +([0-9]+)$') ++ ++ ttotal = {'idle':0.0} ++ tstart = {} ++ qctotal = {} ++ qtstotal = {} ++ l2hits = {} ++ l2total = {} ++ time0 = None ++ idle_start = None ++ qpu_op_no = 0 ++ op_count = 0 ++ ++ with open(logname, "rt") as infile: ++ for line in infile: ++ match = rmatch.match(line) ++ if match: ++# print match.group(1), ":", match.group(2), ":", match.group(3), ":", match.group(7), ":" ++ time = float(match.group(1)) ++ unit = match.group(3) ++ opstart = not match.group(2) ++ optype = match.group(7) ++ hascb = match.group(8) != "0" ++ ++ if unit == 'qpu1': ++ unit = unit + "." + str(qpu_op_no) ++ if not opstart: ++ if hascb or optype == 'EXECUTE_SYNC': ++ qpu_op_no = 0 ++ else: ++ qpu_op_no += 1 ++ ++ # Ignore sync type ++ if optype == 'EXECUTE_SYNC': ++ continue ++ ++ if not time0: ++ time0 = time ++ ++ if opstart: ++ tstart[unit] = time; ++ elif unit in tstart: ++ op_count += 1 ++ if not unit in ttotal: ++ ttotal[unit] = 0.0 ++ ttotal[unit] += time - tstart[unit] ++ del tstart[unit] ++ ++ if not idle_start and not tstart: ++ idle_start = time ++ elif idle_start and tstart: ++ ttotal['idle'] += time - idle_start ++ idle_start = None ++ ++ match = rqcycle.match(line) ++ if match: ++ unit = "qpu1." + str(qpu_op_no) ++ if not unit in qctotal: ++ qctotal[unit] = 0 ++ qctotal[unit] += int(match.group(2)) ++ ++ match = rqtscycle.match(line) ++ if match: ++ unit = "qpu1." + str(qpu_op_no) ++ if not unit in qtstotal: ++ qtstotal[unit] = 0 ++ qtstotal[unit] += int(match.group(2)) ++ ++ match = rl2hits.match(line) ++ if match: ++ unit = "qpu1." + str(qpu_op_no) ++ if not unit in l2total: ++ l2total[unit] = 0 ++ l2hits[unit] = 0 ++ l2total[unit] += int(match.group(3)) ++ if match.group(2) == "hits": ++ l2hits[unit] += int(match.group(3)) ++ ++ ++ if not time0: ++ print "No v3d profile records found" ++ else: ++ tlogged = time - time0 ++ ++ print "Logged time:", tlogged, " Op count:", op_count ++ for unit in sorted(ttotal): ++ print b'%6s: %10.3f %7.3f%%' % (unit, ttotal[unit], ttotal[unit] * 100.0 / tlogged) ++ print ++ for unit in sorted(qctotal): ++ if not unit in qtstotal: ++ qtstotal[unit] = 0; ++ print b'%6s: Qcycles: %10d, TMU stall: %10d (%7.3f%%)' % (unit, qctotal[unit], qtstotal[unit], (qtstotal[unit] * 100.0)/qctotal[unit]) ++ if unit in l2total: ++ print b' L2Total: %10d, hits: %10d (%7.3f%%)' % (l2total[unit], l2hits[unit], (l2hits[unit] * 100.0)/l2total[unit]) ++ ++ ++ ++if __name__ == '__main__': ++ argp = argparse.ArgumentParser( ++ formatter_class=argparse.RawDescriptionHelpFormatter, ++ description="QPU/VPU perf summary from VC logging", ++ epilog = """ ++Will also summarise TMU stalls if logging requests set in qpu noflush param ++in the profiled code. ++ ++Example use: ++ vcgencmd set_logging level=0xc0 ++ <command to profile> ++ sudo vcdbg log msg >& t.log ++ v3dusage.py t.log ++""") ++ ++ argp.add_argument("logfile") ++ args = argp.parse_args() ++ ++ do_logparse(args.logfile) ++ +--- a/tests/checkasm/Makefile ++++ b/tests/checkasm/Makefile +@@ -38,6 +38,7 @@ CHECKASMOBJS-$(CONFIG_AVCODEC) + # libavfilter tests + AVFILTEROBJS-$(CONFIG_AFIR_FILTER) += af_afir.o + AVFILTEROBJS-$(CONFIG_BLEND_FILTER) += vf_blend.o ++AVFILTEROBJS-$(CONFIG_BWDIF_FILTER) += vf_bwdif.o + AVFILTEROBJS-$(CONFIG_COLORSPACE_FILTER) += vf_colorspace.o + AVFILTEROBJS-$(CONFIG_EQ_FILTER) += vf_eq.o + AVFILTEROBJS-$(CONFIG_GBLUR_FILTER) += vf_gblur.o +@@ -56,8 +57,9 @@ CHECKASMOBJS-$(CONFIG_SWSCALE) += $(SWS + AVUTILOBJS += av_tx.o + AVUTILOBJS += fixed_dsp.o + AVUTILOBJS += float_dsp.o ++AVUTILOBJS-$(CONFIG_SAND) += rpi_sand.o + +-CHECKASMOBJS-$(CONFIG_AVUTIL) += $(AVUTILOBJS) ++CHECKASMOBJS-$(CONFIG_AVUTIL) += $(AVUTILOBJS) $(AVUTILOBJS-yes) + + CHECKASMOBJS-$(ARCH_AARCH64) += aarch64/checkasm.o + CHECKASMOBJS-$(HAVE_ARMV5TE_EXTERNAL) += arm/checkasm.o +--- a/tests/checkasm/checkasm.c ++++ b/tests/checkasm/checkasm.c +@@ -173,6 +173,9 @@ static const struct { + #if CONFIG_BLEND_FILTER + { "vf_blend", checkasm_check_blend }, + #endif ++ #if CONFIG_BWDIF_FILTER ++ { "vf_bwdif", checkasm_check_vf_bwdif }, ++ #endif + #if CONFIG_COLORSPACE_FILTER + { "vf_colorspace", checkasm_check_colorspace }, + #endif +@@ -201,6 +204,9 @@ static const struct { + { "fixed_dsp", checkasm_check_fixed_dsp }, + { "float_dsp", checkasm_check_float_dsp }, + { "av_tx", checkasm_check_av_tx }, ++ #if CONFIG_SAND ++ { "rpi_sand", checkasm_check_rpi_sand }, ++ #endif + #endif + { NULL } + }; +--- a/tests/checkasm/checkasm.h ++++ b/tests/checkasm/checkasm.h +@@ -72,6 +72,7 @@ void checkasm_check_motion(void); + void checkasm_check_nlmeans(void); + void checkasm_check_opusdsp(void); + void checkasm_check_pixblockdsp(void); ++void checkasm_check_rpi_sand(void); + void checkasm_check_sbrdsp(void); + void checkasm_check_synth_filter(void); + void checkasm_check_sw_gbrp(void); +@@ -81,6 +82,7 @@ void checkasm_check_utvideodsp(void); + void checkasm_check_v210dec(void); + void checkasm_check_v210enc(void); + void checkasm_check_vc1dsp(void); ++void checkasm_check_vf_bwdif(void); + void checkasm_check_vf_eq(void); + void checkasm_check_vf_gblur(void); + void checkasm_check_vf_hflip(void); +--- /dev/null ++++ b/tests/checkasm/rpi_sand.c +@@ -0,0 +1,118 @@ ++/* ++ * Copyright (c) 2023 John Cox ++ * ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with FFmpeg; if not, write to the Free Software Foundation, Inc., ++ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. ++ */ ++ ++#include <string.h> ++#include "checkasm.h" ++#include "libavutil/common.h" ++#include "libavutil/rpi_sand_fns.h" ++ ++#if ARCH_ARM ++#include "libavutil/arm/cpu.h" ++#include "libavutil/arm/rpi_sand_neon.h" ++#elif ARCH_AARCH64 ++#include "libavutil/aarch64/cpu.h" ++#include "libavutil/aarch64/rpi_sand_neon.h" ++#endif ++ ++static inline uint32_t pack30(unsigned int a, unsigned int b, unsigned int c) ++{ ++ return (a & 0x3ff) | ((b & 0x3ff) << 10) | ((c & 0x3ff) << 20); ++} ++ ++void checkasm_check_rpi_sand(void) ++{ ++ const unsigned int w = 1280; ++ const unsigned int h = 66; ++ const unsigned int stride1 = 128; ++ const unsigned int stride2 = h*3/2; ++ const unsigned int ssize = ((w+95)/96)*128*h*3/2; ++ const unsigned int ysize = ((w + 32) * (h + 32) * 2); ++ ++ uint8_t * sbuf0 = malloc(ssize); ++ uint8_t * sbuf1 = malloc(ssize); ++ uint8_t * ybuf0 = malloc(ysize); ++ uint8_t * ybuf1 = malloc(ysize); ++ uint8_t * vbuf0 = malloc(ysize); ++ uint8_t * vbuf1 = malloc(ysize); ++ uint8_t * yframe0 = (w + 32) * 16 + ybuf0; ++ uint8_t * yframe1 = (w + 32) * 16 + ybuf1; ++ uint8_t * vframe0 = (w + 32) * 16 + vbuf0; ++ uint8_t * vframe1 = (w + 32) * 16 + vbuf1; ++ unsigned int i; ++ ++ for (i = 0; i != ssize; i += 4) ++ *(uint32_t*)(sbuf0 + i) = rnd(); ++ memcpy(sbuf1, sbuf0, ssize); ++ ++ if (check_func(have_neon(av_get_cpu_flags()) ? ff_rpi_sand30_lines_to_planar_y16 : av_rpi_sand30_to_planar_y16, "rpi_sand30_to_planar_y16")) { ++ declare_func(void, uint8_t * dst, const unsigned int dst_stride, ++ const uint8_t * src, ++ unsigned int stride1, unsigned int stride2, ++ unsigned int _x, unsigned int y, ++ unsigned int _w, unsigned int h); ++ ++ memset(ybuf0, 0xbb, ysize); ++ memset(ybuf1, 0xbb, ysize); ++ ++ call_ref(yframe0, (w + 32) * 2, sbuf0, stride1, stride2, 0, 0, w, h); ++ call_new(yframe1, (w + 32) * 2, sbuf1, stride1, stride2, 0, 0, w, h); ++ ++ if (memcmp(sbuf0, sbuf1, ssize) ++ || memcmp(ybuf0, ybuf1, ysize)) ++ fail(); ++ ++ bench_new(ybuf1, (w + 32) * 2, sbuf1, stride1, stride2, 0, 0, w, h); ++ } ++ ++ if (check_func(have_neon(av_get_cpu_flags()) ? ff_rpi_sand30_lines_to_planar_c16 : av_rpi_sand30_to_planar_c16, "rpi_sand30_to_planar_c16")) { ++ declare_func(void, uint8_t * u_dst, const unsigned int u_stride, ++ uint8_t * v_dst, const unsigned int v_stride, ++ const uint8_t * src, ++ unsigned int stride1, unsigned int stride2, ++ unsigned int _x, unsigned int y, ++ unsigned int _w, unsigned int h); ++ ++ memset(ybuf0, 0xbb, ysize); ++ memset(ybuf1, 0xbb, ysize); ++ memset(vbuf0, 0xbb, ysize); ++ memset(vbuf1, 0xbb, ysize); ++ ++ call_ref(yframe0, (w + 32), vframe0, (w + 32), sbuf0, stride1, stride2, 0, 0, w/2, h/2); ++ call_new(yframe1, (w + 32), vframe1, (w + 32), sbuf1, stride1, stride2, 0, 0, w/2, h/2); ++ ++ if (memcmp(sbuf0, sbuf1, ssize) ++ || memcmp(ybuf0, ybuf1, ysize) ++ || memcmp(vbuf0, vbuf1, ysize)) ++ fail(); ++ ++ bench_new(yframe1, (w + 32), vframe1, (w + 32), sbuf1, stride1, stride2, 0, 0, w/2, h/2); ++ } ++ ++ ++ report("sand30"); ++ ++ free(sbuf0); ++ free(sbuf1); ++ free(ybuf0); ++ free(ybuf1); ++ free(vbuf0); ++ free(vbuf1); ++} ++ +--- /dev/null ++++ b/tests/checkasm/vf_bwdif.c +@@ -0,0 +1,256 @@ ++/* ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with FFmpeg; if not, write to the Free Software Foundation, Inc., ++ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. ++ */ ++ ++#include <string.h> ++#include "checkasm.h" ++#include "libavcodec/internal.h" ++#include "libavfilter/bwdif.h" ++#include "libavutil/mem_internal.h" ++ ++#define WIDTH 256 ++ ++#define randomize_buffers(buf0, buf1, mask, count) \ ++ for (size_t i = 0; i < count; i++) \ ++ buf0[i] = buf1[i] = rnd() & mask ++ ++#define randomize_overflow_check(buf0, buf1, mask, count) \ ++ for (size_t i = 0; i < count; i++) \ ++ buf0[i] = buf1[i] = (rnd() & 1) != 0 ? mask : 0; ++ ++#define BODY(type, depth) \ ++ do { \ ++ type prev0[9*WIDTH], prev1[9*WIDTH]; \ ++ type next0[9*WIDTH], next1[9*WIDTH]; \ ++ type cur0[9*WIDTH], cur1[9*WIDTH]; \ ++ type dst0[WIDTH], dst1[WIDTH]; \ ++ const int stride = WIDTH; \ ++ const int mask = (1<<depth)-1; \ ++ \ ++ declare_func(void, void *dst, void *prev, void *cur, void *next, \ ++ int w, int prefs, int mrefs, int prefs2, int mrefs2, \ ++ int prefs3, int mrefs3, int prefs4, int mrefs4, \ ++ int parity, int clip_max); \ ++ \ ++ randomize_buffers(prev0, prev1, mask, 9*WIDTH); \ ++ randomize_buffers(next0, next1, mask, 9*WIDTH); \ ++ randomize_buffers( cur0, cur1, mask, 9*WIDTH); \ ++ \ ++ call_ref(dst0, prev0 + 4*WIDTH, cur0 + 4*WIDTH, next0 + 4*WIDTH, \ ++ WIDTH, stride, -stride, 2*stride, -2*stride, \ ++ 3*stride, -3*stride, 4*stride, -4*stride, \ ++ 0, mask); \ ++ call_new(dst1, prev1 + 4*WIDTH, cur1 + 4*WIDTH, next1 + 4*WIDTH, \ ++ WIDTH, stride, -stride, 2*stride, -2*stride, \ ++ 3*stride, -3*stride, 4*stride, -4*stride, \ ++ 0, mask); \ ++ \ ++ if (memcmp(dst0, dst1, sizeof dst0) \ ++ || memcmp(prev0, prev1, sizeof prev0) \ ++ || memcmp(next0, next1, sizeof next0) \ ++ || memcmp( cur0, cur1, sizeof cur0)) \ ++ fail(); \ ++ bench_new(dst1, prev1 + 4*WIDTH, cur1 + 4*WIDTH, next1 + 4*WIDTH, \ ++ WIDTH, stride, -stride, 2*stride, -2*stride, \ ++ 3*stride, -3*stride, 4*stride, -4*stride, \ ++ 0, mask); \ ++ } while (0) ++ ++void checkasm_check_vf_bwdif(void) ++{ ++ BWDIFContext ctx_8, ctx_10; ++ ++ ff_bwdif_init_filter_line(&ctx_8, 8); ++ ff_bwdif_init_filter_line(&ctx_10, 10); ++ ++ if (check_func(ctx_8.filter_line, "bwdif8")) { ++ BODY(uint8_t, 8); ++ report("bwdif8"); ++ } ++ ++ if (check_func(ctx_10.filter_line, "bwdif10")) { ++ BODY(uint16_t, 10); ++ report("bwdif10"); ++ } ++ ++ if (!ctx_8.filter_line3) ++ ctx_8.filter_line3 = ff_bwdif_filter_line3_c; ++ ++ { ++ LOCAL_ALIGNED_16(uint8_t, prev0, [11*WIDTH]); ++ LOCAL_ALIGNED_16(uint8_t, prev1, [11*WIDTH]); ++ LOCAL_ALIGNED_16(uint8_t, next0, [11*WIDTH]); ++ LOCAL_ALIGNED_16(uint8_t, next1, [11*WIDTH]); ++ LOCAL_ALIGNED_16(uint8_t, cur0, [11*WIDTH]); ++ LOCAL_ALIGNED_16(uint8_t, cur1, [11*WIDTH]); ++ LOCAL_ALIGNED_16(uint8_t, dst0, [WIDTH*3]); ++ LOCAL_ALIGNED_16(uint8_t, dst1, [WIDTH*3]); ++ const int stride = WIDTH; ++ const int mask = (1<<8)-1; ++ int parity; ++ ++ for (parity = 0; parity != 2; ++parity) { ++ if (check_func(ctx_8.filter_line3, "bwdif8.line3.rnd.p%d", parity)) { ++ ++ declare_func(void, void * dst1, int d_stride, ++ const void * prev1, const void * cur1, const void * next1, int prefs, ++ int w, int parity, int clip_max); ++ ++ randomize_buffers(prev0, prev1, mask, 11*WIDTH); ++ randomize_buffers(next0, next1, mask, 11*WIDTH); ++ randomize_buffers( cur0, cur1, mask, 11*WIDTH); ++ ++ call_ref(dst0, stride, ++ prev0 + stride * 4, cur0 + stride * 4, next0 + stride * 4, stride, ++ WIDTH, parity, mask); ++ call_new(dst1, stride, ++ prev1 + stride * 4, cur1 + stride * 4, next1 + stride * 4, stride, ++ WIDTH, parity, mask); ++ ++ if (memcmp(dst0, dst1, WIDTH*3) ++ || memcmp(prev0, prev1, WIDTH*11) ++ || memcmp(next0, next1, WIDTH*11) ++ || memcmp( cur0, cur1, WIDTH*11)) ++ fail(); ++ ++ bench_new(dst1, stride, ++ prev1 + stride * 4, cur1 + stride * 4, next1 + stride * 4, stride, ++ WIDTH, parity, mask); ++ } ++ } ++ ++ // Use just 0s and ~0s to try to provoke bad cropping or overflow ++ // Parity makes no difference to this test so just test 0 ++ if (check_func(ctx_8.filter_line3, "bwdif8.line3.overflow")) { ++ ++ declare_func(void, void * dst1, int d_stride, ++ const void * prev1, const void * cur1, const void * next1, int prefs, ++ int w, int parity, int clip_max); ++ ++ randomize_overflow_check(prev0, prev1, mask, 11*WIDTH); ++ randomize_overflow_check(next0, next1, mask, 11*WIDTH); ++ randomize_overflow_check( cur0, cur1, mask, 11*WIDTH); ++ ++ call_ref(dst0, stride, ++ prev0 + stride * 4, cur0 + stride * 4, next0 + stride * 4, stride, ++ WIDTH, 0, mask); ++ call_new(dst1, stride, ++ prev1 + stride * 4, cur1 + stride * 4, next1 + stride * 4, stride, ++ WIDTH, 0, mask); ++ ++ if (memcmp(dst0, dst1, WIDTH*3) ++ || memcmp(prev0, prev1, WIDTH*11) ++ || memcmp(next0, next1, WIDTH*11) ++ || memcmp( cur0, cur1, WIDTH*11)) ++ fail(); ++ ++ // No point to benching ++ } ++ ++ report("bwdif8.line3"); ++ } ++ ++ { ++ LOCAL_ALIGNED_16(uint8_t, prev0, [11*WIDTH]); ++ LOCAL_ALIGNED_16(uint8_t, prev1, [11*WIDTH]); ++ LOCAL_ALIGNED_16(uint8_t, next0, [11*WIDTH]); ++ LOCAL_ALIGNED_16(uint8_t, next1, [11*WIDTH]); ++ LOCAL_ALIGNED_16(uint8_t, cur0, [11*WIDTH]); ++ LOCAL_ALIGNED_16(uint8_t, cur1, [11*WIDTH]); ++ LOCAL_ALIGNED_16(uint8_t, dst0, [WIDTH*3]); ++ LOCAL_ALIGNED_16(uint8_t, dst1, [WIDTH*3]); ++ const int stride = WIDTH; ++ const int mask = (1<<8)-1; ++ int spat; ++ int parity; ++ ++ for (spat = 0; spat != 2; ++spat) { ++ for (parity = 0; parity != 2; ++parity) { ++ if (check_func(ctx_8.filter_edge, "bwdif8.edge.s%d.p%d", spat, parity)) { ++ ++ declare_func(void, void *dst1, void *prev1, void *cur1, void *next1, ++ int w, int prefs, int mrefs, int prefs2, int mrefs2, ++ int parity, int clip_max, int spat); ++ ++ randomize_buffers(prev0, prev1, mask, 11*WIDTH); ++ randomize_buffers(next0, next1, mask, 11*WIDTH); ++ randomize_buffers( cur0, cur1, mask, 11*WIDTH); ++ memset(dst0, 0xba, WIDTH * 3); ++ memset(dst1, 0xba, WIDTH * 3); ++ ++ call_ref(dst0 + stride, ++ prev0 + stride * 4, cur0 + stride * 4, next0 + stride * 4, WIDTH, ++ stride, -stride, stride * 2, -stride * 2, ++ parity, mask, spat); ++ call_new(dst1 + stride, ++ prev1 + stride * 4, cur1 + stride * 4, next1 + stride * 4, WIDTH, ++ stride, -stride, stride * 2, -stride * 2, ++ parity, mask, spat); ++ ++ if (memcmp(dst0, dst1, WIDTH*3) ++ || memcmp(prev0, prev1, WIDTH*11) ++ || memcmp(next0, next1, WIDTH*11) ++ || memcmp( cur0, cur1, WIDTH*11)) ++ fail(); ++ ++ bench_new(dst1 + stride, ++ prev1 + stride * 4, cur1 + stride * 4, next1 + stride * 4, WIDTH, ++ stride, -stride, stride * 2, -stride * 2, ++ parity, mask, spat); ++ } ++ } ++ } ++ ++ report("bwdif8.edge"); ++ } ++ ++ if (check_func(ctx_8.filter_intra, "bwdif8.intra")) { ++ LOCAL_ALIGNED_16(uint8_t, cur0, [11*WIDTH]); ++ LOCAL_ALIGNED_16(uint8_t, cur1, [11*WIDTH]); ++ LOCAL_ALIGNED_16(uint8_t, dst0, [WIDTH*3]); ++ LOCAL_ALIGNED_16(uint8_t, dst1, [WIDTH*3]); ++ const int stride = WIDTH; ++ const int mask = (1<<8)-1; ++ ++ declare_func(void, void *dst1, void *cur1, int w, int prefs, int mrefs, ++ int prefs3, int mrefs3, int parity, int clip_max); ++ ++ randomize_buffers( cur0, cur1, mask, 11*WIDTH); ++ memset(dst0, 0xba, WIDTH * 3); ++ memset(dst1, 0xba, WIDTH * 3); ++ ++ call_ref(dst0 + stride, ++ cur0 + stride * 4, WIDTH, ++ stride, -stride, stride * 3, -stride * 3, ++ 0, mask); ++ call_new(dst1 + stride, ++ cur0 + stride * 4, WIDTH, ++ stride, -stride, stride * 3, -stride * 3, ++ 0, mask); ++ ++ if (memcmp(dst0, dst1, WIDTH*3) ++ || memcmp( cur0, cur1, WIDTH*11)) ++ fail(); ++ ++ bench_new(dst1 + stride, ++ cur0 + stride * 4, WIDTH, ++ stride, -stride, stride * 3, -stride * 3, ++ 0, mask); ++ ++ report("bwdif8.intra"); ++ } ++} +--- a/tests/fate/checkasm.mak ++++ b/tests/fate/checkasm.mak +@@ -26,6 +26,7 @@ FATE_CHECKASM = fate-checkasm-aacpsdsp + fate-checkasm-motion \ + fate-checkasm-opusdsp \ + fate-checkasm-pixblockdsp \ ++ fate-checkasm-rpi_sand \ + fate-checkasm-sbrdsp \ + fate-checkasm-synth_filter \ + fate-checkasm-sw_gbrp \ +@@ -36,6 +37,7 @@ FATE_CHECKASM = fate-checkasm-aacpsdsp + fate-checkasm-v210enc \ + fate-checkasm-vc1dsp \ + fate-checkasm-vf_blend \ ++ fate-checkasm-vf_bwdif \ + fate-checkasm-vf_colorspace \ + fate-checkasm-vf_eq \ + fate-checkasm-vf_gblur \ diff --git a/recipes-multimedia/rpidistro-ffmpeg/files/2001-configure-setup-for-OE-core-usage.patch b/recipes-multimedia/rpidistro-ffmpeg/files/2001-configure-setup-for-OE-core-usage.patch new file mode 100644 index 0000000..5a064b8 --- /dev/null +++ b/recipes-multimedia/rpidistro-ffmpeg/files/2001-configure-setup-for-OE-core-usage.patch @@ -0,0 +1,79 @@ +From 702742f9575c87ac8c496d76daf51af7d4aaebd7 Mon Sep 17 00:00:00 2001 +From: Vincent Davis Jr <vince@underview.tech> +Date: Sun, 9 Jun 2024 18:09:25 -0400 +Subject: [PATCH] configure: setup for OE-core usage + +Upstream-Status: Inappropriate + +RPI-Distro repo clones original ffmpeg and applies patches to enable +raspiberry pi support. + +Add global CFLAGS and LDFLAGS. So, that when +./configure runs test it's able to locate proper +headers and libs in a cross-compile environment. + +Add new check to opengl. None of the above headers +exists and we also should be using GLESv2. + +Update where compiler finds OMX_Core.h + +Only check that sdl2 version greater than 3.0.0 + +Signed-off-by: Vincent Davis Jr <vince@underview.tech> +--- + configure | 14 +++++++++----- + 1 file changed, 9 insertions(+), 5 deletions(-) + +diff --git a/configure b/configure +index 7214c221..7a541e63 100755 +--- a/configure ++++ b/configure +@@ -5898,6 +5898,9 @@ enable_weak_pic() { + } + + enabled pic && enable_weak_pic ++# Set CFLAGS and LDFLAGS globally ++add_cflags -I${sysroot}/usr/include/ -I${sysroot}/usr/include/IL -I${sysroot}/usr/include/drm ++add_ldflags -L${sysroot}/usr/lib + + test_cc <<EOF || die "Symbol mangling check failed." + int ff_extern; +@@ -6716,8 +6719,8 @@ enabled mbedtls && { check_pkg_config mbedtls mbedtls mbedtls/x509_crt + enabled mediacodec && { enabled jni || die "ERROR: mediacodec requires --enable-jni"; } + enabled mmal && { check_lib mmal interface/mmal/mmal.h mmal_port_connect -lmmal_core -lmmal_util -lmmal_vc_client -lbcm_host || + { ! enabled cross_compile && +- add_cflags -isystem/opt/vc/include/ -isystem/opt/vc/include/interface/vmcs_host/linux -isystem/opt/vc/include/interface/vcos/pthreads -fgnu89-inline && +- add_ldflags -L/opt/vc/lib/ && ++ add_cflags -I${sysroot}/usr/include -I${sysroot}/usr/include/interface/vmcs_host/linux -I${sysroot}/usr/include/interface/vcos/pthreads -fgnu89-inline && ++ add_ldflags -L${sysroot}/usr/lib && + check_lib mmal interface/mmal/mmal.h mmal_port_connect -lmmal_core -lmmal_util -lmmal_vc_client -lbcm_host; } || + die "ERROR: mmal not found" && + check_func_headers interface/mmal/mmal.h "MMAL_PARAMETER_VIDEO_MAX_NUM_CALLBACKS"; } +@@ -6737,12 +6740,13 @@ enabled opengl && { check_lib opengl GL/glx.h glXGetProcAddress "-lGL + check_lib opengl windows.h wglGetProcAddress "-lopengl32 -lgdi32" || + check_lib opengl OpenGL/gl3.h glGetError "-Wl,-framework,OpenGL" || + check_lib opengl ES2/gl.h glGetError "-isysroot=${sysroot} -Wl,-framework,OpenGLES" || ++ check_lib opengl GLES2/gl2.h glGetError "-lGLESv2" || + die "ERROR: opengl not found." + } + enabled omx_rpi && { test_code cc OMX_Core.h OMX_IndexConfigBrcmVideoRequestIFrame || + { ! enabled cross_compile && +- add_cflags -isystem/opt/vc/include/IL && +- test_code cc OMX_Core.h OMX_IndexConfigBrcmVideoRequestIFrame; } || ++ add_cflags -I${sysroot}/usr/include/IL && ++ test_code cc IL/OMX_Core.h OMX_IndexConfigBrcmVideoRequestIFrame; } || + die "ERROR: OpenMAX IL headers from raspberrypi/firmware not found"; } && + enable omx + enabled omx && require_headers OMX_Core.h +@@ -6788,7 +6792,7 @@ fi + + if enabled sdl2; then + SDL2_CONFIG="${cross_prefix}sdl2-config" +- test_pkg_config sdl2 "sdl2 >= 2.0.1 sdl2 < 3.0.0" SDL_events.h SDL_PollEvent ++ test_pkg_config sdl2 "sdl2 >= 2.0.1" SDL_events.h SDL_PollEvent + if disabled sdl2 && "${SDL2_CONFIG}" --version > /dev/null 2>&1; then + sdl2_cflags=$("${SDL2_CONFIG}" --cflags) + sdl2_extralibs=$("${SDL2_CONFIG}" --libs) +-- +2.34.1 + diff --git a/recipes-multimedia/rpidistro-ffmpeg/files/2004-libavcodec-omx-replace-opt-vc-path-with-usr-lib.patch b/recipes-multimedia/rpidistro-ffmpeg/files/2004-libavcodec-omx-replace-opt-vc-path-with-usr-lib.patch new file mode 100644 index 0000000..02c07de --- /dev/null +++ b/recipes-multimedia/rpidistro-ffmpeg/files/2004-libavcodec-omx-replace-opt-vc-path-with-usr-lib.patch @@ -0,0 +1,35 @@ +From 0dfb56e12fa709794525cda1471091f6699905d5 Mon Sep 17 00:00:00 2001 +From: Vincent Davis Jr <vince@underview.tech> +Date: Thu, 8 Dec 2022 10:49:03 -0600 +Subject: [PATCH] libavcodec: omx replace /opt/vc path with /usr/lib + +Upstream-Status: Inappropriate + +RPI-Distro repo clones original ffmpeg and applies patches to enable +raspiberry pi support. + +Configures omx.c for OE usages as libbcm_host.so +and libopenmaxil.so are located in a different +location. + +Signed-off-by: Vincent Davis Jr <vince@underview.tech> +--- + libavcodec/omx.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/libavcodec/omx.c b/libavcodec/omx.c +index 0a6a3083..8c6e9193 100644 +--- a/libavcodec/omx.c ++++ b/libavcodec/omx.c +@@ -141,7 +141,7 @@ static av_cold OMXContext *omx_init(void *logctx, const char *libname, const cha + { + static const char * const libnames[] = { + #if CONFIG_OMX_RPI +- "/opt/vc/lib/libopenmaxil.so", "/opt/vc/lib/libbcm_host.so", ++ "/usr/lib/libopenmaxil.so", "/usr/lib/libbcm_host.so", + #else + "libOMX_Core.so", NULL, + "libOmxCore.so", NULL, +-- +2.38.1 + diff --git a/recipes-multimedia/rpidistro-ffmpeg/rpidistro-ffmpeg_5.1.4.bb b/recipes-multimedia/rpidistro-ffmpeg/rpidistro-ffmpeg_5.1.4.bb new file mode 100644 index 0000000..e8f640b --- /dev/null +++ b/recipes-multimedia/rpidistro-ffmpeg/rpidistro-ffmpeg_5.1.4.bb @@ -0,0 +1,192 @@ +SUMMARY = "A complete, cross-platform solution to record, convert and stream audio and video." +DESCRIPTION = "FFmpeg is the leading multimedia framework, able to decode, encode, transcode, \ + mux, demux, stream, filter and play pretty much anything that humans and machines \ + have created. It supports the most obscure ancient formats up to the cutting edge." +HOMEPAGE = "https://www.ffmpeg.org/" +SECTION = "libs" + +LICENSE = "GPL-2.0-or-later & LGPL-2.1-or-later & ISC & MIT & BSD-2-Clause & BSD-3-Clause & IJG" +LICENSE:${PN} = "GPL-2.0-or-later" +LICENSE:libavcodec = "${@bb.utils.contains('PACKAGECONFIG', 'gpl', 'GPL-2.0-or-later', 'LGPL-2.1-or-later', d)}" +LICENSE:libavdevice = "${@bb.utils.contains('PACKAGECONFIG', 'gpl', 'GPL-2.0-or-later', 'LGPL-2.1-or-later', d)}" +LICENSE:libavfilter = "${@bb.utils.contains('PACKAGECONFIG', 'gpl', 'GPL-2.0-or-later', 'LGPL-2.1-or-later', d)}" +LICENSE:libavformat = "${@bb.utils.contains('PACKAGECONFIG', 'gpl', 'GPL-2.0-or-later', 'LGPL-2.1-or-later', d)}" +LICENSE:libavutil = "${@bb.utils.contains('PACKAGECONFIG', 'gpl', 'GPL-2.0-or-later', 'LGPL-2.1-or-later', d)}" +LICENSE:libpostproc = "GPL-2.0-or-later" +LICENSE:libswresample = "${@bb.utils.contains('PACKAGECONFIG', 'gpl', 'GPL-2.0-or-later', 'LGPL-2.1-or-later', d)}" +LICENSE:libswscale = "${@bb.utils.contains('PACKAGECONFIG', 'gpl', 'GPL-2.0-or-later', 'LGPL-2.1-or-later', d)}" +LICENSE_FLAGS = "commercial" + +LIC_FILES_CHKSUM = "file://COPYING.GPLv2;md5=b234ee4d69f5fce4486a80fdaf4a4263 \ + file://COPYING.GPLv3;md5=d32239bcb673463ab874e80d47fae504 \ + file://COPYING.LGPLv2.1;md5=bd7a443320af8c812e4c18d1b79df004 \ + file://COPYING.LGPLv3;md5=e6a600fd5e1d9cbde2d983680233ad02" + +# Build fails when thumb is enabled: https://bugzilla.yoctoproject.org/show_bug.cgi?id=7717 +ARM_INSTRUCTION_SET:armv4 = "arm" +ARM_INSTRUCTION_SET:armv5 = "arm" +ARM_INSTRUCTION_SET:armv6 = "arm" +# Should be API compatible with libav (which was a fork of ffmpeg) +# libpostproc was previously packaged from a separate recipe +PROVIDES = "ffmpeg libav libpostproc" +RPROVIDES:${PN} = "${PROVIDES}" +DEPENDS = "nasm-native" + +inherit autotools pkgconfig +PACKAGECONFIG ??= "avdevice avfilter avcodec avformat swresample swscale postproc ffplay \ + v4l2 drm udev alsa bzlib lzma pic pthreads shared theora zlib libvorbis x264 gpl \ + ${@bb.utils.contains('MACHINE_FEATURES', 'vc4graphics', '', 'mmal sand vout-drm', d)} \ + ${@bb.utils.contains('AVAILTUNES', 'mips32r2', 'mips32r2', '', d)} \ + ${@bb.utils.contains('DISTRO_FEATURES', 'opengl', 'opengl', '', d)} \ + ${@bb.utils.contains('DISTRO_FEATURES', 'x11', 'xv xcb', '', d)} \ + ${@bb.utils.contains('DISTRO_FEATURES', 'x11 opengl', 'epoxy vout-egl', '', d)}" + +SRC_URI = "\ + git://git@github.com/RPi-Distro/ffmpeg;protocol=https;branch=pios/bookworm \ + file://0001-ffmpeg-5.1.4-rpi_24.patch \ + file://2001-configure-setup-for-OE-core-usage.patch \ + file://2004-libavcodec-omx-replace-opt-vc-path-with-usr-lib.patch \ + " + +SRCREV = "1c363463c432c5ed492c7b759abb6e015b93b6b5" + +S = "${WORKDIR}/git" + +# libraries to build in addition to avutil +PACKAGECONFIG[avdevice] = "--enable-avdevice,--disable-avdevice" +PACKAGECONFIG[avfilter] = "--enable-avfilter,--disable-avfilter" +PACKAGECONFIG[avcodec] = "--enable-avcodec,--disable-avcodec" +PACKAGECONFIG[avformat] = "--enable-avformat,--disable-avformat" +PACKAGECONFIG[swresample] = "--enable-swresample,--disable-swresample" +PACKAGECONFIG[swscale] = "--enable-swscale,--disable-swscale" +PACKAGECONFIG[postproc] = "--enable-postproc,--disable-postproc" +#PACKAGECONFIG[avresample] = "--enable-avresample,--disable-avresample" + +# features to support +PACKAGECONFIG[ffplay] = "--enable-ffplay,--disable-ffplay" +PACKAGECONFIG[alsa] = "--enable-alsa,--disable-alsa,alsa-lib" +PACKAGECONFIG[altivec] = "--enable-altivec,--disable-altivec," +PACKAGECONFIG[bzlib] = "--enable-bzlib,--disable-bzlib,bzip2" +PACKAGECONFIG[fdk-aac] = "--enable-libfdk-aac --enable-nonfree,--disable-libfdk-aac,fdk-aac" +PACKAGECONFIG[gpl] = "--enable-gpl,--disable-gpl" +PACKAGECONFIG[opengl] = "--enable-opengl,--disable-opengl,virtual/libgles2" +PACKAGECONFIG[gsm] = "--enable-libgsm,--disable-libgsm,libgsm" +PACKAGECONFIG[jack] = "--enable-indev=jack,--disable-indev=jack,jack" +PACKAGECONFIG[libvorbis] = "--enable-libvorbis,--disable-libvorbis,libvorbis" +PACKAGECONFIG[libopus] = "--enable-libopus,--disable-libopus,libopus" +PACKAGECONFIG[lzma] = "--enable-lzma,--disable-lzma,xz" +PACKAGECONFIG[mfx] = "--enable-libmfx,--disable-libmfx,intel-mediasdk" +PACKAGECONFIG[mp3lame] = "--enable-libmp3lame,--disable-libmp3lame,lame" +PACKAGECONFIG[openssl] = "--enable-openssl,--disable-openssl,openssl" +PACKAGECONFIG[sdl2] = "--enable-sdl2,--disable-sdl2,virtual/libsdl2" +PACKAGECONFIG[speex] = "--enable-libspeex,--disable-libspeex,speex" +PACKAGECONFIG[srt] = "--enable-libsrt,--disable-libsrt,srt" +PACKAGECONFIG[theora] = "--enable-libtheora,--disable-libtheora,libtheora libogg" +PACKAGECONFIG[vaapi] = "--enable-vaapi,--disable-vaapi,libva" +PACKAGECONFIG[vdpau] = "--enable-vdpau,--disable-vdpau,libvdpau" +PACKAGECONFIG[vpx] = "--enable-libvpx,--disable-libvpx,libvpx" +PACKAGECONFIG[x264] = "--enable-libx264,--disable-libx264,x264" +PACKAGECONFIG[xcb] = "--enable-libxcb,--disable-libxcb,libxcb" +PACKAGECONFIG[xv] = "--enable-outdev=xv,--disable-outdev=xv,libxv" +PACKAGECONFIG[zlib] = "--enable-zlib,--disable-zlib,zlib" +PACKAGECONFIG[snappy] = "--enable-libsnappy,--disable-libsnappy,snappy" +PACKAGECONFIG[udev] = "--enable-libudev,--disable-libudev,udev" +PACKAGECONFIG[drm] = "--enable-libdrm,--disable-libdrm,libdrm" +PACKAGECONFIG[epoxy] = "--enable-epoxy,--disable-epoxy,libepoxy" +PACKAGECONFIG[v4l2] = "--enable-libv4l2 --enable-v4l2-m2m,,v4l-utils" +PACKAGECONFIG[mmal] = "--enable-omx --enable-omx-rpi --enable-mmal,,userland" +PACKAGECONFIG[sand] = "--enable-sand,," +PACKAGECONFIG[vout-drm] = "--enable-vout-drm,,libdrm" +PACKAGECONFIG[vout-egl] = "--enable-vout-egl,,virtual/egl" + +# other configuration options +PACKAGECONFIG[mips32r2] = ",--disable-mipsdsp --disable-mipsdspr2" +PACKAGECONFIG[pic] = "--enable-pic" +PACKAGECONFIG[pthreads] = "--enable-pthreads,--disable-pthreads" +PACKAGECONFIG[shared] = "--enable-shared" +PACKAGECONFIG[strip] = ",--disable-stripping" + +# Check codecs that require --enable-nonfree +USE_NONFREE = "${@bb.utils.contains_any('PACKAGECONFIG', [ 'openssl' ], 'yes', '', d)}" + +def cpu(d): + for arg in (d.getVar('TUNE_CCARGS') or '').split(): + if arg.startswith('-mcpu='): + return arg[6:] + return 'generic' + +EXTRA_OECONF = " \ + ${@bb.utils.contains('USE_NONFREE', 'yes', '--enable-nonfree', '', d)} \ + \ + --cross-prefix=${TARGET_PREFIX} \ + \ + --ld="${CCLD}" \ + --cc="${CC}" \ + --cxx="${CXX}" \ + --arch=${TARGET_ARCH} \ + --target-os="linux" \ + --enable-cross-compile \ + --extra-cflags="${CFLAGS} ${HOST_CC_ARCH}${TOOLCHAIN_OPTIONS}" \ + --extra-ldflags="${LDFLAGS}" \ + --sysroot="${STAGING_DIR_TARGET}" \ + ${EXTRA_FFCONF} \ + --libdir=${libdir} \ + --shlibdir=${libdir} \ + --datadir=${datadir}/ffmpeg \ + --cpu=${@cpu(d)} \ + --pkg-config=pkg-config \ +" +EXTRA_OECONF:append:linux-gnux32 = " --disable-asm" + +# Some patches introduce assembly files which needs preprocessing with +# gcc e.g. src/libavutil/aarch64/rpi_sand_neon.S +TOOLCHAIN = "gcc" +# gold crashes on x86, another solution is to --disable-asm but thats more hacky +# ld.gold: internal error in relocate_section, at ../../gold/i386.cc:3684 +LDFLAGS:append:x86 = "${@bb.utils.contains('DISTRO_FEATURES', 'ld-is-gold', ' -fuse-ld=bfd ', '', d)}" +EXTRA_OEMAKE = "V=1" + +do_configure() { + ${S}/configure ${EXTRA_OECONF} +} + +# patch out build host paths for reproducibility +do_compile:prepend:class-target() { + sed -i -e "s,${WORKDIR},,g" ${B}/config.h +} + +PACKAGES =+ "libavcodec \ + libavdevice \ + libavfilter \ + libavformat \ + libavresample \ + libavutil \ + libpostproc \ + libswresample \ + libswscale" + +FILES:${PN}:append = " /usr/share/ffmpeg" +FILES:libavcodec = "${libdir}/libavcodec${SOLIBS}" +FILES:libavdevice = "${libdir}/libavdevice${SOLIBS}" +FILES:libavfilter = "${libdir}/libavfilter${SOLIBS}" +FILES:libavformat = "${libdir}/libavformat${SOLIBS}" +FILES:libavresample = "${libdir}/libavresample${SOLIBS}" +FILES:libavutil = "${libdir}/libavutil${SOLIBS}" +FILES:libpostproc = "${libdir}/libpostproc${SOLIBS}" +FILES:libswresample = "${libdir}/libswresample${SOLIBS}" +FILES:libswscale = "${libdir}/libswscale${SOLIBS}" +# ffmpeg disables PIC on some platforms (e.g. x86-32) +INSANE_SKIP:${MLPREFIX}libavcodec = "textrel" +INSANE_SKIP:${MLPREFIX}libavdevice = "textrel" +INSANE_SKIP:${MLPREFIX}libavfilter = "textrel" +INSANE_SKIP:${MLPREFIX}libavformat = "textrel" +INSANE_SKIP:${MLPREFIX}libavutil = "textrel" +INSANE_SKIP:${MLPREFIX}libavresample = "textrel" +INSANE_SKIP:${MLPREFIX}libswscale = "textrel" +INSANE_SKIP:${MLPREFIX}libswresample = "textrel" +INSANE_SKIP:${MLPREFIX}libpostproc = "textrel" + +# Only enable it for rpi class of machines +COMPATIBLE_HOST = "null" +COMPATIBLE_HOST:rpi = "(.*)" + |