diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp index a3fda1094a..8b86ad0508 100644 --- a/src/video_core/cdma_pusher.cpp +++ b/src/video_core/cdma_pusher.cpp @@ -103,8 +103,7 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) { case ThiMethod::SetMethod1: LOG_DEBUG(Service_NVDRV, "NVDEC method 0x{:X}", static_cast(nvdec_thi_state.method_0)); - nvdec_processor->ProcessMethod(static_cast(nvdec_thi_state.method_0), - data); + nvdec_processor->ProcessMethod(nvdec_thi_state.method_0, data); break; default: break; diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index d02dc6260d..1b4bbc8ac4 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp @@ -23,8 +23,8 @@ void AVFrameDeleter(AVFrame* ptr) { av_free(ptr); } -Codec::Codec(GPU& gpu_) - : gpu(gpu_), h264_decoder(std::make_unique(gpu)), +Codec::Codec(GPU& gpu_, const NvdecCommon::NvdecRegisters& regs) + : gpu(gpu_), state{regs}, h264_decoder(std::make_unique(gpu)), vp9_decoder(std::make_unique(gpu)) {} Codec::~Codec() { @@ -43,46 +43,48 @@ Codec::~Codec() { avcodec_close(av_codec_ctx); } -void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) { - if (current_codec != codec) { - LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", static_cast(codec)); - current_codec = codec; +void Codec::Initialize() { + AVCodecID codec{AV_CODEC_ID_NONE}; + switch (current_codec) { + case NvdecCommon::VideoCodec::H264: + codec = AV_CODEC_ID_H264; + break; + case NvdecCommon::VideoCodec::Vp9: + codec = AV_CODEC_ID_VP9; + break; + default: + return; } + av_codec = avcodec_find_decoder(codec); + av_codec_ctx = avcodec_alloc_context3(av_codec); + av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0); + + // TODO(ameerj): libavcodec gpu hw acceleration + + const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr); + if (av_error < 0) { + LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed."); + avcodec_close(av_codec_ctx); + return; + } + initialized = true; + return; } -void Codec::StateWrite(u32 offset, u64 arguments) { - u8* const state_offset = reinterpret_cast(&state) + offset * sizeof(u64); - std::memcpy(state_offset, &arguments, sizeof(u64)); +void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) { + if (current_codec != codec) { + current_codec = codec; + LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", GetCurrentCodecName()); + } } void Codec::Decode() { - bool is_first_frame = false; + const bool is_first_frame = !initialized; if (!initialized) { - if (current_codec == NvdecCommon::VideoCodec::H264) { - av_codec = avcodec_find_decoder(AV_CODEC_ID_H264); - } else if (current_codec == NvdecCommon::VideoCodec::Vp9) { - av_codec = avcodec_find_decoder(AV_CODEC_ID_VP9); - } else { - LOG_ERROR(Service_NVDRV, "Unknown video codec {}", current_codec); - return; - } - - av_codec_ctx = avcodec_alloc_context3(av_codec); - av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0); - - // TODO(ameerj): libavcodec gpu hw acceleration - - const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr); - if (av_error < 0) { - LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed."); - avcodec_close(av_codec_ctx); - return; - } - initialized = true; - is_first_frame = true; + Initialize(); } - bool vp9_hidden_frame = false; + bool vp9_hidden_frame = false; AVPacket packet{}; av_init_packet(&packet); std::vector frame_data; @@ -95,7 +97,7 @@ void Codec::Decode() { } packet.data = frame_data.data(); - packet.size = static_cast(frame_data.size()); + packet.size = static_cast(frame_data.size()); avcodec_send_packet(av_codec_ctx, &packet); @@ -127,4 +129,21 @@ NvdecCommon::VideoCodec Codec::GetCurrentCodec() const { return current_codec; } +std::string_view Codec::GetCurrentCodecName() const { + switch (current_codec) { + case NvdecCommon::VideoCodec::None: + return "None"; + case NvdecCommon::VideoCodec::H264: + return "H264"; + case NvdecCommon::VideoCodec::Vp8: + return "VP8"; + case NvdecCommon::VideoCodec::H265: + return "H265"; + case NvdecCommon::VideoCodec::Vp9: + return "VP9"; + default: + return "Unknown"; + } +}; + } // namespace Tegra diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h index 3e135a2a63..f2aef1699d 100644 --- a/src/video_core/command_classes/codecs/codec.h +++ b/src/video_core/command_classes/codecs/codec.h @@ -42,15 +42,15 @@ class VP9; class Codec { public: - explicit Codec(GPU& gpu); + explicit Codec(GPU& gpu, const NvdecCommon::NvdecRegisters& regs); ~Codec(); + /// Initialize the codec, returning success or failure + void Initialize(); + /// Sets NVDEC video stream codec void SetTargetCodec(NvdecCommon::VideoCodec codec); - /// Populate NvdecRegisters state with argument value at the provided offset - void StateWrite(u32 offset, u64 arguments); - /// Call decoders to construct headers, decode AVFrame with ffmpeg void Decode(); @@ -59,6 +59,8 @@ public: /// Returns the value of current_codec [[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const; + /// Return name of the current codec + [[nodiscard]] std::string_view GetCurrentCodecName() const; private: bool initialized{}; @@ -68,10 +70,10 @@ private: AVCodecContext* av_codec_ctx{nullptr}; GPU& gpu; + const NvdecCommon::NvdecRegisters& state; std::unique_ptr h264_decoder; std::unique_ptr vp9_decoder; - NvdecCommon::NvdecRegisters state{}; std::queue av_frames{}; }; diff --git a/src/video_core/command_classes/codecs/h264.cpp b/src/video_core/command_classes/codecs/h264.cpp index fea6aed989..5fb6d45eeb 100644 --- a/src/video_core/command_classes/codecs/h264.cpp +++ b/src/video_core/command_classes/codecs/h264.cpp @@ -45,135 +45,130 @@ H264::~H264() = default; const std::vector& H264::ComposeFrameHeader(const NvdecCommon::NvdecRegisters& state, bool is_first_frame) { - H264DecoderContext context{}; + H264DecoderContext context; gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext)); - const s32 frame_number = static_cast((context.h264_parameter_set.flags >> 46) & 0x1ffff); + const s64 frame_number = context.h264_parameter_set.frame_number.Value(); if (!is_first_frame && frame_number != 0) { - frame.resize(context.frame_data_size); - + frame.resize(context.stream_len); gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size()); - } else { - /// Encode header - H264BitWriter writer{}; - writer.WriteU(1, 24); - writer.WriteU(0, 1); - writer.WriteU(3, 2); - writer.WriteU(7, 5); - writer.WriteU(100, 8); - writer.WriteU(0, 8); - writer.WriteU(31, 8); - writer.WriteUe(0); - const auto chroma_format_idc = - static_cast((context.h264_parameter_set.flags >> 12) & 3); - writer.WriteUe(chroma_format_idc); - if (chroma_format_idc == 3) { - writer.WriteBit(false); - } - - writer.WriteUe(0); - writer.WriteUe(0); - writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag - writer.WriteBit(false); // Scaling matrix present flag - - const auto order_cnt_type = static_cast((context.h264_parameter_set.flags >> 14) & 3); - writer.WriteUe(static_cast((context.h264_parameter_set.flags >> 8) & 0xf)); - writer.WriteUe(order_cnt_type); - if (order_cnt_type == 0) { - writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt); - } else if (order_cnt_type == 1) { - writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0); - - writer.WriteSe(0); - writer.WriteSe(0); - writer.WriteUe(0); - } - - const s32 pic_height = context.h264_parameter_set.pic_height_in_map_units / - (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2); - - writer.WriteUe(16); - writer.WriteBit(false); - writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1); - writer.WriteUe(pic_height - 1); - writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0); - - if (!context.h264_parameter_set.frame_mbs_only_flag) { - writer.WriteBit(((context.h264_parameter_set.flags >> 0) & 1) != 0); - } - - writer.WriteBit(((context.h264_parameter_set.flags >> 1) & 1) != 0); - writer.WriteBit(false); // Frame cropping flag - writer.WriteBit(false); // VUI parameter present flag - - writer.End(); - - // H264 PPS - writer.WriteU(1, 24); - writer.WriteU(0, 1); - writer.WriteU(3, 2); - writer.WriteU(8, 5); - - writer.WriteUe(0); - writer.WriteUe(0); - - writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0); - writer.WriteBit(false); - writer.WriteUe(0); - writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active); - writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active); - writer.WriteBit(((context.h264_parameter_set.flags >> 2) & 1) != 0); - writer.WriteU(static_cast((context.h264_parameter_set.flags >> 32) & 0x3), 2); - s32 pic_init_qp = static_cast((context.h264_parameter_set.flags >> 16) & 0x3f); - pic_init_qp = (pic_init_qp << 26) >> 26; - writer.WriteSe(pic_init_qp); - writer.WriteSe(0); - s32 chroma_qp_index_offset = - static_cast((context.h264_parameter_set.flags >> 22) & 0x1f); - chroma_qp_index_offset = (chroma_qp_index_offset << 27) >> 27; - - writer.WriteSe(chroma_qp_index_offset); - writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_flag != 0); - writer.WriteBit(((context.h264_parameter_set.flags >> 3) & 1) != 0); - writer.WriteBit(context.h264_parameter_set.redundant_pic_count_flag != 0); - writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0); - - writer.WriteBit(true); - - for (s32 index = 0; index < 6; index++) { - writer.WriteBit(true); - const auto matrix_x4 = - std::vector(context.scaling_matrix_4.begin(), context.scaling_matrix_4.end()); - writer.WriteScalingList(matrix_x4, index * 16, 16); - } - - if (context.h264_parameter_set.transform_8x8_mode_flag) { - for (s32 index = 0; index < 2; index++) { - writer.WriteBit(true); - const auto matrix_x8 = std::vector(context.scaling_matrix_8.begin(), - context.scaling_matrix_8.end()); - - writer.WriteScalingList(matrix_x8, index * 64, 64); - } - } - - s32 chroma_qp_index_offset2 = - static_cast((context.h264_parameter_set.flags >> 27) & 0x1f); - chroma_qp_index_offset2 = (chroma_qp_index_offset2 << 27) >> 27; - - writer.WriteSe(chroma_qp_index_offset2); - - writer.End(); - - const auto& encoded_header = writer.GetByteArray(); - frame.resize(encoded_header.size() + context.frame_data_size); - std::memcpy(frame.data(), encoded_header.data(), encoded_header.size()); - - gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, - frame.data() + encoded_header.size(), - context.frame_data_size); + return frame; } + // Encode header + H264BitWriter writer{}; + writer.WriteU(1, 24); + writer.WriteU(0, 1); + writer.WriteU(3, 2); + writer.WriteU(7, 5); + writer.WriteU(100, 8); + writer.WriteU(0, 8); + writer.WriteU(31, 8); + writer.WriteUe(0); + const u32 chroma_format_idc = + static_cast(context.h264_parameter_set.chroma_format_idc.Value()); + writer.WriteUe(chroma_format_idc); + if (chroma_format_idc == 3) { + writer.WriteBit(false); + } + + writer.WriteUe(0); + writer.WriteUe(0); + writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag + writer.WriteBit(false); // Scaling matrix present flag + + writer.WriteUe(static_cast(context.h264_parameter_set.log2_max_frame_num_minus4.Value())); + + const auto order_cnt_type = + static_cast(context.h264_parameter_set.pic_order_cnt_type.Value()); + writer.WriteUe(order_cnt_type); + if (order_cnt_type == 0) { + writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt_lsb_minus4); + } else if (order_cnt_type == 1) { + writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0); + + writer.WriteSe(0); + writer.WriteSe(0); + writer.WriteUe(0); + } + + const s32 pic_height = context.h264_parameter_set.frame_height_in_map_units / + (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2); + + writer.WriteUe(16); + writer.WriteBit(false); + writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1); + writer.WriteUe(pic_height - 1); + writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0); + + if (!context.h264_parameter_set.frame_mbs_only_flag) { + writer.WriteBit(context.h264_parameter_set.flags.mbaff_frame.Value() != 0); + } + + writer.WriteBit(context.h264_parameter_set.flags.direct_8x8_inference.Value() != 0); + writer.WriteBit(false); // Frame cropping flag + writer.WriteBit(false); // VUI parameter present flag + + writer.End(); + + // H264 PPS + writer.WriteU(1, 24); + writer.WriteU(0, 1); + writer.WriteU(3, 2); + writer.WriteU(8, 5); + + writer.WriteUe(0); + writer.WriteUe(0); + + writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0); + writer.WriteBit(false); + writer.WriteUe(0); + writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active); + writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active); + writer.WriteBit(context.h264_parameter_set.flags.weighted_pred.Value() != 0); + writer.WriteU(static_cast(context.h264_parameter_set.weighted_bipred_idc.Value()), 2); + s32 pic_init_qp = static_cast(context.h264_parameter_set.pic_init_qp_minus26.Value()); + writer.WriteSe(pic_init_qp); + writer.WriteSe(0); + s32 chroma_qp_index_offset = + static_cast(context.h264_parameter_set.chroma_qp_index_offset.Value()); + + writer.WriteSe(chroma_qp_index_offset); + writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_present_flag != 0); + writer.WriteBit(context.h264_parameter_set.flags.constrained_intra_pred.Value() != 0); + writer.WriteBit(context.h264_parameter_set.redundant_pic_cnt_present_flag != 0); + writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0); + + writer.WriteBit(true); + + for (s32 index = 0; index < 6; index++) { + writer.WriteBit(true); + std::span matrix{context.weight_scale}; + writer.WriteScalingList(matrix, index * 16, 16); + } + + if (context.h264_parameter_set.transform_8x8_mode_flag) { + for (s32 index = 0; index < 2; index++) { + writer.WriteBit(true); + std::span matrix{context.weight_scale_8x8}; + writer.WriteScalingList(matrix, index * 64, 64); + } + } + + s32 chroma_qp_index_offset2 = + static_cast(context.h264_parameter_set.second_chroma_qp_index_offset.Value()); + + writer.WriteSe(chroma_qp_index_offset2); + + writer.End(); + + const auto& encoded_header = writer.GetByteArray(); + frame.resize(encoded_header.size() + context.stream_len); + std::memcpy(frame.data(), encoded_header.data(), encoded_header.size()); + + gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, + frame.data() + encoded_header.size(), context.stream_len); + return frame; } @@ -202,7 +197,7 @@ void H264BitWriter::WriteBit(bool state) { WriteBits(state ? 1 : 0, 1); } -void H264BitWriter::WriteScalingList(const std::vector& list, s32 start, s32 count) { +void H264BitWriter::WriteScalingList(std::span list, s32 start, s32 count) { std::vector scan(count); if (count == 16) { std::memcpy(scan.data(), zig_zag_scan.data(), scan.size()); diff --git a/src/video_core/command_classes/codecs/h264.h b/src/video_core/command_classes/codecs/h264.h index 0f3a1d9f3f..bfe84a4720 100644 --- a/src/video_core/command_classes/codecs/h264.h +++ b/src/video_core/command_classes/codecs/h264.h @@ -20,7 +20,9 @@ #pragma once +#include #include +#include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" #include "video_core/command_classes/nvdec_common.h" @@ -48,7 +50,7 @@ public: /// Based on section 7.3.2.1.1.1 and Table 7-4 in the H.264 specification /// Writes the scaling matrices of the sream - void WriteScalingList(const std::vector& list, s32 start, s32 count); + void WriteScalingList(std::span list, s32 start, s32 count); /// Return the bitstream as a vector. [[nodiscard]] std::vector& GetByteArray(); @@ -78,40 +80,110 @@ public: const NvdecCommon::NvdecRegisters& state, bool is_first_frame = false); private: - struct H264ParameterSet { - u32 log2_max_pic_order_cnt{}; - u32 delta_pic_order_always_zero_flag{}; - u32 frame_mbs_only_flag{}; - u32 pic_width_in_mbs{}; - u32 pic_height_in_map_units{}; - INSERT_PADDING_WORDS(1); - u32 entropy_coding_mode_flag{}; - u32 bottom_field_pic_order_flag{}; - u32 num_refidx_l0_default_active{}; - u32 num_refidx_l1_default_active{}; - u32 deblocking_filter_control_flag{}; - u32 redundant_pic_count_flag{}; - u32 transform_8x8_mode_flag{}; - INSERT_PADDING_WORDS(9); - u64 flags{}; - u32 frame_number{}; - u32 frame_number2{}; - }; - static_assert(sizeof(H264ParameterSet) == 0x68, "H264ParameterSet is an invalid size"); - - struct H264DecoderContext { - INSERT_PADDING_BYTES(0x48); - u32 frame_data_size{}; - INSERT_PADDING_BYTES(0xc); - H264ParameterSet h264_parameter_set{}; - INSERT_PADDING_BYTES(0x100); - std::array scaling_matrix_4; - std::array scaling_matrix_8; - }; - static_assert(sizeof(H264DecoderContext) == 0x2a0, "H264DecoderContext is an invalid size"); - std::vector frame; GPU& gpu; + + struct H264ParameterSet { + s32 log2_max_pic_order_cnt_lsb_minus4; ///< 0x00 + s32 delta_pic_order_always_zero_flag; ///< 0x04 + s32 frame_mbs_only_flag; ///< 0x08 + u32 pic_width_in_mbs; ///< 0x0C + u32 frame_height_in_map_units; ///< 0x10 + union { ///< 0x14 + BitField<0, 2, u32> tile_format; + BitField<2, 3, u32> gob_height; + }; + u32 entropy_coding_mode_flag; ///< 0x18 + s32 pic_order_present_flag; ///< 0x1C + s32 num_refidx_l0_default_active; ///< 0x20 + s32 num_refidx_l1_default_active; ///< 0x24 + s32 deblocking_filter_control_present_flag; ///< 0x28 + s32 redundant_pic_cnt_present_flag; ///< 0x2C + u32 transform_8x8_mode_flag; ///< 0x30 + u32 pitch_luma; ///< 0x34 + u32 pitch_chroma; ///< 0x38 + u32 luma_top_offset; ///< 0x3C + u32 luma_bot_offset; ///< 0x40 + u32 luma_frame_offset; ///< 0x44 + u32 chroma_top_offset; ///< 0x48 + u32 chroma_bot_offset; ///< 0x4C + u32 chroma_frame_offset; ///< 0x50 + u32 hist_buffer_size; ///< 0x54 + union { ///< 0x58 + union { + BitField<0, 1, u64> mbaff_frame; + BitField<1, 1, u64> direct_8x8_inference; + BitField<2, 1, u64> weighted_pred; + BitField<3, 1, u64> constrained_intra_pred; + BitField<4, 1, u64> ref_pic; + BitField<5, 1, u64> field_pic; + BitField<6, 1, u64> bottom_field; + BitField<7, 1, u64> second_field; + } flags; + BitField<8, 4, u64> log2_max_frame_num_minus4; + BitField<12, 2, u64> chroma_format_idc; + BitField<14, 2, u64> pic_order_cnt_type; + BitField<16, 6, s64> pic_init_qp_minus26; + BitField<22, 5, s64> chroma_qp_index_offset; + BitField<27, 5, s64> second_chroma_qp_index_offset; + BitField<32, 2, u64> weighted_bipred_idc; + BitField<34, 7, u64> curr_pic_idx; + BitField<41, 5, u64> curr_col_idx; + BitField<46, 16, u64> frame_number; + BitField<62, 1, u64> frame_surfaces; + BitField<63, 1, u64> output_memory_layout; + }; + }; + static_assert(sizeof(H264ParameterSet) == 0x60, "H264ParameterSet is an invalid size"); + + struct H264DecoderContext { + INSERT_PADDING_WORDS_NOINIT(18); ///< 0x0000 + u32 stream_len; ///< 0x0048 + INSERT_PADDING_WORDS_NOINIT(3); ///< 0x004C + H264ParameterSet h264_parameter_set; ///< 0x0058 + INSERT_PADDING_WORDS_NOINIT(66); ///< 0x00B8 + std::array weight_scale; ///< 0x01C0 + std::array weight_scale_8x8; ///< 0x0220 + }; + static_assert(sizeof(H264DecoderContext) == 0x2A0, "H264DecoderContext is an invalid size"); + +#define ASSERT_POSITION(field_name, position) \ + static_assert(offsetof(H264ParameterSet, field_name) == position, \ + "Field " #field_name " has invalid position") + + ASSERT_POSITION(log2_max_pic_order_cnt_lsb_minus4, 0x00); + ASSERT_POSITION(delta_pic_order_always_zero_flag, 0x04); + ASSERT_POSITION(frame_mbs_only_flag, 0x08); + ASSERT_POSITION(pic_width_in_mbs, 0x0C); + ASSERT_POSITION(frame_height_in_map_units, 0x10); + ASSERT_POSITION(tile_format, 0x14); + ASSERT_POSITION(entropy_coding_mode_flag, 0x18); + ASSERT_POSITION(pic_order_present_flag, 0x1C); + ASSERT_POSITION(num_refidx_l0_default_active, 0x20); + ASSERT_POSITION(num_refidx_l1_default_active, 0x24); + ASSERT_POSITION(deblocking_filter_control_present_flag, 0x28); + ASSERT_POSITION(redundant_pic_cnt_present_flag, 0x2C); + ASSERT_POSITION(transform_8x8_mode_flag, 0x30); + ASSERT_POSITION(pitch_luma, 0x34); + ASSERT_POSITION(pitch_chroma, 0x38); + ASSERT_POSITION(luma_top_offset, 0x3C); + ASSERT_POSITION(luma_bot_offset, 0x40); + ASSERT_POSITION(luma_frame_offset, 0x44); + ASSERT_POSITION(chroma_top_offset, 0x48); + ASSERT_POSITION(chroma_bot_offset, 0x4C); + ASSERT_POSITION(chroma_frame_offset, 0x50); + ASSERT_POSITION(hist_buffer_size, 0x54); + ASSERT_POSITION(flags, 0x58); +#undef ASSERT_POSITION + +#define ASSERT_POSITION(field_name, position) \ + static_assert(offsetof(H264DecoderContext, field_name) == position, \ + "Field " #field_name " has invalid position") + + ASSERT_POSITION(stream_len, 0x48); + ASSERT_POSITION(h264_parameter_set, 0x58); + ASSERT_POSITION(weight_scale, 0x1C0); +#undef ASSERT_POSITION }; } // namespace Decoder diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp index 29bb314183..902bc2a982 100644 --- a/src/video_core/command_classes/codecs/vp9.cpp +++ b/src/video_core/command_classes/codecs/vp9.cpp @@ -354,7 +354,7 @@ void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_ } Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) { - PictureInfo picture_info{}; + PictureInfo picture_info; gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo)); Vp9PictureInfo vp9_info = picture_info.Convert(); @@ -370,7 +370,7 @@ Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) } void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) { - EntropyProbs entropy{}; + EntropyProbs entropy; gpu.MemoryManager().ReadBlock(offset, &entropy, sizeof(EntropyProbs)); entropy.Convert(dst); } diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/command_classes/codecs/vp9_types.h index 139501a1c6..2da14f3ca5 100644 --- a/src/video_core/command_classes/codecs/vp9_types.h +++ b/src/video_core/command_classes/codecs/vp9_types.h @@ -15,10 +15,10 @@ class GPU; namespace Decoder { struct Vp9FrameDimensions { - s16 width{}; - s16 height{}; - s16 luma_pitch{}; - s16 chroma_pitch{}; + s16 width; + s16 height; + s16 luma_pitch; + s16 chroma_pitch; }; static_assert(sizeof(Vp9FrameDimensions) == 0x8, "Vp9 Vp9FrameDimensions is an invalid size"); @@ -49,87 +49,87 @@ enum class TxMode { }; struct Segmentation { - u8 enabled{}; - u8 update_map{}; - u8 temporal_update{}; - u8 abs_delta{}; - std::array feature_mask{}; - std::array, 8> feature_data{}; + u8 enabled; + u8 update_map; + u8 temporal_update; + u8 abs_delta; + std::array feature_mask; + std::array, 8> feature_data; }; static_assert(sizeof(Segmentation) == 0x64, "Segmentation is an invalid size"); struct LoopFilter { - u8 mode_ref_delta_enabled{}; - std::array ref_deltas{}; - std::array mode_deltas{}; + u8 mode_ref_delta_enabled; + std::array ref_deltas; + std::array mode_deltas; }; static_assert(sizeof(LoopFilter) == 0x7, "LoopFilter is an invalid size"); struct Vp9EntropyProbs { - std::array y_mode_prob{}; - std::array partition_prob{}; - std::array coef_probs{}; - std::array switchable_interp_prob{}; - std::array inter_mode_prob{}; - std::array intra_inter_prob{}; - std::array comp_inter_prob{}; - std::array single_ref_prob{}; - std::array comp_ref_prob{}; - std::array tx_32x32_prob{}; - std::array tx_16x16_prob{}; - std::array tx_8x8_prob{}; - std::array skip_probs{}; - std::array joints{}; - std::array sign{}; - std::array classes{}; - std::array class_0{}; - std::array prob_bits{}; - std::array class_0_fr{}; - std::array fr{}; - std::array class_0_hp{}; - std::array high_precision{}; + std::array y_mode_prob; ///< 0x0000 + std::array partition_prob; ///< 0x0024 + std::array coef_probs; ///< 0x0064 + std::array switchable_interp_prob; ///< 0x0724 + std::array inter_mode_prob; ///< 0x072C + std::array intra_inter_prob; ///< 0x0748 + std::array comp_inter_prob; ///< 0x074C + std::array single_ref_prob; ///< 0x0751 + std::array comp_ref_prob; ///< 0x075B + std::array tx_32x32_prob; ///< 0x0760 + std::array tx_16x16_prob; ///< 0x0766 + std::array tx_8x8_prob; ///< 0x076A + std::array skip_probs; ///< 0x076C + std::array joints; ///< 0x076F + std::array sign; ///< 0x0772 + std::array classes; ///< 0x0774 + std::array class_0; ///< 0x0788 + std::array prob_bits; ///< 0x078A + std::array class_0_fr; ///< 0x079E + std::array fr; ///< 0x07AA + std::array class_0_hp; ///< 0x07B0 + std::array high_precision; ///< 0x07B2 }; static_assert(sizeof(Vp9EntropyProbs) == 0x7B4, "Vp9EntropyProbs is an invalid size"); struct Vp9PictureInfo { - bool is_key_frame{}; - bool intra_only{}; - bool last_frame_was_key{}; - bool frame_size_changed{}; - bool error_resilient_mode{}; - bool last_frame_shown{}; - bool show_frame{}; - std::array ref_frame_sign_bias{}; - s32 base_q_index{}; - s32 y_dc_delta_q{}; - s32 uv_dc_delta_q{}; - s32 uv_ac_delta_q{}; - bool lossless{}; - s32 transform_mode{}; - bool allow_high_precision_mv{}; - s32 interp_filter{}; - s32 reference_mode{}; - s8 comp_fixed_ref{}; - std::array comp_var_ref{}; - s32 log2_tile_cols{}; - s32 log2_tile_rows{}; - bool segment_enabled{}; - bool segment_map_update{}; - bool segment_map_temporal_update{}; - s32 segment_abs_delta{}; - std::array segment_feature_enable{}; - std::array, 8> segment_feature_data{}; - bool mode_ref_delta_enabled{}; - bool use_prev_in_find_mv_refs{}; - std::array ref_deltas{}; - std::array mode_deltas{}; - Vp9EntropyProbs entropy{}; - Vp9FrameDimensions frame_size{}; - u8 first_level{}; - u8 sharpness_level{}; - u32 bitstream_size{}; - std::array frame_offsets{}; - std::array refresh_frame{}; + bool is_key_frame; + bool intra_only; + bool last_frame_was_key; + bool frame_size_changed; + bool error_resilient_mode; + bool last_frame_shown; + bool show_frame; + std::array ref_frame_sign_bias; + s32 base_q_index; + s32 y_dc_delta_q; + s32 uv_dc_delta_q; + s32 uv_ac_delta_q; + bool lossless; + s32 transform_mode; + bool allow_high_precision_mv; + s32 interp_filter; + s32 reference_mode; + s8 comp_fixed_ref; + std::array comp_var_ref; + s32 log2_tile_cols; + s32 log2_tile_rows; + bool segment_enabled; + bool segment_map_update; + bool segment_map_temporal_update; + s32 segment_abs_delta; + std::array segment_feature_enable; + std::array, 8> segment_feature_data; + bool mode_ref_delta_enabled; + bool use_prev_in_find_mv_refs; + std::array ref_deltas; + std::array mode_deltas; + Vp9EntropyProbs entropy; + Vp9FrameDimensions frame_size; + u8 first_level; + u8 sharpness_level; + u32 bitstream_size; + std::array frame_offsets; + std::array refresh_frame; }; struct Vp9FrameContainer { @@ -138,35 +138,35 @@ struct Vp9FrameContainer { }; struct PictureInfo { - INSERT_PADDING_WORDS(12); - u32 bitstream_size{}; - INSERT_PADDING_WORDS(5); - Vp9FrameDimensions last_frame_size{}; - Vp9FrameDimensions golden_frame_size{}; - Vp9FrameDimensions alt_frame_size{}; - Vp9FrameDimensions current_frame_size{}; - u32 vp9_flags{}; - std::array ref_frame_sign_bias{}; - u8 first_level{}; - u8 sharpness_level{}; - u8 base_q_index{}; - u8 y_dc_delta_q{}; - u8 uv_ac_delta_q{}; - u8 uv_dc_delta_q{}; - u8 lossless{}; - u8 tx_mode{}; - u8 allow_high_precision_mv{}; - u8 interp_filter{}; - u8 reference_mode{}; - s8 comp_fixed_ref{}; - std::array comp_var_ref{}; - u8 log2_tile_cols{}; - u8 log2_tile_rows{}; - Segmentation segmentation{}; - LoopFilter loop_filter{}; - INSERT_PADDING_BYTES(5); - u32 surface_params{}; - INSERT_PADDING_WORDS(3); + INSERT_PADDING_WORDS_NOINIT(12); ///< 0x00 + u32 bitstream_size; ///< 0x30 + INSERT_PADDING_WORDS_NOINIT(5); ///< 0x34 + Vp9FrameDimensions last_frame_size; ///< 0x48 + Vp9FrameDimensions golden_frame_size; ///< 0x50 + Vp9FrameDimensions alt_frame_size; ///< 0x58 + Vp9FrameDimensions current_frame_size; ///< 0x60 + u32 vp9_flags; ///< 0x68 + std::array ref_frame_sign_bias; ///< 0x6C + u8 first_level; ///< 0x70 + u8 sharpness_level; ///< 0x71 + u8 base_q_index; ///< 0x72 + u8 y_dc_delta_q; ///< 0x73 + u8 uv_ac_delta_q; ///< 0x74 + u8 uv_dc_delta_q; ///< 0x75 + u8 lossless; ///< 0x76 + u8 tx_mode; ///< 0x77 + u8 allow_high_precision_mv; ///< 0x78 + u8 interp_filter; ///< 0x79 + u8 reference_mode; ///< 0x7A + s8 comp_fixed_ref; ///< 0x7B + std::array comp_var_ref; ///< 0x7C + u8 log2_tile_cols; ///< 0x7E + u8 log2_tile_rows; ///< 0x7F + Segmentation segmentation; ///< 0x80 + LoopFilter loop_filter; ///< 0xE4 + INSERT_PADDING_BYTES_NOINIT(5); ///< 0xEB + u32 surface_params; ///< 0xF0 + INSERT_PADDING_WORDS_NOINIT(3); ///< 0xF4 [[nodiscard]] Vp9PictureInfo Convert() const { return { @@ -176,6 +176,7 @@ struct PictureInfo { .frame_size_changed = (vp9_flags & FrameFlags::FrameSizeChanged) != 0, .error_resilient_mode = (vp9_flags & FrameFlags::ErrorResilientMode) != 0, .last_frame_shown = (vp9_flags & FrameFlags::LastShowFrame) != 0, + .show_frame = false, .ref_frame_sign_bias = ref_frame_sign_bias, .base_q_index = base_q_index, .y_dc_delta_q = y_dc_delta_q, @@ -204,45 +205,48 @@ struct PictureInfo { !(vp9_flags == (FrameFlags::LastFrameIsKeyFrame)), .ref_deltas = loop_filter.ref_deltas, .mode_deltas = loop_filter.mode_deltas, + .entropy{}, .frame_size = current_frame_size, .first_level = first_level, .sharpness_level = sharpness_level, .bitstream_size = bitstream_size, + .frame_offsets{}, + .refresh_frame{}, }; } }; static_assert(sizeof(PictureInfo) == 0x100, "PictureInfo is an invalid size"); struct EntropyProbs { - INSERT_PADDING_BYTES(1024); - std::array inter_mode_prob{}; - std::array intra_inter_prob{}; - INSERT_PADDING_BYTES(80); - std::array tx_8x8_prob{}; - std::array tx_16x16_prob{}; - std::array tx_32x32_prob{}; - std::array y_mode_prob_e8{}; - std::array, 4> y_mode_prob_e0e7{}; - INSERT_PADDING_BYTES(64); - std::array partition_prob{}; - INSERT_PADDING_BYTES(10); - std::array switchable_interp_prob{}; - std::array comp_inter_prob{}; - std::array skip_probs{}; - INSERT_PADDING_BYTES(1); - std::array joints{}; - std::array sign{}; - std::array class_0{}; - std::array fr{}; - std::array class_0_hp{}; - std::array high_precision{}; - std::array classes{}; - std::array class_0_fr{}; - std::array pred_bits{}; - std::array single_ref_prob{}; - std::array comp_ref_prob{}; - INSERT_PADDING_BYTES(17); - std::array coef_probs{}; + INSERT_PADDING_BYTES_NOINIT(1024); ///< 0x0000 + std::array inter_mode_prob; ///< 0x0400 + std::array intra_inter_prob; ///< 0x041C + INSERT_PADDING_BYTES_NOINIT(80); ///< 0x0420 + std::array tx_8x8_prob; ///< 0x0470 + std::array tx_16x16_prob; ///< 0x0472 + std::array tx_32x32_prob; ///< 0x0476 + std::array y_mode_prob_e8; ///< 0x047C + std::array, 4> y_mode_prob_e0e7; ///< 0x0480 + INSERT_PADDING_BYTES_NOINIT(64); ///< 0x04A0 + std::array partition_prob; ///< 0x04E0 + INSERT_PADDING_BYTES_NOINIT(10); ///< 0x0520 + std::array switchable_interp_prob; ///< 0x052A + std::array comp_inter_prob; ///< 0x0532 + std::array skip_probs; ///< 0x0537 + INSERT_PADDING_BYTES_NOINIT(1); ///< 0x053A + std::array joints; ///< 0x053B + std::array sign; ///< 0x053E + std::array class_0; ///< 0x0540 + std::array fr; ///< 0x0542 + std::array class_0_hp; ///< 0x0548 + std::array high_precision; ///< 0x054A + std::array classes; ///< 0x054C + std::array class_0_fr; ///< 0x0560 + std::array pred_bits; ///< 0x056C + std::array single_ref_prob; ///< 0x0580 + std::array comp_ref_prob; ///< 0x058A + INSERT_PADDING_BYTES_NOINIT(17); ///< 0x058F + std::array coef_probs; ///< 0x05A0 void Convert(Vp9EntropyProbs& fc) { fc.inter_mode_prob = inter_mode_prob; @@ -293,10 +297,45 @@ struct RefPoolElement { }; struct FrameContexts { - s64 from{}; - bool adapted{}; - Vp9EntropyProbs probs{}; + s64 from; + bool adapted; + Vp9EntropyProbs probs; }; +#define ASSERT_POSITION(field_name, position) \ + static_assert(offsetof(Vp9EntropyProbs, field_name) == position, \ + "Field " #field_name " has invalid position") + +ASSERT_POSITION(partition_prob, 0x0024); +ASSERT_POSITION(switchable_interp_prob, 0x0724); +ASSERT_POSITION(sign, 0x0772); +ASSERT_POSITION(class_0_fr, 0x079E); +ASSERT_POSITION(high_precision, 0x07B2); +#undef ASSERT_POSITION + +#define ASSERT_POSITION(field_name, position) \ + static_assert(offsetof(PictureInfo, field_name) == position, \ + "Field " #field_name " has invalid position") + +ASSERT_POSITION(bitstream_size, 0x30); +ASSERT_POSITION(last_frame_size, 0x48); +ASSERT_POSITION(first_level, 0x70); +ASSERT_POSITION(segmentation, 0x80); +ASSERT_POSITION(loop_filter, 0xE4); +ASSERT_POSITION(surface_params, 0xF0); +#undef ASSERT_POSITION + +#define ASSERT_POSITION(field_name, position) \ + static_assert(offsetof(EntropyProbs, field_name) == position, \ + "Field " #field_name " has invalid position") + +ASSERT_POSITION(inter_mode_prob, 0x400); +ASSERT_POSITION(tx_8x8_prob, 0x470); +ASSERT_POSITION(partition_prob, 0x4E0); +ASSERT_POSITION(class_0, 0x540); +ASSERT_POSITION(class_0_fr, 0x560); +ASSERT_POSITION(coef_probs, 0x5A0); +#undef ASSERT_POSITION + }; // namespace Decoder }; // namespace Tegra diff --git a/src/video_core/command_classes/nvdec.cpp b/src/video_core/command_classes/nvdec.cpp index e4f919afd1..b5e3b70fcc 100644 --- a/src/video_core/command_classes/nvdec.cpp +++ b/src/video_core/command_classes/nvdec.cpp @@ -8,22 +8,21 @@ namespace Tegra { -Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), codec(std::make_unique(gpu)) {} +#define NVDEC_REG_INDEX(field_name) \ + (offsetof(NvdecCommon::NvdecRegisters, field_name) / sizeof(u64)) + +Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), state{}, codec(std::make_unique(gpu, state)) {} Nvdec::~Nvdec() = default; -void Nvdec::ProcessMethod(Method method, u32 argument) { - if (method == Method::SetVideoCodec) { - codec->StateWrite(static_cast(method), argument); - } else { - codec->StateWrite(static_cast(method), static_cast(argument) << 8); - } +void Nvdec::ProcessMethod(u32 method, u32 argument) { + state.reg_array[method] = static_cast(argument) << 8; switch (method) { - case Method::SetVideoCodec: + case NVDEC_REG_INDEX(set_codec_id): codec->SetTargetCodec(static_cast(argument)); break; - case Method::Execute: + case NVDEC_REG_INDEX(execute): Execute(); break; } diff --git a/src/video_core/command_classes/nvdec.h b/src/video_core/command_classes/nvdec.h index e66be80b88..6e1da0b04f 100644 --- a/src/video_core/command_classes/nvdec.h +++ b/src/video_core/command_classes/nvdec.h @@ -14,16 +14,11 @@ class GPU; class Nvdec { public: - enum class Method : u32 { - SetVideoCodec = 0x80, - Execute = 0xc0, - }; - explicit Nvdec(GPU& gpu); ~Nvdec(); /// Writes the method into the state, Invoke Execute() if encountered - void ProcessMethod(Method method, u32 argument); + void ProcessMethod(u32 method, u32 argument); /// Return most recently decoded frame [[nodiscard]] AVFramePtr GetFrame(); @@ -33,6 +28,7 @@ private: void Execute(); GPU& gpu; + NvdecCommon::NvdecRegisters state; std::unique_ptr codec; }; } // namespace Tegra diff --git a/src/video_core/command_classes/nvdec_common.h b/src/video_core/command_classes/nvdec_common.h index 01b5e086d4..6a24e00a0c 100644 --- a/src/video_core/command_classes/nvdec_common.h +++ b/src/video_core/command_classes/nvdec_common.h @@ -4,40 +4,13 @@ #pragma once +#include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" namespace Tegra::NvdecCommon { -struct NvdecRegisters { - INSERT_PADDING_WORDS(256); - u64 set_codec_id{}; - INSERT_PADDING_WORDS(254); - u64 set_platform_id{}; - u64 picture_info_offset{}; - u64 frame_bitstream_offset{}; - u64 frame_number{}; - u64 h264_slice_data_offsets{}; - u64 h264_mv_dump_offset{}; - INSERT_PADDING_WORDS(6); - u64 frame_stats_offset{}; - u64 h264_last_surface_luma_offset{}; - u64 h264_last_surface_chroma_offset{}; - std::array surface_luma_offset{}; - std::array surface_chroma_offset{}; - INSERT_PADDING_WORDS(132); - u64 vp9_entropy_probs_offset{}; - u64 vp9_backward_updates_offset{}; - u64 vp9_last_frame_segmap_offset{}; - u64 vp9_curr_frame_segmap_offset{}; - INSERT_PADDING_WORDS(2); - u64 vp9_last_frame_mvs_offset{}; - u64 vp9_curr_frame_mvs_offset{}; - INSERT_PADDING_WORDS(2); -}; -static_assert(sizeof(NvdecRegisters) == (0xBC0), "NvdecRegisters is incorrect size"); - -enum class VideoCodec : u32 { +enum class VideoCodec : u64 { None = 0x0, H264 = 0x3, Vp8 = 0x5, @@ -45,4 +18,76 @@ enum class VideoCodec : u32 { Vp9 = 0x9, }; +// NVDEC should use a 32-bit address space, but is mapped to 64-bit, +// doubling the sizes here is compensating for that. +struct NvdecRegisters { + static constexpr std::size_t NUM_REGS = 0x178; + + union { + struct { + INSERT_PADDING_WORDS_NOINIT(256); ///< 0x0000 + VideoCodec set_codec_id; ///< 0x0400 + INSERT_PADDING_WORDS_NOINIT(126); ///< 0x0408 + u64 execute; ///< 0x0600 + INSERT_PADDING_WORDS_NOINIT(126); ///< 0x0608 + struct { ///< 0x0800 + union { + BitField<0, 3, VideoCodec> codec; + BitField<4, 1, u64> gp_timer_on; + BitField<13, 1, u64> mb_timer_on; + BitField<14, 1, u64> intra_frame_pslc; + BitField<17, 1, u64> all_intra_frame; + }; + } control_params; + u64 picture_info_offset; ///< 0x0808 + u64 frame_bitstream_offset; ///< 0x0810 + u64 frame_number; ///< 0x0818 + u64 h264_slice_data_offsets; ///< 0x0820 + u64 h264_mv_dump_offset; ///< 0x0828 + INSERT_PADDING_WORDS_NOINIT(6); ///< 0x0830 + u64 frame_stats_offset; ///< 0x0848 + u64 h264_last_surface_luma_offset; ///< 0x0850 + u64 h264_last_surface_chroma_offset; ///< 0x0858 + std::array surface_luma_offset; ///< 0x0860 + std::array surface_chroma_offset; ///< 0x08E8 + INSERT_PADDING_WORDS_NOINIT(132); ///< 0x0970 + u64 vp9_entropy_probs_offset; ///< 0x0B80 + u64 vp9_backward_updates_offset; ///< 0x0B88 + u64 vp9_last_frame_segmap_offset; ///< 0x0B90 + u64 vp9_curr_frame_segmap_offset; ///< 0x0B98 + INSERT_PADDING_WORDS_NOINIT(2); ///< 0x0BA0 + u64 vp9_last_frame_mvs_offset; ///< 0x0BA8 + u64 vp9_curr_frame_mvs_offset; ///< 0x0BB0 + INSERT_PADDING_WORDS_NOINIT(2); ///< 0x0BB8 + }; + std::array reg_array; + }; +}; +static_assert(sizeof(NvdecRegisters) == (0xBC0), "NvdecRegisters is incorrect size"); + +#define ASSERT_REG_POSITION(field_name, position) \ + static_assert(offsetof(NvdecRegisters, field_name) == position * sizeof(u64), \ + "Field " #field_name " has invalid position") + +ASSERT_REG_POSITION(set_codec_id, 0x80); +ASSERT_REG_POSITION(execute, 0xC0); +ASSERT_REG_POSITION(control_params, 0x100); +ASSERT_REG_POSITION(picture_info_offset, 0x101); +ASSERT_REG_POSITION(frame_bitstream_offset, 0x102); +ASSERT_REG_POSITION(frame_number, 0x103); +ASSERT_REG_POSITION(h264_slice_data_offsets, 0x104); +ASSERT_REG_POSITION(frame_stats_offset, 0x109); +ASSERT_REG_POSITION(h264_last_surface_luma_offset, 0x10A); +ASSERT_REG_POSITION(h264_last_surface_chroma_offset, 0x10B); +ASSERT_REG_POSITION(surface_luma_offset, 0x10C); +ASSERT_REG_POSITION(surface_chroma_offset, 0x11D); +ASSERT_REG_POSITION(vp9_entropy_probs_offset, 0x170); +ASSERT_REG_POSITION(vp9_backward_updates_offset, 0x171); +ASSERT_REG_POSITION(vp9_last_frame_segmap_offset, 0x172); +ASSERT_REG_POSITION(vp9_curr_frame_segmap_offset, 0x173); +ASSERT_REG_POSITION(vp9_last_frame_mvs_offset, 0x175); +ASSERT_REG_POSITION(vp9_curr_frame_mvs_offset, 0x176); + +#undef ASSERT_REG_POSITION + } // namespace Tegra::NvdecCommon