Query Cache: Setup Base rework

This commit is contained in:
Fernando Sahmkow 2023-08-04 03:31:52 +02:00
parent ace91dd0c0
commit bdc01254a9
7 changed files with 1107 additions and 0 deletions

View File

@ -95,6 +95,12 @@ add_library(video_core STATIC
memory_manager.h
precompiled_headers.h
pte_kind.h
query_cache/bank_base.h
query_cache/query_base.h
query_cache/query_cache_base.h
query_cache/query_cache.h
query_cache/query_stream.h
query_cache/types.h
query_cache.h
rasterizer_accelerated.cpp
rasterizer_accelerated.h

View File

@ -0,0 +1,106 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
#include <atomic>
#include <deque>
#include <utility>
#include "common/common_types.h"
namespace VideoCommon {
/// Common bookkeeping for a bank of query slots: slots are handed out sequentially until the
/// bank is closed, and a reference count tracks how many users still depend on the bank.
class BankBase {
protected:
    const size_t base_bank_size;    ///< Capacity given at construction; Reset() restores it.
    size_t bank_size;               ///< Current capacity; Close() lowers it to current_slot.
    std::atomic<size_t> references; ///< Outstanding references (AddReference/CloseReference).
    size_t current_slot;            ///< Index of the next slot Reserve() will hand out.

public:
    BankBase(size_t bank_size_)
        : base_bank_size{bank_size_}, bank_size(bank_size_), references(0), current_slot(0) {}

    virtual ~BankBase() = default;

    /// Reserves the next free slot.
    /// @return {true, slot index} on success, or {false, Size()} when the bank is closed.
    virtual std::pair<bool, size_t> Reserve() {
        if (IsClosed()) {
            return {false, bank_size};
        }
        const size_t result = current_slot++;
        return {true, result};
    }

    /// Puts the bank back into its freshly constructed state: no slots used, no references,
    /// original capacity.
    virtual void Reset() {
        current_slot = 0;
        references = 0;
        bank_size = base_bank_size;
    }

    /// @return The current capacity of the bank.
    size_t Size() const {
        return bank_size;
    }

    /// Registers `how_many` additional references to this bank.
    void AddReference(size_t how_many = 1) {
        references.fetch_add(how_many, std::memory_order_relaxed);
    }

    /// Drops `how_many` references from this bank.
    void CloseReference(size_t how_many = 1) {
        // Releasing more references than are currently held is a caller bug.
        if (how_many > references.load(std::memory_order_relaxed)) {
            UNREACHABLE();
        }
        references.fetch_sub(how_many, std::memory_order_relaxed);
    }

    /// Shrinks the capacity to the number of slots already handed out, so that no further
    /// reservation can succeed.
    void Close() {
        bank_size = current_slot;
    }

    /// @return True when every slot has been handed out (or the bank was explicitly closed).
    bool IsClosed() const {
        return current_slot >= bank_size;
    }

    /// @return True when the bank is closed and nothing references it any more.
    bool IsDead() const {
        return IsClosed() && references == 0;
    }
};
/// Growable pool of banks that recycles dead banks before creating new ones.
///
/// BankType must provide `bool IsDead()` and `void Reset()`.
template <typename BankType>
class BankPool {
private:
    std::deque<BankType> bank_pool;  ///< Storage for every bank ever created.
    std::deque<size_t> bank_indices; ///< Bank indices in recycling order; front is checked first.

public:
    BankPool() = default;
    ~BankPool() = default;

    /// Reserves a bank from the pool and returns its index.
    ///
    /// If the bank at the front of the recycling queue is dead it is Reset() and reused;
    /// otherwise `builder(bank_pool, new_index)` is invoked and is expected to append a new
    /// bank to the container it is given.
    template <typename Func>
    size_t ReserveBank(Func&& builder) {
        if (!bank_indices.empty() && bank_pool[bank_indices.front()].IsDead()) {
            const size_t recycled_index = bank_indices.front();
            bank_indices.pop_front();
            bank_pool[recycled_index].Reset();
            // Re-queue the bank so it can be recycled again once it dies; without this a bank
            // would leave the queue forever after its first reuse.
            bank_indices.push_back(recycled_index);
            return recycled_index;
        }
        const size_t new_index = bank_pool.size();
        builder(bank_pool, new_index);
        bank_indices.push_back(new_index);
        return new_index;
    }

    /// Gets a reference to a bank using its index.
    BankType& GetBank(size_t index) {
        return bank_pool[index];
    }

    /// Gets the total number of banks in the pool.
    size_t BankCount() const {
        return bank_pool.size();
    }
};
} // namespace VideoCommon

View File

@ -0,0 +1,72 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
#include "common/common_funcs.h"
#include "common/common_types.h"
namespace VideoCommon {
/// Bit flags describing the state of a query (stored in QueryBase::flags).
enum class QueryFlagBits : u32 {
HasTimestamp = 1 << 0, ///< Indicates if this query has a timestamp.
IsFinalValueSynced = 1 << 1, ///< Indicates if the query's final value is available.
IsHostSynced = 1 << 2, ///< Indicates if the query has been synced in the host.
IsGuestSynced = 1 << 3, ///< Indicates if the query has been synced with the guest.
IsHostManaged = 1 << 4, ///< Indicates if this query points to a host query.
IsRewritten = 1 << 5, ///< Indicates if this query was rewritten by another query.
IsInvalidated = 1 << 6, ///< Indicates the value of the query has been nullified.
IsOrphan = 1 << 7, ///< Indicates the query has not been set by a guest query.
IsFence = 1 << 8, ///< Indicates the query is a fence.
};
// Enables the bitwise operators used on QueryFlagBits throughout the query cache.
DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits)
class QueryBase {
public:
VAddr guest_address;
QueryFlagBits flags;
u64 value;
protected:
// Default constructor
QueryBase() : guest_address(0), flags{}, value{} {}
// Parameterized constructor
QueryBase(VAddr address, QueryFlagBits flags_, u64 value_)
: guest_address(address), flags(flags_), value{value_} {}
};
/// Query whose value is already known when it is created.
class GuestQuery : public QueryBase {
public:
    /// @param isLong     When true the query is additionally flagged HasTimestamp.
    /// @param address    Address stored as guest_address.
    /// @param queryValue Value stored in the query.
    GuestQuery(bool isLong, VAddr address, u64 queryValue)
        : QueryBase(address,
                    isLong ? (QueryFlagBits::IsFinalValueSynced | QueryFlagBits::HasTimestamp)
                           : QueryFlagBits::IsFinalValueSynced,
                    queryValue) {}
};
/// Base for host-managed queries; adds the bank/slot range fields to QueryBase.
class HostQueryBase : public QueryBase {
public:
    /// Builds an orphan host query (address 0, value 0).
    HostQueryBase()
        : QueryBase(0, QueryFlagBits::IsHostManaged | QueryFlagBits::IsOrphan, 0) {}

    /// Builds a host query for `address`; `isLong` additionally sets HasTimestamp.
    HostQueryBase(bool isLong, VAddr address)
        : QueryBase(address,
                    isLong ? (QueryFlagBits::IsHostManaged | QueryFlagBits::HasTimestamp)
                           : QueryFlagBits::IsHostManaged,
                    0) {}

    // All range fields start out zeroed.
    u32 start_bank_id{};
    u32 size_banks{};
    size_t start_slot{};
    size_t size_slots{};
};
} // namespace VideoCommon

View File

@ -0,0 +1,543 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
#include <array>
#include <deque>
#include <memory>
#include <mutex>
#include <unordered_map>
#include <utility>
#include "common/assert.h"
#include "common/common_types.h"
#include "common/logging/log.h"
#include "common/scope_exit.h"
#include "common/settings.h"
#include "core/memory.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/query_cache/bank_base.h"
#include "video_core/query_cache/query_base.h"
#include "video_core/query_cache/query_cache_base.h"
#include "video_core/query_cache/query_stream.h"
#include "video_core/query_cache/types.h"
namespace VideoCommon {
using Maxwell = Tegra::Engines::Maxwell3D;
/// One pending guest write produced by GuestStreamer::SyncWrites and handed to
/// RuntimeType::SyncValues. GuestStreamer constructs it positionally as
/// (address, value, size), so the member order must not change.
struct SyncValuesStruct {
VAddr address; ///< Guest address of the query.
u64 value; ///< Value of the query.
u64 size; ///< 8 when the query has a timestamp, 4 otherwise.
// NOTE(review): presumably read by the runtime's SyncValues implementation — confirm there.
static constexpr bool GeneratesBaseBuffer = true;
};
/// Streamer for counters whose value is supplied directly by the caller; writes are queued
/// and later forwarded to the runtime in one batch.
template <typename Traits>
class GuestStreamer : public SimpleStreamer<GuestQuery> {
public:
    using RuntimeType = typename Traits::RuntimeType;

    GuestStreamer(size_t id_, RuntimeType& runtime_)
        : SimpleStreamer<GuestQuery>(id_), runtime{runtime_} {}

    virtual ~GuestStreamer() = default;

    /// Creates a query holding `value` and queues it for the next SyncWrites().
    /// @return Id of the newly built query.
    size_t WriteCounter(VAddr address, bool has_timestamp, u32 value,
                        std::optional<u32> subreport = std::nullopt) override {
        const size_t query_id = BuildQuery(has_timestamp, address, static_cast<u64>(value));
        pending_sync.push_back(query_id);
        return query_id;
    }

    /// @return True while at least one written query has not been synced yet.
    bool HasPendingSync() override {
        return pending_sync.size() != 0;
    }

    /// Drains the pending queue and forwards every still-relevant write to the runtime.
    void SyncWrites() override {
        if (pending_sync.empty()) {
            return;
        }
        std::vector<SyncValuesStruct> writes;
        writes.reserve(pending_sync.size());
        while (!pending_sync.empty()) {
            auto& query = slot_queries[pending_sync.front()];
            pending_sync.pop_front();
            // Rewritten or invalidated queries must not reach the guest.
            const bool superseded = True(query.flags & QueryFlagBits::IsRewritten) ||
                                    True(query.flags & QueryFlagBits::IsInvalidated);
            if (superseded) {
                continue;
            }
            query.flags |= QueryFlagBits::IsHostSynced;
            const u64 write_size = True(query.flags & QueryFlagBits::HasTimestamp) ? 8 : 4;
            writes.emplace_back(query.guest_address, query.value, write_size);
        }
        if (!writes.empty()) {
            runtime.template SyncValues<SyncValuesStruct>(writes);
        }
    }

private:
    RuntimeType& runtime;
    std::deque<size_t> pending_sync; ///< Ids of queries written since the last SyncWrites().
};
/// GuestStreamer variant that ignores the supplied counter value and always records 1.
template <typename Traits>
class StubStreamer : public GuestStreamer<Traits> {
public:
    using RuntimeType = typename Traits::RuntimeType;

    StubStreamer(size_t id_, RuntimeType& runtime_) : GuestStreamer<Traits>(id_, runtime_) {}

    ~StubStreamer() override = default;

    /// Same as GuestStreamer::WriteCounter, but the stored value is always 1.
    size_t WriteCounter(VAddr address, bool has_timestamp, [[maybe_unused]] u32 value,
                        std::optional<u32> subreport = std::nullopt) override {
        using Base = GuestStreamer<Traits>;
        constexpr u32 stub_value = 1U;
        return Base::WriteCounter(address, has_timestamp, stub_value, subreport);
    }
};
/// Private implementation state of QueryCacheBase: references to the collaborating
/// subsystems, the per-QueryType streamer table and the bookkeeping for pending flushes.
template <typename Traits>
struct QueryCacheBase<Traits>::QueryCacheBaseImpl {
    using RuntimeType = typename Traits::RuntimeType;

    /// Stores the collaborators and asks the runtime for one streamer per QueryType;
    /// `streamer_mask` gets a bit set for every type that has a streamer.
    QueryCacheBaseImpl(QueryCacheBase<Traits>* owner_, VideoCore::RasterizerInterface& rasterizer_,
                       Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_, Tegra::GPU& gpu_)
        : owner{owner_}, rasterizer{rasterizer_},
          cpu_memory{cpu_memory_}, runtime{runtime_}, gpu{gpu_} {
        streamer_mask = 0;
        for (size_t i = 0; i < static_cast<size_t>(QueryType::MaxQueryTypes); i++) {
            streamers[i] = runtime.GetStreamerInterface(static_cast<QueryType>(i));
            if (streamers[i]) {
                streamer_mask |= 1ULL << i;
            }
        }
    }

    /// Calls `func(streamer)` for every streamer whose bit is set in `mask`, lowest bit first.
    /// If `func` returns bool, iteration stops as soon as it returns true.
    template <typename Func>
    void ForEachStreamerIn(u64 mask, Func&& func) {
        // Compare the *result type* of the call with bool. Comparing the std::invoke_result
        // trait itself (as was done before) is always false and disables the early exit.
        static constexpr bool RETURNS_BOOL =
            std::is_same_v<std::invoke_result_t<Func&, StreamerInterface*>, bool>;
        while (mask != 0) {
            size_t position = std::countr_zero(mask);
            mask &= ~(1ULL << position);
            if constexpr (RETURNS_BOOL) {
                if (func(streamers[position])) {
                    return;
                }
            } else {
                func(streamers[position]);
            }
        }
    }

    /// Calls `func` for every available streamer (see ForEachStreamerIn).
    template <typename Func>
    void ForEachStreamer(Func&& func) {
        ForEachStreamerIn(streamer_mask, func);
    }

    /// Resolves a QueryLocation to its query.
    /// @return The query, or nullptr when the location's stream has no streamer (the streamer
    ///         itself may also return nullptr).
    QueryBase* ObtainQuery(typename QueryCacheBase<Traits>::QueryLocation location) {
        size_t which_stream = location.stream_id.Value();
        auto* streamer = streamers[which_stream];
        if (!streamer) {
            return nullptr;
        }
        return streamer->GetQuery(location.query_id.Value());
    }

    QueryCacheBase<Traits>* owner;
    VideoCore::RasterizerInterface& rasterizer;
    Core::Memory::Memory& cpu_memory;
    RuntimeType& runtime;
    Tegra::GPU& gpu;
    /// One entry per QueryType; nullptr where the runtime provides no streamer.
    std::array<StreamerInterface*, static_cast<size_t>(QueryType::MaxQueryTypes)> streamers;
    u64 streamer_mask; ///< Bit i is set when streamers[i] is non-null.
    std::mutex flush_guard; ///< Guards flushes_pending.
    std::deque<u64> flushes_pending; ///< One streamer mask per CommitAsyncFlushes() call.
    /// Locations queued by report operations for removal in UnregisterPending().
    std::vector<typename QueryCacheBase<Traits>::QueryLocation> pending_unregister;
};
template <typename Traits>
QueryCacheBase<Traits>::QueryCacheBase(Tegra::GPU& gpu_,
VideoCore::RasterizerInterface& rasterizer_,
Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_)
: cached_queries{} {
impl = std::make_unique<QueryCacheBase<Traits>::QueryCacheBaseImpl>(
this, rasterizer_, cpu_memory_, runtime_, gpu_);
}
// Defined here rather than in the class body: `impl` is a std::unique_ptr to
// QueryCacheBaseImpl, which is only forward-declared inside the class and is complete at
// this point.
template <typename Traits>
QueryCacheBase<Traits>::~QueryCacheBase() = default;
/// Starts (is_enabled == true) or pauses (is_enabled == false) the counter of the given type.
/// A missing streamer for the type is treated as unreachable.
template <typename Traits>
void QueryCacheBase<Traits>::CounterEnable(QueryType counter_type, bool is_enabled) {
    auto* const streamer = impl->streamers[static_cast<size_t>(counter_type)];
    if (streamer == nullptr) [[unlikely]] {
        UNREACHABLE();
        return;
    }
    if (!is_enabled) {
        streamer->PauseCounter();
        return;
    }
    streamer->StartCounter();
}
/// Closes the counter of the given type. A missing streamer is treated as unreachable.
template <typename Traits>
void QueryCacheBase<Traits>::CounterClose(QueryType counter_type) {
    auto* const streamer = impl->streamers[static_cast<size_t>(counter_type)];
    if (streamer == nullptr) [[unlikely]] {
        UNREACHABLE();
        return;
    }
    streamer->CloseCounter();
}
/// Resets the counter of the given type. A missing streamer is reported as unimplemented.
template <typename Traits>
void QueryCacheBase<Traits>::CounterReset(QueryType counter_type) {
    auto* const streamer = impl->streamers[static_cast<size_t>(counter_type)];
    if (streamer == nullptr) [[unlikely]] {
        UNIMPLEMENTED();
        return;
    }
    streamer->ResetCounter();
}
/// Binds the base channel state for channel `id`, then hands the channel's 3D engine to the
/// runtime.
template <typename Traits>
void QueryCacheBase<Traits>::BindToChannel(s32 id) {
    using ChannelBase = VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo>;
    ChannelBase::BindToChannel(id);
    impl->runtime.Bind3DEngine(maxwell3d);
}
/// Reports the counter of type `counter_type` to GPU address `addr`.
///
/// A query is created through the matching streamer, an operation that writes its value
/// (and a timestamp, when requested) to guest memory is queued on the rasterizer, and the
/// query is registered in `cached_queries` under its CPU address.
///
/// @param addr         GPU virtual address of the report.
/// @param counter_type Selects the streamer that produces the value.
/// @param flags        HasTimeout requests a timestamp; IsAFence queues the write as a fence.
/// @param payload      Value forwarded to the streamer's WriteCounter.
/// @param subreport    Forwarded to the streamer's WriteCounter.
template <typename Traits>
void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type,
                                           QueryPropertiesFlags flags, u32 payload, u32 subreport) {
    const bool has_timestamp = True(flags & QueryPropertiesFlags::HasTimeout);
    const bool is_fence = True(flags & QueryPropertiesFlags::IsAFence);
    size_t streamer_id = static_cast<size_t>(counter_type);
    auto* streamer = impl->streamers[streamer_id];
    if (!streamer) [[unlikely]] {
        // No streamer for this counter type: write a constant 1 straight to GPU memory.
        if (has_timestamp) {
            u64 timestamp = impl->gpu.GetTicks();
            gpu_memory->Write<u64>(addr + 8, timestamp);
            gpu_memory->Write<u64>(addr, 1ULL);
        } else {
            gpu_memory->Write<u32>(addr, 1U);
        }
        return;
    }
    auto cpu_addr_opt = gpu_memory->GpuToCpuAddress(addr);
    if (!cpu_addr_opt) [[unlikely]] {
        return;
    }
    VAddr cpu_addr = *cpu_addr_opt;
    const size_t new_query_id = streamer->WriteCounter(cpu_addr, has_timestamp, payload, subreport);
    auto* query = streamer->GetQuery(new_query_id);
    if (is_fence) {
        query->flags |= QueryFlagBits::IsFence;
    }
    QueryLocation query_location{};
    query_location.stream_id.Assign(static_cast<u32>(streamer_id));
    query_location.query_id.Assign(static_cast<u32>(new_query_id));
    // Splits a CPU address into (page number, offset inside the page).
    const auto gen_caching_indexing = [](VAddr cur_addr) {
        return std::make_pair<u64, u32>(cur_addr >> Core::Memory::YUZU_PAGEBITS,
                                        static_cast<u32>(cur_addr & Core::Memory::YUZU_PAGEMASK));
    };
    u8* pointer = impl->cpu_memory.GetPointer(cpu_addr);
    u8* pointer_timestamp = impl->cpu_memory.GetPointer(cpu_addr + 8);
    bool is_synced = !Settings::IsGPULevelHigh() && is_fence;
    // Deferred write-back of the query result into guest memory.
    std::function<void()> operation(
        [this, is_synced, query_base = query, query_location, pointer, pointer_timestamp] {
            if (True(query_base->flags & QueryFlagBits::IsInvalidated)) {
                if (!is_synced) [[likely]] {
                    impl->pending_unregister.push_back(query_location);
                }
                return;
            }
            if (False(query_base->flags & QueryFlagBits::IsFinalValueSynced)) [[unlikely]] {
                UNREACHABLE();
                return;
            }
            if (True(query_base->flags & QueryFlagBits::HasTimestamp)) {
                u64 timestamp = impl->gpu.GetTicks();
                std::memcpy(pointer_timestamp, &timestamp, sizeof(timestamp));
                std::memcpy(pointer, &query_base->value, sizeof(query_base->value));
            } else {
                u32 value = static_cast<u32>(query_base->value);
                std::memcpy(pointer, &value, sizeof(value));
            }
            if (!is_synced) [[likely]] {
                impl->pending_unregister.push_back(query_location);
            }
        });
    if (is_fence) {
        impl->rasterizer.SignalFence(std::move(operation));
    } else {
        impl->rasterizer.SyncOperation(std::move(operation));
    }
    if (is_synced) {
        // NOTE(review): the slot is released here while `operation` still holds a pointer to
        // the query — confirm the slot cannot be recycled before the operation has run.
        streamer->Free(new_query_id);
        return;
    }
    auto [cont_addr, base] = gen_caching_indexing(cpu_addr);
    {
        std::scoped_lock lock(cache_mutex);
        auto it1 = cached_queries.try_emplace(cont_addr);
        auto& sub_container = it1.first->second;
        auto it_current = sub_container.find(base);
        if (it_current != sub_container.end()) {
            // A previous query at this address is superseded by the new one. ObtainQuery may
            // return nullptr, so guard the dereference like every other caller does.
            if (auto* old_query = impl->ObtainQuery(it_current->second)) {
                old_query->flags |= QueryFlagBits::IsRewritten;
            }
        }
        sub_container.insert_or_assign(base, query_location);
    }
}
/// Removes every location queued in `pending_unregister` from the cache (only if the cache
/// still maps the address to that exact location) and frees the query in its streamer.
template <typename Traits>
void QueryCacheBase<Traits>::UnregisterPending() {
    // Splits a CPU address into (page number, offset inside the page).
    const auto gen_caching_indexing = [](VAddr cur_addr) {
        return std::make_pair<u64, u32>(cur_addr >> Core::Memory::YUZU_PAGEBITS,
                                        static_cast<u32>(cur_addr & Core::Memory::YUZU_PAGEMASK));
    };
    std::scoped_lock lock(cache_mutex);
    for (QueryLocation loc : impl->pending_unregister) {
        const auto [streamer_id, query_id] = loc.unpack();
        auto* streamer = impl->streamers[streamer_id];
        if (!streamer) [[unlikely]] {
            continue;
        }
        auto* query = streamer->GetQuery(query_id);
        if (!query) [[unlikely]] {
            // GetQuery may return nullptr for an unknown id; there is nothing to unregister
            // or free in that case.
            continue;
        }
        auto [cont_addr, base] = gen_caching_indexing(query->guest_address);
        auto it1 = cached_queries.find(cont_addr);
        if (it1 != cached_queries.end()) {
            auto it2 = it1->second.find(base);
            // Only erase when the entry has not been replaced by a newer query meanwhile.
            if (it2 != it1->second.end() && it2->second.raw == loc.raw) {
                it1->second.erase(it2);
            }
        }
        streamer->Free(query_id);
    }
    impl->pending_unregister.clear();
}
/// If any streamer has pending writes: presync all streamers, then sync them between the
/// runtime's Barriers(true) and Barriers(false) calls. Does nothing otherwise.
template <typename Traits>
void QueryCacheBase<Traits>::NotifyWFI() {
    bool any_pending = false;
    impl->ForEachStreamer([&any_pending](StreamerInterface* streamer) {
        if (streamer->HasPendingSync()) {
            any_pending = true;
        }
    });
    if (any_pending) {
        impl->ForEachStreamer([](StreamerInterface* streamer) { streamer->PresyncWrites(); });
        impl->runtime.Barriers(true);
        impl->ForEachStreamer([](StreamerInterface* streamer) { streamer->SyncWrites(); });
        impl->runtime.Barriers(false);
    }
}
/// On resume, resumes host conditional rendering. Otherwise pauses it and closes the
/// ZPassPixelCount64 and StreamingByteCount counters.
template <typename Traits>
void QueryCacheBase<Traits>::NotifySegment(bool resume) {
    if (!resume) {
        impl->runtime.PauseHostConditionalRendering();
        CounterClose(VideoCommon::QueryType::ZPassPixelCount64);
        CounterClose(VideoCommon::QueryType::StreamingByteCount);
        return;
    }
    impl->runtime.ResumeHostConditionalRendering();
}
/// Tries to evaluate the guest's conditional-rendering setup through the runtime.
///
/// The render-enable registers of the bound 3D engine select a comparison mode; for the
/// modes that read memory, the cached query registered at the compared address(es) is looked
/// up and handed to the runtime together with a flag telling whether any of them is host
/// managed but not yet guest synced.
///
/// @return The runtime's result for the memory-reading modes; false in every other case.
template <typename Traits>
bool QueryCacheBase<Traits>::AccelerateHostConditionalRendering() {
    bool qc_dirty = false;
    // Finds the cached query registered at `address`, or 4 bytes after it.
    const auto gen_lookup = [this, &qc_dirty](GPUVAddr address) -> VideoCommon::LookupData {
        auto cpu_addr_opt = gpu_memory->GpuToCpuAddress(address);
        if (!cpu_addr_opt) [[unlikely]] {
            return VideoCommon::LookupData{
                .address = 0,
                .found_query = nullptr,
            };
        }
        VAddr cpu_addr = *cpu_addr_opt;
        const auto not_found = [cpu_addr] {
            return VideoCommon::LookupData{
                .address = cpu_addr,
                .found_query = nullptr,
            };
        };
        std::scoped_lock lock(cache_mutex);
        auto it1 = cached_queries.find(cpu_addr >> Core::Memory::YUZU_PAGEBITS);
        if (it1 == cached_queries.end()) {
            return not_found();
        }
        auto& sub_container = it1->second;
        const u32 page_offset = static_cast<u32>(cpu_addr & Core::Memory::YUZU_PAGEMASK);
        auto it_current = sub_container.find(page_offset);
        if (it_current == sub_container.end()) {
            // Fall back to the entry 4 bytes further on and keep *that* iterator; the
            // previous code went on to dereference the end() iterator here.
            it_current = sub_container.find(page_offset + 4);
            if (it_current == sub_container.end()) {
                return not_found();
            }
        }
        auto* query = impl->ObtainQuery(it_current->second);
        if (!query) [[unlikely]] {
            // ObtainQuery may return nullptr; treat that like a cache miss.
            return not_found();
        }
        qc_dirty |= True(query->flags & QueryFlagBits::IsHostManaged) &&
                    False(query->flags & QueryFlagBits::IsGuestSynced);
        return VideoCommon::LookupData{
            .address = cpu_addr,
            .found_query = query,
        };
    };
    auto& regs = maxwell3d->regs;
    if (regs.render_enable_override != Maxwell::Regs::RenderEnable::Override::UseRenderEnable) {
        impl->runtime.EndHostConditionalRendering();
        return false;
    }
    /*if (!Settings::IsGPULevelHigh()) {
        impl->runtime.EndHostConditionalRendering();
        return gpu_memory->IsMemoryDirty(regs.render_enable.Address(), 24,
                                         VideoCommon::CacheType::BufferCache |
                                             VideoCommon::CacheType::QueryCache);
    }*/
    const ComparisonMode mode = static_cast<ComparisonMode>(regs.render_enable.mode);
    const GPUVAddr address = regs.render_enable.Address();
    switch (mode) {
    case ComparisonMode::True:
        impl->runtime.EndHostConditionalRendering();
        return false;
    case ComparisonMode::False:
        impl->runtime.EndHostConditionalRendering();
        return false;
    case ComparisonMode::Conditional: {
        VideoCommon::LookupData object_1{gen_lookup(address)};
        return impl->runtime.HostConditionalRenderingCompareValue(object_1, qc_dirty);
    }
    case ComparisonMode::IfEqual: {
        // The second operand is read 16 bytes after the first one.
        VideoCommon::LookupData object_1{gen_lookup(address)};
        VideoCommon::LookupData object_2{gen_lookup(address + 16)};
        return impl->runtime.HostConditionalRenderingCompareValues(object_1, object_2, qc_dirty,
                                                                   true);
    }
    case ComparisonMode::IfNotEqual: {
        VideoCommon::LookupData object_1{gen_lookup(address)};
        VideoCommon::LookupData object_2{gen_lookup(address + 16)};
        return impl->runtime.HostConditionalRenderingCompareValues(object_1, object_2, qc_dirty,
                                                                   false);
    }
    default:
        return false;
    }
}
// Async downloads

/// Records which streamers currently have unsynced queries as a new pending-flush entry,
/// queues UnregisterPending() on the rasterizer, and tells those streamers to push their
/// unsynced queries.
template <typename Traits>
void QueryCacheBase<Traits>::CommitAsyncFlushes() {
    u64 mask{};
    {
        std::scoped_lock lk(impl->flush_guard);
        impl->ForEachStreamer([&mask](StreamerInterface* streamer) {
            if (!streamer->HasUnsyncedQueries()) {
                return;
            }
            mask |= 1ULL << streamer->GetId();
        });
        impl->flushes_pending.push_back(mask);
    }
    impl->rasterizer.SyncOperation(std::function<void()>([this] { UnregisterPending(); }));
    if (mask != 0) {
        impl->ForEachStreamerIn(
            mask, [](StreamerInterface* streamer) { streamer->PushUnsyncedQueries(); });
    }
}
/// @return True when at least one streamer reports unsynced queries.
template <typename Traits>
bool QueryCacheBase<Traits>::HasUncommittedFlushes() const {
    bool any_unsynced = false;
    // The callback hands the running result back to the iteration helper.
    impl->ForEachStreamer([&any_unsynced](StreamerInterface* streamer) {
        any_unsynced = streamer->HasUnsyncedQueries() || any_unsynced;
        return any_unsynced;
    });
    return any_unsynced;
}
/// @return True when the oldest pending flush entry exists and names at least one streamer.
template <typename Traits>
bool QueryCacheBase<Traits>::ShouldWaitAsyncFlushes() {
    std::scoped_lock lk(impl->flush_guard);
    if (impl->flushes_pending.empty()) {
        return false;
    }
    return impl->flushes_pending.front() != 0ULL;
}
/// Removes the oldest pending flush entry and lets every streamer named in it pop its
/// unsynced queries. Expects at least one pending entry.
template <typename Traits>
void QueryCacheBase<Traits>::PopAsyncFlushes() {
    // Take the oldest mask while holding the guard, then work on it unlocked.
    const u64 mask = [this] {
        std::scoped_lock lk(impl->flush_guard);
        const u64 oldest = impl->flushes_pending.front();
        impl->flushes_pending.pop_front();
        return oldest;
    }();
    if (mask != 0) {
        impl->ForEachStreamerIn(
            mask, [](StreamerInterface* streamer) { streamer->PopUnsyncedQueries(); });
    }
}
// Invalidation

/// Flags the query at `location` as invalidated; does nothing when it cannot be resolved.
template <typename Traits>
void QueryCacheBase<Traits>::InvalidateQuery(QueryCacheBase<Traits>::QueryLocation location) {
    if (auto* query_base = impl->ObtainQuery(location)) {
        query_base->flags |= QueryFlagBits::IsInvalidated;
    }
}
/// @return True when the query at `location` is host managed and not yet guest synced;
///         false when the location cannot be resolved.
template <typename Traits>
bool QueryCacheBase<Traits>::IsQueryDirty(QueryCacheBase<Traits>::QueryLocation location) {
    const auto* query_base = impl->ObtainQuery(location);
    if (query_base == nullptr) {
        return false;
    }
    const QueryFlagBits flags = query_base->flags;
    const bool host_managed = True(flags & QueryFlagBits::IsHostManaged);
    const bool awaiting_guest_sync = False(flags & QueryFlagBits::IsGuestSynced);
    return host_managed && awaiting_guest_sync;
}
/// Writes the query's value to guest memory when its final value is available but not yet
/// guest synced (8 bytes for timestamped queries, 4 otherwise) and returns false. Otherwise
/// returns whether the query is host managed and not yet guest synced.
template <typename Traits>
bool QueryCacheBase<Traits>::SemiFlushQueryDirty(QueryCacheBase<Traits>::QueryLocation location) {
    auto* query_base = impl->ObtainQuery(location);
    if (!query_base) {
        return false;
    }
    const bool awaiting_guest_sync = False(query_base->flags & QueryFlagBits::IsGuestSynced);
    const bool value_ready = True(query_base->flags & QueryFlagBits::IsFinalValueSynced);
    if (!(value_ready && awaiting_guest_sync)) {
        return True(query_base->flags & QueryFlagBits::IsHostManaged) && awaiting_guest_sync;
    }
    auto* ptr = impl->cpu_memory.GetPointer(query_base->guest_address);
    if (True(query_base->flags & QueryFlagBits::HasTimestamp)) {
        std::memcpy(ptr, &query_base->value, sizeof(query_base->value));
    } else {
        const u32 value_l = static_cast<u32>(query_base->value);
        std::memcpy(ptr, &value_l, sizeof(value_l));
    }
    return false;
}
/// Asks the rasterizer to release its fences. FlushRegion calls this when a query in the
/// flushed region is still dirty.
template <typename Traits>
void QueryCacheBase<Traits>::RequestGuestHostSync() {
impl->rasterizer.ReleaseFences();
}
} // namespace VideoCommon

View File

@ -0,0 +1,181 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
#include <functional>
#include <memory>
#include <mutex>
#include <optional>
#include <span>
#include <type_traits>
#include <unordered_map>
#include <utility>
#include "common/assert.h"
#include "common/bit_field.h"
#include "common/common_types.h"
#include "core/memory.h"
#include "video_core/control/channel_state_cache.h"
#include "video_core/query_cache/query_base.h"
#include "video_core/query_cache/types.h"
namespace Core::Memory {
class Memory;
}
namespace VideoCore {
class RasterizerInterface;
}
namespace Tegra {
class GPU;
}
namespace VideoCommon {
/// Result of looking up the cached query at an address, as passed to the runtime's host
/// conditional rendering comparisons.
struct LookupData {
VAddr address; ///< CPU address that was looked up (0 when the GPU address had no mapping).
QueryBase* found_query; ///< Query cached at that address, or nullptr when none was found.
};
/// Cache of GPU queries indexed by the guest address they report to, generic over a backend
/// described by `Traits` (which must provide `RuntimeType`).
///
/// Registered queries are stored per page in `cached_queries`, keyed by page number and then
/// by offset inside the page; access to that map is guarded by `cache_mutex`.
template <typename Traits>
class QueryCacheBase : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
    using RuntimeType = typename Traits::RuntimeType;

public:
    /// Packed handle identifying a query: the streamer that owns it and its id inside that
    /// streamer, sharing one 32-bit word.
    union QueryLocation {
        BitField<27, 5, u32> stream_id;
        BitField<0, 27, u32> query_id;
        u32 raw;

        /// @return {stream id, query id} widened to size_t.
        std::pair<size_t, size_t> unpack() {
            return {static_cast<size_t>(stream_id.Value()), static_cast<size_t>(query_id.Value())};
        }
    };

    explicit QueryCacheBase(Tegra::GPU& gpu, VideoCore::RasterizerInterface& rasterizer_,
                            Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_);

    ~QueryCacheBase();

    /// Invalidates every cached query overlapping [addr, addr + size) and removes it from the
    /// cache.
    void InvalidateRegion(VAddr addr, std::size_t size) {
        IterateCache<true>(addr, size,
                           [this](QueryLocation location) { InvalidateQuery(location); });
    }

    /// Semi-flushes the cached queries overlapping [addr, addr + size), stopping at the first
    /// one that is still dirty; in that case a guest/host sync is requested.
    void FlushRegion(VAddr addr, std::size_t size) {
        bool result = false;
        IterateCache<false>(addr, size, [this, &result](QueryLocation location) {
            result |= SemiFlushQueryDirty(location);
            return result;
        });
        if (result) {
            RequestGuestHostSync();
        }
    }

    /// Builds a bit mask with one bit set per given query type (bit index = enum value).
    static u64 BuildMask(std::span<QueryType> types) {
        u64 mask = 0;
        for (auto query_type : types) {
            mask |= 1ULL << (static_cast<u64>(query_type));
        }
        return mask;
    }

    /// Return true when a CPU region is modified from the GPU
    [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size) {
        bool result = false;
        IterateCache<false>(addr, size, [this, &result](QueryLocation location) {
            result |= IsQueryDirty(location);
            return result;
        });
        return result;
    }

    void CounterEnable(QueryType counter_type, bool is_enabled);

    void CounterReset(QueryType counter_type);

    void CounterClose(QueryType counter_type);

    void CounterReport(GPUVAddr addr, QueryType counter_type, QueryPropertiesFlags flags,
                       u32 payload, u32 subreport);

    void NotifyWFI();

    bool AccelerateHostConditionalRendering();

    // Async downloads
    void CommitAsyncFlushes();

    bool HasUncommittedFlushes() const;

    bool ShouldWaitAsyncFlushes();

    void PopAsyncFlushes();

    void NotifySegment(bool resume);

    void BindToChannel(s32 id) override;

protected:
    /// Calls `func(location)` for every cached query whose 4-byte slot overlaps
    /// [addr, addr + size), holding `cache_mutex` for the whole walk.
    ///
    /// If `func` returns bool, the walk stops as soon as it returns true. When
    /// `remove_from_cache` is set, the visited entries of each page are erased after that
    /// page has been processed.
    template <bool remove_from_cache, typename Func>
    void IterateCache(VAddr addr, std::size_t size, Func&& func) {
        // Compare the *result type* of the call with bool. Comparing the std::invoke_result
        // trait itself (as was done before) is always false and disables the early exit.
        static constexpr bool RETURNS_BOOL =
            std::is_same_v<std::invoke_result_t<Func&, QueryLocation>, bool>;
        const u64 addr_begin = addr;
        const u64 addr_end = addr_begin + size;
        const u64 page_end = addr_end >> Core::Memory::YUZU_PAGEBITS;
        std::scoped_lock lock(cache_mutex);
        for (u64 page = addr_begin >> Core::Memory::YUZU_PAGEBITS; page <= page_end; ++page) {
            const u64 page_start = page << Core::Memory::YUZU_PAGEBITS;
            // True when the 4-byte slot at `query_location` (an offset inside this page)
            // overlaps the requested range.
            const auto in_range = [page_start, addr_begin, addr_end](const u32 query_location) {
                const u64 cache_begin = page_start + query_location;
                const u64 cache_end = cache_begin + sizeof(u32);
                return cache_begin < addr_end && addr_begin < cache_end;
            };
            const auto it = cached_queries.find(page);
            if (it == std::end(cached_queries)) {
                continue;
            }
            auto& contents = it->second;
            for (auto& query : contents) {
                if (!in_range(query.first)) {
                    continue;
                }
                if constexpr (RETURNS_BOOL) {
                    if (func(query.second)) {
                        return;
                    }
                } else {
                    func(query.second);
                }
            }
            if constexpr (remove_from_cache) {
                // Take the entry by reference to its real type; naming pair<u32, ...> here
                // would copy every entry, since the map stores pair<const u32, ...>.
                const auto in_range2 = [&](const auto& pair) { return in_range(pair.first); };
                std::erase_if(contents, in_range2);
            }
        }
    }

    using ContentCache = typename std::unordered_map<u64, std::unordered_map<u32, QueryLocation>>;

    void InvalidateQuery(QueryLocation location);
    bool IsQueryDirty(QueryLocation location);
    bool SemiFlushQueryDirty(QueryLocation location);
    void RequestGuestHostSync();
    void UnregisterPending();

    /// Page number -> (offset inside the page -> query location).
    std::unordered_map<u64, std::unordered_map<u32, QueryLocation>> cached_queries;
    std::mutex cache_mutex; ///< Guards cached_queries.

    struct QueryCacheBaseImpl;
    friend struct QueryCacheBaseImpl;
    friend RuntimeType;

    std::unique_ptr<struct QueryCacheBaseImpl> impl;
};
} // namespace VideoCommon

View File

@ -0,0 +1,125 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
#include <deque>
#include <optional>
#include <vector>
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/query_cache/bank_base.h"
#include "video_core/query_cache/query_base.h"
namespace VideoCommon {
/// Interface of a query streamer. GetQuery, WriteCounter and Free must be implemented;
/// every other hook has a default that does nothing or reports "nothing to do".
class StreamerInterface {
public:
    StreamerInterface(size_t id_, u64 dependance_mask_ = 0)
        : id{id_}, dependance_mask{dependance_mask_} {}

    virtual ~StreamerInterface() = default;

    /// Returns the query with the given id.
    virtual QueryBase* GetQuery(size_t id) = 0;

    // Counter control hooks; no-ops by default.
    virtual void StartCounter() {}
    virtual void PauseCounter() {}
    virtual void ResetCounter() {}
    virtual void CloseCounter() {}

    /// Default: there is never anything pending.
    virtual bool HasPendingSync() {
        return false;
    }

    // Write synchronisation hooks; no-ops by default.
    virtual void PresyncWrites() {}
    virtual void SyncWrites() {}

    /// Records a counter write and returns the id of the resulting query.
    virtual size_t WriteCounter(VAddr address, bool has_timestamp, u32 value,
                                std::optional<u32> subreport = std::nullopt) = 0;

    /// Default: there are never unsynced queries.
    virtual bool HasUnsyncedQueries() {
        return false;
    }

    // Unsynced-query hooks; no-ops by default.
    virtual void PushUnsyncedQueries() {}
    virtual void PopUnsyncedQueries() {}

    /// Releases the query with the given id.
    virtual void Free(size_t query_id) = 0;

    /// @return The id this streamer was constructed with.
    size_t GetId() const {
        return id;
    }

protected:
    const size_t id;
    const u64 dependance_mask;
};
/// Streamer base that stores its queries in a deque and recycles the slots of freed ones.
template <typename QueryType>
class SimpleStreamer : public StreamerInterface {
public:
    SimpleStreamer(size_t id_) : StreamerInterface{id_} {}

    virtual ~SimpleStreamer() = default;

protected:
    /// @return The query in slot `query_id`, or nullptr when the id is out of range.
    virtual QueryType* GetQuery(size_t query_id) override {
        return query_id < slot_queries.size() ? &slot_queries[query_id] : nullptr;
    }

    /// Releases the slot of `query_id` under the streamer's lock.
    virtual void Free(size_t query_id) override {
        std::scoped_lock lk(guard);
        ReleaseQuery(query_id);
    }

    /// Constructs a query from `args`, reusing the oldest released slot when one exists and
    /// appending a new slot otherwise.
    /// @return Id (slot index) of the new query.
    template <typename... Args, typename = decltype(QueryType(std::declval<Args>()...))>
    size_t BuildQuery(Args&&... args) {
        std::scoped_lock lk(guard);
        if (old_queries.empty()) {
            slot_queries.emplace_back(std::forward<Args>(args)...);
            return slot_queries.size() - 1;
        }
        const size_t recycled_id = old_queries.front();
        old_queries.pop_front();
        // Construct the new query in place over the released slot.
        new (&slot_queries[recycled_id]) QueryType(std::forward<Args>(args)...);
        return recycled_id;
    }

    /// Marks the slot of `query_id` as reusable; an out-of-range id is unreachable.
    void ReleaseQuery(size_t query_id) {
        if (query_id >= slot_queries.size()) {
            UNREACHABLE();
            return;
        }
        old_queries.push_back(query_id);
    }

    std::mutex guard;                ///< Taken by Free() and BuildQuery().
    std::deque<QueryType> slot_queries; ///< Query storage; the index is the query id.
    std::deque<size_t> old_queries;  ///< Released ids awaiting reuse, oldest first.
};
} // namespace VideoCommon

View File

@ -0,0 +1,74 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
#include "common/common_funcs.h"
#include "common/common_types.h"
namespace VideoCommon {
/// Properties of a counter report, passed to QueryCacheBase::CounterReport.
enum class QueryPropertiesFlags : u32 {
HasTimeout = 1 << 0, ///< CounterReport reads this as "the report carries a timestamp".
IsAFence = 1 << 1, ///< The report is a fence.
};
// Enables the bitwise operators used on QueryPropertiesFlags.
DECLARE_ENUM_FLAG_OPERATORS(QueryPropertiesFlags)
// This should always be equivalent to maxwell3d Report Semaphore Reports
// The values double as indices into the query cache's streamer table and as bit positions
// in streamer masks, so they must stay below 64. Values 8, 20, 22 and 23 are not assigned.
enum class QueryType : u32 {
Payload = 0, // "None" in docs, but confirmed via hardware to return the payload
VerticesGenerated = 1,
ZPassPixelCount = 2,
PrimitivesGenerated = 3,
AlphaBetaClocks = 4,
VertexShaderInvocations = 5,
StreamingPrimitivesNeededMinusSucceeded = 6,
GeometryShaderInvocations = 7,
GeometryShaderPrimitivesGenerated = 9,
ZCullStats0 = 10,
StreamingPrimitivesSucceeded = 11,
ZCullStats1 = 12,
StreamingPrimitivesNeeded = 13,
ZCullStats2 = 14,
ClipperInvocations = 15,
ZCullStats3 = 16,
ClipperPrimitivesGenerated = 17,
VtgPrimitivesOut = 18,
PixelShaderInvocations = 19,
ZPassPixelCount64 = 21,
IEEECleanColorTarget = 24,
IEEECleanZetaTarget = 25,
StreamingByteCount = 26,
TessellationInitInvocations = 27,
BoundingRectangle = 28,
TessellationShaderInvocations = 29,
TotalStreamingPrimitivesNeededMinusSucceeded = 30,
TessellationShaderPrimitivesGenerated = 31,
// max. (one past the last query type, i.e. 32; used as the streamer table size)
MaxQueryTypes,
};
// Comparison modes for Host Conditional Rendering
// QueryCacheBase::AccelerateHostConditionalRendering casts the render-enable mode register
// to this enum.
enum class ComparisonMode : u32 {
False = 0, // Ends host conditional rendering; nothing is compared.
True = 1, // Ends host conditional rendering; nothing is compared.
Conditional = 2, // One value is looked up and evaluated by the runtime.
IfEqual = 3, // Two values, 16 bytes apart, are compared for equality.
IfNotEqual = 4, // Two values, 16 bytes apart, are compared for inequality.
MaxComparisonMode, // Number of comparison modes.
};
// Reduction ops.
// NOTE(review): the names suggest add/min/max/increment/decrement/and/or/xor reductions —
// confirm against the code that consumes this enum.
enum class ReductionOp : u32 {
RedAdd = 0,
RedMin = 1,
RedMax = 2,
RedInc = 3,
RedDec = 4,
RedAnd = 5,
RedOr = 6,
RedXor = 7,
MaxReductionOp, // Number of reduction ops.
};
} // namespace VideoCommon