From 55481df50f98feb04b18beb518904681b8fcc345 Mon Sep 17 00:00:00 2001
From: Lioncash <mathew1800@gmail.com>
Date: Wed, 5 Jun 2019 14:20:13 -0400
Subject: [PATCH 1/3] kernel/vm_manager: Add overload of FindFreeRegion() that
 operates on a boundary

This will be necessary for making our TLS slot management slightly more
straightforward. This can also be utilized for other purposes in the
future.

We can implement the existing simpler overload in terms of this one
anyway; we just pass the beginning and end of the ASLR region as the
boundaries.
---
 src/core/hle/kernel/vm_manager.cpp | 31 ++++++++++++++++++++----------
 src/core/hle/kernel/vm_manager.h   | 31 +++++++++++++++++++++++++++---
 2 files changed, 49 insertions(+), 13 deletions(-)
diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp
index c929c2a52f..3df5ccb7fb 100644
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -152,22 +152,33 @@ ResultVal<VMManager::VMAHandle> VMManager::MapBackingMemory(VAddr target, u8* me
 }
 
 ResultVal<VAddr> VMManager::FindFreeRegion(u64 size) const {
-    // Find the first Free VMA.
-    const VAddr base = GetASLRRegionBaseAddress();
-    const VMAHandle vma_handle = std::find_if(vma_map.begin(), vma_map.end(), [&](const auto& vma) {
-        if (vma.second.type != VMAType::Free)
-            return false;
+    return FindFreeRegion(GetASLRRegionBaseAddress(), GetASLRRegionEndAddress(), size);
+}
 
-        const VAddr vma_end = vma.second.base + vma.second.size;
-        return vma_end > base && vma_end >= base + size;
-    });
+ResultVal<VAddr> VMManager::FindFreeRegion(VAddr begin, VAddr end, u64 size) const {
+    ASSERT(begin < end);
+    ASSERT(size <= end - begin);
 
-    if (vma_handle == vma_map.end()) {
+    const VMAHandle vma_handle =
+        std::find_if(vma_map.begin(), vma_map.end(), [begin, end, size](const auto& vma) {
+            if (vma.second.type != VMAType::Free) {
+                return false;
+            }
+            const VAddr vma_base = vma.second.base;
+            const VAddr vma_end = vma_base + vma.second.size;
+            const VAddr assumed_base = (begin < vma_base) ? vma_base : begin;
+            const VAddr used_range = assumed_base + size;
+            // `end` is exclusive, so a candidate that stops exactly at `end` still fits.
+            return vma_base <= assumed_base && assumed_base < used_range && used_range <= end &&
+                   used_range <= vma_end;
+        });
+
+    if (vma_handle == vma_map.cend()) {
         // TODO(Subv): Find the correct error code here.
         return ResultCode(-1);
     }
 
-    const VAddr target = std::max(base, vma_handle->second.base);
+    const VAddr target = std::max(begin, vma_handle->second.base);
     return MakeResult<VAddr>(target);
 }
 
diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h
index dfbf7a8941..752ae62f93 100644
--- a/src/core/hle/kernel/vm_manager.h
+++ b/src/core/hle/kernel/vm_manager.h
@@ -362,13 +362,38 @@ public:
     ResultVal<VMAHandle> MapBackingMemory(VAddr target, u8* memory, u64 size, MemoryState state);
 
     /**
-     * Finds the first free address that can hold a region of the desired size.
+     * Finds the first free memory region of the given size within
+     * the user-addressable ASLR memory region.
      *
-     * @param size Size of the desired region.
-     * @return The found free address.
+     * @param size The size of the desired region in bytes.
+     *
+     * @returns If successful, the base address of the free region with
+     *          the given size.
      */
     ResultVal<VAddr> FindFreeRegion(u64 size) const;
 
+    /**
+     * Finds the first free region of the desired size within the given address range.
+     *
+     * @param begin The starting address of the range.
+     *              This is treated as an inclusive beginning address.
+     *
+     * @param end   The ending address of the range.
+     *              This is treated as an exclusive ending address.
+     *
+     * @param size  The size of the free region to attempt to locate,
+     *              in bytes.
+     *
+     * @returns If successful, the base address of the free region with
+     *          the given size.
+     *
+     * @returns If unsuccessful, a result containing an error code.
+     *
+     * @pre The starting address must be less than the ending address.
+     * @pre The size must not exceed the address range itself.
+     */
+    ResultVal<VAddr> FindFreeRegion(VAddr begin, VAddr end, u64 size) const;
+
     /**
      * Maps a memory-mapped IO region at a given address.
      *

From abdce723ebdcd0cb142b289af23d982dfcadaa12 Mon Sep 17 00:00:00 2001
From: Lioncash <mathew1800@gmail.com>
Date: Wed, 5 Jun 2019 14:32:33 -0400
Subject: [PATCH 2/3] kernel/process: Decouple TLS handling from threads

Extracts out all of the thread local storage management from thread
instances themselves and makes the owning process handle the management
of the memory. This brings the memory management slightly more in line
with how the kernel handles these allocations.

Furthermore, this also makes the TLS page management a little more
readable compared to the lingering implementation that was carried over
from Citra.
---
 src/core/hle/kernel/process.cpp | 135 +++++++++++++++++++++-----------
 src/core/hle/kernel/process.h   |   8 +-
 src/core/hle/kernel/thread.cpp  |   4 +-
 src/core/hle/kernel/thread.h    |  16 +---
 4 files changed, 97 insertions(+), 66 deletions(-)

diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index 7cfc513a1c..f45ef05f69 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 
 #include <algorithm>
+#include <bitset>
 #include <memory>
 #include <random>
 #include "common/alignment.h"
@@ -48,8 +49,58 @@ void SetupMainThread(Process& owner_process, KernelCore& kernel, u32 priority) {
 }
 } // Anonymous namespace
 
-SharedPtr<Process> Process::Create(Core::System& system, std::string name,
-                                   Process::ProcessType type) {
+// Models one page of memory set aside for thread-local storage (TLS).
+//
+// A page is split into fixed-size slots, tracked by a bitset of used slots.
+// Each process and thread receives a slot from whichever page happens to
+// have one available, so slots are reserved and released individually.
+class TLSPage {
+public:
+    static constexpr std::size_t num_slot_entries = Memory::PAGE_SIZE / Memory::TLS_ENTRY_SIZE;
+
+    explicit TLSPage(VAddr address) : base_address{address} {}
+
+    bool HasAvailableSlots() const {
+        return is_slot_used.count() < num_slot_entries;
+    }
+
+    VAddr GetBaseAddress() const {
+        return base_address;
+    }
+
+    std::optional<VAddr> ReserveSlot() {
+        // Hand out the lowest-numbered slot that is still free.
+        for (std::size_t slot = 0; slot < num_slot_entries; ++slot) {
+            if (!is_slot_used.test(slot)) {
+                is_slot_used.set(slot);
+                return base_address + (slot * Memory::TLS_ENTRY_SIZE);
+            }
+        }
+
+        // Every slot in this page is already taken.
+        return std::nullopt;
+    }
+
+    void ReleaseSlot(VAddr address) {
+        // Only addresses that lie inside this page and are aligned to
+        // the TLS entry size can refer to one of its slots.
+        ASSERT(IsWithinPage(address));
+        ASSERT((address % Memory::TLS_ENTRY_SIZE) == 0);
+
+        const std::size_t slot = (address - base_address) / Memory::TLS_ENTRY_SIZE;
+        is_slot_used[slot] = false;
+    }
+
+private:
+    bool IsWithinPage(VAddr address) const {
+        return address >= base_address && address < base_address + Memory::PAGE_SIZE;
+    }
+
+    VAddr base_address;
+    std::bitset<num_slot_entries> is_slot_used;
+};
+
+SharedPtr<Process> Process::Create(Core::System& system, std::string name, ProcessType type) {
     auto& kernel = system.Kernel();
 
     SharedPtr<Process> process(new Process(system));
@@ -181,61 +232,55 @@ void Process::PrepareForTermination() {
 }
 
 /**
- * Finds a free location for the TLS section of a thread.
- * @param tls_slots The TLS page array of the thread's owner process.
- * Returns a tuple of (page, slot, alloc_needed) where:
- * page: The index of the first allocated TLS page that has free slots.
- * slot: The index of the first free slot in the indicated page.
- * alloc_needed: Whether there's a need to allocate a new TLS page (All pages are full).
+ * Searches the given TLS pages, in order, for the first one that still
+ * has at least one unused slot.
+ *
+ * @returns An iterator to the first page with an available slot, or the
+ *          end iterator of the given container if every page is full
+ *          (or no pages exist).
  */
-static std::tuple<std::size_t, std::size_t, bool> FindFreeThreadLocalSlot(
-    const std::vector<std::bitset<8>>& tls_slots) {
-    // Iterate over all the allocated pages, and try to find one where not all slots are used.
-    for (std::size_t page = 0; page < tls_slots.size(); ++page) {
-        const auto& page_tls_slots = tls_slots[page];
-        if (!page_tls_slots.all()) {
-            // We found a page with at least one free slot, find which slot it is
-            for (std::size_t slot = 0; slot < page_tls_slots.size(); ++slot) {
-                if (!page_tls_slots.test(slot)) {
-                    return std::make_tuple(page, slot, false);
-                }
-            }
-        }
-    }
-
-    return std::make_tuple(0, 0, true);
+static auto FindTLSPageWithAvailableSlots(std::vector<TLSPage>& tls_pages) {
+    const auto has_free_slot = [](const auto& page) { return page.HasAvailableSlots(); };
+    return std::find_if(tls_pages.begin(), tls_pages.end(), has_free_slot);
 }
 
-VAddr Process::MarkNextAvailableTLSSlotAsUsed(Thread& thread) {
-    auto [available_page, available_slot, needs_allocation] = FindFreeThreadLocalSlot(tls_slots);
-    const VAddr tls_begin = vm_manager.GetTLSIORegionBaseAddress();
+VAddr Process::CreateTLSRegion() {
+    auto tls_page_iter = FindTLSPageWithAvailableSlots(tls_pages);
 
-    if (needs_allocation) {
-        tls_slots.emplace_back(0); // The page is completely available at the start
-        available_page = tls_slots.size() - 1;
-        available_slot = 0; // Use the first slot in the new page
+    if (tls_page_iter == tls_pages.cend()) {
+        const auto region_address =
+            vm_manager.FindFreeRegion(vm_manager.GetTLSIORegionBaseAddress(),
+                                      vm_manager.GetTLSIORegionEndAddress(), Memory::PAGE_SIZE);
+        ASSERT(region_address.Succeeded());
 
-        // Allocate some memory from the end of the linear heap for this region.
-        auto& tls_memory = thread.GetTLSMemory();
-        tls_memory->insert(tls_memory->end(), Memory::PAGE_SIZE, 0);
+        const auto map_result = vm_manager.MapMemoryBlock(
+            *region_address, std::make_shared<std::vector<u8>>(Memory::PAGE_SIZE), 0,
+            Memory::PAGE_SIZE, MemoryState::ThreadLocal);
+        ASSERT(map_result.Succeeded());
 
-        vm_manager.RefreshMemoryBlockMappings(tls_memory.get());
+        tls_pages.emplace_back(*region_address);
 
-        vm_manager.MapMemoryBlock(tls_begin + available_page * Memory::PAGE_SIZE, tls_memory, 0,
-                                  Memory::PAGE_SIZE, MemoryState::ThreadLocal);
+        const auto reserve_result = tls_pages.back().ReserveSlot();
+        ASSERT(reserve_result.has_value());
+        // Hand back the slot reserved from the page that was just mapped.
+        return *reserve_result;
     }
 
-    tls_slots[available_page].set(available_slot);
-
-    return tls_begin + available_page * Memory::PAGE_SIZE + available_slot * Memory::TLS_ENTRY_SIZE;
+    return *tls_page_iter->ReserveSlot();
 }
 
-void Process::FreeTLSSlot(VAddr tls_address) {
-    const VAddr tls_base = tls_address - vm_manager.GetTLSIORegionBaseAddress();
-    const VAddr tls_page = tls_base / Memory::PAGE_SIZE;
-    const VAddr tls_slot = (tls_base % Memory::PAGE_SIZE) / Memory::TLS_ENTRY_SIZE;
+void Process::FreeTLSRegion(VAddr tls_address) {
+    const VAddr page_base = Common::AlignDown(tls_address, Memory::PAGE_SIZE);
+    const auto is_owning_page = [page_base](const auto& page) {
+        return page.GetBaseAddress() == page_base;
+    };
+    auto iter = std::find_if(tls_pages.begin(), tls_pages.end(), is_owning_page);
 
-    tls_slots[tls_page].reset(tls_slot);
+    // A valid TLS address always lies within one of the tracked pages, so
+    // a failed lookup here means the caller passed a bad address.
+    ASSERT(iter != tls_pages.cend());
+
+    iter->ReleaseSlot(tls_address);
 }
 
 void Process::LoadModule(CodeSet module_, VAddr base_addr) {
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h
index 248fd38405..39b098e9bb 100644
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -5,7 +5,6 @@
 #pragma once
 
 #include <array>
-#include <bitset>
 #include <cstddef>
 #include <list>
 #include <string>
@@ -32,6 +31,7 @@ namespace Kernel {
 class KernelCore;
 class ResourceLimit;
 class Thread;
+class TLSPage;
 
 struct CodeSet;
 
@@ -260,10 +260,10 @@ public:
     // Thread-local storage management
 
     // Marks the next available region as used and returns the address of the slot.
-    VAddr MarkNextAvailableTLSSlotAsUsed(Thread& thread);
+    [[nodiscard]] VAddr CreateTLSRegion();
 
     // Frees a used TLS slot identified by the given address
-    void FreeTLSSlot(VAddr tls_address);
+    void FreeTLSRegion(VAddr tls_address);
 
 private:
     explicit Process(Core::System& system);
@@ -310,7 +310,7 @@ private:
     /// holds the TLS for a specific thread. This vector contains which parts are in use for each
     /// page as a bitmask.
     /// This vector will grow as more pages are allocated for new threads.
-    std::vector<std::bitset<8>> tls_slots;
+    std::vector<TLSPage> tls_pages;
 
     /// Contains the parsed process capability descriptors.
     ProcessCapabilities capabilities;
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index a055a50026..ec529e7f2d 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -65,7 +65,7 @@ void Thread::Stop() {
     owner_process->UnregisterThread(this);
 
     // Mark the TLS slot in the thread's page as free.
-    owner_process->FreeTLSSlot(tls_address);
+    owner_process->FreeTLSRegion(tls_address);
 }
 
 void Thread::WakeAfterDelay(s64 nanoseconds) {
@@ -205,9 +205,9 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name
     thread->name = std::move(name);
     thread->callback_handle = kernel.ThreadWakeupCallbackHandleTable().Create(thread).Unwrap();
     thread->owner_process = &owner_process;
+    thread->tls_address = thread->owner_process->CreateTLSRegion();
     thread->scheduler = &system.Scheduler(processor_id);
     thread->scheduler->AddThread(thread);
-    thread->tls_address = thread->owner_process->MarkNextAvailableTLSSlotAsUsed(*thread);
 
     thread->owner_process->RegisterThread(thread.get());
 
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index b4b9cda7cd..07e989637e 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -5,7 +5,6 @@
 #pragma once
 
 #include <functional>
-#include <memory>
 #include <string>
 #include <vector>
 
@@ -78,9 +77,6 @@ enum class ThreadActivity : u32 {
 
 class Thread final : public WaitObject {
 public:
-    using TLSMemory = std::vector<u8>;
-    using TLSMemoryPtr = std::shared_ptr<TLSMemory>;
-
     using MutexWaitingThreads = std::vector<SharedPtr<Thread>>;
 
     using ThreadContext = Core::ARM_Interface::ThreadContext;
@@ -169,14 +165,6 @@ public:
         return thread_id;
     }
 
-    TLSMemoryPtr& GetTLSMemory() {
-        return tls_memory;
-    }
-
-    const TLSMemoryPtr& GetTLSMemory() const {
-        return tls_memory;
-    }
-
     /// Resumes a thread from waiting
     void ResumeFromWait();
 
@@ -463,11 +451,9 @@ private:
     u32 ideal_core{0xFFFFFFFF};
     u64 affinity_mask{0x1};
 
-    TLSMemoryPtr tls_memory = std::make_shared<TLSMemory>();
+    ThreadActivity activity = ThreadActivity::Normal;
 
     std::string name;
-
-    ThreadActivity activity = ThreadActivity::Normal;
 };
 
 /**

From e23110bd9fce557ee9ad8542866bc552eda8713f Mon Sep 17 00:00:00 2001
From: Lioncash <mathew1800@gmail.com>
Date: Wed, 5 Jun 2019 15:08:01 -0400
Subject: [PATCH 3/3] kernel/process: Default initialize all member variables

Ensures a Process instance is always created with a deterministic
initial state.
---
 src/core/hle/kernel/process.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h
index 39b098e9bb..83ea02beec 100644
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -290,7 +290,7 @@ private:
     u64 code_memory_size = 0;
 
     /// Current status of the process
-    ProcessStatus status;
+    ProcessStatus status{};
 
     /// The ID of this process
     u64 process_id = 0;
@@ -339,7 +339,7 @@ private:
     Mutex mutex;
 
     /// Random values for svcGetInfo RandomEntropy
-    std::array<u64, RANDOM_ENTROPY_SIZE> random_entropy;
+    std::array<u64, RANDOM_ENTROPY_SIZE> random_entropy{};
 
     /// List of threads that are running with this process as their owner.
     std::list<const Thread*> thread_list;