From d7278db0c4c0367e192c5d2a9d46973a573e5a32 Mon Sep 17 00:00:00 2001
From: Jakub Alba <jakub@yakubin.com>
Date: Sat, 14 Jan 2023 15:28:08 +0100
Subject: initial commit

---
 .gitignore         |   1 +
 COPYING            |  27 +++++++
 antioptimise.h     |  11 +++
 bench.cpp          | 228 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 build-clang.sh     |  18 +++++
 build-gcc.sh       |  18 +++++
 build.bat          |  26 ++++++
 dynarray.h         | 119 ++++++++++++++++++++++++++++
 mem_dynarray.cpp   |  24 ++++++
 mem_stdvec.cpp     |  22 ++++++
 pal.h              |   7 ++
 pal/posix.h        |  91 +++++++++++++++++++++
 pal/windows.h      |  74 +++++++++++++++++
 test_arena.cpp     |  27 +++++++
 test_dynarray.cpp  |  29 +++++++
 virtualallochist.d |  53 +++++++++++++
 16 files changed, 775 insertions(+)
 create mode 100755 .gitignore
 create mode 100644 COPYING
 create mode 100755 antioptimise.h
 create mode 100755 bench.cpp
 create mode 100755 build-clang.sh
 create mode 100755 build-gcc.sh
 create mode 100755 build.bat
 create mode 100755 dynarray.h
 create mode 100755 mem_dynarray.cpp
 create mode 100755 mem_stdvec.cpp
 create mode 100755 pal.h
 create mode 100755 pal/posix.h
 create mode 100755 pal/windows.h
 create mode 100755 test_arena.cpp
 create mode 100755 test_dynarray.cpp
 create mode 100755 virtualallochist.d
diff --git a/.gitignore b/.gitignore
new file mode 100755
index 0000000..84c048a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/build/
diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000..7abb5cb
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,27 @@
+Copyright (c) Jakub Alba <jakub@yakubin.com>
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its contributors
+   may be used to endorse or promote products derived from this software without
+   specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/antioptimise.h b/antioptimise.h
new file mode 100755
index 0000000..514ea54
--- /dev/null
+++ b/antioptimise.h
@@ -0,0 +1,11 @@
+#pragma once
+
+template <class T>
+inline auto PhantomRead(T const& datum) {
+    return reinterpret_cast<char const volatile&>(datum);
+}
+
+template <class T>
+inline auto PhantomWrite(T& datum) {
+    return reinterpret_cast<char volatile&>(datum);
+}
\ No newline at end of file
diff --git a/bench.cpp b/bench.cpp
new file mode 100755
index 0000000..759482d
--- /dev/null
+++ b/bench.cpp
@@ -0,0 +1,228 @@
+#include "antioptimise.h"
+#include "dynarray.h"
+
+#include <iomanip>
+#include <iostream>
+#include <vector>
+
+NOINLINE double BenchDynArrayThroughput(size_t n) {
+    static constexpr size_t N = 16ULL * 1024ULL * 1024ULL * 1024ULL;
+    assert(n <= N);
+
+    auto start_time = pal::GetCpuTime();
+    auto a = DynArray<int>(N);
+    for (size_t i = 0; i < n; i++) {
+        a.Append(static_cast<int>(i));
+    }
+    auto end_time = pal::GetCpuTime();
+
+    PhantomRead(a);
+
+    return static_cast<double>(end_time - start_time) * 1e-6;
+}
+
+NOINLINE double BenchStdVectorThroughput(size_t n) {
+    auto start_time = pal::GetCpuTime();
+    std::vector<int> a;
+    for (size_t i = 0; i < n; i++) {
+        a.push_back(static_cast<int>(i));
+    }
+    auto end_time = pal::GetCpuTime();
+
+    PhantomRead(a);
+
+    return static_cast<double>(end_time - start_time) * 1e-6;
+}
+
+NOINLINE double BenchDynArrayThroughputWithUniquePtr(size_t n) {
+    static constexpr size_t N = 16ULL * 1024ULL * 1024ULL * 1024ULL;
+    assert(n <= N);
+
+    auto start_time = pal::GetCpuTime();
+    auto a = DynArray<std::unique_ptr<int>>(N);
+    for (size_t i = 0; i < n; i++) {
+        a.Append(std::make_unique<int>(static_cast<int>(i)));
+    }
+    auto end_time = pal::GetCpuTime();
+
+    PhantomRead(a);
+
+    return static_cast<double>(end_time - start_time) * 1e-6;
+}
+
+NOINLINE double BenchStdVectorThroughputWithUniquePtr(size_t n) {
+    auto start_time = pal::GetCpuTime();
+    std::vector<std::unique_ptr<int>> a;
+    for (size_t i = 0; i < n; i++) {
+        a.push_back(std::make_unique<int>(static_cast<int>(i)));
+    }
+    auto end_time = pal::GetCpuTime();
+
+    PhantomRead(a);
+
+    return static_cast<double>(end_time - start_time) * 1e-6;
+}
+
+
+struct Latency {
+    uint64_t min = ~0ULL;
+    uint64_t max = 0;
+    size_t histogram[7] = {0};
+
+    void Update(uint64_t delta) {
+        if (delta < min) {
+            min = delta;
+        }
+        if (max < delta) {
+            max = delta;
+        }
+        if (delta < 1) {
+            histogram[0]++;
+        } else if (delta < 10) {
+            histogram[1]++;
+        } else if (delta < 100) {
+            histogram[2]++;
+        } else if (delta < 1000) {
+            histogram[3]++;
+        } else if (delta < 10000) {
+            histogram[4]++;
+        } else if (delta < 100000) {
+            histogram[5]++;
+        } else {
+            histogram[6]++;
+        }
+    }
+
+    void Print(std::ostream& out) const {
+        out << "(" << static_cast<double>(min)*1e-6 << ", "
+            << static_cast<double>(max)*1e-6 << ", [";
+        out << histogram[0] << ", "
+            << histogram[1] << ", "
+            << histogram[2] << ", "
+            << histogram[3] << ", "
+            << histogram[4] << ", "
+            << histogram[5] << ", "
+            << histogram[6] << "])"
+            << std::endl;
+    }
+};
+
+NOINLINE Latency BenchDynArrayLatency(size_t n) {
+    static constexpr size_t N = 16ULL * 1024ULL * 1024ULL * 1024ULL;
+    assert(n <= N);
+
+    Latency l;
+    auto a = DynArray<int>(N);
+
+    for (size_t i = 0; i < n; i++) {
+        auto start_time = pal::GetCpuTime();
+        a.Append(static_cast<int>(i));
+        auto end_time = pal::GetCpuTime();
+        l.Update(end_time - start_time);
+    }
+
+    PhantomRead(a);
+
+    return l;
+}
+
+NOINLINE Latency BenchStdVectorLatency(size_t n) {
+    Latency l;
+    std::vector<int> a;
+
+    for (size_t i = 0; i < n; i++) {
+        auto start_time = pal::GetCpuTime();
+        a.push_back(static_cast<int>(i));
+        auto end_time = pal::GetCpuTime();
+        l.Update(end_time - start_time);
+    }
+
+    PhantomRead(a);
+
+    return l;
+}
+
+NOINLINE Latency BenchDynArrayLatencyWithUniquePtr(size_t n) {
+    static constexpr size_t N = 16ULL * 1024ULL * 1024ULL * 1024ULL;
+    assert(n <= N);
+
+    Latency l;
+    auto a = DynArray<std::unique_ptr<int>>(N);
+
+    for (size_t i = 0; i < n; i++) {
+        auto start_time = pal::GetCpuTime();
+        a.Append(std::make_unique<int>(static_cast<int>(i)));
+        auto end_time = pal::GetCpuTime();
+        l.Update(end_time - start_time);
+    }
+
+    PhantomRead(a);
+
+    return l;
+}
+
+NOINLINE Latency BenchStdVectorLatencyWithUniquePtr(size_t n) {
+    Latency l;
+    std::vector<std::unique_ptr<int>> a;
+
+    for (size_t i = 0; i < n; i++) {
+        auto start_time = pal::GetCpuTime();
+        a.push_back(std::make_unique<int>(static_cast<int>(i)));
+        auto end_time = pal::GetCpuTime();
+        l.Update(end_time - start_time);
+    }
+
+    PhantomRead(a);
+
+    return l;
+}
+
+DECL_MAIN {
+    std::ios::sync_with_stdio(false);
+    std::cout << std::fixed << std::showpoint;
+    std::cout << std::setprecision(6);
+
+    // Throughput
+    for (auto n : {1000ULL, 1000'000ULL, 1'000'000'000ULL, 10'000'000'000ULL}) {
+        PhantomWrite(n);
+
+        double dyn_array_secs = BenchDynArrayThroughput(n);
+        std::cout << "Construct: " << n << ":    DynArray: " << dyn_array_secs << " (" << static_cast<uint64_t>(static_cast<double>(n) / dyn_array_secs) << ")" << std::endl;
+
+        // std::vector throws std::bad_alloc, when pushing i=7854933628.
+        // DynArray has no such issues.
+        double std_vector_secs = BenchStdVectorThroughput(n);
+        std::cout << "Construct: " << n << ": std::vector: " << std_vector_secs << " (" << static_cast<uint64_t>(static_cast<double>(n) / std_vector_secs) << ")" << std::endl;
+
+        // In case of unique_ptr both DynArray and std::vector run out of memory for that order of magnitude.
+        double dyn_array_unique_ptr_secs = BenchDynArrayThroughputWithUniquePtr(n);
+        std::cout << "ConstructUniquePtr: " << n << ":    DynArray: " << dyn_array_unique_ptr_secs << " (" << static_cast<uint64_t>(static_cast<double>(n) / dyn_array_unique_ptr_secs) << ")" << std::endl;
+
+        double std_vector_unique_ptr_secs = BenchStdVectorThroughputWithUniquePtr(n);
+        std::cout << "ConstructUniquePtr: " << n << ": std::vector: " << std_vector_unique_ptr_secs << " (" << static_cast<uint64_t>(static_cast<double>(n) / std_vector_unique_ptr_secs) << ")" << std::endl;
+    }
+
+    // Latency
+    for (auto n : {1000ULL, 1000'000ULL, 1'000'000'000ULL, 10'000'000'000ULL}) {
+        PhantomWrite(n);
+
+        Latency dyn_array_lat = BenchDynArrayLatency(n);
+        std::cout << "Append: " << n << ":    DynArray: ";
+        dyn_array_lat.Print(std::cout);
+
+        // std::vector throws std::bad_alloc, when pushing i=7854933628.
+        // DynArray has no such issues.
+        Latency std_vector_lat = BenchStdVectorLatency(n);
+        std::cout << "Append: " << n << ": std::vector: ";
+        std_vector_lat.Print(std::cout);
+
+        // In case of unique_ptr both DynArray and std::vector run out of memory for that order of magnitude.
+        Latency dyn_array_unique_ptr_lat = BenchDynArrayLatencyWithUniquePtr(n);
+        std::cout << "AppendUniquePtr: " << n << ":    DynArray: ";
+        dyn_array_unique_ptr_lat.Print(std::cout);
+
+        Latency std_vector_unique_ptr_lat = BenchStdVectorLatencyWithUniquePtr(n);
+        std::cout << "AppendUniquePtr: " << n << ": std::vector: ";
+        std_vector_unique_ptr_lat.Print(std::cout);
+    }
+}
diff --git a/build-clang.sh b/build-clang.sh
new file mode 100755
index 0000000..e148745
--- /dev/null
+++ b/build-clang.sh
@@ -0,0 +1,18 @@
+cc_flags="-fno-rtti -std=c++17 -Wall -Wextra -Werror"
+
+mkdir build 2>/dev/null
+cd build
+
+clang++ $cc_flags -o test_arena ../test_arena.cpp
+clang++ $cc_flags -o test_dynarray ../test_dynarray.cpp
+
+for i in 0 1 2; do
+    clang++ $cc_flags -O$i -o bench$i ../bench.cpp
+done
+
+for i in 1000 1000000 1000000000 10000000000; do
+    clang++ $cc_flags -O2 -DLOOP_ITERS_NO=$i -o mem_dynarray$i ../mem_dynarray.cpp
+    clang++ $cc_flags -O2 -DLOOP_ITERS_NO=$i -o mem_stdvec$i ../mem_stdvec.cpp
+done
+
+cd ..
diff --git a/build-gcc.sh b/build-gcc.sh
new file mode 100755
index 0000000..d3f06ab
--- /dev/null
+++ b/build-gcc.sh
@@ -0,0 +1,18 @@
+cc_flags="-fno-rtti -std=c++17 -Wall -Wextra -Werror"
+
+mkdir build 2>/dev/null
+cd build
+
+g++ $cc_flags -o test_arena ../test_arena.cpp
+g++ $cc_flags -o test_dynarray ../test_dynarray.cpp
+
+for i in 0 1 2; do
+    g++ $cc_flags -O$i -o bench$i ../bench.cpp
+done
+
+for i in 1000 1000000 1000000000 10000000000; do
+    g++ $cc_flags -O2 -DLOOP_ITERS_NO=$i -o mem_dynarray$i ../mem_dynarray.cpp
+    g++ $cc_flags -O2 -DLOOP_ITERS_NO=$i -o mem_stdvec$i ../mem_stdvec.cpp
+done
+
+cd ..
diff --git a/build.bat b/build.bat
new file mode 100755
index 0000000..1edfd20
--- /dev/null
+++ b/build.bat
@@ -0,0 +1,26 @@
+@echo off
+
+set CLFlags=/Zi /EHsc /nologo /std:c++17 /GR- /WX /W4
+set LinkFlags=/link /incremental:no
+
+md build 2> NUL
+pushd build
+
+call cl %CLFlags% /Fetest_arena.exe ..\test_arena.cpp %LinkFlags%
+call cl %CLFlags% /Fetest_dynarray.exe ..\test_dynarray.cpp %LinkFlags%
+
+call cl %CLFlags% /Febench0.exe ..\bench.cpp %LinkFlags%
+call cl %CLFlags% /O1 /Febench1.exe ..\bench.cpp %LinkFlags%
+call cl %CLFlags% /O2 /Febench2.exe ..\bench.cpp %LinkFlags%
+
+call cl %CLFlags% /O2 /DLOOP_ITERS_NO=1000 /Femem_stdvec1000.exe ..\mem_stdvec.cpp %LinkFlags%
+call cl %CLFlags% /O2 /DLOOP_ITERS_NO=1000000 /Femem_stdvec1000000.exe ..\mem_stdvec.cpp %LinkFlags%
+call cl %CLFlags% /O2 /DLOOP_ITERS_NO=1000000000 /Femem_stdvec1000000000.exe ..\mem_stdvec.cpp %LinkFlags%
+call cl %CLFlags% /O2 /DLOOP_ITERS_NO=10000000000 /Femem_stdvec10000000000.exe ..\mem_stdvec.cpp %LinkFlags%
+
+call cl %CLFlags% /O2 /DLOOP_ITERS_NO=1000 /Femem_dynarray1000.exe ..\mem_dynarray.cpp %LinkFlags%
+call cl %CLFlags% /O2 /DLOOP_ITERS_NO=1000000 /Femem_dynarray1000000.exe ..\mem_dynarray.cpp %LinkFlags%
+call cl %CLFlags% /O2 /DLOOP_ITERS_NO=1000000000 /Femem_dynarray1000000000.exe ..\mem_dynarray.cpp %LinkFlags%
+call cl %CLFlags% /O2 /DLOOP_ITERS_NO=10000000000 /Femem_dynarray10000000000.exe ..\mem_dynarray.cpp %LinkFlags%
+
+popd
\ No newline at end of file
diff --git a/dynarray.h b/dynarray.h
new file mode 100755
index 0000000..7e75ac2
--- /dev/null
+++ b/dynarray.h
@@ -0,0 +1,119 @@
+#pragma once
+
+#include <cassert>
+#include <type_traits>
+
+#include "pal.h"
+
+static constexpr size_t kBytesPerCommit = pal::kPageSize;
+
+size_t GetCommittedBytes(size_t used_bytes) {
+    return ((used_bytes + kBytesPerCommit - 1) / kBytesPerCommit) * kBytesPerCommit;
+}
+
+struct Arena {
+    char* base = nullptr;
+    size_t cap = 0;
+    size_t len = 0;
+
+    Arena() = delete;
+    Arena(const Arena&) = delete;
+    Arena(Arena&& other) {
+        base = other.base;
+        cap = other.cap;
+        len = other.len;
+
+        other.base = nullptr;
+        other.cap = 0;
+        other.len = 0;
+    }
+
+    explicit Arena(size_t max_len) {
+        assert(max_len != 0);
+
+        cap = GetCommittedBytes(max_len);
+        len = 0;
+
+        void* buf = pal::MemReserve(cap);
+        if (buf == nullptr) {
+            throw std::bad_alloc();
+        }
+
+        base = static_cast<char*>(buf);
+    }
+
+    ~Arena() {
+        if (base != nullptr) {
+            pal::MemFree(base, cap);
+        }
+    }
+
+    bool Grow(size_t additional_len) {
+        assert(len+additional_len <= cap);
+        size_t old_committed = GetCommittedBytes(len);
+        size_t new_committed = GetCommittedBytes(len+additional_len);
+        size_t delta = new_committed - old_committed;
+        if (delta == 0) {
+            len += additional_len;
+            return true;
+        }
+
+        if (!pal::MemCommit(base + old_committed, delta)) {
+            return false;
+        }
+
+        len += additional_len;
+        return true;
+    }
+};
+
+template <typename T>
+class DynArray {
+    Arena arena;
+
+public:
+    // Redundant, but useful as documentation.
+    DynArray() = delete;
+    DynArray(const DynArray<T>& other) = delete;
+    DynArray(DynArray&& other) = default;
+
+    explicit DynArray(size_t max_len)
+    : arena(max_len * sizeof(T)) {}
+
+    // Support for range loops.
+    T* begin() {
+        return reinterpret_cast<T*>(arena.base);
+    }
+
+    T* end() {
+        return reinterpret_cast<T*>(arena.base + arena.len);
+    }
+
+    bool Append(T v) {
+        auto* dst = end();
+
+        if (!arena.Grow(sizeof(T))) {
+            return false;
+        }
+
+        new (dst) T(std::move(v));
+        return true;
+    }
+
+    size_t Len() const {
+        return arena.len / sizeof(T);
+    }
+
+    T& operator[](size_t i) {
+        assert(i < Len());
+        return begin()[i];
+    }
+
+    ~DynArray() {
+        if (!std::is_trivially_destructible_v<T>) {
+            for (auto* p = begin(); p != end(); p++) {
+                p->~T();
+            }
+        }
+    }
+};
diff --git a/mem_dynarray.cpp b/mem_dynarray.cpp
new file mode 100755
index 0000000..d937175
--- /dev/null
+++ b/mem_dynarray.cpp
@@ -0,0 +1,24 @@
+#include <iostream>
+
+#include "antioptimise.h"
+#include "dynarray.h"
+
+// LOOP_ITERS_NO is defined on the command line.
+static constexpr size_t kLoopItersNo = LOOP_ITERS_NO;
+
+DECL_MAIN {
+    size_t n = kLoopItersNo;
+    static constexpr size_t N = 16ULL * 1024ULL * 1024ULL * 1024ULL;
+    assert(n <= N);
+    
+    PhantomWrite(n);
+
+    auto a = DynArray<int>(N);
+    for (size_t i = 0; i < n; i++) {
+        a.Append(static_cast<int>(i));
+    }
+
+    PhantomRead(a);
+
+    std::cout << "Peak memory use: " << pal::GetPeakMemoryUse() << std::endl;
+}
diff --git a/mem_stdvec.cpp b/mem_stdvec.cpp
new file mode 100755
index 0000000..e0182a3
--- /dev/null
+++ b/mem_stdvec.cpp
@@ -0,0 +1,22 @@
+#include <iostream>
+#include <vector>
+
+#include "antioptimise.h"
+#include "pal.h"
+
+// LOOP_ITERS_NO is defined on the command line.
+static constexpr size_t kLoopItersNo = LOOP_ITERS_NO;
+
+DECL_MAIN {
+    size_t n = kLoopItersNo;
+    PhantomWrite(n);
+
+    std::vector<int> a;
+    for (size_t i = 0; i < n; i++) {
+        a.push_back(static_cast<int>(i));
+    }
+
+    PhantomRead(a);
+
+    std::cout << "Peak memory use: " << pal::GetPeakMemoryUse() << std::endl;
+}
diff --git a/pal.h b/pal.h
new file mode 100755
index 0000000..e3bc953
--- /dev/null
+++ b/pal.h
@@ -0,0 +1,7 @@
+#pragma once
+
+#ifdef _WIN32
+	#include "pal/windows.h"
+#else
+	#include "pal/posix.h"
+#endif
diff --git a/pal/posix.h b/pal/posix.h
new file mode 100755
index 0000000..8ac9775
--- /dev/null
+++ b/pal/posix.h
@@ -0,0 +1,91 @@
+#pragma once
+
+#include <iostream>
+#include <sys/mman.h>
+#include <sys/resource.h>
+
+#define DECL_MAIN int main()
+
+#define NOINLINE __attribute__((noinline))
+
+namespace pal {
+
+static constexpr size_t kPageSize =
+    #if defined(__APPLE__) && defined(__aarch64__)
+        0x4000
+    #else
+        0x1000
+    #endif
+    ;
+
+inline void* MemReserve(size_t len) {
+    int flags = MAP_PRIVATE | MAP_ANONYMOUS;
+
+    #ifdef MAP_NORESERVE
+        // On Linux, guarantee lazy commit even if /proc/sys/vm/overcommit_memory is set to `heuristic` (0).
+        flags |= MAP_NORESERVE;
+    #endif
+
+    void* buf = mmap(nullptr, len, PROT_NONE, flags, -1, 0);
+    if (buf == nullptr) {
+        int saved_errno = errno;
+        std::cerr << "Reserving memory with mmap failed. Exit code: " << saved_errno << std::endl;
+        return nullptr;
+    }
+
+    return buf;
+}
+
+inline bool MemCommit(void* p, size_t len) {
+    if (mprotect(p, len, PROT_READ | PROT_WRITE) != 0) {
+        int saved_errno = errno;
+        std::cerr << "Committing memory with mprotect failed. Exit code: " << saved_errno << std::endl;
+        return false;
+    }
+
+    return true;
+}
+
+inline void MemFree(void* p, size_t len) {
+    if (munmap(p, len) != 0) {
+        int saved_errno = errno;
+        std::cerr << "Failed to unmap memory. Exit code: " << saved_errno << std::endl;
+    }
+}
+
+inline size_t GetPeakMemoryUse() {
+    rusage r_usage;
+    if (getrusage(RUSAGE_SELF, &r_usage) != 0) {
+        int saved_errno = errno;
+        std::cerr << "Failed to fetch memory use. Exit code: " << saved_errno << std::endl;
+        return 0;
+    }
+
+    // On Mac, ru_maxrss is in bytes. On Linux and FreeBSD it's in kilobytes. So much for POSIX.
+    #ifdef __APPLE__
+    size_t multiplier = 1;
+    #else
+    size_t multiplier = 1000;
+    #endif
+
+    return r_usage.ru_maxrss * multiplier;
+}
+
+inline uint64_t ConvertTimevalToMicroseconds(timeval t) {
+    return static_cast<uint64_t>(t.tv_sec) * 1000000ULL + static_cast<uint64_t>(t.tv_usec);
+}
+
+inline uint64_t GetCpuTime() {
+    rusage r_usage;
+    if (getrusage(RUSAGE_SELF, &r_usage) != 0) {
+        int saved_errno = errno;
+        std::cerr << "Failed to fetch memory use. Exit code: " << saved_errno << std::endl;
+        return 0;
+    }
+
+    auto user_time = ConvertTimevalToMicroseconds(r_usage.ru_utime);
+    auto kernel_time = ConvertTimevalToMicroseconds(r_usage.ru_stime);
+    return user_time + kernel_time;
+}
+
+} // namespace pal
diff --git a/pal/windows.h b/pal/windows.h
new file mode 100755
index 0000000..3a2a7cf
--- /dev/null
+++ b/pal/windows.h
@@ -0,0 +1,74 @@
+#pragma once
+
+#include <cassert>
+#include <iostream>
+#include <windows.h>
+#include <psapi.h>
+
+#define DECL_MAIN int __cdecl wmain()
+
+#define NOINLINE __declspec(noinline)
+
+namespace pal {
+
+static constexpr size_t kPageSize = 0x1000;
+
+inline void* MemReserve(size_t len) {
+    void* buf = VirtualAlloc(nullptr, len, MEM_RESERVE, PAGE_NOACCESS);
+    if (buf == nullptr) {
+        DWORD last_error = GetLastError();
+        std::cerr << "Reserving memory with VirtualAlloc failed. Error code: " << last_error << std::endl;
+        return nullptr;
+    }
+
+    return buf;
+}
+
+inline bool MemCommit(void* p, size_t len) {
+    void* committed = VirtualAlloc(p, len, MEM_COMMIT, PAGE_READWRITE);
+    if (committed == nullptr) {
+        DWORD last_error = GetLastError();
+        std::cerr << "Committing memory with VirtualAlloc failed. Error code: " << last_error << std::endl;
+        return false;
+    }
+
+    return true;
+}
+
+inline void MemFree(void* p, size_t /*len*/) {
+    VirtualFree(p, 0, MEM_RELEASE);
+}
+
+inline size_t GetPeakMemoryUse() {
+    PROCESS_MEMORY_COUNTERS mem;
+    bool res = GetProcessMemoryInfo(GetCurrentProcess(), &mem, sizeof mem);
+    if (!res) {
+        DWORD last_error = GetLastError();
+        std::cerr << "GetProcessMemoryInfo failed. Error code: " << last_error << std::endl;
+        return 0;
+    }
+
+    return mem.PeakWorkingSetSize;
+}
+
+inline uint64_t ConvertFiletimeToMicroseconds(FILETIME t) {
+    ULARGE_INTEGER li;
+    li.LowPart = t.dwLowDateTime;
+    li.HighPart = t.dwHighDateTime;
+    return static_cast<uint64_t>(li.QuadPart) / 10;
+}
+
+inline uint64_t GetCpuTime() {
+    FILETIME a, b, c, d;
+    if (GetProcessTimes(GetCurrentProcess(), &a, &b, &c, &d) != 0) {
+        auto kernel_time = ConvertFiletimeToMicroseconds(c);
+        auto user_time = ConvertFiletimeToMicroseconds(d);
+        return user_time + kernel_time;
+    } else {
+        std::cerr << "failed to get CPU time\n";
+        assert(false);
+        return 0;
+    }
+}
+
+} // namespace pal
diff --git a/test_arena.cpp b/test_arena.cpp
new file mode 100755
index 0000000..545e4ba
--- /dev/null
+++ b/test_arena.cpp
@@ -0,0 +1,27 @@
+#include "dynarray.h"
+
+DECL_MAIN {
+    // Reserving 16GiB.
+    auto a = Arena(16ULL * 1024ULL * 1024ULL * 1024ULL);
+
+    // Committing 1MiB to make space for 4 bytes (with some headroom).
+    bool committed = a.Grow(4);
+    assert(committed);
+
+    a.base[0] = 'a';
+    a.base[1] = 'b';
+    a.base[2] = 'c';
+    a.base[4] = '\0';
+
+    std::cout << a.base << std::endl;
+
+    // Past the declared length, but still in committed memory.
+    // No segfault, but in real code it's probably a bug.
+    a.base[5] = 'x';
+    a.base[kBytesPerCommit-1] = 'y';
+
+    // Reserved, but not committed memory. Segfault.
+    // a.base[kBytesPerCommit] = 'x';
+
+    return 0;
+}
diff --git a/test_dynarray.cpp b/test_dynarray.cpp
new file mode 100755
index 0000000..d05d9a3
--- /dev/null
+++ b/test_dynarray.cpp
@@ -0,0 +1,29 @@
+#include "dynarray.h"
+
+DECL_MAIN {
+    static constexpr size_t N = 100000;
+    auto a = DynArray<int>(N);
+
+    for (size_t i = 0; i < 16; i++) {
+        assert(a.Append(static_cast<int>(i)));
+    }
+
+    for (size_t i = 0; i < 12; i++) {
+        std::cout << a[i] << "\n";
+    }
+
+    for (size_t i = 16; i < N; i++) {
+        a.Append(static_cast<int>(i));
+    }
+
+    size_t real_cap = GetCommittedBytes(N * sizeof(int)) / sizeof(int);
+
+    for (size_t i = N; i < real_cap; i++) {
+        a.Append(static_cast<int>(i));
+    }
+
+    // Exceeded capacity. Assertion failure.
+    // a.Append(0);
+
+    return 0;
+}
diff --git a/virtualallochist.d b/virtualallochist.d
new file mode 100755
index 0000000..4c7fc2a
--- /dev/null
+++ b/virtualallochist.d
@@ -0,0 +1,53 @@
+/*
+syscall::NtAllocateVirtualMemory:entry
+/execname == "bench2.exe"/
+{
+    self->ts = timestamp;
+}
+
+syscall::NtAllocateVirtualMemory:return
+/execname == "bench2.exe"/
+{
+    @alloc[execname] = quantize((timestamp - self->ts) / 1000);
+}
+*/
+
+pid$1::VirtualAlloc:entry
+{
+    self->ts[probefunc] = timestamp;
+}
+
+pid$1::VirtualAlloc:return
+{
+    @[execname,probefunc] = quantize((timestamp - self->ts[probefunc]) / 1000);
+}
+
+pid$1::GetProcessTimes:entry
+{
+    self->ts[probefunc] = timestamp;
+}
+
+pid$1::GetProcessTimes:return
+{
+    @[execname,probefunc] = quantize((timestamp - self->ts[probefunc]) / 1000);
+}
+
+pid$1::"DynArray<std::unique_ptr<int,std::default_delete<int> > >::Append":entry
+{
+    self->ts[probefunc] = timestamp;
+}
+
+pid$1::"DynArray<std::unique_ptr<int,std::default_delete<int> > >::Append":return
+{
+    @[execname,probefunc] = quantize((timestamp - self->ts[probefunc]) / 1000);
+}
+
+pid$1::"DynArray<int>::Append":entry
+{
+    self->ts[probefunc] = timestamp;
+}
+
+pid$1::"DynArray<int>::Append":return
+{
+    @[execname,probefunc] = quantize((timestamp - self->ts[probefunc]) / 1000);
+}
\ No newline at end of file
-- 
cgit v1.2.3