aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJakub Alba <jakub@yakubin.com>2023-01-14 15:28:08 +0100
committerJakub Alba <jakub@yakubin.com>2023-01-14 15:30:42 +0100
commitd7278db0c4c0367e192c5d2a9d46973a573e5a32 (patch)
tree2891dd9e25adc673529dfb4beb381c3bb6981c5a
downloadreserve-and-commit-d7278db0c4c0367e192c5d2a9d46973a573e5a32.tar.xz
reserve-and-commit-d7278db0c4c0367e192c5d2a9d46973a573e5a32.zip
initial commit
-rwxr-xr-x.gitignore1
-rw-r--r--COPYING27
-rwxr-xr-xantioptimise.h11
-rwxr-xr-xbench.cpp228
-rwxr-xr-xbuild-clang.sh18
-rwxr-xr-xbuild-gcc.sh18
-rwxr-xr-xbuild.bat26
-rwxr-xr-xdynarray.h119
-rwxr-xr-xmem_dynarray.cpp24
-rwxr-xr-xmem_stdvec.cpp22
-rwxr-xr-xpal.h7
-rwxr-xr-xpal/posix.h91
-rwxr-xr-xpal/windows.h74
-rwxr-xr-xtest_arena.cpp27
-rwxr-xr-xtest_dynarray.cpp29
-rwxr-xr-xvirtualallochist.d53
16 files changed, 775 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100755
index 0000000..84c048a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/build/
diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000..7abb5cb
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,27 @@
+Copyright (c) Jakub Alba <jakub@yakubin.com>
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its contributors
+ may be used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/antioptimise.h b/antioptimise.h
new file mode 100755
index 0000000..514ea54
--- /dev/null
+++ b/antioptimise.h
@@ -0,0 +1,11 @@
+#pragma once
+
+template <class T>
+inline auto PhantomRead(T const& datum) {
+ return reinterpret_cast<char const volatile&>(datum);
+}
+
+template <class T>
+inline auto PhantomWrite(T& datum) {
+ return reinterpret_cast<char volatile&>(datum);
+} \ No newline at end of file
diff --git a/bench.cpp b/bench.cpp
new file mode 100755
index 0000000..759482d
--- /dev/null
+++ b/bench.cpp
@@ -0,0 +1,228 @@
+#include "antioptimise.h"
+#include "dynarray.h"
+
+#include <iomanip>
+#include <iostream>
+#include <vector>
+
+NOINLINE double BenchDynArrayThroughput(size_t n) {
+ static constexpr size_t N = 16ULL * 1024ULL * 1024ULL * 1024ULL;
+ assert(n <= N);
+
+ auto start_time = pal::GetCpuTime();
+ auto a = DynArray<int>(N);
+ for (size_t i = 0; i < n; i++) {
+ a.Append(static_cast<int>(i));
+ }
+ auto end_time = pal::GetCpuTime();
+
+ PhantomRead(a);
+
+ return static_cast<double>(end_time - start_time) * 1e-6;
+}
+
+NOINLINE double BenchStdVectorThroughput(size_t n) {
+ auto start_time = pal::GetCpuTime();
+ std::vector<int> a;
+ for (size_t i = 0; i < n; i++) {
+ a.push_back(static_cast<int>(i));
+ }
+ auto end_time = pal::GetCpuTime();
+
+ PhantomRead(a);
+
+ return static_cast<double>(end_time - start_time) * 1e-6;
+}
+
+NOINLINE double BenchDynArrayThroughputWithUniquePtr(size_t n) {
+ static constexpr size_t N = 16ULL * 1024ULL * 1024ULL * 1024ULL;
+ assert(n <= N);
+
+ auto start_time = pal::GetCpuTime();
+ auto a = DynArray<std::unique_ptr<int>>(N);
+ for (size_t i = 0; i < n; i++) {
+ a.Append(std::make_unique<int>(static_cast<int>(i)));
+ }
+ auto end_time = pal::GetCpuTime();
+
+ PhantomRead(a);
+
+ return static_cast<double>(end_time - start_time) * 1e-6;
+}
+
+NOINLINE double BenchStdVectorThroughputWithUniquePtr(size_t n) {
+ auto start_time = pal::GetCpuTime();
+ std::vector<std::unique_ptr<int>> a;
+ for (size_t i = 0; i < n; i++) {
+ a.push_back(std::make_unique<int>(static_cast<int>(i)));
+ }
+ auto end_time = pal::GetCpuTime();
+
+ PhantomRead(a);
+
+ return static_cast<double>(end_time - start_time) * 1e-6;
+}
+
+
+struct Latency {
+ uint64_t min = ~0ULL;
+ uint64_t max = 0;
+ size_t histogram[7] = {0};
+
+ void Update(uint64_t delta) {
+ if (delta < min) {
+ min = delta;
+ }
+ if (max < delta) {
+ max = delta;
+ }
+ if (delta < 1) {
+ histogram[0]++;
+ } else if (delta < 10) {
+ histogram[1]++;
+ } else if (delta < 100) {
+ histogram[2]++;
+ } else if (delta < 1000) {
+ histogram[3]++;
+ } else if (delta < 10000) {
+ histogram[4]++;
+ } else if (delta < 100000) {
+ histogram[5]++;
+ } else {
+ histogram[6]++;
+ }
+ }
+
+ void Print(std::ostream& out) const {
+ out << "(" << static_cast<double>(min)*1e-6 << ", "
+ << static_cast<double>(max)*1e-6 << ", [";
+ out << histogram[0] << ", "
+ << histogram[1] << ", "
+ << histogram[2] << ", "
+ << histogram[3] << ", "
+ << histogram[4] << ", "
+ << histogram[5] << ", "
+ << histogram[6] << "])"
+ << std::endl;
+ }
+};
+
+NOINLINE Latency BenchDynArrayLatency(size_t n) {
+ static constexpr size_t N = 16ULL * 1024ULL * 1024ULL * 1024ULL;
+ assert(n <= N);
+
+ Latency l;
+ auto a = DynArray<int>(N);
+
+ for (size_t i = 0; i < n; i++) {
+ auto start_time = pal::GetCpuTime();
+ a.Append(static_cast<int>(i));
+ auto end_time = pal::GetCpuTime();
+ l.Update(end_time - start_time);
+ }
+
+ PhantomRead(a);
+
+ return l;
+}
+
+NOINLINE Latency BenchStdVectorLatency(size_t n) {
+ Latency l;
+ std::vector<int> a;
+
+ for (size_t i = 0; i < n; i++) {
+ auto start_time = pal::GetCpuTime();
+ a.push_back(static_cast<int>(i));
+ auto end_time = pal::GetCpuTime();
+ l.Update(end_time - start_time);
+ }
+
+ PhantomRead(a);
+
+ return l;
+}
+
+NOINLINE Latency BenchDynArrayLatencyWithUniquePtr(size_t n) {
+ static constexpr size_t N = 16ULL * 1024ULL * 1024ULL * 1024ULL;
+ assert(n <= N);
+
+ Latency l;
+ auto a = DynArray<std::unique_ptr<int>>(N);
+
+ for (size_t i = 0; i < n; i++) {
+ auto start_time = pal::GetCpuTime();
+ a.Append(std::make_unique<int>(static_cast<int>(i)));
+ auto end_time = pal::GetCpuTime();
+ l.Update(end_time - start_time);
+ }
+
+ PhantomRead(a);
+
+ return l;
+}
+
+NOINLINE Latency BenchStdVectorLatencyWithUniquePtr(size_t n) {
+ Latency l;
+ std::vector<std::unique_ptr<int>> a;
+
+ for (size_t i = 0; i < n; i++) {
+ auto start_time = pal::GetCpuTime();
+ a.push_back(std::make_unique<int>(static_cast<int>(i)));
+ auto end_time = pal::GetCpuTime();
+ l.Update(end_time - start_time);
+ }
+
+ PhantomRead(a);
+
+ return l;
+}
+
+DECL_MAIN {
+ std::ios::sync_with_stdio(false);
+ std::cout << std::fixed << std::showpoint;
+ std::cout << std::setprecision(6);
+
+ // Throughput
+ for (auto n : {1000ULL, 1000'000ULL, 1'000'000'000ULL, 10'000'000'000ULL}) {
+ PhantomWrite(n);
+
+ double dyn_array_secs = BenchDynArrayThroughput(n);
+ std::cout << "Construct: " << n << ": DynArray: " << dyn_array_secs << " (" << static_cast<uint64_t>(static_cast<double>(n) / dyn_array_secs) << ")" << std::endl;
+
+ // std::vector throws std::bad_alloc, when pushing i=7854933628.
+ // DynArray has no such issues.
+ double std_vector_secs = BenchStdVectorThroughput(n);
+ std::cout << "Construct: " << n << ": std::vector: " << std_vector_secs << " (" << static_cast<uint64_t>(static_cast<double>(n) / std_vector_secs) << ")" << std::endl;
+
+ // In case of unique_ptr both DynArray and std::vector run out of memory for that order of magnitude.
+ double dyn_array_unique_ptr_secs = BenchDynArrayThroughputWithUniquePtr(n);
+ std::cout << "ConstructUniquePtr: " << n << ": DynArray: " << dyn_array_unique_ptr_secs << " (" << static_cast<uint64_t>(static_cast<double>(n) / dyn_array_unique_ptr_secs) << ")" << std::endl;
+
+ double std_vector_unique_ptr_secs = BenchStdVectorThroughputWithUniquePtr(n);
+ std::cout << "ConstructUniquePtr: " << n << ": std::vector: " << std_vector_unique_ptr_secs << " (" << static_cast<uint64_t>(static_cast<double>(n) / std_vector_unique_ptr_secs) << ")" << std::endl;
+ }
+
+ // Latency
+ for (auto n : {1000ULL, 1000'000ULL, 1'000'000'000ULL, 10'000'000'000ULL}) {
+ PhantomWrite(n);
+
+ Latency dyn_array_lat = BenchDynArrayLatency(n);
+ std::cout << "Append: " << n << ": DynArray: ";
+ dyn_array_lat.Print(std::cout);
+
+ // std::vector throws std::bad_alloc, when pushing i=7854933628.
+ // DynArray has no such issues.
+ Latency std_vector_lat = BenchStdVectorLatency(n);
+ std::cout << "Append: " << n << ": std::vector: ";
+ std_vector_lat.Print(std::cout);
+
+ // In case of unique_ptr both DynArray and std::vector run out of memory for that order of magnitude.
+ Latency dyn_array_unique_ptr_lat = BenchDynArrayLatencyWithUniquePtr(n);
+ std::cout << "AppendUniquePtr: " << n << ": DynArray: ";
+ dyn_array_unique_ptr_lat.Print(std::cout);
+
+ Latency std_vector_unique_ptr_lat = BenchStdVectorLatencyWithUniquePtr(n);
+ std::cout << "AppendUniquePtr: " << n << ": std::vector: ";
+ std_vector_unique_ptr_lat.Print(std::cout);
+ }
+}
diff --git a/build-clang.sh b/build-clang.sh
new file mode 100755
index 0000000..e148745
--- /dev/null
+++ b/build-clang.sh
@@ -0,0 +1,18 @@
+cc_flags="-fno-rtti -std=c++17 -Wall -Wextra -Werror"
+
+mkdir build 2>/dev/null
+cd build
+
+clang++ $cc_flags -o test_arena ../test_arena.cpp
+clang++ $cc_flags -o test_dynarray ../test_dynarray.cpp
+
+for i in 0 1 2; do
+ clang++ $cc_flags -O$i -o bench$i ../bench.cpp
+done
+
+for i in 1000 1000000 1000000000 10000000000; do
+ clang++ $cc_flags -O2 -DLOOP_ITERS_NO=$i -o mem_dynarray$i ../mem_dynarray.cpp
+ clang++ $cc_flags -O2 -DLOOP_ITERS_NO=$i -o mem_stdvec$i ../mem_stdvec.cpp
+done
+
+cd ..
diff --git a/build-gcc.sh b/build-gcc.sh
new file mode 100755
index 0000000..d3f06ab
--- /dev/null
+++ b/build-gcc.sh
@@ -0,0 +1,18 @@
+cc_flags="-fno-rtti -std=c++17 -Wall -Wextra -Werror"
+
+mkdir build 2>/dev/null
+cd build
+
+g++ $cc_flags -o test_arena ../test_arena.cpp
+g++ $cc_flags -o test_dynarray ../test_dynarray.cpp
+
+for i in 0 1 2; do
+ g++ $cc_flags -O$i -o bench$i ../bench.cpp
+done
+
+for i in 1000 1000000 1000000000 10000000000; do
+ g++ $cc_flags -O2 -DLOOP_ITERS_NO=$i -o mem_dynarray$i ../mem_dynarray.cpp
+ g++ $cc_flags -O2 -DLOOP_ITERS_NO=$i -o mem_stdvec$i ../mem_stdvec.cpp
+done
+
+cd ..
diff --git a/build.bat b/build.bat
new file mode 100755
index 0000000..1edfd20
--- /dev/null
+++ b/build.bat
@@ -0,0 +1,26 @@
+@echo off
+
+set CLFlags=/Zi /EHsc /nologo /std:c++17 /GR- /WX /W4
+set LinkFlags=/link /incremental:no
+
+md build 2> NUL
+pushd build
+
+call cl %CLFlags% /Fetest_arena.exe ..\test_arena.cpp %LinkFlags%
+call cl %CLFlags% /Fetest_dynarray.exe ..\test_dynarray.cpp %LinkFlags%
+
+call cl %CLFlags% /Febench0.exe ..\bench.cpp %LinkFlags%
+call cl %CLFlags% /O1 /Febench1.exe ..\bench.cpp %LinkFlags%
+call cl %CLFlags% /O2 /Febench2.exe ..\bench.cpp %LinkFlags%
+
+call cl %CLFlags% /O2 /DLOOP_ITERS_NO=1000 /Femem_stdvec1000.exe ..\mem_stdvec.cpp %LinkFlags%
+call cl %CLFlags% /O2 /DLOOP_ITERS_NO=1000000 /Femem_stdvec1000000.exe ..\mem_stdvec.cpp %LinkFlags%
+call cl %CLFlags% /O2 /DLOOP_ITERS_NO=1000000000 /Femem_stdvec1000000000.exe ..\mem_stdvec.cpp %LinkFlags%
+call cl %CLFlags% /O2 /DLOOP_ITERS_NO=10000000000 /Femem_stdvec10000000000.exe ..\mem_stdvec.cpp %LinkFlags%
+
+call cl %CLFlags% /O2 /DLOOP_ITERS_NO=1000 /Femem_dynarray1000.exe ..\mem_dynarray.cpp %LinkFlags%
+call cl %CLFlags% /O2 /DLOOP_ITERS_NO=1000000 /Femem_dynarray1000000.exe ..\mem_dynarray.cpp %LinkFlags%
+call cl %CLFlags% /O2 /DLOOP_ITERS_NO=1000000000 /Femem_dynarray1000000000.exe ..\mem_dynarray.cpp %LinkFlags%
+call cl %CLFlags% /O2 /DLOOP_ITERS_NO=10000000000 /Femem_dynarray10000000000.exe ..\mem_dynarray.cpp %LinkFlags%
+
+popd \ No newline at end of file
diff --git a/dynarray.h b/dynarray.h
new file mode 100755
index 0000000..7e75ac2
--- /dev/null
+++ b/dynarray.h
@@ -0,0 +1,119 @@
+#pragma once
+
+#include <cassert>
+#include <type_traits>
+
+#include "pal.h"
+
+static constexpr size_t kBytesPerCommit = pal::kPageSize;
+
+size_t GetCommittedBytes(size_t used_bytes) {
+ return ((used_bytes + kBytesPerCommit - 1) / kBytesPerCommit) * kBytesPerCommit;
+}
+
+struct Arena {
+ char* base = nullptr;
+ size_t cap = 0;
+ size_t len = 0;
+
+ Arena() = delete;
+ Arena(const Arena&) = delete;
+ Arena(Arena&& other) {
+ base = other.base;
+ cap = other.cap;
+ len = other.len;
+
+ other.base = nullptr;
+ other.cap = 0;
+ other.len = 0;
+ }
+
+ explicit Arena(size_t max_len) {
+ assert(max_len != 0);
+
+ cap = GetCommittedBytes(max_len);
+ len = 0;
+
+ void* buf = pal::MemReserve(cap);
+ if (buf == nullptr) {
+ throw std::bad_alloc();
+ }
+
+ base = static_cast<char*>(buf);
+ }
+
+ ~Arena() {
+ if (base != nullptr) {
+ pal::MemFree(base, cap);
+ }
+ }
+
+ bool Grow(size_t additional_len) {
+ assert(len+additional_len <= cap);
+ size_t old_committed = GetCommittedBytes(len);
+ size_t new_committed = GetCommittedBytes(len+additional_len);
+ size_t delta = new_committed - old_committed;
+ if (delta == 0) {
+ len += additional_len;
+ return true;
+ }
+
+ if (!pal::MemCommit(base + old_committed, delta)) {
+ return false;
+ }
+
+ len += additional_len;
+ return true;
+ }
+};
+
+template <typename T>
+class DynArray {
+ Arena arena;
+
+public:
+ // Redundant, but useful as documentation.
+ DynArray() = delete;
+ DynArray(const DynArray<T>& other) = delete;
+ DynArray(DynArray&& other) = default;
+
+ explicit DynArray(size_t max_len)
+ : arena(max_len * sizeof(T)) {}
+
+ // Support for range loops.
+ T* begin() {
+ return reinterpret_cast<T*>(arena.base);
+ }
+
+ T* end() {
+ return reinterpret_cast<T*>(arena.base + arena.len);
+ }
+
+ bool Append(T v) {
+ auto* dst = end();
+
+ if (!arena.Grow(sizeof(T))) {
+ return false;
+ }
+
+ new (dst) T(std::move(v));
+ return true;
+ }
+
+ size_t Len() const {
+ return arena.len / sizeof(T);
+ }
+
+ T& operator[](size_t i) {
+ assert(i < Len());
+ return begin()[i];
+ }
+
+ ~DynArray() {
+ if (!std::is_trivially_destructible_v<T>) {
+ for (auto* p = begin(); p != end(); p++) {
+ p->~T();
+ }
+ }
+ }
+};
diff --git a/mem_dynarray.cpp b/mem_dynarray.cpp
new file mode 100755
index 0000000..d937175
--- /dev/null
+++ b/mem_dynarray.cpp
@@ -0,0 +1,24 @@
+#include <iostream>
+
+#include "antioptimise.h"
+#include "dynarray.h"
+
+// LOOP_ITERS_NO is defined on the command line.
+static constexpr size_t kLoopItersNo = LOOP_ITERS_NO;
+
+DECL_MAIN {
+ size_t n = kLoopItersNo;
+ static constexpr size_t N = 16ULL * 1024ULL * 1024ULL * 1024ULL;
+ assert(n <= N);
+
+ PhantomWrite(n);
+
+ auto a = DynArray<int>(N);
+ for (size_t i = 0; i < n; i++) {
+ a.Append(static_cast<int>(i));
+ }
+
+ PhantomRead(a);
+
+ std::cout << "Peak memory use: " << pal::GetPeakMemoryUse() << std::endl;
+}
diff --git a/mem_stdvec.cpp b/mem_stdvec.cpp
new file mode 100755
index 0000000..e0182a3
--- /dev/null
+++ b/mem_stdvec.cpp
@@ -0,0 +1,22 @@
+#include <iostream>
+#include <vector>
+
+#include "antioptimise.h"
+#include "pal.h"
+
+// LOOP_ITERS_NO is defined on the command line.
+static constexpr size_t kLoopItersNo = LOOP_ITERS_NO;
+
+DECL_MAIN {
+ size_t n = kLoopItersNo;
+ PhantomWrite(n);
+
+ std::vector<int> a;
+ for (size_t i = 0; i < n; i++) {
+ a.push_back(static_cast<int>(i));
+ }
+
+ PhantomRead(a);
+
+ std::cout << "Peak memory use: " << pal::GetPeakMemoryUse() << std::endl;
+}
diff --git a/pal.h b/pal.h
new file mode 100755
index 0000000..e3bc953
--- /dev/null
+++ b/pal.h
@@ -0,0 +1,7 @@
+#pragma once
+
+#ifdef _WIN32
+ #include "pal/windows.h"
+#else
+ #include "pal/posix.h"
+#endif
diff --git a/pal/posix.h b/pal/posix.h
new file mode 100755
index 0000000..8ac9775
--- /dev/null
+++ b/pal/posix.h
@@ -0,0 +1,91 @@
+#pragma once
+
+#include <iostream>
+#include <sys/mman.h>
+#include <sys/resource.h>
+
+#define DECL_MAIN int main()
+
+#define NOINLINE __attribute__((noinline))
+
+namespace pal {
+
+static constexpr size_t kPageSize =
+ #if defined(__APPLE__) && defined(__aarch64__)
+ 0x4000
+ #else
+ 0x1000
+ #endif
+ ;
+
+inline void* MemReserve(size_t len) {
+ int flags = MAP_PRIVATE | MAP_ANONYMOUS;
+
+ #ifdef MAP_NORESERVE
+ // On Linux, guarantee lazy commit even if /proc/sys/vm/overcommit_memory is set to `heuristic` (0).
+ flags |= MAP_NORESERVE;
+ #endif
+
+ void* buf = mmap(nullptr, len, PROT_NONE, flags, -1, 0);
+ if (buf == nullptr) {
+ int saved_errno = errno;
+ std::cerr << "Reserving memory with mmap failed. Exit code: " << saved_errno << std::endl;
+ return nullptr;
+ }
+
+ return buf;
+}
+
+inline bool MemCommit(void* p, size_t len) {
+ if (mprotect(p, len, PROT_READ | PROT_WRITE) != 0) {
+ int saved_errno = errno;
+ std::cerr << "Committing memory with mprotect failed. Exit code: " << saved_errno << std::endl;
+ return false;
+ }
+
+ return true;
+}
+
+inline void MemFree(void* p, size_t len) {
+ if (munmap(p, len) != 0) {
+ int saved_errno = errno;
+ std::cerr << "Failed to unmap memory. Exit code: " << saved_errno << std::endl;
+ }
+}
+
+inline size_t GetPeakMemoryUse() {
+ rusage r_usage;
+ if (getrusage(RUSAGE_SELF, &r_usage) != 0) {
+ int saved_errno = errno;
+ std::cerr << "Failed to fetch memory use. Exit code: " << saved_errno << std::endl;
+ return 0;
+ }
+
+ // On Mac, ru_maxrss is in bytes. On Linux and FreeBSD it's in kilobytes. So much for POSIX.
+ #ifdef __APPLE__
+ size_t multiplier = 1;
+ #else
+ size_t multiplier = 1000;
+ #endif
+
+ return r_usage.ru_maxrss * multiplier;
+}
+
+inline uint64_t ConvertTimevalToMicroseconds(timeval t) {
+ return static_cast<uint64_t>(t.tv_sec) * 1000000ULL + static_cast<uint64_t>(t.tv_usec);
+}
+
+inline uint64_t GetCpuTime() {
+ rusage r_usage;
+ if (getrusage(RUSAGE_SELF, &r_usage) != 0) {
+ int saved_errno = errno;
+ std::cerr << "Failed to fetch memory use. Exit code: " << saved_errno << std::endl;
+ return 0;
+ }
+
+ auto user_time = ConvertTimevalToMicroseconds(r_usage.ru_utime);
+ auto kernel_time = ConvertTimevalToMicroseconds(r_usage.ru_stime);
+ return user_time + kernel_time;
+}
+
+} // namespace pal
diff --git a/pal/windows.h b/pal/windows.h
new file mode 100755
index 0000000..3a2a7cf
--- /dev/null
+++ b/pal/windows.h
@@ -0,0 +1,74 @@
+#pragma once
+
+#include <cassert>
+#include <iostream>
+#include <windows.h>
+#include <psapi.h>
+
+#define DECL_MAIN int __cdecl wmain()
+
+#define NOINLINE __declspec(noinline)
+
+namespace pal {
+
+static constexpr size_t kPageSize = 0x1000;
+
+inline void* MemReserve(size_t len) {
+ void* buf = VirtualAlloc(nullptr, len, MEM_RESERVE, PAGE_NOACCESS);
+ if (buf == nullptr) {
+ DWORD last_error = GetLastError();
+ std::cerr << "Reserving memory with VirtualAlloc failed. Error code: " << last_error << std::endl;
+ return nullptr;
+ }
+
+ return buf;
+}
+
+inline bool MemCommit(void* p, size_t len) {
+ void* committed = VirtualAlloc(p, len, MEM_COMMIT, PAGE_READWRITE);
+ if (committed == nullptr) {
+ DWORD last_error = GetLastError();
+ std::cerr << "Committing memory with VirtualAlloc failed. Error code: " << last_error << std::endl;
+ return false;
+ }
+
+ return true;
+}
+
+inline void MemFree(void* p, size_t /*len*/) {
+ VirtualFree(p, 0, MEM_RELEASE);
+}
+
+inline size_t GetPeakMemoryUse() {
+ PROCESS_MEMORY_COUNTERS mem;
+ bool res = GetProcessMemoryInfo(GetCurrentProcess(), &mem, sizeof mem);
+ if (!res) {
+ DWORD last_error = GetLastError();
+ std::cerr << "GetProcessMemoryInfo failed. Error code: " << last_error << std::endl;
+ return 0;
+ }
+
+ return mem.PeakWorkingSetSize;
+}
+
+inline uint64_t ConvertFiletimeToMicroseconds(FILETIME t) {
+ ULARGE_INTEGER li;
+ li.LowPart = t.dwLowDateTime;
+ li.HighPart = t.dwHighDateTime;
+ return static_cast<uint64_t>(li.QuadPart) / 10;
+}
+
+inline uint64_t GetCpuTime() {
+ FILETIME a, b, c, d;
+ if (GetProcessTimes(GetCurrentProcess(), &a, &b, &c, &d) != 0) {
+ auto kernel_time = ConvertFiletimeToMicroseconds(c);
+ auto user_time = ConvertFiletimeToMicroseconds(d);
+ return user_time + kernel_time;
+ } else {
+ std::cerr << "failed to get CPU time\n";
+ assert(false);
+ return 0;
+ }
+}
+
+} // namespace pal
diff --git a/test_arena.cpp b/test_arena.cpp
new file mode 100755
index 0000000..545e4ba
--- /dev/null
+++ b/test_arena.cpp
@@ -0,0 +1,27 @@
+#include "dynarray.h"
+
+DECL_MAIN {
+ // Reserving 16GiB.
+ auto a = Arena(16ULL * 1024ULL * 1024ULL * 1024ULL);
+
+ // Committing 1MiB to make space for 4 bytes (with some headroom).
+ bool committed = a.Grow(4);
+ assert(committed);
+
+ a.base[0] = 'a';
+ a.base[1] = 'b';
+ a.base[2] = 'c';
+ a.base[4] = '\0';
+
+ std::cout << a.base << std::endl;
+
+ // Past the declared length, but still in committed memory.
+ // No segfault, but in real code it's probably a bug.
+ a.base[5] = 'x';
+ a.base[kBytesPerCommit-1] = 'y';
+
+ // Reserved, but not committed memory. Segfault.
+ // a.base[kBytesPerCommit] = 'x';
+
+ return 0;
+}
diff --git a/test_dynarray.cpp b/test_dynarray.cpp
new file mode 100755
index 0000000..d05d9a3
--- /dev/null
+++ b/test_dynarray.cpp
@@ -0,0 +1,29 @@
+#include "dynarray.h"
+
+DECL_MAIN {
+ static constexpr size_t N = 100000;
+ auto a = DynArray<int>(N);
+
+ for (size_t i = 0; i < 16; i++) {
+ assert(a.Append(static_cast<int>(i)));
+ }
+
+ for (size_t i = 0; i < 12; i++) {
+ std::cout << a[i] << "\n";
+ }
+
+ for (size_t i = 16; i < N; i++) {
+ a.Append(static_cast<int>(i));
+ }
+
+ size_t real_cap = GetCommittedBytes(N * sizeof(int)) / sizeof(int);
+
+ for (size_t i = N; i < real_cap; i++) {
+ a.Append(static_cast<int>(i));
+ }
+
+ // Exceeded capacity. Assertion failure.
+ // a.Append(0);
+
+ return 0;
+}
diff --git a/virtualallochist.d b/virtualallochist.d
new file mode 100755
index 0000000..4c7fc2a
--- /dev/null
+++ b/virtualallochist.d
@@ -0,0 +1,53 @@
+/*
+syscall::NtAllocateVirtualMemory:entry
+/execname == "bench2.exe"/
+{
+ self->ts = timestamp;
+}
+
+syscall::NtAllocateVirtualMemory:return
+/execname == "bench2.exe"/
+{
+ @alloc[execname] = quantize((timestamp - self->ts) / 1000);
+}
+*/
+
+pid$1::VirtualAlloc:entry
+{
+ self->ts[probefunc] = timestamp;
+}
+
+pid$1::VirtualAlloc:return
+{
+ @[execname,probefunc] = quantize((timestamp - self->ts[probefunc]) / 1000);
+}
+
+pid$1::GetProcessTimes:entry
+{
+ self->ts[probefunc] = timestamp;
+}
+
+pid$1::GetProcessTimes:return
+{
+ @[execname,probefunc] = quantize((timestamp - self->ts[probefunc]) / 1000);
+}
+
+pid$1::"DynArray<std::unique_ptr<int,std::default_delete<int> > >::Append":entry
+{
+ self->ts[probefunc] = timestamp;
+}
+
+pid$1::"DynArray<std::unique_ptr<int,std::default_delete<int> > >::Append":return
+{
+ @[execname,probefunc] = quantize((timestamp - self->ts[probefunc]) / 1000);
+}
+
+pid$1::"DynArray<int>::Append":entry
+{
+ self->ts[probefunc] = timestamp;
+}
+
+pid$1::"DynArray<int>::Append":return
+{
+ @[execname,probefunc] = quantize((timestamp - self->ts[probefunc]) / 1000);
+} \ No newline at end of file