diff options
author | Jakub Alba <jakub@yakubin.com> | 2023-01-14 15:28:08 +0100 |
---|---|---|
committer | Jakub Alba <jakub@yakubin.com> | 2023-01-14 15:30:42 +0100 |
commit | d7278db0c4c0367e192c5d2a9d46973a573e5a32 (patch) | |
tree | 2891dd9e25adc673529dfb4beb381c3bb6981c5a | |
download | reserve-and-commit-d7278db0c4c0367e192c5d2a9d46973a573e5a32.tar.xz reserve-and-commit-d7278db0c4c0367e192c5d2a9d46973a573e5a32.zip |
initial commit
-rwxr-xr-x | .gitignore | 1 | ||||
-rw-r--r-- | COPYING | 27 | ||||
-rwxr-xr-x | antioptimise.h | 11 | ||||
-rwxr-xr-x | bench.cpp | 228 | ||||
-rwxr-xr-x | build-clang.sh | 18 | ||||
-rwxr-xr-x | build-gcc.sh | 18 | ||||
-rwxr-xr-x | build.bat | 26 | ||||
-rwxr-xr-x | dynarray.h | 119 | ||||
-rwxr-xr-x | mem_dynarray.cpp | 24 | ||||
-rwxr-xr-x | mem_stdvec.cpp | 22 | ||||
-rwxr-xr-x | pal.h | 7 | ||||
-rwxr-xr-x | pal/posix.h | 91 | ||||
-rwxr-xr-x | pal/windows.h | 74 | ||||
-rwxr-xr-x | test_arena.cpp | 27 | ||||
-rwxr-xr-x | test_dynarray.cpp | 29 | ||||
-rwxr-xr-x | virtualallochist.d | 53 |
16 files changed, 775 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100755 index 0000000..84c048a --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/build/ @@ -0,0 +1,27 @@ +Copyright (c) Jakub Alba <jakub@yakubin.com> +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/antioptimise.h b/antioptimise.h new file mode 100755 index 0000000..514ea54 --- /dev/null +++ b/antioptimise.h @@ -0,0 +1,11 @@ +#pragma once
+
+template <class T>
+inline auto PhantomRead(T const& datum) {
+ return reinterpret_cast<char const volatile&>(datum);
+}
+
+template <class T>
+inline auto PhantomWrite(T& datum) {
+ return reinterpret_cast<char volatile&>(datum);
+}
\ No newline at end of file diff --git a/bench.cpp b/bench.cpp new file mode 100755 index 0000000..759482d --- /dev/null +++ b/bench.cpp @@ -0,0 +1,228 @@ +#include "antioptimise.h"
+#include "dynarray.h"
+
+#include <iomanip>
+#include <iostream>
+#include <vector>
+
+NOINLINE double BenchDynArrayThroughput(size_t n) {
+ static constexpr size_t N = 16ULL * 1024ULL * 1024ULL * 1024ULL;
+ assert(n <= N);
+
+ auto start_time = pal::GetCpuTime();
+ auto a = DynArray<int>(N);
+ for (size_t i = 0; i < n; i++) {
+ a.Append(static_cast<int>(i));
+ }
+ auto end_time = pal::GetCpuTime();
+
+ PhantomRead(a);
+
+ return static_cast<double>(end_time - start_time) * 1e-6;
+}
+
+NOINLINE double BenchStdVectorThroughput(size_t n) {
+ auto start_time = pal::GetCpuTime();
+ std::vector<int> a;
+ for (size_t i = 0; i < n; i++) {
+ a.push_back(static_cast<int>(i));
+ }
+ auto end_time = pal::GetCpuTime();
+
+ PhantomRead(a);
+
+ return static_cast<double>(end_time - start_time) * 1e-6;
+}
+
+NOINLINE double BenchDynArrayThroughputWithUniquePtr(size_t n) {
+ static constexpr size_t N = 16ULL * 1024ULL * 1024ULL * 1024ULL;
+ assert(n <= N);
+
+ auto start_time = pal::GetCpuTime();
+ auto a = DynArray<std::unique_ptr<int>>(N);
+ for (size_t i = 0; i < n; i++) {
+ a.Append(std::make_unique<int>(static_cast<int>(i)));
+ }
+ auto end_time = pal::GetCpuTime();
+
+ PhantomRead(a);
+
+ return static_cast<double>(end_time - start_time) * 1e-6;
+}
+
+NOINLINE double BenchStdVectorThroughputWithUniquePtr(size_t n) {
+ auto start_time = pal::GetCpuTime();
+ std::vector<std::unique_ptr<int>> a;
+ for (size_t i = 0; i < n; i++) {
+ a.push_back(std::make_unique<int>(static_cast<int>(i)));
+ }
+ auto end_time = pal::GetCpuTime();
+
+ PhantomRead(a);
+
+ return static_cast<double>(end_time - start_time) * 1e-6;
+}
+
+
+struct Latency {
+ uint64_t min = ~0ULL;
+ uint64_t max = 0;
+ size_t histogram[7] = {0};
+
+ void Update(uint64_t delta) {
+ if (delta < min) {
+ min = delta;
+ }
+ if (max < delta) {
+ max = delta;
+ }
+ if (delta < 1) {
+ histogram[0]++;
+ } else if (delta < 10) {
+ histogram[1]++;
+ } else if (delta < 100) {
+ histogram[2]++;
+ } else if (delta < 1000) {
+ histogram[3]++;
+ } else if (delta < 10000) {
+ histogram[4]++;
+ } else if (delta < 100000) {
+ histogram[5]++;
+ } else {
+ histogram[6]++;
+ }
+ }
+
+ void Print(std::ostream& out) const {
+ out << "(" << static_cast<double>(min)*1e-6 << ", "
+ << static_cast<double>(max)*1e-6 << ", [";
+ out << histogram[0] << ", "
+ << histogram[1] << ", "
+ << histogram[2] << ", "
+ << histogram[3] << ", "
+ << histogram[4] << ", "
+ << histogram[5] << ", "
+ << histogram[6] << "])"
+ << std::endl;
+ }
+};
+
+NOINLINE Latency BenchDynArrayLatency(size_t n) {
+ static constexpr size_t N = 16ULL * 1024ULL * 1024ULL * 1024ULL;
+ assert(n <= N);
+
+ Latency l;
+ auto a = DynArray<int>(N);
+
+ for (size_t i = 0; i < n; i++) {
+ auto start_time = pal::GetCpuTime();
+ a.Append(static_cast<int>(i));
+ auto end_time = pal::GetCpuTime();
+ l.Update(end_time - start_time);
+ }
+
+ PhantomRead(a);
+
+ return l;
+}
+
+NOINLINE Latency BenchStdVectorLatency(size_t n) {
+ Latency l;
+ std::vector<int> a;
+
+ for (size_t i = 0; i < n; i++) {
+ auto start_time = pal::GetCpuTime();
+ a.push_back(static_cast<int>(i));
+ auto end_time = pal::GetCpuTime();
+ l.Update(end_time - start_time);
+ }
+
+ PhantomRead(a);
+
+ return l;
+}
+
+NOINLINE Latency BenchDynArrayLatencyWithUniquePtr(size_t n) {
+ static constexpr size_t N = 16ULL * 1024ULL * 1024ULL * 1024ULL;
+ assert(n <= N);
+
+ Latency l;
+ auto a = DynArray<std::unique_ptr<int>>(N);
+
+ for (size_t i = 0; i < n; i++) {
+ auto start_time = pal::GetCpuTime();
+ a.Append(std::make_unique<int>(static_cast<int>(i)));
+ auto end_time = pal::GetCpuTime();
+ l.Update(end_time - start_time);
+ }
+
+ PhantomRead(a);
+
+ return l;
+}
+
+NOINLINE Latency BenchStdVectorLatencyWithUniquePtr(size_t n) {
+ Latency l;
+ std::vector<std::unique_ptr<int>> a;
+
+ for (size_t i = 0; i < n; i++) {
+ auto start_time = pal::GetCpuTime();
+ a.push_back(std::make_unique<int>(static_cast<int>(i)));
+ auto end_time = pal::GetCpuTime();
+ l.Update(end_time - start_time);
+ }
+
+ PhantomRead(a);
+
+ return l;
+}
+
+DECL_MAIN {
+ std::ios::sync_with_stdio(false);
+ std::cout << std::fixed << std::showpoint;
+ std::cout << std::setprecision(6);
+
+ // Throughput
+ for (auto n : {1000ULL, 1000'000ULL, 1'000'000'000ULL, 10'000'000'000ULL}) {
+ PhantomWrite(n);
+
+ double dyn_array_secs = BenchDynArrayThroughput(n);
+ std::cout << "Construct: " << n << ": DynArray: " << dyn_array_secs << " (" << static_cast<uint64_t>(static_cast<double>(n) / dyn_array_secs) << ")" << std::endl;
+
+ // std::vector throws std::bad_alloc, when pushing i=7854933628.
+ // DynArray has no such issues.
+ double std_vector_secs = BenchStdVectorThroughput(n);
+ std::cout << "Construct: " << n << ": std::vector: " << std_vector_secs << " (" << static_cast<uint64_t>(static_cast<double>(n) / std_vector_secs) << ")" << std::endl;
+
+ // In case of unique_ptr both DynArray and std::vector run out of memory for that order of magnitude.
+ double dyn_array_unique_ptr_secs = BenchDynArrayThroughputWithUniquePtr(n);
+ std::cout << "ConstructUniquePtr: " << n << ": DynArray: " << dyn_array_unique_ptr_secs << " (" << static_cast<uint64_t>(static_cast<double>(n) / dyn_array_unique_ptr_secs) << ")" << std::endl;
+
+ double std_vector_unique_ptr_secs = BenchStdVectorThroughputWithUniquePtr(n);
+ std::cout << "ConstructUniquePtr: " << n << ": std::vector: " << std_vector_unique_ptr_secs << " (" << static_cast<uint64_t>(static_cast<double>(n) / std_vector_unique_ptr_secs) << ")" << std::endl;
+ }
+
+ // Latency
+ for (auto n : {1000ULL, 1000'000ULL, 1'000'000'000ULL, 10'000'000'000ULL}) {
+ PhantomWrite(n);
+
+ Latency dyn_array_lat = BenchDynArrayLatency(n);
+ std::cout << "Append: " << n << ": DynArray: ";
+ dyn_array_lat.Print(std::cout);
+
+ // std::vector throws std::bad_alloc, when pushing i=7854933628.
+ // DynArray has no such issues.
+ Latency std_vector_lat = BenchStdVectorLatency(n);
+ std::cout << "Append: " << n << ": std::vector: ";
+ std_vector_lat.Print(std::cout);
+
+ // In case of unique_ptr both DynArray and std::vector run out of memory for that order of magnitude.
+ Latency dyn_array_unique_ptr_lat = BenchDynArrayLatencyWithUniquePtr(n);
+ std::cout << "AppendUniquePtr: " << n << ": DynArray: ";
+ dyn_array_unique_ptr_lat.Print(std::cout);
+
+ Latency std_vector_unique_ptr_lat = BenchStdVectorLatencyWithUniquePtr(n);
+ std::cout << "AppendUniquePtr: " << n << ": std::vector: ";
+ std_vector_unique_ptr_lat.Print(std::cout);
+ }
+}
diff --git a/build-clang.sh b/build-clang.sh new file mode 100755 index 0000000..e148745 --- /dev/null +++ b/build-clang.sh @@ -0,0 +1,18 @@ +cc_flags="-fno-rtti -std=c++17 -Wall -Wextra -Werror" + +mkdir build 2>/dev/null +cd build + +clang++ $cc_flags -o test_arena ../test_arena.cpp +clang++ $cc_flags -o test_dynarray ../test_dynarray.cpp + +for i in 0 1 2; do + clang++ $cc_flags -O$i -o bench$i ../bench.cpp +done + +for i in 1000 1000000 1000000000 10000000000; do + clang++ $cc_flags -O2 -DLOOP_ITERS_NO=$i -o mem_dynarray$i ../mem_dynarray.cpp + clang++ $cc_flags -O2 -DLOOP_ITERS_NO=$i -o mem_stdvec$i ../mem_stdvec.cpp +done + +cd .. diff --git a/build-gcc.sh b/build-gcc.sh new file mode 100755 index 0000000..d3f06ab --- /dev/null +++ b/build-gcc.sh @@ -0,0 +1,18 @@ +cc_flags="-fno-rtti -std=c++17 -Wall -Wextra -Werror" + +mkdir build 2>/dev/null +cd build + +g++ $cc_flags -o test_arena ../test_arena.cpp +g++ $cc_flags -o test_dynarray ../test_dynarray.cpp + +for i in 0 1 2; do + g++ $cc_flags -O$i -o bench$i ../bench.cpp +done + +for i in 1000 1000000 1000000000 10000000000; do + g++ $cc_flags -O2 -DLOOP_ITERS_NO=$i -o mem_dynarray$i ../mem_dynarray.cpp + g++ $cc_flags -O2 -DLOOP_ITERS_NO=$i -o mem_stdvec$i ../mem_stdvec.cpp +done + +cd .. diff --git a/build.bat b/build.bat new file mode 100755 index 0000000..1edfd20 --- /dev/null +++ b/build.bat @@ -0,0 +1,26 @@ +@echo off
+
+set CLFlags=/Zi /EHsc /nologo /std:c++17 /GR- /WX /W4
+set LinkFlags=/link /incremental:no
+
+md build 2> NUL
+pushd build
+
+call cl %CLFlags% /Fetest_arena.exe ..\test_arena.cpp %LinkFlags%
+call cl %CLFlags% /Fetest_dynarray.exe ..\test_dynarray.cpp %LinkFlags%
+
+call cl %CLFlags% /Febench0.exe ..\bench.cpp %LinkFlags%
+call cl %CLFlags% /O1 /Febench1.exe ..\bench.cpp %LinkFlags%
+call cl %CLFlags% /O2 /Febench2.exe ..\bench.cpp %LinkFlags%
+
+call cl %CLFlags% /O2 /DLOOP_ITERS_NO=1000 /Femem_stdvec1000.exe ..\mem_stdvec.cpp %LinkFlags%
+call cl %CLFlags% /O2 /DLOOP_ITERS_NO=1000000 /Femem_stdvec1000000.exe ..\mem_stdvec.cpp %LinkFlags%
+call cl %CLFlags% /O2 /DLOOP_ITERS_NO=1000000000 /Femem_stdvec1000000000.exe ..\mem_stdvec.cpp %LinkFlags%
+call cl %CLFlags% /O2 /DLOOP_ITERS_NO=10000000000 /Femem_stdvec10000000000.exe ..\mem_stdvec.cpp %LinkFlags%
+
+call cl %CLFlags% /O2 /DLOOP_ITERS_NO=1000 /Femem_dynarray1000.exe ..\mem_dynarray.cpp %LinkFlags%
+call cl %CLFlags% /O2 /DLOOP_ITERS_NO=1000000 /Femem_dynarray1000000.exe ..\mem_dynarray.cpp %LinkFlags%
+call cl %CLFlags% /O2 /DLOOP_ITERS_NO=1000000000 /Femem_dynarray1000000000.exe ..\mem_dynarray.cpp %LinkFlags%
+call cl %CLFlags% /O2 /DLOOP_ITERS_NO=10000000000 /Femem_dynarray10000000000.exe ..\mem_dynarray.cpp %LinkFlags%
+
+popd
\ No newline at end of file diff --git a/dynarray.h b/dynarray.h new file mode 100755 index 0000000..7e75ac2 --- /dev/null +++ b/dynarray.h @@ -0,0 +1,119 @@ +#pragma once
+
+#include <cassert>
+#include <type_traits>
+
+#include "pal.h"
+
+static constexpr size_t kBytesPerCommit = pal::kPageSize;
+
+size_t GetCommittedBytes(size_t used_bytes) {
+ return ((used_bytes + kBytesPerCommit - 1) / kBytesPerCommit) * kBytesPerCommit;
+}
+
+struct Arena {
+ char* base = nullptr;
+ size_t cap = 0;
+ size_t len = 0;
+
+ Arena() = delete;
+ Arena(const Arena&) = delete;
+ Arena(Arena&& other) {
+ base = other.base;
+ cap = other.cap;
+ len = other.len;
+
+ other.base = nullptr;
+ other.cap = 0;
+ other.len = 0;
+ }
+
+ explicit Arena(size_t max_len) {
+ assert(max_len != 0);
+
+ cap = GetCommittedBytes(max_len);
+ len = 0;
+
+ void* buf = pal::MemReserve(cap);
+ if (buf == nullptr) {
+ throw std::bad_alloc();
+ }
+
+ base = static_cast<char*>(buf);
+ }
+
+ ~Arena() {
+ if (base != nullptr) {
+ pal::MemFree(base, cap);
+ }
+ }
+
+ bool Grow(size_t additional_len) {
+ assert(len+additional_len <= cap);
+ size_t old_committed = GetCommittedBytes(len);
+ size_t new_committed = GetCommittedBytes(len+additional_len);
+ size_t delta = new_committed - old_committed;
+ if (delta == 0) {
+ len += additional_len;
+ return true;
+ }
+
+ if (!pal::MemCommit(base + old_committed, delta)) {
+ return false;
+ }
+
+ len += additional_len;
+ return true;
+ }
+};
+
+template <typename T>
+class DynArray {
+ Arena arena;
+
+public:
+ // Redundant, but useful as documentation.
+ DynArray() = delete;
+ DynArray(const DynArray<T>& other) = delete;
+ DynArray(DynArray&& other) = default;
+
+ explicit DynArray(size_t max_len)
+ : arena(max_len * sizeof(T)) {}
+
+ // Support for range loops.
+ T* begin() {
+ return reinterpret_cast<T*>(arena.base);
+ }
+
+ T* end() {
+ return reinterpret_cast<T*>(arena.base + arena.len);
+ }
+
+ bool Append(T v) {
+ auto* dst = end();
+
+ if (!arena.Grow(sizeof(T))) {
+ return false;
+ }
+
+ new (dst) T(std::move(v));
+ return true;
+ }
+
+ size_t Len() const {
+ return arena.len / sizeof(T);
+ }
+
+ T& operator[](size_t i) {
+ assert(i < Len());
+ return begin()[i];
+ }
+
+ ~DynArray() {
+ if (!std::is_trivially_destructible_v<T>) {
+ for (auto* p = begin(); p != end(); p++) {
+ p->~T();
+ }
+ }
+ }
+};
diff --git a/mem_dynarray.cpp b/mem_dynarray.cpp new file mode 100755 index 0000000..d937175 --- /dev/null +++ b/mem_dynarray.cpp @@ -0,0 +1,24 @@ +#include <iostream>
+
+#include "antioptimise.h"
+#include "dynarray.h"
+
+// LOOP_ITERS_NO is defined on the command line.
+static constexpr size_t kLoopItersNo = LOOP_ITERS_NO;
+
+DECL_MAIN {
+ size_t n = kLoopItersNo;
+ static constexpr size_t N = 16ULL * 1024ULL * 1024ULL * 1024ULL;
+ assert(n <= N);
+
+ PhantomWrite(n);
+
+ auto a = DynArray<int>(N);
+ for (size_t i = 0; i < n; i++) {
+ a.Append(static_cast<int>(i));
+ }
+
+ PhantomRead(a);
+
+ std::cout << "Peak memory use: " << pal::GetPeakMemoryUse() << std::endl;
+}
diff --git a/mem_stdvec.cpp b/mem_stdvec.cpp new file mode 100755 index 0000000..e0182a3 --- /dev/null +++ b/mem_stdvec.cpp @@ -0,0 +1,22 @@ +#include <iostream>
+#include <vector>
+
+#include "antioptimise.h"
+#include "pal.h"
+
+// LOOP_ITERS_NO is defined on the command line.
+static constexpr size_t kLoopItersNo = LOOP_ITERS_NO;
+
+DECL_MAIN {
+ size_t n = kLoopItersNo;
+ PhantomWrite(n);
+
+ std::vector<int> a;
+ for (size_t i = 0; i < n; i++) {
+ a.push_back(static_cast<int>(i));
+ }
+
+ PhantomRead(a);
+
+ std::cout << "Peak memory use: " << pal::GetPeakMemoryUse() << std::endl;
+}
@@ -0,0 +1,7 @@ +#pragma once + +#ifdef _WIN32 + #include "pal/windows.h" +#else + #include "pal/posix.h" +#endif diff --git a/pal/posix.h b/pal/posix.h new file mode 100755 index 0000000..8ac9775 --- /dev/null +++ b/pal/posix.h @@ -0,0 +1,91 @@ +#pragma once + +#include <iostream> +#include <sys/mman.h> +#include <sys/resource.h> + +#define DECL_MAIN int main() + +#define NOINLINE __attribute__((noinline)) + +namespace pal { + +static constexpr size_t kPageSize = + #if defined(__APPLE__) && defined(__aarch64__) + 0x4000 + #else + 0x1000 + #endif + ; + +inline void* MemReserve(size_t len) { + int flags = MAP_PRIVATE | MAP_ANONYMOUS; + + #ifdef MAP_NORESERVE + // On Linux, guarantee lazy commit even if /proc/sys/vm/overcommit_memory is set to `heuristic` (0). + flags |= MAP_NORESERVE; + #endif + + void* buf = mmap(nullptr, len, PROT_NONE, flags, -1, 0); + if (buf == nullptr) { + int saved_errno = errno; + std::cerr << "Reserving memory with mmap failed. Exit code: " << saved_errno << std::endl; + return nullptr; + } + + return buf; +} + +inline bool MemCommit(void* p, size_t len) { + if (mprotect(p, len, PROT_READ | PROT_WRITE) != 0) { + int saved_errno = errno; + std::cerr << "Committing memory with mprotect failed. Exit code: " << saved_errno << std::endl; + return false; + } + + return true; +} + +inline void MemFree(void* p, size_t len) { + if (munmap(p, len) != 0) { + int saved_errno = errno; + std::cerr << "Failed to unmap memory. Exit code: " << saved_errno << std::endl; + } +} + +inline size_t GetPeakMemoryUse() { + rusage r_usage; + if (getrusage(RUSAGE_SELF, &r_usage) != 0) { + int saved_errno = errno; + std::cerr << "Failed to fetch memory use. Exit code: " << saved_errno << std::endl; + return 0; + } + + // On Mac, ru_maxrss is in bytes. On Linux and FreeBSD it's in kilobytes. So much for POSIX. + #ifdef __APPLE__ + size_t multiplier = 1; + #else + size_t multiplier = 1000; + #endif + + return r_usage.ru_maxrss * multiplier; +} + +inline uint64_t ConvertTimevalToMicroseconds(timeval t) { + return static_cast<uint64_t>(t.tv_sec) * 1000000ULL + static_cast<uint64_t>(t.tv_usec); +} + +inline uint64_t GetCpuTime() { + rusage r_usage; + if (getrusage(RUSAGE_SELF, &r_usage) != 0) { + int saved_errno = errno; + std::cerr << "Failed to fetch memory use. Exit code: " << saved_errno << std::endl; + return 0; + } + + auto user_time = ConvertTimevalToMicroseconds(r_usage.ru_utime); + auto kernel_time = ConvertTimevalToMicroseconds(r_usage.ru_stime); + return user_time + kernel_time; +} + +} // namespace pal diff --git a/pal/windows.h b/pal/windows.h new file mode 100755 index 0000000..3a2a7cf --- /dev/null +++ b/pal/windows.h @@ -0,0 +1,74 @@ +#pragma once + +#include <cassert> +#include <iostream> +#include <windows.h> +#include <psapi.h> + +#define DECL_MAIN int __cdecl wmain() + +#define NOINLINE __declspec(noinline) + +namespace pal { + +static constexpr size_t kPageSize = 0x1000; + +inline void* MemReserve(size_t len) { + void* buf = VirtualAlloc(nullptr, len, MEM_RESERVE, PAGE_NOACCESS); + if (buf == nullptr) { + DWORD last_error = GetLastError(); + std::cerr << "Reserving memory with VirtualAlloc failed. Error code: " << last_error << std::endl; + return nullptr; + } + + return buf; +} + +inline bool MemCommit(void* p, size_t len) { + void* committed = VirtualAlloc(p, len, MEM_COMMIT, PAGE_READWRITE); + if (committed == nullptr) { + DWORD last_error = GetLastError(); + std::cerr << "Committing memory with VirtualAlloc failed. Error code: " << last_error << std::endl; + return false; + } + + return true; +} + +inline void MemFree(void* p, size_t /*len*/) { + VirtualFree(p, 0, MEM_RELEASE); +} + +inline size_t GetPeakMemoryUse() { + PROCESS_MEMORY_COUNTERS mem; + bool res = GetProcessMemoryInfo(GetCurrentProcess(), &mem, sizeof mem); + if (!res) { + DWORD last_error = GetLastError(); + std::cerr << "GetProcessMemoryInfo failed. Error code: " << last_error << std::endl; + return 0; + } + + return mem.PeakWorkingSetSize; +} + +inline uint64_t ConvertFiletimeToMicroseconds(FILETIME t) { + ULARGE_INTEGER li; + li.LowPart = t.dwLowDateTime; + li.HighPart = t.dwHighDateTime; + return static_cast<uint64_t>(li.QuadPart) / 10; +} + +inline uint64_t GetCpuTime() { + FILETIME a, b, c, d; + if (GetProcessTimes(GetCurrentProcess(), &a, &b, &c, &d) != 0) { + auto kernel_time = ConvertFiletimeToMicroseconds(c); + auto user_time = ConvertFiletimeToMicroseconds(d); + return user_time + kernel_time; + } else { + std::cerr << "failed to get CPU time\n"; + assert(false); + return 0; + } +} + +} // namespace pal diff --git a/test_arena.cpp b/test_arena.cpp new file mode 100755 index 0000000..545e4ba --- /dev/null +++ b/test_arena.cpp @@ -0,0 +1,27 @@ +#include "dynarray.h"
+
+DECL_MAIN {
+ // Reserving 16GiB.
+ auto a = Arena(16ULL * 1024ULL * 1024ULL * 1024ULL);
+
+ // Committing 1MiB to make space for 4 bytes (with some headroom).
+ bool committed = a.Grow(4);
+ assert(committed);
+
+ a.base[0] = 'a';
+ a.base[1] = 'b';
+ a.base[2] = 'c';
+ a.base[4] = '\0';
+
+ std::cout << a.base << std::endl;
+
+ // Past the declared length, but still in committed memory.
+ // No segfault, but in real code it's probably a bug.
+ a.base[5] = 'x';
+ a.base[kBytesPerCommit-1] = 'y';
+
+ // Reserved, but not committed memory. Segfault.
+ // a.base[kBytesPerCommit] = 'x';
+
+ return 0;
+}
diff --git a/test_dynarray.cpp b/test_dynarray.cpp new file mode 100755 index 0000000..d05d9a3 --- /dev/null +++ b/test_dynarray.cpp @@ -0,0 +1,29 @@ +#include "dynarray.h"
+
+DECL_MAIN {
+ static constexpr size_t N = 100000;
+ auto a = DynArray<int>(N);
+
+ for (size_t i = 0; i < 16; i++) {
+ assert(a.Append(static_cast<int>(i)));
+ }
+
+ for (size_t i = 0; i < 12; i++) {
+ std::cout << a[i] << "\n";
+ }
+
+ for (size_t i = 16; i < N; i++) {
+ a.Append(static_cast<int>(i));
+ }
+
+ size_t real_cap = GetCommittedBytes(N * sizeof(int)) / sizeof(int);
+
+ for (size_t i = N; i < real_cap; i++) {
+ a.Append(static_cast<int>(i));
+ }
+
+ // Exceeded capacity. Assertion failure.
+ // a.Append(0);
+
+ return 0;
+}
diff --git a/virtualallochist.d b/virtualallochist.d new file mode 100755 index 0000000..4c7fc2a --- /dev/null +++ b/virtualallochist.d @@ -0,0 +1,53 @@ +/*
+syscall::NtAllocateVirtualMemory:entry
+/execname == "bench2.exe"/
+{
+ self->ts = timestamp;
+}
+
+syscall::NtAllocateVirtualMemory:return
+/execname == "bench2.exe"/
+{
+ @alloc[execname] = quantize((timestamp - self->ts) / 1000);
+}
+*/
+
+pid$1::VirtualAlloc:entry
+{
+ self->ts[probefunc] = timestamp;
+}
+
+pid$1::VirtualAlloc:return
+{
+ @[execname,probefunc] = quantize((timestamp - self->ts[probefunc]) / 1000);
+}
+
+pid$1::GetProcessTimes:entry
+{
+ self->ts[probefunc] = timestamp;
+}
+
+pid$1::GetProcessTimes:return
+{
+ @[execname,probefunc] = quantize((timestamp - self->ts[probefunc]) / 1000);
+}
+
+pid$1::"DynArray<std::unique_ptr<int,std::default_delete<int> > >::Append":entry
+{
+ self->ts[probefunc] = timestamp;
+}
+
+pid$1::"DynArray<std::unique_ptr<int,std::default_delete<int> > >::Append":return
+{
+ @[execname,probefunc] = quantize((timestamp - self->ts[probefunc]) / 1000);
+}
+
+pid$1::"DynArray<int>::Append":entry
+{
+ self->ts[probefunc] = timestamp;
+}
+
+pid$1::"DynArray<int>::Append":return
+{
+ @[execname,probefunc] = quantize((timestamp - self->ts[probefunc]) / 1000);
+}
\ No newline at end of file |