// Benchmark driver comparing append performance of DynArray against
// std::vector, for plain values and for std::make_unique-created elements.
//
// NOTE(review): this text appears to have been flattened onto three physical
// lines and to have lost every angle-bracketed token: the three bare
// `#include` directives carry no header name, and all template arguments are
// missing (e.g. `static_cast(i)`, `DynArray(N)`, `std::vector a`,
// `DynArray>(N)`). The element types and cast target types cannot be
// recovered from this text alone -- restore them from the original file
// before building. Because of the flattening, each `//` comment below also
// swallows the remainder of its physical line; the line breaks must be
// restored as well.
//
// Contents, in order:
//   Bench*Throughput(n)
//       Reads pal::GetCpuTime(), constructs the container, appends the values
//       0..n-1 (via Append / push_back), reads pal::GetCpuTime() again, calls
//       PhantomRead on the container and returns (end - start) * 1e-6.
//       Container destruction happens after the second timestamp, so it is
//       not part of the measured interval. Presumably GetCpuTime() reports
//       microseconds and the result is seconds (callers name it `*_secs`)
//       -- TODO confirm against the pal header.
//       The DynArray variants pass N = 16 * 1024^3 to the DynArray
//       constructor and assert n <= N; presumably N is a maximum capacity
//       -- TODO confirm against dynarray.h.
//   struct Latency
//       Running min / max of the observed deltas plus a 7-bucket histogram
//       with decade boundaries: <1, <10, <100, <1000, <10000, <100000, and
//       everything else. `min` starts at ~0ULL, so it still holds that value
//       if Update() is never called. Print() writes
//       "(min*1e-6, max*1e-6, [h0, ..., h6])" followed by std::endl.
//   Bench*Latency(n)
//       Same loops as the throughput benchmarks, but each individual
//       Append / push_back call is bracketed by two pal::GetCpuTime() reads
//       and the difference is fed to Latency::Update(). Container
//       construction is not timed here.
//   DECL_MAIN
//       Configures std::cout (unsynced from stdio, fixed, showpoint,
//       precision 6), then for n in {1000, 1'000'000, 1'000'000'000,
//       10'000'000'000} runs the four throughput benchmarks, printing the
//       returned time and n divided by that time, and afterwards runs the four
//       latency benchmarks for the same n values, printing each Latency.
//       The existing comments record that std::vector throws std::bad_alloc
//       at the largest n and that both containers run out of memory there
//       in the unique_ptr variants.
#include "antioptimise.h" #include "dynarray.h" #include #include #include /* Throughput: DynArray constructed with N, n Append calls timed as a whole. */ NOINLINE double BenchDynArrayThroughput(size_t n) { static constexpr size_t N = 16ULL * 1024ULL * 1024ULL * 1024ULL; assert(n <= N); auto start_time = pal::GetCpuTime(); auto a = DynArray(N); for (size_t i = 0; i < n; i++) { a.Append(static_cast(i)); } auto end_time = pal::GetCpuTime(); PhantomRead(a); return static_cast(end_time - start_time) * 1e-6; } /* Throughput: default-constructed std::vector, n push_back calls timed as a whole. */ NOINLINE double BenchStdVectorThroughput(size_t n) { auto start_time = pal::GetCpuTime(); std::vector a; for (size_t i = 0; i < n; i++) { a.push_back(static_cast(i)); } auto end_time = pal::GetCpuTime(); PhantomRead(a); return static_cast(end_time - start_time) * 1e-6; } /* Throughput: DynArray whose elements come from std::make_unique, so each iteration also allocates. */ NOINLINE double BenchDynArrayThroughputWithUniquePtr(size_t n) { static constexpr size_t N = 16ULL * 1024ULL * 1024ULL * 1024ULL; assert(n <= N); auto start_time = pal::GetCpuTime(); auto a = DynArray>(N); for (size_t i = 0; i < n; i++) { a.Append(std::make_unique(static_cast(i))); } auto end_time = pal::GetCpuTime(); PhantomRead(a); return static_cast(end_time - start_time) * 1e-6; } /* Throughput: std::vector whose elements come from std::make_unique. */ NOINLINE double BenchStdVectorThroughputWithUniquePtr(size_t n) { auto start_time = pal::GetCpuTime(); std::vector> a; for (size_t i = 0; i < n; i++) { a.push_back(std::make_unique(static_cast(i))); } auto end_time = pal::GetCpuTime(); PhantomRead(a); return static_cast(end_time - start_time) * 1e-6; } /* Accumulates min, max and a decade-bucketed histogram of per-call time deltas. */ struct Latency { uint64_t min = ~0ULL; uint64_t max = 0; size_t histogram[7] = {0}; void Update(uint64_t delta) { if (delta < min) { min = delta; } if (max < delta) { max = delta; } if (delta < 1) { histogram[0]++; } else if (delta < 10) { histogram[1]++; } else if (delta < 100) { histogram[2]++; } else if (delta < 1000) { histogram[3]++; } else if (delta < 10000) { histogram[4]++; } else if (delta < 100000) { histogram[5]++; } else { histogram[6]++; } } void Print(std::ostream& out) const { out << "(" << static_cast(min)*1e-6 << ", " << static_cast(max)*1e-6 << ", ["; out << histogram[0] << ", " 
<< histogram[1] << ", " << histogram[2] << ", " << histogram[3] << ", " << histogram[4] << ", " << histogram[5] << ", " << histogram[6] << "])" << std::endl; } }; /* Latency: each DynArray Append call is timed individually; construction is outside the timed region. */ NOINLINE Latency BenchDynArrayLatency(size_t n) { static constexpr size_t N = 16ULL * 1024ULL * 1024ULL * 1024ULL; assert(n <= N); Latency l; auto a = DynArray(N); for (size_t i = 0; i < n; i++) { auto start_time = pal::GetCpuTime(); a.Append(static_cast(i)); auto end_time = pal::GetCpuTime(); l.Update(end_time - start_time); } PhantomRead(a); return l; } /* Latency: each std::vector push_back call is timed individually. */ NOINLINE Latency BenchStdVectorLatency(size_t n) { Latency l; std::vector a; for (size_t i = 0; i < n; i++) { auto start_time = pal::GetCpuTime(); a.push_back(static_cast(i)); auto end_time = pal::GetCpuTime(); l.Update(end_time - start_time); } PhantomRead(a); return l; } /* Latency: the timed region covers both std::make_unique and the DynArray Append. */ NOINLINE Latency BenchDynArrayLatencyWithUniquePtr(size_t n) { static constexpr size_t N = 16ULL * 1024ULL * 1024ULL * 1024ULL; assert(n <= N); Latency l; auto a = DynArray>(N); for (size_t i = 0; i < n; i++) { auto start_time = pal::GetCpuTime(); a.Append(std::make_unique(static_cast(i))); auto end_time = pal::GetCpuTime(); l.Update(end_time - start_time); } PhantomRead(a); return l; } /* Latency: the timed region covers both std::make_unique and the std::vector push_back. */ NOINLINE Latency BenchStdVectorLatencyWithUniquePtr(size_t n) { Latency l; std::vector> a; for (size_t i = 0; i < n; i++) { auto start_time = pal::GetCpuTime(); a.push_back(std::make_unique(static_cast(i))); auto end_time = pal::GetCpuTime(); l.Update(end_time - start_time); } PhantomRead(a); return l; } /* Entry point: runs every throughput benchmark, then every latency benchmark, for each n. */ DECL_MAIN { std::ios::sync_with_stdio(false); std::cout << std::fixed << std::showpoint; std::cout << std::setprecision(6); // Throughput for (auto n : {1000ULL, 1000'000ULL, 1'000'000'000ULL, 10'000'000'000ULL}) { PhantomWrite(n); double dyn_array_secs = BenchDynArrayThroughput(n); std::cout << "Construct: " << n << ": DynArray: " << dyn_array_secs << " (" << static_cast(static_cast(n) / dyn_array_secs) << ")" << std::endl; // std::vector throws std::bad_alloc, when pushing i=7854933628. 
// DynArray has no such issues. double std_vector_secs = BenchStdVectorThroughput(n); std::cout << "Construct: " << n << ": std::vector: " << std_vector_secs << " (" << static_cast(static_cast(n) / std_vector_secs) << ")" << std::endl; // In case of unique_ptr both DynArray and std::vector run out of memory for that order of magnitude. double dyn_array_unique_ptr_secs = BenchDynArrayThroughputWithUniquePtr(n); std::cout << "ConstructUniquePtr: " << n << ": DynArray: " << dyn_array_unique_ptr_secs << " (" << static_cast(static_cast(n) / dyn_array_unique_ptr_secs) << ")" << std::endl; double std_vector_unique_ptr_secs = BenchStdVectorThroughputWithUniquePtr(n); std::cout << "ConstructUniquePtr: " << n << ": std::vector: " << std_vector_unique_ptr_secs << " (" << static_cast(static_cast(n) / std_vector_unique_ptr_secs) << ")" << std::endl; } // Latency for (auto n : {1000ULL, 1000'000ULL, 1'000'000'000ULL, 10'000'000'000ULL}) { PhantomWrite(n); Latency dyn_array_lat = BenchDynArrayLatency(n); std::cout << "Append: " << n << ": DynArray: "; dyn_array_lat.Print(std::cout); // std::vector throws std::bad_alloc, when pushing i=7854933628. // DynArray has no such issues. Latency std_vector_lat = BenchStdVectorLatency(n); std::cout << "Append: " << n << ": std::vector: "; std_vector_lat.Print(std::cout); // In case of unique_ptr both DynArray and std::vector run out of memory for that order of magnitude. Latency dyn_array_unique_ptr_lat = BenchDynArrayLatencyWithUniquePtr(n); std::cout << "AppendUniquePtr: " << n << ": DynArray: "; dyn_array_unique_ptr_lat.Print(std::cout); Latency std_vector_unique_ptr_lat = BenchStdVectorLatencyWithUniquePtr(n); std::cout << "AppendUniquePtr: " << n << ": std::vector: "; std_vector_unique_ptr_lat.Print(std::cout); } }