From 67686bac64512cdcdd016510c26fdeacd2951e0d Mon Sep 17 00:00:00 2001 From: puji4810 <2430438586@qq.com> Date: Thu, 4 Sep 2025 01:07:16 +0800 Subject: [PATCH 1/3] fix: - Correct the cipher suite identifier for TLS_AES_256_GCM_SHA384: {0x13, 0x02} corresponds to TLS_AES_256_GCM_SHA384. Ref: https://datatracker.ietf.org/doc/html/rfc8446#appendix-B.4 - remove the duplicated __cplusplus check in array.h - use N instead of this->n in index_span subspan - move allochdl in list move operations --- include/fast_io_crypto/tls/cipher_suite.h | 6 +++--- include/fast_io_crypto/tls/client_hello.h | 2 +- include/fast_io_dsal/array.h | 4 ---- include/fast_io_dsal/impl/index_span.h | 6 +++--- include/fast_io_dsal/impl/list.h | 3 ++- 5 files changed, 9 insertions(+), 12 deletions(-) diff --git a/include/fast_io_crypto/tls/cipher_suite.h b/include/fast_io_crypto/tls/cipher_suite.h index 336d98897..a7879ffdb 100644 --- a/include/fast_io_crypto/tls/cipher_suite.h +++ b/include/fast_io_crypto/tls/cipher_suite.h @@ -78,7 +78,7 @@ rfc */ inline constexpr cipher_suite_type tls_aes_128_gcm_sha256{{::std::byte(0x13), ::std::byte(0x01)}}; -inline constexpr cipher_suite_type tls_aes_256_gcm_sha256{{::std::byte(0x13), ::std::byte(0x02)}}; +inline constexpr cipher_suite_type tls_aes_256_gcm_sha384{{::std::byte(0x13), ::std::byte(0x02)}}; inline constexpr cipher_suite_type tls_chacha20_poly1305_sha256{{::std::byte(0x13), ::std::byte(0x03)}}; inline constexpr cipher_suite_type tls_aes_128_ccm_sha256{{::std::byte(0x13), ::std::byte(0x04)}}; inline constexpr cipher_suite_type tls_aes_128_ccm_8_sha256{{::std::byte(0x13), ::std::byte(0x05)}}; @@ -214,9 +214,9 @@ inline constexpr void print_define(output &outp, cipher_suite_type const &e) { print_freestanding(outp, u8"TLS_AES_128_GCM_SHA256{0x13,0x01}"); } - else if (e == tls_aes_256_gcm_sha256) + else if (e == tls_aes_256_gcm_sha384) { - print_freestanding(outp, u8"TLS_AES_256_GCM_SHA256{0x13,0x02}"); + print_freestanding(outp, u8"TLS_AES_256_GCM_SHA384{0x13,0x02}"); } else if (e 
== tls_chacha20_poly1305_sha256) { diff --git a/include/fast_io_crypto/tls/client_hello.h b/include/fast_io_crypto/tls/client_hello.h index 51afa123a..e557fc1eb 100644 --- a/include/fast_io_crypto/tls/client_hello.h +++ b/include/fast_io_crypto/tls/client_hello.h @@ -13,7 +13,7 @@ struct client_hello ::std::uint_least8_t session_id_length{32}; ::fast_io::freestanding::array<::std::uint_least8_t, 32> session_id{}; ::fast_io::freestanding::array<::std::uint_least8_t, 2> cipher_suite_length = {0x00, 0x02}; - cipher_suite::cipher_suite_type cipher_suite{cipher_suite::tls_aes_256_gcm_sha256}; + cipher_suite::cipher_suite_type cipher_suite{cipher_suite::tls_aes_256_gcm_sha384}; ::std::uint_least8_t number_of_compression_length = {0x01}; ::std::uint_least8_t compression_method = {}; }; diff --git a/include/fast_io_dsal/array.h b/include/fast_io_dsal/array.h index b10924865..48e975273 100644 --- a/include/fast_io_dsal/array.h +++ b/include/fast_io_dsal/array.h @@ -4,10 +4,6 @@ #error "You must be using a C++ compiler" #endif -#if !defined(__cplusplus) -#error "You must be using a C++ compiler" -#endif - #include "../fast_io_core.h" #include "impl/misc/push_macros.h" #include "impl/misc/push_warnings.h" diff --git a/include/fast_io_dsal/impl/index_span.h b/include/fast_io_dsal/impl/index_span.h index 21bdcbd35..27e03a720 100644 --- a/include/fast_io_dsal/impl/index_span.h +++ b/include/fast_io_dsal/impl/index_span.h @@ -273,11 +273,11 @@ class index_span #endif inline constexpr span_type subspan(size_type pos, size_type count = ::fast_io::containers::npos) const noexcept { - if (this->n < pos) [[unlikely]] + if (N < pos) [[unlikely]] { ::fast_io::fast_terminate(); } - size_type const val{this->n - pos}; + size_type const val{N - pos}; if (val < count) { if (count != ::fast_io::containers::npos) [[unlikely]] @@ -295,7 +295,7 @@ class index_span #endif inline constexpr span_type subspan_unchecked(size_type pos, size_type count = ::fast_io::containers::npos) const noexcept { - 
size_type const val{this->n - pos}; + size_type const val{N - pos}; if (count == ::fast_io::containers::npos) { count = val; diff --git a/include/fast_io_dsal/impl/list.h b/include/fast_io_dsal/impl/list.h index 59996b980..9b9ac98a0 100644 --- a/include/fast_io_dsal/impl/list.h +++ b/include/fast_io_dsal/impl/list.h @@ -907,7 +907,7 @@ class list } inline constexpr list(list &&other) noexcept - : imp(other.imp) + : imp(other.imp), allochdl(std::move(other.allochdl)) { auto prev = static_cast<::fast_io::containers::details::list_node_common *>(imp.prev); auto next = static_cast<::fast_io::containers::details::list_node_common *>(imp.next); @@ -921,6 +921,7 @@ class list { this->destroy(); imp = other.imp; + allochdl = ::std::move(other.allochdl); auto prev = static_cast<::fast_io::containers::details::list_node_common *>(imp.prev); auto next = static_cast<::fast_io::containers::details::list_node_common *>(imp.next); next->prev = prev->next = __builtin_addressof(imp); From b53b7852cbc9ad018941c22843e62539fb2f3a82 Mon Sep 17 00:00:00 2001 From: puji4810 <2430438586@qq.com> Date: Fri, 5 Sep 2025 17:26:54 +0800 Subject: [PATCH 2/3] comment out allochdl move operations --- include/fast_io_dsal/impl/list.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/include/fast_io_dsal/impl/list.h b/include/fast_io_dsal/impl/list.h index 9b9ac98a0..d395e3866 100644 --- a/include/fast_io_dsal/impl/list.h +++ b/include/fast_io_dsal/impl/list.h @@ -907,7 +907,10 @@ class list } inline constexpr list(list &&other) noexcept - : imp(other.imp), allochdl(std::move(other.allochdl)) + : imp(other.imp) + #if 0 + , allochdl(std::move(other.allochdl)) + #endif { auto prev = static_cast<::fast_io::containers::details::list_node_common *>(imp.prev); auto next = static_cast<::fast_io::containers::details::list_node_common *>(imp.next); @@ -921,7 +924,9 @@ class list { this->destroy(); imp = other.imp; + #if 0 allochdl = ::std::move(other.allochdl); + #endif auto prev = 
static_cast<::fast_io::containers::details::list_node_common *>(imp.prev); auto next = static_cast<::fast_io::containers::details::list_node_common *>(imp.next); next->prev = prev->next = __builtin_addressof(imp); From 12e6890ae274b4953939f631ac3b7ef91b2f3506 Mon Sep 17 00:00:00 2001 From: puji4810 <2430438586@qq.com> Date: Mon, 15 Sep 2025 14:41:07 +0800 Subject: [PATCH 3/3] Add I/O and formatting performance benchmarks - Add format vs fmt benchmark (format_vs_fmt.cc) - Add teju vs dragonbox floating point benchmark (teju_vs_dragonbox.cc) - Add file I/O vs stdio benchmark (file_vs_stdio.cc) These benchmarks compare fast_io performance against fmt library, dragonbox/teju floating point implementations, and standard stdio for various I/O and formatting operations. --- benchmark/0019.formatting/format_vs_fmt.cc | 409 ++++++++++++++++++ .../teju_vs_dragonbox.cc | 162 +++++++ benchmark/0021.io/file_vs_stdio.cc | 49 +++ 3 files changed, 620 insertions(+) create mode 100644 benchmark/0019.formatting/format_vs_fmt.cc create mode 100644 benchmark/0020.teju_vs_dragonbox/teju_vs_dragonbox.cc create mode 100644 benchmark/0021.io/file_vs_stdio.cc diff --git a/benchmark/0019.formatting/format_vs_fmt.cc b/benchmark/0019.formatting/format_vs_fmt.cc new file mode 100644 index 000000000..86ac41901 --- /dev/null +++ b/benchmark/0019.formatting/format_vs_fmt.cc @@ -0,0 +1,409 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define ROUNDS 5 + +#if __has_include() +#include +#define ENABLE_STD_FORMAT_BENCH 1 +#endif + +#if __has_include() +#include +#if __has_include() +#include +#define ENABLE_FMT_BENCH 1 +#endif +#endif + +using namespace fast_io::io; +using namespace fast_io::mnp; + +struct benchmark_result +{ + std::size_t total_size{}; + fast_io::unix_timestamp elapsed{}; +}; + +template +inline benchmark_result run_bench(Func f, std::uint32_t iterations, std::uint32_t rounds = ROUNDS) +{ + // More 
thorough warmup + for (std::uint32_t w = 0; w < 1000; ++w) + { + auto warmup = f(w); + (void)warmup; + } + + std::vector times; + times.reserve(rounds); + std::size_t total_size{}; + + for (std::uint32_t round = 0; round < rounds; ++round) + { + auto start = fast_io::posix_clock_gettime(fast_io::posix_clock_id::monotonic_raw); + for (std::uint32_t i{}; i != iterations; ++i) + { + auto tmp = f(i); + if (round == 0) // Only count size once + { + total_size += tmp.size(); + } + } + auto end = fast_io::posix_clock_gettime(fast_io::posix_clock_id::monotonic_raw); + times.push_back(end - start); + } + + // Return the best (fastest) time + auto best_time = *std::min_element(times.begin(), times.end()); + return {total_size, best_time}; +} + + +inline ::fast_io::string make_record_fastio(std::uint32_t i) +{ + std::uint32_t id = i * 2654435761u + 0x9e3779b9u; + std::uint64_t val = 0xDEADBEEFCAFEBABEull ^ (std::uint64_t)id * 1315423911ull; + std::uint32_t score = (id % 10007) + 3141; + std::uint32_t rate = score / ((id % 97) + 1); + ::fast_io::string name{"fastio"}; + + // return fast_io::concat_fast_io( + // "ID=", width(scalar_placement::right, hex0xupper(id), 10, '0'), + // " VAL=", width(scalar_placement::right, hex0xupper(val), 18, '0'), + // " SCORE=", width(scalar_placement::right, strvw(score_s), 12), + // " RATE=", width(scalar_placement::right, strvw(rate_s), 10), + // " NAME=", left(strvw(name), 16, '.')); + + return fast_io::concat_fast_io( + "ID=", width(scalar_placement::right, hex0xupper(id), 10, '0'), + " VAL=", width(scalar_placement::right, hex0xupper(val), 18, '0'), + " SCORE=", width(scalar_placement::right, score, 12), + " RATE=", width(scalar_placement::right, rate, 10), + " NAME=", left(name, 16, '.')); +} + +#if defined(ENABLE_STD_FORMAT_BENCH) +inline std::string make_record_stdformat(std::uint32_t i) +{ + std::uint32_t id = i * 2654435761u + 0x9e3779b9u; + std::uint64_t val = 0xDEADBEEFCAFEBABEull ^ static_cast(id) * 1315423911ull; + std::uint32_t 
score = (id % 10007) + 3141; + std::uint32_t rate = score / ((id % 97) + 1); + constexpr auto name = "fastio"; + return std::format("ID={:#010X} VAL={:#018X} SCORE={:>12} RATE={:>10} NAME={:.<16}", + id, val, score, rate, name); +} +#endif + +#if __has_include() && defined(ENABLE_FMT_BENCH) +inline std::string make_record_fmt(std::uint32_t i) +{ + std::uint32_t id = i * 2654435761u + 0x9e3779b9u; + std::uint64_t val = 0xDEADBEEFCAFEBABEull ^ static_cast(id) * 1315423911ull; + std::uint32_t score = (id % 10007) + 3141; + std::uint32_t rate = score / ((id % 97) + 1); + constexpr auto name = "fastio"; + +#if __has_include() + return fmt::format(FMT_COMPILE("ID={:#010X} VAL={:#018X} SCORE={:>12} RATE={:>10} NAME={:.<16}"), + id, val, score, rate, name); +#else + return fmt::format("ID={:#010X} VAL={:#018X} SCORE={:>12} RATE={:>10} NAME={:.<16}", + id, val, score, rate, name); +#endif +} +#endif + +// iostream version (builds the record as a string) +inline std::string make_record_iostream(std::uint32_t i) +{ + std::uint32_t id = i * 2654435761u + 0x9e3779b9u; + std::uint64_t val = 0xDEADBEEFCAFEBABEull ^ static_cast(id) * 1315423911ull; + std::uint32_t score = (id % 10007) + 3141; + std::uint32_t rate = score / ((id % 97) + 1); + constexpr char const *name = "fastio"; + + std::ostringstream oss; + oss.setf(std::ios::uppercase); + oss << "ID=" << std::showbase << std::internal << std::setfill('0') << std::setw(10) << std::hex << id; + oss << std::dec << std::setfill(' ') << std::nouppercase; // reset + oss.setf(std::ios::uppercase); + oss << " VAL=" << std::showbase << std::internal << std::setfill('0') << std::setw(18) << std::hex << val; + oss << std::dec << std::setfill(' '); + oss << " SCORE=" << std::setw(12) << std::right << score; + oss << " RATE=" << std::setw(10) << std::right << rate; + oss << " NAME=" << std::left << std::setw(16) << std::setfill('.') << name; + return oss.str(); +} + +// -------- Write benchmarks (buffered / unbuffered) to /dev/null, to avoid disk interference -------- +inline benchmark_result 
run_write_bench_fastio(std::uint32_t iterations, bool buffered_128k) +{ + std::size_t total_size{}; + auto start = fast_io::posix_clock_gettime(fast_io::posix_clock_id::monotonic_raw); + if (buffered_128k) + { + fast_io::native_file nf("/dev/null", fast_io::open_mode::out | fast_io::open_mode::trunc); + std::string buf; + buf.reserve(128 * 1024); + for (std::uint32_t i{}; i != iterations; ++i) + { + auto rec = make_record_fastio(i); + total_size += rec.size(); + if (buf.size() + rec.size() + 1 > 128 * 1024) + { + if (!buf.empty()) + { + ::fast_io::operations::write_all(nf, buf.data(), buf.data() + buf.size()); + buf.clear(); + } + if (rec.size() + 1 > 128 * 1024) + { + ::fast_io::operations::write_all(nf, rec.data(), rec.data() + rec.size()); + char nl = '\n'; + ::fast_io::operations::write_all(nf, &nl, &nl + 1); + continue; + } + } + buf.append(rec.data(), rec.size()); + buf.push_back('\n'); + } + if (!buf.empty()) + { + ::fast_io::operations::write_all(nf, buf.data(), buf.data() + buf.size()); + } + } + else + { + fast_io::native_file nf("/dev/null", fast_io::open_mode::out | fast_io::open_mode::trunc); + for (std::uint32_t i{}; i != iterations; ++i) + { + auto rec = make_record_fastio(i); + total_size += rec.size(); + ::fast_io::operations::write_all(nf, rec.data(), rec.data() + rec.size()); + char nl = '\n'; + ::fast_io::operations::write_all(nf, &nl, &nl + 1); + } + } + auto end = fast_io::posix_clock_gettime(fast_io::posix_clock_id::monotonic_raw); + return {total_size, end - start}; +} + +inline benchmark_result run_write_bench_fmt(std::uint32_t iterations, bool buffered_128k) +{ + std::size_t total_size{}; + auto start = fast_io::posix_clock_gettime(fast_io::posix_clock_id::monotonic_raw); + if (buffered_128k) + { + fast_io::native_file nf("/dev/null", fast_io::open_mode::out | fast_io::open_mode::trunc); + std::string buf; + buf.reserve(128 * 1024); + for (std::uint32_t i{}; i != iterations; ++i) + { + auto rec = make_record_fmt(i); + total_size += 
rec.size(); + if (buf.size() + rec.size() + 1 > 128 * 1024) + { + if (!buf.empty()) + { + ::fast_io::operations::write_all(nf, buf.data(), buf.data() + buf.size()); + buf.clear(); + } + if (rec.size() + 1 > 128 * 1024) + { + ::fast_io::operations::write_all(nf, rec.data(), rec.data() + rec.size()); + char nl = '\n'; + ::fast_io::operations::write_all(nf, &nl, &nl + 1); + continue; + } + } + buf.append(rec.data(), rec.size()); + buf.push_back('\n'); + } + if (!buf.empty()) + { + ::fast_io::operations::write_all(nf, buf.data(), buf.data() + buf.size()); + } + } + else + { + fast_io::native_file nf("/dev/null", fast_io::open_mode::out | fast_io::open_mode::trunc); + for (std::uint32_t i{}; i != iterations; ++i) + { + auto rec = make_record_fmt(i); + total_size += rec.size(); + print(nf, rec, '\n'); + } + } + auto end = fast_io::posix_clock_gettime(fast_io::posix_clock_id::monotonic_raw); + return {total_size, end - start}; +} + +inline benchmark_result run_write_bench_iostream(std::uint32_t iterations, bool buffered_128k) +{ + std::size_t total_size{}; + auto start = fast_io::posix_clock_gettime(fast_io::posix_clock_id::monotonic_raw); + { + std::ofstream out("/dev/null", std::ios::binary | std::ios::trunc); + std::vector bigbuf; + if (buffered_128k) + { + bigbuf.resize(128 * 1024); + out.rdbuf()->pubsetbuf(bigbuf.data(), static_cast(bigbuf.size())); + } + else + { + out.rdbuf()->pubsetbuf(nullptr, 0); + } + for (std::uint32_t i{}; i != iterations; ++i) + { + auto rec = make_record_iostream(i); + total_size += rec.size(); + out.write(rec.data(), static_cast(rec.size())); + out.put('\n'); + } + out.flush(); + } + auto end = fast_io::posix_clock_gettime(fast_io::posix_clock_id::monotonic_raw); + return {total_size, end - start}; +} + +int main(int argc, char **argv) +{ + std::uint32_t iterations = 10000000; + std::uint32_t rounds = ROUNDS; + if (argc >= 2) + { + try + { + iterations = ::fast_io::to(::fast_io::mnp::os_c_str(argv[1])); + } + catch (...) 
+ { + // ignore invalid input, keep default + } + } + if (argc >= 3) + { + try + { + rounds = ::fast_io::to(::fast_io::mnp::os_c_str(argv[2])); + } + catch (...) + { + // ignore invalid input, keep default ROUNDS + } + } + + auto sample_fastio = make_record_fastio(1); +#if defined(ENABLE_STD_FORMAT_BENCH) + auto sample_stdformat = make_record_stdformat(1); +#endif +#if __has_include() && defined(ENABLE_FMT_BENCH) + auto sample_fmt = make_record_fmt(1); +#endif + + using namespace fast_io::io; + print("Sample fast_io output: ", sample_fastio, "\n"); +#if defined(ENABLE_STD_FORMAT_BENCH) + print("Sample std::format output:", sample_stdformat, "\n"); +#endif +#if __has_include() && defined(ENABLE_FMT_BENCH) + print("Sample fmt output: ", sample_fmt, "\n"); +#endif + print("\n"); + + print("Running benchmarks with ", iterations, " iterations, ", rounds, " rounds each (showing best time)...\n\n"); + + auto fastio_res = run_bench(make_record_fastio, iterations, rounds); + print("fast_io completed\n"); + +#if defined(ENABLE_STD_FORMAT_BENCH) + auto stdformat_res = run_bench(make_record_stdformat, iterations, rounds); + print("std::format completed\n"); +#endif + +#if __has_include() && defined(ENABLE_FMT_BENCH) + auto fmt_res = run_bench(make_record_fmt, iterations, rounds); + print("fmt completed\n"); +#endif + + print("\n[format benchmark results]\n"); + print("Iterations: ", iterations, ", Rounds: ", rounds, "\n"); + print("fast_io (total size: ", fastio_res.total_size, ") took ", fastio_res.elapsed, "s\n"); + +#if defined(ENABLE_STD_FORMAT_BENCH) + print("std::format (total size: ", stdformat_res.total_size, ") took ", stdformat_res.elapsed, "s"); + // Calculate speedup + constexpr double subseconds_to_seconds = 1.0 / static_cast(fast_io::uint_least64_subseconds_per_second); + double stdformat_seconds = static_cast(stdformat_res.elapsed.seconds) + + static_cast(stdformat_res.elapsed.subseconds) * subseconds_to_seconds; + double fastio_seconds = 
static_cast(fastio_res.elapsed.seconds) + + static_cast(fastio_res.elapsed.subseconds) * subseconds_to_seconds; + if (fastio_seconds > 0) + { + double speedup = stdformat_seconds / fastio_seconds; + std::string speedup_str = std::format("{:.2f}", speedup); + print(" (fast_io is ", speedup_str, "x faster)\n"); + } + else + { + print("\n"); + } +#endif + +#if __has_include() && defined(ENABLE_FMT_BENCH) + print("fmt (total size: ", fmt_res.total_size, ") took ", fmt_res.elapsed, "s"); + constexpr double subseconds_to_seconds2 = 1.0 / static_cast(fast_io::uint_least64_subseconds_per_second); + double fmt_seconds = static_cast(fmt_res.elapsed.seconds) + + static_cast(fmt_res.elapsed.subseconds) * subseconds_to_seconds2; + double fastio_seconds2 = static_cast(fastio_res.elapsed.seconds) + + static_cast(fastio_res.elapsed.subseconds) * subseconds_to_seconds2; + if (fastio_seconds2 > 0) + { + double fmt_speedup = fmt_seconds / fastio_seconds2; + std::string fmt_speedup_str = std::format("{:.2f}", fmt_speedup); + print(" (fast_io is ", fmt_speedup_str, "x faster)\n"); + } + else + { + print("\n"); + } +#endif + + print("\n\n[write benchmark to /dev/null]\n"); + // fast_io write: 128KB buffered vs direct system call + { + auto fio_buf = run_write_bench_fastio(iterations, true); + auto fio_nobuf = run_write_bench_fastio(iterations, false); + print("fast_io obuf(128K) (size: ", fio_buf.total_size, ") took ", fio_buf.elapsed, "s\n"); + print("fast_io native(no) (size: ", fio_nobuf.total_size, ") took ", fio_nobuf.elapsed, "s\n"); + } + // iostream write: 128KB buffered vs no buffered + // { + // auto iostream_buf = run_write_bench_iostream(iterations, true); + // auto iostream_nobuf = run_write_bench_iostream(iterations, false); + // print("iostream 128K buf (size: ", iostream_buf.total_size, ") took ", iostream_buf.elapsed, "s\n"); + // print("iostream no buf (size: ", iostream_nobuf.total_size, ") took ", iostream_nobuf.elapsed, "s\n"); + // } +#if __has_include() && 
defined(ENABLE_FMT_BENCH) + // fmt write: 128KB buffered vs direct system call (format with FMT_COMPILE) + { + auto fmt_buf = run_write_bench_fmt(iterations, true); + auto fmt_nobuf = run_write_bench_fmt(iterations, false); + print("fmt(FMT_COMPILE)+buf (size: ", fmt_buf.total_size, ") took ", fmt_buf.elapsed, "s\n"); + print("fmt(FMT_COMPILE)+no (size: ", fmt_nobuf.total_size, ") took ", fmt_nobuf.elapsed, "s\n"); + } +#endif +} diff --git a/benchmark/0020.teju_vs_dragonbox/teju_vs_dragonbox.cc b/benchmark/0020.teju_vs_dragonbox/teju_vs_dragonbox.cc new file mode 100644 index 000000000..33f0c3047 --- /dev/null +++ b/benchmark/0020.teju_vs_dragonbox/teju_vs_dragonbox.cc @@ -0,0 +1,162 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace fast_io::io; + +template +static std::vector make_random_values(std::size_t n) +{ + std::mt19937_64 eng{123456789u}; + std::uniform_real_distribution dist(std::numeric_limits::denorm_min(), std::numeric_limits::max()); + std::vector v; + v.reserve(n); + for (std::size_t i = 0; i < n; ++i) + { + v.push_back(dist(eng)); // strictly positive finite by construction + } + return v; +} + +static void bench_float(std::size_t n) +{ + auto values = make_random_values(n); + + if(!values.empty()) + { + char fio_buf[128]; + char dbx_buf[128]; + char teju_buf[128]; + auto const x0 = values.front(); + char* fio_p = fast_io::pr_rsv_to_c_array(fio_buf, fast_io::mnp::scientific(x0)); + char* dbx_p = jkj::dragonbox::to_chars(x0, dbx_buf); + char* teju_p = jkj::dragonbox::to_chars(x0, teju_buf); + fast_io::println( + "sample fast_io=", fast_io::mnp::strvw(fio_buf, fio_p), + " dragonbox=", fast_io::mnp::strvw(dbx_buf, dbx_p), + " teju=", fast_io::mnp::strvw(teju_buf, teju_p)); + } + + { + fast_io::timer t(u8"fastio_float"); + std::uint64_t acc{}; + for (auto const x : values) + { + // auto const [mantissa, exponent, sign] = 
fast_io::details::get_punned_result(x); + // (void)sign; + // auto const r = fast_io::details::dragonbox_impl(mantissa, static_cast<::std::int_least32_t>(exponent)); + // acc += static_cast(r.m10) + static_cast(r.e10); + char buf[128]; + auto *p = fast_io::pr_rsv_to_c_array(buf, fast_io::mnp::scientific(x)); + acc += static_cast(p - buf); + } + std::uint64_t volatile sink = acc; + (void)sink; + } + + { + using namespace jkj::dragonbox; + fast_io::timer t(u8"dragonbox_float"); + std::uint64_t acc{}; + for (auto const x : values) + { + char buf[128]{0}; + auto *p = to_chars(x, buf); + acc += static_cast(p - buf); + } + std::uint64_t volatile sink = acc; + (void)sink; + } + + { + fast_io::timer t(u8"teju_float"); + std::uint64_t acc{}; + for (auto const x : values) + { + char buf[128]{0}; + auto *p = jkj::dragonbox::to_chars(x, buf); + acc += static_cast(p - buf); + } + std::uint64_t volatile sink = acc; + (void)sink; + } +} + +static void bench_double(std::size_t n) +{ + auto values = make_random_values(n); + + if(!values.empty()) + { + char fio_buf[128]; + char dbx_buf[128]; + char teju_buf[128]; + auto const x0 = values.front(); + char* fio_p = fast_io::pr_rsv_to_c_array(fio_buf, fast_io::mnp::scientific(x0)); + char* dbx_p = jkj::dragonbox::to_chars(x0, dbx_buf); + char* teju_p = jkj::dragonbox::to_chars(x0, teju_buf); + fast_io::println( + "sample fast_io=", fast_io::mnp::strvw(fio_buf, fio_p), + " dragonbox=", fast_io::mnp::strvw(dbx_buf, dbx_p), + " teju=", fast_io::mnp::strvw(teju_buf, teju_p)); + } + + { + // fast_io full formatting path: pr_rsv_to_c_array with scientific notation (includes string assembly) + fast_io::timer t(u8"fastio_double"); + std::uint64_t acc{}; + for (auto const x : values) + { + char buf[128]{0}; + auto *p = fast_io::pr_rsv_to_c_array(buf, fast_io::mnp::scientific(x)); + acc += static_cast(p - buf); + } + std::uint64_t volatile sink = acc; + (void)sink; + } + + { + using namespace jkj::dragonbox; + fast_io::timer t(u8"dragonbox_double"); + std::uint64_t acc{}; + for (auto const 
x : values) + { + char buf[128]{0}; + auto *p = to_chars(x, buf); + acc += static_cast(p - buf); + } + std::uint64_t volatile sink = acc; + (void)sink; + } + + { + fast_io::timer t(u8"teju_double"); + std::uint64_t acc{}; + for (auto const x : values) + { + char buf[128]{0}; + auto *p = jkj::dragonbox::to_chars(x, buf); + acc += static_cast(p - buf); + } + std::uint64_t volatile sink = acc; + (void)sink; + } +} + +int main() +{ + constexpr std::size_t N = 1u << 20; // ~1M samples + bench_float(N); + print("\n"); + bench_double(N); +} diff --git a/benchmark/0021.io/file_vs_stdio.cc b/benchmark/0021.io/file_vs_stdio.cc new file mode 100644 index 000000000..6760aba47 --- /dev/null +++ b/benchmark/0021.io/file_vs_stdio.cc @@ -0,0 +1,49 @@ +#include +#include +#include +#include +#include +#include +using namespace fast_io::io; + +int main() +{ + constexpr std::size_t N = 10'000'000; + // fast_io file write + { + fast_io::timer t(u8"fastio_file_write"); + fast_io::obuf_file obf("io_bench.txt"); + for (std::size_t i{}; i != N; ++i) + { + println(obf, i); + } + } + // stdio file write + { + fast_io::timer t(u8"stdio_file_write"); + fast_io::c_file cf("io_bench_stdio.txt", fast_io::open_mode::out); + for (std::size_t i{}; i != N; ++i) + { + std::fprintf(cf.fp, "%zu\n", i); + } + } + std::vector vec(N); + // fast_io file read + { + fast_io::timer t(u8"fastio_file_read"); + fast_io::ibuf_file ibf("io_bench.txt"); + for (std::size_t i{}; i != N; ++i) + { + scan(ibf, vec[i]); + } + } + // stdio file read + { + fast_io::timer t(u8"stdio_file_read"); + fast_io::c_file cf("io_bench_stdio.txt", fast_io::open_mode::in); + for (std::size_t i{}; i != N; ++i) + { + std::fscanf(cf.fp, "%zu", &vec[i]); + } + } +}