diff --git a/.gitignore b/.gitignore index 9e27eddf76..e1d8210b59 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,6 @@ inspect.html test/cuda *.DS_Store /**/*.dSYM/ +build/* +*.json + diff --git a/doc/math.qbk b/doc/math.qbk index 23eb856e1b..4cb059d4ec 100644 --- a/doc/math.qbk +++ b/doc/math.qbk @@ -1,13 +1,13 @@ [book Math Toolkit [quickbook 1.7] - [copyright 2006-2020 Nikhar Agrawal, Anton Bikineev, Paul A. Bristow, Marco Guazzone, Christopher Kormanyos, Hubert Holin, Bruno Lalande, John Maddock, Evan Miller, Jeremy Murphy, Matthew Pulver, Johan Råde, Gautam Sewani, Benjamin Sobotta, Nicholas Thompson, Thijs van den Berg, Daryle Walker and Xiaogang Zhang] + [copyright 2006-2020 Nikhar Agrawal, Anton Bikineev, Matthew Borland, Paul A. Bristow, Marco Guazzone, Christopher Kormanyos, Hubert Holin, Bruno Lalande, John Maddock, Evan Miller, Jeremy Murphy, Matthew Pulver, Johan Råde, Gautam Sewani, Benjamin Sobotta, Nicholas Thompson, Thijs van den Berg, Daryle Walker and Xiaogang Zhang] [/purpose ISBN 0-9504833-2-X 978-0-9504833-2-0, Classification 519.2-dc22] [license Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at [@http://www.boost.org/LICENSE_1_0.txt]) ] - [authors [Agrawal, Nikhar], [Bikineev, Anton], [Bristow, Paul A.], [Holin, Hubert], [Guazzone, Marco], [Kormanyos, Christopher], [Lalande, Bruno], [Maddock, John], [Miller, Evan], [Murphy, Jeremy W.], [Pulver, Matthew], [Råde, Johan], [Sobotta, Benjamin], [Sewani, Gautam], [Thompson, Nicholas], [van den Berg, Thijs], [Walker, Daryle], [Zhang, Xiaogang]] + [authors [Agrawal, Nikhar], [Bikineev, Anton], [Borland, Matthew], [Bristow, Paul A.], [Holin, Hubert], [Guazzone, Marco], [Kormanyos, Christopher], [Lalande, Bruno], [Maddock, John], [Miller, Evan], [Murphy, Jeremy W.], [Pulver, Matthew], [Råde, Johan], [Sobotta, Benjamin], [Sewani, Gautam], [Thompson, Nicholas], [van den Berg, Thijs], [Walker, Daryle], [Zhang, Xiaogang]] [/last-revision $Date$] [version 2.13.0] ] diff --git a/doc/sf/number_series.qbk b/doc/sf/number_series.qbk index 970b3c7937..98ab09873c 100644 --- a/doc/sf/number_series.qbk +++ b/doc/sf/number_series.qbk @@ -259,9 +259,86 @@ Passing a value greater than `max_prime` results in a __domain_error being raise This function is `constexpr` only if the compiler supports C++14 constexpr functions. -[endsect] [/section:primes] +[endsect] [/section:primes Prime Numbers] -[endsect] [/Number Series] +[section:prime_sieve Prime Sieve] + +[h4 Synopsis] + +`` +#include +`` + +namespace boost { namespace math { + + template + void prime_sieve(ExecutionPolicy&& policy, Integer upper_bound, Container &resultant_primes) + + template + void prime_range(ExecutionPolicy&& policy, Integer lower_bound, Integer upper_bound, Container &resultant_primes) + + template + void prime_sieve(Integer upper_bound, Container &resultant_primes) + + template + void prime_range(Integer lower_bound, Integer upper_bound, Container &resultant_primes) + + template + constexpr void prime_reserve(Integer upper_bound, std::vector &prime_container) + + +}} // End namespaces + +[h4 Description] + +There are two sets of prime sieveing functions available: `prime_sieve` and `prime_range`. +`prime_sieve` will return all primes in the range [2, `upper_bound`). +`prime_range` will return all primes in the range [lower_bound, upper_bound). +Both `prime_sieve` and `prime_range` can perform arbitrary precision calculations using `boost::multiprecision::cpp_int` or `boost::multiprecision::mpz_int`. + +`prime_sieve` and `prime_range` both have a parameter for an execution policy. +The policies `std::execution::par` or `std::execution::par_unseq` will enable internal multi-threading. +All other policies or no policy will result in the `prime_sieve` and `prime_range` executing sequentially. + +`prime_reserve` uses the prime number theorem to reserve for approximately the correct number of primes given `upper_bound`. + +[h4 Examples] + // To reserve space and calculate primes [2, 1,000,000) in parallel + std::vector primes; + boost::math::prime_reserve(1'000'000, primes); + boost::math::prime_sieve(std::execution::par, 1'000'000, primes); + + // To calculate primes [100, 1,000) sequentially + std::vector primes; + boost::math::prime_range(100, 1'000, primes); + +[h4 Complexity] +These functions were tested for complexity using [@https://github.com/google/benchmark google benchmark] on a system with the following specifications: +* CPU: Intel i5-10500 +* OS: Red Hat Enterprise Linux 8.2 +* Compiler: gcc-g++ 10.2.0 (Compiled from source with GMP 6.2.0, MPC 1.2.0, and MPFR 4.1.0 using RHEL gcc-toolset-9) +* Compiler flags: -Ofast -MMD -march=native -g -lbenchmark -lpthread -lgmp + +Range of test upper_bound: 2[super 1] - 2[super 30] + +[pre''' +[table:id Complexity Calculations `bigo[](N)` + [[Type] [Time Complexity] [Time Complexity RMS] [CPU Complexity] [CPU Complexity RMS]] + [[int32_t] [0.72N] [5%] [0.02N] [13%]] + [[int64_t] [0.78N] [16%] [0.03N] [6%]] + [[uint32_t] [0.71N] [10%] [0.02N] [14%]] + [[cpp_int] [6.24N] [44%] [0.35N] [4%]] + [[mpz_int] [9.27N] [11%] [0.69N] [8%]] +] +'''] + +[h4 References] +* Sorensen, Jonathan [@https://research.cs.wisc.edu/techreports/1990/TR909.pdf An Introduction to Prime Number Sieves] +* Gries, David and Misra, Jayadev [@https://www.cs.utexas.edu/users/misra/scannedPdf.dir/linearSieve.pdf A Linear Sieve Algorithm for Finding Prime Numbers] + +[endsect] [/section:primes Prime Sieve] + +[endsect] [/section:number_series Number Series] [/ Copyright 2013, 2014 Nikhar Agrawal, Christopher Kormanyos, John Maddock, Paul A. Bristow. diff --git a/include/boost/math/special_functions/detail/fast_mod.hpp b/include/boost/math/special_functions/detail/fast_mod.hpp new file mode 100644 index 0000000000..fcefe89bd8 --- /dev/null +++ b/include/boost/math/special_functions/detail/fast_mod.hpp @@ -0,0 +1,34 @@ +// Copyright 2020 Matt Borland +// +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt +// or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_FAST_MOD_HPP +#define BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_FAST_MOD_HPP + +// https://stackoverflow.com/questions/14997165/fastest-way-to-get-a-positive-modulo-in-c-c#14997413 + +namespace boost::math::detail +{ +template +inline unsigned modulo(const T value, const unsigned m) +{ + int mod = static_cast(value % static_cast(m)); + if(mod < 0) + { + mod += m; + } + + return mod; +} + +// Value must be positive and m must be a power of two +template +inline unsigned modulo_power_of_two(const T value, const unsigned m) +{ + return value & static_cast(m-1); +} +} +#endif // BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_FAST_MOD_HPP diff --git a/include/boost/math/special_functions/detail/interval_prime_sieve.hpp b/include/boost/math/special_functions/detail/interval_prime_sieve.hpp new file mode 100644 index 0000000000..67ef833001 --- /dev/null +++ b/include/boost/math/special_functions/detail/interval_prime_sieve.hpp @@ -0,0 +1,322 @@ +// Copyright 2020 Matt Borland and Jonathan Sorenson +// +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt +// or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_INTERVAL_SIEVE_HPP +#define BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_INTERVAL_SIEVE_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace boost::math::detail::prime_sieve +{ +template +class IntervalSieve +{ + +#ifdef BOOST_HAS_INT128 // Defined in GCC 4.6+, clang, intel. MSVC does not define. +using uint128_t = unsigned __int128; // One machine word smaller than the boost equivalent +#else +#include +using uint128_t = boost::multiprecision::uint128_t; +#endif + +private: + // Table of pseudo-sqares (https://mathworld.wolfram.com/Pseudosquare.html) + // This table is from page 421, table 16.3.1, Hugh Williams' book + // Last 8 entries added from Wooding's MS thesis, 2003, pp. 92-93 + struct pssentry + { + static constexpr std::size_t len {49}; + static constexpr std::array prime + { + 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 67, 71, 79, 83, 101, 103, 107, 113, 131, 149, 157, + 173, 181, 193, 197, 211, 227, 229, 233, 239, 241, 251, 257, 263, 277, 281, 283, 293, 311, 331, 337, 347, 353 + }; + + static constexpr std::array ps + { + 73, 241, 1'009, 2'641, 8'089, 18'001, 53'881, 87'481, 117'049, 515'761, 1'083'289, 3'206'641, 3'818'929, + 9'257'329, 22'000'801, 48'473'881, 175'244'281, 427'733'329, 898'716'289, 2'805'544'681, 10'310'263'441, + 23'616'331'489, 85'157'610'409, 196'265'095'009, 2'871'842'842'801, 26'250'887'023'729, 112'434'732'901'969, + 178'936'222'537'081, 696'161'110'209'049, 2'854'909'648'103'881, 6'450'045'516'630'769, 11'641'399'247'947'921, + 190'621'428'905'186'449, 196'640'148'121'928'601, 712'624'335'095'093'521, 1'773'855'791'877'850'321, + 2'327'687'064'124'474'441, 6'384'991'873'059'836'689, 8'019'204'661'305'419'761, 10'198'100'582'046'287'689u, + + (static_cast(0x3uLL) << 64) | 0xc956f827e0524359uLL, // 69'848'288'320'900'186'969 + (static_cast(0xbuLL) << 64) | 0x539315b3b1268d59uLL, // 208'936'365'799'044'975'961 + (static_cast(0x1cuLL) << 64) | 0xec87d86ca60b50a1uLL, // 533'552'663'339'828'203'681 + (static_cast(0x32uLL) << 64) | 0xc6d3496f20db3d81uLL, // 936'664'079'266'714'697'089 + (static_cast(0x74uLL) << 64) | 0x210967a12ba94be1uLL, // 2'142'202'860'370'269'916'129 + (static_cast(0x2e3uLL) << 64) | 0xec11ddc09fd65c51uLL, // 13'649'154'491'558'298'803'281 + (static_cast(0x753uLL) << 64) | 0x641c14b397c27bf1uLL, // 34'594'858'801'670'127'778'801 + (static_cast(0x1511uLL) << 64) | 0x85fdf38d1fc9ce21uLL, // 99'492'945'930'479'213'334'049 + (static_cast(0x3e8buLL) << 64) | 0xaba417e222ca5091uLL // 295'363'187'400'900'310'880'401 + }; + }; + + static constexpr pssentry pss_{}; + boost::math::detail::prime_sieve::MOD30Wheel w_{}; + std::size_t tdlimit_; + + Integer delta_; + Integer left_; + Integer right_; + + OutputIterator resultant_primes_; + + simple_bitset b_; + + std::vector primes_; + std::int_fast64_t plimit_; + + void Settdlimit() noexcept; + void SeiveLength(const Integer d) noexcept; + void Sieve() noexcept; + bool Psstest(const std::size_t pos) noexcept; + void Psstestall() noexcept; + decltype(auto) WriteOutput() noexcept; + void Setup(const Integer left, const Integer right) noexcept; + +public: + IntervalSieve(const Integer left, const Integer right, OutputIterator resultant_primes) noexcept; + decltype(auto) NewRange(const Integer left, const Integer right) noexcept; + decltype(auto) NewRange(const Integer left, const Integer right, OutputIterator resultant_primes) noexcept; +}; + +template +void IntervalSieve::Settdlimit() noexcept +{ + const double dr {static_cast(right_)}; + const double delta {static_cast(delta_)}; + const double tdest {delta * std::log(dr)}; + + // Small cases + if(tdest * tdest >= dr) + { + tdlimit_ = static_cast(std::sqrt(dr)); + plimit_ = 0; + return; + } + + // First guess + if(tdest <= 1ul<<30) + { + tdlimit_ = static_cast(tdest); + } + + else + { + tdlimit_ = 1ul<<30; + } + + // Find the corresponding prime + std::size_t i; + for(i = pss_.len - 1; i > 0; --i) + { + if(static_cast(pss_.ps[i]) * tdlimit_ < dr) + { + break; + } + } + plimit_ = pss_.prime[i]; + + double tdlimit_guess = 1 + std::fmod(dr, static_cast(pss_.ps[i])); + if(tdlimit_guess * tdlimit_guess >= dr) + { + tdlimit_ = static_cast(std::sqrt(dr)); + plimit_ = 0; + } +} + +template +void IntervalSieve::SeiveLength(const Integer d) noexcept +{ + Integer r {left_ % d}; + Integer start {0}; + + if(r != 0) + { + start = d - r; + } + + for(Integer i {start}; i >= 0 && i < b_.size(); i += d) + { + b_.clear(static_cast(i)); + } +} + +template +void IntervalSieve::Sieve() noexcept +{ + std::int_fast64_t primes_range {}; + if(plimit_ <= 10) + { + primes_range = 10; + } + + else + { + primes_range = plimit_; + } + + // Sieve with pre-computed (or small) primes and then use the wheel for the remainder + std::size_t i {}; + Integer j; + if(plimit_ <= pss_.prime.back()) + { + SeiveLength(static_cast(2)); + for(; pss_.prime[i] < primes_range; ++i) + { + SeiveLength(pss_.prime[i]); + } + + j = w_.Next(pss_.prime[--i]); + } + + else + { + primes_.resize(static_cast(prime_approximation(right_))); + linear_sieve(primes_range, primes_.begin()); + + for(; primes_[i] < primes_range; ++i) + { + SeiveLength(primes_[i]); + } + + j = w_.Next(primes_[--i]); + } + + for(; j <= tdlimit_; j = w_.Next(j)) + { + SeiveLength(j); + } +} + +template +decltype(auto) IntervalSieve::WriteOutput() noexcept +{ + for(std::size_t i {left_ % 2 == 0 ? 1 : 0}; i < b_.size(); i += 2) + { + if(b_[i]) + { + *resultant_primes_++ = left_ + i; + } + } + return resultant_primes_; +} + +// Performs the pseduosqaure prime test on n = left + pos +// return 1 if prime or prime power, 0 otherwise +// Begins with a base-2 test +template +bool IntervalSieve::Psstest(const std::size_t pos) noexcept +{ + const Integer n {static_cast(left_ + pos)}; + const Integer exponent {(n - 1) / 2}; + const std::int_fast64_t nmod8 = static_cast(n % 8); + + std::int_fast64_t negative_one_count {}; + + for(std::size_t i {}; i < primes_.size(); ++i) + { + Integer temp = primes_[i]; + temp = static_cast(std::pow(static_cast(temp), static_cast(n))); + + if(temp == 1) + { + if(i == 0 && nmod8 == 5) + { + return false; + } + } + + else + { + ++temp; + if(temp == n) + { + if(i > 0) + { + ++negative_one_count; + } + } + else + { + return false; + } + } + } + + return (nmod8 != 1 || negative_one_count > 0); +} + +template +void IntervalSieve::Psstestall() noexcept +{ + for(std::size_t i {}; i < b_.size(); ++i) + { + if(b_[i]) + { + if(!Psstest(i)) + { + b_.clear(i); + } + } + } +} + +template +void IntervalSieve::Setup(const Integer left, const Integer right) noexcept +{ + left_ = left; + right_ = right; + delta_ = right_ - left_; + + b_.resize(static_cast(delta_)); + b_.reset(); + Settdlimit(); + Sieve(); + + if(plimit_ != 0) + { + Psstestall(); + } +} + +template +IntervalSieve::IntervalSieve(const Integer left, const Integer right, OutputIterator resultant_primes) noexcept : + resultant_primes_ {resultant_primes} +{ + Setup(left, right); + WriteOutput(); +} + +template +decltype(auto) IntervalSieve::NewRange(const Integer left, const Integer right) noexcept +{ + Setup(left, right); + return WriteOutput(); +} + +template +decltype(auto) IntervalSieve::NewRange(const Integer left, const Integer right, OutputIterator resultant_primes) noexcept +{ + resultant_primes_ = resultant_primes; + Setup(left, right); + return WriteOutput(); +} +} + +#endif // BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_INTERVAL_SIEVE_HPP diff --git a/include/boost/math/special_functions/detail/linear_prime_sieve.hpp b/include/boost/math/special_functions/detail/linear_prime_sieve.hpp new file mode 100644 index 0000000000..96176309c0 --- /dev/null +++ b/include/boost/math/special_functions/detail/linear_prime_sieve.hpp @@ -0,0 +1,162 @@ +// Copyright 2020 Matt Borland +// +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt +// or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_LINEAR_PRIME_SIEVE_HPP +#define BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_LINEAR_PRIME_SIEVE_HPP + +#include +#include +#include +#include +#include + +namespace boost::math::detail::prime_sieve +{ +template +decltype(auto) linear_sieve(const Integer upper_bound, OutputIterator resultant_primes) +{ + const std::size_t masks_size {static_cast(upper_bound / 2 + 1)}; + std::unique_ptr masks {new bool[masks_size]}; + memset(masks.get(), true, sizeof(*masks.get()) * (masks_size)); + + *resultant_primes++ = 2; + + for(std::size_t index {1}; index < masks_size; ++index) + { + if(masks[index]) + { + *resultant_primes++ = static_cast(2 * index + 1); + for(std::size_t clear {index * 3 + 1}; clear < masks_size; clear += index * 2 + 1) + { + masks[clear] = false; + } + } + } + return resultant_primes; +} + +// 4'096 is where benchmarked performance of linear_sieve begins to diverge +template +static const Integer linear_sieve_limit = Integer(4'096); // Constexpr does not work with boost::multiprecision types + +// Stepanov Sieve - From Mathematics to Generic Programming Chap 3 +template +void mark_sieve(RandomAccessIterator first, RandomAccessIterator last, Integer factor) +{ + *first = false; + while(last - first > factor) + { + first = first + factor; + *first = false; + } +} + +template +inline void mark_sieve(Bitset& bits, const Integer factor) +{ + for(Integer i {factor * factor}; i < bits.size(); i += factor) + { + bits[static_cast(i)] = 0; + } +} + +template +void sift(RandomAccessIterator first, Integer n) +{ + const auto last {std::next(first, static_cast(n))}; + std::fill(first, last, true); + Integer i {0}; + Integer index_square {3}; + Integer factor {3}; + + for(; index_square < n; index_square += factor + factor - 2) + { + if(first[i]) + { + mark_sieve(first + index_square, last, factor); + } + + ++i; + factor += 2; + } +} + +// TODO(mborland): Pass in a more efficient data structure (likely dynamic_bitset) to sift and post-process +template +inline decltype(auto) stepanov_sieve(Integer upper_bound, OutputIterator resultant_primes) +{ + if(upper_bound == 2) + { + return resultant_primes; + } + + sift(resultant_primes, upper_bound); + return resultant_primes; +} + +// TODO(mborland): Pass in execution policy. mark_sieve can readily be converted to std::for_each, but dynamic_bitset would need replaced with something +// that has iterators +template +decltype(auto) wheel_sieve_of_eratosthenes(const Integer upper_bound, OutputIterator resultant_primes) +{ + if(upper_bound == 2) + { + *resultant_primes++ = static_cast(2); + return resultant_primes; + } + + const Integer sqrt_upper_bound {static_cast(std::floor(std::sqrt(static_cast(upper_bound)))) + 1}; + boost::dynamic_bitset<> trial(static_cast(upper_bound)); + trial.set(); + std::array primes {2, 3, 5}; // Wheel basis + std::array wheel {7, 11, 13, 17, 19, 23, 29, 31}; // MOD 30 wheel + const Integer wheel_mod {30}; + + for(std::size_t i {}; i < primes.size(); ++i) + { + mark_sieve(trial, primes[i]); + *resultant_primes++ = primes[i]; + } + + // Last value in the wheel is the starting point for the next step + for(std::size_t i {}; i < wheel.size(); ++i) + { + mark_sieve(trial, wheel[i]); + *resultant_primes++ = wheel[i]; + } + + Integer i {wheel_mod}; + for(; (i + wheel.front()) < sqrt_upper_bound; i += wheel_mod) + { + for(std::size_t j {}; j < wheel.size(); ++j) + { + Integer spoke {i + wheel[j]}; + if(trial[static_cast(spoke)]) + { + mark_sieve(trial, spoke); + *resultant_primes++ = std::move(spoke); + } + } + } + + for(; (i + wheel.front()) < upper_bound; i += wheel_mod) + { + for(std::size_t j {}; j < wheel.size(); ++j) + { + Integer spoke {i + wheel[j]}; + if(trial[static_cast(spoke)]) + { + *resultant_primes++ = std::move(spoke); + } + } + } + + return resultant_primes; +} +} + +#endif // BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_LINEAR_PRIME_SIEVE_HPP diff --git a/include/boost/math/special_functions/detail/prime_wheel.hpp b/include/boost/math/special_functions/detail/prime_wheel.hpp new file mode 100644 index 0000000000..5a4e9496c9 --- /dev/null +++ b/include/boost/math/special_functions/detail/prime_wheel.hpp @@ -0,0 +1,344 @@ +// Copyright 2020 Matt Borland and Jonathan Sorenson +// +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt +// or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_PRIME_WHEEL_HPP +#define BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_PRIME_WHEEL_HPP + +#include +#include +#include +#include +#include + +namespace boost::math::detail::prime_sieve +{ +template +class Wheel +{ +private: + struct Wheelrec + { + std::int_fast32_t rp; + std::int_fast32_t dist; + std::int_fast32_t pos; + std::int_fast32_t inv; + }; + + std::unique_ptr W_; + Integer M_; + Integer k_; + Integer phi_; + + static constexpr std::array P_ {2, 3, 5, 7, 11, 13, 17, 19}; + + void build(Integer korsize); + +public: + Wheel() : W_{nullptr}, M_{0}, k_{0}, phi_{0} {}; + explicit Wheel(Integer korsize) { build(korsize); } + explicit Wheel(const Wheel &x) { build(x.K()); } + + constexpr bool operator!() const noexcept { return W_ == nullptr; } + constexpr const Wheelrec& operator[](const Integer i) const noexcept { return W_[i % M_]; } + const Wheel& operator=(const Wheel &x) + { + if(this != &x) + { + build(x.K()); + } + return *this; + } + + constexpr Integer Size() const noexcept { return M_; } + constexpr Integer K() const noexcept { return k_; } + constexpr Integer Phi() const noexcept { return phi_; } + + constexpr Integer Next(const Integer i) const noexcept { return i + W_[i % M_].dist; } + constexpr Integer MakeRP(const Integer i) const noexcept + { + if(W_[i % M_].rp) + { + return i; + } + return Next(i); + } + constexpr Integer Prev(const Integer i) const noexcept { return i - W_[(M_ - (i % M_)) % M_].dist; } + constexpr Integer Pos(const Integer i) const noexcept { return phi_ * (i / M_) + W_[i % M_].pos; } + constexpr Integer Inv(const Integer i) const noexcept { return M_ * (i / phi_) + W_[i % phi_].inv; } + + void Print(); +}; + +template +void Wheel::build(Integer korsize) +{ + // Calculate k_ and M_ + if(korsize >= 10) + { + --korsize; + for(k_ = 0; korsize > 0; ++k_) + { + korsize /= P_[k_]; + } + } + else + { + k_ = korsize; + } + + Integer i {0}; + Integer dist {0}; + Integer pos {1}; + + for(M_ = 1; i < k_; ++i) + { + M_ *= P_[i]; + } + + W_ = std::make_unique(M_); + + // Compute the RP field + for(i = 0; i < M_; ++i) + { + W_[i].rp = 1; + } + + for(i = 0; i < k_; ++i) + { + for(Integer j {0}; j < M_; j += P_[i]) + { + W_[j].rp = 0; + } + } + + // Compute the dist field + W_[M_- 1].dist = 2; + for(i = M_ - 2; i >= 0; --i) + { + W_[i].dist = ++dist; + if(W_[i].rp) + { + dist = 0; + } + } + + // Copute pos and inv fields + for(i = 0; i < M_; ++i) + { + W_[i].inv = 0; + if(W_[i].rp) + { + W_[pos].inv = i; + W_[i].pos = pos++; + } + else + { + W_[i].pos = 0; + } + + } + + W_[0].inv = -1; + phi_ = W_[M_- 1].pos; +} + +template +void Wheel::Print() +{ + std::int_fast32_t i {}; + std::cout << "Wheel size = " << this->Size() + << "\nk = " << this->K() + << "\nphi(M) = " << this->Phi() << std::endl; + + // Verify size + for(i = 0; i < this->Size(); ++i) + { + std::cout << std::setw(4) << i << ','; + if(i % 25 == 24) + { + std::cout << std::endl; + } + } + + std::cout << "\n\nRP Field\n"; + for(i = 0; i < this->Size(); ++i) + { + std::cout << std::setw(3) << W_[i].rp << ','; + if(i % 25 == 24) + { + std::cout << std::endl; + } + } + + std::cout << "\n\nDist Field\n"; + for(i = 0; i < this->Size(); ++i) + { + std::cout << std::setw(3) << W_[i].dist << ','; + if(i % 25 == 24) + { + std::cout << std::endl; + } + } + + std::cout << "\n\nPos Field\n"; + for(i = 0; i < this->Size(); ++i) + { + std::cout << std::setw(3) << W_[i].pos << ','; + if(i % 25 == 24) + { + std::cout << std::endl; + } + } + + std::cout << "\n\nInv Field\n"; + for(i = 0; i < this->Size(); ++i) + { + std::cout << std::setw(4) << W_[i].inv << ','; + if(i % 25 == 24) + { + std::cout << std::endl; + } + } + std::cout << std::endl; +} + +// Pre-computed MOD 30 wheel +// TODO(mborland): refactor capitalization of member functions +template +class MOD30Wheel final +{ + static constexpr std::uint_fast8_t M_ {30}; + static constexpr std::uint_fast8_t k_ {3}; + static constexpr std::uint_fast8_t phi_ {8}; + + static constexpr std::array dist_ + { + 1, 6, 5, 4, 3, 2, 1, 4, 3, 2, 1, 2, 1, 4, 3, 2, 1, 2, 1, 4, 3, 2, 1, 6, 5, 4, + 3, 2, 1, 2 + }; + static constexpr std::array spokes_ + { + 2, 6, 4, 2, 4, 2, 4, 6 + }; + + std::size_t current_index_; + Integer current_index_num_; + +public: + constexpr MOD30Wheel() = default; + ~MOD30Wheel() = default; + + constexpr auto Size() const noexcept { return M_; } + constexpr auto K() const noexcept { return k_; } + constexpr auto Phi() const noexcept { return phi_; } + + constexpr Integer Next(const Integer i) const noexcept { return i + dist_[static_cast(i % M_)]; } + + // Avoid using modulus if scanning quickly through the wheel. Uses the stored values to generate the next number + inline Integer Next() noexcept + { + ++current_index_; + if(current_index_ == phi_) + { + current_index_ = 0; + } + return current_index_num_ += spokes_[current_index_]; + } + + inline Integer advance(const std::size_t dist) noexcept + { + for(std::size_t i {}; i < dist; ++i) + { + Next(); + } + return current_index_num_; + } + + inline std::size_t current_index() const noexcept { return current_index_; } + + inline auto SetCurrentIndex(const Integer i) noexcept + { + current_index_num_ = Next(i - 1); + + const std::size_t temp {static_cast(current_index_num_ % M_)}; + switch (temp) + { + case 0: case 1: + current_index_ = 0; + break; + case 2: case 3: case 4: case 5: case 6: case 7: + current_index_ = 1; + break; + case 8: case 9: case 10: case 11: + current_index_ = 2; + break; + case 12: case 13: + current_index_ = 3; + break; + case 14: case 15: case 16: case 17: + current_index_ = 4; + break; + case 18: case 19: + current_index_ = 5; + break; + case 20: case 21: case 22: case 23: + current_index_ = 6; + break; + case 24: case 25: case 26: case 27: case 28: case 29: + current_index_ = 7; + break; + default: + break; + } + + return current_index_num_; + } + + // Magic number is the number of possible primes in each turn of the mod 30 wheel + // https://en.wikipedia.org/wiki/Wheel_factorization + constexpr auto PrimeRatio() const noexcept { return static_cast(phi_) / M_; } +}; + +// Pre-computed MOD 210 wheel +template +class MOD210Wheel final +{ +private: + static constexpr auto M_ {210}; + static constexpr auto k_ {4}; + static constexpr auto phi_ {28}; + + static constexpr std::array dist_ + { + 1, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 2, 1, 4, 3, 2, 1, 2, 1, 4, 3, 2, 1, 6, 5, + 4, 3, 2, 1, 2, 1, 6, 5, 4, 3, 2, 1, 4, 3, 2, 1, 2, 1, 4, 3, 2, 1, 6, 5, 4, + 3, 2, 1, 6, 5, 4, 3, 2, 1, 2, 1, 6, 5, 4, 3, 2, 1, 4, 3, 2, 1, 2, 1, 6, 5, + 4, 3, 2, 1, 4, 3, 2, 1, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1, 4, 3, 2, + 1, 2, 1, 4, 3, 2, 1, 2, 1, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1, 6, 5, 4, 3, + 2, 1, 4, 3, 2, 1, 6, 5, 4, 3, 2, 1, 2, 1, 4, 3, 2, 1, 6, 5, 4, 3, 2, 1, 2, + 1, 6, 5, 4, 3, 2, 1, 6, 5, 4, 3, 2, 1, 4, 3, 2, 1, 2, 1, 4, 3, 2, 1, 6, 5, + 4, 3, 2, 1, 2, 1, 6, 5, 4, 3, 2, 1, 4, 3, 2, 1, 2, 1, 4, 3, 2, 1, 2, 1, 10, + 9, 8, 7, 6, 5, 4, 3, 2, 1, 2 + }; + +public: + constexpr MOD210Wheel() = default; + ~MOD210Wheel() = default; + + constexpr auto Size() const noexcept { return M_; } + constexpr auto K() const noexcept { return k_; } + constexpr auto Phi() const noexcept { return phi_; } + + constexpr auto Next(const Integer i) const noexcept { return i + dist_[static_cast(i % M_)]; } + + // Magic number is the number of possible primes in each turn of the mod 30 wheel + // https://en.wikipedia.org/wiki/Wheel_factorization + constexpr auto PrimeRatio() const noexcept { return 48.0 / 210.0; } +}; +} + +#endif // BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_PRIME_WHEEL_HPP diff --git a/include/boost/math/special_functions/detail/simple_bitset.hpp b/include/boost/math/special_functions/detail/simple_bitset.hpp new file mode 100644 index 0000000000..78f1f72d9e --- /dev/null +++ b/include/boost/math/special_functions/detail/simple_bitset.hpp @@ -0,0 +1,228 @@ +// Copyright 2020 John Maddock and Matt Borland +// +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt +// or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_SIMPLE_BITSET_HPP +#define BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_SIMPLE_BITSET_HPP + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace boost::math::detail::prime_sieve +{ +template +class simple_bitset +{ +private: + static constexpr std::size_t ln2(std::size_t n) noexcept + { + return n <= 1 ? 0 : 1 + ln2(n >> 1); + } + + // https://www.chessprogramming.org/BitScan#De_Bruijn_Multiplication + static constexpr std::array index64 + { + 0, 47, 1, 56, 48, 27, 2, 60, + 57, 49, 41, 37, 28, 16, 3, 61, + 54, 58, 35, 52, 50, 42, 21, 44, + 38, 32, 29, 23, 17, 11, 4, 62, + 46, 55, 26, 59, 40, 36, 15, 53, + 34, 51, 20, 43, 31, 22, 10, 45, + 25, 39, 14, 33, 19, 30, 9, 24, + 13, 18, 8, 12, 7, 6, 5, 63 + }; + + static constexpr std::uint64_t debruijn64 {0x03f79d71b4cb0a89}; + + static constexpr auto num_bits {sizeof(I) * CHAR_BIT}; + static constexpr I mask {num_bits - 1}; + static constexpr std::size_t shift {ln2(num_bits)}; + + std::unique_ptr bits; + std::size_t m_size; + std::size_t size_; + +public: + simple_bitset() noexcept : bits {nullptr}, m_size {} {}; + + explicit simple_bitset(std::size_t n) noexcept : bits {nullptr}, m_size {n} + { + resize(n); + reset(); + } + + // Initialize with specific pattern: + simple_bitset(std::size_t n, const I* pattern, std::size_t len) noexcept + { + const std::size_t block_count = n / num_bits + (n % num_bits ? 1 : 0); + if (block_count <= len) + { + std::memcpy(bits.get(), pattern, block_count * sizeof(I)); + } + else + { + I* p = bits.get(); + std::memcpy(p, pattern, len * sizeof(I)); + I* base = p; + p += len; + + while (len <= block_count - len) + { + std::memcpy(p, base, len * sizeof(I)); + p += len; + len *= 2; + } + + if (block_count > len) + { + std::memcpy(p, base, (block_count - len) * sizeof(I)); + } + } + } + + ~simple_bitset() = default; + + inline I* limbs() noexcept + { + return bits.get(); + } + + inline I test(std::size_t n) const noexcept + { + return bits[n >> shift] & (I(1u) << (n & mask)); + } + + inline void clear(std::size_t n) noexcept + { + bits[n >> shift] &= ~I(I(1u) << (n & mask)); + } + + inline void set(std::size_t n) noexcept + { + bits[n >> shift] |= (I(1u) << (n & mask)); + } + + inline std::size_t size() const noexcept { return m_size; } + + void resize(std::size_t n) noexcept + { + if(n != m_size) + { + m_size = n; + size_ = n / num_bits + (n % num_bits ? 1 : 0); + bits.reset(new I[size_]); + } + } + + void reset() noexcept + { + std::memset(bits.get(), 0xff, m_size / CHAR_BIT + (m_size % CHAR_BIT ? 1 : 0)); + } + + void clear_all() noexcept + { + std::memset(bits.get(), 0, m_size / CHAR_BIT + (m_size % CHAR_BIT ? 1 : 0)); + } + + inline I operator[](const std::size_t n) const noexcept + { + return test(n); + } + + // https://bisqwit.iki.fi/source/misc/bitcounting/ + // WP3 - Uses hardcoded constants if type is U64 + I count() const noexcept + { + static constexpr I m1 {std::is_same_v ? 0x5555555555555555 : (~static_cast(0)) / 3}; + static constexpr I m2 {std::is_same_v ? 0x3333333333333333 : (~static_cast(0)) / 5}; + static constexpr I m4 {std::is_same_v ? 0x0f0f0f0f0f0f0f0f : (~static_cast(0)) / 17}; + static constexpr I h1 {std::is_same_v ? 0x0101010101010101 : (~static_cast(0)) / 255}; + + I counter {}; + + for(std::size_t i {}; i < m_size; i += num_bits) + { + I x = bits[std::floor(i / num_bits)]; + x -= (x >> 1) & m1; + x = (x & m2) + ((x >> 2) & m2); + x = (x + (x >> 4)) & m4; + + counter += (x * h1) >> 56; + } + + return counter; + } + + template, bool> = true> + std::size_t bit_scan_forward(std::size_t pos) const noexcept + { + pos = std::ceil(pos / 64.0); + + while(pos < size_ && bits[pos] == 0) + { + ++pos; + } + + if(pos == size_) + { + return m_size; + } + + const std::uint64_t temp {bits[pos]}; + if(temp == 0) + { + return m_size; + } + else + { + return index64[((temp ^ (temp-1)) * debruijn64) >> 58] + pos * 64; + } + } + + template, bool> = true> + inline std::size_t bit_scan_forward() const noexcept + { + return bit_scan_forward(0); + } + + template, bool> = true> + std::size_t bit_scan_reverse(std::size_t pos) const noexcept + { + pos = std::floor(pos / 64.0) - 1; + + while(bits[pos] == 0 && pos >= 0) + { + --pos; + } + + std::uint64_t temp {bits[pos]}; + if(temp == 0) + { + return 0; + } + else + { + temp |= temp >> 1; + temp |= temp >> 1; + temp |= temp >> 2; + temp |= temp >> 4; + temp |= temp >> 8; + temp |= temp >> 16; + temp |= temp >> 32; + + return index64[(temp * debruijn64) >> 58] + pos * 64; + } + } +}; +} + +#endif // BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_SIMPLE_BITSET_HPP diff --git a/include/boost/math/special_functions/detail/small_primes.hpp b/include/boost/math/special_functions/detail/small_primes.hpp new file mode 100644 index 0000000000..962292cfc7 --- /dev/null +++ b/include/boost/math/special_functions/detail/small_primes.hpp @@ -0,0 +1,71 @@ +// Copyright 2020 Matt Borland +// +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt +// or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_SMALL_PRIMES_HPP +#define BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_SMALL_PRIMES_HPP + +#include +#include + +namespace boost::math::detail::prime_sieve +{ +template +inline Integer small_prime_limit() noexcept +{ + return static_cast(1'920); +} + +// The first 64 turns of the mod 30 wheel (less than 1920) +template +decltype(auto) small_primes(const Integer upper_bound, OutputIterator out) noexcept +{ + constexpr std::array small_primes_set + { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, + 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, + 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, + 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, + 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, + 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, + 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, + 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, + 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, + 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0 + }; + + MOD30Wheel wheel_; + + // Wheel basis + *out++ = 2; + *out++ = 3; + *out++ = 5; + *out++ = wheel_.SetCurrentIndex(7); + + auto it {small_primes_set.cbegin()}; + ++it; + Integer temp {11}; + wheel_.Next(); + while(it != small_primes_set.cend() && temp < upper_bound) + { + if(*it) + { + *out++ = temp; + } + + temp = wheel_.Next(); + ++it; + } + + return out; +} +} + +#endif // BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_SMALL_PRIMES_HPP diff --git a/include/boost/math/special_functions/detail/trial_division.hpp b/include/boost/math/special_functions/detail/trial_division.hpp new file mode 100644 index 0000000000..f0c6fa419b --- /dev/null +++ b/include/boost/math/special_functions/detail/trial_division.hpp @@ -0,0 +1,31 @@ +// Copyright 2020 Matt Borland +// +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt +// or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_TRIAL_DIVISION_HPP +#define BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_TRIAL_DIVISION_HPP + +namespace boost{ namespace math{ namespace detail{ +template +bool is_prime(const T n) noexcept +{ + if (n <= 1) + { + return false; + } + + for (T factor{2}; factor * factor <= n; ++factor) + { + if (n % factor == 0) + { + return false; + } + } + + return true; +} +}}} +#endif // BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_TRIAL_DIVISION_HPP diff --git a/include/boost/math/special_functions/prime_approximation.hpp b/include/boost/math/special_functions/prime_approximation.hpp new file mode 100644 index 0000000000..da1eabcc6a --- /dev/null +++ b/include/boost/math/special_functions/prime_approximation.hpp @@ -0,0 +1,51 @@ +// Copyright 2020 Matt Borland +// +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt +// or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include + +#ifndef BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_APPROXIMATION_HPP +#define BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_APPROXIMATION_HPP + +namespace boost::math +{ +// Constexpr does not work with multiprecision types +template, bool> = true> +constexpr Integer prime_approximation(const Integer upper_bound) noexcept +{ + constexpr auto c = 30 * ::log(113) / 113; // Magic numbers from wikipedia + return static_cast(::floor(c * static_cast(upper_bound) / ::log(static_cast(upper_bound)))); +} + +template, bool> = true> +constexpr Integer prime_approximation(const Integer lower_bound, const Integer upper_bound) noexcept +{ + return prime_approximation(upper_bound) - prime_approximation(lower_bound); +} + +template, bool> = true> +Integer prime_approximation(const Integer upper_bound) noexcept +{ + const auto c = 30 * std::log(113) / 113; + return static_cast(std::floor(c * static_cast(upper_bound) / std::log(static_cast(upper_bound)))); +} + +template, bool> = true> +Integer prime_approximation(const Integer lower_bound, const Integer upper_bound) noexcept +{ + return prime_approximation(upper_bound) - prime_approximation(lower_bound); +} + +template +inline void prime_reserve(Integer upper_bound, std::vector& prime_container) +{ + prime_container.reserve(static_cast(prime_approximation(upper_bound))); +} +} + +#endif // BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_APPROXIMATION_HPP diff --git a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp new file mode 100644 index 0000000000..b42095f386 --- /dev/null +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -0,0 +1,334 @@ +// Copyright 2020 Matt Borland +// +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt +// or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_SEIVE_ITER_HPP +#define BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_SEIVE_ITER_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace boost::math::detail::prime_sieve +{ +inline std::size_t L1D_SIZE {32'768}; + +template +decltype(auto) sequential_segmented_sieve(const Integer lower_bound, const Integer upper_bound, OutputIterator resultant_primes) +{ + const Integer interval {static_cast(L1D_SIZE * 8)}; + Integer current_lower_bound {lower_bound}; + Integer current_upper_bound {current_lower_bound + interval}; + + if (current_upper_bound > upper_bound) + { + current_upper_bound = upper_bound; + } + + std::size_t ranges {static_cast((upper_bound - lower_bound) / interval)}; + + IntervalSieve sieve(current_lower_bound, current_upper_bound, resultant_primes); + + for(std::size_t i {}; i < ranges; ++i) + { + current_lower_bound = current_upper_bound; + current_upper_bound += interval; + if(current_upper_bound > upper_bound) + { + current_upper_bound = upper_bound; + } + resultant_primes = sieve.NewRange(current_lower_bound, current_upper_bound); + } + + return resultant_primes; +} + +template +decltype(auto) segmented_sieve(const Integer lower_bound, const Integer upper_bound, OutputIterator resultant_primes) +{ + const auto num_threads {std::thread::hardware_concurrency() > 0 ? std::thread::hardware_concurrency() : 2u}; + const Integer thread_range {(upper_bound - lower_bound) / static_cast(num_threads)}; + + std::vector> prime_vectors(num_threads); + std::vector> future_manager; + future_manager.reserve(num_threads); + + Integer current_lower_bound {lower_bound}; + Integer current_upper_bound {current_lower_bound + thread_range}; + + for(std::size_t i {}; i < num_threads - 1; ++i) + { + prime_vectors[i].resize(static_cast(prime_approximation(current_lower_bound, current_upper_bound))); + + future_manager.emplace_back(std::async(std::launch::async, [current_lower_bound, current_upper_bound, &prime_vectors, i]{ + sequential_segmented_sieve(current_lower_bound, current_upper_bound, prime_vectors[i].begin()); + })); + + current_lower_bound = current_upper_bound; + current_upper_bound += thread_range; + } + + prime_vectors.back().resize(static_cast(prime_approximation(current_lower_bound, upper_bound))); + future_manager.emplace_back(std::async(std::launch::async, [current_lower_bound, upper_bound, &prime_vectors]{ + sequential_segmented_sieve(current_lower_bound, upper_bound, prime_vectors.back().begin()); + })); + + std::size_t i {}; + for(auto&& future : future_manager) + { + future.get(); // Blocks to maintain proper sorting + + for(auto& val : prime_vectors[i]) + { + *resultant_primes++ = std::move(val); + } + + ++i; + } + + return resultant_primes; +} + +template +decltype(auto) prime_sieve_iter_impl(ExecutionPolicy&& policy, const Integer upper_bound, OutputIterator resultant_primes) +{ + if (upper_bound == 2) + { + return resultant_primes; + } + + else if (upper_bound <= small_prime_limit()) + { + small_primes(upper_bound, resultant_primes); + return resultant_primes; + } + + resultant_primes = small_primes(small_prime_limit(), resultant_primes); + + if constexpr (std::is_same_v, decltype(std::execution::seq)> + #if __cpp_lib_execution > 201900 + || std::is_same_v, decltype(std::execution::unseq)> + #endif + ) + { + resultant_primes = sequential_segmented_sieve(small_prime_limit(), upper_bound, resultant_primes); + } + + else + { + resultant_primes = segmented_sieve(small_prime_limit(), upper_bound, resultant_primes); + } + + return resultant_primes; +} + +template +inline decltype(auto) prime_sieve_iter_impl(const Integer upper_bound, OutputIterator resultant_primes) +{ + return prime_sieve_iter_impl(std::execution::seq, upper_bound, resultant_primes); +} + +template +decltype(auto) prime_range_iter_impl(ExecutionPolicy&& policy, const Integer lower_bound, const Integer upper_bound, OutputIterator resultant_primes) +{ + if(lower_bound <= small_prime_limit() || upper_bound <= small_prime_limit()) + { + std::vector primes; + primes.resize(static_cast(prime_approximation(upper_bound))); + small_primes(upper_bound, primes.begin()); + + auto it {primes.begin()}; + while(*it < lower_bound) + { + ++it; + } + + while(it != primes.end() && *it <= upper_bound) + { + *resultant_primes++ = std::move(*it++); + } + } + + if(lower_bound <= small_prime_limit() && upper_bound > small_prime_limit()) + { + if constexpr (std::is_same_v, decltype(std::execution::seq)> + #if __cpp_lib_execution > 201900 + || std::is_same_v, decltype(std::execution::unseq)> + #endif + ) + { + resultant_primes = detail::prime_sieve::sequential_segmented_sieve(small_prime_limit(), upper_bound, resultant_primes); + } + + else + { + resultant_primes = detail::prime_sieve::segmented_sieve(small_prime_limit(), upper_bound, resultant_primes); + } + } + + else + { + if constexpr (std::is_same_v, decltype(std::execution::seq)> + #if __cpp_lib_execution > 201900 + || std::is_same_v, decltype(std::execution::unseq)> + #endif + ) + { + resultant_primes = detail::prime_sieve::sequential_segmented_sieve(lower_bound, upper_bound, resultant_primes); + } + + else + { + resultant_primes = detail::prime_sieve::segmented_sieve(lower_bound, upper_bound, resultant_primes); + } + } + + return resultant_primes; +} + +template +inline decltype(auto) prime_range_iter_impl(const Integer lower_bound, const Integer upper_bound, OutputIterator resultant_primes) +{ + return prime_range_iter_impl(std::execution::seq, lower_bound, upper_bound, resultant_primes); +} + +// SFINAE for dual interface +template +class is_container +{ +private: + using yes = char; + struct no { char x[2]; }; + + template + static yes test( decltype(&U::size) ); + + template + static no test(...); + +public: + enum { type = sizeof(test(0)) == sizeof(char) }; +}; + +// Add C++14/17esqe interface to is_container +template +static constexpr auto is_container_t = is_container::type; +} + +namespace boost::math +{ +template +inline decltype(auto) prime_sieve(ExecutionPolicy&& exec, Integer upper_bound, T output) +{ + if constexpr (detail::prime_sieve::is_container_t>) + { + if constexpr (std::is_integral_v) + { + return detail::prime_sieve::prime_sieve_iter_impl(exec, upper_bound, std::begin(*output)); + } + else + { + if (upper_bound < static_cast(std::numeric_limits::max())) + { + return detail::prime_sieve::prime_sieve_iter_impl(exec, static_cast(upper_bound), std::begin(*output)); + } + else + { + return detail::prime_sieve::prime_sieve_iter_impl(exec, upper_bound, std::begin(*output)); + } + } + } + else + { + if constexpr (std::is_integral_v) + { + return detail::prime_sieve::prime_sieve_iter_impl(exec, upper_bound, output); + } + else + { + if (upper_bound < static_cast(std::numeric_limits::max())) + { + return detail::prime_sieve::prime_sieve_iter_impl(exec, static_cast(upper_bound), output); + } + else + { + return detail::prime_sieve::prime_sieve_iter_impl(exec, upper_bound, output); + } + } + } +} + +template +inline decltype(auto) prime_sieve(Integer upper_bound, T output) +{ + return prime_sieve(std::execution::seq, upper_bound, output); +} + +template +inline decltype(auto) prime_range(ExecutionPolicy&& exec, Integer upper_bound, Integer lower_bound, T output) +{ + if constexpr (detail::prime_sieve::is_container_t>) + { + if constexpr (std::is_integral_v) + { + return detail::prime_sieve::prime_range_iter_impl(exec, lower_bound, upper_bound, std::begin(*output)); + } + else + { + if (upper_bound < static_cast(std::numeric_limits::max())) + { + return detail::prime_sieve::prime_range_iter_impl(exec, static_cast(lower_bound), + static_cast(upper_bound), std::begin(*output)); + } + else + { + return detail::prime_sieve::prime_range_iter_impl(exec, lower_bound, upper_bound, std::begin(*output)); + } + } + } + else + { + if constexpr (std::is_integral_v) + { + return detail::prime_sieve::prime_range_iter_impl(exec, lower_bound, upper_bound, output); + } + else + { + if (upper_bound < static_cast(std::numeric_limits::max())) + { + return detail::prime_sieve::prime_range_iter_impl(exec, static_cast(lower_bound), static_cast(upper_bound), output); + } + else + { + return detail::prime_sieve::prime_range_iter_impl(exec, lower_bound, upper_bound, output); + } + } + } +} + +template +inline decltype(auto) prime_range(Integer lower_bound, Integer upper_bound, T output) +{ + return prime_range(std::execution::seq, lower_bound, upper_bound, output); +} + +template +inline void set_l1d_size(const Integer size) +{ + detail::prime_sieve::L1D_SIZE = static_cast(size); +} +} + +#endif // BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_SEIVE_ITER_HPP diff --git a/include/boost/math/tools/stopwatch.hpp b/include/boost/math/tools/stopwatch.hpp new file mode 100644 index 0000000000..35e6eaf7af --- /dev/null +++ b/include/boost/math/tools/stopwatch.hpp @@ -0,0 +1,69 @@ +// Copyright 2020 John Maddock and Matt Borland +// +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt +// or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_TOOLS_STOPWATCH_HPP +#define BOOST_MATH_TOOLS_STOPWATCH_HPP + +#include +#include +#include + +namespace boost::math::tools +{ +template +class stopwatch +{ +private: + using Time = typename Clock::time_point; + using duration = typename Clock::duration; + + std::vector