diff --git a/.github/workflows/pull-request.yml b/.github/workflows/pull-request.yml index f8251325e..49fdec3b3 100644 --- a/.github/workflows/pull-request.yml +++ b/.github/workflows/pull-request.yml @@ -15,7 +15,7 @@ jobs: native_backend: "all" mb2_jobs: "mb2" mb4_jobs: "mb4_tcm" - mb6_jobs: "mb6" + mb6_jobs: "mb6_ntl" # cmake_args_map holds job specific additional cmake options. compiler flags, native_backend flag and # OpenMP flag are set in generic_workflow.yml cmake_args_map: '{ diff --git a/README.md b/README.md index b762af18c..4f16f0a4d 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,8 @@ OpenFHE also supports hybrid vectorized schemes, with the goal of enabling the F * Switching between CKKS and FHEW/TFHE to evaluate non-smooth functions, e.g., comparison, using (scalar) FHEW/TFHE functional bootstrapping * Switching between RLWE (a scheme equivalent to the coefficient-encoded additive BFV scheme) and CKKS to evaluate arbitrary lookup tables over vectors of integers, e.g., modular reduction, comparison or S-box, using vectorized functional bootstrapping implemented in CKKS +OpenFHE also supports partial schemes, called schemelets, such as RLWE which is equivalent to the coefficient-encoded additive BFV scheme. In OpenFHE, the RLWE schemelet is the starting point for the vectorized functional bootstrapping capability, which allows the evaluation of arbitrary lookup tables over vectors of integers, e.g., modular reduction, comparison or Sbox, using CKKS in an intermediate step. + OpenFHE also includes the following multiparty extensions of FHE: * Threshold FHE for BGV, BFV, and CKKS schemes * Interactive bootstrapping for Threshold CKKS diff --git a/benchmark/src/ckks-bootstrapping.cpp b/benchmark/src/ckks-bootstrapping.cpp new file mode 100644 index 000000000..8b7c84477 --- /dev/null +++ b/benchmark/src/ckks-bootstrapping.cpp @@ -0,0 +1,128 @@ +//================================================================================== +// BSD 2-Clause License +// +// Copyright (c) 2014-2025, NJIT, Duality Technologies Inc. and other contributors +// +// All rights reserved. +// +// Author TPOC: contact@openfhe.org +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this +// list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+//==================================================================================
+
+#include "benchmark/benchmark.h"
+#include "config_core.h"
+#include "cryptocontext.h"
+#include "gen-cryptocontext.h"
+#include "gtest/gtest.h"
+#include "scheme/ckksrns/ckksrns-fhe.h"
+#include "scheme/ckksrns/ckksrns-utils.h"
+#include "scheme/ckksrns/gen-cryptocontext-ckksrns.h"
+
+#include <vector>
+
+using namespace lbcrypto;
+
+struct boot_config {
+    uint32_t ringDim;
+    uint32_t slots;
+    uint32_t dcrtBits;
+    uint32_t firstMod;
+    uint32_t numDigits;
+    uint32_t lvlsAfter;
+    uint32_t iters;
+    std::vector<uint32_t> lvlb;
+    SecretKeyDist skdst;
+    ScalingTechnique stech;
+};
+
+// clang-format off
+[[maybe_unused]] std::vector<boot_config> boot_configs = {
+    // ringDim, slots, dcrtBits, firstMod, numDigits, lvlsAfter, iters, lvlb, skdst, stech
+    { 1 << 16, 1 << 15, 54, 60, 15,  9, 1, {3, 3}, UNIFORM_TERNARY,     FLEXIBLEAUTO},
+    { 1 << 16, 1 << 15, 50, 57, 11,  9, 2, {3, 3}, UNIFORM_TERNARY,     FLEXIBLEAUTO},
+    { 1 << 16, 1 << 15, 50, 57, 16, 10, 2, {3, 3}, UNIFORM_TERNARY,     FLEXIBLEAUTO},
+    { 1 << 16, 1 << 15, 52, 57, 10,  8, 2, {3, 3}, UNIFORM_TERNARY,     FIXEDMANUAL},
+    { 1 << 16, 1 << 15, 52, 57, 16,  9, 2, {3, 3}, UNIFORM_TERNARY,     FIXEDMANUAL},
+    { 1 << 17, 1 << 16, 59, 60,  0,  5, 1, {4, 4}, SPARSE_TERNARY,      FLEXIBLEAUTO},
+    { 1 << 17, 1 << 16, 59, 60,  0,  5, 1, {4, 4}, SPARSE_ENCAPSULATED, FLEXIBLEAUTO},
+    { 1 << 16, 1 << 5,  59, 60,  0,  5, 1, {1, 1}, SPARSE_TERNARY,      FLEXIBLEAUTO},
+    { 1 << 16, 1 << 5,  59, 60,  0,  5, 1, {1, 1}, SPARSE_ENCAPSULATED, FLEXIBLEAUTO},
+    { 1 << 17, 1 << 5,  59, 60,  0,  5, 1, {1, 1}, SPARSE_TERNARY,      FLEXIBLEAUTO},
+    { 1 << 17, 1 << 5,  59, 60,  0,  5, 1, {1, 1}, SPARSE_ENCAPSULATED, FLEXIBLEAUTO},
+    { 1 << 17, 1 << 16, 59, 60,  0, 10, 1, {4, 4}, SPARSE_ENCAPSULATED, FLEXIBLEAUTO},
+    { 1 << 17, 1 << 5,  59, 60,  0, 10, 1, {1, 1}, SPARSE_ENCAPSULATED, FLEXIBLEAUTO},
+    { 1 << 17, 1 << 16, 59, 60,  0, 10, 2, {4, 4}, SPARSE_ENCAPSULATED, FLEXIBLEAUTO},
+    { 1 << 17, 1 << 5,  59, 60,  0, 10, 2, {1, 1}, SPARSE_ENCAPSULATED, FLEXIBLEAUTO},
+    { 1 << 17, 1 << 16, 78, 96,  0, 10, 2, {4, 4}, SPARSE_TERNARY,      COMPOSITESCALINGAUTO},
+};
+// clang-format on
+
+[[maybe_unused]] static void BootConfigs(benchmark::internal::Benchmark* b) {
+    for (uint32_t i = 0; i < boot_configs.size(); ++i)
+        b->ArgName("Config")->Arg(i);
+}
+
+[[maybe_unused]] static void CKKSBoot(benchmark::State& state) {
+    auto t = boot_configs[state.range(0)];
+
+    CCParams<CryptoContextCKKSRNS> parameters;
+    parameters.SetSecurityLevel(HEStd_128_classic);
+    parameters.SetRingDim(t.ringDim);
+    parameters.SetScalingModSize(t.dcrtBits);
+    parameters.SetFirstModSize(t.firstMod);
+    parameters.SetNumLargeDigits(t.numDigits);
+    parameters.SetSecretKeyDist(t.skdst);
+    parameters.SetScalingTechnique(t.stech);
+    parameters.SetKeySwitchTechnique(HYBRID);
+    uint32_t depth = t.lvlsAfter + FHECKKSRNS::GetBootstrapDepth(t.lvlb, t.skdst) + (t.iters - 1);
+    parameters.SetMultiplicativeDepth(depth);
+
+    auto cc = GenCryptoContext(parameters);
+    cc->Enable(PKE);
+    cc->Enable(KEYSWITCH);
+    cc->Enable(LEVELEDSHE);
+    cc->Enable(ADVANCEDSHE);
+    cc->Enable(FHE);
+
+    cc->EvalBootstrapSetup(t.lvlb, {0, 0}, t.slots);
+
+    auto keyPair = cc->KeyGen();
+    cc->EvalMultKeyGen(keyPair.secretKey);
+    cc->EvalBootstrapKeyGen(keyPair.secretKey, t.slots);
+
+    std::vector<double> x = {0.25, 0.5, 0.75, 1.0, 2.0, 3.0, 4.0, 5.0};
+
+    auto ptxt = cc->MakeCKKSPackedPlaintext(x, 1, depth - 1, nullptr, t.slots);
+    ptxt->SetLength(t.slots);
+
+    auto ctxt = cc->Encrypt(keyPair.publicKey, ptxt);
+
+    while (state.KeepRunning())
+        auto ctxtAfter =
cc->EvalBootstrap(ctxt, t.iters); + + cc->ClearStaticMapsAndVectors(); +} + +BENCHMARK(CKKSBoot)->Unit(benchmark::kSecond)->Iterations(4)->Apply(BootConfigs); + +BENCHMARK_MAIN(); diff --git a/benchmark/src/ckks-functional-bootstrapping.cpp b/benchmark/src/ckks-functional-bootstrapping.cpp new file mode 100644 index 000000000..3fd584863 --- /dev/null +++ b/benchmark/src/ckks-functional-bootstrapping.cpp @@ -0,0 +1,489 @@ +//================================================================================== +// BSD 2-Clause License +// +// Copyright (c) 2014-2025, NJIT, Duality Technologies Inc. and other contributors +// +// All rights reserved. +// +// Author TPOC: contact@openfhe.org +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this +// list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+//================================================================================== + +#include "benchmark/benchmark.h" +#include "config_core.h" +#include "cryptocontext.h" +#include "gen-cryptocontext.h" +#include "gtest/gtest.h" +#include "math/hermite.h" +#include "scheme/ckksrns/ckksrns-fhe.h" +#include "scheme/ckksrns/ckksrns-utils.h" +#include "scheme/ckksrns/gen-cryptocontext-ckksrns.h" +#include "schemelet/rlwe-mp.h" + +#include +#include +#include + +using namespace lbcrypto; + +struct fbt_config { + BigInteger QBFVInit; + BigInteger PInput; + BigInteger POutput; + BigInteger Q; + BigInteger Bigq; + double scaleTHI; + double scaleStepTHI; + size_t order; + uint32_t numSlots; + uint32_t ringDim; + uint32_t dnum; + std::vector lvlb; +}; + +[[maybe_unused]] const BigInteger Q1(BigInteger(1) << 1); +[[maybe_unused]] const BigInteger Q2(BigInteger(1) << 2); +[[maybe_unused]] const BigInteger Q4(BigInteger(1) << 4); +[[maybe_unused]] const BigInteger Q8(BigInteger(1) << 8); +[[maybe_unused]] const BigInteger Q12(BigInteger(1) << 12); +[[maybe_unused]] const BigInteger Q32(BigInteger(1) << 32); +[[maybe_unused]] const BigInteger Q33(BigInteger(1) << 33); +[[maybe_unused]] const BigInteger Q35(BigInteger(1) << 35); +[[maybe_unused]] const BigInteger Q38(BigInteger(1) << 38); +[[maybe_unused]] const BigInteger Q47(BigInteger(1) << 47); +[[maybe_unused]] const BigInteger Q55(BigInteger(1) << 55); +[[maybe_unused]] const BigInteger Q60(BigInteger(1) << 60); +[[maybe_unused]] const BigInteger Q71(BigInteger(1) << 71); +[[maybe_unused]] const BigInteger Q80(BigInteger(1) << 80); + +// clang-format off +[[maybe_unused]] std::map arblut_configs = { + // QBFVInit, PInput, POutput, Q, Bigq, scaleTHI, scaleStepTHI, order, numSlots, ringDim, dnum, lvlBudget + {1, { Q60, Q1, Q1, Q33, Q33, 1.0, 1.0, 1, 1 << 15, 1 << 15, 3, {3, 3}}}, + {2, { Q60, Q2, Q2, Q35, Q35, 16.0, 1.0, 1, 1 << 16, 1 << 16, 3, {4, 4}}}, + {4, { Q60, Q4, Q4, Q38, Q38, 32.0, 1.0, 1, 1 << 16, 1 << 16, 3, {4, 4}}}, + {8, { Q60, Q8, Q8, Q47, Q47, 32.0, 1.0, 1, 1 << 16, 1 << 16, 4, {3, 3}}}, + {12, { Q80, Q12, Q12, Q55, Q55, 2000.0, 1.0, 1, 1 << 17, 1 << 17, 3, {4, 4}}}, + {32, { Q80, Q32, Q4, Q71, Q47, 256.0, 16.0, 1, 1 << 16, 1 << 16, 4, {3, 3}}} +}; +// clang-format on + +[[maybe_unused]] static void ArbLUTBits(benchmark::internal::Benchmark* b) { + for (uint32_t bits : {12, 8, 4, 2, 1}) + b->ArgName("bits")->Arg(bits); +} + +[[maybe_unused]] static void FBTSetup(benchmark::State& state) { + auto t = arblut_configs[12]; + + bool flagSP = (t.numSlots <= t.ringDim / 2); // sparse packing + + auto numSlotsCKKS = flagSP ? 
t.numSlots : t.numSlots / 2; + + auto a = t.PInput.ConvertToInt(); + auto b = t.POutput.ConvertToInt(); + auto f = [a, b](int64_t x) -> int64_t { + return (x % a - a / 2) % b; + }; + + std::vector x = { + (t.PInput.ConvertToInt() / 2), (t.PInput.ConvertToInt() / 2) + 1, 0, 3, 16, 33, 64, + (t.PInput.ConvertToInt() - 1)}; + if (x.size() < t.numSlots) + x = Fill(x, t.numSlots); + + std::vector coeffint; + std::vector> coeffcomp; + bool binaryLUT = (t.PInput.ConvertToInt() == 2) && (t.order == 1); + if (binaryLUT) // coeffs for [1, cos^2(pi x)], not [1, cos(2pi x)] + coeffint = {f(1), f(0) - f(1)}; + else // divided by 2 + coeffcomp = GetHermiteTrigCoefficients(f, t.PInput.ConvertToInt(), t.order, t.scaleTHI); + + uint32_t dcrtBits = t.Bigq.GetMSB() - 1; + CCParams parameters; + parameters.SetSecretKeyDist(SPARSE_ENCAPSULATED); + parameters.SetSecurityLevel(HEStd_NotSet); + parameters.SetScalingModSize(dcrtBits); + parameters.SetScalingTechnique(FIXEDMANUAL); + parameters.SetFirstModSize(dcrtBits); + parameters.SetNumLargeDigits(t.dnum); + parameters.SetBatchSize(numSlotsCKKS); + parameters.SetRingDim(t.ringDim); + + uint32_t depth = 0; + if (binaryLUT) + depth += FHECKKSRNS::GetFBTDepth(t.lvlb, coeffint, t.PInput, t.order, SPARSE_ENCAPSULATED); + else + depth += FHECKKSRNS::GetFBTDepth(t.lvlb, coeffcomp, t.PInput, t.order, SPARSE_ENCAPSULATED); + parameters.SetMultiplicativeDepth(depth); + + auto cc = GenCryptoContext(parameters); + cc->Enable(PKE); + cc->Enable(KEYSWITCH); + cc->Enable(LEVELEDSHE); + cc->Enable(ADVANCEDSHE); + cc->Enable(FHE); + + auto keyPair = cc->KeyGen(); + + while (state.KeepRunning()) { + if (binaryLUT) + cc->EvalFBTSetup(coeffint, numSlotsCKKS, t.PInput, t.POutput, t.Bigq, keyPair.publicKey, {0, 0}, t.lvlb, 0, + 0, t.order); + else + cc->EvalFBTSetup(coeffcomp, numSlotsCKKS, t.PInput, t.POutput, t.Bigq, keyPair.publicKey, {0, 0}, t.lvlb, 0, + 0, t.order); + } + + cc->ClearStaticMapsAndVectors(); +} + +[[maybe_unused]] static void FBTKeyGen(benchmark::State& state) { + auto t = arblut_configs[12]; + + bool flagSP = (t.numSlots <= t.ringDim / 2); // sparse packing + + auto numSlotsCKKS = flagSP ? 
t.numSlots : t.numSlots / 2; + + auto a = t.PInput.ConvertToInt(); + auto b = t.POutput.ConvertToInt(); + auto f = [a, b](int64_t x) -> int64_t { + return (x % a - a / 2) % b; + }; + + std::vector x = { + (t.PInput.ConvertToInt() / 2), (t.PInput.ConvertToInt() / 2) + 1, 0, 3, 16, 33, 64, + (t.PInput.ConvertToInt() - 1)}; + if (x.size() < t.numSlots) + x = Fill(x, t.numSlots); + + std::vector coeffint; + std::vector> coeffcomp; + bool binaryLUT = (t.PInput.ConvertToInt() == 2) && (t.order == 1); + if (binaryLUT) // coeffs for [1, cos^2(pi x)], not [1, cos(2pi x)] + coeffint = {f(1), f(0) - f(1)}; + else // divided by 2 + coeffcomp = GetHermiteTrigCoefficients(f, t.PInput.ConvertToInt(), t.order, t.scaleTHI); + + uint32_t dcrtBits = t.Bigq.GetMSB() - 1; + CCParams parameters; + parameters.SetSecretKeyDist(SPARSE_ENCAPSULATED); + parameters.SetSecurityLevel(HEStd_NotSet); + parameters.SetScalingModSize(dcrtBits); + parameters.SetScalingTechnique(FIXEDMANUAL); + parameters.SetFirstModSize(dcrtBits); + parameters.SetNumLargeDigits(t.dnum); + parameters.SetBatchSize(numSlotsCKKS); + parameters.SetRingDim(t.ringDim); + + uint32_t depth = 0; + if (binaryLUT) + depth += FHECKKSRNS::GetFBTDepth(t.lvlb, coeffint, t.PInput, t.order, SPARSE_ENCAPSULATED); + else + depth += FHECKKSRNS::GetFBTDepth(t.lvlb, coeffcomp, t.PInput, t.order, SPARSE_ENCAPSULATED); + parameters.SetMultiplicativeDepth(depth); + + auto cc = GenCryptoContext(parameters); + cc->Enable(PKE); + cc->Enable(KEYSWITCH); + cc->Enable(LEVELEDSHE); + cc->Enable(ADVANCEDSHE); + cc->Enable(FHE); + + auto keyPair = cc->KeyGen(); + + if (binaryLUT) + cc->EvalFBTSetup(coeffint, numSlotsCKKS, t.PInput, t.POutput, t.Bigq, keyPair.publicKey, {0, 0}, t.lvlb, 0, 0, + t.order); + else + cc->EvalFBTSetup(coeffcomp, numSlotsCKKS, t.PInput, t.POutput, t.Bigq, keyPair.publicKey, {0, 0}, t.lvlb, 0, 0, + t.order); + + while (state.KeepRunning()) { + cc->EvalBootstrapKeyGen(keyPair.secretKey, numSlotsCKKS); + cc->EvalMultKeyGen(keyPair.secretKey); + } + + cc->ClearStaticMapsAndVectors(); +} + +[[maybe_unused]] static void FBTArbLUT(benchmark::State& state) { + auto t = arblut_configs[state.range(0)]; + + bool flagSP = (t.numSlots <= t.ringDim / 2); // sparse packing + + auto numSlotsCKKS = flagSP ? 
t.numSlots : t.numSlots / 2; + + auto a = t.PInput.ConvertToInt(); + auto b = t.POutput.ConvertToInt(); + auto f = [a, b](int64_t x) -> int64_t { + return (x % a - a / 2) % b; + }; + + std::vector x = { + (t.PInput.ConvertToInt() / 2), (t.PInput.ConvertToInt() / 2) + 1, 0, 3, 16, 33, 64, + (t.PInput.ConvertToInt() - 1)}; + if (x.size() < t.numSlots) + x = Fill(x, t.numSlots); + + std::vector coeffint; + std::vector> coeffcomp; + bool binaryLUT = (t.PInput.ConvertToInt() == 2) && (t.order == 1); + if (binaryLUT) // coeffs for [1, cos^2(pi x)], not [1, cos(2pi x)] + coeffint = {f(1), f(0) - f(1)}; + else // divided by 2 + coeffcomp = GetHermiteTrigCoefficients(f, t.PInput.ConvertToInt(), t.order, t.scaleTHI); + + uint32_t dcrtBits = t.Bigq.GetMSB() - 1; + CCParams parameters; + parameters.SetSecretKeyDist(SPARSE_ENCAPSULATED); + parameters.SetSecurityLevel(HEStd_NotSet); + parameters.SetScalingModSize(dcrtBits); + parameters.SetScalingTechnique(FIXEDMANUAL); + parameters.SetFirstModSize(dcrtBits); + parameters.SetNumLargeDigits(t.dnum); + parameters.SetBatchSize(numSlotsCKKS); + parameters.SetRingDim(t.ringDim); + + uint32_t depth = 0; + if (binaryLUT) + depth += FHECKKSRNS::GetFBTDepth(t.lvlb, coeffint, t.PInput, t.order, SPARSE_ENCAPSULATED); + else + depth += FHECKKSRNS::GetFBTDepth(t.lvlb, coeffcomp, t.PInput, t.order, SPARSE_ENCAPSULATED); + parameters.SetMultiplicativeDepth(depth); + + auto cc = GenCryptoContext(parameters); + cc->Enable(PKE); + cc->Enable(KEYSWITCH); + cc->Enable(LEVELEDSHE); + cc->Enable(ADVANCEDSHE); + cc->Enable(FHE); + + auto keyPair = cc->KeyGen(); + + if (binaryLUT) + cc->EvalFBTSetup(coeffint, numSlotsCKKS, t.PInput, t.POutput, t.Bigq, keyPair.publicKey, {0, 0}, t.lvlb, 0, 0, + t.order); + else + cc->EvalFBTSetup(coeffcomp, numSlotsCKKS, t.PInput, t.POutput, t.Bigq, keyPair.publicKey, {0, 0}, t.lvlb, 0, 0, + t.order); + + cc->EvalBootstrapKeyGen(keyPair.secretKey, numSlotsCKKS); + cc->EvalMultKeyGen(keyPair.secretKey); + + auto ep = SchemeletRLWEMP::GetElementParams(keyPair.secretKey, depth); + + auto ctxtBFV = SchemeletRLWEMP::EncryptCoeff(x, t.QBFVInit, t.PInput, keyPair.secretKey, ep); + + SchemeletRLWEMP::ModSwitch(ctxtBFV, t.Q, t.QBFVInit); + + auto ctxt = SchemeletRLWEMP::ConvertRLWEToCKKS(*cc, ctxtBFV, keyPair.publicKey, t.Bigq, numSlotsCKKS, depth); + + while (state.KeepRunning()) { + Ciphertext ctxtAfterFBT; + if (binaryLUT) + ctxtAfterFBT = cc->EvalFBT(ctxt, coeffint, t.PInput.GetMSB() - 1, ep->GetModulus(), t.scaleTHI, 0, t.order); + else + ctxtAfterFBT = + cc->EvalFBT(ctxt, coeffcomp, t.PInput.GetMSB() - 1, ep->GetModulus(), t.scaleTHI, 0, t.order); + ctxtAfterFBT.reset(); + } + + cc->ClearStaticMapsAndVectors(); +} + +[[maybe_unused]] static void FBTSignDigit32(benchmark::State& state) { + auto t = arblut_configs[32]; + + bool flagSP = (t.numSlots <= t.ringDim / 2); // sparse packing + + auto numSlotsCKKS = flagSP ? 
t.numSlots : t.numSlots / 2; + + auto a = t.PInput.ConvertToInt(); + auto b = t.POutput.ConvertToInt(); + + auto funcMod = [b](int64_t x) -> int64_t { + return (x % b); + }; + auto funcStep = [a, b](int64_t x) -> int64_t { + return (x % a) >= (b / 2); + }; + + std::vector x = { + t.PInput.ConvertToInt() / 2, t.PInput.ConvertToInt() / 2 + 1, 0, 3, 16, 33, 64, + t.PInput.ConvertToInt() - 1}; + if (x.size() < t.numSlots) + x = Fill(x, t.numSlots); + + auto exact(x); + std::transform(x.begin(), x.end(), exact.begin(), + [&](const int64_t& elem) { return (elem >= t.PInput.ConvertToDouble() / 2.); }); + + std::vector coeffintMod; + std::vector> coeffcompMod; + std::vector> coeffcompStep; + bool binaryLUT = (t.POutput.ConvertToInt() == 2) && (t.order == 1); + if (binaryLUT) { + coeffintMod = {funcMod(1), funcMod(0) - funcMod(1)}; // coeffs for [1, cos^2(pi x)], not [1, cos(2pi x)] + } + else { + coeffcompMod = + GetHermiteTrigCoefficients(funcMod, t.POutput.ConvertToInt(), t.order, t.scaleTHI); // divided by 2 + coeffcompStep = GetHermiteTrigCoefficients(funcStep, t.POutput.ConvertToInt(), t.order, + t.scaleStepTHI); // divided by 2 + } + + uint32_t dcrtBits = t.Bigq.GetMSB() - 1; + CCParams parameters; + parameters.SetSecretKeyDist(SPARSE_ENCAPSULATED); + parameters.SetSecurityLevel(HEStd_NotSet); + parameters.SetScalingModSize(dcrtBits); + parameters.SetScalingTechnique(FIXEDMANUAL); + parameters.SetFirstModSize(dcrtBits); + parameters.SetNumLargeDigits(t.dnum); + parameters.SetBatchSize(numSlotsCKKS); + parameters.SetRingDim(t.ringDim); + + uint32_t depth = 0; + if (binaryLUT) + depth += FHECKKSRNS::GetFBTDepth(t.lvlb, coeffintMod, t.PInput, t.order, SPARSE_ENCAPSULATED); + else + depth += FHECKKSRNS::GetFBTDepth(t.lvlb, coeffcompMod, t.PInput, t.order, SPARSE_ENCAPSULATED); + parameters.SetMultiplicativeDepth(depth); + + auto cc = GenCryptoContext(parameters); + cc->Enable(PKE); + cc->Enable(KEYSWITCH); + cc->Enable(LEVELEDSHE); + cc->Enable(ADVANCEDSHE); + cc->Enable(FHE); + + auto keyPair = cc->KeyGen(); + + if (binaryLUT) + cc->EvalFBTSetup(coeffintMod, numSlotsCKKS, t.POutput, t.PInput, t.Bigq, keyPair.publicKey, {0, 0}, t.lvlb, 0, + 0, t.order); + else + cc->EvalFBTSetup(coeffcompMod, numSlotsCKKS, t.POutput, t.PInput, t.Bigq, keyPair.publicKey, {0, 0}, t.lvlb, 0, + 0, t.order); + + cc->EvalBootstrapKeyGen(keyPair.secretKey, numSlotsCKKS); + cc->EvalMultKeyGen(keyPair.secretKey); + + auto ep = SchemeletRLWEMP::GetElementParams(keyPair.secretKey, depth); + + std::vector coeffint; + std::vector> coeffcomp; + if (binaryLUT) + coeffint = coeffintMod; + else + coeffcomp = coeffcompMod; + + while (state.KeepRunning()) { + auto ctxtBFV = SchemeletRLWEMP::EncryptCoeff(x, t.QBFVInit, t.PInput, keyPair.secretKey, ep); + + SchemeletRLWEMP::ModSwitch(ctxtBFV, t.Q, t.QBFVInit); + + uint32_t QBFVBits = t.Q.GetMSB() - 1; + + auto Q = t.Q; + auto PInput = t.PInput; + + BigInteger QNew; + + const bool checkgt2 = t.POutput.ConvertToInt() > 2; + const uint32_t pDigitBits = t.POutput.GetMSB() - 1; + + uint64_t scaleTHI = t.scaleTHI; + bool step = false; + bool go = QBFVBits > dcrtBits; + size_t levelsToDrop = 0; + uint32_t postScalingBits = 0; + + // For arbitrary digit size, pNew > 2, the last iteration needs to evaluate step pNew not mod pNew. + // Currently this only works when log(pNew) divides log(p). 
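+        // Outline of the digit-extraction loop below (a summary of the operations it performs):
+        //   1. reduce the running BFV/RLWE ciphertext modulo the small modulus q (SwitchModulus to Bigq);
+        //   2. import the digit into CKKS (ConvertRLWEToCKKS) and bootstrap it with EvalFBT, evaluating
+        //      mod pNew on regular passes and the step function on the final pass (sign extraction);
+        //   3. export the result back to RLWE coefficients (ConvertCKKSToRLWE), subtract the recovered
+        //      digit, and rescale the ciphertext from Q to Q/pNew (MultiplyAndRound + SwitchModulus),
+        //      repeating until only the most significant (sign) digit remains.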
+ while (go) { + auto encryptedDigit = ctxtBFV; + + // Apply mod q + encryptedDigit[0].SwitchModulus(t.Bigq, 1, 0, 0); + encryptedDigit[1].SwitchModulus(t.Bigq, 1, 0, 0); + + auto ctxt = + SchemeletRLWEMP::ConvertRLWEToCKKS(*cc, encryptedDigit, keyPair.publicKey, t.Bigq, numSlotsCKKS, depth); + + // Bootstrap the digit. + Ciphertext ctxtAfterFBT; + if (binaryLUT) + ctxtAfterFBT = cc->EvalFBT(ctxt, coeffint, pDigitBits, ep->GetModulus(), + scaleTHI * (1 << postScalingBits), levelsToDrop, t.order); + else + ctxtAfterFBT = cc->EvalFBT(ctxt, coeffcomp, pDigitBits, ep->GetModulus(), + scaleTHI * (1 << postScalingBits), levelsToDrop, t.order); + + auto polys = SchemeletRLWEMP::ConvertCKKSToRLWE(ctxtAfterFBT, Q); + + if (!step) { + QNew = Q >> pDigitBits; + + // Subtract digit and switch mod from Q to QNew for BFV ciphertext + ctxtBFV[0] = (ctxtBFV[0] - polys[0]).MultiplyAndRound(QNew, Q); + ctxtBFV[0].SwitchModulus(QNew, 1, 0, 0); + ctxtBFV[1] = (ctxtBFV[1] - polys[1]).MultiplyAndRound(QNew, Q); + ctxtBFV[1].SwitchModulus(QNew, 1, 0, 0); + Q >>= pDigitBits; + PInput >>= pDigitBits; + QBFVBits -= pDigitBits; + postScalingBits += pDigitBits; + } + else { + ctxtBFV[0] = std::move(polys[0]); + ctxtBFV[1] = std::move(polys[1]); + } + + go = QBFVBits > dcrtBits; + + if (checkgt2 && !go && !step) { + if (!binaryLUT) + coeffcomp = coeffcompStep; + scaleTHI = t.scaleStepTHI; + step = true; + go = true; + int64_t lvlsToDrop = GetMultiplicativeDepthByCoeffVector(coeffcompMod, true) - + GetMultiplicativeDepthByCoeffVector(coeffcompStep, true); + if (coeffcompMod.size() > 4 && lvlsToDrop > 0) + levelsToDrop = lvlsToDrop; + } + } + } + + cc->ClearStaticMapsAndVectors(); +} + +BENCHMARK(FBTArbLUT)->Unit(benchmark::kSecond)->Iterations(4)->Apply(ArbLUTBits); +BENCHMARK(FBTSignDigit32)->Unit(benchmark::kSecond)->Iterations(4); +BENCHMARK(FBTSetup)->Unit(benchmark::kSecond)->Iterations(10); +BENCHMARK(FBTKeyGen)->Unit(benchmark::kSecond)->Iterations(4); + +BENCHMARK_MAIN(); diff --git a/benchmark/src/poly-benchmark.h b/benchmark/src/poly-benchmark.h index ad1361760..87aa0a198 100644 --- a/benchmark/src/poly-benchmark.h +++ b/benchmark/src/poly-benchmark.h @@ -45,10 +45,10 @@ using namespace lbcrypto; -constexpr size_t POLY_NUM = 16; +constexpr size_t POLY_NUM = 8; constexpr size_t POLY_NUM_M1 = (POLY_NUM - 1); -std::vector tow_args({1, 2, 4, 8, 16}); +std::vector tow_args({1, 2, 4, 8, 16, 32}); std::shared_ptr> NativepolysEval; std::shared_ptr> NativepolysCoef; std::map>> DCRTpolysEval; @@ -219,7 +219,27 @@ static void GenerateDCRTPolys(uint32_t order, uint32_t bits, // ************************************************************************************ -[[maybe_unused]] static void Native_ntt(benchmark::State& state) { +[[maybe_unused]] static void Native_Copy(benchmark::State& state) { + auto polys = NativepolysEval; + NativePoly p; + size_t i{0}; + while (state.KeepRunning()) { + benchmark::DoNotOptimize(p = (*polys)[(i = (i + 1) & POLY_NUM_M1)]); + } +} + +[[maybe_unused]] static void DCRT_Copy(benchmark::State& state) { + auto polys = DCRTpolysEval[state.range(0)]; + DCRTPoly p; + size_t i{0}; + while (state.KeepRunning()) { + benchmark::DoNotOptimize(p = (*polys)[(i = (i + 1) & POLY_NUM_M1)]); + } +} + +// ************************************************************************************ + +[[maybe_unused]] static void Native_Copy_ntt(benchmark::State& state) { std::shared_ptr> polys = NativepolysCoef; NativePoly p; size_t i{POLY_NUM_M1}; @@ -229,7 +249,7 @@ static void GenerateDCRTPolys(uint32_t order, 
uint32_t bits, } } -[[maybe_unused]] static void DCRT_ntt(benchmark::State& state) { +[[maybe_unused]] static void DCRT_Copy_ntt(benchmark::State& state) { std::shared_ptr> polys = DCRTpolysCoef[state.range(0)]; DCRTPoly p; size_t i{POLY_NUM_M1}; @@ -239,7 +259,7 @@ static void GenerateDCRTPolys(uint32_t order, uint32_t bits, } } -[[maybe_unused]] static void Native_intt(benchmark::State& state) { +[[maybe_unused]] static void Native_Copy_intt(benchmark::State& state) { std::shared_ptr> polys = NativepolysEval; NativePoly p; size_t i{POLY_NUM_M1}; @@ -249,7 +269,7 @@ static void GenerateDCRTPolys(uint32_t order, uint32_t bits, } } -[[maybe_unused]] static void DCRT_intt(benchmark::State& state) { +[[maybe_unused]] static void DCRT_Copy_intt(benchmark::State& state) { std::shared_ptr> polys = DCRTpolysEval[state.range(0)]; DCRTPoly p; size_t i{POLY_NUM_M1}; @@ -259,6 +279,26 @@ static void GenerateDCRTPolys(uint32_t order, uint32_t bits, } } +[[maybe_unused]] static void Native_avg_ntt_intt(benchmark::State& state) { + auto polys = *NativepolysCoef; + NativePoly* p; + size_t i{POLY_NUM_M1}; + while (state.KeepRunning()) { + p = &polys[(i = (i + 1) & POLY_NUM_M1)]; + p->SwitchFormat(); + } +} + +[[maybe_unused]] static void DCRT_avg_ntt_intt(benchmark::State& state) { + auto polys = *DCRTpolysCoef[state.range(0)]; + DCRTPoly* p; + size_t i{POLY_NUM_M1}; + while (state.KeepRunning()) { + p = &polys[(i = (i + 1) & POLY_NUM_M1)]; + p->SwitchFormat(); + } +} + [[maybe_unused]] static void Native_ntt_intt(benchmark::State& state) { std::shared_ptr> polys = NativepolysCoef; NativePoly* p; @@ -369,35 +409,47 @@ static void GenerateDCRTPolys(uint32_t order, uint32_t bits, // BENCHMARK(Native_Add)->Unit(benchmark::kMicrosecond); // BENCHMARK(DCRT_Add)->Unit(benchmark::kMicrosecond)->Apply(DCRTArguments); -BENCHMARK(Native_AddEq)->Unit(benchmark::kMicrosecond); -BENCHMARK(DCRT_AddEq)->Unit(benchmark::kMicrosecond)->Apply(DCRTArguments); + +BENCHMARK(Native_AddEq)->Unit(benchmark::kMicrosecond)->MinTime(5.0); +BENCHMARK(DCRT_AddEq)->Unit(benchmark::kMicrosecond)->Apply(DCRTArguments)->MinTime(5.0); // BENCHMARK(Native_Sub)->Unit(benchmark::kMicrosecond); // BENCHMARK(DCRT_Sub)->Unit(benchmark::kMicrosecond)->Apply(DCRTArguments); -BENCHMARK(Native_SubEq)->Unit(benchmark::kMicrosecond); -BENCHMARK(DCRT_SubEq)->Unit(benchmark::kMicrosecond)->Apply(DCRTArguments); + +BENCHMARK(Native_SubEq)->Unit(benchmark::kMicrosecond)->MinTime(5.0); +BENCHMARK(DCRT_SubEq)->Unit(benchmark::kMicrosecond)->Apply(DCRTArguments)->MinTime(5.0); // BENCHMARK(Native_Mul)->Unit(benchmark::kMicrosecond); // BENCHMARK(DCRT_Mul)->Unit(benchmark::kMicrosecond)->Apply(DCRTArguments); -BENCHMARK(Native_MulEq)->Unit(benchmark::kMicrosecond); -BENCHMARK(DCRT_MulEq)->Unit(benchmark::kMicrosecond)->Apply(DCRTArguments); -BENCHMARK(Native_ntt)->Unit(benchmark::kMicrosecond); -BENCHMARK(DCRT_ntt)->Unit(benchmark::kMicrosecond)->Apply(DCRTArguments); -BENCHMARK(Native_intt)->Unit(benchmark::kMicrosecond); -BENCHMARK(DCRT_intt)->Unit(benchmark::kMicrosecond)->Apply(DCRTArguments); +BENCHMARK(Native_MulEq)->Unit(benchmark::kMicrosecond)->MinTime(5.0); +BENCHMARK(DCRT_MulEq)->Unit(benchmark::kMicrosecond)->Apply(DCRTArguments)->MinTime(5.0); + +BENCHMARK(Native_Copy)->Unit(benchmark::kMicrosecond)->MinTime(5.0); +BENCHMARK(DCRT_Copy)->Unit(benchmark::kMicrosecond)->Apply(DCRTArguments)->MinTime(5.0); + +BENCHMARK(Native_Copy_ntt)->Unit(benchmark::kMicrosecond)->MinTime(5.0); 
+BENCHMARK(DCRT_Copy_ntt)->Unit(benchmark::kMicrosecond)->Apply(DCRTArguments)->MinTime(5.0); + +BENCHMARK(Native_Copy_intt)->Unit(benchmark::kMicrosecond)->MinTime(5.0); +BENCHMARK(DCRT_Copy_intt)->Unit(benchmark::kMicrosecond)->Apply(DCRTArguments)->MinTime(5.0); + +BENCHMARK(Native_avg_ntt_intt)->Unit(benchmark::kMicrosecond)->MinTime(5.0); +BENCHMARK(DCRT_avg_ntt_intt)->Unit(benchmark::kMicrosecond)->Apply(DCRTArguments)->MinTime(5.0); + // BENCHMARK(Native_ntt_intt)->Unit(benchmark::kMicrosecond); // BENCHMARK(DCRT_ntt_intt)->Unit(benchmark::kMicrosecond)->Apply(DCRTArguments); + // BENCHMARK(Native_intt_ntt)->Unit(benchmark::kMicrosecond); // BENCHMARK(DCRT_intt_ntt)->Unit(benchmark::kMicrosecond)->Apply(DCRTArguments); -BENCHMARK(Native_CRTInterpolate)->Unit(benchmark::kMicrosecond); -BENCHMARK(DCRT_CRTInterpolate)->Unit(benchmark::kMicrosecond)->Apply(DCRTArguments); +BENCHMARK(Native_CRTInterpolate)->Unit(benchmark::kMicrosecond)->MinTime(5.0); +BENCHMARK(DCRT_CRTInterpolate)->Unit(benchmark::kMicrosecond)->Apply(DCRTArguments)->MinTime(5.0); -BENCHMARK(Native_DecryptionCRTInterpolate)->Unit(benchmark::kMicrosecond); -BENCHMARK(DCRT_DecryptionCRTInterpolate)->Unit(benchmark::kMicrosecond)->Apply(DCRTArguments); +// BENCHMARK(Native_DecryptionCRTInterpolate)->Unit(benchmark::kMicrosecond); +// BENCHMARK(DCRT_DecryptionCRTInterpolate)->Unit(benchmark::kMicrosecond)->Apply(DCRTArguments); -BENCHMARK(Native_BaseDecompose)->Unit(benchmark::kMicrosecond); -BENCHMARK(DCRT_BaseDecompose)->Unit(benchmark::kMicrosecond)->Apply(DCRTArguments); +// BENCHMARK(Native_BaseDecompose)->Unit(benchmark::kMicrosecond); +// BENCHMARK(DCRT_BaseDecompose)->Unit(benchmark::kMicrosecond)->Apply(DCRTArguments); #endif diff --git a/src/binfhe/lib/lwe-pke.cpp b/src/binfhe/lib/lwe-pke.cpp index 5af69ecef..cf356d298 100644 --- a/src/binfhe/lib/lwe-pke.cpp +++ b/src/binfhe/lib/lwe-pke.cpp @@ -301,9 +301,9 @@ LWESwitchingKey LWEEncryptionScheme::KeySwitchGen(const std::shared_ptr>> resultVecA(N); std::vector>> resultVecB(N); - // TODO (cpascoe/dsuponit): this pragma needs to be revised as it may have to be removed completely + // TODO: parallelize loop using fix from KeySwitchHYBRID::KeySwitchGenInternal + // #if !defined(__MINGW32__) && !defined(__MINGW64__) - // #pragma omp parallel for num_threads(N) // #pragma omp parallel for num_threads(OpenFHEParallelControls.GetThreadLimit(N)) // #endif for (size_t i = 0; i < N; ++i) { @@ -377,7 +377,7 @@ LWECiphertext LWEEncryptionScheme::KeySwitch(const std::shared_ptr& params, LWEPlaintext m) const { NativeInteger q(params->Getq()); - return std::make_shared(NativeVector(params->Getn(), q), (q >> 2)*m); + return std::make_shared(NativeVector(params->Getn(), q), (q >> 2) * m); } }; // namespace lbcrypto diff --git a/src/core/include/math/hal/intnat/transformnat-impl.h b/src/core/include/math/hal/intnat/transformnat-impl.h index f9365e822..b52d59d52 100644 --- a/src/core/include/math/hal/intnat/transformnat-impl.h +++ b/src/core/include/math/hal/intnat/transformnat-impl.h @@ -47,6 +47,7 @@ #include "utils/utilities.h" #include +#include #include namespace intnat { @@ -120,12 +121,12 @@ std::map ChineseRemainderTransformArbNat::m_DivisionNTTRootOfUnity; template -std::map ChineseRemainderTransformArbNat::m_nttDivisionDim; +std::map ChineseRemainderTransformArbNat::m_nttDivisionDim; template void NumberTheoreticTransformNat::ForwardTransformIterative(const VecType& element, const VecType& rootOfUnityTable, VecType* result) { - usint n = element.GetLength(); + uint32_t n = 
element.GetLength(); if (result->GetLength() != n) { OPENFHE_THROW("size of input element and size of output element not of same size"); } @@ -134,24 +135,24 @@ void NumberTheoreticTransformNat::ForwardTransformIterative(const VecTy IntType mu = modulus.ComputeMu(); result->SetModulus(modulus); - usint msb = GetMSB(n - 1); + uint32_t msb = GetMSB(n - 1); for (size_t i = 0; i < n; i++) { (*result)[i] = element[ReverseBits(i, msb)]; } IntType omega, omegaFactor, oddVal, evenVal; - usint logm, i, j, indexEven, indexOdd; + uint32_t logm, i, j, indexEven, indexOdd; - usint logn = GetMSB(n - 1); + uint32_t logn = GetMSB(n - 1); for (logm = 1; logm <= logn; logm++) { // calculate the i indexes into the root table one time per loop - std::vector indexes(1 << (logm - 1)); - for (i = 0; i < (usint)(1 << (logm - 1)); i++) { + std::vector indexes(1 << (logm - 1)); + for (i = 0; i < (uint32_t)(1 << (logm - 1)); i++) { indexes[i] = (i << (logn - logm)); } for (j = 0; j < n; j = j + (1 << logm)) { - for (i = 0; i < (usint)(1 << (logm - 1)); i++) { + for (i = 0; i < (uint32_t)(1 << (logm - 1)); i++) { omega = rootOfUnityTable[indexes[i]]; indexEven = j + i; indexOdd = indexEven + (1 << (logm - 1)); @@ -182,14 +183,14 @@ template void NumberTheoreticTransformNat::InverseTransformIterative(const VecType& element, const VecType& rootOfUnityInverseTable, VecType* result) { - usint n = element.GetLength(); + uint32_t n = element.GetLength(); IntType modulus = element.GetModulus(); IntType mu = modulus.ComputeMu(); NumberTheoreticTransformNat().ForwardTransformIterative(element, rootOfUnityInverseTable, result); IntType cycloOrderInv(IntType(n).ModInverse(modulus)); - for (usint i = 0; i < n; i++) { + for (uint32_t i = 0; i < n; i++) { (*result)[i].ModMulEq(cycloOrderInv, modulus, mu); } return; @@ -198,15 +199,15 @@ void NumberTheoreticTransformNat::InverseTransformIterative(const VecTy template void NumberTheoreticTransformNat::ForwardTransformToBitReverseInPlace(const VecType& rootOfUnityTable, VecType* element) { - usint n = element->GetLength(); + uint32_t n = element->GetLength(); IntType modulus = element->GetModulus(); IntType mu = modulus.ComputeMu(); - usint i, m, j1, j2, indexOmega, indexLo, indexHi; + uint32_t i, m, j1, j2, indexOmega, indexLo, indexHi; IntType omega, omegaFactor, loVal, hiVal; - usint t = (n >> 1); - usint logt1 = GetMSB(t); + uint32_t t = (n >> 1); + uint32_t logt1 = GetMSB(t); for (m = 1; m < n; m <<= 1) { for (i = 0; i < m; ++i) { j1 = i << logt1; @@ -243,7 +244,7 @@ template void NumberTheoreticTransformNat::ForwardTransformToBitReverse(const VecType& element, const VecType& rootOfUnityTable, VecType* result) { - usint n = element.GetLength(); + uint32_t n = element.GetLength(); if (result->GetLength() != n) { OPENFHE_THROW("size of input element and size of output element not of same size"); } @@ -252,15 +253,15 @@ void NumberTheoreticTransformNat::ForwardTransformToBitReverse(const Ve IntType mu = modulus.ComputeMu(); result->SetModulus(modulus); - usint i, m, j1, j2, indexOmega, indexLo, indexHi; + uint32_t i, m, j1, j2, indexOmega, indexLo, indexHi; IntType omega, omegaFactor, loVal, hiVal, zero(0); for (i = 0; i < n; ++i) { (*result)[i] = element[i]; } - usint t = (n >> 1); - usint logt1 = GetMSB(t); + uint32_t t = (n >> 1); + uint32_t logt1 = GetMSB(t); for (m = 1; m < n; m <<= 1) { for (i = 0; i < m; ++i) { j1 = i << logt1; @@ -377,7 +378,7 @@ void NumberTheoreticTransformNat::ForwardTransformToBitReverse(const Ve const VecType& rootOfUnityTable, const VecType& 
preconRootOfUnityTable, VecType* result) { - usint n = element.GetLength(); + uint32_t n = element.GetLength(); if (result->GetLength() != n) { OPENFHE_THROW("size of input element and size of output element not of same size"); @@ -395,8 +396,8 @@ void NumberTheoreticTransformNat::ForwardTransformToBitReverse(const Ve NativeInteger preconOmega; IntType omega, omegaFactor, loVal, hiVal, zero(0); - usint t = (n >> 1); - usint logt1 = GetMSB(t); + uint32_t t = (n >> 1); + uint32_t logt1 = GetMSB(t); for (uint32_t m = 1; m < n; m <<= 1, t >>= 1, --logt1) { uint32_t j1, j2; for (uint32_t i = 0; i < m; ++i) { @@ -438,15 +439,15 @@ template void NumberTheoreticTransformNat::InverseTransformFromBitReverseInPlace(const VecType& rootOfUnityInverseTable, const IntType& cycloOrderInv, VecType* element) { - usint n = element->GetLength(); + uint32_t n = element->GetLength(); IntType modulus = element->GetModulus(); IntType mu = modulus.ComputeMu(); IntType loVal, hiVal, omega, omegaFactor; - usint i, m, j1, j2, indexOmega, indexLo, indexHi; + uint32_t i, m, j1, j2, indexOmega, indexLo, indexHi; - usint t = 1; - usint logt1 = 1; + uint32_t t = 1; + uint32_t logt1 = 1; for (m = (n >> 1); m >= 1; m >>= 1) { for (i = 0; i < m; ++i) { j1 = i << logt1; @@ -493,7 +494,7 @@ void NumberTheoreticTransformNat::InverseTransformFromBitReverse(const const VecType& rootOfUnityInverseTable, const IntType& cycloOrderInv, VecType* result) { - usint n = element.GetLength(); + uint32_t n = element.GetLength(); if (result->GetLength() != n) { OPENFHE_THROW("size of input element and size of output element not of same size"); @@ -501,7 +502,7 @@ void NumberTheoreticTransformNat::InverseTransformFromBitReverse(const result->SetModulus(element.GetModulus()); - for (usint i = 0; i < n; i++) { + for (uint32_t i = 0; i < n; i++) { (*result)[i] = element[i]; } InverseTransformFromBitReverseInPlace(rootOfUnityInverseTable, cycloOrderInv, result); @@ -627,14 +628,14 @@ template void NumberTheoreticTransformNat::InverseTransformFromBitReverse( const VecType& element, const VecType& rootOfUnityInverseTable, const VecType& preconRootOfUnityInverseTable, const IntType& cycloOrderInv, const IntType& preconCycloOrderInv, VecType* result) { - usint n = element.GetLength(); + uint32_t n = element.GetLength(); if (result->GetLength() != n) { OPENFHE_THROW("size of input element and size of output element not of same size"); } result->SetModulus(element.GetModulus()); - for (usint i = 0; i < n; i++) { + for (uint32_t i = 0; i < n; i++) { (*result)[i] = element[i]; } InverseTransformFromBitReverseInPlace(rootOfUnityInverseTable, preconRootOfUnityInverseTable, cycloOrderInv, @@ -645,28 +646,12 @@ void NumberTheoreticTransformNat::InverseTransformFromBitReverse( template void ChineseRemainderTransformFTTNat::ForwardTransformToBitReverseInPlace(const IntType& rootOfUnity, - const usint CycloOrder, + const uint32_t cycloOrder, VecType* element) { - if (rootOfUnity == IntType(1) || rootOfUnity == IntType(0)) { + if (rootOfUnity == IntType(1) || rootOfUnity == IntType(0)) return; - } - - if (!IsPowerOfTwo(CycloOrder)) { - OPENFHE_THROW("CyclotomicOrder is not a power of two"); - } - - usint CycloOrderHf = (CycloOrder >> 1); - if (element->GetLength() != CycloOrderHf) { - OPENFHE_THROW("element size must be equal to CyclotomicOrder / 2"); - } - - IntType modulus = element->GetModulus(); - - auto mapSearch = m_rootOfUnityReverseTableByModulus.find(modulus); - if (mapSearch == m_rootOfUnityReverseTableByModulus.end() || mapSearch->second.GetLength() 
!= CycloOrderHf) { - PreCompute(rootOfUnity, CycloOrder, modulus); - } - + auto modulus = element->GetModulus(); + PreCompute(rootOfUnity, cycloOrder, modulus); NumberTheoreticTransformNat().ForwardTransformToBitReverseInPlace( m_rootOfUnityReverseTableByModulus[modulus], m_rootOfUnityPreconReverseTableByModulus[modulus], element); } @@ -674,28 +659,14 @@ void ChineseRemainderTransformFTTNat::ForwardTransformToBitReverseInPla template void ChineseRemainderTransformFTTNat::ForwardTransformToBitReverse(const VecType& element, const IntType& rootOfUnity, - const usint CycloOrder, VecType* result) { + const uint32_t cycloOrder, + VecType* result) { if (rootOfUnity == IntType(1) || rootOfUnity == IntType(0)) { *result = element; return; } - - if (!IsPowerOfTwo(CycloOrder)) { - OPENFHE_THROW("CyclotomicOrder is not a power of two"); - } - - usint CycloOrderHf = (CycloOrder >> 1); - if (result->GetLength() != CycloOrderHf) { - OPENFHE_THROW("result size must be equal to CyclotomicOrder / 2"); - } - - IntType modulus = element.GetModulus(); - - auto mapSearch = m_rootOfUnityReverseTableByModulus.find(modulus); - if (mapSearch == m_rootOfUnityReverseTableByModulus.end() || mapSearch->second.GetLength() != CycloOrderHf) { - PreCompute(rootOfUnity, CycloOrder, modulus); - } - + auto modulus = element.GetModulus(); + PreCompute(rootOfUnity, cycloOrder, modulus); NumberTheoreticTransformNat().ForwardTransformToBitReverse( element, m_rootOfUnityReverseTableByModulus[modulus], m_rootOfUnityPreconReverseTableByModulus[modulus], result); @@ -705,29 +676,13 @@ void ChineseRemainderTransformFTTNat::ForwardTransformToBitReverse(cons template void ChineseRemainderTransformFTTNat::InverseTransformFromBitReverseInPlace(const IntType& rootOfUnity, - const usint CycloOrder, + const uint32_t cycloOrder, VecType* element) { - if (rootOfUnity == IntType(1) || rootOfUnity == IntType(0)) { + if (rootOfUnity == IntType(1) || rootOfUnity == IntType(0)) return; - } - - if (!IsPowerOfTwo(CycloOrder)) { - OPENFHE_THROW("CyclotomicOrder is not a power of two"); - } - - usint CycloOrderHf = (CycloOrder >> 1); - if (element->GetLength() != CycloOrderHf) { - OPENFHE_THROW("element size must be equal to CyclotomicOrder / 2"); - } - - IntType modulus = element->GetModulus(); - - auto mapSearch = m_rootOfUnityReverseTableByModulus.find(modulus); - if (mapSearch == m_rootOfUnityReverseTableByModulus.end() || mapSearch->second.GetLength() != CycloOrderHf) { - PreCompute(rootOfUnity, CycloOrder, modulus); - } - - usint msb = GetMSB(CycloOrderHf - 1); + auto modulus = element->GetModulus(); + PreCompute(rootOfUnity, cycloOrder, modulus); + uint32_t msb = GetMSB((cycloOrder >> 1) - 1); NumberTheoreticTransformNat().InverseTransformFromBitReverseInPlace( m_rootOfUnityInverseReverseTableByModulus[modulus], m_rootOfUnityInversePreconReverseTableByModulus[modulus], m_cycloOrderInverseTableByModulus[modulus][msb], m_cycloOrderInversePreconTableByModulus[modulus][msb], @@ -737,114 +692,78 @@ void ChineseRemainderTransformFTTNat::InverseTransformFromBitReverseInP template void ChineseRemainderTransformFTTNat::InverseTransformFromBitReverse(const VecType& element, const IntType& rootOfUnity, - const usint CycloOrder, VecType* result) { + const uint32_t cycloOrder, + VecType* result) { if (rootOfUnity == IntType(1) || rootOfUnity == IntType(0)) { *result = element; return; } - - if (!IsPowerOfTwo(CycloOrder)) { - OPENFHE_THROW("CyclotomicOrder is not a power of two"); - } - - usint CycloOrderHf = (CycloOrder >> 1); - if (result->GetLength() != 
CycloOrderHf) { - OPENFHE_THROW("result size must be equal to CyclotomicOrder / 2"); - } - - IntType modulus = element.GetModulus(); - - auto mapSearch = m_rootOfUnityReverseTableByModulus.find(modulus); - if (mapSearch == m_rootOfUnityReverseTableByModulus.end() || mapSearch->second.GetLength() != CycloOrderHf) { - PreCompute(rootOfUnity, CycloOrder, modulus); - } - - usint n = element.GetLength(); - result->SetModulus(element.GetModulus()); - for (usint i = 0; i < n; i++) { + auto modulus = element.GetModulus(); + result->SetModulus(modulus); + PreCompute(rootOfUnity, cycloOrder, modulus); + uint32_t n = element.GetLength(); + for (uint32_t i = 0; i < n; ++i) (*result)[i] = element[i]; - } - - usint msb = GetMSB(CycloOrderHf - 1); + uint32_t msb = GetMSB(n - 1); NumberTheoreticTransformNat().InverseTransformFromBitReverseInPlace( m_rootOfUnityInverseReverseTableByModulus[modulus], m_rootOfUnityInversePreconReverseTableByModulus[modulus], m_cycloOrderInverseTableByModulus[modulus][msb], m_cycloOrderInversePreconTableByModulus[modulus][msb], result); - - return; } template -void ChineseRemainderTransformFTTNat::PreCompute(const IntType& rootOfUnity, const usint CycloOrder, +void ChineseRemainderTransformFTTNat::PreCompute(const IntType& rootOfUnity, const uint32_t cycloOrder, const IntType& modulus) { - usint CycloOrderHf = (CycloOrder >> 1); - + auto ringDim = (cycloOrder >> 1); auto mapSearch = m_rootOfUnityReverseTableByModulus.find(modulus); - if (mapSearch == m_rootOfUnityReverseTableByModulus.end() || mapSearch->second.GetLength() != CycloOrderHf) { + if (mapSearch == m_rootOfUnityReverseTableByModulus.end() || mapSearch->second.GetLength() != ringDim) { #pragma omp critical { IntType x(1), xinv(1); - usint msb = GetMSB(CycloOrderHf - 1); - IntType mu = modulus.ComputeMu(); - VecType Table(CycloOrderHf, modulus); - VecType TableI(CycloOrderHf, modulus); + uint32_t msb = GetMSB(ringDim - 1); + IntType mu = modulus.ComputeMu(); IntType rootOfUnityInverse = rootOfUnity.ModInverse(modulus); - usint iinv; - for (usint i = 0; i < CycloOrderHf; i++) { - iinv = ReverseBits(i, msb); - Table[iinv] = x; - TableI[iinv] = xinv; + NativeInteger nModulus = modulus.ConvertToInt(); + VecType Table(ringDim, modulus); + VecType TableI(ringDim, modulus); + VecType preconTable(ringDim, nModulus); + VecType preconTableI(ringDim, nModulus); + for (uint32_t i = 0; i < ringDim; ++i) { + auto iinv = ReverseBits(i, msb); + Table[iinv] = x; + preconTable[iinv] = NativeInteger(x.ConvertToInt()).PrepModMulConst(nModulus); x.ModMulEq(rootOfUnity, modulus, mu); + TableI[iinv] = xinv; + preconTableI[iinv] = NativeInteger(xinv.ConvertToInt()).PrepModMulConst(nModulus); xinv.ModMulEq(rootOfUnityInverse, modulus, mu); } - m_rootOfUnityReverseTableByModulus[modulus] = Table; - m_rootOfUnityInverseReverseTableByModulus[modulus] = TableI; + m_rootOfUnityReverseTableByModulus[modulus] = std::move(Table); + m_rootOfUnityInverseReverseTableByModulus[modulus] = std::move(TableI); + m_rootOfUnityPreconReverseTableByModulus[modulus] = std::move(preconTable); + m_rootOfUnityInversePreconReverseTableByModulus[modulus] = std::move(preconTableI); + IntType coInv(1); VecType TableCOI(msb + 1, modulus); - for (usint i = 0; i < msb + 1; i++) { - IntType coInv(IntType(1 << i).ModInverse(modulus)); - TableCOI[i] = coInv; + VecType preconTableCOI(msb + 1, nModulus); + for (uint32_t i = 0; i <= msb; ++i) { + TableCOI[i] = coInv.ModInverse(modulus); + preconTableCOI[i] = NativeInteger(TableCOI[i].ConvertToInt()).PrepModMulConst(nModulus); 
+ coInv <<= 1; } - m_cycloOrderInverseTableByModulus[modulus] = TableCOI; - - NativeInteger nativeModulus = modulus.ConvertToInt(); - VecType preconTable(CycloOrderHf, nativeModulus); - VecType preconTableI(CycloOrderHf, nativeModulus); - - for (usint i = 0; i < CycloOrderHf; i++) { - preconTable[i] = NativeInteger(m_rootOfUnityReverseTableByModulus[modulus][i].ConvertToInt()) - .PrepModMulConst(nativeModulus); - preconTableI[i] = NativeInteger(m_rootOfUnityInverseReverseTableByModulus[modulus][i].ConvertToInt()) - .PrepModMulConst(nativeModulus); - } - - VecType preconTableCOI(msb + 1, nativeModulus); - for (usint i = 0; i < msb + 1; i++) { - preconTableCOI[i] = NativeInteger(m_cycloOrderInverseTableByModulus[modulus][i].ConvertToInt()) - .PrepModMulConst(nativeModulus); - } - - m_rootOfUnityPreconReverseTableByModulus[modulus] = preconTable; - m_rootOfUnityInversePreconReverseTableByModulus[modulus] = preconTableI; - m_cycloOrderInversePreconTableByModulus[modulus] = preconTableCOI; + m_cycloOrderInverseTableByModulus[modulus] = std::move(TableCOI); + m_cycloOrderInversePreconTableByModulus[modulus] = std::move(preconTableCOI); } } } template -void ChineseRemainderTransformFTTNat::PreCompute(std::vector& rootOfUnity, const usint CycloOrder, +void ChineseRemainderTransformFTTNat::PreCompute(std::vector& rootOfUnity, const uint32_t cycloOrder, std::vector& moduliiChain) { - usint numOfRootU = rootOfUnity.size(); - usint numModulii = moduliiChain.size(); - - if (numOfRootU != numModulii) { + uint32_t numOfRootU = rootOfUnity.size(); + uint32_t numModulii = moduliiChain.size(); + if (numOfRootU != numModulii) OPENFHE_THROW("size of root of unity and size of moduli chain not of same size"); - } - - for (usint i = 0; i < numOfRootU; ++i) { - IntType currentRoot(rootOfUnity[i]); - IntType currentMod(moduliiChain[i]); - PreCompute(currentRoot, CycloOrder, currentMod); - } + for (uint32_t i = 0; i < numOfRootU; ++i) + PreCompute(rootOfUnity[i], cycloOrder, moduliiChain[i]); } template @@ -858,8 +777,8 @@ void ChineseRemainderTransformFTTNat::Reset() { } template -void BluesteinFFTNat::PreComputeDefaultNTTModulusRoot(usint cycloOrder, const IntType& modulus) { - usint nttDim = pow(2, ceil(log2(2 * cycloOrder - 1))); +void BluesteinFFTNat::PreComputeDefaultNTTModulusRoot(uint32_t cycloOrder, const IntType& modulus) { + uint32_t nttDim = pow(2, ceil(log2(2 * cycloOrder - 1))); const auto nttModulus = LastPrime(log2(nttDim) + 2 * modulus.GetMSB(), nttDim); const auto nttRoot = RootOfUnity(nttDim, nttModulus); const ModulusRoot nttModulusRoot = {nttModulus, nttRoot}; @@ -869,44 +788,39 @@ void BluesteinFFTNat::PreComputeDefaultNTTModulusRoot(usint cycloOrder, } template -void BluesteinFFTNat::PreComputeRootTableForNTT(usint cyclotoOrder, +void BluesteinFFTNat::PreComputeRootTableForNTT(uint32_t cyclotoOrder, const ModulusRoot& nttModulusRoot) { - usint nttDim = pow(2, ceil(log2(2 * cyclotoOrder - 1))); + uint32_t nttDim = pow(2, ceil(log2(2 * cyclotoOrder - 1))); const auto& nttModulus = nttModulusRoot.first; const auto& nttRoot = nttModulusRoot.second; IntType root(nttRoot); - auto rootInv = root.ModInverse(nttModulus); - usint nttDimHf = (nttDim >> 1); + uint32_t nttDimHf = (nttDim >> 1); VecType rootTable(nttDimHf, nttModulus); VecType rootTableInverse(nttDimHf, nttModulus); - IntType x(1); - for (usint i = 0; i < nttDimHf; i++) { + IntType x(1), y(1); + for (uint32_t i = 0; i < nttDimHf; ++i) { rootTable[i] = x; - x = x.ModMul(root, nttModulus); - } - - x = 1; - for (usint i = 0; i < nttDimHf; 
i++) { - rootTableInverse[i] = x; - x = x.ModMul(rootInv, nttModulus); + x.ModMulEq(root, nttModulus); + rootTableInverse[i] = y; + y.ModMulEq(rootInv, nttModulus); } - m_rootOfUnityTableByModulusRoot[nttModulusRoot] = rootTable; - m_rootOfUnityInverseTableByModulusRoot[nttModulusRoot] = rootTableInverse; + m_rootOfUnityTableByModulusRoot[nttModulusRoot] = std::move(rootTable); + m_rootOfUnityInverseTableByModulusRoot[nttModulusRoot] = std::move(rootTableInverse); } template -void BluesteinFFTNat::PreComputePowers(usint cycloOrder, const ModulusRoot& modulusRoot) { +void BluesteinFFTNat::PreComputePowers(uint32_t cycloOrder, const ModulusRoot& modulusRoot) { const auto& modulus = modulusRoot.first; const auto& root = modulusRoot.second; VecType powers(cycloOrder, modulus); powers[0] = 1; - for (usint i = 1; i < cycloOrder; i++) { + for (uint32_t i = 1; i < cycloOrder; i++) { auto iSqr = (i * i) % (2 * cycloOrder); auto val = root.ModExp(IntType(iSqr), modulus); powers[i] = val; @@ -915,7 +829,7 @@ void BluesteinFFTNat::PreComputePowers(usint cycloOrder, const ModulusR } template -void BluesteinFFTNat::PreComputeRBTable(usint cycloOrder, const ModulusRootPair& modulusRootPair) { +void BluesteinFFTNat::PreComputeRBTable(uint32_t cycloOrder, const ModulusRootPair& modulusRootPair) { const auto& modulusRoot = modulusRootPair.first; const auto& modulus = modulusRoot.first; const auto& root = modulusRoot.second; @@ -926,11 +840,11 @@ void BluesteinFFTNat::PreComputeRBTable(usint cycloOrder, const Modulus // const auto &nttRoot = nttModulusRoot.second; // assumes rootTable is precomputed const auto& rootTable = m_rootOfUnityTableByModulusRoot[nttModulusRoot]; - usint nttDim = pow(2, ceil(log2(2 * cycloOrder - 1))); + uint32_t nttDim = pow(2, ceil(log2(2 * cycloOrder - 1))); VecType b(2 * cycloOrder - 1, modulus); b[cycloOrder - 1] = 1; - for (usint i = 1; i < cycloOrder; i++) { + for (uint32_t i = 1; i < cycloOrder; i++) { auto iSqr = (i * i) % (2 * cycloOrder); auto val = rootInv.ModExp(IntType(iSqr), modulus); b[cycloOrder - 1 + i] = val; @@ -947,7 +861,7 @@ void BluesteinFFTNat::PreComputeRBTable(usint cycloOrder, const Modulus template VecType BluesteinFFTNat::ForwardTransform(const VecType& element, const IntType& root, - const usint cycloOrder) { + const uint32_t cycloOrder) { const auto& modulus = element.GetModulus(); const auto& nttModulusRoot = m_defaultNTTModulusRoot[modulus]; @@ -955,7 +869,8 @@ VecType BluesteinFFTNat::ForwardTransform(const VecType& element, const } template -VecType BluesteinFFTNat::ForwardTransform(const VecType& element, const IntType& root, const usint cycloOrder, +VecType BluesteinFFTNat::ForwardTransform(const VecType& element, const IntType& root, + const uint32_t cycloOrder, const ModulusRoot& nttModulusRoot) { if (element.GetLength() != cycloOrder) { OPENFHE_THROW("expected size of element vector should be equal to cyclotomic order"); @@ -972,8 +887,8 @@ VecType BluesteinFFTNat::ForwardTransform(const VecType& element, const m_rootOfUnityInverseTableByModulusRoot[nttModulusRoot]; // assumes rootTableInverse is precomputed VecType x = element.ModMul(powers); - usint nttDim = pow(2, ceil(log2(2 * cycloOrder - 1))); - auto Ra = PadZeros(x, nttDim); + uint32_t nttDim = pow(2, ceil(log2(2 * cycloOrder - 1))); + auto Ra = PadZeros(x, nttDim); Ra.SetModulus(nttModulus); VecType RA(nttDim); NumberTheoreticTransformNat().ForwardTransformIterative(Ra, rootTable, &RA); @@ -993,15 +908,15 @@ VecType BluesteinFFTNat::ForwardTransform(const VecType& element, const } 
template -VecType BluesteinFFTNat::PadZeros(const VecType& a, const usint finalSize) { - usint s = a.GetLength(); +VecType BluesteinFFTNat::PadZeros(const VecType& a, const uint32_t finalSize) { + uint32_t s = a.GetLength(); VecType result(finalSize, a.GetModulus()); - for (usint i = 0; i < s; i++) { + for (uint32_t i = 0; i < s; i++) { result[i] = a[i]; } - for (usint i = a.GetLength(); i < finalSize; i++) { + for (uint32_t i = a.GetLength(); i < finalSize; i++) { result[i] = IntType(0); } @@ -1009,10 +924,10 @@ VecType BluesteinFFTNat::PadZeros(const VecType& a, const usint finalSi } template -VecType BluesteinFFTNat::Resize(const VecType& a, usint lo, usint hi) { +VecType BluesteinFFTNat::Resize(const VecType& a, uint32_t lo, uint32_t hi) { VecType result(hi - lo + 1, a.GetModulus()); - for (usint i = lo, j = 0; i <= hi; i++, j++) { + for (uint32_t i = lo, j = 0; i <= hi; i++, j++) { result[j] = a[i]; } @@ -1034,12 +949,12 @@ void ChineseRemainderTransformArbNat::SetCylotomicPolynomial(const VecT } template -void ChineseRemainderTransformArbNat::PreCompute(const usint cyclotoOrder, const IntType& modulus) { +void ChineseRemainderTransformArbNat::PreCompute(const uint32_t cyclotoOrder, const IntType& modulus) { BluesteinFFTNat().PreComputeDefaultNTTModulusRoot(cyclotoOrder, modulus); } template -void ChineseRemainderTransformArbNat::SetPreComputedNTTModulus(usint cyclotoOrder, const IntType& modulus, +void ChineseRemainderTransformArbNat::SetPreComputedNTTModulus(uint32_t cyclotoOrder, const IntType& modulus, const IntType& nttModulus, const IntType& nttRoot) { const ModulusRoot nttModulusRoot = {nttModulus, nttRoot}; @@ -1047,19 +962,19 @@ void ChineseRemainderTransformArbNat::SetPreComputedNTTModulus(usint cy } template -void ChineseRemainderTransformArbNat::SetPreComputedNTTDivisionModulus(usint cyclotoOrder, +void ChineseRemainderTransformArbNat::SetPreComputedNTTDivisionModulus(uint32_t cyclotoOrder, const IntType& modulus, const IntType& nttMod, const IntType& nttRootBig) { OPENFHE_DEBUG_FLAG(false); - usint n = GetTotient(cyclotoOrder); + uint32_t n = GetTotient(cyclotoOrder); OPENFHE_DEBUG("GetTotient(" << cyclotoOrder << ")= " << n); - usint power = cyclotoOrder - n; + uint32_t power = cyclotoOrder - n; m_nttDivisionDim[cyclotoOrder] = 2 * std::pow(2, ceil(log2(power))); - usint nttDimBig = std::pow(2, ceil(log2(2 * cyclotoOrder - 1))); + uint32_t nttDimBig = std::pow(2, ceil(log2(2 * cyclotoOrder - 1))); // Computes the root of unity for the division NTT based on the root of unity // for regular NTT @@ -1068,22 +983,22 @@ void ChineseRemainderTransformArbNat::SetPreComputedNTTDivisionModulus( m_DivisionNTTModulus[modulus] = nttMod; m_DivisionNTTRootOfUnity[modulus] = nttRoot; // part0 setting of rootTable and inverse rootTable - usint nttDim = m_nttDivisionDim[cyclotoOrder]; + uint32_t nttDim = m_nttDivisionDim[cyclotoOrder]; IntType root(nttRoot); auto rootInv = root.ModInverse(nttMod); - usint nttDimHf = (nttDim >> 1); + uint32_t nttDimHf = (nttDim >> 1); VecType rootTable(nttDimHf, nttMod); VecType rootTableInverse(nttDimHf, nttMod); IntType x(1); - for (usint i = 0; i < nttDimHf; i++) { + for (uint32_t i = 0; i < nttDimHf; i++) { rootTable[i] = x; x = x.ModMul(root, nttMod); } x = 1; - for (usint i = 0; i < nttDimHf; i++) { + for (uint32_t i = 0; i < nttDimHf; i++) { rootTableInverse[i] = x; x = x.ModMul(rootInv, nttMod); } @@ -1105,7 +1020,7 @@ void ChineseRemainderTransformArbNat::SetPreComputedNTTDivisionModulus( const auto& cycloPoly = m_cyclotomicPolyMap[modulus]; 
VecType QForwardTransform(nttDim, nttMod); - for (usint i = 0; i < cycloPoly.GetLength(); i++) { + for (uint32_t i = 0; i < cycloPoly.GetLength(); i++) { QForwardTransform[i] = cycloPoly[i]; } @@ -1117,17 +1032,17 @@ void ChineseRemainderTransformArbNat::SetPreComputedNTTDivisionModulus( template VecType ChineseRemainderTransformArbNat::InversePolyMod(const VecType& cycloPoly, const IntType& modulus, - usint power) { + uint32_t power) { VecType result(power, modulus); - usint r = ceil(log2(power)); + uint32_t r = ceil(log2(power)); VecType h(1, modulus); // h is a unit polynomial h[0] = 1; // Precompute the Barrett mu parameter IntType mu = modulus.ComputeMu(); - for (usint i = 0; i < r; i++) { - usint qDegree = std::pow(2, i + 1); + for (uint32_t i = 0; i < r; i++) { + uint32_t qDegree = std::pow(2, i + 1); VecType q(qDegree + 1, modulus); // q = x^(2^i+1) q[qDegree] = 1; auto hSquare = PolynomialMultiplication(h, h); @@ -1135,7 +1050,7 @@ VecType ChineseRemainderTransformArbNat::InversePolyMod(const VecType& auto a = h * IntType(2); auto b = PolynomialMultiplication(hSquare, cycloPoly); // b = 2h - gh^2 - for (usint j = 0; j < b.GetLength(); j++) { + for (uint32_t j = 0; j < b.GetLength(); j++) { if (j < a.GetLength()) { b[j] = a[j].ModSub(b[j], modulus, mu); } @@ -1146,7 +1061,7 @@ VecType ChineseRemainderTransformArbNat::InversePolyMod(const VecType& h = PolyMod(b, q, modulus); } // take modulo x^power - for (usint i = 0; i < power; i++) { + for (uint32_t i = 0; i < power; i++) { result[i] = h[i]; } @@ -1156,8 +1071,8 @@ VecType ChineseRemainderTransformArbNat::InversePolyMod(const VecType& template VecType ChineseRemainderTransformArbNat::ForwardTransform(const VecType& element, const IntType& root, const IntType& nttModulus, const IntType& nttRoot, - const usint cycloOrder) { - usint phim = GetTotient(cycloOrder); + const uint32_t cycloOrder) { + uint32_t phim = GetTotient(cycloOrder); if (element.GetLength() != phim) { OPENFHE_THROW("element size should be equal to phim"); } @@ -1194,8 +1109,8 @@ VecType ChineseRemainderTransformArbNat::ForwardTransform(const VecType template VecType ChineseRemainderTransformArbNat::InverseTransform(const VecType& element, const IntType& root, const IntType& nttModulus, const IntType& nttRoot, - const usint cycloOrder) { - usint phim = GetTotient(cycloOrder); + const uint32_t cycloOrder) { + uint32_t phim = GetTotient(cycloOrder); if (element.GetLength() != phim) { OPENFHE_THROW("element size should be equal to phim"); } @@ -1231,20 +1146,20 @@ VecType ChineseRemainderTransformArbNat::InverseTransform(const VecType } template -VecType ChineseRemainderTransformArbNat::Pad(const VecType& element, const usint cycloOrder, bool forward) { - usint n = GetTotient(cycloOrder); +VecType ChineseRemainderTransformArbNat::Pad(const VecType& element, const uint32_t cycloOrder, bool forward) { + uint32_t n = GetTotient(cycloOrder); const auto& modulus = element.GetModulus(); VecType inputToBluestein(cycloOrder, modulus); if (forward) { // Forward transform padding - for (usint i = 0; i < n; i++) { + for (uint32_t i = 0; i < n; i++) { inputToBluestein[i] = element[i]; } } else { // Inverse transform padding auto tList = GetTotientList(cycloOrder); - usint i = 0; + uint32_t i = 0; for (auto& coprime : tList) { inputToBluestein[coprime] = element[i++]; } @@ -1254,16 +1169,16 @@ VecType ChineseRemainderTransformArbNat::Pad(const VecType& element, co } template -VecType ChineseRemainderTransformArbNat::Drop(const VecType& element, const usint cycloOrder, bool forward, 
+VecType ChineseRemainderTransformArbNat::Drop(const VecType& element, const uint32_t cycloOrder, bool forward, const IntType& bigMod, const IntType& bigRoot) { - usint n = GetTotient(cycloOrder); + uint32_t n = GetTotient(cycloOrder); const auto& modulus = element.GetModulus(); VecType output(n, modulus); if (forward) { // Forward transform drop auto tList = GetTotientList(cycloOrder); - for (usint i = 0; i < n; i++) { + for (uint32_t i = 0; i < n; i++) { output[i] = element[tList[i]]; } } @@ -1273,7 +1188,7 @@ VecType ChineseRemainderTransformArbNat::Drop(const VecType& element, c // cycloOrder is prime: Reduce mod Phi_{n+1}(x) // Reduction involves subtracting the coeff of x^n from all terms auto coeff_n = element[n]; - for (usint i = 0; i < n; i++) { + for (uint32_t i = 0; i < n; i++) { output[i] = element[i].ModSub(coeff_n, modulus, mu); } } @@ -1282,7 +1197,7 @@ VecType ChineseRemainderTransformArbNat::Drop(const VecType& element, c // cycloOrder is 2*prime: 2 Step reduction // First reduce mod x^(n+1)+1 (=(x+1)*Phi_{2*(n+1)}(x)) // Subtract co-efficient of x^(i+n+1) from x^(i) - for (usint i = 0; i < n; i++) { + for (uint32_t i = 0; i < n; i++) { auto coeff_i = element[i]; auto coeff_ip = element[i + n + 1]; output[i] = coeff_i.ModSub(coeff_ip, modulus, mu); @@ -1290,7 +1205,7 @@ VecType ChineseRemainderTransformArbNat::Drop(const VecType& element, c auto coeff_n = element[n].ModSub(element[2 * n + 1], modulus, mu); // Now reduce mod Phi_{2*(n+1)}(x) // Similar to the prime case but with alternating signs - for (usint i = 0; i < n; i++) { + for (uint32_t i = 0; i < n; i++) { if (i % 2 == 0) { output[i].ModSubEq(coeff_n, modulus, mu); } @@ -1314,8 +1229,8 @@ VecType ChineseRemainderTransformArbNat::Drop(const VecType& element, c const auto& rootTable = m_rootOfUnityDivisionTableByModulus[nttMod]; VecType aPadded2(m_nttDivisionDim[cycloOrder], nttMod); // perform mod operation - usint power = cycloOrder - n; - for (usint i = n; i < element.GetLength(); i++) { + uint32_t power = cycloOrder - n; + for (uint32_t i = n; i < element.GetLength(); i++) { aPadded2[power - (i - n) - 1] = element[i]; } VecType A(m_nttDivisionDim[cycloOrder]); @@ -1326,7 +1241,7 @@ VecType ChineseRemainderTransformArbNat::Drop(const VecType& element, c NumberTheoreticTransformNat().InverseTransformIterative(AB, rootTableInverse, &a); VecType quotient(m_nttDivisionDim[cycloOrder], modulus); - for (usint i = 0; i < power; i++) { + for (uint32_t i = 0; i < power; i++) { quotient[i] = a[i]; } quotient.ModEq(modulus); @@ -1344,7 +1259,7 @@ VecType ChineseRemainderTransformArbNat::Drop(const VecType& element, c IntType mu = modulus.ComputeMu(); // Precompute the Barrett mu parameter - for (usint i = 0; i < n; i++) { + for (uint32_t i = 0; i < n; i++) { output[i] = element[i].ModSub(newQuotient2[cycloOrder - 1 - i], modulus, mu); } } diff --git a/src/pke/examples/functional-bootstrapping-ckks.cpp b/src/pke/examples/functional-bootstrapping-ckks.cpp index bb3878e16..98540fb08 100644 --- a/src/pke/examples/functional-bootstrapping-ckks.cpp +++ b/src/pke/examples/functional-bootstrapping-ckks.cpp @@ -163,13 +163,12 @@ void ArbitraryLUT(BigInteger QBFVInit, BigInteger PInput, BigInteger POutput, Bi parameters.SetNumLargeDigits(dnum); parameters.SetBatchSize(numSlotsCKKS); parameters.SetRingDim(ringDim); - uint32_t depth = levelsAvailableAfterBootstrap; + uint32_t depth = levelsAvailableAfterBootstrap; if (binaryLUT) depth += FHECKKSRNS::GetFBTDepth(lvlb, coeffint, PInput, order, secretKeyDist); else depth += 
FHECKKSRNS::GetFBTDepth(lvlb, coeffcomp, PInput, order, secretKeyDist); - parameters.SetMultiplicativeDepth(depth); auto cc = GenCryptoContext(parameters); @@ -321,13 +320,12 @@ void MultiValueBootstrapping(BigInteger QBFVInit, BigInteger PInput, BigInteger parameters.SetNumLargeDigits(dnum); parameters.SetBatchSize(numSlotsCKKS); parameters.SetRingDim(ringDim); - uint32_t depth = levelsAvailableAfterBootstrap + levelsComputation; + uint32_t depth = levelsAvailableAfterBootstrap + levelsComputation; if (binaryLUT) depth += FHECKKSRNS::GetFBTDepth(lvlb, coeffint1, PInput, order, secretKeyDist); else depth += FHECKKSRNS::GetFBTDepth(lvlb, coeffcomp1, PInput, order, secretKeyDist); - parameters.SetMultiplicativeDepth(depth); auto cc = GenCryptoContext(parameters); @@ -568,12 +566,10 @@ void MultiPrecisionSign(BigInteger QBFVInit, BigInteger PInput, BigInteger PDigi parameters.SetRingDim(ringDim); uint32_t depth = levelsAvailableAfterBootstrap; - if (binaryLUT) depth += FHECKKSRNS::GetFBTDepth(lvlb, coeffintMod, PDigit, order, secretKeyDist); else depth += FHECKKSRNS::GetFBTDepth(lvlb, coeffcompMod, PDigit, order, secretKeyDist); - parameters.SetMultiplicativeDepth(depth); auto cc = GenCryptoContext(parameters); diff --git a/src/pke/examples/simple-ckks-bootstrapping.cpp b/src/pke/examples/simple-ckks-bootstrapping.cpp index 03fcf7b94..b47bf2bb9 100644 --- a/src/pke/examples/simple-ckks-bootstrapping.cpp +++ b/src/pke/examples/simple-ckks-bootstrapping.cpp @@ -35,8 +35,6 @@ Example for CKKS bootstrapping with full packing */ -#define PROFILE - #include "openfhe.h" using namespace lbcrypto; @@ -78,12 +76,12 @@ void SimpleBootstrapExample() { */ #if NATIVEINT == 128 ScalingTechnique rescaleTech = FIXEDAUTO; - usint dcrtBits = 78; - usint firstMod = 89; + uint32_t dcrtBits = 78; + uint32_t firstMod = 89; #else ScalingTechnique rescaleTech = FLEXIBLEAUTO; - usint dcrtBits = 59; - usint firstMod = 60; + uint32_t dcrtBits = 59; + uint32_t firstMod = 60; #endif parameters.SetScalingModSize(dcrtBits); @@ -99,11 +97,11 @@ void SimpleBootstrapExample() { */ std::vector levelBudget = {4, 4}; - // Note that the actual number of levels avalailable after bootstrapping before next bootstrapping + // Note that the actual number of levels available after bootstrapping before next bootstrapping // will be levelsAvailableAfterBootstrap - 1 because an additional level // is used for scaling the ciphertext before next bootstrapping (in 64-bit CKKS bootstrapping) uint32_t levelsAvailableAfterBootstrap = 10; - usint depth = levelsAvailableAfterBootstrap + FHECKKSRNS::GetBootstrapDepth(levelBudget, secretKeyDist); + uint32_t depth = levelsAvailableAfterBootstrap + FHECKKSRNS::GetBootstrapDepth(levelBudget, secretKeyDist); parameters.SetMultiplicativeDepth(depth); CryptoContext cryptoContext = GenCryptoContext(parameters); @@ -114,10 +112,10 @@ void SimpleBootstrapExample() { cryptoContext->Enable(ADVANCEDSHE); cryptoContext->Enable(FHE); - usint ringDim = cryptoContext->GetRingDimension(); + uint32_t ringDim = cryptoContext->GetRingDimension(); // This is the maximum number of slots that can be used for full packing.
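Editor's illustration (not part of the patch): a CKKS ciphertext over a ring of dimension N carries at most N/2 complex slots, which is why full packing uses half of the ring dimension obtained from GetRingDimension() in the surrounding code. A minimal sketch with an assumed example value:

    uint32_t ringDim  = 1 << 16;       // assumed example value; the surrounding code queries GetRingDimension()
    uint32_t numSlots = ringDim / 2;   // 32768 slots for full packing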
- usint numSlots = ringDim / 2; - std::cout << "CKKS scheme is using ring dimension " << ringDim << std::endl << std::endl; + uint32_t numSlots = ringDim / 2; + std::cout << "CKKS scheme ring dimension: " << ringDim << "\n\n"; cryptoContext->EvalBootstrapSetup(levelBudget); @@ -132,22 +130,26 @@ void SimpleBootstrapExample() { Plaintext ptxt = cryptoContext->MakeCKKSPackedPlaintext(x, 1, depth - 1); ptxt->SetLength(encodedLength); - std::cout << "Input: " << ptxt << std::endl; + std::cout << "Input: " << ptxt << "\n"; Ciphertext ciph = cryptoContext->Encrypt(keyPair.publicKey, ptxt); - std::cout << "Initial number of levels remaining: " << depth - ciph->GetLevel() << std::endl; + std::cout << "Initial number of levels remaining: " << depth - ciph->GetLevel() << "\n\n"; + + // auto start = std::chrono::high_resolution_clock::now(); // Perform the bootstrapping operation. The goal is to increase the number of levels remaining // for HE computation. auto ciphertextAfter = cryptoContext->EvalBootstrap(ciph); + // auto stop = std::chrono::high_resolution_clock::now(); + // std::cout << "Bootstrapping time: " << std::chrono::duration(stop - start).count() << " s\n\n"; + std::cout << "Number of levels remaining after bootstrapping: " - << depth - ciphertextAfter->GetLevel() - (ciphertextAfter->GetNoiseScaleDeg() - 1) << std::endl - << std::endl; + << depth - ciphertextAfter->GetLevel() - (ciphertextAfter->GetNoiseScaleDeg() - 1) << "\n\n"; Plaintext result; cryptoContext->Decrypt(keyPair.secretKey, ciphertextAfter, &result); result->SetLength(encodedLength); - std::cout << "Output after bootstrapping \n\t" << result << std::endl; + std::cout << "Output after bootstrapping: " << result << "\n"; } diff --git a/src/pke/include/ciphertext-fwd.h b/src/pke/include/ciphertext-fwd.h index b36467720..35dcbcd41 100644 --- a/src/pke/include/ciphertext-fwd.h +++ b/src/pke/include/ciphertext-fwd.h @@ -57,12 +57,12 @@ template struct seriesPowers { std::vector> powersRe; std::vector> powers2Re; - ConstCiphertext power2km1Re; + Ciphertext power2km1Re; uint32_t k; uint32_t m; std::vector> powersIm; std::vector> powers2Im; - ConstCiphertext power2km1Im; + Ciphertext power2km1Im; seriesPowers() = default; @@ -74,13 +74,13 @@ struct seriesPowers { : powersRe(powers0), powersIm(powers1) {} seriesPowers(const std::vector>& powers0, const std::vector>& powers20, - ConstCiphertext& power2km10, uint32_t k0, uint32_t m0) + const Ciphertext& power2km10, uint32_t k0, uint32_t m0) : powersRe(powers0), powers2Re(powers20), power2km1Re(power2km10), k(k0), m(m0) {} seriesPowers(const std::vector>& powers0, const std::vector>& powers20, - ConstCiphertext& power2km10, uint32_t k0, uint32_t m0, + const Ciphertext& power2km10, uint32_t k0, uint32_t m0, const std::vector>& powers1, const std::vector>& powers21, - ConstCiphertext& power2km11) + const Ciphertext& power2km11) : powersRe(powers0), powers2Re(powers20), power2km1Re(power2km10), diff --git a/src/pke/include/cryptocontext.h b/src/pke/include/cryptocontext.h index 5837b5150..a5c6c3456 100644 --- a/src/pke/include/cryptocontext.h +++ b/src/pke/include/cryptocontext.h @@ -631,6 +631,11 @@ class CryptoContextImpl : public Serializable { return !(a == b); } + /** + * @brief Clears various caches within the library + */ + static void ClearStaticMapsAndVectors(); + /** * @brief Serializes either all EvalMult keys (if keyTag is empty) or the EvalMult keys for keyTag * diff --git a/src/pke/include/scheme/ckksrns/ckksrns-fhe.h b/src/pke/include/scheme/ckksrns/ckksrns-fhe.h index 
233234e61..45b879874 100644 --- a/src/pke/include/scheme/ckksrns/ckksrns-fhe.h +++ b/src/pke/include/scheme/ckksrns/ckksrns-fhe.h @@ -1,7 +1,7 @@ //================================================================================== // BSD 2-Clause License // -// Copyright (c) 2014-2022, NJIT, Duality Technologies Inc. and other contributors +// Copyright (c) 2014-2025, NJIT, Duality Technologies Inc. and other contributors // // All rights reserved. // @@ -63,23 +63,13 @@ class CKKSBootstrapPrecom { CKKSBootstrapPrecom(CKKSBootstrapPrecom&& rhs) noexcept = default; - // number of slots for which the bootstrapping is performed - uint32_t m_slots; - - // the inner dimension in the baby-step giant-step strategy - uint32_t m_dim1; - uint32_t m_gs; - - uint32_t m_levelEnc; - uint32_t m_levelDec; - // level budget for homomorphic encoding, number of layers to collapse in one level, // number of layers remaining to be collapsed in one level to have exactly the number // of levels specified in the level budget, the number of rotations in one level, // the baby step and giant step in the baby-step giant-step strategy, the number of // rotations in the remaining level, the baby step and giant step in the baby-step // giant-step strategy for the remaining level - std::vector m_paramsEnc = std::vector(CKKS_BOOT_PARAMS::TOTAL_ELEMENTS); + struct ckks_boot_params m_paramsEnc; // level budget for homomorphic decoding, number of layers to collapse in one level, // number of layers remaining to be collapsed in one level to have exactly the number @@ -87,7 +77,10 @@ class CKKSBootstrapPrecom { // the baby step and giant step in the baby-step giant-step strategy, the number of // rotations in the remaining level, the baby step and giant step in the baby-step // giant-step strategy for the remaining level - std::vector m_paramsDec = std::vector(CKKS_BOOT_PARAMS::TOTAL_ELEMENTS); + struct ckks_boot_params m_paramsDec; + + // number of slots for which the bootstrapping is performed + uint32_t m_slots; // Linear map U0; used in decoding std::vector m_U0Pre; @@ -106,20 +99,20 @@ class CKKSBootstrapPrecom { template void save(Archive& ar) const { - ar(cereal::make_nvp("dim1_Enc", m_dim1)); - ar(cereal::make_nvp("dim1_Dec", m_paramsDec[CKKS_BOOT_PARAMS::GIANT_STEP])); + ar(cereal::make_nvp("dim1_Enc", m_paramsEnc.g)); + ar(cereal::make_nvp("dim1_Dec", m_paramsDec.g)); ar(cereal::make_nvp("slots", m_slots)); - ar(cereal::make_nvp("lEnc", m_paramsEnc[CKKS_BOOT_PARAMS::LEVEL_BUDGET])); - ar(cereal::make_nvp("lDec", m_paramsDec[CKKS_BOOT_PARAMS::LEVEL_BUDGET])); + ar(cereal::make_nvp("lEnc", m_paramsEnc.lvlb)); + ar(cereal::make_nvp("lDec", m_paramsDec.lvlb)); } template void load(Archive& ar) { - ar(cereal::make_nvp("dim1_Enc", m_dim1)); - ar(cereal::make_nvp("dim1_Dec", m_paramsDec[CKKS_BOOT_PARAMS::GIANT_STEP])); + ar(cereal::make_nvp("dim1_Enc", m_paramsEnc.g)); + ar(cereal::make_nvp("dim1_Dec", m_paramsDec.g)); ar(cereal::make_nvp("slots", m_slots)); - ar(cereal::make_nvp("lEnc", m_paramsEnc[CKKS_BOOT_PARAMS::LEVEL_BUDGET])); - ar(cereal::make_nvp("lDec", m_paramsDec[CKKS_BOOT_PARAMS::LEVEL_BUDGET])); + ar(cereal::make_nvp("lEnc", m_paramsEnc.lvlb)); + ar(cereal::make_nvp("lDec", m_paramsDec.lvlb)); } }; @@ -395,7 +388,7 @@ class FHECKKSRNS : public FHERNS { size_t order = 1); template - Ciphertext EvalMVBNoDecodingInternal(std::shared_ptr> ciphertext, + Ciphertext EvalMVBNoDecodingInternal(const std::shared_ptr>& ciphertext, const std::vector& coefficients, uint32_t digitBitSize, size_t order = 1); diff --git 
a/src/pke/include/scheme/ckksrns/ckksrns-utils.h b/src/pke/include/scheme/ckksrns/ckksrns-utils.h index e90c2cade..bdbd92a29 100644 --- a/src/pke/include/scheme/ckksrns/ckksrns-utils.h +++ b/src/pke/include/scheme/ckksrns/ckksrns-utils.h @@ -59,6 +59,9 @@ inline bool IsNotEqualOne(double v, double delta = 0x1p-44) { inline bool IsNotEqualZero(double v, double delta = 0x1p-44) { return std::abs(v) > delta; } +inline bool IsNotEqualNegOne(double v, double delta = 0x1p-44) { + return std::abs(v + 1.0) > delta; +} inline bool IsNotEqualOne(std::complex val, double delta = 0x1p-44) { return IsNotEqualOne(val.real(), delta) || IsNotEqualZero(val.imag(), delta); } @@ -253,15 +256,25 @@ uint32_t ReduceRotation(int32_t index, uint32_t slots); /** * Computes all parameters needed for the homomorphic encoding and decoding in the bootstrapping - * operation and returns them as a vector. The returned vector's data can be accessed using - * enum'ed indices from CKKS_BOOT_PARAMS that are defined below. + * operation and returns them as a struct. * * @param slots number of slots * @param levelBudget the allocated level budget for the computation. * @param dim1 the value for the inner dimension in the baby-step giant-step strategy - * @return vector with parameters for the homomorphic encoding and decoding in bootstrapping + * @return struct with parameters for the homomorphic encoding and decoding in bootstrapping */ -std::vector GetCollapsedFFTParams(uint32_t slots, uint32_t levelBudget = 4, uint32_t dim1 = 0); +struct ckks_boot_params { + uint32_t lvlb; // level budget + uint32_t layersCollapse; // layers to collapse in one level + uint32_t remCollapse; // remaining layers to collapse + uint32_t numRotations; // number of rotations in one level + uint32_t b; // baby step in the baby-step giant-step strategy + uint32_t g; // giant step in the baby-step giant-step strategy + uint32_t numRotationsRem; // number of rotations in the remaining level + uint32_t bRem; // baby step in the baby-step giant-step strategy for the remaining level + uint32_t gRem; // giant step in the baby-step giant-step strategy for the remaining level +}; +struct ckks_boot_params GetCollapsedFFTParams(uint32_t slots, uint32_t levelBudget = 4, uint32_t dim1 = 0); /** * Gets inner loop dimension for baby step giant step algorithm for linear transform, @@ -291,23 -304,6 @@ std::vector FindLTRotationIndicesSwitch(uint32_t dim1, uint32_t m, uint */ std::vector FindLTRotationIndicesSwitchArgmin(uint32_t m, uint32_t blockDimension, uint32_t cols); -namespace CKKS_BOOT_PARAMS { - -// Enums representing indices for the vector returned by GetCollapsedFFTParams() -enum { - LEVEL_BUDGET, // the level budget - LAYERS_COLL, // the number of layers to collapse in one level - LAYERS_REM, // the number of layers remaining to be collapsed in one level to have exactly the number of levels specified in the level budget - NUM_ROTATIONS, // the number of rotations in one level - BABY_STEP, // the baby step in the baby-step giant-step strategy - GIANT_STEP, // the giant step in the baby-step giant-step strategy - NUM_ROTATIONS_REM, // the number of rotations in the remaining level - BABY_STEP_REM, // the baby step in the baby-step giant-step strategy for the remaining level - GIANT_STEP_REM, // the giant step in the baby-step giant-step strategy for the remaining level - TOTAL_ELEMENTS // total number of elements in the vector -}; -} // namespace CKKS_BOOT_PARAMS - } // namespace lbcrypto #endif diff --git a/src/pke/lib/cryptocontext.cpp
b/src/pke/lib/cryptocontext.cpp index fe26edc97..ee2a31749 100644 --- a/src/pke/lib/cryptocontext.cpp +++ b/src/pke/lib/cryptocontext.cpp @@ -48,6 +48,23 @@ template std::map>>> CryptoContextImpl::s_evalAutomorphismKeyMap{}; +template +void CryptoContextImpl::ClearStaticMapsAndVectors() { + CryptoContextImpl::s_evalAutomorphismKeyMap.clear(); + CryptoContextImpl::s_evalMultKeyMap.clear(); + PackedEncoding::Destroy(); + intnat::ChineseRemainderTransformFTTNat().Reset(); +#ifdef WITH_BE2 + bigintfxd::ChineseRemainderTransformFTTFxd().Reset(); +#endif +#ifdef WITH_BE4 + bigintdyn::ChineseRemainderTransformFTTDyn().Reset(); +#endif +#ifdef WITH_NTL + NTL::ChineseRemainderTransformFTTNtl().Reset(); +#endif +} + template void CryptoContextImpl::SetKSTechniqueInScheme() { // check if the scheme is an RNS scheme diff --git a/src/pke/lib/keyswitch/keyswitch-bv.cpp b/src/pke/lib/keyswitch/keyswitch-bv.cpp index 93ee3b08b..04b3fe201 100644 --- a/src/pke/lib/keyswitch/keyswitch-bv.cpp +++ b/src/pke/lib/keyswitch/keyswitch-bv.cpp @@ -50,8 +50,6 @@ namespace lbcrypto { EvalKey KeySwitchBV::KeySwitchGenInternal(const PrivateKey oldKey, const PrivateKey newKey) const { - EvalKeyRelin ek(std::make_shared>(newKey->GetCryptoContext())); - const auto cryptoParams = std::dynamic_pointer_cast(newKey->GetCryptoParameters()); const DCRTPoly& sNew = newKey->GetPrivateElement(); @@ -86,6 +84,8 @@ EvalKey KeySwitchBV::KeySwitchGenInternal(const PrivateKey o std::vector av(nWindows); std::vector bv(nWindows); + // TODO: parallelize loop using fix from KeySwitchHYBRID::KeySwitchGenInternal + if (digitSize > 0) { for (usint i = 0; i < sOld.GetNumOfElements(); i++) { std::vector sOldDecomposed = sOld.GetElementAtIndex(i).PowersOfBase(digitSize); @@ -115,6 +115,7 @@ EvalKey KeySwitchBV::KeySwitchGenInternal(const PrivateKey o } } + EvalKeyRelin ek(std::make_shared>(newKey->GetCryptoContext())); ek->SetAVector(std::move(av)); ek->SetBVector(std::move(bv)); ek->SetKeyTag(newKey->GetKeyTag()); @@ -124,8 +125,6 @@ EvalKey KeySwitchBV::KeySwitchGenInternal(const PrivateKey o EvalKey KeySwitchBV::KeySwitchGenInternal(const PrivateKey oldKey, const PrivateKey newKey, const EvalKey ek) const { - EvalKeyRelin evalKey(std::make_shared>(newKey->GetCryptoContext())); - const auto cryptoParams = std::dynamic_pointer_cast(oldKey->GetCryptoParameters()); const DCRTPoly& sNew = newKey->GetPrivateElement(); @@ -161,6 +160,8 @@ EvalKey KeySwitchBV::KeySwitchGenInternal(const PrivateKey o std::vector av(nWindows); std::vector bv(nWindows); + // TODO: parallelize loop using fix from KeySwitchHYBRID::KeySwitchGenInternal + if (digitSize > 0) { for (usint i = 0; i < sizeSOld; i++) { std::vector sOldDecomposed = sOld.GetElementAtIndex(i).PowersOfBase(digitSize); @@ -200,6 +201,7 @@ EvalKey KeySwitchBV::KeySwitchGenInternal(const PrivateKey o } } + EvalKeyRelin evalKey(std::make_shared>(newKey->GetCryptoContext())); evalKey->SetAVector(std::move(av)); evalKey->SetBVector(std::move(bv)); evalKey->SetKeyTag(newKey->GetKeyTag()); diff --git a/src/pke/lib/keyswitch/keyswitch-hybrid.cpp b/src/pke/lib/keyswitch/keyswitch-hybrid.cpp index 552fee8fc..868e57d45 100644 --- a/src/pke/lib/keyswitch/keyswitch-hybrid.cpp +++ b/src/pke/lib/keyswitch/keyswitch-hybrid.cpp @@ -35,13 +35,12 @@ */ #define PROFILE -#include "keyswitch/keyswitch-hybrid.h" - +#include "ciphertext.h" +#include "key/evalkeyrelin.h" #include "key/privatekey.h" #include "key/publickey.h" -#include "key/evalkeyrelin.h" +#include "keyswitch/keyswitch-hybrid.h" #include 
"scheme/ckksrns/ckksrns-cryptoparameters.h" -#include "ciphertext.h" namespace lbcrypto { @@ -53,81 +52,78 @@ EvalKey KeySwitchHYBRID::KeySwitchGenInternal(const PrivateKey KeySwitchHYBRID::KeySwitchGenInternal(const PrivateKey oldKey, const PrivateKey newKey, const EvalKey ekPrev) const { - EvalKeyRelin ek(std::make_shared>(newKey->GetCryptoContext())); - const auto cryptoParams = std::dynamic_pointer_cast(newKey->GetCryptoParameters()); - - const std::shared_ptr paramsQ = cryptoParams->GetElementParams(); - const std::shared_ptr paramsQP = cryptoParams->GetParamsQP(); - - size_t sizeQ = paramsQ->GetParams().size(); - size_t sizeQP = paramsQP->GetParams().size(); - - DCRTPoly sOld = oldKey->GetPrivateElement(); - DCRTPoly sNew = newKey->GetPrivateElement().Clone(); + const auto& paramsQ = cryptoParams->GetElementParams(); + const auto& paramsQP = cryptoParams->GetParamsQP(); + const auto& pparamsQP = paramsQP->GetParams(); // skNew is currently in basis Q. This extends it to basis QP. - sNew.SetFormat(Format::COEFFICIENT); - - DCRTPoly sNewExt(paramsQP, Format::COEFFICIENT, true); - // The part with basis Q - for (size_t i = 0; i < sizeQ; i++) { - sNewExt.SetElementAtIndex(i, sNew.GetElementAtIndex(i)); - } + DCRTPoly sNewExt(paramsQP, Format::EVALUATION, true); + const auto& sNew = newKey->GetPrivateElement(); - // The part with basis P - for (size_t j = sizeQ; j < sizeQP; j++) { - const NativeInteger& pj = paramsQP->GetParams()[j]->GetModulus(); - const NativeInteger& rootj = paramsQP->GetParams()[j]->GetRootOfUnity(); - auto sNew0 = sNew.GetElementAtIndex(0); - sNew0.SwitchModulus(pj, rootj, 0, 0); - sNewExt.SetElementAtIndex(j, std::move(sNew0)); - } + auto sNew0 = sNew.GetElementAtIndex(0); + sNew0.SetFormat(Format::COEFFICIENT); - sNewExt.SetFormat(Format::EVALUATION); + const uint32_t sizeQ = paramsQ->GetParams().size(); + const uint32_t sizeQP = paramsQP->GetParams().size(); - const auto ns = cryptoParams->GetNoiseScale(); - const DggType& dgg = cryptoParams->GetDiscreteGaussianGenerator(); - DugType dug; +#pragma omp parallel for num_threads(OpenFHEParallelControls.GetThreadLimit(sizeQP)) + for (uint32_t i = 0; i < sizeQP; ++i) { + if (i < sizeQ) { + auto tmp = sNew.GetElementAtIndex(i); + tmp.SetFormat(Format::EVALUATION); + sNewExt.SetElementAtIndex(i, std::move(tmp)); + } + else { + auto tmp = sNew0; + tmp.SwitchModulus(pparamsQP[i]->GetModulus(), pparamsQP[i]->GetRootOfUnity(), 0, 0); + tmp.SetFormat(Format::EVALUATION); + sNewExt.SetElementAtIndex(i, std::move(tmp)); + } + } - size_t numPartQ = cryptoParams->GetNumPartQ(); + const auto ns = cryptoParams->GetNoiseScale(); + const uint32_t numPerPartQ = cryptoParams->GetNumPerPartQ(); + const uint32_t numPartQ = cryptoParams->GetNumPartQ(); std::vector av(numPartQ); std::vector bv(numPartQ); - std::vector PModq = cryptoParams->GetPModq(); - size_t numPerPartQ = cryptoParams->GetNumPerPartQ(); + DugType dug; + auto dgg = cryptoParams->GetDiscreteGaussianGenerator(); + + const auto& sOld = oldKey->GetPrivateElement(); + const auto& PModq = cryptoParams->GetPModq(); - for (size_t part = 0; part < numPartQ; ++part) { - DCRTPoly a = (ekPrev == nullptr) ? DCRTPoly(dug, paramsQP, Format::EVALUATION) : // single-key HE - ekPrev->GetAVector()[part]; // threshold HE +#pragma omp parallel for num_threads(OpenFHEParallelControls.GetThreadLimit(numPartQ)) private(dug, dgg) + for (uint32_t part = 0; part < numPartQ; ++part) { + auto a = (ekPrev == nullptr) ? 
DCRTPoly(dug, paramsQP, Format::EVALUATION) : // single-key HE + ekPrev->GetAVector()[part]; // threshold HE DCRTPoly e(dgg, paramsQP, Format::EVALUATION); DCRTPoly b(paramsQP, Format::EVALUATION, true); - // starting and ending position of current part - size_t startPartIdx = numPerPartQ * part; - size_t endPartIdx = (sizeQ > (startPartIdx + numPerPartQ)) ? (startPartIdx + numPerPartQ) : sizeQ; + const uint32_t startPartIdx = numPerPartQ * part; + const uint32_t endPartIdx = (sizeQ > (startPartIdx + numPerPartQ)) ? (startPartIdx + numPerPartQ) : sizeQ; - for (size_t i = 0; i < sizeQP; ++i) { - auto ai = a.GetElementAtIndex(i); - auto ei = e.GetElementAtIndex(i); - auto sNewi = sNewExt.GetElementAtIndex(i); + for (uint32_t i = 0; i < sizeQP; ++i) { + const auto& ai = a.GetElementAtIndex(i); + const auto& ei = e.GetElementAtIndex(i); + const auto& sni = sNewExt.GetElementAtIndex(i); if (i < startPartIdx || i >= endPartIdx) { - b.SetElementAtIndex(i, -ai * sNewi + ns * ei); + b.SetElementAtIndex(i, (-ai * sni) + (ns * ei)); } else { - // P * sOld is only applied for the current part - auto sOldi = sOld.GetElementAtIndex(i); - b.SetElementAtIndex(i, -ai * sNewi + PModq[i] * sOldi + ns * ei); + const auto& soi = sOld.GetElementAtIndex(i); + b.SetElementAtIndex(i, (-ai * sni) + (ns * ei) + (PModq[i] * soi)); } } - - av[part] = a; - bv[part] = b; + av[part] = std::move(a); + bv[part] = std::move(b); } + EvalKeyRelin ek(std::make_shared>(newKey->GetCryptoContext())); ek->SetAVector(std::move(av)); ek->SetBVector(std::move(bv)); ek->SetKeyTag(newKey->GetKeyTag()); @@ -136,72 +132,65 @@ EvalKey KeySwitchHYBRID::KeySwitchGenInternal(const PrivateKey KeySwitchHYBRID::KeySwitchGenInternal(const PrivateKey oldKey, const PublicKey newKey) const { - EvalKeyRelin ek = std::make_shared>(newKey->GetCryptoContext()); - const auto cryptoParams = std::dynamic_pointer_cast(newKey->GetCryptoParameters()); + const auto& paramsQ = cryptoParams->GetElementParams(); + const auto& paramsQP = cryptoParams->GetParamsQP(); - const std::shared_ptr paramsQ = cryptoParams->GetElementParams(); - const std::shared_ptr paramsQP = cryptoParams->GetParamsQP(); - - usint sizeQ = paramsQ->GetParams().size(); - usint sizeQP = paramsQP->GetParams().size(); + const uint32_t sizeQ = paramsQ->GetParams().size(); + const uint32_t sizeQP = paramsQP->GetParams().size(); - DCRTPoly sOld = oldKey->GetPrivateElement(); - - DCRTPoly newp0 = newKey->GetPublicElements().at(0); - DCRTPoly newp1 = newKey->GetPublicElements().at(1); - - const auto ns = cryptoParams->GetNoiseScale(); - const DggType& dgg = cryptoParams->GetDiscreteGaussianGenerator(); - TugType tug; - - auto numPartQ = cryptoParams->GetNumPartQ(); + const auto ns = cryptoParams->GetNoiseScale(); + const uint32_t numPerPartQ = cryptoParams->GetNumPerPartQ(); + const uint32_t numPartQ = cryptoParams->GetNumPartQ(); std::vector av(numPartQ); std::vector bv(numPartQ); - std::vector PModq = cryptoParams->GetPModq(); - usint numPerPartQ = cryptoParams->GetNumPerPartQ(); + TugType tug; + auto dgg = cryptoParams->GetDiscreteGaussianGenerator(); - for (usint part = 0; part < numPartQ; part++) { - DCRTPoly u = (cryptoParams->GetSecretKeyDist() == GAUSSIAN) ? 
DCRTPoly(dgg, paramsQP, Format::EVALUATION) : - DCRTPoly(tug, paramsQP, Format::EVALUATION); + const auto& sOld = oldKey->GetPrivateElement(); + const auto& newp0 = newKey->GetPublicElements().at(0); + const auto& newp1 = newKey->GetPublicElements().at(1); + const auto& PModq = cryptoParams->GetPModq(); +#pragma omp parallel for num_threads(OpenFHEParallelControls.GetThreadLimit(numPartQ)) private(dgg, tug) + for (uint32_t part = 0; part < numPartQ; ++part) { + auto u = (cryptoParams->GetSecretKeyDist() == GAUSSIAN) ? DCRTPoly(dgg, paramsQP, Format::EVALUATION) : + DCRTPoly(tug, paramsQP, Format::EVALUATION); DCRTPoly e0(dgg, paramsQP, Format::EVALUATION); DCRTPoly e1(dgg, paramsQP, Format::EVALUATION); - DCRTPoly a(paramsQP, Format::EVALUATION, true); DCRTPoly b(paramsQP, Format::EVALUATION, true); // starting and ending position of current part - usint startPartIdx = numPerPartQ * part; - usint endPartIdx = (sizeQ > startPartIdx + numPerPartQ) ? (startPartIdx + numPerPartQ) : sizeQ; + const uint32_t startPartIdx = numPerPartQ * part; + const uint32_t endPartIdx = (sizeQ > startPartIdx + numPerPartQ) ? (startPartIdx + numPerPartQ) : sizeQ; - for (usint i = 0; i < sizeQP; i++) { - auto e0i = e0.GetElementAtIndex(i); - auto e1i = e1.GetElementAtIndex(i); + for (uint32_t i = 0; i < sizeQP; ++i) { + const auto& ui = u.GetElementAtIndex(i); - auto ui = u.GetElementAtIndex(i); + const auto& e0i = e0.GetElementAtIndex(i); + const auto& e1i = e1.GetElementAtIndex(i); - auto newp0i = newp0.GetElementAtIndex(i); - auto newp1i = newp1.GetElementAtIndex(i); + const auto& newp0i = newp0.GetElementAtIndex(i); + const auto& newp1i = newp1.GetElementAtIndex(i); a.SetElementAtIndex(i, newp1i * ui + ns * e1i); if (i < startPartIdx || i >= endPartIdx) { - b.SetElementAtIndex(i, newp0i * ui + ns * e0i); + b.SetElementAtIndex(i, (newp0i * ui) + (ns * e0i)); } else { - // P * sOld is only applied for the current part - auto sOldi = sOld.GetElementAtIndex(i); - b.SetElementAtIndex(i, newp0i * ui + ns * e0i + PModq[i] * sOldi); + const auto& soi = sOld.GetElementAtIndex(i); + b.SetElementAtIndex(i, (newp0i * ui) + (ns * e0i) + (PModq[i] * soi)); } } - - av[part] = a; - bv[part] = b; + av[part] = std::move(a); + bv[part] = std::move(b); } + EvalKeyRelin ek = std::make_shared>(newKey->GetCryptoContext()); ek->SetAVector(std::move(av)); ek->SetBVector(std::move(bv)); ek->SetKeyTag(newKey->GetKeyTag()); @@ -209,114 +198,110 @@ EvalKey KeySwitchHYBRID::KeySwitchGenInternal(const PrivateKey& ciphertext, const EvalKey ek) const { - std::vector& cv = ciphertext->GetElements(); - - std::shared_ptr> ba = (cv.size() == 2) ? 
KeySwitchCore(cv[1], ek) : KeySwitchCore(cv[2], ek); + auto& cv = ciphertext->GetElements(); + auto ba = KeySwitchCore(cv.back(), ek); cv[0].SetFormat((*ba)[0].GetFormat()); cv[0] += (*ba)[0]; cv[1].SetFormat((*ba)[1].GetFormat()); - if (cv.size() > 2) { + if (cv.size() > 2) cv[1] += (*ba)[1]; - } - else { + else cv[1] = (*ba)[1]; - } + cv.resize(2); } Ciphertext KeySwitchHYBRID::KeySwitchExt(ConstCiphertext ciphertext, bool addFirst) const { const auto cryptoParams = std::dynamic_pointer_cast(ciphertext->GetCryptoParameters()); - const std::vector& cv = ciphertext->GetElements(); + const auto& cv = ciphertext->GetElements(); + const auto& PModq = cryptoParams->GetPModq(); - const auto paramsQl = cv[0].GetParams(); const auto paramsP = cryptoParams->GetParamsP(); + const auto paramsQl = cv[0].GetParams(); const auto paramsQlP = cv[0].GetExtendedCRTBasis(paramsP); - uint32_t sizeQl = paramsQl->GetParams().size(); - uint32_t sizeCv = cv.size(); - std::vector resultElements(sizeCv); + const uint32_t sizeCv = cv.size(); + const uint32_t sizeQl = paramsQl->GetParams().size(); + std::vector elements(sizeCv); + for (uint32_t k = 0; k < sizeCv; ++k) { - resultElements[k] = DCRTPoly(paramsQlP, Format::EVALUATION, true); + elements[k] = DCRTPoly(paramsQlP, Format::EVALUATION, true); if ((addFirst) || (k > 0)) { - auto cMult = cv[k].TimesNoCheck(cryptoParams->GetPModq()); + auto cMult = cv[k].TimesNoCheck(PModq); for (uint32_t i = 0; i < sizeQl; ++i) { - resultElements[k].SetElementAtIndex(i, std::move(cMult.GetElementAtIndex(i))); + elements[k].SetElementAtIndex(i, std::move(cMult.GetElementAtIndex(i))); } } } auto result = ciphertext->CloneEmpty(); - result->SetElements(std::move(resultElements)); + result->SetElements(std::move(elements)); return result; } Ciphertext KeySwitchHYBRID::KeySwitchDown(ConstCiphertext ciphertext) const { - const auto cryptoParams = std::dynamic_pointer_cast(ciphertext->GetCryptoParameters()); + const auto& cv = ciphertext->GetElements(); + const auto paramsQlP = cv[0].GetParams(); - const auto paramsP = cryptoParams->GetParamsP(); - const auto paramsQlP = ciphertext->GetElements()[0].GetParams(); + const auto cryptoParams = std::dynamic_pointer_cast(ciphertext->GetCryptoParameters()); + const auto paramsP = cryptoParams->GetParamsP(); // TODO : (Andrey) precompute paramsQl in cryptoparameters - usint sizeQl = paramsQlP->GetParams().size() - paramsP->GetParams().size(); + const uint32_t sizeQl = paramsQlP->GetParams().size() - paramsP->GetParams().size(); std::vector moduliQ(sizeQl); std::vector rootsQ(sizeQl); - for (size_t i = 0; i < sizeQl; i++) { + for (uint32_t i = 0; i < sizeQl; ++i) { moduliQ[i] = paramsQlP->GetParams()[i]->GetModulus(); rootsQ[i] = paramsQlP->GetParams()[i]->GetRootOfUnity(); } - auto paramsQl = std::make_shared(2 * paramsQlP->GetRingDimension(), moduliQ, rootsQ); - - auto cTilda = ciphertext->GetElements(); - - PlaintextModulus t = (cryptoParams->GetNoiseScale() == 1) ? 
0 : cryptoParams->GetPlaintextModulus(); - - DCRTPoly ct0 = cTilda[0].ApproxModDown(paramsQl, cryptoParams->GetParamsP(), cryptoParams->GetPInvModq(), - cryptoParams->GetPInvModqPrecon(), cryptoParams->GetPHatInvModp(), - cryptoParams->GetPHatInvModpPrecon(), cryptoParams->GetPHatModq(), - cryptoParams->GetModqBarrettMu(), cryptoParams->GettInvModp(), - cryptoParams->GettInvModpPrecon(), t, cryptoParams->GettModqPrecon()); - - DCRTPoly ct1 = cTilda[1].ApproxModDown(paramsQl, cryptoParams->GetParamsP(), cryptoParams->GetPInvModq(), - cryptoParams->GetPInvModqPrecon(), cryptoParams->GetPHatInvModp(), - cryptoParams->GetPHatInvModpPrecon(), cryptoParams->GetPHatModq(), - cryptoParams->GetModqBarrettMu(), cryptoParams->GettInvModp(), - cryptoParams->GettInvModpPrecon(), t, cryptoParams->GettModqPrecon()); + const auto paramsQl = std::make_shared(paramsQlP->GetCyclotomicOrder(), moduliQ, rootsQ); + + const PlaintextModulus t = (cryptoParams->GetNoiseScale() == 1) ? 0 : cryptoParams->GetPlaintextModulus(); + + std::vector elements(2); + elements[0] = cv[0].ApproxModDown(paramsQl, cryptoParams->GetParamsP(), cryptoParams->GetPInvModq(), + cryptoParams->GetPInvModqPrecon(), cryptoParams->GetPHatInvModp(), + cryptoParams->GetPHatInvModpPrecon(), cryptoParams->GetPHatModq(), + cryptoParams->GetModqBarrettMu(), cryptoParams->GettInvModp(), + cryptoParams->GettInvModpPrecon(), t, cryptoParams->GettModqPrecon()); + elements[1] = cv[1].ApproxModDown(paramsQl, cryptoParams->GetParamsP(), cryptoParams->GetPInvModq(), + cryptoParams->GetPInvModqPrecon(), cryptoParams->GetPHatInvModp(), + cryptoParams->GetPHatInvModpPrecon(), cryptoParams->GetPHatModq(), + cryptoParams->GetModqBarrettMu(), cryptoParams->GettInvModp(), + cryptoParams->GettInvModpPrecon(), t, cryptoParams->GettModqPrecon()); auto result = ciphertext->CloneEmpty(); - result->SetElements({std::move(ct0), std::move(ct1)}); + result->SetElements(std::move(elements)); return result; } DCRTPoly KeySwitchHYBRID::KeySwitchDownFirstElement(ConstCiphertext ciphertext) const { - const auto cryptoParams = std::dynamic_pointer_cast(ciphertext->GetCryptoParameters()); - - const std::vector& cTilda = ciphertext->GetElements(); + const auto& cv = ciphertext->GetElements()[0]; + const auto paramsQlP = cv.GetParams(); - const auto paramsP = cryptoParams->GetParamsP(); - const auto paramsQlP = cTilda[0].GetParams(); + const auto cryptoParams = std::dynamic_pointer_cast(ciphertext->GetCryptoParameters()); + const auto paramsP = cryptoParams->GetParamsP(); // TODO : (Andrey) precompute paramsQl in cryptoparameters - usint sizeQl = paramsQlP->GetParams().size() - paramsP->GetParams().size(); + const uint32_t sizeQl = paramsQlP->GetParams().size() - paramsP->GetParams().size(); std::vector moduliQ(sizeQl); std::vector rootsQ(sizeQl); - for (size_t i = 0; i < sizeQl; i++) { + for (uint32_t i = 0; i < sizeQl; ++i) { moduliQ[i] = paramsQlP->GetParams()[i]->GetModulus(); rootsQ[i] = paramsQlP->GetParams()[i]->GetRootOfUnity(); } - auto paramsQl = std::make_shared(2 * paramsQlP->GetRingDimension(), moduliQ, rootsQ); - - PlaintextModulus t = (cryptoParams->GetNoiseScale() == 1) ? 
0 : cryptoParams->GetPlaintextModulus(); + const auto paramsQl = std::make_shared(paramsQlP->GetCyclotomicOrder(), moduliQ, rootsQ); - DCRTPoly cv0 = cTilda[0].ApproxModDown(paramsQl, cryptoParams->GetParamsP(), cryptoParams->GetPInvModq(), - cryptoParams->GetPInvModqPrecon(), cryptoParams->GetPHatInvModp(), - cryptoParams->GetPHatInvModpPrecon(), cryptoParams->GetPHatModq(), - cryptoParams->GetModqBarrettMu(), cryptoParams->GettInvModp(), - cryptoParams->GettInvModpPrecon(), t, cryptoParams->GettModqPrecon()); + const PlaintextModulus t = (cryptoParams->GetNoiseScale() == 1) ? 0 : cryptoParams->GetPlaintextModulus(); - return cv0; + return cv.ApproxModDown(paramsQl, cryptoParams->GetParamsP(), cryptoParams->GetPInvModq(), + cryptoParams->GetPInvModqPrecon(), cryptoParams->GetPHatInvModp(), + cryptoParams->GetPHatInvModpPrecon(), cryptoParams->GetPHatModq(), + cryptoParams->GetModqBarrettMu(), cryptoParams->GettInvModp(), + cryptoParams->GettInvModpPrecon(), t, cryptoParams->GettModqPrecon()); } std::shared_ptr> KeySwitchHYBRID::KeySwitchCore(const DCRTPoly& a, @@ -329,86 +314,67 @@ std::shared_ptr> KeySwitchHYBRID::EvalKeySwitchPrecomputeC const DCRTPoly& c, std::shared_ptr> cryptoParamsBase) const { const auto cryptoParams = std::dynamic_pointer_cast(cryptoParamsBase); - const std::shared_ptr paramsQl = c.GetParams(); - const std::shared_ptr paramsP = cryptoParams->GetParamsP(); - const std::shared_ptr paramsQlP = c.GetExtendedCRTBasis(paramsP); - - size_t sizeQl = paramsQl->GetParams().size(); - size_t sizeP = paramsP->GetParams().size(); - size_t sizeQlP = sizeQl + sizeP; + const auto paramsQl = c.GetParams(); + const auto paramsP = cryptoParams->GetParamsP(); + const auto paramsQlP = c.GetExtendedCRTBasis(paramsP); - uint32_t alpha = cryptoParams->GetNumPerPartQ(); + const uint32_t sizeQl = paramsQl->GetParams().size(); + const uint32_t sizeP = paramsP->GetParams().size(); + const uint32_t sizeQlP = sizeQl + sizeP; + const uint32_t alpha = cryptoParams->GetNumPerPartQ(); // The number of digits of the current ciphertext - uint32_t numPartQl = ceil((static_cast(sizeQl)) / alpha); + uint32_t numPartQl = std::ceil(static_cast(sizeQl) / alpha); if (numPartQl > cryptoParams->GetNumberOfQPartitions()) numPartQl = cryptoParams->GetNumberOfQPartitions(); - std::vector partsCt(numPartQl); + auto result = std::make_shared>(numPartQl); // Digit decomposition // Zero-padding and split - for (uint32_t part = 0; part < numPartQl; part++) { + for (uint32_t part = 0; part < numPartQl; ++part) { + DCRTPoly partsCt; if (part == numPartQl - 1) { - auto paramsPartQ = cryptoParams->GetParamsPartQ(part); + const auto& paramsPartQ = cryptoParams->GetParamsPartQ(part); uint32_t sizePartQl = sizeQl - alpha * part; - std::vector moduli(sizePartQl); std::vector roots(sizePartQl); - - for (uint32_t i = 0; i < sizePartQl; i++) { + for (uint32_t i = 0; i < sizePartQl; ++i) { moduli[i] = paramsPartQ->GetParams()[i]->GetModulus(); roots[i] = paramsPartQ->GetParams()[i]->GetRootOfUnity(); } - - auto params = DCRTPoly::Params(paramsPartQ->GetCyclotomicOrder(), moduli, roots); - - partsCt[part] = DCRTPoly(std::make_shared(params), Format::EVALUATION, true); + auto&& params = std::make_shared(paramsPartQ->GetCyclotomicOrder(), moduli, roots); + partsCt = DCRTPoly(params, Format::EVALUATION, true); } else { - partsCt[part] = DCRTPoly(cryptoParams->GetParamsPartQ(part), Format::EVALUATION, true); + partsCt = DCRTPoly(cryptoParams->GetParamsPartQ(part), Format::EVALUATION, true); } - usint sizePartQl = 
partsCt[part].GetNumOfElements(); - usint startPartIdx = alpha * part; - for (uint32_t i = 0, idx = startPartIdx; i < sizePartQl; i++, idx++) { - partsCt[part].SetElementAtIndex(i, c.GetElementAtIndex(idx)); - } + const uint32_t sizePartQl = partsCt.GetNumOfElements(); + const uint32_t startPartIdx = alpha * part; + for (uint32_t i = 0, idx = startPartIdx; i < sizePartQl; ++i, ++idx) + partsCt.SetElementAtIndex(i, c.GetElementAtIndex(idx)); + + partsCt.SetFormat(Format::COEFFICIENT); + auto partsCtCompl = partsCt.ApproxSwitchCRTBasis(cryptoParams->GetParamsPartQ(part), + cryptoParams->GetParamsComplPartQ(sizeQl - 1, part), + cryptoParams->GetPartQlHatInvModq(part, sizePartQl - 1), + cryptoParams->GetPartQlHatInvModqPrecon(part, sizePartQl - 1), + cryptoParams->GetPartQlHatModp(sizeQl - 1, part), + cryptoParams->GetmodComplPartqBarrettMu(sizeQl - 1, part)); + partsCtCompl.SetFormat(Format::EVALUATION); + + (*result)[part] = DCRTPoly(paramsQlP, Format::EVALUATION, true); + + const uint32_t endPartIdx = startPartIdx + sizePartQl; + for (uint32_t i = 0; i < startPartIdx; ++i) + (*result)[part].SetElementAtIndex(i, std::move(partsCtCompl.GetElementAtIndex(i))); + for (uint32_t i = startPartIdx; i < endPartIdx; ++i) + (*result)[part].SetElementAtIndex(i, c.GetElementAtIndex(i)); + for (uint32_t i = endPartIdx; i < sizeQlP; ++i) + (*result)[part].SetElementAtIndex(i, std::move(partsCtCompl.GetElementAtIndex(i - sizePartQl))); } - - std::vector partsCtCompl(numPartQl); - std::vector partsCtExt(numPartQl); - - for (uint32_t part = 0; part < numPartQl; part++) { - auto partCtClone = partsCt[part].Clone(); - partCtClone.SetFormat(Format::COEFFICIENT); - - uint32_t sizePartQl = partsCt[part].GetNumOfElements(); - partsCtCompl[part] = partCtClone.ApproxSwitchCRTBasis( - cryptoParams->GetParamsPartQ(part), cryptoParams->GetParamsComplPartQ(sizeQl - 1, part), - cryptoParams->GetPartQlHatInvModq(part, sizePartQl - 1), - cryptoParams->GetPartQlHatInvModqPrecon(part, sizePartQl - 1), - cryptoParams->GetPartQlHatModp(sizeQl - 1, part), - cryptoParams->GetmodComplPartqBarrettMu(sizeQl - 1, part)); - - partsCtCompl[part].SetFormat(Format::EVALUATION); - - partsCtExt[part] = DCRTPoly(paramsQlP, Format::EVALUATION, true); - - usint startPartIdx = alpha * part; - usint endPartIdx = startPartIdx + sizePartQl; - for (usint i = 0; i < startPartIdx; i++) { - partsCtExt[part].SetElementAtIndex(i, partsCtCompl[part].GetElementAtIndex(i)); - } - for (usint i = startPartIdx, idx = 0; i < endPartIdx; i++, idx++) { - partsCtExt[part].SetElementAtIndex(i, partsCt[part].GetElementAtIndex(idx)); - } - for (usint i = endPartIdx; i < sizeQlP; ++i) { - partsCtExt[part].SetElementAtIndex(i, partsCtCompl[part].GetElementAtIndex(i - sizePartQl)); - } - } - - return std::make_shared>(std::move(partsCtExt)); + return result; } std::shared_ptr> KeySwitchHYBRID::EvalFastKeySwitchCore( @@ -416,67 +382,55 @@ std::shared_ptr> KeySwitchHYBRID::EvalFastKeySwitchCore( const std::shared_ptr paramsQl) const { const auto cryptoParams = std::dynamic_pointer_cast(evalKey->GetCryptoParameters()); - std::shared_ptr> cTilda = EvalFastKeySwitchCoreExt(digits, evalKey, paramsQl); - - PlaintextModulus t = (cryptoParams->GetNoiseScale() == 1) ? 0 : cryptoParams->GetPlaintextModulus(); + const PlaintextModulus t = (cryptoParams->GetNoiseScale() == 1) ? 
0 : cryptoParams->GetPlaintextModulus(); - DCRTPoly ct0 = (*cTilda)[0].ApproxModDown(paramsQl, cryptoParams->GetParamsP(), cryptoParams->GetPInvModq(), + auto result = EvalFastKeySwitchCoreExt(digits, evalKey, paramsQl); + (*result)[0] = (*result)[0].ApproxModDown(paramsQl, cryptoParams->GetParamsP(), cryptoParams->GetPInvModq(), cryptoParams->GetPInvModqPrecon(), cryptoParams->GetPHatInvModp(), cryptoParams->GetPHatInvModpPrecon(), cryptoParams->GetPHatModq(), cryptoParams->GetModqBarrettMu(), cryptoParams->GettInvModp(), cryptoParams->GettInvModpPrecon(), t, cryptoParams->GettModqPrecon()); - - DCRTPoly ct1 = (*cTilda)[1].ApproxModDown(paramsQl, cryptoParams->GetParamsP(), cryptoParams->GetPInvModq(), + (*result)[1] = (*result)[1].ApproxModDown(paramsQl, cryptoParams->GetParamsP(), cryptoParams->GetPInvModq(), cryptoParams->GetPInvModqPrecon(), cryptoParams->GetPHatInvModp(), cryptoParams->GetPHatInvModpPrecon(), cryptoParams->GetPHatModq(), cryptoParams->GetModqBarrettMu(), cryptoParams->GettInvModp(), cryptoParams->GettInvModpPrecon(), t, cryptoParams->GettModqPrecon()); - - return std::make_shared>(std::initializer_list{std::move(ct0), std::move(ct1)}); + return result; } std::shared_ptr> KeySwitchHYBRID::EvalFastKeySwitchCoreExt( const std::shared_ptr> digits, const EvalKey evalKey, const std::shared_ptr paramsQl) const { - const auto cryptoParams = std::dynamic_pointer_cast(evalKey->GetCryptoParameters()); - const std::vector& bv = evalKey->GetBVector(); - const std::vector& av = evalKey->GetAVector(); - - const std::shared_ptr paramsP = cryptoParams->GetParamsP(); - const std::shared_ptr paramsQlP = (*digits)[0].GetParams(); - - size_t sizeQl = paramsQl->GetParams().size(); - size_t sizeQlP = paramsQlP->GetParams().size(); - size_t sizeQ = cryptoParams->GetElementParams()->GetParams().size(); - - DCRTPoly cTilda0(paramsQlP, Format::EVALUATION, true); - DCRTPoly cTilda1(paramsQlP, Format::EVALUATION, true); - - for (uint32_t j = 0; j < digits->size(); j++) { - const DCRTPoly& cj = (*digits)[j]; - const DCRTPoly& bj = bv[j]; - const DCRTPoly& aj = av[j]; - - for (usint i = 0; i < sizeQl; i++) { - const auto& cji = cj.GetElementAtIndex(i); - const auto& aji = aj.GetElementAtIndex(i); - const auto& bji = bj.GetElementAtIndex(i); - - cTilda0.SetElementAtIndex(i, cTilda0.GetElementAtIndex(i) + cji * bji); - cTilda1.SetElementAtIndex(i, cTilda1.GetElementAtIndex(i) + cji * aji); - } - for (usint i = sizeQl, idx = sizeQ; i < sizeQlP; i++, idx++) { - const auto& cji = cj.GetElementAtIndex(i); - const auto& aji = aj.GetElementAtIndex(idx); - const auto& bji = bj.GetElementAtIndex(idx); - - cTilda0.SetElementAtIndex(i, cTilda0.GetElementAtIndex(i) + cji * bji); - cTilda1.SetElementAtIndex(i, cTilda1.GetElementAtIndex(i) + cji * aji); + const auto paramsQlP = (*digits)[0].GetParams(); + const uint32_t sizeQlP = paramsQlP->GetParams().size(); + + const uint32_t limit = digits->size(); + const uint32_t sizeQl = paramsQl->GetParams().size(); + auto&& cryptoParams = std::dynamic_pointer_cast(evalKey->GetCryptoParameters()); + const uint32_t delta = cryptoParams->GetElementParams()->GetParams().size() - sizeQl; + + const auto& av = evalKey->GetAVector(); + const auto& bv = evalKey->GetBVector(); + + auto result = std::make_shared>(); + result->reserve(2); + result->emplace_back(paramsQlP, Format::EVALUATION, true); + result->emplace_back(paramsQlP, Format::EVALUATION, true); + auto& elements = (*result); + + for (uint32_t j = 0; j < limit; ++j) { +#pragma omp parallel for 
num_threads(OpenFHEParallelControls.GetThreadLimit(sizeQlP)) + for (uint32_t i = 0; i < sizeQlP; ++i) { + const auto idx = (i >= sizeQl) ? i + delta : i; + const auto& cji = (*digits)[j].GetElementAtIndex(i); + const auto& bji = bv[j].GetElementAtIndex(idx); + const auto& aji = av[j].GetElementAtIndex(idx); + elements[0].SetElementAtIndex(i, elements[0].GetElementAtIndex(i) + cji * bji); + elements[1].SetElementAtIndex(i, elements[1].GetElementAtIndex(i) + cji * aji); } } - return std::make_shared>( - std::initializer_list{std::move(cTilda0), std::move(cTilda1)}); + return result; } } // namespace lbcrypto diff --git a/src/pke/lib/scheme/ckksrns/ckksrns-advancedshe.cpp b/src/pke/lib/scheme/ckksrns/ckksrns-advancedshe.cpp index d9b59abd6..decd7c7aa 100644 --- a/src/pke/lib/scheme/ckksrns/ckksrns-advancedshe.cpp +++ b/src/pke/lib/scheme/ckksrns/ckksrns-advancedshe.cpp @@ -33,8 +33,6 @@ CKKS implementation. See https://eprint.iacr.org/2020/1118 for details. */ -#define PROFILE - #include "cryptocontext.h" #include "scheme/ckksrns/ckksrns-cryptoparameters.h" #include "scheme/ckksrns/ckksrns-advancedshe.h" @@ -48,38 +46,36 @@ namespace lbcrypto { Ciphertext AdvancedSHECKKSRNS::EvalMultMany(const std::vector>& ciphertextVec, const std::vector>& evalKeys) const { - const size_t inSize = ciphertextVec.size(); + const uint32_t inSize = ciphertextVec.size(); if (inSize == 0) OPENFHE_THROW("Input ciphertext vector is empty."); if (inSize == 1) - return ciphertextVec[0]; + return ciphertextVec[0]->Clone(); - const size_t lim = inSize * 2 - 2; - std::vector> ciphertextMultVec; - ciphertextMultVec.resize(inSize - 1); + const uint32_t lim = inSize * 2 - 2; + std::vector> ciphertextMultVec(inSize - 1); auto algo = ciphertextVec[0]->GetCryptoContext()->GetScheme(); const auto cryptoParams = std::dynamic_pointer_cast(ciphertextVec[0]->GetCryptoParameters()); uint32_t levelsToDrop = cryptoParams->GetCompositeDegree(); - size_t ctrIndex = 0; - size_t i = 0; + uint32_t i = 0, j = 0; for (; i < (inSize - 1); i += 2) { - ciphertextMultVec[ctrIndex] = algo->EvalMultAndRelinearize(ciphertextVec[i], ciphertextVec[(i + 1)], evalKeys); - algo->ModReduceInPlace(ciphertextMultVec[ctrIndex++], levelsToDrop); + ciphertextMultVec[j] = algo->EvalMultAndRelinearize(ciphertextVec[i], ciphertextVec[i + 1], evalKeys); + algo->ModReduceInPlace(ciphertextMultVec[j++], levelsToDrop); } if (i < inSize) { - ciphertextMultVec[ctrIndex] = + ciphertextMultVec[j] = algo->EvalMultAndRelinearize(ciphertextVec[i], ciphertextMultVec[i + 1 - inSize], evalKeys); - algo->ModReduceInPlace(ciphertextMultVec[ctrIndex++], levelsToDrop); + algo->ModReduceInPlace(ciphertextMultVec[j++], levelsToDrop); i += 2; } for (; i < lim; i += 2) { - ciphertextMultVec[ctrIndex] = + ciphertextMultVec[j] = algo->EvalMultAndRelinearize(ciphertextMultVec[i - inSize], ciphertextMultVec[i + 1 - inSize], evalKeys); - algo->ModReduceInPlace(ciphertextMultVec[ctrIndex++], levelsToDrop); + algo->ModReduceInPlace(ciphertextMultVec[j++], levelsToDrop); } return ciphertextMultVec.back(); @@ -90,27 +86,30 @@ Ciphertext AdvancedSHECKKSRNS::EvalMultMany(const std::vector -static inline Ciphertext internalEvalLinearWSum(std::vector>& ciphertexts, - const std::vector& constants) { - std::vector> cts(ciphertexts.size()); - for (uint32_t i = 0; i < ciphertexts.size(); i++) +Ciphertext internalEvalLinearWSum(const std::vector>& ciphertexts, + const std::vector& constants) { + const uint32_t limit = ciphertexts.size(); + std::vector> cts(limit); + for (uint32_t i = 0; i < limit; 
++i) cts[i] = ciphertexts[i]->Clone(); return internalEvalLinearWSumMutable(cts, constants); } template -static inline Ciphertext internalEvalLinearWSumMutable(std::vector>& ciphertexts, - const std::vector& constants) { +Ciphertext internalEvalLinearWSumMutable(std::vector>& ciphertexts, + const std::vector& constants) { const auto cryptoParams = std::dynamic_pointer_cast(ciphertexts[0]->GetCryptoParameters()); auto cc = ciphertexts[0]->GetCryptoContext(); + const uint32_t limit = ciphertexts.size(); + if (cryptoParams->GetScalingTechnique() != FIXEDMANUAL) { // Check to see if input ciphertexts are of same level // and adjust if needed to the max level among them uint32_t maxLevel = ciphertexts[0]->GetLevel(); uint32_t maxIdx = 0; - for (uint32_t i = 1; i < ciphertexts.size(); ++i) { + for (uint32_t i = 1; i < limit; ++i) { if ((ciphertexts[i]->GetLevel() > maxLevel) || ((ciphertexts[i]->GetLevel() == maxLevel) && (ciphertexts[i]->GetNoiseScaleDeg() == 2))) { maxLevel = ciphertexts[i]->GetLevel(); @@ -121,28 +120,76 @@ static inline Ciphertext internalEvalLinearWSumMutable(std::vectorGetScheme(); for (uint32_t i = 0; i < maxIdx; ++i) algo->AdjustLevelsAndDepthInPlace(ciphertexts[i], ciphertexts[maxIdx]); - for (uint32_t i = maxIdx + 1; i < ciphertexts.size(); ++i) + for (uint32_t i = maxIdx + 1; i < limit; ++i) algo->AdjustLevelsAndDepthInPlace(ciphertexts[i], ciphertexts[maxIdx]); uint32_t compositeDegree = cryptoParams->GetCompositeDegree(); if (ciphertexts[maxIdx]->GetNoiseScaleDeg() == 2) { - for (uint32_t i = 0; i < ciphertexts.size(); ++i) { + for (uint32_t i = 0; i < limit; ++i) algo->ModReduceInternalInPlace(ciphertexts[i], compositeDegree); - } } } - Ciphertext weightedSum = cc->EvalMult(ciphertexts[0], constants[0]); - - Ciphertext tmp; - for (uint32_t i = 1; i < ciphertexts.size(); i++) { - tmp = cc->EvalMult(ciphertexts[i], constants[i]); - cc->EvalAddInPlace(weightedSum, tmp); + cc->EvalMultInPlace(ciphertexts[0], constants[0]); + for (uint32_t i = 1; i < limit; ++i) { + cc->EvalMultInPlace(ciphertexts[i], constants[i]); + cc->EvalAddInPlaceNoCheck(ciphertexts[0], ciphertexts[i]); } + cc->ModReduceInPlace(ciphertexts[0]); + return ciphertexts[0]; +} + +template +Ciphertext EvalPartialLinearWSum(const std::vector>& ciphertexts, + const std::vector& constants, uint32_t limit = 0) { + if (0 == limit) + limit = ciphertexts.size(); + + const auto cryptoParams = std::dynamic_pointer_cast(ciphertexts[0]->GetCryptoParameters()); + + auto cc = ciphertexts[0]->GetCryptoContext(); - cc->ModReduceInPlace(weightedSum); + std::vector> cts(limit); + if (cryptoParams->GetScalingTechnique() != FIXEDMANUAL) { + cts[0] = ciphertexts[0]->Clone(); + // Check to see if input ciphertexts are of same level + // and adjust if needed to the max level among them + uint32_t maxLevel = cts[0]->GetLevel(); + uint32_t maxIdx = 0; + for (uint32_t i = 1; i < limit; ++i) { + cts[i] = ciphertexts[i]->Clone(); + if ((cts[i]->GetLevel() > maxLevel) || + ((cts[i]->GetLevel() == maxLevel) && (cts[i]->GetNoiseScaleDeg() == 2))) { + maxLevel = cts[i]->GetLevel(); + maxIdx = i; + } + } - return weightedSum; + auto algo = cc->GetScheme(); + auto& ctm = cts[maxIdx]; + for (uint32_t i = 0; i < maxIdx; ++i) + algo->AdjustLevelsAndDepthInPlace(cts[i], ctm); + for (uint32_t i = maxIdx + 1; i < limit; ++i) + algo->AdjustLevelsAndDepthInPlace(cts[i], ctm); + + uint32_t compositeDegree = cryptoParams->GetCompositeDegree(); + if (ctm->GetNoiseScaleDeg() == 2) { + for (uint32_t i = 0; i < limit; ++i) + 
algo->ModReduceInternalInPlace(cts[i], compositeDegree); + } + } + else { + for (uint32_t i = 0; i < limit; ++i) + cts[i] = ciphertexts[i]->Clone(); + } + + cc->EvalMultInPlace(cts[0], constants[1]); + for (uint32_t i = 1; i < limit; ++i) { + cc->EvalMultInPlace(cts[i], constants[i + 1]); + cc->EvalAddInPlaceNoCheck(cts[0], cts[i]); + } + cc->ModReduceInPlace(cts[0]); + return cts[0]; } Ciphertext AdvancedSHECKKSRNS::EvalLinearWSum(std::vector>& ciphertexts, @@ -178,57 +225,49 @@ Ciphertext AdvancedSHECKKSRNS::EvalLinearWSumMutable( template std::shared_ptr> internalEvalPowersLinear(ConstCiphertext& x, const std::vector& coefficients) { - uint32_t k = coefficients.size() - 1; - std::vector indices(k); - // set the indices for the powers of x that need to be computed to 1 + const uint32_t k = coefficients.size() - 1; + std::vector indices(k); + + // find indices for powers of x that need to be computed for (uint32_t i = k; i > 0; --i) { - if (!(i & (i - 1))) { - // if i is a power of 2 - indices[i - 1] = 1; + if (0 == (i & (i - 1))) { // if i is a power of 2 + indices[i - 1] = true; } - else { - // non-power of 2 + else { // non-power of 2 if (IsNotEqualZero(coefficients[i])) { - indices[i - 1] = 1; - int64_t powerOf2 = int64_t(1) << static_cast(std::floor(std::log2(i))); - int64_t rem = i % powerOf2; - if (indices[rem - 1] == 0) - indices[rem - 1] = 1; + uint32_t rem = i; // while rem is not a power of 2 // set indices required to compute rem to 1 - while ((rem & (rem - 1))) { - powerOf2 = 1 << static_cast(std::floor(std::log2(rem))); - rem = rem % powerOf2; - if (indices[rem - 1] == 0) - indices[rem - 1] = 1; + while (0 != (rem & (rem - 1))) { + indices[rem - 1] = true; + rem &= (uint64_t(1) << (GetMSB(rem) - 1)) - 1; } } } } std::vector> powers(k); - powers[0] = x->Clone(); - auto cc = x->GetCryptoContext(); + powers[0] = x->Clone(); + auto cc = x->GetCryptoContext(); + auto cryptoParams = std::dynamic_pointer_cast(x->GetCryptoParameters()); uint32_t compositeDegree = cryptoParams->GetCompositeDegree(); // computes all powers up to k for x for (uint32_t i = 2; i <= k; ++i) { - if (!(i & (i - 1))) { - // if i is a power of two - powers[i - 1] = cc->EvalMult(powers[i / 2 - 1], powers[i / 2 - 1]); + if (0 == (i & (i - 1))) { + powers[i - 1] = cc->EvalSquare(powers[i / 2 - 1]); cc->ModReduceInPlace(powers[i - 1]); } else { - if (indices[i - 1] == 1) { - // non-power of 2 - int64_t powerOf2 = int64_t(1) << static_cast(std::floor(std::log2(i))); - int64_t rem = i % powerOf2; - uint32_t levelDiff = powers[powerOf2 - 1]->GetLevel() - powers[rem - 1]->GetLevel(); - cc->LevelReduceInPlace(powers[rem - 1], nullptr, levelDiff / compositeDegree); - - powers[i - 1] = cc->EvalMult(powers[powerOf2 - 1], powers[rem - 1]); + if (indices[i - 1]) { + uint64_t p = (uint64_t(1) << (GetMSB(i) - 1)) - 1; + uint64_t r = (i & p) - 1; + uint32_t diff = powers[p]->GetLevel() - powers[r]->GetLevel(); + cc->LevelReduceInPlace(powers[r], nullptr, diff / compositeDegree); + + powers[i - 1] = cc->EvalMult(powers[p], powers[r]); cc->ModReduceInPlace(powers[i - 1]); } } @@ -236,26 +275,22 @@ std::shared_ptr> internalEvalPowersLinear(ConstCiphertext // brings all powers of x to the same level for (uint32_t i = 1; i < k; ++i) { - if (indices[i - 1] == 1) { - uint32_t levelDiff = powers[k - 1]->GetLevel() - powers[i - 1]->GetLevel(); - cc->LevelReduceInPlace(powers[i - 1], nullptr, levelDiff / compositeDegree); + if (indices[i - 1]) { + uint32_t diff = powers[k - 1]->GetLevel() - powers[i - 1]->GetLevel(); + 
cc->LevelReduceInPlace(powers[i - 1], nullptr, diff / compositeDegree); } } - return std::make_shared>(powers); + return std::make_shared>(std::move(powers)); } -template -std::shared_ptr> internalEvalPowersPS(ConstCiphertext& x, - const std::vector& coefficients) { - auto n = Degree(coefficients); - auto degs = ComputeDegreesPS(n); +std::shared_ptr> internalEvalPowersPS(ConstCiphertext& x, uint32_t degree) { + auto degs = ComputeDegreesPS(degree); uint32_t k = degs[0]; uint32_t m = degs[1]; - std::vector> powers; - powers.reserve(k); - powers.push_back(x->Clone()); + std::vector> powers(k); + powers[0] = x->Clone(); auto cc = x->GetCryptoContext(); uint32_t compositeDegree = @@ -264,74 +299,71 @@ std::shared_ptr> internalEvalPowersPS(ConstCiphertextEvalSquare(powers[(powerOf2 >> 1) - 1])); + powers[i - 1] = cc->EvalSquare(powers[(powerOf2 >> 1) - 1]); } else { - uint32_t levelDiff = powers[powerOf2 - 1]->GetLevel() - powers[rem - 1]->GetLevel(); - cc->LevelReduceInPlace(powers[rem - 1], nullptr, levelDiff / compositeDegree); - powers.push_back(cc->EvalMult(powers[powerOf2 - 1], powers[rem - 1])); + uint32_t diff = powers[powerOf2 - 1]->GetLevel() - powers[rem - 1]->GetLevel(); + cc->LevelReduceInPlace(powers[rem - 1], nullptr, diff / compositeDegree); + powers[i - 1] = cc->EvalMult(powers[powerOf2 - 1], powers[rem - 1]); } - cc->ModReduceInPlace(powers[powerOf2 - 1 + rem]); + if (++rem == powerOf2) { powerOf2 <<= 1; rem = 0; } + cc->ModReduceInPlace(powers[i - 1]); } const auto cryptoParams = std::dynamic_pointer_cast(powers[k - 1]->GetCryptoParameters()); - auto algo = cc->GetScheme(); - if (cryptoParams->GetScalingTechnique() == FIXEDMANUAL) { // brings all powers of x to the same level - for (size_t i = 1; i < k; i++) { - uint32_t levelDiff = powers[k - 1]->GetLevel() - powers[i - 1]->GetLevel(); - cc->LevelReduceInPlace(powers[i - 1], nullptr, levelDiff); - } + uint32_t levelk = powers[k - 1]->GetLevel(); + for (uint32_t i = 1; i < k; ++i) + cc->LevelReduceInPlace(powers[i - 1], nullptr, levelk - powers[i - 1]->GetLevel()); } else { - for (size_t i = 1; i < k; i++) { - algo->AdjustLevelsAndDepthInPlace(powers[i - 1], powers[k - 1]); - } + for (uint32_t i = 1; i < k; ++i) + cc->GetScheme()->AdjustLevelsAndDepthInPlace(powers[i - 1], powers[k - 1]); } // computes powers of form k*2^i for x and the product of the powers in power2, that yield x^{k(2*m - 1)} - std::vector> powers2; - powers2.reserve(m); - powers2.push_back(powers.back()->Clone()); - auto power2km1 = powers.back()->Clone(); + std::vector> powers2(m); + powers2[0] = powers.back(); + + auto power2km1 = powers.back(); - for (uint32_t i = 1; i < m; i++) { - powers2.push_back(cc->EvalSquare(powers2[i - 1])); + for (uint32_t i = 1; i < m; ++i) { + powers2[i] = cc->EvalSquare(powers2[i - 1]); cc->ModReduceInPlace(powers2[i]); - power2km1 = cc->EvalMult(power2km1, powers2.back()); + power2km1 = cc->EvalMult(powers2[i], power2km1); cc->ModReduceInPlace(power2km1); } - return std::make_shared>(powers, powers2, power2km1, k, m); + return std::make_shared>(std::move(powers), std::move(powers2), std::move(power2km1), k, m); } std::shared_ptr> AdvancedSHECKKSRNS::EvalPowers(ConstCiphertext& x, const std::vector& coefficients) const { - return (Degree(coefficients) < 5) ? internalEvalPowersLinear(x, coefficients) : - internalEvalPowersPS(x, coefficients); + uint32_t d = Degree(coefficients); + return (d < 5) ? 
internalEvalPowersLinear(x, coefficients) : internalEvalPowersPS(x, d); } std::shared_ptr> AdvancedSHECKKSRNS::EvalPowers(ConstCiphertext& x, const std::vector& coefficients) const { - return (Degree(coefficients) < 5) ? internalEvalPowersLinear(x, coefficients) : - internalEvalPowersPS(x, coefficients); + uint32_t d = Degree(coefficients); + return (d < 5) ? internalEvalPowersLinear(x, coefficients) : internalEvalPowersPS(x, d); } std::shared_ptr> AdvancedSHECKKSRNS::EvalPowers( ConstCiphertext& x, const std::vector>& coefficients) const { - return (Degree(coefficients) < 5) ? internalEvalPowersLinear(x, coefficients) : - internalEvalPowersPS(x, coefficients); + uint32_t d = Degree(coefficients); + return (d < 5) ? internalEvalPowersLinear(x, coefficients) : internalEvalPowersPS(x, d); } template -static inline Ciphertext internalEvalPolyLinearWithPrecomp(std::vector>& powers, - const std::vector& coefficients) { - uint32_t k = coefficients.size() - 1; +Ciphertext internalEvalPolyLinearWithPrecomp(std::vector>& powers, + const std::vector& coefficients) { + const uint32_t k = coefficients.size() - 1; if (k <= 1) OPENFHE_THROW("The coefficients vector should contain at least 2 elements"); @@ -361,315 +393,119 @@ static inline Ciphertext internalEvalPolyLinearWithPrecomp(std::vector } template -static Ciphertext InnerEvalPolyPS(ConstCiphertext& x, - const std::vector& coefficients, uint32_t k, uint32_t m, - std::vector>& powers, - std::vector>& powers2) { - auto cc = x->GetCryptoContext(); - +Ciphertext InnerEvalPolyPS(ConstCiphertext& x, const std::vector& coefficients, + uint32_t k, uint32_t m, const std::vector>& powers, + const std::vector>& powers2) { // Compute k*2^m because we use it often uint32_t k2m2k = k * (1 << (m - 1)) - k; // Divide coefficients by x^{k*2^{m-1}} - std::vector xkm(static_cast(k2m2k + k) + 1, 0.0); + std::vector xkm(k2m2k + k + 1); xkm.back() = 1; - auto divqr = LongDivisionPoly(coefficients, xkm); // Subtract x^{k(2^{m-1} - 1)} from r - auto r2 = divqr->r; - if (static_cast(k2m2k - Degree(divqr->r)) <= 0) { - r2[static_cast(k2m2k)] -= 1; - r2.resize(Degree(r2) + 1); + auto& r2 = divqr->r; + if (auto n = Degree(r2); static_cast(k2m2k - n) <= 0) { + r2.resize(n + 1); + r2[k2m2k] -= 1; } else { - r2.resize(static_cast(k2m2k + 1), 0.0); + r2.resize(k2m2k + 1); r2.back() = -1; } - // Divide r2 by q auto divcs = LongDivisionPoly(r2, divqr->q); + auto cc = x->GetCryptoContext(); - // Add x^{k(2^{m-1} - 1)} to s - auto s2 = divcs->r; - s2.resize(static_cast(k2m2k + 1), 0.0); - s2.back() = 1; + Ciphertext cu, qu, su; - Ciphertext cu; - uint32_t dc = Degree(divcs->q); - bool flag_c = false; +#pragma omp task shared(qu) + { + // Evaluate q and s2 at u. + // If their degrees are larger than k, then recursively apply the Paterson-Stockmeyer algorithm. 
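// --------------------------------------------------------------------------
// Editor's note: illustrative plaintext sketch, not part of this patch.
// The surrounding routine evaluates a degree-n polynomial with on the order of
// sqrt(n) ciphertext multiplications: powers x^1..x^k act as "baby steps",
// powers x^k, x^{2k}, x^{4k}, ... as "giant steps", and the coefficient vector
// is split by long division so that only low-degree pieces are combined with
// the precomputed powers. The standalone double-valued analogue below uses the
// simpler non-recursive grouping f(x) = sum_j ( sum_{i<k} c_{jk+i} x^i ) * x^{jk}
// to show the baby-step/giant-step idea; it is a sketch of the principle, not
// the exact recursion used by InnerEvalPolyPS.
#include <cmath>
#include <cstdio>
#include <vector>

double EvalPolyBSGS(const std::vector<double>& c, double x) {
    const size_t n = c.size();
    const size_t k = static_cast<size_t>(std::ceil(std::sqrt(static_cast<double>(n))));
    std::vector<double> pow(k + 1, 1.0);           // baby steps: x^0 .. x^k
    for (size_t i = 1; i <= k; ++i)
        pow[i] = pow[i - 1] * x;
    double result = 0.0;
    double giant  = 1.0;                           // giant step: (x^k)^j
    for (size_t j = 0; j * k < n; ++j) {
        double block = 0.0;                        // inner sum of at most k terms
        for (size_t i = 0; i < k && j * k + i < n; ++i)
            block += c[j * k + i] * pow[i];
        result += block * giant;
        giant *= pow[k];
    }
    return result;
}

int main() {
    // 1 + 2x + 3x^2 + 4x^3 + 5x^4 at x = 0.5 equals 3.5625
    std::printf("%f\n", EvalPolyBSGS({1, 2, 3, 4, 5}, 0.5));
    return 0;
}
// --------------------------------------------------------------------------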
- if (dc >= 1) { - if (dc == 1) { - if (IsNotEqualOne(divcs->q[1])) { - cu = cc->EvalMult(powers.front(), divcs->q[1]); - cc->ModReduceInPlace(cu); - } - else { - cu = powers.front()->Clone(); - } + if (Degree(divqr->q) > k) { + qu = InnerEvalPolyPS(x, divqr->q, k, m - 1, powers, powers2); } else { - std::vector> ctxs(dc); - std::vector weights(dc); - - for (uint32_t i = 0; i < dc; i++) { - ctxs[i] = powers[i]; - weights[i] = divcs->q[i + 1]; - } - - cu = cc->EvalLinearWSumMutable(ctxs, weights); + qu = cc->EvalAdd(powers[k - 1], divqr->q.front()); + divqr->q.resize(k); + if (uint32_t n = Degree(divqr->q); n > 0) + cc->EvalAddInPlace(qu, EvalPartialLinearWSum(powers, divqr->q, n)); } - - // adds the free term (at x^0) - cc->EvalAddInPlace(cu, divcs->q.front()); - flag_c = true; } - // Evaluate q and s2 at u. If their degrees are larger than k, then recursively apply the Paterson-Stockmeyer algorithm. - Ciphertext qu; - - if (Degree(divqr->q) > k) { - qu = InnerEvalPolyPS(x, divqr->q, k, m - 1, powers, powers2); - } - else { - // dq = k from construction - // perform scalar multiplication for all other terms and sum them up if there are non-zero coefficients - auto qcopy = divqr->q; - qcopy.resize(k); - if (Degree(qcopy) > 0) { - std::vector> ctxs(Degree(qcopy)); - std::vector weights(Degree(qcopy)); - - for (uint32_t i = 0; i < Degree(qcopy); i++) { - ctxs[i] = powers[i]; - weights[i] = divqr->q[i + 1]; - } +#pragma omp task shared(su) + { + // Add x^{k(2^{m-1} - 1)} to s + auto& s2 = divcs->r; + s2.resize(k2m2k + 1); + s2.back() = 1; - qu = cc->EvalLinearWSumMutable(ctxs, weights); - // the highest order term will always be 1 because q is monic - cc->EvalAddInPlace(qu, powers[k - 1]); + if (Degree(s2) > k) { + su = InnerEvalPolyPS(x, s2, k, m - 1, powers, powers2); } else { - qu = powers[k - 1]->Clone(); + su = cc->EvalAdd(powers[k - 1], s2.front()); + s2.resize(k); + if (uint32_t n = Degree(s2); n > 0) + cc->EvalAddInPlace(su, EvalPartialLinearWSum(powers, s2, n)); } - // adds the free term (at x^0) - cc->EvalAddInPlace(qu, divqr->q.front()); } - uint32_t ds = Degree(s2); - Ciphertext su; - - if (std::equal(s2.begin(), s2.end(), divqr->q.begin())) { - su = qu->Clone(); + if (uint32_t n = Degree(divcs->q); n == 0) { + cu = cc->EvalAdd(powers2[m - 1], divcs->q.front()); } - else { - if (ds > k) { - su = InnerEvalPolyPS(x, s2, k, m - 1, powers, powers2); + else if (n == 1) { + if (IsNotEqualOne(divcs->q[1])) { + cu = cc->EvalMult(powers.front(), divcs->q[1]); + cc->ModReduceInPlace(cu); + cc->EvalAddInPlace(cu, powers2[m - 1]); } else { - // ds = k from construction - // perform scalar multiplication for all other terms and sum them up if there are non-zero coefficients - auto scopy = s2; - scopy.resize(k); - if (Degree(scopy) > 0) { - std::vector> ctxs(Degree(scopy)); - std::vector weights(Degree(scopy)); - - for (uint32_t i = 0; i < Degree(scopy); ++i) { - ctxs[i] = powers[i]; - weights[i] = s2[i + 1]; - } - - su = cc->EvalLinearWSumMutable(ctxs, weights); - // the highest order term will always be 1 because q is monic - cc->EvalAddInPlace(su, powers[k - 1]); - } - else { - su = powers[k - 1]->Clone(); - } - // adds the free term (at x^0) - cc->EvalAddInPlace(su, s2.front()); + cu = cc->EvalAdd(powers2[m - 1], powers.front()); } - } - - Ciphertext result; - - if (flag_c) { - result = cc->EvalAdd(powers2[m - 1], cu); + cc->EvalAddInPlace(cu, divcs->q.front()); } else { - result = cc->EvalAdd(powers2[m - 1], divcs->q.front()); + cu = cc->EvalAdd(powers2[m - 1], EvalPartialLinearWSum(powers, 
divcs->q, n)); + cc->EvalAddInPlace(cu, divcs->q.front()); } - result = cc->EvalMult(result, qu); +#pragma omp taskwait + + auto result = cc->EvalMult(cu, qu); cc->ModReduceInPlace(result); cc->EvalAddInPlace(result, su); - return result; } template -static inline Ciphertext internalEvalPolyPSWithPrecomp(std::shared_ptr> ctxtPowers, - const std::vector& coefficients) { - auto f2 = coefficients; - auto n = Degree(f2); - f2.resize(n + 1); - - auto powers = ctxtPowers->powersRe; - auto powers2 = ctxtPowers->powers2Re; - auto power2km1 = ctxtPowers->power2km1Re; - auto k = ctxtPowers->k; - auto m = ctxtPowers->m; +Ciphertext internalEvalPolyPSWithPrecomp(const std::shared_ptr>& ctxtPowers, + const std::vector& coefficients) { + auto& powers = ctxtPowers->powersRe; + auto& powers2 = ctxtPowers->powers2Re; + auto& power2km1 = ctxtPowers->power2km1Re; + auto k = ctxtPowers->k; + auto m = ctxtPowers->m; // Compute k*2^{m-1}-k because we use it a lot uint32_t k2m2k = k * (1 << (m - 1)) - k; - // Add x^{k(2^m - 1)} to the polynomial that has to be evaluated - // std::vector f2 = coefficients; - f2.resize(2 * k2m2k + k + 1, 0.0); + // Add T^{k(2^m - 1)}(y) to the polynomial that has to be evaluated + auto f2 = coefficients; + f2.resize(Degree(f2) + 1); + f2.resize(2 * k2m2k + k + 1); f2.back() = 1; - // Divide f2 by x^{k*2^{m-1}} - std::vector xkm(static_cast(k2m2k + k) + 1); - xkm.back() = 1; - auto divqr = LongDivisionPoly(f2, xkm); - - // Subtract x^{k(2^{m-1} - 1)} from r - auto r2 = divqr->r; - if (static_cast(k2m2k - Degree(divqr->r)) <= 0) { - r2[static_cast(k2m2k)] -= 1; - r2.resize(Degree(r2) + 1); - } - else { - r2.resize(static_cast(k2m2k + 1), 0.0); - r2.back() = -1; - } - - // Divide r2 by q - auto divcs = LongDivisionPoly(r2, divqr->q); - - // Add x^{k(2^{m-1} - 1)} to s - auto s2 = divcs->r; - s2.resize(static_cast(k2m2k + 1), 0.0); - s2.back() = 1; - - auto cc = powers[0]->GetCryptoContext(); - - // Evaluate c at u - Ciphertext cu; - uint32_t dc = Degree(divcs->q); - bool flag_c = false; - - if (dc >= 1) { - if (dc == 1) { - if (IsNotEqualOne(divcs->q[1])) { - cu = cc->EvalMult(powers.front(), divcs->q[1]); - // Do rescaling after scalar multiplication - cc->ModReduceInPlace(cu); - } - else { - cu = powers.front()->Clone(); - } - } - else { - std::vector> ctxs(dc); - std::vector weights(dc); - - for (uint32_t i = 0; i < dc; i++) { - ctxs[i] = powers[i]; - weights[i] = divcs->q[i + 1]; - } - - cu = cc->EvalLinearWSumMutable(ctxs, weights); - } - - // adds the free term (at x^0) - cc->EvalAddInPlace(cu, divcs->q.front()); - flag_c = true; - } - - // Evaluate q and s2 at u. If their degrees are larger than k, then recursively apply the Paterson-Stockmeyer algorithm. 
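// --------------------------------------------------------------------------
// Editor's note: illustrative sketch, not part of this patch. The rewritten
// InnerEvalPolyPS above evaluates the two independent sub-polynomials qu and
// su in separate OpenMP tasks and joins them with "taskwait"; the caller
// (internalEvalPolyPSWithPrecomp) supplies the enclosing "parallel"/"single"
// region, with the thread count taken from OpenFHEParallelControls. A minimal
// standalone version of that pattern (compile with -fopenmp; the pragmas are
// simply ignored without it):
#include <cstdio>

static double WorkA() { return 1.0; }   // stands in for evaluating qu
static double WorkB() { return 2.0; }   // stands in for evaluating su

int main() {
    double a = 0.0, b = 0.0;
#pragma omp parallel
    {
#pragma omp single
        {
            // the single region ensures the tasks are created only once
#pragma omp task shared(a)
            a = WorkA();                 // may run on one thread
#pragma omp task shared(b)
            b = WorkB();                 // may run concurrently on another
#pragma omp taskwait                     // both results are ready past this point
            std::printf("%f\n", a + b);
        }
    }
    return 0;
}
// --------------------------------------------------------------------------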
- Ciphertext qu; - - if (Degree(divqr->q) > k) { - qu = InnerEvalPolyPS(powers[0], divqr->q, k, m - 1, powers, powers2); - } - else { - // dq = k from construction - // perform scalar multiplication for all other terms and sum them up if there are non-zero coefficients - auto qcopy = divqr->q; - qcopy.resize(k); - if (Degree(qcopy) > 0) { - std::vector> ctxs(Degree(qcopy)); - std::vector weights(Degree(qcopy)); - - for (uint32_t i = 0; i < Degree(qcopy); i++) { - ctxs[i] = powers[i]; - weights[i] = divqr->q[i + 1]; - } - - qu = cc->EvalLinearWSumMutable(ctxs, weights); - // the highest order term will always be 1 because q is monic - cc->EvalAddInPlace(qu, powers[k - 1]); - } - else { - qu = powers[k - 1]->Clone(); - } - // adds the free term (at x^0) - cc->EvalAddInPlace(qu, divqr->q.front()); - } - - uint32_t ds = Degree(s2); - Ciphertext su; - - if (std::equal(s2.begin(), s2.end(), divqr->q.begin())) { - su = qu->Clone(); - } - else { - if (ds > k) { - su = InnerEvalPolyPS(powers[0], s2, k, m - 1, powers, powers2); - } - else { - // ds = k from construction - // perform scalar multiplication for all other terms and sum them up if there are non-zero coefficients - auto scopy = s2; - scopy.resize(k); - if (Degree(scopy) > 0) { - std::vector> ctxs(Degree(scopy)); - std::vector weights(Degree(scopy)); - - for (uint32_t i = 0; i < Degree(scopy); i++) { - ctxs[i] = powers[i]; - weights[i] = s2[i + 1]; - } - - su = cc->EvalLinearWSumMutable(ctxs, weights); - // the highest order term will always be 1 because q is monic - cc->EvalAddInPlace(su, powers[k - 1]); - } - else { - su = powers[k - 1]->Clone(); - } - // adds the free term (at x^0) - cc->EvalAddInPlace(su, s2.front()); - } - } - Ciphertext result; - - if (flag_c) { - result = cc->EvalAdd(powers2[m - 1], cu); +#pragma omp parallel num_threads(OpenFHEParallelControls.GetThreadLimit(6 * m + 2)) + { +#pragma omp single + result = + powers[0]->GetCryptoContext()->EvalSub(InnerEvalPolyPS(powers[0], f2, k, m, powers, powers2), power2km1); } - else { - result = cc->EvalAdd(powers2[m - 1], divcs->q.front()); - } - - result = cc->EvalMult(result, qu); - cc->ModReduceInPlace(result); - cc->EvalAddInPlace(result, su); - cc->EvalSubInPlace(result, power2km1); - return result; } @@ -717,15 +553,15 @@ Ciphertext AdvancedSHECKKSRNS::EvalPolyLinear(ConstCiphertext AdvancedSHECKKSRNS::EvalPolyPS(ConstCiphertext& x, const std::vector& coeffs) const { - return internalEvalPolyPSWithPrecomp(internalEvalPowersPS(x, coeffs), coeffs); + return internalEvalPolyPSWithPrecomp(internalEvalPowersPS(x, Degree(coeffs)), coeffs); } Ciphertext AdvancedSHECKKSRNS::EvalPolyPS(ConstCiphertext& x, const std::vector& coeffs) const { - return internalEvalPolyPSWithPrecomp(internalEvalPowersPS(x, coeffs), coeffs); + return internalEvalPolyPSWithPrecomp(internalEvalPowersPS(x, Degree(coeffs)), coeffs); } Ciphertext AdvancedSHECKKSRNS::EvalPolyPS(ConstCiphertext& x, const std::vector>& coeffs) const { - return internalEvalPolyPSWithPrecomp(internalEvalPowersPS(x, coeffs), coeffs); + return internalEvalPolyPSWithPrecomp(internalEvalPowersPS(x, Degree(coeffs)), coeffs); } //------------------------------------------------------------------------------ @@ -736,93 +572,65 @@ template std::shared_ptr> internalEvalChebyPolysLinear(ConstCiphertext& x, const std::vector& coefficients, double a, double b) { - auto cc = x->GetCryptoContext(); - uint32_t k = coefficients.size() - 1; + const uint32_t k = coefficients.size() - 1; std::vector> T(k); + auto cc = x->GetCryptoContext(); + // 
computes linear transformation y = -1 + 2 (x-a)/(b-a) // consumes one level when a <> -1 && b <> 1 - if ((a - std::round(a) < 1e-10) && (b - std::round(b) < 1e-10) && (std::round(a) == -1.0) && - (std::round(b) == 1.0)) { + if (!IsNotEqualNegOne(a) && !IsNotEqualOne(b)) { T[0] = x->Clone(); } else { // linear transformation is needed double alpha = 2 / (b - a); - double beta = 2 * a / (b - a); + double beta = a * alpha; T[0] = cc->EvalMult(x, alpha); cc->ModReduceInPlace(T[0]); cc->EvalAddInPlace(T[0], -1.0 - beta); } - Ciphertext yReduced = T[0]->Clone(); - uint32_t compositeDegree = - std::dynamic_pointer_cast(x->GetCryptoParameters())->GetCompositeDegree(); - // Computes Chebyshev polynomials up to degree k // for y: T_1(y) = y, T_2(y), ... , T_k(y) // uses binary tree multiplication for (uint32_t i = 2; i <= k; ++i) { - // if i is a power of two - if (!(i & (i - 1))) { - // compute T_{2i}(y) = 2*T_i(y)^2 - 1 - auto square = cc->EvalSquare(T[i / 2 - 1]); - T[i - 1] = cc->EvalAdd(square, square); + if (i & 0x1) { // if i is odd + // compute T_{2i+1}(y) = 2*T_i(y)*T_{i+1}(y) - y + T[i - 1] = cc->EvalMult(T[i / 2 - 1], T[i / 2]); + cc->EvalAddInPlaceNoCheck(T[i - 1], T[i - 1]); cc->ModReduceInPlace(T[i - 1]); - cc->EvalAddInPlace(T[i - 1], -1.0); - // TODO: (Andrey) Do we need this? - if (i == 2) { - cc->LevelReduceInPlace(T[i / 2 - 1], nullptr); - cc->LevelReduceInPlace(yReduced, nullptr); - } - cc->LevelReduceInPlace(yReduced, nullptr); // depth log_2 i + 1 - - // i/2 will now be used only at a lower level - if (i / 2 > 1) { - cc->LevelReduceInPlace(T[i / 2 - 1], nullptr); - } - // TODO: (Andrey) until here. - // If we need it, we can also add it in EvalChebyshevSeriesPS + cc->EvalSubInPlace(T[i - 1], T[0]); } else { - // non-power of 2 - if (i % 2 == 1) { - // if i is odd - // compute T_{2i+1}(y) = 2*T_i(y)*T_{i+1}(y) - y - auto prod = cc->EvalMult(T[i / 2 - 1], T[i / 2]); - T[i - 1] = cc->EvalAdd(prod, prod); - cc->ModReduceInPlace(T[i - 1]); - cc->EvalSubInPlace(T[i - 1], yReduced); - } - else { - // i is even but not power of 2 - // compute T_{2i}(y) = 2*T_i(y)^2 - 1 - auto square = cc->EvalSquare(T[i / 2 - 1]); - T[i - 1] = cc->EvalAdd(square, square); - cc->ModReduceInPlace(T[i - 1]); - cc->EvalAddInPlace(T[i - 1], -1.0); - } + // compute T_{2i}(y) = 2*T_i(y)^2 - 1 + T[i - 1] = cc->EvalSquare(T[i / 2 - 1]); + cc->EvalAddInPlaceNoCheck(T[i - 1], T[i - 1]); + cc->ModReduceInPlace(T[i - 1]); + cc->EvalAddInPlace(T[i - 1], -1.0); } } - for (uint32_t i = 1; i < k; ++i) { - uint32_t levelDiff = T[k - 1]->GetLevel() - T[i - 1]->GetLevel(); - cc->LevelReduceInPlace(T[i - 1], nullptr, levelDiff / compositeDegree); - } - return std::make_shared>(T); + + uint32_t compositeDegree = + std::dynamic_pointer_cast(x->GetCryptoParameters())->GetCompositeDegree(); + for (uint32_t i = 1; i < k; ++i) + cc->LevelReduceInPlace(T[i - 1], nullptr, (T[k - 1]->GetLevel() - T[i - 1]->GetLevel()) / compositeDegree); + + return std::make_shared>(std::move(T)); } template -static inline Ciphertext internalEvalChebyshevSeriesLinearWithPrecomp( - std::vector>& T, const std::vector& coefficients) { - auto cc = T[0]->GetCryptoContext(); - uint32_t k = coefficients.size() - 1; +Ciphertext internalEvalChebyshevSeriesLinearWithPrecomp(std::vector>& T, + const std::vector& coefficients) { + const uint32_t k = coefficients.size() - 2; // perform scalar multiplication for the highest-order term - auto result = cc->EvalMult(T[k - 1], coefficients[k]); + auto cc = T[0]->GetCryptoContext(); + auto result = cc->EvalMult(T[k], 
coefficients[k + 1]); // perform scalar multiplication for all other terms and sum them up - for (uint32_t i = 0; i < k - 1; ++i) { + for (uint32_t i = 0; i < k; ++i) { if (IsNotEqualZero(coefficients[i + 1])) { cc->EvalMultInPlace(T[i], coefficients[i + 1]); cc->EvalAddInPlace(result, T[i]); @@ -839,178 +647,122 @@ static inline Ciphertext internalEvalChebyshevSeriesLinearWithPrecomp( } template -static Ciphertext InnerEvalChebyshevPS(ConstCiphertext& x, - const std::vector& coefficients, uint32_t k, - uint32_t m, std::vector>& T, - std::vector>& T2) { - auto cc = x->GetCryptoContext(); - uint32_t compositeDegree = - std::dynamic_pointer_cast(x->GetCryptoParameters())->GetCompositeDegree(); - +Ciphertext InnerEvalChebyshevPS(ConstCiphertext& x, const std::vector& coefficients, + uint32_t k, uint32_t m, const std::vector>& T, + const std::vector>& T2) { // Compute k*2^{m-1}-k because we use it a lot uint32_t k2m2k = k * (1 << (m - 1)) - k; // Divide coefficients by T^{k*2^{m-1}} - std::vector Tkm(static_cast(k2m2k + k) + 1); + std::vector Tkm(k2m2k + k + 1); Tkm.back() = 1; auto divqr = LongDivisionChebyshev(coefficients, Tkm); // Subtract x^{k(2^{m-1} - 1)} from r - auto r2 = divqr->r; - if (static_cast(k2m2k - Degree(divqr->r)) <= 0) { - r2[static_cast(k2m2k)] -= 1; - r2.resize(Degree(r2) + 1); + auto& r2 = divqr->r; + if (uint32_t n = Degree(r2); static_cast(k2m2k - n) <= 0) { + r2.resize(n + 1); + r2[k2m2k] -= 1; } else { - r2.resize(static_cast(k2m2k + 1)); + r2.resize(k2m2k + 1); r2.back() = -1; } - // Divide r2 by q auto divcs = LongDivisionChebyshev(r2, divqr->q); + auto cc = x->GetCryptoContext(); - // Add x^{k(2^{m-1} - 1)} to s - auto s2 = divcs->r; - s2.resize(static_cast(k2m2k + 1), 0.0); - s2.back() = 1; - - // Evaluate c at u - Ciphertext cu; - uint32_t dc = Degree(divcs->q); - bool flag_c = false; - if (dc >= 1) { - if (dc == 1) { - if (IsNotEqualOne(divcs->q[1])) { - cu = cc->EvalMult(T.front(), divcs->q[1]); - cc->ModReduceInPlace(cu); - } - else { - cu = T.front()->Clone(); - } + Ciphertext cu, qu, su; + + { + // Evaluate q and s2 at u. + // If their degrees are larger than k, then recursively apply the Paterson-Stockmeyer algorithm. + if (Degree(divqr->q) > k) { + qu = InnerEvalChebyshevPS(x, divqr->q, k, m - 1, T, T2); } else { - std::vector> ctxs(dc); - std::vector weights(dc); - - for (uint32_t i = 0; i < dc; ++i) { - ctxs[i] = T[i]; - weights[i] = divcs->q[i + 1]; - } + // dq = k from construction + // perform scalar multiplication for all other terms and sum them up if there are non-zero coefficients - cu = internalEvalLinearWSumMutable(ctxs, weights); - } + // the highest order coefficient will always be a power of two up to 2^{m-1} because q is "monic" but the Chebyshev rule adds a factor of 2 + // we don't need to increase the depth by multiplying the highest order coefficient, but instead checking and summing, since we work with m <= 4. + qu = T[k - 1]->Clone(); + const uint32_t limit = std::log2(ToReal(divqr->q.back())); + for (uint32_t i = 0; i < limit; ++i) + cc->EvalAddInPlaceNoCheck(qu, qu); - // adds the free term (at x^0) - cc->EvalAddInPlace(cu, divcs->q.front() / 2.0); - // Need to reduce levels up to the level of T2[m-1]. - uint32_t levelDiff = T2[m - 1]->GetLevel() - cu->GetLevel(); - cc->LevelReduceInPlace(cu, nullptr, levelDiff / compositeDegree); + // adds the free term (at x^0) + cc->EvalAddInPlace(qu, divqr->q.front() / 2.0); + // The number of levels of qu is the same as the number of levels of T[k-1] + 1. 
+ // Will only get here when m = 2, so the number of levels of qu and T2[m-1] will be the same. - flag_c = true; + divqr->q.resize(k); + if (uint32_t n = Degree(divqr->q); n > 0) + cc->EvalAddInPlace(qu, EvalPartialLinearWSum(T, divqr->q, n)); + } } - // Evaluate q and s2 at u. If their degrees are larger than k, then recursively apply the Paterson-Stockmeyer algorithm. - Ciphertext qu; - - if (Degree(divqr->q) > k) { - qu = InnerEvalChebyshevPS(x, divqr->q, k, m - 1, T, T2); - } - else { - // dq = k from construction - // perform scalar multiplication for all other terms and sum them up if there are non-zero coefficients - auto qcopy = divqr->q; - qcopy.resize(k); - if (Degree(qcopy) > 0) { - std::vector> ctxs(Degree(qcopy)); - std::vector weights(Degree(qcopy)); - - for (uint32_t i = 0; i < Degree(qcopy); i++) { - ctxs[i] = T[i]; - weights[i] = divqr->q[i + 1]; - } + { + // Add x^{k(2^{m-1} - 1)} to s + auto& s2 = divcs->r; + s2.resize(k2m2k + 1); + s2.back() = 1; - qu = cc->EvalLinearWSumMutable(ctxs, weights); - // the highest order coefficient will always be a power of two up to 2^{m-1} because q is "monic" but the Chebyshev rule adds a factor of 2 - // we don't need to increase the depth by multiplying the highest order coefficient, but instead checking and summing, since we work with m <= 4. - Ciphertext sum = T[k - 1]->Clone(); - uint32_t limit = log2(ToReal(divqr->q.back())); - for (uint32_t i = 0; i < limit; ++i) { - sum = cc->EvalAdd(sum, sum); - } - cc->EvalAddInPlace(qu, sum); + if (Degree(s2) > k) { + su = InnerEvalChebyshevPS(x, s2, k, m - 1, T, T2); } else { - Ciphertext sum = T[k - 1]->Clone(); - uint32_t limit = log2(ToReal(divqr->q.back())); - for (uint32_t i = 0; i < limit; ++i) { - sum = cc->EvalAdd(sum, sum); - } - qu = sum; - } + // the highest order coefficient will always be 1 because s2 is monic. + su = T[k - 1]->Clone(); - // adds the free term (at x^0) - cc->EvalAddInPlace(qu, divqr->q.front() / 2.0); - // The number of levels of qu is the same as the number of levels of T[k-1] or T[k-1] + 1. - // No need to reduce it to T2[m-1] because it only reaches here when m = 2. - } + // ds = k from construction + // perform scalar multiplication for all other terms and sum them up if there are non-zero coefficients + s2.resize(k); + if (uint32_t n = Degree(s2); n > 0) + cc->EvalAddInPlace(su, EvalPartialLinearWSum(T, s2, n)); - Ciphertext su; + // adds the free term (at x^0) + cc->EvalAddInPlace(su, s2.front() / 2.0); - if (Degree(s2) > k) { - su = InnerEvalChebyshevPS(x, s2, k, m - 1, T, T2); + // The number of levels of su is the same as the number of levels of T[k-1] or T[k-1] + 1. Need to reduce it to T2[m-1] + 1. + cc->LevelReduceInPlace(su, nullptr); + } } - else { - // ds = k from construction - // perform scalar multiplication for all other terms and sum them up if there are non-zero coefficients - auto scopy = s2; - scopy.resize(k); - if (Degree(scopy) > 0) { - std::vector> ctxs(Degree(scopy)); - std::vector weights(Degree(scopy)); - - for (uint32_t i = 0; i < Degree(scopy); i++) { - ctxs[i] = T[i]; - weights[i] = s2[i + 1]; - } - su = cc->EvalLinearWSumMutable(ctxs, weights); - // the highest order coefficient will always be 1 because s2 is monic. 
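// --------------------------------------------------------------------------
// Editor's note: illustrative sketch, not part of this patch. In the Chebyshev
// variant the quotient's leading coefficient is a power of two (the basis
// change introduces factors of 2), so instead of a ciphertext-constant
// multiplication, which would increase the depth, the code above doubles the
// ciphertext log2(coefficient) times using plain additions. On plaintext
// values the two are identical:
#include <cmath>
#include <cstdio>

int main() {
    const double leading = 8.0;                    // 2^3, as produced by LongDivisionChebyshev
    const double t       = 0.3141592653589793;     // stands in for T[k-1]

    double sum = t;
    const unsigned limit = static_cast<unsigned>(std::log2(leading));
    for (unsigned i = 0; i < limit; ++i)
        sum += sum;                                // doubling via addition

    std::printf("by additions: %.15f\n", sum);
    std::printf("by multiply : %.15f\n", leading * t);  // same value, but additions are cheap in CKKS
    return 0;
}
// --------------------------------------------------------------------------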
- cc->EvalAddInPlace(su, T[k - 1]); + if (uint32_t n = Degree(divcs->q); n >= 1) { + if (n == 1) { + if (IsNotEqualOne(divcs->q[1])) { + cu = cc->EvalMult(T.front(), divcs->q[1]); + cc->ModReduceInPlace(cu); + } + else { + cu = T.front()->Clone(); + } } else { - su = T[k - 1]->Clone(); + cu = EvalPartialLinearWSum(T, divcs->q, n); } // adds the free term (at x^0) - cc->EvalAddInPlace(su, s2.front() / 2.0); - // The number of levels of su is the same as the number of levels of T[k-1] or T[k-1] + 1. Need to reduce it to T2[m-1] + 1. - // su = cc->LevelReduce(su, nullptr, su->GetElements()[0].GetNumOfElements() - Lm + 1) ; - cc->LevelReduceInPlace(su, nullptr); - } - - Ciphertext result; + cc->EvalAddInPlace(cu, divcs->q.front() / 2.0); - if (flag_c) { - result = cc->EvalAdd(T2[m - 1], cu); - } - else { - result = cc->EvalAdd(T2[m - 1], divcs->q.front() / 2.0); + // Need to reduce levels up to the level of T2[m-1]. + uint32_t cd = + std::dynamic_pointer_cast(x->GetCryptoParameters())->GetCompositeDegree(); + cc->LevelReduceInPlace(cu, nullptr, (T2[m - 1]->GetLevel() - cu->GetLevel()) / cd); } - result = cc->EvalMult(result, qu); - cc->ModReduceInPlace(result); + cu = cu ? cc->EvalAdd(T2[m - 1], cu) : cc->EvalAdd(T2[m - 1], divcs->q.front() / 2.0); + auto result = cc->EvalMult(cu, qu); + cc->ModReduceInPlace(result); cc->EvalAddInPlace(result, su); - return result; } -template -std::shared_ptr> internalEvalChebyPolysPS(ConstCiphertext& x, - const std::vector& coefficients, +std::shared_ptr> internalEvalChebyPolysPS(ConstCiphertext& x, uint32_t degree, double a, double b) { - auto n = Degree(coefficients); - auto degs = ComputeDegreesPS(n); + auto degs = ComputeDegreesPS(degree); uint32_t k = degs[0]; uint32_t m = degs[1]; @@ -1018,8 +770,7 @@ std::shared_ptr> internalEvalChebyPolysPS(ConstCiphertext // consumes one level when a <> -1 && b <> 1 auto cc = x->GetCryptoContext(); std::vector> T(k); - if ((a - std::round(a) < 1e-10) && (b - std::round(b) < 1e-10) && (std::round(a) == -1.0) && - (std::round(b) == 1.0)) { + if (!IsNotEqualNegOne(a) && !IsNotEqualOne(b)) { // no linear transformation is needed if a = -1, b = 1 // T_1(y) = y T[0] = x->Clone(); @@ -1027,291 +778,105 @@ std::shared_ptr> internalEvalChebyPolysPS(ConstCiphertext else { // linear transformation is needed double alpha = 2 / (b - a); - double beta = 2 * a / (b - a); + double beta = a * alpha; T[0] = cc->EvalMult(x, alpha); cc->ModReduceInPlace(T[0]); cc->EvalAddInPlace(T[0], -1.0 - beta); } - Ciphertext y = T[0]->Clone(); - // Computes Chebyshev polynomials up to degree k // for y: T_1(y) = y, T_2(y), ... 
, T_k(y) // uses binary tree multiplication for (uint32_t i = 2; i <= k; ++i) { - // if i is a power of two - if (!(i & (i - 1))) { - // compute T_{2i}(y) = 2*T_i(y)^2 - 1 - auto square = cc->EvalSquare(T[i / 2 - 1]); - T[i - 1] = cc->EvalAdd(square, square); + if (i & 0x1) { // if i is odd + // compute T_{2i+1}(y) = 2*T_i(y)*T_{i+1}(y) - y + T[i - 1] = cc->EvalMult(T[i / 2 - 1], T[i / 2]); + cc->EvalAddInPlaceNoCheck(T[i - 1], T[i - 1]); cc->ModReduceInPlace(T[i - 1]); - cc->EvalAddInPlace(T[i - 1], -1.0); + cc->EvalSubInPlace(T[i - 1], T[0]); } else { - // non-power of 2 - if (i % 2 == 1) { - // if i is odd - // compute T_{2i+1}(y) = 2*T_i(y)*T_{i+1}(y) - y - auto prod = cc->EvalMult(T[i / 2 - 1], T[i / 2]); - T[i - 1] = cc->EvalAdd(prod, prod); - - cc->ModReduceInPlace(T[i - 1]); - cc->EvalSubInPlace(T[i - 1], y); - } - else { - // i is even but not power of 2 - // compute T_{2i}(y) = 2*T_i(y)^2 - 1 - auto square = cc->EvalSquare(T[i / 2 - 1]); - T[i - 1] = cc->EvalAdd(square, square); - cc->ModReduceInPlace(T[i - 1]); - cc->EvalAddInPlace(T[i - 1], -1.0); - } + // compute T_{2i}(y) = 2*T_i(y)^2 - 1 + T[i - 1] = cc->EvalSquare(T[i / 2 - 1]); + cc->EvalAddInPlaceNoCheck(T[i - 1], T[i - 1]); + cc->ModReduceInPlace(T[i - 1]); + cc->EvalAddInPlace(T[i - 1], -1.0); } } const auto cryptoParams = std::dynamic_pointer_cast(T[k - 1]->GetCryptoParameters()); - - auto algo = cc->GetScheme(); - if (cryptoParams->GetScalingTechnique() == FIXEDMANUAL) { // brings all powers of x to the same level - for (uint32_t i = 1; i < k; ++i) { - uint32_t levelDiff = T[k - 1]->GetLevel() - T[i - 1]->GetLevel(); - cc->LevelReduceInPlace(T[i - 1], nullptr, levelDiff); - } + for (uint32_t i = 1; i < k; ++i) + cc->LevelReduceInPlace(T[i - 1], nullptr, T[k - 1]->GetLevel() - T[i - 1]->GetLevel()); } else { - for (uint32_t i = 1; i < k; ++i) { - algo->AdjustLevelsAndDepthInPlace(T[i - 1], T[k - 1]); - } + for (uint32_t i = 1; i < k; ++i) + cc->GetScheme()->AdjustLevelsAndDepthInPlace(T[i - 1], T[k - 1]); } std::vector> T2(m); - // Compute the Chebyshev polynomials T_k(y), T_{2k}(y), T_{4k}(y), ... , T_{2^{m-1}k}(y) // T2[0] is used as a placeholder - T2.front() = T.back(); - for (uint32_t i = 1; i < m; i++) { - auto square = cc->EvalSquare(T2[i - 1]); - T2[i] = cc->EvalAdd(square, square); + T2[0] = T.back(); + + // computes T_{k(2*m - 1)}(y) + auto T2km1 = T.back(); + + for (uint32_t i = 1; i < m; ++i) { + // Compute the Chebyshev polynomials T_k(y), T_{2k}(y), T_{4k}(y), ... , T_{2^{m-1}k}(y) + T2[i] = cc->EvalSquare(T2[i - 1]); + cc->EvalAddInPlaceNoCheck(T2[i], T2[i]); cc->ModReduceInPlace(T2[i]); cc->EvalAddInPlace(T2[i], -1.0); - } - // computes T_{k(2*m - 1)}(y) - auto T2km1 = T2.front(); - for (uint32_t i = 1; i < m; i++) { // compute T_{k(2*m - 1)} = 2*T_{k(2^{m-1}-1)}(y)*T_{k*2^{m-1}}(y) - T_k(y) - auto prod = cc->EvalMult(T2km1, T2[i]); - T2km1 = cc->EvalAdd(prod, prod); + T2km1 = cc->EvalMult(T2km1, T2[i]); + cc->EvalAddInPlaceNoCheck(T2km1, T2km1); cc->ModReduceInPlace(T2km1); - cc->EvalSubInPlace(T2km1, T2.front()); + cc->EvalSubInPlace(T2km1, T2[0]); } - // We also need to reduce the number of levels of T[k-1] and of T2[0] by another level. 
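// --------------------------------------------------------------------------
// Editor's note: illustrative sketch, not part of this patch. The binary tree
// above (and the T2 doubling that follows) relies on the product identities
// 2*T_i(y)^2 = T_{2i}(y) + 1 and 2*T_i(y)*T_{i+1}(y) = T_{2i+1}(y) + T_1(y),
// so each new Chebyshev polynomial costs one ciphertext multiplication. On
// plaintext inputs in [-1, 1] the construction can be checked directly against
// the closed form T_k(y) = cos(k*acos(y)):
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
    const unsigned k = 13;
    const double y   = 0.37;
    std::vector<double> T(k);                  // T[i-1] holds T_i(y)
    T[0] = y;                                  // T_1(y) = y
    for (unsigned i = 2; i <= k; ++i) {
        if (i & 0x1)                           // odd: T_{2j+1} = 2*T_j*T_{j+1} - T_1
            T[i - 1] = 2.0 * T[i / 2 - 1] * T[i / 2] - T[0];
        else                                   // even: T_{2j} = 2*T_j^2 - 1
            T[i - 1] = 2.0 * T[i / 2 - 1] * T[i / 2 - 1] - 1.0;
        std::printf("T_%u: tree=%+.12f  closed=%+.12f\n",
                    i, T[i - 1], std::cos(i * std::acos(y)));
    }
    return 0;
}
// --------------------------------------------------------------------------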
- // cc->LevelReduceInPlace(T[k-1], nullptr); - // cc->LevelReduceInPlace(T2.front(), nullptr); - - return std::make_shared>(T, T2, T2km1, k, m); + return std::make_shared>(std::move(T), std::move(T2), std::move(T2km1), k, m); } template -static inline Ciphertext internalEvalChebyshevSeriesPSWithPrecomp( - std::shared_ptr> ctxtPolys, const std::vector& coefficients) { - auto f2 = coefficients; - auto n = Degree(f2); - f2.resize(n + 1); - - auto T = ctxtPolys->powersRe; - auto T2 = ctxtPolys->powers2Re; - auto T2km1 = ctxtPolys->power2km1Re; - auto k = ctxtPolys->k; - auto m = ctxtPolys->m; +Ciphertext internalEvalChebyshevSeriesPSWithPrecomp(const std::shared_ptr>& ctxtPolys, + const std::vector& coefficients) { + auto& T = ctxtPolys->powersRe; + auto& T2 = ctxtPolys->powers2Re; + auto& T2km1 = ctxtPolys->power2km1Re; + auto k = ctxtPolys->k; + auto m = ctxtPolys->m; // Compute k*2^{m-1}-k because we use it a lot uint32_t k2m2k = k * (1 << (m - 1)) - k; // Add T^{k(2^m - 1)}(y) to the polynomial that has to be evaluated - f2.resize(2 * k2m2k + k + 1, 0.0); + auto f2 = coefficients; + f2.resize(Degree(f2) + 1); + f2.resize(2 * k2m2k + k + 1); f2.back() = 1; - // Divide f2 by T^{k*2^{m-1}} - std::vector Tkm(k2m2k + k + 1); - Tkm.back() = 1; - auto divqr = LongDivisionChebyshev(f2, Tkm); - - // Subtract x^{k(2^{m-1} - 1)} from r - auto r2 = divqr->r; - if (static_cast(k2m2k - Degree(r2)) <= 0) { - r2[static_cast(k2m2k)] -= 1; - r2.resize(Degree(r2) + 1); - } - else { - r2.resize(static_cast(k2m2k + 1)); - r2.back() = -1; - } - - // Divide r2 by q - auto divcs = LongDivisionChebyshev(r2, divqr->q); - - // Add x^{k(2^{m-1} - 1)} to s - auto s2 = divcs->r; - s2.resize(k2m2k + 1); - s2.back() = 1; - - auto cc = T[0]->GetCryptoContext(); - - // Evaluate c at u - Ciphertext cu; - uint32_t dc = Degree(divcs->q); - bool flag_c = false; - if (dc >= 1) { - if (dc == 1) { - if (IsNotEqualOne(divcs->q[1])) { - cu = cc->EvalMult(T.front(), divcs->q[1]); - cc->ModReduceInPlace(cu); - } - else { - cu = T.front()->Clone(); - } - } - else { - std::vector> ctxs(dc); - std::vector weights(dc); - - for (uint32_t i = 0; i < dc; i++) { - ctxs[i] = T[i]; - weights[i] = divcs->q[i + 1]; - } - - cu = cc->EvalLinearWSumMutable(ctxs, weights); - } - - // adds the free term (at x^0) - cc->EvalAddInPlace(cu, divcs->q.front() / 2.0); - // TODO : Andrey why not T2[m-1]->GetLevel() instead? - // Need to reduce levels to the level of T2[m-1]. - // uint32_t levelDiff = y->GetLevel() - cu->GetLevel() + ceil(log2(k)) + m - 1; - // cc->LevelReduceInPlace(cu, nullptr, levelDiff); - - flag_c = true; - } - - // Evaluate q and s2 at u. If their degrees are larger than k, then recursively apply the Paterson-Stockmeyer algorithm. 
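// --------------------------------------------------------------------------
// Editor's note: illustrative sketch, not part of this patch. LongDivisionPoly
// and LongDivisionChebyshev split a coefficient vector f into a quotient q and
// remainder r with f = q * d + r and deg(r) < deg(d); the Paterson-Stockmeyer
// code always divides by a monic (mon-like) basis element such as x^{k*2^{m-1}}.
// In the power basis, with a monic divisor and coefficients stored lowest
// degree first, the schoolbook version is simply:
#include <utility>
#include <vector>

struct DivRem {
    std::vector<double> q;
    std::vector<double> r;
};

// f and d are coefficient vectors, lowest degree first; d is assumed monic.
DivRem LongDivide(std::vector<double> f, const std::vector<double>& d) {
    const size_t degD = d.size() - 1;
    std::vector<double> q(f.size() > degD ? f.size() - degD : 1, 0.0);
    for (size_t i = f.size(); i-- > degD; ) {  // cancel leading terms, highest first
        const double c = f[i];                 // coefficient to eliminate
        q[i - degD] = c;
        for (size_t j = 0; j <= degD; ++j)
            f[i - degD + j] -= c * d[j];
    }
    f.resize(degD ? degD : 1);                 // what is left is the remainder
    return {std::move(q), std::move(f)};
}
// --------------------------------------------------------------------------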
- Ciphertext qu; - - if (Degree(divqr->q) > k) { - qu = InnerEvalChebyshevPS(T[0], divqr->q, k, m - 1, T, T2); - } - else { - // dq = k from construction - // perform scalar multiplication for all other terms and sum them up if there are non-zero coefficients - auto qcopy = divqr->q; - qcopy.resize(k); - if (Degree(qcopy) > 0) { - std::vector> ctxs(Degree(qcopy)); - std::vector weights(Degree(qcopy)); - - for (uint32_t i = 0; i < Degree(qcopy); ++i) { - ctxs[i] = T[i]; - weights[i] = divqr->q[i + 1]; - } - qu = internalEvalLinearWSumMutable(ctxs, weights); - // the highest order coefficient will always be a power of two up to 2^{m-1} because q is "monic" but the Chebyshev rule adds a factor of 2 - // we don't need to increase the depth by multiplying the highest order coefficient, but instead checking and summing, since we work with m <= 4. - Ciphertext sum = T[k - 1]->Clone(); - uint32_t limit = log2(ToReal(divqr->q.back())); - for (uint32_t i = 0; i < limit; ++i) { - sum = cc->EvalAdd(sum, sum); - } - cc->EvalAddInPlace(qu, sum); - } - else { - Ciphertext sum = T[k - 1]->Clone(); - uint32_t limit = log2(ToReal(divqr->q.back())); - for (uint32_t i = 0; i < limit; ++i) { - sum = cc->EvalAdd(sum, sum); - } - qu = sum; - } - - // adds the free term (at x^0) - cc->EvalAddInPlace(qu, divqr->q.front() / 2.0); - // The number of levels of qu is the same as the number of levels of T[k-1] + 1. - // Will only get here when m = 2, so the number of levels of qu and T2[m-1] will be the same. - } - - Ciphertext su; - - if (Degree(s2) > k) { - su = InnerEvalChebyshevPS(T[0], s2, k, m - 1, T, T2); - } - else { - // ds = k from construction - // perform scalar multiplication for all other terms and sum them up if there are non-zero coefficients - auto scopy = s2; - scopy.resize(k); - if (Degree(scopy) > 0) { - std::vector> ctxs(Degree(scopy)); - std::vector weights(Degree(scopy)); - - for (uint32_t i = 0; i < Degree(scopy); ++i) { - ctxs[i] = T[i]; - weights[i] = s2[i + 1]; - } - - su = cc->EvalLinearWSumMutable(ctxs, weights); - // the highest order coefficient will always be 1 because s2 is monic. - cc->EvalAddInPlace(su, T[k - 1]); - } - else { - su = T[k - 1]; - } - - // adds the free term (at x^0) - cc->EvalAddInPlace(su, s2.front() / 2.0); - // The number of levels of su is the same as the number of levels of T[k-1] + 1. - // Will only get here when m = 2, so need to reduce the number of levels by 1. - } - - // TODO : Andrey : here is different from 895 line - // Reduce number of levels of su to number of levels of T2km1. - // cc->LevelReduceInPlace(su, nullptr); - - Ciphertext result; - - if (flag_c) { - result = cc->EvalAdd(T2[m - 1], cu); - } - else { - result = cc->EvalAdd(T2[m - 1], divcs->q.front() / 2.0); - } - - result = cc->EvalMult(result, qu); - cc->ModReduceInPlace(result); - - cc->EvalAddInPlace(result, su); - cc->EvalSubInPlace(result, T2km1); - - return result; + return T[0]->GetCryptoContext()->EvalSub(InnerEvalChebyshevPS(T[0], f2, k, m, T, T2), T2km1); } std::shared_ptr> AdvancedSHECKKSRNS::EvalChebyPolys(ConstCiphertext& x, const std::vector& coefficients, double a, double b) const { - return (Degree(coefficients) < 5) ? internalEvalChebyPolysLinear(x, coefficients, a, b) : - internalEvalChebyPolysPS(x, coefficients, a, b); + uint32_t d = Degree(coefficients); + return (d < 5) ? 
internalEvalChebyPolysLinear(x, coefficients, a, b) : internalEvalChebyPolysPS(x, d, a, b); } std::shared_ptr> AdvancedSHECKKSRNS::EvalChebyPolys(ConstCiphertext& x, const std::vector& coefficients, double a, double b) const { - return (Degree(coefficients) < 5) ? internalEvalChebyPolysLinear(x, coefficients, a, b) : - internalEvalChebyPolysPS(x, coefficients, a, b); + uint32_t d = Degree(coefficients); + return (d < 5) ? internalEvalChebyPolysLinear(x, coefficients, a, b) : internalEvalChebyPolysPS(x, d, a, b); } std::shared_ptr> AdvancedSHECKKSRNS::EvalChebyPolys( ConstCiphertext& x, const std::vector>& coefficients, double a, double b) const { - return (Degree(coefficients) < 5) ? internalEvalChebyPolysLinear(x, coefficients, a, b) : - internalEvalChebyPolysPS(x, coefficients, a, b); + uint32_t d = Degree(coefficients); + return (d < 5) ? internalEvalChebyPolysLinear(x, coefficients, a, b) : internalEvalChebyPolysPS(x, d, a, b); } Ciphertext AdvancedSHECKKSRNS::EvalChebyshevSeries(ConstCiphertext& x, @@ -1333,12 +898,12 @@ Ciphertext AdvancedSHECKKSRNS::EvalChebyshevSeries(ConstCiphertext AdvancedSHECKKSRNS::EvalChebyshevSeriesWithPrecomp( std::shared_ptr> ctxtPowers, const std::vector& coeffs) const { return (Degree(coeffs) < 5) ? internalEvalChebyshevSeriesLinearWithPrecomp(ctxtPowers->powersRe, coeffs) : - internalEvalPolyPSWithPrecomp(ctxtPowers, coeffs); + internalEvalChebyshevSeriesPSWithPrecomp(ctxtPowers, coeffs); } Ciphertext AdvancedSHECKKSRNS::EvalChebyshevSeriesWithPrecomp( std::shared_ptr> ctxtPowers, const std::vector& coeffs) const { return (Degree(coeffs) < 5) ? internalEvalChebyshevSeriesLinearWithPrecomp(ctxtPowers->powersRe, coeffs) : - internalEvalPolyPSWithPrecomp(ctxtPowers, coeffs); + internalEvalChebyshevSeriesPSWithPrecomp(ctxtPowers, coeffs); } Ciphertext AdvancedSHECKKSRNS::EvalChebyshevSeriesWithPrecomp( std::shared_ptr> ctxtPowers, const std::vector>& coeffs) const { @@ -1368,21 +933,17 @@ Ciphertext AdvancedSHECKKSRNS::EvalChebyshevSeriesLinear(ConstCipherte Ciphertext AdvancedSHECKKSRNS::EvalChebyshevSeriesPS(ConstCiphertext& x, const std::vector& coeffs, double a, double b) const { - return internalEvalChebyshevSeriesPSWithPrecomp(internalEvalChebyPolysPS(x, coeffs, a, b), coeffs); + return internalEvalChebyshevSeriesPSWithPrecomp(internalEvalChebyPolysPS(x, Degree(coeffs), a, b), coeffs); } Ciphertext AdvancedSHECKKSRNS::EvalChebyshevSeriesPS(ConstCiphertext& x, const std::vector& coeffs, double a, double b) const { - return internalEvalChebyshevSeriesPSWithPrecomp(internalEvalChebyPolysPS(x, coeffs, a, b), coeffs); + return internalEvalChebyshevSeriesPSWithPrecomp(internalEvalChebyPolysPS(x, Degree(coeffs), a, b), coeffs); } Ciphertext AdvancedSHECKKSRNS::EvalChebyshevSeriesPS(ConstCiphertext& x, const std::vector>& coeffs, double a, double b) const { - return internalEvalChebyshevSeriesPSWithPrecomp(internalEvalChebyPolysPS(x, coeffs, a, b), coeffs); + return internalEvalChebyshevSeriesPSWithPrecomp(internalEvalChebyPolysPS(x, Degree(coeffs), a, b), coeffs); } -//------------------------------------------------------------------------------ -// EVAL LINEAR TRANSFORMATION -//------------------------------------------------------------------------------ - } // namespace lbcrypto diff --git a/src/pke/lib/scheme/ckksrns/ckksrns-fhe.cpp b/src/pke/lib/scheme/ckksrns/ckksrns-fhe.cpp index c895b3e3e..0d44ebf2d 100644 --- a/src/pke/lib/scheme/ckksrns/ckksrns-fhe.cpp +++ b/src/pke/lib/scheme/ckksrns/ckksrns-fhe.cpp @@ -120,32 +120,32 @@ void 
FHECKKSRNS::EvalBootstrapSetup(const CryptoContextImpl& cc, std:: auto& precom = m_bootPrecomMap[slots]; precom->m_slots = slots; - precom->m_dim1 = dim1[0]; // even for the case of a single slot we need one level for rescaling uint32_t logSlots = (slots < 3) ? 1 : std::log2(slots); // Perform some checks on the level budget and compute parameters - std::vector newBudget = levelBudget; - if (newBudget[0] > logSlots) { + uint32_t newBudget0 = levelBudget[0]; + if (newBudget0 > logSlots) { std::cerr << "\nWarning, the level budget for encoding is too large. Setting it to " << logSlots << std::endl; - newBudget[0] = logSlots; + newBudget0 = logSlots; } - if (newBudget[0] < 1) { + if (newBudget0 < 1) { std::cerr << "\nWarning, the level budget for encoding can not be zero. Setting it to 1" << std::endl; - newBudget[0] = 1; + newBudget0 = 1; } - if (newBudget[1] > logSlots) { + uint32_t newBudget1 = levelBudget[1]; + if (newBudget1 > logSlots) { std::cerr << "\nWarning, the level budget for decoding is too large. Setting it to " << logSlots << std::endl; - newBudget[1] = logSlots; + newBudget1 = logSlots; } - if (newBudget[1] < 1) { + if (newBudget1 < 1) { std::cerr << "\nWarning, the level budget for decoding can not be zero. Setting it to 1" << std::endl; - newBudget[1] = 1; + newBudget1 = 1; } - precom->m_paramsEnc = GetCollapsedFFTParams(slots, newBudget[0], dim1[0]); - precom->m_paramsDec = GetCollapsedFFTParams(slots, newBudget[1], dim1[1]); + precom->m_paramsEnc = GetCollapsedFFTParams(slots, newBudget0, dim1[0]); + precom->m_paramsDec = GetCollapsedFFTParams(slots, newBudget1, dim1[1]); if (precompute) { uint32_t m = 4 * slots; @@ -197,48 +197,51 @@ void FHECKKSRNS::EvalBootstrapSetup(const CryptoContextImpl& cc, std:: uint32_t approxModDepth = GetModDepthInternal(cryptoParams->GetSecretKeyDist()); - uint32_t depthBT = approxModDepth + precom->m_paramsEnc[CKKS_BOOT_PARAMS::LEVEL_BUDGET] + - precom->m_paramsDec[CKKS_BOOT_PARAMS::LEVEL_BUDGET]; + uint32_t depthBT = approxModDepth + precom->m_paramsEnc.lvlb + precom->m_paramsDec.lvlb; // compute # of levels to remain when encoding the coefficients // for FLEXIBLEAUTOEXT we do not need extra modulus in auxiliary plaintexts auto st = cryptoParams->GetScalingTechnique(); uint32_t L0 = cryptoParams->GetElementParams()->GetParams().size() - (st == FLEXIBLEAUTOEXT); - uint32_t lEnc = L0 - compositeDegree * (precom->m_paramsEnc[CKKS_BOOT_PARAMS::LEVEL_BUDGET] + 1); + uint32_t lEnc = L0 - compositeDegree * (precom->m_paramsEnc.lvlb + 1); uint32_t lDec = L0 - compositeDegree * depthBT; - bool isLTBootstrap = (precom->m_paramsEnc[CKKS_BOOT_PARAMS::LEVEL_BUDGET] == 1) && - (precom->m_paramsDec[CKKS_BOOT_PARAMS::LEVEL_BUDGET] == 1); + bool isLTBootstrap = (precom->m_paramsEnc.lvlb == 1) && (precom->m_paramsDec.lvlb == 1); if (isLTBootstrap) { - // allocate all vectors - std::vector>> U0(slots, std::vector>(slots)); - std::vector>> U1(slots, std::vector>(slots)); - std::vector>> U0hatT(slots, std::vector>(slots)); - std::vector>> U1hatT(slots, std::vector>(slots)); - - for (uint32_t i = 0; i < slots; i++) { - for (uint32_t j = 0; j < slots; j++) { - U0[i][j] = ksiPows[(j * rotGroup[i]) & mmask]; - U0hatT[j][i] = std::conj(U0[i][j]); - U1[i][j] = std::complex(0, 1) * U0[i][j]; - U1hatT[j][i] = std::conj(U1[i][j]); + if (isSparse) { + std::vector>> U0(slots, std::vector>(slots)); + std::vector>> U0hatT(slots, std::vector>(slots)); + std::vector>> U1(slots, std::vector>(slots)); + std::vector>> U1hatT(slots, std::vector>(slots)); + for (uint32_t i = 0; i 
< slots; ++i) { + for (uint32_t j = 0; j < slots; ++j) { + U0[i][j] = ksiPows[(j * rotGroup[i]) & mmask]; + U0hatT[j][i] = std::conj(U0[i][j]); + U1[i][j] = std::complex(0, 1) * U0[i][j]; + U1hatT[j][i] = std::conj(U1[i][j]); + } } - } - - if (!isSparse) { - precom->m_U0hatTPre = EvalLinearTransformPrecompute(cc, U0hatT, scaleEnc, lEnc); - precom->m_U0Pre = EvalLinearTransformPrecompute(cc, U0, scaleDec, lDec); + precom->m_U0Pre = EvalLinearTransformPrecompute(cc, U0, U1, 1, scaleDec, lDec); + precom->m_U0hatTPre = EvalLinearTransformPrecompute(cc, U0hatT, U1hatT, 0, scaleEnc, lEnc); } else { - precom->m_U0hatTPre = EvalLinearTransformPrecompute(cc, U0hatT, U1hatT, 0, scaleEnc, lEnc); - precom->m_U0Pre = EvalLinearTransformPrecompute(cc, U0, U1, 1, scaleDec, lDec); + std::vector>> U0(slots, std::vector>(slots)); + std::vector>> U0hatT(slots, std::vector>(slots)); + for (uint32_t i = 0; i < slots; ++i) { + for (uint32_t j = 0; j < slots; ++j) { + U0[i][j] = ksiPows[(j * rotGroup[i]) & mmask]; + U0hatT[j][i] = std::conj(U0[i][j]); + } + } + precom->m_U0Pre = EvalLinearTransformPrecompute(cc, U0, scaleDec, lDec); + precom->m_U0hatTPre = EvalLinearTransformPrecompute(cc, U0hatT, scaleEnc, lEnc); } } else { - precom->m_U0hatTPreFFT = EvalCoeffsToSlotsPrecompute(cc, ksiPows, rotGroup, false, scaleEnc, lEnc); precom->m_U0PreFFT = EvalSlotsToCoeffsPrecompute(cc, ksiPows, rotGroup, false, scaleDec, lDec); + precom->m_U0hatTPreFFT = EvalCoeffsToSlotsPrecompute(cc, ksiPows, rotGroup, false, scaleEnc, lEnc); } } } @@ -258,22 +261,19 @@ std::shared_ptr>> FHECKKSRNS::EvalBootstrap auto algo = cc->GetScheme(); auto M = cc->GetCyclotomicOrder(); - if (slots == 0) - slots = M / 4; + slots = (slots == 0) ? M / 4 : slots; // computing all indices for baby-step giant-step procedure auto evalKeys = algo->EvalAtIndexKeyGen(nullptr, privateKey, FindBootstrapRotationIndices(slots, M)); - auto conjKey = ConjugateKeyGen(privateKey); - (*evalKeys)[M - 1] = conjKey; + (*evalKeys)[M - 1] = ConjugateKeyGen(privateKey); if (cryptoParams->GetSecretKeyDist() == SPARSE_ENCAPSULATED) { DCRTPoly::TugType tug; - DCRTPoly sNew(tug, cryptoParams->GetElementParams(), Format::EVALUATION, 32); // sparse key used for the modraising step auto skNew = std::make_shared>(cc); - skNew->SetPrivateElement(std::move(sNew)); + skNew->SetPrivateElement(DCRTPoly(tug, cryptoParams->GetElementParams(), Format::EVALUATION, 32)); // we reserve M-4 and M-2 for the sparse encapsulation switching keys // Even autorphism indices are not possible, so there will not be any conflict @@ -298,11 +298,9 @@ void FHECKKSRNS::EvalBootstrapPrecompute(const CryptoContextImpl& cc, uint32_t slots = (numSlots == 0) ? 
M / 4 : numSlots; auto& p = GetBootPrecom(slots); - std::vector dim1{p.m_dim1, static_cast(p.m_paramsDec[CKKS_BOOT_PARAMS::GIANT_STEP])}; - std::vector newBudget{static_cast(p.m_paramsEnc[CKKS_BOOT_PARAMS::LEVEL_BUDGET]), - static_cast(p.m_paramsDec[CKKS_BOOT_PARAMS::LEVEL_BUDGET])}; - p.m_paramsEnc = GetCollapsedFFTParams(slots, newBudget[0], dim1[0]); - p.m_paramsDec = GetCollapsedFFTParams(slots, newBudget[1], dim1[1]); + + p.m_paramsEnc = GetCollapsedFFTParams(slots, p.m_paramsEnc.lvlb, p.m_paramsEnc.g); + p.m_paramsDec = GetCollapsedFFTParams(slots, p.m_paramsDec.lvlb, p.m_paramsDec.g); uint32_t m = 4 * slots; uint32_t mmask = m - 1; // assumes m is power of 2 @@ -353,48 +351,51 @@ void FHECKKSRNS::EvalBootstrapPrecompute(const CryptoContextImpl& cc, uint32_t approxModDepth = GetModDepthInternal(cryptoParams->GetSecretKeyDist()); - uint32_t depthBT = - approxModDepth + p.m_paramsEnc[CKKS_BOOT_PARAMS::LEVEL_BUDGET] + p.m_paramsDec[CKKS_BOOT_PARAMS::LEVEL_BUDGET]; + uint32_t depthBT = approxModDepth + p.m_paramsEnc.lvlb + p.m_paramsDec.lvlb; // compute # of levels to remain when encoding the coefficients // for FLEXIBLEAUTOEXT we do not need extra modulus in auxiliary plaintexts auto st = cryptoParams->GetScalingTechnique(); uint32_t L0 = cryptoParams->GetElementParams()->GetParams().size() - (st == FLEXIBLEAUTOEXT); - uint32_t lEnc = L0 - compositeDegree * (p.m_paramsEnc[CKKS_BOOT_PARAMS::LEVEL_BUDGET] + 1); + uint32_t lEnc = L0 - compositeDegree * (p.m_paramsEnc.lvlb + 1); uint32_t lDec = L0 - compositeDegree * depthBT; - bool isLTBootstrap = - (p.m_paramsEnc[CKKS_BOOT_PARAMS::LEVEL_BUDGET] == 1) && (p.m_paramsDec[CKKS_BOOT_PARAMS::LEVEL_BUDGET] == 1); + bool isLTBootstrap = (p.m_paramsEnc.lvlb == 1) && (p.m_paramsDec.lvlb == 1); if (isLTBootstrap) { - // allocate all vectors - std::vector>> U0(slots, std::vector>(slots)); - std::vector>> U1(slots, std::vector>(slots)); - std::vector>> U0hatT(slots, std::vector>(slots)); - std::vector>> U1hatT(slots, std::vector>(slots)); - - for (size_t i = 0; i < slots; i++) { - for (size_t j = 0; j < slots; j++) { - U0[i][j] = ksiPows[(j * rotGroup[i]) & mmask]; - U0hatT[j][i] = std::conj(U0[i][j]); - U1[i][j] = std::complex(0, 1) * U0[i][j]; - U1hatT[j][i] = std::conj(U1[i][j]); + if (isSparse) { + std::vector>> U0(slots, std::vector>(slots)); + std::vector>> U0hatT(slots, std::vector>(slots)); + std::vector>> U1(slots, std::vector>(slots)); + std::vector>> U1hatT(slots, std::vector>(slots)); + for (uint32_t i = 0; i < slots; ++i) { + for (uint32_t j = 0; j < slots; ++j) { + U0[i][j] = ksiPows[(j * rotGroup[i]) & mmask]; + U0hatT[j][i] = std::conj(U0[i][j]); + U1[i][j] = std::complex(0, 1) * U0[i][j]; + U1hatT[j][i] = std::conj(U1[i][j]); + } } - } - - if (!isSparse) { - p.m_U0hatTPre = EvalLinearTransformPrecompute(cc, U0hatT, scaleEnc, lEnc); - p.m_U0Pre = EvalLinearTransformPrecompute(cc, U0, scaleDec, lDec); + p.m_U0Pre = EvalLinearTransformPrecompute(cc, U0, U1, 1, scaleDec, lDec); + p.m_U0hatTPre = EvalLinearTransformPrecompute(cc, U0hatT, U1hatT, 0, scaleEnc, lEnc); } else { - p.m_U0hatTPre = EvalLinearTransformPrecompute(cc, U0hatT, U1hatT, 0, scaleEnc, lEnc); - p.m_U0Pre = EvalLinearTransformPrecompute(cc, U0, U1, 1, scaleDec, lDec); + std::vector>> U0(slots, std::vector>(slots)); + std::vector>> U0hatT(slots, std::vector>(slots)); + for (uint32_t i = 0; i < slots; ++i) { + for (uint32_t j = 0; j < slots; ++j) { + U0[i][j] = ksiPows[(j * rotGroup[i]) & mmask]; + U0hatT[j][i] = std::conj(U0[i][j]); + } + } + p.m_U0Pre = 
EvalLinearTransformPrecompute(cc, U0, scaleDec, lDec); + p.m_U0hatTPre = EvalLinearTransformPrecompute(cc, U0hatT, scaleEnc, lEnc); } } else { - p.m_U0hatTPreFFT = EvalCoeffsToSlotsPrecompute(cc, ksiPows, rotGroup, false, scaleEnc, lEnc); p.m_U0PreFFT = EvalSlotsToCoeffsPrecompute(cc, ksiPows, rotGroup, false, scaleDec, lDec); + p.m_U0hatTPreFFT = EvalCoeffsToSlotsPrecompute(cc, ksiPows, rotGroup, false, scaleEnc, lEnc); } } @@ -404,11 +405,13 @@ Ciphertext FHECKKSRNS::EvalBootstrap(ConstCiphertext& cipher if (cryptoParams->GetKeySwitchTechnique() != HYBRID) OPENFHE_THROW("CKKS Bootstrapping only supported with Hybrid key switching."); + auto st = cryptoParams->GetScalingTechnique(); #if NATIVEINT == 128 if (st == FLEXIBLEAUTO || st == FLEXIBLEAUTOEXT) OPENFHE_THROW("128-bit CKKS Bootstrapping only supported for FIXEDMANUAL and FIXEDAUTO."); #endif + if (numIterations != 1 && numIterations != 2) OPENFHE_THROW("CKKS Bootstrapping only supported for 1 or 2 iterations."); @@ -428,28 +431,28 @@ Ciphertext FHECKKSRNS::EvalBootstrap(ConstCiphertext& cipher // Step 1: Get the input. uint32_t powerOfTwoModulus = 1 << precision; - // Step 2: Scale up by powerOfTwoModulus, and extend the modulus to powerOfTwoModulus * q. - // Note that we extend the modulus implicitly without any code calls because the value always stays 0. - // We multiply by powerOfTwoModulus, and leave the last CRT value to be 0 (mod powerOfTwoModulus). - auto ctScaledUp = cc->EvalMultNoCheck(ciphertext, powerOfTwoModulus); - ctScaledUp->SetLevel(L0 - ctScaledUp->GetElements()[0].GetNumOfElements()); - // Step 3: Bootstrap the initial ciphertext. - auto ctInitialBootstrap = cc->EvalBootstrap(ciphertext, numIterations - 1, precision); + auto ctInitialBootstrap = EvalBootstrap(ciphertext, numIterations - 1, precision); cc->GetScheme()->ModReduceInternalInPlace(ctInitialBootstrap, compositeDegree); // Step 4: Scale up by powerOfTwoModulus. cc->GetScheme()->MultByIntegerInPlace(ctInitialBootstrap, powerOfTwoModulus); + // If we start with more towers, than we obtain from bootstrapping, return the original ciphertext. + auto bootstrappingSizeQ = ctInitialBootstrap->GetElements()[0].GetNumOfElements(); + if (bootstrappingSizeQ <= initSizeQ) + return ciphertext->Clone(); + + // Step 2: Scale up by powerOfTwoModulus, and extend the modulus to powerOfTwoModulus * q. + // Note that we extend the modulus implicitly without any code calls because the value always stays 0. + auto ctScaledUp = ciphertext->Clone(); + // We multiply by powerOfTwoModulus, and leave the last CRT value to be 0 (mod powerOfTwoModulus). + cc->GetScheme()->MultByIntegerInPlace(ctScaledUp, powerOfTwoModulus); + ctScaledUp->SetLevel(L0 - ctScaledUp->GetElements()[0].GetNumOfElements()); + // Step 5: Mod-down to powerOfTwoModulus * q // We mod down, and leave the last CRT value to be 0 because it's divisible by powerOfTwoModulus. auto ctBootstrappedScaledDown = ctInitialBootstrap->Clone(); - auto bootstrappingSizeQ = ctBootstrappedScaledDown->GetElements()[0].GetNumOfElements(); - - // If we start with more towers, than we obtain from bootstrapping, return the original ciphertext. 
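// --------------------------------------------------------------------------
// Editor's note: illustrative sketch, not part of this patch. The two-iteration
// path above implements the usual "bootstrap the bootstrapping error"
// refinement: the first bootstrap output (m + e1) is scaled by 2^precision and
// compared against the scaled-up input, the difference 2^precision * e1 is
// bootstrapped on its own, and the result is scaled back down and subtracted,
// leaving an error of roughly e^2 instead of e. A toy plaintext model, with
// one bootstrap replaced by "add noise of size eps" and the scaled-up original
// ciphertext played by scale * m:
#include <cmath>
#include <cstdio>
#include <random>

int main() {
    std::mt19937 gen(1);
    const double eps = 1e-4;                               // single-bootstrap accuracy (~2^-precision)
    std::uniform_real_distribution<double> noise(-eps, eps);
    auto boot = [&](double x) { return x + noise(gen); };  // model of one bootstrap

    const double m     = 0.123456789;
    const double scale = 1.0 / eps;                        // plays the role of 2^precision

    double ct1     = boot(m);                              // first bootstrap: m + e1
    double err     = scale * ct1 - scale * m;              // isolate scale * e1
    double errBoot = boot(err);                            // bootstrap the (now O(1)) error
    double refined = ct1 - errBoot / scale;                // m - e2 / scale

    std::printf("one iteration : |error| = %.3e\n", std::abs(ct1 - m));
    std::printf("two iterations: |error| = %.3e\n", std::abs(refined - m));
    return 0;
}
// --------------------------------------------------------------------------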
- if (bootstrappingSizeQ <= initSizeQ) { - return ciphertext->Clone(); - } // TODO: YSP Can be removed for FLEXIBLE* scaling techniques as well as the closeness of 2^p to moduli is no longer needed if (st != COMPOSITESCALINGAUTO && st != COMPOSITESCALINGMANUAL) { @@ -462,7 +465,7 @@ Ciphertext FHECKKSRNS::EvalBootstrap(ConstCiphertext& cipher auto ctBootstrappingError = cc->EvalSub(ctBootstrappedScaledDown, ctScaledUp); // Step 8: Bootstrap the error. - auto ctBootstrappedError = cc->EvalBootstrap(ctBootstrappingError, 1, 0); + auto ctBootstrappedError = EvalBootstrap(ctBootstrappingError, 1, 0); cc->GetScheme()->ModReduceInternalInPlace(ctBootstrappedError, compositeDegree); // Step 9: Subtract the bootstrapped error from the initial bootstrap to get even lower error. @@ -526,42 +529,40 @@ Ciphertext FHECKKSRNS::EvalBootstrap(ConstCiphertext& cipher uint32_t N = cc->GetRingDimension(); if (compositeDegree > 1) { // RNS basis extension from level 0 RNS limbs to the raised RNS basis - auto& ctxtDCRT = raised->GetElements(); - ExtendCiphertext(ctxtDCRT, *cc, elementParamsRaisedPtr); - raised->SetLevel(L0 - ctxtDCRT[0].GetNumOfElements()); + auto& ctxtDCRTs = raised->GetElements(); + ExtendCiphertext(ctxtDCRTs, *cc, elementParamsRaisedPtr); + raised->SetLevel(L0 - ctxtDCRTs[0].GetNumOfElements()); } else { if (cryptoParams->GetSecretKeyDist() == SPARSE_ENCAPSULATED) { - auto evalKeyMap = cc->GetEvalAutomorphismKeyMap(raised->GetKeyTag()); + auto& evalKeyMap = cc->GetEvalAutomorphismKeyMap(raised->GetKeyTag()); // transform from a denser secret to a sparser one raised = KeySwitchSparse(raised, evalKeyMap.at(2 * N - 4)); // Only level 0 ciphertext used here. Other towers ignored to make CKKS bootstrapping faster. - auto& ctxtDCRT = raised->GetElements(); - for (auto& poly : ctxtDCRT) { - poly.SetFormat(COEFFICIENT); - DCRTPoly temp(elementParamsRaisedPtr, COEFFICIENT); - temp = poly.GetElementAtIndex(0); - temp.SetFormat(EVALUATION); - poly = std::move(temp); + auto& ctxtDCRTs = raised->GetElements(); + for (auto& dcrt : ctxtDCRTs) { + dcrt.SetFormat(COEFFICIENT); + DCRTPoly tmp(dcrt.GetElementAtIndex(0), elementParamsRaisedPtr); + tmp.SetFormat(EVALUATION); + dcrt = std::move(tmp); } - raised->SetLevel(L0 - ctxtDCRT[0].GetNumOfElements()); + raised->SetLevel(L0 - ctxtDCRTs[0].GetNumOfElements()); // go back to a denser secret algo->KeySwitchInPlace(raised, evalKeyMap.at(2 * N - 2)); } else { // Only level 0 ciphertext used here. Other towers ignored to make CKKS bootstrapping faster. 
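For readers following the reordered two-iteration flow above, the refinement is driven entirely through the public EvalBootstrap entry point. A minimal usage sketch, assuming a CryptoContext already configured with EvalBootstrapSetup and EvalBootstrapKeyGen; the precision value is a placeholder, not taken from this patch:

#include "openfhe.h"

using namespace lbcrypto;

// Two-iteration (iterative) CKKS bootstrapping: the ciphertext is bootstrapped
// once, the residual error is isolated, bootstrapped again, and subtracted,
// which is the path implemented in the hunk above.
Ciphertext<DCRTPoly> DoubleBootstrap(const CryptoContext<DCRTPoly>& cc,
                                     const Ciphertext<DCRTPoly>& ct) {
    constexpr uint32_t numIterations = 2;   // triggers the error-refinement branch
    constexpr uint32_t precision     = 17;  // assumed precision (bits) of a single bootstrap
    return cc->EvalBootstrap(ct, numIterations, precision);
}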
- auto& ctxtDCRT = raised->GetElements(); - for (auto& poly : ctxtDCRT) { - poly.SetFormat(COEFFICIENT); - DCRTPoly temp(elementParamsRaisedPtr, COEFFICIENT); - temp = poly.GetElementAtIndex(0); - temp.SetFormat(EVALUATION); - poly = std::move(temp); + auto& ctxtDCRTs = raised->GetElements(); + for (auto& dcrt : ctxtDCRTs) { + dcrt.SetFormat(COEFFICIENT); + DCRTPoly tmp(dcrt.GetElementAtIndex(0), elementParamsRaisedPtr); + tmp.SetFormat(EVALUATION); + dcrt = std::move(tmp); } - raised->SetLevel(L0 - ctxtDCRT[0].GetNumOfElements()); + raised->SetLevel(L0 - ctxtDCRTs[0].GetNumOfElements()); } } @@ -602,13 +603,12 @@ Ciphertext FHECKKSRNS::EvalBootstrap(ConstCiphertext& cipher cc->EvalMultInPlace(raised, pre * (1.0 / (k * N))); // no linear transformations are needed for Chebyshev series as the range has been normalized to [-1,1] - double coeffLowerBound = -1; - double coeffUpperBound = 1; + double coeffLowerBound = -1.0; + double coeffUpperBound = 1.0; auto& p = GetBootPrecom(slots); - bool isLTBootstrap = - (p.m_paramsEnc[CKKS_BOOT_PARAMS::LEVEL_BUDGET] == 1) && (p.m_paramsDec[CKKS_BOOT_PARAMS::LEVEL_BUDGET] == 1); + bool isLTBootstrap = (p.m_paramsEnc.lvlb == 1) && (p.m_paramsDec.lvlb == 1); Ciphertext ctxtDec; if (slots == cc->GetCyclotomicOrder() / 4) { @@ -631,11 +631,11 @@ Ciphertext FHECKKSRNS::EvalBootstrap(ConstCiphertext& cipher auto ctxtEnc = (isLTBootstrap) ? EvalLinearTransform(p.m_U0hatTPre, raised) : EvalCoeffsToSlots(p.m_U0hatTPreFFT, raised); - auto evalKeyMap = cc->GetEvalAutomorphismKeyMap(ctxtEnc->GetKeyTag()); - auto conj = Conjugate(ctxtEnc, evalKeyMap); - auto ctxtEncI = cc->EvalSub(ctxtEnc, conj); + auto& evalKeyMap = cc->GetEvalAutomorphismKeyMap(ctxtEnc->GetKeyTag()); + auto conj = Conjugate(ctxtEnc, evalKeyMap); + auto ctxtEncI = cc->EvalSub(ctxtEnc, conj); + cc->EvalAddInPlace(ctxtEnc, conj); algo->MultByMonomialInPlace(ctxtEncI, 3 * slots); - cc->EvalAddInPlaceNoCheck(ctxtEnc, conj); if (st == FIXEDMANUAL) { while (ctxtEnc->GetNoiseScaleDeg() > 1) { @@ -655,8 +655,8 @@ Ciphertext FHECKKSRNS::EvalBootstrap(ConstCiphertext& cipher //------------------------------------------------------------------------------ // Evaluate Chebyshev series for the sine wave - ctxtEnc = cc->EvalChebyshevSeries(ctxtEnc, coefficients, coeffLowerBound, coeffUpperBound); - ctxtEncI = cc->EvalChebyshevSeries(ctxtEncI, coefficients, coeffLowerBound, coeffUpperBound); + ctxtEnc = algo->EvalChebyshevSeries(ctxtEnc, coefficients, coeffLowerBound, coeffUpperBound); + ctxtEncI = algo->EvalChebyshevSeries(ctxtEncI, coefficients, coeffLowerBound, coeffUpperBound); // Double-angle iterations if (st != FIXEDMANUAL) { @@ -707,8 +707,9 @@ Ciphertext FHECKKSRNS::EvalBootstrap(ConstCiphertext& cipher // Running PartialSum //------------------------------------------------------------------------------ - for (uint32_t j = 1; j < N / (2 * slots); j <<= 1) - cc->EvalAddInPlaceNoCheck(raised, cc->EvalRotate(raised, j * slots)); + const auto limit = N / (2 * slots); + for (uint32_t j = 1; j < limit; j <<= 1) + cc->EvalAddInPlace(raised, cc->EvalRotate(raised, j * slots)); #ifdef BOOTSTRAPTIMING TIC(t); @@ -723,9 +724,8 @@ Ciphertext FHECKKSRNS::EvalBootstrap(ConstCiphertext& cipher auto ctxtEnc = (isLTBootstrap) ? 
EvalLinearTransform(p.m_U0hatTPre, raised) : EvalCoeffsToSlots(p.m_U0hatTPreFFT, raised); - auto evalKeyMap = cc->GetEvalAutomorphismKeyMap(ctxtEnc->GetKeyTag()); - auto conj = Conjugate(ctxtEnc, evalKeyMap); - cc->EvalAddInPlaceNoCheck(ctxtEnc, conj); + auto& evalKeyMap = cc->GetEvalAutomorphismKeyMap(ctxtEnc->GetKeyTag()); + cc->EvalAddInPlace(ctxtEnc, Conjugate(ctxtEnc, evalKeyMap)); if (st == FIXEDMANUAL) { while (ctxtEnc->GetNoiseScaleDeg() > 1) { @@ -750,7 +750,7 @@ Ciphertext FHECKKSRNS::EvalBootstrap(ConstCiphertext& cipher //------------------------------------------------------------------------------ // Evaluate Chebyshev series for the sine wave - ctxtEnc = cc->EvalChebyshevSeries(ctxtEnc, coefficients, coeffLowerBound, coeffUpperBound); + ctxtEnc = algo->EvalChebyshevSeries(ctxtEnc, coefficients, coeffLowerBound, coeffUpperBound); // Double-angle iterations if (st != FIXEDMANUAL) @@ -808,24 +808,20 @@ Ciphertext FHECKKSRNS::EvalBootstrap(ConstCiphertext& cipher //------------------------------------------------------------------------------ std::vector FHECKKSRNS::FindBootstrapRotationIndices(uint32_t slots, uint32_t M) { - auto& p = GetBootPrecom(slots); - bool isLTBootstrap = - (p.m_paramsEnc[CKKS_BOOT_PARAMS::LEVEL_BUDGET] == 1) && (p.m_paramsDec[CKKS_BOOT_PARAMS::LEVEL_BUDGET] == 1); + const auto& p = GetBootPrecom(slots); - std::vector fullIndexList; - if (isLTBootstrap) { - fullIndexList = FindLinearTransformRotationIndices(slots, M); + // Remove possible duplicates and remove automorphisms corresponding to 0 and M/4 by using std::set + std::set s; + if (p.m_paramsEnc.lvlb == 1 && p.m_paramsDec.lvlb == 1) { + auto tmp = FindLinearTransformRotationIndices(slots, M); + s.insert(tmp.begin(), tmp.end()); } else { - fullIndexList = FindCoeffsToSlotsRotationIndices(slots, M); - - std::vector indexListStC{FindSlotsToCoeffsRotationIndices(slots, M)}; - fullIndexList.insert(fullIndexList.end(), std::make_move_iterator(indexListStC.begin()), - std::make_move_iterator(indexListStC.end())); + auto tmp = FindCoeffsToSlotsRotationIndices(slots, M); + s.insert(tmp.begin(), tmp.end()); + tmp = FindSlotsToCoeffsRotationIndices(slots, M); + s.insert(tmp.begin(), tmp.end()); } - - // Remove possible duplicates and remove automorphisms corresponding to 0 and M/4 by using std::set - std::set s(fullIndexList.begin(), fullIndexList.end()); s.erase(0); s.erase(M / 4); @@ -837,29 +833,28 @@ std::vector FHECKKSRNS::FindBootstrapRotationIndices(uint32_t slots, ui // This method completely depends on FindBootstrapRotationIndices() to do that. std::vector FHECKKSRNS::FindLinearTransformRotationIndices(uint32_t slots, uint32_t M) { // Computing the baby-step g and the giant-step h. - auto& p = GetBootPrecom(slots); - uint32_t g = (p.m_dim1 == 0) ? static_cast(std::ceil(std::sqrt(slots))) : p.m_dim1; - uint32_t h = static_cast(std::ceil(static_cast(slots) / g)); + const auto& p = GetBootPrecom(slots); + const uint32_t g = (p.m_paramsEnc.g == 0) ? 
std::ceil(std::sqrt(slots)) : p.m_paramsEnc.g; + const uint32_t h = std::ceil(static_cast(slots) / g); - std::vector indexList; // To avoid overflowing uint32_t variables, we do some math operations below in a specific order // computing all indices for baby-step giant-step procedure - int32_t indexListSz = static_cast(g) + h + M - 2; + const int32_t indexListSz = static_cast(g) + h + M - 2; if (indexListSz < 0) OPENFHE_THROW("indexListSz can not be negative"); + std::vector indexList; indexList.reserve(indexListSz); - for (size_t i = 1; i <= g; ++i) + + for (uint32_t i = 1; i <= g; ++i) indexList.emplace_back(i); - for (size_t i = 2; i < h; ++i) - indexList.emplace_back(g * i); + for (uint32_t i = 2; i < h; ++i) + indexList.emplace_back(i * g); // additional automorphisms are needed for sparse bootstrapping - uint32_t m = slots * 4; - if (m != M) { - for (size_t j = 1; j < M / m; j <<= 1) { + if (uint32_t m = slots * 4; m != M) { + for (uint32_t j = 1; j < M / m; j <<= 1) indexList.emplace_back(j * slots); - } } return indexList; @@ -869,114 +864,86 @@ std::vector FHECKKSRNS::FindLinearTransformRotationIndices(uint32_t sl // so it DOES NOT remove possible duplicates and automorphisms corresponding to 0 and M/4. // This method completely depends on FindBootstrapRotationIndices() to do that. std::vector FHECKKSRNS::FindCoeffsToSlotsRotationIndices(uint32_t slots, uint32_t M) { - auto& p = GetBootPrecom(slots); + const auto& p = GetBootPrecom(slots).m_paramsEnc; - uint32_t levelBudget = p.m_paramsEnc[CKKS_BOOT_PARAMS::LEVEL_BUDGET]; - uint32_t layersCollapse = p.m_paramsEnc[CKKS_BOOT_PARAMS::LAYERS_COLL]; - uint32_t remCollapse = p.m_paramsEnc[CKKS_BOOT_PARAMS::LAYERS_REM]; - uint32_t numRotations = p.m_paramsEnc[CKKS_BOOT_PARAMS::NUM_ROTATIONS]; - uint32_t b = p.m_paramsEnc[CKKS_BOOT_PARAMS::BABY_STEP]; - uint32_t g = p.m_paramsEnc[CKKS_BOOT_PARAMS::GIANT_STEP]; - uint32_t numRotationsRem = p.m_paramsEnc[CKKS_BOOT_PARAMS::NUM_ROTATIONS_REM]; - uint32_t bRem = p.m_paramsEnc[CKKS_BOOT_PARAMS::BABY_STEP_REM]; - uint32_t gRem = p.m_paramsEnc[CKKS_BOOT_PARAMS::GIANT_STEP_REM]; - - uint32_t flagRem = (remCollapse == 0) ? 
0 : 1; - - std::vector indexList; // To avoid overflowing uint32_t variables, we do some math operations below in a specific order // Computing all indices for baby-step giant-step procedure for encoding and decoding - int32_t indexListSz = static_cast(b) + g - 2 + bRem + gRem - 2 + 1 + M; + const int32_t indexListSz = static_cast(p.b) + p.g - 2 + p.bRem + p.gRem - 2 + 1 + M; if (indexListSz < 0) OPENFHE_THROW("indexListSz can not be negative"); + + std::vector indexList; indexList.reserve(indexListSz); - for (int32_t s = static_cast(levelBudget) - 1; s >= static_cast(flagRem); --s) { - const uint32_t scalingFactor = 1U << ((s - flagRem) * layersCollapse + remCollapse); - const int32_t halfRots = (1 - (numRotations + 1) / 2); - for (int32_t j = halfRots; j < static_cast(g + halfRots); ++j) { - indexList.emplace_back(ReduceRotation(j * scalingFactor, slots)); - } - for (size_t i = 0; i < b; i++) { - indexList.emplace_back(ReduceRotation((g * i) * scalingFactor, M / 4)); - } + // additional automorphisms are needed for sparse bootstrapping + if (uint32_t m = slots * 4; m != M) { + for (uint32_t j = 1; j < M / m; j <<= 1) + indexList.emplace_back(j * slots); } - if (flagRem) { - const int32_t halfRots = (1 - (numRotationsRem + 1) / 2); - for (int32_t j = halfRots; j < static_cast(gRem + halfRots); ++j) { - indexList.emplace_back(ReduceRotation(j, slots)); - } - for (size_t i = 0; i < bRem; i++) { - indexList.emplace_back(ReduceRotation(gRem * i, M / 4)); - } + M >>= 2; + const int32_t flagRem = (p.remCollapse == 0) ? 0 : 1; + const int32_t halfRots = 1 - (p.numRotations + 1) / 2; + const int32_t halfRotsg = halfRots + p.g; + for (int32_t s = -1 + p.lvlb; s >= flagRem; --s) { + const uint32_t scalingFactor = 1U << ((s - flagRem) * p.layersCollapse + p.remCollapse); + for (int32_t j = halfRots; j < halfRotsg; ++j) + indexList.emplace_back(ReduceRotation(j * scalingFactor, slots)); + for (uint32_t i = 0; i < p.b; ++i) + indexList.emplace_back(ReduceRotation(i * p.g * scalingFactor, M)); } - uint32_t m = slots * 4; - // additional automorphisms are needed for sparse bootstrapping - if (m != M) { - for (size_t j = 1; j < M / m; j <<= 1) { - indexList.emplace_back(j * slots); - } + if (flagRem == 1) { + const int32_t halfRotsRem = (1 - (p.numRotationsRem + 1) / 2); + const int32_t halfRotsRemg = halfRotsRem + p.gRem; + for (int32_t j = halfRotsRem; j < halfRotsRemg; ++j) + indexList.emplace_back(ReduceRotation(j, slots)); + for (uint32_t i = 0; i < p.bRem; ++i) + indexList.emplace_back(ReduceRotation(i * p.gRem, M)); } return indexList; } std::vector FHECKKSRNS::FindSlotsToCoeffsRotationIndices(uint32_t slots, uint32_t M) { - auto& p = GetBootPrecom(slots); + const auto& p = GetBootPrecom(slots).m_paramsDec; - uint32_t levelBudget = p.m_paramsDec[CKKS_BOOT_PARAMS::LEVEL_BUDGET]; - uint32_t layersCollapse = p.m_paramsDec[CKKS_BOOT_PARAMS::LAYERS_COLL]; - uint32_t remCollapse = p.m_paramsDec[CKKS_BOOT_PARAMS::LAYERS_REM]; - uint32_t numRotations = p.m_paramsDec[CKKS_BOOT_PARAMS::NUM_ROTATIONS]; - uint32_t b = p.m_paramsDec[CKKS_BOOT_PARAMS::BABY_STEP]; - uint32_t g = p.m_paramsDec[CKKS_BOOT_PARAMS::GIANT_STEP]; - uint32_t numRotationsRem = p.m_paramsDec[CKKS_BOOT_PARAMS::NUM_ROTATIONS_REM]; - uint32_t bRem = p.m_paramsDec[CKKS_BOOT_PARAMS::BABY_STEP_REM]; - uint32_t gRem = p.m_paramsDec[CKKS_BOOT_PARAMS::GIANT_STEP_REM]; - - uint32_t flagRem = (remCollapse == 0) ? 
0 : 1; - if (levelBudget < flagRem) - OPENFHE_THROW("levelBudget can not be less than flagRem"); - - std::vector indexList; // To avoid overflowing uint32_t variables, we do some math operations below in a specific order // Computing all indices for baby-step giant-step procedure for encoding and decoding - int32_t indexListSz = static_cast(b) + g - 2 + bRem + gRem - 2 + 1 + M; + const int32_t indexListSz = static_cast(p.b) + p.g - 2 + p.bRem + p.gRem - 2 + 1 + M; if (indexListSz < 0) OPENFHE_THROW("indexListSz can not be negative"); + + std::vector indexList; indexList.reserve(indexListSz); - for (size_t s = 0; s < (levelBudget - flagRem); ++s) { - const uint32_t scalingFactor = 1U << (s * layersCollapse); - const int32_t halfRots = (1 - (numRotations + 1) / 2); - for (int32_t j = halfRots; j < static_cast(g + halfRots); ++j) { - indexList.emplace_back(ReduceRotation(j * scalingFactor, M / 4)); - } - for (size_t i = 0; i < b; ++i) { - indexList.emplace_back(ReduceRotation((g * i) * scalingFactor, M / 4)); - } + // additional automorphisms are needed for sparse bootstrapping + if (uint32_t m = slots * 4; m != M) { + for (uint32_t j = 1; j < M / m; j <<= 1) + indexList.emplace_back(j * slots); } - if (flagRem) { - uint32_t s = levelBudget - flagRem; - const uint32_t scalingFactor = 1U << (s * layersCollapse); - const int32_t halfRots = (1 - (numRotationsRem + 1) / 2); - for (int32_t j = halfRots; j < static_cast(gRem + halfRots); ++j) { - indexList.emplace_back(ReduceRotation(j * scalingFactor, M / 4)); - } - for (size_t i = 0; i < bRem; ++i) { - indexList.emplace_back(ReduceRotation((gRem * i) * scalingFactor, M / 4)); - } + M >>= 2; + const uint32_t flagRem = (p.remCollapse == 0) ? 0 : 1; + const uint32_t smax = p.lvlb - flagRem; + const int32_t halfRots = (1 - (p.numRotations + 1) / 2); + const int32_t halfRotsg = halfRots + p.g; + for (uint32_t s = 0; s < smax; ++s) { + const uint32_t scalingFactor = 1U << (s * p.layersCollapse); + for (int32_t j = halfRots; j < halfRotsg; ++j) + indexList.emplace_back(ReduceRotation(j * scalingFactor, M)); + for (uint32_t i = 0; i < p.b; ++i) + indexList.emplace_back(ReduceRotation(i * p.g * scalingFactor, M)); } - uint32_t m = slots * 4; - // additional automorphisms are needed for sparse bootstrapping - if (m != M) { - for (size_t j = 1; j < M / m; j <<= 1) { - indexList.emplace_back(j * slots); - } + if (flagRem == 1) { + const uint32_t scalingFactor = 1U << (smax * p.layersCollapse); + const int32_t halfRotsRem = (1 - (p.numRotationsRem + 1) / 2); + const int32_t halfRotsRemg = halfRotsRem + p.gRem; + for (int32_t j = halfRotsRem; j < halfRotsRemg; ++j) + indexList.emplace_back(ReduceRotation(j * scalingFactor, M)); + for (uint32_t i = 0; i < p.bRem; ++i) + indexList.emplace_back(ReduceRotation(i * p.gRem * scalingFactor, M)); } return indexList; @@ -989,8 +956,8 @@ std::vector FHECKKSRNS::FindSlotsToCoeffsRotationIndices(uint32_t slot std::vector FHECKKSRNS::EvalLinearTransformPrecompute( const CryptoContextImpl& cc, const std::vector>>& A, double scale, uint32_t L) const { - uint32_t slots = A.size(); - if (slots != A[0].size()) + const int32_t slots = A.size(); + if (slots != static_cast(A[0].size())) OPENFHE_THROW("The matrix passed to EvalLTPrecompute is not square"); // make sure the plaintext is created only with the necessary amount of moduli @@ -1017,29 +984,20 @@ std::vector FHECKKSRNS::EvalLinearTransformPrecompute( } auto elementParamsPtr = std::make_shared>(cc.GetCyclotomicOrder(), moduli, roots); - // Computing the baby-step bStep and 
the giant-step gStep. - auto& p = GetBootPrecom(slots); - int bStep = (p.m_dim1 == 0) ? std::ceil(std::sqrt(slots)) : p.m_dim1; - int gStep = std::ceil(static_cast(slots) / bStep); + auto g = GetBootPrecom(slots).m_paramsEnc.g; + + const int32_t step = (g == 0) ? std::ceil(std::sqrt(slots)) : g; std::vector result(slots); -// parallelizing the loop (below) with OMP causes a segfault on MinGW -// see https://github.com/openfheorg/openfhe-development/issues/176 #if !defined(__MINGW32__) && !defined(__MINGW64__) - #pragma omp parallel for + #pragma omp parallel for num_threads(OpenFHEParallelControls.GetThreadLimit(slots)) #endif - for (int j = 0; j < gStep; j++) { - int offset = -bStep * j; - for (int i = 0; i < bStep; i++) { - if (bStep * j + i < static_cast(slots)) { - auto diag = ExtractShiftedDiagonal(A, bStep * j + i); - for (uint32_t k = 0; k < diag.size(); k++) - diag[k] *= scale; - - result[bStep * j + i] = - MakeAuxPlaintext(cc, elementParamsPtr, Rotate(diag, offset), 1, towersToDrop, diag.size()); - } - } + for (int32_t ji = 0; ji < slots; ++ji) { + auto diag = ExtractShiftedDiagonal(A, ji); + for (auto& d : diag) + d *= scale; + result[ji] = + MakeAuxPlaintext(cc, elementParamsPtr, Rotate(diag, -step * (ji / step)), 1, towersToDrop, diag.size()); } return result; } @@ -1072,33 +1030,27 @@ std::vector FHECKKSRNS::EvalLinearTransformPrecompute( } auto elementParamsPtr = std::make_shared>(cc.GetCyclotomicOrder(), moduli, roots); - uint32_t slots = A.size(); + const int32_t slots = static_cast(A.size()); - // Computing the baby-step bStep and the giant-step gStep. - auto& p = GetBootPrecom(slots); - int bStep = (p.m_dim1 == 0) ? ceil(sqrt(slots)) : p.m_dim1; - int gStep = ceil(static_cast(slots) / bStep); + auto g = GetBootPrecom(slots).m_paramsEnc.g; + + const int32_t step = (g == 0) ? 
std::ceil(std::sqrt(slots)) : g; std::vector result(slots); if (orientation == 0) { // vertical concatenation - used during homomorphic encoding - // #pragma omp parallel for - for (int j = 0; j < gStep; j++) { - int offset = -bStep * j; - for (int i = 0; i < bStep; i++) { - if (bStep * j + i < static_cast(slots)) { - auto vecA = ExtractShiftedDiagonal(A, bStep * j + i); - auto vecB = ExtractShiftedDiagonal(B, bStep * j + i); - - vecA.insert(vecA.end(), vecB.begin(), vecB.end()); - for (uint32_t k = 0; k < vecA.size(); k++) - vecA[k] *= scale; - - result[bStep * j + i] = - MakeAuxPlaintext(cc, elementParamsPtr, Rotate(vecA, offset), 1, towersToDrop, vecA.size()); - } - } +#if !defined(__MINGW32__) && !defined(__MINGW64__) + #pragma omp parallel for num_threads(OpenFHEParallelControls.GetThreadLimit(slots)) +#endif + for (int32_t ji = 0; ji < slots; ++ji) { + auto vecA = ExtractShiftedDiagonal(A, ji); + auto vecB = ExtractShiftedDiagonal(B, ji); + vecA.insert(vecA.end(), vecB.begin(), vecB.end()); + for (auto& v : vecA) + v *= scale; + result[ji] = + MakeAuxPlaintext(cc, elementParamsPtr, Rotate(vecA, -step * (ji / step)), 1, towersToDrop, vecA.size()); } } else { @@ -1106,27 +1058,23 @@ std::vector FHECKKSRNS::EvalLinearTransformPrecompute( std::vector>> newA(slots); // A and B are concatenated horizontally - for (uint32_t i = 0; i < slots; ++i) { + for (int32_t i = 0; i < slots; ++i) { newA[i].reserve(A[i].size() + B[i].size()); newA[i].insert(newA[i].end(), A[i].begin(), A[i].end()); newA[i].insert(newA[i].end(), B[i].begin(), B[i].end()); } -#pragma omp parallel for - for (int j = 0; j < gStep; j++) { - int offset = -bStep * j; - for (int i = 0; i < bStep; i++) { - if (bStep * j + i < static_cast(slots)) { - // shifted diagonal is computed for rectangular map newA of dimension - // slots x 2*slots - auto vec = ExtractShiftedDiagonal(newA, bStep * j + i); - for (uint32_t k = 0; k < vec.size(); k++) - vec[k] *= scale; - - result[bStep * j + i] = - MakeAuxPlaintext(cc, elementParamsPtr, Rotate(vec, offset), 1, towersToDrop, vec.size()); - } - } +#if !defined(__MINGW32__) && !defined(__MINGW64__) + #pragma omp parallel for num_threads(OpenFHEParallelControls.GetThreadLimit(slots)) +#endif + for (int32_t ji = 0; ji < slots; ++ji) { + // shifted diagonal is computed for rectangular map newA of dimension + // slots x 2*slots + auto vec = ExtractShiftedDiagonal(newA, ji); + for (auto& v : vec) + v *= scale; + result[ji] = + MakeAuxPlaintext(cc, elementParamsPtr, Rotate(vec, -step * (ji / step)), 1, towersToDrop, vec.size()); } } @@ -1136,33 +1084,21 @@ std::vector FHECKKSRNS::EvalLinearTransformPrecompute( std::vector> FHECKKSRNS::EvalCoeffsToSlotsPrecompute( const CryptoContextImpl& cc, const std::vector>& A, const std::vector& rotGroup, bool flag_i, double scale, uint32_t L) const { - uint32_t slots = rotGroup.size(); + const uint32_t slots = rotGroup.size(); - auto& p = GetBootPrecom(slots); + const auto& p = GetBootPrecom(slots).m_paramsEnc; - int32_t levelBudget = p.m_paramsEnc[CKKS_BOOT_PARAMS::LEVEL_BUDGET]; - int32_t layersCollapse = p.m_paramsEnc[CKKS_BOOT_PARAMS::LAYERS_COLL]; - int32_t remCollapse = p.m_paramsEnc[CKKS_BOOT_PARAMS::LAYERS_REM]; - int32_t numRotations = p.m_paramsEnc[CKKS_BOOT_PARAMS::NUM_ROTATIONS]; - int32_t b = p.m_paramsEnc[CKKS_BOOT_PARAMS::BABY_STEP]; - int32_t g = p.m_paramsEnc[CKKS_BOOT_PARAMS::GIANT_STEP]; - int32_t numRotationsRem = p.m_paramsEnc[CKKS_BOOT_PARAMS::NUM_ROTATIONS_REM]; - int32_t bRem = p.m_paramsEnc[CKKS_BOOT_PARAMS::BABY_STEP_REM]; - int32_t 
gRem = p.m_paramsEnc[CKKS_BOOT_PARAMS::GIANT_STEP_REM]; + // result is the rotated plaintext version of the coefficients + std::vector> result(p.lvlb, std::vector(p.numRotations)); int32_t stop = -1; int32_t flagRem = 0; - - if (remCollapse != 0) { + if (p.remCollapse != 0) { stop = 0; flagRem = 1; - } - // result is the rotated plaintext version of the coefficients - std::vector> result(levelBudget, std::vector(numRotations)); - if (flagRem == 1 && levelBudget >= 1) { // remainder corresponds to index 0 in encoding and to last index in decoding - result[0].resize(numRotationsRem); + result[0].resize(p.numRotationsRem); } // make sure the plaintext is created only with the necessary amount of moduli @@ -1171,11 +1107,11 @@ std::vector> FHECKKSRNS::EvalCoeffsToSlotsPrecomp auto elementParams = *(cryptoParams->GetElementParams()); - uint32_t towersToDrop = (L == 0) ? 0 : elementParams.GetParams().size() - L - compositeDegree * levelBudget; + uint32_t towersToDrop = (L == 0) ? 0 : elementParams.GetParams().size() - L - compositeDegree * p.lvlb; for (uint32_t i = 0; i < towersToDrop; ++i) elementParams.PopLastParam(); - uint32_t level0 = towersToDrop + compositeDegree * (levelBudget - 1); + uint32_t level0 = towersToDrop + compositeDegree * (p.lvlb - 1); auto paramsQ = elementParams.GetParams(); uint32_t sizeQ = paramsQ.size(); @@ -1188,7 +1124,6 @@ std::vector> FHECKKSRNS::EvalCoeffsToSlotsPrecomp moduli[i] = paramsQ[i]->GetModulus(); roots[i] = paramsQ[i]->GetRootOfUnity(); } - for (uint32_t i = 0; i < sizeP; ++i) { moduli[sizeQ + i] = paramsP[i]->GetModulus(); roots[sizeQ + i] = paramsP[i]->GetRootOfUnity(); @@ -1196,8 +1131,8 @@ std::vector> FHECKKSRNS::EvalCoeffsToSlotsPrecomp // we need to pre-compute the plaintexts in the extended basis P*Q uint32_t M = cc.GetCyclotomicOrder(); - std::vector>> paramsVector(levelBudget - stop); - for (int32_t s = levelBudget - 1; s >= stop; s--) { + std::vector>> paramsVector(p.lvlb - stop); + for (int32_t s = -1 + p.lvlb; s >= stop; --s) { paramsVector[s - stop] = std::make_shared>(M, moduli, roots); for (uint32_t j = 0; j < compositeDegree; ++j, --sizeQ) { moduli.erase(moduli.begin() + sizeQ - 1); @@ -1205,52 +1140,48 @@ std::vector> FHECKKSRNS::EvalCoeffsToSlotsPrecomp } } - if (slots == M / 4) { + if (uint32_t M4 = M / 4; slots == M4) { //------------------------------------------------------------------------------ // fully-packed mode //------------------------------------------------------------------------------ - auto coeff = CoeffEncodingCollapse(A, rotGroup, levelBudget, flag_i); + auto coeff = CoeffEncodingCollapse(A, rotGroup, p.lvlb, flag_i); - for (int32_t s = levelBudget - 1; s > stop; s--) { - for (int32_t i = 0; i < b; i++) { + for (int32_t s = -1 + p.lvlb; s > stop; --s) { + const int32_t rotScale = (1 << ((s - flagRem) * p.layersCollapse + p.remCollapse)) * p.g; + const uint32_t limit = p.b * p.g; #if !defined(__MINGW32__) && !defined(__MINGW64__) - #pragma omp parallel for + #pragma omp parallel for num_threads(OpenFHEParallelControls.GetThreadLimit(limit)) #endif - for (int32_t j = 0; j < g; j++) { - if (g * i + j != static_cast(numRotations)) { - uint32_t rot = - ReduceRotation(-g * i * (1 << ((s - flagRem) * layersCollapse + remCollapse)), slots); - if ((flagRem == 0) && (s == stop + 1)) { - // do the scaling only at the last set of coefficients - for (uint32_t k = 0; k < slots; k++) { - coeff[s][g * i + j][k] *= scale; - } - } - - auto rotateTemp = Rotate(coeff[s][g * i + j], rot); - - result[s][g * i + j] = MakeAuxPlaintext(cc, 
paramsVector[s - stop], rotateTemp, 1, - level0 - compositeDegree * s, rotateTemp.size()); + for (uint32_t ij = 0; ij < limit; ++ij) { + if (ij != p.numRotations) { + if ((flagRem == 0) && (s == stop + 1)) { + // do the scaling only at the last set of coefficients + for (auto& c : coeff[s][ij]) + c *= scale; } + + auto rot = Rotate(coeff[s][ij], ReduceRotation(-rotScale * (ij / p.g), slots)); + + result[s][ij] = + MakeAuxPlaintext(cc, paramsVector[s - stop], rot, 1, level0 - compositeDegree * s, rot.size()); } } } - if (flagRem) { - for (int32_t i = 0; i < bRem; i++) { -#pragma omp parallel for - for (int32_t j = 0; j < gRem; j++) { - if (gRem * i + j != static_cast(numRotationsRem)) { - uint32_t rot = ReduceRotation(-gRem * i, slots); - for (uint32_t k = 0; k < slots; k++) { - coeff[stop][gRem * i + j][k] *= scale; - } + if (flagRem == 1) { + const uint32_t limit = p.bRem * p.gRem; +#if !defined(__MINGW32__) && !defined(__MINGW64__) + #pragma omp parallel for num_threads(OpenFHEParallelControls.GetThreadLimit(limit)) +#endif + for (uint32_t ij = 0; ij < limit; ++ij) { + if (ij != p.numRotationsRem) { + for (auto& c : coeff[stop][ij]) + c *= scale; - auto rotateTemp = Rotate(coeff[stop][gRem * i + j], rot); - result[stop][gRem * i + j] = - MakeAuxPlaintext(cc, paramsVector[0], rotateTemp, 1, level0, rotateTemp.size()); - } + auto rot = Rotate(coeff[stop][ij], ReduceRotation(-p.gRem * (ij / p.gRem), slots)); + + result[stop][ij] = MakeAuxPlaintext(cc, paramsVector[0], rot, 1, level0, rot.size()); } } } @@ -1260,55 +1191,52 @@ std::vector> FHECKKSRNS::EvalCoeffsToSlotsPrecomp // sparsely-packed mode //------------------------------------------------------------------------------ - auto coeff = CoeffEncodingCollapse(A, rotGroup, levelBudget, false); - auto coeffi = CoeffEncodingCollapse(A, rotGroup, levelBudget, true); + auto coeff = CoeffEncodingCollapse(A, rotGroup, p.lvlb, false); + auto coeffi = CoeffEncodingCollapse(A, rotGroup, p.lvlb, true); - for (int32_t s = levelBudget - 1; s > stop; s--) { - for (int32_t i = 0; i < b; i++) { + for (int32_t s = -1 + p.lvlb; s > stop; --s) { + const int32_t rotScale = (1 << ((s - flagRem) * p.layersCollapse + p.remCollapse)) * p.g; + const uint32_t limit = p.b * p.g; #if !defined(__MINGW32__) && !defined(__MINGW64__) - #pragma omp parallel for + #pragma omp parallel for num_threads(OpenFHEParallelControls.GetThreadLimit(limit)) #endif - for (int32_t j = 0; j < g; j++) { - if (g * i + j != static_cast(numRotations)) { - uint32_t rot = - ReduceRotation(-g * i * (1 << ((s - flagRem) * layersCollapse + remCollapse)), M / 4); - // concatenate the coefficients horizontally on their third dimension, which corresponds to the # of slots - auto clearTemp = coeff[s][g * i + j]; - auto clearTempi = coeffi[s][g * i + j]; - clearTemp.insert(clearTemp.end(), clearTempi.begin(), clearTempi.end()); - if ((flagRem == 0) && (s == stop + 1)) { - // do the scaling only at the last set of coefficients - for (uint32_t k = 0; k < clearTemp.size(); k++) { - clearTemp[k] *= scale; - } - } - - auto rotateTemp = Rotate(clearTemp, rot); - result[s][g * i + j] = MakeAuxPlaintext(cc, paramsVector[s - stop], rotateTemp, 1, - level0 - compositeDegree * s, rotateTemp.size()); + for (uint32_t ij = 0; ij < limit; ++ij) { + if (ij != p.numRotations) { + // concatenate the coefficients horizontally on their third dimension, which corresponds to the # of slots + auto clearTmp = coeff[s][ij]; + auto& clearTmpi = coeffi[s][ij]; + clearTmp.insert(clearTmp.end(), clearTmpi.begin(), 
clearTmpi.end()); + if ((flagRem == 0) && (s == stop + 1)) { + // do the scaling only at the last set of coefficients + for (auto& c : clearTmp) + c *= scale; } + + auto rot = Rotate(clearTmp, ReduceRotation(-rotScale * (ij / p.g), M4)); + + result[s][ij] = + MakeAuxPlaintext(cc, paramsVector[s - stop], rot, 1, level0 - compositeDegree * s, rot.size()); } } } - if (flagRem) { - for (int32_t i = 0; i < bRem; i++) { -#pragma omp parallel for - for (int32_t j = 0; j < gRem; j++) { - if (gRem * i + j != static_cast(numRotationsRem)) { - uint32_t rot = ReduceRotation(-gRem * i, M / 4); - // concatenate the coefficients on their third dimension, which corresponds to the # of slots - auto clearTemp = coeff[stop][gRem * i + j]; - auto clearTempi = coeffi[stop][gRem * i + j]; - clearTemp.insert(clearTemp.end(), clearTempi.begin(), clearTempi.end()); - for (uint32_t k = 0; k < clearTemp.size(); k++) { - clearTemp[k] *= scale; - } - - auto rotateTemp = Rotate(clearTemp, rot); - result[stop][gRem * i + j] = - MakeAuxPlaintext(cc, paramsVector[0], rotateTemp, 1, level0, rotateTemp.size()); - } + if (flagRem == 1) { + const uint32_t limit = p.bRem * p.gRem; +#if !defined(__MINGW32__) && !defined(__MINGW64__) + #pragma omp parallel for num_threads(OpenFHEParallelControls.GetThreadLimit(limit)) +#endif + for (uint32_t ij = 0; ij < limit; ++ij) { + if (ij != p.numRotationsRem) { + // concatenate the coefficients on their third dimension, which corresponds to the # of slots + auto clearTmp = coeff[stop][ij]; + auto& clearTmpi = coeffi[stop][ij]; + clearTmp.insert(clearTmp.end(), clearTmpi.begin(), clearTmpi.end()); + for (auto& c : clearTmp) + c *= scale; + + auto rot = Rotate(clearTmp, ReduceRotation(-p.gRem * (ij / p.gRem), M4)); + + result[stop][ij] = MakeAuxPlaintext(cc, paramsVector[0], rot, 1, level0, rot.size()); } } } @@ -1319,27 +1247,17 @@ std::vector> FHECKKSRNS::EvalCoeffsToSlotsPrecomp std::vector> FHECKKSRNS::EvalSlotsToCoeffsPrecompute( const CryptoContextImpl& cc, const std::vector>& A, const std::vector& rotGroup, bool flag_i, double scale, uint32_t L) const { - uint32_t slots = rotGroup.size(); + const uint32_t slots = rotGroup.size(); - auto& p = GetBootPrecom(slots); - - int32_t levelBudget = p.m_paramsDec[CKKS_BOOT_PARAMS::LEVEL_BUDGET]; - int32_t layersCollapse = p.m_paramsDec[CKKS_BOOT_PARAMS::LAYERS_COLL]; - int32_t remCollapse = p.m_paramsDec[CKKS_BOOT_PARAMS::LAYERS_REM]; - int32_t numRotations = p.m_paramsDec[CKKS_BOOT_PARAMS::NUM_ROTATIONS]; - int32_t b = p.m_paramsDec[CKKS_BOOT_PARAMS::BABY_STEP]; - int32_t g = p.m_paramsDec[CKKS_BOOT_PARAMS::GIANT_STEP]; - int32_t numRotationsRem = p.m_paramsDec[CKKS_BOOT_PARAMS::NUM_ROTATIONS_REM]; - int32_t bRem = p.m_paramsDec[CKKS_BOOT_PARAMS::BABY_STEP_REM]; - int32_t gRem = p.m_paramsDec[CKKS_BOOT_PARAMS::GIANT_STEP_REM]; + const auto& p = GetBootPrecom(slots).m_paramsDec; - int32_t flagRem = (remCollapse == 0) ? 0 : 1; + const int32_t flagRem = (p.remCollapse == 0) ? 
0 : 1; // result is the rotated plaintext version of coeff - std::vector> result(levelBudget, std::vector(numRotations)); - if (flagRem == 1 && levelBudget >= 1) { + std::vector> result(p.lvlb, std::vector(p.numRotations)); + if (flagRem == 1) { // remainder corresponds to index 0 in encoding and to last index in decoding - result[levelBudget - 1].resize(numRotationsRem); + result[p.lvlb - 1].resize(p.numRotationsRem); } // make sure the plaintext is created only with the necessary amount of moduli @@ -1348,12 +1266,10 @@ std::vector> FHECKKSRNS::EvalSlotsToCoeffsPrecomp uint32_t compositeDegree = cryptoParams->GetCompositeDegree(); auto elementParams = *(cryptoParams->GetElementParams()); - uint32_t towersToDrop = (L == 0) ? 0 : elementParams.GetParams().size() - L - compositeDegree * levelBudget; + const uint32_t towersToDrop = (L == 0) ? 0 : elementParams.GetParams().size() - L - compositeDegree * p.lvlb; for (uint32_t i = 0; i < towersToDrop; ++i) elementParams.PopLastParam(); - uint32_t level0 = towersToDrop; - auto paramsQ = elementParams.GetParams(); uint32_t sizeQ = paramsQ.size(); auto paramsP = cryptoParams->GetParamsP()->GetParams(); @@ -1370,8 +1286,9 @@ std::vector> FHECKKSRNS::EvalSlotsToCoeffsPrecomp } // we need to pre-compute the plaintexts in the extended basis P*Q - std::vector>> paramsVector(levelBudget - flagRem + 1); - for (int32_t s = 0; s < levelBudget - flagRem + 1; ++s) { + const uint32_t pvlen = p.lvlb + 1 - flagRem; + std::vector>> paramsVector(pvlen); + for (uint32_t s = 0; s < pvlen; ++s) { paramsVector[s] = std::make_shared>(cc.GetCyclotomicOrder(), moduli, roots); for (uint32_t i = 0; i < compositeDegree; ++i, --sizeQ) { moduli.erase(moduli.begin() + sizeQ - 1); @@ -1379,47 +1296,47 @@ std::vector> FHECKKSRNS::EvalSlotsToCoeffsPrecomp } } - uint32_t M4 = cc.GetCyclotomicOrder() / 4; - if (slots == M4) { + if (uint32_t M4 = cc.GetCyclotomicOrder() / 4; M4 == slots) { // fully-packed - auto coeff = CoeffDecodingCollapse(A, rotGroup, levelBudget, flag_i); - - for (int32_t s = 0; s < levelBudget - flagRem; s++) { - for (int32_t i = 0; i < b; i++) { -#pragma omp parallel for - for (int32_t j = 0; j < g; j++) { - if (g * i + j != static_cast(numRotations)) { - uint32_t rot = ReduceRotation(-g * i * (1 << (s * layersCollapse)), slots); - if ((flagRem == 0) && (s == levelBudget - flagRem - 1)) { - // do the scaling only at the last set of coefficients - for (uint32_t k = 0; k < slots; k++) { - coeff[s][g * i + j][k] *= scale; - } - } - - auto rotateTemp = Rotate(coeff[s][g * i + j], rot); - result[s][g * i + j] = MakeAuxPlaintext(cc, paramsVector[s], rotateTemp, 1, - level0 + compositeDegree * s, rotateTemp.size()); + auto coeff = CoeffDecodingCollapse(A, rotGroup, p.lvlb, flag_i); + const uint32_t smax = p.lvlb - flagRem; + for (uint32_t s = 0; s < smax; ++s) { + const int32_t rotScale = (1 << (s * p.layersCollapse)) * p.g; + const uint32_t limit = p.b * p.g; +#if !defined(__MINGW32__) && !defined(__MINGW64__) + #pragma omp parallel for num_threads(OpenFHEParallelControls.GetThreadLimit(limit)) +#endif + for (uint32_t ij = 0; ij < limit; ++ij) { + if (ij != p.numRotations) { + if ((flagRem == 0) && (s + 1 == smax)) { + // do the scaling only at the last set of coefficients + for (auto& c : coeff[s][ij]) + c *= scale; } + + auto rot = Rotate(coeff[s][ij], ReduceRotation(-rotScale * (ij / p.g), slots)); + + result[s][ij] = + MakeAuxPlaintext(cc, paramsVector[s], rot, 1, towersToDrop + compositeDegree * s, rot.size()); } } } - if (flagRem) { - int32_t s = levelBudget 
- flagRem; - for (int32_t i = 0; i < bRem; i++) { -#pragma omp parallel for - for (int32_t j = 0; j < gRem; j++) { - if (gRem * i + j != static_cast(numRotationsRem)) { - uint32_t rot = ReduceRotation(-gRem * i * (1 << (s * layersCollapse)), slots); - for (uint32_t k = 0; k < slots; k++) { - coeff[s][gRem * i + j][k] *= scale; - } - - auto rotateTemp = Rotate(coeff[s][gRem * i + j], rot); - result[s][gRem * i + j] = MakeAuxPlaintext(cc, paramsVector[s], rotateTemp, 1, - level0 + compositeDegree * s, rotateTemp.size()); - } + if (flagRem == 1) { + const int32_t rotScale = (1 << (smax * p.layersCollapse)) * p.gRem; + const uint32_t limit = p.bRem * p.gRem; +#if !defined(__MINGW32__) && !defined(__MINGW64__) + #pragma omp parallel for num_threads(OpenFHEParallelControls.GetThreadLimit(limit)) +#endif + for (uint32_t ij = 0; ij < limit; ++ij) { + if (ij != p.numRotationsRem) { + for (auto& c : coeff[smax][ij]) + c *= scale; + + auto rot = Rotate(coeff[smax][ij], ReduceRotation(-rotScale * (ij / p.g), slots)); + + result[smax][ij] = MakeAuxPlaintext(cc, paramsVector[smax], rot, 1, + towersToDrop + compositeDegree * smax, rot.size()); } } } @@ -1429,53 +1346,55 @@ std::vector> FHECKKSRNS::EvalSlotsToCoeffsPrecomp // sparsely-packed mode //------------------------------------------------------------------------------ - auto coeff = CoeffDecodingCollapse(A, rotGroup, levelBudget, false); - auto coeffi = CoeffDecodingCollapse(A, rotGroup, levelBudget, true); - - for (int32_t s = 0; s < levelBudget - flagRem; s++) { - for (int32_t i = 0; i < b; i++) { -#pragma omp parallel for - for (int32_t j = 0; j < g; j++) { - if (g * i + j != static_cast(numRotations)) { - uint32_t rot = ReduceRotation(-g * i * (1 << (s * layersCollapse)), M4); - // concatenate the coefficients horizontally on their third dimension, which corresponds to the # of slots - auto clearTemp = coeff[s][g * i + j]; - auto clearTempi = coeffi[s][g * i + j]; - clearTemp.insert(clearTemp.end(), clearTempi.begin(), clearTempi.end()); - if ((flagRem == 0) && (s == levelBudget - flagRem - 1)) { - // do the scaling only at the last set of coefficients - for (uint32_t k = 0; k < clearTemp.size(); k++) { - clearTemp[k] *= scale; - } - } - - auto rotateTemp = Rotate(clearTemp, rot); - result[s][g * i + j] = MakeAuxPlaintext(cc, paramsVector[s], rotateTemp, 1, - level0 + compositeDegree * s, rotateTemp.size()); + auto coeff = CoeffDecodingCollapse(A, rotGroup, p.lvlb, false); + auto coeffi = CoeffDecodingCollapse(A, rotGroup, p.lvlb, true); + + const uint32_t smax = p.lvlb - flagRem; + for (uint32_t s = 0; s < smax; ++s) { + const int32_t rotScale = (1 << (s * p.layersCollapse)) * p.g; + const uint32_t limit = p.b * p.g; +#if !defined(__MINGW32__) && !defined(__MINGW64__) + #pragma omp parallel for num_threads(OpenFHEParallelControls.GetThreadLimit(limit)) +#endif + for (uint32_t ij = 0; ij < limit; ++ij) { + if (ij != p.numRotations) { + // concatenate the coefficients horizontally on their third dimension, which corresponds to the # of slots + auto clearTmp = coeff[s][ij]; + auto& clearTmpi = coeffi[s][ij]; + clearTmp.insert(clearTmp.end(), clearTmpi.begin(), clearTmpi.end()); + if ((flagRem == 0) && (s + 1 == smax)) { + // do the scaling only at the last set of coefficients + for (auto& c : clearTmp) + c *= scale; } + + auto rot = Rotate(clearTmp, ReduceRotation(-rotScale * (ij / p.g), M4)); + + result[s][ij] = + MakeAuxPlaintext(cc, paramsVector[s], rot, 1, towersToDrop + compositeDegree * s, rot.size()); } } } - if (flagRem) { - int32_t s = 
levelBudget - flagRem; - for (int32_t i = 0; i < bRem; i++) { -#pragma omp parallel for - for (int32_t j = 0; j < gRem; j++) { - if (gRem * i + j != static_cast(numRotationsRem)) { - uint32_t rot = ReduceRotation(-gRem * i * (1 << (s * layersCollapse)), M4); - // concatenate the coefficients horizontally on their third dimension, which corresponds to the # of slots - auto clearTemp = coeff[s][gRem * i + j]; - auto clearTempi = coeffi[s][gRem * i + j]; - clearTemp.insert(clearTemp.end(), clearTempi.begin(), clearTempi.end()); - for (uint32_t k = 0; k < clearTemp.size(); k++) { - clearTemp[k] *= scale; - } - - auto rotateTemp = Rotate(clearTemp, rot); - result[s][gRem * i + j] = MakeAuxPlaintext(cc, paramsVector[s], rotateTemp, 1, - level0 + compositeDegree * s, rotateTemp.size()); - } + if (flagRem == 1) { + const int32_t rotScale = (1 << (smax * p.layersCollapse)) * p.g; + const uint32_t limit = p.bRem * p.gRem; +#if !defined(__MINGW32__) && !defined(__MINGW64__) + #pragma omp parallel for num_threads(OpenFHEParallelControls.GetThreadLimit(limit)) +#endif + for (uint32_t ij = 0; ij < limit; ++ij) { + if (ij != p.numRotationsRem) { + // concatenate the coefficients on their third dimension, which corresponds to the # of slots + auto clearTmp = coeff[smax][ij]; + auto& clearTmpi = coeffi[smax][ij]; + clearTmp.insert(clearTmp.end(), clearTmpi.begin(), clearTmpi.end()); + for (auto& c : clearTmp) + c *= scale; + + auto rot = Rotate(clearTmp, ReduceRotation(-rotScale * (ij / p.g), M4)); + + result[smax][ij] = MakeAuxPlaintext(cc, paramsVector[smax], rot, 1, + towersToDrop + compositeDegree * smax, rot.size()); } } } @@ -1490,26 +1409,23 @@ std::vector> FHECKKSRNS::EvalSlotsToCoeffsPrecomp Ciphertext FHECKKSRNS::EvalLinearTransform(const std::vector& A, ConstCiphertext& ct) const { // Computing the baby-step bStep and the giant-step gStep. - uint32_t slots = A.size(); - auto& p = GetBootPrecom(slots); - uint32_t bStep = (p.m_dim1 == 0) ? ceil(sqrt(slots)) : p.m_dim1; - uint32_t gStep = ceil(static_cast(slots) / bStep); + const uint32_t slots = A.size(); + const auto& p = GetBootPrecom(slots); + const uint32_t bStep = (p.m_paramsEnc.g == 0) ? std::ceil(std::sqrt(slots)) : p.m_paramsEnc.g; + const uint32_t gStep = std::ceil(static_cast(slots) / bStep); - auto cc = ct->GetCryptoContext(); - uint32_t M = cc->GetCyclotomicOrder(); - uint32_t N = cc->GetRingDimension(); - - // computes the NTTs for each CRT limb (for the hoisted automorphisms used - // later on) + auto cc = ct->GetCryptoContext(); auto digits = cc->EvalFastRotationPrecompute(ct); - std::vector> fastRotation(bStep - 1); - // hoisted automorphisms -#pragma omp parallel for + std::vector> fastRotation(bStep - 1); +#pragma omp parallel for num_threads(OpenFHEParallelControls.GetThreadLimit(bStep - 1)) for (uint32_t j = 1; j < bStep; ++j) fastRotation[j - 1] = cc->EvalFastRotationExt(ct, j, digits, true); + const uint32_t M = cc->GetCyclotomicOrder(); + const uint32_t N = cc->GetRingDimension(); + std::vector map(N); Ciphertext result; DCRTPoly first; for (uint32_t j = 0; j < gStep; ++j) { @@ -1530,7 +1446,6 @@ Ciphertext FHECKKSRNS::EvalLinearTransform(const std::vectorKeySwitchDown(inner); // Find the automorphism index that corresponds to rotation index index. 
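The baby-step/giant-step structure that EvalLinearTransform (and the CoeffsToSlots/SlotsToCoeffs loops further down) evaluates homomorphically can be checked in the clear. Below is a plaintext-domain sketch of the same identity using ordinary vectors and no OpenFHE types; the pre-rotation of each generalized diagonal by -g*i mirrors what the precompute routines bake into the auxiliary plaintexts:

#include <cstdint>
#include <vector>

// Rot(v, k)[t] = v[(t + k) mod n], with negative k handled.
static std::vector<double> Rot(const std::vector<double>& v, int64_t k) {
    const int64_t n = static_cast<int64_t>(v.size());
    std::vector<double> out(v.size());
    for (int64_t t = 0; t < n; ++t)
        out[t] = v[((t + k) % n + n) % n];
    return out;
}

// A*v via the diagonal method in baby-step/giant-step form:
//   A*v = sum_i Rot( sum_j diag'_{g*i+j} .* Rot(v, j), g*i ),
// where diag'_{g*i+j}[t] = A[(t - g*i) mod n][(t + j) mod n] is the
// (g*i+j)-th generalized diagonal of A pre-rotated by -g*i.
std::vector<double> BsgsMatVec(const std::vector<std::vector<double>>& A,
                               const std::vector<double>& v, uint32_t g) {
    const uint32_t n = static_cast<uint32_t>(v.size());
    std::vector<std::vector<double>> babies(g);  // baby-step rotations, reused by every giant step
    for (uint32_t j = 0; j < g; ++j)
        babies[j] = Rot(v, j);

    std::vector<double> acc(n, 0.0);
    for (uint32_t i = 0; i * g < n; ++i) {
        std::vector<double> inner(n, 0.0);
        for (uint32_t j = 0; j < g && i * g + j < n; ++j)
            for (uint32_t t = 0; t < n; ++t)
                inner[t] += A[(t + n - (i * g) % n) % n][(t + j) % n] * babies[j][t];
        auto outer = Rot(inner, static_cast<int64_t>(i) * g);  // one giant-step rotation per group
        for (uint32_t t = 0; t < n; ++t)
            acc[t] += outer[t];
    }
    return acc;
}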
uint32_t autoIndex = FindAutomorphismIndex2nComplex(bStep * j, M); - std::vector map(N); PrecomputeAutoMap(N, autoIndex, &map); first += inner->GetElements()[0].AutomorphismTransform(autoIndex, map); @@ -1545,104 +1460,90 @@ Ciphertext FHECKKSRNS::EvalLinearTransform(const std::vector FHECKKSRNS::EvalCoeffsToSlots(const std::vector>& A, ConstCiphertext& ctxt) const { - uint32_t slots = ctxt->GetSlots(); + const uint32_t slots = ctxt->GetSlots(); - auto& p = GetBootPrecom(slots); + const auto& p = GetBootPrecom(slots).m_paramsEnc; - int32_t levelBudget = p.m_paramsEnc[CKKS_BOOT_PARAMS::LEVEL_BUDGET]; - int32_t layersCollapse = p.m_paramsEnc[CKKS_BOOT_PARAMS::LAYERS_COLL]; - int32_t remCollapse = p.m_paramsEnc[CKKS_BOOT_PARAMS::LAYERS_REM]; - int32_t numRotations = p.m_paramsEnc[CKKS_BOOT_PARAMS::NUM_ROTATIONS]; - int32_t b = p.m_paramsEnc[CKKS_BOOT_PARAMS::BABY_STEP]; - int32_t g = p.m_paramsEnc[CKKS_BOOT_PARAMS::GIANT_STEP]; - int32_t numRotationsRem = p.m_paramsEnc[CKKS_BOOT_PARAMS::NUM_ROTATIONS_REM]; - int32_t bRem = p.m_paramsEnc[CKKS_BOOT_PARAMS::BABY_STEP_REM]; - int32_t gRem = p.m_paramsEnc[CKKS_BOOT_PARAMS::GIANT_STEP_REM]; + // precompute the inner and outer rotations + std::vector> rot_out(p.lvlb, std::vector(p.b + p.bRem)); + std::vector> rot_in(p.lvlb, std::vector(p.numRotations + 1)); int32_t stop = -1; int32_t flagRem = 0; - - auto cc = ctxt->GetCryptoContext(); - auto algo = cc->GetScheme(); - const auto cryptoParams = std::dynamic_pointer_cast(cc->GetCryptoParameters()); - uint32_t compositeDegree = cryptoParams->GetCompositeDegree(); - - if (remCollapse != 0) { + if (p.remCollapse != 0) { stop = 0; flagRem = 1; - } - // precompute the inner and outer rotations - std::vector> rot_in(levelBudget, std::vector(numRotations + 1)); - if (flagRem == 1) { // remainder corresponds to index 0 in encoding and to last index in decoding - rot_in[0].resize(numRotationsRem + 1); + rot_in[0].resize(p.numRotationsRem + 1); } - std::vector> rot_out(levelBudget, std::vector(b + bRem)); - uint32_t M = cc->GetCyclotomicOrder(); - for (int32_t s = levelBudget - 1; s > stop; --s) { - for (int32_t j = 0; j < g; ++j) { - rot_in[s][j] = ReduceRotation((j - static_cast((numRotations + 1) / 2) + 1) * - (1 << ((s - flagRem) * layersCollapse + remCollapse)), - slots); - } - for (int32_t i = 0; i < b; ++i) - rot_out[s][i] = ReduceRotation((g * i) * (1 << ((s - flagRem) * layersCollapse + remCollapse)), M / 4); + auto cc = ctxt->GetCryptoContext(); + + const uint32_t M4 = cc->GetCyclotomicOrder() / 4; + + int32_t offset = static_cast((p.numRotations + 1) / 2) - 1; + for (int32_t s = p.lvlb - 1; s > stop; --s) { + int32_t scale = (1 << ((s - flagRem) * p.layersCollapse + p.remCollapse)); + for (uint32_t i = 0; i < p.b; ++i) + rot_out[s][i] = ReduceRotation(scale * p.g * i, M4); + for (uint32_t j = 0; j < p.g; ++j) + rot_in[s][j] = ReduceRotation(scale * (j - offset), slots); } - if (flagRem) { - for (int32_t j = 0; j < gRem; ++j) - rot_in[stop][j] = ReduceRotation((j - static_cast((numRotationsRem + 1) / 2) + 1), slots); - for (int32_t i = 0; i < bRem; ++i) - rot_out[stop][i] = ReduceRotation((gRem * i), M / 4); + if (flagRem == 1) { + offset = static_cast((p.numRotationsRem + 1) / 2) - 1; + for (uint32_t i = 0; i < p.bRem; ++i) + rot_out[stop][i] = ReduceRotation(p.gRem * i, M4); + for (uint32_t j = 0; j < p.gRem; ++j) + rot_in[stop][j] = ReduceRotation(j - offset, slots); } - uint32_t N = cc->GetRingDimension(); auto result = ctxt->Clone(); + uint32_t N = cc->GetRingDimension(); + std::vector map(N); + 
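The rot_in/rot_out tables built here are filled with ReduceRotation, which the code relies on to fold signed rotation amounts back into the valid range. A hedged sketch of the assumed semantics follows; the library routine may additionally exploit power-of-two ranges, so this is purely illustrative:

#include <cstdint>

// Assumed behaviour of ReduceRotation: map a possibly negative rotation
// amount into the canonical range [0, range) for a cyclic rotation over
// `range` positions.
uint32_t ReduceRotationSketch(int64_t index, uint32_t range) {
    int64_t r = index % static_cast<int64_t>(range);
    if (r < 0)
        r += static_cast<int64_t>(range);
    return static_cast<uint32_t>(r);
}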
+ auto algo = cc->GetScheme(); + const auto cryptoParams = std::dynamic_pointer_cast(cc->GetCryptoParameters()); + uint32_t compositeDegree = cryptoParams->GetCompositeDegree(); + // hoisted automorphisms - for (int32_t s = levelBudget - 1; s > stop; --s) { - if (s != levelBudget - 1) + const int32_t smax = -1 + p.lvlb; + for (int32_t s = smax; s > stop; --s) { + if (s != smax) algo->ModReduceInternalInPlace(result, compositeDegree); // computes the NTTs for each CRT limb (for the hoisted automorphisms used later on) auto digits = cc->EvalFastRotationPrecompute(result); - - std::vector> fastRotation(g); -#pragma omp parallel for - for (int32_t j = 0; j < g; j++) { - if (rot_in[s][j] != 0) - fastRotation[j] = cc->EvalFastRotationExt(result, rot_in[s][j], digits, true); - else - fastRotation[j] = cc->KeySwitchExt(result, true); - } + std::vector> fastRotation(p.g); +#pragma omp parallel for num_threads(OpenFHEParallelControls.GetThreadLimit(p.g)) + for (uint32_t j = 0; j < p.g; ++j) + fastRotation[j] = (rot_in[s][j] != 0) ? cc->EvalFastRotationExt(result, rot_in[s][j], digits, true) : + cc->KeySwitchExt(result, true); Ciphertext outer; DCRTPoly first; - for (int32_t i = 0; i < b; i++) { + for (uint32_t i = 0; i < p.b; ++i) { // for the first iteration with j=0: - int32_t G = g * i; + uint32_t G = p.g * i; auto inner = EvalMultExt(fastRotation[0], A[s][G]); // continue the loop - for (int32_t j = 1; j < g; ++j) { - if ((G + j) != static_cast(numRotations)) + for (uint32_t j = 1; j < p.g; ++j) { + if ((G + j) != p.numRotations) EvalAddExtInPlace(inner, EvalMultExt(fastRotation[j], A[s][G + j])); } if (i == 0) { - first = cc->KeySwitchDownFirstElement(inner); - auto elements = inner->GetElements(); - elements[0].SetValuesToZero(); - inner->SetElements(std::move(elements)); + first = cc->KeySwitchDownFirstElement(inner); outer = std::move(inner); + outer->GetElements()[0].SetValuesToZero(); } else { if (rot_out[s][i] != 0) { inner = cc->KeySwitchDown(inner); // Find the automorphism index that corresponds to rotation index index. 
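The recurring comment about finding the automorphism index refers to the standard power-of-two CKKS convention: a rotation by rot slots corresponds to the Galois automorphism X -> X^(5^rot mod M), where M is the cyclotomic order. A small sketch of that mapping, offered as an illustration of the convention rather than of the library routine FindAutomorphismIndex2nComplex itself:

#include <cstdint>

// Modular exponentiation 5^rot mod M; M is the cyclotomic order (a power of
// two well below 2^32, so the 64-bit intermediate products cannot overflow).
uint32_t RotationToAutomorphismIndex(uint32_t rot, uint32_t M) {
    uint64_t k    = 1;
    uint64_t base = 5 % M;
    for (uint32_t e = rot; e > 0; e >>= 1) {
        if (e & 1)
            k = (k * base) % M;
        base = (base * base) % M;
    }
    return static_cast<uint32_t>(k);
}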
- uint32_t autoIndex = FindAutomorphismIndex2nComplex(rot_out[s][i], M); - std::vector map(N); + uint32_t autoIndex = FindAutomorphismIndex2nComplex(rot_out[s][i], cc->GetCyclotomicOrder()); PrecomputeAutoMap(N, autoIndex, &map); first += inner->GetElements()[0].AutomorphismTransform(autoIndex, map); auto&& innerDigits = cc->EvalFastRotationPrecompute(inner); @@ -1650,9 +1551,8 @@ Ciphertext FHECKKSRNS::EvalCoeffsToSlots(const std::vectorKeySwitchDownFirstElement(inner); - auto elements = inner->GetElements(); + auto& elements = inner->GetElements(); elements[0].SetValuesToZero(); - inner->SetElements(std::move(elements)); EvalAddExtInPlace(outer, inner); } } @@ -1661,46 +1561,40 @@ Ciphertext FHECKKSRNS::EvalCoeffsToSlots(const std::vectorGetElements()[0] += first; } - if (flagRem) { + if (flagRem == 1) { algo->ModReduceInternalInPlace(result, compositeDegree); // computes the NTTs for each CRT limb (for the hoisted automorphisms used later on) auto digits = cc->EvalFastRotationPrecompute(result); - std::vector> fastRotation(gRem); - -#pragma omp parallel for - for (int32_t j = 0; j < gRem; ++j) { - if (rot_in[stop][j] != 0) - fastRotation[j] = cc->EvalFastRotationExt(result, rot_in[stop][j], digits, true); - else - fastRotation[j] = cc->KeySwitchExt(result, true); - } + std::vector> fastRotationRem(p.gRem); +#pragma omp parallel for num_threads(OpenFHEParallelControls.GetThreadLimit(p.gRem)) + for (uint32_t j = 0; j < p.gRem; ++j) + fastRotationRem[j] = (rot_in[stop][j] != 0) ? + cc->EvalFastRotationExt(result, rot_in[stop][j], digits, true) : + cc->KeySwitchExt(result, true); Ciphertext outer; DCRTPoly first; - for (int32_t i = 0; i < bRem; i++) { + for (uint32_t i = 0; i < p.bRem; ++i) { // for the first iteration with j=0: - int32_t GRem = gRem * i; - auto inner = EvalMultExt(fastRotation[0], A[stop][GRem]); + int32_t GRem = p.gRem * i; + auto inner = EvalMultExt(fastRotationRem[0], A[stop][GRem]); // continue the loop - for (int32_t j = 1; j < gRem; ++j) { - if ((GRem + j) != static_cast(numRotationsRem)) - EvalAddExtInPlace(inner, EvalMultExt(fastRotation[j], A[stop][GRem + j])); + for (uint32_t j = 1; j < p.gRem; ++j) { + if ((GRem + j) != p.numRotationsRem) + EvalAddExtInPlace(inner, EvalMultExt(fastRotationRem[j], A[stop][GRem + j])); } if (i == 0) { - first = cc->KeySwitchDownFirstElement(inner); - auto elements = inner->GetElements(); - elements[0].SetValuesToZero(); - inner->SetElements(std::move(elements)); + first = cc->KeySwitchDownFirstElement(inner); outer = std::move(inner); + outer->GetElements()[0].SetValuesToZero(); } else { if (rot_out[stop][i] != 0) { inner = cc->KeySwitchDown(inner); // Find the automorphism index that corresponds to rotation index index. 
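The num_threads(OpenFHEParallelControls.GetThreadLimit(...)) clauses introduced throughout these loops cap the OpenMP team size by the loop's trip count, so the short remainder loops (bRem, gRem) do not spin up idle threads. A self-contained sketch of the same pattern on an unrelated loop; the header path and the min(threads, n) behaviour of GetThreadLimit are assumptions inferred from its use in this file:

#include <cstdint>
#include <vector>

#include "utils/parallel.h"  // OpenFHEParallelControls (header path assumed)

// Each iteration is independent, so the loop parallelizes safely; the
// num_threads clause keeps tiny trip counts from oversubscribing cores.
std::vector<double> ScaleAll(const std::vector<double>& in, double scale) {
    const int32_t n = static_cast<int32_t>(in.size());
    std::vector<double> out(in.size());
#if !defined(__MINGW32__) && !defined(__MINGW64__)
    #pragma omp parallel for num_threads(OpenFHEParallelControls.GetThreadLimit(n))
#endif
    for (int32_t i = 0; i < n; ++i)
        out[i] = in[i] * scale;
    return out;
}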
- uint32_t autoIndex = FindAutomorphismIndex2nComplex(rot_out[stop][i], M); - std::vector map(N); + uint32_t autoIndex = FindAutomorphismIndex2nComplex(rot_out[stop][i], cc->GetCyclotomicOrder()); PrecomputeAutoMap(N, autoIndex, &map); first += inner->GetElements()[0].AutomorphismTransform(autoIndex, map); auto&& innerDigits = cc->EvalFastRotationPrecompute(inner); @@ -1723,83 +1617,73 @@ Ciphertext FHECKKSRNS::EvalCoeffsToSlots(const std::vector FHECKKSRNS::EvalSlotsToCoeffs(const std::vector>& A, ConstCiphertext& ctxt) const { - uint32_t slots = ctxt->GetSlots(); + const uint32_t slots = ctxt->GetSlots(); - auto& p = GetBootPrecom(slots); - - int32_t levelBudget = p.m_paramsDec[CKKS_BOOT_PARAMS::LEVEL_BUDGET]; - int32_t layersCollapse = p.m_paramsDec[CKKS_BOOT_PARAMS::LAYERS_COLL]; - int32_t remCollapse = p.m_paramsDec[CKKS_BOOT_PARAMS::LAYERS_REM]; - int32_t numRotations = p.m_paramsDec[CKKS_BOOT_PARAMS::NUM_ROTATIONS]; - int32_t b = p.m_paramsDec[CKKS_BOOT_PARAMS::BABY_STEP]; - int32_t g = p.m_paramsDec[CKKS_BOOT_PARAMS::GIANT_STEP]; - int32_t numRotationsRem = p.m_paramsDec[CKKS_BOOT_PARAMS::NUM_ROTATIONS_REM]; - int32_t bRem = p.m_paramsDec[CKKS_BOOT_PARAMS::BABY_STEP_REM]; - int32_t gRem = p.m_paramsDec[CKKS_BOOT_PARAMS::GIANT_STEP_REM]; - - auto cc = ctxt->GetCryptoContext(); - auto algo = cc->GetScheme(); - const auto cryptoParams = std::dynamic_pointer_cast(cc->GetCryptoParameters()); - uint32_t compositeDegree = cryptoParams->GetCompositeDegree(); - - int32_t flagRem = (remCollapse == 0) ? 0 : 1; + const auto& p = GetBootPrecom(slots).m_paramsDec; // precompute the inner and outer rotations - std::vector> rot_in(levelBudget, std::vector(numRotations + 1)); + std::vector> rot_out(p.lvlb, std::vector(p.b + p.bRem)); + std::vector> rot_in(p.lvlb, std::vector(p.numRotations + 1)); + const int32_t flagRem = (p.remCollapse == 0) ? 
0 : 1; if (flagRem == 1) { // remainder corresponds to index 0 in encoding and to last index in decoding - rot_in[levelBudget - 1].resize(numRotationsRem + 1); + rot_in[p.lvlb - 1].resize(p.numRotationsRem + 1); } - std::vector> rot_out(levelBudget, std::vector(b + bRem)); - uint32_t M = cc->GetCyclotomicOrder(); - for (int32_t s = 0; s < levelBudget - flagRem; ++s) { - for (int32_t j = 0; j < g; ++j) - rot_in[s][j] = ReduceRotation( - (j - static_cast((numRotations + 1) / 2) + 1) * (1 << (s * layersCollapse)), M / 4); - for (int32_t i = 0; i < b; i++) - rot_out[s][i] = ReduceRotation((g * i) * (1 << (s * layersCollapse)), M / 4); + auto cc = ctxt->GetCryptoContext(); + + const uint32_t M4 = cc->GetCyclotomicOrder() / 4; + const int32_t smax = p.lvlb - flagRem; + const int32_t offset = static_cast((p.numRotations + 1) / 2) - 1; + for (int32_t s = 0; s < smax; ++s) { + const int32_t scale = 1 << (s * p.layersCollapse); + for (uint32_t j = 0; j < p.g; ++j) + rot_in[s][j] = ReduceRotation((j - offset) * scale, M4); + for (uint32_t i = 0; i < p.b; ++i) + rot_out[s][i] = ReduceRotation((p.g * i) * scale, M4); } - if (flagRem) { - int32_t s = levelBudget - flagRem; - for (int32_t j = 0; j < gRem; ++j) - rot_in[s][j] = ReduceRotation( - (j - static_cast((numRotationsRem + 1) / 2) + 1) * (1 << (s * layersCollapse)), M / 4); - for (int32_t i = 0; i < bRem; ++i) - rot_out[s][i] = ReduceRotation((gRem * i) * (1 << (s * layersCollapse)), M / 4); + if (flagRem == 1) { + const int32_t scaleRem = 1 << (smax * p.layersCollapse); + const int32_t offsetRem = static_cast((p.numRotationsRem + 1) / 2) - 1; + for (uint32_t j = 0; j < p.gRem; ++j) + rot_in[smax][j] = ReduceRotation((j - offsetRem) * scaleRem, M4); + for (uint32_t i = 0; i < p.bRem; ++i) + rot_out[smax][i] = ReduceRotation((p.gRem * i) * scaleRem, M4); } // No need for Encrypted Bit Reverse auto result = ctxt->Clone(); - uint32_t N = cc->GetRingDimension(); + + uint32_t N = cc->GetRingDimension(); + std::vector map(N); + + auto algo = cc->GetScheme(); + const auto cryptoParams = std::dynamic_pointer_cast(cc->GetCryptoParameters()); + uint32_t compositeDegree = cryptoParams->GetCompositeDegree(); // hoisted automorphisms - for (int32_t s = 0; s < levelBudget - flagRem; ++s) { + for (int32_t s = 0; s < smax; ++s) { if (s != 0) algo->ModReduceInternalInPlace(result, compositeDegree); // computes the NTTs for each CRT limb (for the hoisted automorphisms used later on) auto digits = cc->EvalFastRotationPrecompute(result); - - std::vector> fastRotation(g); -#pragma omp parallel for - for (int32_t j = 0; j < g; ++j) { - if (rot_in[s][j] != 0) - fastRotation[j] = cc->EvalFastRotationExt(result, rot_in[s][j], digits, true); - else - fastRotation[j] = cc->KeySwitchExt(result, true); - } + std::vector> fastRotation(p.g); +#pragma omp parallel for num_threads(OpenFHEParallelControls.GetThreadLimit(p.g)) + for (uint32_t j = 0; j < p.g; ++j) + fastRotation[j] = (rot_in[s][j] != 0) ? 
cc->EvalFastRotationExt(result, rot_in[s][j], digits, true) : + cc->KeySwitchExt(result, true); Ciphertext outer; DCRTPoly first; - for (int32_t i = 0; i < b; ++i) { + for (uint32_t i = 0; i < p.b; ++i) { // for the first iteration with j=0: - int32_t G = g * i; + uint32_t G = i * p.g; auto inner = EvalMultExt(fastRotation[0], A[s][G]); // continue the loop - for (int32_t j = 1; j < g; ++j) { - if ((G + j) != static_cast(numRotations)) + for (uint32_t j = 1; j < p.g; ++j) { + if ((G + j) != p.numRotations) EvalAddExtInPlace(inner, EvalMultExt(fastRotation[j], A[s][G + j])); } @@ -1814,8 +1698,7 @@ Ciphertext FHECKKSRNS::EvalSlotsToCoeffs(const std::vectorKeySwitchDown(inner); // Find the automorphism index that corresponds to rotation index index. - uint32_t autoIndex = FindAutomorphismIndex2nComplex(rot_out[s][i], M); - std::vector map(N); + auto autoIndex = FindAutomorphismIndex2nComplex(rot_out[s][i], cc->GetCyclotomicOrder()); PrecomputeAutoMap(N, autoIndex, &map); first += inner->GetElements()[0].AutomorphismTransform(autoIndex, map); auto&& innerDigits = cc->EvalFastRotationPrecompute(inner); @@ -1834,31 +1717,28 @@ Ciphertext FHECKKSRNS::EvalSlotsToCoeffs(const std::vectorGetElements()[0] += first; } - if (flagRem) { + if (flagRem == 1) { algo->ModReduceInternalInPlace(result, compositeDegree); + // computes the NTTs for each CRT limb (for the hoisted automorphisms used later on) auto digits = cc->EvalFastRotationPrecompute(result); - std::vector> fastRotation(gRem); - - int32_t s = levelBudget - flagRem; -#pragma omp parallel for - for (int32_t j = 0; j < gRem; ++j) { - if (rot_in[s][j] != 0) - fastRotation[j] = cc->EvalFastRotationExt(result, rot_in[s][j], digits, true); - else - fastRotation[j] = cc->KeySwitchExt(result, true); - } + std::vector> fastRotationRem(p.gRem); +#pragma omp parallel for num_threads(OpenFHEParallelControls.GetThreadLimit(p.gRem)) + for (uint32_t j = 0; j < p.gRem; ++j) + fastRotationRem[j] = (rot_in[smax][j] != 0) ? + cc->EvalFastRotationExt(result, rot_in[smax][j], digits, true) : + cc->KeySwitchExt(result, true); Ciphertext outer; DCRTPoly first; - for (int32_t i = 0; i < bRem; i++) { + for (uint32_t i = 0; i < p.bRem; ++i) { // for the first iteration with j=0: - int32_t GRem = gRem * i; - auto inner = EvalMultExt(fastRotation[0], A[s][GRem]); + uint32_t GRem = i * p.gRem; + auto inner = EvalMultExt(fastRotationRem[0], A[smax][GRem]); // continue the loop - for (int32_t j = 1; j < gRem; ++j) { - if ((GRem + j) != static_cast(numRotationsRem)) - EvalAddExtInPlace(inner, EvalMultExt(fastRotation[j], A[s][GRem + j])); + for (uint32_t j = 1; j < p.gRem; ++j) { + if ((GRem + j) != p.numRotationsRem) + EvalAddExtInPlace(inner, EvalMultExt(fastRotationRem[j], A[smax][GRem + j])); } if (i == 0) { @@ -1869,15 +1749,14 @@ Ciphertext FHECKKSRNS::EvalSlotsToCoeffs(const std::vectorKeySwitchDown(inner); // Find the automorphism index that corresponds to rotation index index. 
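// Standalone sketch (not part of the patch): how the rot_in/rot_out tables used above are laid
// out for one decoding level s. The values below are hypothetical; ReduceRotationDemo is a
// simple stand-in for the library's ReduceRotation and just folds an index into [0, M/4).
#include <cstdint>
#include <iostream>
#include <vector>

static int32_t ReduceRotationDemo(int32_t index, int32_t slots) {
    return ((index % slots) + slots) % slots;
}

int main() {
    const int32_t M4             = 1 << 15;                              // cyclotomic order / 4 (assumed ring dim 2^16)
    const int32_t layersCollapse = 3;                                    // layers merged per level (assumed)
    const int32_t numRotations   = (1 << (layersCollapse + 1)) - 1;      // 15
    const int32_t g              = 1 << (layersCollapse / 2 + 1 + (numRotations > 7));  // giant step, rule as in GetCollapsedFFTParams
    const int32_t b              = (numRotations + 1) / g;               // baby step
    const int32_t s              = 1;                                    // level index
    const int32_t scale          = 1 << (s * layersCollapse);
    const int32_t offset         = (numRotations + 1) / 2 - 1;

    std::vector<int32_t> rot_in(numRotations + 1), rot_out(b);
    for (int32_t j = 0; j < g; ++j)
        rot_in[j] = ReduceRotationDemo((j - offset) * scale, M4);        // inner (hoisted) rotations
    for (int32_t i = 0; i < b; ++i)
        rot_out[i] = ReduceRotationDemo((g * i) * scale, M4);            // outer rotations

    for (int32_t j = 0; j < g; ++j) std::cout << rot_in[j] << ' ';
    std::cout << '\n';
    for (int32_t i = 0; i < b; ++i) std::cout << rot_out[i] << ' ';
    std::cout << '\n';
}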
- uint32_t autoIndex = FindAutomorphismIndex2nComplex(rot_out[s][i], M); - std::vector map(N); + auto autoIndex = FindAutomorphismIndex2nComplex(rot_out[smax][i], cc->GetCyclotomicOrder()); PrecomputeAutoMap(N, autoIndex, &map); first += inner->GetElements()[0].AutomorphismTransform(autoIndex, map); auto innerDigits = cc->EvalFastRotationPrecompute(inner); - EvalAddExtInPlace(outer, cc->EvalFastRotationExt(inner, rot_out[s][i], innerDigits, false)); + EvalAddExtInPlace(outer, cc->EvalFastRotationExt(inner, rot_out[smax][i], innerDigits, false)); } else { first += cc->KeySwitchDownFirstElement(inner); @@ -1980,7 +1859,7 @@ void FHECKKSRNS::AdjustCiphertextFBT(Ciphertext& ciphertext, double co #endif } -void FHECKKSRNS::ExtendCiphertext(std::vector& ctxtDCRT, const CryptoContextImpl& cc, +void FHECKKSRNS::ExtendCiphertext(std::vector& ctxtDCRTs, const CryptoContextImpl& cc, const std::shared_ptr elementParamsRaisedPtr) const { // TODO: YSP We should be able to use one of the DCRTPoly methods for this; If not, we can define a new method there and use it here @@ -1999,83 +1878,72 @@ void FHECKKSRNS::ExtendCiphertext(std::vector& ctxtDCRT, const CryptoC std::vector qhat_modqj(compositeDegree); qhat_modqj[0] = qj[1].Mod(qj[0]); qhat_modqj[1] = qj[0].Mod(qj[1]); - - std::vector qhat_inv_modqj(compositeDegree); - for (uint32_t d = 2; d < compositeDegree; d++) { - for (uint32_t j = 0; j < d; ++j) { + for (uint32_t j = 0; j < d; ++j) qhat_modqj[j] = qj[d].ModMul(qhat_modqj[j], qj[j]); - } qhat_modqj[d] = qj[1].ModMul(qj[0], qj[d]); - for (uint32_t j = 2; j < d; ++j) { + for (uint32_t j = 2; j < d; ++j) qhat_modqj[d] = qj[j].ModMul(qhat_modqj[d], qj[d]); - } } - for (uint32_t j = 0; j < compositeDegree; ++j) { + std::vector qhat_inv_modqj(compositeDegree); + for (uint32_t j = 0; j < compositeDegree; ++j) qhat_inv_modqj[j] = qhat_modqj[j].ModInverse(qj[j]); - } NativeInteger qjProduct = std::accumulate(qj.begin() + 1, qj.end(), NativeInteger{1}, std::multiplies()); uint32_t init_element_index = compositeDegree; - for (size_t i = 0; i < ctxtDCRT.size(); i++) { - std::vector temp(compositeDegree + 1, DCRTPoly(elementParamsRaisedPtr, COEFFICIENT)); - std::vector ctxtDCRT_modq(compositeDegree, DCRTPoly(elementParamsRaisedPtr, COEFFICIENT)); - - ctxtDCRT[i].SetFormat(COEFFICIENT); - for (size_t j = 0; j < ctxtDCRT[i].GetNumOfElements(); j++) { - for (size_t k = 0; k < compositeDegree; k++) - ctxtDCRT_modq[k].SetElementAtIndex(j, ctxtDCRT[i].GetElementAtIndex(j) * qhat_inv_modqj[k]); - } - //========================================================================================================= - temp[0] = ctxtDCRT_modq[0].GetElementAtIndex(0); - for (auto& el : temp[0].GetAllElements()) { - el *= qjProduct; + + for (auto& dcrt : ctxtDCRTs) { + dcrt.SetFormat(COEFFICIENT); + + std::vector tmp(compositeDegree + 1, DCRTPoly(elementParamsRaisedPtr, COEFFICIENT)); + std::vector ctxtDCRTs_modq(compositeDegree, DCRTPoly(elementParamsRaisedPtr, COEFFICIENT)); + +#pragma omp parallel for num_threads(OpenFHEParallelControls.GetThreadLimit(dcrt.GetNumOfElements())) + for (size_t j = 0; j < dcrt.GetNumOfElements(); ++j) { + for (uint32_t k = 0; k < compositeDegree; ++k) + ctxtDCRTs_modq[k].SetElementAtIndex(j, dcrt.GetElementAtIndex(j) * qhat_inv_modqj[k]); } - //========================================================================================================= - for (size_t d = 1; d < compositeDegree; d++) { - temp[init_element_index] = ctxtDCRT_modq[d].GetElementAtIndex(d); - for (size_t k = 0; k < 
compositeDegree; k++) { - if (k != d) { - temp[d].SetElementAtIndex(k, temp[0].GetElementAtIndex(k) * qj[k]); - } - } - //========================================================================================================= + tmp[0] = ctxtDCRTs_modq[0].GetElementAtIndex(0); + +#pragma omp parallel for num_threads(OpenFHEParallelControls.GetThreadLimit(tmp[0].GetAllElements().size())) + for (auto& el : tmp[0].GetAllElements()) + el *= qjProduct; + + for (uint32_t d = 1; d < compositeDegree; ++d) { + tmp[init_element_index] = ctxtDCRTs_modq[d].GetElementAtIndex(d); + NativeInteger qjProductD{1}; - for (size_t k = 0; k < compositeDegree; k++) { - if (k != d) + for (uint32_t k = 0; k < compositeDegree; ++k) { + if (k != d) { qjProductD *= qj[k]; + tmp[d].SetElementAtIndex(k, tmp[0].GetElementAtIndex(k) * qj[k]); + } } - for (size_t j = compositeDegree; j < elementParamsRaisedPtr->GetParams().size(); j++) { - auto value = temp[init_element_index].GetElementAtIndex(j) * qjProductD; - temp[d].SetElementAtIndex(j, value); - } - //========================================================================================================= - { - auto value = temp[init_element_index].GetElementAtIndex(d) * qjProductD; - temp[d].SetElementAtIndex(d, value); - } - //========================================================================================================= - temp[0] += temp[d]; + for (uint32_t j = compositeDegree; j < elementParamsRaisedPtr->GetParams().size(); ++j) + tmp[d].SetElementAtIndex(j, tmp[init_element_index].GetElementAtIndex(j) * qjProductD); + + tmp[d].SetElementAtIndex(d, tmp[init_element_index].GetElementAtIndex(d) * qjProductD); + tmp[0] += tmp[d]; } - temp[0].SetFormat(EVALUATION); - ctxtDCRT[i] = temp[0]; + tmp[0].SetFormat(EVALUATION); + dcrt = std::move(tmp[0]); } } void FHECKKSRNS::ApplyDoubleAngleIterations(Ciphertext& ciphertext, uint32_t numIter) const { + constexpr double twoPi = 2.0 * M_PI; + auto cc = ciphertext->GetCryptoContext(); - const int32_t r = numIter; - for (int32_t j = 1; j <= r; ++j) { + for (int32_t i = 1 - numIter; i <= 0; ++i) { + double scalar = -std::pow(twoPi, -std::pow(2.0, i)); cc->EvalSquareInPlace(ciphertext); - ciphertext = cc->EvalAdd(ciphertext, ciphertext); - double scalar = -1.0 / std::pow((2.0 * M_PI), std::pow(2.0, j - r)); - cc->EvalAddInPlace(ciphertext, scalar); + cc->EvalAddInPlace(ciphertext, cc->EvalAdd(ciphertext, scalar)); cc->ModReduceInPlace(ciphertext); } } @@ -2105,8 +1973,8 @@ Plaintext FHECKKSRNS::MakeAuxPlaintext(const CryptoContextImpl& cc, co double powP = std::pow(2.0, MAX_DOUBLE_PRECISION); int32_t pCurrent = pBits - MAX_DOUBLE_PRECISION; - std::vector temp(2 * slots); - for (size_t i = 0; i < slots; ++i) { + std::vector tmp(2 * slots); + for (uint32_t i = 0; i < slots; ++i) { // extract the mantissa of real part and multiply it by 2^52 int32_t n1 = 0; double dre = std::frexp(inverse[i].real(), &n1) * powP; @@ -2181,10 +2049,10 @@ Plaintext FHECKKSRNS::MakeAuxPlaintext(const CryptoContextImpl& cc, co im = pPowRemaining * im64; } - temp[i] = (re < 0) ? Max128BitValue() + re : re; - temp[i + slots] = (im < 0) ? Max128BitValue() + im : im; + tmp[i] = (re < 0) ? Max128BitValue() + re : re; + tmp[i + slots] = (im < 0) ? 
Max128BitValue() + im : im; - if (is128BitOverflow(temp[i]) || is128BitOverflow(temp[i + slots])) { + if (is128BitOverflow(tmp[i]) || is128BitOverflow(tmp[i + slots])) { OPENFHE_THROW("Overflow, try to decrease scaling factor"); } } @@ -2194,7 +2062,7 @@ Plaintext FHECKKSRNS::MakeAuxPlaintext(const CryptoContextImpl& cc, co for (size_t i = 0; i < nativeParams.size(); i++) { NativeVector nativeVec(N, nativeParams[i]->GetModulus()); - FitToNativeVector(N, temp, Max128BitValue(), &nativeVec); + FitToNativeVector(N, tmp, Max128BitValue(), &nativeVec); NativePoly element = plainElement.GetElementAtIndex(i); element.SetValues(std::move(nativeVec), Format::COEFFICIENT); plainElement.SetElementAtIndex(i, std::move(element)); @@ -2211,8 +2079,8 @@ Plaintext FHECKKSRNS::MakeAuxPlaintext(const CryptoContextImpl& cc, co auto currPowP = crtPowP; - // We want to scale temp by 2^(pd), and the loop starts from j=2 - // because temp is already scaled by 2^p in the re/im loop above, + // We want to scale tmp by 2^(pd), and the loop starts from j=2 + // because tmp is already scaled by 2^p in the re/im loop above, // and currPowP already is 2^p. for (size_t i = 2; i < noiseScaleDeg; i++) { currPowP = CKKSPackedEncoding::CRTMult(currPowP, crtPowP, moduli); @@ -2233,7 +2101,7 @@ Plaintext FHECKKSRNS::MakeAuxPlaintext(const CryptoContextImpl& cc, co uint32_t level, uint32_t slots) const { const auto cryptoParams = std::dynamic_pointer_cast(cc.GetCryptoParameters()); - double scFact = cryptoParams->GetScalingFactorReal(level); + const double scFact = cryptoParams->GetScalingFactorReal(level); Plaintext p = Plaintext(std::make_shared(params, cc.GetEncodingParams(), value, noiseScaleDeg, level, scFact, slots, COMPLEX)); @@ -2243,7 +2111,6 @@ Plaintext FHECKKSRNS::MakeAuxPlaintext(const CryptoContextImpl& cc, co uint32_t N = cc.GetRingDimension(); std::vector> inverse = value; - inverse.resize(slots); DiscreteFourierTransform::FFTSpecialInv(inverse, N * 2); @@ -2256,12 +2123,12 @@ Plaintext FHECKKSRNS::MakeAuxPlaintext(const CryptoContextImpl& cc, co for (uint32_t i = 0; i < slots; ++i) { inverse[i] *= powP; if (inverse[i].real() != 0) { - int32_t logci = static_cast(ceil(log2(std::abs(inverse[i].real())))); + int32_t logci = static_cast(std::ceil(std::log2(std::abs(inverse[i].real())))); if (logc < logci) logc = logci; } if (inverse[i].imag() != 0) { - int32_t logci = static_cast(ceil(log2(std::abs(inverse[i].imag())))); + int32_t logci = static_cast(std::ceil(std::log2(std::abs(inverse[i].imag())))); if (logc < logci) logc = logci; } @@ -2272,11 +2139,11 @@ Plaintext FHECKKSRNS::MakeAuxPlaintext(const CryptoContextImpl& cc, co int32_t logValid = (logc <= MAX_BITS_IN_WORD) ? logc : MAX_BITS_IN_WORD; int32_t logApprox = logc - logValid; - double approxFactor = pow(2, logApprox); + double approxFactor = std::pow(2, logApprox); - std::vector temp(2 * slots); + std::vector tmp(2 * slots); - for (size_t i = 0; i < slots; ++i) { + for (uint32_t i = 0; i < slots; ++i) { // Scale down by approxFactor in case the value exceeds a 64-bit integer. 
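// Standalone sketch (not part of the patch): the scale-down step above. When the scaled slot
// values would overflow a 64-bit word, the encoder clamps the usable bit width, divides by
// approxFactor = 2^logApprox before rounding, and multiplies the factor back in later via CRT.
// MAX_BITS_IN_WORD_DEMO is a placeholder, not the library constant.
#include <cmath>
#include <cstdint>
#include <iostream>

int main() {
    const int32_t MAX_BITS_IN_WORD_DEMO = 62;           // assumed bound
    const double powP  = std::pow(2.0, 80);             // scaling factor wider than 64 bits
    const double value = 1.25;                           // one (real) slot value

    const double scaled      = value * powP;
    const int32_t logc       = static_cast<int32_t>(std::ceil(std::log2(std::abs(scaled))));
    const int32_t logValid   = (logc <= MAX_BITS_IN_WORD_DEMO) ? logc : MAX_BITS_IN_WORD_DEMO;
    const int32_t logApprox  = logc - logValid;
    const double approxFactor = std::pow(2.0, logApprox);

    // Rounding the reduced value now fits comfortably in int64_t.
    const int64_t rounded = std::llround(scaled / approxFactor);
    std::cout << "logc=" << logc << " logApprox=" << logApprox << " rounded=" << rounded << '\n';
}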
double dre = inverse[i].real() / approxFactor; double dim = inverse[i].imag() / approxFactor; @@ -2291,27 +2158,22 @@ Plaintext FHECKKSRNS::MakeAuxPlaintext(const CryptoContextImpl& cc, co double realMax = -1, imagMax = -1; uint32_t realMaxIdx = -1, imagMaxIdx = -1; - for (uint32_t idx = 0; idx < inverse.size(); idx++) { - // exp( j*2*pi*n*k/N ) - std::complex expFactor = {cos((factor * idx) / invLen), sin((factor * idx) / invLen)}; - + for (uint32_t idx = 0; idx < inverse.size(); ++idx) { // X[k] * exp( j*2*pi*n*k/N ) - std::complex prodFactor = inverse[idx] * expFactor; + auto prodFactor = inverse[idx] * std::complex{std::cos((factor * idx) / invLen), + std::sin((factor * idx) / invLen)}; - double realVal = prodFactor.real(); - double imagVal = prodFactor.imag(); - - if (realVal > realMax) { - realMax = realVal; + if (prodFactor.real() > realMax) { + realMax = prodFactor.real(); realMaxIdx = idx; } - if (imagVal > imagMax) { - imagMax = imagVal; + if (prodFactor.imag() > imagMax) { + imagMax = prodFactor.imag(); imagMaxIdx = idx; } } - auto scaledInputSize = ceil(log2(dre)); + auto scaledInputSize = std::ceil(std::log2(dre)); std::stringstream buffer; buffer << std::endl @@ -2327,18 +2189,18 @@ Plaintext FHECKKSRNS::MakeAuxPlaintext(const CryptoContextImpl& cc, co } int64_t re = std::llround(dre); - int64_t im = std::llround(dim); + tmp[i] = (re < 0) ? Max64BitValue() + re : re; - temp[i] = (re < 0) ? Max64BitValue() + re : re; - temp[i + slots] = (im < 0) ? Max64BitValue() + im : im; + int64_t im = std::llround(dim); + tmp[i + slots] = (im < 0) ? Max64BitValue() + im : im; } - const std::shared_ptr> bigParams = plainElement.GetParams(); - const std::vector>& nativeParams = bigParams->GetParams(); + const auto& bigParams = plainElement.GetParams(); + const auto& nativeParams = bigParams->GetParams(); - for (size_t i = 0; i < nativeParams.size(); i++) { + for (size_t i = 0; i < nativeParams.size(); ++i) { NativeVector nativeVec(N, nativeParams[i]->GetModulus()); - FitToNativeVector(N, temp, Max64BitValue(), &nativeVec); + FitToNativeVector(N, tmp, Max64BitValue(), &nativeVec); NativePoly element = plainElement.GetElementAtIndex(i); element.SetValues(std::move(nativeVec), Format::COEFFICIENT); plainElement.SetElementAtIndex(i, std::move(element)); @@ -2346,7 +2208,7 @@ Plaintext FHECKKSRNS::MakeAuxPlaintext(const CryptoContextImpl& cc, co uint32_t numTowers = nativeParams.size(); std::vector moduli(numTowers); - for (uint32_t i = 0; i < numTowers; i++) { + for (uint32_t i = 0; i < numTowers; ++i) { moduli[i] = nativeParams[i]->GetModulus(); } @@ -2355,7 +2217,7 @@ Plaintext FHECKKSRNS::MakeAuxPlaintext(const CryptoContextImpl& cc, co cryptoParams->GetScalingTechnique() == COMPOSITESCALINGMANUAL) { // Duhyeong: Support the case powP > 2^64 // Later we might need to use the NATIVE_INT=128 version of FHECKKSRNS::MakeAuxPlaintext for higher precision - int32_t logPowP = static_cast(ceil(log2(fabs(powP)))); + int32_t logPowP = static_cast(std::ceil(std::log2(std::abs(powP)))); if (logPowP > 64) { // Compute approxFactor, a value to scale down by, in case the value exceeds a 64-bit integer. @@ -2364,17 +2226,17 @@ Plaintext FHECKKSRNS::MakeAuxPlaintext(const CryptoContextImpl& cc, co LargeScalingFactorConstants::MAX_BITS_IN_WORD; int32_t logApprox_PowP = logPowP - logValid; if (logApprox_PowP > 0) { - int32_t logStep = (logApprox <= LargeScalingFactorConstants::MAX_LOG_STEP) ? 
- logApprox_PowP : - LargeScalingFactorConstants::MAX_LOG_STEP; - DCRTPoly::Integer intStep = static_cast(1) << logStep; + int32_t logStep = (logApprox <= LargeScalingFactorConstants::MAX_LOG_STEP) ? + logApprox_PowP : + LargeScalingFactorConstants::MAX_LOG_STEP; + auto intStep = DCRTPoly::Integer(1) << logStep; std::vector crtApprox(numTowers, intStep); logApprox_PowP -= logStep; while (logApprox_PowP > 0) { - int32_t logStep = (logApprox <= LargeScalingFactorConstants::MAX_LOG_STEP) ? - logApprox : - LargeScalingFactorConstants::MAX_LOG_STEP; - DCRTPoly::Integer intStep = static_cast(1) << logStep; + int32_t logStep = (logApprox <= LargeScalingFactorConstants::MAX_LOG_STEP) ? + logApprox : + LargeScalingFactorConstants::MAX_LOG_STEP; + auto intStep = DCRTPoly::Integer(1) << logStep; std::vector crtStep(numTowers, intStep); crtApprox = CKKSPackedEncoding::CRTMult(crtApprox, crtStep, moduli); logApprox_PowP -= logStep; @@ -2382,7 +2244,7 @@ Plaintext FHECKKSRNS::MakeAuxPlaintext(const CryptoContextImpl& cc, co crtPowP = CKKSPackedEncoding::CRTMult(crtPowP, crtApprox, moduli); } else { - double approxFactor = pow(2, logApprox_PowP); + double approxFactor = std::pow(2, logApprox_PowP); DCRTPoly::Integer intPowP{static_cast(std::llround(powP / approxFactor))}; crtPowP = std::vector(numTowers, intPowP); } @@ -2399,10 +2261,10 @@ Plaintext FHECKKSRNS::MakeAuxPlaintext(const CryptoContextImpl& cc, co auto currPowP = crtPowP; - // We want to scale temp by 2^(pd), and the loop starts from j=2 - // because temp is already scaled by 2^p in the re/im loop above, + // We want to scale tmp by 2^(pd), and the loop starts from j=2 + // because tmp is already scaled by 2^p in the re/im loop above, // and currPowP already is 2^p. - for (size_t i = 2; i < noiseScaleDeg; i++) + for (size_t i = 2; i < noiseScaleDeg; ++i) currPowP = CKKSPackedEncoding::CRTMult(currPowP, crtPowP, moduli); if (noiseScaleDeg > 1) plainElement = plainElement.Times(currPowP); @@ -2410,13 +2272,13 @@ Plaintext FHECKKSRNS::MakeAuxPlaintext(const CryptoContextImpl& cc, co // Scale back up by the approxFactor to get the correct encoding. if (logApprox > 0) { int32_t logStep = (logApprox <= MAX_LOG_STEP) ? logApprox : MAX_LOG_STEP; - auto intStep = DCRTPoly::Integer(static_cast(1) << logStep); + auto intStep = DCRTPoly::Integer(1) << logStep; std::vector crtApprox(numTowers, intStep); logApprox -= logStep; while (logApprox > 0) { logStep = (logApprox <= MAX_LOG_STEP) ? 
logApprox : MAX_LOG_STEP; - intStep = DCRTPoly::Integer(static_cast(1) << logStep); + intStep = DCRTPoly::Integer(1) << logStep; std::vector crtSF(numTowers, intStep); crtApprox = CKKSPackedEncoding::CRTMult(crtApprox, crtSF, moduli); logApprox -= logStep; @@ -2433,7 +2295,6 @@ Plaintext FHECKKSRNS::MakeAuxPlaintext(const CryptoContextImpl& cc, co Ciphertext FHECKKSRNS::EvalMultExt(ConstCiphertext ciphertext, ConstPlaintext plaintext) const { auto pt = plaintext->GetElement(); pt.SetFormat(Format::EVALUATION); - auto result = ciphertext->Clone(); for (auto& c : result->GetElements()) c *= pt; @@ -2461,12 +2322,10 @@ EvalKey FHECKKSRNS::ConjugateKeyGen(const PrivateKey private uint32_t N = privateKey->GetPrivateElement().GetRingDimension(); std::vector vec(N); PrecomputeAutoMap(N, 2 * N - 1, &vec); - const auto cc = privateKey->GetCryptoContext(); auto pkPermuted = std::make_shared>(cc); pkPermuted->SetPrivateElement(privateKey->GetPrivateElement().AutomorphismTransform(2 * N - 1, vec)); pkPermuted->SetKeyTag(privateKey->GetKeyTag()); - return cc->GetScheme()->KeySwitchGen(privateKey, pkPermuted); } @@ -2496,7 +2355,7 @@ void FHECKKSRNS::FitToNativeVector(uint32_t ringDim, const std::vector& NativeInteger diff = bigBound - modulus; uint32_t dslots = vec.size(); uint32_t gap = ringDim / dslots; - for (uint32_t i = 0; i < vec.size(); i++) { + for (uint32_t i = 0; i < dslots; ++i) { NativeInteger n(vec[i]); if (n > bigValueHf) { (*nativeVec)[gap * i] = n.ModSub(diff, modulus); @@ -2517,7 +2376,7 @@ void FHECKKSRNS::FitToNativeVector(uint32_t ringDim, const std::vector NativeInteger diff = NativeInteger((uint128_t)bigBound) - modulus; uint32_t dslots = vec.size(); uint32_t gap = ringDim / dslots; - for (uint32_t i = 0; i < vec.size(); i++) { + for (uint32_t i = 0; i < dslots; ++i) { NativeInteger n((uint128_t)vec[i]); if (n > bigValueHf) { (*nativeVec)[gap * i] = n.ModSub(diff, modulus); @@ -2548,8 +2407,6 @@ void FHECKKSRNS::EvalFBTSetupInternal(const CryptoContextImpl& cc, con auto& precom = m_bootPrecomMap[slots]; precom->m_slots = slots; - precom->m_dim1 = dim1[0]; - precom->m_gs = dim1[1]; // even for the case of a single slot we need one level for rescaling uint32_t logSlots = (slots < 3) ? 1 : std::log2(slots); @@ -2562,8 +2419,6 @@ void FHECKKSRNS::EvalFBTSetupInternal(const CryptoContextImpl& cc, con if (levelBudget[0] < 1 || levelBudget[1] < 1) OPENFHE_THROW("The level budget cannot be zero. Please set it to be at least one and at most log(slots)."); - precom->m_levelEnc = levelBudget[0]; - precom->m_levelDec = levelBudget[1]; precom->m_paramsEnc = GetCollapsedFFTParams(slots, levelBudget[0], dim1[0]); precom->m_paramsDec = GetCollapsedFFTParams(slots, levelBudget[1], dim1[1]); @@ -2700,7 +2555,7 @@ Ciphertext FHECKKSRNS::EvalHomDecoding(ConstCiphertext& ciph // linear transform for decoding auto slots = ciphertext->GetSlots(); auto& p = GetBootPrecom(slots); - auto isLTBS = (p.m_levelEnc == 1) && (p.m_levelDec == 1); + auto isLTBS = (p.m_paramsEnc.lvlb == 1) && (p.m_paramsDec.lvlb == 1); auto ctxtDec = (isLTBS) ? EvalLinearTransform(p.m_U0Pre, ctxtEnc) : EvalSlotsToCoeffs(p.m_U0PreFFT, ctxtEnc); if (slots != cc->GetCyclotomicOrder() / 4) { @@ -2716,6 +2571,7 @@ Ciphertext FHECKKSRNS::EvalHomDecoding(ConstCiphertext& ciph cc->GetScheme()->MultByIntegerInPlace(ctxtDec, postScaling); cc->ModReduceInPlace(ctxtDec); + // 64-bit only: No need to scale back the message to its original scale. 
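// Standalone sketch (not part of the patch): scaling back up by 2^logApprox in chunks, as in
// the loop above. Each chunk of at most MAX_LOG_STEP bits becomes one small constant, so every
// per-modulus multiplication stays within native range. The moduli below are toy values, not
// actual NTT-friendly primes.
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
    const int32_t MAX_LOG_STEP_DEMO = 15;                                // assumed chunk size
    int32_t logApprox = 40;                                              // total bits to restore
    const std::vector<uint64_t> moduli = {1099511627689ULL, 1099511627581ULL};

    // crtApprox starts at 1 mod each q_i and accumulates 2^logApprox chunk by chunk.
    std::vector<uint64_t> crtApprox(moduli.size(), 1);
    while (logApprox > 0) {
        const int32_t logStep = (logApprox <= MAX_LOG_STEP_DEMO) ? logApprox : MAX_LOG_STEP_DEMO;
        const uint64_t step   = uint64_t(1) << logStep;                  // fits in a native word
        for (size_t i = 0; i < moduli.size(); ++i)
            crtApprox[i] = (crtApprox[i] * step) % moduli[i];            // element-wise CRT multiply
        logApprox -= logStep;
    }
    for (auto r : crtApprox) std::cout << r << ' ';                      // residues of 2^40 mod q_i
    std::cout << '\n';
}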
return ctxtDec; } @@ -2771,30 +2627,30 @@ std::shared_ptr> FHECKKSRNS::EvalMVBPrecomputeInternal( raised = KeySwitchSparse(raised, evalKeyMap.at(2 * N - 4)); // Only level 0 ciphertext used here. Other towers ignored to make CKKS bootstrapping faster. - auto& ctxtDCRT = raised->GetElements(); - for (auto& poly : ctxtDCRT) { - poly.SetFormat(COEFFICIENT); - DCRTPoly temp(elementParamsRaisedPtr, COEFFICIENT); - temp = poly.GetElementAtIndex(0); - temp.SetFormat(EVALUATION); - poly = std::move(temp); + auto& ctxtDCRTs = raised->GetElements(); + + for (auto& dcrt : ctxtDCRTs) { + dcrt.SetFormat(COEFFICIENT); + DCRTPoly tmp(dcrt.GetElementAtIndex(0), elementParamsRaisedPtr); + tmp.SetFormat(EVALUATION); + dcrt = std::move(tmp); } - raised->SetLevel(L0 - ctxtDCRT[0].GetNumOfElements()); + raised->SetLevel(L0 - ctxtDCRTs[0].GetNumOfElements()); // go back to a denser secret algo->KeySwitchInPlace(raised, evalKeyMap.at(2 * N - 2)); } else { // Only level 0 ciphertext used here. Other towers ignored to make CKKS bootstrapping faster. - auto& ctxtDCRT = raised->GetElements(); - for (auto& poly : ctxtDCRT) { - poly.SetFormat(COEFFICIENT); - DCRTPoly temp(elementParamsRaisedPtr, COEFFICIENT); - temp = poly.GetElementAtIndex(0); - temp.SetFormat(EVALUATION); - poly = std::move(temp); + auto& ctxtDCRTs = raised->GetElements(); + + for (auto& dcrt : ctxtDCRTs) { + dcrt.SetFormat(COEFFICIENT); + DCRTPoly tmp(dcrt.GetElementAtIndex(0), elementParamsRaisedPtr); + tmp.SetFormat(EVALUATION); + dcrt = std::move(tmp); } - raised->SetLevel(L0 - ctxtDCRT[0].GetNumOfElements()); + raised->SetLevel(L0 - ctxtDCRTs[0].GetNumOfElements()); } #ifdef BOOTSTRAPTIMING @@ -2809,8 +2665,7 @@ std::shared_ptr> FHECKKSRNS::EvalMVBPrecomputeInternal( auto skd = cryptoParams->GetSecretKeyDist(); double k = (skd == SPARSE_TERNARY || skd == SPARSE_ENCAPSULATED) ? 1.0 : K_UNIFORM; - double constantEvalMult = 1.0 / (k * N); - cc->EvalMultInPlace(raised, constantEvalMult); + cc->EvalMultInPlace(raised, 1.0 / (k * N)); // no linear transformations are needed for Chebyshev series as the range has been normalized to [-1,1] double coeffLowerBound = -1.0; @@ -2818,7 +2673,7 @@ std::shared_ptr> FHECKKSRNS::EvalMVBPrecomputeInternal( auto slots = ciphertext->GetSlots(); auto& p = GetBootPrecom(slots); - bool isLTBootstrap = (p.m_levelEnc == 1) && (p.m_levelDec == 1); + bool isLTBootstrap = (p.m_paramsEnc.lvlb == 1) && (p.m_paramsDec.lvlb == 1); std::vector> ctxtEnc; std::shared_ptr> ctxtPowers; @@ -2865,21 +2720,21 @@ std::shared_ptr> FHECKKSRNS::EvalMVBPrecomputeInternal( if (digitBitSize == 1 && order == 1) { auto& coeff_cos = (skd == SPARSE_ENCAPSULATED) ? 
coeff_cos_16_double : coeff_cos_25_double; - ctxtEnc[0] = cc->EvalChebyshevSeries(ctxtEnc[0], coeff_cos, coeffLowerBound, coeffUpperBound); - ctxtEnc[1] = cc->EvalChebyshevSeries(ctxtEnc[1], coeff_cos, coeffLowerBound, coeffUpperBound); + ctxtEnc[0] = algo->EvalChebyshevSeries(ctxtEnc[0], coeff_cos, coeffLowerBound, coeffUpperBound); + ctxtEnc[1] = algo->EvalChebyshevSeries(ctxtEnc[1], coeff_cos, coeffLowerBound, coeffUpperBound); // Double angle-iterations to get cos(pi*x) cc->EvalSquareInPlace(ctxtEnc[0]); cc->EvalAddInPlaceNoCheck(ctxtEnc[0], ctxtEnc[0]); cc->EvalSubInPlace(ctxtEnc[0], 1.0); cc->ModReduceInPlace(ctxtEnc[0]); // cos(pi x) + cc->EvalSquareInPlace(ctxtEnc[0]); + cc->ModReduceInPlace(ctxtEnc[0]); // cos^2(pi x) + cc->EvalSquareInPlace(ctxtEnc[1]); cc->EvalAddInPlaceNoCheck(ctxtEnc[1], ctxtEnc[1]); cc->EvalSubInPlace(ctxtEnc[1], 1.0); cc->ModReduceInPlace(ctxtEnc[1]); // cos(pi x) - - cc->EvalSquareInPlace(ctxtEnc[0]); - cc->ModReduceInPlace(ctxtEnc[0]); // cos^2(pi x) cc->EvalSquareInPlace(ctxtEnc[1]); cc->ModReduceInPlace(ctxtEnc[1]); // cos^2(pi x) } @@ -2889,8 +2744,8 @@ std::shared_ptr> FHECKKSRNS::EvalMVBPrecomputeInternal( coeff_exp_25_double_58; // Obtain exp(Pi/2*i*x) approximation via Chebyshev Basis Polynomial Interpolation - ctxtEnc[0] = cc->EvalChebyshevSeries(ctxtEnc[0], coeff_exp, coeffLowerBound, coeffUpperBound); - ctxtEnc[1] = cc->EvalChebyshevSeries(ctxtEnc[1], coeff_exp, coeffLowerBound, coeffUpperBound); + ctxtEnc[0] = algo->EvalChebyshevSeries(ctxtEnc[0], coeff_exp, coeffLowerBound, coeffUpperBound); + ctxtEnc[1] = algo->EvalChebyshevSeries(ctxtEnc[1], coeff_exp, coeffLowerBound, coeffUpperBound); // Double angle-iterations to get exp(2*Pi*i*x) cc->EvalSquareInPlace(ctxtEnc[0]); @@ -2904,8 +2759,9 @@ std::shared_ptr> FHECKKSRNS::EvalMVBPrecomputeInternal( cc->ModReduceInPlace(ctxtEnc[1]); } - auto ctxtPowersRe = cc->EvalPowers(ctxtEnc[0], coefficients); - auto ctxtPowersIm = cc->EvalPowers(ctxtEnc[1], coefficients); + auto ctxtPowersRe = algo->EvalPowers(ctxtEnc[0], coefficients); + auto ctxtPowersIm = algo->EvalPowers(ctxtEnc[1], coefficients); + if (ctxtPowersRe->powers2Re.size() == 0) { ctxtPowers = std::make_shared>(ctxtPowersRe->powersRe, ctxtPowersIm->powersRe); } @@ -2924,8 +2780,9 @@ std::shared_ptr> FHECKKSRNS::EvalMVBPrecomputeInternal( // Running PartialSum //------------------------------------------------------------------------------ - for (uint32_t j = 1; j < N / (2 * slots); j <<= 1) - cc->EvalAddInPlaceNoCheck(raised, cc->EvalRotate(raised, j * slots)); + const uint32_t limit = N / (2 * slots); + for (uint32_t j = 1; j < limit; j <<= 1) + cc->EvalAddInPlace(raised, cc->EvalRotate(raised, j * slots)); //------------------------------------------------------------------------------ // Running CoeffsToSlots @@ -2936,9 +2793,8 @@ std::shared_ptr> FHECKKSRNS::EvalMVBPrecomputeInternal( ctxtEnc.emplace_back((isLTBootstrap) ? 
EvalLinearTransform(p.m_U0hatTPre, raised) : EvalCoeffsToSlots(p.m_U0hatTPreFFT, raised)); - auto evalKeyMap = cc->GetEvalAutomorphismKeyMap(ctxtEnc[0]->GetKeyTag()); - auto conj = Conjugate(ctxtEnc[0], evalKeyMap); - cc->EvalAddInPlaceNoCheck(ctxtEnc[0], conj); + auto& evalKeyMap = cc->GetEvalAutomorphismKeyMap(ctxtEnc[0]->GetKeyTag()); + cc->EvalAddInPlace(ctxtEnc[0], Conjugate(ctxtEnc[0], evalKeyMap)); if (cryptoParams->GetScalingTechnique() == FIXEDMANUAL) { while (ctxtEnc[0]->GetNoiseScaleDeg() > 1) @@ -2956,14 +2812,13 @@ std::shared_ptr> FHECKKSRNS::EvalMVBPrecomputeInternal( if (digitBitSize == 1 && order == 1) { auto& coeff_cos = (skd == SPARSE_ENCAPSULATED) ? coeff_cos_16_double : coeff_cos_25_double; - ctxtEnc[0] = cc->EvalChebyshevSeries(ctxtEnc[0], coeff_cos, coeffLowerBound, coeffUpperBound); + ctxtEnc[0] = algo->EvalChebyshevSeries(ctxtEnc[0], coeff_cos, coeffLowerBound, coeffUpperBound); // Double angle-iterations to get cos(pi*x) cc->EvalSquareInPlace(ctxtEnc[0]); cc->EvalAddInPlaceNoCheck(ctxtEnc[0], ctxtEnc[0]); cc->EvalSubInPlace(ctxtEnc[0], 1.0); cc->ModReduceInPlace(ctxtEnc[0]); // cos(pi x) - cc->EvalSquareInPlace(ctxtEnc[0]); cc->ModReduceInPlace(ctxtEnc[0]); // cos^2(pi x) } @@ -2973,7 +2828,7 @@ std::shared_ptr> FHECKKSRNS::EvalMVBPrecomputeInternal( coeff_exp_25_double_58; // Obtain exp(Pi/2*i*x) approximation via Chebyshev Basis Polynomial Interpolation - ctxtEnc[0] = cc->EvalChebyshevSeries(ctxtEnc[0], coeff_exp, coeffLowerBound, coeffUpperBound); + ctxtEnc[0] = algo->EvalChebyshevSeries(ctxtEnc[0], coeff_exp, coeffLowerBound, coeffUpperBound); // Double angle-iterations to get exp(2*Pi*i*x) cc->EvalSquareInPlace(ctxtEnc[0]); @@ -2983,7 +2838,7 @@ std::shared_ptr> FHECKKSRNS::EvalMVBPrecomputeInternal( } // No need to scale the message back up after Chebyshev interpolation - ctxtPowers = cc->EvalPowers(ctxtEnc[0], coefficients); + ctxtPowers = algo->EvalPowers(ctxtEnc[0], coefficients); } // 64-bit only: No need to scale back the message to its original scale. 
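// Standalone sketch (not part of the patch): the double-angle step used above. Starting from an
// approximation of cos(pi*x/2), the update 2*y^2 - 1 yields cos(pi*x), and one more squaring
// gives cos^2(pi*x); homomorphically these are EvalSquare/EvalAdd/EvalSub followed by a rescale.
#include <cmath>
#include <cstdio>

int main() {
    const double x = 0.1875;                  // sample input in [-1, 1]
    double y = std::cos(M_PI * x / 2.0);      // what the Chebyshev series approximates

    y = 2.0 * y * y - 1.0;                    // double angle: now y == cos(pi * x)
    std::printf("cos(pi x):   %+.12f vs %+.12f\n", y, std::cos(M_PI * x));

    y = y * y;                                // one more squaring: cos^2(pi * x)
    std::printf("cos^2(pi x): %+.12f vs %+.12f\n", y, std::pow(std::cos(M_PI * x), 2));
}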
@@ -3003,7 +2858,7 @@ std::shared_ptr> FHECKKSRNS::EvalMVBPrecompute(ConstCiphe } template -Ciphertext FHECKKSRNS::EvalMVBNoDecodingInternal(const std::shared_ptr> ciphertexts, +Ciphertext FHECKKSRNS::EvalMVBNoDecodingInternal(const std::shared_ptr>& ciphertexts, const std::vector& coefficients, uint32_t digitBitSize, size_t order) { const auto cryptoParams = @@ -3014,13 +2869,13 @@ Ciphertext FHECKKSRNS::EvalMVBNoDecodingInternal(const std::shared_ptr OPENFHE_THROW("CKKS Bootstrapping is only supported for the Hybrid key switching method."); auto cc = ciphertexts->powersRe[0]->GetCryptoContext(); - uint32_t M = cc->GetCyclotomicOrder(); + uint32_t M4 = cc->GetCyclotomicOrder() / 4; uint32_t slots = ciphertexts->powersRe[0]->GetSlots(); auto algo = cc->GetScheme(); Ciphertext ctxtEnc; - if (slots == M / 4) { + if (slots == M4) { //------------------------------------------------------------------------------ // FULLY PACKED CASE //------------------------------------------------------------------------------ @@ -3069,14 +2924,13 @@ Ciphertext FHECKKSRNS::EvalMVBNoDecodingInternal(const std::shared_ptr // Take the real part // Division by 2 was already performed ctxtEnc = cc->EvalPolyWithPrecomp(ctxtPowersRe, coefficients); - cc->EvalAddInPlaceNoCheck(ctxtEnc, Conjugate(ctxtEnc, cc->GetEvalAutomorphismKeyMap(ctxtEnc->GetKeyTag()))); + cc->EvalAddInPlace(ctxtEnc, Conjugate(ctxtEnc, cc->GetEvalAutomorphismKeyMap(ctxtEnc->GetKeyTag()))); ctxtEncI = cc->EvalPolyWithPrecomp(ctxtPowersIm, coefficients); - cc->EvalAddInPlaceNoCheck(ctxtEncI, - Conjugate(ctxtEncI, cc->GetEvalAutomorphismKeyMap(ctxtEnc->GetKeyTag()))); + cc->EvalAddInPlace(ctxtEncI, Conjugate(ctxtEncI, cc->GetEvalAutomorphismKeyMap(ctxtEnc->GetKeyTag()))); } - algo->MultByMonomialInPlace(ctxtEncI, M / 4); - cc->EvalAddInPlaceNoCheck(ctxtEnc, ctxtEncI); + algo->MultByMonomialInPlace(ctxtEncI, M4); + cc->EvalAddInPlace(ctxtEnc, ctxtEncI); // No need to scale the message back up after Chebyshev interpolation } else { @@ -3189,9 +3043,8 @@ template Ciphertext FHECKKSRNS::EvalHermiteTrigSeriesInternal( ConstCiphertext& ciphertext, const std::vector>& coefficientsCheb, double a, double b, const std::vector& coefficientsHerm, size_t precomp) { - auto cc = ciphertext->GetCryptoContext(); - auto slots = ciphertext->GetSlots(); - auto& p = GetBootPrecom(slots); + auto cc = ciphertext->GetCryptoContext(); + auto& p = GetBootPrecom(ciphertext->GetSlots()); auto& ctxt_exp = (precomp == 0 || precomp == 2) ? p.m_precompExp : p.m_precompExpI; if (precomp == 0 || precomp == 1) { @@ -3208,7 +3061,6 @@ Ciphertext FHECKKSRNS::EvalHermiteTrigSeriesInternal( // Obtain the complex Hermite Trigonometric Interpolation via Power Basis Polynomial Interpolation // Coefficients are divided by 2 auto result = cc->EvalPoly(ctxt_exp, coefficientsHerm); - // Take the real part // Division by 2 was already performed cc->EvalAddInPlaceNoCheck(result, Conjugate(result, cc->GetEvalAutomorphismKeyMap(result->GetKeyTag()))); @@ -3360,6 +3212,7 @@ Ciphertext FHECKKSRNS::KeySwitchSparse(Ciphertext& ciphertex // modswitch cvRes from p*q to q, i.e., compute round(cvRes/p) mod q // In RNS, we use the technique described in Appendix B.2.2 of https://eprint.iacr.org/2021/204 for the BFV case, i.e., for t=1. 
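// Standalone sketch (not part of the patch): the slot-level effect of the recombination steps
// above. Adding a ciphertext to its conjugate doubles the real part (the series coefficients
// were pre-divided by 2), and MultByMonomialInPlace with power M/4 multiplies every slot by i,
// so the imaginary-branch result folds back into a single ciphertext.
#include <complex>
#include <cstdio>

int main() {
    using C = std::complex<double>;
    const C zRe{0.42, 0.17};   // slot value of the "real" evaluation branch (halved coefficients)
    const C zIm{0.10, -0.33};  // slot value of the "imaginary" evaluation branch

    const C re = zRe + std::conj(zRe);          // 2 * Re(zRe): real-part extraction
    const C im = zIm + std::conj(zIm);          // 2 * Re(zIm)
    const C combined = re + C{0.0, 1.0} * im;   // monomial X^{M/4} acts as multiplication by i

    std::printf("combined slot = %+.4f %+.4fi\n", combined.real(), combined.imag());
}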
+ for (uint32_t i = 0; i < 2; ++i) { auto polyP = cvRes[i].GetElementAtIndex(1); polyP.SetFormat(Format::COEFFICIENT); diff --git a/src/pke/lib/scheme/ckksrns/ckksrns-leveledshe.cpp b/src/pke/lib/scheme/ckksrns/ckksrns-leveledshe.cpp index 237368624..f4e666685 100644 --- a/src/pke/lib/scheme/ckksrns/ckksrns-leveledshe.cpp +++ b/src/pke/lib/scheme/ckksrns/ckksrns-leveledshe.cpp @@ -58,8 +58,13 @@ Ciphertext LeveledSHECKKSRNS::EvalAdd(ConstCiphertext& ciphe } void LeveledSHECKKSRNS::EvalAddInPlace(Ciphertext& ciphertext, double operand) const { - auto& cv = ciphertext->GetElements(); - cv[0] = cv[0] + GetElementForEvalAddOrSub(ciphertext, operand); + auto elmnts = GetElementForEvalAddOrSub(ciphertext, operand); + auto& polys = ciphertext->GetElements()[0].GetAllElements(); + + const uint32_t limit = polys.size(); +#pragma omp parallel for num_threads(OpenFHEParallelControls.GetThreadLimit(limit)) + for (uint32_t i = 0; i < limit; ++i) + polys[i] += elmnts[i]; } Ciphertext LeveledSHECKKSRNS::EvalAdd(ConstCiphertext& ciphertext, @@ -79,11 +84,11 @@ void LeveledSHECKKSRNS::EvalAddInPlace(Ciphertext& ciphertext, std::co auto posimag = operand.imag() > 0.; DCRTPoly elemsComplex(cv[0].GetParams(), Format::COEFFICIENT, true); - uint32_t sizeQl = elemsComplex.GetNumOfElements(); + const uint32_t sizeQl = elemsComplex.GetNumOfElements(); for (uint32_t i = 0; i < sizeQl; ++i) { auto element = cv[0].GetElementAtIndex(i); auto modulus = element.GetModulus(); - NativeVector vec(N, modulus.ConvertToInt()); + NativeVector vec(N, modulus); vec[0] = posreal ? NativeInteger(elemsRe[i].Mod(modulus)) : modulus.ModSub(elemsRe[i], modulus); vec[Nhalf] = posimag ? NativeInteger(elemsIm[i].Mod(modulus)) : modulus.ModSub(elemsIm[i], modulus); element.SetValues(std::move(vec), Format::COEFFICIENT); @@ -105,8 +110,13 @@ Ciphertext LeveledSHECKKSRNS::EvalSub(ConstCiphertext& ciphe } void LeveledSHECKKSRNS::EvalSubInPlace(Ciphertext& ciphertext, double operand) const { - auto& cv = ciphertext->GetElements(); - cv[0] = cv[0] - GetElementForEvalAddOrSub(ciphertext, operand); + auto elmnts = GetElementForEvalAddOrSub(ciphertext, operand); + auto& polys = ciphertext->GetElements()[0].GetAllElements(); + + const uint32_t limit = polys.size(); +#pragma omp parallel for num_threads(OpenFHEParallelControls.GetThreadLimit(limit)) + for (uint32_t i = 0; i < limit; ++i) + polys[i] -= elmnts[i]; } ///////////////////////////////////////// @@ -130,7 +140,7 @@ void LeveledSHECKKSRNS::EvalMultInPlace(Ciphertext& ciphertext, double Ciphertext LeveledSHECKKSRNS::EvalMult(ConstCiphertext& ciphertext, std::complex operand) const { - Ciphertext result = ciphertext->Clone(); + auto result = ciphertext->Clone(); EvalMultInPlace(result, operand); return result; } @@ -168,17 +178,13 @@ void LeveledSHECKKSRNS::ModReduceInternalInPlace(Ciphertext& ciphertex size_t sizeQl = cv[0].GetNumOfElements(); size_t diffQl = sizeQ - sizeQl; - for (size_t l = 0; l < levels; ++l) { - for (size_t i = 0; i < cv.size(); ++i) { - cv[i].DropLastElementAndScale(cryptoParams->GetQlQlInvModqlDivqlModq(diffQl + l), - cryptoParams->GetqlInvModq(diffQl + l)); - } - } - ciphertext->SetNoiseScaleDeg(ciphertext->GetNoiseScaleDeg() - levels / cryptoParams->GetCompositeDegree()); ciphertext->SetLevel(ciphertext->GetLevel() + levels); for (size_t i = 0; i < levels; ++i) { + for (auto& dcrtpoly : cv) + dcrtpoly.DropLastElementAndScale(cryptoParams->GetQlQlInvModqlDivqlModq(diffQl + i), + cryptoParams->GetqlInvModq(diffQl + i)); double modReduceFactor = 
cryptoParams->GetModReduceFactor(sizeQl - 1 - i); ciphertext->SetScalingFactor(ciphertext->GetScalingFactor() / modReduceFactor); } @@ -210,13 +216,12 @@ std::vector LeveledSHECKKSRNS::GetElementForEvalAddOrSub(Cons uint32_t precision = 52; double powP = std::pow(2, precision); - const std::vector& cv = ciphertext->GetElements(); - uint32_t numTowers = cv[0].GetNumOfElements(); + const auto& cv = ciphertext->GetElements(); + uint32_t numTowers = cv[0].GetNumOfElements(); std::vector moduli(numTowers); - for (uint32_t i = 0; i < numTowers; i++) { + for (uint32_t i = 0; i < numTowers; ++i) moduli[i] = cv[0].GetElementAtIndex(i).GetModulus(); - } // the idea is to break down real numbers // expressed as input_mantissa * 2^input_exponent @@ -251,21 +256,18 @@ std::vector LeveledSHECKKSRNS::GetElementForEvalAddOrSub(Cons std::vector currPowP(numTowers, scaledConstant); // multiply c*powP with powP a total of (depth-1) times to get c*powP^d - for (size_t i = 0; i < ciphertext->GetNoiseScaleDeg() - 1; i++) { + for (uint32_t i = 0; i < ciphertext->GetNoiseScaleDeg() - 1; ++i) currPowP = CKKSPackedEncoding::CRTMult(currPowP, crtPowP, moduli); - } - return currPowP; } #else // NATIVEINT == 64 std::vector LeveledSHECKKSRNS::GetElementForEvalAddOrSub(ConstCiphertext& ciphertext, double operand) const { - const std::vector& cv = ciphertext->GetElements(); - uint32_t sizeQl = cv[0].GetNumOfElements(); + const auto& polys = ciphertext->GetElements()[0].GetAllElements(); + const uint32_t sizeQl = polys.size(); std::vector moduli(sizeQl); - for (uint32_t i = 0; i < sizeQl; i++) { - moduli[i] = cv[0].GetElementAtIndex(i).GetModulus(); - } + for (uint32_t i = 0; i < sizeQl; ++i) + moduli[i] = polys[i].GetModulus(); const auto cryptoParams = std::dynamic_pointer_cast(ciphertext->GetCryptoParameters()); @@ -346,47 +348,42 @@ std::vector LeveledSHECKKSRNS::GetElementForEvalAddOrSub(Cons if (logSF_cp < 64) { DCRTPoly::Integer intScFactor = static_cast(scFactor + 0.5); std::vector crtScFactor(sizeQl, intScFactor); - for (uint32_t i = 1; i < ciphertext->GetNoiseScaleDeg(); i++) { + for (uint32_t i = 1; i < ciphertext->GetNoiseScaleDeg(); ++i) crtConstant = CKKSPackedEncoding::CRTMult(crtConstant, crtScFactor, moduli); - } } else { // Multiply scFactor in two steps: scFactor / approxFactor and then approxFactor DCRTPoly::Integer intScFactor = static_cast(scFactor / approxFactor + 0.5); std::vector crtScFactor(sizeQl, intScFactor); - for (uint32_t i = 1; i < ciphertext->GetNoiseScaleDeg(); i++) { + for (uint32_t i = 1; i < ciphertext->GetNoiseScaleDeg(); ++i) crtConstant = CKKSPackedEncoding::CRTMult(crtConstant, crtScFactor, moduli); - } if (logApprox_cp > 0) { - int32_t logStep = (logApprox_cp <= LargeScalingFactorConstants::MAX_LOG_STEP) ? - logApprox_cp : - LargeScalingFactorConstants::MAX_LOG_STEP; - DCRTPoly::Integer intStep = static_cast(1) << logStep; + int32_t logStep = (logApprox_cp <= LargeScalingFactorConstants::MAX_LOG_STEP) ? + logApprox_cp : + LargeScalingFactorConstants::MAX_LOG_STEP; + auto intStep = DCRTPoly::Integer(1) << logStep; std::vector crtApprox(sizeQl, intStep); logApprox_cp -= logStep; while (logApprox_cp > 0) { - int32_t logStep = (logApprox_cp <= LargeScalingFactorConstants::MAX_LOG_STEP) ? - logApprox_cp : - LargeScalingFactorConstants::MAX_LOG_STEP; - DCRTPoly::Integer intStep = static_cast(1) << logStep; + int32_t logStep = (logApprox_cp <= LargeScalingFactorConstants::MAX_LOG_STEP) ? 
+ logApprox_cp : + LargeScalingFactorConstants::MAX_LOG_STEP; + auto intStep = DCRTPoly::Integer(1) << logStep; std::vector crtSF(sizeQl, intStep); crtApprox = CKKSPackedEncoding::CRTMult(crtApprox, crtSF, moduli); logApprox_cp -= logStep; } - for (uint32_t i = 1; i < ciphertext->GetNoiseScaleDeg(); i++) { + for (uint32_t i = 1; i < ciphertext->GetNoiseScaleDeg(); ++i) crtConstant = CKKSPackedEncoding::CRTMult(crtConstant, crtApprox, moduli); - } } } } else { DCRTPoly::Integer intScFactor = static_cast(scFactor + 0.5); std::vector crtScFactor(sizeQl, intScFactor); - - for (uint32_t i = 1; i < ciphertext->GetNoiseScaleDeg(); i++) { + for (uint32_t i = 1; i < ciphertext->GetNoiseScaleDeg(); ++i) crtConstant = CKKSPackedEncoding::CRTMult(crtConstant, crtScFactor, moduli); - } } return crtConstant; @@ -420,8 +417,8 @@ std::vector LeveledSHECKKSRNS::GetElementForEvalMult(ConstCip scaled128 = ppRemaining * scaled64; } - const std::vector& cv = ciphertext->GetElements(); - uint32_t numTowers = cv[0].GetNumOfElements(); + const auto& cv = ciphertext->GetElements(); + uint32_t numTowers = cv[0].GetNumOfElements(); std::vector factors(numTowers); for (uint32_t i = 0; i < numTowers; i++) { @@ -448,9 +445,8 @@ std::vector LeveledSHECKKSRNS::GetElementForEvalMult(ConstCip const std::vector& cv = ciphertext->GetElements(); uint32_t numTowers = cv[0].GetNumOfElements(); std::vector moduli(numTowers); - for (uint32_t i = 0; i < numTowers; i++) { + for (uint32_t i = 0; i < numTowers; ++i) moduli[i] = cv[0].GetElementAtIndex(i).GetModulus(); - } double scFactor = cryptoParams->GetScalingFactorReal(ciphertext->GetLevel()); @@ -519,10 +515,10 @@ std::vector LeveledSHECKKSRNS::GetElementForEvalMult(ConstCip logApprox -= logStep; while (logApprox > 0) { - int32_t logStep = (logApprox <= LargeScalingFactorConstants::MAX_LOG_STEP) ? - logApprox : - LargeScalingFactorConstants::MAX_LOG_STEP; - DCRTPoly::Integer intStep = static_cast(1) << logStep; + int32_t logStep = (logApprox <= LargeScalingFactorConstants::MAX_LOG_STEP) ? + logApprox : + LargeScalingFactorConstants::MAX_LOG_STEP; + auto intStep = DCRTPoly::Integer(1) << logStep; std::vector crtSF(numTowers, intStep); crtApprox = CKKSPackedEncoding::CRTMult(crtApprox, crtSF, moduli); logApprox -= logStep; @@ -543,49 +539,45 @@ Ciphertext LeveledSHECKKSRNS::EvalFastRotationExt( // return result; // } - const auto cc = ciphertext->GetCryptoContext(); - const auto cryptoParams = std::dynamic_pointer_cast(ciphertext->GetCryptoParameters()); - uint32_t N = cryptoParams->GetElementParams()->GetRingDimension(); - uint32_t M = cryptoParams->GetElementParams()->GetCyclotomicOrder(); + const uint32_t M = cryptoParams->GetElementParams()->GetCyclotomicOrder(); // Find the automorphism index that corresponds to rotation index index. - uint32_t autoIndex = FindAutomorphismIndex2nComplex(index, M); + const uint32_t autoIndex = FindAutomorphismIndex2nComplex(index, M); // Retrieve the automorphism key that corresponds to the auto index. 
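// Standalone sketch (not part of the patch): the core idea behind GetElementForEvalAddOrSub in
// the 64-bit path above. The double operand is scaled by the current scaling factor, rounded,
// and reduced modulo every RNS modulus, with negatives wrapping to q_i - |v|. The real routine
// additionally raises the constant to the ciphertext's noise scale degree and handles scaling
// factors above 2^64; the moduli and factor below are assumed toy values.
#include <cmath>
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
    const double operand  = -0.75;
    const double scFactor = std::pow(2.0, 40);
    const std::vector<uint64_t> moduli = {1099511590913ULL, 1099511592961ULL};

    const int64_t scaled  = std::llround(operand * scFactor);            // signed integer constant
    const uint64_t absVal = (scaled < 0) ? uint64_t(-scaled) : uint64_t(scaled);

    std::vector<uint64_t> crtConstant(moduli.size());
    for (size_t i = 0; i < moduli.size(); ++i) {
        const uint64_t mag = absVal % moduli[i];
        crtConstant[i]     = (scaled < 0 && mag != 0) ? moduli[i] - mag : mag;  // residue added to c0 later
    }
    for (auto r : crtConstant) std::cout << r << ' ';
    std::cout << '\n';
}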
auto evalKeyIterator = evalKeys.find(autoIndex); - if (evalKeyIterator == evalKeys.end()) { + if (evalKeyIterator == evalKeys.end()) OPENFHE_THROW("EvalKey for index [" + std::to_string(autoIndex) + "] is not found."); - } - auto evalKey = evalKeyIterator->second; + auto& evalKey = evalKeyIterator->second; - const std::vector& cv = ciphertext->GetElements(); - const auto paramsQl = cv[0].GetParams(); - - auto algo = cc->GetScheme(); + const auto& cv = ciphertext->GetElements(); + const auto paramsQl = cv[0].GetParams(); - std::shared_ptr> cTilda = algo->EvalFastKeySwitchCoreExt(digits, evalKey, paramsQl); + const auto cc = ciphertext->GetCryptoContext(); + auto cTilda = *cc->GetScheme()->EvalFastKeySwitchCoreExt(digits, evalKey, paramsQl); if (addFirst) { - const auto paramsQlP = (*cTilda)[0].GetParams(); - size_t sizeQl = paramsQl->GetParams().size(); - DCRTPoly psiC0 = DCRTPoly(paramsQlP, Format::EVALUATION, true); - auto cMult = ciphertext->GetElements()[0].TimesNoCheck(cryptoParams->GetPModq()); - for (uint32_t i = 0; i < sizeQl; i++) { + DCRTPoly psiC0(cTilda[0].GetParams(), Format::EVALUATION, true); + auto cMult = cv[0].TimesNoCheck(cryptoParams->GetPModq()); + + const uint32_t sizeQl = paramsQl->GetParams().size(); + for (uint32_t i = 0; i < sizeQl; ++i) psiC0.SetElementAtIndex(i, std::move(cMult.GetElementAtIndex(i))); - } - (*cTilda)[0] += psiC0; + + cTilda[0] += psiC0; } + const uint32_t N = cryptoParams->GetElementParams()->GetRingDimension(); std::vector vec(N); PrecomputeAutoMap(N, autoIndex, &vec); - (*cTilda)[0] = (*cTilda)[0].AutomorphismTransform(autoIndex, vec); - (*cTilda)[1] = (*cTilda)[1].AutomorphismTransform(autoIndex, vec); + cTilda[0] = cTilda[0].AutomorphismTransform(autoIndex, vec); + cTilda[1] = cTilda[1].AutomorphismTransform(autoIndex, vec); auto result = ciphertext->CloneEmpty(); - result->SetElements({std::move((*cTilda)[0]), std::move((*cTilda)[1])}); + result->SetElements(std::move(cTilda)); return result; } @@ -603,21 +595,20 @@ Ciphertext LeveledSHECKKSRNS::MultByInteger(ConstCiphertext& } void LeveledSHECKKSRNS::MultByIntegerInPlace(Ciphertext& ciphertext, uint64_t integer) const { - std::vector& cv = ciphertext->GetElements(); - - for (uint32_t i = 0; i < cv.size(); i++) + auto& cv = ciphertext->GetElements(); + for (uint32_t i = 0; i < cv.size(); ++i) cv[i] = cv[i].Times(NativeInteger(integer)); } void LeveledSHECKKSRNS::AdjustLevelsAndDepthInPlace(Ciphertext& ciphertext1, Ciphertext& ciphertext2) const { + const uint32_t c1lvl = ciphertext1->GetLevel(); + const uint32_t c2lvl = ciphertext2->GetLevel(); + const uint32_t c1depth = ciphertext1->GetNoiseScaleDeg(); + const uint32_t c2depth = ciphertext2->GetNoiseScaleDeg(); + const uint32_t sizeQl1 = ciphertext1->GetElements()[0].GetNumOfElements(); + const uint32_t sizeQl2 = ciphertext2->GetElements()[0].GetNumOfElements(); const auto cryptoParams = std::dynamic_pointer_cast(ciphertext1->GetCryptoParameters()); - uint32_t c1lvl = ciphertext1->GetLevel(); - uint32_t c2lvl = ciphertext2->GetLevel(); - uint32_t c1depth = ciphertext1->GetNoiseScaleDeg(); - uint32_t c2depth = ciphertext2->GetNoiseScaleDeg(); - auto sizeQl1 = ciphertext1->GetElements()[0].GetNumOfElements(); - auto sizeQl2 = ciphertext2->GetElements()[0].GetNumOfElements(); uint32_t compositeDegree = cryptoParams->GetCompositeDegree(); if (c1lvl < c2lvl) { @@ -627,14 +618,12 @@ void LeveledSHECKKSRNS::AdjustLevelsAndDepthInPlace(Ciphertext& cipher double scf2 = ciphertext2->GetScalingFactor(); double scf = 
cryptoParams->GetScalingFactorReal(c1lvl); double q1 = cryptoParams->GetModReduceFactor(sizeQl1 - 1); - for (uint32_t j = 1; j < compositeDegree; j++) { + for (uint32_t j = 1; j < compositeDegree; ++j) q1 *= cryptoParams->GetModReduceFactor(sizeQl1 - j - 1); - } EvalMultCoreInPlace(ciphertext1, scf2 / scf1 * q1 / scf); ModReduceInternalInPlace(ciphertext1, compositeDegree); - if (c1lvl + compositeDegree < c2lvl) { + if (c1lvl + compositeDegree < c2lvl) LevelReduceInternalInPlace(ciphertext1, c2lvl - c1lvl - compositeDegree); - } ciphertext1->SetScalingFactor(ciphertext2->GetScalingFactor()); } else { @@ -646,14 +635,12 @@ void LeveledSHECKKSRNS::AdjustLevelsAndDepthInPlace(Ciphertext& cipher double scf2 = cryptoParams->GetScalingFactorRealBig(c2lvl - compositeDegree); double scf = cryptoParams->GetScalingFactorReal(c1lvl); double q1 = cryptoParams->GetModReduceFactor(sizeQl1 - 1); - for (uint32_t j = 1; j < compositeDegree; j++) { + for (uint32_t j = 1; j < compositeDegree; ++j) q1 *= cryptoParams->GetModReduceFactor(sizeQl1 - j - 1); - } EvalMultCoreInPlace(ciphertext1, scf2 / scf1 * q1 / scf); ModReduceInternalInPlace(ciphertext1, compositeDegree); - if (c1lvl + 2 * compositeDegree < c2lvl) { + if (c1lvl + 2 * compositeDegree < c2lvl) LevelReduceInternalInPlace(ciphertext1, c2lvl - c1lvl - 2 * compositeDegree); - } ModReduceInternalInPlace(ciphertext1, compositeDegree); ciphertext1->SetScalingFactor(ciphertext2->GetScalingFactor()); } @@ -673,9 +660,8 @@ void LeveledSHECKKSRNS::AdjustLevelsAndDepthInPlace(Ciphertext& cipher double scf2 = cryptoParams->GetScalingFactorRealBig(c2lvl - compositeDegree); double scf = cryptoParams->GetScalingFactorReal(c1lvl); EvalMultCoreInPlace(ciphertext1, scf2 / scf1 / scf); - if (c1lvl + compositeDegree < c2lvl) { + if (c1lvl + compositeDegree < c2lvl) LevelReduceInternalInPlace(ciphertext1, c2lvl - c1lvl - compositeDegree); - } ModReduceInternalInPlace(ciphertext1, compositeDegree); ciphertext1->SetScalingFactor(ciphertext2->GetScalingFactor()); } @@ -688,14 +674,12 @@ void LeveledSHECKKSRNS::AdjustLevelsAndDepthInPlace(Ciphertext& cipher double scf1 = ciphertext1->GetScalingFactor(); double scf = cryptoParams->GetScalingFactorReal(c2lvl); double q2 = cryptoParams->GetModReduceFactor(sizeQl2 - 1); - for (uint32_t j = 1; j < compositeDegree; j++) { + for (uint32_t j = 1; j < compositeDegree; ++j) q2 *= cryptoParams->GetModReduceFactor(sizeQl2 - j - 1); - } EvalMultCoreInPlace(ciphertext2, scf1 / scf2 * q2 / scf); ModReduceInternalInPlace(ciphertext2, compositeDegree); - if (c2lvl + compositeDegree < c1lvl) { + if (c2lvl + compositeDegree < c1lvl) LevelReduceInternalInPlace(ciphertext2, c1lvl - c2lvl - compositeDegree); - } ciphertext2->SetScalingFactor(ciphertext1->GetScalingFactor()); } else { @@ -707,14 +691,12 @@ void LeveledSHECKKSRNS::AdjustLevelsAndDepthInPlace(Ciphertext& cipher double scf1 = cryptoParams->GetScalingFactorRealBig(c1lvl - compositeDegree); double scf = cryptoParams->GetScalingFactorReal(c2lvl); double q2 = cryptoParams->GetModReduceFactor(sizeQl2 - 1); - for (uint32_t j = 1; j < compositeDegree; j++) { + for (uint32_t j = 1; j < compositeDegree; ++j) q2 *= cryptoParams->GetModReduceFactor(sizeQl2 - j - 1); - } EvalMultCoreInPlace(ciphertext2, scf1 / scf2 * q2 / scf); ModReduceInternalInPlace(ciphertext2, compositeDegree); - if (c2lvl + 2 * compositeDegree < c1lvl) { + if (c2lvl + 2 * compositeDegree < c1lvl) LevelReduceInternalInPlace(ciphertext2, c1lvl - c2lvl - 2 * compositeDegree); - } ModReduceInternalInPlace(ciphertext2, 
compositeDegree); ciphertext2->SetScalingFactor(ciphertext1->GetScalingFactor()); } @@ -734,9 +716,8 @@ void LeveledSHECKKSRNS::AdjustLevelsAndDepthInPlace(Ciphertext& cipher double scf1 = cryptoParams->GetScalingFactorRealBig(c1lvl - compositeDegree); double scf = cryptoParams->GetScalingFactorReal(c2lvl); EvalMultCoreInPlace(ciphertext2, scf1 / scf2 / scf); - if (c2lvl + compositeDegree < c1lvl) { + if (c2lvl + compositeDegree < c1lvl) LevelReduceInternalInPlace(ciphertext2, c1lvl - c2lvl - compositeDegree); - } ModReduceInternalInPlace(ciphertext2, compositeDegree); ciphertext2->SetScalingFactor(ciphertext1->GetScalingFactor()); } @@ -765,62 +746,40 @@ void LeveledSHECKKSRNS::AdjustLevelsAndDepthToOneInPlace(Ciphertext& c } void LeveledSHECKKSRNS::EvalMultCoreInPlace(Ciphertext& ciphertext, double operand) const { - const auto cryptoParams = std::dynamic_pointer_cast(ciphertext->GetCryptoParameters()); - - auto factors = GetElementForEvalMult(ciphertext, operand); auto& cv = ciphertext->GetElements(); - uint32_t len = cv.size(); - for (uint32_t i = 0; i < len; ++i) + auto factors = GetElementForEvalMult(ciphertext, operand); + for (uint32_t i = 0; i < cv.size(); ++i) cv[i] = cv[i] * factors; + ciphertext->SetNoiseScaleDeg(ciphertext->GetNoiseScaleDeg() + 1); - double scFactor = cryptoParams->GetScalingFactorReal(ciphertext->GetLevel()); + auto cryptoParams = std::dynamic_pointer_cast(ciphertext->GetCryptoParameters()); + double scFactor = cryptoParams->GetScalingFactorReal(ciphertext->GetLevel()); ciphertext->SetScalingFactor(ciphertext->GetScalingFactor() * scFactor); } void LeveledSHECKKSRNS::EvalMultCoreInPlace(Ciphertext& ciphertext, std::complex operand) const { - const auto cryptoParams = std::dynamic_pointer_cast(ciphertext->GetCryptoParameters()); - - double operandRe = operand.real(); - double operandIm = operand.imag(); - - auto factorsRe = GetElementForEvalMult(ciphertext, operandRe); - auto factorsIm = GetElementForEvalMult(ciphertext, operandIm); - auto& cv = ciphertext->GetElements(); - std::vector cvRe; - cvRe.reserve(cv.size()); - std::vector cvIm; - cvIm.reserve(cv.size()); - for (uint32_t i = 0; i < cv.size(); ++i) { - cvRe.emplace_back(cv[i] * factorsRe); - cvIm.emplace_back(cv[i] * factorsIm); - } + auto& cv = ciphertext->GetElements(); // MultByMonomialInPlace - const auto& elemParams = cv[0].GetParams(); - const auto& paramsNative = elemParams->GetParams()[0]; - uint32_t N = elemParams->GetRingDimension(); - uint32_t M = 2 * N; - - NativePoly monomial(paramsNative, Format::COEFFICIENT, true); - - uint32_t power = M / 4; - uint32_t powerReduced = power % M; - uint32_t index = power % N; - monomial[index] = powerReduced < N ? 
NativeInteger(1) : paramsNative->GetModulus() - NativeInteger(1); + const auto& elemParams = cv[0].GetParams(); + NativePoly monomial(elemParams->GetParams()[0], Format::COEFFICIENT, true); + monomial[elemParams->GetCyclotomicOrder() >> 2] = NativeInteger(1); DCRTPoly monomialDCRT(elemParams, Format::COEFFICIENT, true); monomialDCRT = monomial; monomialDCRT.SetFormat(Format::EVALUATION); - for (uint32_t i = 0; i < cv.size(); ++i) { - cvIm[i] *= monomialDCRT; - cv[i] = cvRe[i] + cvIm[i]; - } + auto factorsRe = GetElementForEvalMult(ciphertext, operand.real()); + auto factorsIm = monomialDCRT * GetElementForEvalMult(ciphertext, operand.imag()); + + for (uint32_t i = 0; i < cv.size(); ++i) + cv[i] = (cv[i] * factorsRe) + (cv[i] * factorsIm); ciphertext->SetNoiseScaleDeg(ciphertext->GetNoiseScaleDeg() + 1); - double scFactor = cryptoParams->GetScalingFactorReal(ciphertext->GetLevel()); + auto cryptoParams = std::dynamic_pointer_cast(ciphertext->GetCryptoParameters()); + double scFactor = cryptoParams->GetScalingFactorReal(ciphertext->GetLevel()); ciphertext->SetScalingFactor(ciphertext->GetScalingFactor() * scFactor); } diff --git a/src/pke/lib/scheme/ckksrns/ckksrns-utils.cpp b/src/pke/lib/scheme/ckksrns/ckksrns-utils.cpp index 1586663a5..2696618c1 100644 --- a/src/pke/lib/scheme/ckksrns/ckksrns-utils.cpp +++ b/src/pke/lib/scheme/ckksrns/ckksrns-utils.cpp @@ -697,7 +697,7 @@ std::vector>>> CoeffDecodingCollaps return coeff; } -std::vector GetCollapsedFFTParams(uint32_t slots, uint32_t levelBudget, uint32_t dim1) { +struct ckks_boot_params GetCollapsedFFTParams(uint32_t slots, uint32_t levelBudget, uint32_t dim1) { if (slots == 0) OPENFHE_THROW("slots can not be 0"); if (levelBudget == 0) @@ -706,15 +706,15 @@ std::vector GetCollapsedFFTParams(uint32_t slots, uint32_t levelBudget, // even for the case of (slots = 1) we need one level for rescaling as (std::log2(1) = 0) uint32_t logSlots = (slots < 3) ? 1 : std::log2(slots); - std::vector dims = SelectLayers(logSlots, levelBudget); // Need to compute how many layers are collapsed in each of the level from the budget. // If there is no exact division between the maximum number of possible levels (log(slots)) and the // level budget, the last level will contain the remaining layers collapsed. - const uint32_t layersCollapse = dims[0]; - const uint32_t remCollapse = dims[2]; + auto dims = SelectLayers(logSlots, levelBudget); + uint32_t layersCollapse = dims[0]; + uint32_t remCollapse = dims[2]; - const uint32_t numRotations = (1U << (layersCollapse + 1)) - 1; - const uint32_t numRotationsRem = (1U << (remCollapse + 1)) - 1; + uint32_t numRotations = (1U << (layersCollapse + 1)) - 1; + uint32_t numRotationsRem = (1U << (remCollapse + 1)) - 1; // Computing the baby-step b and the giant-step g for the collapsed layers for decoding. uint32_t g = (dim1 == 0 || dim1 > numRotations) ? (1U << (layersCollapse / 2 + 1 + (numRotations > 7))) : dim1; @@ -723,16 +723,7 @@ std::vector GetCollapsedFFTParams(uint32_t slots, uint32_t levelBudget, uint32_t gRem = (remCollapse != 0) ? (1U << (remCollapse / 2 + 1 + (numRotationsRem > 7))) : 0; uint32_t bRem = (remCollapse != 0) ? 
(numRotationsRem + 1) / gRem : 0; - // If this return statement changes then CKKS_BOOT_PARAMS should be altered as well - return {static_cast(levelBudget), - static_cast(layersCollapse), - static_cast(remCollapse), - static_cast(numRotations), - static_cast(b), - static_cast(g), - static_cast(numRotationsRem), - static_cast(bRem), - static_cast(gRem)}; + return {levelBudget, layersCollapse, remCollapse, numRotations, b, g, numRotationsRem, bRem, gRem}; } uint32_t getRatioBSGSLT(uint32_t slots) { // returns powers of two diff --git a/src/pke/lib/schemebase/base-leveledshe.cpp b/src/pke/lib/schemebase/base-leveledshe.cpp index c56d2a4a8..f1a5fdc44 100644 --- a/src/pke/lib/schemebase/base-leveledshe.cpp +++ b/src/pke/lib/schemebase/base-leveledshe.cpp @@ -355,8 +355,8 @@ std::shared_ptr>> LeveledSHEBase::E const auto cc = privateKey->GetCryptoContext(); const auto& s = privateKey->GetPrivateElement(); - uint32_t N = s.GetRingDimension(); - uint32_t M = 2 * N; + const uint32_t N = s.GetRingDimension(); + const uint32_t M = s.GetCyclotomicOrder(); // we already have checks on higher level? // if (indexList.size() > N - 1) @@ -366,18 +366,17 @@ std::shared_ptr>> LeveledSHEBase::E // we should be able to assign values to the map without using "omp critical" as all evalKeys' elements would // have already been created auto evalKeys = std::make_shared>>(); - for (auto indx : indexList) { + for (auto indx : indexList) (*evalKeys)[indx]; - } - const size_t sz = indexList.size(); -#pragma omp parallel for - for (size_t i = 0; i < sz; ++i) { - auto privateKeyPermuted = std::make_shared>(cc); - uint32_t index = NativeInteger(indexList[i]).ModInverse(M).ConvertToInt(); + const uint32_t sz = indexList.size(); +#pragma omp parallel for + for (uint32_t i = 0; i < sz; ++i) { + auto index = NativeInteger(indexList[i]).ModInverse(M).ConvertToInt(); std::vector vec(N); PrecomputeAutoMap(N, index, &vec); + auto privateKeyPermuted = std::make_shared>(cc); privateKeyPermuted->SetPrivateElement(s.AutomorphismTransform(index, vec)); (*evalKeys)[indexList[i]] = cc->GetScheme()->KeySwitchGen(privateKey, privateKeyPermuted); } @@ -453,19 +452,19 @@ Ciphertext LeveledSHEBase::EvalFastRotation( const auto cryptoParams = ciphertext->GetCryptoParameters(); - uint32_t N = cryptoParams->GetElementParams()->GetRingDimension(); + const uint32_t N = cryptoParams->GetElementParams()->GetRingDimension(); std::vector vec(N); PrecomputeAutoMap(N, autoIndex, &vec); const auto& cv = ciphertext->GetElements(); - auto ba = cc->GetScheme()->EvalFastKeySwitchCore(digits, evalKey, cv[0].GetParams()); - (*ba)[0] += cv[0]; - (*ba)[0] = (*ba)[0].AutomorphismTransform(autoIndex, vec); - (*ba)[1] = (*ba)[1].AutomorphismTransform(autoIndex, vec); + auto ba = *cc->GetScheme()->EvalFastKeySwitchCore(digits, evalKey, cv[0].GetParams()); + ba[0] += cv[0]; + ba[0] = ba[0].AutomorphismTransform(autoIndex, vec); + ba[1] = ba[1].AutomorphismTransform(autoIndex, vec); - auto result = ciphertext->Clone(); - result->SetElements({std::move((*ba)[0]), std::move((*ba)[1])}); + auto result = ciphertext->CloneEmpty(); + result->SetElements(std::move(ba)); return result; } diff --git a/src/pke/unittest/utckksrns/UnitTestFBT.cpp b/src/pke/unittest/utckksrns/UnitTestFBT.cpp index e5ef41a3e..9dc7c5c33 100644 --- a/src/pke/unittest/utckksrns/UnitTestFBT.cpp +++ b/src/pke/unittest/utckksrns/UnitTestFBT.cpp @@ -438,6 +438,8 @@ class UTCKKSRNS_FBT : public ::testing::TestWithParam { // std::cerr << "\n=======Error count: " << std::accumulate(exact.begin(), 
exact.end(), 0) << "\n"; // std::cerr << "\n=======Max absolute error: " << *max_error_it << "\n"; checkEquality((*max_error_it), int64_t(0), 0.0001, failmsg + " LUT evaluation fails"); + + cc->ClearStaticMapsAndVectors(); } catch (std::exception& e) { std::cerr << "Exception thrown from " << __func__ << "(): " << e.what() << std::endl; @@ -675,6 +677,8 @@ class UTCKKSRNS_FBT : public ::testing::TestWithParam { levelsToDrop = lvlsToDrop; } } + + cc->ClearStaticMapsAndVectors(); } catch (std::exception& e) { std::cerr << "Exception thrown from " << __func__ << "(): " << e.what() << std::endl; @@ -888,6 +892,8 @@ class UTCKKSRNS_FBT : public ::testing::TestWithParam { // std::cerr << "\n=======Error count: " << std::accumulate(exact.begin(), exact.end(), 0) << "\n"; // std::cerr << "\n=======Max absolute error: " << *max_error_it << "\n"; checkEquality((*max_error_it), int64_t(0), 0.0001, failmsg + " LUT evaluation fails"); + + cc->ClearStaticMapsAndVectors(); } catch (std::exception& e) { std::cerr << "Exception thrown from " << __func__ << "(): " << e.what() << std::endl; @@ -1087,6 +1093,8 @@ class UTCKKSRNS_FBT : public ::testing::TestWithParam { // std::cerr << "\n=======Error count: " << std::accumulate(exact.begin(), exact.end(), 0) << "\n"; // std::cerr << "\n=======Max absolute error: " << *max_error_it << "\n"; checkEquality((*max_error_it), int64_t(0), 0.0001, failmsg + " LUT evaluation fails"); + + cc->ClearStaticMapsAndVectors(); } catch (std::exception& e) { std::cerr << "Exception thrown from " << __func__ << "(): " << e.what() << std::endl;
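// Standalone sketch (not part of the patch): how the baby-step/giant-step parameters returned by
// GetCollapsedFFTParams above relate to each other. layersCollapse and remCollapse are taken as
// given here (the library derives them via SelectLayers), and the formula for b is assumed to
// mirror the one shown for bRem.
#include <cstdint>
#include <iostream>

int main() {
    const uint32_t layersCollapse = 3;   // layers merged per full level (assumed)
    const uint32_t remCollapse    = 2;   // leftover layers in the last level (assumed)
    const uint32_t dim1           = 0;   // 0 lets the library pick the giant step

    const uint32_t numRotations    = (1U << (layersCollapse + 1)) - 1;   // 15
    const uint32_t numRotationsRem = (1U << (remCollapse + 1)) - 1;      // 7

    const uint32_t g = (dim1 == 0 || dim1 > numRotations)
                           ? (1U << (layersCollapse / 2 + 1 + (numRotations > 7)))
                           : dim1;                                        // giant step
    const uint32_t b    = (numRotations + 1) / g;                         // baby step (assumed rule)
    const uint32_t gRem = (remCollapse != 0) ? (1U << (remCollapse / 2 + 1 + (numRotationsRem > 7))) : 0;
    const uint32_t bRem = (remCollapse != 0) ? (numRotationsRem + 1) / gRem : 0;

    std::cout << "numRotations=" << numRotations << " g=" << g << " b=" << b
              << " numRotationsRem=" << numRotationsRem << " gRem=" << gRem
              << " bRem=" << bRem << '\n';
}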