Skip to content

Commit

Permalink
Move decimal-to-string API to DecimalUtil (facebookincubator#10477)
Browse files Browse the repository at this point in the history
Summary:
Moves the 'convertToStringView' function to DecimalUtil so that the Spark
function 'toprettystring' can reuse it, and renames it to 'castToString'.

Pull Request resolved: facebookincubator#10477

Reviewed By: xiaoxmeng

Differential Revision: D61350958

Pulled By: kgpai

fbshipit-source-id: 2f30ff44e854a1d4e2eec07816c4acddb2ecab4e
  • Loading branch information
jinchengchenghh authored and facebook-github-bot committed Aug 16, 2024
1 parent d23dde5 commit 7fd6273
Show file tree
Hide file tree
Showing 4 changed files with 152 additions and 80 deletions.
88 changes: 8 additions & 80 deletions velox/expression/CastExpr-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
*/
#pragma once

#include <charconv>

#include "velox/common/base/CountBits.h"
#include "velox/common/base/Exceptions.h"
#include "velox/core/CoreTypeSystem.h"
Expand Down Expand Up @@ -52,66 +50,6 @@ inline std::exception_ptr makeBadCastException(
false));
}

/// @brief Convert the unscaled value of a decimal to varchar and write to raw
/// string buffer from start position.
/// @tparam T The type of input value.
/// @param unscaledValue The input unscaled value.
/// @param scale The scale of decimal.
/// @param maxVarcharSize The estimated max size of a varchar. It is used,
/// measured from startPosition, as the end bound handed to std::to_chars.
/// @param startPosition The start position to write from.
/// @return A string view over the characters written from startPosition.
template <typename T>
StringView convertToStringView(
    T unscaledValue,
    int32_t scale,
    int32_t maxVarcharSize,
    char* const startPosition) {
  char* writePosition = startPosition;
  // The end bound is anchored at the start of the buffer. Deriving it from
  // 'writePosition' after a sign, the integral digits or the dot have been
  // written would let std::to_chars accept output past the estimated size.
  char* const endPosition = startPosition + maxVarcharSize;
  if (unscaledValue == 0) {
    *writePosition++ = '0';
    if (scale > 0) {
      *writePosition++ = '.';
      // A zero value has an all-zero fraction of 'scale' digits.
      std::memset(writePosition, '0', scale);
      writePosition += scale;
    }
  } else {
    if (unscaledValue < 0) {
      *writePosition++ = '-';
      unscaledValue = -unscaledValue;
    }
    // Integral part: the unscaled value with the last 'scale' digits dropped.
    auto [position, errorCode] = std::to_chars(
        writePosition,
        endPosition,
        unscaledValue / DecimalUtil::kPowersOfTen[scale]);
    VELOX_DCHECK_EQ(
        errorCode,
        std::errc(),
        "Failed to cast decimal to varchar: {}",
        std::make_error_code(errorCode).message());
    writePosition = position;

    if (scale > 0) {
      *writePosition++ = '.';
      uint128_t fraction = unscaledValue % DecimalUtil::kPowersOfTen[scale];
      // Pad with zeros so the fraction always occupies 'scale' digits.
      int numLeadingZeros = std::max(scale - countDigits(fraction), 0);
      std::memset(writePosition, '0', numLeadingZeros);
      writePosition += numLeadingZeros;
      // Append remaining fraction digits.
      auto result = std::to_chars(writePosition, endPosition, fraction);
      VELOX_DCHECK_EQ(
          result.ec,
          std::errc(),
          "Failed to cast decimal to varchar: {}",
          std::make_error_code(result.ec).message());
      writePosition = result.ptr;
    }
  }
  return StringView(startPosition, writePosition - startPosition);
}

} // namespace

namespace detail {
Expand Down Expand Up @@ -632,24 +570,14 @@ VectorPtr CastExpr::applyDecimalToVarcharCast(
const auto simpleInput = input.as<SimpleVector<FromNativeType>>();
int precision = getDecimalPrecisionScale(*fromType).first;
int scale = getDecimalPrecisionScale(*fromType).second;
// A varchar's size is estimated with unscaled value digits, dot, leading
// zero, and possible minus sign.
int32_t rowSize = precision + 1;
if (scale > 0) {
++rowSize; // A dot.
}
if (precision == scale) {
++rowSize; // Leading zero.
}

auto rowSize = DecimalUtil::maxStringViewSize(precision, scale);
auto flatResult = result->asFlatVector<StringView>();
if (StringView::isInline(rowSize)) {
char inlined[StringView::kInlineSize];
applyToSelectedNoThrowLocal(context, rows, result, [&](vector_size_t row) {
flatResult->setNoCopy(
row,
convertToStringView<FromNativeType>(
simpleInput->valueAt(row), scale, rowSize, inlined));
auto actualSize = DecimalUtil::castToString<FromNativeType>(
simpleInput->valueAt(row), scale, rowSize, inlined);
flatResult->setNoCopy(row, StringView(inlined, actualSize));
});
return result;
}
Expand All @@ -659,13 +587,13 @@ VectorPtr CastExpr::applyDecimalToVarcharCast(
char* rawBuffer = buffer->asMutable<char>() + buffer->size();

applyToSelectedNoThrowLocal(context, rows, result, [&](vector_size_t row) {
auto stringView = convertToStringView<FromNativeType>(
auto actualSize = DecimalUtil::castToString<FromNativeType>(
simpleInput->valueAt(row), scale, rowSize, rawBuffer);
flatResult->setNoCopy(row, stringView);
if (!stringView.isInline()) {
flatResult->setNoCopy(row, StringView(rawBuffer, actualSize));
if (!StringView::isInline(actualSize)) {
// If string view is inline, corresponding bytes on the raw string buffer
// are not needed.
rawBuffer += stringView.size();
rawBuffer += actualSize;
}
});
// Update the exact buffer size.
Expand Down
11 changes: 11 additions & 0 deletions velox/type/DecimalUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,4 +114,15 @@ void DecimalUtil::computeAverage(
}
}

int32_t DecimalUtil::maxStringViewSize(int precision, int scale) {
  // One character per digit of precision plus one for a possible sign.
  const int32_t digitsAndSign = precision + 1;
  // A dot is only written when there is a fractional part.
  const int32_t dot = scale > 0 ? 1 : 0;
  // When every digit is fractional, a leading zero is written before the dot.
  const int32_t leadingZero = precision == scale ? 1 : 0;
  return digitsAndSign + dot + leadingZero;
}

} // namespace facebook::velox
66 changes: 66 additions & 0 deletions velox/type/DecimalUtil.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#pragma once

#include <charconv>
#include <string>
#include "velox/common/base/CheckedArithmetic.h"
#include "velox/common/base/CountBits.h"
Expand Down Expand Up @@ -309,6 +310,71 @@ class DecimalUtil {
return remainder * resultSign;
}

/// Returns the maximum number of characters needed to render a decimal of the
/// given precision and scale as a string: one character per digit of
/// precision, one for a possible minus sign, one for the dot when scale > 0,
/// and one for the leading zero when precision == scale.
/// @param precision The precision of the decimal type.
/// @param scale The scale of the decimal type.
/// @return The estimated maximum string size in characters.
static int32_t maxStringViewSize(int precision, int scale);

/// @brief Convert the unscaled value of a decimal to string and write to raw
/// string buffer from start position.
/// @tparam T The type of input value.
/// @param unscaledValue The input unscaled value.
/// @param scale The scale of decimal.
/// @param maxSize The estimated max size of string. It is used, measured from
/// startPosition, as the end bound handed to std::to_chars.
/// @param startPosition The start position to write from.
/// @return The actual size of the string.
template <typename T>
static size_t castToString(
    T unscaledValue,
    int32_t scale,
    int32_t maxSize,
    char* const startPosition) {
  char* writePosition = startPosition;
  // The end bound is anchored at the start of the buffer. Deriving it from
  // 'writePosition' after a sign, the integral digits or the dot have been
  // written would let std::to_chars accept output past the estimated size.
  char* const endPosition = startPosition + maxSize;
  if (unscaledValue == 0) {
    *writePosition++ = '0';
    if (scale > 0) {
      *writePosition++ = '.';
      // Append trailing zeros.
      std::memset(writePosition, '0', scale);
      writePosition += scale;
    }
  } else {
    if (unscaledValue < 0) {
      *writePosition++ = '-';
      unscaledValue = -unscaledValue;
    }
    // Integral part: the unscaled value with the last 'scale' digits dropped.
    auto [position, errorCode] = std::to_chars(
        writePosition,
        endPosition,
        unscaledValue / DecimalUtil::kPowersOfTen[scale]);
    VELOX_DCHECK_EQ(
        errorCode,
        std::errc(),
        "Failed to cast decimal to varchar: {}",
        std::make_error_code(errorCode).message());
    writePosition = position;

    if (scale > 0) {
      *writePosition++ = '.';
      uint128_t fraction = unscaledValue % DecimalUtil::kPowersOfTen[scale];
      // Pad with zeros so the fraction always occupies 'scale' digits.
      int numLeadingZeros = std::max(scale - countDigits(fraction), 0);
      std::memset(writePosition, '0', numLeadingZeros);
      writePosition += numLeadingZeros;
      // Append remaining fraction digits.
      auto result = std::to_chars(writePosition, endPosition, fraction);
      VELOX_DCHECK_EQ(
          result.ec,
          std::errc(),
          "Failed to cast decimal to varchar: {}",
          std::make_error_code(result.ec).message());
      writePosition = result.ptr;
    }
  }
  return static_cast<size_t>(writePosition - startPosition);
}

/*
* sum up and return overflow/underflow.
*/
Expand Down
67 changes: 67 additions & 0 deletions velox/type/tests/DecimalTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,28 @@ void testToByteArray(int128_t value, int8_t* expected, int32_t size) {
EXPECT_EQ(std::memcmp(expected, out, length), 0);
}

template <typename T>
void testcastToString(
T unscaleValue,
int precision,
int scale,
int maxStringSize,
const std::string& expected) {
char out[maxStringSize];
auto actualSize =
DecimalUtil::castToString<T>(unscaleValue, scale, maxStringSize, out);
EXPECT_EQ(expected.size(), actualSize);
EXPECT_EQ(std::memcmp(expected.data(), out, expected.size()), 0);
}

/// Checks that DecimalUtil::maxStringViewSize returns 'expectedMaxStringSize'
/// for the given precision and scale.
void testMaxStringViewSize(
    int precision,
    int scale,
    int expectedMaxStringSize) {
  const auto actualMaxStringSize =
      DecimalUtil::maxStringViewSize(precision, scale);
  EXPECT_EQ(actualMaxStringSize, expectedMaxStringSize);
}

/// Returns a string made of 'numZeros' copies of the character '0'.
std::string zeros(uint32_t numZeros) {
  std::string padding;
  padding.assign(numZeros, '0');
  return padding;
}
Expand Down Expand Up @@ -490,5 +512,50 @@ TEST(DecimalTest, rescaleReal) {
assertRescaleRealFail(
INFINITY, DECIMAL(10, 2), "The input value should be finite.");
}

TEST(DecimalTest, maxStringViewSize) {
  // Each entry is {precision, scale, expected max string size}.
  struct SizeCase {
    int precision;
    int scale;
    int expectedSize;
  };
  const SizeCase sizeCases[] = {
      {10, 0, 11}, // Digits and sign.
      {10, 1, 12}, // Digits, sign and dot.
      {10, 10, 13}, // Digits, sign, dot and leading zero.
  };
  for (const auto& sizeCase : sizeCases) {
    testMaxStringViewSize(
        sizeCase.precision, sizeCase.scale, sizeCase.expectedSize);
  }
}

TEST(DecimalTest, castToString) {
  // Zero takes a dedicated branch in castToString: a single '0', followed by
  // a dot and 'scale' zeros when the scale is positive.
  testcastToString<int64_t>(0, 10, 0, 11, "0");
  testcastToString<int64_t>(0, 10, 3, 12, "0.000");

  // Short decimals.
  testcastToString<int64_t>(12, 10, 0, 11, "12");
  testcastToString<int64_t>(12, 10, 1, 12, "1.2");
  testcastToString<int64_t>(12, 10, 3, 12, "0.012");
  testcastToString<int64_t>(-12, 10, 3, 12, "-0.012");
  testcastToString<int64_t>(12, 5, 5, 8, "0.00012");
  testcastToString<int64_t>(-12, 5, 5, 8, "-0.00012");
  testcastToString<int64_t>(
      DecimalUtil::kShortDecimalMax, 18, 0, 19, std::string(18, '9'));
  testcastToString<int64_t>(
      DecimalUtil::kShortDecimalMin, 18, 0, 19, "-" + std::string(18, '9'));

  // Long decimals.
  testcastToString<int128_t>(
      HugeInt::parse("-18446744073709551616"),
      20,
      0,
      21,
      "-18446744073709551616");

  testcastToString<int128_t>(
      HugeInt::parse("-18446744073709551616"),
      20,
      3,
      22,
      "-18446744073709551.616");

  testcastToString<int128_t>(
      HugeInt::parse("-12345678901234567890"),
      20,
      20,
      23,
      "-0.12345678901234567890");
  testcastToString<int128_t>(
      DecimalUtil::kLongDecimalMax, 38, 0, 39, std::string(38, '9'));
  testcastToString<int128_t>(
      DecimalUtil::kLongDecimalMin, 38, 0, 39, "-" + std::string(38, '9'));
}
} // namespace
} // namespace facebook::velox

0 comments on commit 7fd6273

Please sign in to comment.