Skip to content

Commit d01e6a4

Browse files
committed
small optimizations for the double parser
1 parent 4d3a7e3 commit d01e6a4

File tree

2 files changed

+43
-25
lines changed

2 files changed

+43
-25
lines changed

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/DoubleParser.kt

Lines changed: 39 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ import java.text.DecimalFormatSymbols
1010
import java.text.NumberFormat
1111
import java.text.ParsePosition
1212
import java.util.Locale
13-
import kotlin.reflect.KFunction1
1413

1514
private val logger = KotlinLogging.logger {}
1615

@@ -74,13 +73,14 @@ public open class DoubleParser(private val parserOptions: ParserOptions) {
7473
* its fallback char from [fallbackDecimalFormatSymbols] if it's safe to do so.
7574
* [additionals] will be added to the set as well when they're safe to add.
7675
*/
77-
fun KFunction1<DecimalFormatSymbols, Char>.fromLocalWithFallBack(vararg additionals: Char): Set<Char> =
76+
fun ((DecimalFormatSymbols) -> Char).fromLocalWithFallBack(vararg additionals: Char): Set<Char> =
7877
buildSet {
79-
val char = this@fromLocalWithFallBack(localDecimalFormatSymbols).lowercaseChar()
78+
val getChar = this@fromLocalWithFallBack
79+
val char = getChar(localDecimalFormatSymbols).lowercaseChar()
8080
add(char)
8181

8282
// add fallback char if it's safe to do so
83-
val fallbackChar = this@fromLocalWithFallBack(fallbackDecimalFormatSymbols).lowercaseChar()
83+
val fallbackChar = getChar(fallbackDecimalFormatSymbols).lowercaseChar()
8484
if (fallbackChar !in localChars && !localStrings.any { fallbackChar in it }) {
8585
add(fallbackChar)
8686
}
@@ -102,13 +102,14 @@ public open class DoubleParser(private val parserOptions: ParserOptions) {
102102
* its fallback string from [fallbackDecimalFormatSymbols] if it's safe to do so.
103103
* [additionals] will be added to the set as well when they're safe to add.
104104
*/
105-
fun KFunction1<DecimalFormatSymbols, String>.fromLocalWithFallBack(vararg additionals: String): Set<String> =
105+
fun ((DecimalFormatSymbols) -> String).fromLocalWithFallBack(vararg additionals: String): Set<String> =
106106
buildSet {
107-
val string = this@fromLocalWithFallBack(localDecimalFormatSymbols).lowercase()
107+
val getString = this@fromLocalWithFallBack
108+
val string = getString(localDecimalFormatSymbols).lowercase()
108109
add(string)
109110

110111
// add fallback string if it's safe to do so
111-
val fallbackString = this@fromLocalWithFallBack(fallbackDecimalFormatSymbols).lowercase()
112+
val fallbackString = getString(fallbackDecimalFormatSymbols).lowercase()
112113
if (!fallbackString.any { it in localChars } && fallbackString !in localStrings) {
113114
add(fallbackString)
114115
}
@@ -135,7 +136,7 @@ public open class DoubleParser(private val parserOptions: ParserOptions) {
135136
.withNaN(DecimalFormatSymbols::getNaN.fromLocalWithFallBack("nan", "na", "n/a"))
136137
}
137138

138-
// fallback method for parsing doubles
139+
/** Fallback method for parsing doubles. */
139140
protected open fun String.parseToDoubleOrNullFallback(): Double? =
140141
when (lowercase()) {
141142
"inf", "+inf", "infinity", "+infinity", "infty", "+infty", "∞", "+∞" -> Double.POSITIVE_INFINITY
@@ -147,10 +148,8 @@ public open class DoubleParser(private val parserOptions: ParserOptions) {
147148
else -> {
148149
// not thread safe; must be created here
149150
val numberFormat = NumberFormat.getInstance(locale)
150-
151151
val parsePosition = ParsePosition(0)
152152
val result = numberFormat.parse(this, parsePosition)?.toDouble()
153-
154153
if (parsePosition.index != this.length || parsePosition.errorIndex != -1) {
155154
null
156155
} else {
@@ -163,10 +162,21 @@ public open class DoubleParser(private val parserOptions: ParserOptions) {
163162
}
164163
}
165164

166-
public open fun parseOrNull(ba: ByteArray, charset: Charset = Charsets.UTF_8): Double? {
165+
/**
166+
* Parses a double value from a substring of the specified byte array.
167+
*
168+
* It uses the [fast double parser][ConfigurableDoubleParser] if [ParserOptions.useFastDoubleParser] is enabled,
169+
* else, or if that fails, it uses [parseToDoubleOrNullFallback].
170+
*/
171+
public open fun parseOrNull(
172+
ba: ByteArray,
173+
offset: Int = 0,
174+
length: Int = ba.size,
175+
charset: Charset = Charsets.UTF_8,
176+
): Double? {
167177
if (parserOptions.useFastDoubleParser && charset in supportedFastCharsets) {
168178
try {
169-
return parser.parseDouble(ba)
179+
return parser.parseDouble(ba, offset, length)
170180
} catch (e: Exception) {
171181
logger.debug(e) {
172182
"Failed to parse '${
@@ -175,9 +185,16 @@ public open class DoubleParser(private val parserOptions: ParserOptions) {
175185
}
176186
}
177187
}
178-
return ba.toString(charset).parseToDoubleOrNullFallback()
188+
return String(bytes = ba, offset = offset, length = length, charset = charset)
189+
.parseToDoubleOrNullFallback()
179190
}
180191

192+
/**
193+
* Parses a double value from the specified [CharSequence].
194+
*
195+
* It uses the [fast double parser][ConfigurableDoubleParser] if [ParserOptions.useFastDoubleParser] is enabled,
196+
* else, or if that fails, it uses [parseToDoubleOrNullFallback].
197+
*/
181198
public open fun parseOrNull(cs: CharSequence): Double? {
182199
if (parserOptions.useFastDoubleParser) {
183200
try {
@@ -192,10 +209,16 @@ public open class DoubleParser(private val parserOptions: ParserOptions) {
192209
return cs.toString().parseToDoubleOrNullFallback()
193210
}
194211

195-
public open fun parseOrNull(ca: CharArray): Double? {
212+
/**
213+
* Parses a double value from the specified [CharArray].
214+
*
215+
* It uses the [fast double parser][ConfigurableDoubleParser] if [ParserOptions.useFastDoubleParser] is enabled,
216+
* else, or if that fails, it uses [parseToDoubleOrNullFallback].
217+
*/
218+
public open fun parseOrNull(ca: CharArray, offset: Int = 0, length: Int = ca.size): Double? {
196219
if (parserOptions.useFastDoubleParser) {
197220
try {
198-
return parser.parseDouble(ca)
221+
return parser.parseDouble(ca, offset, length)
199222
} catch (e: Exception) {
200223
logger.debug(e) {
201224
"Failed to parse '${
@@ -204,7 +227,6 @@ public open class DoubleParser(private val parserOptions: ParserOptions) {
204227
}
205228
}
206229
}
207-
208-
return ca.joinToString("").parseToDoubleOrNullFallback()
230+
return String(chars = ca, offset = offset, length = length).parseToDoubleOrNullFallback()
209231
}
210232
}

dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/DataFrameCustomDoubleParser.kt

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,16 +11,12 @@ internal class DataFrameCustomDoubleParser(parserOptions: ParserOptions) : Custo
1111

1212
private val doubleParser = DoubleParser(parserOptions)
1313

14-
override fun parse(bs: ByteSlice): Double {
15-
val array = ByteArray(bs.size())
14+
override fun parse(bs: ByteSlice): Double =
1615
try {
17-
bs.copyTo(array, 0)
16+
doubleParser.parseOrNull(bs.data(), bs.begin(), bs.size())
1817
} catch (e: Exception) {
19-
throw NumberFormatException("Failed to parse double")
20-
}
21-
return doubleParser.parseOrNull(array)
22-
?: throw NumberFormatException("Failed to parse double")
23-
}
18+
null
19+
} ?: throw NumberFormatException("Failed to parse double")
2420

2521
override fun parse(cs: CharSequence): Double =
2622
doubleParser.parseOrNull(cs.toString())

0 commit comments

Comments
 (0)