Skip to content

Commit 6abc0dd

Browse files
committed
updating from fast double parser branch
1 parent d01e6a4 commit 6abc0dd

File tree

5 files changed

+38
-33
lines changed

5 files changed

+38
-33
lines changed

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ import org.jetbrains.kotlinx.dataframe.hasNulls
3333
import org.jetbrains.kotlinx.dataframe.impl.canParse
3434
import org.jetbrains.kotlinx.dataframe.impl.catchSilent
3535
import org.jetbrains.kotlinx.dataframe.impl.createStarProjectedType
36-
import org.jetbrains.kotlinx.dataframe.impl.io.DoubleParser
36+
import org.jetbrains.kotlinx.dataframe.impl.io.FastDoubleParser
3737
import org.jetbrains.kotlinx.dataframe.impl.javaDurationCanParse
3838
import org.jetbrains.kotlinx.dataframe.io.isURL
3939
import org.jetbrains.kotlinx.dataframe.io.readJsonStr
@@ -293,12 +293,12 @@ internal object Parsers : GlobalParserOptions {
293293
): StringParserWithFormat<T> = StringParserWithFormat(typeOf<T>(), coveredBy, body)
294294

295295
private val parserToDoubleWithOptions = stringParserWithOptions { options ->
296-
val doubleParser = DoubleParser(options ?: ParserOptions())
297-
val parser = { it: String -> doubleParser.parseOrNull(it) }
296+
val fastDoubleParser = FastDoubleParser(options ?: ParserOptions())
297+
val parser = { it: String -> fastDoubleParser.parseOrNull(it) }
298298
parser
299299
}
300300

301-
private val posixDoubleParser = DoubleParser(
301+
private val posixDoubleParser = FastDoubleParser(
302302
ParserOptions(locale = Locale.forLanguageTag("C.UTF-8")),
303303
)
304304

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/DoubleParser.kt renamed to core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/FastDoubleParser.kt

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -22,21 +22,22 @@ private val logger = KotlinLogging.logger {}
2222
* support different locales and fallback symbols.
2323
*
2424
* Public, so it can be used in other modules.
25-
* Open, so it may be modified.
2625
*
2726
* @param parserOptions can be supplied to configure the parser.
2827
* We'll only use [ParserOptions.locale] and [ParserOptions.useFastDoubleParser].
2928
*/
3029
@Suppress("ktlint:standard:comment-wrapping")
31-
public open class DoubleParser(private val parserOptions: ParserOptions) {
30+
public class FastDoubleParser(private val parserOptions: ParserOptions) {
3231

33-
protected val locale: Locale = parserOptions.locale ?: Locale.getDefault()
34-
protected val supportedFastCharsets: Set<Charset> = setOf(Charsets.UTF_8, Charsets.ISO_8859_1, Charsets.US_ASCII)
32+
private val supportedFastCharsets = setOf(Charsets.UTF_8, Charsets.ISO_8859_1, Charsets.US_ASCII)
3533

36-
protected val localDecimalFormatSymbols: DecimalFormatSymbols = DecimalFormatSymbols.getInstance(locale)
37-
protected val fallbackDecimalFormatSymbols: DecimalFormatSymbols = DecimalFormatSymbols.getInstance(Locale.ROOT)
34+
private val locale: Locale = parserOptions.locale ?: Locale.getDefault()
35+
private val fallbackLocale: Locale = Locale.ROOT
3836

39-
protected open val parser: ConfigurableDoubleParser =
37+
private val localDecimalFormatSymbols: DecimalFormatSymbols = DecimalFormatSymbols.getInstance(locale)
38+
private val fallbackDecimalFormatSymbols: DecimalFormatSymbols = DecimalFormatSymbols.getInstance(fallbackLocale)
39+
40+
private val parser: ConfigurableDoubleParser =
4041
ConfigurableDoubleParser(
4142
/* symbols = */ setupNumberFormatSymbols(),
4243
/* ignoreCase = */ true,
@@ -48,20 +49,20 @@ public open class DoubleParser(private val parserOptions: ParserOptions) {
4849
*
4950
* Fallback characters/strings are only added if they're not clashing with local characters/strings.
5051
*/
51-
protected fun setupNumberFormatSymbols(): NumberFormatSymbols {
52+
private fun setupNumberFormatSymbols(): NumberFormatSymbols {
5253
// collect all chars and strings that are locale-specific such that we can check whether
5354
// fallback chars and strings are safe to add
54-
val localChars = buildSet {
55-
with(localDecimalFormatSymbols) {
55+
val localChars = with(localDecimalFormatSymbols) {
56+
buildSet {
5657
add(decimalSeparator.lowercaseChar())
5758
add(groupingSeparator.lowercaseChar())
5859
add(minusSign.lowercaseChar())
5960
add('+')
6061
add(zeroDigit.lowercaseChar())
6162
}
6263
}
63-
val localStrings = buildSet {
64-
with(localDecimalFormatSymbols) {
64+
val localStrings = with(localDecimalFormatSymbols) {
65+
buildSet {
6566
add(exponentSeparator.lowercase())
6667
add(infinity.lowercase())
6768
add(naN.lowercase())
@@ -137,7 +138,7 @@ public open class DoubleParser(private val parserOptions: ParserOptions) {
137138
}
138139

139140
/** Fallback method for parsing doubles. */
140-
protected open fun String.parseToDoubleOrNullFallback(): Double? =
141+
private fun String.parseToDoubleOrNullFallback(): Double? =
141142
when (lowercase()) {
142143
"inf", "+inf", "infinity", "+infinity", "infty", "+infty", "∞", "+∞" -> Double.POSITIVE_INFINITY
143144

@@ -168,7 +169,7 @@ public open class DoubleParser(private val parserOptions: ParserOptions) {
168169
* It uses the [fast double parser][ConfigurableDoubleParser] if [ParserOptions.useFastDoubleParser] is enabled,
169170
* else, or if that fails, it uses [parseToDoubleOrNullFallback].
170171
*/
171-
public open fun parseOrNull(
172+
public fun parseOrNull(
172173
ba: ByteArray,
173174
offset: Int = 0,
174175
length: Int = ba.size,
@@ -195,7 +196,7 @@ public open class DoubleParser(private val parserOptions: ParserOptions) {
195196
* It uses the [fast double parser][ConfigurableDoubleParser] if [ParserOptions.useFastDoubleParser] is enabled,
196197
* else, or if that fails, it uses [parseToDoubleOrNullFallback].
197198
*/
198-
public open fun parseOrNull(cs: CharSequence): Double? {
199+
public fun parseOrNull(cs: CharSequence): Double? {
199200
if (parserOptions.useFastDoubleParser) {
200201
try {
201202
return parser.parseDouble(cs)
@@ -215,7 +216,7 @@ public open class DoubleParser(private val parserOptions: ParserOptions) {
215216
* It uses the [fast double parser][ConfigurableDoubleParser] if [ParserOptions.useFastDoubleParser] is enabled,
216217
* else, or if that fails, it uses [parseToDoubleOrNullFallback].
217218
*/
218-
public open fun parseOrNull(ca: CharArray, offset: Int = 0, length: Int = ca.size): Double? {
219+
public fun parseOrNull(ca: CharArray, offset: Int = 0, length: Int = ca.size): Double? {
219220
if (parserOptions.useFastDoubleParser) {
220221
try {
221222
return parser.parseDouble(ca, offset, length)

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -395,6 +395,7 @@ public fun DataFrame.Companion.readDelim(
395395
null -> column.tryParse(parserOptions)
396396

397397
else -> {
398+
// TODO use skipAllExcept
398399
val parser = Parsers[colType.toKType()]!!
399400
column.parse(parser, parserOptions)
400401
}

core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/DoubleParserTests.kt renamed to core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/FastDoubleParserTests.kt

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,32 +2,34 @@ package org.jetbrains.kotlinx.dataframe.io
22

33
import io.kotest.matchers.collections.shouldContainInOrder
44
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
5-
import org.jetbrains.kotlinx.dataframe.impl.io.DoubleParser
5+
import org.jetbrains.kotlinx.dataframe.impl.io.FastDoubleParser
66
import org.junit.After
77
import org.junit.Before
88
import org.junit.Test
99
import java.util.Locale
1010

11-
class DoubleParserTests {
11+
private const val LOG_LEVEL = "org.slf4j.simpleLogger.defaultLogLevel"
12+
13+
class FastDoubleParserTests {
1214

1315
private var loggerBefore: String? = null
1416

1517
@Before
1618
fun setLogger() {
17-
loggerBefore = System.getProperty("org.slf4j.simpleLogger.defaultLogLevel")
18-
System.setProperty("org.slf4j.simpleLogger.defaultLogLevel", "debug")
19+
loggerBefore = System.getProperty(LOG_LEVEL)
20+
System.setProperty(LOG_LEVEL, "debug")
1921
}
2022

2123
@After
2224
fun restoreLogger() {
2325
if (loggerBefore != null) {
24-
System.setProperty("org.slf4j.simpleLogger.defaultLogLevel", loggerBefore)
26+
System.setProperty(LOG_LEVEL, loggerBefore)
2527
}
2628
}
2729

2830
@Test
2931
fun `can fast parse doubles`() {
30-
val parser = DoubleParser(ParserOptions(locale = Locale.ROOT, useFastDoubleParser = true))
32+
val parser = FastDoubleParser(ParserOptions(locale = Locale.ROOT, useFastDoubleParser = true))
3133

3234
val numbers = listOf(
3335
"+12.45",
@@ -67,7 +69,7 @@ class DoubleParserTests {
6769

6870
@Test
6971
fun `can fast parse german locale`() {
70-
val parser = DoubleParser(ParserOptions(locale = Locale.GERMANY, useFastDoubleParser = true))
72+
val parser = FastDoubleParser(ParserOptions(locale = Locale.GERMANY, useFastDoubleParser = true))
7173

7274
val numbers = listOf(
7375
"12,45",
@@ -97,7 +99,7 @@ class DoubleParserTests {
9799

98100
@Test
99101
fun `can fast parse french locale`() {
100-
val parser = DoubleParser(ParserOptions(locale = Locale.FRANCE, useFastDoubleParser = true))
102+
val parser = FastDoubleParser(ParserOptions(locale = Locale.FRANCE, useFastDoubleParser = true))
101103

102104
val numbers = listOf(
103105
"12,45",
@@ -127,7 +129,8 @@ class DoubleParserTests {
127129

128130
@Test
129131
fun `can fast parse estonian locale`() {
130-
val parser = DoubleParser(ParserOptions(locale = Locale.forLanguageTag("et-EE"), useFastDoubleParser = true))
132+
val parser =
133+
FastDoubleParser(ParserOptions(locale = Locale.forLanguageTag("et-EE"), useFastDoubleParser = true))
131134

132135
val numbers = listOf(
133136
"12,45",

dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/DataFrameCustomDoubleParser.kt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,20 +5,20 @@ import io.deephaven.csv.tokenization.Tokenizer.CustomDoubleParser
55
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
66

77
/**
8-
* Wrapper around [DoubleParser] so we can use it from Deephaven.
8+
* Wrapper around [FastDoubleParser] so we can use it from Deephaven.
99
*/
1010
internal class DataFrameCustomDoubleParser(parserOptions: ParserOptions) : CustomDoubleParser {
1111

12-
private val doubleParser = DoubleParser(parserOptions)
12+
private val fastDoubleParser = FastDoubleParser(parserOptions)
1313

1414
override fun parse(bs: ByteSlice): Double =
1515
try {
16-
doubleParser.parseOrNull(bs.data(), bs.begin(), bs.size())
16+
fastDoubleParser.parseOrNull(bs.data(), bs.begin(), bs.size())
1717
} catch (e: Exception) {
1818
null
1919
} ?: throw NumberFormatException("Failed to parse double")
2020

2121
override fun parse(cs: CharSequence): Double =
22-
doubleParser.parseOrNull(cs.toString())
22+
fastDoubleParser.parseOrNull(cs.toString())
2323
?: throw NumberFormatException("Failed to parse double")
2424
}

0 commit comments

Comments
 (0)