Skip to content

Commit 0e16817

Browse files
committed
added IDE sample of exposed->DataFrame
1 parent 55c3525 commit 0e16817

File tree

7 files changed

+303
-0
lines changed

7 files changed

+303
-0
lines changed
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
import org.jetbrains.kotlin.gradle.dsl.JvmTarget
2+
import org.jetbrains.kotlin.gradle.tasks.KotlinCompile
3+
4+
plugins {
    application
    kotlin("jvm")

    id("org.jetbrains.kotlinx.dataframe")

    // only mandatory if `kotlin.dataframe.add.ksp=false` in gradle.properties
    id("com.google.devtools.ksp")
}

repositories {
    mavenLocal() // in case of local dataframe development
    mavenCentral()
}

dependencies {
    // implementation("org.jetbrains.kotlinx:dataframe:X.Y.Z")
    implementation(project(":"))

    // exposed + sqlite database support
    implementation(libs.sqlite)
    implementation(libs.exposed.core)
    implementation(libs.exposed.kotlin.datetime)
    implementation(libs.exposed.jdbc)
    implementation(libs.exposed.json)
    implementation(libs.exposed.money)
}

// `configureEach` registers the action lazily, so a Kotlin compile task is only
// configured when it is actually required by the build (task configuration avoidance),
// instead of eagerly realizing every matching task like `withType<T> { ... }` does.
tasks.withType<KotlinCompile>().configureEach {
    compilerOptions.jvmTarget = JvmTarget.JVM_1_8
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
package org.jetbrains.kotlinx.dataframe.examples.exposed
2+
3+
import org.jetbrains.exposed.v1.core.BiCompositeColumn
4+
import org.jetbrains.exposed.v1.core.Column
5+
import org.jetbrains.exposed.v1.core.Expression
6+
import org.jetbrains.exposed.v1.core.ExpressionAlias
7+
import org.jetbrains.exposed.v1.core.ResultRow
8+
import org.jetbrains.exposed.v1.core.Table
9+
import org.jetbrains.exposed.v1.jdbc.Query
10+
import org.jetbrains.kotlinx.dataframe.AnyFrame
11+
import org.jetbrains.kotlinx.dataframe.DataFrame
12+
import org.jetbrains.kotlinx.dataframe.api.convertTo
13+
import org.jetbrains.kotlinx.dataframe.api.toDataFrame
14+
import org.jetbrains.kotlinx.dataframe.codeGen.NameNormalizer
15+
import org.jetbrains.kotlinx.dataframe.impl.schema.DataFrameSchemaImpl
16+
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
17+
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
18+
import kotlin.reflect.KProperty1
19+
import kotlin.reflect.full.isSubtypeOf
20+
import kotlin.reflect.full.memberProperties
21+
import kotlin.reflect.typeOf
22+
23+
/**
 * Typed counterpart of the untyped [convertToDataFrame]:
 * reads every [ResultRow] of this [Iterable] (for instance a [Query]) into a [DataFrame]
 * and then converts that frame to the schema described by [T] using [convertTo].
 *
 * In notebooks, the untyped version works just as well due to runtime inference :)
 */
inline fun <reified T : Any> Iterable<ResultRow>.convertToDataFrame(): DataFrame<T> {
    // no type argument here, so this resolves to the untyped overload
    val untyped = convertToDataFrame()
    return untyped.convertTo<T>()
}
31+
32+
/**
 * Reads any [Iterable][Iterable]`<`[ResultRow][ResultRow]`>`, like a [Query][Query], row by row
 * and turns it into a [DataFrame].
 *
 * For each row, the value of every expression in its `fieldIndex` is appended to a list
 * keyed by the expression's [readableName]; the resulting name-to-values [Map] is then
 * converted with [toDataFrame]. Expressions sharing the same readable name end up in the same list.
 */
@JvmName("convertToAnyFrame")
fun Iterable<ResultRow>.convertToDataFrame(): AnyFrame {
    val valuesByColumnName = mutableMapOf<String, MutableList<Any?>>()
    forEach { row ->
        row.fieldIndex.keys.forEach { expression ->
            valuesByColumnName
                .getOrPut(expression.readableName) { mutableListOf() }
                .add(row[expression])
        }
    }
    return valuesByColumnName.toDataFrame()
}
48+
49+
/**
 * A simple, human-readable column name for [this] [Expression]:
 * - a [Column] yields its `name`,
 * - an [ExpressionAlias] yields its `alias`,
 * - a [BiCompositeColumn] yields the readable names of its real columns joined by `_`,
 * - anything else falls back to `toString()`.
 *
 * Might need to be expanded with multiple types of [Expression].
 */
val Expression<*>.readableName: String
    get() = when (val expression = this) {
        is Column<*> -> expression.name

        is ExpressionAlias<*> -> expression.alias

        is BiCompositeColumn<*, *, *> ->
            expression.getRealColumns().joinToString(separator = "_") { column -> column.readableName }

        else -> expression.toString()
    }
61+
62+
/**
 * Creates a [DataFrameSchema] from the declared [Table] instance.
 *
 * Every member property of the table's class whose declared return type is a subtype of
 * [Column] becomes one [ColumnSchema.Value] entry: the key is the SQL column name
 * (the `name` of the [Column] instance), the value type is the first type argument
 * of the property's declared type, e.g. `Type` in `val a: Column<Type>`.
 *
 * @param columnNameToAccessor Optional [MutableMap] which will be filled with entries mapping
 *   the SQL column name to the accessor name from the [Table].
 *   This can be used to define a [NameNormalizer] later.
 * @return the schema describing all discovered columns.
 * @throws IllegalStateException if the value type of a column property cannot be determined,
 *   i.e. its declared type has no type argument or is star-projected (`Column<*>`).
 */
@Suppress("UNCHECKED_CAST")
fun Table.toDataFrameSchema(columnNameToAccessor: MutableMap<String, String> = mutableMapOf()): DataFrameSchema {
    val columns = this::class.memberProperties
        .filter { it.returnType.isSubtypeOf(typeOf<Column<*>>()) }
        .associate { prop ->
            // safe due to the filter above; the statement-level cast smart-casts `prop`
            prop as KProperty1<Table, Column<*>>

            // retrieve the actual column name
            val columnName = prop.get(this).name
            // store the actual column name together with the accessor name in the map
            columnNameToAccessor[columnName] = prop.name

            // get the column type from `val a: Column<Type>`;
            // the projection's type is null for `Column<*>` and there is no argument at all
            // when the property is declared as a non-generic subclass of Column
            val type = prop.returnType.arguments.firstOrNull()?.type
                ?: error(
                    "Cannot determine the value type of column property '${prop.name}' " +
                        "in ${this::class.simpleName}: declare it as Column<Type> with an explicit type argument",
                )

            columnName to ColumnSchema.Value(type)
        }
    return DataFrameSchemaImpl(columns)
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
package org.jetbrains.kotlinx.dataframe.examples.exposed
2+
3+
import org.jetbrains.exposed.v1.core.Column
4+
import org.jetbrains.exposed.v1.core.StdOutSqlLogger
5+
import org.jetbrains.exposed.v1.jdbc.Database
6+
import org.jetbrains.exposed.v1.jdbc.SchemaUtils
7+
import org.jetbrains.exposed.v1.jdbc.addLogger
8+
import org.jetbrains.exposed.v1.jdbc.batchInsert
9+
import org.jetbrains.exposed.v1.jdbc.deleteAll
10+
import org.jetbrains.exposed.v1.jdbc.selectAll
11+
import org.jetbrains.exposed.v1.jdbc.transactions.transaction
12+
import org.jetbrains.kotlinx.dataframe.api.asSequence
13+
import org.jetbrains.kotlinx.dataframe.api.count
14+
import org.jetbrains.kotlinx.dataframe.api.describe
15+
import org.jetbrains.kotlinx.dataframe.api.groupBy
16+
import org.jetbrains.kotlinx.dataframe.api.print
17+
import org.jetbrains.kotlinx.dataframe.api.sortByDesc
18+
import org.jetbrains.kotlinx.dataframe.size
19+
import java.io.File
20+
21+
/**
 * Describes a simple bridge between [Exposed](https://www.jetbrains.com/exposed/) and DataFrame!
 *
 * The sample connects to the bundled `chinook.db` SQLite database, reads the `Customers`
 * table into a typed DataFrame, prints a few analyses and finally writes the rows back
 * into the database through Exposed.
 *
 * @throws IllegalStateException if `chinook.db` cannot be found on the classpath.
 */
fun main() {
    // defining where to find our SQLite database for Exposed
    val resourceDb = "chinook.db"
    val resourceUrl = checkNotNull(object {}.javaClass.classLoader.getResource(resourceDb)) {
        "Could not find '$resourceDb' on the classpath"
    }
    val dbPath = File(resourceUrl.toURI()).absolutePath
    val db = Database.connect(url = "jdbc:sqlite:$dbPath", driver = "org.sqlite.JDBC")

    // let's read the database!
    val df = transaction(db) {
        addLogger(StdOutSqlLogger)

        // tables in Exposed need to be defined, see tables.kt
        SchemaUtils.create(Customers, Artists, Albums)

        // Perform the specific query you want to read into the DataFrame.
        // Note: DataFrames are in-memory structures, so don't make it too large if you don't have the RAM ;)
        val query = Customers.selectAll() // .where { Customers.company.isNotNull() }

        // read and convert the query to a typed DataFrame
        // see compatibilityLayer.kt for how we created convertToDataFrame<>()
        // and see tables.kt for how we created CustomersDf!
        query.convertToDataFrame<CustomersDf>()
    }

    println(df.size())

    // now we have a DataFrame, we can perform DataFrame operations,
    // like seeing how often a country is represented
    df.groupBy { country }.count()
        .sortByDesc { "count"<Int>() }
        .print(columnTypes = true, borders = true)

    // or just general statistics
    df.describe()
        .print(columnTypes = true, borders = true)

    // or make plots using Kandy! It's all up to you

    // writing a DataFrame back into an SQL database with Exposed can also be done!
    transaction(db) {
        addLogger(StdOutSqlLogger)

        // first delete the original contents
        Customers.deleteAll()

        // batch insert our rows back into the SQL database
        Customers.batchInsert(df.asSequence()) { dfRow ->
            for (column in Customers.columns) {
                // the table only exposes Column<*>; the value is looked up by the SQL column name,
                // so the value type cannot be checked statically here
                @Suppress("UNCHECKED_CAST")
                val target = column as Column<Any?>
                this[target] = dfRow[column.name]
            }
        }
    }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
package org.jetbrains.kotlinx.dataframe.examples.exposed
2+
3+
import org.jetbrains.exposed.v1.core.Column
4+
import org.jetbrains.exposed.v1.core.Table
5+
import org.jetbrains.kotlinx.dataframe.annotations.ColumnName
6+
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
7+
import org.jetbrains.kotlinx.dataframe.api.generateDataClasses
8+
import org.jetbrains.kotlinx.dataframe.api.print
9+
import org.jetbrains.kotlinx.dataframe.codeGen.NameNormalizer
10+
11+
/**
 * Exposed [Table] definition with the columns `AlbumId`, `Title` and `ArtistId`,
 * using `AlbumId` as primary key.
 */
object Albums : Table() {
    val albumId: Column<Int> = integer("AlbumId").autoIncrement()
    val title: Column<String> = varchar("Title", length = 160)
    val artistId: Column<Int> = integer("ArtistId")

    override val primaryKey = PrimaryKey(albumId)
}
18+
19+
/**
 * Exposed [Table] definition with the columns `ArtistId` and `Name`,
 * using `ArtistId` as primary key.
 */
object Artists : Table() {
    val artistId: Column<Int> = integer("ArtistId").autoIncrement()
    val name: Column<String> = varchar("Name", length = 120)

    override val primaryKey = PrimaryKey(artistId)
}
25+
26+
/**
 * Exposed [Table] definition for customers, using `CustomerId` as primary key.
 *
 * Only the id, first name, last name and email columns are non-nullable;
 * all other columns are declared `nullable()`.
 */
object Customers : Table() {
    val customerId: Column<Int> = integer("CustomerId").autoIncrement()
    val firstName: Column<String> = varchar("FirstName", length = 40)
    val lastName: Column<String> = varchar("LastName", length = 20)
    val company: Column<String?> = varchar("Company", length = 80).nullable()
    val address: Column<String?> = varchar("Address", length = 70).nullable()
    val city: Column<String?> = varchar("City", length = 40).nullable()
    val state: Column<String?> = varchar("State", length = 40).nullable()
    val country: Column<String?> = varchar("Country", length = 40).nullable()
    val postalCode: Column<String?> = varchar("PostalCode", length = 10).nullable()
    val phone: Column<String?> = varchar("Phone", length = 24).nullable()
    val fax: Column<String?> = varchar("Fax", length = 24).nullable()
    val email: Column<String> = varchar("Email", length = 60)
    val supportRepId: Column<Int?> = integer("SupportRepId").nullable()

    override val primaryKey = PrimaryKey(customerId)
}
43+
44+
/**
 * Prints a [@DataSchema][DataSchema] data class for the [Customers] table.
 *
 * Exposed requires [Table] instances for type-safe access to columns and data.
 * DataFrame can infer types at runtime, which is enough for Kotlin Notebook,
 * but compile-time type-safe access needs a [@DataSchema][DataSchema] declaration.
 *
 * This is what we created the [toDataFrameSchema] function for!
 */
fun main() {
    // filled by toDataFrameSchema: SQL column name -> accessor name in the Table
    val accessorBySqlName = mutableMapOf<String, String>()
    val customersSchema = Customers.toDataFrameSchema(accessorBySqlName)

    // checking whether the schema is converted correctly.
    // customersSchema.print()

    // the normalizer lets DataFrame generate the same accessors as in the Table
    // while keeping the correct column names
    val tableAccessorNames = NameNormalizer { sqlName -> accessorBySqlName[sqlName] ?: sqlName }

    // printing a @DataSchema data class to copy-paste into the code.
    val generatedCode = customersSchema.generateDataClasses(
        name = "CustomersDf",
        nameNormalizer = tableAccessorNames,
    )
    generatedCode.print()
}
68+
69+
/**
 * DataFrame schema for the rows of the `Customers` table.
 *
 * Created from `Customers.toDataFrameSchema()` by printing the generated data class.
 * The same can be done for the other tables.
 *
 * Each [ColumnName] holds the SQL column name, while the property name matches
 * the accessor of the corresponding column in the Exposed table.
 */
@DataSchema
data class CustomersDf(
    @ColumnName("Address") val address: String?,
    @ColumnName("City") val city: String?,
    @ColumnName("Company") val company: String?,
    @ColumnName("Country") val country: String?,
    @ColumnName("CustomerId") val customerId: Int,
    @ColumnName("Email") val email: String,
    @ColumnName("Fax") val fax: String?,
    @ColumnName("FirstName") val firstName: String,
    @ColumnName("LastName") val lastName: String,
    @ColumnName("Phone") val phone: String?,
    @ColumnName("PostalCode") val postalCode: String?,
    @ColumnName("State") val state: String?,
    @ColumnName("SupportRepId") val supportRepId: Int?,
)
Binary file not shown.

gradle/libs.versions.toml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ jai-core = "1.1.3"
6262
jts = "1.20.0"
6363

6464
kandy = "0.8.1-dev-66"
65+
exposed = "1.0.0-beta-2"
6566

6667
[libraries]
6768
ksp-gradle = { group = "com.google.devtools.ksp", name = "symbol-processing-gradle-plugin", version.ref = "ksp" }
@@ -158,6 +159,12 @@ dataframe-symbol-processor = { group = "org.jetbrains.kotlinx.dataframe", name =
158159

159160
duckdb-jdbc = { group = "org.duckdb", name = "duckdb_jdbc", version.ref= "duckdb"}
160161

162+
exposed-core = { group = "org.jetbrains.exposed", name = "exposed-core", version.ref = "exposed" }
163+
exposed-jdbc = { group = "org.jetbrains.exposed", name = "exposed-jdbc", version.ref = "exposed" }
164+
exposed-kotlin-datetime = { group = "org.jetbrains.exposed", name = "exposed-kotlin-datetime", version.ref = "exposed" }
165+
exposed-json = { group = "org.jetbrains.exposed", name = "exposed-json", version.ref = "exposed" }
166+
exposed-money = { group = "org.jetbrains.exposed", name = "exposed-money", version.ref = "exposed" }
167+
161168
[plugins]
162169
jupyter-api = { id = "org.jetbrains.kotlin.jupyter.api", version.ref = "kotlinJupyter" }
163170
ksp = { id = "com.google.devtools.ksp", version.ref = "ksp" }

settings.gradle.kts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ include("examples:idea-examples:titanic")
2525
include("examples:idea-examples:movies")
2626
include("examples:idea-examples:youtube")
2727
include("examples:idea-examples:json")
28+
include("examples:idea-examples:unsupported-data-sources")
2829

2930
val jupyterApiTCRepo: String by settings
3031

0 commit comments

Comments
 (0)