Skip to content

Commit de6e9c2

Browse files
authored
Merge pull request #441 from Kotlin/split-doc-update
Docs update & DynamicDataFrameBuilder
2 parents 9a53196 + 5b425a7 commit de6e9c2

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+3065
-370
lines changed

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
1515
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
1616
import org.jetbrains.kotlinx.dataframe.exceptions.DuplicateColumnNamesException
1717
import org.jetbrains.kotlinx.dataframe.exceptions.UnequalColumnSizesException
18+
import org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator
1819
import org.jetbrains.kotlinx.dataframe.impl.DataFrameImpl
1920
import org.jetbrains.kotlinx.dataframe.impl.asList
2021
import org.jetbrains.kotlinx.dataframe.impl.columnName
@@ -23,6 +24,7 @@ import org.jetbrains.kotlinx.dataframe.impl.columns.createColumn
2324
import org.jetbrains.kotlinx.dataframe.impl.columns.createComputedColumnReference
2425
import org.jetbrains.kotlinx.dataframe.impl.columns.forceResolve
2526
import org.jetbrains.kotlinx.dataframe.impl.columns.unbox
27+
import org.jetbrains.kotlinx.dataframe.impl.unnamedColumnPrefix
2628
import org.jetbrains.kotlinx.dataframe.size
2729
import kotlin.random.Random
2830
import kotlin.random.nextInt
@@ -348,6 +350,34 @@ public class DataFrameBuilder(private val header: List<String>) {
348350
public fun randomBoolean(nrow: Int): AnyFrame = fillNotNull(nrow) { Random.nextBoolean() }
349351
}
350352

353+
/**
354+
* Helper class for building a dataframe column by column when column names may be duplicated or empty.
355+
* Typical uses: operations involving multiple dataframes, computed columns, or parsing third-party data.
356+
* Every added column is stored under a name obtained from an internal [ColumnNameGenerator].
*/
357+
public class DynamicDataFrameBuilder {
358+
// NOTE(review): `cols` is never reassigned inside this class, only appended to — it could be a `val`.
private var cols: MutableList<AnyCol> = mutableListOf()
359+
// Source of the names under which columns are stored (see `add`).
private val generator = ColumnNameGenerator()
360+
361+
/**
 * Adds [col] to the builder under a name returned by the name generator.
 *
 * A column with an empty name is registered under `unnamedColumnPrefix`; any other column is
 * registered under its own name. If the name returned by the generator differs from the
 * column's current name, the result of `col.rename(uniqueName)` is stored; otherwise [col]
 * itself is stored.
 *
 * @param col the column to add.
 * @return the name under which the column was stored.
 */
public fun add(col: AnyCol): String {
362+
val uniqueName = if (col.name().isEmpty()) {
363+
generator.addUnique(unnamedColumnPrefix)
364+
} else {
365+
generator.addUnique(col.name())
366+
}
367+
// Rename only when the generator had to pick a different name.
val renamed = if (uniqueName != col.name()) {
368+
col.rename(uniqueName)
369+
} else {
370+
col
371+
}
372+
cols.add(renamed)
373+
return uniqueName
374+
}
375+
376+
/**
 * Builds a dataframe from all columns added so far by passing them to `dataFrameOf`.
 *
 * @return the resulting [AnyFrame].
 */
public fun toDataFrame(): AnyFrame {
377+
return dataFrameOf(cols)
378+
}
379+
}
380+
351381
/**
352382
* Returns [DataFrame] with no rows and no columns.
353383
*

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/DataFrameImpl.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ import org.jetbrains.kotlinx.dataframe.impl.columns.resolveSingle
2727
import org.jetbrains.kotlinx.dataframe.io.renderToString
2828
import kotlin.reflect.KProperty
2929

30-
private const val unnamedColumnPrefix = "untitled"
30+
internal const val unnamedColumnPrefix = "untitled"
3131

3232
internal open class DataFrameImpl<T>(cols: List<AnyCol>, val nrow: Int) : DataFrame<T>, AggregatableInternal<T> {
3333

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
package org.jetbrains.kotlinx.dataframe.api
2+
3+
import io.kotest.matchers.shouldBe
4+
import org.junit.Test
5+
6+
/** Tests for column naming performed by `DynamicDataFrameBuilder`. */
class ConstructorsTests {
7+
8+
// Adds `columnOf(1, 2, 3)` five times and expects the built dataframe to equal
// `dataFrameOf` applied to a list of five such columns.
// NOTE(review): presumably these columns have an empty name, so this exercises the
// unnamed-column branch of `add` — confirm.
@Test
9+
fun `untitled column naming`() {
10+
val builder = DynamicDataFrameBuilder()
11+
repeat(5) {
12+
builder.add(columnOf(1, 2, 3))
13+
}
14+
builder.toDataFrame() shouldBe dataFrameOf(List(5) { columnOf(1, 2, 3) })
15+
}
16+
17+
// Adds the same named column twice and expects two columns: the first keeps the
// original name, the second gets that name with "1" appended.
@Test
18+
fun `duplicated name`() {
19+
val builder = DynamicDataFrameBuilder()
20+
val column by columnOf(1, 2, 3)
21+
builder.add(column)
22+
builder.add(column)
23+
val df = builder.toDataFrame()
24+
df.columnsCount() shouldBe 2
25+
df.columnNames() shouldBe listOf(column.name(), "${column.name()}1")
26+
}
27+
}

core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/explainer/PluginCallbackProxy.kt

Lines changed: 44 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ object PluginCallbackProxy : PluginCallback {
118118
body =
119119
"""
120120
<details>
121-
<summary>${expressions.joinToString(".") { it.source }
121+
<summary>${expressions.joinToSource()
122122
.also {
123123
if (it.length > 95) TODO("expression is too long ${it.length}. better to split sample in multiple snippets")
124124
}
@@ -155,50 +155,67 @@ object PluginCallbackProxy : PluginCallback {
155155
)
156156
}
157157

158+
/** Joins the `source` of every expression in this list into a single string, separated by ".". */
private fun List<Expression>.joinToSource(): String =
159+
joinToString(".") { it.source }
160+
158161
private fun statementOutput(
159162
expressions: List<Expression>,
160163
): DataFrameHtmlData {
161164
var data = DataFrameHtmlData()
162-
if (expressions.size < 2) error("Sample without output or input (i.e. function returns some value)")
163-
for ((i, expression) in expressions.withIndex()) {
164-
when (i) {
165-
0 -> {
166-
val table = convertToHTML(expression.df)
167-
val description = table.copy(
168-
body = """
165+
val allow = setOf(
166+
"toDataFrame", "peek(dataFrameOf(col), dataFrameOf(col))"
167+
)
168+
if (expressions.isEmpty()) {
169+
error("No dataframe expressions in sample")
170+
}
171+
if (expressions.size == 1) {
172+
if (allow.any { expressions[0].source.contains(it) }) {
173+
val expression = expressions[0]
174+
data += convertToHTML(expression.df)
175+
} else {
176+
error("${expressions.joinToSource()} Sample without output or input (i.e. function returns some value)")
177+
}
178+
} else {
179+
for ((i, expression) in expressions.withIndex()) {
180+
when (i) {
181+
0 -> {
182+
val table = convertToHTML(expression.df)
183+
val description = table.copy(
184+
body = """
169185
<details>
170186
<summary>Input ${convertToDescription(expression.df)}</summary>
171187
${table.body}
172188
</details>
173-
""".trimIndent()
174-
)
175-
data += description
176-
}
189+
""".trimIndent()
190+
)
191+
data += description
192+
}
177193

178-
expressions.lastIndex -> {
179-
val table = convertToHTML(expression.df)
180-
val description = table.copy(
181-
body = """
194+
expressions.lastIndex -> {
195+
val table = convertToHTML(expression.df)
196+
val description = table.copy(
197+
body = """
182198
<details>
183199
<summary>Output ${convertToDescription(expression.df)}</summary>
184200
${table.body}
185201
</details>
186-
""".trimIndent()
187-
)
188-
data += description
189-
}
202+
""".trimIndent()
203+
)
204+
data += description
205+
}
190206

191-
else -> {
192-
val table = convertToHTML(expression.df)
193-
val description = table.copy(
194-
body = """
207+
else -> {
208+
val table = convertToHTML(expression.df)
209+
val description = table.copy(
210+
body = """
195211
<details>
196212
<summary>Step $i: ${convertToDescription(expression.df)}</summary>
197213
${table.body}
198214
</details>
199-
""".trimIndent()
200-
)
201-
data += description
215+
""".trimIndent()
216+
)
217+
data += description
218+
}
202219
}
203220
}
204221
}

core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Create.kt

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
package org.jetbrains.kotlinx.dataframe.samples.api
22

33
import io.kotest.matchers.shouldBe
4+
import org.jetbrains.kotlinx.dataframe.AnyFrame
45
import org.jetbrains.kotlinx.dataframe.DataFrame
6+
import org.jetbrains.kotlinx.dataframe.api.DynamicDataFrameBuilder
57
import org.jetbrains.kotlinx.dataframe.api.Infer
68
import org.jetbrains.kotlinx.dataframe.api.ValueProperty
79
import org.jetbrains.kotlinx.dataframe.api.add
@@ -431,4 +433,21 @@ class Create : TestBase() {
431433
df["scores"].kind shouldBe ColumnKind.Frame
432434
df["summary"]["min score"].values() shouldBe listOf(3, 5)
433435
}
436+
437+
// Documentation sample for DynamicDataFrameBuilder: the local `peek` helper adds the first
// column (if any) of each given dataframe to a builder and returns the built dataframe; it is
// then called with two dataframes that both contain `col`.
// The expression text `peek(dataFrameOf(col), dataFrameOf(col))` also appears in the `allow`
// set of `statementOutput` in PluginCallbackProxy, so keep the two in sync when editing it.
@Test
438+
@TransformDataFrameExpressions
439+
fun duplicatedColumns() {
440+
// SampleStart
441+
fun peek(vararg dataframes: AnyFrame): AnyFrame {
442+
val builder = DynamicDataFrameBuilder()
443+
for (df in dataframes) {
444+
df.columns().firstOrNull()?.let { builder.add(it) }
445+
}
446+
return builder.toDataFrame()
447+
}
448+
449+
val col by columnOf(1, 2, 3)
450+
peek(dataFrameOf(col), dataFrameOf(col))
451+
// SampleEnd
452+
}
434453
}

core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt

Lines changed: 55 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -469,7 +469,7 @@ class Modify : TestBase() {
469469
@TransformDataFrameExpressions
470470
fun splitInplace_properties() {
471471
// SampleStart
472-
df.split { name.firstName }.by { it.chars().toList() }.inplace()
472+
df.split { name.firstName }.by { it.asIterable() }.inplace()
473473
// SampleEnd
474474
}
475475

@@ -480,25 +480,23 @@ class Modify : TestBase() {
480480
val name by columnGroup()
481481
val firstName by name.column<String>()
482482

483-
df.split { firstName }.by { it.chars().toList() }.inplace()
483+
df.split { firstName }.by { it.asIterable() }.inplace()
484484
// SampleEnd
485485
}
486486

487487
@Test
488488
@TransformDataFrameExpressions
489489
fun splitInplace_strings() {
490490
// SampleStart
491-
df.split { "name"["firstName"]<String>() }.by { it.chars().toList() }.inplace()
491+
df.split { "name"["firstName"]<String>() }.by { it.asIterable() }.inplace()
492492
// SampleEnd
493493
}
494494

495495
@Test
496496
@TransformDataFrameExpressions
497497
fun split_properties() {
498498
// SampleStart
499-
df.split { name }.by { it.values() }.into("nameParts")
500-
501-
df.split { name.lastName }.by(" ").default("").inward { "word$it" }
499+
df.split { name.lastName }.by { it.asIterable() }.into("char1", "char2")
502500
// SampleEnd
503501
}
504502

@@ -509,28 +507,69 @@ class Modify : TestBase() {
509507
val name by columnGroup()
510508
val lastName by name.column<String>()
511509

512-
df.split { name }.by { it.values() }.into("nameParts")
513-
514-
df.split { lastName }.by(" ").default("").inward { "word$it" }
510+
df.split { lastName }.by { it.asIterable() }.into("char1", "char2")
515511
// SampleEnd
516512
}
517513

518514
@Test
519515
@TransformDataFrameExpressions
520516
fun split_strings() {
521517
// SampleStart
522-
df.split { name }.by { it.values() }.into("nameParts")
518+
df.split { "name"["lastName"]<String>() }.by { it.asIterable() }.into("char1", "char2")
519+
// SampleEnd
520+
}
521+
522+
@Test
523+
@TransformDataFrameExpressions
524+
fun split1_properties() {
525+
// SampleStart
526+
df.split { name.lastName }
527+
.by { it.asIterable() }.default(' ')
528+
.inward { "char$it" }
529+
// SampleEnd
530+
}
531+
532+
@Test
533+
@TransformDataFrameExpressions
534+
fun split1_accessors() {
535+
// SampleStart
536+
val name by columnGroup()
537+
val lastName by name.column<String>()
523538

524-
df.split { "name"["lastName"] }.by(" ").default("").inward { "word$it" }
539+
df.split { lastName }
540+
.by { it.asIterable() }.default(' ')
541+
.inward { "char$it" }
542+
// SampleEnd
543+
}
544+
545+
@Test
546+
@TransformDataFrameExpressions
547+
fun split1_strings() {
548+
// SampleStart
549+
df.split { "name"["lastName"]<String>() }
550+
.by { it.asIterable() }.default(' ')
551+
.inward { "char$it" }
525552
// SampleEnd
526553
}
527554

528555
@Test
529556
@TransformDataFrameExpressions
530557
fun splitRegex() {
531-
val merged = df.merge { name.lastName and name.firstName }.by { it[0] + " (" + it[1] + ")" }.into("name")
532-
val name by column<String>()
533558
// SampleStart
559+
val merged = df.merge { name.lastName and name.firstName }
560+
.by { it[0] + " (" + it[1] + ")" }
561+
.into("name")
562+
// SampleEnd
563+
}
564+
565+
private val merged = df.merge { name.lastName and name.firstName }.by { it[0] + " (" + it[1] + ")" }.into("name")
566+
567+
@Test
568+
@TransformDataFrameExpressions
569+
fun splitRegex1() {
570+
// SampleStart
571+
val name by column<String>()
572+
534573
merged.split { name }
535574
.match("""(.*) \((.*)\)""")
536575
.inward("firstName", "lastName")
@@ -562,7 +601,7 @@ class Modify : TestBase() {
562601
@TransformDataFrameExpressions
563602
fun splitIntoRows_properties() {
564603
// SampleStart
565-
df.split { name.firstName }.by { it.chars().toList() }.intoRows()
604+
df.split { name.firstName }.by { it.asIterable() }.intoRows()
566605

567606
df.split { name }.by { it.values() }.intoRows()
568607
// SampleEnd
@@ -575,7 +614,7 @@ class Modify : TestBase() {
575614
val name by columnGroup()
576615
val firstName by name.column<String>()
577616

578-
df.split { firstName }.by { it.chars().toList() }.intoRows()
617+
df.split { firstName }.by { it.asIterable() }.intoRows()
579618

580619
df.split { name }.by { it.values() }.intoRows()
581620
// SampleEnd
@@ -585,7 +624,7 @@ class Modify : TestBase() {
585624
@TransformDataFrameExpressions
586625
fun splitIntoRows_strings() {
587626
// SampleStart
588-
df.split { "name"["firstName"]<String>() }.by { it.chars().toList() }.intoRows()
627+
df.split { "name"["firstName"]<String>() }.by { it.asIterable() }.intoRows()
589628

590629
df.split { colGroup("name") }.by { it.values() }.intoRows()
591630
// SampleEnd

0 commit comments

Comments
 (0)