diff --git a/docs/StardustDocs/resources/api/join/dfLeftImplicit.html b/docs/StardustDocs/resources/api/join/dfLeftImplicit.html new file mode 100644 index 0000000000..684a256999 --- /dev/null +++ b/docs/StardustDocs/resources/api/join/dfLeftImplicit.html @@ -0,0 +1,512 @@ + + + + + +
+ +

+ + + diff --git a/docs/StardustDocs/resources/api/join/dfRightImplicit.html b/docs/StardustDocs/resources/api/join/dfRightImplicit.html new file mode 100644 index 0000000000..5012eab739 --- /dev/null +++ b/docs/StardustDocs/resources/api/join/dfRightImplicit.html @@ -0,0 +1,512 @@ + + + + + +
+ +

+ + + diff --git a/docs/StardustDocs/resources/api/join/notebook_test_join_10.html b/docs/StardustDocs/resources/api/join/notebook_test_join_10.html new file mode 100644 index 0000000000..c8b4044d36 --- /dev/null +++ b/docs/StardustDocs/resources/api/join/notebook_test_join_10.html @@ -0,0 +1,512 @@ + + + + + +
+ +

+ + + diff --git a/docs/StardustDocs/resources/api/join/notebook_test_join_11.html b/docs/StardustDocs/resources/api/join/notebook_test_join_11.html new file mode 100644 index 0000000000..728ce9335f --- /dev/null +++ b/docs/StardustDocs/resources/api/join/notebook_test_join_11.html @@ -0,0 +1,514 @@ + + + + + +
+ +

+ + + diff --git a/docs/StardustDocs/resources/api/join/notebook_test_join_12.html b/docs/StardustDocs/resources/api/join/notebook_test_join_12.html new file mode 100644 index 0000000000..5c537a666a --- /dev/null +++ b/docs/StardustDocs/resources/api/join/notebook_test_join_12.html @@ -0,0 +1,513 @@ + + + + + +
+ +

+ + + diff --git a/docs/StardustDocs/resources/api/join/notebook_test_join_13.html b/docs/StardustDocs/resources/api/join/notebook_test_join_13.html new file mode 100644 index 0000000000..684a256999 --- /dev/null +++ b/docs/StardustDocs/resources/api/join/notebook_test_join_13.html @@ -0,0 +1,512 @@ + + + + + +
+ +

+ + + diff --git a/docs/StardustDocs/resources/api/join/notebook_test_join_14.html b/docs/StardustDocs/resources/api/join/notebook_test_join_14.html new file mode 100644 index 0000000000..5012eab739 --- /dev/null +++ b/docs/StardustDocs/resources/api/join/notebook_test_join_14.html @@ -0,0 +1,512 @@ + + + + + +
+ +

+ + + diff --git a/docs/StardustDocs/resources/api/join/notebook_test_join_15.html b/docs/StardustDocs/resources/api/join/notebook_test_join_15.html new file mode 100644 index 0000000000..5c537a666a --- /dev/null +++ b/docs/StardustDocs/resources/api/join/notebook_test_join_15.html @@ -0,0 +1,513 @@ + + + + + +
+ +

+ + + diff --git a/docs/StardustDocs/resources/api/join/notebook_test_join_16.html b/docs/StardustDocs/resources/api/join/notebook_test_join_16.html new file mode 100644 index 0000000000..ba0b59eac0 --- /dev/null +++ b/docs/StardustDocs/resources/api/join/notebook_test_join_16.html @@ -0,0 +1,512 @@ + + + + + +
+ +

+ + + diff --git a/docs/StardustDocs/resources/api/join/notebook_test_join_17.html b/docs/StardustDocs/resources/api/join/notebook_test_join_17.html new file mode 100644 index 0000000000..d5ba335fd8 --- /dev/null +++ b/docs/StardustDocs/resources/api/join/notebook_test_join_17.html @@ -0,0 +1,513 @@ + + + + + +
+ +

+ + + diff --git a/docs/StardustDocs/resources/api/join/notebook_test_join_18.html b/docs/StardustDocs/resources/api/join/notebook_test_join_18.html new file mode 100644 index 0000000000..0cae5cdfde --- /dev/null +++ b/docs/StardustDocs/resources/api/join/notebook_test_join_18.html @@ -0,0 +1,513 @@ + + + + + +
+ +

+ + + diff --git a/docs/StardustDocs/resources/api/join/notebook_test_join_19.html b/docs/StardustDocs/resources/api/join/notebook_test_join_19.html new file mode 100644 index 0000000000..df66e90218 --- /dev/null +++ b/docs/StardustDocs/resources/api/join/notebook_test_join_19.html @@ -0,0 +1,513 @@ + + + + + +
+ +

+ + + diff --git a/docs/StardustDocs/resources/api/join/notebook_test_join_20.html b/docs/StardustDocs/resources/api/join/notebook_test_join_20.html new file mode 100644 index 0000000000..3e764003bd --- /dev/null +++ b/docs/StardustDocs/resources/api/join/notebook_test_join_20.html @@ -0,0 +1,512 @@ + + + + + +
+ +

+ + + diff --git a/docs/StardustDocs/resources/api/join/notebook_test_join_3.html b/docs/StardustDocs/resources/api/join/notebook_test_join_3.html new file mode 100644 index 0000000000..8048945b2b --- /dev/null +++ b/docs/StardustDocs/resources/api/join/notebook_test_join_3.html @@ -0,0 +1,511 @@ + + + + + +
+ +

+ + + diff --git a/docs/StardustDocs/resources/api/join/notebook_test_join_5.html b/docs/StardustDocs/resources/api/join/notebook_test_join_5.html new file mode 100644 index 0000000000..224bf04eb2 --- /dev/null +++ b/docs/StardustDocs/resources/api/join/notebook_test_join_5.html @@ -0,0 +1,511 @@ + + + + + +
+ +

+ + + diff --git a/docs/StardustDocs/resources/api/join/notebook_test_join_6.html b/docs/StardustDocs/resources/api/join/notebook_test_join_6.html new file mode 100644 index 0000000000..14c98a73e9 --- /dev/null +++ b/docs/StardustDocs/resources/api/join/notebook_test_join_6.html @@ -0,0 +1,512 @@ + + + + + +
+ +

+ + + diff --git a/docs/StardustDocs/resources/api/join/notebook_test_join_8.html b/docs/StardustDocs/resources/api/join/notebook_test_join_8.html new file mode 100644 index 0000000000..c511396c06 --- /dev/null +++ b/docs/StardustDocs/resources/api/join/notebook_test_join_8.html @@ -0,0 +1,512 @@ + + + + + +
+ +

+ + + diff --git a/docs/StardustDocs/topics/_shadow_resources.md b/docs/StardustDocs/topics/_shadow_resources.md index b9e0d4d08e..9bcd182239 100644 --- a/docs/StardustDocs/topics/_shadow_resources.md +++ b/docs/StardustDocs/topics/_shadow_resources.md @@ -166,6 +166,23 @@ + + + + + + + + + + + + + + + + + diff --git a/docs/StardustDocs/topics/join.md b/docs/StardustDocs/topics/join.md deleted file mode 100644 index 1358d67b86..0000000000 --- a/docs/StardustDocs/topics/join.md +++ /dev/null @@ -1,112 +0,0 @@ -[//]: # (title: join) - - - -Joins two [`DataFrame`](DataFrame.md) object by join columns. - -```kotlin -join(otherDf, type = JoinType.Inner) [ { joinColumns } ] - -joinColumns: JoinDsl.(LeftDataFrame) -> Columns - -interface JoinDsl: LeftDataFrame { - - val right: RightDataFrame - - fun DataColumn.match(rightColumn: DataColumn) -} -``` - -`joinColumns` is a [column selector](ColumnSelectors.md) that defines column mapping for join: - -Related operations: [](multipleDataFrames.md) - - - - - -```kotlin -df.join(other) { name match right.fullName } -``` - - - - -```kotlin -df.join(other) { "name" match "fullName" } -``` - - - - - -If mapped columns have the same name, just select join columns from the left [`DataFrame`](DataFrame.md): - - - - - -```kotlin -df.join(other) { name and city } -``` - - - - -```kotlin -df.join(other, "name", "city") -``` - - - - - -If `joinColumns` is not specified, columns with the same name from both [`DataFrame`](DataFrame.md) objects will be used as join columns: - - - -```kotlin -df.join(other) -``` - - - - -### Join types - -Supported join types: -* `Inner` (default) — only matched rows from left and right [`DataFrame`](DataFrame.md) objects -* `Filter` — only matched rows from left [`DataFrame`](DataFrame.md) -* `Left` — all rows from left [`DataFrame`](DataFrame.md), mismatches from right [`DataFrame`](DataFrame.md) filled with `null` -* `Right` — all rows from right [`DataFrame`](DataFrame.md), mismatches from left 
[`DataFrame`](DataFrame.md) filled with `null` -* `Full` — all rows from left and right [`DataFrame`](DataFrame.md) objects, any mismatches filled with `null` -* `Exclude` — only mismatched rows from left [`DataFrame`](DataFrame.md) - -For every join type there is a shortcut operation: - - - - - -```kotlin -df.innerJoin(other) { name and city } -df.leftJoin(other) { name and city } -df.rightJoin(other) { name and city } -df.fullJoin(other) { name and city } -df.excludeJoin(other) { name and city } -``` - - - - -```kotlin -df.innerJoin(other, "name", "city") -df.leftJoin(other, "name", "city") -df.rightJoin(other, "name", "city") -df.fullJoin(other, "name", "city") -df.excludeJoin(other, "name", "city") -``` - - - - diff --git a/docs/StardustDocs/topics/operations/multiple/join.md b/docs/StardustDocs/topics/operations/multiple/join.md new file mode 100644 index 0000000000..e8ef4e2c0b --- /dev/null +++ b/docs/StardustDocs/topics/operations/multiple/join.md @@ -0,0 +1,393 @@ +[//]: # (title: join) + + + +Joins two [`DataFrame`](DataFrame.md) objects by join columns. + +A *join* creates a new dataframe by combining rows from two input dataframes according to one or more key columns. +Rows are merged when the values in the join columns match. +If there is no match, whether the row is included and how missing values are filled depends on the type of join (e.g., inner, left, right, full). + +Returns a new [`DataFrame`](DataFrame.md) that contains the merged rows and columns from both inputs. 
+ +```kotlin +join(otherDf, type = JoinType.Inner) [ { joinColumns } ] + +joinColumns: JoinDsl.(LeftDataFrame) -> Columns + +interface JoinDsl: LeftDataFrame { + + val right: RightDataFrame + + fun DataColumn.match(rightColumn: DataColumn) +} +``` + +`joinColumns` is a [column selector](ColumnSelectors.md) that defines column mapping for join. + +Related operations: [](multipleDataFrames.md) + +## Examples + +### Join with explicit keys (with different names) {collapsible="true"} + +Use the Join DSL when the key column names differ: + +- access the right `DataFrame` via `right`; +- define the join condition with **match()**. + + + +```kotlin +dfAges +``` + + + + + + + +```kotlin +dfCities +``` + + + + + + + +```kotlin +// INNER JOIN on differently named keys: +// Merge a row when dfAges.firstName == dfCities.name. +// With the given data all 3 names match → all rows merge. +dfAges.join(dfCities) { firstName match right.name } +``` + + + + + +### Join with explicit keys (with the same names) {collapsible="true"} + +If mapped columns have the same name, just select join columns (one or several) from the left [`DataFrame`](DataFrame.md): + + + +```kotlin +dfLeft +``` + + + + + + + + +```kotlin +dfRight +``` + + + + + + + +```kotlin +// INNER JOIN on "name" only: +// Merge when left.name == right.name. +// Duplicate keys produce multiple merged rows (one per pairing). +dfLeft.join(dfRight) { name } +``` + + + + + +> In this example, the "city" columns are not join columns, so their values may differ between the left and right dataframes. +> After joining, the "city" column from the right dataframe is included in the result dataframe +> with the name **"city1"** to avoid a name conflict. 
+> { style = "note" } + +### Join with implicit keys (all columns with the same name) {collapsible="true"} + +If `joinColumns` is not specified, columns with the same name from both [`DataFrame`](DataFrame.md) +objects will be used as join columns: + + + +```kotlin +dfLeft +``` + + + + + + + + +```kotlin +dfRight +``` + + + + + + + + +```kotlin +// INNER JOIN on all same-named columns ("name" and "city"): +// Merge when BOTH name AND city are equal; otherwise the row is dropped. +dfLeft.join(dfRight) { name and city } +``` + + + + + + +## Join types + +Supported join types: +* `Inner` (default) — only matched rows from left and right [`DataFrame`](DataFrame.md) objects +* `Filter` — only matched rows from left [`DataFrame`](DataFrame.md) +* `Left` — all rows from left [`DataFrame`](DataFrame.md), mismatches from right [`DataFrame`](DataFrame.md) filled with `null` +* `Right` — all rows from right [`DataFrame`](DataFrame.md), mismatches from left [`DataFrame`](DataFrame.md) filled with `null` +* `Full` — all rows from left and right [`DataFrame`](DataFrame.md) objects, any mismatches filled with `null` +* `Exclude` — only mismatched rows from left [`DataFrame`](DataFrame.md) + +For every join type there is a shortcut operation: + +```kotlin +df.innerJoin(otherDf) [ { joinColumns } ] +df.filterJoin(otherDf) [ { joinColumns } ] +df.leftJoin(otherDf) [ { joinColumns } ] +df.rightJoin(otherDf) [ { joinColumns } ] +df.fullJoin(otherDf) [ { joinColumns } ] +df.excludeJoin(otherDf) [ { joinColumns } ] +``` + + +### Examples {id="examples_1"} + +#### Inner {collapsible="true"} + + + +```kotlin +dfLeft +``` + + + + + + + +```kotlin +dfRight +``` + + + + + + + +```kotlin +// INNER JOIN: +// Keep only rows where (name, city) match on both sides. +// In this dataset only Alice–London and Charlie–Moscow match → 2 merged rows. 
+dfLeft.innerJoin(dfRight) { name and city } +``` + + + + + +#### Filter {collapsible="true"} + + + +```kotlin +dfLeft +``` + + + + + + + +```kotlin +dfRight +``` + + + + + + + + +```kotlin +// FILTER JOIN: +// Keep ONLY left rows that have ANY match on (name, city). +// No right-side columns are added. +dfLeft.filterJoin(dfRight) { name and city } +``` + + + + + +#### Left {collapsible="true"} + + + +```kotlin +dfLeft +``` + + + + + + + +```kotlin +dfRight +``` + + + + + + + + +```kotlin +// LEFT JOIN: +// Keep ALL left rows. If (name, city) matches, attach right columns; +// if not, right columns are null (e.g., Bob–Dubai has no right match). +dfLeft.leftJoin(dfRight) { name and city } +``` + + + + + +#### Right {collapsible="true"} + + + +```kotlin +dfLeft +``` + + + + + + + +```kotlin +dfRight +``` + + + + + + + + +```kotlin +// RIGHT JOIN: +// Keep ALL right rows. If no left match, left columns become null +// (e.g., Alice with city=null exists only on the right). +dfLeft.rightJoin(dfRight) { name and city } +``` + + + + + +#### Full {collapsible="true"} + + + +```kotlin +dfLeft +``` + + + + + + + +```kotlin +dfRight +``` + + + + + + + + +```kotlin +// FULL JOIN: +// Keep ALL rows from both sides. Where there's no match on (name, city), +// the other side is filled with nulls. +dfLeft.fullJoin(dfRight) { name and city } +``` + + + + + +#### Exclude {collapsible="true"} + + + +```kotlin +dfLeft +``` + + + + + + + +```kotlin +dfRight +``` + + + + + + + + +```kotlin +// EXCLUDE JOIN: +// Keep ONLY left rows that have NO match on (name, city). +// Useful to find "unpaired" left rows. 
+dfLeft.excludeJoin(dfRight) { name and city } +``` + + + + + diff --git a/gradle.properties b/gradle.properties index 47de21bfd9..532fc3a057 100644 --- a/gradle.properties +++ b/gradle.properties @@ -2,7 +2,7 @@ projectName=dataframe version=1.0.0 jupyterApiTCRepo= kotlin.jupyter.add.scanner=false -org.gradle.jvmargs=-Xmx4G -Duser.language=en -Duser.country=US -Dfile.encoding=UTF-8 +org.gradle.jvmargs=-Xmx16G -Duser.language=en -Duser.country=US -Dfile.encoding=UTF-8 # build.number.detection=false # build.number=0.8.0 diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 67515d2cd2..5891f4513c 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -63,7 +63,7 @@ jai-core = "1.1.3" jts = "1.20.0" # Normal examples Kandy versions -kandy = "0.8.1-dev-89" +kandy = "0.8.2-dev-90" # Example notebooks Kandy versions kandy-notebook = "0.8.1n" diff --git a/samples/build.gradle.kts b/samples/build.gradle.kts index 6311bffd63..fb101a8441 100644 --- a/samples/build.gradle.kts +++ b/samples/build.gradle.kts @@ -52,6 +52,10 @@ tasks.withType { } } +tasks.withType().configureEach { + friendPaths.from(project(projects.core.path).projectDir) +} + // get the output of the instrumentedJars configuration, aka the jar-files of the compiled modules // all modules with jar-task have this artifact in the DataFrame project val dependentProjectJarPaths = dependentProjects.map { @@ -108,6 +112,7 @@ korro { include("docs/StardustDocs/topics/format.md") include("docs/StardustDocs/topics/guides/*.md") include("docs/StardustDocs/topics/operations/utils/*.md") + include("docs/StardustDocs/topics/operations/multiple/*.md") include("docs/StardustDocs/topics/operations/column/*.md") include("docs/StardustDocs/topics/collectionsInterop/*.md") include("docs/StardustDocs/topics/dataSources/sql/*.md") @@ -118,6 +123,7 @@ korro { include("src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/*.kt") 
include("src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/*.kt") include("src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/utils/*.kt") + include("src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/multiple/*.kt") include("src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/render/*.kt") include("src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/collectionsInterop/*.kt") include("src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/column/*.kt") diff --git a/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/multiple/JoinSamples.kt b/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/multiple/JoinSamples.kt new file mode 100644 index 0000000000..b36d3df62b --- /dev/null +++ b/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/multiple/JoinSamples.kt @@ -0,0 +1,307 @@ +package org.jetbrains.kotlinx.dataframe.samples.api.multiple + +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.annotations.DataSchema +import org.jetbrains.kotlinx.dataframe.api.RgbColor +import org.jetbrains.kotlinx.dataframe.api.and +import org.jetbrains.kotlinx.dataframe.api.cast +import org.jetbrains.kotlinx.dataframe.api.dataFrameOf +import org.jetbrains.kotlinx.dataframe.api.excludeJoin +import org.jetbrains.kotlinx.dataframe.api.filterJoin +import org.jetbrains.kotlinx.dataframe.api.format +import org.jetbrains.kotlinx.dataframe.api.fullJoin +import org.jetbrains.kotlinx.dataframe.api.innerJoin +import org.jetbrains.kotlinx.dataframe.api.join +import org.jetbrains.kotlinx.dataframe.api.leftJoin +import org.jetbrains.kotlinx.dataframe.api.perRowCol +import org.jetbrains.kotlinx.dataframe.api.rightJoin +import org.jetbrains.kotlinx.dataframe.samples.DataFrameSampleHelper +import org.jetbrains.kotlinx.dataframe.util.defaultHeaderFormatting +import org.junit.Test + +class JoinSamples : DataFrameSampleHelper("join", "api") { + + @DataSchema + interface 
DfAges { + val age: Int + val firstName: String + } + + private val dfAges = dataFrameOf( + "firstName" to listOf("Alice", "Bob", "Charlie"), + "age" to listOf(14, 45, 20), + ).cast() + + @DataSchema + interface DfCities { + val city: String + val name: String + } + + private val dfCities = dataFrameOf( + "name" to listOf("Bob", "Alice", "Charlie"), + "city" to listOf("London", "Dubai", "Moscow"), + ).cast() + + @DataSchema + interface DfWithNameAndCity { + val name: String + val city: String? + } + + @DataSchema + interface DfLeft : DfWithNameAndCity { + val age: Int + override val city: String + override val name: String + } + + private val dfLeft = dataFrameOf( + "name" to listOf("Alice", "Bob", "Charlie", "Charlie"), + "age" to listOf(15, 45, 20, 40), + "city" to listOf("London", "Dubai", "Moscow", "Tokyo"), + ).cast() + + @DataSchema + interface DfRight : DfWithNameAndCity { + override val city: String? + val isBusy: Boolean + override val name: String + } + + private val dfRight = dataFrameOf( + "name" to listOf("Alice", "Bob", "Alice", "Charlie"), + "isBusy" to listOf(true, false, true, true), + "city" to listOf("London", "Tokyo", null, "Moscow"), + ).cast() + + private fun nameToColor(name: String): RgbColor = + when (name) { + "Alice" -> RgbColor(189, 206, 233) + "Bob" -> RgbColor(198, 224, 198) + "Charlie" -> RgbColor(219, 198, 230) + else -> RgbColor(255, 255, 255) + } + + private fun nameAndCityToColor(name: String, city: String?): RgbColor = + when (name to city) { + "Alice" to "London" -> RgbColor(242, 210, 189) + "Bob" to "Dubai" -> RgbColor(245, 226, 191) + "Charlie" to "Moscow" -> RgbColor(210, 229, 199) + "Charlie" to "Tokyo" -> RgbColor(191, 223, 232) + "Bob" to "Tokyo" -> RgbColor(200, 200, 232) + "Alice" to null -> RgbColor(233, 199, 220) + else -> RgbColor(255, 255, 255) + } + + private fun DataFrame.colorized() = + format().perRowCol { row, _ -> + val color = nameAndCityToColor(row["name"] as String, row["city"] as String?) 
+ background(color) and textColor(black) + } + + @Test + fun notebook_test_join_3() { + // SampleStart + dfAges + // SampleEnd + .format().perRowCol { row, _ -> + val color = nameToColor(row.firstName) + background(color) and textColor(black) + } + .defaultHeaderFormatting { firstName } + .saveDfHtmlSample() + } + + @Test + fun notebook_test_join_5() { + // SampleStart + dfCities + // SampleEnd + .format().perRowCol { row, _ -> + val color = nameToColor(row.name) + background(color) and textColor(black) + } + .defaultHeaderFormatting { name } + .saveDfHtmlSample() + } + + @Test + fun notebook_test_join_6() { + // SampleStart + // INNER JOIN on differently named keys: + // Merge a row when dfAges.firstName == dfCities.name. + // With the given data all 3 names match → all rows merge. + dfAges.join(dfCities) { firstName match right.name } + // SampleEnd + .format().perRowCol { row, _ -> + val color = nameToColor(row.firstName) + background(color) and textColor(black) + } + .defaultHeaderFormatting { firstName } + .saveDfHtmlSample() + } + + @Test + fun notebook_test_join_8() { + // SampleStart + dfLeft + // SampleEnd + .colorized() + .defaultHeaderFormatting { name } + .saveDfHtmlSample() + } + + @Test + fun notebook_test_join_10() { + // SampleStart + dfRight + // SampleEnd + .colorized() + .defaultHeaderFormatting { name } + .saveDfHtmlSample() + } + + @Test + fun notebook_test_join_11() { + // SampleStart + // INNER JOIN on "name" only: + // Merge when left.name == right.name. + // Duplicate keys produce multiple merged rows (one per pairing). 
+ dfLeft.join(dfRight) { name } + // SampleEnd + .colorized() + .defaultHeaderFormatting { name } + .saveDfHtmlSample() + } + + @Test + fun dfLeftImplicit() { + // SampleStart + dfLeft + // SampleEnd + .colorized() + .defaultHeaderFormatting { name and city } + .saveDfHtmlSample() + } + + @Test + fun dfRightImplicit() { + // SampleStart + dfRight + // SampleEnd + .colorized() + .defaultHeaderFormatting { name and city } + .saveDfHtmlSample() + } + + @Test + fun notebook_test_join_12() { + // SampleStart + // INNER JOIN on all same-named columns ("name" and "city"): + // Merge when BOTH name AND city are equal; otherwise the row is dropped. + dfLeft.join(dfRight) { name and city } + // SampleEnd + .colorized() + .defaultHeaderFormatting { name and city } + .saveDfHtmlSample() + } + + @Test + fun notebook_test_join_13() { + // SampleStart + dfLeft + // SampleEnd + .colorized() + .defaultHeaderFormatting { name and city } + .saveDfHtmlSample() + } + + @Test + fun notebook_test_join_14() { + // SampleStart + dfRight + // SampleEnd + .colorized() + .defaultHeaderFormatting { name and city } + .saveDfHtmlSample() + } + + @Test + fun notebook_test_join_15() { + // SampleStart + // INNER JOIN: + // Keep only rows where (name, city) match on both sides. + // In this dataset only Alice–London and Charlie–Moscow match → 2 merged rows. + dfLeft.innerJoin(dfRight) { name and city } + // SampleEnd + .colorized() + .defaultHeaderFormatting { name and city } + .saveDfHtmlSample() + } + + @Test + fun notebook_test_join_16() { + // SampleStart + // FILTER JOIN: + // Keep ONLY left rows that have ANY match on (name, city). + // No right-side columns are added. + dfLeft.filterJoin(dfRight) { name and city } + // SampleEnd + .colorized() + .defaultHeaderFormatting { name and city } + .saveDfHtmlSample() + } + + @Test + fun notebook_test_join_17() { + // SampleStart + // LEFT JOIN: + // Keep ALL left rows. 
If (name, city) matches, attach right columns; + // if not, right columns are null (e.g., Bob–Dubai has no right match). + dfLeft.leftJoin(dfRight) { name and city } + // SampleEnd + .colorized() + .defaultHeaderFormatting { name and city } + .saveDfHtmlSample() + } + + @Test + fun notebook_test_join_18() { + // SampleStart + // RIGHT JOIN: + // Keep ALL right rows. If no left match, left columns become null + // (e.g., Alice with city=null exists only on the right). + dfLeft.rightJoin(dfRight) { name and city } + // SampleEnd + .colorized() + .defaultHeaderFormatting { name and city } + .saveDfHtmlSample() + } + + @Test + fun notebook_test_join_19() { + // SampleStart + // FULL JOIN: + // Keep ALL rows from both sides. Where there's no match on (name, city), + // the other side is filled with nulls. + dfLeft.fullJoin(dfRight) { name and city } + // SampleEnd + .colorized() + .defaultHeaderFormatting { name and city } + .saveDfHtmlSample() + } + + @Test + fun notebook_test_join_20() { + // SampleStart + // EXCLUDE JOIN: + // Keep ONLY left rows that have NO match on (name, city). + // Useful to find "unpaired" left rows. 
+ dfLeft.excludeJoin(dfRight) { name and city } + // SampleEnd + .colorized() + .defaultHeaderFormatting { name and city } + .saveDfHtmlSample() + } +} diff --git a/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/util/defaultHeaderFormatting.kt b/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/util/defaultHeaderFormatting.kt new file mode 100644 index 0000000000..21a971919b --- /dev/null +++ b/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/util/defaultHeaderFormatting.kt @@ -0,0 +1,60 @@ +package org.jetbrains.kotlinx.dataframe.util + +import org.jetbrains.kotlinx.dataframe.ColumnsSelector +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.api.CellAttributes +import org.jetbrains.kotlinx.dataframe.api.FormattedFrame +import org.jetbrains.kotlinx.dataframe.api.FormattingDsl +import org.jetbrains.kotlinx.dataframe.api.and +import org.jetbrains.kotlinx.dataframe.api.formatHeader +import org.jetbrains.kotlinx.dataframe.api.getColumnsWithPaths +import org.jetbrains.kotlinx.dataframe.api.with + +internal val baseColorSet = listOf( + FormattingDsl.rgb(244, 67, 54), // red + FormattingDsl.rgb(33, 150, 243), // blue + FormattingDsl.rgb(76, 175, 80), // green + FormattingDsl.rgb(255, 152, 0), // orange + FormattingDsl.rgb(156, 39, 176), // purple + FormattingDsl.rgb(0, 150, 136), // teal + FormattingDsl.rgb(233, 30, 99), // pink/magenta +) + +internal val FormattingDsl.monospace: CellAttributes + get() = attr( + "font-family", + "ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, 'Liberation Mono', 'Courier New', monospace", + ) + +internal fun DataFrame.defaultHeaderFormatting(headers: ColumnsSelector): FormattedFrame { + val columns = getColumnsWithPaths(headers) + require(columns.size <= baseColorSet.size) { + "Too many headers: ${columns.size}. Max supported is ${baseColorSet.size}." 
+ } + + val start = formatHeader().with { null } + + return columns.foldIndexed(start) { idx, acc, header -> + acc.formatHeader { header } + .with { + textColor(baseColorSet[idx]) and monospace + } + } +} + +@Suppress("INVISIBLE_REFERENCE") +internal fun FormattedFrame.defaultHeaderFormatting(headers: ColumnsSelector): FormattedFrame { + val columns = df.getColumnsWithPaths(headers) + require(columns.size <= baseColorSet.size) { + "Too many headers: ${columns.size}. Max supported is ${baseColorSet.size}." + } + + val start = formatHeader().with { null } + + return columns.foldIndexed(start) { idx, acc, header -> + acc.formatHeader { header } + .with { + textColor(baseColorSet[idx]) and monospace + } + } +}