Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
package org.jetbrains.kotlinx.dataframe.examples.movies

import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
import org.jetbrains.kotlinx.dataframe.api.*
import org.jetbrains.kotlinx.dataframe.io.*


private const val pathToCsv = "examples/idea-examples/movies/src/main/resources/movies.csv"
// Uncomment this line if you want to copy-paste and run the code in your project without downloading the file
//private const val pathToCsv = "https://raw.githubusercontent.com/Kotlin/dataframe/master/examples/idea-examples/movies/src/main/resources/movies.csv"

/**
 * Movies example: reads the movies CSV, normalizes it into one row per (movie, genre),
 * then reshapes it into a per-year table pivoted by genre and prints both stages.
 */
fun main() {
    // This example shows how to use the extension properties API to address columns in different operations
    // https://kotlin.github.io/dataframe/apilevels.html
    // NOTE(review): the operations below address columns by string name ("genres", "title", "year"),
    // not via extension properties — confirm which API level this example is meant to demonstrate.

    // Add the Gradle plugin and run `assemble`
    // check the README https://github.com/Kotlin/dataframe?tab=readme-ov-file#setup

    // Compile the patterns once instead of once per row inside the `by { }` lambda.
    // Matches a trailing "(yyyy)" together with the whitespace around it.
    val trailingYearRegex = """\s*\(\d{4}\)\s*$""".toRegex()
    // Matches any run of four digits; the last match in a title is taken as the year.
    val fourDigitsRegex = "\\d{4}".toRegex()

    val step1 = DataFrame
        .read(pathToCsv).convertTo<Movie>()
        // Turn the "|"-separated genres string into a list, keeping the column in place.
        .split("genres").by("|").inplace()
        // Split the raw title into a cleaned title and a numeric year.
        .split { "title"<String>() }.by { rawTitle ->
            listOf(
                trailingYearRegex.replace(rawTitle, ""),
                // -1 marks "no year found"; such rows are filtered out in step2.
                fourDigitsRegex.findAll(rawTitle).lastOrNull()?.value?.toIntOrNull() ?: -1,
            )
        }.into("title", "year")
        // One row per genre.
        .explode("genres")
    step1.print()

    /**
     * Data is parsed and prepared for aggregation
     *                            movieId           title year      genres
     * 0 9b30aff7943f44579e92c261f3adc193  Women in Black 1997     Fantasy
     * 1 9b30aff7943f44579e92c261f3adc193  Women in Black 1997 Suspenseful
     * 2 9b30aff7943f44579e92c261f3adc193  Women in Black 1997      Comedy
     * 3 2a1ba1fc5caf492a80188e032995843e Bumblebee Movie 2007      Comedy
     * 4 2a1ba1fc5caf492a80188e032995843e Bumblebee Movie 2007        Jazz
     * 5 2a1ba1fc5caf492a80188e032995843e Bumblebee Movie 2007      Family
     * 6 2a1ba1fc5caf492a80188e032995843e Bumblebee Movie 2007   Animation
     */
    val step2 = step1
        // Drop rows without a parsed year (the -1 sentinel) and rows with the placeholder genre.
        .filter { "year"<Int>() >= 0 && "genres"<String>() != "(no genres listed)" }
        .groupBy("year")
        .sortBy("year")
        .pivot("genres", inward = false)
        .aggregate {
            count() into "count"
            mean() into "mean"
        }

    step2.print(10)
    // Discover the final reshaped data in an interactive HTML table
    // step2.toStandaloneHTML().openInBrowser()
}
5 changes: 5 additions & 0 deletions examples/kotlin-dataframe-plugin-example/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,16 @@ plugins {
val kotlinVersion = "2.2.20"
kotlin("jvm") version kotlinVersion
kotlin("plugin.dataframe") version kotlinVersion
application
}

group = "org.example"
version = "1.0-SNAPSHOT"

// Entry point class used by the `application` plugin applied in the `plugins` block.
// NOTE(review): `MainKt` presumably corresponds to a `Main.kt` file with a top-level `main`
// in package `org.jetbrains.kotlinx.dataframe.examples.plugin` — confirm the file name.
application {
mainClass.set("org.jetbrains.kotlinx.dataframe.examples.plugin.MainKt")
}

repositories {
maven("https://packages.jetbrains.team/maven/p/kt/dev/")
mavenCentral()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package org.jetbrains.kotlinx.dataframe.examples.plugin

import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
import org.jetbrains.kotlinx.dataframe.api.add
import org.jetbrains.kotlinx.dataframe.api.convert
import org.jetbrains.kotlinx.dataframe.api.convertTo
import org.jetbrains.kotlinx.dataframe.api.filter
import org.jetbrains.kotlinx.dataframe.api.into
import org.jetbrains.kotlinx.dataframe.api.rename
import org.jetbrains.kotlinx.dataframe.api.renameToCamelCase
import org.jetbrains.kotlinx.dataframe.api.with
import org.jetbrains.kotlinx.dataframe.io.readCsv
import org.jetbrains.kotlinx.dataframe.io.writeCsv
import java.net.URL

/**
 * Compiler-plugin example: reads the JetBrains repositories CSV, derives a few columns,
 * and writes the result to `jetbrains_repositories_new.csv`.
 */
fun main() {
    val repos = DataFrame
        // Read DataFrame from the CSV file.
        .readCsv("https://raw.githubusercontent.com/Kotlin/dataframe/master/data/jetbrains_repositories.csv")
        // And convert it to match the `Repositories` schema.
        .convertTo<Repositories>()

    // With Compiler Plugin, the DataFrame schema changes immediately after each operation:
    // For example, if a new column is added or the old one is renamed (or its type is changed)
    // during the operation, you can use the new name immediately in the following operations:
    // NOTE(review): the result of this chain is not assigned or printed — presumably it is
    // here for demonstration only; confirm.
    repos
        // Add a new "name" column...
        // (the column is still called "full_name" here: the camel-case rename happens later)
        .add("name") { "full_name"<String>().substringAfterLast("/") }
        // ... and now we can use "name" extension in DataFrame operations, such as `filter`.
        .filter { name.lowercase().contains("kotlin") }

    // Let's update the DataFrame with some operations using these features.
    val reposUpdated = repos
        // Rename columns to CamelCase.
        // Note that after that, in the following operations, extension properties will have
        // new names corresponding to the column names.
        .renameToCamelCase()
        // Rename "stargazersCount" column to "stars".
        // The accessor is typed as Int to agree with the `"stars"<Int>()` read below.
        .rename { "stargazersCount"<Int>() }.into("stars")
        // And we can immediately use the updated name in the filtering.
        .filter { "stars"<Int>() > 50 }
        // Convert values in the "topics" column (which were `String` initially)
        // to the list of topics.
        .convert { "topics"<String>() }.with {
            val inner = it.removeSurrounding("[", "]")
            if (inner.isEmpty()) emptyList() else inner.split(',').map(String::trim)
        }
        // Now "topics" is a `List<String>` column.
        // Add a new column with the number of topics.
        .add("topicCount") { "topics"<List<String>>().size }
        // Add a new column with the kind of repository.
        // After `renameToCamelCase()` the "full_name" column is named "fullName",
        // so it must be addressed by the new name here.
        .add("kind") { getKind("fullName"(), "topics"()) }

    // Write the updated DataFrame to a CSV file.
    reposUpdated.writeCsv("jetbrains_repositories_new.csv")

    // TODO: Add Kandy Plot
    // NOTE(review): the sketch below still refers to `stargazersCount`, which is renamed
    // to "stars" above — update it when the plot is enabled.
    //  reposUpdated.groupBy { kind }.max { stargazersCount }.plot {
    //      bars {
    //          x(kind)
    //          y(stargazersCount)
    //      }
    //  }
}