diff --git a/examples/idea-examples/movies/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/movies/moviesWithInterfaceStringAPI.kt b/examples/idea-examples/movies/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/movies/moviesWithInterfaceStringAPI.kt new file mode 100644 index 0000000000..fa067b9b97 --- /dev/null +++ b/examples/idea-examples/movies/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/movies/moviesWithInterfaceStringAPI.kt @@ -0,0 +1,55 @@ +package org.jetbrains.kotlinx.dataframe.examples.movies + +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.annotations.DataSchema +import org.jetbrains.kotlinx.dataframe.api.* +import org.jetbrains.kotlinx.dataframe.io.* + + +private const val pathToCsv = "examples/idea-examples/movies/src/main/resources/movies.csv" +// Uncomment this line if you want to copy-paste and run the code in your project without downloading the file +//private const val pathToCsv = "https://raw.githubusercontent.com/Kotlin/dataframe/master/examples/idea-examples/movies/src/main/resources/movies.csv" + +fun main() { + // This example shows how to use the String API to address columns in different operations + // https://kotlin.github.io/dataframe/apilevels.html + + // Add the Gradle plugin and run `assemble` + // check the README https://github.com/Kotlin/dataframe?tab=readme-ov-file#setup + val step1 = DataFrame + .read(pathToCsv).convertTo() + .split("genres").by("|").inplace() + .split { "title"() }.by { + listOf( + """\s*\(\d{4}\)\s*$""".toRegex().replace(it, ""), + "\\d{4}".toRegex().findAll(it).lastOrNull()?.value?.toIntOrNull() ?: -1 + ) + }.into("title", "year") + .explode("genres") + step1.print() + + /** + * Data is parsed and prepared for aggregation + * movieId title year genres + * 0 9b30aff7943f44579e92c261f3adc193 Women in Black 1997 Fantasy + * 1 9b30aff7943f44579e92c261f3adc193 Women in Black 1997 Suspenseful + * 2 9b30aff7943f44579e92c261f3adc193 
Women in Black 1997 Comedy + * 3 2a1ba1fc5caf492a80188e032995843e Bumblebee Movie 2007 Comedy + * 4 2a1ba1fc5caf492a80188e032995843e Bumblebee Movie 2007 Jazz + * 5 2a1ba1fc5caf492a80188e032995843e Bumblebee Movie 2007 Family + * 6 2a1ba1fc5caf492a80188e032995843e Bumblebee Movie 2007 Animation + */ + val step2 = step1 + .filter { "year"() >= 0 && "genres"() != "(no genres listed)" } + .groupBy("year") + .sortBy("year") + .pivot("genres", inward = false) + .aggregate { + count() into "count" + mean() into "mean" + } + + step2.print(10) +// Discover the final reshaped data in an interactive HTML table +// step2.toStandaloneHTML().openInBrowser() +} diff --git a/examples/kotlin-dataframe-plugin-example/build.gradle.kts b/examples/kotlin-dataframe-plugin-example/build.gradle.kts index 6a54cd4e17..1ab07202ba 100644 --- a/examples/kotlin-dataframe-plugin-example/build.gradle.kts +++ b/examples/kotlin-dataframe-plugin-example/build.gradle.kts @@ -6,11 +6,16 @@ plugins { val kotlinVersion = "2.2.20" kotlin("jvm") version kotlinVersion kotlin("plugin.dataframe") version kotlinVersion + application } group = "org.example" version = "1.0-SNAPSHOT" +application { + mainClass.set("org.jetbrains.kotlinx.dataframe.examples.plugin.MainKt") +} + repositories { maven("https://packages.jetbrains.team/maven/p/kt/dev/") mavenCentral() diff --git a/examples/kotlin-dataframe-plugin-example/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/plugin/MainStringAPI.kt b/examples/kotlin-dataframe-plugin-example/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/plugin/MainStringAPI.kt new file mode 100644 index 0000000000..4c1bf142a2 --- /dev/null +++ b/examples/kotlin-dataframe-plugin-example/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/plugin/MainStringAPI.kt @@ -0,0 +1,65 @@ +package org.jetbrains.kotlinx.dataframe.examples.plugin + +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.annotations.DataSchema +import 
org.jetbrains.kotlinx.dataframe.api.add +import org.jetbrains.kotlinx.dataframe.api.convert +import org.jetbrains.kotlinx.dataframe.api.convertTo +import org.jetbrains.kotlinx.dataframe.api.filter +import org.jetbrains.kotlinx.dataframe.api.into +import org.jetbrains.kotlinx.dataframe.api.rename +import org.jetbrains.kotlinx.dataframe.api.renameToCamelCase +import org.jetbrains.kotlinx.dataframe.api.with +import org.jetbrains.kotlinx.dataframe.io.readCsv +import org.jetbrains.kotlinx.dataframe.io.writeCsv +import java.net.URL + +fun main() { + val repos = DataFrame + // Read DataFrame from the CSV file. + .readCsv("https://raw.githubusercontent.com/Kotlin/dataframe/master/data/jetbrains_repositories.csv") + // And convert it to match the `Repositories` schema. + .convertTo() + + // With Compiler Plugin, the DataFrame schema changes immediately after each operation: + // For example, if a new column is added or the old one is renamed (or its type is changed) + // during the operation, you can use the new name immediately in the following operations: + repos + // Add a new "name" column... + .add("name") { "full_name"().substringAfterLast("/") } + // ... and now we can use "name" extension in DataFrame operations, such as `filter`. + .filter { name.lowercase().contains("kotlin") } + + // Let's update the DataFrame with some operations using these features. + val reposUpdated = repos + // Rename columns to CamelCase. + // Note that after that, in the following operations, extension properties will have + // new names corresponding to the column names. + .renameToCamelCase() + // Rename "stargazersCount" column to "stars". + .rename { "stargazersCount"() }.into("stars") + // And we can immediately use the updated name in the filtering. + .filter { "stars"() > 50 } + // Convert values in the "topics" column (which were `String` initially) + // to the list of topics. 
+ .convert { "topics"() }.with { + val inner = it.removeSurrounding("[", "]") + if (inner.isEmpty()) emptyList() else inner.split(',').map(String::trim) + } + // Now "topics" is a `List<String>` column. + // Add a new column with the number of topics. + .add("topicCount") { "topics">().size } + // Add a new column with the kind of repository. + .add("kind") { getKind("full_name"(), "topics"()) } + + // Write the updated DataFrame to a CSV file. + reposUpdated.writeCsv("jetbrains_repositories_new.csv") + + // TODO: Add Kandy Plot + // reposUpdated.groupBy { kind }.max { stargazersCount }.plot { + // bars { + // x(kind) + // y(stargazersCount) + // } + // } +}