From 51b451fa4445feef112bd9a0d62535fbc6cca2e0 Mon Sep 17 00:00:00 2001 From: "andrei.kislitsyn" Date: Wed, 2 Jul 2025 20:39:30 +0400 Subject: [PATCH 1/3] count and countDistinct kdocs --- .../jetbrains/kotlinx/dataframe/DataFrame.kt | 5 + .../jetbrains/kotlinx/dataframe/api/count.kt | 247 ++++++++++++++++++ .../kotlinx/dataframe/api/countDistinct.kt | 61 +++++ .../jetbrains/kotlinx/dataframe/api/filter.kt | 4 +- .../documentation/DocumentationUrls.kt | 9 + .../documentation/ExtensionPropertiesAPI.kt | 43 +++ .../documentation/RowFilterDescription.kt | 11 + 7 files changed, 378 insertions(+), 2 deletions(-) create mode 100644 core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExtensionPropertiesAPI.kt create mode 100644 core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/RowFilterDescription.kt diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataFrame.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataFrame.kt index 26a5d88451..c43bb4ee86 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataFrame.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataFrame.kt @@ -68,6 +68,11 @@ public interface DataFrame : // region rows + /** + * Returns the total number of rows of this [DataFrame]. + * + * @return The number of rows in the [DataFrame]. 
+ */ public fun rowsCount(): Int public operator fun iterator(): Iterator> = rows().iterator() diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/count.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/count.kt index aa3e601225..b70b13fb4a 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/count.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/count.kt @@ -8,10 +8,24 @@ import org.jetbrains.kotlinx.dataframe.Predicate import org.jetbrains.kotlinx.dataframe.RowFilter import org.jetbrains.kotlinx.dataframe.annotations.Interpretable import org.jetbrains.kotlinx.dataframe.annotations.Refine +import org.jetbrains.kotlinx.dataframe.documentation.DocumentationUrls +import org.jetbrains.kotlinx.dataframe.documentation.RowFilterDescription import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateValue // region DataColumn +/** + * Counts the elements in this [DataColumn] that satisfy a given [predicate] or returns the total count + * if no predicate is provided. + * + * For more information: {@include [DocumentationUrls.Count]} + * + * @param predicate An optional predicate used to filter the elements. + * The predicate should return `true` for elements to be counted. + * If `null` (by default), all elements are counted. + * @return The count of elements in the column + * that either match the predicate or the total count of elements if no predicate is provided. + */ public fun DataColumn.count(predicate: Predicate? = null): Int = if (predicate == null) { size() @@ -23,27 +37,126 @@ public fun DataColumn.count(predicate: Predicate? = null): Int = // region DataRow +/** + * Returns the number of columns in this [DataRow]. + * + * For more information: {@include [DocumentationUrls.Count]} + * + * @return the number of columns in this row. + */ public fun AnyRow.count(): Int = columnsCount() +/** + * Counts the number of elements in the current row that satisfy the given [predicate]. 
+ * + * For more information: {@include [DocumentationUrls.Count]} + * + * @param predicate A predicate function to test each element. + * The predicate should return `true` for elements to be counted. + * @return The number of elements that satisfy the predicate. + */ public inline fun AnyRow.count(predicate: Predicate): Int = values().count(predicate) // endregion // region DataFrame +/** + * Returns the total number of rows of this [DataFrame]. + * + * For more information: {@include [DocumentationUrls.Count]} + * + * @return The number of rows in the [DataFrame]. + */ public fun DataFrame.count(): Int = rowsCount() +/** + * Counts the number of rows in this [DataFrame] that satisfy the given [predicate]. + * + * {@include [RowFilterDescription]} + * + * See also: + * - [filter][DataFrame.filter] — filters rows using a [RowFilter] condition. + * - [countDistinct][DataFrame.countDistinct] — counts distinct rows or values. + * + * For more information: {@include [DocumentationUrls.Count]} + * + * ### Example + * ```kotlin + * // Count rows where the value in the "age" column is greater than 18 + * // and the "name/firstName" column starts with 'A' + * df.count { age > 18 && name.firstName.startsWith("A") } + * // Count rows whose previous row has a "length" value of at least 50.0 + * df.count { prev()?.let { it.length >= 50.0 } ?: false } + * ``` + * + * @param T The schema marker type of the [DataFrame]. + * @param predicate A [RowFilter] that returns `true` for rows that should be counted. + * @return The number of rows that satisfy the predicate. + */ public inline fun DataFrame.count(predicate: RowFilter): Int = rows().count { predicate(it, it) } // endregion // region GroupBy +/** + * Aggregates this [GroupBy] by counting the number of rows in each group. + * + * Returns a new [DataFrame] where each row corresponds to a group. + * The resulting frame contains: + * - the original group key columns, + * - a new column (named [resultName], default is `"count"`) that contains the number of rows in each group. 
+ * + * This is equivalent to applying `.aggregate { count() }`, but more efficient. + * + * See also common [aggregate][Grouped.aggregate]. + * + * For more information: {@include [DocumentationUrls.Count]} + * + * ### Example + * ```kotlin + * // Counts number of rows for each city, returning + * // a new DataFrame with columns "city" and "count" + * df.groupBy { city }.count() + * ``` + * + * @param resultName The name of the result column that will store the group sizes. Defaults to `"count"`. + * @return A new [DataFrame] with group keys and corresponding group sizes. + */ @Refine @Interpretable("GroupByCount0") public fun Grouped.count(resultName: String = "count"): DataFrame = aggregateValue(resultName) { count() default 0 } +/** + * Aggregates this [GroupBy] by counting the number of rows in each group + * that satisfy the given [predicate]. + * + * {@include [RowFilterDescription]} + * + * Returns a new [DataFrame] where each row corresponds to a group. + * The resulting frame contains: + * - the original group key columns, + * - a new column (named [resultName], defaults to `"count"`) + * that stores the number of rows in each group matching the [predicate]. + * + * This is equivalent to calling `.aggregate { count(predicate) }`, but more efficient. + * + * See also: common [aggregate][Grouped.aggregate]. + * + * For more information: {@include [DocumentationUrls.Count]} + * + * ### Example + * ```kotlin + * // Count rows for each city where the "income" value is at least 30.0. + * // Returns a new DataFrame with columns "city" and "pointsCount". + * df.groupBy { city }.count("pointsCount") { income >= 30.0 } + * ``` + * + * @param resultName The name of the result column containing the group sizes. Defaults to `"count"`. + * @return A new [DataFrame] with group keys and filtered row counts per group. 
+ */ @Refine @Interpretable("GroupByCount0") public inline fun Grouped.count( @@ -55,16 +168,150 @@ public inline fun Grouped.count( // region Pivot +/** + * Aggregates this [Pivot] by counting the number of rows in each group. + * + * Returns a single [DataRow] where: + * - each column corresponds to a [pivot] group — if multiple pivot keys were used, + * the result will contain column groups for each pivot key, with columns inside + * corresponding to the values of that key; + * - each value contains the number of rows in that group. + * + * The original [Pivot] column structure is preserved. + * If the [Pivot] was created using multiple or nested keys + * (e.g., via [and][PivotDsl.and] or [then][PivotDsl.then]), + * the structure remains unchanged — only the contents of each group + * are replaced with the number of rows in that group. + * + * This is equivalent to calling `.aggregate { count() }`, but more efficient. + * + * See also: + * - common [aggregate][Pivot.aggregate]. + * - [pivotCounts][DataFrame.pivotCounts] shortcut. + * + * For more information: {@include [DocumentationUrls.Count]} + * + * ### Example + * ```kotlin + * // Count the number of rows for each city. + * // Returns a single DataRow with one column per city and the count of rows in each. + * df.pivot { city }.count() + * ``` + * + * @return A single [DataRow] with one column per group and the corresponding group size as its value. + */ public fun Pivot.count(): DataRow = delegate { count() } +/** + * Aggregates this [Pivot] by counting the number of rows in each group + * that satisfy the given [predicate]. + * + * {@include [RowFilterDescription]} + * + * Returns a single [DataRow] where: + * - each column corresponds to a [pivot] group — if multiple pivot keys were used, + * the result will contain column groups for each pivot key, with columns inside + * corresponding to the values of that key; + * - each value contains the number of rows in that group matching the [predicate]. 
+ * + * The original [Pivot] column structure is preserved. + * If the [Pivot] was created using multiple or nested keys + * (e.g., via [and][PivotDsl.and] or [then][PivotDsl.then]), + * the structure remains unchanged — only the contents of each group + * are replaced with the number of rows (matching the [predicate]) in that group. + * + * This is equivalent to calling `.aggregate { count(predicate) }`, but more efficient. + * + * See also: + * - common [aggregate][Pivot.aggregate]. + * - [pivotCounts][DataFrame.pivotCounts] shortcut. + * + * For more information: {@include [DocumentationUrls.Count]} + * + * ### Example + * ```kotlin + * // Count rows for each city where the "income" value is greater than 30.0. + * // Returns a single DataRow with one column per city and the count of matching rows. + * df.pivot { city }.count { income > 30.0 } + * ``` + * + * @return A single [DataRow] with original [Pivot] columns and filtered row counts per group. + */ public inline fun Pivot.count(crossinline predicate: RowFilter): DataRow = delegate { count(predicate) } // endregion // region PivotGroupBy +/** + * Aggregates this [PivotGroupBy] by counting the number of rows in each + * combined [pivot] + [groupBy] group. + * + * Returns a new [DataFrame] containing a following matrix: + * - one row per [groupBy] key (or keys set); + * - one column group per [pivot] key, where each inner column corresponds to a value of that key; + * - each cell contains the number of rows in the corresponding pivot–group pair. + * + * The original [Pivot] column structure is preserved. + * If the [Pivot] was created using multiple or nested keys + * (e.g., via [and][PivotDsl.and] or [then][PivotDsl.then]), + * the result will contain nested column groups reflecting that key structure, + * with each group containing columns for the values of the corresponding key. + * + * This is equivalent to calling `.aggregate { count() }`, but more efficient. 
+ * + * See also: + * - common [aggregate][PivotGroupBy.aggregate]; + * - [GroupBy.pivotCounts] shortcut. + * + * For more information: {@include [DocumentationUrls.Count]} + * + * ### Example + * ```kotlin + * // Compute a matrix with "city" values horizontally and + * // "age" values vertically, where each cell contains + * // the number of rows with the corresponding age–city pair. + * df.pivot { city }.groupBy { age }.count() + * ``` + * + * @return A [DataFrame] with [groupBy] rows and pivoted counts as columns. + */ public fun PivotGroupBy.count(): DataFrame = aggregate { count() default 0 } +/** + * Aggregates this [PivotGroupBy] by counting the number of rows in each + * combined [pivot] + [groupBy] group that satisfy the given [predicate]. + * + * Returns a new [DataFrame] containing the following matrix: + * - one row per [groupBy] key (or keys set); + * - one column group per [pivot] key, where each inner column corresponds to a value of that key; + * - each cell contains the number of rows in the corresponding pivot–group pair that satisfy the [predicate]. + * + * The original [Pivot] column structure is preserved. + * If the [Pivot] was created using multiple or nested keys + * (e.g., via [and][PivotDsl.and] or [then][PivotDsl.then]), + * the result will contain nested column groups reflecting that key structure, + * with each group containing columns for the values + * (matching the [predicate]) of the corresponding key. + * + * This is equivalent to calling `.aggregate { count() }`, but more efficient. + * + * See also: + * - common [aggregate][PivotGroupBy.aggregate]; + * - [GroupBy.pivotCounts] shortcut. + * + * For more information: {@include [DocumentationUrls.Count]} + * + * ### Example + * ```kotlin + * // Compute a matrix with "city" values horizontally and + * // "age" values vertically, where each cell contains + * // the number of rows with that age–city pair and "income" above 30.0. 
+ * df.pivot { city }.groupBy { age }.count { income > 30.0 } + * ``` + * + * @return A [DataFrame] with [groupBy] rows and, as columns, pivoted counts of rows matching the [predicate]. + */ public inline fun PivotGroupBy.count(crossinline predicate: RowFilter): DataFrame = aggregate { count(predicate) default diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/countDistinct.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/countDistinct.kt index b6c2a79279..5994021ede 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/countDistinct.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/countDistinct.kt @@ -6,19 +6,80 @@ import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.annotations.AccessApiOverload import org.jetbrains.kotlinx.dataframe.columns.toColumnSet +import org.jetbrains.kotlinx.dataframe.documentation.DocumentationUrls +import org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns import org.jetbrains.kotlinx.dataframe.indices import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API import kotlin.reflect.KProperty // region DataFrame +/** + * Returns the number of distinct rows in this [DataFrame]. + * + * Compares rows based on the values in all columns and returns + * the number of unique row combinations. + * + * See also: + * - [distinct][DataFrame.distinct], which removes duplicate rows and returns a new [DataFrame]. + * - [count][DataFrame.count], which counts the number of rows satisfying a given predicate. + * + * For more information: {@include [DocumentationUrls.CountDistinct]} + * + * @return The number of distinct rows in this [DataFrame]. + */ public fun AnyFrame.countDistinct(): Int = countDistinct { all() } +/** + * Returns number of distinct combinations of values in selected [columns] in this [DataFrame]. 
+ * + * Compares values in the selected columns and returns + * the number of unique values combinations. + * + * See also: + * - [distinct][DataFrame.distinct], which removes duplicate rows and returns a new [DataFrame]. + * - [count][DataFrame.count], which counts the number of rows satisfying a given predicate. + * + * For more information: {@include [DocumentationUrls.CountDistinct]} + * + * ### This [countDistinct] overload + */ +internal interface CountDistinctDocs + + +/** + * {@include [CountDistinctDocs]} + * {@include [SelectingColumns.Dsl]} + * + * #### Example + * + * ```kotlin + * // Counts unique combinations of values in the "year" and "title" columns + * // across all rows in the DataFrame + * df.countDistinct { year and title } + * ``` + * + * @return The number of distinct combinations of values in the selected [columns]. + */ public fun DataFrame.countDistinct(columns: ColumnsSelector): Int { val cols = get(columns) return indices.distinctBy { i -> cols.map { it[i] } }.size } +/** + * {@include [CountDistinctDocs]} + * {@include [SelectingColumns.ColumnNames]} + * + * #### Example + * + * ```kotlin + * // Counts unique combinations of values in the "year" and "title" columns + * // across all rows in the DataFrame + * df.countDistinct("year", "title") + * ``` + * + * @return The number of distinct combinations of values in the selected [columns]. 
+ */ public fun DataFrame.countDistinct(vararg columns: String): Int = countDistinct { columns.toColumnSet() } @Deprecated(DEPRECATED_ACCESS_API) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/filter.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/filter.kt index 5dbbcd3fbf..84e1a0439e 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/filter.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/filter.kt @@ -19,6 +19,7 @@ import org.jetbrains.kotlinx.dataframe.documentation.DslGrammarTemplateColumnsSe import org.jetbrains.kotlinx.dataframe.documentation.ExcludeFromSources import org.jetbrains.kotlinx.dataframe.documentation.Indent import org.jetbrains.kotlinx.dataframe.documentation.LineBreak +import org.jetbrains.kotlinx.dataframe.documentation.RowFilterDescription import org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns import org.jetbrains.kotlinx.dataframe.impl.columns.TransformableColumnSet import org.jetbrains.kotlinx.dataframe.impl.getTrueIndices @@ -49,8 +50,7 @@ public inline fun DataColumn.filter(predicate: Predicate): DataColumn< * Filters the rows of this [DataFrame] based on the provided [RowFilter]. * Returns a new [DataFrame] containing only the rows that satisfy the given [predicate]. * - * A [RowFilter] provides each row as a lambda argument, allowing you to define filtering logic - * using a [Boolean] condition. 
+ * {@include [RowFilterDescription]} * * @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention] * diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt index 19f2f73e30..a4a80aa006 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt @@ -26,6 +26,9 @@ internal interface DocumentationUrls { /** [See Column Selectors on the documentation website.]({@include [Url]}/columnselectors.html) */ interface ColumnSelectors + /** [See Extension Properties API on the documentation website.]({@include [Url]}/extensionpropertiesapi.html) */ + interface ExtensionPropertiesApi + interface DataRow { /** [See Row Expressions on the documentation website.]({@include [Url]}/datarow.html#row-expressions) */ interface RowExpressions @@ -113,4 +116,10 @@ internal interface DocumentationUrls { /** [See `filter` on the documentation website.]({@include [Url]}/filter.html) */ interface Filter + + /** [See `count` on the documentation website.]({@include [Url]}/count.html) */ + interface Count + + /** [See `countDistinct` on the documentation website.]({@include [Url]}/countdistinct.html) */ + interface CountDistinct } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExtensionPropertiesAPI.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExtensionPropertiesAPI.kt new file mode 100644 index 0000000000..024af0daf9 --- /dev/null +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExtensionPropertiesAPI.kt @@ -0,0 +1,43 @@ +package org.jetbrains.kotlinx.dataframe.documentation + +import org.jetbrains.kotlinx.dataframe.DataFrame + +/** + * ## Extension Properties API + * + * When working with a [DataFrame], the most convenient and reliable 
way to access its columns — + * including for operations and retrieving column values in row expressions — is through auto-generated extension properties. + * + * These properties are generated based on the + * [DataFrame schema][org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema], + * with their names and types inferred from the names and types of the corresponding columns. + * This also works for hierarchical [DataFrame] structures (i.e., column groups). + * + * ### Example + * + * Given the following [DataFrame]: + * + * | name | age | height | + * |-------|-----|--------| + * | Alice | 23 | 175.5 | + * | Bob | 27 | 160.2 | + * + * You can access columns using extension properties in a type-safe way, avoiding typos and relying on autocompletion. + * These properties can be used in: + * - [Columns Selection DSL][SelectingColumns.Dsl.WithExample] + * - [DataRow Expressions][ExpressionsGivenRow] + * + * ```kotlin + * // Access the "name" column + * df.name + * + * // Select the "age" and "height" columns + * df.select { age and height } + * + * // Filter rows where "age" > 18 and "name" starts with 'A' + * df.filter { age > 18 && name.startsWith("A") } + * ``` + * + * For more information, see: {@include [DocumentationUrls.ExtensionPropertiesApi]} + */ +internal interface ExtensionPropertiesAPIDocs diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/RowFilterDescription.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/RowFilterDescription.kt new file mode 100644 index 0000000000..dcafe287f2 --- /dev/null +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/RowFilterDescription.kt @@ -0,0 +1,11 @@ +package org.jetbrains.kotlinx.dataframe.documentation + +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.RowFilter + +/** + * The [predicate] is a [RowFilter] — a lambda that receives each [DataRow] both as `this` and `it`, + * allowing you to define a 
[Boolean] condition using the row's values, + * including through [extension properties][ExtensionPropertiesAPIDocs] for convenient access. + */ +internal interface RowFilterDescription From e0c579859b712abdf41c759c25689724d0ea299b Mon Sep 17 00:00:00 2001 From: "andrei.kislitsyn" Date: Thu, 3 Jul 2025 18:32:42 +0400 Subject: [PATCH 2/3] fix ktlint --- .../kotlin/org/jetbrains/kotlinx/dataframe/api/countDistinct.kt | 1 - 1 file changed, 1 deletion(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/countDistinct.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/countDistinct.kt index 5994021ede..a2e2f9f105 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/countDistinct.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/countDistinct.kt @@ -46,7 +46,6 @@ public fun AnyFrame.countDistinct(): Int = countDistinct { all() } */ internal interface CountDistinctDocs - /** * {@include [CountDistinctDocs]} * {@include [SelectingColumns.Dsl]} From febc3554fa6ad0c06d079c1ee28d14421b1a3ef3 Mon Sep 17 00:00:00 2001 From: "andrei.kislitsyn" Date: Thu, 10 Jul 2025 14:15:22 +0400 Subject: [PATCH 3/3] fix count kdocs --- .../org/jetbrains/kotlinx/dataframe/api/count.kt | 16 ++++++++-------- .../kotlinx/dataframe/api/countDistinct.kt | 4 +++- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/count.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/count.kt index b70b13fb4a..5acaf176d3 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/count.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/count.kt @@ -40,17 +40,14 @@ public fun DataColumn.count(predicate: Predicate? = null): Int = /** * Returns the number of columns in this [DataRow]. * - * For more information: {@include [DocumentationUrls.Count]} - * * @return the number of columns in this row. + * @see [columnsCount]. 
*/ public fun AnyRow.count(): Int = columnsCount() /** * Counts the number of elements in the current row that satisfy the given [predicate]. * - * For more information: {@include [DocumentationUrls.Count]} - * * @param predicate A predicate function to test each element. * The predicate should return `true` for elements to be counted. * @return The number of elements that satisfy the predicate. @@ -110,7 +107,7 @@ public inline fun DataFrame.count(predicate: RowFilter): Int = rows(). * * This is equivalent to applying `.aggregate { count() }`, but more efficient. * - * See also common [aggregate][Grouped.aggregate]. + * See also [DataFrame.groupBy] and common [aggregate][Grouped.aggregate]. * * For more information: {@include [DocumentationUrls.Count]} * @@ -143,7 +140,7 @@ public fun Grouped.count(resultName: String = "count"): DataFrame = * * This is equivalent to calling `.aggregate { count(predicate) }`, but more efficient. * - * See also: common [aggregate][Grouped.aggregate]. + * See also [DataFrame.groupBy] and common [aggregate][Grouped.aggregate]. * * For more information: {@include [DocumentationUrls.Count]} * @@ -186,6 +183,7 @@ public inline fun Grouped.count( * This is equivalent to calling `.aggregate { count() }`, but more efficient. * * See also: + * - [pivot]. * - common [aggregate][Pivot.aggregate]. * - [pivotCounts][DataFrame.pivotCounts] shortcut. * @@ -223,6 +221,7 @@ public fun Pivot.count(): DataRow = delegate { count() } * This is equivalent to calling `.aggregate { count(predicate) }`, but more efficient. * * See also: + * - [pivot]. * - common [aggregate][Pivot.aggregate]. * - [pivotCounts][DataFrame.pivotCounts] shortcut. * @@ -261,6 +260,7 @@ public inline fun Pivot.count(crossinline predicate: RowFilter): DataR * This is equivalent to calling `.aggregate { count() }`, but more efficient. * * See also: + * - [pivot], [DataFrame.groupBy], [Pivot.groupBy] and [GroupBy.pivot]. 
* - common [aggregate][PivotGroupBy.aggregate]; * - [GroupBy.pivotCounts] shortcut. * @@ -297,6 +297,7 @@ public fun PivotGroupBy.count(): DataFrame = aggregate { count() defau * This is equivalent to calling `.aggregate { count() }`, but more efficient. * * See also: + * - [pivot], [DataFrame.groupBy], [Pivot.groupBy] and [GroupBy.pivot]. * - common [aggregate][PivotGroupBy.aggregate]; * - [GroupBy.pivotCounts] shortcut. * @@ -314,8 +315,7 @@ public fun PivotGroupBy.count(): DataFrame = aggregate { count() defau */ public inline fun PivotGroupBy.count(crossinline predicate: RowFilter): DataFrame = aggregate { - count(predicate) default - 0 + count(predicate) default 0 } // endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/countDistinct.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/countDistinct.kt index a2e2f9f105..2c05edff2f 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/countDistinct.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/countDistinct.kt @@ -7,6 +7,7 @@ import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.annotations.AccessApiOverload import org.jetbrains.kotlinx.dataframe.columns.toColumnSet import org.jetbrains.kotlinx.dataframe.documentation.DocumentationUrls +import org.jetbrains.kotlinx.dataframe.documentation.ExcludeFromSources import org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns import org.jetbrains.kotlinx.dataframe.indices import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API @@ -31,7 +32,7 @@ import kotlin.reflect.KProperty public fun AnyFrame.countDistinct(): Int = countDistinct { all() } /** - * Returns number of distinct combinations of values in selected [columns] in this [DataFrame]. + * Returns number of distinct combinations of values in selected [columns\] in this [DataFrame]. * * Compares values in the selected columns and returns * the number of unique values combinations. 
@@ -44,6 +45,7 @@ public fun AnyFrame.countDistinct(): Int = countDistinct { all() } * * ### This [countDistinct] overload */ +@ExcludeFromSources internal interface CountDistinctDocs /**