diff --git a/polars-cheat-sheet.ipynb b/polars-cheat-sheet.ipynb index 104d67b..08fecb8 100644 --- a/polars-cheat-sheet.ipynb +++ b/polars-cheat-sheet.ipynb @@ -118,7 +118,7 @@ "metadata": {}, "outputs": [], "source": [ - "df.filter(pl.col(\"nrs\") < 4).groupby(\"groups\").agg(pl.all().sum())" + "df.filter(pl.col(\"nrs\") < 4).group_by(\"groups\").agg(pl.all().sum())" ] }, { @@ -150,7 +150,7 @@ "source": [ "# Sample\n", "# Randomly select fraction of rows.\n", - "df.sample(frac=0.5)\n", + "df.sample(fraction=0.5)\n", "\n", "# Randomly select n rows.\n", "df.sample(n=2)" @@ -240,7 +240,10 @@ "outputs": [], "source": [ "# Select rows meeting logical condition, and only the specific columns.\n", - "df[df[\"random\"] > 0.5, [\"names\", \"groups\"]]" + "(df\n", + " .filter(pl.col(\"random\") > 0.5)\n", + " .select(['names', 'groups'])\n", + ")" ] }, { @@ -339,7 +342,7 @@ "outputs": [], "source": [ "# Order rows by values of a column (high to low)\n", - "df.sort(\"random\", reverse=True)" + "df.sort(\"random\", descending=True)" ] }, { @@ -481,7 +484,7 @@ "outputs": [], "source": [ "# Group by values in column named \"col\", returning a GroupBy object\n", - "df.groupby(\"groups\")" + "df.group_by(\"groups\")" ] }, { @@ -492,7 +495,7 @@ "outputs": [], "source": [ "# All of the aggregation functions from above can be applied to a group as well\n", - "df.groupby(by=\"groups\").agg(\n", + "df.group_by(\"groups\").agg(\n", " [\n", " # Sum values\n", " pl.sum(\"random\").alias(\"sum\"),\n", @@ -528,12 +531,12 @@ "outputs": [], "source": [ "# Additional GroupBy functions\n", - "df.groupby(by=\"groups\").agg(\n", + "df.group_by(\"groups\").agg(\n", " [\n", " # Count the number of values in each group\n", " pl.count(\"random\").alias(\"size\"),\n", " # Sample one element in each group\n", - " pl.col(\"names\").apply(lambda group_df: group_df.sample(1)),\n", + " pl.col(\"names\").map_elements(lambda group_df: group_df.sample(1)),\n", " ]\n", ")" ] @@ -607,7 +610,7 @@ "outputs": 
[], "source": [ "# Add a new column to the DataFrame\n", - "df.with_column((pl.col(\"random\") * pl.col(\"nrs\")).alias(\"product\"))" + "df.with_columns((pl.col(\"random\") * pl.col(\"nrs\")).alias(\"product\"))" ] }, { @@ -621,7 +624,7 @@ "df.with_columns(\n", " [\n", " (pl.col(\"random\") * pl.col(\"nrs\")).alias(\"product\"),\n", - " pl.col(\"names\").str.lengths().alias(\"names_lengths\"),\n", + " pl.col(\"names\").str.len_bytes().alias(\"names_len_bytes\"),\n", " ]\n", ")" ] @@ -634,7 +637,7 @@ "outputs": [], "source": [ "# Add a column at index 0 that counts the rows\n", - "df.with_row_count()" + "df.with_row_index()" ] }, { @@ -684,8 +687,8 @@ " pl.col(\"random\").rolling_skew(window_size=2).alias(\"rolling_skew\"),\n", " # Rolling custom function\n", " pl.col(\"random\")\n", - " .rolling_apply(function=np.nanstd, window_size=2)\n", - " .alias(\"rolling_apply\"),\n", + " .rolling_map(function=np.nanstd, window_size=2)\n", + " .alias(\"rolling_map\"),\n", " ]\n", ")" ] diff --git a/src/index.html b/src/index.html index 1b597a1..f7ddb80 100644 --- a/src/index.html +++ b/src/index.html @@ -51,7 +51,7 @@
Sample
# Randomly select fraction of rows.
-df.sample(frac=0.5)
+df.sample(fraction=0.5)
# Randomly select n rows.
df.sample(n=2)
@@ -216,7 +216,7 @@ All of the aggregation functions from above can be applied to a group as well
-df.groupby(by="groups").agg(
+df.group_by("groups").agg(
[
# Sum values
pl.sum("random").alias("sum"),
@@ -572,13 +572,13 @@ Group Data
Additional GroupBy functions
-df.groupby(by="groups").agg(
+df.group_by("groups").agg(
[
# Count the number of values in each group
pl.count("random").alias("size"),
# Sample one element in each group
- pl.col("names").apply(
+ pl.col("names").map_elements(
lambda group_df: group_df.sample(1)
),
]
@@ -593,7 +593,7 @@ Handling Missing Data
Add a new columns to the DataFrame
-df.with_column(
+df.with_columns(
(pl.col("random") * pl.col("nrs")) \
.alias("product")
)
Add a column at index 0 that counts the rows
-df.with_row_count()
+df.with_row_index()