diff --git a/polars-cheat-sheet.ipynb b/polars-cheat-sheet.ipynb index 104d67b..08fecb8 100644 --- a/polars-cheat-sheet.ipynb +++ b/polars-cheat-sheet.ipynb @@ -118,7 +118,7 @@ "metadata": {}, "outputs": [], "source": [ - "df.filter(pl.col(\"nrs\") < 4).groupby(\"groups\").agg(pl.all().sum())" + "df.filter(pl.col(\"nrs\") < 4).group_by(\"groups\").agg(pl.all().sum())" ] }, { @@ -150,7 +150,7 @@ "source": [ "# Sample\n", "# Randomly select fraction of rows.\n", - "df.sample(frac=0.5)\n", + "df.sample(fraction=0.5)\n", "\n", "# Randomly select n rows.\n", "df.sample(n=2)" @@ -240,7 +240,10 @@ "outputs": [], "source": [ "# Select rows meeting logical condition, and only the specific columns.\n", - "df[df[\"random\"] > 0.5, [\"names\", \"groups\"]]" + "(df\n", + " .filter(pl.col(\"random\") > 0.5)\n", + " .select(['names', 'groups'])\n", + ")" ] }, { @@ -339,7 +342,7 @@ "outputs": [], "source": [ "# Order rows by values of a column (high to low)\n", - "df.sort(\"random\", reverse=True)" + "df.sort(\"random\", descending=True)" ] }, { @@ -481,7 +484,7 @@ "outputs": [], "source": [ "# Group by values in column named \"col\", returning a GroupBy object\n", - "df.groupby(\"groups\")" + "df.group_by(\"groups\")" ] }, { @@ -492,7 +495,7 @@ "outputs": [], "source": [ "# All of the aggregation functions from above can be applied to a group as well\n", - "df.groupby(by=\"groups\").agg(\n", + "df.group_by(\"groups\").agg(\n", " [\n", " # Sum values\n", " pl.sum(\"random\").alias(\"sum\"),\n", @@ -528,12 +531,12 @@ "outputs": [], "source": [ "# Additional GroupBy functions\n", - "df.groupby(by=\"groups\").agg(\n", + "df.group_by(\"groups\").agg(\n", " [\n", " # Count the number of values in each group\n", " pl.count(\"random\").alias(\"size\"),\n", " # Sample one element in each group\n", - " pl.col(\"names\").apply(lambda group_df: group_df.sample(1)),\n", + " pl.col(\"names\").map_elements(lambda group_df: group_df.sample(1)),\n", " ]\n", ")" ] @@ -607,7 +610,7 @@ "outputs": 
[], "source": [ "# Add a new column to the DataFrame\n", - "df.with_column((pl.col(\"random\") * pl.col(\"nrs\")).alias(\"product\"))" + "df.with_columns((pl.col(\"random\") * pl.col(\"nrs\")).alias(\"product\"))" ] }, { @@ -621,7 +624,7 @@ "df.with_columns(\n", " [\n", " (pl.col(\"random\") * pl.col(\"nrs\")).alias(\"product\"),\n", - " pl.col(\"names\").str.lengths().alias(\"names_lengths\"),\n", + " pl.col(\"names\").str.len_bytes().alias(\"names_len_bytes\"),\n", " ]\n", ")" ] @@ -634,7 +637,7 @@ "outputs": [], "source": [ "# Add a column at index 0 that counts the rows\n", - "df.with_row_count()" + "df.with_row_index()" ] }, { @@ -684,8 +687,8 @@ " pl.col(\"random\").rolling_skew(window_size=2).alias(\"rolling_skew\"),\n", " # Rolling custom function\n", " pl.col(\"random\")\n", - " .rolling_apply(function=np.nanstd, window_size=2)\n", - " .alias(\"rolling_apply\"),\n", + " .rolling_map(function=np.nanstd, window_size=2)\n", + " .alias(\"rolling_map\"),\n", " ]\n", ")" ] diff --git a/src/index.html b/src/index.html index 1b597a1..f7ddb80 100644 --- a/src/index.html +++ b/src/index.html @@ -51,7 +51,7 @@

General

Import

Creating/reading DataFrames

Creating/reading DataFrames @@ -276,7 +276,7 @@

Reshaping Data – Change layout, sorting, renaming

Reshaping Data – Change layout, sorting, renaming
Reshaping Data – Change layout, sorting, renaming Reshaping Data – Change layout, sorting, renaming Reshaping Data – Change layout, sorting, renaming
Reshaping Data – Change layout, sorting, renaming df.sort("random") # high to low -df.sort("random", reverse=True) +df.sort("random", descending=True)
Reshaping Data – Change layout, sorting, renaming
Group Data
Group Data

Group by values in column named "col", returning a GroupBy object

-
df.groupby("groups")
+
df.group_by("groups")

All of the aggregation functions from above can be applied to a group as well

-
df.groupby(by="groups").agg(
+            
df.group_by("groups").agg(
   [
       # Sum values
       pl.sum("random").alias("sum"),
@@ -572,13 +572,13 @@ 

Group Data

Additional GroupBy functions

-
df.groupby(by="groups").agg(
+            
df.group_by("groups").agg(
   [
     # Count the number of values in each group
     pl.count("random").alias("size"),
 
     # Sample one element in each group
-    pl.col("names").apply(
+    pl.col("names").map_elements(
       lambda group_df: group_df.sample(1)
     ),
   ]
@@ -593,7 +593,7 @@ 

Handling Missing Data

Handling Missing Data Handling Missing Data
Make New Columns

Add a new column to the DataFrame

-
df.with_column(
+            
df.with_columns(
   (pl.col("random") * pl.col("nrs")) \
     .alias("product")
 )
Make New Columns

Add a column at index 0 that counts the rows

-
df.with_row_count()
+
df.with_row_index()
@@ -772,9 +772,9 @@

Rolling Functions

# Rolling custom function pl.col("random") \ - .rolling_apply( + .rolling_map( function=np.nanstd, window_size=2) \ - .alias("rolling_apply"), + .alias("rolling_map"), ] )
@@ -783,7 +783,7 @@

Rolling Functions

Window Functions