FranzDiebold · mattkeanny · Mar 3, 2024 · Mar 3, 2024
diff --git a/polars-cheat-sheet.ipynb b/polars-cheat-sheet.ipynb
@@ -118,7 +118,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "df.filter(pl.col(\"nrs\") < 4).groupby(\"groups\").agg(pl.all().sum())"
+    "df.filter(pl.col(\"nrs\") < 4).group_by(\"groups\").agg(pl.all().sum())"
    ]
   },
   {
@@ -150,7 +150,7 @@
    "source": [
     "# Sample\n",
     "# Randomly select fraction of rows.\n",
-    "df.sample(frac=0.5)\n",
+    "df.sample(fraction=0.5)\n",
     "\n",
     "# Randomly select n rows.\n",
     "df.sample(n=2)"
@@ -240,7 +240,10 @@
    "outputs": [],
    "source": [
     "# Select rows meeting logical condition, and only the specific columns.\n",
-    "df[df[\"random\"] > 0.5, [\"names\", \"groups\"]]"
+    "(df\n",
+    "    .filter(pl.col(\"random\") > 0.5)\n",
+    "    .select(['names', 'groups'])\n",
+    ")"
    ]
   },
   {
@@ -339,7 +342,7 @@
    "outputs": [],
    "source": [
     "# Order rows by values of a column (high to low)\n",
-    "df.sort(\"random\", reverse=True)"
+    "df.sort(\"random\", descending=True)"
    ]
   },
   {
@@ -481,7 +484,7 @@
    "outputs": [],
    "source": [
-    "# Group by values in column named \"col\", returning a GroupBy object\n",
-    "df.groupby(\"groups\")"
+    "# Group by values in column named \"groups\", returning a GroupBy object\n",
+    "df.group_by(\"groups\")"
    ]
   },
   {
@@ -492,7 +495,7 @@
    "outputs": [],
    "source": [
     "# All of the aggregation functions from above can be applied to a group as well\n",
-    "df.groupby(by=\"groups\").agg(\n",
+    "df.group_by(\"groups\").agg(\n",
     "    [\n",
     "        # Sum values\n",
     "        pl.sum(\"random\").alias(\"sum\"),\n",
@@ -528,12 +531,12 @@
    "outputs": [],
    "source": [
     "# Additional GroupBy functions\n",
-    "df.groupby(by=\"groups\").agg(\n",
+    "df.group_by(\"groups\").agg(\n",
     "    [\n",
     "        # Count the number of values in each group\n",
     "        pl.count(\"random\").alias(\"size\"),\n",
     "        # Sample one element in each group\n",
-    "        pl.col(\"names\").apply(lambda group_df: group_df.sample(1)),\n",
+    "        pl.col(\"names\").map_elements(lambda group_df: group_df.sample(1)),\n",
     "    ]\n",
     ")"
    ]
@@ -607,7 +610,7 @@
    "outputs": [],
    "source": [
     "# Add a new column to the DataFrame\n",
-    "df.with_column((pl.col(\"random\") * pl.col(\"nrs\")).alias(\"product\"))"
+    "df.with_columns((pl.col(\"random\") * pl.col(\"nrs\")).alias(\"product\"))"
    ]
   },
   {
@@ -621,7 +624,7 @@
     "df.with_columns(\n",
     "    [\n",
     "        (pl.col(\"random\") * pl.col(\"nrs\")).alias(\"product\"),\n",
-    "        pl.col(\"names\").str.lengths().alias(\"names_lengths\"),\n",
+    "        pl.col(\"names\").str.len_bytes().alias(\"names_len_bytes\"),\n",
     "    ]\n",
     ")"
    ]
@@ -634,7 +637,7 @@
    "outputs": [],
    "source": [
     "# Add a column at index 0 that counts the rows\n",
-    "df.with_row_count()"
+    "df.with_row_index()"
    ]
   },
   {
@@ -684,8 +687,8 @@
     "        pl.col(\"random\").rolling_skew(window_size=2).alias(\"rolling_skew\"),\n",
     "        # Rolling custom function\n",
     "        pl.col(\"random\")\n",
-    "        .rolling_apply(function=np.nanstd, window_size=2)\n",
-    "        .alias(\"rolling_apply\"),\n",
+    "        .rolling_map(function=np.nanstd, window_size=2)\n",
+    "        .alias(\"rolling_map\"),\n",
     "    ]\n",
     ")"
    ]