From 501451bb9ac54eb405aadc0f7f3c434974542270 Mon Sep 17 00:00:00 2001 From: Keith Battocchi Date: Wed, 19 Mar 2025 13:34:56 -0400 Subject: [PATCH] Escape backslashes in notebook LaTeX Signed-off-by: Keith Battocchi --- ...nline Media Company - EconML + DoWhy.ipynb | 2 +- ...mentation at An Online Media Company.ipynb | 2 +- ...nt Attribution at A Software Company.ipynb | 2 +- ... Testing at An Online Travel Company.ipynb | 106 +++++++++--------- 4 files changed, 56 insertions(+), 56 deletions(-) diff --git a/notebooks/CustomerScenarios/Case Study - Customer Segmentation at An Online Media Company - EconML + DoWhy.ipynb b/notebooks/CustomerScenarios/Case Study - Customer Segmentation at An Online Media Company - EconML + DoWhy.ipynb index 32b54bf46..59cae0d6a 100644 --- a/notebooks/CustomerScenarios/Case Study - Customer Segmentation at An Online Media Company - EconML + DoWhy.ipynb +++ b/notebooks/CustomerScenarios/Case Study - Customer Segmentation at An Online Media Company - EconML + DoWhy.ipynb @@ -123,7 +123,7 @@ "\n", "\\begin{align}\n", "\\gamma(X) & = -3 - 14 \\cdot \\{\\text{income}<1\\} \\\\\n", - "\\beta(X,W) & = 20 + 0.5 \\cdot \\text{avg_hours} + 5 \\cdot \\{\\text{days_visited}>4\\} \\\\\n", + "\\beta(X,W) & = 20 + 0.5 \\cdot \\text{avg\\_hours} + 5 \\cdot \\{\\text{days\\_visited}>4\\} \\\\\n", "Y &= \\gamma(X) \\cdot T + \\beta(X,W)\n", "\\end{align}\n", "\n" diff --git a/notebooks/CustomerScenarios/Case Study - Customer Segmentation at An Online Media Company.ipynb b/notebooks/CustomerScenarios/Case Study - Customer Segmentation at An Online Media Company.ipynb index 3a704cc41..c926814ac 100644 --- a/notebooks/CustomerScenarios/Case Study - Customer Segmentation at An Online Media Company.ipynb +++ b/notebooks/CustomerScenarios/Case Study - Customer Segmentation at An Online Media Company.ipynb @@ -109,7 +109,7 @@ "\n", "\\begin{align}\n", "\\gamma(X) & = -3 - 14 \\cdot \\{\\text{income}<1\\} \\\\\n", - "\\beta(X,W) & = 20 + 0.5 \\cdot \\text{avg_hours} + 5 \\cdot \\{\\text{days_visited}>4\\} \\\\\n", + "\\beta(X,W) & = 20 + 0.5 \\cdot \\text{avg\\_hours} + 5 \\cdot \\{\\text{days\\_visited}>4\\} \\\\\n", "Y &= \\gamma(X) \\cdot T + \\beta(X,W)\n", "\\end{align}\n", "\n" diff --git a/notebooks/CustomerScenarios/Case Study - Multi-investment Attribution at A Software Company.ipynb b/notebooks/CustomerScenarios/Case Study - Multi-investment Attribution at A Software Company.ipynb index 2a0e95ba1..4ca7c3200 100644 --- a/notebooks/CustomerScenarios/Case Study - Multi-investment Attribution at A Software Company.ipynb +++ b/notebooks/CustomerScenarios/Case Study - Multi-investment Attribution at A Software Company.ipynb @@ -363,7 +363,7 @@ "The data was generated using the following underlying treatment effect function:\n", "\n", "$$\n", - "\\text{treatment_effect(Size)} = (5,000 + 2\\% \\cdot \\text{Size}) \\cdot I_\\text{Tech Support} + (5\\% \\cdot \\text{Size}) \\cdot I_\\text{Discount}\n", + "\\text{treatment\\_effect(Size)} = (5,000 + 2\\% \\cdot \\text{Size}) \\cdot I_\\text{Tech Support} + (5\\% \\cdot \\text{Size}) \\cdot I_\\text{Discount}\n", "$$\n", "\n", "Therefore, the treatment effect depends on the customer's size as follows: tech support provides an consumption boost of \\$5,000 + 2\\% Size and a discount provides an consumption boost of 5\\% Size.**This is the relationship we seek to learn from the data.**" diff --git a/notebooks/CustomerScenarios/Case Study - Recommendation AB Testing at An Online Travel Company.ipynb b/notebooks/CustomerScenarios/Case Study - Recommendation AB Testing at An Online Travel Company.ipynb index 9c2d7d5a5..1ad8bb948 100644 --- a/notebooks/CustomerScenarios/Case Study - Recommendation AB Testing at An Online Travel Company.ipynb +++ b/notebooks/CustomerScenarios/Case Study - Recommendation AB Testing at An Online Travel Company.ipynb @@ -70,21 +70,21 @@ }, "outputs": [], "source": [ - "# Some imports to get us started\r\n", - "# Utilities\r\n", - "import numpy as np\r\n", - "import pandas as pd\r\n", - "\r\n", - "# Generic ML imports\r\n", - "import lightgbm as lgb\r\n", - "from sklearn.preprocessing import PolynomialFeatures\r\n", - "\r\n", - "# EconML imports\r\n", - "from econml.iv.dr import LinearIntentToTreatDRIV\r\n", - "from econml.cate_interpreter import SingleTreeCateInterpreter, \\\r\n", - " SingleTreePolicyInterpreter\r\n", - "\r\n", - "import matplotlib.pyplot as plt\r\n", + "# Some imports to get us started\n", + "# Utilities\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "# Generic ML imports\n", + "import lightgbm as lgb\n", + "from sklearn.preprocessing import PolynomialFeatures\n", + "\n", + "# EconML imports\n", + "from econml.iv.dr import LinearIntentToTreatDRIV\n", + "from econml.cate_interpreter import SingleTreeCateInterpreter, \\\n", + " SingleTreePolicyInterpreter\n", + "\n", + "import matplotlib.pyplot as plt\n", "%matplotlib inline" ] }, @@ -335,7 +335,7 @@ "The data was generated using the following undelying treatment effect function:\n", "\n", "$$\n", - "\\text{treatment_effect} = 0.2 + 0.3 \\cdot \\text{days_visited_free_pre} - 0.2 \\cdot \\text{days_visited_hs_pre} + \\text{os_type_osx}\n", + "\\text{treatment\\_effect} = 0.2 + 0.3 \\cdot \\text{days\\_visited\\_free\\_pre} - 0.2 \\cdot \\text{days\\_visited\\_hs\\_pre} + \\text{os\\_type\\_osx}\n", "$$\n", "\n", "The interpretation of this is that users who visited the website before the experiment and/or who use an iPhone tend to benefit from the membership program, whereas users who visited the hotels pages tend to be harmed by membership. **This is the relationship we seek to learn from the data.**" @@ -352,7 +352,7 @@ "outputs": [], "source": [ "# Define underlying treatment effect function\n", - "def TE_fn(X):\r\n", + "def TE_fn(X):\n", " return (0.2 + 0.3 * X[\"days_visited_free_pre\"] - 0.2 * X[\"days_visited_hs_pre\"] + X[\"os_type_osx\"]).values\n", "true_TE = TE_fn(X_data)\n", "\n", @@ -386,23 +386,23 @@ }, "outputs": [], "source": [ - "# Define nuissance estimators\r\n", - "lgb_T_XZ_params = {\r\n", - " 'objective' : 'binary',\r\n", - " 'metric' : 'auc',\r\n", - " 'learning_rate': 0.1,\r\n", - " 'num_leaves' : 30,\r\n", - " 'max_depth' : 5\r\n", - "}\r\n", - "\r\n", - "lgb_Y_X_params = {\r\n", - " 'metric' : 'rmse',\r\n", - " 'learning_rate': 0.1,\r\n", - " 'num_leaves' : 30,\r\n", - " 'max_depth' : 5\r\n", - "}\r\n", - "model_T_XZ = lgb.LGBMClassifier(**lgb_T_XZ_params)\r\n", - "model_Y_X = lgb.LGBMRegressor(**lgb_Y_X_params)\r\n", + "# Define nuissance estimators\n", + "lgb_T_XZ_params = {\n", + " 'objective' : 'binary',\n", + " 'metric' : 'auc',\n", + " 'learning_rate': 0.1,\n", + " 'num_leaves' : 30,\n", + " 'max_depth' : 5\n", + "}\n", + "\n", + "lgb_Y_X_params = {\n", + " 'metric' : 'rmse',\n", + " 'learning_rate': 0.1,\n", + " 'num_leaves' : 30,\n", + " 'max_depth' : 5\n", + "}\n", + "model_T_XZ = lgb.LGBMClassifier(**lgb_T_XZ_params)\n", + "model_Y_X = lgb.LGBMRegressor(**lgb_Y_X_params)\n", "flexible_model_effect = lgb.LGBMRegressor(**lgb_Y_X_params)" ] }, @@ -427,13 +427,13 @@ } ], "source": [ - "# Train EconML model\r\n", - "model = LinearIntentToTreatDRIV(\r\n", - " model_y_xw = model_Y_X,\r\n", - " model_t_xwz = model_T_XZ,\r\n", - " flexible_model_effect = flexible_model_effect,\r\n", - " featurizer = PolynomialFeatures(degree=1, include_bias=False)\r\n", - ")\r\n", + "# Train EconML model\n", + "model = LinearIntentToTreatDRIV(\n", + " model_y_xw = model_Y_X,\n", + " model_t_xwz = model_T_XZ,\n", + " flexible_model_effect = flexible_model_effect,\n", + " featurizer = PolynomialFeatures(degree=1, include_bias=False)\n", + ")\n", "model.fit(Y, T, Z=Z, X=X_data, inference=\"statsmodels\")" ] }, @@ -447,11 +447,11 @@ }, "outputs": [], "source": [ - "# Compare learned coefficients with true model coefficients\r\n", - "coef_indices = np.arange(model.coef_.shape[0])\r\n", - "# Calculate error bars\r\n", - "coef_error = np.asarray(model.coef__interval()) # 95% confidence interval for coefficients\r\n", - "coef_error[0, :] = model.coef_ - coef_error[0, :]\r\n", + "# Compare learned coefficients with true model coefficients\n", + "coef_indices = np.arange(model.coef_.shape[0])\n", + "# Calculate error bars\n", + "coef_error = np.asarray(model.coef__interval()) # 95% confidence interval for coefficients\n", + "coef_error[0, :] = model.coef_ - coef_error[0, :]\n", "coef_error[1, :] = coef_error[1, :] - model.coef_" ] }, @@ -478,10 +478,10 @@ } ], "source": [ - "plt.errorbar(coef_indices, model.coef_, coef_error, fmt=\"o\", label=\"Learned coefficients\\nand 95% confidence interval\")\r\n", - "plt.scatter(coef_indices, true_coefs, color='C1', label=\"True coefficients\")\r\n", - "plt.xticks(coef_indices, X_data.columns, rotation='vertical')\r\n", - "plt.legend()\r\n", + "plt.errorbar(coef_indices, model.coef_, coef_error, fmt=\"o\", label=\"Learned coefficients\\nand 95% confidence interval\")\n", + "plt.scatter(coef_indices, true_coefs, color='C1', label=\"True coefficients\")\n", + "plt.xticks(coef_indices, X_data.columns, rotation='vertical')\n", + "plt.legend()\n", "plt.show()" ] }, @@ -607,8 +607,8 @@ }, "outputs": [], "source": [ - "test_customers = X_data.iloc[:1000]\r\n", - "true_customer_TE = TE_fn(test_customers)\r\n", + "test_customers = X_data.iloc[:1000]\n", + "true_customer_TE = TE_fn(test_customers)\n", "model_customer_TE = model.effect(test_customers)" ] }, @@ -772,4 +772,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +}