Skip to content

Commit 998a496

Browse files
committed
working through regression
1 parent 23b66dc commit 998a496

File tree

5 files changed

+1059
-480
lines changed

5 files changed

+1059
-480
lines changed

chapters/02_regression/00_intro-to-regression.ipynb

Lines changed: 341 additions & 54 deletions
Large diffs are not rendered by default.
Lines changed: 321 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,321 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"metadata": {
7+
"slideshow": {
8+
"slide_type": "slide"
9+
}
10+
},
11+
"outputs": [],
12+
"source": [
13+
"%matplotlib inline\n",
14+
"import numpy as np\n",
15+
"import matplotlib.pylab as plt"
16+
]
17+
},
18+
{
19+
"cell_type": "code",
20+
"execution_count": null,
21+
"metadata": {
22+
"slideshow": {
23+
"slide_type": "-"
24+
}
25+
},
26+
"outputs": [],
27+
"source": [
28+
"import scipy.optimize\n",
29+
"import pandas as pd"
30+
]
31+
},
32+
{
33+
"cell_type": "markdown",
34+
"metadata": {
35+
"slideshow": {
36+
"slide_type": "slide"
37+
}
38+
},
39+
"source": [
40+
"## What does the following code do?"
41+
]
42+
},
43+
{
44+
"cell_type": "code",
45+
"execution_count": null,
46+
"metadata": {
47+
"scrolled": true,
48+
"slideshow": {
49+
"slide_type": "-"
50+
}
51+
},
52+
"outputs": [],
53+
"source": [
54+
"d = pd.read_csv(\"data/dataset_0.csv\")\n",
55+
"plt.plot(d.x,d.y,'o')"
56+
]
57+
},
58+
{
59+
"cell_type": "markdown",
60+
"metadata": {
61+
"slideshow": {
62+
"slide_type": "slide"
63+
}
64+
},
65+
"source": [
66+
"## What does the following code do?"
67+
]
68+
},
69+
{
70+
"cell_type": "code",
71+
"execution_count": null,
72+
"metadata": {
73+
"slideshow": {
74+
"slide_type": "-"
75+
}
76+
},
77+
"outputs": [],
78+
"source": [
79+
"def linear(x,a,b):\n",
80+
" return a + b*x"
81+
]
82+
},
83+
{
84+
"cell_type": "markdown",
85+
"metadata": {
86+
"slideshow": {
87+
"slide_type": "fragment"
88+
}
89+
},
90+
"source": []
91+
},
92+
{
93+
"cell_type": "markdown",
94+
"metadata": {
95+
"slideshow": {
96+
"slide_type": "slide"
97+
}
98+
},
99+
"source": [
100+
"## What does the following code do?\n"
101+
]
102+
},
103+
{
104+
"cell_type": "code",
105+
"execution_count": null,
106+
"metadata": {
107+
"slideshow": {
108+
"slide_type": "-"
109+
}
110+
},
111+
"outputs": [],
112+
"source": [
113+
"def linear(x,a,b):\n",
114+
" return a + b*x\n",
115+
"\n",
116+
"def linear_r(param,x,y):\n",
117+
" return linear(x,param[0],param[1]) - y"
118+
]
119+
},
120+
{
121+
"cell_type": "markdown",
122+
"metadata": {
123+
"slideshow": {
124+
"slide_type": "fragment"
125+
}
126+
},
127+
"source": []
128+
},
129+
{
130+
"cell_type": "markdown",
131+
"metadata": {
132+
"slideshow": {
133+
"slide_type": "slide"
134+
}
135+
},
136+
"source": [
137+
"## What does the following code do?"
138+
]
139+
},
140+
{
141+
"cell_type": "code",
142+
"execution_count": null,
143+
"metadata": {
144+
"slideshow": {
145+
"slide_type": "-"
146+
}
147+
},
148+
"outputs": [],
149+
"source": [
150+
"def linear_r(param,x,y): # copied from previous cell\n",
151+
" return linear(x,param[0],param[1]) - y # copied from previous cell\n",
152+
"\n",
153+
"param_guesses = [1,1]\n",
154+
"fit = scipy.optimize.least_squares(linear_r,param_guesses,\n",
155+
" args=(d.x,d.y))\n",
156+
"fit_a = fit.x[0]\n",
157+
"fit_b = fit.x[1]\n",
158+
"sum_of_square_residuals = fit.cost"
159+
]
160+
},
161+
{
162+
"cell_type": "markdown",
163+
"metadata": {
164+
"slideshow": {
165+
"slide_type": "fragment"
166+
}
167+
},
168+
"source": []
169+
},
170+
{
171+
"cell_type": "markdown",
172+
"metadata": {
173+
"slideshow": {
174+
"slide_type": "slide"
175+
}
176+
},
177+
"source": [
178+
"## What does the following code do?\n",
179+
"+ What the heck is `linspace`?\n",
180+
"+ What are we plotting?"
181+
]
182+
},
183+
{
184+
"cell_type": "code",
185+
"execution_count": null,
186+
"metadata": {
187+
"slideshow": {
188+
"slide_type": "-"
189+
}
190+
},
191+
"outputs": [],
192+
"source": [
193+
"x_range = np.linspace(np.min(d.x),np.max(d.x),100)\n",
194+
"\n",
195+
"plt.plot(d.x,d.y,\"o\")\n",
196+
"plt.plot(x_range,linear(x_range,fit_a,fit_b))\n"
197+
]
198+
},
199+
{
200+
"cell_type": "markdown",
201+
"metadata": {
202+
"slideshow": {
203+
"slide_type": "slide"
204+
}
205+
},
206+
"source": [
207+
"## Put together"
208+
]
209+
},
210+
{
211+
"cell_type": "code",
212+
"execution_count": null,
213+
"metadata": {
214+
"slideshow": {
215+
"slide_type": "-"
216+
}
217+
},
218+
"outputs": [],
219+
"source": [
220+
"def linear(x,a,b):\n",
221+
" \"\"\"Linear model of x using a (intercept) and b (slope)\"\"\"\n",
222+
" return a + b*x\n",
223+
"\n",
224+
"def linear_r(param,x,y):\n",
225+
" \"\"\"Residuals function for linear\"\"\"\n",
226+
" return linear(x,param[0],param[1]) - y\n",
227+
"\n",
228+
"# Read data\n",
229+
"d = pd.read_csv(\"data/dataset_0.csv\")\n",
230+
"plt.plot(d.x,d.y,'o')\n",
231+
"\n",
232+
"# Perform regression\n",
233+
"param_guesses = [1,1]\n",
234+
"fit = scipy.optimize.least_squares(linear_r,param_guesses,args=(d.x,d.y))\n",
235+
"fit_a = fit.x[0]\n",
236+
"fit_b = fit.x[1]\n",
237+
"sum_of_square_residuals = fit.cost\n",
238+
"\n",
239+
"# Plot result\n",
240+
"x_range = np.linspace(np.min(d.x),np.max(d.x),100)\n",
241+
"plt.plot(x_range,linear(x_range,fit_a,fit_b))\n"
242+
]
243+
},
244+
{
245+
"cell_type": "markdown",
246+
"metadata": {
247+
"slideshow": {
248+
"slide_type": "slide"
249+
}
250+
},
251+
"source": [
252+
"<h4>For your assigned model:</h4>\n",
253+
"<ul>\n",
254+
" <li>Write a function and residuals function</li>\n",
255+
" <li>Estimate the parameters of the model</li>\n",
256+
" <li>Plot the data and the model on the same graph</li>\n",
257+
" <li>Write the SSR and number of parameters on the board</li>\n",
258+
" <li>If you finish early: plot the residuals and decide if you like your model</li>\n",
259+
" <li>If you're still waiting: try to figure out which model best fits dataset_1.csv</li>\n",
260+
"</ul>\n",
261+
"<br/>\n",
262+
"\n",
263+
"<div style=\"font-size:30px;\" >\n",
264+
"<div class=\"row\">\n",
265+
" <div class=\"col-xs-6\">$y = a \\Big ( \\frac{bx}{1 + bx} \\Big )$</div>\n",
266+
" <div class=\"col-xs-6\">$y = a \\Big ( \\frac{bx^{c}}{1 + bx^{c}} \\Big )$</div>\n",
267+
"</div>\n",
268+
"\n",
269+
"<div class=\"row\">\n",
270+
" <div class=\"col-xs-6\">$y = a(1 - e^{-bx})$</div>\n",
271+
" <div class=\"col-xs-6\">$y = a + bx^{2} + cx^{3}$</div>\n",
272+
"</div>\n",
273+
"<div class=\"row\">\n",
274+
" <div class=\"col-xs-6\">$y = a + bx^{2} + cx^{3} + dx^{4}$</div>\n",
275+
" <div class=\"col-xs-6\">$y = a\\sin(bx + c)$</div>\n",
276+
"</div>\n",
277+
"<div class=\"row\">\n",
278+
" <div class=\"col-xs-6\">$y = a\\ln(x + b)$</div>\n",
279+
" <div class=\"col-xs-6\">$y = a\\ln(bx + c)$</div>\n",
280+
"</div>\n",
281+
"</div>\n",
282+
"\n"
283+
]
284+
},
285+
{
286+
"cell_type": "code",
287+
"execution_count": null,
288+
"metadata": {},
289+
"outputs": [],
290+
"source": []
291+
},
292+
{
293+
"cell_type": "code",
294+
"execution_count": null,
295+
"metadata": {},
296+
"outputs": [],
297+
"source": []
298+
}
299+
],
300+
"metadata": {
301+
"kernelspec": {
302+
"display_name": "Python 3",
303+
"language": "python",
304+
"name": "python3"
305+
},
306+
"language_info": {
307+
"codemirror_mode": {
308+
"name": "ipython",
309+
"version": 3
310+
},
311+
"file_extension": ".py",
312+
"mimetype": "text/x-python",
313+
"name": "python",
314+
"nbconvert_exporter": "python",
315+
"pygments_lexer": "ipython3",
316+
"version": "3.6.6"
317+
}
318+
},
319+
"nbformat": 4,
320+
"nbformat_minor": 1
321+
}

0 commit comments

Comments
 (0)