From 369ec06775c76a1e5f46def8bb3893a4de591d3d Mon Sep 17 00:00:00 2001 From: Pedro Albuquerque Date: Wed, 27 Sep 2023 04:04:06 +0000 Subject: [PATCH 01/10] Add some examples --- examples.ipynb | 229 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 229 insertions(+) create mode 100644 examples.ipynb diff --git a/examples.ipynb b/examples.ipynb new file mode 100644 index 00000000..c53799b4 --- /dev/null +++ b/examples.ipynb @@ -0,0 +1,229 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ac622319", + "metadata": {}, + "source": [ + "# Descriptive statistics examples" + ] + }, + { + "cell_type": "markdown", + "id": "aa8993e4", + "metadata": {}, + "source": [ + "### Example 1" + ] + }, + { + "cell_type": "markdown", + "id": "5e0ab0d5", + "metadata": {}, + "source": [ + "Show by coding one example that the mean is the estimator with the minimum squared error and give the intuition behind that.\n", + "\n", + "Note:\n", + "\n", + "$$\n", + "MSE=\\sum_{i=1}^{n}(X_{i}-\\mu)^2\n", + "$$\n", + "\n", + "Consider the following array `X=[3,5,6,3,1,5,7,9,5,4]`" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "34720ab6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Estimated mu: 4.799999977760703\n", + "Minimum MSE: 45.6\n", + "Populational mu: 4.8\n" + ] + } + ], + "source": [ + "# TODO\n" + ] + }, + { + "cell_type": "markdown", + "id": "46c70c3d", + "metadata": {}, + "source": [ + "### Example 2\n", + "\n", + "Consider this data:\n", + "````\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# Set a seed for reproducibility\n", + "np.random.seed(0)\n", + "\n", + "# Generate right-skewed data using the gamma distribution\n", + "shape = 2 # Shape parameter (controls skewness, adjust as needed)\n", + "scale = 1 # Scale parameter (controls spread, adjust as needed)\n", + "size = 1000 # Number of data points\n", + "\n", + "# Generate right-skewed data\n", + "data = 
np.random.gamma(shape, scale, size)\n", + "\n", + "# Create a histogram\n", + "plt.hist(data, bins=30, density=True, alpha=0.6, color='b', label='Right-Skewed Data')\n", + "\n", + "# Add labels and a legend\n", + "plt.xlabel('Value')\n", + "plt.ylabel('Frequency')\n", + "plt.title('Right-Skewed Distribution')\n", + "plt.legend(loc='upper right')\n", + "\n", + "# Show the plot\n", + "plt.show()\n", + "````\n", + "\n", + "1. Run the code.\n", + "2. Show that the distribution of the sample mean approximately follows a normal distribution" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "d590308e", + "metadata": {}, + "outputs": [], + "source": [ + "# TODO" + ] + }, + { + "cell_type": "markdown", + "id": "00a7f40c", + "metadata": {}, + "source": [ + "### Example 3\n", + "\n", + "Consider this data:\n", + "````\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# Set a seed for reproducibility\n", + "np.random.seed(0)\n", + "\n", + "# Parameters for the uniform distribution\n", + "low = 0 # Lower bound\n", + "high = 10 # Upper bound\n", + "size = 1000 # Number of data points\n", + "\n", + "# Generate random data from a uniform distribution\n", + "data = np.random.uniform(low, high, size)\n", + "\n", + "# Create a histogram\n", + "plt.hist(data, bins=30, density=True, alpha=0.6, color='b', label='Uniform Data')\n", + "\n", + "# Add labels and a legend\n", + "plt.xlabel('Value')\n", + "plt.ylabel('Frequency')\n", + "plt.title('Uniform Distribution')\n", + "plt.legend(loc='upper right')\n", + "\n", + "# Show the plot\n", + "plt.show()\n", + "````\n", + "\n", + "1. Run the code.\n", + "2. Show that the distribution of the sample mean approximately follows a normal distribution" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "12f8030c", + "metadata": {}, + "outputs": [], + "source": [ + "# TODO\n" + ] + }, + { + "cell_type": "markdown", + "id": "2767a3c8", + "metadata": {}, + "source": [ + "### Example 4\n", + "\n", + "1. 
Simulate a normal distribution\n", + "2. Plot the histogram and add a vertical line for the mean and median.\n", + "3. Begin to add outliers and see how the mean, median and mode are affected.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "214ecc0d", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# TODO\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0ea06451", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a0e55e5c", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "interpreter": { + "hash": "9248718ffe6ce6938b217e69dbcc175ea21f4c6b28a317e96c05334edae734bb" + }, + "kernelspec": { + "display_name": "Python 3.9.12 ('ML-BOOTCAMP')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From a255f5288eadd94306661803e68c80d3a13cb775 Mon Sep 17 00:00:00 2001 From: bobo305 <126503694+bobo305@users.noreply.github.com> Date: Fri, 29 Sep 2023 11:45:47 +0000 Subject: [PATCH 02/10] added julio work folder and all done with the excercies --- JULIOs_work/problems.ipynb | 176 +++++++++++++++++++++++++++++++++++++ notebook/problems.ipynb | 90 ------------------- 2 files changed, 176 insertions(+), 90 deletions(-) create mode 100644 JULIOs_work/problems.ipynb delete mode 100644 notebook/problems.ipynb diff --git a/JULIOs_work/problems.ipynb b/JULIOs_work/problems.ipynb new file mode 100644 index 00000000..d8b5691c --- /dev/null +++ b/JULIOs_work/problems.ipynb @@ -0,0 +1,176 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ac622319", + "metadata": {}, + "source": [ + "# Descriptive statistics problems" + ] + }, + { + "cell_type": "markdown", + "id": "aa8993e4", + "metadata": {}, + "source": [ + "### Exercise 1" + ] + }, + { + "cell_type": "markdown", + "id": "5e0ab0d5", + "metadata": {}, + "source": [ + "We will use Numpy to obtain information to describe 
statistically.\n", + "\n", + "- Generate an array of 100 elements following a normal distribution.\n", + "- Generate an array of 100 elements following a chi-square distribution with 3 degrees of freedom.\n", + "- Calculate the main metrics and statistical measures that best describe the two vectors." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "34720ab6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "stats metrics for normal disturbution\n", + "mean: -0.06865936071598043\n", + "stander deviation: 0.9697609573818616\n", + "meadian: -0.12619537311221996\n", + "min: -1.9361951382884228\n", + "max: 2.277103875562215\n", + "variances: 0.9404363144621848\n", + "\n", + "stats metrics for chi square disturbution\n", + "mean: 3.14695578992872\n", + "stander deviation: 2.467973134872231\n", + "meadian: 2.1340973003611277\n", + "min: 0.18770455963586785\n", + "max: 10.0705079466792\n", + "variances: 6.0908913944510665\n" + ] + } + ], + "source": [ + "# TODO\n", + "import numpy as np \n", + "\n", + "normal_distribution= np.random.normal(loc=0, scale=1,size=100)\n", + "\n", + "chi_square_distribution=np.random.chisquare(df=3,size=100)\n", + "\n", + "mean_normal=np.mean(normal_distribution)\n", + "std_normal=np.std(normal_distribution)\n", + "meadian_normal=np.median(normal_distribution)\n", + "min_normal=np.min(normal_distribution)\n", + "max_normal=np.max(normal_distribution)\n", + "variances_normal=np.var(normal_distribution)\n", + "\n", + "\n", + "\n", + "mean_chi_square=np.mean(chi_square_distribution)\n", + "std_chi_square=np.std(chi_square_distribution)\n", + "meadian_chi_square=np.median(chi_square_distribution)\n", + "min_chi_square=np.min(chi_square_distribution)\n", + "max_chi_square=np.max(chi_square_distribution)\n", + "variances_chi_square=np.var(chi_square_distribution)\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "print(\"stats metrics for normal disturbution\")\n", + 
"print(\"mean:\",mean_normal)\n", + "print(\"stander deviation:\",std_normal)\n", + "print(\"meadian:\",meadian_normal)\n", + "print(\"min:\",min_normal)\n", + "print(\"max:\",max_normal)\n", + "print(\"variances:\",variances_normal)\n", + "\n", + "print(\"\")\n", + "\n", + "print(\"stats metrics for chi square disturbution\")\n", + "print(\"mean:\",mean_chi_square)\n", + "print(\"stander deviation:\",std_chi_square)\n", + "print(\"meadian:\",meadian_chi_square)\n", + "print(\"min:\",min_chi_square)\n", + "print(\"max:\",max_chi_square)\n", + "print(\"variances:\",variances_chi_square)" + ] + }, + { + "cell_type": "markdown", + "id": "46c70c3d", + "metadata": {}, + "source": [ + "### Exercise 2\n", + "\n", + "Write a Python program to calculate the standard deviation of the following data:\n", + "\n", + "```py\n", + "data = [4, 2, 5, 8, 6]\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "d590308e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "stander diviation is : 2.0\n" + ] + } + ], + "source": [ + "# TODO\n", + "import numpy as np\n", + "\n", + "data = [4, 2, 5, 8, 6]\n", + "\n", + "mean_value=np.mean(data)\n", + "\n", + "squared_diffrince=[(x - mean_value)** 2 for x in data]\n", + "\n", + "mean_squared_diffrence=np.mean(squared_diffrince)\n", + "\n", + "std=np.sqrt(mean_squared_diffrence)\n", + "\n", + "print(\"stander diviation is :\", std)" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "9248718ffe6ce6938b217e69dbcc175ea21f4c6b28a317e96c05334edae734bb" + }, + "kernelspec": { + "display_name": "Python 3.9.12 ('ML-BOOTCAMP')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 
5 +} diff --git a/notebook/problems.ipynb b/notebook/problems.ipynb deleted file mode 100644 index ff2c594b..00000000 --- a/notebook/problems.ipynb +++ /dev/null @@ -1,90 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "ac622319", - "metadata": {}, - "source": [ - "# Descriptive statistics problems" - ] - }, - { - "cell_type": "markdown", - "id": "aa8993e4", - "metadata": {}, - "source": [ - "### Exercise 1" - ] - }, - { - "cell_type": "markdown", - "id": "5e0ab0d5", - "metadata": {}, - "source": [ - "We will use Numpy to obtain information to describe statistically.\n", - "\n", - "- Generate an array of 100 elements following a normal distribution.\n", - "- Generate an array of 100 elements following a chi-square distribution with 3 degrees of freedom.\n", - "- Calculate the main metrics and statistical measures that best describe the two vectors." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "34720ab6", - "metadata": {}, - "outputs": [], - "source": [ - "# TODO" - ] - }, - { - "cell_type": "markdown", - "id": "46c70c3d", - "metadata": {}, - "source": [ - "### Exercise 2\n", - "\n", - "Write a Python program to calculate the standard deviation of the following data:\n", - "\n", - "```py\n", - "data = [4, 2, 5, 8, 6]\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "d590308e", - "metadata": {}, - "outputs": [], - "source": [ - "# TODO" - ] - } - ], - "metadata": { - "interpreter": { - "hash": "9248718ffe6ce6938b217e69dbcc175ea21f4c6b28a317e96c05334edae734bb" - }, - "kernelspec": { - "display_name": "Python 3.9.12 ('ML-BOOTCAMP')", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.4" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} From 
794e48935f7ca791ba418e5c0a1c64d59ea7f958 Mon Sep 17 00:00:00 2001 From: Pedro Albuquerque Date: Fri, 29 Sep 2023 23:36:49 +0000 Subject: [PATCH 03/10] New changes --- examples.ipynb | 801 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 784 insertions(+), 17 deletions(-) diff --git a/examples.ipynb b/examples.ipynb index c53799b4..1dfb01d1 100644 --- a/examples.ipynb +++ b/examples.ipynb @@ -34,7 +34,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 2, "id": "34720ab6", "metadata": {}, "outputs": [ @@ -42,14 +42,259 @@ "name": "stdout", "output_type": "stream", "text": [ - "Estimated mu: 4.799999977760703\n", - "Minimum MSE: 45.6\n", - "Populational mu: 4.8\n" + "52\n", + "46\n", + "78\n" ] } ], "source": [ - "# TODO\n" + "import numpy as np\n", + "#Sample from an unknown population\n", + "X = np.array([3,5,6,3,1,5,7,9,5,4])\n", + "\n", + "#Define the loss function\n", + "#Model: X=average+error ->(X-average)**2=error**2\n", + "def loss(mu):\n", + " loss = np.sum(np.square(X-mu))\n", + " return loss\n", + "\n", + "loss_sebastian = loss(4)\n", + "print(loss_sebastian)\n", + "loss_julio = loss(5)\n", + "print(loss_julio)\n", + "loss_rob = loss(3)\n", + "print(loss_rob)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "04777a53", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " message: Optimization terminated successfully.\n", + " success: True\n", + " status: 0\n", + " fun: 45.6\n", + " x: [ 4.800e+00]\n", + " nit: 3\n", + " jac: [ 4.768e-07]\n", + " hess_inv: [[ 5.000e-02]]\n", + " nfev: 8\n", + " njev: 4\n", + "The best possible average here that minimizes the MSE is [4.79999998]\n", + "4.8\n" + ] + } + ], + "source": [ + "from scipy.optimize import minimize\n", + "initial_guess = [0.0]\n", + "result = minimize(loss,initial_guess,method='BFGS')\n", + "print(result)\n", + "print(f\"The best possible average here that minimizes the MSE is 
{result.x}\")\n", + "print(np.average(X))" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "20626934", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[37.6271033 49.25801358 51.76774607 36.22546304 50.2132896 52.29252132\n", + " 33.47598315 33.29169425 45.1421923 31.69617333 39.07981848 16.64922406\n", + " 44.46305569 41.53569879 22.26300143 40.12042989 28.59536197 31.28348567\n", + " 21.24463275 43.64034352 36.29472538 39.79036744 42.92675803 44.08036314\n", + " 27.0221942 42.92165963 53.93568164 15.58799329 35.97759084 35.80911123\n", + " 25.40344864 25.51276397 41.78591563 41.33882332 37.71804396 21.47226349\n", + " 40.88133846 30.42494425 38.75105351 16.68685628 40.75261067 22.59483673\n", + " 23.57046488 51.30310644 47.90359289 47.1199625 37.55664582 58.67891991\n", + " 47.08333506 43.6471633 21.06627581 44.93842797 14.38227947 41.45057177\n", + " 45.42567627 40.2609248 42.89635126 32.18323302 35.8783009 27.55322583\n", + " 59.5951662 26.34265514 40.95002397 28.92083714 51.87142965 21.60613623\n", + " 32.74510322 38.46097479 32.62149649 53.04653142 28.87114162 11.46899169\n", + " 35.45433171 27.71723734 38.90942471 42.35887831 29.54521078 49.88116249\n", + " 33.16924025 21.20547362 31.95823146 39.01894124 35.73001942 30.54317492\n", + " 40.7309696 45.25116668 45.9546001 31.35156271 34.92569278 17.84320411\n", + " 24.74321922 38.44962732 29.94635049 23.92951639 41.99514909 37.29367923\n", + " 35.15682342 33.65987009 37.33368372 38.53353645 17.00252505 30.85557975\n", + " 36.8799357 58.71850189 48.77093693 28.4106544 58.67011582 20.71058889\n", + " 33.31723303 37.42226549 30.47728508 33.00324757 19.26339148 29.03583959\n", + " 36.34123226 28.65708345 41.28780185 34.33694268 42.17958478 37.73892854\n", + " 42.64160312 42.25986371 24.09219286 39.16890286 34.30521295 43.72606268\n", + " 27.18128429 27.74442911 30.63108595 32.42735047 40.34309029 46.76953512\n", + " 26.6181258 36.08910601 
40.50778811 29.71097985 38.37919364 41.90300174\n", + " 44.01173702 14.15731694 39.40005042 28.29002221 42.06454206 34.53417982\n", + " 27.73660596 34.19351329 37.91951724 35.86194868 29.8935498 28.87236472\n", + " 50.68483437 46.93282718 31.87627262 28.20381135 54.46247685 40.17232149\n", + " 31.71801618 47.58242128 43.70776522 26.97317688 30.18391217 24.05483354\n", + " 39.2945252 52.8269084 31.37036501 34.04346011 34.0168272 26.33779038\n", + " 15.68519601 31.84014651 45.94614595 64.25252122 45.80270718 27.24683399\n", + " 37.76055132 33.0356684 37.52237593 36.01707258 28.77881565 21.49235339\n", + " 40.10557049 39.1053993 35.13057905 35.81233478 27.93090141 22.37256416\n", + " 22.82835302 49.24594157 36.23375508 55.39824433 29.01726643 21.49416128\n", + " 23.56604505 39.24976814 32.71944097 48.3173851 31.06737474 37.31946933\n", + " 40.42457929 42.89022305 45.69393401 31.3760351 17.59115958 29.70300547\n", + " 21.31643697 47.10826079 23.28275195 33.03813277 43.92689062 47.33194067\n", + " 29.70881912 42.92083624 33.80712178 24.28087218 15.4272794 33.32847469\n", + " 28.08721416 43.68647972 51.26621606 35.7289691 28.49790139 28.10140322\n", + " 25.01076195 27.2121548 19.60673721 38.32378982 26.44174062 25.15051975\n", + " 30.43094758 37.23872964 33.25837534 43.7353568 36.48502961 28.88881192\n", + " 8.47679619 42.80186816 24.93626472 33.54017974 31.38396491 34.08251503\n", + " 51.40049866 52.01859962 30.6907358 42.37705602 40.0831406 25.60653407\n", + " 52.02401579 37.77617654 36.8180199 32.19304171 28.73822811 6.87340965\n", + " 31.31660581 59.72930619 24.5956038 46.15265017 35.5047154 36.55953597\n", + " 27.04767252 25.88642944 33.59118716 38.308081 26.45077719 38.34037476\n", + " 25.69180679 30.58337288 33.46503559 30.45264568 27.72887534 37.38843452\n", + " 25.00844618 33.81717209 34.48593226 37.12173383 40.49733151 35.87491923\n", + " 20.58005942 35.64449106 33.04352454 37.06453805 29.30047899 44.80481871\n", + " 45.478548 34.14161754 32.85733386 
46.21229663 28.43302224 43.74762588\n", + " 30.69849614 40.32553244 25.56395672 28.38307257 41.56990364 33.23794221\n", + " 36.52646544 40.04885067 31.09838921 53.03270673 45.52241649 29.03175008\n", + " 33.87863844 27.87869243 19.68766676 16.73992158 49.53369811 30.78795999\n", + " 36.15056609 57.04903192 54.30629366 41.7722212 43.49585261 37.58210314\n", + " 44.99784533 37.13064995 46.78135688 37.2399462 59.47026759 22.89722777\n", + " 31.10253484 37.37357557 45.32189819 36.91006224 34.14291689 45.70301376\n", + " 28.56414997 34.39922509 32.20884682 47.25931746 21.84499179 42.32438158\n", + " 31.76288443 34.10348622 35.47645795 45.94195147 16.08864947 31.45474501\n", + " 25.01768142 30.91915374 23.37797127 30.51707643 41.6452885 46.27321834\n", + " 27.35955692 36.69504422 35.86885794 34.06295175 18.20374947 30.67715446\n", + " 25.98299347 32.08140715 27.01210319 35.77896065 29.57560069 35.81726664\n", + " 51.73358451 28.17674702 49.85585551 20.27533073 29.75737735 26.92718929\n", + " 29.18608153 31.33606607 47.32348256 44.78818107 49.87525436 54.37068854\n", + " 37.17813517 27.08510907 37.01053391 31.70403648 20.12252376 29.86800328\n", + " 30.96901501 27.88858463 38.79726511 38.24965508 35.4723428 41.60353312\n", + " 42.35877855 27.44766177 22.8172199 48.31092697 24.75297227 39.47912163\n", + " 44.88777996 52.63077229 53.12375124 28.21980233 26.96067221 42.00635257\n", + " 38.86017267 39.90899855 27.16387498 25.3107997 32.16652486 49.8775466\n", + " 33.00255261 37.99004831 37.0523635 33.70075964 21.93285206 45.60234218\n", + " 24.81843936 45.14834183 27.1558057 30.93362051 60.69170715 47.14031023\n", + " 55.71759271 22.81929522 48.95898857 31.58654194 34.38132172 36.19659629\n", + " 33.73701082 30.21185042 34.04335083 37.342922 45.43888311 26.20141771\n", + " 37.56772188 36.07122086 51.2604662 38.28882591 46.05301764 22.92610221\n", + " 43.98426988 24.63392141 53.1954661 46.34769484 36.26755507 20.40459942\n", + " 40.98319971 39.11350058 41.55966377 31.9028619 
41.06335146 25.61709318\n", + " 27.25301991 30.17365721 22.45456682 40.20870966 35.10999385 22.51787005\n", + " 19.710662 42.72933812 39.12090834 26.21576534 36.41849103 32.02162921\n", + " 34.39417486 38.0533805 35.14770989 25.50996381 47.17279431 29.89374567\n", + " 21.53513763 42.12579909 43.42316389 38.33001622 25.42705137 21.39660966\n", + " 36.24277658 36.37626856 24.24420036 27.98874405 40.39879296 27.81771545\n", + " 34.80368715 31.69797969 32.93309873 20.84028137 37.94723726 38.70398579\n", + " 34.72272385 36.73491879 39.62436491 18.91914884 51.45619604 53.87208928\n", + " 43.12896797 44.48837392 32.78176185 28.5209871 47.73275021 35.40403545\n", + " 32.09242376 23.79112629 20.30874637 34.1527622 42.40065475 25.81785316\n", + " 42.61344543 34.54718919 28.33044236 20.47876962 30.94956565 29.32865614\n", + " 43.69635954 34.16016302 31.83089663 41.51065159 27.63431413 36.27974179\n", + " 24.34445274 22.25187321 46.97955498 32.14865657 40.21304627 46.08822956\n", + " 33.73338325 38.15488948 32.03095418 28.88133836 38.32902539 51.11385924\n", + " 21.85178061 36.95591335 36.12394652 45.14274979 46.28499451 36.38677249\n", + " 40.01872064 39.40698998 42.4601263 54.10675324 42.96062214 15.13892265\n", + " 38.21812224 30.0684173 29.14213711 41.17387124 38.816888 50.17605158\n", + " 42.65308097 33.08865337 31.96771417 33.11152265 33.17460256 28.49072814\n", + " 27.01665856 30.30075813 34.71173089 37.79908716 28.05618723 33.05763839\n", + " 43.29680192 45.14199094 31.62513387 26.28032018 40.9609272 42.34200181\n", + " 36.09794431 19.20164807 26.17557974 35.51174555 28.73878925 46.0690601\n", + " 24.25705377 56.71584853 26.49879721 28.11708707 32.4378332 51.8286247\n", + " 44.51676618 34.28565391 42.6842464 29.50020428 36.55510181 46.5041759\n", + " 33.83579774 22.93901401 50.70090422 31.38041174 33.13649345 30.20879415\n", + " 33.69211786 17.52455314 37.60269218 41.42647786 37.95337958 28.91468894\n", + " 42.57456418 42.82568052 28.77215427 23.04708821 36.90414174 
27.62126509\n", + " 42.72648944 27.87455518 13.14431961 25.07555968 40.85238456 29.3856915\n", + " 48.02815202 25.05756998 42.52854966 39.62275343 28.01534438 29.63913156\n", + " 38.81391413 32.75447654 37.19536911 44.59718681 15.19833643 34.45629317\n", + " 55.4866157 37.35520707 38.49202261 31.02691191 26.61860874 40.48343272\n", + " 44.77764451 21.34758406 4.25235367 39.20309797 28.78636928 21.55752219\n", + " 52.22937556 38.41535476 30.52415089 36.22484696 42.14709895 28.57674673\n", + " 35.22047217 43.38572917 38.1969869 34.3153107 38.31646818 46.74988571\n", + " 27.48195934 37.56405542 45.41356082 30.65766971 20.42654532 37.12904577\n", + " 36.84539366 55.88972075 44.01461788 23.27388926 35.15092116 47.62952487\n", + " 30.98326792 21.24782571 35.86328502 45.18804163 40.25893958 33.45153073\n", + " 24.92899299 30.7139417 44.95338101 43.11879029 34.72228902 36.67756086\n", + " 32.22263432 30.3183397 50.63641709 42.52339935 28.03036458 44.71312325\n", + " 31.65915943 19.75529224 48.35642314 39.07611962 43.34611629 35.95571245\n", + " 47.59812422 34.22379036 38.12069392 30.56062644 22.82807128 31.61991098\n", + " 36.62046516 37.43415848 24.58999172 46.48461169 32.56418452 33.32841156\n", + " 35.16715432 41.56322131 43.55272442 51.89386671 21.53952737 46.70630144\n", + " 38.2971253 31.98021485 21.99317535 57.23052554 20.91282493 43.41955171\n", + " 50.22359797 49.0434095 14.64158345 36.45297714 25.65869071 29.11951102\n", + " 32.46281845 35.64078196 63.62762105 46.9354579 26.71195957 24.44330004\n", + " 40.51055204 31.37310319 36.13430857 35.89003814 29.31375206 33.03575386\n", + " 26.80215643 30.51929856 39.27554641 41.20998643 41.86194231 33.35842579\n", + " 26.02868124 28.48085486 27.58440669 33.12278693 55.33740159 44.25733949\n", + " 56.25003924 28.81115565 40.9972882 38.83749043 35.44529207 38.5143657\n", + " 20.35323318 30.14743284 23.94941884 35.08283378 32.39691886 34.89167625\n", + " 29.52560325 48.38910035 46.90359673 45.68574567 56.24840083 
34.37894556\n", + " 34.98291611 44.33238555 48.45468265 50.66127207 18.75582914 35.40064419\n", + " 41.53001797 24.65707374 26.53810222 38.01967702 18.2791755 23.51555598\n", + " 34.87789433 32.55563348 32.98271228 28.90705387 42.60127768 48.83111866\n", + " 23.34231814 37.21763627 34.49782475 46.29502028 58.59218696 37.59991144\n", + " 33.38248529 36.51931324 33.76024346 36.75451201 39.37320724 43.12047734\n", + " 38.70154982 48.62181336 33.45165008 35.27878824 45.87529456 21.42468512\n", + " 39.35710867 30.88099022 39.64782182 51.17657228 10.80340239 29.03305738\n", + " 45.02754792 33.84527162 44.73140395 32.26480696 19.96415119 56.44283652\n", + " 34.69445925 44.99909046 28.18595618 36.9288248 51.18234609 46.52686298\n", + " 31.3644046 52.99482279 36.61041958 34.62021921 47.35588893 41.0434417\n", + " 10.80531375 54.71103257 26.13916272 47.41764395 29.93776823 37.43720007\n", + " 34.26107875 46.43905543 33.20892231 33.49507214 39.80380618 49.46899822\n", + " 47.65824528 20.69676039 25.39109345 23.61024291 14.43486396 22.99919505\n", + " 34.13872547 33.28506543 27.86544376 33.89113246 20.67186095 20.68946873\n", + " 36.23573613 30.97804693 35.03062128 27.37717032 45.02627582 20.78536352\n", + " 26.69317775 39.24987156 31.96254073 37.23482516 44.60593676 15.73333883\n", + " 49.99412037 34.05535435 37.19042554 24.58418902 40.55682113 17.32118345\n", + " 36.90828742 58.5351077 34.60236214 37.47915452 20.56171677 14.72782384\n", + " 22.8069674 38.15248659 37.26029311 28.1435699 44.91518261 33.74022694\n", + " 36.63924745 25.70239379 27.16963868 37.0606325 38.34856402 23.41068339\n", + " 32.30574233 23.68186265 45.13782968 40.41255425 56.85007977 27.05281095\n", + " 55.20060582 37.69626218 38.39348554 34.25142032 28.61485855 15.99216949\n", + " 46.24002651 48.73015347 25.13172188 42.43783752 30.86014808 39.95690746\n", + " 35.13389349 38.05156307 51.85002861 35.96586431 44.39298008 43.01353641\n", + " 41.10093496 39.08751427 41.11193204 30.30137095 24.72787933 
24.19301203\n", + " 27.95414429 38.73755272 31.67267154 38.59125155 35.8191196 53.54307631\n", + " 41.34721065 33.04334995 31.72532719 44.65015683 34.06058516 32.8430232\n", + " 20.20313491 43.54309672 38.01296394 40.20552166 28.73249195 41.09335347\n", + " 24.77505335 22.48098287 53.9783682 18.75689617 50.70290157 26.92339217\n", + " 45.46219454 34.72879137 30.02154642 36.30951646 22.81141219 36.64673172\n", + " 21.51203028 49.30413496 50.99090809 43.36554515 28.94233676 50.95113623\n", + " 35.49732452 29.29131048 31.9721603 23.58536017 31.80883592 18.33653162\n", + " 35.65137103 30.66393808 38.78428117 22.49215745 36.23874598 36.93296819\n", + " 22.20068958 25.96756587 43.68868152 46.55554667 26.75584609 38.83622013\n", + " 20.38024383 22.12990042 37.04191752 35.73862099 22.20197927 46.81673536\n", + " 47.85467733 37.09874902 30.39007704 32.94743567 23.41849114 46.97615713\n", + " 24.81276828 25.80584881 29.66903405 30.49474184 59.72658415 35.92819481\n", + " 28.57768615 22.67649635 28.13174626 51.01743823 31.46933004 26.80856595\n", + " 43.002335 42.67905471 53.98883964 21.68076976 32.36388345 41.7092776\n", + " 34.81552604 44.84564647 35.09172063 35.63727152 31.62884997 19.58149817\n", + " 32.74915194 26.18873629 26.08546182 43.67064002 43.13422941 29.9120593\n", + " 37.45110815 30.50059461 44.90810521 25.66562241 42.60583583 27.4032675\n", + " 38.10547948 54.87444144 32.30925472 38.9353986 35.33900008 26.21056743\n", + " 24.27830972 23.8452208 34.94479756 38.16253125]\n" + ] + }, + { + "data": { + "text/plain": [ + "35.30168305493937" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "age = np.random.normal(35,10,1000)\n", + "print(age)\n", + "np.average(age)" ] }, { @@ -94,12 +339,105 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 9, "id": "d590308e", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# Set a seed for reproducibility\n", + "np.random.seed(0)\n", + "\n", + "# Generate right-skewed data using the gamma distribution\n", + "shape = 2 # Shape parameter (controls skewness, adjust as needed)\n", + "scale = 1 # Scale parameter (controls spread, adjust as needed)\n", + "size = 1000 # Number of data points\n", + "\n", + "# Generate right-skewed data\n", + "data = np.random.gamma(shape, scale, size)\n", + "\n", + "# Create a histogram\n", + "plt.hist(data, bins=30, density=True, alpha=0.6, color='b', label='Right-Skewed Data')\n", + "\n", + "# Add labels and a legend\n", + "plt.xlabel('Value')\n", + "plt.ylabel('Frequency')\n", + "plt.title('Right-Skewed Distribution')\n", + "plt.legend(loc='upper right')\n", + "\n", + "# Show the plot\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "e26e3c30", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1.92931174 1.9599174 1.94880113 ... 1.89850062 1.8887389 2.00216321]\n" + ] + } + ], + "source": [ + "sample_size = 500\n", + "result = []\n", + "for i in range(5000):\n", + " ids = np.random.choice(1000,sample_size)\n", + " sample_data = data[ids]\n", + " sample_mean = np.average(sample_data)\n", + " result.append(sample_mean)\n", + "\n", + "result = np.array(result)\n", + "print(result)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "fbda0ad8", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ - "# TODO" + "# Create a histogram\n", + "plt.hist(result, bins=30, density=True, alpha=0.6, color='b', label='Left-Skewed Data')\n", + "\n", + "# Add labels and a legend\n", + "plt.xlabel('Sample Average')\n", + "plt.ylabel('Frequency')\n", + "plt.title('Average Distribution')\n", + "plt.legend(loc='upper right')\n", + "\n", + "# Show the plot\n", + "plt.show()" ] }, { @@ -144,12 +482,107 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 27, "id": "12f8030c", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ - "# TODO\n" + "### Example 3\n", + "\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# Set a seed for reproducibility\n", + "np.random.seed(0)\n", + "\n", + "# Parameters for the uniform distribution\n", + "low = 0 # Lower bound\n", + "high = 10 # Upper bound\n", + "size = 1000 # Number of data points\n", + "\n", + "# Generate random data from a uniform distribution\n", + "data = np.random.uniform(low, high, size)\n", + "\n", + "# Create a histogram\n", + "plt.hist(data, bins=30, density=True, alpha=0.6, color='b', label='Uniform Data')\n", + "\n", + "# Add labels and a legend\n", + "plt.xlabel('Value')\n", + "plt.ylabel('Frequency')\n", + "plt.title('Uniform Distribution')\n", + "plt.legend(loc='upper right')\n", + "\n", + "# Show the plot\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "b01b7270", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[5.64348326 7.99291573 4.57052724 ... 4.6118553 3.68265736 8.0410782 ]\n" + ] + } + ], + "source": [ + "sample_size = 2\n", + "result = []\n", + "for i in range(5000):\n", + " ids = np.random.choice(1000,sample_size)\n", + " sample_data = data[ids]\n", + " sample_mean = np.average(sample_data)\n", + " result.append(sample_mean)\n", + "\n", + "result = np.array(result)\n", + "print(result)" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "c7dbee06", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Create a histogram\n", + "plt.hist(result, bins=30, density=True, alpha=0.6, color='b', label='Sample Means')\n", + "\n", + "# Add labels and a legend\n", + "plt.xlabel('Sample Average')\n", + "plt.ylabel('Frequency')\n", + "plt.title('Average Distribution')\n", + "plt.legend(loc='upper right')\n", + "\n", + "# Show the plot\n", + "plt.show()" ] }, { @@ -166,7 +599,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 38, "id": "214ecc0d", "metadata": {}, "outputs": [ @@ -182,22 +615,356 @@ } ], "source": [ - "# TODO\n" + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "from scipy import stats\n", + "\n", + "# Set a seed for reproducibility\n", + "np.random.seed(0)\n", + "\n", + "# Parameters for the normal distribution\n", + "mu = 0 # Mean\n", + "sigma = 1 # Standard deviation\n", + "size = 1000 # Number of data points\n", + "\n", + "# Generate random data from a normal distribution\n", + "data = np.random.normal(mu, sigma, size)\n", + "\n", + "# Create a histogram\n", + "plt.hist(data, bins=30, density=True, alpha=0.6, color='b', label='Normal Data')\n", + "\n", + "# Calculate mean, median, and mode\n", + "mean = np.mean(data)\n", + "median = np.median(data)\n", + "\n", + "# Add vertical lines for mean, median, and mode\n", + "plt.axvline(mean, color='r', linestyle='dashed', linewidth=2, label='Mean')\n", + "plt.axvline(median, color='g', linestyle='dashed', linewidth=2, label='Median')\n", + "\n", + "# Add labels and a legend\n", + "plt.xlabel('Value')\n", + "plt.ylabel('Frequency')\n", + "plt.title('Normal Distribution with Mean, Median, and Mode')\n", + "plt.legend(loc='upper right')\n", + "\n", + "# Show the plot\n", + "plt.show()\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 42, "id": "0ea06451", "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", +
"output_type": "stream", + "text": [ + "[ 1.76405235e+00 4.00157208e-01 1.00000000e+01 2.24089320e+00\n", + " 1.86755799e+00 -9.77277880e-01 9.50088418e-01 -1.51357208e-01\n", + " -1.03218852e-01 4.10598502e-01 1.44043571e-01 1.00000000e+01\n", + " 7.61037725e-01 1.21675016e-01 4.43863233e-01 3.33674327e-01\n", + " 1.49407907e+00 -2.05158264e-01 1.00000000e+01 -8.54095739e-01\n", + " -2.55298982e+00 1.00000000e+01 8.64436199e-01 -7.42165020e-01\n", + " 2.26975462e+00 -1.45436567e+00 4.57585173e-02 -1.87183850e-01\n", + " 1.53277921e+00 1.46935877e+00 1.54947426e-01 3.78162520e-01\n", + " -8.87785748e-01 -1.98079647e+00 -3.47912149e-01 1.56348969e-01\n", + " 1.23029068e+00 1.20237985e+00 1.00000000e+01 -3.02302751e-01\n", + " -1.04855297e+00 -1.42001794e+00 -1.70627019e+00 1.95077540e+00\n", + " -5.09652182e-01 -4.38074302e-01 -1.25279536e+00 1.00000000e+01\n", + " -1.61389785e+00 -2.12740280e-01 -8.95466561e-01 3.86902498e-01\n", + " -5.10805138e-01 -1.18063218e+00 -2.81822283e-02 4.28331871e-01\n", + " 6.65172224e-02 3.02471898e-01 -6.34322094e-01 -3.62741166e-01\n", + " -6.72460448e-01 -3.59553162e-01 -8.13146282e-01 1.00000000e+01\n", + " 1.77426142e-01 -4.01780936e-01 -1.63019835e+00 1.00000000e+01\n", + " -9.07298364e-01 5.19453958e-02 7.29090562e-01 1.28982911e-01\n", + " 1.13940068e+00 -1.23482582e+00 4.02341641e-01 -6.84810091e-01\n", + " -8.70797149e-01 -5.78849665e-01 -3.11552532e-01 1.00000000e+01\n", + " -1.16514984e+00 9.00826487e-01 4.65662440e-01 -1.53624369e+00\n", + " 1.48825219e+00 1.89588918e+00 1.17877957e+00 -1.79924836e-01\n", + " -1.07075262e+00 1.05445173e+00 -4.03176947e-01 1.22244507e+00\n", + " 2.08274978e-01 9.76639036e-01 3.56366397e-01 7.06573168e-01\n", + " 1.05000207e-02 1.78587049e+00 1.26912093e-01 4.01989363e-01\n", + " 1.88315070e+00 -1.34775906e+00 -1.27048500e+00 9.69396708e-01\n", + " -1.17312341e+00 1.94362119e+00 -4.13618981e-01 -7.47454811e-01\n", + " 1.92294203e+00 1.48051479e+00 1.86755896e+00 9.06044658e-01\n", + " 
-8.61225685e-01 1.91006495e+00 -2.68003371e-01 8.02456396e-01\n", + " 9.47251968e-01 -1.55010093e-01 6.14079370e-01 9.22206672e-01\n", + " 3.76425531e-01 -1.09940079e+00 2.98238174e-01 1.32638590e+00\n", + " -6.94567860e-01 -1.49634540e-01 -4.35153552e-01 1.84926373e+00\n", + " 6.72294757e-01 4.07461836e-01 -7.69916074e-01 1.00000000e+01\n", + " -6.74332661e-01 1.00000000e+01 -6.35846078e-01 6.76433295e-01\n", + " 5.76590817e-01 -2.08298756e-01 3.96006713e-01 -1.09306151e+00\n", + " -1.49125759e+00 4.39391701e-01 1.66673495e-01 1.00000000e+01\n", + " 2.38314477e+00 9.44479487e-01 -9.12822225e-01 1.11701629e+00\n", + " -1.31590741e+00 -4.61584605e-01 -6.82416053e-02 1.71334272e+00\n", + " -7.44754822e-01 -8.26438539e-01 -9.84525244e-02 1.00000000e+01\n", + " 1.12663592e+00 1.00000000e+01 -1.14746865e+00 -4.37820045e-01\n", + " -4.98032451e-01 1.92953205e+00 9.49420807e-01 8.75512414e-02\n", + " -1.22543552e+00 8.44362976e-01 -1.00021535e+00 -1.54477110e+00\n", + " 1.18802979e+00 3.16942612e-01 9.20858824e-01 1.00000000e+01\n", + " 8.56830612e-01 -6.51025593e-01 -1.03424284e+00 1.00000000e+01\n", + " -8.03409664e-01 -6.89549778e-01 -4.55532504e-01 1.74791590e-02\n", + " -3.53993911e-01 -1.37495129e+00 -6.43618403e-01 -2.22340315e+00\n", + " 6.25231451e-01 -1.60205766e+00 1.00000000e+01 1.00000000e+01\n", + " -7.39562996e-01 1.54301460e+00 -1.29285691e+00 2.67050869e-01\n", + " -3.92828182e-02 -1.16809350e+00 5.23276661e-01 -1.71546331e-01\n", + " 7.71790551e-01 1.00000000e+01 2.16323595e+00 1.33652795e+00\n", + " 1.00000000e+01 -2.39379178e-01 1.09965960e+00 1.00000000e+01\n", + " 6.40131526e-01 -1.61695604e+00 -2.43261244e-02 -7.38030909e-01\n", + " 2.79924599e-01 -9.81503896e-02 9.10178908e-01 3.17218215e-01\n", + " 7.86327962e-01 -4.66419097e-01 -9.44446256e-01 -4.10049693e-01\n", + " -1.70204139e-02 3.79151736e-01 2.25930895e+00 -4.22571517e-02\n", + " -9.55945000e-01 1.00000000e+01 -4.63595975e-01 4.81481474e-01\n", + " -1.54079701e+00 1.00000000e+01 
1.56506538e-01 2.32181036e-01\n", + " 1.00000000e+01 1.00000000e+01 -1.42406091e+00 -4.93319883e-01\n", + " -5.42861476e-01 4.16050046e-01 -1.15618243e+00 7.81198102e-01\n", + " 1.00000000e+01 -2.06998503e+00 4.26258731e-01 6.76908035e-01\n", + " -6.37437026e-01 -3.97271814e-01 -1.32880578e-01 -2.97790879e-01\n", + " -3.09012969e-01 -1.67600381e+00 1.15233156e+00 1.07961859e+00\n", + " -8.13364259e-01 -1.46642433e+00 5.21064876e-01 -5.75787970e-01\n", + " 1.41953163e-01 -3.19328417e-01 1.00000000e+01 6.94749144e-01\n", + " -7.25597378e-01 -1.38336396e+00 -1.58293840e+00 6.10379379e-01\n", + " -1.18885926e+00 -5.06816354e-01 -5.96314038e-01 -5.25672963e-02\n", + " -1.93627981e+00 1.88778597e-01 5.23891024e-01 1.00000000e+01\n", + " -3.10886172e-01 9.74001663e-02 3.99046346e-01 -2.77259276e+00\n", + " 1.95591231e+00 3.90093323e-01 -6.52408582e-01 -3.90953375e-01\n", + " 4.93741777e-01 -1.16103939e-01 -2.03068447e+00 2.06449286e+00\n", + " -1.10540657e-01 1.02017271e+00 -6.92049848e-01 1.53637705e+00\n", + " 2.86343689e-01 1.00000000e+01 -1.04525337e+00 1.21114529e+00\n", + " 6.89818165e-01 1.30184623e+00 -6.28087560e-01 -4.81027118e-01\n", + " 2.30391670e+00 -1.06001582e+00 -1.35949701e-01 1.13689136e+00\n", + " 9.77249677e-02 5.82953680e-01 -3.99449029e-01 3.70055888e-01\n", + " -1.30652685e+00 1.00000000e+01 -1.18164045e-01 -6.80178204e-01\n", + " 6.66383082e-01 1.00000000e+01 -1.33425847e+00 -1.34671751e+00\n", + " 6.93773153e-01 -1.59573438e-01 -1.33701560e-01 1.07774381e+00\n", + " -1.12682581e+00 -7.30677753e-01 -3.84879809e-01 1.00000000e+01\n", + " 1.00000000e+01 -2.86887192e-01 -6.16264021e-02 -1.07305276e-01\n", + " -7.19604389e-01 -8.12992989e-01 2.74516358e-01 -8.90915083e-01\n", + " -1.15735526e+00 -3.12292251e-01 -1.57667016e-01 2.25672350e+00\n", + " 1.00000000e+01 9.43260725e-01 7.47188334e-01 -1.18894496e+00\n", + " 7.73252977e-01 -1.18388064e+00 -2.65917224e+00 6.06319524e-01\n", + " -1.75589058e+00 4.50934462e-01 -6.84010898e-01 1.65955080e+00\n", 
+ " 1.06850940e+00 -4.53385804e-01 -6.87837611e-01 -1.21407740e+00\n", + " 1.00000000e+01 -2.80355495e-01 -3.64693544e-01 1.56703855e-01\n", + " 5.78521498e-01 3.49654457e-01 -7.64143924e-01 -1.43779147e+00\n", + " 1.36453185e+00 -6.89449185e-01 -6.52293600e-01 -5.21189312e-01\n", + " -1.84306955e+00 -4.77974004e-01 -4.79655814e-01 6.20358298e-01\n", + " 6.98457149e-01 3.77088909e-03 1.00000000e+01 3.39964984e-01\n", + " -1.56821116e-02 1.60928168e-01 -1.90653494e-01 -3.94849514e-01\n", + " -2.67733537e-01 -1.12801133e+00 2.80441705e-01 -9.93123611e-01\n", + " 8.41631264e-01 -2.49458580e-01 1.00000000e+01 1.00000000e+01\n", + " 6.43314465e-01 -1.57062341e+00 -2.06903676e-01 8.80178912e-01\n", + " -1.69810582e+00 3.87280475e-01 -2.25556423e+00 -1.02250684e+00\n", + " 3.86305518e-02 -1.65671510e+00 -9.85510738e-01 -1.47183501e+00\n", + " 1.64813493e+00 1.64227755e-01 5.67290278e-01 -2.22675101e-01\n", + " 1.00000000e+01 -1.61647419e+00 -2.91837363e-01 1.00000000e+01\n", + " 8.57923924e-01 1.14110187e+00 1.46657872e+00 8.52551939e-01\n", + " -5.98653937e-01 -1.11589699e+00 7.66663182e-01 3.56292817e-01\n", + " -1.76853845e+00 3.55481793e-01 1.00000000e+01 5.89255892e-02\n", + " -1.85053671e-01 -8.07648488e-01 -1.44653470e+00 8.00297949e-01\n", + " -3.09114445e-01 1.00000000e+01 1.73272119e+00 6.84501107e-01\n", + " 3.70825001e-01 1.42061805e-01 1.51999486e+00 1.71958931e+00\n", + " 9.29505111e-01 5.82224591e-01 -2.09460307e+00 1.23721914e-01\n", + " -1.30106954e-01 9.39532294e-02 9.43046087e-01 -2.73967717e+00\n", + " 1.00000000e+01 2.69904355e-01 -4.66845546e-01 -1.41690611e+00\n", + " 8.68963487e-01 2.76871906e-01 1.00000000e+01 3.14817205e-01\n", + " 8.21585712e-01 5.29264630e-03 8.00564803e-01 7.82601752e-02\n", + " -3.95228983e-01 -1.15942052e+00 1.00000000e+01 1.94292938e-01\n", + " 8.75832762e-01 -1.15107468e-01 4.57415606e-01 -9.64612014e-01\n", + " -7.82629156e-01 -1.10389299e-01 -1.05462846e+00 8.20247837e-01\n", + " 4.63130329e-01 2.79095764e-01 
3.38904125e-01 2.02104356e+00\n", + " -4.68864188e-01 -2.20144129e+00 1.99300197e-01 -5.06035410e-02\n", + " -5.17519043e-01 1.00000000e+01 -4.39189522e-01 1.81338429e-01\n", + " -5.02816701e-01 2.41245368e+00 -9.60504382e-01 -7.93117363e-01\n", + " -2.28862004e+00 2.51484415e-01 -2.01640663e+00 -5.39454633e-01\n", + " -2.75670535e-01 -7.09727966e-01 1.73887268e+00 9.94394391e-01\n", + " 1.31913688e+00 -8.82418819e-01 1.12859406e+00 4.96000946e-01\n", + " 7.71405949e-01 1.02943883e+00 -9.08763246e-01 -4.24317621e-01\n", + " 8.62596011e-01 -2.65561909e+00 1.51332808e+00 5.53132064e-01\n", + " -4.57039607e-02 2.20507656e-01 -1.02993528e+00 -3.49943365e-01\n", + " 1.00000000e+01 1.29802197e+00 2.69622405e+00 -7.39246663e-02\n", + " -6.58552967e-01 1.00000000e+01 1.00000000e+01 -7.78547559e-02\n", + " 3.82732430e-01 -3.42422805e-02 1.09634685e+00 1.00000000e+01\n", + " 1.00000000e+01 1.00000000e+01 -1.63263453e+00 -1.56776772e+00\n", + " -1.17915793e+00 1.30142807e+00 1.00000000e+01 1.37496407e+00\n", + " -1.33221165e+00 -1.96862469e+00 -6.60056320e-01 1.00000000e+01\n", + " 4.98690275e-01 1.04797216e+00 2.84279671e-01 1.74266878e+00\n", + " -2.22605681e-01 -9.13079218e-01 -1.68121822e+00 -8.88971358e-01\n", + " 2.42117961e-01 -8.88720257e-01 9.36742464e-01 1.41232771e+00\n", + " -2.36958691e+00 1.00000000e+01 -2.23960406e+00 4.01499055e-01\n", + " 1.22487056e+00 6.48561063e-02 -1.27968917e+00 -5.85431204e-01\n", + " -2.61645446e-01 -1.82244784e-01 -2.02896841e-01 -1.09882779e-01\n", + " 2.13480049e-01 -1.20857365e+00 -2.42019830e-01 1.51826117e+00\n", + " 1.00000000e+01 -4.43836093e-01 1.07819730e+00 -2.55918467e+00\n", + " 1.18137860e+00 -6.31903758e-01 1.63928572e-01 9.63213559e-02\n", + " 9.42468119e-01 -2.67594746e-01 -6.78025782e-01 1.29784579e+00\n", + " -2.36417382e+00 1.00000000e+01 -1.34792542e+00 -7.61573388e-01\n", + " 2.01125668e+00 -4.45954265e-02 1.95069697e-01 -1.78156286e+00\n", + " -7.29044659e-01 1.96557401e-01 3.54757693e-01 6.16886554e-01\n", + " 
1.00000000e+01 5.27004208e-01 4.53781913e-01 -1.82974041e+00\n", + " 3.70057219e-02 7.67902408e-01 5.89879821e-01 -3.63858810e-01\n", + " -8.05626508e-01 -1.11831192e+00 -1.31054012e-01 1.13307988e+00\n", + " 1.00000000e+01 -6.59891730e-01 -1.13980246e+00 7.84957521e-01\n", + " -5.54309627e-01 -4.70637658e-01 -2.16949570e-01 4.45393251e-01\n", + " -3.92388998e-01 -3.04614305e+00 5.43311891e-01 4.39042958e-01\n", + " -2.19541028e-01 -1.08403662e+00 3.51780111e-01 3.79235534e-01\n", + " -4.70032883e-01 -2.16731471e-01 -9.30156503e-01 -1.78589092e-01\n", + " -1.55042935e+00 4.17318821e-01 1.00000000e+01 1.00000000e+01\n", + " -1.40596292e+00 -5.90057646e-01 -1.10489405e-01 -1.66069981e+00\n", + " 1.15147873e-01 -3.79147563e-01 -1.74235620e+00 -1.30324275e+00\n", + " 1.00000000e+01 1.00000000e+01 -1.31908640e-01 4.04761812e-01\n", + " 2.23843563e-01 3.29622982e-01 1.28598401e+00 -1.50699840e+00\n", + " 6.76460732e-01 -3.82008956e-01 -2.24258934e-01 -3.02249730e-01\n", + " 1.00000000e+01 -1.22619619e+00 1.00000000e+01 1.67094303e+00\n", + " -5.61330204e-02 -1.38504274e-03 -6.87299037e-01 -1.17474546e-01\n", + " 4.66166426e-01 -3.70242441e-01 -4.53804041e-01 4.03264540e-01\n", + " -9.18004770e-01 1.00000000e+01 8.20321797e-01 1.35994854e+00\n", + " -9.03820073e-02 1.36759724e+00 1.03440989e+00 -9.96212640e-01\n", + " -1.21793851e+00 -3.04963638e-01 1.00000000e+01 -7.22870076e-02\n", + " -6.00657558e-01 1.55224318e+00 2.86904488e-01 -2.32059428e+00\n", + " 3.17160626e-01 5.20040615e-01 2.25608654e-01 1.00000000e+01\n", + " -6.72756089e-02 1.00000000e+01 -3.70704003e-01 -9.45615796e-01\n", + " -9.32740911e-01 -1.26306835e+00 4.52489093e-01 9.78961454e-02\n", + " -4.48165363e-01 -6.49337928e-01 -2.34231050e-02 1.07919473e+00\n", + " -2.00421572e+00 3.76876521e-01 -5.45711974e-01 -1.88458584e+00\n", + " -1.94570308e+00 -9.12783494e-01 2.19509556e-01 3.93062934e-01\n", + " -9.38981573e-01 1.01702099e+00 1.42298350e+00 3.96086585e-01\n", + " -5.91402668e-01 1.12441918e+00 
7.55395696e-01 8.67407411e-01\n", + " -6.56463675e-01 -2.83455451e+00 2.11679102e+00 -1.61087840e+00\n", + " -3.57680719e-02 2.38074535e+00 3.30576756e-01 9.49246474e-01\n", + " -1.50239657e+00 -1.77766695e+00 -5.32702792e-01 1.09074973e+00\n", + " -3.46249448e-01 -7.94636321e-01 1.97967290e-01 1.08193522e+00\n", + " -1.44494020e+00 -1.21054299e+00 -7.88669255e-01 1.09463837e+00\n", + " 2.34821526e-01 2.13215341e+00 1.00000000e+01 -3.50951769e-02\n", + " 1.26507784e+00 2.11497013e-01 -7.04921353e-01 6.79974844e-01\n", + " -6.96326654e-01 -2.90397101e-01 1.32778270e+00 -1.01281486e-01\n", + " -8.03141387e-01 -4.64337691e-01 1.02179059e+00 1.00000000e+01\n", + " -3.86870847e-01 -5.10292740e-01 1.83925494e-01 -3.85489760e-01\n", + " -1.60183605e+00 -8.87180942e-01 1.00000000e+01 1.24331938e+00\n", + " 8.12674042e-01 5.87259379e-01 -5.05358317e-01 -8.15791542e-01\n", + " -5.07517602e-01 -1.05188010e+00 1.00000000e+01 -2.24532165e+00\n", + " 5.64008535e-01 -1.28455230e+00 -1.04343491e-01 -9.88001942e-01\n", + " -1.17762896e+00 1.00000000e+01 1.75498615e+00 -1.32988422e-01\n", + " -7.65702194e-01 5.55786964e-01 1.03493146e-02 7.20033759e-01\n", + " -1.82425666e+00 1.00000000e+01 7.72694837e-01 -1.66159829e+00\n", + " 4.48195284e-01 1.69618157e+00 -1.48577034e-02 8.21405937e-01\n", + " 6.70570450e-01 -7.07505698e-01 3.97667346e-02 -1.56699471e+00\n", + " -4.51303037e-01 2.65687975e-01 7.23100494e-01 2.46121252e-02\n", + " 7.19983730e-01 -1.10290621e+00 -1.01697275e-01 1.92793845e-02\n", + " 1.84959125e+00 -2.14166656e-01 -4.99016638e-01 2.13512238e-02\n", + " 1.00000000e+01 1.92753849e-01 -3.65055217e-01 -1.79132755e+00\n", + " -5.85865511e-02 -3.17543094e-01 -1.63242330e+00 -6.71341546e-02\n", + " 1.48935596e+00 5.21303748e-01 6.11927193e-01 -1.34149673e+00\n", + " 4.76898369e-01 1.00000000e+01 5.29045238e-01 4.22628622e-01\n", + " -1.35978073e+00 -4.14008116e-02 -7.57870860e-01 -5.00840943e-02\n", + " -8.97400927e-01 1.31247037e+00 -8.58972388e-01 -8.98942156e-01\n", + 
" 7.45864065e-02 -1.07709907e+00 -4.24663302e-01 -8.29964598e-01\n", + " 1.41117206e+00 7.85803827e-01 -5.74695185e-02 -3.91217052e-01\n", + " 9.40917615e-01 1.00000000e+01 4.98052405e-01 -2.61922373e-02\n", + " 1.00000000e+01 -1.12465983e-01 -5.32489919e-01 6.45055273e-01\n", + " 1.01184243e+00 -6.57951045e-01 4.68385234e-01 1.73587900e+00\n", + " -6.67712721e-01 1.68192174e+00 -8.52585847e-01 2.29597556e-02\n", + " -1.11456118e-02 1.14988999e-02 -8.37678042e-01 -5.91183104e-01\n", + " -6.67720286e-01 1.00000000e+01 3.30035115e-01 2.22594433e+00\n", + " 1.37098901e+00 -5.09843242e-01 3.24869616e-01 9.97117981e-01\n", + " 3.06018243e-02 -6.96415784e-02 5.15749428e-02 8.67276629e-01\n", + " -8.48320523e-01 -3.25669469e-01 4.70433145e-01 3.11447072e-01\n", + " 2.39582760e-01 -3.69801166e-01 9.72535789e-01 2.13386825e+00\n", + " 4.06415494e-01 -1.93176702e-01 7.55740289e-01 -5.39132637e-01\n", + " -7.49690345e-01 3.28087476e-02 -2.58279663e+00 -1.15395036e+00\n", + " -3.47961856e-01 -1.35338886e+00 -1.03264310e+00 -4.36748337e-01\n", + " -1.64296529e+00 -4.06071796e-01 -5.35270165e-01 2.54052084e-02\n", + " 1.15418403e+00 1.72504416e-01 2.10620213e-02 9.94544570e-02\n", + " 2.27392775e-01 -1.01673865e+00 -1.14775325e-01 3.08751242e-01\n", + " -1.37075998e+00 8.65652923e-01 1.08137603e+00 -6.31375988e-01\n", + " -2.41337791e-01 -8.78190343e-01 6.99380484e-01 -1.06122229e+00\n", + " -2.22477010e-01 -8.58919908e-01 5.09542770e-02 -1.79422927e+00\n", + " 1.32646164e+00 -9.64606424e-01 5.98946831e-02 -2.12523045e-01\n", + " -7.62114512e-01 -8.87780137e-01 1.00000000e+01 -5.25640593e-01\n", + " 2.71170185e-01 -8.01496885e-01 -6.47181432e-01 4.72247150e-01\n", + " 9.30408496e-01 -1.75316402e-01 -1.42191987e+00 1.99795608e+00\n", + " -8.56549308e-01 1.00000000e+01 2.59442459e+00 -4.04032294e-01\n", + " -1.46173269e+00 -6.83439767e-01 3.67544896e-01 1.90311558e-01\n", + " -8.51729197e-01 1.00000000e+01 1.00000000e+01 -1.18468659e+00\n", + " 9.60693398e-01 1.32906285e+00 
-8.17493098e-01 -1.40134729e+00\n", + " 1.03043827e+00 -2.04732361e+00 -1.22662166e+00 9.67446150e-01\n", + " -5.53525480e-02 -2.63937349e-01 3.52816606e-01 -1.52774424e-01\n", + " -1.29868672e+00 1.27607535e+00 1.32501405e+00 2.05332564e-01\n", + " 4.51340154e-02 2.33962481e+00 1.00000000e+01 -2.59576982e-01\n", + " 3.64481249e-01 1.47132196e+00 1.00000000e+01 -2.58572632e-01\n", + " 3.08331246e-01 -1.37808347e+00 -3.11976108e-01 1.00000000e+01\n", + " -1.00683175e+00 1.68157672e+00 -7.92286662e-01 -5.31605908e-01\n", + " 3.65848788e-01 1.29782527e+00 4.81115126e-01 2.75935511e+00\n", + " -7.46679783e-02 2.58716440e-01 2.75600674e-01 1.00000000e+01\n", + " 5.07238951e-01 1.00000000e+01 1.00000000e+01 2.44443456e-01\n", + " 1.40134483e+00 1.00000000e+01 5.28943618e-01 2.46147789e-01\n", + " 1.00000000e+01 -8.04753741e-01 2.34664703e+00 -1.27916111e+00\n", + " -3.65551090e-01 9.38092541e-01 2.96733172e-01 8.29986159e-01\n", + " -4.96102334e-01 -7.48049827e-02 1.22319836e-02 1.56925961e+00\n", + " 6.90429024e-01 7.96672108e-01 -6.57926093e-01 9.68882639e-01\n", + " 2.25581664e-01 1.38914532e+00 2.01406015e+00 -3.06765776e-01\n", + " -4.06303130e-01 -8.64044991e-01 -1.43579512e-01 -3.82025449e-01\n", + " 3.59504400e-01 1.00000000e+01 -3.61599281e-01 1.06458514e+00\n", + " -9.37880231e-01 4.33107953e-01 -4.05941727e-01 7.24368505e-01\n", + " 1.38526155e+00 -3.03098253e-01 4.41032907e-01 1.78792866e-01\n", + " -7.99422400e-01 2.40787510e-01 2.89120505e-01 4.12870820e-01\n", + " 1.00000000e+01 9.41923003e-02 -1.14761094e+00 -3.58114075e-01]\n" + ] + } + ], + "source": [ + "data_outliers = data.copy()\n", + "data_outliers[np.random.choice(1000,100)] = 10\n", + "print(data_outliers)" + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 43, "id": "a0e55e5c", "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "# Create a histogram\n", + "plt.hist(data_outliers, bins=30, density=True, alpha=0.6, color='b', label='Normal Data')\n", + "\n", + "# Calculate mean, median, and mode\n", + "mean = np.mean(data_outliers)\n", + "median = np.median(data_outliers)\n", + "\n", + "# Add vertical lines for mean, median, and mode\n", + "plt.axvline(mean, color='r', linestyle='dashed', linewidth=2, label='Mean')\n", + "plt.axvline(median, color='g', linestyle='dashed', linewidth=2, label='Median')\n", + "\n", + "# Add labels and a legend\n", + "plt.xlabel('Value')\n", + "plt.ylabel('Frequency')\n", + "plt.title('Normal Distribution with Mean, Median, and Mode')\n", + "plt.legend(loc='upper right')\n", + "\n", + "# Show the plot\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c63e19a7", + "metadata": {}, "outputs": [], "source": [] } From 85eb10bbf504a9801f7e6c35c3fafb35b825057e Mon Sep 17 00:00:00 2001 From: Israel Dellinger <51415637+idellinger@users.noreply.github.com> Date: Mon, 2 Oct 2023 19:10:30 +0000 Subject: [PATCH 04/10] descriptive stats problems --- Israel/problems.ipynb | 270 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 270 insertions(+) create mode 100644 Israel/problems.ipynb diff --git a/Israel/problems.ipynb b/Israel/problems.ipynb new file mode 100644 index 00000000..b678bf10 --- /dev/null +++ b/Israel/problems.ipynb @@ -0,0 +1,270 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ac622319", + "metadata": {}, + "source": [ + "# Descriptive statistics problems" + ] + }, + { + "cell_type": "markdown", + "id": "aa8993e4", + "metadata": {}, + "source": [ + "### Exercise 1" + ] + }, + { + "cell_type": "markdown", + "id": "5e0ab0d5", + "metadata": {}, + "source": [ + "We will use Numpy to obtain information to describe statistically.\n", + "\n", + "- Generate an array of 100 elements following a normal distribution.\n", 
+ "- Generate an array of 100 elements following a chi-square distribution with 3 degrees of freedom.\n", + "- Calculate the main metrics and statistical measures that best describe the two vectors." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "34720ab6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Chi-square Distribution')" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "np.random.seed(7)\n", + "\n", + "array1 = np.random.normal(size = 100)\n", + "array2 = np.random.chisquare(3, 100)\n", + "\n", + "fig, (ax1, ax2) = plt.subplots(1,2)\n", + "ax1.hist(array1)\n", + "ax1.set_title(\"Normal Distribution\")\n", + "ax2.hist(array2)\n", + "ax2.set_title(\"Chi-square Distribution\")\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "95552460", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Normal Mean: 0.012065424517735459\n", + "Chi-square Mean: 2.928575498384853\n", + "\n", + "Normal Median: -0.06576545576857008\n", + "Chi-square Median: 2.382302084548116\n", + "\n", + "Normal Mode: 1.690525703800356\n", + "Chi-square Mode: 0.9197430988873622\n", + "\n" + ] + } + ], + "source": [ + "import statistics as stats\n", + "\n", + "### MEAN ###\n", + "print(f\"Normal Mean: {stats.mean(array1)}\")\n", + "print(f\"Chi-square Mean: {stats.mean(array2)}\")\n", + "print()\n", + "\n", + "### MEDIAN ###\n", + "print(f\"Normal Median: {stats.median(array1)}\")\n", + "print(f\"Chi-square Median: {stats.median(array2)}\")\n", + "print()\n", + "\n", + "### MODE ###\n", + "print(f\"Normal Mode: {stats.mode(array1)}\")\n", + "print(f\"Chi-square Mode: {stats.mode(array2)}\")\n", + "print()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "61808660", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Normal range: 4.548262088597983\n", + "Chi-square range: 10.072765473935583\n", + "\n", + "Normal variance: 1.04831339674988 and stdev: 1.023871767727717\n", + "Chi variance: 4.865280734391722 and stdev: 2.2057381382185244\n" + ] + } + ], + "source": [ + "### RANGE ###\n", + "range_normal = max(array1) - min(array1)\n", + "range_chi = max(array2) - 
min(array2)\n", + "print(f\"Normal range: {range_normal}\")\n", + "print(f\"Chi-square range: {range_chi}\")\n", + "print()\n", + "\n", + "### VARIANCE and STD DEVIATION ### \n", + "normal_varience = stats.variance(array1)\n", + "chi_varience = stats.variance(array2)\n", + "normal_stdev = stats.stdev(array1)\n", + "chi_stdev = stats.stdev(array2)\n", + "print(f\"Normal variance: {normal_varience} and stdev: {normal_stdev}\")\n", + "print(f\"Chi variance: {chi_varience} and stdev: {chi_stdev}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "491c2e42", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Normal skew is: 0.04690455383857406\n", + "Chi-sq skew is: 1.1498285247258326\n", + "\n", + "Normal kurtosis is: -0.348889587599734\n", + "Chi kurtosis is: 0.9091378874561746\n" + ] + } + ], + "source": [ + "from scipy.stats import skew\n", + "from scipy.stats import kurtosis\n", + "\n", + "### SKEW ###\n", + "normal_skew = skew(array1)\n", + "chi_skew = skew(array2)\n", + "print(f\"Normal skew is: {normal_skew}\")\n", + "print(f\"Chi-sq skew is: {chi_skew}\")\n", + "print()\n", + "\n", + "### KURTOSIS ###\n", + "\n", + "kurtosis_normal = kurtosis(array1)\n", + "kurtosis_chi = kurtosis(array2)\n", + "\n", + "print(f\"Normal kurtosis is: {kurtosis_normal}\")\n", + "print(f\"Chi kurtosis is: {kurtosis_chi}\")" + ] + }, + { + "cell_type": "markdown", + "id": "46c70c3d", + "metadata": {}, + "source": [ + "### Exercise 2\n", + "\n", + "Write a Python program to calculate the standard deviation of the following data:\n", + "\n", + "```py\n", + "data = [4, 2, 5, 8, 6]\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "d590308e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Numpy's standard dev is: 2.0\n", + "Stats standard dev is: 2.23606797749979\n", + "Program's sdev is 2.0?\n" + ] + } + ], + "source": [ + "import
math\n", + "\n", + "data = [4, 2, 5, 8, 6]\n", + "\n", + "# Seeing how the built-in functions compare\n", + "print(f\"Numpy's standard dev is: {np.std(data)}\")\n", + "print(f\"Stats standard dev is: {stats.stdev(data)}\")\n", + "\n", + "# find the mean of dataset\n", + "sm=0\n", + "for i in range(len(data)):\n", + " sm+=data[i]\n", + " mean = sm/len(data)\n", + "\n", + "# finding the standard dev \n", + "guess = 0\n", + "for i in range(len(data)):\n", + " guess+=(data[i]- mean)**2\n", + " sdev = math.sqrt((guess)/len(data))\n", + "print(f\"Program's sdev is {sdev}?\")\n", + "\n", + "\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "9248718ffe6ce6938b217e69dbcc175ea21f4c6b28a317e96c05334edae734bb" + }, + "kernelspec": { + "display_name": "Python 3.9.12 ('ML-BOOTCAMP')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 6cec258ec2f7c9ca29bd8f83ac5352474bd9a3a5 Mon Sep 17 00:00:00 2001 From: Skcodingacademy <139916977+Skcodingacademy@users.noreply.github.com> Date: Mon, 2 Oct 2023 21:18:30 +0000 Subject: [PATCH 05/10] Questions 1-2 answered --- SethKstats/problems.ipynb | 310 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 310 insertions(+) create mode 100644 SethKstats/problems.ipynb diff --git a/SethKstats/problems.ipynb b/SethKstats/problems.ipynb new file mode 100644 index 00000000..90358800 --- /dev/null +++ b/SethKstats/problems.ipynb @@ -0,0 +1,310 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ac622319", + "metadata": {}, + "source": [ + "# Descriptive statistics problems" + ] + }, + { + "cell_type": "markdown", + "id": "aa8993e4", + "metadata": {}, + "source": [ + "### Exercise 1" + ] + }, + { + "cell_type": "markdown", + 
"id": "5e0ab0d5", + "metadata": {}, + "source": [ + "We will use Numpy to obtain information to describe statistically.\n", + "\n", + "- Generate an array of 100 elements following a normal distribution.\n", + "- Generate an array of 100 elements following a chi-square distribution with 3 degrees of freedom.\n", + "- Calculate the main metrics and statistical measures that best describe the two vectors." + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "34720ab6", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import statistics as stats\n", + "from scipy.stats import skew\n", + "from scipy.stats import kurtosis\n", + "\n", + "np.random.seed(0)\n", + "\n", + "norm = np.random.normal(size = 100) # This creates an array of 100 elements that have a normal distribution\n", + "chisq = np.random.chisquare(3, 100) # This creates an array of 100 elements the have a chi-square distribution with 3 degrees of freedom\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "d643f90d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Normal mean: 0.05980801553448498\n", + "Chi mean: 3.0018406221613745\n" + ] + } + ], + "source": [ + "#What we are doing here is generating the mean to help describe the two vectors. \n", + "print(f\"Normal mean: {stats.mean(norm)}\") \n", + "print(f\"Chi mean: {stats.mean(chisq)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "81a01c51", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Normal median: 0.09409611943799814\n", + "Chi median: 2.3645115337568323\n" + ] + } + ], + "source": [ + "#What we are doing here is generating the median to help describe the two vectors. 
\n", + "print(f\"Normal median: {stats.median(norm)}\")\n", + "print(f\"Chi median: {stats.median(chisq)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "0c9e8edc", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Normal mode: 1.764052345967664\n", + "Chi mode: 9.223556454255386\n" + ] + } + ], + "source": [ + "#What we are doing here is generating the mode to help describe the two vectors. \n", + "print(f\"Normal mode: {stats.mode(norm)}\")\n", + "print(f\"Chi mode: {stats.mode(chisq)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "df442447", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Normal range: 4.822744439821687\n", + "Chi range: 11.255784308217786\n" + ] + } + ], + "source": [ + "#We now will calculate the range which is the max - the min\n", + "#We find the range for the normal distribution\n", + "norm_range = max(norm) - min(norm)\n", + "#Now we find the range for chi-square\n", + "chisq_range = max(chisq) - min(chisq)\n", + "\n", + "#Now print our results \n", + "print(f\"Normal range: {norm_range}\")\n", + "print(f\"Chi range: {chisq_range}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "832123a7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Normal variance: 1.0260874941564964 and std: 1.0129597692685017\n", + "Chi variance: 5.777917328519989 and std: 2.403729878443081\n" + ] + } + ], + "source": [ + "#Now we find the variance and standard deviation\n", + "var_normal = stats.variance(norm)\n", + "std_normal = stats.stdev(norm)\n", + "var_chi = stats.variance(chisq)\n", + "std_chi = stats.stdev(chisq)\n", + "\n", + "print(f\"Normal variance: {var_normal} and std: {std_normal}\")\n", + "print(f\"Chi variance: {var_chi} and std: {std_chi}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id":
"379a56bb", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Normal skewness: 0.005171839713550013\n", + "Chi skewness: 1.342791266776593\n" + ] + } + ], + "source": [ + "skew_normal = skew(norm)\n", + "skew_chi = skew(chisq)\n", + "\n", + "print(f\"Normal skewness: {skew_normal}\")\n", + "print(f\"Chi skewness: {skew_chi}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "06427a31", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Normal kurtosis: -0.3783545566331328\n", + "Chi kurtosis: 1.6572033096538448\n" + ] + } + ], + "source": [ + "kurt_normal = kurtosis(norm)\n", + "kurt_chi = kurtosis(chisq)\n", + "\n", + "print(f\"Normal kurtosis: {kurt_normal}\")\n", + "print(f\"Chi kurtosis: {kurt_chi}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "46c70c3d", + "metadata": {}, + "source": [ + "### Exercise 2\n", + "\n", + "Write a Python program to calculate the standard deviation of the following data:\n", + "\n", + "```py\n", + "data = [4, 2, 5, 8, 6]\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "d590308e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sample Data: [4, 2, 5, 8, 6]\n", + "Standard Deviation: 2.23606797749979\n" + ] + } + ], + "source": [ + "import math\n", + "import sys\n", + "\n", + "# Define the standard deviation function\n", + "\n", + "def sd_func(data):\n", + " n = len(data)\n", + "\n", + " if (n <= 1):\n", + " return 0.0\n", + "\n", + " mean, sd = avg_func(data), 0.0\n", + "\n", + " for d in data:\n", + " sd += (float(d) - mean) ** 2\n", + " sd = math.sqrt(sd / float(n - 1))\n", + "\n", + " return sd\n", + "\n", + "def avg_func(data):\n", + " n, mean = len(data), 0.0\n", + "\n", + " if (n <= 1):\n", + " return data[0]\n", + "\n", + " for d in data:\n", + " mean = mean + float(d)\n", + "\n", + " mean = mean / float(n)\n", + " 
return mean\n", + "\n", + "#The f inside the print function allows for efficient formatting\n", + "#The sample data is our data being tested and given\n", + "#When sd_func is called, it will find the standard deviation for our sample data\n", + "\n", + "data = [4, 2, 5, 8, 6]\n", + "print(f\"Sample Data: {data}\")\n", + "print(f\"Standard Deviation: {sd_func(data)}\")\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "9248718ffe6ce6938b217e69dbcc175ea21f4c6b28a317e96c05334edae734bb" + }, + "kernelspec": { + "display_name": "Python 3.9.12 ('ML-BOOTCAMP')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 4d67842c202e3a85abae48ea45d4c8ae25225f7c Mon Sep 17 00:00:00 2001 From: Skcodingacademy <139916977+Skcodingacademy@users.noreply.github.com> Date: Mon, 2 Oct 2023 21:20:29 +0000 Subject: [PATCH 06/10] 1-2 answered --- SethKstats/problems.ipynb | 2 +- notebook/solutions.ipynb | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/SethKstats/problems.ipynb b/SethKstats/problems.ipynb index 90358800..f9ce6309 100644 --- a/SethKstats/problems.ipynb +++ b/SethKstats/problems.ipynb @@ -275,7 +275,7 @@ "\n", "#The f inside the print function allows for efficient formatting\n", "#The sample data is our data being tested and given\n", - "#When sd_func is called, it will find the standard deviation for our sample data\n", + "#When sd_func is called, it will find the standard deviation for our data\n", "\n", "data = [4, 2, 5, 8, 6]\n", "print(f\"Sample Data: {data}\")\n", diff --git a/notebook/solutions.ipynb b/notebook/solutions.ipynb index ed71f320..ef5a9bd9 100644 --- a/notebook/solutions.ipynb +++ b/notebook/solutions.ipynb @@ -268,12 +268,12 
@@ } ], "source": [ - "from scipy.stats import skew\n", + "\n", "\n", "skew_normal = skew(normal)\n", "skew_chi = skew(chi)\n", "\n", - "print(f\"Normal skewness: {skew_normal}\")\n", + "print(f\"Normal skewness: {skew_nor}\n", "print(f\"Chi skewness: {skew_chi}\")" ] }, @@ -331,7 +331,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 4, "id": "d590308e", "metadata": {}, "outputs": [ @@ -345,6 +345,9 @@ } ], "source": [ + "\n", + "\n", + " \n", "import math\n", "import sys\n", "\n", From 4b0d26c9983d02f5208d7fa35c1e491acd18d37d Mon Sep 17 00:00:00 2001 From: Skcodingacademy <139916977+Skcodingacademy@users.noreply.github.com> Date: Mon, 2 Oct 2023 21:34:40 +0000 Subject: [PATCH 07/10] Another way I found to solve it using stats.stdev --- SethKstats/problems.ipynb | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/SethKstats/problems.ipynb b/SethKstats/problems.ipynb index f9ce6309..9a3656c2 100644 --- a/SethKstats/problems.ipynb +++ b/SethKstats/problems.ipynb @@ -228,7 +228,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 4, "id": "d590308e", "metadata": {}, "outputs": [ @@ -237,13 +237,14 @@ "output_type": "stream", "text": [ "Sample Data: [4, 2, 5, 8, 6]\n", - "Standard Deviation: 2.23606797749979\n" + "2.23606797749979\n" ] } ], "source": [ "import math\n", "import sys\n", + "import statistics as stats\n", "\n", "# Define the standard deviation function\n", "\n", @@ -279,7 +280,8 @@ "\n", "data = [4, 2, 5, 8, 6]\n", "print(f\"Sample Data: {data}\")\n", - "print(f\"Standard Deviation: {sd_func(data)}\")\n" + "#print(f\"Standard Deviation: {sd_func(data)}\")\n", + "print(stats.stdev(data))\n" ] } ], From a1094dd1848db50768bed005133d8d1c5f24d2cc Mon Sep 17 00:00:00 2001 From: Ricardo Hernandez <119596579+Ricoou@users.noreply.github.com> Date: Mon, 2 Oct 2023 22:21:19 +0000 Subject: [PATCH 08/10] Finished Exercises --- Ricardo/problems.ipynb | 134 +++++++++++++++++++++++++++++++++++++++++ 
1 file changed, 134 insertions(+) create mode 100644 Ricardo/problems.ipynb diff --git a/Ricardo/problems.ipynb b/Ricardo/problems.ipynb new file mode 100644 index 00000000..fc028bfb --- /dev/null +++ b/Ricardo/problems.ipynb @@ -0,0 +1,134 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ac622319", + "metadata": {}, + "source": [ + "# Descriptive statistics problems" + ] + }, + { + "cell_type": "markdown", + "id": "aa8993e4", + "metadata": {}, + "source": [ + "### Exercise 1" + ] + }, + { + "cell_type": "markdown", + "id": "5e0ab0d5", + "metadata": {}, + "source": [ + "We will use Numpy to obtain information to describe statistically.\n", + "\n", + "- Generate an array of 100 elements following a normal distribution.\n", + "- Generate an array of 100 elements following a chi-square distribution with 3 degrees of freedom.\n", + "- Calculate the main metrics and statistical measures that best describe the two vectors." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "34720ab6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The mean of element 1 is 0.059808015534485\n", + "The Standard Deviation of element 1 is 1.0078822447165796\n", + "The mean of element 2 is 3.001840622161374\n", + "The Standard Deviation of element 2 is 2.3916810312486882\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "\n", + "np.random.seed(0)\n", + "\n", + "elements1 = np.random.normal(size=100)\n", + "elements2 = np.random.chisquare(df=3, size=100)\n", + "\n", + "elements1_mean = np.mean(elements1)\n", + "elements2_mean = np.mean(elements2)\n", + "\n", + "elements1_std = np.std(elements1)\n", + "elements2_std = np.std(elements2)\n", + "\n", + "print(f\"The mean of element 1 is {elements1_mean}\")\n", + "print(f\"The Standard Deviation of element 1 is {elements1_std}\")\n", + "print(f\"The mean of element 2 is {elements2_mean}\")\n", + "print(f\"The Standard Deviation of element 2 is 
{elements2_std}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "46c70c3d", + "metadata": {}, + "source": [ + "### Exercise 2\n", + "\n", + "Write a Python program to calculate the standard deviation of the following data:\n", + "\n", + "```py\n", + "data = [4, 2, 5, 8, 6]\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "d590308e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Standard Deviation: 2.0\n" + ] + } + ], + "source": [ + "data = [4, 2, 5, 8, 6]\n", + "\n", + "\n", + "mean = sum(data) / len(data)\n", + "\n", + "variance = sum((x - mean) ** 2 for x in data) / len(data)\n", + "\n", + "standard_deviation = variance ** 0.5\n", + "\n", + "print(\"Standard Deviation:\", standard_deviation)" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "9248718ffe6ce6938b217e69dbcc175ea21f4c6b28a317e96c05334edae734bb" + }, + "kernelspec": { + "display_name": "Python 3.9.12 ('ML-BOOTCAMP')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 686f136274b18a7054686513316c273c524dcfa3 Mon Sep 17 00:00:00 2001 From: Rob Date: Thu, 5 Oct 2023 15:25:25 -0400 Subject: [PATCH 09/10] Added problems to notebook folder --- notebook/problems.ipynb | 90 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 notebook/problems.ipynb diff --git a/notebook/problems.ipynb b/notebook/problems.ipynb new file mode 100644 index 00000000..ff2c594b --- /dev/null +++ b/notebook/problems.ipynb @@ -0,0 +1,90 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ac622319", + "metadata": {}, + "source": [ + "# Descriptive statistics problems" + ] + }, + { + 
"cell_type": "markdown", + "id": "aa8993e4", + "metadata": {}, + "source": [ + "### Exercise 1" + ] + }, + { + "cell_type": "markdown", + "id": "5e0ab0d5", + "metadata": {}, + "source": [ + "We will use Numpy to obtain information to describe statistically.\n", + "\n", + "- Generate an array of 100 elements following a normal distribution.\n", + "- Generate an array of 100 elements following a chi-square distribution with 3 degrees of freedom.\n", + "- Calculate the main metrics and statistical measures that best describe the two vectors." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "34720ab6", + "metadata": {}, + "outputs": [], + "source": [ + "# TODO" + ] + }, + { + "cell_type": "markdown", + "id": "46c70c3d", + "metadata": {}, + "source": [ + "### Exercise 2\n", + "\n", + "Write a Python program to calculate the standard deviation of the following data:\n", + "\n", + "```py\n", + "data = [4, 2, 5, 8, 6]\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "d590308e", + "metadata": {}, + "outputs": [], + "source": [ + "# TODO" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "9248718ffe6ce6938b217e69dbcc175ea21f4c6b28a317e96c05334edae734bb" + }, + "kernelspec": { + "display_name": "Python 3.9.12 ('ML-BOOTCAMP')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 2fa212e787d6629e1d9f37192a6027bbcd9cef17 Mon Sep 17 00:00:00 2001 From: Rob Date: Fri, 20 Oct 2023 19:57:31 +0000 Subject: [PATCH 10/10] Both Problems finished --- notebook/problems.ipynb | 125 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 119 insertions(+), 6 deletions(-) diff --git a/notebook/problems.ipynb b/notebook/problems.ipynb 
index ff2c594b..3dfe3fc0 100644 --- a/notebook/problems.ipynb +++ b/notebook/problems.ipynb @@ -30,12 +30,104 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "id": "34720ab6", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The Normal Distribution is[ 0.22827309 1.0268903 -0.83958485 -0.59118152 -0.9568883 -0.22232569\n", + " -0.61991511 1.83790458 -2.05323076 0.86858305 -0.92073444 -0.23231186\n", + " 2.1529569 -1.33466147 0.07637965 -1.24608928 1.20227231 -1.04994158\n", + " 1.05661011 -0.41967767 2.29484234 -2.59448738 2.8227564 0.68088892\n", + " -1.57769345 -1.97625359 0.53333982 -0.29086971 -0.51351967 1.98262608\n", + " 0.22600105 -1.83990496 1.60767083 0.38829194 0.39973206 0.4054766\n", + " 0.21700177 -0.6334391 0.24662153 -1.93954552 0.11405963 -1.8853414\n", + " 0.24308048 -0.70548067 0.36462762 -0.50295216 -0.22575155 -0.56553773\n", + " 0.10339501 2.01840842 1.09424827 1.6624344 -0.62745348 1.62119964\n", + " 1.17813267 -0.37487875 -0.54432898 0.28776118 -0.20581999 1.18998786\n", + " 0.72892688 -0.22204012 -1.62270578 0.31254055 -1.16042141 0.31355968\n", + " 0.47199786 0.57786171 0.50540675 -0.62648777 -0.34636933 -2.06594183\n", + " 0.76893629 1.12886631 0.16692354 -0.96725539 0.49199625 -0.55085717\n", + " -0.08469407 1.96756834 -0.06267511 -0.85113578 0.42652063 -0.27756074\n", + " -1.37794456 -0.09519641 0.83363873 -0.78475445 1.04614478 -0.645784\n", + " -1.89157934 -0.09733285 -1.35889534 0.49830983 -1.14732055 -0.53652108\n", + " -0.91648938 -0.21214793 0.19284449 -0.32268305] and the Main Metrics and Statistical measures for it are:\n", + "Mean: -0.051500969332526\n", + "Standard Deviation: 1.083568563267722\n", + "Median: -0.09626463210302683\n", + "Variance: 1.1741208313020757\n", + "The Chi-Square Distribution is[ 4.4036977 3.43357991 2.43865116 3.76468546 1.74738374 2.87304496\n", + " 1.25990264 1.98658061 2.7382904 2.40379373 6.43281884 
1.35117521\n", + " 8.7849878 13.06470946 1.7418934 2.67968429 1.02479675 1.67765141\n", + " 1.06959774 0.32544055 10.02554874 0.68163247 3.39104851 1.08370858\n", + " 1.9876063 1.34763536 1.6517412 8.21316019 1.48801337 4.76964702\n", + " 4.9766 1.96968626 0.87373953 3.55316881 2.3748913 0.79432302\n", + " 0.7701773 3.68548924 3.2519995 10.09726069 3.06195584 4.94862294\n", + " 8.40693551 8.2386102 0.51444887 1.57892549 9.45502228 6.76435901\n", + " 0.08425656 4.31558523 0.53234731 6.0103969 1.6261617 6.64144191\n", + " 5.66904724 2.45959271 1.31656293 5.12092043 9.13811987 0.02222266\n", + " 1.50610432 1.57706424 3.32373864 1.99068763 6.89886783 2.93937469\n", + " 0.78214448 1.80647672 0.62057492 3.73253103 6.79628785 5.09298138\n", + " 4.54692477 1.54790254 2.34164297 0.80446453 3.46601364 0.68885048\n", + " 0.68235736 8.19098932 3.05658688 3.85504193 1.97097259 2.02332414\n", + " 2.90044156 3.14758326 7.51373069 2.52268567 0.96407427 0.66239969\n", + " 2.55400914 1.26706758 4.04029785 1.91665878 1.57053958 0.1736914\n", + " 2.77972265 5.91638755 2.69140858 6.50403792] and the Main Metrics and Statistical measures for it are:\n", + "Mean: 3.35461619770656\n", + "Standard Deviation: 2.696252896948903\n", + "Median: 2.5383474082247273\n", + "Variance: 7.269779684305352\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# TODO" + "# TODO\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "np.random.seed(25)\n", + "\n", + "normal = np.random.normal(size=100)\n", + "chi_square = np.random.chisquare(3,100)\n", + "\n", + "\n", + "\n", + "normal_mean = np.mean(normal)\n", + "normal_std = np.std(normal)\n", + "normal_median = np.median(normal)\n", + "normal_variance = np.var(normal)\n", + "\n", + "chi_square_mean = np.mean(chi_square)\n", + "chi_square_std = np.std(chi_square)\n", + "chi_square_median = np.median(chi_square)\n", + 
"chi_square_variance = np.var(chi_square)\n", + "\n", + "print(f\"The Normal Distribution is{normal} and the Main Metrics and Statistical measures for it are:\")\n", + "print(f\"Mean: {normal_mean}\")\n", + "print(f\"Standard Deviation: {normal_std}\")\n", + "print(f\"Median: {normal_median}\")\n", + "print(f\"Variance: {normal_variance}\")\n", + "\n", + "print(f\"The Chi-Square Distribution is{chi_square} and the Main Metrics and Statistical measures for it are:\")\n", + "print(f\"Mean: {chi_square_mean}\")\n", + "print(f\"Standard Deviation: {chi_square_std}\")\n", + "print(f\"Median: {chi_square_median}\")\n", + "print(f\"Variance: {chi_square_variance}\")\n", + "\n", + "print" ] }, { @@ -54,12 +146,33 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "id": "d590308e", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Standard Deviation:2.0\n" + ] + } + ], "source": [ - "# TODO" + "# TODO\n", + "\n", + "import math\n", + "\n", + "data = [4,2,5,8,6]\n", + "\n", + "\n", + "mean = sum(data) / len(data)\n", + "variance = sum((x - mean) ** 2 for x in data) / len(data)\n", + "\n", + "\n", + "std = math.sqrt(variance)\n", + "\n", + "print(f\"Standard Deviation:{std}\")\n" ] } ],