From b5fea0e8038189ac5aed54ef6d7440a06fe8a0da Mon Sep 17 00:00:00 2001
From: bitterspice
Date: Thu, 2 May 2019 19:27:04 -0700
Subject: [PATCH] Add files via upload

---
 Week 4.ipynb | 338 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 338 insertions(+)
 create mode 100644 Week 4.ipynb

diff --git a/Week 4.ipynb b/Week 4.ipynb
new file mode 100644
index 0000000..5a3c116
--- /dev/null
+++ b/Week 4.ipynb
@@ -0,0 +1,338 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "---\n",
+    "\n",
+    "_You are currently looking at **version 1.0** of this notebook. To download notebooks and datafiles, as well as get help on Jupyter notebooks in the Coursera platform, visit the [Jupyter Notebook FAQ](https://www.coursera.org/learn/python-data-analysis/resources/0dhYG) course resource._\n",
+    "\n",
+    "---"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Distributions in Pandas"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "np.random.binomial(1, 0.5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "np.random.binomial(1000, 0.5)/1000"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "chance_of_tornado = 0.01/100\n",
+    "np.random.binomial(100000, chance_of_tornado)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "chance_of_tornado = 0.01\n",
+    "\n",
+    "tornado_events = np.random.binomial(1, chance_of_tornado, 1000000)\n",
+    "\n",
+    "two_days_in_a_row = 0\n",
+    "# Compare each day with the previous one. Start at 1 so j-1 is a valid index,\n",
+    "# and run through the final index so the last pair of days is checked too.\n",
+    "for j in range(1, len(tornado_events)):\n",
+    "    if tornado_events[j]==1 and tornado_events[j-1]==1:\n",
+    "        two_days_in_a_row+=1\n",
+    "\n",
+    "print('{} tornadoes back to back in {} years'.format(two_days_in_a_row, 1000000/365))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "np.random.uniform(0, 1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "np.random.normal(0.75)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Formula for standard deviation\n",
+    "$$\\sqrt{\\frac{1}{N} \\sum_{i=1}^N (x_i - \\overline{x})^2}$$"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "distribution = np.random.normal(0.75,size=1000)\n",
+    "\n",
+    "np.sqrt(np.sum((np.mean(distribution)-distribution)**2)/len(distribution))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false,
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "np.std(distribution)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "import scipy.stats as stats\n",
+    "stats.kurtosis(distribution)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "stats.skew(distribution)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "chi_squared_df2 = np.random.chisquare(2, size=10000)\n",
+    "stats.skew(chi_squared_df2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "chi_squared_df5 = np.random.chisquare(5, size=10000)\n",
+    "stats.skew(chi_squared_df5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "%matplotlib inline\n",
+    "import matplotlib\n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "output = plt.hist([chi_squared_df2,chi_squared_df5], bins=50, histtype='step', \n",
+    "                  label=['2 degrees of freedom','5 degrees of freedom'])\n",
+    "plt.legend(loc='upper right')\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Hypothesis Testing"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "df = pd.read_csv('grades.csv')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "len(df)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "early = df[df['assignment1_submission'] <= '2015-12-31']\n",
+    "late = df[df['assignment1_submission'] > '2015-12-31']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "# numeric_only=True skips text columns (the submission timestamps are read as strings).\n",
+    "early.mean(numeric_only=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "late.mean(numeric_only=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "stats.ttest_ind(early['assignment1_grade'], late['assignment1_grade'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "stats.ttest_ind(early['assignment2_grade'], late['assignment2_grade'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "stats.ttest_ind(early['assignment3_grade'], late['assignment3_grade'])"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.5.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}