Add files via upload

This commit is contained in:
bitterspice 2019-05-02 19:27:04 -07:00 committed by GitHub
parent 43730859d5
commit b5fea0e803
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

347
Week 4.ipynb Normal file
View File

@ -0,0 +1,347 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"---\n",
"\n",
"_You are currently looking at **version 1.0** of this notebook. To download notebooks and datafiles, as well as get help on Jupyter notebooks in the Coursera platform, visit the [Jupyter Notebook FAQ](https://www.coursera.org/learn/python-data-analysis/resources/0dhYG) course resource._\n",
"\n",
"---"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Distributions in Pandas"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# A single trial with success probability 0.5: evaluates to 0 or 1.\n",
"np.random.binomial(n=1, p=0.5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Fraction of successes observed across 1000 trials with probability 0.5.\n",
"np.random.binomial(n=1000, p=0.5) / 1000"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Number of successes in 100000 trials, each with probability 0.01/100.\n",
"chance_of_tornado = 0.01 / 100\n",
"np.random.binomial(n=100000, p=chance_of_tornado)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"chance_of_tornado = 0.01\n",
"n_days = 1000000\n",
"\n",
"# One draw per simulated day: 1 marks a tornado, 0 marks none.\n",
"tornado_events = np.random.binomial(1, chance_of_tornado, n_days)\n",
"\n",
"# Count the days that had a tornado and whose previous day had one too.\n",
"# Comparing the array with itself shifted by one day checks every adjacent\n",
"# pair of days, the final pair included.\n",
"had_tornado = tornado_events == 1\n",
"two_days_in_a_row = int(np.sum(had_tornado[1:] & had_tornado[:-1]))\n",
"\n",
"print('{} tornadoes back to back in {} years'.format(two_days_in_a_row, n_days/365))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# One draw from the uniform distribution between 0 and 1.\n",
"np.random.uniform(low=0, high=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# One draw from a normal distribution centred on 0.75.\n",
"np.random.normal(loc=0.75)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Formula for the population standard deviation (it divides by $N$, which is also what `np.std` computes by default):\n",
"$$\\sqrt{\\frac{1}{N} \\sum_{i=1}^N (x_i - \\overline{x})^2}$$"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"distribution = np.random.normal(loc=0.75, size=1000)\n",
"\n",
"# Standard deviation by hand: root of the mean squared deviation from the mean.\n",
"deviations = distribution - np.mean(distribution)\n",
"np.sqrt(np.sum(deviations**2) / len(distribution))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [],
"source": [
"# Same quantity as the manual computation, via the array's own method.\n",
"distribution.std()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from scipy import stats\n",
"\n",
"# Kurtosis of the sampled values.\n",
"stats.kurtosis(a=distribution)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Skewness of the sampled values.\n",
"stats.skew(a=distribution)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# 10000 draws from a chi-squared distribution with 2 degrees of freedom.\n",
"chi_squared_df2 = np.random.chisquare(df=2, size=10000)\n",
"stats.skew(a=chi_squared_df2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# 10000 draws from a chi-squared distribution with 5 degrees of freedom.\n",
"chi_squared_df5 = np.random.chisquare(df=5, size=10000)\n",
"stats.skew(a=chi_squared_df5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import matplotlib\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# Overlay both chi-squared samples as step histograms on one set of axes,\n",
"# labelled so the figure can be read without the surrounding cells.\n",
"fig, ax = plt.subplots()\n",
"output = ax.hist([chi_squared_df2, chi_squared_df5], bins=50, histtype='step',\n",
"                 label=['2 degrees of freedom', '5 degrees of freedom'])\n",
"ax.set_xlabel('sampled value')\n",
"ax.set_ylabel('count')\n",
"ax.set_title('Chi-squared samples')\n",
"ax.legend(loc='upper right')\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Hypothesis Testing"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Load the grades table from the CSV file next to the notebook.\n",
"df = pd.read_csv(filepath_or_buffer='grades.csv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Preview the first five rows.\n",
"df.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Number of rows in the table.\n",
"len(df.index)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Split the rows by comparing the assignment 1 submission value with '2015-12-31'.\n",
"early = df.loc[df['assignment1_submission'] <= '2015-12-31']\n",
"late = df.loc[df['assignment1_submission'] > '2015-12-31']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Column-wise means of the numeric columns only; without numeric_only=True,\n",
"# pandas 2.0+ raises a TypeError when the frame also holds text columns.\n",
"early.mean(numeric_only=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Column-wise means of the numeric columns only; without numeric_only=True,\n",
"# pandas 2.0+ raises a TypeError when the frame also holds text columns.\n",
"late.mean(numeric_only=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from scipy import stats\n",
"\n",
"# Print the documentation of the t-test function; help() is plain Python,\n",
"# so the cell also runs outside an IPython kernel.\n",
"help(stats.ttest_ind)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# t-test on assignment 1 grades: early group against late group.\n",
"stats.ttest_ind(a=early['assignment1_grade'], b=late['assignment1_grade'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# t-test on assignment 2 grades: early group against late group.\n",
"stats.ttest_ind(a=early['assignment2_grade'], b=late['assignment2_grade'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# t-test on assignment 3 grades: early group against late group.\n",
"stats.ttest_ind(a=early['assignment3_grade'], b=late['assignment3_grade'])"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 0
}