Commit 9ef3e8b1 authored by Frank Hellmann's avatar Frank Hellmann

Frank's lesson material

parent da3db119
This diff is collapsed.
This diff is collapsed.
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import temp_module as temp\n",
"import importlib"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"<module 'temp_module' from '/home/frank/git/2017-05-15-python-novice-lessons/lesson-notebooks/lesson-frank/temp_module.py'>"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# When developing a module like temp_module.py it is often useful to be able to reload it.\n",
"# Calling the import statement again does not actually reload the module if it has already been loaded.\n",
"importlib.reload(temp)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"-6.666666666666686"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"temp.fahrenheit_to_celsius(20)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"-273.15\n",
"-169.52777777777777\n"
]
}
],
"source": [
"print(temp.absolute_zero_in_celsius)\n",
"print(temp.absolute_zero_in_fahrenheit)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Help on function fahrenheit_to_kelvin in module temp_module:\n",
"\n",
"fahrenheit_to_kelvin(temperature)\n",
" This function turns a temperature in Fahrenheit into a temperature in Kelvin\n",
"\n"
]
}
],
"source": [
"help(temp.fahrenheit_to_kelvin)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Help on function kelvin_to_celsius in module temp_module:\n",
"\n",
"kelvin_to_celsius(temperature)\n",
" This function turns a temperature in Kelvin into a temperature in Celsius\n",
" \n",
" This string is a doc string. It uses triple quotes to allow line breaks and contains information about your function that is carried around. You can access it by calling help(function) for any function that provides it. May editors will also show it if you hover over the function.\n",
"\n"
]
}
],
"source": [
"help(temp.kelvin_to_celsius)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def s(p):\n",
" a = 0\n",
" for v in p:\n",
" a += v\n",
" m = a / len(p)\n",
" d = 0\n",
" for v in p:\n",
" d += (v - m) * (v - m)\n",
" return numpy.sqrt(d / (len(p) - 1))\n",
"\n",
"def std_dev(sample):\n",
" sample_sum = 0\n",
" for value in sample:\n",
" sample_sum += value\n",
"\n",
" sample_mean = sample_sum / len(sample)\n",
"\n",
" sum_squared_devs = 0\n",
" for value in sample:\n",
" sum_squared_devs += (value - sample_mean) * (value - sample_mean)\n",
"\n",
" return numpy.sqrt(sum_squared_devs / (len(sample) - 1))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
This diff is collapsed.
absolute_zero_in_celsius = -273.15


def fahrenheit_to_kelvin(temperature):
    """This function turns a temperature in Fahrenheit into a temperature in Kelvin"""
    return (temperature - 32) * 5/9 - absolute_zero_in_celsius


def kelvin_to_celsius(temperature):
    """This function turns a temperature in Kelvin into a temperature in Celsius

    This string is a doc string. It uses triple quotes to allow line breaks and contains information about your function that is carried around. You can access it by calling help(function) for any function that provides it. Many editors will also show it if you hover over the function."""
    return temperature + absolute_zero_in_celsius


def fahrenheit_to_celsius(temperature):
    """This function turns a temperature in Fahrenheit into a temperature in Celsius

    It chains the two conversions above: Fahrenheit -> Kelvin -> Celsius."""
    temp_in_kelvin = fahrenheit_to_kelvin(temperature)
    return kelvin_to_celsius(temp_in_kelvin)


def celsius_to_fahrenheit(temperature):
    """This function turns a temperature in Celsius into a temperature in Fahrenheit"""
    return temperature * 9/5 + 32


# Absolute zero is known in Celsius, so it has to be converted *from* Celsius
# *to* Fahrenheit. (Feeding the Celsius value into fahrenheit_to_celsius, as
# this line used to do, yields about -169.53 instead of -459.67.)
absolute_zero_in_fahrenheit = celsius_to_fahrenheit(absolute_zero_in_celsius)

# The magic variable __name__ contains the name of the module.
# If the file is run as a script it instead is set to "__main__"
# print(f"You have loaded the module {__name__}")
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import numpy as np\n",
"import numba as nb"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def process_number(x):\n",
" if x > 0:\n",
" res = np.floor(x % 3)\n",
" else:\n",
" res = np.floor(x % 5)\n",
" return res"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0.0"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"process_number(0.3)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def process_loop(x):\n",
" res = np.empty_like(x)\n",
" for i in range(len(x)):\n",
" if x[i] > 0:\n",
" res[i] = np.floor(x[i] % 3)\n",
" else:\n",
" res[i] = np.floor(x[i] % 5)\n",
" return res\n",
"\n",
"\n",
"def process_numpy(x):\n",
" \n",
" res = np.empty_like(x)\n",
"\n",
" idx = np.where(x > 0) \n",
" res[idx] = np.floor(x[idx] % 3)\n",
"\n",
" idx = np.where(x <= 0)\n",
" res[idx] = np.floor(x[idx] % 5)\n",
"\n",
" return res\n",
"\n",
"\n",
"@nb.jit\n",
"def process_numba(x):\n",
" res = np.empty_like(x)\n",
" for i in range(len(x)):\n",
" if x[i] > 0:\n",
" res[i] = np.floor(x[i] % 3)\n",
" else:\n",
" res[i] = np.floor(x[i] % 5)\n",
" return res"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([ 2., 0., 1., 2., 0., 1., 2., 0., 1., 2., 0., 1., 2.,\n",
" 0., 1., 2., 0., 1., 2., 0., 1., 2., 0., 1., 2., 0.,\n",
" 1., 2., 0., 1., 2., 0., 1., 2., 0., 1., 2., 0., 1.,\n",
" 2., 0., 1., 2., 0., 1., 2., 0., 1., 2., 0., 1., 2.,\n",
" 0., 1., 2., 0., 1., 2., 0., 1., 2., 0., 1., 2., 0.,\n",
" 1., 2., 0., 1., 2., 0., 1., 2., 0., 1., 2., 0., 1.,\n",
" 2., 0.])"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"numbers = np.linspace(-100000,100000, 200000)\n",
"process_loop(numbers)[-200:-120]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1 loop, best of 3: 212 ms per loop\n",
"100 loops, best of 3: 4.99 ms per loop\n",
"The slowest run took 62.84 times longer than the fastest. This could mean that an intermediate result is being cached.\n",
"1000 loops, best of 3: 1.66 ms per loop\n",
"100 loops, best of 3: 6.76 ms per loop\n"
]
}
],
"source": [
"%timeit process_loop(numbers)\n",
"%timeit process_numpy(numbers)\n",
"%timeit process_numba(numbers)\n",
"%timeit np.floor(np.where(numbers > 0, numbers % 3, numbers % 5))"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"@np.vectorize\n",
"def process_numpy_vec(x):\n",
" if x > 0:\n",
" res = np.floor(x % 3)\n",
" else:\n",
" res = np.floor(x % 5)\n",
" return res\n",
"\n",
"\n",
"@nb.vectorize\n",
"def process_numba_vec(x):\n",
" if x > 0:\n",
" res = np.floor(x % 3)\n",
" else:\n",
" res = np.floor(x % 5)\n",
" return res"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"10 loops, best of 3: 149 ms per loop\n",
"The slowest run took 23.21 times longer than the fastest. This could mean that an intermediate result is being cached.\n",
"1000 loops, best of 3: 1.48 ms per loop\n"
]
}
],
"source": [
"%timeit process_numpy_vec(numbers)\n",
"%timeit process_numba_vec(numbers)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# A note on ufuncs\n",
"\n",
"Numba vectorize actually creates numpy ufuncs. These are very flexible, and very powerful but can be a bit arcane.\n",
"\n",
"Look up the details on ufuncs at https://docs.scipy.org/doc/numpy/reference/ufuncs.html.\n",
"\n",
"Numbas documentation is here: http://numba.pydata.org/numba-doc/dev/user/vectorize.html"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 2. , 2.25, 2.5 , 2.75, 3. ],\n",
" [ 2.25, 2.5 , 2.75, 3. , 3.25],\n",
" [ 2.5 , 2.75, 3. , 3.25, 3.5 ],\n",
" [ 2.75, 3. , 3.25, 3.5 , 3.75],\n",
" [ 3. , 3.25, 3.5 , 3.75, 4. ]])"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"@nb.vectorize\n",
"def add(x,y):\n",
" return x + y\n",
"\n",
"# Make sure that add is compiled for floats:\n",
"add(0., 0.)\n",
"\n",
"# Now we can use add.outer to create an outer product.\n",
"# Great for making distance matrices for example:\n",
"add.outer(np.linspace(0,1,5),np.linspace(2,3,5))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
......@@ -38,7 +38,9 @@
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import numpy as np\n",
......@@ -66,7 +68,9 @@
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df = pd.read_csv(\n",
......@@ -110,7 +114,9 @@
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
......@@ -208,7 +214,9 @@
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
......@@ -346,7 +354,9 @@
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
......@@ -386,7 +396,9 @@
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
......@@ -500,7 +512,9 @@
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
......@@ -524,7 +538,9 @@
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
......@@ -565,7 +581,9 @@
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
......@@ -605,7 +623,9 @@
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df[\"10-year-anomaly-pandas\"] = df['anomaly'].rolling(10 * 12, center=True).mean()"
......@@ -615,6 +635,7 @@
"cell_type": "code",
"execution_count": 40,
"metadata": {
"collapsed": false,
"scrolled": false
},
"outputs": [
......@@ -681,7 +702,9 @@
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
......@@ -742,7 +765,9 @@
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import anomalies"
......@@ -751,7 +776,9 @@
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
......@@ -782,7 +809,9 @@
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
......@@ -831,7 +860,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"!chmod +x show_anomalies"
......@@ -840,7 +871,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
......@@ -857,7 +890,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"!ls"
......@@ -895,6 +930,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"scrolled": false
},
"outputs": [],
......@@ -923,7 +959,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true