## for numpy see: ## https://numpy.org/doc/stable/user/index.html ## for basic info on numpy random number generation see: ## https://numpy.org/doc/stable/reference/random/index.html#module-numpy.random ## Data Science Handbook, 2nd Ed, page 52 # For many types of operations, NumPy provides a convenient interface into just this kind of # statically typed, compiled routine. This is know as a *vectorized* operation. # For simple operations vectorization is a simple as using Python arithmetic operators directly on the array object. # This vectorized approach is designed to push the loop into the compiled layer that underlies NumPy, # leading to much faster execution. ## page 53 # Any time you see such a loop in a NumPy script, you should consider whether it can be replaced # with a vectorized expression. ## This example is taken from Data Science Handbook, 2nd Ed, page 60 ## We just sum a big array different ways and see how long they take import numpy as np import pandas as pd import matplotlib.pyplot as plt from numpy.random import default_rng from time import time ## write a function which sums by looping def mysum(x): '''sum x with loop''' temp = 0.0 for v in x: temp += v return temp ## big array that we will sum n = 1000000 #size of array rng = default_rng(34) barray = rng.random(n) ## iid U(0,1) print(pd.Series(barray).describe()) ## plot nplt = 100 #number draws to plot plt.scatter(np.arange(nplt),barray[:nplt]) plt.xlabel("draw number"); plt.ylabel("draw") plt.title("iid U(0,1)") plt.show() ## check the same answer from different approaches to computing the sum s0 = sum(barray) s1 = mysum(barray) s2 = barray.sum() s3 = np.sum(barray) print(f'\n** the four are {s0}, {s1}, {s2}, and {s3}') ## time t1 = time() temp = mysum(barray) t2 = time() print(f'the time for mysum is {t2-t1}') t1 = time() temp = barray.sum() t2 = time() print(f'the time for the sum method is {t2-t1}') ## time the different approaches print("python sum function") %timeit sum(barray) print("numpy sum function") %timeit np.sum(barray) print("ndarray sum method") %timeit barray.sum() print("mysum function with loop") %timeit mysum(barray) #mus: microsecond, one millionth of a second #ns: nanosecond, one billionth of a second #ms: millisecond, one thousandth of a second