import sys
import numpy as np
import pandas as pd
import matplotlib
from matplotlib import pyplot as plt
import seaborn as sns
# Print the interpreter path, the versions of the imported libraries,
# and the active matplotlib backend.
print("Python", sys.executable)
print("Numpy", np.__version__)
print("Pandas", pd.__version__)
print("Seaborn", sns.__version__)
print("Matplotlib", matplotlib.__version__)
print("Matplotlib backend", matplotlib.get_backend())
# Output: Python /Users/mlubinsky/anaconda3/bin/python Numpy 1.23.5 Pandas 1.5.3 Seaborn 0.12.2 Matplotlib 3.7.0 module://matplotlib_inline.backend_inline
from scipy.stats import norm
# Draw the standard normal density on a fine grid covering [-4, 4).
fig, ax = plt.subplots()
grid = np.arange(-4, 4, 0.001)
density = norm.pdf(grid)
ax.plot(grid, density)

# Title, axis labels and the fixed y-range are applied in one call.
ax.set(
    title='N(0,$1^2$)',
    xlabel='x',
    ylabel='f(x)',
    ylim=(0, 0.45),
)
plt.show()
# Compare zero-mean normal densities for several standard deviations
# on a single axes.
fig, ax = plt.subplots()
x = np.linspace(-10, 10, 100)
stdvs = [1.0, 2.0, 3.0, 4.0]
for s in stdvs:
    # One curve per standard deviation; the label is picked up by the legend.
    ax.plot(x, norm.pdf(x, scale=s), label='stdv=%.1f' % s)
ax.set_xlabel('x')
ax.set_ylabel('pdf(x)')
ax.set_title('Normal Distribution')
ax.legend(loc='best', frameon=True)
ax.set_ylim(0, 0.45)
ax.grid(True)
# Draw 1000 samples from a zero-mean normal with scale=2 and overlay the
# theoretical density on a normalized histogram of the samples.
fig, ax = plt.subplots()
xs = norm.rvs(scale=2, size=1000)
x = np.linspace(-10, 10, 100)
p = norm.pdf(x, scale=2)

# Sample statistics shown in the title.
v = np.var(xs)
m = np.mean(xs)

# Plot on the axes returned by plt.subplots(); adding another subplot to the
# same figure would stack a second axes on top of this one.
ax.hist(xs, bins=10, alpha=0.5, density=True)
ax.plot(x, p, 'r-', lw=2)
ax.set_xlabel('x')
ax.set_ylabel('pdf(x)')
ax.set_title(f'mean={m:.2f}, var={v:.2f}')
ax.grid(True)
# Illustrate the cumulative distribution as the shaded area under the
# standard normal density, from -4 up to (but excluding) 1.
fig, ax = plt.subplots()

# Density curve over [-4, 4).
curve_x = np.arange(-4, 4, 0.001)
curve_y = norm.pdf(curve_x)
ax.plot(curve_x, curve_y)

# Shaded region under the curve.
shade_x = np.arange(-4, 1, 0.01)
shade_y = norm.pdf(shade_x)
ax.set_ylim(0, 0.5)
ax.fill_between(shade_x, shade_y, alpha=0.5, color='g')

# Annotation placed inside the shaded region.
ax.text(-1, 0.1, "cdf(x)", fontsize=20)

# Titles, labels and grid.
ax.set_title("Cumulative normal distribution")
ax.set_xlabel('x')
ax.set_ylabel('pdf(x)')
ax.grid(True)
plt.show()
from scipy.stats import norm
# Value of the normal CDF at 2 for a distribution with loc=3 and scale=2.
lessthan2 = norm(loc=3, scale=2).cdf(2)
print(lessthan2)
# Output: 0.3085375387259869
# Plot the density of a normal with loc=3, scale=2 and shade the region from
# -4 up to (but excluding) 2, annotated with the rounded value of `lessthan2`.
fig, ax = plt.subplots()
shifted = norm(loc=3, scale=2)

# Density curve over [-4, 10).
curve_x = np.arange(-4, 10, 0.001)
ax.plot(curve_x, shifted.pdf(curve_x))
ax.set(title="N(3,$2^2$)", xlabel='x', ylabel='pdf(x)')
ax.grid(True)

# Shaded region under the curve.
shade_x = np.arange(-4, 2, 0.01)
ax.set_ylim(0, 0.25)
ax.fill_between(shade_x, shifted.pdf(shade_x), alpha=0.5, color='g')

# Probability label drawn inside the shaded region.
area_label = round(lessthan2, 2)
ax.text(-0.5, 0.02, area_label, fontsize=20)
plt.show()
# There are > 120 distributions in SciPy
# https://towardsdatascience.com/probability-distributions-with-pythons-scipy-3da89bf60565
from scipy import stats
# Resolve every public name of scipy.stats once, then pick out the names bound
# to continuous and to discrete distribution instances.
members = {name: getattr(stats, name) for name in dir(stats)}
dist_continu = [
    name for name, obj in members.items()
    if isinstance(obj, stats.rv_continuous)
]
dist_discrete = [
    name for name, obj in members.items()
    if isinstance(obj, stats.rv_discrete)
]

# Report how many of each kind were found.
n_continuous = len(dist_continu)
n_discrete = len(dist_discrete)
print('scipy.stat: number of continuous distributions: %d' % n_continuous)
print('scipy.stat: number of discrete distributions: %d' % n_discrete)
# Output: scipy.stat: number of continuous distributions: 107 scipy.stat: number of discrete distributions: 19
# statsmodels implements regression, linear models, time series analysis, and extensions to topics also covered by scipy.stats.
import statsmodels.api as sm
# Load the statsmodels "statecrime" dataset through its pandas loader.
statecrime = sm.datasets.statecrime
data = statecrime.load_pandas().data
import seaborn as sns

# Plot murder against poverty with seaborn's lmplot on a white-grid style.
sns.set(style="whitegrid")
# NOTE: `fig` holds whatever lmplot returns (a seaborn FacetGrid per the
# seaborn docs), not a matplotlib Figure.
fig = sns.lmplot(
    data=data,
    x="poverty",
    y="murder",
)