Skip to content

Commit fdca870

Browse files
committed
central limit theorem study
1 parent 2e930db commit fdca870

File tree

4 files changed

+262
-0
lines changed

4 files changed

+262
-0
lines changed

Diff for: stats/climit_theorem.py

+149
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
import random, pylab
2+
3+
# Global matplotlib defaults applied to every figure drawn by this script.
pylab.rcParams.update({
    'lines.linewidth': 4,     # line width
    'axes.titlesize': 20,     # font size for titles
    'axes.labelsize': 20,     # font size for labels on axes
    'xtick.labelsize': 16,    # size of numbers on x-axis
    'ytick.labelsize': 16,    # size of numbers on y-axis
    'xtick.major.size': 7,    # size of ticks on x-axis
    'ytick.major.size': 7,    # size of ticks on y-axis
    'legend.numpoints': 1,    # numpoints for legend
})
20+
21+
def getMeanAndStd(X):
    """Return (mean, std) of the numbers in X.

    X must support len() and be iterable more than once. The standard
    deviation is the population form: the squared deviations are divided
    by len(X), not len(X) - 1. An empty X raises ZeroDivisionError.
    """
    count = len(X)
    mean = sum(X)/float(count)
    squaredError = 0.0
    for value in X:
        squaredError += (value - mean)**2
    return mean, (squaredError/count)**0.5
28+
29+
#L = [1,1,1,1,2]
30+
#pylab.hist(L)
31+
#factor = pylab.array(len(L)*[1])/len(L)
32+
#print(factor)
33+
#pylab.figure()
34+
#pylab.hist(L, weights = factor)
35+
36+
def plotMeans(numDice, numRolls, numBins, legend, color, style):
    """Histogram the means of groups of simulated continuous dice.

    Runs numRolls//numDice trials. Each trial averages numDice draws of
    5*random.random(). The histogram is weighted so that the bar heights
    are fractions of all trials rather than raw counts.

    numDice: number of draws averaged in each trial
    numRolls: total number of draws across all trials
    numBins: number of histogram bins
    legend: label attached to the histogram
    color: bar color passed to pylab.hist
    style: hatch pattern passed to pylab.hist

    returns: (mean, std) of the trial means, as computed by getMeanAndStd
    """
    trialMeans = []
    for trial in range(numRolls//numDice):
        total = 0
        for die in range(numDice):
            total += 5*random.random()
        trialMeans.append(total/float(numDice))
    # Equal weights of 1/len(trialMeans) make the bar heights sum to 1.
    fractions = pylab.array(len(trialMeans)*[1])/len(trialMeans)
    pylab.hist(trialMeans, numBins, color = color, label = legend,
               weights = fractions, hatch = style)
    return getMeanAndStd(trialMeans)
47+
48+
# Draw one continuous die and the mean of 50 dice on the same axes,
# printing the mean and standard deviation of each simulation.
mean, std = plotMeans(numDice = 1, numRolls = 1000000, numBins = 19,
                      legend = '1 die', color = 'b', style = '*')
print('Mean of rolling 1 die =', str(mean) + ',', 'Std =', std)
mean, std = plotMeans(numDice = 50, numRolls = 1000000, numBins = 19,
                      legend = 'Mean of 50 dice', color = 'r', style = '//')
print('Mean of rolling 50 dice =', str(mean) + ',', 'Std =', std)

# Label and display the combined figure.
pylab.title('Rolling Continuous Dice')
pylab.xlabel('Value')
pylab.ylabel('Probability')
pylab.legend()
pylab.show()
57+
58+
class FairRoulette():
    """Roulette wheel with pockets 1 through 36 and no house pockets.

    Every bet method returns the winnings (stake times the odds) when the
    bet wins and the negated stake when it loses.
    """

    def __init__(self):
        self.pockets = list(range(1, 37))
        self.ball = None  # no pocket until the first spin()
        self.blackOdds, self.redOdds = 1.0, 1.0
        self.pocketOdds = len(self.pockets) - 1.0

    def spin(self):
        """Place the ball in a pocket chosen uniformly at random."""
        self.ball = random.choice(self.pockets)

    def isBlack(self):
        """Return True if the ball is on a black number."""
        # Non-integer pockets (None before the first spin, or the string
        # pockets appended by subclasses) have no color.
        if not isinstance(self.ball, int):
            return False
        # In 1-10 and 19-28 the even numbers are black; elsewhere the
        # odd numbers are.
        if 0 < self.ball <= 10 or 18 < self.ball <= 28:
            return self.ball%2 == 0
        return self.ball%2 == 1

    def isRed(self):
        """Return True if the ball is on an integer pocket that is not black."""
        return isinstance(self.ball, int) and not self.isBlack()

    def betBlack(self, amt):
        """Return amt*blackOdds if the ball is black, otherwise -amt."""
        if self.isBlack():
            return amt*self.blackOdds
        return -amt

    def betRed(self, amt):
        """Return amt*redOdds if the ball is red, otherwise -amt."""
        if self.isRed():
            return amt*self.redOdds
        # A losing bet costs only the stake, matching betBlack and
        # betPocket; the odds scale winnings, not losses.
        return -amt

    def betPocket(self, pocket, amt):
        """Return amt*pocketOdds if the ball is in pocket, otherwise -amt.

        Pockets are compared by their string form, so 2 and '2' name the
        same pocket.
        """
        if str(pocket) == str(self.ball):
            return amt*self.pocketOdds
        return -amt

    def __str__(self):
        return 'Fair Roulette'
92+
93+
def playRoulette(game, numSpins, toPrint = True):
    """Spin game numSpins times, betting 1 on red, black and pocket '2'.

    game: object providing spin(), betRed(amt), betBlack(amt) and
          betPocket(pocket, amt)
    numSpins: number of spins; 0 raises ZeroDivisionError
    toPrint: when true, print the percentage return of each bet

    returns: tuple of the average return per spin for
             (red, black, pocket)
    """
    luckyNumber = '2'
    stake = 1
    redTotal = blackTotal = pocketTotal = 0.0
    for spin in range(numSpins):
        game.spin()
        redTotal += game.betRed(stake)
        blackTotal += game.betBlack(stake)
        pocketTotal += game.betPocket(luckyNumber, stake)
    if toPrint:
        print(numSpins, 'spins of', game)
        print('Expected return betting red =',
              str(100*redTotal/numSpins) + '%')
        print('Expected return betting black =',
              str(100*blackTotal/numSpins) + '%')
        print('Expected return betting', luckyNumber, '=',
              str(100*pocketTotal/numSpins) + '%\n')
    return (redTotal/numSpins, blackTotal/numSpins, pocketTotal/numSpins)
111+
112+
class EuRoulette(FairRoulette):
    """FairRoulette wheel with one extra pocket, the string '0'.

    The odds attributes are set by FairRoulette.__init__ before the extra
    pocket is appended, so they are the same as on the fair wheel.
    """

    def __init__(self):
        super().__init__()
        self.pockets.append('0')

    def __str__(self):
        return 'European Roulette'
118+
119+
class AmRoulette(EuRoulette):
    """EuRoulette wheel with one more extra pocket, the string '00'."""

    def __init__(self):
        super().__init__()
        self.pockets.append('00')

    def __str__(self):
        return 'American Roulette'
125+
126+
def findPocketReturn(game, numTrials, trialSize, toPrint = False):
    """Collect the pocket-bet return from numTrials runs of playRoulette.

    game: roulette game passed through to playRoulette
    numTrials: number of independent calls to playRoulette
    trialSize: number of spins in each call
    toPrint: forwarded to playRoulette

    returns: list with the third element (the pocket return) of each
             playRoulette result, in trial order
    """
    return [playRoulette(game, trialSize, toPrint)[2]
            for trial in range(numTrials)]
132+
#
133+
#random.seed(0)
134+
#numTrials = 50000
135+
#numSpins = 200
136+
#game = FairRoulette()
137+
#
138+
#means = []
139+
#for i in range(numTrials):
140+
# means.append(findPocketReturn(game, 1, numSpins)[0]/numSpins)
141+
#
142+
#pylab.hist(means, bins = 19,
143+
# weights = pylab.array(len(means)*[1])/len(means))
144+
#pylab.xlabel('Mean Return')
145+
#pylab.ylabel('Probability')
146+
#pylab.title('Expected Return Betting a Pocket')
147+
148+
149+

Diff for: stats/coin_flips.txt

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+


Diff for: stats/distributions.py

+47
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
import random, pylab
2+
3+
# Global matplotlib defaults applied to every figure drawn by this script.
pylab.rcParams.update({
    'lines.linewidth': 4,     # line width
    'axes.titlesize': 20,     # font size for titles
    'axes.labelsize': 20,     # font size for labels on axes
    'xtick.labelsize': 16,    # size of numbers on x-axis
    'ytick.labelsize': 16,    # size of numbers on y-axis
    'xtick.major.size': 7,    # size of ticks on x-axis
    'ytick.major.size': 7,    # size of ticks on y-axis
    'legend.numpoints': 1,    # numpoints for legend
})
20+
21+
# Draw 100000 values from random.gauss(0, 30) and histogram them in
# 30 bins.
dist = [random.gauss(0, 30) for sample in range(100000)]
pylab.hist(dist, 30)
#pylab.show()
26+
27+
import scipy.integrate
28+
29+
def gaussian(x, mu, sigma):
    """Evaluate the normal probability density function at x.

    x: point at which to evaluate the density
    mu: mean of the distribution
    sigma: standard deviation; it is used as a divisor, so it must be
           nonzero

    returns: 1/(sigma*sqrt(2*pi)) * e**(-(x-mu)**2 / (2*sigma**2))
    """
    normalizer = 1.0/(sigma*((2*pylab.pi)**0.5))
    exponent = -(((x-mu)**2)/(2*sigma**2))
    return normalizer*pylab.e**exponent
33+
34+
def checkEmpirical(numTrials):
    """Print the area under gaussian within 1, 1.96 and 3 std of the mean.

    For each of numTrials trials, an integer mu in [-10, 10] and an
    integer sigma in [1, 10] are drawn at random. The density is then
    integrated numerically over mu +/- k*sigma for each k, and the result
    is printed rounded to 4 places.

    numTrials: number of random (mu, sigma) pairs to check
    """
    for trial in range(numTrials):
        mu = random.randint(-10, 10)
        sigma = random.randint(1, 10)
        print('For mu =', mu, 'and sigma =', sigma)
        for numStd in (1, 1.96, 3):
            lower = mu-numStd*sigma
            upper = mu+numStd*sigma
            # quad returns (integral, error estimate); only the integral
            # is reported.
            result = scipy.integrate.quad(gaussian, lower, upper,
                                          (mu, sigma))
            area = result[0]
            print(' Fraction within', numStd, 'std =', round(area, 4))
45+
46+
# Print the within-k-std fractions for three randomly chosen (mu, sigma) pairs.
checkEmpirical(3)
47+

Diff for: stats/flipcoin.py

+65
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
import pylab, random
2+
# Fix the seed so the random draws below are the same on every run.
random.seed(0)
3+
4+
####################
5+
## Helper functions#
6+
####################
7+
def flipCoin(numFlips):
    '''
    Returns the result of numFlips coin flips of a biased coin.

    The flips are drawn without replacement from the outcomes recorded in
    'coin_flips.txt', which is opened relative to the current working
    directory. Whitespace in the file (such as line breaks) is not
    treated as a flip.

    numFlips (int): the number of times to flip the coin.

    returns: a list of length numFlips, where values are either 1 or 0,
             with 1 indicating Heads and 0 indicating Tails.

    raises: ValueError (from random.sample) if numFlips is larger than
            the number of recorded flips.
    '''
    with open('coin_flips.txt','r') as f:
        all_flips = f.read()
    # Sampling the raw text would let newlines and other whitespace be
    # drawn and counted as Tails, so keep only the recorded outcomes.
    recorded = [flip for flip in all_flips if not flip.isspace()]
    flips = random.sample(recorded, numFlips)
    return [int(flip == 'H') for flip in flips]
20+
21+
22+
def getMeanAndStd(X):
    """Return (mean, std) of the numbers in X.

    X must support len() and be iterable more than once. The standard
    deviation is the population form: the squared deviations are divided
    by len(X), not len(X) - 1. An empty X raises ZeroDivisionError.
    """
    count = len(X)
    mean = sum(X)/float(count)
    squaredError = 0.0
    for value in X:
        squaredError += (value - mean)**2
    return mean, (squaredError/count)**0.5
29+
30+
31+
#############################
32+
## CLT Hands-on #
33+
## #
34+
## Fill in the missing code #
35+
## Do not use numpy/pylab #
36+
#############################
37+
# Filled by clt(): one mean-of-sample-means and one std-of-sample-means
# per entry of sampleSizes.
meanOfMeans = []
stdOfMeans = []
# Sample sizes 10, 60, 110, ..., 460.
sampleSizes = range(10, 500, 50)
39+
40+
def clt():
    """Record the mean and spread of sample means for each sample size.

    For every size in the module-level sampleSizes, 20 samples of that
    size are drawn with flipCoin and the mean of each is taken. The mean
    and standard deviation of those 20 sample means are then appended to
    the module-level lists meanOfMeans and stdOfMeans respectively.
    """
    for sampleSize in sampleSizes:
        sampleMeans = [getMeanAndStd(flipCoin(sampleSize))[0]
                       for t in range(20)]
        centre, spread = getMeanAndStd(sampleMeans)
        meanOfMeans.append(centre)
        stdOfMeans.append(spread)
50+
51+
# Fill meanOfMeans and stdOfMeans, then draw the reference figure.
clt()
pylab.figure(1)
# NOTE(review): with the errorbar call disabled, the estimates computed by
# clt() are not drawn; only the reference line below appears.
#pylab.errorbar(sampleSizes, meanOfMeans,
#               yerr = 1.96*pylab.array(stdOfMeans),
#               label = "Est. mean and 95 confidence interval")
pylab.xlim(0, max(sampleSizes) + 50)
# Horizontal reference line at 0.65, labelled as the true probability.
pylab.axhline(0.65, linestyle = '--',
              label = 'True probability of Heads')
pylab.title('Estimates of Probability of Heads')
pylab.xlabel('Sample Size')
# The plotted quantity is a unitless fraction, so the label carries no
# unit (it previously read "(minutes)").
pylab.ylabel('Fraction of Heads')
pylab.legend(loc = 'best')
pylab.show()
64+
65+

0 commit comments

Comments
 (0)