Skip to content

Commit 9fafc92

Browse files
author
ph1ps
committed
Add README with explanation
1 parent 8ac88ae commit 9fafc92

13 files changed

+224
-115
lines changed

Naive Bayes Classifier/NaiveBayes.playground/Contents.swift

+14-14
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,13 @@ import Foundation
22

33
/*:
44
## Naive Bayes Classifier
5-
5+
66
This playground uses the given algorithm and utilizes its features with some example datasets.
7-
7+
88
### Gaussian Naive Bayes
99
- Note:
1010
When using Gaussian NB you have to have continuous features (Double).
11-
11+
1212
For this example we are going to use a famous dataset with different types of wine. The labels of the features can be viewed [here](https://gist.github.com/tijptjik/9408623)
1313
*/
1414
guard let wineCSV = Bundle.main.path(forResource: "wine", ofType: "csv") else {
@@ -27,7 +27,7 @@ guard let csv = try? String(contentsOfFile: wineCSV) else {
2727
let rows = csv.characters.split(separator: "\r\n").map { String($0) }
2828
/*:
2929
Splitting on the ; sign and converting the value to a Double
30-
30+
3131
- Important:
3232
Do not force unwrap the mapped values in your real application. Carefully convert them! This is just for the sake of showing how the algorithm works.
3333
*/
@@ -47,31 +47,31 @@ let data = wineData.map { row in
4747

4848
/*:
4949
Again, use `guard` on the result of a `try?` (or simply `do-try-catch`), because this would crash your application if an error occurred.
50-
50+
5151
The array I passed to the `classifyProba` method is a former entry in the .csv file, which I removed in order to classify it.
5252
*/
5353
let wineBayes = try! NaiveBayes(type: .gaussian, data: data, classes: classes).train()
5454
let result = wineBayes.classifyProba(with: [12.85, 1.6, 2.52, 17.8, 95, 2.48, 2.37, 0.26, 1.46, 3.93, 1.09, 3.63, 1015])
5555
/*:
5656
I can assure you that ***class 1*** is the correct result, and as you can see, the classifier thinks that it's ***99.99%*** likely too.
57-
57+
5858
### Multinomial Naive Bayes
59-
59+
6060
- Note:
6161
When using Multinomial NB you have to have categorical features (Int).
62-
62+
6363
This dataset is commonly used to illustrate the classification problem, and it is categorical — which means you don't have real (continuous) values, just categorical data, as stated before. The structure of this dataset is as follows.
64-
64+
6565
Outlook,Temperature,Humidity,Windy
66-
66+
6767
***Outlook***: 0 = rainy, 1 = overcast, 2 = sunny
68-
68+
6969
***Temperature***: 0 = hot, 1 = mild, 2 = cool
70-
70+
7171
***Humidity***: 0 = high, 1 = normal
72-
72+
7373
***Windy***: 0 = false, 1 = true
74-
74+
7575
The classes indicate whether or not he will play golf, depending on the weather conditions. (0 = won't play, 1 = will play)
7676
*/
7777

Naive Bayes Classifier/NaiveBayes.playground/Sources/NaiveBayes.swift

+50-50
Original file line numberDiff line numberDiff line change
@@ -11,186 +11,186 @@ import Foundation
1111
extension String: Error {}
1212

1313
extension Array where Element == Double {
14-
14+
1515
func mean() -> Double {
1616
return self.reduce(0, +) / Double(count)
1717
}
18-
18+
1919
func standardDeviation() -> Double {
2020
let calculatedMean = mean()
21-
21+
2222
let sum = self.reduce(0.0) { (previous, next) in
2323
return previous + pow(next - calculatedMean, 2)
2424
}
25-
25+
2626
return sqrt(sum / Double(count - 1))
2727
}
2828
}
2929

3030
extension Array where Element == Int {
31-
31+
3232
func uniques() -> Set<Element> {
3333
return Set(self)
3434
}
35-
35+
3636
}
3737

3838
public enum NBType {
39-
39+
4040
case gaussian
4141
case multinomial
4242
//case bernoulli --> TODO
43-
43+
4444
func calcLikelihood(variables: [Any], input: Any) -> Double? {
45-
45+
4646
if case .gaussian = self {
47-
47+
4848
guard let input = input as? Double else {
4949
return nil
5050
}
51-
51+
5252
guard let mean = variables[0] as? Double else {
5353
return nil
5454
}
55-
55+
5656
guard let stDev = variables[1] as? Double else {
5757
return nil
5858
}
59-
59+
6060
let eulerPart = pow(M_E, -1 * pow(input - mean, 2) / (2 * pow(stDev, 2)))
6161
let distribution = eulerPart / sqrt(2 * .pi) / stDev
62-
62+
6363
return distribution
64-
64+
6565
} else if case .multinomial = self {
66-
66+
6767
guard let variables = variables as? [(category: Int, probability: Double)] else {
6868
return nil
6969
}
70-
70+
7171
guard let input = input as? Int else {
7272
return nil
7373
}
74-
74+
7575
return variables.first { variable in
7676
return variable.category == input
7777
}?.probability
78-
78+
7979
}
80-
80+
8181
return nil
8282
}
83-
83+
8484
func train(values: [Any]) -> [Any]? {
85-
85+
8686
if case .gaussian = self {
87-
87+
8888
guard let values = values as? [Double] else {
8989
return nil
9090
}
91-
91+
9292
return [values.mean(), values.standardDeviation()]
93-
93+
9494
} else if case .multinomial = self {
95-
95+
9696
guard let values = values as? [Int] else {
9797
return nil
9898
}
99-
99+
100100
let count = values.count
101101
let categoryProba = values.uniques().map { value -> (Int, Double) in
102102
return (value, Double(values.filter { $0 == value }.count) / Double(count))
103103
}
104104
return categoryProba
105105
}
106-
106+
107107
return nil
108108
}
109109
}
110110

111111
public class NaiveBayes<T> {
112-
112+
113113
var variables: [Int: [(feature: Int, variables: [Any])]]
114114
var type: NBType
115-
115+
116116
var data: [[T]]
117117
var classes: [Int]
118-
118+
119119
public init(type: NBType, data: [[T]], classes: [Int]) throws {
120120
self.type = type
121121
self.data = data
122122
self.classes = classes
123123
self.variables = [Int: [(Int, [Any])]]()
124-
124+
125125
if case .gaussian = type, T.self != Double.self {
126126
throw "When using Gaussian NB you have to have continuous features (Double)"
127127
} else if case .multinomial = type, T.self != Int.self {
128128
throw "When using Multinomial NB you have to have categorical features (Int)"
129129
}
130130
}
131-
131+
132132
public func train() throws -> Self {
133-
133+
134134
for `class` in classes.uniques() {
135135
variables[`class`] = [(Int, [Any])]()
136-
136+
137137
let classDependent = data.enumerated().filter { (offset, _) in
138138
return classes[offset] == `class`
139139
}
140-
140+
141141
for feature in 0..<data[0].count {
142-
142+
143143
let featureDependent = classDependent.map { $0.element[feature] }
144-
144+
145145
guard let trained = type.train(values: featureDependent) else {
146146
throw "Critical! Data could not be casted even though it was checked at init"
147147
}
148-
148+
149149
variables[`class`]?.append((feature, trained))
150150
}
151151
}
152-
152+
153153
return self
154154
}
155-
155+
156156
public func classify(with input: [T]) -> Int {
157157
let likelihoods = classifyProba(with: input).max { (first, second) -> Bool in
158158
return first.1 < second.1
159159
}
160-
160+
161161
guard let `class` = likelihoods?.0 else {
162162
return -1
163163
}
164-
164+
165165
return `class`
166166
}
167-
167+
168168
public func classifyProba(with input: [T]) -> [(Int, Double)] {
169-
169+
170170
var probaClass = [Int: Double]()
171171
let amount = classes.count
172-
172+
173173
classes.forEach { `class` in
174174
let individual = classes.filter { $0 == `class` }.count
175175
probaClass[`class`] = Double(individual) / Double(amount)
176176
}
177-
177+
178178
let classesAndFeatures = variables.map { (`class`, value) -> (Int, [Double]) in
179179
let distribution = value.map { (feature, variables) -> Double in
180180
return type.calcLikelihood(variables: variables, input: input[feature]) ?? 0.0
181181
}
182182
return (`class`, distribution)
183183
}
184-
184+
185185
let likelihoods = classesAndFeatures.map { (`class`, distribution) in
186186
return (`class`, distribution.reduce(1, *) * (probaClass[`class`] ?? 0.0))
187187
}
188-
188+
189189
let sum = likelihoods.map { $0.1 }.reduce(0, +)
190190
let normalized = likelihoods.map { (`class`, likelihood) in
191191
return (`class`, likelihood / sum)
192192
}
193-
193+
194194
return normalized
195195
}
196196
}
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
2-
<playground version='5.0' target-platform='ios' display-mode='raw'>
2+
<playground version='5.0' target-platform='ios' display-mode='rendered'>
33
<timeline fileName='timeline.xctimeline'/>
44
</playground>

Naive Bayes Classifier/NaiveBayes.playground/timeline.xctimeline

+10
Original file line numberDiff line numberDiff line change
@@ -17,5 +17,15 @@
1717
selectedRepresentationIndex = "0"
1818
shouldTrackSuperviewWidth = "NO">
1919
</LoggerValueHistoryTimelineItem>
20+
<LoggerValueHistoryTimelineItem
21+
documentLocation = "#CharacterRangeLen=10&amp;CharacterRangeLoc=3410&amp;EndingColumnNumber=15&amp;EndingLineNumber=100&amp;StartingColumnNumber=5&amp;StartingLineNumber=100&amp;Timestamp=514208705.303681"
22+
selectedRepresentationIndex = "0"
23+
shouldTrackSuperviewWidth = "NO">
24+
</LoggerValueHistoryTimelineItem>
25+
<LoggerValueHistoryTimelineItem
26+
documentLocation = "#CharacterRangeLen=6&amp;CharacterRangeLoc=1931&amp;EndingColumnNumber=11&amp;EndingLineNumber=53&amp;StartingColumnNumber=5&amp;StartingLineNumber=53&amp;Timestamp=514208705.303681"
27+
selectedRepresentationIndex = "0"
28+
shouldTrackSuperviewWidth = "NO">
29+
</LoggerValueHistoryTimelineItem>
2030
</TimelineItems>
2131
</Timeline>

0 commit comments

Comments
 (0)