-
Notifications
You must be signed in to change notification settings - Fork 0
/
assignment-2.Rmd
120 lines (104 loc) · 3.86 KB
/
assignment-2.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
---
title: "assignment_2"
author: "alix-bourree"
date: "13 octobre 2017"
output: html_document
---
```{r setup, include=FALSE}
# Set the chunk default `echo = TRUE` for the rest of the document, so chunk
# source is echoed unless a chunk overrides it.
knitr::opts_chunk$set(echo = TRUE)
```
# Exercise 1
```{r}
# Load the helper functions; every non-base function called below is expected
# to be defined in functions.R.
source("functions.R")

# sum_column(): data frame plus a column name, tried on a numeric column, a
# factor column, and a column from a second data set.
print(sum_column(iris, "Sepal.Length"))
print(sum_column(iris, "Species"))
print(sum_column(warpbreaks, "breaks"))

# my_sum(): the same three columns, this time passed directly as vectors.
print(my_sum(iris[["Sepal.Length"]]))
print(my_sum(iris[["Species"]]))
print(my_sum(warpbreaks[["breaks"]]))

# sum_divided_by(): vector plus a divisor, including a factor vector, a
# non-numeric divisor, and a negative divisor.
print(sum_divided_by(iris[["Sepal.Length"]], 12))
print(sum_divided_by(iris[["Species"]], 22))
print(sum_divided_by(iris[["Sepal.Length"]], "Not numeric"))
print(sum_divided_by(warpbreaks[["breaks"]], -12))

# my_mean(): again the numeric, factor, and second-data-set vectors.
print(my_mean(iris[["Sepal.Length"]]))
print(my_mean(iris[["Species"]]))
print(my_mean(warpbreaks[["breaks"]]))
```
# Exercise 2
```{r}
# Show the violin plot exactly as grouped_violin_plot() returns it.
print(grouped_violin_plot(iris, "Sepal.Length", "Species"))

# Build the plot again, then change the fill colours of the three violins to
# the ColorBrewer "Dark2" palette and add the main title "Iris data".
p <- grouped_violin_plot(iris, "Sepal.Length", "Species") +
  scale_fill_brewer(palette = "Dark2") +
  labs(title = "Iris data")
print(p)
```
# Exercise 3
```{r}
# Load the helper functions; every non-base function called below is expected
# to be defined in functions.R.
source("functions.R")

# Difference in medians of Sepal.Width between two different species, then
# between a species and itself.
print(difference_in_medians(iris, "Sepal.Width", "Species",
                            "versicolor", "virginica"))
print(difference_in_medians(iris, "Sepal.Width", "Species",
                            "virginica", "virginica"))

# Reference values: the first ten sepal widths in their original order.
iris$Sepal.Width[1:10]

# --- randomize() with a fixed seed -------------------------------------------
# Save the current RNG state so it can be put back after the seeded section.
# .Random.seed lives in the global environment and does not exist until the
# RNG has been used once; set.seed(NULL) forces it into existence.
if (!exists(".Random.seed", envir = globalenv(), inherits = FALSE)) {
  set.seed(NULL)
}
previous_seed <- get(".Random.seed", envir = globalenv())
set.seed(1)
randomize(iris, "Sepal.Width")$Sepal.Width[1:10]
randomize(iris, "Species")$Species[1:10]
randomize(iris, "Species")$Sepal.Width[1:10]
# Restore the saved state by writing the vector back. set.seed() only takes a
# single integer, so handing it the whole state vector would reseed from its
# first element rather than restore anything.
assign(".Random.seed", previous_seed, envir = globalenv())

# --- permutation tests with 10 samples ---------------------------------------
if (!exists(".Random.seed", envir = globalenv(), inherits = FALSE)) {
  set.seed(NULL)
}
previous_seed <- get(".Random.seed", envir = globalenv())
set.seed(1)
# The same call twice (ptest_1, ptest_2), then once on a copy of iris whose
# Sepal.Width column has first been passed through randomize().
ptest_1 <- permutation_twogroups(iris, "Sepal.Width", "Species", "versicolor",
                                 "virginica", difference_in_medians,
                                 n_samples = 10)
ptest_2 <- permutation_twogroups(iris, "Sepal.Width", "Species", "versicolor",
                                 "virginica", difference_in_medians,
                                 n_samples = 10)
ptest_3 <- permutation_twogroups(randomize(iris, "Sepal.Width"), "Sepal.Width",
                                 "Species", "versicolor", "virginica",
                                 difference_in_medians, n_samples = 10)
assign(".Random.seed", previous_seed, envir = globalenv())

print(ptest_1)
print(ptest_2)
print(ptest_3)
# Two equivalent ways of extracting the element named "observed".
print(ptest_3$observed)
print(ptest_3[["observed"]])

# --- permutation test with n_samples left at its default ---------------------
if (!exists(".Random.seed", envir = globalenv(), inherits = FALSE)) {
  set.seed(NULL)
}
previous_seed <- get(".Random.seed", envir = globalenv())
set.seed(1)
ptest <- permutation_twogroups(iris, "Sepal.Width", "Species", "versicolor",
                               "virginica", difference_in_medians)
assign(".Random.seed", previous_seed, envir = globalenv())
```
`var` et `grouping_var` sont deux noms de variables différents : `var` désigne la colonne qui contient les valeurs à comparer, tandis que `grouping_var` désigne la colonne dont les modalités définissent les groupes auxquels ces valeurs appartiennent.
```{r}
# Save the current RNG state so it can be put back after the seeded section.
# .Random.seed lives in the global environment and does not exist until the
# RNG has been used once; set.seed(NULL) forces it into existence.
if (!exists(".Random.seed", envir = globalenv(), inherits = FALSE)) {
  set.seed(NULL)
}
previous_seed <- get(".Random.seed", envir = globalenv())
set.seed(1)
# Permutation test on the difference in medians, n_samples left at its default.
ptest <- permutation_twogroups(iris, "Sepal.Width", "Species", "versicolor",
                               "virginica", difference_in_medians)
# Restore the saved state by writing the vector back. set.seed() only takes a
# single integer, so handing it the whole state vector would reseed from its
# first element rather than restore anything.
assign(".Random.seed", previous_seed, envir = globalenv())
```
```{r}
# Wrap the "permuted" element of ptest in a one-column tibble for plotting.
# NOTE(review): assumes ptest is a list whose "permuted" element is a plain
# vector of permuted statistics - confirm against permutation_twogroups().
ptest_d <- tibble::as_tibble(ptest["permuted"])

# Histogram of the permuted statistics with a vertical line at the observed
# value.
# - One histogram layer with an explicit bin count; adding stat_bin() on top of
#   geom_histogram() would draw the bars a second time.
# - The column is referenced by name inside aes() rather than via `ptest_d$`.
# - The line position is a constant, so it is passed as a parameter instead of
#   being mapped through aes() (which would draw one line per data row).
# - Every ggplot2 function is namespace-qualified, so the chunk does not depend
#   on ggplot2 being attached.
# NOTE(review): median() is a no-op if ptest$observed is a single number -
# confirm and simplify.
p <- ggplot2::ggplot(ptest_d, ggplot2::aes(x = permuted)) +
  ggplot2::geom_histogram(bins = 30) +
  ggplot2::geom_vline(xintercept = median(ptest$observed)) +
  ggplot2::xlab("permuted")
print(p)
```
```{r}
# Reload the helper functions; new_test_statistic and permutation_twogroups are
# expected to be defined in functions.R.
source("functions.R")

# Save the current RNG state so it can be put back after the seeded section.
# .Random.seed lives in the global environment and does not exist until the
# RNG has been used once; set.seed(NULL) forces it into existence.
if (!exists(".Random.seed", envir = globalenv(), inherits = FALSE)) {
  set.seed(NULL)
}
previous_seed <- get(".Random.seed", envir = globalenv())
set.seed(1)
# Same permutation test as for `ptest`, but with new_test_statistic as the
# statistic function.
ptest_new <- permutation_twogroups(iris, "Sepal.Width", "Species", "versicolor",
                                   "virginica", new_test_statistic)
# Restore the saved state by writing the vector back. set.seed() only takes a
# single integer, so handing it the whole state vector would reseed from its
# first element rather than restore anything.
assign(".Random.seed", previous_seed, envir = globalenv())
```
```{r}
# Reload the helper functions; the permutation_pvalue_* functions are expected
# to be defined in functions.R.
source("functions.R")

# Compute and print both p-values for `ptest` - presumably the right- and
# left-tailed ones, going by the function names.
pvalue_right <- permutation_pvalue_right(ptest)
print(pvalue_right)
pvalue_left <- permutation_pvalue_left(ptest)
print(pvalue_left)
```
Prenons l'exemple de deux groupes : g1, les notes des filles d'une classe à un examen, et g2, celles des garçons. Si la différence des médianes est faible, on peut émettre l'hypothèse qu'il n'y a pas de grande différence de résultats à l'examen selon le sexe, tandis que si la différence des médianes est grande, on peut faire l'hypothèse que les notes diffèrent selon le sexe.