-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrscript.R
More file actions
159 lines (130 loc) · 4.53 KB
/
rscript.R
File metadata and controls
159 lines (130 loc) · 4.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
#loading required packages
library("ggplot2")
library(RGtk2)
# GUI ----
# Builds the dashboard: a top-level window containing a "Dashboard" frame,
# which holds a vertical box with a caption label, a row with the "Load Data"
# button and a row with one button per project section.
# The button objects are connected to their callbacks further down the file.
window <- gtkWindow("toplevel", show = FALSE)
window["title"] <- "Project Peyvandi"
window$setDefaultSize(500, 200)
frame <- gtkFrameNew("Dashboard")
window$add(frame)
window["visible"] <- TRUE

# Vertical container placed inside the frame.
box1 <- gtkVBoxNew()
box1$setBorderWidth(30)
frame$add(box1)
label <- gtkLabelNewWithMnemonic("Project Sections:")
box1$packStart(label)

# Row holding the data-loading button.
box3 <- gtkHBoxNew()
box3$setBorderWidth(30)
box1$packStart(box3)
# `FALSE` is spelled out: `F` is an ordinary variable that can be reassigned.
loadDataButton <- gtkButton("Load Data")
box3$packStart(loadDataButton, fill = FALSE)

# Row holding the four section buttons.
box2 <- gtkHBoxNew()
box2$setBorderWidth(30)
box1$packStart(box2)
section1 <- gtkButton("Section 1")
box2$packStart(section1, fill = FALSE)
section2 <- gtkButton("Section 2")
box2$packStart(section2, fill = FALSE)
section3 <- gtkButton("Section 3")
box2$packStart(section3, fill = FALSE)
section4 <- gtkButton("Section 4")
box2$packStart(section4, fill = FALSE)
# Defining functions ----

# Print a KS test result followed by a plain-language verdict.
#
# Args:
#   arg1: a test result as returned by ks.test(); it must provide the
#     elements `p.value` and `alternative`.
# Returns:
#   The verdict string that was printed (the value of the final print()).
print_ks <- function(arg1) {
  print(arg1)
  # Look the fields up by name with `[[`: positional `[` indexing returned
  # one-element lists and silently depended on the element order.
  p_value <- arg1[["p.value"]]
  if (p_value > 0.05) {
    print("Null hypothesis accepted. Significance level = 5%")
  } else if (p_value > 0.01) {
    print("Null hypothesis accepted. Significance level = 1%")
  } else {
    print(paste(
      "Null hypothesis rejected. Alternative hypothesis is: ",
      arg1[["alternative"]]
    ))
  }
}
# Plot the empirical CDF of a sample against a second CDF (drawn in red) and
# mark with a blue vertical line the x position where the two curves are
# furthest apart.
#
# Args:
#   arg1: numeric vector, the (first) sample.
#   arg2: in one-sample mode, the reference CDF, given either as a function
#     (e.g. punif) or as its name (e.g. "punif"); it is called with the
#     sample as its only argument. In two-sample mode, the second sample's
#     numeric vector.
#   one_sample: TRUE for the one-sample layout, FALSE for the two-sample one.
# Returns:
#   The value of print() applied to the ggplot object.
plot_ks <- function(arg1, arg2, one_sample = TRUE) {
  first_cdf <- ecdf(arg1)
  if (one_sample) {
    # match.fun() returns a function unchanged and resolves a name to one.
    second_cdf <- match.fun(arg2)
    plot_title <- "KS Test (One-Sample)"
    # Against a continuous reference the largest gap may sit just below a
    # jump of the empirical CDF, so check both the value at each observation
    # and the left limit (share of observations strictly below it).
    candidates <- arg1
    reference <- second_cdf(candidates)
    left_limit <- vapply(candidates, function(p) mean(arg1 < p), numeric(1))
    gaps <- pmax(
      abs(first_cdf(candidates) - reference),
      abs(left_limit - reference)
    )
  } else {
    second_cdf <- ecdf(arg2)
    plot_title <- "KS Test (Two-Sample)"
    # Both curves are step functions, so the largest gap is attained at an
    # observation of either sample, not only at those of the first one.
    candidates <- c(arg1, arg2)
    gaps <- abs(first_cdf(candidates) - second_cdf(candidates))
  }
  maxDiffAt <- candidates[which.max(gaps)]
  frame1 <- data.frame(arg1)
  print(
    ggplot(frame1, aes(x = arg1)) +
      stat_ecdf() +
      stat_function(fun = second_cdf, color = "red") +
      geom_vline(xintercept = maxDiffAt, color = "blue") +
      labs(title = plot_title, x = "x", y = "F(x)")
  )
}
# Loading data ----
# Read the two default data sets from the working directory. The files are
# read without a header row and only the first column of each is kept as the
# vector analysed by the later sections.
frame1 <- read.csv("DataSet1.csv", header = FALSE)
frame2 <- read.csv("DataSet2.csv", header = FALSE)
vec1 <- frame1[[1]]
vec2 <- frame2[[1]]
# Callback for the "Load Data" button: ask the user for two CSV files (no
# header row) and make them the active data sets.
#
# Args:
#   button: the widget that emitted the signal (unused).
# Returns:
#   Invisibly, the first column of the second file.
loadData <- function(button) {
  # Read both files into locals first, so that a cancelled dialog or an
  # unreadable file leaves the previously loaded data untouched.
  new_frame1 <- read.csv(file.choose(), header = FALSE)
  new_frame2 <- read.csv(file.choose(), header = FALSE)
  # `<<-` is deliberate: plain `<-` only created function-local copies that
  # were discarded on return, so the other callbacks never saw the new data.
  frame1 <<- new_frame1
  frame2 <<- new_frame2
  vec1 <<- new_frame1[, 1]
  vec2 <<- new_frame2[, 1]
}
# Part one ----
# Callback for "Section 1": one-sample KS test of the data given in the
# project document against the uniform distribution on [0, 1], followed by a
# plot of the result.
#
# Args:
#   button: the widget that emitted the signal (unused).
section1_function <- function(button) {
  x <- c(0.58, 0.42, 0.52, 0.33, 0.43, 0.23, 0.58, 0.76, 0.53, 0.64)
  result <- ks.test(x, "punif", 0, 1)
  print_ks(result)
  # Visualise the test result.
  plot_ks(x, punif)
}
# Part two ----
# Callback for "Section 2": two-sample KS test on our own randomly generated
# data sets, followed by a plot of the result.
#
# Args:
#   button: the widget that emitted the signal (unused).
section2_function <- function(button) {
  # Normal with mean = 1, sd = 1.
  set1 <- rnorm(n = 100, mean = 1, sd = 1)
  # Normal with mean = 0, sd = 2.
  set2 <- rnorm(n = 100, mean = 0, sd = 2)
  result <- ks.test(set1, set2)
  print_ks(result)
  # Visualise the test result.
  plot_ks(set1, set2, FALSE)
}
# Part three ----
# Callback for "Section 3": two-sample KS test on the two loaded data
# vectors (vec1 and vec2), followed by a plot of the comparison.
#
# Args:
#   button: the widget that emitted the signal (unused).
section3_function <- function(button) {
  ks_outcome <- ks.test(x = vec1, y = vec2)
  print_ks(ks_outcome)
  # Draw both empirical CDFs and mark their largest gap.
  plot_ks(vec1, vec2, one_sample = FALSE)
}
# Part four ----
# Callback for "Section 4": run a t test on vec1 and vec2, print a
# Shapiro-Wilk normality test for each vector and plot both empirical CDFs
# with the two sample means marked.
#
# Args:
#   button: the widget that emitted the signal (unused).
section4_function <- function(button) {
  print(t.test(vec1, vec2))
  # Check whether vec1 and vec2 look normally distributed. If at least one of
  # them fails the normality test (p-value below 1%), the t test result
  # cannot be trusted here.
  # NOTE: t.test() is called with its defaults, i.e. the Welch variant
  # (var.equal = FALSE), which does not assume equal variances.
  print(shapiro.test(vec1))
  print(shapiro.test(vec2))
  helpFrame <- data.frame(vec1)
  first_func <- ecdf(vec1)
  second_func <- ecdf(vec2)
  mean1 <- mean(vec1)
  mean2 <- mean(vec2)
  print(
    ggplot(helpFrame, aes(x = vec1)) +
      stat_ecdf() +
      stat_function(fun = second_func, color = "red") +
      # annotate() draws exactly one point per mean; geom_point() with
      # constant aesthetics drew one overlapping point per data row.
      annotate("point", x = mean1, y = first_func(mean1), color = "blue") +
      annotate("point", x = mean2, y = second_func(mean2), color = "blue") +
      labs(
        title = "T Test", x = "x", y = "F(x)",
        caption = "Blue dots indicate mean"
      )
  )
}
# Wire every button's "clicked" signal to its callback.
gSignalConnect(obj = section1, signal = "clicked", f = section1_function)
gSignalConnect(obj = section2, signal = "clicked", f = section2_function)
gSignalConnect(obj = section3, signal = "clicked", f = section3_function)
gSignalConnect(obj = section4, signal = "clicked", f = section4_function)
gSignalConnect(obj = loadDataButton, signal = "clicked", f = loadData)