Skip to content

Commit 0130c64

Browse files
committed
Create chisquared.R
adding chi squared multiple questions multiple demographic variable result generation AND post hoc tests
1 parent 8a21c7d commit 0130c64

File tree

1 file changed

+258
-0
lines changed

1 file changed

+258
-0
lines changed

chisquared.R

Lines changed: 258 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,258 @@
1+
# likertTable for all three functions should be formatted with question columns
2+
# and demographic (or grouping) columns, each row should be a different
3+
# respondents answer. Cells should be e.g. likert responses (More, etc.) or
4+
# demographic data (e.g. age groups)
5+
6+
## Results table for the addKruskal and addChiSquared functions should be formulated like this:
7+
8+
#
9+
# activityGardensResults <-
10+
# tibble(
11+
# demographicVariable = c(
12+
# "Gender",
13+
# "Marital Status",
14+
# "Age",
15+
# "Housing",
16+
# "Town Type",
17+
# "Governorate",
18+
# "Employment",
19+
# "Work Location",
20+
# "Education",
21+
# "Before Income",
22+
# "After Income"
23+
# ),
24+
# afterGardensRelaxing_CHI = rep(0.0, length(demographicVariable)),
25+
# afterGardensRelaxing_PVAL = rep(0.0, length(demographicVariable)),
26+
# afterGardensRelaxing_CORPVAL = rep(0.0, length(demographicVariable)),
27+
# afterGardensExercise_CHI = rep(0.0, length(demographicVariable)),
28+
# afterGardensExercise_PVAL = rep(0.0, length(demographicVariable)),
29+
# afterGardensExercise_CORPVAL = rep(0.0, length(demographicVariable)),
30+
# afterGardensBirdPhotography_CHI = rep(0.0, length(demographicVariable)),
31+
# afterGardensBirdPhotography_PVAL = rep(0.0, length(demographicVariable)),
32+
# afterGardensBirdPhotography_CORPVAL = rep(0.0, length(demographicVariable))
33+
#
34+
# )
35+
36+
37+
38+
addKruskal <-
39+
function(likertTable,
40+
resultsTable,
41+
questionColumns,
42+
demographicColumns) {
43+
i = min(demographicColumns) # row
44+
j = min(questionColumns) # column
45+
46+
a = 1 # the first row in the resultsTable that should have results assigned.
47+
c = 4 # first column that should have corrected pvalues
48+
49+
# Iterate through each demographic variable
50+
for (i in min(demographicColumns):max(demographicColumns)) {
51+
b = 2 # the first column in the resultsTable that should have results assigned to it
52+
53+
for (j in min(questionColumns):max(questionColumns)) {
54+
# Create a temp table and filter out blanks.
55+
tempTable <- likertTable[, c(j, i)]
56+
tempTable <- tempTable[tempTable[2] != "",]
57+
58+
# perform the kruskal wallis test
59+
tempKruskal <- kruskal.test(tempTable[, 1] ~ tempTable[, 2])
60+
61+
# add statistic value and pvalue to the table.
62+
resultsTable[a, b] <- tempKruskal$statistic
63+
resultsTable[a, b + 1] <- tempKruskal$p.value
64+
65+
# add 2 so next results go in the proper place
66+
b = b + 3
67+
} # end question loop
68+
69+
a = a + 1 # go to next row for next set of demographic info
70+
71+
} # end demographic loop
72+
73+
# add corrected pvalue
74+
75+
z = 1
76+
for (z in 1:length(questionColumns)) {
77+
tempCorrected <-
78+
p.adjust(as.vector(unlist(resultsTable[, c - 1])), method = "holm")
79+
80+
resultsTable[, c] <- tempCorrected
81+
82+
c = c + 3
83+
84+
} # end addition of corrected pvalues
85+
86+
return(resultsTable)
87+
88+
} # end function
89+
90+
91+
92+
addChiSquared <-
93+
function(likertTable,
94+
resultsTable,
95+
questionColumns,
96+
demographicColumns,
97+
minVal = 25) {
98+
i = min(demographicColumns) # row
99+
j = min(questionColumns) # column
100+
101+
a = 1 # the first row in the resultsTable that should have results assigned.
102+
c = 4 # first column that should have corrected pvalues
103+
104+
# Iterate through each demographic variable
105+
for (i in min(demographicColumns):max(demographicColumns)) {
106+
b = 2 # the first column in the resultsTable that should have results assigned to it
107+
108+
for (j in min(questionColumns):max(questionColumns)) {
109+
# Create a temp table and filter out blanks.
110+
tempTable <- likertTable[, c(j, i)]
111+
tempTable <-
112+
tempTable[tempTable[2] != "" & !is.na(tempTable[1]) ,]
113+
114+
# Check to see if any group has less than min responses.
115+
testSize <-
116+
tempTable %>% count(tempTable[1:2]) %>% group_by(across(.cols = 2)) %>% summarise(n = sum(n))
117+
118+
tooSmall <- testSize[testSize[2] < minVal, 1]
119+
print(unlist(tooSmall))
120+
# print(colnames(tempTable)) # use to debug
121+
122+
# Based on this, either assign a "can't be tested"/NA indicator or the values for the chi-squared test.
123+
if (length(setdiff(unique(tempTable[, 2]), tooSmall)) < 2) {
124+
resultsTable[a, b] <- NA
125+
resultsTable[a, b + 1] <- NA
126+
127+
} else {
128+
tempTable <- tempTable[!(tempTable[, 2] %in% tooSmall),]
129+
130+
# perform the chi squared test
131+
tempChi <- chisq.test(tempTable[, 1], tempTable[, 2])
132+
133+
# add statistic value and pvalue to the table.
134+
resultsTable[a, b] <- tempChi$statistic
135+
resultsTable[a, b + 1] <- tempChi$p.value
136+
137+
} # end of else
138+
139+
# Use this if you want to export a Pivot table
140+
# pivot <- pivot_wider(testSize,
141+
# names_from = colnames(testSize[1]),
142+
# values_from = n)
143+
144+
# Clean up
145+
remove(testSize, tooSmall)
146+
147+
# add 2 so next results go in the proper place
148+
b = b + 3
149+
} # end question loop
150+
151+
a = a + 1 # go to next row for next set of demographic info
152+
153+
} # end demographic loop
154+
155+
# add corrected pvalue
156+
157+
z = 1
158+
for (z in 1:length(questionColumns)) {
159+
tempCorrected <-
160+
p.adjust(as.vector(unlist(resultsTable[, c - 1])), method = "holm")
161+
162+
resultsTable[, c] <- tempCorrected
163+
164+
c = c + 3
165+
166+
} # end addition of corrected pvalues
167+
168+
return(resultsTable)
169+
170+
} # end function
171+
172+
173+
174+
# This function needs to compare between categories within a question to see
175+
# which categories are significantly different.
176+
177+
posthocChiSquared <-
178+
function(likertTable,
179+
questionColumn,
180+
demographicColumn,
181+
minVal = 25,
182+
correction = TRUE) {
183+
tempTable <- likertTable[, c(questionColumn, demographicColumn)]
184+
tempTable <-
185+
tempTable[tempTable[2] != "" & !is.na(tempTable[1]) , ]
186+
187+
# Check to see if any group has less than min allowed responses.
188+
testSize <-
189+
tempTable %>% count(tempTable[1:2]) %>% group_by(across(.cols = 2)) %>% summarise(n = sum(n))
190+
191+
tooSmall <- testSize[testSize[2] < minVal, 1]
192+
print(unlist(tooSmall))
193+
194+
# Filter out too small categories
195+
tempTable <- tempTable[!(tempTable[, 2] %in% tooSmall), ]
196+
# and make sure demographic is a factor (this gets lost sometimes)
197+
if (is.factor(tempTable[, 2]) == FALSE) {
198+
tempTable[, 2] <- as.factor(tempTable[, 2])
199+
}
200+
201+
# Make sure that the number of unique is more than two
202+
if (nlevels(tempTable[, 2]) < 3) {
203+
stop(print("Too few levels"))
204+
}
205+
206+
numLevels <- nlevels(tempTable[, 2])
207+
208+
# create the results table
209+
#create matrix with correct number of columns
210+
resultsTable <- matrix(rep(999, times = numLevels ^ 2),
211+
ncol = numLevels,
212+
byrow = TRUE)
213+
214+
#define column names and row names of matrix
215+
tempLevels <- levels(tempTable[, 2])
216+
colnames(resultsTable) <- tempLevels
217+
rownames(resultsTable) <- tempLevels
218+
219+
# for each [i,j] pair of factors add the pvalue for chisquared test
220+
i = 1 # row
221+
j = 1 # column
222+
for (i in 1:numLevels) {
223+
for (j in 1:numLevels) {
224+
if (i != j) {
225+
# subset for i and j levels
226+
testTable <-
227+
tempTable[tempTable[, 2] %in% tempLevels[c(i, j)] ,]
228+
229+
# run test and assign pvalue to i,j spot
230+
resultsTable[i, j] <-
231+
chisq.test(testTable[, 1], testTable[, 2])$p.value
232+
233+
} else {
234+
resultsTable[i, j] <- NA
235+
}
236+
237+
} # end column loop
238+
239+
} # end row loop
240+
241+
# remove the lower triangle--can probably make this a filter but this is easy
242+
resultsTable[lower.tri(resultsTable, diag = FALSE)] <- NA
243+
244+
# correct pvalues
245+
if (correction == TRUE) {
246+
resultsTable <-
247+
matrix(
248+
p.adjust(as.vector(resultsTable), method = 'holm'),
249+
ncol = numLevels,
250+
dimnames = list(tempLevels, tempLevels)
251+
)
252+
}
253+
#convert matrix to a tibble
254+
resultsTable <- as_tibble(resultsTable, rownames = "levels")
255+
256+
return(resultsTable)
257+
258+
} # end function

0 commit comments

Comments
 (0)