Skip to content

Commit d8f6f78

Browse files
committed
final version
1 parent 4db1ec8 commit d8f6f78

8 files changed

+86
-10
lines changed

.DS_Store

0 Bytes
Binary file not shown.

01-importing.Rmd

+28-2
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@ title: 'Week 03: Examples for Data Importing'
44
---
55

66
# Load packages
7-
```{r}
7+
```{r, warning=F, message=F}
8+
library(tidyverse)
89
library(readr)
910
library(readxl)
1011
```
@@ -16,6 +17,13 @@ library(readxl)
1617
getwd()
1718
```
1819

20+
21+
```{r}
22+
cth_data <- read_csv("data/CTH_raw_na.csv")
23+
```
24+
25+
26+
1927
# Visit Environmental Performance Index at https://epi.yale.edu/
2028

2129
# The Environmental Performance Index (EPI) ranks 180 countries on 32 performance indicators across 11 issue
@@ -159,7 +167,10 @@ excel_sheets("data/Population-1989-2019.xlsx")
159167
# read_excel() reads both xls and xlsx files and detects the format from the extension.
160168

161169
```{r}
162-
pop <- read_excel("data/Population-1989-2019.xlsx")
170+
pop <- read_excel("data/Population-1989-2019.xlsx", range= cell_rows(5:40))
171+
```
172+
173+
```{r}
163174
View(pop)
164175
```
165176

@@ -173,6 +184,21 @@ pop_red <- read_excel("data/Population-1989-2019.xlsx", range = cell_rows(5:40))
173184
View(pop_red)
174185
```
175186

187+
188+
```{r}
189+
colnames(pop)[1:2] <- c("Country", "Number")
190+
```
191+
192+
193+
194+
```{r}
195+
colnames(pop)[-(1:2)] <- paste("y", 1989:2019, sep="")
196+
```
197+
198+
```{r}
199+
View(pop)
200+
```
201+
176202
```{r}
177203
colnames(pop_red)[1:2] <- c("Country", "Number")
178204
View(pop_red)

02-manipulating.Rmd

+56-4
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ getwd()
1919

2020
# Download file from web to your local.
2121

22+
"
23+
2224
```{r, eval=F}
2325
# download the file into your local computer
2426
download.file("https://epi.yale.edu/downloads/epi2020resultsregions20200604.csv", "data/epi2020resultsregions20200604.csv")
@@ -61,6 +63,7 @@ epi_data %>%
6163
View()
6264
```
6365

66+
6467
# Remember: the %>% operator does not save the result; you need
6568
# to assign RHS to an object at the LHS.
6669

@@ -84,11 +87,13 @@ epi_data %>%
8487
View()
8588
```
8689

90+
8791
# We can use the %in% operator to deselect certain groups as well,
8892
# in this case we have to negate by adding an exclamation mark (Boolean operator)
8993
# at the beginning of our filter condition.
9094

9195
```{r}
96+
# Note: negating with - does not work inside filter(); use ! instead
9297
epi_data %>%
9398
filter(!region %in% c("Asia-Pacific", "Eastern Europe")) %>%
9499
View()
@@ -101,12 +106,15 @@ epi_data %>%
101106
# or equal to 50!..
102107

103108

109+
104110
```{r}
105111
epi_data %>%
106112
filter(EPI.new <= 50) %>%
107113
View()
108114
```
109115

116+
117+
110118
# Multiple conditions
111119
# filter(condition1, condition2) will return rows where "both conditions" are met.
112120
# Let's combine multiple conditions in the same call.
@@ -115,12 +123,14 @@ epi_data %>%
115123
# 50 <= EPI.new & EPI.new <= 60.
116124

117125

126+
118127
```{r}
119128
epi_data %>%
120129
filter(50 <= EPI.new & EPI.new <= 60) %>%
121130
View()
122131
```
123132

133+
124134
# or use "helper function" between():
125135

126136
```{r}
@@ -146,6 +156,7 @@ epi_data %>%
146156
View()
147157
```
148158

159+
149160
# Filter Asia-Pacific countries whose EPI score is greater than the grand mean.
150161

151162

@@ -186,6 +197,17 @@ dim(cth_data)
186197

187198
### Caution!!! Discuss with students
188199

200+
201+
If the CTH.raw.1950 column has missing values, remove those rows from the data.
202+
203+
204+
```{r}
205+
cth_data %>%
206+
filter(!is.na(CTH.raw.1950)) %>%
207+
View()
208+
```
209+
210+
189211
```{r}
190212
cth_data_filtered <- cth_data %>%
191213
filter(!is.na(CTH.raw.1950))
@@ -203,6 +225,8 @@ dim(cth_data_filtered)
203225
# The order in which you add them, will determine the order in which
204226
# they appear in the output.
205227

228+
229+
206230
```{r}
207231
epi_data %>%
208232
select(code, iso, country, region, EPI.new, AIR.new) %>%
@@ -218,6 +242,7 @@ epi_data %>%
218242
View()
219243
```
220244

245+
221246
# A side note:
222247
# ! for taking the complement of a set of variables.
223248

@@ -231,6 +256,7 @@ epi_data %>%
231256
# & and | for selecting the intersection or the union of two sets of variables.
232257
# c() for combining selections.
233258

259+
234260
```{r}
235261
epi_data %>%
236262
select(code:iso & iso:EPI.new) %>%
@@ -249,6 +275,15 @@ epi_data %>%
249275
# You can also deselect blocks of columns.
250276

251277

278+
```{r}
279+
# In select(), the minus sign (-) works as well as ! for dropping columns
280+
epi_data %>%
281+
select(-code) %>%
282+
View()
283+
```
284+
285+
286+
252287
```{r}
253288
epi_data %>%
254289
select(-HLT.new,-(PMD.new:WWT.rgn.rank)) %>%
@@ -337,6 +372,15 @@ epi_data %>%
337372

338373
# Sort the data according to the EPI.new variable in ascending order
339374

375+
376+
```{r}
377+
epi_data %>%
378+
arrange(desc(EPI.new)) %>%
379+
View()
380+
```
381+
382+
383+
340384
```{r}
341385
epi_data %>%
342386
arrange(EPI.new) %>%
@@ -371,6 +415,9 @@ epi_data %>%
371415

372416
# Let's standardize the EPI.new variable
373417

418+
419+
420+
374421
```{r}
375422
epi_data %>%
376423
mutate(EPI.stand = (EPI.new-mean(EPI.new))/sd(EPI.new)) %>%
@@ -386,6 +433,7 @@ epi_data %>%
386433
# returns TRUE and lastly what to do when it returns FALSE.
387434

388435

436+
389437
```{r}
390438
epi_data %>%
391439
mutate(EPI.cat = ifelse(EPI.new >=50,"Good","Bad")) %>%
@@ -402,10 +450,6 @@ epi_data %>%
402450
# is not true will continue to be evaluated for the next statement.
403451
# For everything that is left at the end just use the TRUE ~ "newname".
404452

405-
EPI.cat such that EPI.new >= 75, "Very Good"
406-
EPI.new >= 50, "Good"
407-
EPI.new < 50, "Not Good"
408-
409453

410454
```{r}
411455
epi_data %>%
@@ -437,6 +481,14 @@ epi_data %>%
437481
# You can add multiple summary functions behind each other.
438482

439483

484+
```{r}
485+
epi_data %>%
486+
summarize(nds = n_distinct(region)) %>%
487+
View()
488+
```
489+
490+
491+
440492
```{r}
441493
## can you guess the dimension of the output?
442494
epi_data %>%
File renamed without changes.

data/CTH_raw_na.csv

100755100644
File mode changed.

images/.tidyverse.png.icloud

163 Bytes
Binary file not shown.

intro_tidyverse.Rmd

-1
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,6 @@ class: left
5757
* Data tidying with `tidyr` package.
5858
* 03-tidying.Rmd
5959

60-
6160
---
6261
class: middle, center
6362

intro_tidyverse.html

+2-3
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
<title>MAT381E-Week 3: Data Importing, Manipulating, and Tidying</title>
55
<meta charset="utf-8" />
66
<meta name="author" content="Gül İnan" />
7-
<meta name="date" content="2021-10-24" />
7+
<meta name="date" content="2021-10-30" />
88
<script src="intro_tidyverse_files/header-attrs-2.11/header-attrs.js"></script>
99
<link href="intro_tidyverse_files/remark-css-0.0.1/default.css" rel="stylesheet" />
1010
<script src="intro_tidyverse_files/fabric-4.3.1/fabric.min.js"></script>
@@ -26,7 +26,7 @@
2626
# MAT381E-Week 3: Data Importing, Manipulating, and Tidying
2727
### Gül İnan
2828
### Department of Mathematics<br/>Istanbul Technical University
29-
### October 24, 2021
29+
### October 30, 2021
3030

3131
---
3232

@@ -52,7 +52,6 @@
5252
* Data tidying with `tidyr` package.
5353
* 03-tidying.Rmd
5454

55-
5655
---
5756
class: middle, center
5857

0 commit comments

Comments
 (0)