Primary exercises
- Create tibble
 
- Create a tibble 
exercise_group for a group of
individuals with names {Sonja, Steven, Ines, Robert, Tim} with their
heights {164, 188, 164, 180, 170}, weights {56.0, 87.0, 54.0, 80.0,
58.5} and frequency of exercise {high, high, low, moderate, low}. 
# Build the tibble column by column. Every vector lists the five
# individuals in the same order, so position i always describes one person.
exercise_group <- tibble(
  name     = c("Sonja", "Steven", "Ines", "Robert", "Tim"),
  height   = c(164, 188, 164, 180, 170),
  weight   = c(56.0, 87.0, 54.0, 80.0, 58.5),
  exercise = c("high", "high", "low", "moderate", "low")
)
# Print the tibble to check its contents.
exercise_group
# A tibble: 5 × 4
  name   height weight exercise
  <chr>   <dbl>  <dbl> <chr>   
1 Sonja     164   56   high    
2 Steven    188   87   high    
3 Ines      164   54   low     
4 Robert    180   80   moderate
5 Tim       170   58.5 low     
- Update the tibble 
exercise_group by adding Ella
and Oscar; leave their respective height,
weight and exercise values as missing
(NA). Rather than copying the code from (a) and adding the new
names, reuse the columns already inside
exercise_group. 
# Rebuild the tibble from its own columns: each existing column is extended
# with c(), appending the two new names and NA for the unknown values.
exercise_group <- tibble(
  name     = c(exercise_group$name, "Ella", "Oscar"),
  height   = c(exercise_group$height, NA, NA),
  weight   = c(exercise_group$weight, NA, NA),
  exercise = c(exercise_group$exercise, NA, NA)
)
# Print the tibble to check the two appended rows.
exercise_group
# A tibble: 7 × 4
  name   height weight exercise
  <chr>   <dbl>  <dbl> <chr>   
1 Sonja     164   56   high    
2 Steven    188   87   high    
3 Ines      164   54   low     
4 Robert    180   80   moderate
5 Tim       170   58.5 low     
6 Ella       NA   NA   <NA>    
7 Oscar      NA   NA   <NA>    
- Add the 
sex variable to exercise_group
with values male and female. 
# Append the new column with `$<-`. The vector holds one value per row, in
# the same order as the names already stored in the tibble.
exercise_group$sex <- c(
  "female", "male", "female", "male", "male", "female", "male"
)
# Print the tibble to check the new column.
exercise_group
# A tibble: 7 × 5
  name   height weight exercise sex   
  <chr>   <dbl>  <dbl> <chr>    <chr> 
1 Sonja     164   56   high     female
2 Steven    188   87   high     male  
3 Ines      164   54   low      female
4 Robert    180   80   moderate male  
5 Tim       170   58.5 low      male  
6 Ella       NA   NA   <NA>     female
7 Oscar      NA   NA   <NA>     male  
- Create a tibble which keeps track of the smoking habits over the
years of three people:
Julio, age 21, who started smoking at age 17 and stopped in
2020; Camille, age 20, who started smoking in 2021; and
Travis, age 19, who started at age 16. 
# Tabulate the information first. Where only a starting age is given, the
# start year is derived as 2022 - (age - starting age). NA (missing value)
# in `stop` is interpreted as not stopped, i.e. still smoking to the
# present date.
#
# name     age  start             stop
# Julio    21   2022 - (21 - 17)  2020
# Camille  20   2021              NA
# Travis   19   2022 - (19 - 16)  NA
tibble(
  name  = c("Julio", "Camille", "Travis"),
  age   = c(21, 20, 19),
  start = c(2022 - (21 - 17), 2021, 2022 - (19 - 16)),
  stop  = c(2020, NA, NA)
)
# A tibble: 3 × 4
  name      age start  stop
  <chr>   <dbl> <dbl> <dbl>
1 Julio      21  2018  2020
2 Camille    20  2021    NA
3 Travis     19  2019    NA
tibble subset
- Take the tibble 
exercise_group from the previous
exercise and create a new tibble exercise_group_sub without
the height and weight variables, using selection with
[. 
# Select columns by name with `[`. Only the listed columns are kept, so
# height and weight (and any other unlisted column) are left out.
exercise_group_sub <- exercise_group[, c("name", "exercise")]
exercise_group_sub
# A tibble: 7 × 2
  name   exercise
  <chr>  <chr>   
1 Sonja  high    
2 Steven high    
3 Ines   low     
4 Robert moderate
5 Tim    low     
6 Ella   <NA>    
7 Oscar  <NA>    
- Create a tibble called 
exercise_group_sub containing the 1st
and 3rd columns of exercise_group. 
# Select columns by position with `[`: column 1 is name, column 3 is weight.
exercise_group_sub <- exercise_group[, c(1, 3)]
exercise_group_sub
# A tibble: 7 × 2
  name   weight
  <chr>   <dbl>
1 Sonja    56  
2 Steven   87  
3 Ines     54  
4 Robert   80  
5 Tim      58.5
6 Ella     NA  
7 Oscar    NA  
 
Read tibbles from file
- Read the 
pulse.csv data set into R and inspect its
dimensions. 
# Read the data set from the CSV file into a tibble.
pulse <- read_csv("pulse.csv")
# Two ways to inspect the dimensions:
#   i)  nrow() and ncol(), one dimension at a time
#   ii) dim(), both dimensions at once
nrow(pulse) # number of rows
[1] 110
ncol(pulse) # number of columns
[1] 13
dim(pulse) # dimensions (rows, columns)
[1] 110  13
- Read the 
survey.csv data set into R. 
# Read the survey data from the CSV file and check its size (rows, columns).
survey <- read_csv("survey.csv")
dim(survey)
[1] 233  13
- Show the first 9 and the last 7 rows of survey.
 
# First 9 rows of the survey data.
head(survey, n = 9)
# A tibble: 9 × 13
  name    gender span1 span2 hand  fold    pulse clap    exercise smokes height m.i        age
  <chr>   <chr>  <dbl> <dbl> <chr> <chr>   <dbl> <chr>   <chr>    <chr>   <dbl> <chr>    <dbl>
1 Alyson  female  18.5  18   right right      92 left    some     never    173  metric    18.2
2 Todd    male    19.5  20.5 left  right     104 left    none     regul    178. imperial  17.6
3 Gerald  male    18    13.3 right left       87 neither none     occas     NA  <NA>      16.9
4 Robert  male    18.8  18.9 right right      NA neither none     never    160  metric    20.3
5 Dustin  male    20    20   right neither    35 right   some     never    165  metric    23.7
6 Abby    female  18    17.7 right left       64 right   some     never    173. imperial  21  
7 Andre   male    17.7  17.7 right left       83 right   freq     never    183. imperial  18.8
8 Michael female  17    17.3 right right      74 right   freq     never    157  metric    35.8
9 Edward  male    20    19.5 right right      72 right   some     never    175  metric    19  
# Last 7 rows of the survey data.
tail(survey, n = 7)
# A tibble: 7 × 13
  name     gender span1 span2 hand  fold  pulse clap  exercise smokes height m.i        age
  <chr>    <chr>  <dbl> <dbl> <chr> <chr> <dbl> <chr> <chr>    <chr>   <dbl> <chr>    <dbl>
1 Marcella female  18.8  18.5 right right    80 right some     never    169  metric    18.2
2 Jerry    male    18    16   right right    NA right some     never    180. imperial  20.8
3 Jeanne   female  18    18   right left     85 right some     never    165. imperial  17.7
4 Rosanna  female  18.5  18   right left     88 right some     never    160  metric    16.9
5 Tracey   female  17.5  16.5 right right    NA right some     never    170  metric    18.6
6 Keith    male    21    21.5 right right    90 right some     never    183  metric    17.2
7 Celina   female  17.6  17.3 right right    85 right freq     never    168. metric    17.8
# Mean age in the survey data. The result below is a number rather than NA,
# so the age variable has no missing values and na.rm is not needed here.
mean(survey$age)
[1] 20.35591
- Calculate the mean height in the survey data.
 
# The variable height contains missing values (NA). By default mean()
# returns NA if its first argument, here the variable 'height', contains
# any NA. The second argument 'na.rm = TRUE' changes this behaviour: the
# observations with a missing height are disregarded and the mean is
# calculated over the observations for which the height is available.
mean(survey$height, na.rm = TRUE)
[1] 172.3459