Primary exercises
Create tibble
- Create a tibble 
exercise_group for a group of individuals with names {Sonja, Steven, Ines, Robert, Tim} with their heights {164, 188, 164, 180, 170}, weights {56.0, 87.0, 54.0, 80.0, 58.5} and frequency of exercise {high, high, low, moderate, low}. 
# Build the tibble column by column; each vector lists the five
# individuals in the same order.
exercise_group <- tibble(
  name = c("Sonja", "Steven", "Ines", "Robert", "Tim"),
  height = c(164, 188, 164, 180, 170),
  weight = c(56.0, 87.0, 54.0, 80.0, 58.5),
  exercise = c("high", "high", "low", "moderate", "low")
)
# Print the tibble to check its contents.
exercise_group
# A tibble: 5 × 4
  name   height weight exercise
  <chr>   <dbl>  <dbl> <chr>   
1 Sonja     164   56   high    
2 Steven    188   87   high    
3 Ines      164   54   low     
4 Robert    180   80   moderate
5 Tim       170   58.5 low     
 
tibble subset
- Take the tibble 
exercise_group from the previous exercise and create a new tibble exercise_group_sub without the height and weight variables by selecting columns with the `[` operator. 
# Single-bracket selection with a character vector keeps only the named
# columns, so height and weight are dropped.
exercise_group_sub <- exercise_group[c("name", "exercise")]
exercise_group_sub
# A tibble: 5 × 2
  name   exercise
  <chr>  <chr>   
1 Sonja  high    
2 Steven high    
3 Ines   low     
4 Robert moderate
5 Tim    low     
 
Read tibbles from file
- Read 
pulse.csv data set into R and inspect its dimensions. 
pulse <- read_csv(file = "pulse.csv")
# Two alternatives: (i) the nrow() and ncol() functions, (ii) the dim() function.
nrow(pulse) # number of rows
[1] 110
ncol(pulse) # number of columns
[1] 13
dim(pulse)  # dimensions (rows, columns)
[1] 110  13
- Read 
survey.csv data set into R. 
survey <- read_csv(file = "survey.csv")
dim(survey)
[1] 233  13
- Show the first 9 and the last 7 rows.
 
# First nine rows of the survey data.
head(survey, n = 9)
# A tibble: 9 × 13
  name    gender span1 span2 hand  fold    pulse clap    exercise smokes height m.i        age
  <chr>   <chr>  <dbl> <dbl> <chr> <chr>   <dbl> <chr>   <chr>    <chr>   <dbl> <chr>    <dbl>
1 Alyson  female  18.5  18   right right      92 left    some     never    173  metric    18.2
2 Todd    male    19.5  20.5 left  right     104 left    none     regul    178. imperial  17.6
3 Gerald  male    18    13.3 right left       87 neither none     occas     NA  <NA>      16.9
4 Robert  male    18.8  18.9 right right      NA neither none     never    160  metric    20.3
5 Dustin  male    20    20   right neither    35 right   some     never    165  metric    23.7
6 Abby    female  18    17.7 right left       64 right   some     never    173. imperial  21  
7 Andre   male    17.7  17.7 right left       83 right   freq     never    183. imperial  18.8
8 Michael female  17    17.3 right right      74 right   freq     never    157  metric    35.8
9 Edward  male    20    19.5 right right      72 right   some     never    175  metric    19  
# Last seven rows of the survey data.
tail(survey, n = 7)
# A tibble: 7 × 13
  name     gender span1 span2 hand  fold  pulse clap  exercise smokes height m.i        age
  <chr>    <chr>  <dbl> <dbl> <chr> <chr> <dbl> <chr> <chr>    <chr>   <dbl> <chr>    <dbl>
1 Marcella female  18.8  18.5 right right    80 right some     never    169  metric    18.2
2 Jerry    male    18    16   right right    NA right some     never    180. imperial  20.8
3 Jeanne   female  18    18   right left     85 right some     never    165. imperial  17.7
4 Rosanna  female  18.5  18   right left     88 right some     never    160  metric    16.9
5 Tracey   female  17.5  16.5 right right    NA right some     never    170  metric    18.6
6 Keith    male    21    21.5 right right    90 right some     never    183  metric    17.2
7 Celina   female  17.6  17.3 right right    85 right freq     never    168. metric    17.8
# Mean of the age variable in the survey data. No 'na.rm' argument is needed here:
# the result printed below is a number rather than NA, so age has no missing values.
mean(survey$age) 
[1] 20.35591
- Calculate the mean height in survey data.
 
# Here we pass a second argument, 'na.rm = TRUE', because the variable height contains
# missing values (NA). By default, the mean function returns NA if its first argument, in
# this case the variable 'height', contains any NA. Setting 'na.rm = TRUE' changes this
# behaviour: observations with a missing height are disregarded and the mean is calculated
# from the observations for which the height is available.
#
mean(survey$height, na.rm = TRUE) 
[1] 172.3459