Primary exercises

Apply the following to survey data:

  1. Select personal information {name, age, gender, height} into a new tibble survey_personal_info.
survey_personal_info <- select(survey, name, age, gender, height)
  2. Select the same personal information as in the previous exercise into a new tibble survey_personal_info, but with the first letter of each variable name in uppercase, e.g. Name, Age, etc.
survey_personal_info <- select(survey, Name=name, Age=age, Gender=gender, Height=height)
  3. Reorder the variables in the survey dataset such that name, age and gender appear as the first, second and third columns, followed by the remaining variables.
select(survey, name,age,gender,everything())
# A tibble: 233 × 13
   name      age gender span1 span2 hand  fold    pulse clap    exercise smokes height m.i     
   <chr>   <dbl> <chr>  <dbl> <dbl> <chr> <chr>   <dbl> <chr>   <chr>    <chr>   <dbl> <chr>   
 1 Alyson   18.2 female  18.5  18   right right      92 left    some     never    173  metric  
 2 Todd     17.6 male    19.5  20.5 left  right     104 left    none     regul    178. imperial
 3 Gerald   16.9 male    18    13.3 right left       87 neither none     occas     NA  <NA>    
 4 Robert   20.3 male    18.8  18.9 right right      NA neither none     never    160  metric  
 5 Dustin   23.7 male    20    20   right neither    35 right   some     never    165  metric  
 6 Abby     21   female  18    17.7 right left       64 right   some     never    173. imperial
 7 Andre    18.8 male    17.7  17.7 right left       83 right   freq     never    183. imperial
 8 Michael  35.8 female  17    17.3 right right      74 right   freq     never    157  metric  
 9 Edward   19   male    20    19.5 right right      72 right   some     never    175  metric  
10 Carl     22.3 male    18.5  18.5 right right      90 right   some     never    167  metric  
# … with 223 more rows
  4. Deselect the variables that relate to the hand and/or arm (e.g. span1, span2, hand, etc.). See also the description of the survey data.
select(survey, -span1,-span2,-hand,-fold,-clap)
# A tibble: 233 × 8
   name    gender pulse exercise smokes height m.i        age
   <chr>   <chr>  <dbl> <chr>    <chr>   <dbl> <chr>    <dbl>
 1 Alyson  female    92 some     never    173  metric    18.2
 2 Todd    male     104 none     regul    178. imperial  17.6
 3 Gerald  male      87 none     occas     NA  <NA>      16.9
 4 Robert  male      NA none     never    160  metric    20.3
 5 Dustin  male      35 some     never    165  metric    23.7
 6 Abby    female    64 some     never    173. imperial  21  
 7 Andre   male      83 freq     never    183. imperial  18.8
 8 Michael female    74 freq     never    157  metric    35.8
 9 Edward  male      72 some     never    175  metric    19  
10 Carl    male      90 some     never    167  metric    22.3
# … with 223 more rows
  5. Select the first 20 names along with gender.
# 1)
survey_sub <- select(survey, name,gender)
head( survey_sub , 20)
# A tibble: 20 × 2
   name    gender
   <chr>   <chr> 
 1 Alyson  female
 2 Todd    male  
 3 Gerald  male  
 4 Robert  male  
 5 Dustin  male  
 6 Abby    female
 7 Andre   male  
 8 Michael female
 9 Edward  male  
10 Carl    male  
11 Noemi   female
12 Alfred  male  
13 Bernice female
14 Velma   female
15 Eddie   male  
16 Fern    female
17 Carolyn female
18 Virgil  male  
19 Ken     male  
20 Richard male  
# 2) shorter solution without intermediate variable 'survey_sub':
# gender has to be selected as well, since the exercise asks for the names
# along with gender; the result is identical to solution 1).
head(select(survey, name, gender), 20)
# A tibble: 20 × 2
   name    gender
   <chr>   <chr> 
 1 Alyson  female
 2 Todd    male  
 3 Gerald  male  
 4 Robert  male  
 5 Dustin  male  
 6 Abby    female
 7 Andre   male  
 8 Michael female
 9 Edward  male  
10 Carl    male  
11 Noemi   female
12 Alfred  male  
13 Bernice female
14 Velma   female
15 Eddie   male  
16 Fern    female
17 Carolyn female
18 Virgil  male  
19 Ken     male  
20 Richard male  
  6. Reproduce the following tibbles (note that variables are renamed and reshuffled):

    6.1 First 5 observations.

    # Remark: by enclosing select(...) as the first argument of 'head' function you 
    # can avoid creating intermediate variables.
    
    head( select(survey, SPAN1=span1, SPAN2=span2, everything()), 5)
    # A tibble: 5 × 13
      SPAN1 SPAN2 name   gender hand  fold    pulse clap    exercise smokes height m.i        age
      <dbl> <dbl> <chr>  <chr>  <chr> <chr>   <dbl> <chr>   <chr>    <chr>   <dbl> <chr>    <dbl>
    1  18.5  18   Alyson female right right      92 left    some     never    173  metric    18.2
    2  19.5  20.5 Todd   male   left  right     104 left    none     regul    178. imperial  17.6
    3  18    13.3 Gerald male   right left       87 neither none     occas     NA  <NA>      16.9
    4  18.8  18.9 Robert male   right right      NA neither none     never    160  metric    20.3
    5  20    20   Dustin male   right neither    35 right   some     never    165  metric    23.7

    6.2 Last 3 observations.

    tail( select(survey, Hand=hand,Fold=fold,Clap=clap, everything()), 3)
    # A tibble: 3 × 13
      Hand  Fold  Clap  name   gender span1 span2 pulse exercise smokes height m.i      age
      <chr> <chr> <chr> <chr>  <chr>  <dbl> <dbl> <dbl> <chr>    <chr>   <dbl> <chr>  <dbl>
    1 right right right Tracey female  17.5  16.5    NA some     never    170  metric  18.6
    2 right right right Keith  male    21    21.5    90 some     never    183  metric  17.2
    3 right right right Celina female  17.6  17.3    85 freq     never    168. metric  17.8
  7. Select variables from the survey data by pattern matching.

    7.1 Select variables that end with ‘e’.

    select(survey, ends_with('e'))
    # A tibble: 233 × 4
       name    pulse exercise   age
       <chr>   <dbl> <chr>    <dbl>
     1 Alyson     92 some      18.2
     2 Todd      104 none      17.6
     3 Gerald     87 none      16.9
     4 Robert     NA none      20.3
     5 Dustin     35 some      23.7
     6 Abby       64 some      21  
     7 Andre      83 freq      18.8
     8 Michael    74 freq      35.8
     9 Edward     72 some      19  
    10 Carl       90 some      22.3
    # … with 223 more rows

    7.2 Select variables that start with ‘s’.

    select(survey, starts_with('s'))
    # A tibble: 233 × 3
       span1 span2 smokes
       <dbl> <dbl> <chr> 
     1  18.5  18   never 
     2  19.5  20.5 regul 
     3  18    13.3 occas 
     4  18.8  18.9 never 
     5  20    20   never 
     6  18    17.7 never 
     7  17.7  17.7 never 
     8  17    17.3 never 
     9  20    19.5 never 
    10  18.5  18.5 never 
    # … with 223 more rows

    7.3 Select hand span variables using a helper function.

    # 1)
    select(survey, contains('span'))
    # A tibble: 233 × 2
       span1 span2
       <dbl> <dbl>
     1  18.5  18  
     2  19.5  20.5
     3  18    13.3
     4  18.8  18.9
     5  20    20  
     6  18    17.7
     7  17.7  17.7
     8  17    17.3
     9  20    19.5
    10  18.5  18.5
    # … with 223 more rows
    # 2) 
    # select(survey, starts_with('span'))

Extra exercises

  1. Rename the m.i variable to system.
# 1) Very tedious, you need to type all the variable names and 
# only rename the 'm.i' variable to 'system'
#
select(survey , name, gender, span1, span2, hand, fold, pulse, clap, 
       exercise, smokes, height, system=m.i, age)
# A tibble: 233 × 13
   name    gender span1 span2 hand  fold    pulse clap    exercise smokes height system     age
   <chr>   <chr>  <dbl> <dbl> <chr> <chr>   <dbl> <chr>   <chr>    <chr>   <dbl> <chr>    <dbl>
 1 Alyson  female  18.5  18   right right      92 left    some     never    173  metric    18.2
 2 Todd    male    19.5  20.5 left  right     104 left    none     regul    178. imperial  17.6
 3 Gerald  male    18    13.3 right left       87 neither none     occas     NA  <NA>      16.9
 4 Robert  male    18.8  18.9 right right      NA neither none     never    160  metric    20.3
 5 Dustin  male    20    20   right neither    35 right   some     never    165  metric    23.7
 6 Abby    female  18    17.7 right left       64 right   some     never    173. imperial  21  
 7 Andre   male    17.7  17.7 right left       83 right   freq     never    183. imperial  18.8
 8 Michael female  17    17.3 right right      74 right   freq     never    157  metric    35.8
 9 Edward  male    20    19.5 right right      72 right   some     never    175  metric    19  
10 Carl    male    18.5  18.5 right right      90 right   some     never    167  metric    22.3
# … with 223 more rows
# 2) Shorter, but as a side effect m.i (now system) is moved to 
# the front.
select(survey, system=m.i, everything()) 
# A tibble: 233 × 13
   system   name    gender span1 span2 hand  fold    pulse clap    exercise smokes height   age
   <chr>    <chr>   <chr>  <dbl> <dbl> <chr> <chr>   <dbl> <chr>   <chr>    <chr>   <dbl> <dbl>
 1 metric   Alyson  female  18.5  18   right right      92 left    some     never    173   18.2
 2 imperial Todd    male    19.5  20.5 left  right     104 left    none     regul    178.  17.6
 3 <NA>     Gerald  male    18    13.3 right left       87 neither none     occas     NA   16.9
 4 metric   Robert  male    18.8  18.9 right right      NA neither none     never    160   20.3
 5 metric   Dustin  male    20    20   right neither    35 right   some     never    165   23.7
 6 imperial Abby    female  18    17.7 right left       64 right   some     never    173.  21  
 7 imperial Andre   male    17.7  17.7 right left       83 right   freq     never    183.  18.8
 8 metric   Michael female  17    17.3 right right      74 right   freq     never    157   35.8
 9 metric   Edward  male    20    19.5 right right      72 right   some     never    175   19  
10 metric   Carl    male    18.5  18.5 right right      90 right   some     never    167   22.3
# … with 223 more rows
# 3) Use rename function (see ?dplyr::rename). 
rename(survey,system=m.i)
# A tibble: 233 × 13
   name    gender span1 span2 hand  fold    pulse clap    exercise smokes height system     age
   <chr>   <chr>  <dbl> <dbl> <chr> <chr>   <dbl> <chr>   <chr>    <chr>   <dbl> <chr>    <dbl>
 1 Alyson  female  18.5  18   right right      92 left    some     never    173  metric    18.2
 2 Todd    male    19.5  20.5 left  right     104 left    none     regul    178. imperial  17.6
 3 Gerald  male    18    13.3 right left       87 neither none     occas     NA  <NA>      16.9
 4 Robert  male    18.8  18.9 right right      NA neither none     never    160  metric    20.3
 5 Dustin  male    20    20   right neither    35 right   some     never    165  metric    23.7
 6 Abby    female  18    17.7 right left       64 right   some     never    173. imperial  21  
 7 Andre   male    17.7  17.7 right left       83 right   freq     never    183. imperial  18.8
 8 Michael female  17    17.3 right right      74 right   freq     never    157  metric    35.8
 9 Edward  male    20    19.5 right right      72 right   some     never    175  metric    19  
10 Carl    male    18.5  18.5 right right      90 right   some     never    167  metric    22.3
# … with 223 more rows
  2. Select name along with all categorical variables into a new tibble survey_cats.
# Categorical data: variables which take on categories as values, e.g. 
# 
# gender   : {male, female}
# hand     : {left, right}
# fold     : {left, right, neither} 
# clap     : {left, right, neither} 
# exercise : {freq, some, none}
# smokes   : {heavy, regul, occas, never}
# m.i      : {metric, imperial}
#
# 
survey_cats <- select(survey, name, gender, hand, fold, clap, exercise, smokes, m.i)
survey_cats
# A tibble: 233 × 8
   name    gender hand  fold    clap    exercise smokes m.i     
   <chr>   <chr>  <chr> <chr>   <chr>   <chr>    <chr>  <chr>   
 1 Alyson  female right right   left    some     never  metric  
 2 Todd    male   left  right   left    none     regul  imperial
 3 Gerald  male   right left    neither none     occas  <NA>    
 4 Robert  male   right right   neither none     never  metric  
 5 Dustin  male   right neither right   some     never  metric  
 6 Abby    female right left    right   some     never  imperial
 7 Andre   male   right left    right   freq     never  imperial
 8 Michael female right right   right   freq     never  metric  
 9 Edward  male   right right   right   some     never  metric  
10 Carl    male   right right   right   some     never  metric  
# … with 223 more rows
  3. Create a new tibble survey_nums with name and all numerical variables.
survey_nums <- select(survey, name, span1, span2, pulse, height, age)
survey_nums
# A tibble: 233 × 6
   name    span1 span2 pulse height   age
   <chr>   <dbl> <dbl> <dbl>  <dbl> <dbl>
 1 Alyson   18.5  18      92   173   18.2
 2 Todd     19.5  20.5   104   178.  17.6
 3 Gerald   18    13.3    87    NA   16.9
 4 Robert   18.8  18.9    NA   160   20.3
 5 Dustin   20    20      35   165   23.7
 6 Abby     18    17.7    64   173.  21  
 7 Andre    17.7  17.7    83   183.  18.8
 8 Michael  17    17.3    74   157   35.8
 9 Edward   20    19.5    72   175   19  
10 Carl     18.5  18.5    90   167   22.3
# … with 223 more rows
  4. For this exercise you’ll need an additional helper function, where(), which is
    explained in the tidyselect documentation.

    4.1 Reproduce the result from the previous exercise (3) without typing out all numerical variable names. Hint: you’ll also need the is.numeric function (see ?is.numeric for help).

    # select() accepts bare column names and selection helpers in one call, so
    # name can be listed directly in front of where(is.numeric); columns are
    # returned in the order given, which makes bind_cols() unnecessary.
    select(survey, name, where(is.numeric))
    # A tibble: 233 × 6
       name    span1 span2 pulse height   age
       <chr>   <dbl> <dbl> <dbl>  <dbl> <dbl>
     1 Alyson   18.5  18      92   173   18.2
     2 Todd     19.5  20.5   104   178.  17.6
     3 Gerald   18    13.3    87    NA   16.9
     4 Robert   18.8  18.9    NA   160   20.3
     5 Dustin   20    20      35   165   23.7
     6 Abby     18    17.7    64   173.  21  
     7 Andre    17.7  17.7    83   183.  18.8
     8 Michael  17    17.3    74   157   35.8
     9 Edward   20    19.5    72   175   19  
    10 Carl     18.5  18.5    90   167   22.3
    # … with 223 more rows

    4.2 Select all non-numerical variables.

    # 1) 
    select(survey,! where(is.numeric))
    # A tibble: 233 × 8
       name    gender hand  fold    clap    exercise smokes m.i     
       <chr>   <chr>  <chr> <chr>   <chr>   <chr>    <chr>  <chr>   
     1 Alyson  female right right   left    some     never  metric  
     2 Todd    male   left  right   left    none     regul  imperial
     3 Gerald  male   right left    neither none     occas  <NA>    
     4 Robert  male   right right   neither none     never  metric  
     5 Dustin  male   right neither right   some     never  metric  
     6 Abby    female right left    right   some     never  imperial
     7 Andre   male   right left    right   freq     never  imperial
     8 Michael female right right   right   freq     never  metric  
     9 Edward  male   right right   right   some     never  metric  
    10 Carl    male   right right   right   some     never  metric  
    # … with 223 more rows
    # 2) Since character is the only non-numerical type in survey, the following is also a correct solution.
    select(survey, where(is.character))
    # A tibble: 233 × 8
       name    gender hand  fold    clap    exercise smokes m.i     
       <chr>   <chr>  <chr> <chr>   <chr>   <chr>    <chr>  <chr>   
     1 Alyson  female right right   left    some     never  metric  
     2 Todd    male   left  right   left    none     regul  imperial
     3 Gerald  male   right left    neither none     occas  <NA>    
     4 Robert  male   right right   neither none     never  metric  
     5 Dustin  male   right neither right   some     never  metric  
     6 Abby    female right left    right   some     never  imperial
     7 Andre   male   right left    right   freq     never  imperial
     8 Michael female right right   right   freq     never  metric  
     9 Edward  male   right right   right   some     never  metric  
    10 Carl    male   right right   right   some     never  metric  
    # … with 223 more rows


Copyright © 2022 Biomedical Data Sciences (BDS) | LUMC