class: title-slide, left, top background-image: url(img/sam-balye-k5RD4dl8Y1o-unsplash_blue.jpg) background-position: 75% 75% background-size: cover # C-Path R Training ### Advanced Data Wrangling Part I **Kelsey Gonzalez**<br> May 26, 2021 — Day 1 --- name: about-me layout: false class: about-me-slide, inverse, middle, center # About me <img src="https://kelseygonzalez.github.io/author/kelsey-e.-gonzalez/avatar.png" class="rounded"/> ## Kelsey Gonzalez .fade[University of Arizona<br>IBM] [<svg viewBox="0 0 512 512" style="height:1em;position:relative;display:inline-block;top:.1em;" xmlns="http://www.w3.org/2000/svg"> <path d="M326.612 185.391c59.747 59.809 58.927 155.698.36 214.59-.11.12-.24.25-.36.37l-67.2 67.2c-59.27 59.27-155.699 59.262-214.96 0-59.27-59.26-59.27-155.7 0-214.96l37.106-37.106c9.84-9.84 26.786-3.3 27.294 10.606.648 17.722 3.826 35.527 9.69 52.721 1.986 5.822.567 12.262-3.783 16.612l-13.087 13.087c-28.026 28.026-28.905 73.66-1.155 101.96 28.024 28.579 74.086 28.749 102.325.51l67.2-67.19c28.191-28.191 28.073-73.757 0-101.83-3.701-3.694-7.429-6.564-10.341-8.569a16.037 16.037 0 0 1-6.947-12.606c-.396-10.567 3.348-21.456 11.698-29.806l21.054-21.055c5.521-5.521 14.182-6.199 20.584-1.731a152.482 152.482 0 0 1 20.522 17.197zM467.547 44.449c-59.261-59.262-155.69-59.27-214.96 0l-67.2 67.2c-.12.12-.25.25-.36.37-58.566 58.892-59.387 154.781.36 214.59a152.454 152.454 0 0 0 20.521 17.196c6.402 4.468 15.064 3.789 20.584-1.731l21.054-21.055c8.35-8.35 12.094-19.239 11.698-29.806a16.037 16.037 0 0 0-6.947-12.606c-2.912-2.005-6.64-4.875-10.341-8.569-28.073-28.073-28.191-73.639 0-101.83l67.2-67.19c28.239-28.239 74.3-28.069 102.325.51 27.75 28.3 26.872 73.934-1.155 101.96l-13.087 13.087c-4.35 4.35-5.769 10.79-3.783 16.612 5.864 17.194 9.042 34.999 9.69 52.721.509 13.906 17.454 20.446 27.294 10.606l37.106-37.106c59.271-59.259 59.271-155.699.001-214.959z"></path></svg> kelseygonzalez.github.io](https://kelseygonzalez.github.io/) [<svg viewBox="0 0 512 512" style="height:1em;position:relative;display:inline-block;top:.1em;" xmlns="http://www.w3.org/2000/svg"> <path d="M459.37 151.716c.325 4.548.325 9.097.325 13.645 0 138.72-105.583 298.558-298.558 298.558-59.452 0-114.68-17.219-161.137-47.106 8.447.974 16.568 1.299 25.34 1.299 49.055 0 94.213-16.568 130.274-44.832-46.132-.975-84.792-31.188-98.112-72.772 6.498.974 12.995 1.624 19.818 1.624 9.421 0 18.843-1.3 27.614-3.573-48.081-9.747-84.143-51.98-84.143-102.985v-1.299c13.969 7.797 30.214 12.67 47.431 13.319-28.264-18.843-46.781-51.005-46.781-87.391 0-19.492 5.197-37.36 14.294-52.954 51.655 63.675 129.3 105.258 216.365 109.807-1.624-7.797-2.599-15.918-2.599-24.04 0-57.828 46.782-104.934 104.934-104.934 30.213 0 57.502 12.67 76.67 33.137 23.715-4.548 46.456-13.32 66.599-25.34-7.798 24.366-24.366 44.833-46.132 57.827 21.117-2.273 41.584-8.122 60.426-16.243-14.292 20.791-32.161 39.308-52.628 54.253z"></path></svg> @KelseyEGonzalez](https://twitter.com/kelseyegonzalez) [<svg viewBox="0 0 496 512" style="position:relative;display:inline-block;top:.1em;height:1em;" xmlns="http://www.w3.org/2000/svg"> <path d="M165.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6zm-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3zm44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9zM244.8 8C106.1 8 0 113.3 0 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C428.2 457.8 496 362.9 496 252 496 113.3 383.5 8 244.8 8zM97.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1zm-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7zm32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1zm-11.4-14.7c-1.6 1-1.6 3.6 0 5.9 1.6 2.3 4.3 3.3 5.6 2.3 1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2z"></path></svg> @KelseyGonzalez](https://github.com/KelseyGonzalez) --- layout: true <!-- <a class="footer-link" href="http://bit.ly/cpath-wrangling">http://bit.ly/cpath-wrangling — Kelsey Gonzalez</a> --> <!-- this adds the link footer to all slides, depends on footer-link class in css--> --- class: left # About you -- .pull-left-narrow[ .center[<svg viewBox="0 0 581 512" style="position:relative;display:inline-block;top:.1em;height:2em;" xmlns="http://www.w3.org/2000/svg"> <path d="M581 226.6C581 119.1 450.9 32 290.5 32S0 119.1 0 226.6C0 322.4 103.3 402 239.4 418.1V480h99.1v-61.5c24.3-2.7 47.6-7.4 69.4-13.9L448 480h112l-67.4-113.7c54.5-35.4 88.4-84.9 88.4-139.7zm-466.8 14.5c0-73.5 98.9-133 220.8-133s211.9 40.7 211.9 133c0 50.1-26.5 85-70.3 106.4-2.4-1.6-4.7-2.9-6.4-3.7-10.2-5.2-27.8-10.5-27.8-10.5s86.6-6.4 86.6-92.7-90.6-87.9-90.6-87.9h-199V361c-74.1-21.5-125.2-67.1-125.2-119.9zm225.1 38.3v-55.6c57.8 0 87.8-6.8 87.8 27.3 0 36.5-38.2 28.3-87.8 28.3zm-.9 72.5H365c10.8 0 18.9 11.7 24 19.2-16.1 1.9-33 2.8-50.6 2.9v-22.1z"></path></svg>]] .pull-right-wide[### you know R] -- .pull-left-narrow[.center[ <img src="https://raw.githubusercontent.com/rstudio/hex-stickers/master/PNG/tidyverse.png" width="25%"/>]] .pull-right-wide[### you know some basic tidyverse] -- .pull-left-narrow[ .center[<svg viewBox="0 0 512 512" style="position:relative;display:inline-block;top:.1em;height:2em;" xmlns="http://www.w3.org/2000/svg"> <path d="M223.75 130.75L154.62 15.54A31.997 31.997 0 0 0 127.18 0H16.03C3.08 0-4.5 14.57 2.92 25.18l111.27 158.96c29.72-27.77 67.52-46.83 109.56-53.39zM495.97 0H384.82c-11.24 0-21.66 5.9-27.44 15.54l-69.13 115.21c42.04 6.56 79.84 25.62 109.56 53.38L509.08 25.18C516.5 14.57 508.92 0 495.97 0zM256 160c-97.2 0-176 78.8-176 176s78.8 176 176 176 176-78.8 176-176-78.8-176-176-176zm92.52 157.26l-37.93 36.96 8.97 52.22c1.6 9.36-8.26 16.51-16.65 12.09L256 393.88l-46.9 24.65c-8.4 4.45-18.25-2.74-16.65-12.09l8.97-52.22-37.93-36.96c-6.82-6.64-3.05-18.23 6.35-19.59l52.43-7.64 23.43-47.52c2.11-4.28 6.19-6.39 10.28-6.39 4.11 0 8.22 2.14 10.33 6.39l23.43 47.52 52.43 7.64c9.4 1.36 13.17 12.95 6.35 19.59z"></path></svg>]] .pull-right-wide[### .my-gold[**you want to master data wrangling**]] --- # Learning Objectives - Describe data frames in R and tidyverse tibbles - Use basic functions of dplyr to manipulate single data frames/tibbles by rows, by columns (variables), and by groups + Choose columns (variables) by names `select()` + Choose rows by column (variable) values `filter()` + Arrange (sort) rows by column (variable) values: `arrange()` + Rename columns (variables): `rename()` + Add/modify new/existing columns (variables): `mutate()` + Group rows by columns (variables): `group_by()` + Calculate summary statistics of Columns with or without grouping. `summarize()`, `group_by()` - Describe tidy data - Make your data tidy with `pivot_longer()`, `pivot_wider()`, `separate()`, and `unite()`. --- # Why become a data wrangling master? Real world data is always messy to start + Variables may be missing + Values may be missing + There can be duplicate rows + Variables may have different names in different tables for the same attribute + Numbers can be stored as characters + The data may be structured for ease of human input rather than analysis + Unsorted or sorted in a different order than we want --- # Our Data ---- We will use the US National Health and Nutrition Examination Study for this workshop ```r NHANES::NHANESraw %>% head() ## # A tibble: 6 x 78 ## ID SurveyYr Gender Age AgeMonths Race1 Race3 Education MaritalStatus ## <int> <fct> <fct> <int> <int> <fct> <fct> <fct> <fct> ## 1 51624 2009_10 male 34 409 White <NA> High School Married ## 2 51625 2009_10 male 4 49 Other <NA> <NA> <NA> ## 3 51626 2009_10 male 16 202 Black <NA> <NA> <NA> ## 4 51627 2009_10 male 10 131 Black <NA> <NA> <NA> ## 5 51628 2009_10 female 60 722 Black <NA> High School Widowed ## 6 51629 2009_10 male 26 313 Mexican <NA> 9 - 11th Gr~ Married ## # ... with 69 more variables: HHIncome <fct>, HHIncomeMid <int>, Poverty <dbl>, ## # HomeRooms <int>, HomeOwn <fct>, Work <fct>, Weight <dbl>, Length <dbl>, ## # HeadCirc <dbl>, Height <dbl>, BMI <dbl>, BMICatUnder20yrs <fct>, ## # BMI_WHO <fct>, Pulse <int>, BPSysAve <int>, BPDiaAve <int>, BPSys1 <int>, ## # BPDia1 <int>, BPSys2 <int>, BPDia2 <int>, BPSys3 <int>, BPDia3 <int>, ## # Testosterone <dbl>, DirectChol <dbl>, TotChol <dbl>, UrineVol1 <int>, ## # UrineFlow1 <dbl>, UrineVol2 <int>, UrineFlow2 <dbl>, Diabetes <fct>, ## # DiabetesAge <int>, HealthGen <fct>, DaysPhysHlthBad <int>, ## # DaysMentHlthBad <int>, LittleInterest <fct>, Depressed <fct>, ## # nPregnancies <int>, nBabies <int>, Age1stBaby <int>, SleepHrsNight <int>, ## # SleepTrouble <fct>, PhysActive <fct>, PhysActiveDays <int>, TVHrsDay <fct>, ## # CompHrsDay <fct>, TVHrsDayChild <int>, CompHrsDayChild <int>, ## # Alcohol12PlusYr <fct>, AlcoholDay <int>, AlcoholYear <int>, SmokeNow <fct>, ## # Smoke100 <fct>, SmokeAge <int>, Marijuana <fct>, AgeFirstMarij <int>, ## # RegularMarij <fct>, AgeRegMarij <int>, HardDrugs <fct>, SexEver <fct>, ## # SexAge <int>, SexNumPartnLife <int>, SexNumPartYear <int>, SameSex <fct>, ## # SexOrientation <fct>, WTINT2YR <dbl>, WTMEC2YR <dbl>, SDMVPSU <int>, ## # SDMVSTRA <int>, PregnantNow <fct> ``` --- # Our Data ---- This dataset has 78 columns, but here are some that we will be using: - `Weight` - Weight in kg - `Pulse` - 60 second pulse rate - `BPDiaAve` - Combined diastolic blood pressure reading, following the procedure outlined for BPXDAR - `DirectChol` - Direct HDL cholesterol in mmol/L. Reported for participants aged 6 years or older. - `Diabetes` - Study participant told by a doctor or health professional that they have diabetes. Reported for participants aged 1 year or older as Yes or No. - `HealthGen` - Self-reported rating of participant's health in general Reported for participants aged 12 years or older. One of _Excellent_, _Vgood_, _Good_, _Fair_, or _Poor_. - `SleepHrsNight` - Self-reported number of hours study participant usually gets at night on weekdays or workdays. Reported for participants aged 16 years and older. --- # Our Data, continued ---- - `PhysActiveDays` - Number of days in a typical week that participant does moderate or vigorous-intensity activity. Reported for participants 12 years or older. - `AlcoholDay` - Average number of drinks consumed on days that participant drank alcoholic beverages. Reported for participants aged 18 years or older. - `Smoke100` - Study participant has smoked at least 100 cigarettes in their entire life. Reported for participants aged 20 years or older as _Yes_ or _No_. - `Marijuana` Participant has tried marijuana. Reported for participants aged 18 to 59 years as _Yes_ or _No_. To see more descriptions, type `?NHANES` in the console --- # Glimpse <img src="img/dplyr.png" class="title-hex"> `Glimpse` gives us a quick outline of what variables are in our data. ``` ## Rows: 20,293 ## Columns: 78 ## $ ID <int> 51624, 51625, 51626, 51627, 51628, 51629, 51630, 5163~ ## $ SurveyYr <fct> 2009_10, 2009_10, 2009_10, 2009_10, 2009_10, 2009_10,~ ## $ Gender <fct> male, male, male, male, female, male, female, female,~ ## $ Age <int> 34, 4, 16, 10, 60, 26, 49, 1, 10, 80, 10, 80, 4, 35, ~ ## $ AgeMonths <int> 409, 49, 202, 131, 722, 313, 596, 12, 124, NA, 121, N~ ## $ Race1 <fct> White, Other, Black, Black, Black, Mexican, White, Wh~ ## $ Race3 <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~ ## $ Education <fct> High School, NA, NA, NA, High School, 9 - 11th Grade,~ ## $ MaritalStatus <fct> Married, NA, NA, NA, Widowed, Married, LivePartner, N~ ## $ HHIncome <fct> 25000-34999, 20000-24999, 45000-54999, 20000-24999, 1~ ## $ HHIncomeMid <int> 30000, 22500, 50000, 22500, 12500, 30000, 40000, 4000~ ## $ Poverty <dbl> 1.36, 1.07, 2.27, 0.81, 0.69, 1.01, 1.91, 1.36, 2.68,~ ## $ HomeRooms <int> 6, 9, 5, 6, 6, 4, 5, 5, 7, 4, 5, 5, 7, NA, 6, 6, 5, 6~ ## $ HomeOwn <fct> Own, Own, Own, Rent, Rent, Rent, Rent, Rent, Own, Own~ ## $ Work <fct> NotWorking, NA, NotWorking, NA, NotWorking, Working, ~ ## $ Weight <dbl> 87.4, 17.0, 72.3, 39.8, 116.8, 97.6, 86.7, 9.4, 26.0,~ ## $ Length <dbl> NA, NA, NA, NA, NA, NA, NA, 75.7, NA, NA, NA, NA, NA,~ ## $ HeadCirc <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~ ## $ Height <dbl> 164.7, 105.4, 181.3, 147.8, 166.0, 173.0, 168.4, NA, ~ ## $ BMI <dbl> 32.22, 15.30, 22.00, 18.22, 42.39, 32.61, 30.57, NA, ~ ## $ BMICatUnder20yrs <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~ ## $ BMI_WHO <fct> 30.0_plus, 12.0_18.5, 18.5_to_24.9, 12.0_18.5, 30.0_p~ ## $ Pulse <int> 70, NA, 68, 68, 72, 72, 86, NA, 70, 88, 84, 54, NA, N~ ## $ BPSysAve <int> 113, NA, 109, 93, 150, 104, 112, NA, 108, 139, 94, 12~ ## $ BPDiaAve <int> 85, NA, 59, 41, 68, 49, 75, NA, 53, 43, 45, 60, NA, N~ ## $ BPSys1 <int> 114, NA, 112, 92, 154, 102, 118, NA, 106, 142, 94, 12~ ## $ BPDia1 <int> 88, NA, 62, 36, 70, 50, 82, NA, 60, 62, 38, 62, NA, N~ ## $ BPSys2 <int> 114, NA, 114, 94, 150, 104, 108, NA, 106, 140, 92, 12~ ## $ BPDia2 <int> 88, NA, 60, 44, 68, 48, 74, NA, 50, 46, 40, 62, NA, N~ ## $ BPSys3 <int> 112, NA, 104, 92, 150, 104, 116, NA, 110, 138, 96, 11~ ## $ BPDia3 <int> 82, NA, 58, 38, 68, 50, 76, NA, 56, 40, 50, 58, NA, N~ ## $ Testosterone <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~ ## $ DirectChol <dbl> 1.29, NA, 1.55, 1.89, 1.16, 1.16, 1.16, NA, 1.58, 1.9~ ## $ TotChol <dbl> 3.49, NA, 4.97, 4.16, 5.22, 4.14, 6.70, NA, 4.14, 4.7~ ## $ UrineVol1 <int> 352, NA, 281, 139, 30, 202, 77, NA, 39, 128, 109, 38,~ ## $ UrineFlow1 <dbl> NA, NA, 0.415, 1.078, 0.476, 0.563, 0.094, NA, 0.300,~ ## $ UrineVol2 <int> NA, NA, NA, NA, 246, NA, NA, NA, NA, NA, NA, NA, NA, ~ ## $ UrineFlow2 <dbl> NA, NA, NA, NA, 2.51, NA, NA, NA, NA, NA, NA, NA, NA,~ ## $ Diabetes <fct> No, No, No, No, Yes, No, No, No, No, No, No, Yes, No,~ ## $ DiabetesAge <int> NA, NA, NA, NA, 56, NA, NA, NA, NA, NA, NA, 70, NA, N~ ## $ HealthGen <fct> Good, NA, Vgood, NA, Fair, Good, Good, NA, NA, Excell~ ## $ DaysPhysHlthBad <int> 0, NA, 2, NA, 20, 2, 0, NA, NA, 0, NA, 0, NA, NA, NA,~ ## $ DaysMentHlthBad <int> 15, NA, 0, NA, 25, 14, 10, NA, NA, 0, NA, 0, NA, NA, ~ ## $ LittleInterest <fct> Most, NA, NA, NA, Most, None, Several, NA, NA, None, ~ ## $ Depressed <fct> Several, NA, NA, NA, Most, Most, Several, NA, NA, Non~ ## $ nPregnancies <int> NA, NA, NA, NA, 1, NA, 2, NA, NA, NA, NA, NA, NA, NA,~ ## $ nBabies <int> NA, NA, NA, NA, 1, NA, 2, NA, NA, NA, NA, NA, NA, NA,~ ## $ Age1stBaby <int> NA, NA, NA, NA, NA, NA, 27, NA, NA, NA, NA, NA, NA, N~ ## $ SleepHrsNight <int> 4, NA, 8, NA, 4, 4, 8, NA, NA, 6, NA, 9, NA, 7, NA, N~ ## $ SleepTrouble <fct> Yes, NA, No, NA, No, No, Yes, NA, NA, No, NA, No, NA,~ ## $ PhysActive <fct> No, NA, Yes, NA, No, Yes, No, NA, NA, Yes, NA, No, NA~ ## $ PhysActiveDays <int> NA, NA, 5, NA, NA, 2, NA, NA, NA, 4, NA, NA, NA, NA, ~ ## $ TVHrsDay <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~ ## $ CompHrsDay <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~ ## $ TVHrsDayChild <int> NA, 4, NA, 1, NA, NA, NA, NA, 1, NA, 3, NA, 2, NA, 5,~ ## $ CompHrsDayChild <int> NA, 1, NA, 1, NA, NA, NA, NA, 0, NA, 0, NA, 1, NA, 0,~ ## $ Alcohol12PlusYr <fct> Yes, NA, NA, NA, No, Yes, Yes, NA, NA, Yes, NA, No, N~ ## $ AlcoholDay <int> NA, NA, NA, NA, NA, 19, 2, NA, NA, 1, NA, NA, NA, NA,~ ## $ AlcoholYear <int> 0, NA, NA, NA, 0, 48, 20, NA, NA, 52, NA, 0, NA, NA, ~ ## $ SmokeNow <fct> No, NA, NA, NA, Yes, No, Yes, NA, NA, No, NA, No, NA,~ ## $ Smoke100 <fct> Yes, NA, NA, NA, Yes, Yes, Yes, NA, NA, Yes, NA, Yes,~ ## $ SmokeAge <int> 18, NA, NA, NA, 16, 15, 38, NA, NA, 16, NA, 21, NA, N~ ## $ Marijuana <fct> Yes, NA, NA, NA, NA, Yes, Yes, NA, NA, NA, NA, NA, NA~ ## $ AgeFirstMarij <int> 17, NA, NA, NA, NA, 10, 18, NA, NA, NA, NA, NA, NA, N~ ## $ RegularMarij <fct> No, NA, NA, NA, NA, Yes, No, NA, NA, NA, NA, NA, NA, ~ ## $ AgeRegMarij <int> NA, NA, NA, NA, NA, 12, NA, NA, NA, NA, NA, NA, NA, N~ ## $ HardDrugs <fct> Yes, NA, NA, NA, No, Yes, Yes, NA, NA, NA, NA, NA, NA~ ## $ SexEver <fct> Yes, NA, NA, NA, Yes, Yes, Yes, NA, NA, NA, NA, NA, N~ ## $ SexAge <int> 16, NA, NA, NA, 15, 9, 12, NA, NA, NA, NA, NA, NA, NA~ ## $ SexNumPartnLife <int> 8, NA, NA, NA, 4, 10, 10, NA, NA, NA, NA, NA, NA, NA,~ ## $ SexNumPartYear <int> 1, NA, NA, NA, NA, 1, 1, NA, NA, NA, NA, NA, NA, NA, ~ ## $ SameSex <fct> No, NA, NA, NA, No, No, Yes, NA, NA, NA, NA, NA, NA, ~ ## $ SexOrientation <fct> Heterosexual, NA, NA, NA, NA, Heterosexual, Heterosex~ ## $ WTINT2YR <dbl> 80100.544, 53901.104, 13953.078, 11664.899, 20090.339~ ## $ WTMEC2YR <dbl> 81528.772, 56995.035, 14509.279, 12041.635, 21000.339~ ## $ SDMVPSU <int> 1, 2, 1, 2, 2, 1, 2, 2, 2, 1, 1, 1, 2, 2, 1, 1, 1, 1,~ ## $ SDMVSTRA <int> 83, 79, 84, 86, 75, 88, 85, 86, 88, 77, 86, 79, 84, 7~ ## $ PregnantNow <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, U~ ``` --- name: live-coding background-color: var(--my-yellow) class: middle, center <svg viewBox="0 0 640 512" style="position:relative;display:inline-block;top:.1em;height:3em;color:#122140;" xmlns="http://www.w3.org/2000/svg"> <path d="M278.9 511.5l-61-17.7c-6.4-1.8-10-8.5-8.2-14.9L346.2 8.7c1.8-6.4 8.5-10 14.9-8.2l61 17.7c6.4 1.8 10 8.5 8.2 14.9L293.8 503.3c-1.9 6.4-8.5 10.1-14.9 8.2zm-114-112.2l43.5-46.4c4.6-4.9 4.3-12.7-.8-17.2L117 256l90.6-79.7c5.1-4.5 5.5-12.3.8-17.2l-43.5-46.4c-4.5-4.8-12.1-5.1-17-.5L3.8 247.2c-5.1 4.7-5.1 12.8 0 17.5l144.1 135.1c4.9 4.6 12.5 4.4 17-.5zm327.2.6l144.1-135.1c5.1-4.7 5.1-12.8 0-17.5L492.1 112.1c-4.8-4.5-12.4-4.3-17 .5L431.6 159c-4.6 4.9-4.3 12.7.8 17.2L523 256l-90.6 79.7c-5.1 4.5-5.5 12.3-.8 17.2l43.5 46.4c4.5 4.9 12.1 5.1 17 .6z"></path></svg><br> # Let's try it live together --- name: question class: inverse, middle, center {{content}} --- template: question <img src="img/tidyverse.png" width="25%"/> # How do we wrangle our data? --- # dplyr <img src="img/dplyr.png" class="title-hex"> The tidyverse `dplyr` package provides tools to *speed up the manipulation of data* + Uses data frames to create consistent structure + Uses the forward pipe operator, `%>%`, to facilitate transparency/readability + Enables faster Exploratory Data Analysis (EDA) with `ggplot2` --- # Select <img src="img/dplyr.png" class="title-hex"> ---- Choose which **columns** to keep or remove .pull-left[ To select columns of a data frame, use `select()`. The `select()` function extracts (subsets) variables (columns) and place them into a new smaller (temporary) data frame. The first argument to this function is the data frame (`penguins`), and the subsequent arguments are the columns to keep. ] .pull-right[ ```r select(DATASET, columns to keep) select(DATASET, -c(columns to remove)) ``` ] --- # Select <img src="img/dplyr.png" class="title-hex"> ---- .pull-left[ ```r select(NHANES::NHANESraw, ID, Gender, Age, Race1) ``` ] <!-- ref.label="select-example-1", --> .pull-right[ <table> <thead> <tr> <th style="text-align:right;"> ID </th> <th style="text-align:left;"> Gender </th> <th style="text-align:right;"> Age </th> <th style="text-align:left;"> Race1 </th> </tr> </thead> <tbody> <tr> <td style="text-align:right;"> 51624 </td> <td style="text-align:left;"> male </td> <td style="text-align:right;"> 34 </td> <td style="text-align:left;"> White </td> </tr> <tr> <td style="text-align:right;"> 51625 </td> <td style="text-align:left;"> male </td> <td style="text-align:right;"> 4 </td> <td style="text-align:left;"> Other </td> </tr> <tr> <td style="text-align:right;"> 51626 </td> <td style="text-align:left;"> male </td> <td style="text-align:right;"> 16 </td> <td style="text-align:left;"> Black </td> </tr> <tr> <td style="text-align:right;"> 51627 </td> <td style="text-align:left;"> male </td> <td style="text-align:right;"> 10 </td> <td style="text-align:left;"> Black </td> </tr> <tr> <td style="text-align:right;"> 51628 </td> <td style="text-align:left;"> female </td> <td style="text-align:right;"> 60 </td> <td style="text-align:left;"> Black </td> </tr> <tr> <td style="text-align:right;"> 51629 </td> <td style="text-align:left;"> male </td> <td style="text-align:right;"> 26 </td> <td style="text-align:left;"> Mexican </td> </tr> </tbody> </table> ] --- # Select Helpers <img src="img/dplyr.png" class="title-hex"> ---- - There are several "helper functions" you can use as arguments inside `select()` + These helper functions are actually part of the tidyselect package that is always installed and loaded with dplyr + See help for "language" from tidyselect or help on "starts_with" from tidyselect - These helper functions reduce the need to specify each and every variable you want or don't want. + Some data frames may have 1000s of variables in them - Variables names in a data frame are always character strings so the helper functions compare the variable names to the character patterns you provide. --- # Select Helpers <img src="img/dplyr.png" class="title-hex"> ---- .panelset[ .panel[.panel-name[starts_with] .pull-left[ - `starts_with("abc")`: matches names that begin with `"abc"`. ```r select(NHANES::NHANESraw, starts_with("BMI")) ``` ]<!-- end pull-left --> .pull-right[ <table> <thead> <tr> <th style="text-align:right;"> BMI </th> <th style="text-align:left;"> BMICatUnder20yrs </th> <th style="text-align:left;"> BMI_WHO </th> </tr> </thead> <tbody> <tr> <td style="text-align:right;"> 32.22 </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> 30.0_plus </td> </tr> <tr> <td style="text-align:right;"> 15.30 </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> 12.0_18.5 </td> </tr> <tr> <td style="text-align:right;"> 22.00 </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> 18.5_to_24.9 </td> </tr> <tr> <td style="text-align:right;"> 18.22 </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> 12.0_18.5 </td> </tr> <tr> <td style="text-align:right;"> 42.39 </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> 30.0_plus </td> </tr> <tr> <td style="text-align:right;"> 32.61 </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> 30.0_plus </td> </tr> </tbody> </table> ] <!-- end pull-right --> ] <!-- end panel --> .panel[.panel-name[ends_with] .pull-left[ - `ends_with("xyz")`: matches names that end with `"xyz"`. ```r select(NHANES::NHANESraw, ends_with("Ave")) ``` ]<!-- end pull-left --> .pull-right[ <table> <thead> <tr> <th style="text-align:right;"> BPSysAve </th> <th style="text-align:right;"> BPDiaAve </th> </tr> </thead> <tbody> <tr> <td style="text-align:right;"> 113 </td> <td style="text-align:right;"> 85 </td> </tr> <tr> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> </tr> <tr> <td style="text-align:right;"> 109 </td> <td style="text-align:right;"> 59 </td> </tr> <tr> <td style="text-align:right;"> 93 </td> <td style="text-align:right;"> 41 </td> </tr> <tr> <td style="text-align:right;"> 150 </td> <td style="text-align:right;"> 68 </td> </tr> <tr> <td style="text-align:right;"> 104 </td> <td style="text-align:right;"> 49 </td> </tr> </tbody> </table> ] <!-- end pull-right --> ] <!-- end panel --> .panel[.panel-name[contains] .pull-left[ - `contains("ijk")`: matches names that contain `"ijk"`. ```r select(NHANES::NHANESraw, contains("Active"), ID) ``` ]<!-- end pull-left --> .pull-right[ <table> <thead> <tr> <th style="text-align:left;"> PhysActive </th> <th style="text-align:right;"> PhysActiveDays </th> <th style="text-align:right;"> ID </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> No </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> 51624 </td> </tr> <tr> <td style="text-align:left;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> 51625 </td> </tr> <tr> <td style="text-align:left;"> Yes </td> <td style="text-align:right;"> 5 </td> <td style="text-align:right;"> 51626 </td> </tr> <tr> <td style="text-align:left;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> 51627 </td> </tr> <tr> <td style="text-align:left;"> No </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> 51628 </td> </tr> <tr> <td style="text-align:left;"> Yes </td> <td style="text-align:right;"> 2 </td> <td style="text-align:right;"> 51629 </td> </tr> </tbody> </table> ] <!-- end pull-right --> ] <!-- end panel --> .panel[.panel-name[more] - `matches("(.)\\1")`: selects variables that match a *regular expression* (REGEX). + This one matches any variables with two repeated characters, e.g., "YY". + You'll learn more about regular expressions in the class on strings and stringr. - `num_range("x", 1:3)`: matches `x1`, `x2`, and `x3` for whatever numerical sequence you provide + This can be useful for data sets with variables such as month1, month2, month3 ..., or FY18, FY19, FY20, .... ] <!-- end panel --> ] <!-- end panelset --> --- template: live-coding --- name: your-turn background-color: var(--my-red) class: inverse .left-column[ ## Your turn<br><svg viewBox="0 0 576 512" style="height:1em;position:relative;display:inline-block;top:.1em;" xmlns="http://www.w3.org/2000/svg"> <path d="M402.3 344.9l32-32c5-5 13.7-1.5 13.7 5.7V464c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V112c0-26.5 21.5-48 48-48h273.5c7.1 0 10.7 8.6 5.7 13.7l-32 32c-1.5 1.5-3.5 2.3-5.7 2.3H48v352h352V350.5c0-2.1.8-4.1 2.3-5.6zm156.6-201.8L296.3 405.7l-90.4 10c-26.2 2.9-48.5-19.2-45.6-45.6l10-90.4L432.9 17.1c22.9-22.9 59.9-22.9 82.7 0l43.2 43.2c22.9 22.9 22.9 60 .1 82.8zM460.1 174L402 115.9 216.2 301.8l-7.3 65.3 65.3-7.3L460.1 174zm64.8-79.7l-43.2-43.2c-4.1-4.1-10.8-4.1-14.8 0L436 82l58.1 58.1 30.9-30.9c4-4.2 4-10.8-.1-14.9z"></path></svg><br> ] .right-column[ ### Select some variables you would potentially like to work with ---- Think about which variables you may want to work with, <br>using `?NHANES` to help guide you. Here are some options to consider - Can you select which variables you'd like? - Can you deselect the variables you don't like? - Can you use a tidy selection helpers? - Can you combine two of the above techniques together? ]
03
:
00
--- # Filter <img src="img/dplyr.png" class="title-hex"> ---- Choose **Rows** Based on Values of Certain Variables .pull-left[ - The dplyr `filter()` function allows us to choose (subset/extract) only certain rows (observations) based on the values of the variables in those rows. - We create conditions and `filter()` selects the rows satisfying these conditions (return `TRUE`). - We can use logical comparisons, and use AND (`&`) or OR (`|`) as well ] .pull-right[ ```r filter(DATASET, column == "condition") ``` ] --- # Filter <img src="img/dplyr.png" class="title-hex"> ---- The filter function works with most boolean operators, namely: <table class="table table-striped table-hover table-condensed" style="margin-left: auto; margin-right: auto;"> <thead> <tr> <th style="text-align:left;"> Operator </th> <th style="text-align:left;"> Description </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> < </td> <td style="text-align:left;"> less than </td> </tr> <tr> <td style="text-align:left;"> <= </td> <td style="text-align:left;"> less than or equal to </td> </tr> <tr> <td style="text-align:left;"> > </td> <td style="text-align:left;"> greater than </td> </tr> <tr> <td style="text-align:left;"> >= </td> <td style="text-align:left;"> greater than or equal to </td> </tr> <tr> <td style="text-align:left;"> == </td> <td style="text-align:left;"> exactly equal to </td> </tr> <tr> <td style="text-align:left;"> != </td> <td style="text-align:left;"> not equal to </td> </tr> <tr> <td style="text-align:left;"> !x </td> <td style="text-align:left;"> Not x </td> </tr> <tr> <td style="text-align:left;"> x | y </td> <td style="text-align:left;"> x OR y </td> </tr> <tr> <td style="text-align:left;"> x & y </td> <td style="text-align:left;"> x AND y </td> </tr> </tbody> </table> --- # Filter <img src="img/dplyr.png" class="title-hex"> ---- <img src="img/logic-venn.png" width="553" /> --- template: live-coding --- name: your-turn background-color: var(--my-red) class: inverse .left-column[ ## Your turn<br><svg viewBox="0 0 576 512" style="height:1em;position:relative;display:inline-block;top:.1em;" xmlns="http://www.w3.org/2000/svg"> <path d="M402.3 344.9l32-32c5-5 13.7-1.5 13.7 5.7V464c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V112c0-26.5 21.5-48 48-48h273.5c7.1 0 10.7 8.6 5.7 13.7l-32 32c-1.5 1.5-3.5 2.3-5.7 2.3H48v352h352V350.5c0-2.1.8-4.1 2.3-5.6zm156.6-201.8L296.3 405.7l-90.4 10c-26.2 2.9-48.5-19.2-45.6-45.6l10-90.4L432.9 17.1c22.9-22.9 59.9-22.9 82.7 0l43.2 43.2c22.9 22.9 22.9 60 .1 82.8zM460.1 174L402 115.9 216.2 301.8l-7.3 65.3 65.3-7.3L460.1 174zm64.8-79.7l-43.2-43.2c-4.1-4.1-10.8-4.1-14.8 0L436 82l58.1 58.1 30.9-30.9c4-4.2 4-10.8-.1-14.9z"></path></svg><br> ] .right-column[ ### Let's filter our dataset based on some conditions ---- Here are some options to attempt: Can you only show the rows... - Where `Weight` is greater than 65? - Where the individual does not have `Diabetes`? - Where `SmokeNow` is missing? not missing? - Can you combine the above filters together? ]
03
:
00
--- # The Pipe <img src="img/pipe.png" class="title-hex"> ---- What if you want to select and filter at the same time? There are three ways to do this: use intermediate steps, nested functions, or pipes. With intermediate steps, you create a temporary data frame and use that as input to the next function, like this: ```r nhanes_steps <- filter(NHANES::NHANESraw, Weight > 65) nhanes_steps <- select(nhanes_steps, Weight, Diabetes, SmokeNow) ``` This is readable, but can clutter up your workspace with lots of objects that you have to name individually. With multiple steps, that can be hard to keep track of. --- # The Pipe <img src="img/pipe.png" class="title-hex"> ---- You can also nest functions (i.e. one function inside of another), like this: ```r nhanes_nested <- select(filter(NHANES::NHANESraw,Weight > 65), Weight, Diabetes, SmokeNow) ``` This is handy, but can be difficult to read if too many functions are nested, as R evaluates the expression from the inside out (in this case, filtering, then selecting). --- # The Pipe <img src="img/pipe.png" class="title-hex"> ---- The last option, *pipes*, are a powerful addition to R. Pipes let you take the output of one function and send it directly to the next, which is useful when you need to do many things to the same dataset. Pipes in R look like `%>%` and are made available via the **`magrittr`** package, installed automatically with **`dplyr`**. If you use RStudio, you can type the pipe with <kbd>Ctrl</kbd> + <kbd>Shift</kbd> + <kbd>M</kbd> if you have a PC or <kbd>Cmd</kbd> + <kbd>Shift</kbd> + <kbd>M</kbd> if you have a Mac. ??? In the above code, we use the pipe to send the `penguins` dataset first through `filter()` to keep rows where `island` is "Biscoe", then through `select()` to keep only the `species`, `body_mass_g`,and `sex` columns. Since `%>%` takes the object on its left and passes it as the first argument to the function on its right, we don't need to explicitly include the data frame as an argument to the `filter()` and `select()` functions any more. Some may find it helpful to read the pipe like the word "then". For instance, in the above example, we take the data frame `penguins`, *then* we `filter` for rows with `island == "Biscoe"`, *then* we `select` columns `species`, `body_mass_g`,and `sex`. The **`dplyr`** functions by themselves are somewhat simple, but by combining them into linear workflows with the pipe, we can accomplish more complex manipulations of data frames. Let's see what this looks like.. --- count: false .panel1-pipe-example-user[ ```r *NHANES::NHANESraw ``` ] .panel2-pipe-example-user[ ``` ## # A tibble: 20,293 x 78 ## ID SurveyYr Gender Age AgeMonths Race1 Race3 Education MaritalStatus ## <int> <fct> <fct> <int> <int> <fct> <fct> <fct> <fct> ## 1 51624 2009_10 male 34 409 White <NA> High School Married ## 2 51625 2009_10 male 4 49 Other <NA> <NA> <NA> ## 3 51626 2009_10 male 16 202 Black <NA> <NA> <NA> ## 4 51627 2009_10 male 10 131 Black <NA> <NA> <NA> ## 5 51628 2009_10 female 60 722 Black <NA> High School Widowed ## 6 51629 2009_10 male 26 313 Mexican <NA> 9 - 11th G~ Married ## 7 51630 2009_10 female 49 596 White <NA> Some Colle~ LivePartner ## 8 51631 2009_10 female 1 12 White <NA> <NA> <NA> ## 9 51632 2009_10 male 10 124 Hispan~ <NA> <NA> <NA> ## 10 51633 2009_10 male 80 NA White <NA> Some Colle~ Married ## # ... with 20,283 more rows, and 69 more variables: HHIncome <fct>, ## # HHIncomeMid <int>, Poverty <dbl>, HomeRooms <int>, HomeOwn <fct>, ## # Work <fct>, Weight <dbl>, Length <dbl>, HeadCirc <dbl>, Height <dbl>, ## # BMI <dbl>, BMICatUnder20yrs <fct>, BMI_WHO <fct>, Pulse <int>, ## # BPSysAve <int>, BPDiaAve <int>, BPSys1 <int>, BPDia1 <int>, BPSys2 <int>, ## # BPDia2 <int>, BPSys3 <int>, BPDia3 <int>, Testosterone <dbl>, ## # DirectChol <dbl>, TotChol <dbl>, UrineVol1 <int>, UrineFlow1 <dbl>, ## # UrineVol2 <int>, UrineFlow2 <dbl>, Diabetes <fct>, DiabetesAge <int>, ## # HealthGen <fct>, DaysPhysHlthBad <int>, DaysMentHlthBad <int>, ## # LittleInterest <fct>, Depressed <fct>, nPregnancies <int>, nBabies <int>, ## # Age1stBaby <int>, SleepHrsNight <int>, SleepTrouble <fct>, ## # PhysActive <fct>, PhysActiveDays <int>, TVHrsDay <fct>, CompHrsDay <fct>, ## # TVHrsDayChild <int>, CompHrsDayChild <int>, Alcohol12PlusYr <fct>, ## # AlcoholDay <int>, AlcoholYear <int>, SmokeNow <fct>, Smoke100 <fct>, ## # SmokeAge <int>, Marijuana <fct>, AgeFirstMarij <int>, RegularMarij <fct>, ## # AgeRegMarij <int>, HardDrugs <fct>, SexEver <fct>, SexAge <int>, ## # SexNumPartnLife <int>, SexNumPartYear <int>, SameSex <fct>, ## # SexOrientation <fct>, WTINT2YR <dbl>, WTMEC2YR <dbl>, SDMVPSU <int>, ## # SDMVSTRA <int>, PregnantNow <fct> ``` ] --- count: false .panel1-pipe-example-user[ ```r NHANES::NHANESraw %>% * filter(Weight > 65) ``` ] .panel2-pipe-example-user[ ``` ## # A tibble: 9,916 x 78 ## ID SurveyYr Gender Age AgeMonths Race1 Race3 Education MaritalStatus ## <int> <fct> <fct> <int> <int> <fct> <fct> <fct> <fct> ## 1 51624 2009_10 male 34 409 White <NA> High School Married ## 2 51626 2009_10 male 16 202 Black <NA> <NA> <NA> ## 3 51628 2009_10 female 60 722 Black <NA> High School Widowed ## 4 51629 2009_10 male 26 313 Mexican <NA> 9 - 11th G~ Married ## 5 51630 2009_10 female 49 596 White <NA> Some Colle~ LivePartner ## 6 51633 2009_10 male 80 NA White <NA> Some Colle~ Married ## 7 51635 2009_10 male 80 NA White <NA> 9 - 11th G~ Widowed ## 8 51640 2009_10 male 17 208 Hispan~ <NA> <NA> <NA> ## 9 51643 2009_10 female 42 514 Black <NA> 9 - 11th G~ Married ## 10 51645 2009_10 male 66 795 Mexican <NA> 9 - 11th G~ Married ## # ... with 9,906 more rows, and 69 more variables: HHIncome <fct>, ## # HHIncomeMid <int>, Poverty <dbl>, HomeRooms <int>, HomeOwn <fct>, ## # Work <fct>, Weight <dbl>, Length <dbl>, HeadCirc <dbl>, Height <dbl>, ## # BMI <dbl>, BMICatUnder20yrs <fct>, BMI_WHO <fct>, Pulse <int>, ## # BPSysAve <int>, BPDiaAve <int>, BPSys1 <int>, BPDia1 <int>, BPSys2 <int>, ## # BPDia2 <int>, BPSys3 <int>, BPDia3 <int>, Testosterone <dbl>, ## # DirectChol <dbl>, TotChol <dbl>, UrineVol1 <int>, UrineFlow1 <dbl>, ## # UrineVol2 <int>, UrineFlow2 <dbl>, Diabetes <fct>, DiabetesAge <int>, ## # HealthGen <fct>, DaysPhysHlthBad <int>, DaysMentHlthBad <int>, ## # LittleInterest <fct>, Depressed <fct>, nPregnancies <int>, nBabies <int>, ## # Age1stBaby <int>, SleepHrsNight <int>, SleepTrouble <fct>, ## # PhysActive <fct>, PhysActiveDays <int>, TVHrsDay <fct>, CompHrsDay <fct>, ## # TVHrsDayChild <int>, CompHrsDayChild <int>, Alcohol12PlusYr <fct>, ## # AlcoholDay <int>, AlcoholYear <int>, SmokeNow <fct>, Smoke100 <fct>, ## # SmokeAge <int>, Marijuana <fct>, AgeFirstMarij <int>, RegularMarij <fct>, ## # AgeRegMarij <int>, HardDrugs <fct>, SexEver <fct>, SexAge <int>, ## # SexNumPartnLife <int>, SexNumPartYear <int>, SameSex <fct>, ## # SexOrientation <fct>, WTINT2YR <dbl>, WTMEC2YR <dbl>, SDMVPSU <int>, ## # SDMVSTRA <int>, PregnantNow <fct> ``` ] --- count: false .panel1-pipe-example-user[ ```r NHANES::NHANESraw %>% filter(Weight > 65) %>% * select(Weight, * Diabetes, * SmokeNow) ``` ] .panel2-pipe-example-user[ ``` ## # A tibble: 9,916 x 3 ## Weight Diabetes SmokeNow ## <dbl> <fct> <fct> ## 1 87.4 No No ## 2 72.3 No <NA> ## 3 117. Yes Yes ## 4 97.6 No No ## 5 86.7 No Yes ## 6 79.1 No No ## 7 89.6 Yes No ## 8 74.7 No <NA> ## 9 108. Yes <NA> ## 10 82.9 No <NA> ## # ... with 9,906 more rows ``` ] <style> .panel1-pipe-example-user { color: black; width: 38.6060606060606%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel2-pipe-example-user { color: black; width: 59.3939393939394%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel3-pipe-example-user { color: black; width: NA%; hight: 33%; float: left; padding-left: 1%; font-size: 80% } </style> --- name: your-turn background-color: var(--my-red) class: inverse .left-column[ ## Your turn<br><svg viewBox="0 0 576 512" style="height:1em;position:relative;display:inline-block;top:.1em;" xmlns="http://www.w3.org/2000/svg"> <path d="M402.3 344.9l32-32c5-5 13.7-1.5 13.7 5.7V464c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V112c0-26.5 21.5-48 48-48h273.5c7.1 0 10.7 8.6 5.7 13.7l-32 32c-1.5 1.5-3.5 2.3-5.7 2.3H48v352h352V350.5c0-2.1.8-4.1 2.3-5.6zm156.6-201.8L296.3 405.7l-90.4 10c-26.2 2.9-48.5-19.2-45.6-45.6l10-90.4L432.9 17.1c22.9-22.9 59.9-22.9 82.7 0l43.2 43.2c22.9 22.9 22.9 60 .1 82.8zM460.1 174L402 115.9 216.2 301.8l-7.3 65.3 65.3-7.3L460.1 174zm64.8-79.7l-43.2-43.2c-4.1-4.1-10.8-4.1-14.8 0L436 82l58.1 58.1 30.9-30.9c4-4.2 4-10.8-.1-14.9z"></path></svg><br> ] .right-column[ ### Let's filter and select variables in our dataset ---- Using pipes, show the columns for `ID`, `Smoke100` and `SleepHrsNight` for respondents who have an average of 2 or more drinks year day when they consume alcohol (`AlcoholDay`) and who has consumed more than 100 cigarettes in their life (`Smoke100`) Save this to a new data frame. How many rows and columns does it have? ]
04
:
00
--- count: false .panel1-piped-2-auto[ ```r *NHANES::NHANESraw ``` ] .panel2-piped-2-auto[ ``` ## # A tibble: 20,293 x 78 ## ID SurveyYr Gender Age AgeMonths Race1 Race3 Education MaritalStatus ## <int> <fct> <fct> <int> <int> <fct> <fct> <fct> <fct> ## 1 51624 2009_10 male 34 409 White <NA> High School Married ## 2 51625 2009_10 male 4 49 Other <NA> <NA> <NA> ## 3 51626 2009_10 male 16 202 Black <NA> <NA> <NA> ## 4 51627 2009_10 male 10 131 Black <NA> <NA> <NA> ## 5 51628 2009_10 female 60 722 Black <NA> High School Widowed ## 6 51629 2009_10 male 26 313 Mexican <NA> 9 - 11th G~ Married ## 7 51630 2009_10 female 49 596 White <NA> Some Colle~ LivePartner ## 8 51631 2009_10 female 1 12 White <NA> <NA> <NA> ## 9 51632 2009_10 male 10 124 Hispan~ <NA> <NA> <NA> ## 10 51633 2009_10 male 80 NA White <NA> Some Colle~ Married ## # ... with 20,283 more rows, and 69 more variables: HHIncome <fct>, ## # HHIncomeMid <int>, Poverty <dbl>, HomeRooms <int>, HomeOwn <fct>, ## # Work <fct>, Weight <dbl>, Length <dbl>, HeadCirc <dbl>, Height <dbl>, ## # BMI <dbl>, BMICatUnder20yrs <fct>, BMI_WHO <fct>, Pulse <int>, ## # BPSysAve <int>, BPDiaAve <int>, BPSys1 <int>, BPDia1 <int>, BPSys2 <int>, ## # BPDia2 <int>, BPSys3 <int>, BPDia3 <int>, Testosterone <dbl>, ## # DirectChol <dbl>, TotChol <dbl>, UrineVol1 <int>, UrineFlow1 <dbl>, ## # UrineVol2 <int>, UrineFlow2 <dbl>, Diabetes <fct>, DiabetesAge <int>, ## # HealthGen <fct>, DaysPhysHlthBad <int>, DaysMentHlthBad <int>, ## # LittleInterest <fct>, Depressed <fct>, nPregnancies <int>, nBabies <int>, ## # Age1stBaby <int>, SleepHrsNight <int>, SleepTrouble <fct>, ## # PhysActive <fct>, PhysActiveDays <int>, TVHrsDay <fct>, CompHrsDay <fct>, ## # TVHrsDayChild <int>, CompHrsDayChild <int>, Alcohol12PlusYr <fct>, ## # AlcoholDay <int>, AlcoholYear <int>, SmokeNow <fct>, Smoke100 <fct>, ## # SmokeAge <int>, Marijuana <fct>, AgeFirstMarij <int>, RegularMarij <fct>, ## # AgeRegMarij <int>, HardDrugs <fct>, SexEver <fct>, SexAge <int>, ## # SexNumPartnLife <int>, SexNumPartYear <int>, SameSex <fct>, ## # SexOrientation <fct>, WTINT2YR <dbl>, WTMEC2YR <dbl>, SDMVPSU <int>, ## # SDMVSTRA <int>, PregnantNow <fct> ``` ] --- count: false .panel1-piped-2-auto[ ```r NHANES::NHANESraw %>% * filter(AlcoholDay >= 2, * Smoke100 == "Yes") ``` ] .panel2-piped-2-auto[ ``` ## # A tibble: 2,424 x 78 ## ID SurveyYr Gender Age AgeMonths Race1 Race3 Education MaritalStatus ## <int> <fct> <fct> <int> <int> <fct> <fct> <fct> <fct> ## 1 51629 2009_10 male 26 313 Mexican <NA> 9 - 11th G~ Married ## 2 51630 2009_10 female 49 596 White <NA> Some Colle~ LivePartner ## 3 51648 2009_10 male 28 338 Mexican <NA> 8th Grade NeverMarried ## 4 51668 2009_10 female 70 842 Black <NA> 9 - 11th G~ Divorced ## 5 51673 2009_10 female 30 360 White <NA> High School Married ## 6 51677 2009_10 male 33 404 White <NA> High School Married ## 7 51678 2009_10 male 60 721 White <NA> High School Married ## 8 51680 2009_10 female 60 727 Black <NA> Some Colle~ NeverMarried ## 9 51687 2009_10 male 78 937 White <NA> College Gr~ Married ## 10 51697 2009_10 male 27 328 Black <NA> 9 - 11th G~ Married ## # ... with 2,414 more rows, and 69 more variables: HHIncome <fct>, ## # HHIncomeMid <int>, Poverty <dbl>, HomeRooms <int>, HomeOwn <fct>, ## # Work <fct>, Weight <dbl>, Length <dbl>, HeadCirc <dbl>, Height <dbl>, ## # BMI <dbl>, BMICatUnder20yrs <fct>, BMI_WHO <fct>, Pulse <int>, ## # BPSysAve <int>, BPDiaAve <int>, BPSys1 <int>, BPDia1 <int>, BPSys2 <int>, ## # BPDia2 <int>, BPSys3 <int>, BPDia3 <int>, Testosterone <dbl>, ## # DirectChol <dbl>, TotChol <dbl>, UrineVol1 <int>, UrineFlow1 <dbl>, ## # UrineVol2 <int>, UrineFlow2 <dbl>, Diabetes <fct>, DiabetesAge <int>, ## # HealthGen <fct>, DaysPhysHlthBad <int>, DaysMentHlthBad <int>, ## # LittleInterest <fct>, Depressed <fct>, nPregnancies <int>, nBabies <int>, ## # Age1stBaby <int>, SleepHrsNight <int>, SleepTrouble <fct>, ## # PhysActive <fct>, PhysActiveDays <int>, TVHrsDay <fct>, CompHrsDay <fct>, ## # TVHrsDayChild <int>, CompHrsDayChild <int>, Alcohol12PlusYr <fct>, ## # AlcoholDay <int>, AlcoholYear <int>, SmokeNow <fct>, Smoke100 <fct>, ## # SmokeAge <int>, Marijuana <fct>, AgeFirstMarij <int>, RegularMarij <fct>, ## # AgeRegMarij <int>, HardDrugs <fct>, SexEver <fct>, SexAge <int>, ## # SexNumPartnLife <int>, SexNumPartYear <int>, SameSex <fct>, ## # SexOrientation <fct>, WTINT2YR <dbl>, WTMEC2YR <dbl>, SDMVPSU <int>, ## # SDMVSTRA <int>, PregnantNow <fct> ``` ] --- count: false .panel1-piped-2-auto[ ```r NHANES::NHANESraw %>% filter(AlcoholDay >= 2, Smoke100 == "Yes") %>% * select(ID, Smoke100, SleepHrsNight) ``` ] .panel2-piped-2-auto[ ``` ## # A tibble: 2,424 x 3 ## ID Smoke100 SleepHrsNight ## <int> <fct> <int> ## 1 51629 Yes 4 ## 2 51630 Yes 8 ## 3 51648 Yes 8 ## 4 51668 Yes 4 ## 5 51673 Yes 8 ## 6 51677 Yes 6 ## 7 51678 Yes 6 ## 8 51680 Yes 6 ## 9 51687 Yes 6 ## 10 51697 Yes 7 ## # ... with 2,414 more rows ``` ] <style> .panel1-piped-2-auto { color: black; width: 38.6060606060606%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel2-piped-2-auto { color: black; width: 59.3939393939394%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel3-piped-2-auto { color: black; width: NA%; hight: 33%; float: left; padding-left: 1%; font-size: 80% } </style> --- # Arrange <img src="img/dplyr.png" class="title-hex"> ---- .panelset[ .panel[.panel-name[Arrange] Rearrange the Order of the Rows - Use `arrange()` to order rows by the value of one or more variables. ] <!-- end panel --> .panel[.panel-name[Ascending] .pull-left[ The **default is to arrange in ascending order** from top to bottom so the lowest number is on top. ```r NHANES::NHANESraw %>% arrange(SleepHrsNight) ``` ]<!-- end pull-left --> .pull-right[ <table> <thead> <tr> <th style="text-align:right;"> ID </th> <th style="text-align:left;"> SurveyYr </th> <th style="text-align:left;"> Gender </th> <th style="text-align:right;"> Age </th> <th style="text-align:right;"> AgeMonths </th> <th style="text-align:left;"> Race1 </th> <th style="text-align:left;"> Race3 </th> <th style="text-align:left;"> Education </th> <th style="text-align:left;"> MaritalStatus </th> <th style="text-align:left;"> HHIncome </th> <th style="text-align:right;"> HHIncomeMid </th> <th style="text-align:right;"> Poverty </th> <th style="text-align:right;"> HomeRooms </th> <th style="text-align:left;"> HomeOwn </th> <th style="text-align:left;"> Work </th> <th style="text-align:right;"> Weight </th> <th style="text-align:right;"> Length </th> <th style="text-align:right;"> HeadCirc </th> <th style="text-align:right;"> Height </th> <th style="text-align:right;"> BMI </th> <th style="text-align:left;"> BMICatUnder20yrs </th> <th style="text-align:left;"> BMI_WHO </th> <th style="text-align:right;"> Pulse </th> <th style="text-align:right;"> BPSysAve </th> <th style="text-align:right;"> BPDiaAve </th> <th style="text-align:right;"> BPSys1 </th> <th style="text-align:right;"> BPDia1 </th> <th style="text-align:right;"> BPSys2 </th> <th style="text-align:right;"> BPDia2 </th> <th style="text-align:right;"> BPSys3 </th> <th style="text-align:right;"> BPDia3 </th> <th style="text-align:right;"> Testosterone </th> <th style="text-align:right;"> DirectChol </th> <th style="text-align:right;"> TotChol </th> <th style="text-align:right;"> UrineVol1 </th> <th style="text-align:right;"> UrineFlow1 </th> <th style="text-align:right;"> UrineVol2 </th> <th style="text-align:right;"> UrineFlow2 </th> <th style="text-align:left;"> Diabetes </th> <th style="text-align:right;"> DiabetesAge </th> <th style="text-align:left;"> HealthGen </th> <th style="text-align:right;"> DaysPhysHlthBad </th> <th style="text-align:right;"> DaysMentHlthBad </th> <th style="text-align:left;"> LittleInterest </th> <th style="text-align:left;"> Depressed </th> <th style="text-align:right;"> nPregnancies </th> <th style="text-align:right;"> nBabies </th> <th style="text-align:right;"> Age1stBaby </th> <th style="text-align:right;"> SleepHrsNight </th> <th style="text-align:left;"> SleepTrouble </th> <th style="text-align:left;"> PhysActive </th> <th style="text-align:right;"> PhysActiveDays </th> <th style="text-align:left;"> TVHrsDay </th> <th style="text-align:left;"> CompHrsDay </th> <th style="text-align:right;"> TVHrsDayChild </th> <th style="text-align:right;"> CompHrsDayChild </th> <th style="text-align:left;"> Alcohol12PlusYr </th> <th style="text-align:right;"> AlcoholDay </th> <th style="text-align:right;"> AlcoholYear </th> <th style="text-align:left;"> SmokeNow </th> <th style="text-align:left;"> Smoke100 </th> <th style="text-align:right;"> SmokeAge </th> <th style="text-align:left;"> Marijuana </th> <th style="text-align:right;"> AgeFirstMarij </th> <th style="text-align:left;"> RegularMarij </th> <th style="text-align:right;"> AgeRegMarij </th> <th style="text-align:left;"> HardDrugs </th> <th style="text-align:left;"> SexEver </th> <th style="text-align:right;"> SexAge </th> <th style="text-align:right;"> SexNumPartnLife </th> <th style="text-align:right;"> SexNumPartYear </th> <th style="text-align:left;"> SameSex </th> <th style="text-align:left;"> SexOrientation </th> <th style="text-align:right;"> WTINT2YR </th> <th style="text-align:right;"> WTMEC2YR </th> <th style="text-align:right;"> SDMVPSU </th> <th style="text-align:right;"> SDMVSTRA </th> <th style="text-align:left;"> PregnantNow </th> </tr> </thead> <tbody> <tr> <td style="text-align:right;"> 52120 </td> <td style="text-align:left;"> 2009_10 </td> <td style="text-align:left;"> female </td> <td style="text-align:right;"> 25 </td> <td style="text-align:right;"> 300 </td> <td style="text-align:left;"> White </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> 9 - 11th Grade </td> <td style="text-align:left;"> Married </td> <td style="text-align:left;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> 5 </td> <td style="text-align:left;"> Rent </td> <td style="text-align:left;"> NotWorking </td> <td style="text-align:right;"> 102.4 </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> 159.1 </td> <td style="text-align:right;"> 40.45 </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> 30.0_plus </td> <td style="text-align:right;"> 96 </td> <td style="text-align:right;"> 114 </td> <td style="text-align:right;"> 56 </td> <td style="text-align:right;"> 108 </td> <td style="text-align:right;"> 56 </td> <td style="text-align:right;"> 114 </td> <td style="text-align:right;"> 58 </td> <td style="text-align:right;"> 114 </td> <td style="text-align:right;"> 54 </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> 1.76 </td> <td style="text-align:right;"> 6.49 </td> <td style="text-align:right;"> 122 </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> No </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> Good </td> <td style="text-align:right;"> 15 </td> <td style="text-align:right;"> 30 </td> <td style="text-align:left;"> Several </td> <td style="text-align:left;"> Most </td> <td style="text-align:right;"> 4 </td> <td style="text-align:right;"> 1 </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> 2 </td> <td style="text-align:left;"> Yes </td> <td style="text-align:left;"> No </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> Yes </td> <td style="text-align:right;"> 5 </td> <td style="text-align:right;"> 9 </td> <td style="text-align:left;"> Yes </td> <td style="text-align:left;"> Yes </td> <td style="text-align:right;"> 14 </td> <td style="text-align:left;"> Yes </td> <td style="text-align:right;"> 18 </td> <td style="text-align:left;"> Yes </td> <td style="text-align:right;"> 24 </td> <td style="text-align:left;"> No </td> <td style="text-align:left;"> Yes </td> <td style="text-align:right;"> 9 </td> <td style="text-align:right;"> 30 </td> <td style="text-align:right;"> 2 </td> <td style="text-align:left;"> Yes </td> <td style="text-align:left;"> Bisexual </td> <td style="text-align:right;"> 24146.60 </td> <td style="text-align:right;"> 24164.01 </td> <td style="text-align:right;"> 1 </td> <td style="text-align:right;"> 83 </td> <td style="text-align:left;"> Yes </td> </tr> <tr> <td style="text-align:right;"> 53243 </td> <td style="text-align:left;"> 2009_10 </td> <td style="text-align:left;"> female </td> <td style="text-align:right;"> 43 </td> <td style="text-align:right;"> 523 </td> <td style="text-align:left;"> White </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> Some College </td> <td style="text-align:left;"> Divorced </td> <td style="text-align:left;"> 10000-14999 </td> <td style="text-align:right;"> 12500 </td> <td style="text-align:right;"> 0.76 </td> <td style="text-align:right;"> 5 </td> <td style="text-align:left;"> Own </td> <td style="text-align:left;"> Looking </td> <td style="text-align:right;"> 98.4 </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> 163.4 </td> <td style="text-align:right;"> 36.85 </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> 30.0_plus </td> <td style="text-align:right;"> 64 </td> <td style="text-align:right;"> 129 </td> <td style="text-align:right;"> 80 </td> <td style="text-align:right;"> 132 </td> <td style="text-align:right;"> 80 </td> <td style="text-align:right;"> 136 </td> <td style="text-align:right;"> 80 </td> <td style="text-align:right;"> 122 </td> <td style="text-align:right;"> 80 </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> 0.83 </td> <td style="text-align:right;"> 6.44 </td> <td style="text-align:right;"> 58 </td> <td style="text-align:right;"> 0.307 </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> No </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> Fair </td> <td style="text-align:right;"> 0 </td> <td style="text-align:right;"> 0 </td> <td style="text-align:left;"> None </td> <td style="text-align:left;"> None </td> <td style="text-align:right;"> 4 </td> <td style="text-align:right;"> 2 </td> <td style="text-align:right;"> 18 </td> <td style="text-align:right;"> 2 </td> <td style="text-align:left;"> Yes </td> <td style="text-align:left;"> No </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> Yes </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> 0 </td> <td style="text-align:left;"> Yes </td> <td style="text-align:left;"> Yes </td> <td style="text-align:right;"> 13 </td> <td style="text-align:left;"> Yes </td> <td style="text-align:right;"> 24 </td> <td style="text-align:left;"> No </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> Yes </td> <td style="text-align:left;"> Yes </td> <td style="text-align:right;"> 16 </td> <td style="text-align:right;"> 10 </td> <td style="text-align:right;"> 1 </td> <td style="text-align:left;"> No </td> <td style="text-align:left;"> Heterosexual </td> <td style="text-align:right;"> 20147.96 </td> <td style="text-align:right;"> 20120.87 </td> <td style="text-align:right;"> 2 </td> <td style="text-align:right;"> 81 </td> <td style="text-align:left;"> No </td> </tr> <tr> <td style="text-align:right;"> 53559 </td> <td style="text-align:left;"> 2009_10 </td> <td style="text-align:left;"> male </td> <td style="text-align:right;"> 54 </td> <td style="text-align:right;"> 659 </td> <td style="text-align:left;"> Mexican </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> High School </td> <td style="text-align:left;"> Married </td> <td style="text-align:left;"> 25000-34999 </td> <td style="text-align:right;"> 30000 </td> <td style="text-align:right;"> 1.40 </td> <td style="text-align:right;"> 4 </td> <td style="text-align:left;"> Rent </td> <td style="text-align:left;"> NotWorking </td> <td style="text-align:right;"> 77.4 </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> 164.3 </td> <td style="text-align:right;"> 28.67 </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> 25.0_to_29.9 </td> <td style="text-align:right;"> 62 </td> <td style="text-align:right;"> 128 </td> <td style="text-align:right;"> 66 </td> <td style="text-align:right;"> 122 </td> <td style="text-align:right;"> 68 </td> <td style="text-align:right;"> 124 </td> <td style="text-align:right;"> 66 </td> <td style="text-align:right;"> 132 </td> <td style="text-align:right;"> 66 </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> 0.96 </td> <td style="text-align:right;"> 3.28 </td> <td style="text-align:right;"> 88 </td> <td style="text-align:right;"> 0.351 </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> No </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> Poor </td> <td style="text-align:right;"> 20 </td> <td style="text-align:right;"> 15 </td> <td style="text-align:left;"> Several </td> <td style="text-align:left;"> Several </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> 2 </td> <td style="text-align:left;"> Yes </td> <td style="text-align:left;"> No </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> Yes </td> <td style="text-align:right;"> 15 </td> <td style="text-align:right;"> 1 </td> <td style="text-align:left;"> Yes </td> <td style="text-align:left;"> Yes </td> <td style="text-align:right;"> 18 </td> <td style="text-align:left;"> Yes </td> <td style="text-align:right;"> 30 </td> <td style="text-align:left;"> No </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> No </td> <td style="text-align:left;"> Yes </td> <td style="text-align:right;"> 17 </td> <td style="text-align:right;"> 60 </td> <td style="text-align:right;"> 1 </td> <td style="text-align:left;"> No </td> <td style="text-align:left;"> Heterosexual </td> <td style="text-align:right;"> 13702.50 </td> <td style="text-align:right;"> 14052.93 </td> <td style="text-align:right;"> 2 </td> <td style="text-align:right;"> 80 </td> <td style="text-align:left;"> NA </td> </tr> <tr> <td style="text-align:right;"> 53594 </td> <td style="text-align:left;"> 2009_10 </td> <td style="text-align:left;"> male </td> <td style="text-align:right;"> 49 </td> <td style="text-align:right;"> 598 </td> <td style="text-align:left;"> Black </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> Some College </td> <td style="text-align:left;"> Married </td> <td style="text-align:left;"> 45000-54999 </td> <td style="text-align:right;"> 50000 </td> <td style="text-align:right;"> 3.29 </td> <td style="text-align:right;"> 5 </td> <td style="text-align:left;"> Own </td> <td style="text-align:left;"> NotWorking </td> <td style="text-align:right;"> 90.7 </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> 166.5 </td> <td style="text-align:right;"> 32.72 </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> 30.0_plus </td> <td style="text-align:right;"> 72 </td> <td style="text-align:right;"> 123 </td> <td style="text-align:right;"> 64 </td> <td style="text-align:right;"> 124 </td> <td style="text-align:right;"> 74 </td> <td style="text-align:right;"> 122 </td> <td style="text-align:right;"> 62 </td> <td style="text-align:right;"> 124 </td> <td style="text-align:right;"> 66 </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> 2.12 </td> <td style="text-align:right;"> 5.59 </td> <td style="text-align:right;"> 49 </td> <td style="text-align:right;"> 0.336 </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> Yes </td> <td style="text-align:right;"> 48 </td> <td style="text-align:left;"> Poor </td> <td style="text-align:right;"> 5 </td> <td style="text-align:right;"> 3 </td> <td style="text-align:left;"> None </td> <td style="text-align:left;"> Several </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> 2 </td> <td style="text-align:left;"> Yes </td> <td style="text-align:left;"> No </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> Yes </td> <td style="text-align:right;"> 3 </td> <td style="text-align:right;"> 36 </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> No </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> Yes </td> <td style="text-align:right;"> 20 </td> <td style="text-align:left;"> No </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> No </td> <td style="text-align:left;"> Yes </td> <td style="text-align:right;"> 17 </td> <td style="text-align:right;"> 10 </td> <td style="text-align:right;"> 1 </td> <td style="text-align:left;"> No </td> <td style="text-align:left;"> Heterosexual </td> <td style="text-align:right;"> 20561.67 </td> <td style="text-align:right;"> 20282.93 </td> <td style="text-align:right;"> 1 </td> <td style="text-align:right;"> 84 </td> <td style="text-align:left;"> NA </td> </tr> <tr> <td style="text-align:right;"> 54482 </td> <td style="text-align:left;"> 2009_10 </td> <td style="text-align:left;"> female </td> <td style="text-align:right;"> 50 </td> <td style="text-align:right;"> 602 </td> <td style="text-align:left;"> White </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> 8th Grade </td> <td style="text-align:left;"> LivePartner </td> <td style="text-align:left;"> 35000-44999 </td> <td style="text-align:right;"> 40000 </td> <td style="text-align:right;"> 1.47 </td> <td style="text-align:right;"> 5 </td> <td style="text-align:left;"> Own </td> <td style="text-align:left;"> NotWorking </td> <td style="text-align:right;"> 88.7 </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> 169.2 </td> <td style="text-align:right;"> 30.98 </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> 30.0_plus </td> <td style="text-align:right;"> 76 </td> <td style="text-align:right;"> 114 </td> <td style="text-align:right;"> 79 </td> <td style="text-align:right;"> 112 </td> <td style="text-align:right;"> 72 </td> <td style="text-align:right;"> 110 </td> <td style="text-align:right;"> 74 </td> <td style="text-align:right;"> 118 </td> <td style="text-align:right;"> 84 </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> 0.85 </td> <td style="text-align:right;"> 4.22 </td> <td style="text-align:right;"> 68 </td> <td style="text-align:right;"> 0.463 </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> No </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> Good </td> <td style="text-align:right;"> 20 </td> <td style="text-align:right;"> 30 </td> <td style="text-align:left;"> None </td> <td style="text-align:left;"> Several </td> <td style="text-align:right;"> 1 </td> <td style="text-align:right;"> 1 </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> 2 </td> <td style="text-align:left;"> Yes </td> <td style="text-align:left;"> No </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> Yes </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> 0 </td> <td style="text-align:left;"> No </td> <td style="text-align:left;"> Yes </td> <td style="text-align:right;"> 10 </td> <td style="text-align:left;"> Yes </td> <td style="text-align:right;"> 10 </td> <td style="text-align:left;"> Yes </td> <td style="text-align:right;"> 14 </td> <td style="text-align:left;"> Yes </td> <td style="text-align:left;"> Yes </td> <td style="text-align:right;"> 16 </td> <td style="text-align:right;"> 3 </td> <td style="text-align:right;"> 0 </td> <td style="text-align:left;"> Yes </td> <td style="text-align:left;"> Homosexual </td> <td style="text-align:right;"> 22032.36 </td> <td style="text-align:right;"> 21678.82 </td> <td style="text-align:right;"> 1 </td> <td style="text-align:right;"> 86 </td> <td style="text-align:left;"> NA </td> </tr> <tr> <td style="text-align:right;"> 55194 </td> <td style="text-align:left;"> 2009_10 </td> <td style="text-align:left;"> female </td> <td style="text-align:right;"> 75 </td> <td style="text-align:right;"> 904 </td> <td style="text-align:left;"> White </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> Some College </td> <td style="text-align:left;"> Married </td> <td style="text-align:left;"> more 99999 </td> <td style="text-align:right;"> 100000 </td> <td style="text-align:right;"> 5.00 </td> <td style="text-align:right;"> 11 </td> <td style="text-align:left;"> Own </td> <td style="text-align:left;"> Working </td> <td style="text-align:right;"> 103.1 </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> 155.3 </td> <td style="text-align:right;"> 42.75 </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> 30.0_plus </td> <td style="text-align:right;"> 74 </td> <td style="text-align:right;"> 100 </td> <td style="text-align:right;"> 0 </td> <td style="text-align:right;"> 104 </td> <td style="text-align:right;"> 36 </td> <td style="text-align:right;"> 98 </td> <td style="text-align:right;"> 0 </td> <td style="text-align:right;"> 102 </td> <td style="text-align:right;"> 0 </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> 1.34 </td> <td style="text-align:right;"> 3.67 </td> <td style="text-align:right;"> 62 </td> <td style="text-align:right;"> 1.016 </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> No </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> Good </td> <td style="text-align:right;"> 10 </td> <td style="text-align:right;"> 15 </td> <td style="text-align:left;"> None </td> <td style="text-align:left;"> None </td> <td style="text-align:right;"> 7 </td> <td style="text-align:right;"> 7 </td> <td style="text-align:right;"> 22 </td> <td style="text-align:right;"> 2 </td> <td style="text-align:left;"> Yes </td> <td style="text-align:left;"> No </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> No </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> No </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:right;"> 38596.48 </td> <td style="text-align:right;"> 38913.51 </td> <td style="text-align:right;"> 2 </td> <td style="text-align:right;"> 76 </td> <td style="text-align:left;"> NA </td> </tr> </tbody> </table> ] <!-- end pull-right --> ] <!-- end panel --> .panel[.panel-name[Descending] .pull-left[ The **default is to arrange in ascending order** from top to bottom so the lowest number is on top. ```r NHANES::NHANESraw %>% arrange(-SleepHrsNight) ``` or ```r NHANES::NHANESraw %>% arrange(desc(SleepHrsNight)) ``` ]<!-- end pull-left --> .pull-right[ <table> <thead> <tr> <th style="text-align:right;"> ID </th> <th style="text-align:left;"> SurveyYr </th> <th style="text-align:left;"> Gender </th> <th style="text-align:right;"> Age </th> <th style="text-align:right;"> AgeMonths </th> <th style="text-align:left;"> Race1 </th> <th style="text-align:left;"> Race3 </th> <th style="text-align:left;"> Education </th> <th style="text-align:left;"> MaritalStatus </th> <th style="text-align:left;"> HHIncome </th> <th style="text-align:right;"> HHIncomeMid </th> <th style="text-align:right;"> Poverty </th> <th style="text-align:right;"> HomeRooms </th> <th style="text-align:left;"> HomeOwn </th> <th style="text-align:left;"> Work </th> <th style="text-align:right;"> Weight </th> <th style="text-align:right;"> Length </th> <th style="text-align:right;"> HeadCirc </th> <th style="text-align:right;"> Height </th> <th style="text-align:right;"> BMI </th> <th style="text-align:left;"> BMICatUnder20yrs </th> <th style="text-align:left;"> BMI_WHO </th> <th style="text-align:right;"> Pulse </th> <th style="text-align:right;"> BPSysAve </th> <th style="text-align:right;"> BPDiaAve </th> <th style="text-align:right;"> BPSys1 </th> <th style="text-align:right;"> BPDia1 </th> <th style="text-align:right;"> BPSys2 </th> <th style="text-align:right;"> BPDia2 </th> <th style="text-align:right;"> BPSys3 </th> <th style="text-align:right;"> BPDia3 </th> <th style="text-align:right;"> Testosterone </th> <th style="text-align:right;"> DirectChol </th> <th style="text-align:right;"> TotChol </th> <th style="text-align:right;"> UrineVol1 </th> <th style="text-align:right;"> UrineFlow1 </th> <th style="text-align:right;"> UrineVol2 </th> <th style="text-align:right;"> UrineFlow2 </th> <th style="text-align:left;"> Diabetes </th> <th style="text-align:right;"> DiabetesAge </th> <th style="text-align:left;"> HealthGen </th> <th style="text-align:right;"> DaysPhysHlthBad </th> <th style="text-align:right;"> DaysMentHlthBad </th> <th style="text-align:left;"> LittleInterest </th> <th style="text-align:left;"> Depressed </th> <th style="text-align:right;"> nPregnancies </th> <th style="text-align:right;"> nBabies </th> <th style="text-align:right;"> Age1stBaby </th> <th style="text-align:right;"> SleepHrsNight </th> <th style="text-align:left;"> SleepTrouble </th> <th style="text-align:left;"> PhysActive </th> <th style="text-align:right;"> PhysActiveDays </th> <th style="text-align:left;"> TVHrsDay </th> <th style="text-align:left;"> CompHrsDay </th> <th style="text-align:right;"> TVHrsDayChild </th> <th style="text-align:right;"> CompHrsDayChild </th> <th style="text-align:left;"> Alcohol12PlusYr </th> <th style="text-align:right;"> AlcoholDay </th> <th style="text-align:right;"> AlcoholYear </th> <th style="text-align:left;"> SmokeNow </th> <th style="text-align:left;"> Smoke100 </th> <th style="text-align:right;"> SmokeAge </th> <th style="text-align:left;"> Marijuana </th> <th style="text-align:right;"> AgeFirstMarij </th> <th style="text-align:left;"> RegularMarij </th> <th style="text-align:right;"> AgeRegMarij </th> <th style="text-align:left;"> HardDrugs </th> <th style="text-align:left;"> SexEver </th> <th style="text-align:right;"> SexAge </th> <th style="text-align:right;"> SexNumPartnLife </th> <th style="text-align:right;"> SexNumPartYear </th> <th style="text-align:left;"> SameSex </th> <th style="text-align:left;"> SexOrientation </th> <th style="text-align:right;"> WTINT2YR </th> <th style="text-align:right;"> WTMEC2YR </th> <th style="text-align:right;"> SDMVPSU </th> <th style="text-align:right;"> SDMVSTRA </th> <th style="text-align:left;"> PregnantNow </th> </tr> </thead> <tbody> <tr> <td style="text-align:right;"> 51769 </td> <td style="text-align:left;"> 2009_10 </td> <td style="text-align:left;"> female </td> <td style="text-align:right;"> 80 </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> White </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> College Grad </td> <td style="text-align:left;"> Widowed </td> <td style="text-align:left;"> 75000-99999 </td> <td style="text-align:right;"> 87500 </td> <td style="text-align:right;"> 4.31 </td> <td style="text-align:right;"> 7 </td> <td style="text-align:left;"> Own </td> <td style="text-align:left;"> NotWorking </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:right;"> 80 </td> <td style="text-align:right;"> 110 </td> <td style="text-align:right;"> 60 </td> <td style="text-align:right;"> 104 </td> <td style="text-align:right;"> 62 </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> 110 </td> <td style="text-align:right;"> 60 </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> No </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> 12 </td> <td style="text-align:left;"> No </td> <td style="text-align:left;"> No </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> No </td> <td style="text-align:left;"> Yes </td> <td style="text-align:right;"> 20 </td> <td style="text-align:left;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:right;"> 29691.116 </td> <td style="text-align:right;"> 32858.033 </td> <td style="text-align:right;"> 1 </td> <td style="text-align:right;"> 77 </td> <td style="text-align:left;"> NA </td> </tr> <tr> <td style="text-align:right;"> 51952 </td> <td style="text-align:left;"> 2009_10 </td> <td style="text-align:left;"> female </td> <td style="text-align:right;"> 46 </td> <td style="text-align:right;"> 556 </td> <td style="text-align:left;"> White </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> High School </td> <td style="text-align:left;"> Married </td> <td style="text-align:left;"> more 99999 </td> <td style="text-align:right;"> 100000 </td> <td style="text-align:right;"> 5.00 </td> <td style="text-align:right;"> 6 </td> <td style="text-align:left;"> Own </td> <td style="text-align:left;"> Working </td> <td style="text-align:right;"> 62.0 </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> 166.0 </td> <td style="text-align:right;"> 22.50 </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> 18.5_to_24.9 </td> <td style="text-align:right;"> 66 </td> <td style="text-align:right;"> 124 </td> <td style="text-align:right;"> 75 </td> <td style="text-align:right;"> 130 </td> <td style="text-align:right;"> 84 </td> <td style="text-align:right;"> 122 </td> <td style="text-align:right;"> 76 </td> <td style="text-align:right;"> 126 </td> <td style="text-align:right;"> 74 </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> 1.47 </td> <td style="text-align:right;"> 4.34 </td> <td style="text-align:right;"> 26 </td> <td style="text-align:right;"> 0.183 </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> No </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> Vgood </td> <td style="text-align:right;"> 0 </td> <td style="text-align:right;"> 15 </td> <td style="text-align:left;"> None </td> <td style="text-align:left;"> None </td> <td style="text-align:right;"> 2 </td> <td style="text-align:right;"> 2 </td> <td style="text-align:right;"> 25 </td> <td style="text-align:right;"> 12 </td> <td style="text-align:left;"> No </td> <td style="text-align:left;"> Yes </td> <td style="text-align:right;"> 2 </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> No </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> No </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> No </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> No </td> <td style="text-align:left;"> Yes </td> <td style="text-align:right;"> 19 </td> <td style="text-align:right;"> 1 </td> <td style="text-align:right;"> 1 </td> <td style="text-align:left;"> No </td> <td style="text-align:left;"> Heterosexual </td> <td style="text-align:right;"> 62342.174 </td> <td style="text-align:right;"> 63386.840 </td> <td style="text-align:right;"> 1 </td> <td style="text-align:right;"> 84 </td> <td style="text-align:left;"> NA </td> </tr> <tr> <td style="text-align:right;"> 52513 </td> <td style="text-align:left;"> 2009_10 </td> <td style="text-align:left;"> female </td> <td style="text-align:right;"> 73 </td> <td style="text-align:right;"> 884 </td> <td style="text-align:left;"> Mexican </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> 8th Grade </td> <td style="text-align:left;"> Married </td> <td style="text-align:left;"> 35000-44999 </td> <td style="text-align:right;"> 40000 </td> <td style="text-align:right;"> 1.81 </td> <td style="text-align:right;"> 5 </td> <td style="text-align:left;"> Own </td> <td style="text-align:left;"> NotWorking </td> <td style="text-align:right;"> 77.8 </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> 151.7 </td> <td style="text-align:right;"> 33.81 </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> 30.0_plus </td> <td style="text-align:right;"> 62 </td> <td style="text-align:right;"> 107 </td> <td style="text-align:right;"> 57 </td> <td style="text-align:right;"> 108 </td> <td style="text-align:right;"> 56 </td> <td style="text-align:right;"> 106 </td> <td style="text-align:right;"> 58 </td> <td style="text-align:right;"> 108 </td> <td style="text-align:right;"> 56 </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> 1.24 </td> <td style="text-align:right;"> 3.49 </td> <td style="text-align:right;"> 30 </td> <td style="text-align:right;"> 0.476 </td> <td style="text-align:right;"> 136 </td> <td style="text-align:right;"> 0.978 </td> <td style="text-align:left;"> No </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> Good </td> <td style="text-align:right;"> 0 </td> <td style="text-align:right;"> 0 </td> <td style="text-align:left;"> None </td> <td style="text-align:left;"> None </td> <td style="text-align:right;"> 9 </td> <td style="text-align:right;"> 9 </td> <td style="text-align:right;"> 22 </td> <td style="text-align:right;"> 12 </td> <td style="text-align:left;"> No </td> <td style="text-align:left;"> Yes </td> <td style="text-align:right;"> 7 </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> No </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> No </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:right;"> 8779.904 </td> <td style="text-align:right;"> 10355.639 </td> <td style="text-align:right;"> 2 </td> <td style="text-align:right;"> 89 </td> <td style="text-align:left;"> NA </td> </tr> <tr> <td style="text-align:right;"> 52682 </td> <td style="text-align:left;"> 2009_10 </td> <td style="text-align:left;"> male </td> <td style="text-align:right;"> 64 </td> <td style="text-align:right;"> 772 </td> <td style="text-align:left;"> Mexican </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> 8th Grade </td> <td style="text-align:left;"> Married </td> <td style="text-align:left;"> 35000-44999 </td> <td style="text-align:right;"> 40000 </td> <td style="text-align:right;"> 1.81 </td> <td style="text-align:right;"> 5 </td> <td style="text-align:left;"> Own </td> <td style="text-align:left;"> NotWorking </td> <td style="text-align:right;"> 65.1 </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> 164.9 </td> <td style="text-align:right;"> 23.94 </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> 18.5_to_24.9 </td> <td style="text-align:right;"> 68 </td> <td style="text-align:right;"> 111 </td> <td style="text-align:right;"> 58 </td> <td style="text-align:right;"> 118 </td> <td style="text-align:right;"> 64 </td> <td style="text-align:right;"> 112 </td> <td style="text-align:right;"> 58 </td> <td style="text-align:right;"> 110 </td> <td style="text-align:right;"> 58 </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> 1.11 </td> <td style="text-align:right;"> 4.34 </td> <td style="text-align:right;"> 13 </td> <td style="text-align:right;"> 0.115 </td> <td style="text-align:right;"> 20 </td> <td style="text-align:right;"> 0.230 </td> <td style="text-align:left;"> Yes </td> <td style="text-align:right;"> 61 </td> <td style="text-align:left;"> Fair </td> <td style="text-align:right;"> 0 </td> <td style="text-align:right;"> 0 </td> <td style="text-align:left;"> None </td> <td style="text-align:left;"> None </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> 12 </td> <td style="text-align:left;"> Yes </td> <td style="text-align:left;"> Yes </td> <td style="text-align:right;"> 7 </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> Yes </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> 0 </td> <td style="text-align:left;"> Yes </td> <td style="text-align:left;"> Yes </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:right;"> 5913.060 </td> <td style="text-align:right;"> 6291.622 </td> <td style="text-align:right;"> 2 </td> <td style="text-align:right;"> 89 </td> <td style="text-align:left;"> NA </td> </tr> <tr> <td style="text-align:right;"> 52832 </td> <td style="text-align:left;"> 2009_10 </td> <td style="text-align:left;"> female </td> <td style="text-align:right;"> 30 </td> <td style="text-align:right;"> 366 </td> <td style="text-align:left;"> White </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> Some College </td> <td style="text-align:left;"> Divorced </td> <td style="text-align:left;"> 20000-24999 </td> <td style="text-align:right;"> 22500 </td> <td style="text-align:right;"> 1.37 </td> <td style="text-align:right;"> 6 </td> <td style="text-align:left;"> Own </td> <td style="text-align:left;"> NotWorking </td> <td style="text-align:right;"> 81.2 </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> 163.2 </td> <td style="text-align:right;"> 30.49 </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> 30.0_plus </td> <td style="text-align:right;"> 82 </td> <td style="text-align:right;"> 104 </td> <td style="text-align:right;"> 48 </td> <td style="text-align:right;"> 98 </td> <td style="text-align:right;"> 46 </td> <td style="text-align:right;"> 104 </td> <td style="text-align:right;"> 50 </td> <td style="text-align:right;"> 104 </td> <td style="text-align:right;"> 46 </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> 1.58 </td> <td style="text-align:right;"> 4.55 </td> <td style="text-align:right;"> 36 </td> <td style="text-align:right;"> 0.298 </td> <td style="text-align:right;"> 7 </td> <td style="text-align:right;"> 0.333 </td> <td style="text-align:left;"> No </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> Vgood </td> <td style="text-align:right;"> 0 </td> <td style="text-align:right;"> 0 </td> <td style="text-align:left;"> None </td> <td style="text-align:left;"> None </td> <td style="text-align:right;"> 5 </td> <td style="text-align:right;"> 2 </td> <td style="text-align:right;"> 20 </td> <td style="text-align:right;"> 12 </td> <td style="text-align:left;"> No </td> <td style="text-align:left;"> No </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> No </td> <td style="text-align:right;"> 1 </td> <td style="text-align:right;"> 2 </td> <td style="text-align:left;"> Yes </td> <td style="text-align:left;"> Yes </td> <td style="text-align:right;"> 16 </td> <td style="text-align:left;"> Yes </td> <td style="text-align:right;"> 16 </td> <td style="text-align:left;"> No </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> Yes </td> <td style="text-align:left;"> Yes </td> <td style="text-align:right;"> 16 </td> <td style="text-align:right;"> 100 </td> <td style="text-align:right;"> 1 </td> <td style="text-align:left;"> No </td> <td style="text-align:left;"> Heterosexual </td> <td style="text-align:right;"> 24930.457 </td> <td style="text-align:right;"> 26170.160 </td> <td style="text-align:right;"> 2 </td> <td style="text-align:right;"> 75 </td> <td style="text-align:left;"> No </td> </tr> <tr> <td style="text-align:right;"> 53026 </td> <td style="text-align:left;"> 2009_10 </td> <td style="text-align:left;"> female </td> <td style="text-align:right;"> 80 </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> White </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> Some College </td> <td style="text-align:left;"> Widowed </td> <td style="text-align:left;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> 8 </td> <td style="text-align:left;"> Own </td> <td style="text-align:left;"> NotWorking </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> No </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> 12 </td> <td style="text-align:left;"> No </td> <td style="text-align:left;"> No </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> No </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:left;"> NA </td> <td style="text-align:right;"> 24849.914 </td> <td style="text-align:right;"> 0.000 </td> <td style="text-align:right;"> 2 </td> <td style="text-align:right;"> 77 </td> <td style="text-align:left;"> NA </td> </tr> </tbody> </table> ] <!-- end pull-right --> ] <!-- end panel --> ] <!-- end panelset --> --- # Renaming variables <img src="img/dplyr, janitor.png" class="title-hex"> .panelset[ .panel[.panel-name[rename] Using the rename function: ```r NHANES::NHANESraw %>% select(Weight, Height, BMI) %>% rename(weight = Weight, height = Height, bmi = BMI) ``` ]<!-- end panel --> .panel[.panel-name[select] Another alternative is to rename columns in the the select function itself: ```r NHANES::NHANESraw %>% select(weight = Weight, height = Height, bmi = BMI, ID) ``` ] <!-- end panel --> .panel[.panel-name[janitor] Finally, there is an extremely popular package called `janitor` that enables fast cleaning of all variable names: .pull-left[ ```r install.packages("janitor") library(janitor) NHANES::NHANESraw %>% clean_names() ``` ]<!-- end pull-left --> .pull-right[ original names: ``` ID SurveyYr Gender Age AgeMonths Race1 Race3 Education MaritalStatus HHIncome HHIncomeMid Poverty HomeRooms HomeOwn Work Weight Length HeadCirc Height BMI ``` cleaned names: ``` id survey_yr gender age age_months race1 race3 education marital_status hh_income hh_income_mid poverty home_rooms home_own work weight length head_circ height bmi ``` ] <!-- end pull-right --> ] <!-- end panel --> ] <!-- end panelset --> --- template: question <svg viewBox="0 0 576 512" style="position:relative;display:inline-block;top:.1em;height:3em;" xmlns="http://www.w3.org/2000/svg"> <path d="M402.3 344.9l32-32c5-5 13.7-1.5 13.7 5.7V464c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V112c0-26.5 21.5-48 48-48h273.5c7.1 0 10.7 8.6 5.7 13.7l-32 32c-1.5 1.5-3.5 2.3-5.7 2.3H48v352h352V350.5c0-2.1.8-4.1 2.3-5.6zm156.6-201.8L296.3 405.7l-90.4 10c-26.2 2.9-48.5-19.2-45.6-45.6l10-90.4L432.9 17.1c22.9-22.9 59.9-22.9 82.7 0l43.2 43.2c22.9 22.9 22.9 60 .1 82.8zM460.1 174L402 115.9 216.2 301.8l-7.3 65.3 65.3-7.3L460.1 174zm64.8-79.7l-43.2-43.2c-4.1-4.1-10.8-4.1-14.8 0L436 82l58.1 58.1 30.9-30.9c4-4.2 4-10.8-.1-14.9z"></path></svg> # How do change the data itself? --- # mutate <img src="img/pipe.png" class="title-hex"><img src="img/dplyr.png" class="title-hex"> ---- The variables we have are usually not enough for all the questions we want to look at in an analysis. + We often use a `log()` function to transform positive data to reduce skew or try to make associations more linear. + We also like to combine variables to create new attributes based on existing attributes. Therefore, we may want to create new columns based on the values in existing columns, for example to do unit conversions, or to find the ratio of values in two columns. For this we'll use `mutate()`. We use `mutate()` to **create new variables from old** - adds one or more columns to our temporary data frame. We do this *a lot*! --- # mutate <img src="img/pipe.png" class="title-hex"><img src="img/dplyr.png" class="title-hex"> ---- ```r data %>% mutate(new_name = function(old_name)) ``` --- template: live-coding --- name: your-turn background-color: var(--my-red) class: inverse .left-column[ ## Your turn<br><svg viewBox="0 0 576 512" style="height:1em;position:relative;display:inline-block;top:.1em;" xmlns="http://www.w3.org/2000/svg"> <path d="M402.3 344.9l32-32c5-5 13.7-1.5 13.7 5.7V464c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V112c0-26.5 21.5-48 48-48h273.5c7.1 0 10.7 8.6 5.7 13.7l-32 32c-1.5 1.5-3.5 2.3-5.7 2.3H48v352h352V350.5c0-2.1.8-4.1 2.3-5.6zm156.6-201.8L296.3 405.7l-90.4 10c-26.2 2.9-48.5-19.2-45.6-45.6l10-90.4L432.9 17.1c22.9-22.9 59.9-22.9 82.7 0l43.2 43.2c22.9 22.9 22.9 60 .1 82.8zM460.1 174L402 115.9 216.2 301.8l-7.3 65.3 65.3-7.3L460.1 174zm64.8-79.7l-43.2-43.2c-4.1-4.1-10.8-4.1-14.8 0L436 82l58.1 58.1 30.9-30.9c4-4.2 4-10.8-.1-14.9z"></path></svg><br> ] .right-column[ ### Let's use mutate to wrangle out dataset ---- - Using pipes, create a new column that recalculates the BMI using the formula `\(BMI\_new = kg/m^2\)` - Note - the Height column is in `cm`! - Select the participants who have a BMI_new smaller than 16 - display their `ID`, `BMI_new`, and `BMI` - Sort by `bmi_new` ]
04
:
00
--- count: false .panel1-mutate-exercise-auto[ ```r *NHANES::NHANESraw ``` ] .panel2-mutate-exercise-auto[ ``` ## # A tibble: 20,293 x 78 ## ID SurveyYr Gender Age AgeMonths Race1 Race3 Education MaritalStatus ## <int> <fct> <fct> <int> <int> <fct> <fct> <fct> <fct> ## 1 51624 2009_10 male 34 409 White <NA> High School Married ## 2 51625 2009_10 male 4 49 Other <NA> <NA> <NA> ## 3 51626 2009_10 male 16 202 Black <NA> <NA> <NA> ## 4 51627 2009_10 male 10 131 Black <NA> <NA> <NA> ## 5 51628 2009_10 female 60 722 Black <NA> High School Widowed ## 6 51629 2009_10 male 26 313 Mexican <NA> 9 - 11th G~ Married ## 7 51630 2009_10 female 49 596 White <NA> Some Colle~ LivePartner ## 8 51631 2009_10 female 1 12 White <NA> <NA> <NA> ## 9 51632 2009_10 male 10 124 Hispan~ <NA> <NA> <NA> ## 10 51633 2009_10 male 80 NA White <NA> Some Colle~ Married ## # ... with 20,283 more rows, and 69 more variables: HHIncome <fct>, ## # HHIncomeMid <int>, Poverty <dbl>, HomeRooms <int>, HomeOwn <fct>, ## # Work <fct>, Weight <dbl>, Length <dbl>, HeadCirc <dbl>, Height <dbl>, ## # BMI <dbl>, BMICatUnder20yrs <fct>, BMI_WHO <fct>, Pulse <int>, ## # BPSysAve <int>, BPDiaAve <int>, BPSys1 <int>, BPDia1 <int>, BPSys2 <int>, ## # BPDia2 <int>, BPSys3 <int>, BPDia3 <int>, Testosterone <dbl>, ## # DirectChol <dbl>, TotChol <dbl>, UrineVol1 <int>, UrineFlow1 <dbl>, ## # UrineVol2 <int>, UrineFlow2 <dbl>, Diabetes <fct>, DiabetesAge <int>, ## # HealthGen <fct>, DaysPhysHlthBad <int>, DaysMentHlthBad <int>, ## # LittleInterest <fct>, Depressed <fct>, nPregnancies <int>, nBabies <int>, ## # Age1stBaby <int>, SleepHrsNight <int>, SleepTrouble <fct>, ## # PhysActive <fct>, PhysActiveDays <int>, TVHrsDay <fct>, CompHrsDay <fct>, ## # TVHrsDayChild <int>, CompHrsDayChild <int>, Alcohol12PlusYr <fct>, ## # AlcoholDay <int>, AlcoholYear <int>, SmokeNow <fct>, Smoke100 <fct>, ## # SmokeAge <int>, Marijuana <fct>, AgeFirstMarij <int>, RegularMarij <fct>, ## # AgeRegMarij <int>, HardDrugs <fct>, SexEver <fct>, SexAge <int>, ## # SexNumPartnLife <int>, SexNumPartYear <int>, SameSex <fct>, ## # SexOrientation <fct>, WTINT2YR <dbl>, WTMEC2YR <dbl>, SDMVPSU <int>, ## # SDMVSTRA <int>, PregnantNow <fct> ``` ] --- count: false .panel1-mutate-exercise-auto[ ```r NHANES::NHANESraw %>% * mutate(Height_m = Height / 100, * bmi_new = Weight/(Height_m ^ 2)) ``` ] .panel2-mutate-exercise-auto[ ``` ## # A tibble: 20,293 x 80 ## ID SurveyYr Gender Age AgeMonths Race1 Race3 Education MaritalStatus ## <int> <fct> <fct> <int> <int> <fct> <fct> <fct> <fct> ## 1 51624 2009_10 male 34 409 White <NA> High School Married ## 2 51625 2009_10 male 4 49 Other <NA> <NA> <NA> ## 3 51626 2009_10 male 16 202 Black <NA> <NA> <NA> ## 4 51627 2009_10 male 10 131 Black <NA> <NA> <NA> ## 5 51628 2009_10 female 60 722 Black <NA> High School Widowed ## 6 51629 2009_10 male 26 313 Mexican <NA> 9 - 11th G~ Married ## 7 51630 2009_10 female 49 596 White <NA> Some Colle~ LivePartner ## 8 51631 2009_10 female 1 12 White <NA> <NA> <NA> ## 9 51632 2009_10 male 10 124 Hispan~ <NA> <NA> <NA> ## 10 51633 2009_10 male 80 NA White <NA> Some Colle~ Married ## # ... with 20,283 more rows, and 71 more variables: HHIncome <fct>, ## # HHIncomeMid <int>, Poverty <dbl>, HomeRooms <int>, HomeOwn <fct>, ## # Work <fct>, Weight <dbl>, Length <dbl>, HeadCirc <dbl>, Height <dbl>, ## # BMI <dbl>, BMICatUnder20yrs <fct>, BMI_WHO <fct>, Pulse <int>, ## # BPSysAve <int>, BPDiaAve <int>, BPSys1 <int>, BPDia1 <int>, BPSys2 <int>, ## # BPDia2 <int>, BPSys3 <int>, BPDia3 <int>, Testosterone <dbl>, ## # DirectChol <dbl>, TotChol <dbl>, UrineVol1 <int>, UrineFlow1 <dbl>, ## # UrineVol2 <int>, UrineFlow2 <dbl>, Diabetes <fct>, DiabetesAge <int>, ## # HealthGen <fct>, DaysPhysHlthBad <int>, DaysMentHlthBad <int>, ## # LittleInterest <fct>, Depressed <fct>, nPregnancies <int>, nBabies <int>, ## # Age1stBaby <int>, SleepHrsNight <int>, SleepTrouble <fct>, ## # PhysActive <fct>, PhysActiveDays <int>, TVHrsDay <fct>, CompHrsDay <fct>, ## # TVHrsDayChild <int>, CompHrsDayChild <int>, Alcohol12PlusYr <fct>, ## # AlcoholDay <int>, AlcoholYear <int>, SmokeNow <fct>, Smoke100 <fct>, ## # SmokeAge <int>, Marijuana <fct>, AgeFirstMarij <int>, RegularMarij <fct>, ## # AgeRegMarij <int>, HardDrugs <fct>, SexEver <fct>, SexAge <int>, ## # SexNumPartnLife <int>, SexNumPartYear <int>, SameSex <fct>, ## # SexOrientation <fct>, WTINT2YR <dbl>, WTMEC2YR <dbl>, SDMVPSU <int>, ## # SDMVSTRA <int>, PregnantNow <fct>, Height_m <dbl>, bmi_new <dbl> ``` ] --- count: false .panel1-mutate-exercise-auto[ ```r NHANES::NHANESraw %>% mutate(Height_m = Height / 100, bmi_new = Weight/(Height_m ^ 2)) %>% * filter(bmi_new < 16) ``` ] .panel2-mutate-exercise-auto[ ``` ## # A tibble: 1,661 x 80 ## ID SurveyYr Gender Age AgeMonths Race1 Race3 Education MaritalStatus ## <int> <fct> <fct> <int> <int> <fct> <fct> <fct> <fct> ## 1 51625 2009_10 male 4 49 Other <NA> <NA> <NA> ## 2 51632 2009_10 male 10 124 Hispanic <NA> <NA> <NA> ## 3 51642 2009_10 male 7 85 Mexican <NA> <NA> <NA> ## 4 51681 2009_10 female 5 64 White <NA> <NA> <NA> ## 5 51743 2009_10 male 8 98 Hispanic <NA> <NA> <NA> ## 6 51753 2009_10 male 3 43 White <NA> <NA> <NA> ## 7 51797 2009_10 female 2 33 Hispanic <NA> <NA> <NA> ## 8 51802 2009_10 male 2 35 Mexican <NA> <NA> <NA> ## 9 51810 2009_10 male 7 93 White <NA> <NA> <NA> ## 10 51811 2009_10 male 5 63 Black <NA> <NA> <NA> ## # ... with 1,651 more rows, and 71 more variables: HHIncome <fct>, ## # HHIncomeMid <int>, Poverty <dbl>, HomeRooms <int>, HomeOwn <fct>, ## # Work <fct>, Weight <dbl>, Length <dbl>, HeadCirc <dbl>, Height <dbl>, ## # BMI <dbl>, BMICatUnder20yrs <fct>, BMI_WHO <fct>, Pulse <int>, ## # BPSysAve <int>, BPDiaAve <int>, BPSys1 <int>, BPDia1 <int>, BPSys2 <int>, ## # BPDia2 <int>, BPSys3 <int>, BPDia3 <int>, Testosterone <dbl>, ## # DirectChol <dbl>, TotChol <dbl>, UrineVol1 <int>, UrineFlow1 <dbl>, ## # UrineVol2 <int>, UrineFlow2 <dbl>, Diabetes <fct>, DiabetesAge <int>, ## # HealthGen <fct>, DaysPhysHlthBad <int>, DaysMentHlthBad <int>, ## # LittleInterest <fct>, Depressed <fct>, nPregnancies <int>, nBabies <int>, ## # Age1stBaby <int>, SleepHrsNight <int>, SleepTrouble <fct>, ## # PhysActive <fct>, PhysActiveDays <int>, TVHrsDay <fct>, CompHrsDay <fct>, ## # TVHrsDayChild <int>, CompHrsDayChild <int>, Alcohol12PlusYr <fct>, ## # AlcoholDay <int>, AlcoholYear <int>, SmokeNow <fct>, Smoke100 <fct>, ## # SmokeAge <int>, Marijuana <fct>, AgeFirstMarij <int>, RegularMarij <fct>, ## # AgeRegMarij <int>, HardDrugs <fct>, SexEver <fct>, SexAge <int>, ## # SexNumPartnLife <int>, SexNumPartYear <int>, SameSex <fct>, ## # SexOrientation <fct>, WTINT2YR <dbl>, WTMEC2YR <dbl>, SDMVPSU <int>, ## # SDMVSTRA <int>, PregnantNow <fct>, Height_m <dbl>, bmi_new <dbl> ``` ] --- count: false .panel1-mutate-exercise-auto[ ```r NHANES::NHANESraw %>% mutate(Height_m = Height / 100, bmi_new = Weight/(Height_m ^ 2)) %>% filter(bmi_new < 16) %>% * select(ID, bmi_new, BMI) ``` ] .panel2-mutate-exercise-auto[ ``` ## # A tibble: 1,661 x 3 ## ID bmi_new BMI ## <int> <dbl> <dbl> ## 1 51625 15.3 15.3 ## 2 51632 13.2 13.2 ## 3 51642 15.4 15.4 ## 4 51681 14.3 14.4 ## 5 51743 15.6 15.6 ## 6 51753 14.4 14.4 ## 7 51797 15.3 15.3 ## 8 51802 15.4 15.4 ## 9 51810 15.2 15.2 ## 10 51811 14.8 14.8 ## # ... with 1,651 more rows ``` ] --- count: false .panel1-mutate-exercise-auto[ ```r NHANES::NHANESraw %>% mutate(Height_m = Height / 100, bmi_new = Weight/(Height_m ^ 2)) %>% filter(bmi_new < 16) %>% select(ID, bmi_new, BMI) %>% * arrange(bmi_new) ``` ] .panel2-mutate-exercise-auto[ ``` ## # A tibble: 1,661 x 3 ## ID bmi_new BMI ## <int> <dbl> <dbl> ## 1 67971 12.4 12.4 ## 2 71273 12.5 12.5 ## 3 67553 12.5 12.5 ## 4 53367 12.6 12.6 ## 5 52518 12.6 12.6 ## 6 65821 12.6 12.6 ## 7 66417 12.7 12.7 ## 8 52279 12.7 12.7 ## 9 61406 12.7 12.7 ## 10 69357 12.7 12.7 ## # ... with 1,651 more rows ``` ] <style> .panel1-mutate-exercise-auto { color: black; width: 38.6060606060606%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel2-mutate-exercise-auto { color: black; width: 59.3939393939394%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel3-mutate-exercise-auto { color: black; width: NA%; hight: 33%; float: left; padding-left: 1%; font-size: 80% } </style> --- template: question <svg viewBox="0 0 640 512" style="position:relative;display:inline-block;top:.1em;height:3em;" xmlns="http://www.w3.org/2000/svg"> <path d="M128 352H32c-17.67 0-32 14.33-32 32v96c0 17.67 14.33 32 32 32h96c17.67 0 32-14.33 32-32v-96c0-17.67-14.33-32-32-32zm-24-80h192v48h48v-48h192v48h48v-57.59c0-21.17-17.23-38.41-38.41-38.41H344v-64h40c17.67 0 32-14.33 32-32V32c0-17.67-14.33-32-32-32H256c-17.67 0-32 14.33-32 32v96c0 17.67 14.33 32 32 32h40v64H94.41C73.23 224 56 241.23 56 262.41V320h48v-48zm264 80h-96c-17.67 0-32 14.33-32 32v96c0 17.67 14.33 32 32 32h96c17.67 0 32-14.33 32-32v-96c0-17.67-14.33-32-32-32zm240 0h-96c-17.67 0-32 14.33-32 32v96c0 17.67 14.33 32 32 32h96c17.67 0 32-14.33 32-32v-96c0-17.67-14.33-32-32-32z"></path></svg> # What about summary statistics? --- # Summarize <img src="img/pipe.png" class="title-hex"><img src="img/dplyr.png" class="title-hex"> ---- - We create summary statistics for variables by using the `summarize()` function. - **Once you summarize, the data *not* being summarized is not included in the new data frame** + You are creating a temporary, summarized version of the data frame with usually fewer rows .pull-left[ The following calculates the mean BMI across all individuals ] .pull-right[ ```r NHANES::NHANESraw %>% summarize(BMI_mean = mean(BMI)) ## # A tibble: 1 x 1 ## BMI_mean ## <dbl> ## 1 NA ``` ] --- # Summarize <img src="img/pipe.png" class="title-hex"><img src="img/dplyr.png" class="title-hex"> ---- Did you notice the `NA`? When `R` performs calculations with missing data, it (correctly) doesn't know how to evaluate them and forces the result to `NA`. to solve this, we need to add in a special option to tell R that we want to ignore the missing values. ```r NHANES::NHANESraw %>% summarize(BMI_mean = mean(BMI, na.rm = TRUE)) ## # A tibble: 1 x 1 ## BMI_mean ## <dbl> ## 1 25.6 ``` --- template: live-coding --- # Summarize <img src="img/pipe.png" class="title-hex"><img src="img/dplyr.png" class="title-hex"> ---- What if I want to see a different mean for each gender? ```r summary(NHANES::NHANESraw$Gender) ## female male ## 10212 10081 ``` .pull-left[ ```r NHANES::NHANESraw %>% filter(Gender == "female") %>% summarize(BMI_mean_female = mean(BMI, na.rm = TRUE)) ## # A tibble: 1 x 1 ## BMI_mean_female ## <dbl> ## 1 26.1 ``` ] .pull-right[ ```r NHANES::NHANESraw %>% filter(Gender == "male") %>% summarize(BMI_mean_male = mean(BMI, na.rm = TRUE)) ## # A tibble: 1 x 1 ## BMI_mean_male ## <dbl> ## 1 25.2 ``` ] --- # Group_by <img src="img/pipe.png" class="title-hex"><img src="img/dplyr.png" class="title-hex"> ---- Many data analysis tasks can be approached using the *split-apply-combine* paradigm: split the data into groups, apply some analysis to each group, and then combine the results. **`dplyr`** makes this very easy through the use of the `group_by()` function. `group_by()` is often used together with `summarize()`, which collapses each group into a single-row summary of that group. `group_by()` takes as arguments the column names that contain the **categorical** variables for which you want to calculate the summary statistics. Groups are *virtual* in the sense *you are not changing the structure of the original data frame, just how R perceives in subsequent operations* until an `ungroup()` is used. We will do this a lot! The previous exercise required us to run three different sets of code to get summaries for the three penguin species `group_by` allows us to do that all at once by grouping the rows where the values in one of the columns, creates the groups e.g., `species` creates three rows, one for each species. --- count: false .panel1-groupby-auto[ ```r *NHANES::NHANESraw ``` ] .panel2-groupby-auto[ ``` ## # A tibble: 20,293 x 78 ## ID SurveyYr Gender Age AgeMonths Race1 Race3 Education MaritalStatus ## <int> <fct> <fct> <int> <int> <fct> <fct> <fct> <fct> ## 1 51624 2009_10 male 34 409 White <NA> High School Married ## 2 51625 2009_10 male 4 49 Other <NA> <NA> <NA> ## 3 51626 2009_10 male 16 202 Black <NA> <NA> <NA> ## 4 51627 2009_10 male 10 131 Black <NA> <NA> <NA> ## 5 51628 2009_10 female 60 722 Black <NA> High School Widowed ## 6 51629 2009_10 male 26 313 Mexican <NA> 9 - 11th G~ Married ## 7 51630 2009_10 female 49 596 White <NA> Some Colle~ LivePartner ## 8 51631 2009_10 female 1 12 White <NA> <NA> <NA> ## 9 51632 2009_10 male 10 124 Hispan~ <NA> <NA> <NA> ## 10 51633 2009_10 male 80 NA White <NA> Some Colle~ Married ## # ... with 20,283 more rows, and 69 more variables: HHIncome <fct>, ## # HHIncomeMid <int>, Poverty <dbl>, HomeRooms <int>, HomeOwn <fct>, ## # Work <fct>, Weight <dbl>, Length <dbl>, HeadCirc <dbl>, Height <dbl>, ## # BMI <dbl>, BMICatUnder20yrs <fct>, BMI_WHO <fct>, Pulse <int>, ## # BPSysAve <int>, BPDiaAve <int>, BPSys1 <int>, BPDia1 <int>, BPSys2 <int>, ## # BPDia2 <int>, BPSys3 <int>, BPDia3 <int>, Testosterone <dbl>, ## # DirectChol <dbl>, TotChol <dbl>, UrineVol1 <int>, UrineFlow1 <dbl>, ## # UrineVol2 <int>, UrineFlow2 <dbl>, Diabetes <fct>, DiabetesAge <int>, ## # HealthGen <fct>, DaysPhysHlthBad <int>, DaysMentHlthBad <int>, ## # LittleInterest <fct>, Depressed <fct>, nPregnancies <int>, nBabies <int>, ## # Age1stBaby <int>, SleepHrsNight <int>, SleepTrouble <fct>, ## # PhysActive <fct>, PhysActiveDays <int>, TVHrsDay <fct>, CompHrsDay <fct>, ## # TVHrsDayChild <int>, CompHrsDayChild <int>, Alcohol12PlusYr <fct>, ## # AlcoholDay <int>, AlcoholYear <int>, SmokeNow <fct>, Smoke100 <fct>, ## # SmokeAge <int>, Marijuana <fct>, AgeFirstMarij <int>, RegularMarij <fct>, ## # AgeRegMarij <int>, HardDrugs <fct>, SexEver <fct>, SexAge <int>, ## # SexNumPartnLife <int>, SexNumPartYear <int>, SameSex <fct>, ## # SexOrientation <fct>, WTINT2YR <dbl>, WTMEC2YR <dbl>, SDMVPSU <int>, ## # SDMVSTRA <int>, PregnantNow <fct> ``` ] --- count: false .panel1-groupby-auto[ ```r NHANES::NHANESraw %>% * group_by(Gender) ``` ] .panel2-groupby-auto[ ``` ## # A tibble: 20,293 x 78 ## # Groups: Gender [2] ## ID SurveyYr Gender Age AgeMonths Race1 Race3 Education MaritalStatus ## <int> <fct> <fct> <int> <int> <fct> <fct> <fct> <fct> ## 1 51624 2009_10 male 34 409 White <NA> High School Married ## 2 51625 2009_10 male 4 49 Other <NA> <NA> <NA> ## 3 51626 2009_10 male 16 202 Black <NA> <NA> <NA> ## 4 51627 2009_10 male 10 131 Black <NA> <NA> <NA> ## 5 51628 2009_10 female 60 722 Black <NA> High School Widowed ## 6 51629 2009_10 male 26 313 Mexican <NA> 9 - 11th G~ Married ## 7 51630 2009_10 female 49 596 White <NA> Some Colle~ LivePartner ## 8 51631 2009_10 female 1 12 White <NA> <NA> <NA> ## 9 51632 2009_10 male 10 124 Hispan~ <NA> <NA> <NA> ## 10 51633 2009_10 male 80 NA White <NA> Some Colle~ Married ## # ... with 20,283 more rows, and 69 more variables: HHIncome <fct>, ## # HHIncomeMid <int>, Poverty <dbl>, HomeRooms <int>, HomeOwn <fct>, ## # Work <fct>, Weight <dbl>, Length <dbl>, HeadCirc <dbl>, Height <dbl>, ## # BMI <dbl>, BMICatUnder20yrs <fct>, BMI_WHO <fct>, Pulse <int>, ## # BPSysAve <int>, BPDiaAve <int>, BPSys1 <int>, BPDia1 <int>, BPSys2 <int>, ## # BPDia2 <int>, BPSys3 <int>, BPDia3 <int>, Testosterone <dbl>, ## # DirectChol <dbl>, TotChol <dbl>, UrineVol1 <int>, UrineFlow1 <dbl>, ## # UrineVol2 <int>, UrineFlow2 <dbl>, Diabetes <fct>, DiabetesAge <int>, ## # HealthGen <fct>, DaysPhysHlthBad <int>, DaysMentHlthBad <int>, ## # LittleInterest <fct>, Depressed <fct>, nPregnancies <int>, nBabies <int>, ## # Age1stBaby <int>, SleepHrsNight <int>, SleepTrouble <fct>, ## # PhysActive <fct>, PhysActiveDays <int>, TVHrsDay <fct>, CompHrsDay <fct>, ## # TVHrsDayChild <int>, CompHrsDayChild <int>, Alcohol12PlusYr <fct>, ## # AlcoholDay <int>, AlcoholYear <int>, SmokeNow <fct>, Smoke100 <fct>, ## # SmokeAge <int>, Marijuana <fct>, AgeFirstMarij <int>, RegularMarij <fct>, ## # AgeRegMarij <int>, HardDrugs <fct>, SexEver <fct>, SexAge <int>, ## # SexNumPartnLife <int>, SexNumPartYear <int>, SameSex <fct>, ## # SexOrientation <fct>, WTINT2YR <dbl>, WTMEC2YR <dbl>, SDMVPSU <int>, ## # SDMVSTRA <int>, PregnantNow <fct> ``` ] --- count: false .panel1-groupby-auto[ ```r NHANES::NHANESraw %>% group_by(Gender) %>% * summarize(BMI_mean = mean(BMI, na.rm = TRUE), * count = n()) ``` ] .panel2-groupby-auto[ ``` ## # A tibble: 2 x 3 ## Gender BMI_mean count ## <fct> <dbl> <int> ## 1 female 26.1 10212 ## 2 male 25.2 10081 ``` ] <style> .panel1-groupby-auto { color: black; width: 38.6060606060606%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel2-groupby-auto { color: black; width: 59.3939393939394%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel3-groupby-auto { color: black; width: NA%; hight: 33%; float: left; padding-left: 1%; font-size: 80% } </style> --- # count <img src="img/pipe.png" class="title-hex"><img src="img/dplyr.png" class="title-hex"> ---- .panelset[ .panel[.panel-name[Description] The dplyr::count() function wraps a bunch of things into one beautiful friendly line of code to help you find counts of observations by group. To demonstrate what it does, let’s find the counts of genders and race groups in the NHANES dataset in two different ways: ] <!-- end panel--> .panel[.panel-name[Summarize] ```r NHANES::NHANESraw %>% group_by(Gender, Race1) %>% summarize(n = n()) ``` ]<!-- end pull-left --> .pull-right[ ``` ## `summarise()` has grouped output by 'Gender'. You can override using the `.groups` argument. ``` <table> <thead> <tr> <th style="text-align:left;"> Gender </th> <th style="text-align:left;"> Race1 </th> <th style="text-align:right;"> n </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> female </td> <td style="text-align:left;"> Black </td> <td style="text-align:right;"> 2357 </td> </tr> <tr> <td style="text-align:left;"> female </td> <td style="text-align:left;"> Hispanic </td> <td style="text-align:right;"> 1145 </td> </tr> <tr> <td style="text-align:left;"> female </td> <td style="text-align:left;"> Mexican </td> <td style="text-align:right;"> 1851 </td> </tr> <tr> <td style="text-align:left;"> female </td> <td style="text-align:left;"> White </td> <td style="text-align:right;"> 3683 </td> </tr> <tr> <td style="text-align:left;"> female </td> <td style="text-align:left;"> Other </td> <td style="text-align:right;"> 1176 </td> </tr> <tr> <td style="text-align:left;"> male </td> <td style="text-align:left;"> Black </td> <td style="text-align:right;"> 2283 </td> </tr> <tr> <td style="text-align:left;"> male </td> <td style="text-align:left;"> Hispanic </td> <td style="text-align:right;"> 1064 </td> </tr> <tr> <td style="text-align:left;"> male </td> <td style="text-align:left;"> Mexican </td> <td style="text-align:right;"> 1888 </td> </tr> <tr> <td style="text-align:left;"> male </td> <td style="text-align:left;"> White </td> <td style="text-align:right;"> 3710 </td> </tr> <tr> <td style="text-align:left;"> male </td> <td style="text-align:left;"> Other </td> <td style="text-align:right;"> 1136 </td> </tr> </tbody> </table> ] <!-- end pull-right --> ]<!-- end panel--> .panel[.panel-name[Count] .pull-left[ ```r NHANES::NHANESraw %>% count(Gender, Race1) ``` ]<!-- end pull-left --> .pull-right[ <table> <thead> <tr> <th style="text-align:left;"> Gender </th> <th style="text-align:left;"> Race1 </th> <th style="text-align:right;"> n </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> female </td> <td style="text-align:left;"> Black </td> <td style="text-align:right;"> 2357 </td> </tr> <tr> <td style="text-align:left;"> female </td> <td style="text-align:left;"> Hispanic </td> <td style="text-align:right;"> 1145 </td> </tr> <tr> <td style="text-align:left;"> female </td> <td style="text-align:left;"> Mexican </td> <td style="text-align:right;"> 1851 </td> </tr> <tr> <td style="text-align:left;"> female </td> <td style="text-align:left;"> White </td> <td style="text-align:right;"> 3683 </td> </tr> <tr> <td style="text-align:left;"> female </td> <td style="text-align:left;"> Other </td> <td style="text-align:right;"> 1176 </td> </tr> <tr> <td style="text-align:left;"> male </td> <td style="text-align:left;"> Black </td> <td style="text-align:right;"> 2283 </td> </tr> <tr> <td style="text-align:left;"> male </td> <td style="text-align:left;"> Hispanic </td> <td style="text-align:right;"> 1064 </td> </tr> <tr> <td style="text-align:left;"> male </td> <td style="text-align:left;"> Mexican </td> <td style="text-align:right;"> 1888 </td> </tr> <tr> <td style="text-align:left;"> male </td> <td style="text-align:left;"> White </td> <td style="text-align:right;"> 3710 </td> </tr> <tr> <td style="text-align:left;"> male </td> <td style="text-align:left;"> Other </td> <td style="text-align:right;"> 1136 </td> </tr> </tbody> </table> ] <!-- end pull-right --> ] <!-- end panel--> ] <!-- end panelset--> --- template: live-coding --- name: your-turn background-color: var(--my-red) class: inverse .left-column[ ## Your turn<br><svg viewBox="0 0 576 512" style="height:1em;position:relative;display:inline-block;top:.1em;" xmlns="http://www.w3.org/2000/svg"> <path d="M402.3 344.9l32-32c5-5 13.7-1.5 13.7 5.7V464c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V112c0-26.5 21.5-48 48-48h273.5c7.1 0 10.7 8.6 5.7 13.7l-32 32c-1.5 1.5-3.5 2.3-5.7 2.3H48v352h352V350.5c0-2.1.8-4.1 2.3-5.6zm156.6-201.8L296.3 405.7l-90.4 10c-26.2 2.9-48.5-19.2-45.6-45.6l10-90.4L432.9 17.1c22.9-22.9 59.9-22.9 82.7 0l43.2 43.2c22.9 22.9 22.9 60 .1 82.8zM460.1 174L402 115.9 216.2 301.8l-7.3 65.3 65.3-7.3L460.1 174zm64.8-79.7l-43.2-43.2c-4.1-4.1-10.8-4.1-14.8 0L436 82l58.1 58.1 30.9-30.9c4-4.2 4-10.8-.1-14.9z"></path></svg><br> ] .right-column[ ### Let's experiment with aggregation ---- - Find the number of participants in the study for each education level - In a separate pipeline, find the minimum, median, and maximum combined diastolic blood pressure reading (`BPDiaAve`) across all participants. - In a separate pipeline, find the average `PhysActiveDays` and `SleepHrsNight` for people who have diabetes and people who don't. - In a final pipeline, find the minimum and maximum `PhysActiveDays` for different `Race1`. ]
07
:
00
--- template: question <svg viewBox="0 0 640 512" style="position:relative;display:inline-block;top:.1em;height:3em;" xmlns="http://www.w3.org/2000/svg"> <path d="M256.47 216.77l86.73 109.18s-16.6 102.36-76.57 150.12C206.66 523.85 0 510.19 0 510.19s3.8-23.14 11-55.43l94.62-112.17c3.97-4.7-.87-11.62-6.65-9.5l-60.4 22.09c14.44-41.66 32.72-80.04 54.6-97.47 59.97-47.76 163.3-40.94 163.3-40.94zM636.53 31.03l-19.86-25c-5.49-6.9-15.52-8.05-22.41-2.56l-232.48 177.8-34.14-42.97c-5.09-6.41-15.14-5.21-18.59 2.21l-25.33 54.55 86.73 109.18 58.8-12.45c8-1.69 11.42-11.2 6.34-17.6l-34.09-42.92 232.48-177.8c6.89-5.48 8.04-15.53 2.55-22.44z"></path></svg> # Part Two: Tidy Data --- # What is tidy data <img src="img/tidyr.png" class="title-hex"> - Data sets are often described in terms of three elements: units, variables and observations: + Units: the items described by the data. - They may be Observational or Experimental. - They may be referred to as units, subjects, individuals, or cases or other terms. - They represent a population or a sample from a population, e.g., cars, people, or countries. - They are not the "units of measurement" but the items being measured. - They may be represented by variable, e.g., name, a combination of variables, e.g., country and year, or be implied and not explicitly represented by any variable (most common in summarized data), e.g., average scores for a group + Variable: a characteristic or attribute of each unit about which we have data, e.g., mpg, age, or GDP. + Observations: The single value for each variable for a given unit, e.g., 20 mpg, 31 years old, or \$20,513,000 US. --- # Definition of Tidy Data <img src="img/tidyr.png" class="title-hex"> - We define tidy data very simply: It is a rectangular data set (the same number of rows for each column) where the data is shaped to have: 1. **One unit per row.** 2. **One variable per column.** 3. **One observation (or value) per cell (the intersection of each row and column).** ![](https://american-stat-412612.netlify.app/img/tidy-1.png) --- # Is this tidy? #1 <img src="img/tidyr.png" class="title-hex"> .pull-left[ 1. **One unit per row.** 2. **One variable per column.** 3. **One observation (or value) per cell (the intersection of each row and column).** ] .pull-right[ <table> <thead> <tr> <th style="text-align:left;"> country </th> <th style="text-align:right;"> year </th> <th style="text-align:left;"> type </th> <th style="text-align:right;"> count </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> Afghanistan </td> <td style="text-align:right;"> 1999 </td> <td style="text-align:left;"> cases </td> <td style="text-align:right;"> 745 </td> </tr> <tr> <td style="text-align:left;"> Afghanistan </td> <td style="text-align:right;"> 1999 </td> <td style="text-align:left;"> population </td> <td style="text-align:right;"> 19987071 </td> </tr> <tr> <td style="text-align:left;"> Afghanistan </td> <td style="text-align:right;"> 2000 </td> <td style="text-align:left;"> cases </td> <td style="text-align:right;"> 2666 </td> </tr> <tr> <td style="text-align:left;"> Afghanistan </td> <td style="text-align:right;"> 2000 </td> <td style="text-align:left;"> population </td> <td style="text-align:right;"> 20595360 </td> </tr> <tr> <td style="text-align:left;"> Brazil </td> <td style="text-align:right;"> 1999 </td> <td style="text-align:left;"> cases </td> <td style="text-align:right;"> 37737 </td> </tr> <tr> <td style="text-align:left;"> Brazil </td> <td style="text-align:right;"> 1999 </td> <td style="text-align:left;"> population </td> <td style="text-align:right;"> 172006362 </td> </tr> <tr> <td style="text-align:left;"> Brazil </td> <td style="text-align:right;"> 2000 </td> <td style="text-align:left;"> cases </td> <td style="text-align:right;"> 80488 </td> </tr> <tr> <td style="text-align:left;"> Brazil </td> <td style="text-align:right;"> 2000 </td> <td style="text-align:left;"> population </td> <td style="text-align:right;"> 174504898 </td> </tr> <tr> <td style="text-align:left;"> China </td> <td style="text-align:right;"> 1999 </td> <td style="text-align:left;"> cases </td> <td style="text-align:right;"> 212258 </td> </tr> <tr> <td style="text-align:left;"> China </td> <td style="text-align:right;"> 1999 </td> <td style="text-align:left;"> population </td> <td style="text-align:right;"> 1272915272 </td> </tr> <tr> <td style="text-align:left;"> China </td> <td style="text-align:right;"> 2000 </td> <td style="text-align:left;"> cases </td> <td style="text-align:right;"> 213766 </td> </tr> <tr> <td style="text-align:left;"> China </td> <td style="text-align:right;"> 2000 </td> <td style="text-align:left;"> population </td> <td style="text-align:right;"> 1280428583 </td> </tr> </tbody> </table> ] -- No, violates rule #2 - Cases and population should be different columns because they are different concepts. --- # Is this tidy? #2 <img src="img/tidyr.png" class="title-hex"> .pull-left[ 1. **One unit per row.** 2. **One variable per column.** 3. **One observation (or value) per cell (the intersection of each row and column).** ] .pull-right[ <table> <thead> <tr> <th style="text-align:left;"> country </th> <th style="text-align:right;"> year </th> <th style="text-align:left;"> rate </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> Afghanistan </td> <td style="text-align:right;"> 1999 </td> <td style="text-align:left;"> 745/19987071 </td> </tr> <tr> <td style="text-align:left;"> Afghanistan </td> <td style="text-align:right;"> 2000 </td> <td style="text-align:left;"> 2666/20595360 </td> </tr> <tr> <td style="text-align:left;"> Brazil </td> <td style="text-align:right;"> 1999 </td> <td style="text-align:left;"> 37737/172006362 </td> </tr> <tr> <td style="text-align:left;"> Brazil </td> <td style="text-align:right;"> 2000 </td> <td style="text-align:left;"> 80488/174504898 </td> </tr> <tr> <td style="text-align:left;"> China </td> <td style="text-align:right;"> 1999 </td> <td style="text-align:left;"> 212258/1272915272 </td> </tr> <tr> <td style="text-align:left;"> China </td> <td style="text-align:right;"> 2000 </td> <td style="text-align:left;"> 213766/1280428583 </td> </tr> </tbody> </table> ] -- No, violates rule #3 - holds two variables in each cell. --- # Is this tidy? #3 <img src="img/tidyr.png" class="title-hex"> .pull-left[ 1. **One unit per row.** 2. **One variable per column.** 3. **One observation (or value) per cell (the intersection of each row and column).** ] .pull-right[ <table> <thead> <tr> <th style="text-align:left;"> country </th> <th style="text-align:right;"> 1999 </th> <th style="text-align:right;"> 2000 </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> Afghanistan </td> <td style="text-align:right;"> 745 </td> <td style="text-align:right;"> 2666 </td> </tr> <tr> <td style="text-align:left;"> Brazil </td> <td style="text-align:right;"> 37737 </td> <td style="text-align:right;"> 80488 </td> </tr> <tr> <td style="text-align:left;"> China </td> <td style="text-align:right;"> 212258 </td> <td style="text-align:right;"> 213766 </td> </tr> </tbody> </table> <table> <thead> <tr> <th style="text-align:left;"> country </th> <th style="text-align:right;"> 1999 </th> <th style="text-align:right;"> 2000 </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> Afghanistan </td> <td style="text-align:right;"> 19987071 </td> <td style="text-align:right;"> 20595360 </td> </tr> <tr> <td style="text-align:left;"> Brazil </td> <td style="text-align:right;"> 172006362 </td> <td style="text-align:right;"> 174504898 </td> </tr> <tr> <td style="text-align:left;"> China </td> <td style="text-align:right;"> 1272915272 </td> <td style="text-align:right;"> 1280428583 </td> </tr> </tbody> </table> ] -- No, violates all rules - has each observation across two columns and each variable is its own table. --- # Is this tidy? #4 <img src="img/tidyr.png" class="title-hex"> .pull-left[ 1. **One unit per row.** 2. **One variable per column.** 3. **One observation (or value) per cell (the intersection of each row and column).** ] .pull-right[ <table> <thead> <tr> <th style="text-align:left;"> country </th> <th style="text-align:right;"> year </th> <th style="text-align:right;"> cases </th> <th style="text-align:right;"> population </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> Afghanistan </td> <td style="text-align:right;"> 1999 </td> <td style="text-align:right;"> 745 </td> <td style="text-align:right;"> 19987071 </td> </tr> <tr> <td style="text-align:left;"> Afghanistan </td> <td style="text-align:right;"> 2000 </td> <td style="text-align:right;"> 2666 </td> <td style="text-align:right;"> 20595360 </td> </tr> <tr> <td style="text-align:left;"> Brazil </td> <td style="text-align:right;"> 1999 </td> <td style="text-align:right;"> 37737 </td> <td style="text-align:right;"> 172006362 </td> </tr> <tr> <td style="text-align:left;"> Brazil </td> <td style="text-align:right;"> 2000 </td> <td style="text-align:right;"> 80488 </td> <td style="text-align:right;"> 174504898 </td> </tr> <tr> <td style="text-align:left;"> China </td> <td style="text-align:right;"> 1999 </td> <td style="text-align:right;"> 212258 </td> <td style="text-align:right;"> 1272915272 </td> </tr> <tr> <td style="text-align:left;"> China </td> <td style="text-align:right;"> 2000 </td> <td style="text-align:right;"> 213766 </td> <td style="text-align:right;"> 1280428583 </td> </tr> </tbody> </table> ] -- Yes! Finally! 🎉 --- # pivot_longer <img src="img/tidyr.png" class="title-hex"> Problem: One Attribute (implied variable) Appears In Multiple Columns. - The first argument is the dataset to reshape (which you can pipe in) - The second argument `cols = ` describes which columns need to be reshaped. + You can use any of the tidyselect tidy helper functions, e.g., `starts_with()` or `num_range()` + See "select" in help. - The `names_to = ` is the name of the variable you want to create to hold *the column names*. - The `values_to = `is the name of the variable you want to create to hold *the cell values*. ```r data %>% pivot_longer(cols = c(""), names_to = "new name column", values_to = "new values column") ``` --- count: false .panel1-longer-1-auto[ ```r *table4a ``` ] .panel2-longer-1-auto[ ``` ## # A tibble: 3 x 3 ## country `1999` `2000` ## * <chr> <int> <int> ## 1 Afghanistan 745 2666 ## 2 Brazil 37737 80488 ## 3 China 212258 213766 ``` ] --- count: false .panel1-longer-1-auto[ ```r table4a %>% * pivot_longer(cols = c(`1999`, `2000`), * names_to = "Year", * values_to = "Cases" ) ``` ] .panel2-longer-1-auto[ ``` ## # A tibble: 6 x 3 ## country Year Cases ## <chr> <chr> <int> ## 1 Afghanistan 1999 745 ## 2 Afghanistan 2000 2666 ## 3 Brazil 1999 37737 ## 4 Brazil 2000 80488 ## 5 China 1999 212258 ## 6 China 2000 213766 ``` ] <style> .panel1-longer-1-auto { color: black; width: 38.6060606060606%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel2-longer-1-auto { color: black; width: 59.3939393939394%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel3-longer-1-auto { color: black; width: NA%; hight: 33%; float: left; padding-left: 1%; font-size: 80% } </style> --- name: your-turn background-color: var(--my-red) class: inverse .left-column[ ## Your turn<br><svg viewBox="0 0 576 512" style="height:1em;position:relative;display:inline-block;top:.1em;" xmlns="http://www.w3.org/2000/svg"> <path d="M402.3 344.9l32-32c5-5 13.7-1.5 13.7 5.7V464c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V112c0-26.5 21.5-48 48-48h273.5c7.1 0 10.7 8.6 5.7 13.7l-32 32c-1.5 1.5-3.5 2.3-5.7 2.3H48v352h352V350.5c0-2.1.8-4.1 2.3-5.6zm156.6-201.8L296.3 405.7l-90.4 10c-26.2 2.9-48.5-19.2-45.6-45.6l10-90.4L432.9 17.1c22.9-22.9 59.9-22.9 82.7 0l43.2 43.2c22.9 22.9 22.9 60 .1 82.8zM460.1 174L402 115.9 216.2 301.8l-7.3 65.3 65.3-7.3L460.1 174zm64.8-79.7l-43.2-43.2c-4.1-4.1-10.8-4.1-14.8 0L436 82l58.1 58.1 30.9-30.9c4-4.2 4-10.8-.1-14.9z"></path></svg><br> ] .right-column[ ### Let's experiment with pivot_longer ---- - read the csv from [http://bit.ly/monkey-mem](http://bit.ly/monkey-mem) ```r monkeys <- read_csv("http://bit.ly/monkey-mem") head(monkeys) ``` - make it tidy <svg viewBox="0 0 640 512" style="height:1em;position:relative;display:inline-block;top:.1em;" xmlns="http://www.w3.org/2000/svg"> <path d="M256.47 216.77l86.73 109.18s-16.6 102.36-76.57 150.12C206.66 523.85 0 510.19 0 510.19s3.8-23.14 11-55.43l94.62-112.17c3.97-4.7-.87-11.62-6.65-9.5l-60.4 22.09c14.44-41.66 32.72-80.04 54.6-97.47 59.97-47.76 163.3-40.94 163.3-40.94zM636.53 31.03l-19.86-25c-5.49-6.9-15.52-8.05-22.41-2.56l-232.48 177.8-34.14-42.97c-5.09-6.41-15.14-5.21-18.59 2.21l-25.33 54.55 86.73 109.18 58.8-12.45c8-1.69 11.42-11.2 6.34-17.6l-34.09-42.92 232.48-177.8c6.89-5.48 8.04-15.53 2.55-22.44z"></path></svg> - Why does this code fail? ```r tidyr::table4a %>% pivot_longer(cols = 1999, 2000, names_to = "year", values_to = "cases") ``` ]
04
:
00
--- count: false .panel1-longer-exercise-1-auto[ ```r *monkeys <- read_csv("http://bit.ly/monkey-mem", col_types = cols()) ``` ] .panel2-longer-exercise-1-auto[ ] --- count: false .panel1-longer-exercise-1-auto[ ```r monkeys <- read_csv("http://bit.ly/monkey-mem", col_types = cols()) *monkeys ``` ] .panel2-longer-exercise-1-auto[ ``` ## # A tibble: 18 x 7 ## Monkey Treatment Week2 Week4 Week8 Week12 Week16 ## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> ## 1 Spank Control 95 75 80 65 70 ## 2 Chim Control 85 75 55 75 85 ## 3 Chak Control 75 95 60 40 45 ## 4 Alf Control 85 80 70 45 80 ## 5 Poet Control 65 80 75 65 65 ## 6 Jessie Control 70 90 85 75 75 ## 7 Phil Control 75 80 70 70 70 ## 8 Irv Treated 75 50 70 75 75 ## 9 Edy Treated 85 85 60 70 70 ## 10 Allen Treated 60 70 70 75 70 ## 11 Poe Treated 60 65 70 70 60 ## 12 Joey Treated 65 60 80 70 60 ## 13 Just Treated 55 70 60 65 75 ## 14 Junior Treated 60 55 75 70 50 ## 15 Andy Treated 55 65 45 70 65 ## 16 Sport Treated 60 70 70 85 70 ## 17 Cornelius Treated 45 60 65 65 70 ## 18 Duncan Treated 65 55 55 80 75 ``` ] --- count: false .panel1-longer-exercise-1-auto[ ```r monkeys <- read_csv("http://bit.ly/monkey-mem", col_types = cols()) monkeys %>% * pivot_longer(cols = Week2:Week16, * names_prefix = "Week", * names_to = "Week", * values_to = "Percent" ) ``` ] .panel2-longer-exercise-1-auto[ ``` ## # A tibble: 90 x 4 ## Monkey Treatment Week Percent ## <chr> <chr> <chr> <dbl> ## 1 Spank Control 2 95 ## 2 Spank Control 4 75 ## 3 Spank Control 8 80 ## 4 Spank Control 12 65 ## 5 Spank Control 16 70 ## 6 Chim Control 2 85 ## 7 Chim Control 4 75 ## 8 Chim Control 8 55 ## 9 Chim Control 12 75 ## 10 Chim Control 16 85 ## # ... with 80 more rows ``` ] <style> .panel1-longer-exercise-1-auto { color: black; width: 38.6060606060606%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel2-longer-exercise-1-auto { color: black; width: 59.3939393939394%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel3-longer-exercise-1-auto { color: black; width: NA%; hight: 33%; float: left; padding-left: 1%; font-size: 80% } </style> --- How can we solve this? <table> <thead> <tr> <th style="text-align:left;"> country </th> <th style="text-align:left;"> iso2 </th> <th style="text-align:left;"> iso3 </th> <th style="text-align:right;"> year </th> <th style="text-align:right;"> new_sp_m014 </th> <th style="text-align:right;"> new_sp_m1524 </th> <th style="text-align:right;"> new_sp_m2534 </th> <th style="text-align:right;"> new_sp_m3544 </th> <th style="text-align:right;"> new_sp_m4554 </th> <th style="text-align:right;"> new_sp_m5564 </th> <th style="text-align:right;"> new_sp_m65 </th> <th style="text-align:right;"> new_sp_f014 </th> <th style="text-align:right;"> new_sp_f1524 </th> <th style="text-align:right;"> new_sp_f2534 </th> <th style="text-align:right;"> new_sp_f3544 </th> <th style="text-align:right;"> new_sp_f4554 </th> <th style="text-align:right;"> new_sp_f5564 </th> <th style="text-align:right;"> new_sp_f65 </th> <th style="text-align:right;"> new_sn_m014 </th> <th style="text-align:right;"> new_sn_m1524 </th> <th style="text-align:right;"> new_sn_m2534 </th> <th style="text-align:right;"> new_sn_m3544 </th> <th style="text-align:right;"> new_sn_m4554 </th> <th style="text-align:right;"> new_sn_m5564 </th> <th style="text-align:right;"> new_sn_m65 </th> <th style="text-align:right;"> new_sn_f014 </th> <th style="text-align:right;"> new_sn_f1524 </th> <th style="text-align:right;"> new_sn_f2534 </th> <th style="text-align:right;"> new_sn_f3544 </th> <th style="text-align:right;"> new_sn_f4554 </th> <th style="text-align:right;"> new_sn_f5564 </th> <th style="text-align:right;"> new_sn_f65 </th> <th style="text-align:right;"> new_ep_m014 </th> <th style="text-align:right;"> new_ep_m1524 </th> <th style="text-align:right;"> new_ep_m2534 </th> <th style="text-align:right;"> new_ep_m3544 </th> <th style="text-align:right;"> new_ep_m4554 </th> <th style="text-align:right;"> new_ep_m5564 </th> <th style="text-align:right;"> new_ep_m65 </th> <th style="text-align:right;"> new_ep_f014 </th> <th style="text-align:right;"> new_ep_f1524 </th> <th style="text-align:right;"> new_ep_f2534 </th> <th style="text-align:right;"> new_ep_f3544 </th> <th style="text-align:right;"> new_ep_f4554 </th> <th style="text-align:right;"> new_ep_f5564 </th> <th style="text-align:right;"> new_ep_f65 </th> <th style="text-align:right;"> newrel_m014 </th> <th style="text-align:right;"> newrel_m1524 </th> <th style="text-align:right;"> newrel_m2534 </th> <th style="text-align:right;"> newrel_m3544 </th> <th style="text-align:right;"> newrel_m4554 </th> <th style="text-align:right;"> newrel_m5564 </th> <th style="text-align:right;"> newrel_m65 </th> <th style="text-align:right;"> newrel_f014 </th> <th style="text-align:right;"> newrel_f1524 </th> <th style="text-align:right;"> newrel_f2534 </th> <th style="text-align:right;"> newrel_f3544 </th> <th style="text-align:right;"> newrel_f4554 </th> <th style="text-align:right;"> newrel_f5564 </th> <th style="text-align:right;"> newrel_f65 </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> Afghanistan </td> <td style="text-align:left;"> AF </td> <td style="text-align:left;"> AFG </td> <td style="text-align:right;"> 1980 </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> </tr> <tr> <td style="text-align:left;"> Afghanistan </td> <td style="text-align:left;"> AF </td> <td style="text-align:left;"> AFG </td> <td style="text-align:right;"> 1981 </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> </tr> <tr> <td style="text-align:left;"> Afghanistan </td> <td style="text-align:left;"> AF </td> <td style="text-align:left;"> AFG </td> <td style="text-align:right;"> 1982 </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> </tr> <tr> <td style="text-align:left;"> Afghanistan </td> <td style="text-align:left;"> AF </td> <td style="text-align:left;"> AFG </td> <td style="text-align:right;"> 1983 </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> </tr> <tr> <td style="text-align:left;"> Afghanistan </td> <td style="text-align:left;"> AF </td> <td style="text-align:left;"> AFG </td> <td style="text-align:right;"> 1984 </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> </tr> <tr> <td style="text-align:left;"> Afghanistan </td> <td style="text-align:left;"> AF </td> <td style="text-align:left;"> AFG </td> <td style="text-align:right;"> 1985 </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> <td style="text-align:right;"> NA </td> </tr> </tbody> </table> The data uses the original codes given by the World Health Organization. The column names for columns five through 60 are made by combining new_ - to a code for .my-coral[method of diagnosis] (rel = relapse, sn = negative pulmonary smear, sp = positive pulmonary smear, ep = extrapulmonary) - to a code for .my-coral[gender] (f = female, m = male) - to a code for .my-coral[age group] (014 = 0-14 yrs of age, 1524 = 15-24 years of age, 2534 = 25 to 34 years of age, etc). --- template: live-coding --- count: false .panel1-advanced-longer-auto[ ```r *who ``` ] .panel2-advanced-longer-auto[ ``` ## # A tibble: 7,240 x 60 ## country iso2 iso3 year new_sp_m014 new_sp_m1524 new_sp_m2534 new_sp_m3544 ## <chr> <chr> <chr> <int> <int> <int> <int> <int> ## 1 Afghani~ AF AFG 1980 NA NA NA NA ## 2 Afghani~ AF AFG 1981 NA NA NA NA ## 3 Afghani~ AF AFG 1982 NA NA NA NA ## 4 Afghani~ AF AFG 1983 NA NA NA NA ## 5 Afghani~ AF AFG 1984 NA NA NA NA ## 6 Afghani~ AF AFG 1985 NA NA NA NA ## 7 Afghani~ AF AFG 1986 NA NA NA NA ## 8 Afghani~ AF AFG 1987 NA NA NA NA ## 9 Afghani~ AF AFG 1988 NA NA NA NA ## 10 Afghani~ AF AFG 1989 NA NA NA NA ## # ... with 7,230 more rows, and 52 more variables: new_sp_m4554 <int>, ## # new_sp_m5564 <int>, new_sp_m65 <int>, new_sp_f014 <int>, ## # new_sp_f1524 <int>, new_sp_f2534 <int>, new_sp_f3544 <int>, ## # new_sp_f4554 <int>, new_sp_f5564 <int>, new_sp_f65 <int>, ## # new_sn_m014 <int>, new_sn_m1524 <int>, new_sn_m2534 <int>, ## # new_sn_m3544 <int>, new_sn_m4554 <int>, new_sn_m5564 <int>, ## # new_sn_m65 <int>, new_sn_f014 <int>, new_sn_f1524 <int>, ## # new_sn_f2534 <int>, new_sn_f3544 <int>, new_sn_f4554 <int>, ## # new_sn_f5564 <int>, new_sn_f65 <int>, new_ep_m014 <int>, ## # new_ep_m1524 <int>, new_ep_m2534 <int>, new_ep_m3544 <int>, ## # new_ep_m4554 <int>, new_ep_m5564 <int>, new_ep_m65 <int>, ## # new_ep_f014 <int>, new_ep_f1524 <int>, new_ep_f2534 <int>, ## # new_ep_f3544 <int>, new_ep_f4554 <int>, new_ep_f5564 <int>, ## # new_ep_f65 <int>, newrel_m014 <int>, newrel_m1524 <int>, ## # newrel_m2534 <int>, newrel_m3544 <int>, newrel_m4554 <int>, ## # newrel_m5564 <int>, newrel_m65 <int>, newrel_f014 <int>, ## # newrel_f1524 <int>, newrel_f2534 <int>, newrel_f3544 <int>, ## # newrel_f4554 <int>, newrel_f5564 <int>, newrel_f65 <int> ``` ] --- count: false .panel1-advanced-longer-auto[ ```r who %>% * pivot_longer( * cols = new_sp_m014:newrel_f65, * names_to = c("diagnosis", "gender", "age"), * names_pattern = "new_?(.*)_(.)(.*)", * values_to = "count") ``` ] .panel2-advanced-longer-auto[ ``` ## # A tibble: 405,440 x 8 ## country iso2 iso3 year diagnosis gender age count ## <chr> <chr> <chr> <int> <chr> <chr> <chr> <int> ## 1 Afghanistan AF AFG 1980 sp m 014 NA ## 2 Afghanistan AF AFG 1980 sp m 1524 NA ## 3 Afghanistan AF AFG 1980 sp m 2534 NA ## 4 Afghanistan AF AFG 1980 sp m 3544 NA ## 5 Afghanistan AF AFG 1980 sp m 4554 NA ## 6 Afghanistan AF AFG 1980 sp m 5564 NA ## 7 Afghanistan AF AFG 1980 sp m 65 NA ## 8 Afghanistan AF AFG 1980 sp f 014 NA ## 9 Afghanistan AF AFG 1980 sp f 1524 NA ## 10 Afghanistan AF AFG 1980 sp f 2534 NA ## # ... with 405,430 more rows ``` ] <style> .panel1-advanced-longer-auto { color: black; width: 38.6060606060606%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel2-advanced-longer-auto { color: black; width: 59.3939393939394%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel3-advanced-longer-auto { color: black; width: NA%; hight: 33%; float: left; padding-left: 1%; font-size: 80% } </style> --- # pivot_wider <img src="img/tidyr.png" class="title-hex"> Problem: One Observation's Attributes Appear in Multiple rows. - One column contains variable names. - One column contains values for the different attributes i.e., implied variables. - This can be more challenging to tidy as you have multiple variables to address ```r data %>% pivot_wider(id_cols = c(IDs), names_from = "original names", values_from = "original values") ``` --- # pivot_wider <img src="img/tidyr.png" class="title-hex"> ![](https://american-stat-412612.netlify.app/img/tidy-8.png) --- name: your-turn background-color: var(--my-red) class: inverse .left-column[ ## Your turn<br><svg viewBox="0 0 576 512" style="height:1em;position:relative;display:inline-block;top:.1em;" xmlns="http://www.w3.org/2000/svg"> <path d="M402.3 344.9l32-32c5-5 13.7-1.5 13.7 5.7V464c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V112c0-26.5 21.5-48 48-48h273.5c7.1 0 10.7 8.6 5.7 13.7l-32 32c-1.5 1.5-3.5 2.3-5.7 2.3H48v352h352V350.5c0-2.1.8-4.1 2.3-5.6zm156.6-201.8L296.3 405.7l-90.4 10c-26.2 2.9-48.5-19.2-45.6-45.6l10-90.4L432.9 17.1c22.9-22.9 59.9-22.9 82.7 0l43.2 43.2c22.9 22.9 22.9 60 .1 82.8zM460.1 174L402 115.9 216.2 301.8l-7.3 65.3 65.3-7.3L460.1 174zm64.8-79.7l-43.2-43.2c-4.1-4.1-10.8-4.1-14.8 0L436 82l58.1 58.1 30.9-30.9c4-4.2 4-10.8-.1-14.9z"></path></svg><br> ] .right-column[ ### Let's experiment with pivot_wider ---- - read the csv from [http://bit.ly/tidy_flowers](http://bit.ly/tidy_flowers) ```r flowers <- read_csv2("http://bit.ly/tidy_flowers") slice(flowers,20:28) flowers %>% pivot_wider(id_cols = ___, names_from = _______, values_from = ______) ``` - make it tidy <svg viewBox="0 0 640 512" style="height:1em;position:relative;display:inline-block;top:.1em;" xmlns="http://www.w3.org/2000/svg"> <path d="M256.47 216.77l86.73 109.18s-16.6 102.36-76.57 150.12C206.66 523.85 0 510.19 0 510.19s3.8-23.14 11-55.43l94.62-112.17c3.97-4.7-.87-11.62-6.65-9.5l-60.4 22.09c14.44-41.66 32.72-80.04 54.6-97.47 59.97-47.76 163.3-40.94 163.3-40.94zM636.53 31.03l-19.86-25c-5.49-6.9-15.52-8.05-22.41-2.56l-232.48 177.8-34.14-42.97c-5.09-6.41-15.14-5.21-18.59 2.21l-25.33 54.55 86.73 109.18 58.8-12.45c8-1.69 11.42-11.2 6.34-17.6l-34.09-42.92 232.48-177.8c6.89-5.48 8.04-15.53 2.55-22.44z"></path></svg> - Find the number of observations for each `Gender` and `Race1` group like we did earlier. Can transform this tidy format into an _untidy_ pivot table? This can be useful for presentations and reports. ]
03
:
00
--- count: false .panel1-wider-exercise-1-auto[ ```r *flowers <- read_csv2("http://bit.ly/tidy_flowers", col_types = cols()) ``` ] .panel2-wider-exercise-1-auto[ ``` ## i Using '\',\'' as decimal and '\'.\'' as grouping mark. Use `read_delim()` for more control. ``` ] --- count: false .panel1-wider-exercise-1-auto[ ```r flowers <- read_csv2("http://bit.ly/tidy_flowers", col_types = cols()) *flowers ``` ] .panel2-wider-exercise-1-auto[ ``` ## i Using '\',\'' as decimal and '\'.\'' as grouping mark. Use `read_delim()` for more control. ## # A tibble: 48 x 4 ## Time replication Variable Value ## <dbl> <dbl> <chr> <dbl> ## 1 1 1 Flowers 62.3 ## 2 1 2 Flowers 77.4 ## 3 1 3 Flowers 55.3 ## 4 1 4 Flowers 54.2 ## 5 1 5 Flowers 49.6 ## 6 1 6 Flowers 61.9 ## 7 1 7 Flowers 39.4 ## 8 1 8 Flowers 45.7 ## 9 1 9 Flowers 31.3 ## 10 1 10 Flowers 44.9 ## # ... with 38 more rows ``` ] --- count: false .panel1-wider-exercise-1-auto[ ```r flowers <- read_csv2("http://bit.ly/tidy_flowers", col_types = cols()) flowers %>% * pivot_wider(id_cols = c(Time, replication), * names_from = "Variable", * values_from = "Value") ``` ] .panel2-wider-exercise-1-auto[ ``` ## i Using '\',\'' as decimal and '\'.\'' as grouping mark. Use `read_delim()` for more control. ## # A tibble: 24 x 4 ## Time replication Flowers Intensity ## <dbl> <dbl> <dbl> <dbl> ## 1 1 1 62.3 150 ## 2 1 2 77.4 150 ## 3 1 3 55.3 300 ## 4 1 4 54.2 300 ## 5 1 5 49.6 450 ## 6 1 6 61.9 450 ## 7 1 7 39.4 600 ## 8 1 8 45.7 600 ## 9 1 9 31.3 750 ## 10 1 10 44.9 750 ## # ... with 14 more rows ``` ] <style> .panel1-wider-exercise-1-auto { color: black; width: 38.6060606060606%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel2-wider-exercise-1-auto { color: black; width: 59.3939393939394%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel3-wider-exercise-1-auto { color: black; width: NA%; hight: 33%; float: left; padding-left: 1%; font-size: 80% } </style> --- count: false .panel1-wider-exercise-2-auto[ ```r *NHANES::NHANESraw ``` ] .panel2-wider-exercise-2-auto[ ``` ## # A tibble: 20,293 x 78 ## ID SurveyYr Gender Age AgeMonths Race1 Race3 Education MaritalStatus ## <int> <fct> <fct> <int> <int> <fct> <fct> <fct> <fct> ## 1 51624 2009_10 male 34 409 White <NA> High School Married ## 2 51625 2009_10 male 4 49 Other <NA> <NA> <NA> ## 3 51626 2009_10 male 16 202 Black <NA> <NA> <NA> ## 4 51627 2009_10 male 10 131 Black <NA> <NA> <NA> ## 5 51628 2009_10 female 60 722 Black <NA> High School Widowed ## 6 51629 2009_10 male 26 313 Mexican <NA> 9 - 11th G~ Married ## 7 51630 2009_10 female 49 596 White <NA> Some Colle~ LivePartner ## 8 51631 2009_10 female 1 12 White <NA> <NA> <NA> ## 9 51632 2009_10 male 10 124 Hispan~ <NA> <NA> <NA> ## 10 51633 2009_10 male 80 NA White <NA> Some Colle~ Married ## # ... with 20,283 more rows, and 69 more variables: HHIncome <fct>, ## # HHIncomeMid <int>, Poverty <dbl>, HomeRooms <int>, HomeOwn <fct>, ## # Work <fct>, Weight <dbl>, Length <dbl>, HeadCirc <dbl>, Height <dbl>, ## # BMI <dbl>, BMICatUnder20yrs <fct>, BMI_WHO <fct>, Pulse <int>, ## # BPSysAve <int>, BPDiaAve <int>, BPSys1 <int>, BPDia1 <int>, BPSys2 <int>, ## # BPDia2 <int>, BPSys3 <int>, BPDia3 <int>, Testosterone <dbl>, ## # DirectChol <dbl>, TotChol <dbl>, UrineVol1 <int>, UrineFlow1 <dbl>, ## # UrineVol2 <int>, UrineFlow2 <dbl>, Diabetes <fct>, DiabetesAge <int>, ## # HealthGen <fct>, DaysPhysHlthBad <int>, DaysMentHlthBad <int>, ## # LittleInterest <fct>, Depressed <fct>, nPregnancies <int>, nBabies <int>, ## # Age1stBaby <int>, SleepHrsNight <int>, SleepTrouble <fct>, ## # PhysActive <fct>, PhysActiveDays <int>, TVHrsDay <fct>, CompHrsDay <fct>, ## # TVHrsDayChild <int>, CompHrsDayChild <int>, Alcohol12PlusYr <fct>, ## # AlcoholDay <int>, AlcoholYear <int>, SmokeNow <fct>, Smoke100 <fct>, ## # SmokeAge <int>, Marijuana <fct>, AgeFirstMarij <int>, RegularMarij <fct>, ## # AgeRegMarij <int>, HardDrugs <fct>, SexEver <fct>, SexAge <int>, ## # SexNumPartnLife <int>, SexNumPartYear <int>, SameSex <fct>, ## # SexOrientation <fct>, WTINT2YR <dbl>, WTMEC2YR <dbl>, SDMVPSU <int>, ## # SDMVSTRA <int>, PregnantNow <fct> ``` ] --- count: false .panel1-wider-exercise-2-auto[ ```r NHANES::NHANESraw %>% * count(Gender, Race1) ``` ] .panel2-wider-exercise-2-auto[ ``` ## # A tibble: 10 x 3 ## Gender Race1 n ## <fct> <fct> <int> ## 1 female Black 2357 ## 2 female Hispanic 1145 ## 3 female Mexican 1851 ## 4 female White 3683 ## 5 female Other 1176 ## 6 male Black 2283 ## 7 male Hispanic 1064 ## 8 male Mexican 1888 ## 9 male White 3710 ## 10 male Other 1136 ``` ] --- count: false .panel1-wider-exercise-2-auto[ ```r NHANES::NHANESraw %>% count(Gender, Race1) %>% * pivot_wider(id_cols = Gender, * names_from = "Race1", * values_from = "n") ``` ] .panel2-wider-exercise-2-auto[ ``` ## # A tibble: 2 x 6 ## Gender Black Hispanic Mexican White Other ## <fct> <int> <int> <int> <int> <int> ## 1 female 2357 1145 1851 3683 1176 ## 2 male 2283 1064 1888 3710 1136 ``` ] <style> .panel1-wider-exercise-2-auto { color: black; width: 38.6060606060606%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel2-wider-exercise-2-auto { color: black; width: 59.3939393939394%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel3-wider-exercise-2-auto { color: black; width: NA%; hight: 33%; float: left; padding-left: 1%; font-size: 80% } </style> --- # separate <img src="img/tidyr.png" class="title-hex"> <table> <thead> <tr> <th style="text-align:left;"> country </th> <th style="text-align:right;"> year </th> <th style="text-align:left;"> rate </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> Afghanistan </td> <td style="text-align:right;"> 1999 </td> <td style="text-align:left;"> 745/19987071 </td> </tr> <tr> <td style="text-align:left;"> Afghanistan </td> <td style="text-align:right;"> 2000 </td> <td style="text-align:left;"> 2666/20595360 </td> </tr> <tr> <td style="text-align:left;"> Brazil </td> <td style="text-align:right;"> 1999 </td> <td style="text-align:left;"> 37737/172006362 </td> </tr> <tr> <td style="text-align:left;"> Brazil </td> <td style="text-align:right;"> 2000 </td> <td style="text-align:left;"> 80488/174504898 </td> </tr> <tr> <td style="text-align:left;"> China </td> <td style="text-align:right;"> 1999 </td> <td style="text-align:left;"> 212258/1272915272 </td> </tr> <tr> <td style="text-align:left;"> China </td> <td style="text-align:right;"> 2000 </td> <td style="text-align:left;"> 213766/1280428583 </td> </tr> </tbody> </table> --- # separate <img src="img/tidyr.png" class="title-hex"> You need to specify at least three arguments: 1. The column you want to separate that has two (or more) variables, 2. The character vector of the names of the new variables, and 3. The character or numeric positions by which to separate out the new variables from the current column. ```r data %>% separate(col = "original var", into = c("new", "names"), sep = "/") ``` --- count: false .panel1-separate-3-auto[ ```r *tidyr::table3 ``` ] .panel2-separate-3-auto[ ``` ## # A tibble: 6 x 3 ## country year rate ## * <chr> <int> <chr> ## 1 Afghanistan 1999 745/19987071 ## 2 Afghanistan 2000 2666/20595360 ## 3 Brazil 1999 37737/172006362 ## 4 Brazil 2000 80488/174504898 ## 5 China 1999 212258/1272915272 ## 6 China 2000 213766/1280428583 ``` ] --- count: false .panel1-separate-3-auto[ ```r tidyr::table3 %>% * separate(col = rate, * into = c("cases", "population"), * sep = "/") ``` ] .panel2-separate-3-auto[ ``` ## # A tibble: 6 x 4 ## country year cases population ## <chr> <int> <chr> <chr> ## 1 Afghanistan 1999 745 19987071 ## 2 Afghanistan 2000 2666 20595360 ## 3 Brazil 1999 37737 172006362 ## 4 Brazil 2000 80488 174504898 ## 5 China 1999 212258 1272915272 ## 6 China 2000 213766 1280428583 ``` ] <style> .panel1-separate-3-auto { color: black; width: 38.6060606060606%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel2-separate-3-auto { color: black; width: 59.3939393939394%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel3-separate-3-auto { color: black; width: NA%; hight: 33%; float: left; padding-left: 1%; font-size: 80% } </style> --- name: your-turn background-color: var(--my-red) class: inverse .left-column[ ## Your turn<br><svg viewBox="0 0 576 512" style="height:1em;position:relative;display:inline-block;top:.1em;" xmlns="http://www.w3.org/2000/svg"> <path d="M402.3 344.9l32-32c5-5 13.7-1.5 13.7 5.7V464c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V112c0-26.5 21.5-48 48-48h273.5c7.1 0 10.7 8.6 5.7 13.7l-32 32c-1.5 1.5-3.5 2.3-5.7 2.3H48v352h352V350.5c0-2.1.8-4.1 2.3-5.6zm156.6-201.8L296.3 405.7l-90.4 10c-26.2 2.9-48.5-19.2-45.6-45.6l10-90.4L432.9 17.1c22.9-22.9 59.9-22.9 82.7 0l43.2 43.2c22.9 22.9 22.9 60 .1 82.8zM460.1 174L402 115.9 216.2 301.8l-7.3 65.3 65.3-7.3L460.1 174zm64.8-79.7l-43.2-43.2c-4.1-4.1-10.8-4.1-14.8 0L436 82l58.1 58.1 30.9-30.9c4-4.2 4-10.8-.1-14.9z"></path></svg><br> ] .right-column[ ### Let's experiment with separate ---- - read the csv from [http://bit.ly/separate_flowers](http://bit.ly/separate_flowers) ```r flowers2 <- read_csv2("http://bit.ly/separate_flowers") slice(flowers2,20:28) flowers %>% separate(col = ______, into = _____, sep = ____) ``` - make it tidy <svg viewBox="0 0 640 512" style="height:1em;position:relative;display:inline-block;top:.1em;" xmlns="http://www.w3.org/2000/svg"> <path d="M256.47 216.77l86.73 109.18s-16.6 102.36-76.57 150.12C206.66 523.85 0 510.19 0 510.19s3.8-23.14 11-55.43l94.62-112.17c3.97-4.7-.87-11.62-6.65-9.5l-60.4 22.09c14.44-41.66 32.72-80.04 54.6-97.47 59.97-47.76 163.3-40.94 163.3-40.94zM636.53 31.03l-19.86-25c-5.49-6.9-15.52-8.05-22.41-2.56l-232.48 177.8-34.14-42.97c-5.09-6.41-15.14-5.21-18.59 2.21l-25.33 54.55 86.73 109.18 58.8-12.45c8-1.69 11.42-11.2 6.34-17.6l-34.09-42.92 232.48-177.8c6.89-5.48 8.04-15.53 2.55-22.44z"></path></svg> - Take a look at the `NHANESraw` variable `HHIncome`. Can you use separate to make a lower income and upper income bracket? ]
04
:
00
--- count: false .panel1-separate-exercise-1-auto[ ```r *flowers <- read_csv2("http://bit.ly/separate_flowers", col_types = cols()) ``` ] .panel2-separate-exercise-1-auto[ ``` ## i Using '\',\'' as decimal and '\'.\'' as grouping mark. Use `read_delim()` for more control. ``` ] --- count: false .panel1-separate-exercise-1-auto[ ```r flowers <- read_csv2("http://bit.ly/separate_flowers", col_types = cols()) *flowers ``` ] .panel2-separate-exercise-1-auto[ ``` ## i Using '\',\'' as decimal and '\'.\'' as grouping mark. Use `read_delim()` for more control. ## # A tibble: 24 x 2 ## `Flowers/Intensity` Time ## <chr> <dbl> ## 1 62.3/150 1 ## 2 77.4/150 1 ## 3 55.3/300 1 ## 4 54.2/300 1 ## 5 49.6/450 1 ## 6 61.9/450 1 ## 7 39.4/600 1 ## 8 45.7/600 1 ## 9 31.3/750 1 ## 10 44.9/750 1 ## # ... with 14 more rows ``` ] --- count: false .panel1-separate-exercise-1-auto[ ```r flowers <- read_csv2("http://bit.ly/separate_flowers", col_types = cols()) flowers %>% * separate(col = `Flowers/Intensity`, * into = c("Flowers", "Intensity"), * sep = "/") ``` ] .panel2-separate-exercise-1-auto[ ``` ## i Using '\',\'' as decimal and '\'.\'' as grouping mark. Use `read_delim()` for more control. ## # A tibble: 24 x 3 ## Flowers Intensity Time ## <chr> <chr> <dbl> ## 1 62.3 150 1 ## 2 77.4 150 1 ## 3 55.3 300 1 ## 4 54.2 300 1 ## 5 49.6 450 1 ## 6 61.9 450 1 ## 7 39.4 600 1 ## 8 45.7 600 1 ## 9 31.3 750 1 ## 10 44.9 750 1 ## # ... with 14 more rows ``` ] <style> .panel1-separate-exercise-1-auto { color: black; width: 38.6060606060606%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel2-separate-exercise-1-auto { color: black; width: 59.3939393939394%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel3-separate-exercise-1-auto { color: black; width: NA%; hight: 33%; float: left; padding-left: 1%; font-size: 80% } </style> --- count: false .panel1-separate-exercise-2-auto[ ```r *NHANES::NHANESraw ``` ] .panel2-separate-exercise-2-auto[ ``` ## # A tibble: 20,293 x 78 ## ID SurveyYr Gender Age AgeMonths Race1 Race3 Education MaritalStatus ## <int> <fct> <fct> <int> <int> <fct> <fct> <fct> <fct> ## 1 51624 2009_10 male 34 409 White <NA> High School Married ## 2 51625 2009_10 male 4 49 Other <NA> <NA> <NA> ## 3 51626 2009_10 male 16 202 Black <NA> <NA> <NA> ## 4 51627 2009_10 male 10 131 Black <NA> <NA> <NA> ## 5 51628 2009_10 female 60 722 Black <NA> High School Widowed ## 6 51629 2009_10 male 26 313 Mexican <NA> 9 - 11th G~ Married ## 7 51630 2009_10 female 49 596 White <NA> Some Colle~ LivePartner ## 8 51631 2009_10 female 1 12 White <NA> <NA> <NA> ## 9 51632 2009_10 male 10 124 Hispan~ <NA> <NA> <NA> ## 10 51633 2009_10 male 80 NA White <NA> Some Colle~ Married ## # ... with 20,283 more rows, and 69 more variables: HHIncome <fct>, ## # HHIncomeMid <int>, Poverty <dbl>, HomeRooms <int>, HomeOwn <fct>, ## # Work <fct>, Weight <dbl>, Length <dbl>, HeadCirc <dbl>, Height <dbl>, ## # BMI <dbl>, BMICatUnder20yrs <fct>, BMI_WHO <fct>, Pulse <int>, ## # BPSysAve <int>, BPDiaAve <int>, BPSys1 <int>, BPDia1 <int>, BPSys2 <int>, ## # BPDia2 <int>, BPSys3 <int>, BPDia3 <int>, Testosterone <dbl>, ## # DirectChol <dbl>, TotChol <dbl>, UrineVol1 <int>, UrineFlow1 <dbl>, ## # UrineVol2 <int>, UrineFlow2 <dbl>, Diabetes <fct>, DiabetesAge <int>, ## # HealthGen <fct>, DaysPhysHlthBad <int>, DaysMentHlthBad <int>, ## # LittleInterest <fct>, Depressed <fct>, nPregnancies <int>, nBabies <int>, ## # Age1stBaby <int>, SleepHrsNight <int>, SleepTrouble <fct>, ## # PhysActive <fct>, PhysActiveDays <int>, TVHrsDay <fct>, CompHrsDay <fct>, ## # TVHrsDayChild <int>, CompHrsDayChild <int>, Alcohol12PlusYr <fct>, ## # AlcoholDay <int>, AlcoholYear <int>, SmokeNow <fct>, Smoke100 <fct>, ## # SmokeAge <int>, Marijuana <fct>, AgeFirstMarij <int>, RegularMarij <fct>, ## # AgeRegMarij <int>, HardDrugs <fct>, SexEver <fct>, SexAge <int>, ## # SexNumPartnLife <int>, SexNumPartYear <int>, SameSex <fct>, ## # SexOrientation <fct>, WTINT2YR <dbl>, WTMEC2YR <dbl>, SDMVPSU <int>, ## # SDMVSTRA <int>, PregnantNow <fct> ``` ] --- count: false .panel1-separate-exercise-2-auto[ ```r NHANES::NHANESraw %>% * select(starts_with("HHI")) ``` ] .panel2-separate-exercise-2-auto[ ``` ## # A tibble: 20,293 x 2 ## HHIncome HHIncomeMid ## <fct> <int> ## 1 25000-34999 30000 ## 2 20000-24999 22500 ## 3 45000-54999 50000 ## 4 20000-24999 22500 ## 5 10000-14999 12500 ## 6 25000-34999 30000 ## 7 35000-44999 40000 ## 8 35000-44999 40000 ## 9 65000-74999 70000 ## 10 15000-19999 17500 ## # ... with 20,283 more rows ``` ] --- count: false .panel1-separate-exercise-2-auto[ ```r NHANES::NHANESraw %>% select(starts_with("HHI")) %>% * separate(col = HHIncome, * into = c("lower_HHI", "upper_HHI"), * sep = "-") ``` ] .panel2-separate-exercise-2-auto[ ``` ## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 2892 rows [33, ## 39, 41, 94, 96, 99, 111, 112, 121, 122, 135, 142, 143, 148, 157, 158, 170, 173, ## 174, 178, ...]. ## # A tibble: 20,293 x 3 ## lower_HHI upper_HHI HHIncomeMid ## <chr> <chr> <int> ## 1 25000 34999 30000 ## 2 20000 24999 22500 ## 3 45000 54999 50000 ## 4 20000 24999 22500 ## 5 10000 14999 12500 ## 6 25000 34999 30000 ## 7 35000 44999 40000 ## 8 35000 44999 40000 ## 9 65000 74999 70000 ## 10 15000 19999 17500 ## # ... with 20,283 more rows ``` ] <style> .panel1-separate-exercise-2-auto { color: black; width: 38.6060606060606%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel2-separate-exercise-2-auto { color: black; width: 59.3939393939394%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel3-separate-exercise-2-auto { color: black; width: NA%; hight: 33%; float: left; padding-left: 1%; font-size: 80% } </style> --- name: your-turn background-color: var(--my-red) class: inverse .left-column[ ## Your turn<br><svg viewBox="0 0 576 512" style="height:1em;position:relative;display:inline-block;top:.1em;" xmlns="http://www.w3.org/2000/svg"> <path d="M402.3 344.9l32-32c5-5 13.7-1.5 13.7 5.7V464c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V112c0-26.5 21.5-48 48-48h273.5c7.1 0 10.7 8.6 5.7 13.7l-32 32c-1.5 1.5-3.5 2.3-5.7 2.3H48v352h352V350.5c0-2.1.8-4.1 2.3-5.6zm156.6-201.8L296.3 405.7l-90.4 10c-26.2 2.9-48.5-19.2-45.6-45.6l10-90.4L432.9 17.1c22.9-22.9 59.9-22.9 82.7 0l43.2 43.2c22.9 22.9 22.9 60 .1 82.8zM460.1 174L402 115.9 216.2 301.8l-7.3 65.3 65.3-7.3L460.1 174zm64.8-79.7l-43.2-43.2c-4.1-4.1-10.8-4.1-14.8 0L436 82l58.1 58.1 30.9-30.9c4-4.2 4-10.8-.1-14.9z"></path></svg><br> ] .right-column[ ### Day 1 Case Study ---- - What percent of those studied have Excellent health? (don't forget about `n()`) - What percent of those with Diabetes have Excellent health? - Attempt to recreate the following table: (Self Rated Health = `HealthGen`) ``` ## # A tibble: 5 x 3 ## Self_Rated_Health Diabetes_No Diabetes_Yes ## <fct> <dbl> <dbl> ## 1 Excellent 0.965 0.0352 ## 2 Vgood 0.947 0.0535 ## 3 Good 0.884 0.116 ## 4 Fair 0.763 0.237 ## 5 Poor 0.616 0.384 ``` ]
04
:
00
--- count: false .panel1-challenge-1-auto[ ```r *NHANESraw ``` ] .panel2-challenge-1-auto[ ``` ## # A tibble: 20,293 x 78 ## ID SurveyYr Gender Age AgeMonths Race1 Race3 Education MaritalStatus ## <int> <fct> <fct> <int> <int> <fct> <fct> <fct> <fct> ## 1 51624 2009_10 male 34 409 White <NA> High School Married ## 2 51625 2009_10 male 4 49 Other <NA> <NA> <NA> ## 3 51626 2009_10 male 16 202 Black <NA> <NA> <NA> ## 4 51627 2009_10 male 10 131 Black <NA> <NA> <NA> ## 5 51628 2009_10 female 60 722 Black <NA> High School Widowed ## 6 51629 2009_10 male 26 313 Mexican <NA> 9 - 11th G~ Married ## 7 51630 2009_10 female 49 596 White <NA> Some Colle~ LivePartner ## 8 51631 2009_10 female 1 12 White <NA> <NA> <NA> ## 9 51632 2009_10 male 10 124 Hispan~ <NA> <NA> <NA> ## 10 51633 2009_10 male 80 NA White <NA> Some Colle~ Married ## # ... with 20,283 more rows, and 69 more variables: HHIncome <fct>, ## # HHIncomeMid <int>, Poverty <dbl>, HomeRooms <int>, HomeOwn <fct>, ## # Work <fct>, Weight <dbl>, Length <dbl>, HeadCirc <dbl>, Height <dbl>, ## # BMI <dbl>, BMICatUnder20yrs <fct>, BMI_WHO <fct>, Pulse <int>, ## # BPSysAve <int>, BPDiaAve <int>, BPSys1 <int>, BPDia1 <int>, BPSys2 <int>, ## # BPDia2 <int>, BPSys3 <int>, BPDia3 <int>, Testosterone <dbl>, ## # DirectChol <dbl>, TotChol <dbl>, UrineVol1 <int>, UrineFlow1 <dbl>, ## # UrineVol2 <int>, UrineFlow2 <dbl>, Diabetes <fct>, DiabetesAge <int>, ## # HealthGen <fct>, DaysPhysHlthBad <int>, DaysMentHlthBad <int>, ## # LittleInterest <fct>, Depressed <fct>, nPregnancies <int>, nBabies <int>, ## # Age1stBaby <int>, SleepHrsNight <int>, SleepTrouble <fct>, ## # PhysActive <fct>, PhysActiveDays <int>, TVHrsDay <fct>, CompHrsDay <fct>, ## # TVHrsDayChild <int>, CompHrsDayChild <int>, Alcohol12PlusYr <fct>, ## # AlcoholDay <int>, AlcoholYear <int>, SmokeNow <fct>, Smoke100 <fct>, ## # SmokeAge <int>, Marijuana <fct>, AgeFirstMarij <int>, RegularMarij <fct>, ## # AgeRegMarij <int>, HardDrugs <fct>, SexEver <fct>, SexAge <int>, ## # SexNumPartnLife <int>, SexNumPartYear <int>, SameSex <fct>, ## # SexOrientation <fct>, WTINT2YR <dbl>, WTMEC2YR <dbl>, SDMVPSU <int>, ## # SDMVSTRA <int>, PregnantNow <fct> ``` ] --- count: false .panel1-challenge-1-auto[ ```r NHANESraw %>% * count(HealthGen, Diabetes) ``` ] .panel2-challenge-1-auto[ ``` ## # A tibble: 18 x 3 ## HealthGen Diabetes n ## <fct> <fct> <int> ## 1 Excellent No 1262 ## 2 Excellent Yes 46 ## 3 Excellent <NA> 1 ## 4 Vgood No 3274 ## 5 Vgood Yes 185 ## 6 Vgood <NA> 2 ## 7 Good No 4384 ## 8 Good Yes 574 ## 9 Good <NA> 1 ## 10 Fair No 1743 ## 11 Fair Yes 540 ## 12 Fair <NA> 1 ## 13 Poor No 268 ## 14 Poor Yes 167 ## 15 Poor <NA> 1 ## 16 <NA> No 6823 ## 17 <NA> Yes 194 ## 18 <NA> <NA> 827 ``` ] --- count: false .panel1-challenge-1-auto[ ```r NHANESraw %>% count(HealthGen, Diabetes) %>% * drop_na() ``` ] .panel2-challenge-1-auto[ ``` ## # A tibble: 10 x 3 ## HealthGen Diabetes n ## <fct> <fct> <int> ## 1 Excellent No 1262 ## 2 Excellent Yes 46 ## 3 Vgood No 3274 ## 4 Vgood Yes 185 ## 5 Good No 4384 ## 6 Good Yes 574 ## 7 Fair No 1743 ## 8 Fair Yes 540 ## 9 Poor No 268 ## 10 Poor Yes 167 ``` ] --- count: false .panel1-challenge-1-auto[ ```r NHANESraw %>% count(HealthGen, Diabetes) %>% drop_na() %>% * group_by(HealthGen) ``` ] .panel2-challenge-1-auto[ ``` ## # A tibble: 10 x 3 ## # Groups: HealthGen [5] ## HealthGen Diabetes n ## <fct> <fct> <int> ## 1 Excellent No 1262 ## 2 Excellent Yes 46 ## 3 Vgood No 3274 ## 4 Vgood Yes 185 ## 5 Good No 4384 ## 6 Good Yes 574 ## 7 Fair No 1743 ## 8 Fair Yes 540 ## 9 Poor No 268 ## 10 Poor Yes 167 ``` ] --- count: false .panel1-challenge-1-auto[ ```r NHANESraw %>% count(HealthGen, Diabetes) %>% drop_na() %>% group_by(HealthGen) %>% * summarize(percent = n / sum(n), * Diabetes = Diabetes) ``` ] .panel2-challenge-1-auto[ ``` ## # A tibble: 10 x 3 ## # Groups: HealthGen [5] ## HealthGen percent Diabetes ## <fct> <dbl> <fct> ## 1 Excellent 0.965 No ## 2 Excellent 0.0352 Yes ## 3 Vgood 0.947 No ## 4 Vgood 0.0535 Yes ## 5 Good 0.884 No ## 6 Good 0.116 Yes ## 7 Fair 0.763 No ## 8 Fair 0.237 Yes ## 9 Poor 0.616 No ## 10 Poor 0.384 Yes ``` ] --- count: false .panel1-challenge-1-auto[ ```r NHANESraw %>% count(HealthGen, Diabetes) %>% drop_na() %>% group_by(HealthGen) %>% summarize(percent = n / sum(n), Diabetes = Diabetes) %>% * pivot_wider(id_cols = HealthGen, * names_from = "Diabetes", * values_from = "percent", * names_prefix = "Diabetes_" * ) ``` ] .panel2-challenge-1-auto[ ``` ## # A tibble: 5 x 3 ## # Groups: HealthGen [5] ## HealthGen Diabetes_No Diabetes_Yes ## <fct> <dbl> <dbl> ## 1 Excellent 0.965 0.0352 ## 2 Vgood 0.947 0.0535 ## 3 Good 0.884 0.116 ## 4 Fair 0.763 0.237 ## 5 Poor 0.616 0.384 ``` ] --- count: false .panel1-challenge-1-auto[ ```r NHANESraw %>% count(HealthGen, Diabetes) %>% drop_na() %>% group_by(HealthGen) %>% summarize(percent = n / sum(n), Diabetes = Diabetes) %>% pivot_wider(id_cols = HealthGen, names_from = "Diabetes", values_from = "percent", names_prefix = "Diabetes_" ) %>% * rename(Self_Rated_Health = HealthGen) ``` ] .panel2-challenge-1-auto[ ``` ## # A tibble: 5 x 3 ## # Groups: Self_Rated_Health [5] ## Self_Rated_Health Diabetes_No Diabetes_Yes ## <fct> <dbl> <dbl> ## 1 Excellent 0.965 0.0352 ## 2 Vgood 0.947 0.0535 ## 3 Good 0.884 0.116 ## 4 Fair 0.763 0.237 ## 5 Poor 0.616 0.384 ``` ] --- count: false .panel1-challenge-1-auto[ ```r NHANESraw %>% count(HealthGen, Diabetes) %>% drop_na() %>% group_by(HealthGen) %>% summarize(percent = n / sum(n), Diabetes = Diabetes) %>% pivot_wider(id_cols = HealthGen, names_from = "Diabetes", values_from = "percent", names_prefix = "Diabetes_" ) %>% rename(Self_Rated_Health = HealthGen) %>% * ungroup() ``` ] .panel2-challenge-1-auto[ ``` ## # A tibble: 5 x 3 ## Self_Rated_Health Diabetes_No Diabetes_Yes ## <fct> <dbl> <dbl> ## 1 Excellent 0.965 0.0352 ## 2 Vgood 0.947 0.0535 ## 3 Good 0.884 0.116 ## 4 Fair 0.763 0.237 ## 5 Poor 0.616 0.384 ``` ] <style> .panel1-challenge-1-auto { color: black; width: 38.6060606060606%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel2-challenge-1-auto { color: black; width: 59.3939393939394%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel3-challenge-1-auto { color: black; width: NA%; hight: 33%; float: left; padding-left: 1%; font-size: 80% } </style> --- class: left # Up Next <img src="img/stringr.png" class="title-hex"><img src="img/purrr.png" class="title-hex"><img src="img/lubridate.png" class="title-hex"><img src="img/glue.png" class="title-hex"><img src="img/forcats.png" class="title-hex"><img src="img/dplyr.png" class="title-hex"> ---- .pull-left[ ### Day 2 - `case_when` & `ifelse` - factors and `forcats` - `lubridate` - `stringr` & regex - `glue` ] .pull-right[ ### Day 3 - relational data & joins - `across()` - `rowwise()` - `distinct()` - `purrr()` ] --- class: goodbye-slide, inverse, middle, left .pull-left[ <img src="https://kelseygonzalez.github.io/author/kelsey-e.-gonzalez/avatar.png" class = "rounded"/> # Thank you! ### Here's where you can find me... .right[ [kelseygonzalez.github.io <svg viewBox="0 0 512 512" style="height:1em;position:relative;display:inline-block;top:.1em;" xmlns="http://www.w3.org/2000/svg"> <path d="M326.612 185.391c59.747 59.809 58.927 155.698.36 214.59-.11.12-.24.25-.36.37l-67.2 67.2c-59.27 59.27-155.699 59.262-214.96 0-59.27-59.26-59.27-155.7 0-214.96l37.106-37.106c9.84-9.84 26.786-3.3 27.294 10.606.648 17.722 3.826 35.527 9.69 52.721 1.986 5.822.567 12.262-3.783 16.612l-13.087 13.087c-28.026 28.026-28.905 73.66-1.155 101.96 28.024 28.579 74.086 28.749 102.325.51l67.2-67.19c28.191-28.191 28.073-73.757 0-101.83-3.701-3.694-7.429-6.564-10.341-8.569a16.037 16.037 0 0 1-6.947-12.606c-.396-10.567 3.348-21.456 11.698-29.806l21.054-21.055c5.521-5.521 14.182-6.199 20.584-1.731a152.482 152.482 0 0 1 20.522 17.197zM467.547 44.449c-59.261-59.262-155.69-59.27-214.96 0l-67.2 67.2c-.12.12-.25.25-.36.37-58.566 58.892-59.387 154.781.36 214.59a152.454 152.454 0 0 0 20.521 17.196c6.402 4.468 15.064 3.789 20.584-1.731l21.054-21.055c8.35-8.35 12.094-19.239 11.698-29.806a16.037 16.037 0 0 0-6.947-12.606c-2.912-2.005-6.64-4.875-10.341-8.569-28.073-28.073-28.191-73.639 0-101.83l67.2-67.19c28.239-28.239 74.3-28.069 102.325.51 27.75 28.3 26.872 73.934-1.155 101.96l-13.087 13.087c-4.35 4.35-5.769 10.79-3.783 16.612 5.864 17.194 9.042 34.999 9.69 52.721.509 13.906 17.454 20.446 27.294 10.606l37.106-37.106c59.271-59.259 59.271-155.699.001-214.959z"></path></svg>](https://kelseygonzalez.github.io/)<br/> [@KelseyEGonzalez <svg viewBox="0 0 512 512" style="height:1em;position:relative;display:inline-block;top:.1em;" xmlns="http://www.w3.org/2000/svg"> <path d="M459.37 151.716c.325 4.548.325 9.097.325 13.645 0 138.72-105.583 298.558-298.558 298.558-59.452 0-114.68-17.219-161.137-47.106 8.447.974 16.568 1.299 25.34 1.299 49.055 0 94.213-16.568 130.274-44.832-46.132-.975-84.792-31.188-98.112-72.772 6.498.974 12.995 1.624 19.818 1.624 9.421 0 18.843-1.3 27.614-3.573-48.081-9.747-84.143-51.98-84.143-102.985v-1.299c13.969 7.797 30.214 12.67 47.431 13.319-28.264-18.843-46.781-51.005-46.781-87.391 0-19.492 5.197-37.36 14.294-52.954 51.655 63.675 129.3 105.258 216.365 109.807-1.624-7.797-2.599-15.918-2.599-24.04 0-57.828 46.782-104.934 104.934-104.934 30.213 0 57.502 12.67 76.67 33.137 23.715-4.548 46.456-13.32 66.599-25.34-7.798 24.366-24.366 44.833-46.132 57.827 21.117-2.273 41.584-8.122 60.426-16.243-14.292 20.791-32.161 39.308-52.628 54.253z"></path></svg>](https://twitter.com/kelseyegonzalez)<br/> [@KelseyGonzalez <svg viewBox="0 0 496 512" style="position:relative;display:inline-block;top:.1em;height:1em;" xmlns="http://www.w3.org/2000/svg"> <path d="M165.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6zm-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3zm44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9zM244.8 8C106.1 8 0 113.3 0 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C428.2 457.8 496 362.9 496 252 496 113.3 383.5 8 244.8 8zM97.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1zm-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7zm32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1zm-11.4-14.7c-1.6 1-1.6 3.6 0 5.9 1.6 2.3 4.3 3.3 5.6 2.3 1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2z"></path></svg>](https://github.com/KelseyGonzalez) ]] --- class: inverse, middle, left ## Acknowledgements: [Slide template](https://spcanelon.github.io/xaringan-basics-and-beyond/) [Lecture structure](https://american-stat-412612.netlify.app/) [xaringan](https://github.com/yihui/xaringan) [xaringanExtra](https://pkg.garrickadenbuie.com/xaringanExtra/#/) [flipbookr](https://github.com/EvaMaeRey/flipbookr)