On April 30, 2021, more than 14 months into the global pandemic, the U.S. published the first ever national dataset on age of those admitted to hospital with COVID-19. Prior releases included the daily total of adult and child admissions, but lacked a decade-by-decade breakdown.
The most obvious shortcoming of this data are states where age is missing for a large proportion of cases. Nationally that number is less than 5% but in some states, especially in earlier months, it can be higher. Hospitals in Washington state failed to report the age of approximately 40% of patients through March, although things have improved drastically since then. That’s around the time another problem in Washington was resolved
This data was sought by The Lund Report in March. See related github issue (now closed) filed on Careset’s github repo. This piece, published May 19, 2021, uses the data to describe issues with Oregon’s vaccine rollout.
The raw HHS file “provides state-aggregated data for hospital utilization in a timeseries format dating back to January 1, 2020. These are derived from reports with facility-level granularity across three main sources: (1) HHS TeleTracking, (2) reporting provided directly to HHS Protect by state/territorial health departments on behalf of their healthcare facilities” (and a third collection method used prior to July 2020). The file can be accessed here. This analysis was updated with data as of May 22.
A much larger analysis doc that looks for detectable data errors is available here. Known errors from earlier work on this dataset include the daily counts of hospital admissions for suspected pediatric COVID in WA and OR. It also appears that there’s a substantial number of Tennessee hospitals that are missing many variables, Tennessee is generally not used much although it is included in the national total.
# Libraries
library(ggplot2)
library(tidyverse)
library(rio) # dependency of ggpubr, needs to be explicitly loaded?
library(ggpubr)
# setwd("/Users/jacob/github-whitelabel/covid-kids/juvenile_covid_analysis/hhs/")
#library(rmarkdown)
#file = "0508_Timeseries.csv"
#file = "0515_Timeseries.csv"
file = "0522_Timeseries.csv"
hospdf <- read.csv(file, header=TRUE, sep=",")
hospdf$dateob = as.Date(hospdf$date)
# Sum total adult COVID: confirmed + suspected
hospdf$total_adult = hospdf$previous_day_admission_adult_covid_confirmed + hospdf$previous_day_admission_adult_covid_suspected
# Sum total pediatric COVID: confirmed + suspected
hospdf$total_ped = hospdf$previous_day_admission_pediatric_covid_confirmed + hospdf$previous_day_admission_pediatric_covid_suspected
hospdf$total_admissions <- hospdf$total_ped + hospdf$total_adult
# Sum the confirmed plus suspected into a single figure by age
hospdf$total_unk <- hospdf$previous_day_admission_adult_covid_suspected_unknown + hospdf$previous_day_admission_adult_covid_confirmed_unknown
hospdf$total_80 <- hospdf$previous_day_admission_adult_covid_confirmed_80. + hospdf$previous_day_admission_adult_covid_suspected_80.
hospdf$total_70 <- hospdf$previous_day_admission_adult_covid_confirmed_70.79 + hospdf$previous_day_admission_adult_covid_suspected_70.79
hospdf$total_60 <- hospdf$previous_day_admission_adult_covid_confirmed_60.69 + hospdf$previous_day_admission_adult_covid_suspected_60.69
hospdf$total_50 <- hospdf$previous_day_admission_adult_covid_confirmed_50.59 + hospdf$previous_day_admission_adult_covid_suspected_50.59
hospdf$total_40 <- hospdf$previous_day_admission_adult_covid_confirmed_40.49 + hospdf$previous_day_admission_adult_covid_suspected_40.49
hospdf$total_30 <- hospdf$previous_day_admission_adult_covid_confirmed_30.39 + hospdf$previous_day_admission_adult_covid_suspected_30.39
hospdf$total_20 <- hospdf$previous_day_admission_adult_covid_confirmed_20.29 + hospdf$previous_day_admission_adult_covid_suspected_20.29
hospdf$total_1819 <- hospdf$previous_day_admission_adult_covid_confirmed_18.19 + hospdf$previous_day_admission_adult_covid_suspected_18.19
hospdf$unknown_percent <- 100*( (hospdf$previous_day_admission_adult_covid_suspected_unknown + hospdf$previous_day_admission_adult_covid_confirmed_unknown)/hospdf$total_admissions)
hospdf$X80_percent <-100*(hospdf$total_80/hospdf$total_admissions)
hospdf$X70_percent <-100*(hospdf$total_70/hospdf$total_admissions)
hospdf$X60_percent <-100*(hospdf$total_60/hospdf$total_admissions)
hospdf$X50_percent <-100*(hospdf$total_50/hospdf$total_admissions)
hospdf$X40_percent <-100*(hospdf$total_40/hospdf$total_admissions)
hospdf$X30_percent <-100*(hospdf$total_30/hospdf$total_admissions)
hospdf$X20_percent <-100*(hospdf$total_20/hospdf$total_admissions)
hospdf %>% ggplot(aes(x=dateob,y=X80_percent)) + geom_line() + facet_wrap( ~ state, scales = "free" ) + xlab("") + xlim(as.Date('2020-07-01'), as.Date('2021-05-23'))
## Warning: Removed 143 row(s) containing missing values (geom_path).
Using raw population estimates from table B01001 from the ACS 2019 1-year estimates.
'B01001_001E' # Total population
'B01001_003E' # Male 0-4
'B01001_004E' # M 5-9
'B01001_005E' # M 10-15
'B01001_006E' # M 16-17
'B01001_007E' # M 18-19 don't include this in under 18
'B01001_027E' # Female 0-4
'B01001_028E' # F 5-9
'B01001_029E': # F 10-15
'B01001_030E': # F 16-17
'B01001_031E': #F 18-19 don't include this in under 18
'B01001_001E' # Total Pop
'B01001_048E' # Male 80-84
'B01001_049E' # M 85+
'B01001_024E' # Femald 80-84
'B01001_025E' #F 85+
'under_18_total': under_18_total,
'over_18_total':over_18_total # really 18 and over total
'80_total' # B01001_024E + B01001_025E + B01001_048E + B01001_049E
'total_all' # equal to 'B01001_001E'],
'80_percent_adult' # 100*80_total/over_18_total <- the fraction of adults who are over 80
file = "acs_2019_pops.csv"
acs_pops <- read.csv(file, header=TRUE, sep=",", stringsAsFactors=FALSE)
acs_pops %>% select(NAME, B01001_001E, X80_total, X80_percent_adult, under_18_total, over_18_total)
## NAME B01001_001E X80_total X80_percent_adult under_18_total over_18_total
## 1 Alabama 4903185 191999 5.029327 1085597 3817588
## 2 Alaska 731545 14900 2.700273 179749 551796
## 3 Arizona 7278717 298199 5.288089 1639648 5639069
## 4 Arkansas 3017804 121096 5.222374 699012 2318792
## 5 California 39512223 1421015 4.640631 8891064 30621159
## 6 Colorado 5758736 177282 3.937486 1256320 4502416
## 7 Connecticut 3565287 165200 5.821298 727432 2837855
## 8 Delaware 973764 41315 5.366853 203946 769818
## 9 District of Columbia 705749 21967 3.801519 127901 577848
## 10 Florida 21477737 1147804 6.655564 4231955 17245782
## 11 Georgia 10617423 311652 3.840730 2503029 8114394
## 12 Hawaii 1415872 66536 5.963690 300187 1115685
## 13 Idaho 1787065 62067 4.639795 449355 1337710
## 14 Illinois 12671821 494414 5.015795 2814679 9857142
## 15 Indiana 6732219 255024 4.937734 1567421 5164798
## 16 Iowa 3155070 146504 6.020943 721830 2433240
## 17 Kansas 2913314 118880 5.371862 700301 2213013
## 18 Kentucky 4467673 162116 4.677528 1001825 3465848
## 19 Louisiana 4648794 158545 4.448628 1084886 3563908
## 20 Maine 1344212 66122 6.022813 246353 1097859
## 21 Maryland 6045680 229151 4.861699 1332287 4713393
## 22 Massachusetts 6892503 291837 5.267126 1351777 5540726
## 23 Michigan 9986857 414193 5.280356 2142822 7844035
## 24 Minnesota 5639632 224166 5.168732 1302669 4336963
## 25 Mississippi 2976149 107515 4.720272 698420 2277729
## 26 Missouri 6137428 249550 5.237439 1372695 4764733
## 27 Montana 1068778 41747 4.956581 226524 842254
## 28 Nebraska 1934408 80773 5.535006 475096 1459312
## 29 Nevada 3080156 96405 4.034663 690737 2389419
## 30 New Hampshire 1359711 58010 5.256000 256020 1103691
## 31 New Jersey 8882190 373302 5.375764 1938024 6944166
## 32 New Mexico 2096829 83392 5.138206 473850 1622979
## 33 New York 19453561 848548 5.498817 4022096 15431465
## 34 North Carolina 10488084 383831 4.684229 2293972 8194112
## 35 North Dakota 762062 31899 5.449504 176706 585356
## 36 Ohio 11689100 486762 5.340668 2574847 9114253
## 37 Oklahoma 3956971 152228 5.066259 952229 3004742
## 38 Oregon 4217737 167793 5.001399 862816 3354921
## 39 Pennsylvania 12801989 606919 5.967936 2632325 10169664
## 40 Rhode Island 1059361 45664 5.335913 203575 855786
## 41 South Carolina 5148714 187736 4.651059 1112300 4036414
## 42 South Dakota 884659 38209 5.708033 215269 669390
## 43 Tennessee 6829174 250222 4.704817 1510752 5318422
## 44 Texas 28995881 815890 3.777400 7396631 21599250
## 45 Utah 3205958 80913 3.555599 930308 2275650
## 46 Vermont 623989 27158 5.320612 113559 510430
## 47 Virginia 8535519 311263 4.661081 1857605 6677914
## 48 Washington 7614893 260496 4.376666 1662965 5951928
## 49 West Virginia 1792147 83413 5.819529 358818 1433329
## 50 Wisconsin 5822434 246651 5.410173 1263412 4559022
## 51 Wyoming 578759 20430 4.595452 134189 444570
## 52 National 328239523 12738703 4.990000 72967785 255271738
Summarize by state, create a national summary, and put them in a single data frame.
state_totals <- hospdf %>% filter (state %in% c("OR", "WA", "CA", "ID", "NV")) %>% group_by(dateob, state) %>% summarise(total_unk = sum(total_unk), total_80=sum(total_80), total_70=sum(total_70), total_60=sum(total_60), total_50=sum(total_50), total_40=sum(total_40), total_30=sum(total_30), total_20=sum(total_20), total_1819=sum(total_1819) , total_adult=sum(total_adult))
## `summarise()` regrouping output by 'dateob' (override with `.groups` argument)
national_totals <- hospdf %>% group_by(dateob) %>% summarise(total_unk = sum(total_unk), total_80=sum(total_80), total_70=sum(total_70), total_60=sum(total_60), total_50=sum(total_50), total_40=sum(total_40), total_30=sum(total_30), total_20=sum(total_20), total_1819=sum(total_1819) , total_adult=sum(total_adult))
## `summarise()` ungrouping output (override with `.groups` argument)
# Set the national total's "state" abbreviation
national_totals$state = 'US'
all_rows <- dplyr::bind_rows(as_data_frame(national_totals),as_data_frame(state_totals))
all_rows_pops <- all_rows %>% left_join(acs_pops, c("state"="state"))
all_rows_pops$X80_hosp_rate = 100*(all_rows_pops$total_80/all_rows_pops$total_adult)
Summary of hospitalizations in Oregon since 1/1/21
hospdf %>% filter(state=='OR') %>% filter(dateob > as.Date('2020-12-31')) %>% summarise(total_unk = sum(total_unk), total_80=sum(total_80), total_70=sum(total_70), total_60=sum(total_60), total_adult=sum(total_adult))
## total_unk total_80 total_70 total_60 total_adult
## 1 451 2152 2344 2455 11497
Summary of hospitalizations in Oregon since 3/1/21
hospdf %>% filter(state=='OR') %>% filter(dateob > as.Date('2021-02-28')) %>% summarise(total_unk = sum(total_unk), total_80=sum(total_80), total_70=sum(total_70), total_60=sum(total_60), total_adult=sum(total_adult))
## total_unk total_80 total_70 total_60 total_adult
## 1 159 1016 1202 1282 5906
Summary of hospitalizations nationally since 1/1/21
hospdf %>% filter(dateob > as.Date('2020-12-31')) %>% summarise(total_unk = sum(total_unk), total_80=sum(total_80), total_70=sum(total_70), total_60=sum(total_60), total_adult=sum(total_adult))
## total_unk total_80 total_70 total_60 total_adult
## 1 80647 314443 355513 368920 1854236
Summary of hospitalizations nationally since 3/1/21
hospdf %>% filter(dateob > as.Date('2021-02-28')) %>% summarise(total_unk = sum(total_unk), total_80=sum(total_80), total_70=sum(total_70), total_60=sum(total_60), total_adult=sum(total_adult))
## total_unk total_80 total_70 total_60 total_adult
## 1 33415 114487 135938 157698 805303
This is smoothed using LOESS. To see the raw components of this visualized as a line chart, see the related (45MB) data vetting document.
all_rows %>% filter (state %in% c("US", "OR", "NV", "ID", "CA")) %>% ggplot( aes(x=dateob,y=100*(total_80/total_adult))) + ylab('Percent Of Daily Hospital Admissions')+xlab('')+geom_smooth( aes(color = state), se=F) + xlim(as.Date('2021-01-01'), as.Date('2021-05-23')) + labs(title = "Percent of COVID Hospital Admissions, Adults Aged 80+") + labs(caption="Graphic: Jacob Fenton. Source: U.S. Health and Human Services Department. Incomplete WA data are excluded") + ylim(0,25) + theme( plot.caption = element_text(size = 8,hjust = 0), axis.title=element_text(size=10)) + scale_color_manual("State", values=c("orange2", "green4", "palegreen4", "blue", "black" ))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 1677 rows containing non-finite values (stat_smooth).
Percent of COVID Hospital Admissions, Adults Aged 80+
# keep a copy
ggsave("hospitalizations.png", width = 7, height = 5)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 1677 rows containing non-finite values (stat_smooth).
Washington is not included in the hospitalization analysis because such a high fraction of ages are unknown.
all_rows %>% filter (state %in% c("US", "OR", "NV", "ID", "CA", "NV", "WA")) %>% ggplot(aes(color=state,x=dateob,y=100*(total_unk/total_adult))) + geom_smooth() + xlab("") + xlim(as.Date('2020-11-01'), as.Date('2021-05-23')) + scale_color_manual(values=c( "deeppink", "palegreen1", "palegreen4", "blue", "black", "turquoise1", "chocolate2"))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 1573 rows containing non-finite values (stat_smooth).
all_rows %>% filter (state %in% c("US", "OR", "NV", "ID", "CA")) %>% ggplot(aes(color=state,x=dateob,y=100*(total_80/total_adult))) + geom_smooth() + xlab("") + xlim(as.Date('2020-11-01'), as.Date('2021-05-23')) + scale_color_manual(values=c("deeppink", "palegreen1", "palegreen4", "blue", "black"))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 1326 rows containing non-finite values (stat_smooth).
all_rows %>% filter (state %in% c("US", "OR", "NV", "ID", "CA")) %>% ggplot(aes(color=state,x=dateob,y=100*(total_70/total_adult))) + geom_smooth() + xlab("") + xlim(as.Date('2020-11-01'), as.Date('2021-05-23'))+ scale_color_manual(values=c("deeppink", "palegreen1", "palegreen4", "blue", "black"))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 1326 rows containing non-finite values (stat_smooth).
all_rows %>% filter (state %in% c("US", "OR", "NV", "ID", "CA")) %>% ggplot(aes(color=state,x=dateob,y=100*(total_60/total_adult))) + geom_smooth() + xlab("") + xlim(as.Date('2020-11-01'), as.Date('2021-05-23'))+ scale_color_manual(values=c("deeppink", "palegreen1", "palegreen4", "blue", "black"))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 1326 rows containing non-finite values (stat_smooth).
all_rows %>% filter (state %in% c("US", "OR", "NV", "ID", "CA")) %>% ggplot(aes(color=state,x=dateob,y=100*(total_50/total_adult))) + geom_smooth() + xlab("") + xlim(as.Date('2020-11-01'), as.Date('2021-05-23'))+ scale_color_manual(values=c("deeppink", "palegreen1", "palegreen4", "blue", "black"))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 1326 rows containing non-finite values (stat_smooth).
all_rows %>% filter (state %in% c("US", "OR", "NV", "ID", "CA")) %>% ggplot(aes(color=state,x=dateob,y=100*(total_40/total_adult))) + geom_smooth() + xlab("") + xlim(as.Date('2020-11-01'), as.Date('2021-05-23'))+ scale_color_manual(values=c("deeppink", "palegreen1", "palegreen4", "blue", "black"))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 1326 rows containing non-finite values (stat_smooth).
all_rows %>% filter (state %in% c("US", "OR", "NV", "ID", "CA")) %>% ggplot(aes(color=state,x=dateob,y=100*(total_30/total_adult))) + geom_smooth() + xlab("") + xlim(as.Date('2020-11-01'), as.Date('2021-05-23'))+ scale_color_manual(values=c("deeppink", "palegreen1", "palegreen4", "blue", "black"))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 1326 rows containing non-finite values (stat_smooth).
all_rows %>% filter (state %in% c("US", "OR", "NV", "ID", "CA")) %>% ggplot(aes(color=state,x=dateob,y=100*(total_20/total_adult))) + geom_smooth() + xlab("") + xlim(as.Date('2020-11-01'), as.Date('2021-05-23'))+ scale_color_manual(values=c("deeppink", "palegreen1", "palegreen4", "blue", "black"))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 1326 rows containing non-finite values (stat_smooth).
all_rows %>% filter (state %in% c("US", "OR", "NV", "ID", "CA")) %>% ggplot(aes(color=state,x=dateob,y=100*(total_1819/total_adult))) + geom_smooth() + xlab("") + xlim(as.Date('2020-11-01'), as.Date('2021-05-23'))+ scale_color_manual(values=c("deeppink", "palegreen1", "palegreen4", "blue", "black"))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 1326 rows containing non-finite values (stat_smooth).
The CDC tracks vaccination by state for those 65+ only. Source: CDC, as aggregated by the Covid Tracking Project. See more here.
file = "cdc_vaccinations_timeseries.csv"
vax_cdc <- read.csv(file, header=TRUE, sep=",", stringsAsFactors=FALSE)
# colnames(vax_cdc)
#vax_cdc %>% select(Date, Location, Series_Complete_65PlusPop_Pct, Administered_65Plus, Dist_Per_100K, Admin_Per_100K)
vax_cdc$dateob = as.Date(vax_cdc$Date)
#####
vax_cdc %>% filter (Location %in% c("US", "OR", "NV", "ID", "CA", "WA")) %>% ggplot(aes(color=Location,x=dateob,y=Series_Complete_65PlusPop_Pct)) + geom_smooth() + xlab("") + xlim(as.Date('2021-03-01'), as.Date('2021-05-23'))+ scale_color_manual("State", values=c("orange2", "green4", "palegreen4", "blue", "black", "chocolate2" )) + labs(title = "Completed Vaccination Rates For Seniors") + labs(caption="Graphic: Jacob Fenton. Source: U.S. Centers for Disease Control and Prevention. These data were not available prior to March 8. ") + theme( plot.caption = element_text(size = 7,hjust = 0), axis.title=element_text(size=10)) + ylim(0,85) + ylab("Percent of 65+ Fully Vaccinated")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 472 rows containing non-finite values (stat_smooth).
Completed Senior Vaccination Rates
ggsave("senior_vaccinations.png", width = 7, height = 5)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 472 rows containing non-finite values (stat_smooth).
senior_completion_vax_plot <- vax_cdc %>% filter (Location %in% c("US", "OR", "NV", "ID", "CA", "WA")) %>% ggplot(aes(color=Location,x=dateob,y=Series_Complete_65PlusPop_Pct)) + geom_smooth() + xlab("") + xlim(as.Date('2021-03-01'), as.Date('2021-05-23')) + ylim(0,85) + ylab("Percent 65+ Totally Vaccinated") + theme(legend.position = c(0.5, 0.15), legend.direction = "horizontal") + scale_color_manual("State", values=c("deeppink", "palegreen1", "palegreen4", "blue", "black", "turquoise1", "chocolate2"))
vax_cdc %>% filter(Location %in% c("OR", "WA", "CA", "NV", "MT", "ID", "AZ", "NM", "UT", "US", "WY", "CO")) %>% ggplot(aes(color=Location,x=dateob,y=Series_Complete_65PlusPop_Pct)) + geom_smooth() + xlab("") + xlim(as.Date('2021-03-01'), as.Date('2021-05-23'))+ labs(title = "Fraction of 65+ Population Completely Vaccinated") + labs(caption="Graphic: Jacob Fenton. Source: CDC. This was not available prior to Mar. 8. ") + theme( plot.caption = element_text(size = 8,hjust = 0), axis.title=element_text(size=10)) + ylim(0,85) + ylab("Percent 65+ Totally Vaccinated") + scale_color_manual(values=c("palegreen", "palegreen1", "palegreen2", "palegreen3", "palegreen4", "khaki", "khaki1", "blue", "black", "khaki2", "khaki3", "khaki4"))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 946 rows containing non-finite values (stat_smooth).
How does it look going back further? Data is messy prior to Nov. 1.
vax_cdc %>% filter(Location %in% c("OR", "WA", "CA", "NV", "MT", "ID", "AZ", "NM", "UT", "US", "WY", "CO")) %>% ggplot(aes(color=Location,x=dateob,y=Series_Complete_65PlusPop_Pct)) + geom_smooth() + xlab("") + xlim(as.Date('2020-11-01'), as.Date('2021-05-23'))+ labs(title = "Fraction of 65+ Population Completely Vaccinated") + labs(caption="Graphic: Jacob Fenton. Source: CDC. This was not available prior to Mar. 8. ") + theme( plot.caption = element_text(size = 8,hjust = 0), axis.title=element_text(size=10)) + ylim(0,85) + ylab("Percent 65+ Totally Vaccinated") + scale_color_manual(values=c("palegreen", "palegreen1", "palegreen2", "palegreen3", "palegreen4", "khaki", "khaki1", "blue", "black", "khaki2", "khaki3", "khaki4"))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 946 rows containing non-finite values (stat_smooth).
Other categories of vaccination data collected by CDC and not included here: Federal entities: Federal board of prisons, Department of Defense, Indian Health Service, Veterans Healthcare Administration
Federal territories: Guam, US Virgin Islands, American Samoa, Puerto Rico, Republic of Palau, Northern Mariana Islands and the Marshall Islands.
vax_cdc %>% filter (!Location %in% c('BP2', 'DD2', 'IH2', 'VA2', 'MH', 'FM', 'AS', 'MP', 'PR', 'GU', 'RP', 'LTC', 'VI')) %>% filter(dateob==as.Date('2021-03-15')) %>% select (Location, dateob, Series_Complete_65PlusPop_Pct) %>% arrange(Series_Complete_65PlusPop_Pct)
## Location dateob Series_Complete_65PlusPop_Pct
## 1 RI 2021-03-15 21.4
## 2 OR 2021-03-15 21.8
## 3 UT 2021-03-15 26.8
## 4 NH 2021-03-15 27.4
## 5 VT 2021-03-15 27.7
## 6 PA 2021-03-15 27.8
## 7 NY 2021-03-15 28.1
## 8 AR 2021-03-15 29.1
## 9 MO 2021-03-15 29.9
## 10 HI 2021-03-15 30.1
## 11 NJ 2021-03-15 30.4
## 12 KY 2021-03-15 32.3
## 13 IL 2021-03-15 32.7
## 14 KS 2021-03-15 32.7
## 15 AL 2021-03-15 33.0
## 16 MD 2021-03-15 33.0
## 17 TN 2021-03-15 33.3
## 18 VA 2021-03-15 33.3
## 19 MI 2021-03-15 33.6
## 20 DC 2021-03-15 34.4
## 21 MA 2021-03-15 34.5
## 22 TX 2021-03-15 34.5
## 23 CA 2021-03-15 35.2
## 24 AZ 2021-03-15 35.6
## 25 US 2021-03-15 35.9
## 26 IA 2021-03-15 36.0
## 27 ME 2021-03-15 36.2
## 28 NM 2021-03-15 36.4
## 29 NV 2021-03-15 36.6
## 30 ID 2021-03-15 36.7
## 31 NE 2021-03-15 37.2
## 32 MS 2021-03-15 37.4
## 33 MN 2021-03-15 38.6
## 34 OH 2021-03-15 38.9
## 35 WV 2021-03-15 39.1
## 36 MT 2021-03-15 39.2
## 37 DE 2021-03-15 39.5
## 38 FL 2021-03-15 40.1
## 39 SC 2021-03-15 41.2
## 40 WA 2021-03-15 41.6
## 41 GA 2021-03-15 41.7
## 42 WI 2021-03-15 41.8
## 43 CT 2021-03-15 42.7
## 44 CO 2021-03-15 44.8
## 45 NC 2021-03-15 46.2
## 46 SD 2021-03-15 46.2
## 47 LA 2021-03-15 46.7
## 48 OK 2021-03-15 48.2
## 49 WY 2021-03-15 48.2
## 50 ND 2021-03-15 48.8
## 51 IN 2021-03-15 50.1
## 52 AK 2021-03-15 55.6
vax_cdc %>% filter (!Location %in% c('BP2', 'DD2', 'IH2', 'VA2', 'MH', 'FM', 'AS', 'MP', 'PR', 'GU', 'RP', 'LTC', 'VI')) %>% filter(dateob==as.Date('2021-04-01')) %>% select (Location, dateob, Series_Complete_65PlusPop_Pct) %>% arrange(Series_Complete_65PlusPop_Pct)
## Location dateob Series_Complete_65PlusPop_Pct
## 1 HI 2021-04-01 39.4
## 2 OR 2021-04-01 40.7
## 3 PA 2021-04-01 42.4
## 4 AR 2021-04-01 42.6
## 5 AL 2021-04-01 42.8
## 6 NY 2021-04-01 43.2
## 7 UT 2021-04-01 44.4
## 8 MO 2021-04-01 46.4
## 9 IL 2021-04-01 46.5
## 10 TN 2021-04-01 47.4
## 11 DC 2021-04-01 47.7
## 12 MS 2021-04-01 47.9
## 13 NJ 2021-04-01 48.3
## 14 AZ 2021-04-01 49.7
## 15 KY 2021-04-01 50.0
## 16 TX 2021-04-01 50.0
## 17 MD 2021-04-01 50.1
## 18 VA 2021-04-01 50.1
## 19 GA 2021-04-01 50.3
## 20 NV 2021-04-01 50.6
## 21 WV 2021-04-01 51.9
## 22 US 2021-04-01 52.0
## 23 NH 2021-04-01 52.0
## 24 NM 2021-04-01 52.3
## 25 VT 2021-04-01 52.4
## 26 DE 2021-04-01 52.7
## 27 KS 2021-04-01 53.0
## 28 CA 2021-04-01 53.2
## 29 MI 2021-04-01 53.6
## 30 MT 2021-04-01 53.7
## 31 FL 2021-04-01 54.1
## 32 SC 2021-04-01 54.8
## 33 OH 2021-04-01 55.8
## 34 MA 2021-04-01 56.0
## 35 OK 2021-04-01 56.4
## 36 ID 2021-04-01 56.6
## 37 LA 2021-04-01 57.5
## 38 ME 2021-04-01 57.9
## 39 NC 2021-04-01 58.0
## 40 WY 2021-04-01 58.1
## 41 AK 2021-04-01 60.3
## 42 IA 2021-04-01 60.8
## 43 WA 2021-04-01 60.8
## 44 IN 2021-04-01 61.8
## 45 WI 2021-04-01 62.2
## 46 NE 2021-04-01 62.5
## 47 CO 2021-04-01 63.0
## 48 MN 2021-04-01 63.3
## 49 ND 2021-04-01 63.5
## 50 RI 2021-04-01 64.0
## 51 CT 2021-04-01 66.4
## 52 SD 2021-04-01 69.7
There are duplicate rows in the original dataset.
vax_cdc %>% filter (!Location %in% c('BP2', 'DD2', 'IH2', 'VA2', 'MH', 'FM', 'AS', 'MP', 'PR', 'GU', 'RP', 'LTC', 'VI')) %>% filter(dateob==as.Date('2021-04-15')) %>% select (Location, dateob, Series_Complete_65PlusPop_Pct) %>% arrange(Series_Complete_65PlusPop_Pct)
## Location dateob Series_Complete_65PlusPop_Pct
## 1 AL 2021-04-15 51.7
## 2 AL 2021-04-15 51.7
## 3 UT 2021-04-15 52.1
## 4 UT 2021-04-15 52.1
## 5 AR 2021-04-15 53.2
## 6 AR 2021-04-15 53.2
## 7 DC 2021-04-15 54.2
## 8 DC 2021-04-15 54.2
## 9 HI 2021-04-15 54.6
## 10 HI 2021-04-15 54.6
## 11 TN 2021-04-15 55.6
## 12 TN 2021-04-15 55.6
## 13 PA 2021-04-15 56.4
## 14 PA 2021-04-15 56.4
## 15 MS 2021-04-15 56.9
## 16 MS 2021-04-15 56.9
## 17 IL 2021-04-15 57.6
## 18 IL 2021-04-15 57.6
## 19 GA 2021-04-15 57.8
## 20 GA 2021-04-15 57.8
## 21 MO 2021-04-15 58.4
## 22 MO 2021-04-15 58.4
## 23 AZ 2021-04-15 58.7
## 24 AZ 2021-04-15 58.7
## 25 WV 2021-04-15 59.7
## 26 WV 2021-04-15 59.7
## 27 CA 2021-04-15 60.1
## 28 CA 2021-04-15 60.1
## 29 NY 2021-04-15 60.2
## 30 NY 2021-04-15 60.2
## 31 NV 2021-04-15 60.4
## 32 NV 2021-04-15 60.4
## 33 OR 2021-04-15 61.4
## 34 OR 2021-04-15 61.4
## 35 NM 2021-04-15 62.2
## 36 NM 2021-04-15 62.2
## 37 NJ 2021-04-15 62.3
## 38 NJ 2021-04-15 62.3
## 39 OK 2021-04-15 62.6
## 40 OK 2021-04-15 62.6
## 41 KY 2021-04-15 62.7
## 42 KY 2021-04-15 62.7
## 43 SC 2021-04-15 63.2
## 44 SC 2021-04-15 63.2
## 45 WY 2021-04-15 63.3
## 46 WY 2021-04-15 63.3
## 47 LA 2021-04-15 63.5
## 48 VA 2021-04-15 63.5
## 49 LA 2021-04-15 63.5
## 50 VA 2021-04-15 63.5
## 51 NC 2021-04-15 63.6
## 52 NC 2021-04-15 63.6
## 53 US 2021-04-15 63.7
## 54 FL 2021-04-15 63.7
## 55 US 2021-04-15 63.7
## 56 FL 2021-04-15 63.7
## 57 MT 2021-04-15 63.8
## 58 MT 2021-04-15 63.8
## 59 ID 2021-04-15 65.5
## 60 MI 2021-04-15 65.5
## 61 ID 2021-04-15 65.5
## 62 MI 2021-04-15 65.5
## 63 MD 2021-04-15 66.1
## 64 MD 2021-04-15 66.1
## 65 DE 2021-04-15 66.5
## 66 DE 2021-04-15 66.5
## 67 OH 2021-04-15 66.6
## 68 OH 2021-04-15 66.6
## 69 IN 2021-04-15 67.5
## 70 IN 2021-04-15 67.5
## 71 KS 2021-04-15 67.8
## 72 KS 2021-04-15 67.8
## 73 ND 2021-04-15 68.4
## 74 ND 2021-04-15 68.4
## 75 NH 2021-04-15 68.5
## 76 NH 2021-04-15 68.5
## 77 CO 2021-04-15 68.8
## 78 CO 2021-04-15 68.8
## 79 AK 2021-04-15 69.7
## 80 AK 2021-04-15 69.7
## 81 MA 2021-04-15 69.8
## 82 MA 2021-04-15 69.8
## 83 WA 2021-04-15 71.7
## 84 WA 2021-04-15 71.7
## 85 NE 2021-04-15 72.2
## 86 NE 2021-04-15 72.2
## 87 IA 2021-04-15 72.5
## 88 IA 2021-04-15 72.5
## 89 VT 2021-04-15 73.5
## 90 VT 2021-04-15 73.5
## 91 TX 2021-04-15 74.1
## 92 TX 2021-04-15 74.1
## 93 WI 2021-04-15 74.8
## 94 WI 2021-04-15 74.8
## 95 MN 2021-04-15 75.3
## 96 MN 2021-04-15 75.3
## 97 CT 2021-04-15 75.7
## 98 CT 2021-04-15 75.7
## 99 ME 2021-04-15 76.0
## 100 ME 2021-04-15 76.0
## 101 SD 2021-04-15 78.0
## 102 SD 2021-04-15 78.0
## 103 RI 2021-04-15 79.1
## 104 RI 2021-04-15 79.1
vax_cdc %>% filter (Location %in% c("US", "OR", "NV", "ID", "CA", "WA")) %>% ggplot(aes(color=Location,x=dateob,y=Administered_Dose1_Recip_65PlusPop_Pct)) + geom_smooth() + xlab("") + xlim(as.Date('2021-03-01'), as.Date('2021-05-23'))+ scale_color_manual(values=c("deeppink", "palegreen1", "palegreen4", "blue", "black", "turquoise1", "chocolate2")) + labs(title = "Fraction of 65+ Population With 1+ Dose ") + labs(caption="Graphic: Jacob Fenton. Source: CDC. This was not available prior to Mar. 8. ") + theme( plot.caption = element_text(size = 8,hjust = 0), axis.title=element_text(size=10)) + ylim(0,95) + ylab("Percent 65+ with 1+ Dose Vaccinated")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 467 rows containing non-finite values (stat_smooth).
vax_cdc %>% filter (Location %in% c("US", "OR", "NV", "ID", "CA", "WA")) %>% ggplot(aes(color=Location,x=dateob,y=Series_Complete_Pop_Pct)) + geom_smooth() + xlab("") + xlim(as.Date('2021-03-01'), as.Date('2021-05-23'))+ scale_color_manual("State", values=c("deeppink", "palegreen1", "palegreen4", "blue", "black", "turquoise1", "chocolate2")) + labs(title = "Fraction of Total Population Completely Vaccinated") + labs(caption="Graphic: Jacob Fenton. Source: CDC. This was not available prior to Mar. 8. ") + theme( plot.caption = element_text(size = 8,hjust = 0), axis.title=element_text(size=10)) + ylim(0,50) + ylab("Percent Totally Vaccinated")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 462 rows containing non-finite values (stat_smooth).
vax_cdc %>% filter (Location %in% c("US", "OR", "NV", "ID", "CA", "WA")) %>% ggplot(aes(color=Location,x=dateob,y=Administered_Dose1_Pop_Pct)) + geom_smooth() + xlab("") + xlim(as.Date('2021-03-01'), as.Date('2021-05-23'))+ scale_color_manual(values=c("deeppink", "palegreen1", "palegreen4", "blue", "black", "turquoise1", "chocolate2")) + labs(title = "Percent Receiving First Dose of Vaccine") + labs(caption="Graphic: Jacob Fenton. Source: CDC. This was not available prior to Mar. 8. ") + theme( plot.caption = element_text(size = 8,hjust = 0), axis.title=element_text(size=10)) + ylim(0,60) + ylab("Percent Receiving First Dose of Vaccine")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 408 rows containing non-finite values (stat_smooth).
vax_cdc %>% filter (Location %in% c("US", "OR", "NV", "ID", "CA", "WA")) %>% ggplot(aes(color=Location,x=dateob,y=Series_Complete_18PlusPop_Pct)) + geom_smooth() + xlab("") + xlim(as.Date('2021-03-01'), as.Date('2021-05-23'))+ scale_color_manual("State", values=c("deeppink", "palegreen1", "palegreen4", "blue", "black", "turquoise1", "chocolate2")) + labs(title = "Completed Adult Vaccination Rate") + labs(caption="Graphic: Jacob Fenton. Source: CDC. This was not available prior to Mar. 8. ") + theme( plot.caption = element_text(size = 8,hjust = 0), axis.title=element_text(size=10)) + ylim(0,55) + ylab("Percent 18+ Totally Vaccinated")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 462 rows containing non-finite values (stat_smooth).
adult_completion_plot <- vax_cdc %>% filter (Location %in% c("US", "OR", "NV", "ID", "CA", "WA")) %>% ggplot(aes(color=Location,x=dateob,y=Series_Complete_18PlusPop_Pct)) + geom_smooth() + xlab("") + xlim(as.Date('2021-03-01'), as.Date('2021-05-23')) + ylim(0,55) + ylab("Percent 18+ Totally Vaccinated") + theme(legend.position = c(0.5, 0.15), legend.direction = "horizontal") + scale_color_manual("State", values=c("deeppink", "palegreen1", "palegreen4", "blue", "black", "turquoise1", "chocolate2"))
This looks a bit weird with one legend
figure <- ggarrange(adult_completion_plot, senior_completion_vax_plot,
ncol = 2, nrow = 1)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 462 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 472 rows containing non-finite values (stat_smooth).
annotate_figure(figure,
top = text_grob("Oregon received similar dose allocations, but lagged vaccinating seniors", size = 14, face = "bold"),
bottom = text_grob("Graphic: Jacob Fenton. Source: U.S. Centers for Disease Control and Prevention as aggregated by The COVID Tracking Project", face = "italic", size = 8))
ggsave("vax_comparison.png", width = 8, height = 4)
vax_cdc %>% filter (Location %in% c("US", "OR", "NV", "ID", "CA", "WA")) %>% ggplot(aes(color=Location,x=dateob,y=Administered_Dose1_Recip_18PlusPop_Pct)) + geom_smooth() + xlab("") + xlim(as.Date('2021-03-01'), as.Date('2021-05-23'))+ scale_color_manual(values=c("deeppink", "palegreen1", "palegreen4", "blue", "black", "turquoise1", "chocolate2")) + labs(title = "Fraction of Adult Population Receiving at Least One Vaccine Dose") + labs(caption="Graphic: Jacob Fenton. Source: CDC. This was not available prior to Mar. 8. ") + theme( plot.caption = element_text(size = 8,hjust = 0), axis.title=element_text(size=10)) + ylim(0,75) + ylab("Percent 18+ With 1+ Dose Vaccine")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 408 rows containing non-finite values (stat_smooth).