---
title: "Shelter in Place Orders"
author: "K Corder, M Mingus,& D Blinova"
#date: "Updated `r format(Sys.time(), '%d %B %Y')`"
output:
 word_document:
    toc: no
 html_document:
    theme: yeti
    toc: yes
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)

library(knitr)
library(lubridate)
library(readr)
library(survival)
library(stargazer)
library(dplyr)
library(tidyr)
library(ggplot2)
library(readxl)
library(survminer)
theme_set(theme_classic(base_size = 14))

```

### Summary

This markdown reproduces the figures and models for the paper.  We rely on two durations - time between WHO announcement and the state crosses the 1:100,000 threshold and first case, and the time between crossing the threshold and implementation of the first SIP0. Data source for cases is the Johns Hopkins archive.  Data soures for the state are described in a separate document



5 January:  WHO announcement dated 5 January
"On 31 December 2019, the WHO China Country Office was informed of cases of pneumonia of unknown etiology (unknown cause) detected in Wuhan City, Hubei Province of China."  Retrieved 19 May from who.int
[https://www.who.int/csr/don/05-january-2020-pneumonia-of-unkown-cause-china/en/]

30 May: End of observation period - so censored states treated as acting on date to permit calculation of group medians and quartiles


```{r read_sipo, results = 'asis', echo=FALSE, cache=TRUE, warning=FALSE, message=FALSE}

# This code chunk reads the data and creates a governor party factor 

data <- read_csv("state_data.csv", 
    col_types = cols(`SIPO #1 Start Date` = col_date(format = "%m-%d-%y")))

# Create factors for Governor party

data$Governor <- factor(data$GovParty,
levels = c("R","D"),
labels = c("Republican governor", "Democratic governor"))

data$StateSen <- factor(data$StateSenate,
levels = c("R","D"),
labels = c("Republican senate", "Democratic senate"))

data$control1<- with(data, interaction(Governor,  StateSen))            
data$control <- factor(data$control1,
labels = c("Republican governor and senate", 
           "Democratic governor only",
           "Republican governor only",
           "Democratic governor and senate"))

#Grab the Johns Hopkins data

jh_data<-read_csv("time_series_covid19_confirmed_US.csv")
jh_subset<- jh_data[c(7, 12:236)]

jh_cases_county <- jh_subset %>% gather(date, cases, -c(Province_State))
jh_cases_county$date<-as.Date(jh_cases_county$date, "%m/%d/%y")
jh_cases <- jh_cases_county %>% 
  group_by(Province_State, date) %>%
  summarise(cases = sum(cases)) 
jh_cases<- jh_cases %>% rename(State=Province_State)
# Merge with data_1 to get state population
test<-inner_join(jh_cases, data)
test$casespercap<-100000*test$cases/(test$population*1000000)

# This selects only the state observation on the day where cases first exceed 1 in 100,000
data<- test %>% filter(casespercap>1) %>%
  group_by(State) %>% 
  arrange(casespercap) %>% 
  slice(1) %>% 
  ungroup()

# time1 is time from WHO announcement to case rate of 1 in 100,000
data$May30<-as.Date("2020-5-30")
data$Jan05<-as.Date("2020-1-5")

data$time1<-as.numeric(data$date-data$Jan05)

data$sipo<-data$`SIPO #1 Start Date`

# tto3 is time from case rate of 1 in 100,000 to SIPO effective date

data$tto3<-as.numeric(data$sipo-data$date)

# Replace missing with time to end of observation period to compare median days to order after First case

data$eoo<-as.numeric(data$May30)-as.numeric(data$date)
data$tto3_uncen<-data$tto3
data$tto3_uncen[is.na(data$tto3)]<-data$eoo[is.na(data$tto3)]

data_model<-select(data, time1, tto3, tto3_uncen, taxrevenuepercapita, urban, elderly, population, Governor, control, current)
data_model <- data_model %>% rename(timetocase = time1)
data_model <- data_model %>% rename(timetosipo = tto3)
data_model <- data_model %>% rename(timetosipo_uncen=tto3_uncen)
```

### Box plots

The box plots below summarize the time between to the SIPO based on two different times: time to reach 1:100,000 (time1), time from 1:100,000 case to implementation (tto3). 

The long delays in the Democratic states are the first cases in the United States - Washington,  Illinois and California. 

The box plot includes eight Republican states with a start date entered as the end of the observation period.



```{r sipo_box_plots,  results = 'asis', echo=FALSE, cache=FALSE, warning=FALSE, message=FALSE}
# This code chunk creates simple box plots of time between 5 Jan and case level exceeds 1 per 100,000 population (Figure 1) and the ime between cases reached 1 per 100,000 and the effecive data of the SIPo (Figure 3).  For Figure 3, May 30 (the end of the observation period ) is treated as positive action (so treat states that have not acted as if they did act on 30 May).  This permits accurate reporting of the median and quartiles

#tiff("figure_1.tiff", width=2404, height=1600, res=300)
ggplot(data_model, aes(x=Governor, y=timetocase, color=Governor)) +
  geom_boxplot(outlier.shape=8, outlier.size=4) + xlab("") +  ylab("Days\n") +
   theme(legend.position = "none")+
  scale_color_manual(values=c("red", "blue"))
#dev.off()

#tiff("figure_3.tiff", width=2404, height=1600, res=300)
ggplot(data_model, aes(x=Governor, y=timetosipo_uncen, color=Governor)) + 
  geom_boxplot(outlier.shape=8, outlier.size=4) + 
  ylab("Days\n") + theme(legend.position = "none")  + xlab("") +
  scale_color_manual(values=c("red", "blue"))
#dev.off()

  
```

\newpage
#### Correlation between potential covariates

Table 2

```{r correl, echo=FALSE}

data_check2<-data %>% select(FAA, shc, `Est Tot Pop`, `%Urban` , trpc, `MedHouse$`, `%CardioDis`, `%65orOver`, `%Diabetes`)
d<-cor(data_check2)
colnames(d) <- c("FAA Core 30", "Hospital", "State pop.", "Urban" , "Taxes", "Income", "CVD", "Elderly",  "Diabetes")
rownames(d) <- c("FAA Core 30", "Hospital capacity", "Total population", "Urban", "Tax revenue per capita", "Income", "Cardiovascular", "Elderly",  "Diabetes")
round(d,2)

#rownames(d) <- c("alpha", "beta", NA, "$a[0]", "$ a[beta]")

#round(cor(data_check2), 2)

```
\newpage
#### Figures 2 and 4 with statistical tests

Figure 2
```{r sipo_logrank7, echo=FALSE, cache=TRUE, warning=FALSE, message=FALSE}

km.fit1 <- survfit(Surv(timetocase) ~ control, data = data_model)

#tiff("figure_2.tiff", width=2404, height=1600, res=300)
ggsurv0<-ggsurvplot(km.fit1, data=data_model, xlab = "Days since WHO announcment", xlim=c(60,80),
ylab = "Proportion of states with more \nthan 1 case  per 100,000", fun="event", legend="bottom",  legend.labs = c("Republican governor and senate (n=22)", "Democratic governor only (n=9)", "Republican governor only (n=4)", "Democratic governor and senate (n=15)"), legend.title=" " ) 
ggsurv0$plot + guides(colour = guide_legend(nrow = 4))
#dev.off()


test <- survdiff(Surv(timetocase) ~ control, data = data_model)
test
data %>% group_by(control) %>% summarize(min=min(time1), max=max(time1), median=median(time1))

```


\newpage
Figure 4
```{r sipo_logrank2, echo=FALSE, cache=TRUE, warning=FALSE, message=FALSE}

#### See note about this uncensored data

km.fit2 <- survfit(Surv(timetosipo_uncen, current) ~ control, data = data_model)

#tiff("figure_4.tiff", width=2404, height=1600, res=300)
ggsurv1<-ggsurvplot(km.fit2, data=data_model,  xlab = "Number of days after reaching 1:100,000", ylab = "Proportion with SIPO\n", fun="event", legend="bottom",  legend.labs = c("Republican governor and senate (n=22)", "Democratic governor only (n=9)", "Republican governor only (n=4)", "Democratic governor and senate (n=15)"), legend.title=" " ) 
ggsurv1$plot + guides(colour = guide_legend(nrow = 4))
#dev.off()


test <- survdiff(Surv(timetosipo_uncen) ~ control, data = data_model)
test

# This excludes right-censored
data_model %>% filter(!is.na(timetosipo)) %>% group_by(control) %>% summarize(n=n(), min=min(timetosipo), median=median(timetosipo), max=max(timetosipo))


```




### Parametric models 

Weibull models for two durations are below  Table 3 is time from WHO announcement to 1:100,000
Table 4 is time from 1:100,000 to SIPO for 42 states that implemented SIPOs.  Table 5 includes the eight states that never implemented with an aciton data of 30 May, the end of the observation period.

\newpage
#### Time to 1:100,000
Table 3
```{r who_case, echo=FALSE, cache=TRUE, warning=FALSE, message=FALSE}

model1 <- survreg(Surv(timetocase, current) ~ control + urban + elderly + taxrevenuepercapita + population , data = data_model, dist = "weibull" )
#summary(model1)

stargazer(model1, style="qje", type="text" , digits=2)

pred11 <- predict(model1, newdata=list(control="Republican governor and senate", urban=mean(data$urban), elderly=mean(data$elderly), taxrevenuepercapita=mean(data$taxrevenuepercapita), population=mean(data$population),  type='response')) 

pred12 <- predict(model1, newdata=list(control="Democratic governor and senate", urban=mean(data$urban), elderly=mean(data$elderly), taxrevenuepercapita=mean(data$taxrevenuepercapita), population=mean(data$population),  type='response')) 

pred13 <- predict(model1, newdata=list(control="Republican governor only", urban=mean(data$urban), elderly=mean(data$elderly), taxrevenuepercapita=mean(data$taxrevenuepercapita), population=mean(data$population),  type='response')) 

pred14 <- predict(model1, newdata=list(control="Democratic governor only", urban=mean(data$urban), elderly=mean(data$elderly), taxrevenuepercapita=mean(data$taxrevenuepercapita), population=mean(data$population),  type='response')) 

pred15 <- predict(model1, newdata=list(control="Republican governor only", urban=38,7, elderly=mean(data$elderly), taxrevenuepercapita=mean(data$taxrevenuepercapita), population=mean(data$population),  type='response')) 

pred16 <- predict(model1, newdata=list(control="Republican governor only", urban=94.9, elderly=mean(data$elderly), taxrevenuepercapita=mean(data$taxrevenuepercapita), population=mean(data$population),  type='response')) 
```


\newpage
#### Time to SIPO - EXCLDUES CENSORED OBSERVATIONS
Table 4

```{r case_SIPO,  echo=FALSE, cache=TRUE, warning=FALSE, message=FALSE}

data_model$tto3_a<-data_model$timetosipo+1



model3 <- survreg(Surv(tto3_a, current) ~ control + urban + elderly + taxrevenuepercapita + population , data = data_model, dist = "weibull" )


stargazer(model3, style="qje", type="text" , digits=2)

pred31 <- predict(model3, newdata=list(control="Republican governor and senate", urban=mean(data$urban), elderly=mean(data$elderly), taxrevenuepercapita=mean(data$taxrevenuepercapita), population=mean(data$population)), type='response')

pred32 <- predict(model3, newdata=list(control="Democratic governor and senate", urban=mean(data$urban), elderly=mean(data$elderly), taxrevenuepercapita=mean(data$taxrevenuepercapita), population=mean(data$population)), type='response')

pred33 <- predict(model3, newdata=list(control="Republican governor only", urban=mean(data$urban), elderly=mean(data$elderly), taxrevenuepercapita=mean(data$taxrevenuepercapita), population=mean(data$population)), type='response')

pred34 <- predict(model3, newdata=list(control="Democratic governor only", urban=mean(data$urban), elderly=mean(data$elderly), taxrevenuepercapita=mean(data$taxrevenuepercapita), population=mean(data$population)), type='response')


```


\newpage
### WITH CENSORED OBSERVATIONS ADDED IN
Table 5

```{r case_SIPOa, echo=FALSE, cache=TRUE, warning=FALSE, message=FALSE}
# Since one state enacted an SIPO on the same day cases reached 1 per 100,000 I had to add one day to the durations for all states

data_model$tto3_uncen_a<-data_model$timetosipo_uncen+1

model5 <- survreg(Surv(tto3_uncen_a, current) ~ control + urban + elderly + taxrevenuepercapita + population , data = data_model, dist = "weibull" )

stargazer(model5, style="qje", type="text" , digits=2)


pred55 <- predict(model5, newdata=list(control="Republican governor only", urban=mean(data$urban), elderly=mean(data$elderly), taxrevenuepercapita=mean(data$taxrevenuepercapita), margin=-15, population=mean(data$population),  type='responsef')) 

pred56 <- predict(model5, newdata=list(control="Republican governor only", urban=mean(data$urban), elderly=mean(data$elderly), taxrevenuepercapita=mean(data$taxrevenuepercapita), margin=15, population=mean(data$population),  type='response')) 


pred51 <- predict(model5, newdata=list(control="Republican governor and senate", urban=mean(data$urban), elderly=mean(data$elderly), taxrevenuepercapita=mean(data$taxrevenuepercapita), population=mean(data$population), type='response')) 

pred51 <- predict(model5, newdata=list(control="Republican governor and senate", urban=mean(data$urban), elderly=mean(data$elderly), taxrevenuepercapita=mean(data$taxrevenuepercapita), population=mean(data$population), type='response')) 

pred52 <- predict(model5, newdata=list(control="Democratic governor and senate", urban=mean(data$urban), elderly=mean(data$elderly), taxrevenuepercapita=mean(data$taxrevenuepercapita), population=mean(data$population), type='response')) 

pred53 <- predict(model5, newdata=list(control="Republican governor only", urban=mean(data$urban), elderly=mean(data$elderly), taxrevenuepercapita=mean(data$taxrevenuepercapita), population=mean(data$population),  type='response')) 

pred54 <- predict(model5, newdata=list(control="Democratic governor only", urban=mean(data$urban), elderly=mean(data$elderly), taxrevenuepercapita=mean(data$taxrevenuepercapita), population=mean(data$population)), type='response')

pred55 <- predict(model5, newdata=list(control="Republican governor only", urban=mean(data$urban), elderly=mean(data$elderly), taxrevenuepercapita=mean(data$taxrevenuepercapita), population=1,  type='responsef')) 

pred56 <- predict(model5, newdata=list(control="Republican governor only", urban=mean(data$urban), elderly=mean(data$elderly), taxrevenuepercapita=mean(data$taxrevenuepercapita), population=10,  type='response')) 
```

````