library(MASS)
library(tidyverse)
library(GGally)
library(labelled)
library(dplyr)
library(rstatix)
library(ivreg)
library(simstudy)
library(ivmodel)
library(ggplot2)
library(car)
library(lmtest)
library(tseries)
library(ggfortify)
library(plotly)
library(haven)
library(broom)
library(stargazer)

# Project folder on the original author's machine. Every relative path used
# below ("minwage.dta" and the *.tex outputs) resolves against the working
# directory, so only switch when that folder actually exists; on any other
# machine the script runs from the directory it was started in instead of
# aborting at setwd().
project_dir <- r"(C:\Users\KaiHi\Documents\Uni\25_26_WS\Labor_Econ\PS2)"
if (dir.exists(project_dir)) {
  setwd(project_dir)
} else {
  message("Project directory not found; using ", getwd())
}

# load data
minwage <- read_dta("minwage.dta")

# Swap the numeric "state" codes for the state names (not a must, but makes
# the summary tables clearer). The call is namespace-qualified because dplyr
# also exports a recode() with a different interface, and which one a bare
# recode() finds depends on the order of the library() calls above; the
# "old=new; ..." string syntax used here is car's.
minwage$state <- car::recode(minwage$state, "0='PA'; 1='NJ'")

# select data that is available in both survey waves (rows with sample == 1)
df <- minwage %>%
  filter(sample == 1)

######## a) ########
# State-level means of the two wage variables (wage_st, wage_st2) and of the
# wage change dw.
wages <- df %>%
  group_by(state) %>%
  summarise(wage_st2 = mean(wage_st2),
            wage_st = mean(wage_st),
            dw = mean(dw))

# i) within-state change of the mean: wage_st2 minus wage_st, one entry per
#    row of `wages`
difference <- wages$wage_st2 - wages$wage_st

# ii) NJ minus PA for each column. The two states are picked by label, so the
#     sign of the difference does not depend on the row order produced by
#     group_by().
diff_row <- wages %>%
  filter(state %in% c("NJ", "PA")) %>%
  summarise(wage_st2 = wage_st2[state == "NJ"] - wage_st2[state == "PA"],
            wage_st = wage_st[state == "NJ"] - wage_st[state == "PA"],
            dw = dw[state == "NJ"] - dw[state == "PA"],
            state = "diff")

# append the difference as a third row labelled "diff"
wages <- bind_rows(wages, diff_row)
print(wages)

######## b) ########
# State-level means of the two employment variables (fte, fte2) and of the
# employment change dfte.
fte <- df %>%
  group_by(state) %>%
  summarise(fte2 = mean(fte2),
            fte = mean(fte),
            dfte = mean(dfte))

# i) within-state change of the mean: fte2 minus fte, one entry per row of
#    `fte`
difference_fte <- fte$fte2 - fte$fte

# ii) NJ minus PA for each column. The two states are picked by label, so the
#     sign of the difference does not depend on the row order produced by
#     group_by().
diff_row_fte <- fte %>%
  filter(state %in% c("NJ", "PA")) %>%
  summarise(fte2 = fte2[state == "NJ"] - fte2[state == "PA"],
            fte = fte[state == "NJ"] - fte[state == "PA"],
            dfte = dfte[state == "NJ"] - dfte[state == "PA"],
            state = "diff")

# append the difference as a third row labelled "diff"
fte <- bind_rows(fte, diff_row_fte)
print(fte)


######## c) ########
# Turn the state labels back into a 0/1 treatment dummy (PA = 0, NJ = 1).
# From here on `state` no longer holds "NJ"/"PA", so sections a) and b) have
# to be run before this line. Namespace-qualified because dplyr exports a
# different recode(); the string syntax below is car's.
df$state <- car::recode(df$state, "'PA'=0; 'NJ'=1")

# ii)
## for wages: change in wages on the treatment dummy
reg_wages <- lm(dw ~ state, data = df)
summary(reg_wages)

## for employment: change in employment on the treatment dummy
reg_emp <- lm(dfte ~ state, data = df)
summary(reg_emp)

# LaTeX table with both regressions side by side.
# In `notes`, "\\delta" is the R spelling of LaTeX \delta; four backslashes
# would emit "\\delta", i.e. a line break followed by the word "delta".
stargazer(reg_wages, reg_emp,
          type = "latex",
          title = "Difference-in-Differences Estimates (DD Model)",
          dep.var.labels = c("Change in Wages ($\\Delta W$)", "Change in Employment ($\\Delta E$)"),
          covariate.labels = "Treatment Indicator (New Jersey)",
          omit.stat = c("f", "ser"), # Omit F-statistic and Standard Error of Regression
          notes = "The coefficient on the Treatment Indicator ($\\delta_{DD}$) is the Difference-in-Differences estimate.",
          # `out` saves the table to this .tex file; drop the argument to skip
          # writing the file
          out = "dd_results.tex"
)

# iii)
# Same two regressions as in ii), adding co_owned and one dummy per chain
# (as.factor(chain)) as controls.
## for wages
reg_wages_c <- lm(dw ~ state + co_owned + as.factor(chain), data = df)
summary(reg_wages_c)

## for employment
reg_emp_c <- lm(dfte ~ state + co_owned + as.factor(chain), data = df)
summary(reg_emp_c)

# LaTeX table; the chain dummies are hidden via `omit` and summarised in a
# single row (`omit.labels`). No `out` argument is given, so no file is
# written for this table.
# In `notes`, "\\delta" is the R spelling of LaTeX \delta; four backslashes
# would emit "\\delta", i.e. a line break followed by the word "delta".
stargazer(reg_wages_c, reg_emp_c,
          type = "latex",
          title = "Difference-in-Differences Estimates with Controls",
          dep.var.labels = c("Change in Wages ($\\Delta W$)", "Change in Employment ($\\Delta E$)"),
          covariate.labels = c("Treatment Indicator (New Jersey)", "Corporate-Owned Indicator"),
          omit = "factor\\(chain\\)",
          omit.labels = "Chain Fixed Effects (Included)",
          omit.stat = c("f", "ser"), # Omit F-statistic and Standard Error of Regression
          notes = "The coefficient on the Treatment Indicator ($\\delta_{DD}$) is the causal effect of the minimum wage increase, controlling for ownership type and chain fixed effects.",
          align = TRUE
)

######## d) ########
# ii)
# New Jersey subsample (rows with state == 1) plus the dummy low_wage,
# which is 1 where wage_st is below 5 and 0 otherwise.
nj <- df %>%
  filter(state == 1) %>%
  mutate(low_wage = ifelse(wage_st < 5, 1, 0))

# Fit both NJ-only regressions on the dummy first ...
reg_wages_d <- lm(dw ~ low_wage, data = nj)
reg_emp_d <- lm(dfte ~ low_wage, data = nj)

# ... then show the summaries: wages first, employment second.
summary(reg_wages_d)
summary(reg_emp_d)

# One LaTeX table per NJ-only regression, each saved to its own .tex file.
# The titles carry no hard-coded "Table N:" prefix: the title becomes the
# LaTeX caption, which LaTeX numbers itself, so a manual prefix would render
# as "Table 3: Table 1: ..." and go stale whenever tables are added or moved.

# wages
stargazer(reg_wages_d,
          type = "latex",
          title = "Effect of Low Wage Status on Change in Wages (NJ Only)",
          dep.var.labels = "Change in Wages ($\\Delta W$)",
          covariate.labels = "Low Wage Indicator ($W_{pre} < \\$5$)",
          omit.stat = c("f", "ser"), # Omit F-statistic and Standard Error of Regression
          align = TRUE,
          out = "nj_wages_low_wage.tex" # Saves the table to a .tex file
)

# employment
stargazer(reg_emp_d,
          type = "latex",
          title = "Effect of Low Wage Status on Change in Employment (NJ Only)",
          dep.var.labels = "Change in Employment ($\\Delta E$)",
          covariate.labels = "Low Wage Indicator ($W_{pre} < \\$5$)",
          omit.stat = c("f", "ser"), # Omit F-statistic and Standard Error of Regression
          align = TRUE,
          out = "nj_emp_low_wage.tex" # Saves the table to a .tex file
)

######## e) ########
# i)
# Pennsylvania subsample (rows with state == 0) plus the dummy low_wage,
# which is 1 where wage_st is below 5 and 0 otherwise.
pa <- df %>%
  filter(state == 0) %>%
  mutate(low_wage = ifelse(wage_st < 5, 1, 0))

# Fit both PA-only regressions on the dummy first ...
reg_wages_e <- lm(dw ~ low_wage, data = pa)
reg_emp_e <- lm(dfte ~ low_wage, data = pa)

# ... then show the summaries: wages first, employment second.
summary(reg_wages_e)
summary(reg_emp_e)

# Both models side by side in one LaTeX table, saved via `out`.
stargazer(
  reg_wages_e, reg_emp_e,
  type = "latex",
  out = "pa_low_wage_results.tex",
  align = TRUE,
  title = "Effect of Low Wage Status on Changes in Wages and Employment (PA Only)",
  dep.var.labels = c("Change in Wages ($\\Delta W$)",
                     "Change in Employment ($\\Delta E$)"),
  covariate.labels = "Low Wage Indicator ($W_{pre} < \\$5$)",
  # drop the F-statistic and the residual standard error rows
  omit.stat = c("f", "ser"),
  notes = "Estimates the difference in wage/employment changes between low-wage and high-wage restaurants within the control state (PA), which had no minimum wage change."
)

# ii)
# Full sample: NJ is a copy of the state dummy, low_wage is 1 where wage_st
# is below 5 and 0 otherwise.
df <- df %>%
  mutate(NJ = state,
         low_wage = ifelse(wage_st < 5, 1, 0))

# Regressions with both dummies and their interaction; fit both models first,
# then show the summaries (wages first, employment second).
reg_f <- lm(dw ~ NJ + low_wage + NJ * low_wage, data = df)
reg_g <- lm(dfte ~ NJ + low_wage + NJ * low_wage, data = df)

summary(reg_f)
summary(reg_g)

# Both models side by side in one LaTeX table, saved via `out`.
stargazer(
  reg_f, reg_g,
  type = "latex",
  out = "full_dd_interaction_results.tex",
  align = TRUE,
  title = "Full Difference-in-Differences (DD) Regression with Interaction Term",
  dep.var.labels = c("Change in Wages ($\\Delta W$)",
                     "Change in Employment ($\\Delta E$)"),
  covariate.labels = c(
    "Treatment State Indicator (NJ)",
    "Low Wage Indicator ($W_{pre} < \\$5$)",
    "DD Estimate (NJ $\\times$ Low Wage)"
  ),
  # drop the F-statistic and the residual standard error rows
  omit.stat = c("f", "ser"),
  notes = "The coefficient on the interaction term (NJ $\\times$ Low Wage) is the Difference-in-Differences estimate $\\delta_{DD}$."
)