library(MASS)
library(tidyverse)
library(GGally)
library(labelled)
library(dplyr)
library(rstatix)
library(ivreg)
library(simstudy)
library(ivmodel)
library(ggplot2)
library(car)
library(lmtest)
library(tseries)
library(ggfortify)
library(plotly)
library(xtable)
library(stargazer)

setwd("/Users/andreasluck/Desktop/Labor-Abgaben")

rm(list = ls())

####### Tasks ####### 
load("ps1_more_realistic_data.Rda") 
load("ps1_clean_data.Rda") 
#a)
#Get summary statistics:
df1$ln_wage = log(df1$wage)
table(df1$wage_premium)
df1_des <- df1 %>% 
  get_summary_stats(
    education, motivation, hours, ln_wage,  # columns to calculate for
    type = "common") 
LATEX_df1_des <- xtable(df1_des)
print(LATEX_df1_des, type="latex", include.rownames = FALSE)

df2_des <- df2 %>% 
  get_summary_stats(
    education, hours_ME, ln_wage_ME,  # columns to calculate for
    type = "common") 
LATEX_df2_des <- xtable(df2_des)
print(LATEX_df2_des, type="latex", include.rownames = FALSE)

#plots from 1.1 d)
density <- density(df2$ln_wage)
fig <- plot_ly(x = ~density$x, y = ~density$y, type = 'scatter', mode = 'lines', fill = 'tozeroy')
fig <- fig %>% layout(xaxis = list(title = 'Log Wage'),
                      yaxis = list(title = 'Density'))
fig

plot_ly(data=df2, x = ~ln_wage_ME, y = ~hours_ME, name = 'Observations', type = 'scatter',mode = 'markers')

#b) Reg with Measurement Error (insignificant results)
reg_measure <- lm(hours_ME ~ ln_wage_ME,data = df2)
# Regressionsergebnisse als LaTeX ausgeben
stargazer(reg_measure, type = "latex", title = "Reg with Measurement Error", 
          dep.var.labels = "hours_ME", 
          covariate.labels = c("lnwage"),
          out = "regression_table.tex") 

## for comparison, also load the previous data
load("ps1_clean_data.Rda")
df1$ln_wage = log(df1$wage)
OLS <- lm(hours ~ ln_wage,data = df1)
# Regressionsergebnisse als LaTeX ausgeben
stargazer(OLS, type = "latex", title = "simple OLS", 
          dep.var.labels = "hours", 
          covariate.labels = c("lnwage"),
          out = "regression_table.tex") 
# -> note that this is upward-biased due to OVB, as we are missing control variables

#c)
summary <- df2 %>% 
  group_by(wage_premium) %>%
  get_summary_stats(
    education, hours_ME, ln_wage_ME,  # columns to calculate for
    type = "common") 
LATEX_df2 <- xtable(summary)
print(LATEX_df2, type="latex", include.rownames = FALSE)

t_test <- t.test(education ~ wage_premium, var.equal = TRUE, data = df2)
print(t_test)
cat(sprintf("\\textbf{t-Test:} $t(%0.2f)=%0.3f$, $p=%0.4f$", 
            t_test$parameter, t_test$statistic, t_test$p.value))
#d)
t_test2 <- t.test(motivation ~ wage_premium, var.equal = TRUE, data = df1)
cat(sprintf("\\textbf{t-Test:} $t(%0.2f)=%0.3f$, $p=%0.4f$", 
            t_test2$parameter, t_test2$statistic, t_test2$p.value))

#e)
first_stage = lm(ln_wage_ME ~ wage_premium,data = df2)
summary(first_stage)
# Regressionsergebnisse als LaTeX ausgeben
stargazer(first_stage, type = "latex", title = "First Stage", 
          dep.var.labels = "lnwageME", 
          covariate.labels = c("wagepremium"),
          out = "regression_table.tex") 

ivmodel1 <- ivreg(hours_ME ~ ln_wage_ME | wage_premium, data = df2)
summary(ivmodel1)
# Regressionsergebnisse als LaTeX ausgeben
stargazer(ivmodel1, type = "latex", title = "IV Regression", 
          dep.var.labels = "hoursME", 
          covariate.labels = c("lnwageME", "wagepremium"),
          out = "regression_table.tex") 

#g) 
first_stage2 = lm(ln_wage_ME ~ wage_premium+education,data = df2)
summary(first_stage2)
# Regressionsergebnisse als LaTeX ausgeben
stargazer(first_stage2, type = "latex", title = "First Stage", 
          dep.var.labels = "lnwageME", 
          covariate.labels = c("wagepremium", "education"),
          out = "regression_table.tex") 

ivmodel2 <- ivreg(hours_ME ~ ln_wage_ME+education | wage_premium+education, data = df2)
summary(ivmodel2)
# Regressionsergebnisse als LaTeX ausgeben
stargazer(ivmodel2, type = "latex", title = "First Stage", 
          dep.var.labels = "lnwageME", 
          covariate.labels = c("lnwageME", "education"),
          out = "regression_table.tex") 


#h)
# Reg with all variables
Reg_all <- lm(hours ~ ln_wage + education + motivation ,data = df1)
# Regressionsergebnisse als LaTeX ausgeben
stargazer(Reg_all, type = "latex", title = "Regression with all variables", 
          dep.var.labels = "hours", 
          covariate.labels = c("lnwage", "education", "motivation"),
          out = "regression_table.tex") 

coefficients <- coef(lm(hours ~ ln_wage + education + motivation, data = df1))

# mean of the outcome variable
average_hours <- mean(df1$hours)

# Percent effect of ln_wage at the average of hours 
average_effect_ln_wage <- coefficients['ln_wage'] / average_hours
average_effect_education <- coefficients['education'] / average_hours
average_effect_motivation <- coefficients['motivation'] / average_hours

# print effects
cat("% effect of ln_wage at the average of hours:", average_effect_ln_wage, "\n")
cat("% effect of ln_wage at the average of hours:", average_effect_education, "\n")
cat("% effect of ln_wage at the average of hours:", average_effect_motivation, "\n")