library(MASS) library(tidyverse) library(GGally) library(labelled) library(dplyr) library(rstatix) library(ivreg) library(simstudy) library(ivmodel) library(ggplot2) library(car) library(lmtest) library(tseries) library(ggfortify) library(plotly) library(xtable) library(stargazer) setwd("/Users/andreasluck/Desktop/Labor-Abgaben") rm(list = ls()) ####### Tasks ####### load("ps1_more_realistic_data.Rda") load("ps1_clean_data.Rda") #a) #Get summary statistics: df1$ln_wage = log(df1$wage) table(df1$wage_premium) df1_des <- df1 %>% get_summary_stats( education, motivation, hours, ln_wage, # columns to calculate for type = "common") LATEX_df1_des <- xtable(df1_des) print(LATEX_df1_des, type="latex", include.rownames = FALSE) df2_des <- df2 %>% get_summary_stats( education, hours_ME, ln_wage_ME, # columns to calculate for type = "common") LATEX_df2_des <- xtable(df2_des) print(LATEX_df2_des, type="latex", include.rownames = FALSE) #plots from 1.1 d) density <- density(df2$ln_wage) fig <- plot_ly(x = ~density$x, y = ~density$y, type = 'scatter', mode = 'lines', fill = 'tozeroy') fig <- fig %>% layout(xaxis = list(title = 'Log Wage'), yaxis = list(title = 'Density')) fig plot_ly(data=df2, x = ~ln_wage_ME, y = ~hours_ME, name = 'Observations', type = 'scatter',mode = 'markers') #b) Reg with Measurement Error (insignificant results) reg_measure <- lm(hours_ME ~ ln_wage_ME,data = df2) # Regressionsergebnisse als LaTeX ausgeben stargazer(reg_measure, type = "latex", title = "Reg with Measurement Error", dep.var.labels = "hours_ME", covariate.labels = c("lnwage"), out = "regression_table.tex") ## for comparison, also load the previous data load("ps1_clean_data.Rda") df1$ln_wage = log(df1$wage) OLS <- lm(hours ~ ln_wage,data = df1) # Regressionsergebnisse als LaTeX ausgeben stargazer(OLS, type = "latex", title = "simple OLS", dep.var.labels = "hours", covariate.labels = c("lnwage"), out = "regression_table.tex") # -> note that this is upward-biased due to OVB, as we are missing control variables #c) summary <- df2 %>% group_by(wage_premium) %>% get_summary_stats( education, hours_ME, ln_wage_ME, # columns to calculate for type = "common") LATEX_df2 <- xtable(summary) print(LATEX_df2, type="latex", include.rownames = FALSE) t_test <- t.test(education ~ wage_premium, var.equal = TRUE, data = df2) print(t_test) cat(sprintf("\\textbf{t-Test:} $t(%0.2f)=%0.3f$, $p=%0.4f$", t_test$parameter, t_test$statistic, t_test$p.value)) #d) t_test2 <- t.test(motivation ~ wage_premium, var.equal = TRUE, data = df1) cat(sprintf("\\textbf{t-Test:} $t(%0.2f)=%0.3f$, $p=%0.4f$", t_test2$parameter, t_test2$statistic, t_test2$p.value)) #e) first_stage = lm(ln_wage_ME ~ wage_premium,data = df2) summary(first_stage) # Regressionsergebnisse als LaTeX ausgeben stargazer(first_stage, type = "latex", title = "First Stage", dep.var.labels = "lnwageME", covariate.labels = c("wagepremium"), out = "regression_table.tex") ivmodel1 <- ivreg(hours_ME ~ ln_wage_ME | wage_premium, data = df2) summary(ivmodel1) # Regressionsergebnisse als LaTeX ausgeben stargazer(ivmodel1, type = "latex", title = "IV Regression", dep.var.labels = "hoursME", covariate.labels = c("lnwageME", "wagepremium"), out = "regression_table.tex") #g) first_stage2 = lm(ln_wage_ME ~ wage_premium+education,data = df2) summary(first_stage2) # Regressionsergebnisse als LaTeX ausgeben stargazer(first_stage2, type = "latex", title = "First Stage", dep.var.labels = "lnwageME", covariate.labels = c("wagepremium", "education"), out = "regression_table.tex") ivmodel2 <- ivreg(hours_ME ~ ln_wage_ME+education | wage_premium+education, data = df2) summary(ivmodel2) # Regressionsergebnisse als LaTeX ausgeben stargazer(ivmodel2, type = "latex", title = "First Stage", dep.var.labels = "lnwageME", covariate.labels = c("lnwageME", "education"), out = "regression_table.tex") #h) # Reg with all variables Reg_all <- lm(hours ~ ln_wage + education + motivation ,data = df1) # Regressionsergebnisse als LaTeX ausgeben stargazer(Reg_all, type = "latex", title = "Regression with all variables", dep.var.labels = "hours", covariate.labels = c("lnwage", "education", "motivation"), out = "regression_table.tex") coefficients <- coef(lm(hours ~ ln_wage + education + motivation, data = df1)) # mean of the outcome variable average_hours <- mean(df1$hours) # Percent effect of ln_wage at the average of hours average_effect_ln_wage <- coefficients['ln_wage'] / average_hours average_effect_education <- coefficients['education'] / average_hours average_effect_motivation <- coefficients['motivation'] / average_hours # print effects cat("% effect of ln_wage at the average of hours:", average_effect_ln_wage, "\n") cat("% effect of ln_wage at the average of hours:", average_effect_education, "\n") cat("% effect of ln_wage at the average of hours:", average_effect_motivation, "\n")