library(MASS) library(tidyverse) library(GGally) library(labelled) library(dplyr) library(rstatix) library(ivreg) library(simstudy) library(ivmodel) library(ggplot2) library(car) library(lmtest) library(tseries) library(ggfortify) library(plotly) library(stargazer) library(cobalt) setwd("C:/Users/KaiHi/Documents/Uni/25_26-WS/Labor Econ/Problem_sets/PS2") rm(list = ls()) ####### Tasks ####### load("ps1_more_realistic_data.Rda") #a) #Get summary statistics: df2 %>% get_summary_stats( education, hours_ME,wage_premium, # columns to calculate for type = "common") cor(df2[,c(1,3,4)], use="pairwise", method="pearson") #plots from 1.1 d) density <- density(df2$ln_wage_ME) fig <- plot_ly(x = ~density$x, y = ~density$y, type = 'scatter', mode = 'lines', fill = 'tozeroy') fig <- fig %>% layout(xaxis = list(title = 'Log Wage'), yaxis = list(title = 'Density')) fig plot_ly(data=df2, x = ~ln_wage_ME, y = ~density, name = 'Observations', type = 'scatter',mode = 'markers') #b) Reg with Measurement Error (insignificant results) OLS_ME <- lm(hours_ME ~ ln_wage_ME,data = df2) summary(OLS_ME) # export table stargazer(OLS_ME, title = "Bivariate Regression: Hours Worked on Log Wage", label = "tab:regression", dep.var.labels = "Hours Worked", covariate.labels = "Log Wage", ci = TRUE, single.row = TRUE, header = FALSE) ## for comparison, also load the previous data load("ps1_clean_data.Rda") df1$ln_wage = log(df1$wage) summary(lm(hours ~ ln_wage,data = df1)) # -> note that this is upward-biased due to OVB, as we are missing control variables #c) t.test(education ~ wage_premium, var.equal = TRUE, data = df2) #d) t.test(education ~ wage_premium, var.equal = TRUE, data = df1) bal.tab(motivation ~ wage_premium, data = df1,un = TRUE, thresholds = c(m = 0.1)) #e) first_stage = lm(ln_wage_ME ~ wage_premium,data = df2) summary(first_stage) # export table stargazer(first_stage, title = "first stage", label = "tab:regression", dep.var.labels = "Log Wage", covariate.labels = "wage premium", ci = TRUE, single.row = TRUE, header = FALSE) ivmodel1 <- ivreg(hours_ME ~ ln_wage_ME | wage_premium, data = df2) summary(ivmodel1) # export table stargazer(ivmodel1, title = "IV", label = "tab:regression", dep.var.labels = "hours worked", covariate.labels = "log wage", ci = TRUE, single.row = TRUE, header = FALSE) #f) some text #g) first_stage = lm(ln_wage_ME ~ wage_premium+education,data = df2) summary(first_stage) ivmodel2 <- ivreg(hours_ME ~ ln_wage_ME+education | wage_premium+education, data = df2) summary(ivmodel2) stargazer(first_stage, ivmodel2, title = "First Stage and IV Model Results", label = "tab:regression", header = FALSE, column.labels = c("First Stage (OLS)", "IV Model (2SLS)"), dep.var.labels = c("Log Wage", "Hours Worked"), covariate.labels = c("Wage Premium (Instrument)", "Log Wage", "Education"), ci = TRUE, single.row = TRUE ) #h) # Reg with all variables summary(lm(hours ~ ln_wage + education + motivation ,data = df1)) coefficients <- coef(lm(hours ~ ln_wage + education + motivation, data = df1)) # mean of the outcome variable average_hours <- mean(df1$hours) # Average effect average_effect_ln_wage <- coefficients['ln_wage'] / average_hours average_effect_education <- coefficients['education'] / average_hours average_effect_motivation <- coefficients['motivation'] / average_hours # print effects cat("Average effect of ln_wage:",average_effect_ln_wage, "\n") cat("Average effect of education:", average_effect_education, "\n") cat("Average effect of motivation:", average_effect_motivation, "\n")