library(MASS) library(tidyverse) library(GGally) library(labelled) library(dplyr) library(rstatix) library(ivreg) library(simstudy) library(ivmodel) library(ggplot2) library(car) library(lmtest) library(tseries) library(ggfortify) library(plotly) #setwd("____") rm(list = ls()) ####### Tasks ####### #load("ps1_more_realistic_data.Rda") #a) #Get summary statistics: df2 %>% get_summary_stats( education, hours_ME, ln_wage_ME, # columns to calculate for type = "common") #plots from 1.1 d) density <- density(df2$ln_wage_ME) fig <- plot_ly(x = ~density$x, y = ~density$y, type = 'scatter', mode = 'lines', fill = 'tozeroy') fig <- fig %>% layout(xaxis = list(title = 'Log Wage'), yaxis = list(title = 'Density')) fig plot_ly(data=df2, x = ~ln_wage_ME, y = ~hours_ME, name = 'Observations', type = 'scatter',mode = 'markers') #b) Reg with Measurement Error (insignificant results) summary(lm(hours_ME ~ ln_wage_ME,data = df2)) ## for comparison, also load the previous data #load("ps1_clean_data.Rda") df1$ln_wage = log(df1$wage) summary(lm(hours ~ ln_wage,data = df1)) # -> note that this is upward-biased due to OVB, as we are missing control variables #c) t.test(ln_wage_ME ~ wage_premium, var.equal = TRUE, data = df2) #d) t.test(motivation ~ wage_premium, var.equal = TRUE, data = df1) #e) first_stage = lm(ln_wage_ME ~ wage_premium,data = df2) summary(first_stage) ivmodel1 <- ivreg(hours_ME ~ ln_wage_ME | wage_premium, data = df2) summary(ivmodel1) #f) some text #g) first_stage = lm(ln_wage_ME ~ wage_premium+education,data = df2) summary(first_stage) ivmodel2 <- ivreg(hours_ME ~ ln_wage_ME+education | wage_premium+education, data = df2) summary(ivmodel2) #h) # Reg with all variables summary(lm(hours ~ ln_wage + education + motivation ,data = df1)) coefficients <- coef(lm(hours ~ ln_wage + education + motivation, data = df1)) # mean of the outcome variable average_hours <- mean(df1$hours) # Average effect average_effect_ln_wage <- coefficients['ln_wage'] / average_hours average_effect_education <- coefficients['education'] / average_hours average_effect_motivation <- coefficients['motivation'] / average_hours # print effects cat("Average effect of ln_wage:", average_effect_ln_wage, "\n") cat("Average effect of education:", average_effect_education, "\n") cat("Average effect of motivation:", average_effect_motivation, "\n")