library(MASS) library(tidyverse) library(GGally) library(labelled) library(dplyr) library(rstatix) library(ivreg) library(simstudy) library(ivmodel) library(ggplot2) library(car) library(lmtest) library(tseries) library(ggfortify) library(plotly) library(xtable) library(stargazer) setwd("/Users/andreasluck/Desktop/Labor-Abgaben") rm(list = ls()) ####### Task 1.a.) ####### # Generate log wages (ln wage) and interpret a table with descriptive statistics for Education, Motivation, # hours and ln wage. Also calculate correlations and plot the density of wages as well as a histogram # for the years of education. load("ps1_clean_data.Rda") #a) # df1 stands for dataframe 1. generate the log of wage. df1$ln_wage = log(df1$wage) table(df1$wage_premium) #Drop the individuals with wage_premium. #df1 <- subset(df1, wage_premium != 1) #b) des_summary <-get_summary_stats( df1, education, motivation, hours, ln_wage, # columns to calculate for type = "common") LATEX_summary <- xtable(des_summary) print(LATEX_summary, type="latex", include.rownames = FALSE) #Get correlations correlation <-cor(df1[,c(1,2,5,6)], use="pairwise", method="pearson") LATEX_cor <- xtable(correlation) print(LATEX_cor, type="latex", include.rownames = FALSE) #Graph density <- density(df1$ln_wage) fig <- plot_ly(x = ~density$x, y = ~density$y, type = 'scatter', mode = 'lines', fill = 'tozeroy') fig <- fig %>% layout(xaxis = list(title = 'Log Wage'), yaxis = list(title = 'Density')) fig fig2 <- plot_ly(x = ~df1$education, type = "histogram",alpha = 0.6)%>% layout(bargap=0.1,xaxis =list(dtick = 1,tickmode = "linear")) fig2 <- fig2 %>% layout(xaxis = list(title = 'Years of education'), yaxis = list(title = 'Frequency')) fig2 #b) #Create dummy df1$M <- ifelse(df1$motivation >=0, 1, 0) #descriptive Motivation summaryB <-df1 %>% group_by(M) %>% get_summary_stats( education, motivation, hours, ln_wage, # columns to calculate for type = "common") LATEX_summaryB <- xtable(summaryB) print(LATEX_summaryB, type="latex", include.rownames = FALSE) #descriptive education df1$E <- ifelse(df1$education >=14, 1, 0) summaryBB <- df1 %>% group_by(E) %>% get_summary_stats( education, motivation, hours, ln_wage, # columns to calculate for type = "common") LATEX_summaryBB <- xtable(summaryBB) print(LATEX_summaryBB, type="latex", include.rownames = FALSE) #c) plot_ly(data=df1, x = ~ln_wage, y = ~hours, name = 'Observations', type = 'scatter',mode = 'markers') ####### Regressions ####### #d) #Bivariate Regression simple_OLS <- lm(hours ~ ln_wage,data = df1) summary(simple_OLS) #Confidence intervals conf_interval <- confint(simple_OLS) Std_error_wage <- sqrt(diag(vcov(simple_OLS)))[2] c("lower (2.5%)" = simple_OLS$coef[2] - qt(0.975, df = simple_OLS$df) * Std_error_wage, "upper (97.5%)" = simple_OLS$coef[2] + qt(0.975, df = simple_OLS$df) * Std_error_wage) df1$hours_hat = fitted(simple_OLS) df1$res = simple_OLS$residuals #Plot of x, y and the regression line plot_ly(data = df1, x = ~ln_wage, y = ~hours, name = 'Observations', type = 'scatter', mode = 'markers') %>% add_trace(data = df1, y = ~hours_hat, name = 'OLS', mode = 'lines') %>% add_ribbons(data = df1, ymin = ~predict(simple_OLS, interval = "confidence")[, "lwr"], ymax = ~predict(simple_OLS, interval = "confidence")[, "upr"], name = "Confidence 95%", line = list(color = 'transparent'), fillcolor = 'rgba(0,100,80,0.2)', showlegend = TRUE) #e) #Adding education to the regression simple_OLS_education <-lm(hours ~ ln_wage + education,data = df1) summary(simple_OLS_education) # Regressionsergebnisse als LaTeX ausgeben stargazer(simple_OLS_education, type = "latex", title = "simple OLS education", dep.var.labels = "hours", covariate.labels = c("lnwage", "education"), out = "regression_table.tex") #f) # Reg with all variables simple_OLS_education_motivation <- lm(hours ~ ln_wage + education + motivation ,data = df1) summary(simple_OLS_education_motivation) # Regressionsergebnisse als LaTeX ausgeben stargazer(simple_OLS_education_motivation, type = "latex", title = "simple OLS motivation", dep.var.labels = "hours", covariate.labels = c("lnwage","education", "motivation"), out = "regression_table.tex")