library(haven)
## Warning: Paket 'haven' wurde unter R Version 4.4.3 erstellt
library(foreign)
## Warning: Paket 'foreign' wurde unter R Version 4.4.3 erstellt
library(lmtest)
## Warning: Paket 'lmtest' wurde unter R Version 4.4.3 erstellt
## Lade nötiges Paket: zoo
## Warning: Paket 'zoo' wurde unter R Version 4.4.3 erstellt
##
## Attache Paket: 'zoo'
## Die folgenden Objekte sind maskiert von 'package:base':
##
## as.Date, as.Date.numeric
library(Hmisc)
## Warning: Paket 'Hmisc' wurde unter R Version 4.4.3 erstellt
##
## Attache Paket: 'Hmisc'
## Die folgenden Objekte sind maskiert von 'package:base':
##
## format.pval, units
library(plm)
## Warning: Paket 'plm' wurde unter R Version 4.4.3 erstellt
library(dplyr)
## Warning: Paket 'dplyr' wurde unter R Version 4.4.3 erstellt
##
## Attache Paket: 'dplyr'
## Die folgenden Objekte sind maskiert von 'package:plm':
##
## between, lag, lead
## Die folgenden Objekte sind maskiert von 'package:Hmisc':
##
## src, summarize
## Die folgenden Objekte sind maskiert von 'package:stats':
##
## filter, lag
## Die folgenden Objekte sind maskiert von 'package:base':
##
## intersect, setdiff, setequal, union
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
library(lfe)
## Warning: Paket 'lfe' wurde unter R Version 4.4.3 erstellt
## Lade nötiges Paket: Matrix
##
## Attache Paket: 'lfe'
## Das folgende Objekt ist maskiert 'package:plm':
##
## sargan
## Das folgende Objekt ist maskiert 'package:lmtest':
##
## waldtest
library(tidyr)
## Warning: Paket 'tidyr' wurde unter R Version 4.4.3 erstellt
##
## Attache Paket: 'tidyr'
## Die folgenden Objekte sind maskiert von 'package:Matrix':
##
## expand, pack, unpack
library(readr)
## Warning: Paket 'readr' wurde unter R Version 4.4.3 erstellt
library(coefplot)
## Warning: Paket 'coefplot' wurde unter R Version 4.4.3 erstellt
## Lade nötiges Paket: ggplot2
## Warning: Paket 'ggplot2' wurde unter R Version 4.4.3 erstellt
library(lmtest)
library(sandwich)
## Warning: Paket 'sandwich' wurde unter R Version 4.4.3 erstellt
library(knitr)
## Warning: Paket 'knitr' wurde unter R Version 4.4.3 erstellt
library(rmarkdown)
## Warning: Paket 'rmarkdown' wurde unter R Version 4.4.3 erstellt
#library(tinytex)
#set the path
setwd("C:/Users/bdrammeh/Desktop/Labour Economics/assignmnt")
# Clear desk
rm(list=ls())
######### Question 4.1 #########
# write some verbal answers in your solution pdf...
######### Question 4.2 #########
#read in the data
twins_long <- read_dta("twins_long.dta")
#descriptive analysis
attach(twins_long)
summary(twins_long)
## famid age twin educ lwage
## Min. : 1 Min. :18.78 Min. :1.0 Min. : 8.00 Min. :0.5108
## 1st Qu.: 38 1st Qu.:29.05 1st Qu.:1.0 1st Qu.:12.00 1st Qu.:1.9459
## Median : 75 Median :35.20 Median :1.5 Median :14.00 Median :2.3770
## Mean : 75 Mean :36.56 Mean :1.5 Mean :14.11 Mean :2.3786
## 3rd Qu.:112 3rd Qu.:43.02 3rd Qu.:2.0 3rd Qu.:16.00 3rd Qu.:2.7485
## Max. :149 Max. :64.14 Max. :2.0 Max. :20.00 Max. :4.6052
## male white
## Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:1.0000
## Median :0.0000 Median :1.0000
## Mean :0.4564 Mean :0.9362
## 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000
# plot for age
table(age)
## age
## 18.7816600799561 19.2224502563477 19.8987007141113 20.4298400878906
## 2 2 2 2
## 21.349760055542 21.7412700653076 21.8042392730713 21.9274501800537
## 2 2 2 2
## 22.0725498199463 22.1546897888184 22.4722805023193 23.0006809234619
## 2 2 2 2
## 23.682409286499 23.8904895782471 23.9698791503906 24.1177291870117
## 2 2 2 2
## 24.6105403900146 24.6242294311523 24.6406593322754 24.7310104370117
## 2 2 2 2
## 24.8377799987793 25.1772804260254 25.541410446167 25.9411392211914
## 2 2 2 2
## 26.6693992614746 26.9267597198486 27.175910949707 27.2306594848633
## 2 2 2 2
## 27.2772102355957 27.4195709228516 27.748119354248 28.1533203125
## 2 2 2 2
## 28.323070526123 28.4134197235107 28.5311393737793 28.7364807128906
## 2 4 2 2
## 29.054069519043 29.185489654541 29.338809967041 29.3908290863037
## 2 2 2 2
## 29.8617401123047 30.1492099761963 30.2258701324463 30.3518104553223
## 2 2 2 2
## 30.4503803253174 30.6529808044434 30.8199901580811 30.967830657959
## 2 2 2 2
## 31.0307998657227 31.0663890838623 31.2087593078613 31.4578990936279
## 2 2 2 2
## 31.7672805786133 31.808349609375 31.8987007141113 32.290210723877
## 2 2 2 2
## 32.3477096557617 32.3586616516113 32.4462699890137 32.594108581543
## 2 2 2 2
## 32.84326171875 33.0376510620117 33.2512016296387 33.4072608947754
## 2 2 2 2
## 33.4620094299316 33.590690612793 33.6399688720703 33.741268157959
## 2 2 2 2
## 33.9548301696777 34.6338081359863 34.8665313720703 34.9787788391113
## 2 2 2 2
## 35.134838104248 35.2005500793457 35.6084899902344 35.6632499694824
## 2 2 2 2
## 36.07666015625 36.3641395568848 36.3723487854004 36.481861114502
## 2 2 2 2
## 36.6214904785156 36.9418182373047 36.9445610046387 36.9856300354004
## 2 2 2 2
## 37.0239601135254 37.2183418273926 37.2813110351562 37.284049987793
## 2 2 2 2
## 37.3744010925293 37.722110748291 37.8398399353027 37.8507881164551
## 2 2 2 2
## 38.026008605957 38.5325088500977 38.7707099914551 39.3401794433594
## 2 2 2 2
## 39.351131439209 39.6194381713867 40.0492782592773 40.3121109008789
## 2 2 2 2
## 40.3586616516113 40.5010299682617 40.8733711242676 41.0841903686523
## 2 2 2 2
## 41.3579788208008 42.086238861084 42.1218299865723 42.5489387512207
## 2 2 2 2
## 42.7707099914551 42.8501014709473 43.0198516845703 43.2005500793457
## 2 2 2 2
## 43.3045806884766 43.4223098754883 43.5701599121094 43.8548889160156
## 2 2 2 2
## 44.0191688537598 44.2190284729004 44.2327194213867 44.7063598632812
## 2 2 2 2
## 44.8487281799316 45.3935699462891 45.5550994873047 45.6125907897949
## 2 2 2 2
## 46.3655014038086 47.6057510375977 47.7426414489746 48.1204605102539
## 2 2 2 2
## 48.2491493225098 50.050651550293 50.1492118835449 50.5489387512207
## 2 2 2 2
## 51.331958770752 51.8412094116211 51.901439666748 52.9336090087891
## 2 2 2 2
## 53.6372299194336 53.6646118164062 53.8535194396973 56.479118347168
## 2 2 2 2
## 57.6865196228027 58.1327896118164 61.1581115722656 61.4702301025391
## 2 2 2 2
## 61.872688293457 61.9575614929199 63.7179985046387 64.1396331787109
## 2 2 2 2
fit2 <- lm(lwage ~ poly(age, degree = 2, raw = TRUE), data = data.frame(age, lwage))
fit4 <- lm(lwage ~ poly(age, degree = 4, raw = TRUE), data = data.frame(age, lwage))
age_seq <- seq(min(age), max(age), length.out = 100)
lwage_pred2 <- predict(fit2, newdata = data.frame(age = age_seq))
lwage_pred4 <- predict(fit4, newdata = data.frame(age = age_seq))
plot(0, type = "n", xlim = range(age_seq), ylim = range(c(lwage_pred2, lwage_pred4)),
xlab = "Age", ylab = "log wage", main = "Life-Cycle Profiles with Polynomial Fits")
lines(age_seq, lwage_pred2, col = "red", lwd = 2, lty = 1, label = "2nd Order")
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "label" ist kein
## Grafikparameter
lines(age_seq, lwage_pred4, col = "blue", lwd = 2, lty = 2, label = "4th Order")
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "label" ist kein
## Grafikparameter
legend("bottomright", legend = c("2nd Order", "4th Order"), col = c("red", "blue"), lty = 2:3, cex = 0.8)

# plot for educ
table(educ)
## educ
## 8 10 11 12 13 14 15 16 17 18 19 20
## 1 3 1 95 36 46 24 61 3 19 4 5
fit2 <- lm(lwage ~ poly(educ, degree = 2, raw = TRUE), data = data.frame(educ,lwage))
fit4 <- lm(lwage ~ poly(educ, degree = 4, raw = TRUE), data = data.frame(educ,lwage))
educ_seq <- seq(min(educ), max(educ), length.out = 100)
lwage_pred2 <- predict(fit2, newdata = data.frame(educ = educ_seq))
lwage_pred4 <- predict(fit4, newdata = data.frame(educ = educ_seq))
plot(0, type = "n", xlim = range(educ_seq), ylim = range(c(lwage_pred2, lwage_pred4)),
xlab = "Years of Education", ylab = "log wage", main = "Education--Wage Profiles with Polynomial Fits")
lines(educ_seq, lwage_pred2, col = "red", lwd = 2, lty = 1, label = "2nd Order")
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "label" ist kein
## Grafikparameter
lines(educ_seq, lwage_pred4, col = "blue", lwd = 2, lty = 2, label = "4th Order")
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "label" ist kein
## Grafikparameter
legend("bottomright", legend = c("2nd Order", "4th Order"), col = c("red", "blue"), lty = 2:3, cex = 0.8)

# plot for potential experience (usually people force within 0-40 years)
potexp <- pmin(pmax(age - educ - 6,0),40)
table(potexp)
## potexp
## 0 0.0725498199462891 0.154689788818359 0.429840087890625
## 2 2 2 2
## 0.472280502319336 0.610540390014648 0.682409286499023 0.731010437011719
## 1 2 1 2
## 0.898700714111328 1.22245025634766 1.34976005554199 1.47228050231934
## 2 2 2 1
## 1.68240928649902 1.74127006530762 1.96987915039062 2.11772918701172
## 1 1 2 2
## 2.62422943115234 2.64065933227539 2.74127006530762 2.80423927307129
## 1 1 1 2
## 2.92745018005371 3.00068092346191 3.17728042602539 3.62422943115234
## 1 1 2 1
## 3.89048957824707 3.92745018005371 4.00068092346191 4.64065933227539
## 2 1 1 1
## 4.66939926147461 4.92675971984863 5.17591094970703 5.23065948486328
## 1 1 2 1
## 5.66939926147461 6.32307052612305 6.41341972351074 6.44626998901367
## 1 2 3 1
## 6.54141044616699 6.73648071289062 6.808349609375 6.8377799987793
## 1 2 1 2
## 6.92675971984863 7.1533203125 7.2772102355957 7.54141044616699
## 1 2 1 1
## 7.76728057861328 7.94113922119141 8.22587013244629 8.41341972351074
## 1 2 1 1
## 8.41957092285156 8.5311393737793 8.81999015808105 9.0663890838623
## 1 1 1 2
## 9.14920997619629 9.23065948486328 9.2772102355957 9.33880996704102
## 1 1 1 1
## 9.41957092285156 9.45038032531738 9.45789909362793 9.74811935424805
## 1 1 1 2
## 9.76728057861328 9.81999015808105 10.054069519043 10.2087593078613
## 1 1 2 1
## 10.290210723877 10.3586616516113 10.4503803253174 10.5311393737793
## 2 1 1 1
## 10.6529808044434 10.84326171875 10.8987007141113 11.185489654541
## 1 1 2 2
## 11.2512016296387 11.338809967041 11.3908290863037 11.4462699890137
## 2 1 2 1
## 11.4578990936279 11.590690612793 11.6529808044434 11.741268157959
## 1 1 1 1
## 11.8617401123047 12.0376510620117 12.1492099761963 12.2005500793457
## 2 2 1 1
## 12.2258701324463 12.3518104553223 12.3723487854004 12.590690612793
## 1 2 2 1
## 12.594108581543 12.84326171875 12.967830657959 13.0307998657227
## 1 1 2 2
## 13.2005500793457 13.4072608947754 13.4620094299316 13.741268157959
## 1 1 1 1
## 13.808349609375 13.9548301696777 13.9787788391113 14.3477096557617
## 1 1 1 2
## 14.3586616516113 14.4072608947754 14.6338081359863 14.6632499694824
## 1 1 2 1
## 14.8665313720703 14.9418182373047 14.9445610046387 14.9856300354004
## 2 1 2 1
## 15.4620094299316 15.594108581543 15.6084899902344 15.6194381713867
## 1 1 2 1
## 15.6399688720703 15.722110748291 15.9418182373047 15.9548301696777
## 2 2 1 1
## 15.9787788391113 16.6214904785156 16.8507881164551 16.9856300354004
## 1 1 1 1
## 17.086238861084 17.134838104248 17.2087593078613 17.4223098754883
## 1 2 1 1
## 17.6632499694824 17.8398399353027 17.8548889160156 18.0239601135254
## 1 2 1 1
## 18.07666015625 18.1218299865723 18.2813110351562 18.3641395568848
## 2 2 1 2
## 18.3744010925293 18.481861114502 18.5010299682617 18.5489387512207
## 1 2 2 1
## 18.5701599121094 18.6214904785156 18.7707099914551 18.8507881164551
## 1 1 1 1
## 19.0239601135254 19.2183418273926 19.2813110351562 19.284049987793
## 1 2 1 2
## 19.3045806884766 19.3401794433594 19.3579788208008 19.3744010925293
## 1 1 1 1
## 19.5325088500977 19.7707099914551 20.0191688537598 20.026008605957
## 1 1 1 2
## 20.086238861084 20.2327194213867 20.3121109008789 20.3586616516113
## 1 2 1 1
## 20.5325088500977 20.5489387512207 20.7063598632812 20.7707099914551
## 1 1 1 1
## 20.8501014709473 21.0492782592773 21.0841903686523 21.3045806884766
## 2 2 1 1
## 21.3401794433594 21.351131439209 21.4223098754883 21.6194381713867
## 1 2 1 1
## 22.0191688537598 22.0198516845703 22.0841903686523 22.3121109008789
## 1 1 1 1
## 22.3579788208008 22.3586616516113 22.7707099914551 22.8548889160156
## 1 1 1 1
## 22.8733711242676 23.1204605102539 23.2005500793457 23.7063598632812
## 2 1 1 1
## 25.0198516845703 25.1204605102539 25.2190284729004 25.3935699462891
## 1 1 1 2
## 25.5701599121094 25.6125907897949 25.8487281799316 26.2190284729004
## 1 1 2 1
## 26.9336090087891 27.2005500793457 27.331958770752 27.5550994873047
## 2 1 2 2
## 27.6125907897949 28.3655014038086 29.6057510375977 29.8412094116211
## 1 2 2 1
## 30.2491493225098 31.7426414489746 32.050651550293 32.1492118835449
## 2 2 2 2
## 32.5489387512207 32.6646118164062 32.901439666748 33.6372299194336
## 2 1 1 1
## 33.6646118164062 33.8412094116211 33.901439666748 35.6372299194336
## 1 1 1 1
## 35.8535194396973 37.479118347168 38.479118347168 38.6865196228027
## 2 1 1 1
## 39.1581115722656 39.6865196228027 40
## 1 1 13
fit2 <- lm(lwage ~ poly(potexp, degree = 2, raw = TRUE), data = data.frame(potexp,lwage))
fit4 <- lm(lwage ~ poly(potexp, degree = 4, raw = TRUE), data = data.frame(potexp,lwage))
potexp_seq <- seq(min(potexp), max(potexp), length.out = 100)
lwage_pred2 <- predict(fit2, newdata = data.frame(potexp = potexp_seq))
lwage_pred4 <- predict(fit4, newdata = data.frame(potexp = potexp_seq))
plot(0, type = "n", xlim = range(potexp_seq), ylim = range(c(lwage_pred2, lwage_pred4)),
xlab = "Potential Experience", ylab = "log wage", main = "Life-Cycle Profiles with Polynomial Fits")
lines(potexp_seq, lwage_pred2, col = "red", lwd = 2, lty = 1, label = "2nd Order")
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "label" ist kein
## Grafikparameter
lines(potexp_seq, lwage_pred4, col = "blue", lwd = 2, lty = 2, label = "4th Order")
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "label" ist kein
## Grafikparameter
legend("bottomright", legend = c("2nd Order", "4th Order"), col = c("red", "blue"), lty = 2:3, cex = 0.8)

# Interpretation
#1. Ashenfelter and Krueger (1994)
# 1a) Research question and difficulties
#nAshenfelter and Krueger study the causal return to schooling, i.e. how much an additional year of education increases wages.
# The main difficulty is that schooling is endogenous: individuals who obtain more education may differ in unobserved ability, motivation, or family background, all of which also affect wages. As a result, simple OLS estimates of the return to education are likely biased due to ability bias, family background effects, and measurement error in reported schooling.
# 1b) Authors’ approach and credibility
# The authors exploit data on identical twins. Since twins share genetics and much of their family environment, comparing wages between twins with different schooling levels differences out many unobserved factors. Their main strategies are:
# Within-family (fixed-effects) estimation
# First-difference estimation
# Use of cross-reported education to address measurement error
# This approach is convincing to a large extent because it controls for major sources of omitted-variable bias. However, it does not fully eliminate concerns about twin-specific differences (e.g. health, motivation, birth order) or remaining measurement error. Overall, the strategy substantially improves upon standard OLS.
# 1c) Key results
# The key result is that the estimated return to schooling is about 7–10% per year, similar to or slightly lower than conventional OLS estimates. This is somewhat surprising, as one might expect OLS to be strongly upward biased. The results suggest that measurement error and ability bias may offset each other, making OLS estimates not wildly misleading. The results are convincing given the careful robustness checks.
######### Question 4.3 #########
#creating variable: age squared
twins_long$age_sq <- (twins_long$age*twins_long$age)
#creating a linear model
regr <- lm(lwage ~ educ + age + age_sq + male + white , data=twins_long)
summary(regr)
##
## Call:
## lm(formula = lwage ~ educ + age + age_sq + male + white, data = twins_long)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.62602 -0.28748 0.00277 0.28474 2.42317
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.4706147 0.4260210 -1.105 0.270210
## educ 0.0838714 0.0144261 5.814 1.60e-08 ***
## age 0.0878166 0.0188327 4.663 4.75e-06 ***
## age_sq -0.0008686 0.0002335 -3.720 0.000239 ***
## male 0.2040259 0.0630223 3.237 0.001345 **
## white -0.4104659 0.1266840 -3.240 0.001333 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5324 on 292 degrees of freedom
## Multiple R-squared: 0.2724, Adjusted R-squared: 0.2599
## F-statistic: 21.86 on 5 and 292 DF, p-value: < 2.2e-16
# Interpretation
# 3a) Life-cycle profile
# The coefficient on age is positive and the coefficient on age² is negative, implying a concave life-cycle earnings profile: wages rise with age at a decreasing rate and eventually peak. This pattern closely matches the age–wage profiles plotted in Question 2.
# 3b) Comparison with Table 3 (Column 1)
# The estimated coefficients closely match Column (1) of Table 3 in Ashenfelter and Krueger. The OLS coefficient on education likely reflects:
# Upward bias from ability and family background
# Downward bias from measurement error in schooling
# The net bias is ambiguous, which helps explain why OLS estimates are similar to fixed-effect estimates.
######### Question 4.4 #########
coeftest(regr, vcov = vcovHC, type = "HC1")
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.47061471 0.49575386 -0.9493 0.3432576
## educ 0.08387137 0.01473366 5.6925 3.047e-08 ***
## age 0.08781660 0.01976842 4.4423 1.265e-05 ***
## age_sq -0.00086864 0.00023652 -3.6726 0.0002855 ***
## male 0.20402595 0.06152734 3.3160 0.0010285 **
## white -0.41046590 0.11653213 -3.5223 0.0004962 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(regr, vcov = vcovHC, cluster = ~ famid)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.47061471 0.51306812 -0.9173 0.359766
## educ 0.08387137 0.01497900 5.5993 4.967e-08 ***
## age 0.08781660 0.02033895 4.3177 2.163e-05 ***
## age_sq -0.00086864 0.00024452 -3.5524 0.000445 ***
## male 0.20402595 0.06213090 3.2838 0.001149 **
## white -0.41046590 0.12500367 -3.2836 0.001149 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Interpretation
# 4. Robust and clustered standard errors
# HC1 robust standard errors account for heteroskedasticity, while clustering at the family level accounts for within-family correlation in errors.
# Clustering typically increases standard errors, reducing statistical significance for some coefficients, but the education coefficient remains statistically significant. This adjustment is appropriate because twins within the same family are not independent observations.
######### Question 4.5 #########
table(educ)
## educ
## 8 10 11 12 13 14 15 16 17 18 19 20
## 1 3 1 95 36 46 24 61 3 19 4 5
twins_long$educ_factor <- factor(twins_long$educ, levels = c(12, 8, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20)) # 12=High-School will be the omitted category in the regression
table(twins_long$educ,twins_long$educ_factor)
##
## 12 8 10 11 13 14 15 16 17 18 19 20
## 8 0 1 0 0 0 0 0 0 0 0 0 0
## 10 0 0 3 0 0 0 0 0 0 0 0 0
## 11 0 0 0 1 0 0 0 0 0 0 0 0
## 12 95 0 0 0 0 0 0 0 0 0 0 0
## 13 0 0 0 0 36 0 0 0 0 0 0 0
## 14 0 0 0 0 0 46 0 0 0 0 0 0
## 15 0 0 0 0 0 0 24 0 0 0 0 0
## 16 0 0 0 0 0 0 0 61 0 0 0 0
## 17 0 0 0 0 0 0 0 0 3 0 0 0
## 18 0 0 0 0 0 0 0 0 0 19 0 0
## 19 0 0 0 0 0 0 0 0 0 0 4 0
## 20 0 0 0 0 0 0 0 0 0 0 0 5
regr2 <- lm(lwage ~educ_factor+ age + age_sq + male + white, data=subset(twins_long))
summary(regr2)
##
## Call:
## lm(formula = lwage ~ educ_factor + age + age_sq + male + white,
## data = subset(twins_long))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.60043 -0.29552 -0.00838 0.26326 2.51836
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.3647119 0.3987379 0.915 0.361149
## educ_factor8 0.1776058 0.5535310 0.321 0.748554
## educ_factor10 -0.3184596 0.3161341 -1.007 0.314627
## educ_factor11 0.1898938 0.5400986 0.352 0.725408
## educ_factor13 0.2910925 0.1074009 2.710 0.007133 **
## educ_factor14 0.2896466 0.0976197 2.967 0.003264 **
## educ_factor15 0.2938105 0.1227619 2.393 0.017350 *
## educ_factor16 0.3610306 0.0896191 4.029 7.22e-05 ***
## educ_factor17 0.5328049 0.3151039 1.691 0.091963 .
## educ_factor18 0.4776445 0.1362620 3.505 0.000530 ***
## educ_factor19 0.8128246 0.2739847 2.967 0.003268 **
## educ_factor20 0.8080972 0.2479446 3.259 0.001254 **
## age 0.0942462 0.0195404 4.823 2.32e-06 ***
## age_sq -0.0009432 0.0002415 -3.906 0.000118 ***
## male 0.1954990 0.0649259 3.011 0.002839 **
## white -0.4220351 0.1324176 -3.187 0.001598 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5352 on 282 degrees of freedom
## Multiple R-squared: 0.29, Adjusted R-squared: 0.2522
## F-statistic: 7.678 on 15 and 282 DF, p-value: 2.272e-14
twins_long$coefficients <- coef(regr2)[twins_long$educ_factor]
twins_long$coefficients[twins_long$educ_factor == 12] <- 0
plot(twins_long$educ, twins_long$coefficients, xlim = range(educ),
xlab = "Years of Education", ylab = "log wage relative to HS=12", main = "Conditional Education--Wage Profiles
(when Regression-Adjusting)", cex.lab=0.75, cex.main=0.85)
abline(lm(twins_long$coefficients ~ educ),col='red', lwd = 2, lty = 2)

# Interpretation
# Education dummies
#Replacing years of education with education-level dummies (omitting high school, educ = 12) shows that:
# Returns are approximately linear
# Some non-linearities exist at very high education levels (college and beyond)
# Overall, the pattern supports a roughly linear return to education, validating the linear specification as a reasonable approximation.
######### Question 4.6 #########
regr3 <- lm(lwage ~ educ + factor(famid), data = twins_long)
regr3 <- lm(lwage ~ educ + factor(famid) + factor(twin), data = twins_long)
summary(regr3)
##
## Call:
## lm(formula = lwage ~ educ + factor(famid) + factor(twin), data = twins_long)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.016 -0.133 0.000 0.133 1.016
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.786296 0.471373 1.668 0.097425 .
## educ 0.091569 0.023711 3.862 0.000168 ***
## factor(famid)2 0.284803 0.392052 0.726 0.468723
## factor(famid)3 0.873150 0.403187 2.166 0.031953 *
## factor(famid)4 0.436302 0.394732 1.105 0.270830
## factor(famid)5 0.686161 0.394732 1.738 0.084252 .
## factor(famid)6 0.580491 0.398276 1.458 0.147109
## factor(famid)7 0.143165 0.403187 0.355 0.723037
## factor(famid)8 0.851733 0.400564 2.126 0.035146 *
## factor(famid)9 0.704329 0.403187 1.747 0.082742 .
## factor(famid)10 0.510042 0.394732 1.292 0.198342
## factor(famid)11 0.109504 0.398276 0.275 0.783745
## factor(famid)12 -1.542640 0.391873 -3.937 0.000127 ***
## factor(famid)13 0.377758 0.398276 0.948 0.344440
## factor(famid)14 0.577055 0.400564 1.441 0.151822
## factor(famid)15 0.287605 0.392052 0.734 0.464367
## factor(famid)16 0.577228 0.396331 1.456 0.147406
## factor(famid)17 -0.073043 0.394732 -0.185 0.853449
## factor(famid)18 0.625514 0.403187 1.551 0.122950
## factor(famid)19 0.037265 0.392589 0.095 0.924507
## factor(famid)20 -0.040834 0.403187 -0.101 0.919468
## factor(famid)21 0.374854 0.393483 0.953 0.342329
## factor(famid)22 0.692471 0.403187 1.717 0.087995 .
## factor(famid)23 0.070782 0.391873 0.181 0.856911
## factor(famid)24 0.471198 0.400564 1.176 0.241362
## factor(famid)25 0.704329 0.403187 1.747 0.082742 .
## factor(famid)26 0.517283 0.392589 1.318 0.189681
## factor(famid)27 0.780732 0.392589 1.989 0.048593 *
## factor(famid)28 0.123463 0.391873 0.315 0.753164
## factor(famid)29 0.647664 0.403187 1.606 0.110340
## factor(famid)30 0.443953 0.398276 1.115 0.266804
## factor(famid)31 0.363248 0.391873 0.927 0.355470
## factor(famid)32 -0.051435 0.403187 -0.128 0.898663
## factor(famid)33 0.277613 0.392589 0.707 0.480603
## factor(famid)34 0.955651 0.394732 2.421 0.016696 *
## factor(famid)35 -0.812751 0.391873 -2.074 0.039822 *
## factor(famid)36 1.027938 0.398276 2.581 0.010831 *
## factor(famid)37 2.157194 0.393483 5.482 1.78e-07 ***
## factor(famid)38 0.384228 0.392052 0.980 0.328675
## factor(famid)39 0.541602 0.394732 1.372 0.172129
## factor(famid)40 -0.176165 0.403187 -0.437 0.662802
## factor(famid)41 0.940748 0.403187 2.333 0.020989 *
## factor(famid)42 0.005130 0.394732 0.013 0.989649
## factor(famid)43 0.373865 0.394732 0.947 0.345123
## factor(famid)44 0.258201 0.403187 0.640 0.522910
## factor(famid)45 0.662423 0.394732 1.678 0.095441 .
## factor(famid)46 0.847492 0.393483 2.154 0.032885 *
## factor(famid)47 0.520621 0.396331 1.314 0.191027
## factor(famid)48 -0.224229 0.398276 -0.563 0.574294
## factor(famid)49 -1.032630 0.394732 -2.616 0.009824 **
## factor(famid)50 0.194212 0.391873 0.496 0.620916
## factor(famid)51 0.539896 0.400564 1.348 0.179783
## factor(famid)52 0.395290 0.396331 0.997 0.320221
## factor(famid)53 0.684363 0.394732 1.734 0.085060 .
## factor(famid)54 0.459628 0.392589 1.171 0.243590
## factor(famid)55 -0.336669 0.398276 -0.845 0.399309
## factor(famid)56 0.473476 0.403187 1.174 0.242160
## factor(famid)57 -0.527922 0.394732 -1.337 0.183151
## factor(famid)58 1.024986 0.392589 2.611 0.009968 **
## factor(famid)59 -0.064971 0.396331 -0.164 0.870011
## factor(famid)60 0.184093 0.392589 0.469 0.639821
## factor(famid)61 -1.027830 0.391873 -2.623 0.009638 **
## factor(famid)62 0.432291 0.392052 1.103 0.271987
## factor(famid)63 1.107019 0.391873 2.825 0.005386 **
## factor(famid)64 -0.605801 0.398276 -1.521 0.130394
## factor(famid)65 0.463140 0.392052 1.181 0.239382
## factor(famid)66 -0.067488 0.403187 -0.167 0.867296
## factor(famid)67 0.311401 0.403187 0.772 0.441148
## factor(famid)68 -0.343842 0.392052 -0.877 0.381901
## factor(famid)69 0.291112 0.392052 0.743 0.458948
## factor(famid)70 1.140370 0.403187 2.828 0.005332 **
## factor(famid)71 0.047763 0.391873 0.122 0.903158
## factor(famid)72 -0.186777 0.398276 -0.469 0.639790
## factor(famid)73 -0.236218 0.392052 -0.603 0.547758
## factor(famid)74 0.837028 0.398276 2.102 0.037292 *
## factor(famid)75 0.385610 0.403187 0.956 0.340437
## factor(famid)76 0.353982 0.396331 0.893 0.373237
## factor(famid)77 0.260816 0.393483 0.663 0.508472
## factor(famid)78 0.253920 0.394732 0.643 0.521050
## factor(famid)79 -0.411142 0.394732 -1.042 0.299319
## factor(famid)80 0.111484 0.394732 0.282 0.778011
## factor(famid)81 0.050241 0.403187 0.125 0.901003
## factor(famid)82 0.528519 0.392589 1.346 0.180298
## factor(famid)83 0.105553 0.403187 0.262 0.793845
## factor(famid)84 -0.321969 0.392052 -0.821 0.412840
## factor(famid)85 0.241221 0.400564 0.602 0.547966
## factor(famid)86 0.474020 0.391873 1.210 0.228363
## factor(famid)87 -0.079979 0.403187 -0.198 0.843032
## factor(famid)88 2.007199 0.394732 5.085 1.10e-06 ***
## factor(famid)89 -0.060573 0.392589 -0.154 0.877593
## factor(famid)90 0.381172 0.400564 0.952 0.342868
## factor(famid)91 -0.498119 0.394732 -1.262 0.208977
## factor(famid)92 -0.377980 0.391873 -0.965 0.336355
## factor(famid)93 -0.211121 0.396331 -0.533 0.595053
## factor(famid)94 0.723924 0.394732 1.834 0.068681 .
## factor(famid)95 -0.452334 0.396331 -1.141 0.255599
## factor(famid)96 0.868657 0.392589 2.213 0.028465 *
## factor(famid)97 0.632355 0.392589 1.611 0.109384
## factor(famid)98 0.091748 0.400564 0.229 0.819150
## factor(famid)99 0.249227 0.416898 0.598 0.550885
## factor(famid)100 0.725133 0.394732 1.837 0.068225 .
## factor(famid)101 0.411200 0.392052 1.049 0.295973
## factor(famid)102 0.252883 0.396331 0.638 0.524427
## factor(famid)103 -0.004189 0.394732 -0.011 0.991548
## factor(famid)104 0.254737 0.403187 0.632 0.528494
## factor(famid)105 0.708860 0.393483 1.801 0.073674 .
## factor(famid)106 0.100320 0.396331 0.253 0.800527
## factor(famid)107 0.101126 0.403187 0.251 0.802307
## factor(famid)108 -0.140827 0.403187 -0.349 0.727375
## factor(famid)109 -0.003312 0.398276 -0.008 0.993377
## factor(famid)110 0.006086 0.398276 0.015 0.987828
## factor(famid)111 0.733806 0.394732 1.859 0.065026 .
## factor(famid)112 1.071615 0.392589 2.730 0.007116 **
## factor(famid)113 -0.271375 0.393483 -0.690 0.491487
## factor(famid)114 -0.432740 0.392052 -1.104 0.271492
## factor(famid)115 0.254328 0.398276 0.639 0.524095
## factor(famid)116 0.441049 0.392589 1.123 0.263084
## factor(famid)117 0.013053 0.403187 0.032 0.974217
## factor(famid)118 0.206725 0.400564 0.516 0.606571
## factor(famid)119 0.719739 0.394732 1.823 0.070280 .
## factor(famid)120 0.265969 0.403187 0.660 0.510500
## factor(famid)121 0.031592 0.403187 0.078 0.937651
## factor(famid)122 -0.151560 0.391873 -0.387 0.699494
## factor(famid)123 0.416948 0.392589 1.062 0.289957
## factor(famid)124 0.036694 0.392589 0.093 0.925660
## factor(famid)125 0.263411 0.403187 0.653 0.514570
## factor(famid)126 0.850042 0.403187 2.108 0.036701 *
## factor(famid)127 0.376795 0.406139 0.928 0.355059
## factor(famid)128 0.247300 0.392589 0.630 0.529724
## factor(famid)129 -0.798500 0.400564 -1.993 0.048064 *
## factor(famid)130 1.071313 0.403187 2.657 0.008753 **
## factor(famid)131 -0.448921 0.403187 -1.113 0.267340
## factor(famid)132 0.005419 0.400564 0.014 0.989225
## factor(famid)133 0.496774 0.398276 1.247 0.214267
## factor(famid)134 0.035385 0.394732 0.090 0.928693
## factor(famid)135 0.612190 0.400564 1.528 0.128582
## factor(famid)136 -0.216924 0.400564 -0.542 0.588950
## factor(famid)137 -0.103905 0.391873 -0.265 0.791265
## factor(famid)138 -0.283914 0.394732 -0.719 0.473124
## factor(famid)139 0.405892 0.391873 1.036 0.302008
## factor(famid)140 0.236819 0.403187 0.587 0.557858
## factor(famid)141 -0.569069 0.398276 -1.429 0.155175
## factor(famid)142 0.441675 0.398276 1.109 0.269256
## factor(famid)143 0.903498 0.398276 2.269 0.024754 *
## factor(famid)144 -0.466847 0.403187 -1.158 0.248785
## factor(famid)145 1.033556 0.396331 2.608 0.010052 *
## factor(famid)146 1.046223 0.403187 2.595 0.010421 *
## factor(famid)147 -0.736406 0.398276 -1.849 0.066469 .
## factor(famid)148 0.180535 0.392589 0.460 0.646298
## factor(famid)149 0.156918 0.391873 0.400 0.689420
## factor(twin)2 0.078593 0.045472 1.728 0.086023 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3919 on 147 degrees of freedom
## Multiple R-squared: 0.8015, Adjusted R-squared: 0.599
## F-statistic: 3.958 on 150 and 147 DF, p-value: 4.058e-16
# interpretation
# Family fixed effects vs. OLS
# When adding family fixed effects, identification comes from differences in schooling within twin pairs. The education coefficient typically falls slightly relative to OLS.
# Economically, this reflects the removal of:
# Shared family background
# Genetic ability
# The remaining variation is arguably closer to the true causal effect of education.
######### Question 4.7 #########
#read in the wide data
twins <- read_dta("twins.dta")
#differences in education
twins$dif_edu <- (twins$educ1-twins$educ2)
#differences in wages
twins$dif_lwage <- (twins$lwage1-twins$lwage2)
#linear model for first differences
first_dif_regr <- lm(dif_lwage ~ dif_edu, data = twins)
summary(first_dif_regr)
##
## Call:
## lm(formula = dif_lwage ~ dif_edu, data = twins)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.03115 -0.20909 0.00722 0.34395 1.15740
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.07859 0.04547 -1.728 0.086023 .
## dif_edu 0.09157 0.02371 3.862 0.000168 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5542 on 147 degrees of freedom
## Multiple R-squared: 0.09211, Adjusted R-squared: 0.08593
## F-statistic: 14.91 on 1 and 147 DF, p-value: 0.0001682
# Interpretation
# The first-difference estimate is similar to the family fixed-effect estimate and close to Column (v) of Table 3.
# In theory, fixed effects and first differences should produce the same coefficient when there are exactly two observations per family. In practice, small differences arise due to sample restrictions and measurement error, but the results are very close, reinforcing the credibility of the findings.
# Overall conclusion
# Ashenfelter and Krueger provide strong evidence that the return to education is robust to controlling for family background and unobserved ability. Twin-based methods substantially strengthen causal interpretation, and the results suggest that conventional OLS estimates are not severely biased.