library(MASS)
## Warning: Paket 'MASS' wurde unter R Version 4.4.3 erstellt
library(tidyverse)
## Warning: Paket 'tidyverse' wurde unter R Version 4.4.3 erstellt
## Warning: Paket 'ggplot2' wurde unter R Version 4.4.3 erstellt
## Warning: Paket 'tidyr' wurde unter R Version 4.4.3 erstellt
## Warning: Paket 'readr' wurde unter R Version 4.4.3 erstellt
## Warning: Paket 'purrr' wurde unter R Version 4.4.3 erstellt
## Warning: Paket 'dplyr' wurde unter R Version 4.4.3 erstellt
## Warning: Paket 'forcats' wurde unter R Version 4.4.3 erstellt
## Warning: Paket 'lubridate' wurde unter R Version 4.4.3 erstellt
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 4.0.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::select() masks MASS::select()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(GGally)
## Warning: Paket 'GGally' wurde unter R Version 4.4.3 erstellt
library(labelled)
## Warning: Paket 'labelled' wurde unter R Version 4.4.3 erstellt
library(dplyr)
library(rstatix)
## Warning: Paket 'rstatix' wurde unter R Version 4.4.3 erstellt
##
## Attache Paket: 'rstatix'
##
## Das folgende Objekt ist maskiert 'package:MASS':
##
## select
##
## Das folgende Objekt ist maskiert 'package:stats':
##
## filter
library(ivreg)
## Warning: Paket 'ivreg' wurde unter R Version 4.4.3 erstellt
library(simstudy)
## Warning: Paket 'simstudy' wurde unter R Version 4.4.3 erstellt
library(ivmodel)
## Warning: Paket 'ivmodel' wurde unter R Version 4.4.3 erstellt
library(ggplot2)
library(car)
## Warning: Paket 'car' wurde unter R Version 4.4.3 erstellt
## Lade nötiges Paket: carData
## Warning: Paket 'carData' wurde unter R Version 4.4.3 erstellt
##
## Attache Paket: 'car'
##
## Das folgende Objekt ist maskiert 'package:dplyr':
##
## recode
##
## Das folgende Objekt ist maskiert 'package:purrr':
##
## some
library(lmtest)
## Warning: Paket 'lmtest' wurde unter R Version 4.4.3 erstellt
## Lade nötiges Paket: zoo
## Warning: Paket 'zoo' wurde unter R Version 4.4.3 erstellt
##
## Attache Paket: 'zoo'
##
## Die folgenden Objekte sind maskiert von 'package:base':
##
## as.Date, as.Date.numeric
library(tseries)
## Warning: Paket 'tseries' wurde unter R Version 4.4.3 erstellt
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(ggfortify)
## Warning: Paket 'ggfortify' wurde unter R Version 4.4.3 erstellt
library(plotly)
## Warning: Paket 'plotly' wurde unter R Version 4.4.3 erstellt
##
## Attache Paket: 'plotly'
##
## Das folgende Objekt ist maskiert 'package:ggplot2':
##
## last_plot
##
## Das folgende Objekt ist maskiert 'package:MASS':
##
## select
##
## Das folgende Objekt ist maskiert 'package:stats':
##
## filter
##
## Das folgende Objekt ist maskiert 'package:graphics':
##
## layout
library(haven)
## Warning: Paket 'haven' wurde unter R Version 4.4.3 erstellt
library(broom)
## Warning: Paket 'broom' wurde unter R Version 4.4.3 erstellt
#setwd("---")
# NOTE(review): machine-specific absolute path -- the script only runs where
# this directory exists. Consider an RStudio project or relative paths instead.
setwd("C:/Users/bdrammeh/Desktop/Labour Economics/assignmnt")

# Load the Stata data set (path is relative to the working directory set above).
minwage <- read_dta("minwage.dta")

# Replace the 0/1 state codes with the state names (optional, but easier to
# read). The "old=new; ..." string is car's recode syntax, so car::recode() is
# called explicitly: a bare recode() resolves to whichever of car / dplyr was
# attached last, and dplyr::recode() does not understand this specification.
minwage$state <- car::recode(minwage$state, "0='PA'; 1='NJ'")

# Keep only the rows flagged sample == 1 (presumably the restaurants with data
# in both survey waves -- confirm against the codebook).
df <- minwage %>%
  filter(sample == 1)
######## a) ########
# Per-state means of wage_st2, wage_st and dw.
# summarise() evaluates its arguments in order and later arguments see the
# columns created by earlier ones, so every mean must read its own source
# column: the wage_st mean has to be mean(wage_st). Writing mean(wage_st2)
# there would just repeat the already-summarised wage_st2 value.
wages <- df %>%
  group_by(state) %>%
  summarise(wage_st2 = mean(wage_st2),
            wage_st = mean(wage_st),
            dw = mean(dw))
# i) within-state difference between the wage_st2 mean and the wage_st mean
difference <- wages$wage_st2 - wages$wage_st
# ii) NJ minus PA for every column. The states are picked by name so the sign
#     does not depend on the row order of `wages`; `state` is overwritten last
#     because the other expressions still need the original labels.
diff_row <- wages %>%
  filter(state %in% c("NJ", "PA")) %>%
  summarise(wage_st2 = wage_st2[state == "NJ"] - wage_st2[state == "PA"],
            wage_st = wage_st[state == "NJ"] - wage_st[state == "PA"],
            dw = dw[state == "NJ"] - dw[state == "PA"],
            state = "diff")
wages <- bind_rows(wages, diff_row)
print(wages)
# NOTE: the console output recorded below was produced before the wage_st fix
# (its wage_st column merely repeats wage_st2); re-run the script to refresh it.
## # A tibble: 3 × 4
## state wage_st2 wage_st dw
## <chr> <dbl> <dbl> <dbl>
## 1 NJ 5.08 5.08 0.469
## 2 PA 4.62 4.62 -0.0348
## 3 diff 0.463 0.463 0.504
# Interpretation:
# NJ’s average starting wage increased by $0.469, while PA’s decreased slightly by $0.0348.
# The difference-in-differences estimate (dw diff = 0.504) indicates that NJ wages increased about $0.50 more than PA wages, which can be attributed to the minimum wage increase.
# This assumes parallel trends: without the law change, NJ wages would have changed similarly to PA.
# key point: The DiD shows a clear wage effect from the minimum wage increase.
######## b) ########
# Per-state means of fte2, fte and dfte, computed in one pass over the three
# columns.
fte <- df %>%
  group_by(state) %>%
  summarise(across(c(fte2, fte, dfte), mean))
# i) within-state difference between the fte2 mean and the fte mean
difference_fte <- fte[["fte2"]] - fte[["fte"]]
# ii) first row minus second row for every column; group_by() sorts the
#     groups, so this is NJ minus PA. The result is appended as a "diff" row.
diff_row_fte <- fte %>%
  filter(state %in% c("NJ", "PA")) %>%
  summarise(across(c(fte2, fte, dfte), function(x) -diff(x)),
            state = "diff")
fte <- bind_rows(fte, diff_row_fte)
print(fte)
## # A tibble: 3 × 4
## state fte2 fte dfte
## <chr> <dbl> <dbl> <dbl>
## 1 NJ 17.6 17.3 0.287
## 2 PA 18.1 20.1 -2.02
## 3 diff -0.536 -2.84 2.30
# Interpretation:
# NJ employment increased slightly (+0.287), while PA employment fell (-2.02).
# The DiD estimate (2.30) indicates that employment in NJ grew by about 2.30 more than in PA: NJ rose slightly while PA fell.
#The minimum wage increase does not seem to have reduced employment; if anything, NJ performed better than PA.
######## c) ########
# Turn the state labels back into a numeric 0/1 indicator (PA = 0, NJ = 1) so
# that `state` can enter the regressions as a dummy variable.
df <- df %>%
  mutate(state = dplyr::recode(state, "PA" = 0, "NJ" = 1))
# Earlier attempts, kept for reference:
#df$treat <-
#table(df$state, df$treat)
#minwage$state <- recode(as.character(minwage$state), "0" = "PA", "1" = "NJ")
#minwage$state <- ifelse(minwage$state == 0, "PA", "NJ")
# ii)
## for wages
# With a single 0/1 regressor the slope equals the difference between the two
# group means of dw.
reg_wages <- lm(formula = dw ~ state, data = df)
summary(reg_wages)
##
## Call:
## lm(formula = dw ~ state, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.03916 -0.21515 0.03485 0.33084 2.03485
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.03485 0.04287 -0.813 0.417
## state 0.50401 0.04757 10.595 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3483 on 349 degrees of freedom
## Multiple R-squared: 0.2434, Adjusted R-squared: 0.2412
## F-statistic: 112.2 on 1 and 349 DF, p-value: < 2.2e-16
## for employment
# Same specification as the wage regression, with dfte as the outcome.
# `state` is the 0/1 indicator (PA = 0, NJ = 1) created above, so its slope is
# the NJ-minus-PA difference in mean dfte.
reg_emp <- lm(dfte ~ state, data = df)
summary(reg_emp)
##
## Call:
## lm(formula = dfte ~ state, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -41.485 -3.287 0.213 4.463 25.765
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.015 1.052 -1.916 0.0562 .
## state 2.302 1.167 1.972 0.0494 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.546 on 349 degrees of freedom
## Multiple R-squared: 0.01102, Adjusted R-squared: 0.008184
## F-statistic: 3.888 on 1 and 349 DF, p-value: 0.04942
# iii)
## for wages
# Adds co_owned and chain as controls to the dw regression. as.factor(chain)
# makes chain enter as a set of category dummies rather than a numeric trend.
reg_wages_c <- lm(dw ~ state + co_owned + as.factor(chain), data = df)
summary(reg_wages_c)
##
## Call:
## lm(formula = dw ~ state + co_owned + as.factor(chain), data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.05863 -0.21156 0.00137 0.25137 1.95503
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.04497 0.04744 0.948 0.34379
## state 0.50366 0.04693 10.731 < 2e-16 ***
## co_owned -0.03676 0.04308 -0.853 0.39413
## as.factor(chain)2 -0.04665 0.05084 -0.918 0.35945
## as.factor(chain)3 -0.15112 0.05180 -2.917 0.00376 **
## as.factor(chain)4 -0.15024 0.05846 -2.570 0.01060 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3419 on 345 degrees of freedom
## Multiple R-squared: 0.279, Adjusted R-squared: 0.2685
## F-statistic: 26.7 on 5 and 345 DF, p-value: < 2.2e-16
## for employment
# Same controls (co_owned, chain dummies) as the wage regression, with dfte as
# the outcome.
reg_emp_c <- lm(dfte ~ state + co_owned + as.factor(chain), data = df)
summary(reg_emp_c)
##
## Call:
## lm(formula = dfte ~ state + co_owned + as.factor(chain), data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -41.893 -3.628 0.469 4.372 25.357
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.6073 1.1867 -1.354 0.1765
## state 2.2973 1.1741 1.957 0.0512 .
## co_owned 0.3394 1.0777 0.315 0.7530
## as.factor(chain)2 0.2990 1.2719 0.235 0.8143
## as.factor(chain)3 -1.9637 1.2960 -1.515 0.1306
## as.factor(chain)4 -0.7816 1.4626 -0.534 0.5934
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.554 on 345 degrees of freedom
## Multiple R-squared: 0.0207, Adjusted R-squared: 0.006506
## F-statistic: 1.458 on 5 and 345 DF, p-value: 0.2029
# Wages (dw ~ state)
# state coefficient = 0.504, matching the DiD from part (a).
#Highly significant (p < 0.001).
#Adding covariates (co_owned, chain) barely changes the effect: 0.504 → 0.504, confirming robustness.
#Employment (dfte ~ state)
#state coefficient = 2.30, significant at p ≈ 0.049, consistent with part (b).
#Adding covariates reduces significance slightly (p ≈ 0.051), but coefficient remains ~2.30.
# interpretation:
# Regression approach gives the same DiD estimate as the simple calculation.
# Covariates do not materially change the estimates, as expected (the effect is driven by state/time difference).
######## d) ########
# only consider restaurants in NJ
# New Jersey subsample: rows where the state indicator equals 1.
nj <- filter(df, state == 1)
# ii)
## low_wage flags restaurants whose wage_st is below 5 (1 = below, 0 = not)
nj <- nj %>%
  mutate(low_wage = ifelse(wage_st < 5, 1, 0))
# for wages
# Slope = difference in mean dw between the low_wage == 1 and low_wage == 0
# restaurants within NJ.
reg_wages_d <- lm(formula = dw ~ low_wage, data = nj)
summary(reg_wages_d)
##
## Call:
## lm(formula = dw ~ low_wage, data = nj)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.56591 -0.16178 0.05409 0.18822 0.55822
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.004091 0.026719 -0.153 0.878
## low_wage 0.615872 0.030480 20.206 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2171 on 283 degrees of freedom
## Multiple R-squared: 0.5906, Adjusted R-squared: 0.5892
## F-statistic: 408.3 on 1 and 283 DF, p-value: < 2.2e-16
# for employment
# Regress dfte on the low_wage indicator within the NJ subsample; the slope is
# the difference in mean dfte between low_wage == 1 and low_wage == 0.
reg_emp_d <- lm(dfte ~ low_wage, data = nj)
summary(reg_emp_d)
##
## Call:
## lm(formula = dfte ~ low_wage, data = nj)
##
## Residuals:
## Min 1Q Median 3Q Max
## -35.051 -3.551 -0.051 3.949 24.949
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.2500 0.9472 -2.375 0.01820 *
## low_wage 3.3014 1.0806 3.055 0.00246 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.695 on 283 degrees of freedom
## Multiple R-squared: 0.03193, Adjusted R-squared: 0.02851
## F-statistic: 9.334 on 1 and 283 DF, p-value: 0.002464
# interpretation:
# Strong wage response in low-wage NJ restaurants.
# Employment also increased, suggesting no negative employment effect for these restaurants.
######## e) ########
# only consider restaurants in PA
# Pennsylvania subsample (state indicator equals 0), with the same low_wage
# flag as for NJ: 1 if wage_st is below 5, otherwise 0.
pa <- filter(df, state == 0)
pa <- pa %>%
  mutate(low_wage = ifelse(wage_st < 5, 1, 0))
# i)
## for wages
# Same regression as for NJ, run on the PA subsample.
reg_wages_e <- lm(formula = dw ~ low_wage, data = pa)
summary(reg_wages_e)
##
## Call:
## lm(formula = dw ~ low_wage, data = pa)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.58837 -0.08837 -0.08837 0.13913 1.91163
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.26522 0.07318 -3.624 0.000575 ***
## low_wage 0.35359 0.09066 3.900 0.000233 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3509 on 64 degrees of freedom
## Multiple R-squared: 0.192, Adjusted R-squared: 0.1794
## F-statistic: 15.21 on 1 and 64 DF, p-value: 0.0002331
## for employment
# Regress dfte on the low_wage indicator within the PA subsample.
reg_emp_e <- lm(dfte ~ low_wage, data = pa)
summary(reg_emp_e)
##
## Call:
## lm(formula = dfte ~ low_wage, data = pa)
##
## Residuals:
## Min 1Q Median 3Q Max
## -39.652 -5.465 1.441 5.988 24.785
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.848 2.340 -1.644 0.105
## low_wage 2.813 2.899 0.970 0.336
##
## Residual standard error: 11.22 on 64 degrees of freedom
## Multiple R-squared: 0.01449, Adjusted R-squared: -0.0009036
## F-statistic: 0.9413 on 1 and 64 DF, p-value: 0.3356
# ii)
# Pooled sample: NJ is a copy of the 0/1 state indicator and low_wage flags
# restaurants whose wage_st is below 5.
df <- df %>%
  mutate(NJ = state,
         low_wage = ifelse(wage_st < 5, 1, 0))
# Interaction model for dw. The NJ:low_wage coefficient is the extra low-wage
# effect in NJ compared with PA.
reg_f <- lm(formula = dw ~ NJ + low_wage + NJ * low_wage, data = df)
summary(reg_f)
##
## Call:
## lm(formula = dw ~ NJ + low_wage + NJ * low_wage, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.58837 -0.16178 0.01522 0.18822 1.91163
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.26522 0.05156 -5.144 4.51e-07 ***
## NJ 0.26113 0.05987 4.361 1.71e-05 ***
## low_wage 0.35359 0.06388 5.536 6.13e-08 ***
## NJ:low_wage 0.26228 0.07270 3.608 0.000354 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2473 on 347 degrees of freedom
## Multiple R-squared: 0.6207, Adjusted R-squared: 0.6174
## F-statistic: 189.3 on 3 and 347 DF, p-value: < 2.2e-16
# Same pooled interaction model with dfte as the outcome.
# `NJ * low_wage` already expands to NJ + low_wage + NJ:low_wage, so the
# explicitly listed main effects are redundant but harmless.
reg_g <- lm(dfte ~ NJ + low_wage + NJ * low_wage, data = df)
summary(reg_g)
##
## Call:
## lm(formula = dfte ~ NJ + low_wage + NJ * low_wage, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -39.652 -3.551 0.035 4.750 24.949
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.8478 1.7635 -2.182 0.0298 *
## NJ 1.5978 2.0478 0.780 0.4358
## low_wage 2.8129 2.1848 1.288 0.1988
## NJ:low_wage 0.4884 2.4867 0.196 0.8444
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.457 on 347 degrees of freedom
## Multiple R-squared: 0.03706, Adjusted R-squared: 0.02874
## F-statistic: 4.452 on 3 and 347 DF, p-value: 0.004376
# Interpretation:
# Comparison to PA isolates the minimum wage effect from general wage growth.
# Significant wage increase in NJ relative to PA for low-wage restaurants.
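# The employment interaction (NJ:low_wage = 0.49, p ≈ 0.84) is small and not significant in the pooled model, so the employment gain of low-wage NJ restaurants cannot be distinguished from the pattern in PA; part of it may be due to other factors.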