LE 2 Difference in Differences

library(MASS)

## Warning: Paket 'MASS' wurde unter R Version 4.4.3 erstellt

library(tidyverse)

## Warning: Paket 'tidyverse' wurde unter R Version 4.4.3 erstellt

## Warning: Paket 'ggplot2' wurde unter R Version 4.4.3 erstellt

## Warning: Paket 'tidyr' wurde unter R Version 4.4.3 erstellt

## Warning: Paket 'readr' wurde unter R Version 4.4.3 erstellt

## Warning: Paket 'purrr' wurde unter R Version 4.4.3 erstellt

## Warning: Paket 'dplyr' wurde unter R Version 4.4.3 erstellt

## Warning: Paket 'forcats' wurde unter R Version 4.4.3 erstellt

## Warning: Paket 'lubridate' wurde unter R Version 4.4.3 erstellt

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   4.0.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ✖ dplyr::select() masks MASS::select()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(GGally)

## Warning: Paket 'GGally' wurde unter R Version 4.4.3 erstellt

library(labelled)

## Warning: Paket 'labelled' wurde unter R Version 4.4.3 erstellt

library(dplyr)
library(rstatix)

## Warning: Paket 'rstatix' wurde unter R Version 4.4.3 erstellt

## 
## Attache Paket: 'rstatix'
## 
## Das folgende Objekt ist maskiert 'package:MASS':
## 
##     select
## 
## Das folgende Objekt ist maskiert 'package:stats':
## 
##     filter

library(ivreg)

## Warning: Paket 'ivreg' wurde unter R Version 4.4.3 erstellt

library(simstudy)

## Warning: Paket 'simstudy' wurde unter R Version 4.4.3 erstellt

library(ivmodel)

## Warning: Paket 'ivmodel' wurde unter R Version 4.4.3 erstellt

library(ggplot2)
library(car)

## Warning: Paket 'car' wurde unter R Version 4.4.3 erstellt

## Lade nötiges Paket: carData

## Warning: Paket 'carData' wurde unter R Version 4.4.3 erstellt

## 
## Attache Paket: 'car'
## 
## Das folgende Objekt ist maskiert 'package:dplyr':
## 
##     recode
## 
## Das folgende Objekt ist maskiert 'package:purrr':
## 
##     some

library(lmtest)

## Warning: Paket 'lmtest' wurde unter R Version 4.4.3 erstellt

## Lade nötiges Paket: zoo

## Warning: Paket 'zoo' wurde unter R Version 4.4.3 erstellt

## 
## Attache Paket: 'zoo'
## 
## Die folgenden Objekte sind maskiert von 'package:base':
## 
##     as.Date, as.Date.numeric

library(tseries)

## Warning: Paket 'tseries' wurde unter R Version 4.4.3 erstellt

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

library(ggfortify)

## Warning: Paket 'ggfortify' wurde unter R Version 4.4.3 erstellt

library(plotly)

## Warning: Paket 'plotly' wurde unter R Version 4.4.3 erstellt

## 
## Attache Paket: 'plotly'
## 
## Das folgende Objekt ist maskiert 'package:ggplot2':
## 
##     last_plot
## 
## Das folgende Objekt ist maskiert 'package:MASS':
## 
##     select
## 
## Das folgende Objekt ist maskiert 'package:stats':
## 
##     filter
## 
## Das folgende Objekt ist maskiert 'package:graphics':
## 
##     layout

library(haven)

## Warning: Paket 'haven' wurde unter R Version 4.4.3 erstellt

library(broom)

## Warning: Paket 'broom' wurde unter R Version 4.4.3 erstellt

# ---- Data import ----
# NOTE(review): the absolute path below only exists on the author's machine;
# adjust it (or start R in the data folder) before running the script elsewhere.
setwd("C:/Users/bdrammeh/Desktop/Labour Economics/assignmnt")

# Read the Stata file from the working directory.
minwage <- haven::read_dta("minwage.dta")

# Replace the 0/1 state codes with state abbreviations (not required, but it
# makes the tables easier to read).
# The "old=new; ..." string is car's recode syntax, so car::recode() is called
# explicitly: both car and dplyr export recode(), and the unqualified call only
# worked because car happened to be attached after dplyr.
minwage$state <- car::recode(minwage$state, "0='PA'; 1='NJ'")

# Keep only the rows flagged sample == 1, i.e. the data that is available in
# both survey waves.
df <- minwage %>%
  dplyr::filter(sample == 1)

######## a) ########
# Per-state means of wage_st2, wage_st and the wage change dw
# (presumably wage_st is the first-wave and wage_st2 the second-wave starting
# wage; confirm against the codebook).
#
# Each summary must read its own source column. summarise() evaluates its
# arguments in order and later arguments see the results of earlier ones, so
# writing `wage_st = mean(wage_st2)` averages the already-collapsed wage_st2
# value and merely duplicates that column.
# NOTE: console output recorded further down in this file may predate this
# correction; re-run the script to refresh it.
wages <- df %>%
  group_by(state) %>%
  summarise(wage_st2 = mean(wage_st2),
            wage_st = mean(wage_st),
            dw = mean(dw))

# i) Within-state difference between the two mean-wage columns.
difference <- wages[["wage_st2"]] - wages[["wage_st"]]

# ii) Between-state gap for every numeric column. The summary rows are ordered
# NJ, PA, so -diff() yields NJ minus PA.
diff_row <- wages %>%
  filter(state %in% c("NJ", "PA")) %>%
  summarise(across(c(wage_st2, wage_st, dw), function(col) -diff(col)),
            state = "diff")

# Append the gap as an extra row labelled "diff" and show the table.
wages <- bind_rows(wages, diff_row)
print(wages)

## # A tibble: 3 × 4
##   state wage_st2 wage_st      dw
##   <chr>    <dbl>   <dbl>   <dbl>
## 1 NJ       5.08    5.08   0.469 
## 2 PA       4.62    4.62  -0.0348
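## 3 diff     0.463   0.463  0.504
## NOTE: in this recorded output the wage_st column is identical to wage_st2,
## so it does not show the first-wave means. If dw = wage_st2 - wage_st, the
## first-wave means implied by the other two columns are roughly 4.61 (NJ)
## and 4.65 (PA).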

# Interpretation:
# NJ’s average starting wage increased by $0.469, while PA’s decreased slightly by $0.0348.
# The difference-in-differences estimate (dw diff = 0.504) indicates that NJ wages increased about $0.50 more than PA wages, which can be attributed to the minimum wage increase.
# This assumes parallel trends: without the law change, NJ wages would have changed similarly to PA.
# key point: The DiD shows a clear wage effect from the minimum wage increase.
######## b) ########
# Per-state means of the employment variables fte2, fte and the change dfte.
fte <- df %>%
  group_by(state) %>%
  summarise(across(c(fte2, fte, dfte), mean))

# i) Within-state difference between the two mean-employment columns.
difference_fte <- fte[["fte2"]] - fte[["fte"]]

# ii) Between-state gap for every numeric column. The summary rows are ordered
# NJ, PA, so -diff() yields NJ minus PA.
diff_row_fte <- fte %>%
  filter(state %in% c("NJ", "PA")) %>%
  summarise(across(c(fte2, fte, dfte), function(col) -diff(col)),
            state = "diff")

# Append the gap as an extra row labelled "diff" and show the table.
fte <- bind_rows(fte, diff_row_fte)
print(fte)

## # A tibble: 3 × 4
##   state   fte2   fte   dfte
##   <chr>  <dbl> <dbl>  <dbl>
## 1 NJ    17.6   17.3   0.287
## 2 PA    18.1   20.1  -2.02 
## 3 diff  -0.536 -2.84  2.30

# Interpretation:
# NJ employment increased slightly (+0.287), while PA employment fell (-2.02).
# The DiD estimate (2.30) means that employment in NJ rose by about 2.3 relative to PA: NJ gained 0.287 while PA lost 2.02.
#The minimum wage increase does not seem to have reduced employment; if anything, NJ performed better than PA.


######## c) ########
# Convert the state labels into a numeric dummy (PA = 0, NJ = 1) so that
# `state` can enter the regressions below as the treatment indicator.
df <- df %>%
  mutate(state = dplyr::recode(state, "PA" = 0, "NJ" = 1))
#df$treat <- 

#table(df$state, df$treat)
#minwage$state <- recode(as.character(minwage$state), "0" = "PA", "1" = "NJ")
#minwage$state <- ifelse(minwage$state == 0, "PA", "NJ")




# ii) Difference-in-differences as a regression without covariates.
## Wages: regress the wage change dw on the state dummy (PA = 0, NJ = 1).
## The intercept is the mean change in PA; the `state` coefficient is the
## NJ-minus-PA gap in mean changes.
reg_wages <- lm(dw ~ state, data = df)

summary(reg_wages)

## 
## Call:
## lm(formula = dw ~ state, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.03916 -0.21515  0.03485  0.33084  2.03485 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.03485    0.04287  -0.813    0.417    
## state        0.50401    0.04757  10.595   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3483 on 349 degrees of freedom
## Multiple R-squared:  0.2434, Adjusted R-squared:  0.2412 
## F-statistic: 112.2 on 1 and 349 DF,  p-value: < 2.2e-16

## Employment: same specification with the employment change dfte as the
## outcome, so the `state` coefficient is the NJ-minus-PA gap in mean dfte.
reg_emp <- lm(dfte ~ state, data = df)

summary(reg_emp)

## 
## Call:
## lm(formula = dfte ~ state, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -41.485  -3.287   0.213   4.463  25.765 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)   -2.015      1.052  -1.916   0.0562 .
## state          2.302      1.167   1.972   0.0494 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.546 on 349 degrees of freedom
## Multiple R-squared:  0.01102,    Adjusted R-squared:  0.008184 
## F-statistic: 3.888 on 1 and 349 DF,  p-value: 0.04942

# iii) Same regressions with restaurant-level controls.
## Wages: add co_owned and one dummy per chain. as.factor() makes lm() treat
## the chain code as categorical instead of as a numeric regressor.
reg_wages_c <- lm(dw ~ state + co_owned + as.factor(chain), data = df)
summary(reg_wages_c)

## 
## Call:
## lm(formula = dw ~ state + co_owned + as.factor(chain), data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.05863 -0.21156  0.00137  0.25137  1.95503 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        0.04497    0.04744   0.948  0.34379    
## state              0.50366    0.04693  10.731  < 2e-16 ***
## co_owned          -0.03676    0.04308  -0.853  0.39413    
## as.factor(chain)2 -0.04665    0.05084  -0.918  0.35945    
## as.factor(chain)3 -0.15112    0.05180  -2.917  0.00376 ** 
## as.factor(chain)4 -0.15024    0.05846  -2.570  0.01060 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3419 on 345 degrees of freedom
## Multiple R-squared:  0.279,  Adjusted R-squared:  0.2685 
## F-statistic:  26.7 on 5 and 345 DF,  p-value: < 2.2e-16

## Employment: the employment change dfte on the state dummy plus the same
## controls (co_owned and categorical chain dummies).
reg_emp_c <- lm(dfte ~ state + co_owned + as.factor(chain), data = df)
summary(reg_emp_c)

## 
## Call:
## lm(formula = dfte ~ state + co_owned + as.factor(chain), data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -41.893  -3.628   0.469   4.372  25.357 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)  
## (Intercept)        -1.6073     1.1867  -1.354   0.1765  
## state               2.2973     1.1741   1.957   0.0512 .
## co_owned            0.3394     1.0777   0.315   0.7530  
## as.factor(chain)2   0.2990     1.2719   0.235   0.8143  
## as.factor(chain)3  -1.9637     1.2960  -1.515   0.1306  
## as.factor(chain)4  -0.7816     1.4626  -0.534   0.5934  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.554 on 345 degrees of freedom
## Multiple R-squared:  0.0207, Adjusted R-squared:  0.006506 
## F-statistic: 1.458 on 5 and 345 DF,  p-value: 0.2029

# Wages (dw ~ state)

# state coefficient = 0.504, matching the DiD from part (a).

#Highly significant (p < 0.001).

# Adding covariates (co_owned, chain) barely changes the effect: 0.5040 → 0.5037, confirming robustness.

#Employment (dfte ~ state)

#state coefficient = 2.30, significant at p ≈ 0.049, consistent with part (b).

#Adding covariates reduces significance slightly (p ≈ 0.051), but coefficient remains ~2.30.

# interpretation:

# Regression approach gives the same DiD estimate as the simple calculation.
# Covariates do not materially change the estimates, as expected (the effect is driven by state/time difference).


######## d) ########
# ii) Comparison within NJ.
# Keep the NJ restaurants (state == 1) and add low_wage, which is 1 when
# wage_st is below 5 and 0 otherwise.
nj <- df %>%
  filter(state == 1) %>%
  mutate(low_wage = ifelse(wage_st < 5, 1, 0))

# Wages: the low_wage coefficient is the gap in mean dw between the two groups
# of NJ restaurants.
reg_wages_d <- lm(formula = dw ~ low_wage, data = nj)
summary(reg_wages_d)

## 
## Call:
## lm(formula = dw ~ low_wage, data = nj)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.56591 -0.16178  0.05409  0.18822  0.55822 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.004091   0.026719  -0.153    0.878    
## low_wage     0.615872   0.030480  20.206   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2171 on 283 degrees of freedom
## Multiple R-squared:  0.5906, Adjusted R-squared:  0.5892 
## F-statistic: 408.3 on 1 and 283 DF,  p-value: < 2.2e-16

# Employment: regress the employment change dfte on the low_wage dummy within
# the NJ subsample; the coefficient is the low_wage-minus-other gap in mean dfte.
reg_emp_d <- lm(dfte ~ low_wage, data = nj)
summary(reg_emp_d)

## 
## Call:
## lm(formula = dfte ~ low_wage, data = nj)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -35.051  -3.551  -0.051   3.949  24.949 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  -2.2500     0.9472  -2.375  0.01820 * 
## low_wage      3.3014     1.0806   3.055  0.00246 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.695 on 283 degrees of freedom
## Multiple R-squared:  0.03193,    Adjusted R-squared:  0.02851 
## F-statistic: 9.334 on 1 and 283 DF,  p-value: 0.002464

# interpretation:
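# Strong wage response in low-wage NJ restaurants: their wages rose about $0.62 more than those of the other NJ restaurants.
# Their employment change was also 3.30 higher (about +1.05 versus -2.25), suggesting no negative employment effect for these restaurants.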


######## e) ########
# Placebo comparison within PA.
# Keep the PA restaurants (state == 0) and build the same low_wage dummy
# (1 when wage_st is below 5, 0 otherwise).
pa <- df %>%
  filter(state == 0) %>%
  mutate(low_wage = ifelse(wage_st < 5, 1, 0))

# i)
## Wages: gap in mean dw between the two groups of PA restaurants.
reg_wages_e <- lm(formula = dw ~ low_wage, data = pa)
summary(reg_wages_e)

## 
## Call:
## lm(formula = dw ~ low_wage, data = pa)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.58837 -0.08837 -0.08837  0.13913  1.91163 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.26522    0.07318  -3.624 0.000575 ***
## low_wage     0.35359    0.09066   3.900 0.000233 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3509 on 64 degrees of freedom
## Multiple R-squared:  0.192,  Adjusted R-squared:  0.1794 
## F-statistic: 15.21 on 1 and 64 DF,  p-value: 0.0002331

## Employment: regress the employment change dfte on the low_wage dummy within
## the PA subsample.
reg_emp_e <- lm(dfte ~ low_wage, data = pa)
summary(reg_emp_e)

## 
## Call:
## lm(formula = dfte ~ low_wage, data = pa)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -39.652  -5.465   1.441   5.988  24.785 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   -3.848      2.340  -1.644    0.105
## low_wage       2.813      2.899   0.970    0.336
## 
## Residual standard error: 11.22 on 64 degrees of freedom
## Multiple R-squared:  0.01449,    Adjusted R-squared:  -0.0009036 
## F-statistic: 0.9413 on 1 and 64 DF,  p-value: 0.3356

# ii) Pooled regression with an interaction term.
# NJ is a copy of the 0/1 state dummy; low_wage is rebuilt on the full sample
# (1 when wage_st is below 5, 0 otherwise).
df <- df %>%
  mutate(NJ = state,
         low_wage = ifelse(wage_st < 5, 1, 0))

# Wages. `NJ * low_wage` on its own already expands to both main effects plus
# the NJ:low_wage interaction; the main effects are spelled out for readability.
reg_f <- lm(formula = dw ~ NJ + low_wage + NJ * low_wage, data = df)
summary(reg_f)

## 
## Call:
## lm(formula = dw ~ NJ + low_wage + NJ * low_wage, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.58837 -0.16178  0.01522  0.18822  1.91163 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.26522    0.05156  -5.144 4.51e-07 ***
## NJ           0.26113    0.05987   4.361 1.71e-05 ***
## low_wage     0.35359    0.06388   5.536 6.13e-08 ***
## NJ:low_wage  0.26228    0.07270   3.608 0.000354 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2473 on 347 degrees of freedom
## Multiple R-squared:  0.6207, Adjusted R-squared:  0.6174 
## F-statistic: 189.3 on 3 and 347 DF,  p-value: < 2.2e-16

# Employment: same interacted specification with the employment change dfte as
# the outcome. The NJ:low_wage coefficient is the extra low_wage gap in NJ
# compared with PA.
reg_g <- lm(dfte ~ NJ + low_wage + NJ * low_wage, data = df)
summary(reg_g)

## 
## Call:
## lm(formula = dfte ~ NJ + low_wage + NJ * low_wage, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -39.652  -3.551   0.035   4.750  24.949 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  -3.8478     1.7635  -2.182   0.0298 *
## NJ            1.5978     2.0478   0.780   0.4358  
## low_wage      2.8129     2.1848   1.288   0.1988  
## NJ:low_wage   0.4884     2.4867   0.196   0.8444  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.457 on 347 degrees of freedom
## Multiple R-squared:  0.03706,    Adjusted R-squared:  0.02874 
## F-statistic: 4.452 on 3 and 347 DF,  p-value: 0.004376

# Interpretation:

# Comparison to PA isolates the minimum wage effect from general wage growth.

# Significant wage increase in NJ relative to PA for low-wage restaurants.

# Employment effect smaller and not significant after pooling, indicating some of the employment increase may be due to other factors.

LE 2 Difference in Differences

Bademba Drammeh

2025-11-24