###################### ###################### # Part 2: Matching and Weighting ###################### ###################### # Load required packages library(MatchIt) library(cobalt) library(tidyverse) library(janitor) library(haven) library(here) library(WeightIt) library(marginaleffects) # Print the working directory using 'here()' # This ensures that the current working directory is set correctly for the workshop. print(here()) # Read in the data using a relative path for reproducibility shadishclark <- read_dta(here("shadishclark.dta")) # View the initial rows of the data to check it's loaded correctly View(shadishclark) #### Data Preparation #### # Creating the treatment variable # We are defining 'treatment' based on the 'vm' column shadishclark <- shadishclark %>% mutate(treatment = case_when( vm == 1 ~ 1, # If 'vm' is 1, treatment is set to 1 vm == 2 ~ 0, # If 'vm' is 2, treatment is set to 0 TRUE ~ NA_real_ # For all other cases, treatment is set to NA )) # Display the frequency distribution of the treatment variable tabyl(shadishclark$treatment) # ----------------------------------------------------------------------------- # Initial Balance Assessment # ----------------------------------------------------------------------------- # Here we assess the baseline balance between treatment and control groups # without any matching technique applied. # Fit a basic model to assess baseline balance m.out <- matchit( treatment ~ vocabpre + numbmath + likemath + likelit + preflit + pagree + pconsc + pemot + pintell + mars + beck + cauc + afram + other + age + male + married + momdegr + daddegr + credit + majormi + actcomp + hsgpaar + collgpaa + mathpre + pextra, data = shadishclark, method = NULL ) # Using NULL method means no matching # Generate summary statistics to assess initial balance between groups. summary(m.out) # Visualize the balance across all covariates before matching plot(summary(m.out)) # ----------------------------------------------------------------------------- # Propensity Score (PS) Matching # ----------------------------------------------------------------------------- # 1. Nearest Neighbor Matching without Caliper m.out.matched <- matchit( treatment ~ vocabpre + numbmath + likemath + likelit + preflit + pagree + pconsc + pemot + pintell + mars + beck + cauc + afram + other + age + male + married + momdegr + daddegr + credit + majormi + actcomp + hsgpaar + collgpaa + mathpre + pextra, data = shadishclark, distance = "logit", method = "nearest" ) # Extract and attach propensity scores to the original dataset shadishclark$propensity_score <- m.out.matched$distance # Display the initial rows showing the treatment and the corresponding propensity scores head(select(shadishclark, treatment, propensity_score)) # Display balance statistics after matching m.out.matched summary(m.out.matched) plot(summary(m.out.matched)) plot(m.out.matched, type = "jitter", interactive = F) # 2. Nearest Neighbor Matching with Caliper m.out.matched <- matchit( treatment ~ vocabpre + numbmath + likemath + likelit + preflit + pagree + pconsc + pemot + pintell + mars + beck + cauc + afram + other + age + male + married + momdegr + daddegr + credit + majormi + actcomp + hsgpaar + collgpaa + mathpre + pextra, data = shadishclark, distance = "logit", caliper = 0.25, # lets play with the caliper method = "nearest" ) # Display balance statistics after matching summary(m.out.matched) plot(summary(m.out.matched)) plot(m.out.matched, type = "jitter", interactive = F) # 3. Nearest Neighbor Matching with Replacement m.out.matched <- matchit( treatment ~ vocabpre + numbmath + likemath + likelit + preflit + pagree + pconsc + pemot + pintell + mars + beck + cauc + afram + other + age + male + married + momdegr + daddegr + credit + majormi + actcomp + hsgpaar + collgpaa + mathpre + pextra, data = shadishclark, distance = "logit", caliper = 0.25, replace = T, # with or wihtout replacement method = "nearest" ) # Display balance statistics after matching summary(m.out.matched) plot(summary(m.out.matched)) plot(m.out.matched, type = "jitter", interactive = F) # 4. Optimal Matching m.out.matched <- matchit( treatment ~ vocabpre + numbmath + likemath + likelit + preflit + pagree + pconsc + pemot + pintell + mars + beck + cauc + afram + other + age + male + married + momdegr + daddegr + credit + majormi + actcomp + hsgpaar + collgpaa + mathpre + pextra, data = shadishclark, distance = "logit", method = "optimal" ) # Display balance statistics after matching summary(m.out.matched) plot(summary(m.out.matched)) plot(m.out.matched, type = "jitter", interactive = F) # 5. Exact Matching on a Specific Variable (e.g., "married") m.out.matched <- matchit( treatment ~ vocabpre + numbmath + likemath + likelit + preflit + pagree + pconsc + pemot + pintell + mars + beck + cauc + afram + other + age + male + married + momdegr + daddegr + credit + majormi + actcomp + hsgpaar + collgpaa + mathpre + pextra, data = shadishclark, exact = "married", # lets do some exact matching distance = "logit", caliper = 0.25, replace = T, # with or wihtout replacement method = "nearest" ) # Display balance statistics after matching summary(m.out.matched) plot(summary(m.out.matched)) plot(m.out.matched, type = "jitter", interactive = F) # ----------------------------------------------------------------------------- # Propensity Score (PS) Weighting # ----------------------------------------------------------------------------- # Examine initial inbalance bal.tab( treatment ~ vocabpre + numbmath + likemath + likelit + preflit + pagree + pconsc + pemot + pintell + mars + beck + cauc + afram + other + age + male + married + momdegr + daddegr + credit + majormi + actcomp + hsgpaar + collgpaa + mathpre + pextra, data = shadishclark, estimand = "ATE", thresholds = c(m = .25) ) # IPTW for Average Treatment Effect (ATE) w.out <- weightit( treatment ~ vocabpre + numbmath + likemath + likelit + preflit + pagree + pconsc + pemot + pintell + mars + beck + cauc + afram + other + age + male + married + momdegr + daddegr + credit + majormi + actcomp + hsgpaar + collgpaa + mathpre + pextra, data = shadishclark, method = "ps", estimand = "ATE" ) w.out summary(w.out) bal.tab(w.out, stats = c("m", "v"), thresholds = c(m = .25)) # IPTW for Average Treatment Effect on the Treated (ATT) w.out <- weightit( treatment ~ vocabpre + numbmath + likemath + likelit + preflit + pagree + pconsc + pemot + pintell + mars + beck + cauc + afram + other + age + male + married + momdegr + daddegr + credit + majormi + actcomp + hsgpaar + collgpaa + mathpre + pextra, data = shadishclark, method = "ps", estimand = "ATT" ) # ----------------------------------------------------------------------------- # Naive Comparison (Without Matching or Weighting) # ----------------------------------------------------------------------------- # Simple regression for treatment effect naive.model <- lm(mathall ~ treatment, data = shadishclark) summary(naive.model) # ----------------------------------------------------------------------------- # Outcome Modeling after Weighting # ----------------------------------------------------------------------------- # IPTW for Average Treatment Effect on the Treated (ATT) w.out <- weightit( treatment ~ vocabpre + numbmath + likemath + likelit + preflit + pagree + pconsc + pemot + pintell + mars + beck + cauc + afram + other + age + male + married + momdegr + daddegr + credit + majormi + actcomp + hsgpaar + collgpaa + mathpre + pextra, data = shadishclark, method = "ps", estimand = "ATT" ) # Fit a linear regression model on the weighted data weighted.data <- shadishclark weighted.data$weights <- w.out$weights outcome.model.weighted <- lm(mathall ~ treatment, data = weighted.data, weights = weighted.data$weights) summary(outcome.model.weighted) # ----------------------------------------------------------------------------- # Doubly Robust Estimation after Weighting # ----------------------------------------------------------------------------- # Incorporate covariates in the outcome model to make it doubly robust doubly.robust.model.weighted <- lm(mathall ~ treatment + vocabpre + numbmath + likemath + likelit + preflit + pagree + pconsc + pemot + pintell + mars + beck + cauc + afram + other + age + male + married + momdegr + daddegr + credit + majormi + actcomp + hsgpaar + collgpaa + mathpre + pextra, data = weighted.data, weights = weighted.data$weights) summary(doubly.robust.model.weighted) # ----------------------------------------------------------------------------- # Handling Standard Errors in Outcome Modeling with Weighted Data # ----------------------------------------------------------------------------- avg_comparisons(doubly.robust.model.weighted, variables = "treatment", vcov = "HC3", wts = weighted.data$weights)