首页 > 编程学习 > 如何用R语言做工具变量回归(未完工版本

如何用R语言做工具变量回归(未完工版本

发布时间:2022/12/10 17:41:02

在计量经济学中的回归中,可能会遇到遗漏变量偏误、测量误差、双向因果等问题,那么工具变量是解决此类内生性问题的几大利器之一。本文用Stock&Waston课本章节为例,展示如何在R语言中进行工具变量的回归。工具变量回归用到的两阶段最小二乘法(2SLS)

首先载入需要用到包,没安装的话需要提前安装

library(AER)
library(stargazer)

载入数据并且进行统计描述

data("CigarettesSW")
summary(CigarettesSW)
# compute real per capita prices
CigarettesSW$rprice <- with(CigarettesSW, price / cpi)
# compute the sales tax
CigarettesSW$salestax <- with(CigarettesSW, (taxs - tax) / cpi)
# check the correlation between sales tax and price
cor(CigarettesSW$salestax, CigarettesSW$price)
# generate a subset for the year 1995
c1995 <- subset(CigarettesSW, year == "1995")
# perform the first stage regression
cig_s1 <- lm(log(rprice) ~ salestax, data = c1995)
coeftest(cig_s1, vcov = vcovHC, type = "HC1")
# inspect the Rˆ2 of the first stage regression
summary(cig_s1)$r.squared
# store the predicted values
lcigp_pred <- cig_s1$fitted.values
# run the stage 2 regression
cig_s2 <- lm(log(c1995$packs) ~ lcigp_pred)
coeftest(cig_s2, vcov = vcovHC)

上面的方法其实很傻,并不需要这样。其实只需要执行一次就可以

# perform TSLS using 'ivreg()'
cig_ivreg <- ivreg(log(packs) ~ log(rprice) | salestax, data = c1995)
coeftest(cig_ivreg, vcov = vcovHC, type = "HC1")

那么还要有其他控制变量的话呢

# add rincome to the dataset
CigarettesSW$rincome <- with(CigarettesSW, income / population / cpi)
c1995 <- subset(CigarettesSW, year == "1995")
# estimate the model
cig_ivreg2 <- ivreg(log(packs) ~ log(rprice) + log(rincome) | log(rincome) +
salestax, data = c1995)
coeftest(cig_ivreg2, vcov = vcovHC, type = "HC1")
# add cigtax to the data set
CigarettesSW$cigtax <- with(CigarettesSW, tax/cpi)
c1995 <- subset(CigarettesSW, year == "1995")
# estimate the model
cig_ivreg3 <- ivreg(log(packs) ~ log(rprice) + log(rincome) |
log(rincome) + salestax + cigtax, data = c1995)
coeftest(cig_ivreg3, vcov = vcovHC, type = "HC1")

# subset data for year 1985
c1985 <- subset(CigarettesSW, year == "1985")
# define differences in variables
packsdiff <- log(c1995$packs) - log(c1985$packs)
pricediff <- log(c1995$price/c1995$cpi) - log(c1985$price/c1985$cpi)
incomediff <- log(c1995$income/c1995$population/c1995$cpi) -
log(c1985$income/c1985$population/c1985$cpi)
salestaxdiff <- (c1995$taxs - c1995$tax)/c1995$cpi - (c1985$taxs - c1985$tax)/c1985$cpi
cigtaxdiff <- c1995$tax/c1995$cpi - c1985$tax/c1985$cpi
# estimate the three models
cig_ivreg_diff1 <- ivreg(packsdiff ~ pricediff + incomediff | incomediff +
salestaxdiff)
cig_ivreg_diff2 <- ivreg(packsdiff ~ pricediff + incomediff | incomediff +
cigtaxdiff)
cig_ivreg_diff3 <- ivreg(packsdiff ~ pricediff + incomediff | incomediff +
salestaxdiff + cigtaxdiff)
# robust coefficient summary for 1.
coeftest(cig_ivreg_diff1, vcov = vcovHC, type = "HC1")
# robust coefficient summary for 2.
coeftest(cig_ivreg_diff2, vcov = vcovHC, type = "HC1")
# robust coefficient summary for 3.
coeftest(cig_ivreg_diff3, vcov = vcovHC, type = "HC1")
# gather robust standard errors in a list
rob_se <- list(sqrt(diag(vcovHC(cig_ivreg_diff1, type = "HC1"))),
sqrt(diag(vcovHC(cig_ivreg_diff2, type = "HC1"))),
sqrt(diag(vcovHC(cig_ivreg_diff3, type = "HC1"))))
# generate table
stargazer(cig_ivreg_diff1, cig_ivreg_diff2,cig_ivreg_diff3,
header = FALSE,
type = "html",
omit.table.layout = "n",
digits = 3,
column.labels = c("IV: salestax", "IV: cigtax", "IVs: salestax, cigtax"),
dep.var.labels.include = FALSE,
dep.var.caption = "Dependent Variable: 1985-1995 Difference in Log per Pack Price",
se = rob_se)
# first-stage regressions
mod_relevance1 <- lm(pricediff ~ salestaxdiff + incomediff)
mod_relevance2 <- lm(pricediff ~ cigtaxdiff + incomediff)
mod_relevance3 <- lm(pricediff ~ incomediff + salestaxdiff + cigtaxdiff)
# check instrument relevance for model (1)
linearHypothesis(mod_relevance1,
"salestaxdiff = 0",vcov = vcovHC, type = "HC1")
# check instrument relevance for model (2)
linearHypothesis(mod_relevance2,
"cigtaxdiff = 0",
vcov = vcovHC, type = "HC1")
# check instrument relevance for model (3)
linearHypothesis(mod_relevance3,
c("salestaxdiff = 0", "cigtaxdiff = 0"),
vcov = vcovHC, type = "HC1")
# compute the J-statistic
cig_iv_OR <- lm(residuals(cig_ivreg_diff3) ~ incomediff + salestaxdiff + cigtaxdiff)
cig_OR_test <- linearHypothesis(cig_iv_OR,
c("salestaxdiff = 0", "cigtaxdiff = 0"),
test = "Chisq")
cig_OR_test
# compute correct p-value for J-statistic
pchisq(cig_OR_test[2, 5], df = 1, lower.tail = FALSE)


本文链接:https://www.ngui.cc/zz/1568978.html
Copyright © 2010-2022 ngui.cc 版权所有 |关于我们| 联系方式| 豫B2-20100000