Chapter 7 Simulation Design
7.1 Example 1
R code
library(hdm)
set.seed(1)
= 20 # trials
B= rep(0, B)
Naive = rep(0, B) Orthogonal
Python code
import hdmpy
import numpy as np
import random
import statsmodels.api as sm
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import colors
# Set seed
0)
np.random.seed(= 20
B = np.zeros( B )
Naive = np.zeros( B ) Orthogonal
R code
for (i in 1:B){
=100
n= 100
p= 1/(1:p)^2
beta =1/(1:p)^2
gamma
=matrix(rnorm(n*p), n, p)
X
= X%*%gamma + rnorm(n)/4
D
= D+ X%*%beta + rnorm(n)
Y
# single selection method
= which(rlasso(Y~ D+X)$coef[-c(1,2)] !=0) #select covariates by Lasso
SX.IDs
if (sum(SX.IDs)==0) {Naive[i] = lm(Y~ D)$coef[2]}
if (sum(SX.IDs)>0) {Naive[i] = lm(Y~ D + X[,SX.IDs])$coef[2]}
#partialling out
= rlasso(Y~ X, Post=F)$res
resY = rlasso(D~ X, Post=F)$res
resD = lm(resY ~ resD)$coef[2]
Orthogonal[i]
}
Python code
for i in range( 0, B ):
= 100
n = 100
p = ( 1 / (np.arange( 1, p + 1 ) ** 2 ) ).reshape( p , 1 )
beta = ( 1 / (np.arange( 1, p + 1 ) ** 2 ) ).reshape( p , 1 )
gamma
= 0
mean = 1
sd = np.random.normal( mean , sd, n * p ).reshape( n, p )
X
= ( X @ gamma ) + np.random.normal( mean , sd, n ).reshape( n, 1 )/4 # We reshape because in r when we sum a vecto with a matrix it sum by column
D
# DGP
= D + ( X @ beta ) + np.random.normal( mean , sd, n ).reshape( n, 1 )
Y
# single selection method
= hdmpy.rlasso( np.concatenate( ( D , X ) , axis = 1 ) , Y , post = True ) # Regress main equation by lasso
r_lasso_estimation
= r_lasso_estimation.est[ 'coefficients' ].iloc[ 2:, :].to_numpy() # Get "X" coefficients
coef_array
= np.where( coef_array != 0 )[0]
SX_IDs
# In case all X coefficients are zero, then regress Y on D
if sum(SX_IDs) == 0 :
= sm.OLS( Y , sm.add_constant(D) ).fit().summary2().tables[1].round(3).iloc[ 1, 0 ]
Naive[ i ]
# Otherwise, then regress Y on X and D (but only in the selected coefficients)
elif sum( SX_IDs ) > 0 :
= np.concatenate( ( D, X[:, SX_IDs ] ) , axis = 1 )
X_D = sm.OLS( Y , sm.add_constant( X_D ) ).fit().summary2().tables[1].round(3).iloc[ 1, 0]
Naive[ i ]
# In both cases we save D coefficient
# Regress residuals.
= hdmpy.rlasso( X , Y , post = False ).est[ 'residuals' ]
resY = hdmpy.rlasso( X , D , post = False ).est[ 'residuals' ]
resD = sm.OLS( resY , sm.add_constant( resD ) ).fit().summary2().tables[1].round(3).iloc[ 1, 0] Orthogonal[ i ]
R code
hist(Orthogonal-1,col=4, freq=F, xlim= c(-2, 2), xlab= "Orhtogonal -True ", main="Orthogonal")
hist(Naive-1, col=2, freq=F, xlim= c(-2,2), xlab= "Naive- True", main = "Naive")
Python code
= [-1.2, -1, -0.8, -0.6, -0.4, -0.2, 0, 0.2, 0.4, 0.6, 0.8, 1, 1.2, 1.4, 1.6, 1.8, 2]
Orto_breaks = [-0.6, -0.4, -0.2, 0, 0.2, 0.4, 0.6, 0.8, 1, 1.2]
Naive_breaks = plt.subplots(1, 2, sharex= True, tight_layout=True)
fig, axs # We can set the number of bins with the `bins` kwarg
0].hist( Orthogonal - 1 , range = (-2, 2), density = True , bins = Orto_breaks ) axs[
## (array([0. , 0. , 0. , 0. , 0.75, 0.25, 0.5 , 1.5 , 1. , 0.5 , 0.25,
## 0.25, 0. , 0. , 0. , 0. ]), array([-1.2, -1. , -0.8, -0.6, -0.4, -0.2, 0. , 0.2, 0.4, 0.6, 0.8,
## 1. , 1.2, 1.4, 1.6, 1.8, 2. ]), <BarContainer object of 16 artists>)
1].hist( Naive - 1, range = (-2, 2), density = True , bins = Naive_breaks ) axs[
## (array([0. , 0. , 0. , 0. , 0. , 0. , 0.5 , 2.75, 1.75]), array([-0.6, -0.4, -0.2, 0. , 0.2, 0.4, 0.6, 0.8, 1. , 1.2]), <BarContainer object of 9 artists>)
0].title.set_text('Orthogonal')
axs[1].title.set_text('Naive')
axs[0].set_xlabel( 'Orhtogonal - True' ) axs[
## Text(0.5, 0, 'Orhtogonal - True')
1].set_xlabel( 'Naive - True' ) axs[
7.2 Example 2
R code
library(hdm)
set.seed(1)
= 20 # trials
B= rep(0, B)
Naive = rep(0, B)
Orthogonal
for (i in 1:B){
=100
n= 100
p= 1/(1:p)^2
beta =1/(1:p)^2
gamma
=matrix(rnorm(n*p), n, p)
X
= X%*%gamma + rnorm(n)/4
D
= D+ X%*%beta + rnorm(n)
Y
# single selection method
= which(rlasso(Y~ D+X)$coef[-c(1,2)] !=0) #select covariates by Lasso
SX.IDs
if (sum(SX.IDs)==0) {Naive[i] = lm(Y~ D)$coef[2]}
if (sum(SX.IDs)>0) {Naive[i] = lm(Y~ D + X[,SX.IDs])$coef[2]}
#partialling out
= rlasso(Y~ X, Post=T)$res
resY = rlasso(D~ X, Post=T)$res
resD = lm(resY ~ resD)$coef[2]
Orthogonal[i]
}
Python code
# Set seed
# Set seed
0)
np.random.seed(= 20
B = np.zeros( B )
Naive = np.zeros( B )
Orthogonal
for i in range( 0, B ):
= 100
n = 100
p = ( 1 / (np.arange( 1, p + 1 ) ** 2 ) ).reshape( p , 1 )
beta = ( 1 / (np.arange( 1, p + 1 ) ** 2 ) ).reshape( p , 1 )
gamma
= 0
mean = 1
sd = np.random.normal( mean , sd, n * p ).reshape( n, p )
X
= ( X @ gamma ) + np.random.normal( mean , sd, n ).reshape( n, 1 )/4 # We reshape because in r when we sum a vecto with a matrix it sum by column
D = D + ( X @ beta ) + np.random.normal( mean , sd, n ).reshape( n, 1 )
Y
# single selectin method
= hdmpy.rlasso( np.concatenate( ( D , X ) , axis = 1 ) , Y , post = True )
r_lasso_estimation
= r_lasso_estimation.est[ 'coefficients' ].iloc[ 2:, :].to_numpy()
coef_array
= np.where( coef_array != 0 )[0]
SX_IDs
if sum(SX_IDs) == 0 :
0 ] = sm.OLS( Y , sm.add_constant(D) ).fit().summary2().tables[1].round(3).iloc[ 1, 0 ]
Naive[
elif sum( SX_IDs ) > 0 :
= np.concatenate( ( D, X[:, SX_IDs ] ) , axis = 1 )
X_D = sm.OLS( Y , sm.add_constant( X_D ) ).fit().summary2().tables[1].round(3).iloc[ 1, 0]
Naive[ i ]
= hdmpy.rlasso( X , Y , post = True ).est[ 'residuals' ]
resY = hdmpy.rlasso( X , D , post = True ).est[ 'residuals' ]
resD = sm.OLS( resY , sm.add_constant( resD ) ).fit().summary2().tables[1].round(3).iloc[ 1, 0] Orthogonal[ i ]
R code
hist(Orthogonal-1,col=4, freq=F, xlim= c(-2, 2), xlab= "Orhtogonal -True ", main="Orthogonal")
hist(Naive-1, col=2, freq=F, xlim= c(-2,2), xlab= "Naive- True", main = "Naive")
Python code
= plt.subplots(1, 2, sharex= True, tight_layout=True)
fig, axs
# We can set the number of bins with the `bins` kwarg
0].hist( Orthogonal - 1 , range = (-2, 2), density = True , bins = Orto_breaks ) axs[
## (array([0. , 0. , 0. , 0.75, 0.5 , 1.25, 1. , 0.5 , 0.5 , 0.25, 0.25,
## 0. , 0. , 0. , 0. , 0. ]), array([-1.2, -1. , -0.8, -0.6, -0.4, -0.2, 0. , 0.2, 0.4, 0.6, 0.8,
## 1. , 1.2, 1.4, 1.6, 1.8, 2. ]), <BarContainer object of 16 artists>)
1].hist( Naive - 1, range = (-2, 2), density = True , bins = Naive_breaks ) axs[
## (array([0., 0., 0., 0., 0., 0., 0., 0., 5.]), array([-0.6, -0.4, -0.2, 0. , 0.2, 0.4, 0.6, 0.8, 1. , 1.2]), <BarContainer object of 9 artists>)
0].title.set_text('Orthogonal')
axs[1].title.set_text('Naive')
axs[
0].set_xlabel( 'Orhtogonal - True' ) axs[
## Text(0.5, 0, 'Orhtogonal - True')
1].set_xlabel( 'Naive - True' ) axs[