Skip to content
This repository was archived by the owner on Mar 11, 2022. It is now read-only.

Commit a500747

Browse files
authored
Improve subset selection and add show method for AbstractDIDResult (#21)
1 parent b091be6 commit a500747

File tree

6 files changed

+228
-100
lines changed

6 files changed

+228
-100
lines changed

Project.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
1414
PooledArrays = "2dfb63ee-cc39-5dd5-95bd-886bf059d720"
1515
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
1616
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
17+
StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c"
1718
StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d"
1819
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
1920

@@ -28,6 +29,7 @@ Missings = "0.4"
2829
PooledArrays = "1.2"
2930
Reexport = "0.2, 1"
3031
StatsBase = "0.33"
32+
StatsFuns = "0.9"
3133
StatsModels = "0.6.18"
3234
Tables = "1.2"
3335
julia = "1.3"

src/DiffinDiffsBase.jl

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,21 +10,24 @@ using MacroTools: @capture, isexpr, postwalk
1010
using Missings: disallowmissing
1111
using PooledArrays: _label
1212
using Reexport
13-
using StatsBase: Weights, uweights
13+
using StatsBase: CoefTable, Weights, stderror, uweights
14+
using StatsFuns: tdistccdf, tdistinvcdf
1415
@reexport using StatsModels
1516
using StatsModels: Schema
1617
using Tables
1718
using Tables: AbstractColumns, table, istable, columnnames, getcolumn
1819

1920
import Base: ==, show, parent, view
2021
import Base: eltype, firstindex, lastindex, getindex, iterate, length, sym_in
21-
import StatsBase: coef, vcov, responsename, coefnames, weights, nobs, dof_residual
22+
import StatsBase: coef, vcov, confint, nobs, dof_residual, responsename, coefnames, weights,
23+
coeftable
2224
import StatsModels: concrete_term, schema, termvars
2325

2426
const TimeType = Int
2527

2628
# Reexport objects from StatsBase
27-
export coef, vcov, responsename, coefnames, weights, nobs, dof_residual
29+
export coef, vcov, stderror, confint, nobs, dof_residual, responsename, coefnames, weights,
30+
coeftable
2831

2932
export cb,
3033
,
@@ -91,13 +94,16 @@ export cb,
9194
AbstractDIDResult,
9295
DIDResult,
9396
AggregatedDIDResult,
97+
vce,
9498
outcomename,
9599
treatnames,
96100
treatcells,
97101
ntreatcoef,
98102
treatcoef,
99103
treatvcov,
100104
coefinds,
105+
ncovariate,
106+
agg,
101107
SubDIDResult,
102108
TransformedDIDResult,
103109
TransSubDIDResult,

src/did.jl

Lines changed: 126 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -180,22 +180,24 @@ collect estimation results for difference-in-differences.
180180
# Interface definition
181181
| Required methods | Default definition | Brief description |
182182
|---|---|---|
183-
| `coef(r)` | `r.coef` | Vector of point estimates for all treatment coefficients and covariates |
183+
| `coef(r)` | `r.coef` | Vector of point estimates for all coefficients including covariates |
184184
| `vcov(r)` | `r.vcov` | Variance-covariance matrix for estimates in `coef` |
185+
| `vce(r)` | `r.vce` | Covariance estimator |
185186
| `nobs(r)` | `r.nobs` | Number of observations (table rows) involved in estimation |
186187
| `outcomename(r)` | `r.yname` | Name of the outcome variable |
187-
| `coefnames(r)` | `r.coefnames` | Names (`Vector{String}`) of all treatment coefficients and covariates |
188-
| `treatnames(r)` | `coefnames(r)[1:ntreatcoef(r)]` | Names (`Vector{String}`) of treatment coefficients |
188+
| `coefnames(r)` | `r.coefnames` | Names (`Vector{String}`) of all coefficients including covariates |
189189
| `treatcells(r)` | `r.treatcells` | Tables.jl-compatible tabular description of treatment coefficients in the order of `coefnames` (without covariates) |
190+
| `weights(r)` | `r.weightname` | Column name of the weight variable (if specified) |
190191
| `ntreatcoef(r)` | `size(treatcells(r), 1)` | Number of treatment coefficients |
191192
| `treatcoef(r)` | `view(coef(r), 1:ntreatcoef(r))` | A view of treatment coefficients |
192193
| `treatvcov(r)` | `(N = ntreatcoef(r); view(vcov(r), 1:N, 1:N))` | A view of variance-covariance matrix for treatment coefficients |
193-
| `weights(r)` | `r.weights` | Column name of the weight variable (if specified) |
194+
| `treatnames(r)` | `coefnames(r)[1:ntreatcoef(r)]` | Names (`Vector{String}`) of treatment coefficients |
194195
| **Optional methods** | | |
195196
| `parent(r)` | `r.parent` | Result object from which `r` is generated |
197+
| `dof_residual(r)` | `r.dof_residual` | Residual degrees of freedom |
196198
| `responsename(r)` | `outcomename(r)` | Name of the outcome variable |
197199
| `coefinds(r)` | `r.coefinds` | Lookup table (`Dict{String,Int}`) from `coefnames` to integer indices (for retrieving estimates by name) |
198-
| `dof_residual(r)` | `r.dof_residual` | Residual degrees of freedom |
200+
| `ncovariate(r)` | `length(coef(r)) - ntreatcoef(r)` | Number of covariate coefficients |
199201
"""
200202
abstract type AbstractDIDResult <: StatisticalModel end
201203

@@ -323,6 +325,27 @@ through bit-wise `and`.
323325
return treatvcov(r)[inds, inds]
324326
end
325327

328+
"""
329+
vce(r::AbstractDIDResult)
330+
331+
Return the covariance estimator used to estimate the variance-covariance matrix.
332+
"""
333+
vce(r::AbstractDIDResult) = r.vce
334+
335+
"""
336+
confint(r::AbstractDIDResult; level::Real=0.95)
337+
338+
Return a confidence interval for each coefficient estimate.
339+
The returned object is of type `Tuple{Vector{Float64}, Vector{Float64}}`
340+
where the first vector collects the lower bounds for all intervals
341+
and the second one collects the upper bounds.
342+
"""
343+
function confint(r::AbstractDIDResult; level::Real=0.95)
    # Upper-tail quantile of the t distribution with `dof_residual(r)` degrees
    # of freedom; it serves as the multiplier of the standard errors.
    tcrit = tdistinvcdf(dof_residual(r), 1 - (1 - level) / 2)
    halfwidth = tcrit .* stderror(r)
    est = coef(r)
    # Lower bounds first, upper bounds second.
    lower = est .- halfwidth
    upper = est .+ halfwidth
    return lower, upper
end
348+
326349
"""
327350
nobs(r::AbstractDIDResult)
328351
@@ -345,13 +368,6 @@ Return a vector of coefficient names.
345368
"""
346369
coefnames(r::AbstractDIDResult) = r.coefnames
347370

348-
"""
349-
treatnames(r::AbstractDIDResult)
350-
351-
Return a vector of names for treatment coefficients.
352-
"""
353-
treatnames(r::AbstractDIDResult) = coefnames(r)[1:ntreatcoef(r)]
354-
355371
"""
356372
treatcells(r::AbstractDIDResult)
357373
@@ -360,6 +376,14 @@ in the order of coefnames (without covariates).
360376
"""
361377
treatcells(r::AbstractDIDResult) = r.treatcells
362378

379+
"""
380+
weights(r::AbstractDIDResult)
381+
382+
Return the column name of the weight variable.
383+
Return `nothing` if `weights` is not specified for estimation.
384+
"""
385+
weights(r::AbstractDIDResult) = r.weightname
386+
363387
"""
364388
ntreatcoef(r::AbstractDIDResult)
365389
@@ -382,12 +406,11 @@ Return a view of variance-covariance matrix for treatment coefficients.
382406
treatvcov(r::AbstractDIDResult) = (N = ntreatcoef(r); view(vcov(r), 1:N, 1:N))
383407

384408
"""
385-
weights(r::AbstractDIDResult)
409+
treatnames(r::AbstractDIDResult)
386410
387-
Return the column name of the weight variable.
388-
Return `nothing` if `weights` is not specified for estimation.
411+
Return a vector of names for treatment coefficients.
389412
"""
390-
weights(r::AbstractDIDResult) = r.weightname
413+
treatnames(r::AbstractDIDResult) = coefnames(r)[1:ntreatcoef(r)]
391414

392415
"""
393416
parent(r::AbstractDIDResult)
@@ -396,6 +419,13 @@ Return the `AbstractDIDResult` from which `r` is generated.
396419
"""
397420
parent(r::AbstractDIDResult) = r.parent
398421

422+
"""
423+
dof_residual(r::AbstractDIDResult)
424+
425+
Return the residual degrees of freedom.
426+
"""
427+
dof_residual(r::AbstractDIDResult) = r.dof_residual
428+
399429
"""
400430
responsename(r::AbstractDIDResult)
401431
@@ -413,11 +443,35 @@ for retrieving estimates by name.
413443
coefinds(r::AbstractDIDResult) = r.coefinds
414444

415445
"""
416-
dof_residual(r::AbstractDIDResult)
446+
ncovariate(r::AbstractDIDResult)
417447
418-
Return the residual degrees of freedom.
448+
Return the number of covariate coefficients.
419449
"""
420-
dof_residual(r::AbstractDIDResult) = r.dof_residual
450+
ncovariate(r::AbstractDIDResult) = length(coef(r)) - ntreatcoef(r)
451+
452+
"""
453+
agg(r::DIDResult)
454+
455+
Aggregate difference-in-differences estimates
456+
and return a subtype of [`AggregatedDIDResult`](@ref).
457+
The implementation depends on the type of `r`.
458+
"""
459+
agg(r::DIDResult) = error("agg is not implemented for $(typeof(r))")
460+
461+
"""
    coeftable(r::AbstractDIDResult; level::Real=0.95)

Return a `CoefTable` that collects, for each coefficient of `r`,
the point estimate, standard error, t-statistic, two-sided p-value
and the lower and upper bounds of the confidence interval at `level`.
"""
function coeftable(r::AbstractDIDResult; level::Real=0.95)
    cf = coef(r)
    se = stderror(r)
    ts = cf ./ se
    # Two-sided p-values from the t distribution with dof_residual(r) degrees of freedom
    pv = 2 .* tdistccdf.(dof_residual(r), abs.(ts))
    # Forward `level` so that the bounds agree with the column headers built below
    cil, ciu = confint(r; level=level)
    cnames = coefnames(r)
    # Show e.g. "95" rather than "95.0" when the percentage is a whole number
    levstr = isinteger(level*100) ? string(Integer(level*100)) : string(level*100)
    # The trailing 4 and 3 are the column indices of the p-value and the test statistic
    return CoefTable(Vector[cf, se, ts, pv, cil, ciu],
        ["Estimate","Std. Error","t", "Pr(>|t|)", "Lower $levstr%", "Upper $levstr%"],
        ["$(cnames[i])" for i = 1:length(cf)], 4, 3)
end
473+
474+
show(io::IO, r::AbstractDIDResult) = show(io, coeftable(r))
421475

422476
"""
423477
_treatnames(treatcells)
@@ -440,36 +494,52 @@ function _treatnames(treatcells)
440494
return names
441495
end
442496

443-
function _parse_bys!(bycols::Vector, cells::VecColumnTable, by::Pair{Symbol})
497+
# Helper functions that parse the bys option for agg
498+
function _parse_bycells!(bycols::Vector, cells::VecColumnTable, by::Pair{Symbol})
444499
lookup = getfield(cells, :lookup)
445-
_parse_bys!(bycols, cells, lookup[by[1]]=>by[2])
500+
_parse_bycells!(bycols, cells, lookup[by[1]]=>by[2])
446501
end
447502

448-
function _parse_bys!(bycols::Vector, cells::VecColumnTable, by::Pair{Int})
503+
function _parse_bycells!(bycols::Vector, cells::VecColumnTable, by::Pair{Int})
449504
if by[2] isa Function
450505
bycols[by[1]] = apply(cells, by[1]=>by[2])
451506
else
452507
bycols[by[1]] = apply(cells, by[2][1]=>by[2][2])
453508
end
454509
end
455510

456-
function _parse_bys!(bycols::Vector, cells::VecColumnTable, bys)
511+
function _parse_bycells!(bycols::Vector, cells::VecColumnTable, bys)
457512
eltype(bys) <: Pair || throw(ArgumentError("unaccepted type of bys"))
458513
for by in bys
459-
_parse_bys!(bycols, cells, by)
514+
_parse_bycells!(bycols, cells, by)
460515
end
461516
end
462517

463-
function _bycells(r::DIDResult, names, bys)
464-
tcells = treatcells(r)
465-
bynames = names === nothing ? getfield(tcells, :names) : collect(Symbol, names)
466-
bycols = AbstractVector[getcolumn(tcells, n) for n in bynames]
467-
bys === nothing || _parse_bys!(bycols, tcells, bys)
468-
return VecColumnTable(bycols, bynames)
518+
_parse_bycells!(bycols::Vector, cells::VecColumnTable, bys::Nothing) = nothing
519+
520+
# Helper function for _parse_subset
521+
function _fill_x!(r::AbstractDIDResult, inds::BitVector)
    # Extend `inds` in place with one `false` per covariate coefficient of `r`.
    # NOTE(review): presumably `inds` arrives with one entry per treatment
    # coefficient, so the extended mask spans all of `coef(r)` — confirm with callers.
    nx = ncovariate(r)
    # Append a ready-made BitVector instead of splatting a generator whose
    # length is only known at run time into `push!`.
    nx > 0 && append!(inds, falses(nx))
    # Always hand back the mutated vector so the return type does not depend on `nx`.
    return inds
end
470525

526+
# Helper functions for handling the subset option that may involve Pairs
527+
# A single Pair is evaluated against the treatment cells of `r` via `apply`;
# when `fill_x` is true the result is extended in place by `_fill_x!`.
function _parse_subset(r::AbstractDIDResult, by::Pair, fill_x::Bool)
    selected = apply(treatcells(r), by)
    if fill_x
        _fill_x!(r, selected)
    end
    return selected
end
529+
530+
# Indices whose element type is not a Pair are returned untouched.
# A collection of Pairs is evaluated jointly against the treatment cells of `r`
# via `apply_and`; when `fill_x` is true the result is extended in place by `_fill_x!`.
function _parse_subset(r::AbstractDIDResult, inds, fill_x::Bool)
    if !(eltype(inds) <: Pair)
        return inds
    end
    selected = apply_and(treatcells(r), inds...)
    if fill_x
        _fill_x!(r, selected)
    end
    return selected
end
536+
537+
# A Colon selects everything: all coefficients when `fill_x` is true,
# otherwise only the treatment coefficients.
function _parse_subset(r::AbstractDIDResult, ::Colon, fill_x::Bool)
    if fill_x
        return 1:length(coef(r))
    else
        return 1:ntreatcoef(r)
    end
end
539+
471540
# Count number of elements selected by indices `inds`
472541
_nselected(inds) = eltype(inds) == Bool ? sum(inds) : length(inds)
542+
_nselected(::Colon) = throw(ArgumentError("cannot accept Colon (:)"))
473543

474544
"""
475545
treatindex(ntcoef::Int, I)
@@ -536,17 +606,18 @@ from `r` at the given index or indices `inds` without constructing a copied subs
536606

537607
coef(r::SubDIDResult) = view(coef(parent(r)), r.inds)
538608
vcov(r::SubDIDResult) = view(vcov(parent(r)), r.inds, r.inds)
609+
vce(r::SubDIDResult) = vce(parent(r))
539610
nobs(r::SubDIDResult) = nobs(parent(r))
540611
outcomename(r::SubDIDResult) = outcomename(parent(r))
541612
coefnames(r::SubDIDResult) = view(coefnames(parent(r)), r.inds)
542-
treatnames(r::SubDIDResult) = view(treatnames(parent(r)), r.treatinds)
543613
treatcells(r::SubDIDResult) = view(treatcells(parent(r)), r.treatinds)
614+
weights(r::SubDIDResult) = weights(parent(r))
544615
ntreatcoef(r::SubDIDResult) = _nselected(r.treatinds)
545616
treatcoef(r::SubDIDResult) = view(treatcoef(parent(r)), r.treatinds)
546617
treatvcov(r::SubDIDResult) = view(treatvcov(parent(r)), r.treatinds, r.treatinds)
547-
weights(r::SubDIDResult) = weights(parent(r))
548-
responsename(r::SubDIDResult) = responsename(parent(r))
618+
treatnames(r::SubDIDResult) = view(treatnames(parent(r)), r.treatinds)
549619
dof_residual(r::SubDIDResult) = dof_residual(parent(r))
620+
responsename(r::SubDIDResult) = responsename(parent(r))
550621

551622
"""
552623
TransformedDIDResult{P,M} <: AbstractDIDResult
@@ -607,19 +678,21 @@ end
607678

608679
const TransOrTransSub = Union{TransformedDIDResult, TransSubDIDResult}
609680

681+
vce(r::TransOrTransSub) = vce(parent(r))
610682
nobs(r::TransOrTransSub) = nobs(parent(r))
611683
outcomename(r::TransOrTransSub) = outcomename(parent(r))
612684
coefnames(r::TransformedDIDResult) = coefnames(parent(r))
613685
coefnames(r::TransSubDIDResult) = view(coefnames(parent(r)), r.inds)
614-
treatnames(r::TransOrTransSub) = treatnames(parent(r))
615686
treatcells(r::TransformedDIDResult) = treatcells(parent(r))
616687
treatcells(r::TransSubDIDResult) = view(treatcells(parent(r)), r.treatinds)
688+
weights(r::TransOrTransSub) = weights(parent(r))
617689
ntreatcoef(r::TransformedDIDResult) = ntreatcoef(parent(r))
618690
ntreatcoef(r::TransSubDIDResult) = _nselected(r.treatinds)
619-
weights(r::TransOrTransSub) = weights(parent(r))
691+
treatnames(r::TransformedDIDResult) = treatnames(parent(r))
692+
treatnames(r::TransSubDIDResult) = view(treatnames(parent(r)), r.treatinds)
693+
dof_residual(r::TransOrTransSub) = dof_residual(parent(r))
620694
responsename(r::TransOrTransSub) = responsename(parent(r))
621695
coefinds(r::TransformedDIDResult) = coefinds(parent(r))
622-
dof_residual(r::TransOrTransSub) = dof_residual(parent(r))
623696

624697
"""
625698
lincom(r::AbstractDIDResult, linmap::AbstractMatrix{<:Real}, subset=nothing)
@@ -643,13 +716,14 @@ function lincom(r::AbstractDIDResult, linmap::AbstractMatrix{<:Real}, subset::No
643716
end
644717

645718
function lincom(r::AbstractDIDResult, linmap::AbstractMatrix{<:Real}, subset)
719+
inds = _parse_subset(r, subset, true)
646720
nr, nc = size(linmap)
647721
length(coef(r)) == nc ||
648722
throw(DimensionMismatch("linmap must have $(length(coef(r))) columns"))
649-
_nselected(subset) == nr || throw(ArgumentError("subset must select $nr elements"))
723+
_nselected(inds) == nr || throw(ArgumentError("subset must select $nr elements"))
650724
cf = linmap * coef(r)
651725
v = linmap * vcov(r) * linmap'
652-
return TransSubDIDResult(r, linmap, cf, v, subset)
726+
return TransSubDIDResult(r, linmap, cf, v, inds)
653727
end
654728

655729
"""
@@ -684,21 +758,25 @@ function rescale(r::AbstractDIDResult, scale::AbstractVector{<:Real}, subset::No
684758
end
685759

686760
function rescale(r::AbstractDIDResult, scale::AbstractVector{<:Real}, subset)
687-
N1 = length(scale)
688-
_nselected(subset) == N1 || throw(ArgumentError("subset must select $N1 elements"))
689-
cf = scale .* view(coef(r), subset)
690-
v = Matrix{Float64}(undef, N1, N1)
691-
pv = view(vcov(r), subset, subset)
692-
@inbounds for j in 1:N1
693-
for i in 1:N1
761+
inds = _parse_subset(r, subset, true)
762+
N = length(scale)
763+
_nselected(inds) == N || throw(ArgumentError("subset must select $N elements"))
764+
cf = scale .* view(coef(r), inds)
765+
v = Matrix{Float64}(undef, N, N)
766+
pv = view(vcov(r), inds, inds)
767+
@inbounds for j in 1:N
768+
for i in 1:N
694769
v[i, j] = scale[i]*scale[j]*pv[i, j]
695770
end
696771
end
697-
return TransSubDIDResult(r, Diagonal(scale), cf, v, subset)
772+
return TransSubDIDResult(r, Diagonal(scale), cf, v, inds)
698773
end
699774

700775
rescale(r::AbstractDIDResult, by::Pair, subset::Nothing=nothing) =
701776
rescale(r, apply(treatcells(r), by), 1:ntreatcoef(r))
702777

703-
rescale(r::AbstractDIDResult, by::Pair, subset) =
704-
rescale(r, apply(view(treatcells(r), subset), by), subset)
778+
function rescale(r::AbstractDIDResult, by::Pair, subset)
    # Resolve `subset` (which may be given as Pairs) into indices over `coef(r)`
    coefsel = _parse_subset(r, subset, true)
    # Translate those indices into indices over the treatment coefficients
    treatsel = treatindex(ntreatcoef(r), coefsel)
    # Evaluate `by` on the selected treatment cells only to obtain the scales
    scale = apply(view(treatcells(r), treatsel), by)
    return rescale(r, scale, coefsel)
end

0 commit comments

Comments
 (0)