Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENH] DML meta estimator for causal workflows #268

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .devpy/cmds.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import click
from devpy import util
from devpy.cmds.meson import get_site_packages


@click.command()
Expand All @@ -17,7 +18,7 @@ def docs(build_dir, clean=False):
print(f"Removing `{doc_dir}`")
shutil.rmtree(doc_dir)

site_path = util.get_site_packages(build_dir)
site_path = get_site_packages()
if site_path is None:
print("No built scikit-tree found; run `./dev.py build` first.")
sys.exit(1)
Expand Down
26 changes: 21 additions & 5 deletions docs/references.bib
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
% Try to keep this list in alphabetical order based on citing name

@article{Li2019manifold,
title={Manifold Oblique Random Forests: Towards Closing the Gap on Convolutional Deep Networks},
author={Li, Adam and Perry, Ronan and Huynh, Chester and Tomita, Tyler M and Mehta, Ronak and Arroyo, Jesus and Patsolic, Jesse and Falk, Benjamin and Vogelstein, Joshua T},
journal={arXiv preprint arXiv:1909.11799},
year={2019}
title = {Manifold Oblique Random Forests: Towards Closing the Gap on Convolutional Deep Networks},
author = {Li, Adam and Perry, Ronan and Huynh, Chester and Tomita, Tyler M and Mehta, Ronak and Arroyo, Jesus and Patsolic, Jesse and Falk, Benjamin and Vogelstein, Joshua T},
journal = {arXiv preprint arXiv:1909.11799},
year = {2019}
}

@article{Meghana2019_geodesicrf,
Expand All @@ -17,4 +17,20 @@ @article{Meghana2019_geodesicrf
publisher = {arXiv},
year = {2019},
copyright = {arXiv.org perpetual, non-exclusive license}
}
}

% Causal

@article{Athey2016GeneralizedRF,
title = {Generalized random forests},
author = {Susan Athey and Julie Tibshirani and Stefan Wager},
journal = {The Annals of Statistics},
year = {2016}
}

@article{chernozhukov2018double,
title = {Double/debiased machine learning for treatment and structural parameters},
author = {Chernozhukov, Victor and Chetverikov, Denis and Demirer, Mert and Duflo, Esther and Hansen, Christian and Newey, Whitney and Robins, James},
journal = {The Econometrics Journal},
year = {2018},
publisher = {Oxford University Press Oxford, UK}
}
1 change: 1 addition & 0 deletions sktree/causal/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .tree import CausalForest, CausalTree
75 changes: 75 additions & 0 deletions sktree/causal/_grf_criterion.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# Licensed under the MIT License.
# Original Authors:
# - EconML
# - Vasilis Syrgkanis
#
# Modified by: Adam Li

import numpy as np
cimport numpy as np

from sklearn.tree._tree cimport DTYPE_t # Type of X
from sklearn.tree._tree cimport DOUBLE_t # Type of y, sample_weight
from sklearn.tree._tree cimport SIZE_t # Type for indices and counters

from sklearn.tree._criterion cimport Criterion, RegressionCriterion


cdef class GeneralizedMomentCriterion(RegressionCriterion):
    # Declarations for a regression criterion built around a linear moment
    # equation. Only attributes and method signatures live here; the
    # implementations are expected in the matching .pyx file.

    # ------------------------------------------------------------------
    # Pre-computed per-sample inputs (read-only views)
    # ------------------------------------------------------------------

    # The "A" random vector of the linear moment equation for each sample,
    # of size (n_samples, n_outputs). These are the pre-computed "weights".
    cdef const DOUBLE_t[:, ::1] alpha

    # The approximate Jacobian evaluated at every single sample point
    # (the "J" random vector of the linear moment equation),
    # of size (n_samples, n_outputs, n_outputs).
    cdef const DOUBLE_t[:, :, ::1] pointJ

    # ------------------------------------------------------------------
    # Per-node working buffers
    # ------------------------------------------------------------------

    # Proxy heterogeneity label:
    #   rho = E[J | X in Node]^{-1} m(J, A; theta(Node)),
    # of shape (n_samples, n_outputs).
    cdef DOUBLE_t[:, ::1] rho

    # Moment for each sample: m(J, A; theta(Node)),
    # of shape (n_samples, n_outputs).
    cdef DOUBLE_t[:, ::1] moment

    # Node average Jacobian: J(Node) = E[J | X in Node],
    # of shape (n_outputs, n_outputs).
    cdef DOUBLE_t[:, ::1] J

    # Inverse of the node average Jacobian: J(Node)^{-1},
    # of shape (n_outputs, n_outputs).
    cdef DOUBLE_t[:, ::1] invJ

    # Estimated node parameter:
    #   theta(Node) = E[J | X in Node]^{-1} E[A | X in Node].
    cdef DOUBLE_t[:] parameter

    # Preconditioned node parameter: theta_pre(Node) = E[A | X in Node].
    cdef DOUBLE_t[:] parameter_pre

    cdef SIZE_t n_relevant_outputs

    # ------------------------------------------------------------------
    # Methods
    #
    # Every method returns an int, uses -1 as its error return value and
    # can be called without the GIL. The clause order is uniformly
    # `except -1 nogil`: Cython 3 warns when `nogil` precedes `except`
    # and plans to disallow that order.
    # ------------------------------------------------------------------

    cdef int compute_sample_preparameter(
        self,
        DOUBLE_t[:] parameter_pre,
        const DOUBLE_t[:, ::1] alpha,
        DOUBLE_t weight,
        SIZE_t sample_index,
    ) except -1 nogil

    cdef int compute_sample_parameter(
        self,
        DOUBLE_t[:] parameter,
        DOUBLE_t[:] parameter_pre,
        DOUBLE_t[:, ::1] invJ
    ) except -1 nogil

    # NOTE(review): `alpha` is declared non-const here, while the `alpha`
    # attribute and `compute_sample_preparameter` use a const view --
    # confirm against the .pyx whether this should be const as well.
    cdef int compute_sample_moment(
        self,
        DOUBLE_t[:, ::1] moment,
        DOUBLE_t[:, ::1] alpha,
        DOUBLE_t[:] parameter,
        const DOUBLE_t[:, :, ::1] pointJ,
        SIZE_t sample_index
    ) except -1 nogil

    cdef int compute_sample_rho(
        self,
        DOUBLE_t[:, ::1] moment,
        DOUBLE_t[:, ::1] invJ,
        SIZE_t sample_index
    ) except -1 nogil

    cdef int compute_sample_jacobian(
        self,
        DOUBLE_t[:, ::1] J,
        const DOUBLE_t[:, :, ::1] pointJ,
        DOUBLE_t weight,
        SIZE_t sample_index,
    ) except -1 nogil

    cdef int compute_node_inv_jacobian(
        self,
        DOUBLE_t[:, ::1] J,
        DOUBLE_t[:, ::1] invJ,
    ) except -1 nogil
Loading