"""Inference after ranking.
References:
.. code-block::
@techreport{andrews2019inference,
title={ Inference on winners },
author={ Andrews, Isaiah and Kitagawa, Toru and McCloskey, Adam },
year={ 2019 },
institution={ National Bureau of Economic Research }
}
@article{andrews2022inference,
Author = {Andrews, Isaiah and Bowen, Dillon and Kitagawa, Toru and McCloskey, Adam},
Title = {Inference for Losers},
Journal = {AEA Papers and Proceedings},
Volume = {112},
Year = {2022},
Month = {May},
Pages = {635-42},
DOI = {10.1257/pandp.20221065},
URL = {https://www.aeaweb.org/articles?id=10.1257/pandp.20221065}
}
"""
from __future__ import annotations
from typing import Any, Mapping
import numpy as np
from .base import (
ModelBase,
ResultsBase,
ColumnType,
Numeric1DArray,
)
from .confidence_set import ConfidenceSet
from .stats import quantile_unbiased
[docs]
class RankConditionResults(ResultsBase):
"""Quantile-unbiased results.
Inherits from :class:`multiple_inference.base.ResultsBase`.
Args:
*args (Any): Passed to :class:`multiple_inference.base.ResultsBase`.
beta (float, optional): Used to compute the projection quantile for hybrid
estimation. Defaults to 0.
marginal_distribution_kwargs (Mapping[str, Any], optional): Passed to
:meth:`RankCondition.get_marginal_distribution`. Defaults to None.
**kwargs (Any): Passed to :class:`multiple_inference.base.ResultsBase`.
"""
_default_title = "Rank condition quantile-unbiased estimates"
def __init__(
self,
*args: Any,
beta: float = 0,
marginal_distribution_kwargs: Mapping[str, Any] = None,
**kwargs: Any,
):
super().__init__(*args, **kwargs)
if marginal_distribution_kwargs is None:
marginal_distribution_kwargs = {}
self.marginal_distributions, self.params, self.pvalues = [], [], []
for i in range(self.model.n_params):
dist = self.model.get_marginal_distribution(
i, beta=beta, **marginal_distribution_kwargs
)
self.marginal_distributions.append(dist)
self.params.append(dist.ppf(0.5))
self.pvalues.append((1 - beta) * dist.cdf(0) + beta)
self.params, self.pvalues = np.array(self.params), np.array(self.pvalues)
self._beta = beta
def _conf_int(self, alpha: float, indices: np.ndarray) -> np.ndarray:
# see paper for details on adjusting alpha
return super()._conf_int((alpha - self._beta) / (1 - self._beta), indices)
def _make_summary_header(self, alpha: float) -> list[str]:
return ["coef (median)", "pvalue (1-sided)", f"[{alpha/2}", f"{1-alpha/2}]"]
[docs]
class RankCondition(ModelBase):
"""Rank condition quantile-unbiased estimator.
Provides utilities for obtaining quantile-unbiased estimates conditional on the
rank-ordering of conventional estimates of policy effects.
Subclasses :class:`multiple_inference.base.ModelBase`.
Args:
*args (Any): Passed to :class:`multiple_inference.base.ModelBase`.
xmean (Numeric1DArray, optional): (# params,) array of conventional estimates to
use for ranking. If None, ranking conditions are based on ``mean``. Defaults
to None.
xycov (np.ndarray, optional): (# params, # params) covariance matrix between
``mean`` and ``xmean``. Defaults to None.
**kwargs (Any): Passed to :class:`multiple_inference.base.ModelBase`.
Raises:
ValueError: Either all or none of ``xmean``, ``xcov`` and ``xycov`` must be
specified.
Additional attributes:
xmean (np.ndarray): (# params,) array of conventional estimates to use for
ranking.
xycov (np.ndarray): (# params, # params) covariance matrix between ``self.mean``
and ``self.xmean``.
Examples:
Compute a quantile-unbiased distribution of the x4 parameter given that it was
the top-ranked parameter.
.. testcode::
import numpy as np
from multiple_inference.rank_condition import RankCondition
model = RankCondition(np.arange(5), np.identity(5))
dist = model.get_marginal_distribution("x4")
print(dist.ppf([.025, .5, .975]))
.. testoutput::
[0.06742731 3.68627552 5.93267239]
Compute an "almost" quantile-unbiased hybrid distribution.
.. testcode::
dist = model.get_marginal_distribution("x4", beta=.005)
print(dist.ppf([.025, .5, .975]))
.. testoutput::
[0.89034671 3.68742998 5.93290012]
Summarize the quantile-unbiased results.
.. testcode::
results = model.fit()
print(results.summary())
.. testoutput::
:options: -ELLIPSIS, +NORMALIZE_WHITESPACE
Rank condition quantile-unbiased estimates
===============================================
coef (median) pvalue (1-sided) [0.025 0.975]
-----------------------------------------------
x0 0.314 0.406 -1.933 3.933
x1 1.000 0.285 -2.922 4.922
x2 2.000 0.136 -1.922 5.922
x3 3.000 0.058 -0.922 6.922
x4 3.686 0.023 0.067 5.933
===============
Dep. Variable y
---------------
Summarize the "almost" quantile-unbiased hybrid results.
.. testcode::
results = model.fit(beta=.005)
print(results.summary())
.. testoutput::
:options: -ELLIPSIS, +NORMALIZE_WHITESPACE
Rank condition quantile-unbiased estimates
===============================================
coef (median) pvalue (1-sided) [0.025 0.975]
-----------------------------------------------
x0 0.313 0.409 -1.977 3.129
x1 1.000 0.288 -2.129 4.129
x2 2.000 0.140 -1.129 5.129
x3 3.000 0.042 -0.129 6.129
x4 3.687 0.005 0.871 5.977
===============
Dep. Variable y
---------------
"""
_results_cls = RankConditionResults
def __init__(
self,
*args: Any,
xmean: Numeric1DArray = None,
xycov: np.ndarray = None,
**kwargs: Any,
):
super().__init__(*args, **kwargs)
xparams = (xmean, xycov)
if any([x is not None for x in xparams]) and None in xparams:
raise ValueError(
"Either both or neither of `xmean` and `xycov` must be given."
)
self.xmean = xmean
self.xycov = xycov
self._confidence_set = ConfidenceSet(self.mean, self.cov).fit()
@property
def xmean(self): # pylint: disable=missing-function-docstring
return self.mean if self._xmean is None else self._xmean
@xmean.setter
def xmean(self, xmean): # pylint: disable=missing-function-docstring
self._xmean = None if xmean is None else np.array(xmean)
self._estimated_ranks = (-self.xmean).argsort().argsort()
@property
def xycov(self): # pylint: disable=missing-function-docstring
return self.cov if self._xycov is None else self._xycov
@xycov.setter
def xycov(self, xycov): # pylint: disable=missing-function-docstring
self._xycov = None if xycov is None else np.array(xycov)
[docs]
def get_marginal_distribution( # pylint: disable=too-many-arguments
self,
column: ColumnType,
ranks: Numeric1DArray = None,
beta: float = 0,
**kwargs: Any,
) -> quantile_unbiased:
"""Compute a quantile-unbiased distribution for a given ranking condition.
Args:
column (ColumnType): Name or index of the parameter of interest. Defaults to
None.
ranks (Numeric1DArray, optional): Ranking conditions for the parameter of
interest. This method returns a quantile-unbiased distribution given
that the estimated rank of the parameter of interest is in ``ranks``.
If None, the estimated rank of the parameter is used as the ranking
condition. Defaults to None.
beta (float, optional): Used to compute the projection quantile for hybrid
estimation. Defaults to 0.
**kwargs (Any): Passed to :class:`quantile_unbiased`.
Returns:
quantile_unbiased: Quantile-unbiased distribution.
"""
def rank_condition_holds():
return (
(current_rank - n_always_equal <= ranks)
& (ranks <= current_rank + n_always_equal)
).any()
index = self.get_index(column)
ranks = np.atleast_1d(self._estimated_ranks[index] if ranks is None else ranks)
# compute the conditional truncation set
equal_cov = self.xycov[index, index] == self.xycov[:, index]
greater_cov = self.xycov[~equal_cov, index] > self.xycov[index, index]
# number of parameters whose estimates are always greater than the estimate of
# the target parameter
n_always_greater = (equal_cov & (self.xmean > self.xmean[index])).sum()
# number of parameters whose estimates are always equal to the estimate of the
# target parameter. -1 so you don't count the target parameter itself.
n_always_equal = (equal_cov & (self.xmean == self.xmean[index])).sum() - 1
z = ( # pylint: disable=invalid-name
self.xmean
- (self.xycov[:, index] / self.cov[index, index]) * self.mean[index]
)
# thresholds are the values at which the estimated value of the target parameter
# equals the estimated value of the other parameters (denoted Q in the paper)
thresholds = (
self.cov[index, index]
* (z[~equal_cov] - z[index])
/ (self.xycov[index, index] - self.xycov[~equal_cov, index])
)
argsort = thresholds.argsort()
greater_cov, thresholds = greater_cov[argsort], thresholds[argsort]
intervals = np.array([thresholds, np.append(thresholds[1:], np.inf)]).T
current_rank = n_always_greater + (~greater_cov).sum()
truncset = [[-np.inf, thresholds[0]]] if rank_condition_holds() else []
for greater_cov_i, interval in zip(greater_cov, intervals):
current_rank += 1 if greater_cov_i else -1
if rank_condition_holds():
truncset.append(interval)
if beta != 0:
# projection interval must be centered on 0 for `quantile_unbiased`
kwargs["projection_interval"] = (
self._confidence_set.conf_int(beta, [index]) - self.mean[index]
)[0]
return quantile_unbiased( # type: ignore
y=self.mean[index],
scale=np.sqrt(self.cov[index, index]),
truncation_set=truncset,
**kwargs,
)