Source code for multiple_inference.rank_condition

"""Inference after ranking.

References:

    .. code-block::

        @techreport{andrews2019inference,
            title={ Inference on winners },
            author={ Andrews, Isaiah and Kitagawa, Toru and McCloskey, Adam },
            year={ 2019 },
            institution={ National Bureau of Economic Research }
        }

        @article{andrews2022inference,
            Author = {Andrews, Isaiah and Bowen, Dillon and Kitagawa, Toru and McCloskey, Adam},
            Title = {Inference for Losers},
            Journal = {AEA Papers and Proceedings},
            Volume = {112},
            Year = {2022},
            Month = {May},
            Pages = {635-42},
            DOI = {10.1257/pandp.20221065},
            URL = {https://www.aeaweb.org/articles?id=10.1257/pandp.20221065}
        }
"""
from __future__ import annotations

from typing import Any, Mapping

import numpy as np

from .base import (
    ModelBase,
    ResultsBase,
    ColumnType,
    Numeric1DArray,
)
from .confidence_set import ConfidenceSet
from .stats import quantile_unbiased



[docs]
class RankConditionResults(ResultsBase):
    """Quantile-unbiased results.

    Inherits from :class:`multiple_inference.base.ResultsBase`.

    Args:
        *args (Any): Passed to :class:`multiple_inference.base.ResultsBase`.
        beta (float, optional): Used to compute the projection quantile for hybrid
            estimation. Defaults to 0.
        marginal_distribution_kwargs (Mapping[str, Any], optional): Passed to
            :meth:`RankCondition.get_marginal_distribution`. Defaults to None.
        **kwargs (Any): Passed to :class:`multiple_inference.base.ResultsBase`.
    """

    _default_title = "Rank condition quantile-unbiased estimates"

    def __init__(
        self,
        *args: Any,
        beta: float = 0,
        marginal_distribution_kwargs: Mapping[str, Any] = None,
        **kwargs: Any,
    ):
        super().__init__(*args, **kwargs)
        if marginal_distribution_kwargs is None:
            marginal_distribution_kwargs = {}
        self.marginal_distributions, self.params, self.pvalues = [], [], []
        for i in range(self.model.n_params):
            dist = self.model.get_marginal_distribution(
                i, beta=beta, **marginal_distribution_kwargs
            )
            self.marginal_distributions.append(dist)
            self.params.append(dist.ppf(0.5))
            self.pvalues.append((1 - beta) * dist.cdf(0) + beta)
        self.params, self.pvalues = np.array(self.params), np.array(self.pvalues)
        self._beta = beta

    def _conf_int(self, alpha: float, indices: np.ndarray) -> np.ndarray:
        # see paper for details on adjusting alpha
        return super()._conf_int((alpha - self._beta) / (1 - self._beta), indices)

    def _make_summary_header(self, alpha: float) -> list[str]:
        return ["coef (median)", "pvalue (1-sided)", f"[{alpha/2}", f"{1-alpha/2}]"]




[docs]
class RankCondition(ModelBase):
    """Rank condition quantile-unbiased estimator.

    Provides utilities for obtaining quantile-unbiased estimates conditional on the
    rank-ordering of conventional estimates of policy effects.

    Subclasses :class:`multiple_inference.base.ModelBase`.

    Args:
        *args (Any): Passed to :class:`multiple_inference.base.ModelBase`.
        xmean (Numeric1DArray, optional): (# params,) array of conventional estimates to
            use for ranking. If None, ranking conditions are based on ``mean``. Defaults
            to None.
        xycov (np.ndarray, optional): (# params, # params) covariance matrix between
            ``mean`` and ``xmean``. Defaults to None.
        **kwargs (Any): Passed to :class:`multiple_inference.base.ModelBase`.

    Raises:
        ValueError: Either all or none of ``xmean``, ``xcov`` and ``xycov`` must be
            specified.

    Additional attributes:
        xmean (np.ndarray): (# params,) array of conventional estimates to use for
            ranking.
        xycov (np.ndarray): (# params, # params) covariance matrix between ``self.mean``
            and ``self.xmean``.

    Examples:

        Compute a quantile-unbiased distribution of the x4 parameter given that it was
        the top-ranked parameter.

        .. testcode::

            import numpy as np
            from multiple_inference.rank_condition import RankCondition

            model = RankCondition(np.arange(5), np.identity(5))
            dist = model.get_marginal_distribution("x4")
            print(dist.ppf([.025, .5, .975]))

        .. testoutput::

            [0.06742731 3.68627552 5.93267239]

        Compute an "almost" quantile-unbiased hybrid distribution.

        .. testcode::

            dist = model.get_marginal_distribution("x4", beta=.005)
            print(dist.ppf([.025, .5, .975]))

        .. testoutput::

            [0.89034671 3.68742998 5.93290012]

        Summarize the quantile-unbiased results.

        .. testcode::

            results = model.fit()
            print(results.summary())

        .. testoutput::
            :options: -ELLIPSIS, +NORMALIZE_WHITESPACE

            Rank condition quantile-unbiased estimates
            ===============================================
               coef (median) pvalue (1-sided) [0.025 0.975]
            -----------------------------------------------
            x0         0.314            0.406 -1.933  3.933
            x1         1.000            0.285 -2.922  4.922
            x2         2.000            0.136 -1.922  5.922
            x3         3.000            0.058 -0.922  6.922
            x4         3.686            0.023  0.067  5.933
            ===============
            Dep. Variable y
            ---------------

        Summarize the "almost" quantile-unbiased hybrid results.

        .. testcode::

            results = model.fit(beta=.005)
            print(results.summary())

        .. testoutput::
            :options: -ELLIPSIS, +NORMALIZE_WHITESPACE

            Rank condition quantile-unbiased estimates
            ===============================================
               coef (median) pvalue (1-sided) [0.025 0.975]
            -----------------------------------------------
            x0         0.313            0.409 -1.977  3.129
            x1         1.000            0.288 -2.129  4.129
            x2         2.000            0.140 -1.129  5.129
            x3         3.000            0.042 -0.129  6.129
            x4         3.687            0.005  0.871  5.977
            ===============
            Dep. Variable y
            ---------------
    """

    _results_cls = RankConditionResults

    def __init__(
        self,
        *args: Any,
        xmean: Numeric1DArray = None,
        xycov: np.ndarray = None,
        **kwargs: Any,
    ):
        super().__init__(*args, **kwargs)
        xparams = (xmean, xycov)
        if any([x is not None for x in xparams]) and None in xparams:
            raise ValueError(
                "Either both or neither of `xmean` and `xycov` must be given."
            )

        self.xmean = xmean
        self.xycov = xycov
        self._confidence_set = ConfidenceSet(self.mean, self.cov).fit()

    @property
    def xmean(self):  # pylint: disable=missing-function-docstring
        return self.mean if self._xmean is None else self._xmean

    @xmean.setter
    def xmean(self, xmean):  # pylint: disable=missing-function-docstring
        self._xmean = None if xmean is None else np.array(xmean)
        self._estimated_ranks = (-self.xmean).argsort().argsort()

    @property
    def xycov(self):  # pylint: disable=missing-function-docstring
        return self.cov if self._xycov is None else self._xycov

    @xycov.setter
    def xycov(self, xycov):  # pylint: disable=missing-function-docstring
        self._xycov = None if xycov is None else np.array(xycov)


[docs]
    def get_marginal_distribution(  # pylint: disable=too-many-arguments
        self,
        column: ColumnType,
        ranks: Numeric1DArray = None,
        beta: float = 0,
        **kwargs: Any,
    ) -> quantile_unbiased:
        """Compute a quantile-unbiased distribution for a given ranking condition.

        Args:
            column (ColumnType): Name or index of the parameter of interest. Defaults to
                None.
            ranks (Numeric1DArray, optional): Ranking conditions for the parameter of
                interest. This method returns a quantile-unbiased distribution given
                that the estimated rank of the parameter of interest is in ``ranks``.
                If None, the estimated rank of the parameter is used as the ranking
                condition. Defaults to None.
            beta (float, optional): Used to compute the projection quantile for hybrid
                estimation. Defaults to 0.
            **kwargs (Any): Passed to :class:`quantile_unbiased`.

        Returns:
            quantile_unbiased: Quantile-unbiased distribution.
        """

        def rank_condition_holds():
            return (
                (current_rank - n_always_equal <= ranks)
                & (ranks <= current_rank + n_always_equal)
            ).any()

        index = self.get_index(column)
        ranks = np.atleast_1d(self._estimated_ranks[index] if ranks is None else ranks)

        # compute the conditional truncation set
        equal_cov = self.xycov[index, index] == self.xycov[:, index]
        greater_cov = self.xycov[~equal_cov, index] > self.xycov[index, index]
        # number of parameters whose estimates are always greater than the estimate of
        # the target parameter
        n_always_greater = (equal_cov & (self.xmean > self.xmean[index])).sum()
        # number of parameters whose estimates are always equal to the estimate of the
        # target parameter. -1 so you don't count the target parameter itself.
        n_always_equal = (equal_cov & (self.xmean == self.xmean[index])).sum() - 1
        z = (  # pylint: disable=invalid-name
            self.xmean
            - (self.xycov[:, index] / self.cov[index, index]) * self.mean[index]
        )
        # thresholds are the values at which the estimated value of the target parameter
        # equals the estimated value of the other parameters (denoted Q in the paper)
        thresholds = (
            self.cov[index, index]
            * (z[~equal_cov] - z[index])
            / (self.xycov[index, index] - self.xycov[~equal_cov, index])
        )
        argsort = thresholds.argsort()
        greater_cov, thresholds = greater_cov[argsort], thresholds[argsort]
        intervals = np.array([thresholds, np.append(thresholds[1:], np.inf)]).T
        current_rank = n_always_greater + (~greater_cov).sum()
        truncset = [[-np.inf, thresholds[0]]] if rank_condition_holds() else []
        for greater_cov_i, interval in zip(greater_cov, intervals):
            current_rank += 1 if greater_cov_i else -1
            if rank_condition_holds():
                truncset.append(interval)

        if beta != 0:
            # projection interval must be centered on 0 for `quantile_unbiased`
            kwargs["projection_interval"] = (
                self._confidence_set.conf_int(beta, [index]) - self.mean[index]
            )[0]
        return quantile_unbiased(  # type: ignore
            y=self.mean[index],
            scale=np.sqrt(self.cov[index, index]),
            truncation_set=truncset,
            **kwargs,
        )