"""Partial dependence plots for regression and classification models."""

from collections.abc import Iterable

import numpy as np
from scipy import sparse
from scipy.stats.mstats import mquantiles

from ..base import is_classifier, is_regressor
from ..utils.extmath import cartesian
from ..utils import (
    Bunch,
    check_array,
    check_matplotlib_support,  # noqa: F401  (not used directly in this module)
    _determine_key_type,
    _get_column_indices,
    _safe_indexing,
)
from ..utils.validation import check_is_fitted
from ..tree import DecisionTreeRegressor
from ..ensemble import RandomForestRegressor
from ..ensemble._gb import BaseGradientBoosting
from ..ensemble._hist_gradient_boosting.gradient_boosting import (
    BaseHistGradientBoosting,
)
from ..exceptions import NotFittedError

__all__ = [
    "partial_dependence",
]


def _grid_from_X(X, percentiles, grid_resolution):
    """Generate a grid of points based on the percentiles of X.

    The grid is a cartesian product between the columns of ``values``. The
    jth column of ``values`` consists of ``grid_resolution`` equally-spaced
    points between the percentiles of the jth column of X.
    If ``grid_resolution`` is bigger than the number of unique values in the
    jth column of X, then those unique values will be used instead.

    Parameters
    ----------
    X : ndarray, shape (n_samples, n_target_features)
        The data.

    percentiles : tuple of floats
        The percentiles which are used to construct the extreme values of
        the grid. Must be in [0, 1].

    grid_resolution : int
        The number of equally spaced points to be placed on the grid for each
        feature.

    Returns
    -------
    grid : ndarray, shape (n_points, n_target_features)
        A value for each feature at each point in the grid. ``n_points`` is
        always ``<= grid_resolution ** X.shape[1]``.

    values : list of 1d ndarrays
        The values with which the grid has been created. The size of each
        array ``values[j]`` is either ``grid_resolution``, or the number of
        unique values in ``X[:, j]``, whichever is smaller.
    """
    if not isinstance(percentiles, Iterable) or len(percentiles) != 2:
        raise ValueError("'percentiles' must be a sequence of 2 elements.")
    if not all(0 <= x <= 1 for x in percentiles):
        raise ValueError("'percentiles' values must be in [0, 1].")
    if percentiles[0] >= percentiles[1]:
        raise ValueError("percentiles[0] must be strictly less than percentiles[1].")
    if grid_resolution <= 1:
        raise ValueError("'grid_resolution' must be strictly greater than 1.")

    values = []
    for feature in range(X.shape[1]):
        uniques = np.unique(_safe_indexing(X, feature, axis=1))
        if uniques.shape[0] < grid_resolution:
            # The feature has fewer unique values than the requested
            # resolution: use the unique values themselves as the axis.
            axis = uniques
        else:
            # Create the axis from the empirical percentiles and the
            # requested grid resolution.
            emp_percentiles = mquantiles(
                _safe_indexing(X, feature, axis=1), prob=percentiles, axis=0
            )
            if np.allclose(emp_percentiles[0], emp_percentiles[1]):
                raise ValueError(
                    "percentiles are too close to each other, unable to build"
                    " the grid. Please choose percentiles that are further"
                    " apart."
                )
            axis = np.linspace(
                emp_percentiles[0],
                emp_percentiles[1],
                num=grid_resolution,
                endpoint=True,
            )
        values.append(axis)

    return cartesian(values), values


def _partial_dependence_recursion(est, grid, features):
    averaged_predictions = est._compute_partial_dependence_recursion(grid, features)
    if averaged_predictions.ndim == 1:
        # Reshape to (1, n_points) for consistency with
        # _partial_dependence_brute.
        averaged_predictions = averaged_predictions.reshape(1, -1)

    return averaged_predictions


def _partial_dependence_brute(est, grid, features, X, response_method):
    predictions = []
    averaged_predictions = []

    # Define the prediction_method (predict, predict_proba or
    # decision_function).
    if is_regressor(est):
        prediction_method = est.predict
    else:
        predict_proba = getattr(est, "predict_proba", None)
        decision_function = getattr(est, "decision_function", None)
        if response_method == "auto":
            # Try predict_proba, then decision_function if it doesn't exist.
            prediction_method = predict_proba or decision_function
        else:
            prediction_method = (
                predict_proba
                if response_method == "predict_proba"
                else decision_function
            )
        if prediction_method is None:
            if response_method == "auto":
                raise ValueError(
                    "The estimator has no predict_proba and no "
                    "decision_function method."
                )
            elif response_method == "predict_proba":
                raise ValueError("The estimator has no predict_proba method.")
            else:
                raise ValueError("The estimator has no decision_function method.")

    X_eval = X.copy()
    for new_values in grid:
        for i, variable in enumerate(features):
            if hasattr(X_eval, "iloc"):
                X_eval.iloc[:, variable] = new_values[i]
            else:
                X_eval[:, variable] = new_values[i]

        try:
            pred = prediction_method(X_eval)
            predictions.append(pred)
            # Average over the samples.
            averaged_predictions.append(np.mean(pred, axis=0))
        except NotFittedError as e:
            raise ValueError("'estimator' parameter must be a fitted estimator") from e

    n_samples = X.shape[0]

    # Reshape predictions to (n_targets, n_instances, n_points) where n_targets
    # is 1 for non-multioutput regression and binary classification (positive
    # class only), n_tasks for multi-output regression and n_classes for
    # multiclass classification.
    predictions = np.array(predictions).T
    if is_regressor(est) and predictions.ndim == 2:
        # Non-multioutput regression, shape is (n_instances, n_points).
        predictions = predictions.reshape(n_samples, -1)
    elif is_classifier(est) and predictions.shape[0] == 2:
        # Binary classification, shape is (2, n_instances, n_points):
        # only output the effect of the positive class.
        predictions = predictions[1]
        predictions = predictions.reshape(n_samples, -1)

    # Reshape averaged_predictions to (n_targets, n_points).
    averaged_predictions = np.array(averaged_predictions).T
    if is_regressor(est) and averaged_predictions.ndim == 1:
        # Non-multioutput regression, shape is (n_points,).
        averaged_predictions = averaged_predictions.reshape(1, -1)
    elif is_classifier(est) and averaged_predictions.shape[0] == 2:
        # Binary classification, shape is (2, n_points):
        # only output the effect of the positive class.
        averaged_predictions = averaged_predictions[1]
        averaged_predictions = averaged_predictions.reshape(1, -1)

    return averaged_predictions, predictions


def partial_dependence(
    estimator,
    X,
    features,
    *,
    response_method="auto",
    percentiles=(0.05, 0.95),
    grid_resolution=100,
    method="auto",
    kind="average",
):
    """Partial dependence of ``features``.

    Partial dependence of a feature (or a set of features) corresponds to
    the average response of an estimator for each possible value of the
    feature.

    Read more in the :ref:`User Guide <partial_dependence>`.

    .. warning::

        For :class:`~sklearn.ensemble.GradientBoostingClassifier` and
        :class:`~sklearn.ensemble.GradientBoostingRegressor`, the
        `'recursion'` method (used by default) will not account for the `init`
        predictor of the boosting process. In practice, this will produce
        the same values as `'brute'` up to a constant offset in the target
        response, provided that `init` is a constant estimator (which is the
        default). However, if `init` is not a constant estimator, the
        partial dependence values are incorrect for `'recursion'` because the
        offset will be sample-dependent. It is preferable to use the `'brute'`
        method. Note that this only applies to
        :class:`~sklearn.ensemble.GradientBoostingClassifier` and
        :class:`~sklearn.ensemble.GradientBoostingRegressor`, not to
        :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and
        :class:`~sklearn.ensemble.HistGradientBoostingRegressor`.

    Parameters
    ----------
    estimator : BaseEstimator
        A fitted estimator object implementing :term:`predict`,
        :term:`predict_proba`, or :term:`decision_function`.
        Multioutput-multiclass classifiers are not supported.

    X : {array-like or dataframe} of shape (n_samples, n_features)
        ``X`` is used to generate a grid of values for the target
        ``features`` (where the partial dependence will be evaluated), and
        also to generate values for the complement features when the
        `method` is 'brute'.

    features : array-like of {int, str}
        The feature (e.g. `[0]`) or pair of interacting features
        (e.g. `[(0, 1)]`) for which the partial dependency should be computed.

    response_method : {'auto', 'predict_proba', 'decision_function'}, \
            default='auto'
        Specifies whether to use :term:`predict_proba` or
        :term:`decision_function` as the target response. For regressors
        this parameter is ignored and the response is always the output of
        :term:`predict`. By default, :term:`predict_proba` is tried first
        and we revert to :term:`decision_function` if it doesn't exist. If
        ``method`` is 'recursion', the response is always the output of
        :term:`decision_function`.

    percentiles : tuple of float, default=(0.05, 0.95)
        The lower and upper percentile used to create the extreme values
        for the grid. Must be in [0, 1].

    grid_resolution : int, default=100
        The number of equally spaced points on the grid, for each target
        feature.

    method : {'auto', 'recursion', 'brute'}, default='auto'
        The method used to calculate the averaged predictions:

        - `'recursion'` is only supported for some tree-based estimators
          (namely
          :class:`~sklearn.ensemble.GradientBoostingClassifier`,
          :class:`~sklearn.ensemble.GradientBoostingRegressor`,
          :class:`~sklearn.ensemble.HistGradientBoostingClassifier`,
          :class:`~sklearn.ensemble.HistGradientBoostingRegressor`,
          :class:`~sklearn.tree.DecisionTreeRegressor`,
          :class:`~sklearn.ensemble.RandomForestRegressor`,
          ) when `kind='average'`.
          This is more efficient in terms of speed.
          With this method, the target response of a
          classifier is always the decision function, not the predicted
          probabilities. Since the `'recursion'` method implicitly computes
          the average of the Individual Conditional Expectation (ICE) by
          design, it is not compatible with ICE and thus `kind` must be
          `'average'`.

        - `'brute'` is supported for any estimator, but is more
          computationally intensive.

        - `'auto'`: the `'recursion'` is used for estimators that support it,
          and `'brute'` is used otherwise.

        Please see :ref:`this note <pdp_method_differences>` for
        differences between the `'brute'` and `'recursion'` method.

    kind : {'average', 'individual', 'both'}, default='average'
        Whether to return the partial dependence averaged across all the
        samples in the dataset or one line per sample or both.
        See Returns below.

        Note that the fast `method='recursion'` option is only available for
        `kind='average'`. Plotting individual dependencies requires using the
        slower `method='brute'` option.

        .. versionadded:: 0.24

    Returns
    -------
    predictions : :class:`~sklearn.utils.Bunch`
        Dictionary-like object, with the following attributes.

        individual : ndarray of shape (n_outputs, n_instances, \
                len(values[0]), len(values[1]), ...)
            The predictions for all the points in the grid for all
            samples in X. This is also known as Individual
            Conditional Expectation (ICE).
            Only available when ``kind='individual'`` or ``kind='both'``.

        average : ndarray of shape (n_outputs, len(values[0]), \
                len(values[1]), ...)
            The predictions for all the points in the grid, averaged
            over all samples in X (or over the training data if
            ``method`` is 'recursion').
            Only available when ``kind='average'`` or ``kind='both'``.

        values : seq of 1d ndarrays
            The values with which the grid has been created. The generated
            grid is a cartesian product of the arrays in ``values``.
            ``len(values) == len(features)``. The size of each array
            ``values[j]`` is either ``grid_resolution``, or the number of
            unique values in ``X[:, j]``, whichever is smaller.

        ``n_outputs`` corresponds to the number of classes in a multi-class
        setting, or to the number of tasks for multi-output regression.
        For classical regression and binary classification ``n_outputs==1``.
        ``n_values_feature_j`` corresponds to the size ``values[j]``.

    See Also
    --------
    PartialDependenceDisplay.from_estimator : Plot Partial Dependence.
    PartialDependenceDisplay : Partial Dependence visualization.

    Examples
    --------
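    A sketch of inspecting the returned :class:`~sklearn.utils.Bunch` for a
    small regressor; the data and shapes shown below are illustrative (they
    follow the Returns description above) and the doctest is skipped:

    >>> from sklearn.ensemble import GradientBoostingRegressor
    >>> X_reg = [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]]
    >>> y_reg = [0.5, 1.5, 2.5]
    >>> reg = GradientBoostingRegressor(random_state=0).fit(X_reg, y_reg)
    >>> result = partial_dependence(reg, X_reg, features=[0], kind='both',
    ...                             grid_resolution=3)  # doctest: +SKIP
    >>> result.average.shape, result.individual.shape  # doctest: +SKIP
    ((1, 3), (1, 3, 3))
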
    >>> X = [[0, 0, 2], [1, 0, 0]]
    >>> y = [0, 1]
    >>> from sklearn.ensemble import GradientBoostingClassifier
    >>> gb = GradientBoostingClassifier(random_state=0).fit(X, y)
    >>> partial_dependence(gb, features=[0], X=X, percentiles=(0, 1),
    ...                    grid_resolution=2) # doctest: +SKIP
    (array([[-4.52...,  4.52...]]), [array([ 0.,  1.])])
    """
    check_is_fitted(estimator)

    if not (is_classifier(estimator) or is_regressor(estimator)):
        raise ValueError("'estimator' must be a fitted regressor or classifier.")

    if is_classifier(estimator) and isinstance(estimator.classes_[0], np.ndarray):
        raise ValueError("Multiclass-multioutput estimators are not supported")

    # Use check_array only on lists and other non-array-likes / sparse. Do not
    # convert a DataFrame into a NumPy array.
    if not (hasattr(X, "__array__") or sparse.issparse(X)):
        X = check_array(X, force_all_finite="allow-nan", dtype=object)

    accepted_responses = ("auto", "predict_proba", "decision_function")
    if response_method not in accepted_responses:
        raise ValueError(
            "response_method {} is invalid. Accepted response_method names "
            "are {}.".format(response_method, ", ".join(accepted_responses))
        )

    if is_regressor(estimator) and response_method != "auto":
        raise ValueError(
            "The response_method parameter is ignored for regressors and "
            "must be 'auto'."
        )

    accepted_methods = ("brute", "recursion", "auto")
    if method not in accepted_methods:
        raise ValueError(
            "method {} is invalid. Accepted method names are {}.".format(
                method, ", ".join(accepted_methods)
            )
        )

    if kind != "average":
        if method == "recursion":
            raise ValueError(
                "The 'recursion' method only applies when 'kind' is set to 'average'"
            )
        method = "brute"

    if method == "auto":
        if isinstance(estimator, BaseGradientBoosting) and estimator.init is None:
            method = "recursion"
        elif isinstance(
            estimator,
            (BaseHistGradientBoosting, DecisionTreeRegressor, RandomForestRegressor),
        ):
            method = "recursion"
        else:
            method = "brute"

    if method == "recursion":
        if not isinstance(
            estimator,
            (
                BaseGradientBoosting,
                BaseHistGradientBoosting,
                DecisionTreeRegressor,
                RandomForestRegressor,
            ),
        ):
            supported_classes_recursion = (
                "GradientBoostingClassifier",
                "GradientBoostingRegressor",
                "HistGradientBoostingClassifier",
                "HistGradientBoostingRegressor",
                "DecisionTreeRegressor",
                "RandomForestRegressor",
            )
            raise ValueError(
                "Only the following estimators support the 'recursion' "
                "method: {}. Try using method='brute'.".format(
                    ", ".join(supported_classes_recursion)
                )
            )
        if response_method == "auto":
            response_method = "decision_function"

        if response_method != "decision_function":
            raise ValueError(
                "With the 'recursion' method, the response_method must be "
                "'decision_function'. Got {}.".format(response_method)
            )

    if _determine_key_type(features, accept_slice=False) == "int":
        # _get_column_indices() supports negative indexing. Here, we limit
        # the indexing to be positive. The upper bound will be checked
        # by _get_column_indices().
        if np.any(np.less(features, 0)):
            raise ValueError("all features must be in [0, {}]".format(X.shape[1] - 1))

    features_indices = np.asarray(
        _get_column_indices(X, features), dtype=np.int32, order="C"
    ).ravel()

    grid, values = _grid_from_X(
        _safe_indexing(X, features_indices, axis=1), percentiles, grid_resolution
    )

    if method == "brute":
        averaged_predictions, predictions = _partial_dependence_brute(
            estimator, grid, features_indices, X, response_method
        )

        # Reshape predictions to
        # (n_outputs, n_instances, n_values_feature_0, n_values_feature_1, ...)
        predictions = predictions.reshape(
            -1, X.shape[0], *[val.shape[0] for val in values]
        )
    else:
        averaged_predictions = _partial_dependence_recursion(
            estimator, grid, features_indices
        )

    # Reshape averaged_predictions to
    # (n_outputs, n_values_feature_0, n_values_feature_1, ...)
    averaged_predictions = averaged_predictions.reshape(
        -1, *[val.shape[0] for val in values]
    )

    if kind == "average":
        return Bunch(average=averaged_predictions, values=values)
    if kind == "individual":
        return Bunch(individual=predictions, values=values)
    # kind='both'
    return Bunch(
        average=averaged_predictions,
        individual=predictions,
        values=values,
    )