o
    tBh                  	   @   sD  d Z ddlZddlZddlmZ ddlmZmZm	Z	m
Z
mZmZmZmZmZ ddlmZmZmZmZmZ ddlmZ dd	lmZ dd
lmZ G dd dZG dd deZG dd deZG dd deZG dd deZ G dd deZ!G dd deZ"G dd deZ#G dd deZ$G dd deZ%eeee e!e"e$e%dZ&dS ) z
This module contains loss classes suitable for fitting.

It is not part of the public API.
Specific losses are used for regression, binary classification or multiclass
classification.
    Nxlogy   )	CyHalfSquaredErrorCyAbsoluteErrorCyPinballLossCyHalfPoissonLossCyHalfGammaLossCyHalfTweedieLossCyHalfTweedieLossIdentityCyHalfBinomialLossCyHalfMultinomialLoss)IntervalIdentityLinkLogLink	LogitLinkMultinomialLogit   )check_scalar)ReadonlyArrayWrapper)_weighted_percentilec                   @   s   e Zd ZdZdZdZdZdddZdd Zd	d
 Z				dddZ
				dddZ			dddZ				dddZd ddZdddZdddZejdfddZdS )!BaseLossa  Base class for a loss function of 1-dimensional targets.

    Conventions:

        - y_true.shape = sample_weight.shape = (n_samples,)
        - y_pred.shape = raw_prediction.shape = (n_samples,)
        - If is_multiclass is true (multiclass classification), then
          y_pred.shape = raw_prediction.shape = (n_samples, n_classes)
          Note that this corresponds to the return value of decision_function.

    y_true, y_pred, sample_weight and raw_prediction must either be all float64
    or all float32.
    gradient and hessian must be either both float64 or both float32.

    Note that y_pred = link.inverse(raw_prediction).

    Specific loss classes can inherit specific link classes to satisfy
    BaseLink's abstractmethods.

    Parameters
    ----------
    sample_weight : {None, ndarray}
        If sample_weight is None, the hessian might be constant.
    n_classes : {None, int}
        The number of classes for classification, else None.

    Attributes
    ----------
    closs: CyLossFunction
    link : BaseLink
    interval_y_true : Interval
        Valid interval for y_true
    interval_y_pred : Interval
        Valid Interval for y_pred
    differentiable : bool
        Indicates whether or not loss function is differentiable in
        raw_prediction everywhere.
    need_update_leaves_values : bool
        Indicates whether decision trees in gradient boosting need to uptade
        leave values after having been fit to the (negative) gradients.
    approx_hessian : bool
        Indicates whether the hessian is approximated or exact. If,
        approximated, it should be larger or equal to the exact one.
    constant_hessian : bool
        Indicates whether the hessian is one for this loss.
    is_multiclass : bool
        Indicates whether n_classes > 2 is allowed.
    FTNc                 C   sB   || _ || _d| _d| _|| _ttj tjdd| _| jj	| _	d S )NF)
closslinkapprox_hessianconstant_hessian	n_classesr   npinfinterval_y_trueinterval_y_pred)selfr   r   r    r"   i/var/www/html/riverr-enterprise-integrations-main/venv/lib/python3.10/site-packages/sklearn/_loss/loss.py__init__}   s   zBaseLoss.__init__c                 C      | j |S zuReturn True if y is in the valid range of y_true.

        Parameters
        ----------
        y : ndarray
        )r   includesr!   yr"   r"   r#   in_y_true_range      zBaseLoss.in_y_true_rangec                 C   r%   )zuReturn True if y is in the valid range of y_pred.

        Parameters
        ----------
        y : ndarray
        )r    r'   r(   r"   r"   r#   in_y_pred_range   r+   zBaseLoss.in_y_pred_ranger   c                 C   sj   |du r	t |}|jdkr|jd dkr|d}t|}t|}|dur*t|}| jj|||||dS )aJ  Compute the pointwise loss value for each input.

        Parameters
        ----------
        y_true : C-contiguous array of shape (n_samples,)
            Observed, true target values.
        raw_prediction : C-contiguous array of shape (n_samples,) or array of             shape (n_samples, n_classes)
            Raw prediction values (in link space).
        sample_weight : None or C-contiguous array of shape (n_samples,)
            Sample weights.
        loss_out : None or C-contiguous array of shape (n_samples,)
            A location into which the result is stored. If None, a new array
            might be created.
        n_threads : int, default=1
            Might use openmp thread parallelism.

        Returns
        -------
        loss : array of shape (n_samples,)
            Element-wise loss function.
        Nr   r   y_trueraw_predictionsample_weightloss_out	n_threads)r   
empty_likendimshapesqueezer   r   loss)r!   r.   r/   r0   r1   r2   r"   r"   r#   r7      s   

zBaseLoss.lossc                 C   s   |du r|du rt |}t |}nt j||jd}n|du r(t j||jd}|jdkr9|jd dkr9|d}|jdkrJ|jd dkrJ|d}t|}t|}|durZt|}| jj||||||dS )a  Compute loss and gradient w.r.t. raw_prediction for each input.

        Parameters
        ----------
        y_true : C-contiguous array of shape (n_samples,)
            Observed, true target values.
        raw_prediction : C-contiguous array of shape (n_samples,) or array of             shape (n_samples, n_classes)
            Raw prediction values (in link space).
        sample_weight : None or C-contiguous array of shape (n_samples,)
            Sample weights.
        loss_out : None or C-contiguous array of shape (n_samples,)
            A location into which the loss is stored. If None, a new array
            might be created.
        gradient_out : None or C-contiguous array of shape (n_samples,) or array             of shape (n_samples, n_classes)
            A location into which the gradient is stored. If None, a new array
            might be created.
        n_threads : int, default=1
            Might use openmp thread parallelism.

        Returns
        -------
        loss : array of shape (n_samples,)
            Element-wise loss function.

        gradient : array of shape (n_samples,) or (n_samples, n_classes)
            Element-wise gradients.
        Ndtyper   r   )r.   r/   r0   r1   gradient_outr2   )	r   r3   r9   r4   r5   r6   r   r   loss_gradient)r!   r.   r/   r0   r1   r:   r2   r"   r"   r#   r;      s.   &


zBaseLoss.loss_gradientc                 C   s   |du r	t |}|jdkr|jd dkr|d}|jdkr+|jd dkr+|d}t|}t|}|dur;t|}| jj|||||dS )a  Compute gradient of loss w.r.t raw_prediction for each input.

        Parameters
        ----------
        y_true : C-contiguous array of shape (n_samples,)
            Observed, true target values.
        raw_prediction : C-contiguous array of shape (n_samples,) or array of             shape (n_samples, n_classes)
            Raw prediction values (in link space).
        sample_weight : None or C-contiguous array of shape (n_samples,)
            Sample weights.
        gradient_out : None or C-contiguous array of shape (n_samples,) or array             of shape (n_samples, n_classes)
            A location into which the result is stored. If None, a new array
            might be created.
        n_threads : int, default=1
            Might use openmp thread parallelism.

        Returns
        -------
        gradient : array of shape (n_samples,) or (n_samples, n_classes)
            Element-wise gradients.
        Nr   r   )r.   r/   r0   r:   r2   )r   r3   r4   r5   r6   r   r   gradient)r!   r.   r/   r0   r:   r2   r"   r"   r#   r<   
  s"   


zBaseLoss.gradientc                 C   s   |du r|du rt |}t |}nt |}n	|du r"t |}|jdkr3|jd dkr3|d}|jdkrD|jd dkrD|d}|jdkrU|jd dkrU|d}t|}t|}|duret|}| jj||||||dS )a  Compute gradient and hessian of loss w.r.t raw_prediction.

        Parameters
        ----------
        y_true : C-contiguous array of shape (n_samples,)
            Observed, true target values.
        raw_prediction : C-contiguous array of shape (n_samples,) or array of             shape (n_samples, n_classes)
            Raw prediction values (in link space).
        sample_weight : None or C-contiguous array of shape (n_samples,)
            Sample weights.
        gradient_out : None or C-contiguous array of shape (n_samples,) or array             of shape (n_samples, n_classes)
            A location into which the gradient is stored. If None, a new array
            might be created.
        hessian_out : None or C-contiguous array of shape (n_samples,) or array             of shape (n_samples, n_classes)
            A location into which the hessian is stored. If None, a new array
            might be created.
        n_threads : int, default=1
            Might use openmp thread parallelism.

        Returns
        -------
        gradient : arrays of shape (n_samples,) or (n_samples, n_classes)
            Element-wise gradients.

        hessian : arrays of shape (n_samples,) or (n_samples, n_classes)
            Element-wise hessians.
        Nr   r   )r.   r/   r0   r:   hessian_outr2   )r   r3   r4   r5   r6   r   r   gradient_hessian)r!   r.   r/   r0   r:   r=   r2   r"   r"   r#   r>   >  s2   '




zBaseLoss.gradient_hessianc                 C   s   t j| j||dd|d|dS )a{  Compute the weighted average loss.

        Parameters
        ----------
        y_true : C-contiguous array of shape (n_samples,)
            Observed, true target values.
        raw_prediction : C-contiguous array of shape (n_samples,) or array of             shape (n_samples, n_classes)
            Raw prediction values (in link space).
        sample_weight : None or C-contiguous array of shape (n_samples,)
            Sample weights.
        n_threads : int, default=1
            Might use openmp thread parallelism.

        Returns
        -------
        loss : float
            Mean or averaged loss function.
        Nr-   )weights)r   averager7   )r!   r.   r/   r0   r2   r"   r"   r#   __call__  s   zBaseLoss.__call__c                 C   s   t j||dd}dt |jj }| jjt j krd}n| jjr%| jj}n| jj| }| jj	t jkr5d}n| jj
r>| jj	}n| jj	| }|du rR|du rR| j|S | jt |||S )a#  Compute raw_prediction of an intercept-only model.

        This can be used as initial estimates of predictions, i.e. before the
        first iteration in fit.

        Parameters
        ----------
        y_true : array-like of shape (n_samples,)
            Observed, true target values.
        sample_weight : None or array of shape (n_samples,)
            Sample weights.

        Returns
        -------
        raw_prediction : numpy scalar or array of shape (n_classes,)
            Raw predictions of an intercept-only model.
        r   r?   axis
   N)r   r@   finfor9   epsr    lowr   low_inclusivehighhigh_inclusiver   clip)r!   r.   r0   y_predrF   a_mina_maxr"   r"   r#   fit_intercept_only  s   

zBaseLoss.fit_intercept_onlyc                 C   s
   t |S )zpCalculate term dropped in loss.

        With this term added, the loss of perfect predictions is zero.
        )r   
zeros_liker!   r.   r0   r"   r"   r#   constant_to_optimal_zero  s   
z!BaseLoss.constant_to_optimal_zeroFc                 C   s|   |t jt jfvrtd| d| jr|| jf}n|f}t j|||d}| jr2t jd|d}||fS t j|||d}||fS )au  Initialize arrays for gradients and hessians.

        Unless hessians are constant, arrays are initialized with undefined values.

        Parameters
        ----------
        n_samples : int
            The number of samples, usually passed to `fit()`.
        dtype : {np.float64, np.float32}, default=np.float64
            The dtype of the arrays gradient and hessian.
        order : {'C', 'F'}, default='F'
            Order of the arrays gradient and hessian. The default 'F' makes the arrays
            contiguous along samples.

        Returns
        -------
        gradient : C-contiguous array of shape (n_samples,) or array of shape             (n_samples, n_classes)
            Empty array (allocated but not initialized) to be used as argument
            gradient_out.
        hessian : C-contiguous array of shape (n_samples,), array of shape
            (n_samples, n_classes) or shape (1,)
            Empty (allocated but not initialized) array to be used as argument
            hessian_out.
            If constant_hessian is True (e.g. `HalfSquaredError`), the array is
            initialized to ``1``.
        zCValid options for 'dtype' are np.float32 and np.float64. Got dtype=z	 instead.)r5   r9   order)r   )r5   r9   )	r   float32float64
ValueErroris_multiclassr   emptyr   ones)r!   	n_samplesr9   rT   r5   r<   hessianr"   r"   r#   init_gradient_and_hessian  s   z"BaseLoss.init_gradient_and_hessianN)NNr   NNNr   Nr   )__name__
__module____qualname____doc__need_update_leaves_valuesdifferentiablerX   r$   r*   r,   r7   r;   r<   r>   rA   rO   rR   r   rV   r]   r"   r"   r"   r#   r   >   s<    :
		
4
F
8

E

*r   c                       s"   e Zd ZdZd fdd	Z  ZS )HalfSquaredErrora  Half squared error with identity link, for regression.

    Domain:
    y_true and y_pred all real numbers

    Link:
    y_pred = raw_prediction

    For a given sample x_i, half squared error is defined as::

        loss(x_i) = 0.5 * (y_true_i - raw_prediction_i)**2

    The factor of 0.5 simplifies the computation of gradients and results in a
    unit hessian (and is consistent with what is done in LightGBM). It is also
    half the Normal distribution deviance.
    Nc                    s"   t  jt t d |d u | _d S )Nr   r   )superr$   r   r   r   r!   r0   	__class__r"   r#   r$     s   zHalfSquaredError.__init__r^   ra   rb   rc   rd   r$   __classcell__r"   r"   rk   r#   rg     s    rg   c                       s4   e Zd ZdZdZdZd	 fdd	Zd	ddZ  ZS )
AbsoluteErrora  Absolute error with identity link, for regression.

    Domain:
    y_true and y_pred all real numbers

    Link:
    y_pred = raw_prediction

    For a given sample x_i, the absolute error is defined as::

        loss(x_i) = |y_true_i - raw_prediction_i|
    FTNc                    s(   t  jt t d d| _|d u | _d S )Nrh   T)ri   r$   r   r   r   r   rj   rk   r"   r#   r$   3  s   zAbsoluteError.__init__c                 C   s"   |du rt j|ddS t||dS )Compute raw_prediction of an intercept-only model.

        This is the weighted median of the target, i.e. over the samples
        axis=0.
        Nr   rC   2   )r   medianr   rQ   r"   r"   r#   rO   8  s   z AbsoluteError.fit_intercept_onlyr^   	ra   rb   rc   rd   rf   re   r$   rO   rn   r"   r"   rk   r#   ro   "  s    ro   c                       s4   e Zd ZdZdZdZd
 fdd	Zddd	Z  ZS )PinballLossa  Quantile loss aka pinball loss, for regression.

    Domain:
    y_true and y_pred all real numbers
    quantile in (0, 1)

    Link:
    y_pred = raw_prediction

    For a given sample x_i, the pinball loss is defined as::

        loss(x_i) = rho_{quantile}(y_true_i - raw_prediction_i)

        rho_{quantile}(u) = u * (quantile - 1_{u<0})
                          = -u *(1 - quantile)  if u < 0
                             u * quantile       if u >= 0

    Note: 2 * PinballLoss(quantile=0.5) equals AbsoluteError().

    Additional Attributes
    ---------------------
    quantile : float
        The quantile to be estimated. Must be in range (0, 1).
    FTN      ?c                    sF   t |dtjdddd t jtt|dt d d| _|d u | _	d S )	Nquantiler   r   neither)target_typemin_valmax_valinclude_boundaries)rw   rh   T)
r   numbersRealri   r$   r   floatr   r   r   )r!   r0   rw   rk   r"   r#   r$   a  s   zPinballLoss.__init__c                 C   s4   |du rt j|d| jj ddS t||d| jj S )rp   Nd   r   rq   )r   
percentiler   rw   r   rQ   r"   r"   r#   rO   q  s
   zPinballLoss.fit_intercept_only)Nrv   r^   rt   r"   r"   rk   r#   ru   D  s    ru   c                       ,   e Zd ZdZd fdd	ZdddZ  ZS )HalfPoissonLossa  Half Poisson deviance loss with log-link, for regression.

    Domain:
    y_true in non-negative real numbers
    y_pred in positive real numbers

    Link:
    y_pred = exp(raw_prediction)

    For a given sample x_i, half the Poisson deviance is defined as::

        loss(x_i) = y_true_i * log(y_true_i/exp(raw_prediction_i))
                    - y_true_i + exp(raw_prediction_i)

    Half the Poisson deviance is actually the negative log-likelihood up to
    constant terms (not involving raw_prediction) and simplifies the
    computation of the gradients.
    We also skip the constant term `y_true_i * log(y_true_i) - y_true_i`.
    Nc                    s*   t  jt t d tdtjdd| _d S )Nrh   r   TF)ri   r$   r   r   r   r   r   r   rj   rk   r"   r#   r$        zHalfPoissonLoss.__init__c                 C   s"   t ||| }|d ur||9 }|S r^   r   r!   r.   r0   termr"   r"   r#   rR     s   z(HalfPoissonLoss.constant_to_optimal_zeror^   ra   rb   rc   rd   r$   rR   rn   r"   r"   rk   r#   r     s    r   c                       r   )HalfGammaLossaV  Half Gamma deviance loss with log-link, for regression.

    Domain:
    y_true and y_pred in positive real numbers

    Link:
    y_pred = exp(raw_prediction)

    For a given sample x_i, half Gamma deviance loss is defined as::

        loss(x_i) = log(exp(raw_prediction_i)/y_true_i)
                    + y_true/exp(raw_prediction_i) - 1

    Half the Gamma deviance is actually proportional to the negative log-
    likelihood up to constant terms (not involving raw_prediction) and
    simplifies the computation of the gradients.
    We also skip the constant term `-log(y_true_i) - 1`.
    Nc                    s*   t  jt t d tdtjdd| _d S )Nrh   r   F)ri   r$   r	   r   r   r   r   r   rj   rk   r"   r#   r$     r   zHalfGammaLoss.__init__c                 C   s$   t | d }|d ur||9 }|S r`   )r   logr   r"   r"   r#   rR     s   z&HalfGammaLoss.constant_to_optimal_zeror^   r   r"   r"   rk   r#   r     s    r   c                       s,   e Zd ZdZd fdd	Zd	ddZ  ZS )
HalfTweedieLossa  Half Tweedie deviance loss with log-link, for regression.

    Domain:
    y_true in real numbers for power <= 0
    y_true in non-negative real numbers for 0 < power < 2
    y_true in positive real numbers for 2 <= power
    y_pred in positive real numbers
    power in real numbers

    Link:
    y_pred = exp(raw_prediction)

    For a given sample x_i, half Tweedie deviance loss with p=power is defined
    as::

        loss(x_i) = max(y_true_i, 0)**(2-p) / (1-p) / (2-p)
                    - y_true_i * exp(raw_prediction_i)**(1-p) / (1-p)
                    + exp(raw_prediction_i)**(2-p) / (2-p)

    Taking the limits for p=0, 1, 2 gives HalfSquaredError with a log link,
    HalfPoissonLoss and HalfGammaLoss.

    We also skip constant terms, but those are different for p=0, 1, 2.
    Therefore, the loss is not continuous in `power`.

    Note furthermore that although no Tweedie distribution exists for
    0 < power < 1, it still gives a strictly consistent scoring function for
    the expectation.
    N      ?c                    s   t |dtjdtj tjd t jtt|dt	 d | j
jdkr/ttj tjdd| _d S | j
jdk r@tdtjd	d| _d S tdtjdd| _d S )
Npowerrx   )ry   r|   rz   r{   r   rh   r   Fr   T)r   r}   r~   r   r   ri   r$   r
   r   r   r   r   r   r   r!   r0   r   rk   r"   r#   r$     s"   zHalfTweedieLoss.__init__c                 C   s   | j jdkrt j||dS | j jdkrt j||dS | j jdkr*t j||dS | j j}tt|dd| d|  d|  }|d urJ||9 }|S )Nr   )r.   r0   r   r   )r   r   rg   rR   r   r   r   maximum)r!   r.   r0   pr   r"   r"   r#   rR     s"   (z(HalfTweedieLoss.constant_to_optimal_zeroNr   r^   r   r"   r"   rk   r#   r     s    r   c                       s"   e Zd ZdZd fdd	Z  ZS )HalfTweedieLossIdentityan  Half Tweedie deviance loss with identity link, for regression.

    Domain:
    y_true in real numbers for power <= 0
    y_true in non-negative real numbers for 0 < power < 2
    y_true in positive real numbers for 2 <= power
    y_pred in positive real numbers for power != 0
    y_pred in real numbers for power = 0
    power in real numbers

    Link:
    y_pred = raw_prediction

    For a given sample x_i, half Tweedie deviance loss with p=power is defined
    as::

        loss(x_i) = max(y_true_i, 0)**(2-p) / (1-p) / (2-p)
                    - y_true_i * raw_prediction_i**(1-p) / (1-p)
                    + raw_prediction_i**(2-p) / (2-p)

    Note that the minimum value of this loss is 0.

    Note furthermore that although no Tweedie distribution exists for
    0 < power < 1, it still gives a strictly consistent scoring function for
    the expectation.
    Nr   c                    s   t  jtt|dt d | jjdkr ttj	 tj	dd| _
n| jjdk r0tdtj	dd| _
n	tdtj	dd| _
| jjdkrLttj	 tj	dd| _d S tdtj	dd| _d S )Nr   rh   r   Fr   T)ri   r$   r   r   r   r   r   r   r   r   r   r    r   rk   r"   r#   r$   "  s   z HalfTweedieLossIdentity.__init__r   rm   r"   r"   rk   r#   r     s    r   c                       s4   e Zd ZdZd	 fdd	Zd	ddZdd Z  ZS )
HalfBinomialLossa  Half Binomial deviance loss with logit link, for binary classification.

    This is also know as binary cross entropy, log-loss and logistic loss.

    Domain:
    y_true in [0, 1], i.e. regression on the unit interval
    y_pred in (0, 1), i.e. boundaries excluded

    Link:
    y_pred = expit(raw_prediction)

    For a given sample x_i, half Binomial deviance is defined as the negative
    log-likelihood of the Binomial/Bernoulli distribution and can be expressed
    as::

        loss(x_i) = log(1 + exp(raw_pred_i)) - y_true_i * raw_pred_i

    See The Elements of Statistical Learning, by Hastie, Tibshirani, Friedman,
    section 4.4.1 (about logistic regression).

    Note that the formulation works for classification, y = {0, 1}, as well as
    logistic regression, y = [0, 1].
    If you add `constant_to_optimal_zero` to the loss, you get half the
    Bernoulli/binomial deviance.
    Nc                    s*   t  jt t dd tdddd| _d S )Nr   r   r   r   r   r   T)ri   r$   r   r   r   r   rj   rk   r"   r#   r$   O  s   zHalfBinomialLoss.__init__c                 C   s0   t ||t d| d|  }|d ur||9 }|S r`   r   r   r"   r"   r#   rR   W  s   z)HalfBinomialLoss.constant_to_optimal_zeroc                 C   sx   |j dkr|jd dkr|d}tj|jd df|jd}| j||dddf< d|dddf  |dddf< |S )a=  Predict probabilities.

        Parameters
        ----------
        raw_prediction : array of shape (n_samples,) or (n_samples, 1)
            Raw prediction values (in link space).

        Returns
        -------
        proba : array of shape (n_samples, 2)
            Element-wise class probabilities.
        r   r   r   r8   N)r4   r5   r6   r   rY   r9   r   inverse)r!   r/   probar"   r"   r#   predict_proba^  s   
 zHalfBinomialLoss.predict_probar^   )ra   rb   rc   rd   r$   rR   r   rn   r"   r"   rk   r#   r   4  s
    
r   c                       sR   e Zd ZdZdZd fdd	Zdd Zdd	d
Zdd Z				dddZ	  Z
S )HalfMultinomialLossa  Categorical cross-entropy loss, for multiclass classification.

    Domain:
    y_true in {0, 1, 2, 3, .., n_classes - 1}
    y_pred has n_classes elements, each element in (0, 1)

    Link:
    y_pred = softmax(raw_prediction)

    Note: We assume y_true to be already label encoded. The inverse link is
    softmax. But the full link function is the symmetric multinomial logit
    function.

    For a given sample x_i, the categorical cross-entropy loss is defined as
    the negative log-likelihood of the multinomial distribution, it
    generalizes the binary cross-entropy to more than 2 classes::

        loss_i = log(sum(exp(raw_pred_{i, k}), k=0..n_classes-1))
                - sum(y_true_{i, k} * raw_pred_{i, k}, k=0..n_classes-1)

    See [1].

    Note that for the hessian, we calculate only the diagonal part in the
    classes: If the full hessian for classes k and l and sample i is H_i_k_l,
    we calculate H_i_k_k, i.e. k=l.

    Reference
    ---------
    .. [1] :arxiv:`Simon, Noah, J. Friedman and T. Hastie.
        "A Blockwise Descent Algorithm for Group-penalized Multiresponse and
        Multinomial Regression".
        <1311.6529>`
    TN   c                    s<   t  jt t |d tdtjdd| _tdddd| _d S )Nr   r   TFr   )	ri   r$   r   r   r   r   r   r   r    )r!   r0   r   rk   r"   r#   r$     s   zHalfMultinomialLoss.__init__c                 C   s    | j |ot|t|kS r&   )r   r'   r   allastypeintr(   r"   r"   r#   r*     s    z#HalfMultinomialLoss.in_y_true_rangec                 C   s   t j| j|jd}t |jj}t| jD ]}t j||k|dd||< t || |d| ||< q| j		|dddf 
dS )zCompute raw_prediction of an intercept-only model.

        This is the softmax of the weighted average of the target, i.e. over
        the samples axis=0.
        r8   r   rB   r   N)r   zerosr   r9   rE   rF   ranger@   rK   r   reshape)r!   r.   r0   outrF   kr"   r"   r#   rO     s   z&HalfMultinomialLoss.fit_intercept_onlyc                 C   r%   )a=  Predict probabilities.

        Parameters
        ----------
        raw_prediction : array of shape (n_samples, n_classes)
            Raw prediction values (in link space).

        Returns
        -------
        proba : array of shape (n_samples, n_classes)
            Element-wise class probabilities.
        )r   r   )r!   r/   r"   r"   r#   r     s   z!HalfMultinomialLoss.predict_probar   c                 C   s|   |du r|du rt |}t |}nt |}n	|du r"t |}t|}t|}|dur2t|}| jj||||||dS )aK  Compute gradient and class probabilities fow raw_prediction.

        Parameters
        ----------
        y_true : C-contiguous array of shape (n_samples,)
            Observed, true target values.
        raw_prediction : array of shape (n_samples, n_classes)
            Raw prediction values (in link space).
        sample_weight : None or C-contiguous array of shape (n_samples,)
            Sample weights.
        gradient_out : None or array of shape (n_samples, n_classes)
            A location into which the gradient is stored. If None, a new array
            might be created.
        proba_out : None or array of shape (n_samples, n_classes)
            A location into which the class probabilities are stored. If None,
            a new array might be created.
        n_threads : int, default=1
            Might use openmp thread parallelism.

        Returns
        -------
        gradient : array of shape (n_samples, n_classes)
            Element-wise gradients.

        proba : array of shape (n_samples, n_classes)
            Element-wise class probabilities.
        N)r.   r/   r0   r:   	proba_outr2   )r   r3   r   r   gradient_proba)r!   r.   r/   r0   r:   r   r2   r"   r"   r#   r     s&   $

z"HalfMultinomialLoss.gradient_proba)Nr   r^   r_   )ra   rb   rc   rd   rX   r$   r*   rO   r   r   rn   r"   r"   rk   r#   r   t  s    "	
	r   )squared_errorabsolute_errorpinball_losspoisson_loss
gamma_losstweedie_lossbinomial_lossmultinomial_loss)'rd   r}   numpyr   scipy.specialr   _lossr   r   r   r   r	   r
   r   r   r   r   r   r   r   r   r   utilsr   utils._readonly_array_wrapperr   utils.statsr   r   rg   ro   ru   r   r   r   r   r   r   _LOSSESr"   r"   r"   r#   <module>   s@    ,   P"; H.@ 
