"""
This module contains loss classes suitable for fitting.

It is not part of the public API.
Specific losses are used for regression, binary classification or multiclass
classification.
    Nxlogy   )	CyHalfSquaredErrorCyAbsoluteErrorCyPinballLossCyHalfPoissonLossCyHalfGammaLossCyHalfTweedieLossCyHalfTweedieLossIdentityCyHalfBinomialLossCyHalfMultinomialLoss)IntervalIdentityLinkLogLink	LogitLinkMultinomialLogit   )check_scalar)ReadonlyArrayWrapper)_weighted_percentilec                       e Zd ZdZdZdZdZddZd Zd Z		 	 	 dd	Z
	 	 	 	 dd
Z	 	 	 ddZ	 	 	 	 ddZddZddZddZej        dfdZdS )BaseLossa  Base class for a loss function of 1-dimensional targets.

    Conventions:

        - y_true.shape = sample_weight.shape = (n_samples,)
        - y_pred.shape = raw_prediction.shape = (n_samples,)
        - If is_multiclass is true (multiclass classification), then
          y_pred.shape = raw_prediction.shape = (n_samples, n_classes)
          Note that this corresponds to the return value of decision_function.

    y_true, y_pred, sample_weight and raw_prediction must either be all float64
    or all float32.
    gradient and hessian must be either both float64 or both float32.

    Note that y_pred = link.inverse(raw_prediction).

    Specific loss classes can inherit specific link classes to satisfy
    BaseLink's abstractmethods.

    Parameters
    ----------
    sample_weight : {None, ndarray}
        If sample_weight is None, the hessian might be constant.
    n_classes : {None, int}
        The number of classes for classification, else None.

    Attributes
    ----------
    closs: CyLossFunction
    link : BaseLink
    interval_y_true : Interval
        Valid interval for y_true
    interval_y_pred : Interval
        Valid Interval for y_pred
    differentiable : bool
        Indicates whether or not loss function is differentiable in
        raw_prediction everywhere.
    need_update_leaves_values : bool
        Indicates whether decision trees in gradient boosting need to update
        leaf values after having been fit to the (negative) gradients.
    approx_hessian : bool
        Indicates whether the hessian is approximated or exact. If
        approximated, it should be larger than or equal to the exact one.
    constant_hessian : bool
        Indicates whether the hessian is one for this loss.
    is_multiclass : bool
        Indicates whether n_classes > 2 is allowed.
    """

    # These class attributes may be overridden by subclasses; see the class
    # docstring above for their meaning.
    differentiable = True
    need_update_leaves_values = False
    is_multiclass = False

    def __init__(self, closs, link, n_classes=None):
        self.closs = closs
        self.link = link
        self.approx_hessian = False
        self.constant_hessian = False
        self.n_classes = n_classes
        self.interval_y_true = Interval(-np.inf, np.inf, False, False)
        self.interval_y_pred = self.link.interval_y_pred

    def in_y_true_range(self, y):
        """Return True if y is in the valid range of y_true.

        Parameters
        ----------
        y : ndarray
        """
        return self.interval_y_true.includes(y)

    def in_y_pred_range(self, y):
        """Return True if y is in the valid range of y_pred.

        Parameters
        ----------
        y : ndarray
        """
        return self.interval_y_pred.includes(y)

    def loss(
        self,
        y_true,
        raw_prediction,
        sample_weight=None,
        loss_out=None,
        n_threads=1,
    ):
        """Compute the pointwise loss value for each input.

        Parameters
        ----------
        y_true : C-contiguous array of shape (n_samples,)
            Observed, true target values.
        raw_prediction : C-contiguous array of shape (n_samples,) or array of \
            shape (n_samples, n_classes)
            Raw prediction values (in link space).
        sample_weight : None or C-contiguous array of shape (n_samples,)
            Sample weights.
        loss_out : None or C-contiguous array of shape (n_samples,)
            A location into which the result is stored. If None, a new array
            might be created.
        n_threads : int, default=1
            Might use openmp thread parallelism.

        Returns
        -------
        loss : array of shape (n_samples,)
            Element-wise loss function.
        """
        if loss_out is None:
            loss_out = np.empty_like(y_true)
        # Be graceful to shape (n_samples, 1) -> (n_samples,).
        if raw_prediction.ndim == 2 and raw_prediction.shape[1] == 1:
            raw_prediction = raw_prediction.squeeze(1)

        y_true = ReadonlyArrayWrapper(y_true)
        raw_prediction = ReadonlyArrayWrapper(raw_prediction)
        if sample_weight is not None:
            sample_weight = ReadonlyArrayWrapper(sample_weight)
        return self.closs.loss(
            y_true=y_true,
            raw_prediction=raw_prediction,
            sample_weight=sample_weight,
            loss_out=loss_out,
            n_threads=n_threads,
        )

    def loss_gradient(
        self,
        y_true,
        raw_prediction,
        sample_weight=None,
        loss_out=None,
        gradient_out=None,
        n_threads=1,
    ):
        """Compute loss and gradient w.r.t. raw_prediction for each input.

        Parameters
        ----------
        y_true : C-contiguous array of shape (n_samples,)
            Observed, true target values.
        raw_prediction : C-contiguous array of shape (n_samples,) or array of \
            shape (n_samples, n_classes)
            Raw prediction values (in link space).
        sample_weight : None or C-contiguous array of shape (n_samples,)
            Sample weights.
        loss_out : None or C-contiguous array of shape (n_samples,)
            A location into which the loss is stored. If None, a new array
            might be created.
        gradient_out : None or C-contiguous array of shape (n_samples,) or array \
            of shape (n_samples, n_classes)
            A location into which the gradient is stored. If None, a new array
            might be created.
        n_threads : int, default=1
            Might use openmp thread parallelism.

        Returns
        -------
        loss : array of shape (n_samples,)
            Element-wise loss function.

        gradient : array of shape (n_samples,) or (n_samples, n_classes)
            Element-wise gradients.
        """
        if loss_out is None:
            if gradient_out is None:
                loss_out = np.empty_like(y_true)
                gradient_out = np.empty_like(raw_prediction)
            else:
                loss_out = np.empty_like(y_true, dtype=gradient_out.dtype)
        elif gradient_out is None:
            gradient_out = np.empty_like(raw_prediction, dtype=loss_out.dtype)

        # Be graceful to shape (n_samples, 1) -> (n_samples,).
        if raw_prediction.ndim == 2 and raw_prediction.shape[1] == 1:
            raw_prediction = raw_prediction.squeeze(1)
        if gradient_out.ndim == 2 and gradient_out.shape[1] == 1:
            gradient_out = gradient_out.squeeze(1)

        y_true = ReadonlyArrayWrapper(y_true)
        raw_prediction = ReadonlyArrayWrapper(raw_prediction)
        if sample_weight is not None:
            sample_weight = ReadonlyArrayWrapper(sample_weight)
        return self.closs.loss_gradient(
            y_true=y_true,
            raw_prediction=raw_prediction,
            sample_weight=sample_weight,
            loss_out=loss_out,
            gradient_out=gradient_out,
            n_threads=n_threads,
        )

    def gradient(
        self,
        y_true,
        raw_prediction,
        sample_weight=None,
        gradient_out=None,
        n_threads=1,
    ):
        """Compute gradient of loss w.r.t raw_prediction for each input.

        Parameters
        ----------
        y_true : C-contiguous array of shape (n_samples,)
            Observed, true target values.
        raw_prediction : C-contiguous array of shape (n_samples,) or array of \
            shape (n_samples, n_classes)
            Raw prediction values (in link space).
        sample_weight : None or C-contiguous array of shape (n_samples,)
            Sample weights.
        gradient_out : None or C-contiguous array of shape (n_samples,) or array \
            of shape (n_samples, n_classes)
            A location into which the result is stored. If None, a new array
            might be created.
        n_threads : int, default=1
            Might use openmp thread parallelism.

        Returns
        -------
        gradient : array of shape (n_samples,) or (n_samples, n_classes)
            Element-wise gradients.
        """
        if gradient_out is None:
            gradient_out = np.empty_like(raw_prediction)

        # Be graceful to shape (n_samples, 1) -> (n_samples,).
        if raw_prediction.ndim == 2 and raw_prediction.shape[1] == 1:
            raw_prediction = raw_prediction.squeeze(1)
        if gradient_out.ndim == 2 and gradient_out.shape[1] == 1:
            gradient_out = gradient_out.squeeze(1)

        y_true = ReadonlyArrayWrapper(y_true)
        raw_prediction = ReadonlyArrayWrapper(raw_prediction)
        if sample_weight is not None:
            sample_weight = ReadonlyArrayWrapper(sample_weight)
        return self.closs.gradient(
            y_true=y_true,
            raw_prediction=raw_prediction,
            sample_weight=sample_weight,
            gradient_out=gradient_out,
            n_threads=n_threads,
        )

    def gradient_hessian(
        self,
        y_true,
        raw_prediction,
        sample_weight=None,
        gradient_out=None,
        hessian_out=None,
        n_threads=1,
    ):
        """Compute gradient and hessian of loss w.r.t raw_prediction.

        Parameters
        ----------
        y_true : C-contiguous array of shape (n_samples,)
            Observed, true target values.
        raw_prediction : C-contiguous array of shape (n_samples,) or array of \
            shape (n_samples, n_classes)
            Raw prediction values (in link space).
        sample_weight : None or C-contiguous array of shape (n_samples,)
            Sample weights.
        gradient_out : None or C-contiguous array of shape (n_samples,) or array \
            of shape (n_samples, n_classes)
            A location into which the gradient is stored. If None, a new array
            might be created.
        hessian_out : None or C-contiguous array of shape (n_samples,) or array \
            of shape (n_samples, n_classes)
            A location into which the hessian is stored. If None, a new array
            might be created.
        n_threads : int, default=1
            Might use openmp thread parallelism.

        Returns
        -------
        gradient : arrays of shape (n_samples,) or (n_samples, n_classes)
            Element-wise gradients.

        hessian : arrays of shape (n_samples,) or (n_samples, n_classes)
            Element-wise hessians.
        """
        if gradient_out is None:
            if hessian_out is None:
                gradient_out = np.empty_like(raw_prediction)
                hessian_out = np.empty_like(raw_prediction)
            else:
                gradient_out = np.empty_like(hessian_out)
        elif hessian_out is None:
            hessian_out = np.empty_like(gradient_out)

        # Be graceful to shape (n_samples, 1) -> (n_samples,).
        if raw_prediction.ndim == 2 and raw_prediction.shape[1] == 1:
            raw_prediction = raw_prediction.squeeze(1)
        if gradient_out.ndim == 2 and gradient_out.shape[1] == 1:
            gradient_out = gradient_out.squeeze(1)
        if hessian_out.ndim == 2 and hessian_out.shape[1] == 1:
            hessian_out = hessian_out.squeeze(1)

        y_true = ReadonlyArrayWrapper(y_true)
        raw_prediction = ReadonlyArrayWrapper(raw_prediction)
        if sample_weight is not None:
            sample_weight = ReadonlyArrayWrapper(sample_weight)
        return self.closs.gradient_hessian(
            y_true=y_true,
            raw_prediction=raw_prediction,
            sample_weight=sample_weight,
            gradient_out=gradient_out,
            hessian_out=hessian_out,
            n_threads=n_threads,
        )

    def __call__(self, y_true, raw_prediction, sample_weight=None, n_threads=1):
        """Compute the weighted average loss.

        Parameters
        ----------
        y_true : C-contiguous array of shape (n_samples,)
            Observed, true target values.
        raw_prediction : C-contiguous array of shape (n_samples,) or array of \
            shape (n_samples, n_classes)
            Raw prediction values (in link space).
        sample_weight : None or C-contiguous array of shape (n_samples,)
            Sample weights.
        n_threads : int, default=1
            Might use openmp thread parallelism.

        Returns
        -------
        loss : float
            Mean or averaged loss function.
        """
        return np.average(
            self.loss(
                y_true=y_true,
                raw_prediction=raw_prediction,
                sample_weight=None,
                loss_out=None,
                n_threads=n_threads,
            ),
            weights=sample_weight,
        )

    def fit_intercept_only(self, y_true, sample_weight=None):
        """Compute raw_prediction of an intercept-only model.

        This can be used as initial estimates of predictions, i.e. before the
        first iteration in fit.

        Parameters
        ----------
        y_true : array-like of shape (n_samples,)
            Observed, true target values.
        sample_weight : None or array of shape (n_samples,)
            Sample weights.

        Returns
        -------
        raw_prediction : numpy scalar or array of shape (n_classes,)
            Raw predictions of an intercept-only model.
        """
        # The weighted average of y_true, clipped into the valid range of
        # y_pred (shrunk by a small eps at open boundaries), mapped to link
        # space.
        y_pred = np.average(y_true, weights=sample_weight, axis=0)
        eps = 10 * np.finfo(y_pred.dtype).eps

        if self.interval_y_pred.low == -np.inf:
            a_min = None
        elif self.interval_y_pred.low_inclusive:
            a_min = self.interval_y_pred.low
        else:
            a_min = self.interval_y_pred.low + eps

        if self.interval_y_pred.high == np.inf:
            a_max = None
        elif self.interval_y_pred.high_inclusive:
            a_max = self.interval_y_pred.high
        else:
            a_max = self.interval_y_pred.high - eps

        if a_min is None and a_max is None:
            return self.link.link(y_pred)
        else:
            return self.link.link(np.clip(y_pred, a_min, a_max))

    def constant_to_optimal_zero(self, y_true, sample_weight=None):
        """Calculate term dropped in loss.

        With this term added, the loss of perfect predictions is zero.
        """
        return np.zeros_like(y_true)

    def init_gradient_and_hessian(self, n_samples, dtype=np.float64, order="F"):
        """Initialize arrays for gradients and hessians.

        Unless hessians are constant, arrays are initialized with undefined values.

        Parameters
        ----------
        n_samples : int
            The number of samples, usually passed to `fit()`.
        dtype : {np.float64, np.float32}, default=np.float64
            The dtype of the arrays gradient and hessian.
        order : {'C', 'F'}, default='F'
            Order of the arrays gradient and hessian. The default 'F' makes the arrays
            contiguous along samples.

        Returns
        -------
        gradient : C-contiguous array of shape (n_samples,) or array of shape \
            (n_samples, n_classes)
            Empty array (allocated but not initialized) to be used as argument
            gradient_out.
        hessian : C-contiguous array of shape (n_samples,), array of shape
            (n_samples, n_classes) or shape (1,)
            Empty (allocated but not initialized) array to be used as argument
            hessian_out.
            If constant_hessian is True (e.g. `HalfSquaredError`), the array is
            initialized to ``1``.
        """
        if dtype not in (np.float32, np.float64):
            raise ValueError(
                "Valid options for 'dtype' are np.float32 and np.float64. "
                f"Got dtype={dtype} instead."
            )

        if self.is_multiclass:
            shape = (n_samples, self.n_classes)
        else:
            shape = (n_samples,)
        gradient = np.empty(shape=shape, dtype=dtype, order=order)

        if self.constant_hessian:
            # If the hessian is constant, a single-element array of ones is
            # enough; consumers broadcast it against the gradients.
            hessian = np.ones(shape=(1,), dtype=dtype)
        else:
            hessian = np.empty(shape=shape, dtype=dtype, order=order)

        return gradient, hessian


class HalfSquaredError(BaseLoss):
    """Half squared error with identity link, for regression.

    Domain:
    y_true and y_pred all real numbers

    Link:
    y_pred = raw_prediction

    For a given sample x_i, half squared error is defined as::

        loss(x_i) = 0.5 * (y_true_i - raw_prediction_i)**2

    The factor of 0.5 simplifies the computation of gradients and results in a
    unit hessian (and is consistent with what is done in LightGBM). It is also
    half the Normal distribution deviance.
    """

    def __init__(self, sample_weight=None):
        super().__init__(closs=CyHalfSquaredError(), link=IdentityLink())
        self.constant_hessian = sample_weight is None


class AbsoluteError(BaseLoss):
    """Absolute error with identity link, for regression.

    Domain:
    y_true and y_pred all real numbers

    Link:
    y_pred = raw_prediction

    For a given sample x_i, the absolute error is defined as::

        loss(x_i) = |y_true_i - raw_prediction_i|
    """

    differentiable = False
    need_update_leaves_values = True

    def __init__(self, sample_weight=None):
        super().__init__(closs=CyAbsoluteError(), link=IdentityLink())
        self.approx_hessian = True
        self.constant_hessian = sample_weight is None

    def fit_intercept_only(self, y_true, sample_weight=None):
        """Compute raw_prediction of an intercept-only model.

        This is the weighted median of the target, i.e. over the samples
        axis=0.
        """
        if sample_weight is None:
            return np.median(y_true, axis=0)
        else:
            return _weighted_percentile(y_true, sample_weight, 50)


class PinballLoss(BaseLoss):
    """Quantile loss aka pinball loss, for regression.

    Domain:
    y_true and y_pred all real numbers
    quantile in (0, 1)

    Link:
    y_pred = raw_prediction

    For a given sample x_i, the pinball loss is defined as::

        loss(x_i) = rho_{quantile}(y_true_i - raw_prediction_i)

        rho_{quantile}(u) = u * (quantile - 1_{u<0})
                          = -u *(1 - quantile)  if u < 0
                             u * quantile       if u >= 0

    Note: 2 * PinballLoss(quantile=0.5) equals AbsoluteError().

    Additional Attributes
    ---------------------
    quantile : float
        The quantile to be estimated. Must be in range (0, 1).
    """

    differentiable = False
    need_update_leaves_values = True

    def __init__(self, sample_weight=None, quantile=0.5):
        check_scalar(
            quantile,
            "quantile",
            target_type=numbers.Real,
            min_val=0,
            max_val=1,
            include_boundaries="neither",
        )
        super().__init__(
            closs=CyPinballLoss(quantile=float(quantile)),
            link=IdentityLink(),
        )
        self.approx_hessian = True
        self.constant_hessian = sample_weight is None

    def fit_intercept_only(self, y_true, sample_weight=None):
        """Compute raw_prediction of an intercept-only model.

        This is the weighted quantile of the target, i.e. over the samples
        axis=0.
        """
        if sample_weight is None:
            return np.percentile(y_true, 100 * self.closs.quantile, axis=0)
        else:
            return _weighted_percentile(
                y_true, sample_weight, 100 * self.closs.quantile
            )


class HalfPoissonLoss(BaseLoss):
    """Half Poisson deviance loss with log-link, for regression.

    Domain:
    y_true in non-negative real numbers
    y_pred in positive real numbers

    Link:
    y_pred = exp(raw_prediction)

    For a given sample x_i, half the Poisson deviance is defined as::

        loss(x_i) = y_true_i * log(y_true_i/exp(raw_prediction_i))
                    - y_true_i + exp(raw_prediction_i)

    Half the Poisson deviance is actually the negative log-likelihood up to
    constant terms (not involving raw_prediction) and simplifies the
    computation of the gradients.
    We also skip the constant term `y_true_i * log(y_true_i) - y_true_i`.
    """

    def __init__(self, sample_weight=None):
        super().__init__(closs=CyHalfPoissonLoss(), link=LogLink())
        self.interval_y_true = Interval(0, np.inf, True, False)

    def constant_to_optimal_zero(self, y_true, sample_weight=None):
        term = xlogy(y_true, y_true) - y_true
        if sample_weight is not None:
            term *= sample_weight
        return term


class HalfGammaLoss(BaseLoss):
    """Half Gamma deviance loss with log-link, for regression.

    Domain:
    y_true and y_pred in positive real numbers

    Link:
    y_pred = exp(raw_prediction)

    For a given sample x_i, half Gamma deviance loss is defined as::

        loss(x_i) = log(exp(raw_prediction_i)/y_true_i)
                    + y_true/exp(raw_prediction_i) - 1

    Half the Gamma deviance is actually proportional to the negative log-
    likelihood up to constant terms (not involving raw_prediction) and
    simplifies the computation of the gradients.
    We also skip the constant term `-log(y_true_i) - 1`.
    """

    def __init__(self, sample_weight=None):
        super().__init__(closs=CyHalfGammaLoss(), link=LogLink())
        self.interval_y_true = Interval(0, np.inf, False, False)

    def constant_to_optimal_zero(self, y_true, sample_weight=None):
        term = -np.log(y_true) - 1
        if sample_weight is not None:
            term *= sample_weight
        return term


class HalfTweedieLoss(BaseLoss):
    """Half Tweedie deviance loss with log-link, for regression.

    Domain:
    y_true in real numbers for power <= 0
    y_true in non-negative real numbers for 0 < power < 2
    y_true in positive real numbers for 2 <= power
    y_pred in positive real numbers
    power in real numbers

    Link:
    y_pred = exp(raw_prediction)

    For a given sample x_i, half Tweedie deviance loss with p=power is defined
    as::

        loss(x_i) = max(y_true_i, 0)**(2-p) / (1-p) / (2-p)
                    - y_true_i * exp(raw_prediction_i)**(1-p) / (1-p)
                    + exp(raw_prediction_i)**(2-p) / (2-p)

    Taking the limits for p=0, 1, 2 gives HalfSquaredError with a log link,
    HalfPoissonLoss and HalfGammaLoss.

    We also skip constant terms, but those are different for p=0, 1, 2.
    Therefore, the loss is not continuous in `power`.

    Note furthermore that although no Tweedie distribution exists for
    0 < power < 1, it still gives a strictly consistent scoring function for
    the expectation.
    """

    def __init__(self, sample_weight=None, power=1.5):
        super().__init__(
            closs=CyHalfTweedieLoss(power=float(power)),
            link=LogLink(),
        )
        if self.closs.power <= 0:
            self.interval_y_true = Interval(-np.inf, np.inf, False, False)
        elif self.closs.power < 2:
            self.interval_y_true = Interval(0, np.inf, True, False)
        else:
            self.interval_y_true = Interval(0, np.inf, False, False)

    def constant_to_optimal_zero(self, y_true, sample_weight=None):
        if self.closs.power == 0:
            return HalfSquaredError().constant_to_optimal_zero(
                y_true=y_true, sample_weight=sample_weight
            )
        elif self.closs.power == 1:
            return HalfPoissonLoss().constant_to_optimal_zero(
                y_true=y_true, sample_weight=sample_weight
            )
        elif self.closs.power == 2:
            return HalfGammaLoss().constant_to_optimal_zero(
                y_true=y_true, sample_weight=sample_weight
            )
        else:
            p = self.closs.power
            term = np.power(np.maximum(y_true, 0), 2 - p) / (1 - p) / (2 - p)
            if sample_weight is not None:
                term *= sample_weight
            return term


class HalfTweedieLossIdentity(BaseLoss):
    """Half Tweedie deviance loss with identity link, for regression.

    Domain:
    y_true in real numbers for power <= 0
    y_true in non-negative real numbers for 0 < power < 2
    y_true in positive real numbers for 2 <= power
    y_pred in positive real numbers for power != 0
    y_pred in real numbers for power = 0
    power in real numbers

    Link:
    y_pred = raw_prediction

    For a given sample x_i, half Tweedie deviance loss with p=power is defined
    as::

        loss(x_i) = max(y_true_i, 0)**(2-p) / (1-p) / (2-p)
                    - y_true_i * raw_prediction_i**(1-p) / (1-p)
                    + raw_prediction_i**(2-p) / (2-p)

    Note that the minimum value of this loss is 0.

    Note furthermore that although no Tweedie distribution exists for
    0 < power < 1, it still gives a strictly consistent scoring function for
    the expectation.
    """

    def __init__(self, sample_weight=None, power=1.5):
        super().__init__(
            closs=CyHalfTweedieLossIdentity(power=float(power)),
            link=IdentityLink(),
        )
        if self.closs.power <= 0:
            self.interval_y_true = Interval(-np.inf, np.inf, False, False)
        elif self.closs.power < 2:
            self.interval_y_true = Interval(0, np.inf, True, False)
        else:
            self.interval_y_true = Interval(0, np.inf, False, False)

        if self.closs.power == 0:
            self.interval_y_pred = Interval(-np.inf, np.inf, False, False)
        else:
            self.interval_y_pred = Interval(0, np.inf, False, False)


class HalfBinomialLoss(BaseLoss):
    """Half Binomial deviance loss with logit link, for binary classification.

    This is also known as binary cross entropy, log-loss and logistic loss.

    Domain:
    y_true in [0, 1], i.e. regression on the unit interval
    y_pred in (0, 1), i.e. boundaries excluded

    Link:
    y_pred = expit(raw_prediction)

    For a given sample x_i, half Binomial deviance is defined as the negative
    log-likelihood of the Binomial/Bernoulli distribution and can be expressed
    as::

        loss(x_i) = log(1 + exp(raw_pred_i)) - y_true_i * raw_pred_i

    See The Elements of Statistical Learning, by Hastie, Tibshirani, Friedman,
    section 4.4.1 (about logistic regression).

    Note that the formulation works for classification, y = {0, 1}, as well as
    logistic regression, y = [0, 1].
    If you add `constant_to_optimal_zero` to the loss, you get half the
    Bernoulli/binomial deviance.
    """

    def __init__(self, sample_weight=None):
        super().__init__(
            closs=CyHalfBinomialLoss(),
            link=LogitLink(),
            n_classes=2,
        )
        self.interval_y_true = Interval(0, 1, True, True)

    def constant_to_optimal_zero(self, y_true, sample_weight=None):
        # This term is zero whenever y_true is exactly 0 or 1.
        term = xlogy(y_true, y_true) + xlogy(1 - y_true, 1 - y_true)
        if sample_weight is not None:
            term *= sample_weight
        return term

    def predict_proba(self, raw_prediction):
        """Predict probabilities.

        Parameters
        ----------
        raw_prediction : array of shape (n_samples,) or (n_samples, 1)
            Raw prediction values (in link space).

        Returns
        -------
        proba : array of shape (n_samples, 2)
            Element-wise class probabilities.
        """
        # Be graceful to shape (n_samples, 1) -> (n_samples,).
        if raw_prediction.ndim == 2 and raw_prediction.shape[1] == 1:
            raw_prediction = raw_prediction.squeeze(1)
        proba = np.empty((raw_prediction.shape[0], 2), dtype=raw_prediction.dtype)
        proba[:, 1] = self.link.inverse(raw_prediction)
        proba[:, 0] = 1 - proba[:, 1]
        return proba


class HalfMultinomialLoss(BaseLoss):
    """Categorical cross-entropy loss, for multiclass classification.
    Domain:
    y_true in {0, 1, 2, 3, .., n_classes - 1}
    y_pred has n_classes elements, each element in (0, 1)

    Link:
    y_pred = softmax(raw_prediction)

    Note: We assume y_true to be already label encoded. The inverse link is
    softmax. But the full link function is the symmetric multinomial logit
    function.

    For a given sample x_i, the categorical cross-entropy loss is defined as
    the negative log-likelihood of the multinomial distribution, it
    generalizes the binary cross-entropy to more than 2 classes::

        loss_i = log(sum(exp(raw_pred_{i, k}), k=0..n_classes-1))
                - sum(y_true_{i, k} * raw_pred_{i, k}, k=0..n_classes-1)

    See [1].

    Note that for the hessian, we calculate only the diagonal part in the
    classes: If the full hessian for classes k and l and sample i is H_i_k_l,
    we calculate H_i_k_k, i.e. k=l.

    Reference
    ---------
    .. [1] :arxiv:`Simon, Noah, J. Friedman and T. Hastie.
        "A Blockwise Descent Algorithm for Group-penalized Multiresponse and
        Multinomial Regression".
        <1311.6529>`
    TN   c                     t                                          t                      t                      |           t	          dt
          j        dd          | _        t	          dddd          | _        d S )Nr   r   TFr   )	rx   r&   r   r   r   r    r!   r"   r#   )r$   r5   r   rz   s      r%   r&   zHalfMultinomialLoss.__init__  so    '))!## 	 	
 	
 	

  (264??'1eU;;r'   c                     | j                             |          o/t          j        |                    t
                    |k              S r)   )r"   r*   r    allastypeintr+   s     r%   r-   z#HalfMultinomialLoss.in_y_true_range  s9     #,,Q//NBF188C==A;M4N4NNr'   c                    t          j        | j        |j                  }t          j        |j                  j        }t          | j                  D ]B}t          j        ||k    |d          ||<   t          j        ||         |d|z
            ||<   C| j	        	                    |dddf                   
                    d          S )zCompute raw_prediction of an intercept-only model.

        This is the softmax of the weighted average of the target, i.e. over
        the samples axis=0.
        r>   r   rL   r   N)r    zerosr   r?   rO   rP   rangerI   rU   r   reshape)r$   r3   r5   outrP   ks         r%   rY   z&HalfMultinomialLoss.fit_intercept_only  s     ht~V\:::hv|$$(t~&& 	3 	3AZ!]KKKCFWSVS!c'22CFFy~~c$'l++33B777r'   c                 6    | j                             |          S )a=  Predict probabilities.

        Parameters
        ----------
        raw_prediction : array of shape (n_samples, n_classes)
            Raw prediction values (in link space).

        Returns
        -------
        proba : array of shape (n_samples, n_classes)
            Element-wise class probabilities.
        )r   r   )r$   r4   s     r%   r   z!HalfMultinomialLoss.predict_proba  s     y  000r'   r   c                 P   |@|)t          j        |          }t          j        |          }n+t          j        |          }n|t          j        |          }t          |          }t          |          }|t          |          }| j                            ||||||          S )aK  Compute gradient and class probabilities fow raw_prediction.

        Parameters
        ----------
        y_true : C-contiguous array of shape (n_samples,)
            Observed, true target values.
        raw_prediction : array of shape (n_samples, n_classes)
            Raw prediction values (in link space).
        sample_weight : None or C-contiguous array of shape (n_samples,)
            Sample weights.
        gradient_out : None or array of shape (n_samples, n_classes)
            A location into which the gradient is stored. If None, a new array
            might be created.
        proba_out : None or array of shape (n_samples, n_classes)
            A location into which the class probabilities are stored. If None,
            a new array might be created.
        n_threads : int, default=1
            Might use openmp thread parallelism.

        Returns
        -------
        gradient : array of shape (n_samples, n_classes)
            Element-wise gradients.

        proba : array of shape (n_samples, n_classes)
            Element-wise class probabilities.
        N)r3   r4   r5   r@   	proba_outr7   )r    r8   r   r   gradient_proba)r$   r3   r4   r5   r@   r   r7   s          r%   r   z"HalfMultinomialLoss.gradient_proba  s    H  !}^<<M.99		!}Y77l33I%f---n==$0??Mz(()'% ) 
 
 	
r'   )Nr   rj   rk   )rm   rn   ro   rp   rd   r&   r-   rY   r   r   r|   r}   s   @r%   r   r   l  s           D M< < < < < <O O O8 8 8 81 1 1& 8
 8
 8
 8
 8
 8
 8
 8
r'   r   )squared_errorabsolute_errorpinball_losspoisson_loss
gamma_losstweedie_lossbinomial_lossmultinomial_loss)'rp   r   numpyr    scipy.specialr   _lossr   r   r   r	   r
   r   r   r   r   r   r   r   r   r   r   utilsr   utils._readonly_array_wrapperr   utils.statsr   r   ru   r   r   r   r   r   r   r   r   _LOSSESrs   r'   r%   <module>r      s   "           
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
              !           @ @ @ @ @ @ . . . . . .*F! F! F! F! F! F! F! F!Z6 6 6 6 6x 6 6 6.C C C C CH C C CD8 8 8 8 8( 8 8 8v    h   @    H   >= = = = =h = = =@+E +E +E +E +Eh +E +E +E\= = = = =x = = =@K
 K
 K
 K
 K
( K
 K
 K
^ &###%+	 	r'   