"""
Loss functions for linear models with raw_prediction = X @ coef
"""
import numpy as np
from scipy import sparse

from ..utils.extmath import squared_norm


class LinearModelLoss:
    """General class for loss functions with raw_prediction = X @ coef + intercept.

    Note that raw_prediction is also known as linear predictor.

    The loss is the sum of per sample losses and includes a term for L2
    regularization::

        loss = sum_i s_i loss(y_i, X_i @ coef + intercept)
               + 1/2 * l2_reg_strength * ||coef||_2^2

    with sample weights s_i=1 if sample_weight=None.

    Gradient and hessian, for simplicity without intercept, are::

        gradient = X.T @ loss.gradient + l2_reg_strength * coef
        hessian = X.T @ diag(loss.hessian) @ X + l2_reg_strength * identity
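
    With fit_intercept=True the same formulas hold with X implicitly extended by
    a column of ones, except that the intercept itself is not penalized by the
    L2 term.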

    Conventions:
        if fit_intercept:
            n_dof = n_features + 1
        else:
            n_dof = n_features

        if base_loss.is_multiclass:
            coef.shape = (n_classes, n_dof) or ravelled (n_classes * n_dof,)
        else:
            coef.shape = (n_dof,)

        The intercept term is at the end of the coef array:
        if base_loss.is_multiclass:
            if coef.shape == (n_classes, n_dof):
                intercept = coef[:, -1]
            if coef.shape == (n_classes * n_dof,):
                intercept = coef[n_features::n_dof] = coef[(n_dof-1)::n_dof]
            intercept.shape = (n_classes,)
        else:
            intercept = coef[-1]

    Note: If coef has shape (n_classes * n_dof,), the 2d-array can be reconstructed as

        coef.reshape((n_classes, -1), order="F")

    The option order="F" makes coef[:, i] contiguous. This, in turn, makes the
    coefficients without intercept, coef[:, :-1], contiguous and speeds up
    matrix-vector computations.
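
    For example, with n_classes=3 and n_dof=2, the raveled coefficient array
    [a0, b0, c0, a1, b1, c1] is reconstructed as::

        coef.reshape((3, -1), order="F") = [[a0, a1],
                                            [b0, b1],
                                            [c0, c1]]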

    Note: If the average loss per sample is wanted instead of the sum of the loss per
    sample, one can simply use a rescaled sample_weight such that
    sum(sample_weight) = 1.
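
    For example, for unit sample weights this amounts to::

        sample_weight = np.full(shape=n_samples, fill_value=1 / n_samples)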

    Parameters
    ----------
    base_loss : instance of class BaseLoss from sklearn._loss.
    fit_intercept : bool
    """

    def __init__(self, base_loss, fit_intercept):
        self.base_loss = base_loss
        self.fit_intercept = fit_intercept

    def _w_intercept_raw(self, coef, X):
        """Helper function to get coefficients, intercept and raw_prediction.

        Parameters
        ----------
        coef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)
            Coefficients of a linear model.
            If shape (n_classes * n_dof,), the classes of one feature are contiguous,
            i.e. one reconstructs the 2d-array via
            coef.reshape((n_classes, -1), order="F").
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training data.

        Returns
        -------
        weights : ndarray of shape (n_features,) or (n_classes, n_features)
            Coefficients without intercept term.
        intercept : float or ndarray of shape (n_classes,)
            Intercept terms.
        raw_prediction : ndarray of shape (n_samples,) or \
            (n_samples, n_classes)
        """
        if not self.base_loss.is_multiclass:
            if self.fit_intercept:
                intercept = coef[-1]
                weights = coef[:-1]
            else:
                intercept = 0.0
                weights = coef
            raw_prediction = X @ weights + intercept
        else:
            # reshape to (n_classes, n_dof)
            if coef.ndim == 1:
                weights = coef.reshape((self.base_loss.n_classes, -1), order="F")
            else:
                weights = coef
            if self.fit_intercept:
                intercept = weights[:, -1]
                weights = weights[:, :-1]
            else:
                intercept = 0.0
            raw_prediction = X @ weights.T + intercept

        return weights, intercept, raw_prediction

    def loss(self, coef, X, y, sample_weight=None, l2_reg_strength=0.0, n_threads=1):
        """Compute the loss as sum over point-wise losses.

        Parameters
        ----------
        coef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)
            Coefficients of a linear model.
            If shape (n_classes * n_dof,), the classes of one feature are contiguous,
            i.e. one reconstructs the 2d-array via
            coef.reshape((n_classes, -1), order="F").
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training data.
        y : contiguous array of shape (n_samples,)
            Observed, true target values.
        sample_weight : None or contiguous array of shape (n_samples,), default=None
            Sample weights.
        l2_reg_strength : float, default=0.0
            L2 regularization strength.
        n_threads : int, default=1
            Number of OpenMP threads to use.

        Returns
        -------
        loss : float
            Sum of losses per sample plus penalty.
        """
        weights, intercept, raw_prediction = self._w_intercept_raw(coef, X)

        loss = self.base_loss.loss(
            y_true=y,
            raw_prediction=raw_prediction,
            sample_weight=sample_weight,
            n_threads=n_threads,
        )
        loss = loss.sum()

        norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
        return loss + 0.5 * l2_reg_strength * norm2_w

    def loss_gradient(
        self, coef, X, y, sample_weight=None, l2_reg_strength=0.0, n_threads=1
    ):
        """Computes the sum of loss and gradient w.r.t. coef.

        Parameters
        ----------
        coef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)
            Coefficients of a linear model.
            If shape (n_classes * n_dof,), the classes of one feature are contiguous,
            i.e. one reconstructs the 2d-array via
            coef.reshape((n_classes, -1), order="F").
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training data.
        y : contiguous array of shape (n_samples,)
            Observed, true target values.
        sample_weight : None or contiguous array of shape (n_samples,), default=None
            Sample weights.
        l2_reg_strength : float, default=0.0
            L2 regularization strength.
        n_threads : int, default=1
            Number of OpenMP threads to use.

        Returns
        -------
        loss : float
            Sum of losses per sample plus penalty.

        gradient : ndarray of shape coef.shape
            The gradient of the loss.
        """
        n_features, n_classes = X.shape[1], self.base_loss.n_classes
        n_dof = n_features + int(self.fit_intercept)
        weights, intercept, raw_prediction = self._w_intercept_raw(coef, X)

        loss, grad_per_sample = self.base_loss.loss_gradient(
            y_true=y,
            raw_prediction=raw_prediction,
            sample_weight=sample_weight,
            n_threads=n_threads,
        )
        loss = loss.sum()

        if not self.base_loss.is_multiclass:
            loss += 0.5 * l2_reg_strength * (weights @ weights)
            grad = np.empty_like(coef, dtype=weights.dtype)
            grad[:n_features] = X.T @ grad_per_sample + l2_reg_strength * weights
            if self.fit_intercept:
                grad[-1] = grad_per_sample.sum()
        else:
            loss += 0.5 * l2_reg_strength * squared_norm(weights)
            grad = np.empty((n_classes, n_dof), dtype=weights.dtype, order="F")
            # grad_per_sample.shape = (n_samples, n_classes)
            grad[:, :n_features] = grad_per_sample.T @ X + l2_reg_strength * weights
            if self.fit_intercept:
                grad[:, -1] = grad_per_sample.sum(axis=0)
            if coef.ndim == 1:
                grad = grad.ravel(order="F")

        return loss, grad

    def gradient(
        self, coef, X, y, sample_weight=None, l2_reg_strength=0.0, n_threads=1
    ):
        """Computes the gradient w.r.t. coef.

        Parameters
        ----------
        coef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)
            Coefficients of a linear model.
            If shape (n_classes * n_dof,), the classes of one feature are contiguous,
            i.e. one reconstructs the 2d-array via
            coef.reshape((n_classes, -1), order="F").
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training data.
        y : contiguous array of shape (n_samples,)
            Observed, true target values.
        sample_weight : None or contiguous array of shape (n_samples,), default=None
            Sample weights.
        l2_reg_strength : float, default=0.0
            L2 regularization strength.
        n_threads : int, default=1
            Number of OpenMP threads to use.

        Returns
        -------
        gradient : ndarray of shape coef.shape
            The gradient of the loss.
        """
        n_features, n_classes = X.shape[1], self.base_loss.n_classes
        n_dof = n_features + int(self.fit_intercept)
        weights, intercept, raw_prediction = self._w_intercept_raw(coef, X)

        grad_per_sample = self.base_loss.gradient(
            y_true=y,
            raw_prediction=raw_prediction,
            sample_weight=sample_weight,
            n_threads=n_threads,
        )

        if not self.base_loss.is_multiclass:
            grad = np.empty_like(coef, dtype=weights.dtype)
            grad[:n_features] = X.T @ grad_per_sample + l2_reg_strength * weights
            if self.fit_intercept:
                grad[-1] = grad_per_sample.sum()
            return grad
        else:
            grad = np.empty((n_classes, n_dof), dtype=weights.dtype, order="F")
            # grad_per_sample.shape = (n_samples, n_classes)
            grad[:, :n_features] = grad_per_sample.T @ X + l2_reg_strength * weights
            if self.fit_intercept:
                grad[:, -1] = grad_per_sample.sum(axis=0)
            if coef.ndim == 1:
                return grad.ravel(order="F")
            else:
                return grad

    def gradient_hessian_product(
        self, coef, X, y, sample_weight=None, l2_reg_strength=0.0, n_threads=1
    ):
        """Computes gradient and hessp (hessian product function) w.r.t. coef.

        Parameters
        ----------
        coef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)
            Coefficients of a linear model.
            If shape (n_classes * n_dof,), the classes of one feature are contiguous,
            i.e. one reconstructs the 2d-array via
            coef.reshape((n_classes, -1), order="F").
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training data.
        y : contiguous array of shape (n_samples,)
            Observed, true target values.
        sample_weight : None or contiguous array of shape (n_samples,), default=None
            Sample weights.
        l2_reg_strength : float, default=0.0
            L2 regularization strength.
        n_threads : int, default=1
            Number of OpenMP threads to use.

        Returns
        -------
        gradient : ndarray of shape coef.shape
            The gradient of the loss.

        hessp : callable
            Function that takes in a vector input of shape of gradient and
            returns matrix-vector product with hessian.
        """
        (n_samples, n_features), n_classes = X.shape, self.base_loss.n_classes
        n_dof = n_features + int(self.fit_intercept)
        weights, intercept, raw_prediction = self._w_intercept_raw(coef, X)

        if not self.base_loss.is_multiclass:
            gradient, hessian = self.base_loss.gradient_hessian(
                y_true=y,
                raw_prediction=raw_prediction,
                sample_weight=sample_weight,
                n_threads=n_threads,
            )
            grad = np.empty_like(coef, dtype=weights.dtype)
            grad[:n_features] = X.T @ gradient + l2_reg_strength * weights
            if self.fit_intercept:
                grad[-1] = gradient.sum()

            # Precompute as much as possible: hX, hX_sum and hessian_sum.
            hessian_sum = hessian.sum()
            if sparse.issparse(X):
                hX = sparse.dia_matrix((hessian, 0), shape=(n_samples, n_samples)) @ X
            else:
                hX = hessian[:, np.newaxis] * X

            if self.fit_intercept:
                # hX.sum(axis=0) returns a matrix object for sparse hX, hence
                # the squeeze/asarray.
                hX_sum = np.squeeze(np.asarray(hX.sum(axis=0)))

            def hessp(s):
                ret = np.empty_like(s)
                if sparse.issparse(X):
                    ret[:n_features] = X.T @ (hX @ s[:n_features])
                else:
                    ret[:n_features] = np.linalg.multi_dot([X.T, hX, s[:n_features]])
                ret[:n_features] += l2_reg_strength * s[:n_features]

                if self.fit_intercept:
                    ret[:n_features] += s[-1] * hX_sum
                    ret[-1] = hX_sum @ s[:n_features] + hessian_sum * s[-1]
                return ret

        else:
            # The base loss computes only the diagonal (in the classes) part of
            # the hessian, but the full hessian is needed for the matrix-vector
            # product. Therefore, use gradient_proba and assemble the product
            # inside hessp.
            gradient, proba = self.base_loss.gradient_proba(
                y_true=y,
                raw_prediction=raw_prediction,
                sample_weight=sample_weight,
                n_threads=n_threads,
            )
            grad = np.empty((n_classes, n_dof), dtype=weights.dtype, order="F")
            grad[:, :n_features] = gradient.T @ X + l2_reg_strength * weights
            if self.fit_intercept:
                grad[:, -1] = gradient.sum(axis=0)

            def hessp(s):
                s = s.reshape((n_classes, -1), order="F")  # shape = (n_classes, n_dof)
                if self.fit_intercept:
                    s_intercept = s[:, -1]
                    s = s[:, :-1]  # shape = (n_classes, n_features)
                else:
                    s_intercept = 0
                tmp = X @ s.T + s_intercept  # shape = (n_samples, n_classes)
                tmp += (-proba * tmp).sum(axis=1)[:, np.newaxis]
                tmp *= proba
                if sample_weight is not None:
                    tmp *= sample_weight[:, np.newaxis]
                hess_prod = np.empty((n_classes, n_dof), dtype=weights.dtype, order="F")
                hess_prod[:, :n_features] = tmp.T @ X + l2_reg_strength * s
                if self.fit_intercept:
                    hess_prod[:, -1] = tmp.sum(axis=0)
                if coef.ndim == 1:
                    return hess_prod.ravel(order="F")
                return hess_prod

            if coef.ndim == 1:
                return grad.ravel(order="F"), hessp

        return grad, hessp
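
# ---------------------------------------------------------------------------
# Minimal usage sketch (an illustration, not part of the module itself): fit an
# L2-penalized logistic regression by handing ``loss_gradient`` to
# scipy.optimize.minimize, mirroring how LogisticRegression's lbfgs solver uses
# this class internally. It assumes ``HalfBinomialLoss`` from the private
# module ``sklearn._loss.loss`` (present in recent scikit-learn versions; being
# private, it may move). Because of the relative imports above, run it as
# ``python -m sklearn.linear_model._linear_loss``.
if __name__ == "__main__":
    from scipy.optimize import minimize

    from .._loss.loss import HalfBinomialLoss

    rng = np.random.default_rng(0)
    X = rng.standard_normal(size=(200, 3))
    true_coef = np.array([1.0, -2.0, 0.5])
    # Binary targets as float64, as expected by the loss functions.
    y = (X @ true_coef + 0.3 > 0).astype(np.float64)

    linear_loss = LinearModelLoss(base_loss=HalfBinomialLoss(), fit_intercept=True)
    coef0 = np.zeros(X.shape[1] + 1)  # n_dof = n_features + 1

    # loss_gradient returns (loss, gradient); jac=True lets L-BFGS-B reuse a
    # single evaluation for both.
    result = minimize(
        linear_loss.loss_gradient,
        coef0,
        method="L-BFGS-B",
        jac=True,
        args=(X, y, None, 1.0, 1),  # sample_weight, l2_reg_strength, n_threads
    )
    print("weights:", result.x[:-1], "intercept:", result.x[-1])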