o
    tBhg[                     @   s   d dl Zd dlZddlmZmZmZmZ ddlmZ ddl	m
Z
mZ ddlmZ ddlmZmZ dd	lmZ dd
lmZ ddlmZ edZdd ZG dd deeeeZdS )    N   )BaseEstimatorMetaEstimatorMixinRegressorMixinclone)MultiOutputMixin)check_random_statecheck_consistent_length)sample_without_replacement)check_is_fitted_check_sample_weight   )LinearRegression)has_fit_parameter)ConvergenceWarningc                 C   sj   | t | }ttd| }ttd||  }|dkrdS |dkr$t dS tt tt|t| S )a  Determine number trials such that at least one outlier-free subset is
    sampled for the given inlier/outlier ratio.

    Parameters
    ----------
    n_inliers : int
        Number of inliers in the data.

    n_samples : int
        Total number of samples in the data.

    min_samples : int
        Minimum number of samples chosen randomly from original data.

    probability : float
        Probability (confidence) that one outlier-free sample is generated.

    Returns
    -------
    trials : int
        Number of trials.

    r   r   inf)floatmax_EPSILONabsnpceillog)	n_inliers	n_samplesmin_samplesprobabilityinlier_rationomdenom r    s/var/www/html/riverr-enterprise-integrations-main/venv/lib/python3.10/site-packages/sklearn/linear_model/_ransac.py_dynamic_max_trials   s   "r"   c                   @   s`   e Zd ZdZ	ddddddejejejddddddd	Zdd
dZdd Zdd Z	dd Z
dS )RANSACRegressora  RANSAC (RANdom SAmple Consensus) algorithm.

    RANSAC is an iterative algorithm for the robust estimation of parameters
    from a subset of inliers from the complete data set.

    Read more in the :ref:`User Guide <ransac_regression>`.

    Parameters
    ----------
    estimator : object, default=None
        Base estimator object which implements the following methods:

         * `fit(X, y)`: Fit model to given training data and target values.
         * `score(X, y)`: Returns the mean accuracy on the given test data,
           which is used for the stop criterion defined by `stop_score`.
           Additionally, the score is used to decide which of two equally
           large consensus sets is chosen as the better one.
         * `predict(X)`: Returns predicted values using the linear model,
           which is used to compute residual error using loss function.

        If `estimator` is None, then
        :class:`~sklearn.linear_model.LinearRegression` is used for
        target values of dtype float.

        Note that the current implementation only supports regression
        estimators.

    min_samples : int (>= 1) or float ([0, 1]), default=None
        Minimum number of samples chosen randomly from original data. Treated
        as an absolute number of samples for `min_samples >= 1`, treated as a
        relative number `ceil(min_samples * X.shape[0])` for
        `min_samples < 1`. This is typically chosen as the minimal number of
        samples necessary to estimate the given `estimator`. By default a
        ``sklearn.linear_model.LinearRegression()`` estimator is assumed and
        `min_samples` is chosen as ``X.shape[1] + 1``. This parameter is highly
        dependent upon the model, so if a `estimator` other than
        :class:`linear_model.LinearRegression` is used, the user is
        encouraged to provide a value.

        .. deprecated:: 1.0
           Not setting `min_samples` explicitly will raise an error in version
           1.2 for models other than
           :class:`~sklearn.linear_model.LinearRegression`. To keep the old
           default behavior, set `min_samples=X.shape[1] + 1` explicitly.

    residual_threshold : float, default=None
        Maximum residual for a data sample to be classified as an inlier.
        By default the threshold is chosen as the MAD (median absolute
        deviation) of the target values `y`. Points whose residuals are
        strictly equal to the threshold are considered as inliers.

    is_data_valid : callable, default=None
        This function is called with the randomly selected data before the
        model is fitted to it: `is_data_valid(X, y)`. If its return value is
        False the current randomly chosen sub-sample is skipped.

    is_model_valid : callable, default=None
        This function is called with the estimated model and the randomly
        selected data: `is_model_valid(model, X, y)`. If its return value is
        False the current randomly chosen sub-sample is skipped.
        Rejecting samples with this function is computationally costlier than
        with `is_data_valid`. `is_model_valid` should therefore only be used if
        the estimated model is needed for making the rejection decision.

    max_trials : int, default=100
        Maximum number of iterations for random sample selection.

    max_skips : int, default=np.inf
        Maximum number of iterations that can be skipped due to finding zero
        inliers or invalid data defined by ``is_data_valid`` or invalid models
        defined by ``is_model_valid``.

        .. versionadded:: 0.19

    stop_n_inliers : int, default=np.inf
        Stop iteration if at least this number of inliers are found.

    stop_score : float, default=np.inf
        Stop iteration if score is greater equal than this threshold.

    stop_probability : float in range [0, 1], default=0.99
        RANSAC iteration stops if at least one outlier-free set of the training
        data is sampled in RANSAC. This requires to generate at least N
        samples (iterations)::

            N >= log(1 - probability) / log(1 - e**m)

        where the probability (confidence) is typically set to high value such
        as 0.99 (the default) and e is the current fraction of inliers w.r.t.
        the total number of samples.

    loss : str, callable, default='absolute_error'
        String inputs, 'absolute_error' and 'squared_error' are supported which
        find the absolute error and squared error per sample respectively.

        If ``loss`` is a callable, then it should be a function that takes
        two arrays as inputs, the true and predicted value and returns a 1-D
        array with the i-th value of the array corresponding to the loss
        on ``X[i]``.

        If the loss on a sample is greater than the ``residual_threshold``,
        then this sample is classified as an outlier.

        .. versionadded:: 0.18

        .. deprecated:: 1.0
            The loss 'squared_loss' was deprecated in v1.0 and will be removed
            in version 1.2. Use `loss='squared_error'` which is equivalent.

        .. deprecated:: 1.0
            The loss 'absolute_loss' was deprecated in v1.0 and will be removed
            in version 1.2. Use `loss='absolute_error'` which is equivalent.

    random_state : int, RandomState instance, default=None
        The generator used to initialize the centers.
        Pass an int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    base_estimator : object, default="deprecated"
        Use `estimator` instead.

        .. deprecated:: 1.1
            `base_estimator` is deprecated and will be removed in 1.3.
            Use `estimator` instead.

    Attributes
    ----------
    estimator_ : object
        Best fitted model (copy of the `estimator` object).

    n_trials_ : int
        Number of random selection trials until one of the stop criteria is
        met. It is always ``<= max_trials``.

    inlier_mask_ : bool array of shape [n_samples]
        Boolean mask of inliers classified as ``True``.

    n_skips_no_inliers_ : int
        Number of iterations skipped due to finding zero inliers.

        .. versionadded:: 0.19

    n_skips_invalid_data_ : int
        Number of iterations skipped due to invalid data defined by
        ``is_data_valid``.

        .. versionadded:: 0.19

    n_skips_invalid_model_ : int
        Number of iterations skipped due to an invalid model defined by
        ``is_model_valid``.

        .. versionadded:: 0.19

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    HuberRegressor : Linear regression model that is robust to outliers.
    TheilSenRegressor : Theil-Sen Estimator robust multivariate regression model.
    SGDRegressor : Fitted by minimizing a regularized empirical loss with SGD.

    References
    ----------
    .. [1] https://en.wikipedia.org/wiki/RANSAC
    .. [2] https://www.sri.com/sites/default/files/publications/ransac-publication.pdf
    .. [3] http://www.bmva.org/bmvc/2009/Papers/Paper355/Paper355.pdf

    Examples
    --------
    >>> from sklearn.linear_model import RANSACRegressor
    >>> from sklearn.datasets import make_regression
    >>> X, y = make_regression(
    ...     n_samples=200, n_features=2, noise=4.0, random_state=0)
    >>> reg = RANSACRegressor(random_state=0).fit(X, y)
    >>> reg.score(X, y)
    0.9885...
    >>> reg.predict(X[:1,])
    array([-31.9417...])
    Nd   gGz?absolute_error
deprecated)r   residual_thresholdis_data_validis_model_valid
max_trials	max_skipsstop_n_inliers
stop_scorestop_probabilitylossrandom_statebase_estimatorc                C   sR   || _ || _|| _|| _|| _|| _|| _|| _|	| _|
| _	|| _
|| _|| _d S N)	estimatorr   r'   r(   r)   r*   r+   r,   r-   r.   r0   r/   r1   )selfr3   r   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r    r    r!   __init__   s   
zRANSACRegressor.__init__c           !      C   s  t ddd}t dd}| j||||fd\}}t|| | jdkr+tdt | j| _| jdur6t| j}nt	 }| j
du rYt|t	sQtd	|jd
 d
  t |jd
 d
 }n1d| j
  k rdd
k rrn nt| j
|jd  }n| j
d
kr| j
d
 dkrtd| j
}ntd||jd krtd|jd  | jdk s| jd
krtd| jdu rtt|t| }n| j}| jdv r| jdkrtdt |jd
krdd }	n5dd }	n0| jdv r| jdkrtdt |jd
krdd }	ndd }	nt| jr| j}	ntd| j t| j}
z|j|
d W n
 ty$   Y nw t|d}t|j}|dur=|s=td| |durGt||}d
}tj }d}d}d}d}d| _d| _ d| _!|jd }t"|}d| _#| j$}| j#|k rD|  j#d
7  _#| j| j  | j! | j%krnt&|||
d}|| }|| }| j'dur| '||s|  j d
7  _ qn|du r|(|| n
|j(|||| d | j)dur| )|||s|  j!d
7  _!qn|*|}|	||}||k}t+|}||k r|  jd
7  _qn|| }|| }|| }|,||} ||kr| |k rqn|}| }|}|}|}|}t-|t.|||| j}|| j/ks=|| j0kr>n| j#|k st|du r^| j| j  | j! | j%krZtd td!| j| j  | j! | j%krqtd"t1 |du r}|(|| n
|j(|||| d || _2|| _3| S )#a  Fit estimator using RANSAC algorithm.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training data.

        y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Target values.

        sample_weight : array-like of shape (n_samples,), default=None
            Individual weights for each sample
            raises error if sample_weight is passed and estimator
            fit method does not support it.

            .. versionadded:: 0.18

        Returns
        -------
        self : object
            Fitted `RANSACRegressor` estimator.

        Raises
        ------
        ValueError
            If no valid consensus set could be found. This occurs if
            `is_data_valid` and `is_model_valid` return False for all
            `max_trials` randomly chosen sub-samples.
        csrF)accept_sparseforce_all_finite)	ensure_2d)validate_separatelyr&   zV`base_estimator` was renamed to `estimator` in version 1.1 and will be removed in 1.3.NzFrom version 1.2, `min_samples` needs to be explicitly set otherwise an error will be raised. To keep the current behavior, you need to set `min_samples` to `X.shape[1] + 1 that is r   r   z4Absolute number of samples must be an integer value.z4Value for `min_samples` must be scalar and positive.zG`min_samples` may not be larger than number of samples: n_samples = %d.z+`stop_probability` must be in range [0, 1].)r%   absolute_lossr;   zThe loss 'absolute_loss' was deprecated in v1.0 and will be removed in version 1.2. Use `loss='absolute_error'` which is equivalent.c                 S   s   t | | S r2   )r   r   y_truey_predr    r    r!   <lambda>x  s    z%RANSACRegressor.fit.<locals>.<lambda>c                 S   s   t jt | | ddS )Nr   axis)r   sumr   r<   r    r    r!   r?   z  s    )squared_errorsquared_lossrD   zThe loss 'squared_loss' was deprecated in v1.0 and will be removed in version 1.2. Use `loss='squared_error'` which is equivalent.c                 S   s   | | d S )Nr   r    r<   r    r    r!   r?     s    c                 S   s   t j| | d ddS )Nr   r   r@   )r   rB   r<   r    r    r!   r?     s    zHloss should be 'absolute_error', 'squared_error' or a callable. Got %s. )r0   sample_weightz\%s does not support sample_weight. Samples weights are only used for the calibration itself.)rE   zRANSAC skipped more iterations than `max_skips` without finding a valid consensus set. Iterations were skipped because each randomly chosen sub-sample failed the passing criteria. See estimator attributes for diagnostics (n_skips*).zRANSAC could not find a valid consensus set. All `max_trials` iterations were skipped because each randomly chosen sub-sample failed the passing criteria. See estimator attributes for diagnostics (n_skips*).zRANSAC found a valid consensus set but exited early due to skipping more iterations than `max_skips`. See estimator attributes for diagnostics (n_skips*).)4dict_validate_datar	   r1   warningswarnFutureWarningr3   r   r   r   
isinstanceshaper   r   
ValueErrorr.   r'   medianr   r/   ndimcallabler   r0   
set_paramsr   type__name__r   r   n_skips_no_inliers_n_skips_invalid_data_n_skips_invalid_model_arange	n_trials_r*   r+   r
   r(   fitr)   predictrB   scoreminr"   r,   r-   r   
estimator_inlier_mask_)!r4   XyrE   check_X_paramscheck_y_paramsr3   r   r'   loss_functionr0   estimator_fit_has_sample_weightestimator_namen_inliers_best
score_bestinlier_mask_bestX_inlier_besty_inlier_bestinlier_best_idxs_subsetr   sample_idxsr*   subset_idxsX_subsety_subsetr>   residuals_subsetinlier_mask_subsetn_inliers_subsetinlier_idxs_subsetX_inlier_subsety_inlier_subsetscore_subsetr    r    r!   rY     sz  !


































U
	zRANSACRegressor.fitc                 C   s&   t |  | j|dddd}| j|S )au  Predict using the estimated model.

        This is a wrapper for `estimator_.predict(X)`.

        Parameters
        ----------
        X : {array-like or sparse matrix} of shape (n_samples, n_features)
            Input data.

        Returns
        -------
        y : array, shape = [n_samples] or [n_samples, n_targets]
            Returns predicted values.
        FTr8   r7   reset)r   rG   r]   rZ   )r4   r_   r    r    r!   rZ   >  s   zRANSACRegressor.predictc                 C   s(   t |  | j|dddd}| j||S )a  Return the score of the prediction.

        This is a wrapper for `estimator_.score(X, y)`.

        Parameters
        ----------
        X : (array-like or sparse matrix} of shape (n_samples, n_features)
            Training data.

        y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Target values.

        Returns
        -------
        z : float
            Score of the prediction.
        FTrw   )r   rG   r]   r[   )r4   r_   r`   r    r    r!   r[   V  s   zRANSACRegressor.scorec                 C   s   dddiiS )N_xfail_checkscheck_sample_weights_invariancez8zero sample_weight is not equivalent to removing samplesr    )r4   r    r    r!   
_more_tagsq  s
   zRANSACRegressor._more_tagsr2   )rS   
__module____qualname____doc__r   r   r5   rY   rZ   r[   r{   r    r    r    r!   r#   6   s0     A
   )r#   )numpyr   rH   baser   r   r   r   r   utilsr   r	   utils.randomr
   utils.validationr   r   _baser   r   
exceptionsr   spacingr   r"   r#   r    r    r    r!   <module>   s   

"