
     h!                        d Z ddlmZ ddlmZmZ ddlmZ ddlZddl	Z
ddlZddlmZ ddlmZ dd	lmZ d
dlmZ ddlmZ  eddd          Z eddd          ZdZ ej        e          Zde
j        fdZd Zd ZddddZdS )a  
=============================
Species distribution dataset
=============================

This dataset represents the geographic distribution of species.
The dataset is provided by Phillips et. al. (2006).

The two species are:

 - `"Bradypus variegatus"
   <http://www.iucnredlist.org/details/3038/0>`_ ,
   the Brown-throated Sloth.

 - `"Microryzomys minutus"
   <http://www.iucnredlist.org/details/13408/0>`_ ,
   also known as the Forest Small Rice Rat, a rodent that lives in Peru,
   Colombia, Ecuador, Peru, and Venezuela.

References
----------

`"Maximum entropy modeling of species geographic distributions"
<http://rob.schapire.net/papers/ecolmod.pdf>`_ S. J. Phillips,
R. P. Anderson, R. E. Schapire - Ecological Modelling, 190:231-259, 2006.

Notes
-----

For an example of using this dataset, see
:ref:`examples/applications/plot_species_distribution_modeling.py
<sphx_glr_auto_examples_applications_plot_species_distribution_modeling.py>`.
    )BytesIO)makedirsremove)existsN   )get_data_home)_fetch_remote)RemoteFileMetadata   )Bunch)_pkl_filepathzsamples.zipz.https://ndownloader.figshare.com/files/5976075@abb07ad284ac50d9e6d20f1c4211e0fd3c098f7f85955e89d321ee8efe37ac28)filenameurlchecksumzcoverages.zipz.https://ndownloader.figshare.com/files/5976078@4d862674d72e79d6cee77e63b98651ec7926043ba7d39dcb31329cf3f6073807zspecies_coverage.pkz   c                       fdt          |          D             }d t          fd|D                       }t          j         |          }t	          |d                   }|dk    rd||<   |S )zjLoad a coverage file from an open file object.

    This will return a numpy array of the given dtype
    c                 8    g | ]}                                 S  )readline).0_Fs     c/var/www/html/Sam_Eipo/venv/lib/python3.11/site-packages/sklearn/datasets/_species_distributions.py
<listcomp>z"_load_coverage.<locals>.<listcomp>R   s!    999qajjll999    c                     |                                  d         t          |                                  d                   fS )Nr   r   )splitfloat)ts    r   <lambda>z _load_coverage.<locals>.<lambda>S   s+    AGGIIaL%		!*=*=> r   c                 &    g | ]} |          S r   r   )r   line
make_tuples     r   r   z"_load_coverage.<locals>.<listcomp>T   s#    777::d##777r   dtypes   NODATA_valuei)rangedictnploadtxtint)r   header_lengthr'   headerMnodatar%   s   `     @r   _load_coverager1   M   s    
 :999E-$8$8999F>>J777777788F

1E"""A())F&	Hr   c                     |                                                      d                                                              d          }t	          j        | ddd          }||j        _        |S )zLoad csv file.

    Parameters
    ----------
    F : file object
        CSV file open in byte mode.

    Returns
    -------
    rec : np.ndarray
        record array representing the data
    ascii,r   z	a22,f4,f4)skiprows	delimiterr'   )r   decodestripr   r*   r+   r'   names)r   r9   recs      r   	_load_csvr;   ]   s_     JJLL((..0066s;;E
*Qc
E
E
ECCIOJr   c                     | j         | j        z   }|| j        | j        z  z   }| j        | j        z   }|| j        | j        z  z   }t          j        ||| j                  }t          j        ||| j                  }||fS )a%  Construct the map grid from the batch object

    Parameters
    ----------
    batch : Batch object
        The object returned by :func:`fetch_species_distributions`

    Returns
    -------
    (xgrid, ygrid) : 1-D arrays
        The grid corresponding to the values in batch.coverages
    )x_left_lower_corner	grid_sizeNxy_left_lower_cornerNyr*   arange)batchxminxmaxyminymaxxgridygrids          r   construct_gridsrJ   q   s~     $u6D58eo-.D$u6D58eo-.D IdD%/22EIdD%/22E5>r   T)	data_homedownload_if_missingc                    t          |           } t          |           st          |            t          ddddd          }t          j        }t          | t                    }t          |          s|st          d          t          
                    dt          j        d	|            t          t          | 
          }t	          j        |          5 }|j        D ]=}t!          ||                   }d|v rt#          |          }	d|v rt#          |          }
>	 ddd           n# 1 swxY w Y   t%          |           t          
                    dt&          j        d	|            t          t&          | 
          }t	          j        |          5 }g }|j        D ]f}t!          ||                   }t                              d                    |                     |                    t/          |                     gt	          j        ||          }ddd           n# 1 swxY w Y   t%          |           t3          d||
|	d|}t5          j        ||d           nt5          j        |          }|S )aX
  Loader for species distribution dataset from Phillips et. al. (2006).

    Read more in the :ref:`User Guide <datasets>`.

    Parameters
    ----------
    data_home : str, default=None
        Specify another download and cache folder for the datasets. By default
        all scikit-learn data is stored in '~/scikit_learn_data' subfolders.

    download_if_missing : bool, default=True
        If False, raise a IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    data : :class:`~sklearn.utils.Bunch`
        Dictionary-like object, with the following attributes.

        coverages : array, shape = [14, 1592, 1212]
            These represent the 14 features measured
            at each point of the map grid.
            The latitude/longitude values for the grid are discussed below.
            Missing data is represented by the value -9999.
        train : record array, shape = (1624,)
            The training points for the data.  Each point has three fields:

            - train['species'] is the species name
            - train['dd long'] is the longitude, in degrees
            - train['dd lat'] is the latitude, in degrees
        test : record array, shape = (620,)
            The test points for the data.  Same format as the training data.
        Nx, Ny : integers
            The number of longitudes (x) and latitudes (y) in the grid
        x_left_lower_corner, y_left_lower_corner : floats
            The (x,y) position of the lower-left corner, in degrees
        grid_size : float
            The spacing between points of the grid, in degrees

    Notes
    -----

    This dataset represents the geographic distribution of species.
    The dataset is provided by Phillips et. al. (2006).

    The two species are:

    - `"Bradypus variegatus"
      <http://www.iucnredlist.org/details/3038/0>`_ ,
      the Brown-throated Sloth.

    - `"Microryzomys minutus"
      <http://www.iucnredlist.org/details/13408/0>`_ ,
      also known as the Forest Small Rice Rat, a rodent that lives in Peru,
      Colombia, Ecuador, Peru, and Venezuela.

    - For an example of using this dataset with scikit-learn, see
      :ref:`examples/applications/plot_species_distribution_modeling.py
      <sphx_glr_auto_examples_applications_plot_species_distribution_modeling.py>`.

    References
    ----------

    * `"Maximum entropy modeling of species geographic distributions"
      <http://rob.schapire.net/papers/ecolmod.pdf>`_
      S. J. Phillips, R. P. Anderson, R. E. Schapire - Ecological Modelling,
      190:231-259, 2006.
    g33333Wi  gfffffLi8  g?)r=   r?   r@   rA   r>   z1Data not found and `download_if_missing` is FalsezDownloading species data from z to )dirnametraintestNzDownloading coverage data from z - converting {}r&   )	coveragesrP   rO   	   )compressr   )r   r   r   r)   r*   int16r   DATA_ARCHIVE_NAMEIOErrorloggerinfoSAMPLESr   r	   loadfilesr   r;   r   	COVERAGESdebugformatappendr1   asarrayr   joblibdump)rK   rL   extra_paramsr'   archive_pathsamples_pathXffhandlerO   rP   coverages_pathrQ   bunchs                 r   fetch_species_distributionsrk      s   J i((I) 
 !"  L HE ,=>>L, *" 	OMNNNYYWXXX$Wi@@@W\"" 	.aW . .!!A$--a<<%g..EQ;;$W--D.	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	|9B		R	
 	
 	
 'y)DDDW^$$ 	;IW : :!!A$--/66q99:::  !8!89999
9E:::I	; 	; 	; 	; 	; 	; 	; 	; 	; 	; 	; 	; 	; 	; 	; 	~R	ERR\RRE<!44444L))Ls&   AD++D/2D/BH,,H03H0)__doc__ior   osr   r   os.pathr   loggingnumpyr*   ra    r   _baser	   r
   utilsr   r   rY   r\   rU   	getLogger__name__rW   rT   r1   r;   rJ   rk   r   r   r   <module>rw      s     N                                             % % % % % %                   
8O   8O  	 +  
	8	$	$ %&RX       (  6 .2t w w w w w w wr   