
     hL              
          d Z ddlmZmZmZ ddlmZmZmZ ddl	Z	ddl
ZddlmZ ddlmZmZmZmZ dd	lmZ  e	j        e          Z ed
dd          Z eddd          Z eddd           eddd           eddd          fZd*dZd Z	 d+dZddddd ed d!           ed"d#          fddd$d%Z	 d,d&Z d'dddd ed d!           ed"d#          fdd(d)Z!dS )-zLabeled Faces in the Wild (LFW) dataset

This dataset is a collection of JPEG pictures of famous people collected
over the internet, all details are available on the official website:

    http://vis-www.cs.umass.edu/lfw/
    )listdirmakedirsremove)joinexistsisdirN)Memory   )get_data_home_fetch_remoteRemoteFileMetadata
load_descr   )Bunchzlfw.tgzz.https://ndownloader.figshare.com/files/5976018@055f7d9c632d7370e6fb4afc7468d40f970c34a80d4c6f50ffec63f5a8d536c0)filenameurlchecksumzlfw-funneled.tgzz.https://ndownloader.figshare.com/files/5976015@b47c8422c8cded889dc5a13418c4bc2abbda121092b3533a83306f90d900100apairsDevTrain.txtz.https://ndownloader.figshare.com/files/5976012@1d454dada7dfeca0e7eab6f65dc4e97a6312d44cf142207be28d688be92aabfapairsDevTest.txtz.https://ndownloader.figshare.com/files/5976009@7cb06600ea8b2814ac26e946201cdb304296262aad67d046a16a7ec85d0ff87c	pairs.txtz.https://ndownloader.figshare.com/files/5976006@ea42330c62c92989f9d7c03237ed5d591365e89b3e649747777b70e692dc1592Tc                 r   t          |           } t          | d          }t          |          st          |           t          D ]l}t          ||j                  }t          |          sF|r2t                              d|j                   t          ||           Zt          d|z            m|rt          |d          }t          }nt          |d          }t          }t          |          st          ||j                  }t          |          sF|r2t                              d|j                   t          ||           nt          d|z            d	d
l}	t                              d|           |	                    |d                              |           t#          |           ||fS )z0Helper function to download any missing LFW data)	data_homelfw_homezDownloading LFW metadata: %s)dirnamez%s is missinglfw_funneledlfwz!Downloading LFW data (~200MB): %sr   Nz$Decompressing the data archive to %szr:gz)path)r   r   r   r   TARGETSr   loggerinfor   r   IOErrorFUNNELED_ARCHIVEARCHIVEtarfiledebugopen
extractallr   )
r   funneleddownload_if_missingr   targettarget_filepathdata_folder_patharchivearchive_pathr)   s
             Q/var/www/html/Sam_Eipo/venv/lib/python3.11/site-packages/sklearn/datasets/_lfw.py_check_fetch_lfwr5   J   s    	222IIz**H(  A Ax99o&& 	A" A:FJGGGfh77777o?@@@	A  .99"%00"## Hg&677l## 	>" >?MMMgx88888o<===;=MNNN\6**5585DDD|%%%    c                 d   	 ddl m} n# t          $ r t          d          w xY wt          dd          t          dd          f}||}n't	          d t          ||          D                       }|\  }}|j        |j        z
  |j        pdz  }|j        |j        z
  |j        pdz  }	|3t          |          }t          ||z            }t          ||	z            }	t          |           }
|s$t          j        |
||	ft          j                  }n$t          j        |
||	d	ft          j                  }t          |           D ]\  }}|d
z  dk    rt                               d|dz   |
           |                    |          }|                    |j        |j        |j        |j        f          }||                    |	|f          }t          j        |t          j                  }|j        dk    rt/          d|z            |dz  }|s|                    d          }|||df<   |S )zInternally used to load imagesr   )ImagezThe Python Imaging Library (PIL) is required to load data from jpeg files. Please refer to https://pillow.readthedocs.io/en/stable/installation.html for installing PIL.   Nc              3   $   K   | ]\  }}|p|V  d S )N ).0sdss      r4   	<genexpr>z_load_imgs.<locals>.<genexpr>   s*      GG51bqwBGGGGGGr6   r
   dtype   i  zLoading face #%05d / %05dzLFailed to read the image file %s, Please make sure that libjpeg is installedg     o@r   )axis.)PILr8   ImportErrorslicetuplezipstopstartstepfloatintlennpzerosfloat32	enumerater$   r*   r+   cropresizeasarrayndimRuntimeErrormean)
file_pathsslice_colorrT   r8   default_sliceh_slicew_slicehwn_facesfacesi	file_pathpil_imgfaces                   r4   
_load_imgsrg   u   st   
 
 
 
"
 
 	

 1c]]E!SMM2M~GGC,F,FGGGGGGW		%7<+<1=A		%7<+<1=Av
OO
OO *ooG ?'1a
;;;'1a+2:>>> "*--  9t8q==LL4a!eWEEE **Y'',,]GM7<F
 
 nnaV,,Gz'4449>>=?HI  
 	 	% 99!9$$DafLs   	 #Fc                    g g }}t          t          |                     D ]}t          | |          t                    s"fdt          t                              D             }t	          |          }	|	|k    rD|                    dd          }|                    |g|	z             |                    |           t	          |          }
|
dk    rt          d|z            t          j	        |          }t          j
        ||          }t          ||||          }t          j        |
          }t          j                            d                              |           ||         ||         }}|||fS )z~Perform the actual data loading for the lfw people dataset

    This operation is meant to be cached by a joblib wrapper.
    c                 0    g | ]}t          |          S r;   )r   )r<   ffolder_paths     r4   
<listcomp>z%_fetch_lfw_people.<locals>.<listcomp>   s#    LLL!k1%%LLLr6   _ r   z*min_faces_per_person=%d is too restrictive*   )sortedr   r   r   rN   replaceextend
ValueErrorrO   uniquesearchsortedrg   arangerandomRandomStateshuffle)r1   rZ   r[   rT   min_faces_per_personperson_namesrY   person_namepaths
n_picturesra   target_namesr/   rb   indicesrk   s                  @r4   _fetch_lfw_peopler      s     "2*Lg&67788 	% 	%+[99[!! 	LLLLvgk6J6J/K/KLLLZZ
---%--c377K
 :;;;e$$$*ooG!||8;OO
 
 	
 9\**L_\<88Fz65&99E i  GI"%%g...'NF7O6E&,&&r6   g      ?F      N      )r   r-   rT   rz   r[   rZ   r.   
return_X_yc                 z   t          | ||          \  }}	t                              d|           t          |dd          }
|
                    t
                    } ||	||||          \  }}}|                    t          |          d          }t          d          }|r||fS t          |||||	          S )
a  Load the Labeled Faces in the Wild (LFW) people dataset (classification).

    Download it if necessary.

    =================   =======================
    Classes                                5749
    Samples total                         13233
    Dimensionality                         5828
    Features            real, between 0 and 255
    =================   =======================

    Read more in the :ref:`User Guide <labeled_faces_in_the_wild_dataset>`.

    Parameters
    ----------
    data_home : str, default=None
        Specify another download and cache folder for the datasets. By default
        all scikit-learn data is stored in '~/scikit_learn_data' subfolders.

    funneled : bool, default=True
        Download and use the funneled variant of the dataset.

    resize : float or None, default=0.5
        Ratio used to resize the each face picture. If `None`, no resizing is
        performed.

    min_faces_per_person : int, default=None
        The extracted dataset will only retain pictures of people that have at
        least `min_faces_per_person` different pictures.

    color : bool, default=False
        Keep the 3 RGB channels instead of averaging them to a single
        gray level channel. If color is True the shape of the data has
        one more dimension than the shape with color = False.

    slice_ : tuple of slice, default=(slice(70, 195), slice(78, 172))
        Provide a custom 2D slice (height, width) to extract the
        'interesting' part of the jpeg files and avoid use statistical
        correlation from the background.

    download_if_missing : bool, default=True
        If False, raise a IOError if the data is not locally available
        instead of trying to download the data from the source site.

    return_X_y : bool, default=False
        If True, returns ``(dataset.data, dataset.target)`` instead of a Bunch
        object. See below for more information about the `dataset.data` and
        `dataset.target` object.

        .. versionadded:: 0.20

    Returns
    -------
    dataset : :class:`~sklearn.utils.Bunch`
        Dictionary-like object, with the following attributes.

        data : numpy array of shape (13233, 2914)
            Each row corresponds to a ravelled face image
            of original size 62 x 47 pixels.
            Changing the ``slice_`` or resize parameters will change the
            shape of the output.
        images : numpy array of shape (13233, 62, 47)
            Each row is a face image corresponding to one of the 5749 people in
            the dataset. Changing the ``slice_``
            or resize parameters will change the shape of the output.
        target : numpy array of shape (13233,)
            Labels associated to each face image.
            Those labels range from 0-5748 and correspond to the person IDs.
        target_names : numpy array of shape (5749,)
            Names of all persons in the dataset.
            Position in array corresponds to the person ID in the target array.
        DESCR : str
            Description of the Labeled Faces in the Wild (LFW) dataset.

    (data, target) : tuple if ``return_X_y`` is True
        A tuple of two ndarray. The first containing a 2D array of
        shape (n_samples, n_features) with each row representing one
        sample and each column representing the features. The second
        ndarray of shape (n_samples,) containing the target samples.

        .. versionadded:: 0.20
    r   r-   r.   z Loading LFW people faces from %s   r   locationcompressverbose)rT   rz   r[   rZ   lfw.rst)dataimagesr/   r   DESCR)
r5   r$   r*   r	   cacher   reshaperN   r   r   )r   r-   rT   rz   r[   rZ   r.   r   r   r1   m	load_funcrb   r/   r   Xfdescrs                    r4   fetch_lfw_peopler      s    | "2hDW" " "H LL3X>>> 	1a888A)**I #,)1# # #E6< 	c%jj"%%A	""F &y uV,f   r6   c           
         t          | d          5 }d |D             }ddd           n# 1 swxY w Y   d |D             }t          |          }t          j        |t                    }	t                      }
t          |          D ]{\  }}t          |          dk    rFd|	|<   |d         t	          |d                   dz
  f|d         t	          |d	                   dz
  ff}npt          |          d
k    rFd|	|<   |d         t	          |d                   dz
  f|d	         t	          |d                   dz
  ff}nt          d|dz   |fz            t          |          D ]\  }\  }}	 t          ||          }n.# t          $ r! t          |t          |d                    }Y nw xY wt          t          t          |                              }t          |||                   }|
                    |           }t          |
|||          }t          |j                  }|                    d          }|                    dd	           |                    d|d	z             ||_        ||	t          j        ddg          fS )z}Perform the actual data loading for the LFW pairs dataset

    This operation is meant to be cached by a joblib wrapper.
    rbc                     g | ];}|                                                                                     d           <S )	)decodestripsplit)r<   lns     r4   rl   z$_fetch_lfw_pairs.<locals>.<listcomp>w  s:    LLL2ryy{{((**0066LLLr6   Nc                 8    g | ]}t          |          d k    |S )r   )rN   )r<   sls     r4   rl   z$_fetch_lfw_pairs.<locals>.<listcomp>x  s#    :::c"ggkk"kkkr6   r@   rB   r
   r   r      zinvalid line %d: %rzUTF-8zDifferent personszSame person)r+   rN   rO   rP   rM   listrR   rs   r   	TypeErrorstrrp   r   appendrg   shapepopinsertarray)index_file_pathr1   rZ   r[   rT   
index_filesplit_lines
pair_specsn_pairsr/   rY   rc   
componentspairjnameidxperson_folder	filenamesrd   pairsr   ra   s                          r4   _fetch_lfw_pairsr   m  s    
ot	$	$ M
LLLLLM M M M M M M M M M M M M M M::{:::J*ooG XgS)))FJ":.. ) ):z??aF1IAJqM 2 2Q 67AJqM 2 2Q 67DD __!!F1IAJqM 2 2Q 67AJqM 2 2Q 67DD
 2a!eZ5HHIII'oo 	) 	)NA{cK $%5t < < K K K $%5s47I7I J JKVGM$:$:;;<<I]IcN;;Ii((((	) z65&99EEiillG	LLA	LLGqL!!!EK&"($7#GHHHHs   *../F  (F+*F+train)subsetr   r-   rT   r[   rZ   r.   c                 .   t          |||          \  }}t                              d| |           t          |dd          }	|	                    t
                    }
dddd	}| |vrAt          d
| dt          t          |	                                                              t          |||                    } |
|||||          \  }}}t          d          }t          |                    t          |          d          ||||          S )a  Load the Labeled Faces in the Wild (LFW) pairs dataset (classification).

    Download it if necessary.

    =================   =======================
    Classes                                   2
    Samples total                         13233
    Dimensionality                         5828
    Features            real, between 0 and 255
    =================   =======================

    In the official `README.txt`_ this task is described as the
    "Restricted" task.  As I am not sure as to implement the
    "Unrestricted" variant correctly, I left it as unsupported for now.

      .. _`README.txt`: http://vis-www.cs.umass.edu/lfw/README.txt

    The original images are 250 x 250 pixels, but the default slice and resize
    arguments reduce them to 62 x 47.

    Read more in the :ref:`User Guide <labeled_faces_in_the_wild_dataset>`.

    Parameters
    ----------
    subset : {'train', 'test', '10_folds'}, default='train'
        Select the dataset to load: 'train' for the development training
        set, 'test' for the development test set, and '10_folds' for the
        official evaluation set that is meant to be used with a 10-folds
        cross validation.

    data_home : str, default=None
        Specify another download and cache folder for the datasets. By
        default all scikit-learn data is stored in '~/scikit_learn_data'
        subfolders.

    funneled : bool, default=True
        Download and use the funneled variant of the dataset.

    resize : float, default=0.5
        Ratio used to resize the each face picture.

    color : bool, default=False
        Keep the 3 RGB channels instead of averaging them to a single
        gray level channel. If color is True the shape of the data has
        one more dimension than the shape with color = False.

    slice_ : tuple of slice, default=(slice(70, 195), slice(78, 172))
        Provide a custom 2D slice (height, width) to extract the
        'interesting' part of the jpeg files and avoid use statistical
        correlation from the background.

    download_if_missing : bool, default=True
        If False, raise a IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    data : :class:`~sklearn.utils.Bunch`
        Dictionary-like object, with the following attributes.

        data : ndarray of shape (2200, 5828). Shape depends on ``subset``.
            Each row corresponds to 2 ravel'd face images
            of original size 62 x 47 pixels.
            Changing the ``slice_``, ``resize`` or ``subset`` parameters
            will change the shape of the output.
        pairs : ndarray of shape (2200, 2, 62, 47). Shape depends on ``subset``
            Each row has 2 face images corresponding
            to same or different person from the dataset
            containing 5749 people. Changing the ``slice_``,
            ``resize`` or ``subset`` parameters will change the shape of the
            output.
        target : numpy array of shape (2200,). Shape depends on ``subset``.
            Labels associated to each pair of images.
            The two label values being different persons or the same person.
        target_names : numpy array of shape (2,)
            Explains the target values of the target array.
            0 corresponds to "Different person", 1 corresponds to "same person".
        DESCR : str
            Description of the Labeled Faces in the Wild (LFW) dataset.
    r   zLoading %s LFW pairs from %sr   r   r   r   r   r   )r   test10_foldszsubset='z' is invalid: should be one of )rT   r[   rZ   r   r   )r   r   r/   r   r   )r5   r$   r*   r	   r   r   rs   r   rp   keysr   r   r   r   rN   )r   r   r-   rT   r[   rZ   r.   r   r1   r   r   label_filenamesr   r   r/   r   r   s                    r4   fetch_lfw_pairsr     sY   t "2hDW" " "H LL/BBB 	1a888A())I %" O
 _$$jvvtF?#7#7#9#9::;;;=
 
 	
 8_V%<==O #,))&f# # #E6< 	""F ]]3u::r**!   r6   )NTT)NFNr   )NFN)"__doc__osr   r   r   os.pathr   r   r   loggingnumpyrO   joblibr	   _baser   r   r   r   utilsr   	getLogger__name__r$   r(   r'   r#   r5   rg   r   rF   r   r   r   r;   r6   r4   <module>r      s    ) ( ( ( ( ( ( ( ( ( ' ' ' ' ' ' ' ' ' '                            		8	$	$ 
8O   &%8O    $<S  
 #<S  
 <S  2(& (& (& (&VB B BV ST(' (' (' ('Z 
E"cNNEE"cNN+{ { { { {H IM1I 1I 1I 1Il 
E"cNNEE"cNN+      r6   