
    dhJ                    R   S r SSKJr  SSKrSSKJr  SSKJr  SSKJ	r	J
r
JrJrJrJrJr  SSKJrJr  SSKJr  SS	KJr  SS
KJr  SSKJr  SSKJr  \R:                  " \5      rS/r \ " S S5      5       r!SS1r"1 Skr#SS jr$ " S S\5      r% " S S\5      r&\ " S S\5      5       r'g)zLoads YouTube transcript.    )annotationsN)Enum)Path)AnyDict	GeneratorListOptionalSequenceUnion)parse_qsurlparse)
ParseError)Document)model_validator)	dataclass)
BaseLoaderz0https://www.googleapis.com/auth/youtube.readonlyc                      \ rS rSr% Sr\R                  " 5       S-  S-  rS\S'   \R                  " 5       S-  S-  r	S\S'   \R                  " 5       S-  S-  r
S\S	'   SS
 jr\" SS9\SS j5       5       rSS jrSrg)GoogleApiClient   a  Generic Google API Client.

To use, you should have the ``google_auth_oauthlib,youtube_transcript_api,google``
python package installed.
As the google api expects credentials you need to set up a google account and
register your Service. "https://developers.google.com/docs/api/quickstart/python"

*Security Note*: Note that parsing of the transcripts relies on the standard
    xml library but the input is viewed as trusted in this case.


Example:
    .. code-block:: python

        from langchain_community.document_loaders import GoogleApiClient
        google_api_client = GoogleApiClient(
            service_account_path=Path("path_to_your_sec_file.json")
        )

z.credentialszcredentials.jsonr   credentials_pathservice_account_pathz
token.json
token_pathc                .    U R                  5       U l        g N)_load_credentialscredsselfs    d/var/www/html/shao/venv/lib/python3.13/site-packages/langchain_community/document_loaders/youtube.py__post_init__GoogleApiClient.__post_init__2   s    ++-
    beforemodec                    UR                   R                  S5      (       d+  UR                   R                  S5      (       d  [        S5      eUR                   $ )DValidate that either folder_id or document_ids is set, but not both.r   r   -Must specify either channel_name or video_idskwargsget
ValueErrorclsvaluess     r    #validate_channel_or_videoIds_is_set3GoogleApiClient.validate_channel_or_videoIds_is_set5   sK    
 }}  !344V]]=N=N">
 >
 LMM}}r#   c                h    SSK Jn  SSKJn  SSKJn  SSKJn  SSKJ	n  SnU R                  R                  5       (       a.  UR
                  R                  [        U R                  5      5      $ U R                  R                  5       (       a)  UR!                  [        U R                  5      ["        5      nU(       a  UR$                  (       d  U(       a9  UR&                  (       a(  UR(                  (       a  UR+                  U" 5       5        O8UR-                  [        U R.                  5      ["        5      nUR1                  SS	9n[3        U R                  S
5       nUR5                  UR7                  5       5        SSS5        U$ U$ ! [         a    [        S5      ef = f! , (       d  f       U$ = f)zLoad credentials.r   )Request)service_account)Credentials)InstalledAppFlowYouTubeTranscriptApiYou must run`pip install --upgrade google-api-python-client google-auth-httplib2 google-auth-oauthlib youtube-transcript-api` to use the Google Drive loaderN)portw)google.auth.transport.requestsr4   google.oauth2r5   google.oauth2.credentialsr6   google_auth_oauthlib.flowr7   youtube_transcript_apir9   ImportErrorr   existsfrom_service_account_filestrr   from_authorized_user_fileSCOPESvalidexpiredrefresh_tokenrefreshfrom_client_secrets_filer   run_local_serveropenwriteto_json)	r   r4   r5   r6   r7   r9   r   flowtokens	            r    r   !GoogleApiClient._load_credentials@   sL   	>5=BC $$++--"..HHD--.  ??!!##99#doo:NPVWEEKK5+>+>gi('@@--. --1-5doos+uEMMO, , u;  	1 	4 ,+ s   F	  F"	F"
F1)r   NreturnNoner0   r   rU   r   )rU   r   )__name__
__module____qualname____firstlineno____doc__r   homer   __annotations__r   r   r!   r   classmethodr1   r   __static_attributes__ r#   r    r   r      s    * "YY[>9<NNdN!%~!=@R!R$Ryy{^3lBJB. (#  $&r#   r   httphttps>   vid.plusyoutu.beyoutube.comm.youtube.comwww.youtube.comwww.youtube-nocookie.comc                   [        U 5      nUR                  [        ;  a  gUR                  [        ;  a  gUR
                  nUR                  S5      (       a@  UR                  n[        U5      nSU;   a"  US   n[        U[        5      (       a  UOUS   nO0gUR
                  R                  S5      nUR                  S5      S   n[        U5      S:w  a  gU$ )zEParse a YouTube URL and return the video ID if valid, otherwise None.Nz/watchvr   /   )r   schemeALLOWED_SCHEMESnetlocALLOWED_NETLOCSpathendswithqueryr   
isinstancerE   lstripsplitlen)url
parsed_urlrs   ru   parsed_queryidsvideo_ids          r    _parse_video_idr   t   s    #J//??D}}X  ,s#C(c22sAH%%c*::c?2&
8}Or#   c                  $    \ rS rSrSrSrSrSrSrg)TranscriptFormat   z3Output formats of transcripts from `YoutubeLoader`.textlineschunksra   N)	rX   rY   rZ   r[   r\   TEXTLINESCHUNKSr`   ra   r#   r    r   r      s    =DEFr#   r   c                      \ rS rSrSrSSS\R                  SS4             SS jjr\SS j5       r	\
SS	 j5       r      SS
 jr    SS jrSS jrSS jrSrg)YoutubeLoader   z!Load `YouTube` video transcripts.FenNx   c                    Xl         SU0U l        X l        X0l        [	        U[
        5      (       a	  U/U l        OX0l        X@l        XPl        X`l        Xpl	        g)z!Initialize with YouTube video ID.sourceN)
r~   	_metadataadd_video_infolanguagerv   rE   translationtranscript_formatcontinue_on_failurechunk_size_seconds)r   r~   r   r   r   r   r   r   s           r    __init__YoutubeLoader.__init__   sS     !"H-, h$$%JDM$M&!2#6 "4r#   c                H    [        U 5      nU(       d  [        SU  S35      eU$ )z*Extract video ID from common YouTube URLs.z.Could not determine the video ID for the URL "z".)r   r-   )youtube_urlr~   s     r    extract_video_idYoutubeLoader.extract_video_id   s1     #;/@RP  r#   c                6    U R                  U5      nU " U40 UD6$ )zlGiven a YouTube URL, construct a loader.
See `YoutubeLoader()` constructor for a list of keyword arguments.
)r   )r/   r   r+   r~   s       r    from_youtube_urlYoutubeLoader.from_youtube_url   s$    
 ''48&v&&r#   c                    [        US5      u  p4[        US5      u  pS[        SR                  [        S U5      5      0 U R                  EUUS SUS SUS 3SU R
                   SU S3S	.ES
9$ )z0Create Document from chunk of transcript pieces.<    c                *    U S   R                  S5      $ Nr   r   strip)chunk_pieces    r    <lambda>4YoutubeLoader._make_chunk_document.<locals>.<lambda>   s    F(;(A(A#(Fr#   02d: https://www.youtube.com/watch?v=z&t=s)start_secondsstart_timestampr   page_contentmetadata)divmodr   joinmapr   r~   )r   chunk_pieceschunk_start_secondsmr   hs         r    _make_chunk_document"YoutubeLoader._make_chunk_document   s     )2.a}FU..!4&'WAaWAaW#= 34==/)*!-	
 	
r#   c              #    #    / nSnU R                   nU HP  nUS   US   -   nXd:  a,  U(       a  U R                  X#5      v   / nUnX@R                   -  nUR                  U5        MR     [        U5      S:  a  U R                  X#5      v   g g 7f)Nr   startduration)r   r   appendry   )r   transcript_piecesr   r   chunk_time_limittranscript_piece	piece_ends          r    _get_transcript_chunks$YoutubeLoader._get_transcript_chunks   s      .022 1(14DZ4PPI+33LVV!&6# $;$;;  01 !2 |q ++LNN !s   BBc                4    SSK JnJnJnJn  U R                  (       a+  U R                  5       nU R                  R                  U5         UR                  U R                  5      n UR                  U R                  5      nU R                  b  UR                  U R                  5      nUR!                  5       n[#        X5      (       a>  UR$                   V	s/ sH'  n	U	R&                  U	R(                  U	R*                  S.PM)     n
n	OUn
U R,                  [.        R0                  :X  a0  SR3                  [5        S U
5      5      n[7        XpR                  S9/$ U R,                  [.        R8                  :X  a  [;        [5        S	 U
5      5      $ U R,                  [.        R<                  :X  a  [;        U R?                  U
5      5      $ [A        S
5      e! [
         a    [        S5      ef = f! U a    / s $ f = f! U a    UR                  S/5      n GNf = fs  sn	f )z1Load YouTube transcripts into `Document` objects.r   )FetchedTranscriptNoTranscriptFoundTranscriptsDisabledr9   zvCould not import "youtube_transcript_api" Python package. Please install it with `pip install youtube-transcript-api`.r   )r   r   r   r   c                *    U S   R                  S5      $ r   r   r   s    r    r   $YoutubeLoader.load.<locals>.<lambda>!  s    -=f-E-K-KC-Pr#   r   c           
     ~    [        U S   R                  S5      [        [        S U R	                  5       5      5      S9$ )Nr   r   c                    U S   S:g  $ )Nr   r   ra   )items    r    r   6YoutubeLoader.load.<locals>.<lambda>.<locals>.<lambda>-  s    T!W->r#   r   )r   r   dictfilteritemsr   s    r    r   r   )  s;    X%5f%=%C%CC%H!%" >@P@V@V@X".r#   zUnknown transcript format.)!rA   r   r   r   r9   rB   r   _get_video_infor   updatelist_transcriptsr~   find_transcriptr   r   	translatefetchrv   snippetsr   r   r   r   r   r   r   r   r   r   listr   r   r-   )r   r   r   r   r9   
video_infotranscript_list
transcripttranscript_objectsnippetr   s              r    loadYoutubeLoader.load   s   	   --/JNN!!*-	2CCDMMRO	A(88GJ '#--d.>.>?J&,,.';;  199!  :G	 $LL$]] ' 0 0
  :  ! 7H!!%5%:%::P%J *~~NOO##'7'='== &
  ##'7'>'>>334EFGG 9::  	O 	 # 	I	
 ! 	A(88$@J	A!s5   G G( 'G6 -HG%(G32G36HHc                    SSK Jn  U" SU R                   35      nUR                  =(       d    SUR
                  =(       d    SUR                  =(       d    SUR                  =(       d    SUR                  (       a  UR                  R                  S5      OSUR                  =(       d    SUR                  =(       d    SS.nU$ ! [         a    [        S5      ef = f)zGet important video information.

Components include:
    - title
    - description
    - thumbnail URL,
    - publish_date
    - channel author
    - and more.
r   )YouTubezVCould not import "pytube" Python package. Please install it with `pip install pytube`.r   Unknownz%Y-%m-%d %H:%M:%S)titledescription
view_countthumbnail_urlpublish_datelengthauthor)pytuber   rB   r~   r   r   viewsr   r   strftimer   r   )r   r   ytr   s       r    r   YoutubeLoader._get_video_info:  s    	& 7GHXX*>>6Y((-a--: OO445HIiin1ii,9


 #  	? 	s   C C)r   r   r   r   r   r   r   r~   )r~   rE   r   boolr   zUnion[str, Sequence[str]]r   Optional[str]r   r   r   r   r   int)r   rE   rU   rE   )r   rE   r+   r   rU   r   )r   
List[Dict]r   r   rU   r   )r   r   rU   zGenerator[Document, None, None]rU   List[Document])rU   r   )rX   rY   rZ   r[   r\   r   r   r   staticmethodr   r_   r   r   r   r   r   r`   ra   r#   r    r   r      s    +
  %.2%).>.C.C$)"%55 5 ,	5
 #5 ,5 "5  50   ' '
&
=@
	
*O!+O	(O(H;Tr#   r   c                      \ rS rSr% SrS\S'   SrS\S'   SrS\S	'   S
rS\S'   Sr	S\S'   Sr
S\S'   SS jrSS jr\" SS9\S S j5       5       rS!S jrS"S jrS#S jrS$S jrS%S jrS&S jrSrg)'GoogleApiYoutubeLoaderi\  aQ  Load all Videos from a `YouTube` Channel.

To use, you should have the ``googleapiclient,youtube_transcript_api``
python package installed.
As the service needs a google_api_client, you first have to initialize
the GoogleApiClient.

Additionally you have to either provide a channel name or a list of videoids
"https://developers.google.com/docs/api/quickstart/python"



Example:
    .. code-block:: python

        from langchain_community.document_loaders import GoogleApiClient
        from langchain_community.document_loaders import GoogleApiYoutubeLoader
        google_api_client = GoogleApiClient(
            service_account_path=Path("path_to_your_sec_file.json")
        )
        loader = GoogleApiYoutubeLoader(
            google_api_client=google_api_client,
            channel_name = "CodeAesthetic"
        )
        load.load()

r   google_api_clientNr   channel_namezOptional[List[str]]	video_idsTr   r   r   rE   captions_languageFr   c                X    U R                  U R                  R                  5      U l        g r   )_build_youtube_clientr   r   youtube_clientr   s    r    r!   $GoogleApiYoutubeLoader.__post_init__  s!    "889O9O9U9UVr#   c                ^     SSK Jn  SSKJn  U" SSUS9$ ! [         a    [	        S5      ef = f)Nr   )buildr8   r:   youtubev3)credentials)googleapiclient.discoveryr  rA   r9   rB   )r   r   r  r9   s       r    r   ,GoogleApiYoutubeLoader._build_youtube_client  sA    	7C Y%88  	1 	s    ,r$   r%   c                    UR                   R                  S5      (       d+  UR                   R                  S5      (       d  [        S5      eUR                   $ )r(   r   r   r)   r*   r.   s     r    r1   :GoogleApiYoutubeLoader.validate_channel_or_videoIds_is_set  sC     }}  009J9J;9W9WLMM}}r#   c                \   SSK JnJn  UR                  U5      n UR	                  U R
                  /5      nWR                  5       nSR                  U Vs/ sH  oS   R                  S5      PM     sn5      $ ! U a'    U H  nUR                  U R
                  5      nM       Nnf = fs  snf )Nr   )r   r9   r   r   )
rA   r   r9   r   r   r   r   r   r   r   )	r   r~   r   r9   r   r   available_transcriptr   ts	            r    _get_transcripe_for_video_id3GoogleApiYoutubeLoader._get_transcripe_for_video_id  s    R.??I	(88$:P:P9QRJ ',,.xx7HI7H!6-7HIJJ ! 	(7$1;;D<R<RS
 )8	 Js   A9 B)9*B&%B&c                    U R                  U5      nU R                  R                  5       R                  SUS9R	                  5       n[        UUR                  S5      S   S9$ )N
id,snippetpartidr   r   r   )r  r  videosr   executer   r,   )r   r~   r+   captionsvideo_responses        r    _get_document_for_video_id1GoogleApiYoutubeLoader._get_document_for_video_id  sl    44X>&&(T!   WY 	 !#''03
 	
r#   c                    U R                   R                  5       R                  SUSSS9nUR                  5       nUS   S   S   S   nU$ )Nr  channel   )r  qtype
maxResultsr   r   	channelId)r  searchr   r  )r   r   requestresponse
channel_ids        r    _get_channel_id&GoogleApiYoutubeLoader._get_channel_id  s_    %%,,.33	 4 
 ??$g&q)$/<
r#   c                    U R                   R                  5       R                  SUS9nUR                  5       nUS   S   S   S   S   $ )NcontentDetailsr  r   r   relatedPlaylistsuploads)r  channelsr   r  )r   r&  r$  r%  s       r    _get_uploads_playlist_id/GoogleApiYoutubeLoader._get_uploads_playlist_id  s[    %%..055! 6 
 ??$ #$456HI)TTr#   c           	     
    SSK JnJn  U R	                  U5      nU R                  U5      nU R                  R                  5       R                  SUSS9n/ nUb  UR                  5       n	U	S    Hv  n
U
S   S	   S
   nS
U0nU R                  (       a(  U
S   R                  S5        UR                  U
S   5         U R                  U5      nUR                  [        UUS95        Mx     U R                  R)                  5       R+                  Xy5      nUb  M  U$ ! [         a    [        S5      ef = f! XC[         4 aD  nU R"                  (       a%  [$        R'                  SSU
S   S
    SU 3-   5        OUe S nAGM  S nAff = f)Nr   )r   r   zTYou must run`pip install --upgrade youtube-transcript-api` to use the youtube loaderr  2   )r  
playlistIdr!  r   r   
resourceIdvideoId
thumbnailsr   zError fetching transscript r   r  z, exception: )rA   r   r   rB   r'  r.  r  playlistItemsr   r  r   popr   r  r   r   r   r   loggererrorr#  	list_next)r   r  r+   r   r   r&  uploads_playlist_idr$  r   r%  r   r~   	meta_datar   es                  r    _get_document_for_channel0GoogleApiYoutubeLoader._get_document_for_channel  s   	 ))'2
";;JG%%335::* ; 

 	!(H !)	?<8C&1	&&O''5$$T)_5#'#D#DX#NL$$ )5%. *. ))002<<WOG7 !: [  	, 	D ,
K //9 !$t*Y"7!8aSIJ
  s#   D ;*D2D/2F>8E==Fc                :   / nU R                   (       a,  UR                  U R                  U R                   5      5        U$ U R                  (       a<  UR                  U R                   Vs/ sH  nU R	                  U5      PM     sn5        U$ [        S5      es  snf )zLoad documents.r)   )r   extendr>  r   r  r-   )r   document_listr~   s      r    r   GoogleApiYoutubeLoader.load  s      !?!?@Q@Q!RS  ^^   %)NN$2 33H=$2  LMMs   *B)r  rT   )r   r   rU   r   rW   )r~   rE   rU   rE   )r~   rE   r+   r   rU   r   )r   rE   rU   rE   )r&  rE   rU   rE   )r  rE   r+   r   rU   r   r   )rX   rY   rZ   r[   r\   r^   r   r   r   r   r   r!   r   r   r_   r1   r  r  r'  r.  r>  r   r`   ra   r#   r    r   r   \  s    8 '&"&L-&%)I")ND!s! %%W9  (#  $K
	U3jr#   r   )rz   rE   rU   r   )(r\   
__future__r   loggingenumr   pathlibr   typingr   r   r   r	   r
   r   r   urllib.parser   r   xml.etree.ElementTreer   langchain_core.documentsr   pydanticr   pydantic.dataclassesr   )langchain_community.document_loaders.baser   	getLoggerrX   r8  rG   r   rp   rr   r   r   r   r   ra   r#   r    <module>rP     s     "    H H H + , - $ * @			8	$
<	= N N Nb 7#<t J D rZ r rr#   