
    dhk                         S SK r S SKrS SKrS SKJrJr  S SKrS SKJr  S SK	J
r
  \R                  " S5      r\R                  " S5      r\R                  " S5      r " S S	\
5      rg)
    N)ListTuple)Document)
BaseLoaderzBV\w+zav[0-9]+zp=(\d+)c            	       r    \ rS rSrSr   SS\\   S\S\S\4S jjrS\\   4S	 jr	S
\S\
\\4   4S jrSrg)BiliBiliLoader   z1
Load fetching transcripts from BiliBili videos.

video_urlssessdatabili_jctbuvid3c                     Xl         SU l         SSKJn  U(       a&  U(       a  U(       a  UR                  X#US9U l        gggg! [         a    [	        S5      ef = f)a  
Initialize the loader with BiliBili video URLs and authentication cookies.
if no authentication cookies are provided, the loader can't get transcripts
and will only fetch videos info.

Args:
    video_urls (List[str]): List of BiliBili video URLs.
    sessdata (str): SESSDATA cookie value for authentication.
    bili_jct (str): BILI_JCT cookie value for authentication.
    buvid3 (str): BUVI3 cookie value for authentication.
Nr   )videoTrequests package not found, please install it with `pip install bilibili-api-python`)r   r   r   )r
   
credentialbilibili_apir   ImportError
Credential)selfr
   r   r   r   r   s         e/var/www/html/shao/venv/lib/python3.13/site-packages/langchain_community/document_loaders/bilibili.py__init__BiliBiliLoader.__init__   si    $ %	* V#..!V / DO &,8  	4 	s   A Areturnc                     / nU R                    H0  nU R                  U5      u  p4[        X4S9nUR                  U5        M2     U$ )z
Load and return a list of documents containing video transcripts.

Returns:
    List[Document]: List of Document objects transcripts and metadata.
)page_contentmetadata)r
   _get_bilibili_subs_and_infor   append)r   resultsurl
transcript
video_infodocs         r   loadBiliBiliLoader.load6   sG     ??C%)%E%Ec%J"J
HCNN3 #
     r    c                    [         R                  U5      n SSKJnJn  U(       a)  UR                  UR                  5       U R                  S9nO_[        R                  U5      nU(       a5  UR                  [        UR                  5       SS 5      U R                  S9nO[        SU 35      eU" UR                  5       5      nUR                  S	U05        U R                  (       d  S
U4$ Sn[        R                  U5      n	U	(       a'  US   [        U	R                  S5      5      S-
     S   nOUS   nU" UR                  U5      5      n
U
R!                  S/ 5      nU(       a  US   R!                  SS
5      nUR#                  S5      (       d  SU-   n[$        R                   " U5      nUR&                  S:X  ah  [(        R*                  " UR,                  5      R!                  S/ 5      nSR/                  U Vs/ sH  oS   PM	     sn5      nSUS    SUS    SU 3nUU4$ [0        R2                  " SU SUR&                   35         S
U4$ [0        R2                  " SU S35        S
U4$ ! [
         a    [        S5      ef = fs  snf )zE
Retrieve video information and transcript for a given BiliBili URL.
r   )syncr   r   )bvidr      N)aidr   z(Unable to find a valid video ID in URL: r     pages   cid	subtitlessubtitle_urlhttpzhttps:   body contentzVideo Title: titlez, description: descz

Transcript: zFailed to fetch subtitles for z. HTTP Status Code: zNo subtitles found for video: z. Returning empty transcript.)
BV_PATTERNsearchr   r(   r   r   Videogroupr   
AV_PATTERNint
ValueErrorget_infoupdatePAGE_INDEX_PATTERNget_subtitleget
startswithrequestsstatus_codejsonloadsr6   joinwarningswarn)r   r    r)   r(   r   vr+   r"   r/   
page_matchsubsub_listsub_urlresponseraw_sub_titlescraw_transcriptraw_transcript_with_meta_infos                     r   r   *BiliBiliLoader._get_bilibili_subs_and_infoE   s      %	0 $//JA##C(CKKC		AB$8T__KU #KC5!QRR!**,'
5#,' z>!'..s3
W%c**:*:1*=&>&BCC U#C 1>>#&'77;+qkoonb9G%%f--"W,||G,H##s*!%H,<,<!=!A!A&"!M!$*PAY<*P!Q $Jw$7#8 9$$.v$6#7 8##1"24 .
 5j@@4SE :))1)=)=(>@ :~ MM05RS
 :~y  	4 	R +Qs   I- ;J-J)r   r
   N)r,   r,   r,   )__name__
__module____qualname____firstlineno____doc__r   strr   r   r$   r   dictr   __static_attributes__ r&   r   r   r      sq     I  	
 @d8n Cs CuS$Y7G Cr&   r   )rH   rerK   typingr   r   rF   langchain_core.documentsr   )langchain_community.document_loaders.baser   compiler9   r=   rB   r   r`   r&   r   <module>rf      sU     	    - @ ZZ!
ZZ$
ZZ
+ wZ wr&   