
    dhw#                        S SK Jr  S SKrS SKrS SKJr  S SKJrJrJ	r	J
r
Jr  S SKJr  S SKJr  \(       a
  S SKrS SKJr  SS jr " S	 S
\5      rSS jr " S S\5      r\rg)    )annotationsN)Path)TYPE_CHECKINGDictListOptionalUnion)Document)
BaseLoader)
EntityLikec                6    U S   nU S   nU S   nU SU SU S3$ )zBCombine message information in a readable format ready to be used.datefromtextz on z: 

 )rowr   senderr   s       e/var/www/html/shao/venv/lib/python3.13/site-packages/langchain_community/document_loaders/telegram.pyconcatenate_rowsr      s6    v;D[Fv;DXT$r$t,,    c                  ,    \ rS rSrSrSS jrSS jrSrg)	TelegramChatFileLoader   zLoad from `Telegram chat` dump.c                    Xl         g)zInitialize with a path.N	file_path)selfpaths     r   __init__TelegramChatFileLoader.__init__   s    r   c                   [        U R                  5      n[        USS9 n[        R                  " U5      nSSS5        SR                  S WS    5       5      nS[        U5      0n[        XES9/$ ! , (       d  f       N@= f)	Load documents.utf8encodingN c              3     #    U H5  nUS    S:X  d  M  [        US   [        5      (       d  M(  [        U5      v   M7     g7f)typemessager   N)
isinstancestrr   ).0r*   s     r   	<genexpr>.TelegramChatFileLoader.load.<locals>.<genexpr>'   sA      
(v)+ &0:76?C0P &W%%(s   >>>messagessourcepage_contentmetadata)r   r   openjsonloadjoinr,   r
   )r   pfdr   r4   s         r   r7   TelegramChatFileLoader.load    sv     !f%		!A & ww 
Z=
 

 c!f%d>?? &%s   A11
A?r   N)r   zUnion[str, Path]returnList[Document])__name__
__module____qualname____firstlineno____doc__r    r7   __static_attributes__r   r   r   r   r      s    )@r   r   c                    SSK Jn  U" S/ SQSS9n[        U [        5      (       a  U /n U  Vs/ sH  n[	        US9PM     nn[        U5       H  u  pVUS-   UR                  S	'   M     / nU H  nUR                  UR                  5      n[        U5       H]  u  pY[	        XR                  S	   US
.S9nUR                  S	    SUR                  S    3UR                  S'   UR                  U5        M_     M     U$ s  snf )zIConvert a string or list of strings to a list of Documents with metadata.r   )RecursiveCharacterTextSplitteri   )r   
.!?, r'      )
chunk_size
separatorschunk_overlap)r3      page)rS   chunkr2   -rT   r1   )
langchain_text_splittersrG   r+   r,   r
   	enumerater4   
split_textr3   append)
r   rG   text_splitterrS   	page_docsidoc
doc_chunkschunksrT   s
             r   text_to_docsr`   1   s
   G2>M $v9=>t,I> I& 1uV ' J))#*:*:;!&)HA"ll66JUV-WC ),V(<'=Qs||G?T>U%VCLL"c" *  % ?s   C;c                  v    \ rS rSrSr     S
         SS jjrSS jrSS jr      SS jrSS jr	S	r
g)TelegramChatApiLoaderS   z)Load `Telegram` chat json directory dump.Nc                @    Xl         X l        X0l        X@l        XPl        g)a	  Initialize with API parameters.

Args:
    chat_entity: The chat entity to fetch data from.
    api_id: The API ID.
    api_hash: The API hash.
    username: The username.
    file_path: The file path to save the data to. Defaults to
         "telegram_data.json".
N)chat_entityapi_idapi_hashusernamer   )r   re   rf   rg   rh   r   s         r   r    TelegramChatApiLoader.__init__V   s    $ '  "r   c                  #    SSK Jn  / nU" U R                  U R                  U R                  5       ISh  vN nUR                  U R                  5        Sh  vN nUR                  SLnU(       a  UR                  R                  OSnUR                  UR                  UR                  UR                  R                  5       UR                  UUS.5        M   N N
 SSS5      ISh  vN    O! , ISh  vN  (       d  f       O= f[        U R                   SSS9 n["        R$                  " X'SS	S
9  SSS5        g! , (       d  f       g= f7f)z8Fetch data from Telegram API and save it as a JSON file.r   )TelegramClientN)	sender_idr   r   
message.idis_replyreply_to_idwzutf-8r%   F   )ensure_asciiindent)telethon.syncrk   rh   rf   rg   iter_messagesre   reply_toreply_to_msg_idrY   rl   r   r   	isoformatidr5   r   r6   dump)r   rk   dataclientr*   rn   ro   r:   s           r   fetch_data_from_telegram.TelegramChatApiLoader.fetch_data_from_telegramn   s     0!$--dmmLLPV!'!5!5d6F6F!G g"++47BJg..>>PT%,%6%6 ' ' 6 6 8&-jj$,'2		 M!G MLLLLL $..#8AIIdE!< 988s   5ECEC4C!CC!A>C4EC!!C4"E-C0.E4D:C=;DE#E:	E
EEc           	        ^ SU4S jjmXS   )    nXS      R                  S/S9nUS   R                  [        5      US'   US    Vs0 sH  nXD/T" XC5      -   _M     nnU$ s  snf )a  Create a dictionary of message threads from the given data.

Args:
    data (pd.DataFrame): A DataFrame containing the conversation                 data with columns:
        - message.sender_id
        - text
        - date
        - message.id
        - is_reply
        - reply_to_id

Returns:
    dict: A dictionary where the key is the parent message ID and                 the value is a list of message IDs in ascending order.
c                p   > XS   U :H     S   R                  5       n/ nU H  nX4/T" XA5      -   -  nM     U$ )a
  
Recursively find all replies to a given parent message ID.

Args:
    parent_id (int): The parent message ID.
    reply_data (pd.DataFrame): A DataFrame containing reply messages.

Returns:
    list: A list of message IDs that are replies to the parent message ID.
ro   rm   )tolist)	parent_id
reply_datadirect_repliesall_repliesreply_idfind_repliess        r   r   @TelegramChatApiLoader._get_message_threads.<locals>.find_replies   sV     (=(AY(NOfh 
 K*zL,NNN + r   rn   ro   )subsetrm   )r   intr   pd.DataFramer>   z	List[int])dropnaastyper   )r   r{   parent_messagesreply_messagesr   message_threadsr   s         @r   _get_message_threads*TelegramChatApiLoader._get_message_threads   s    $	0 Z 001 :./66}o6N )7}(E(L(LS(Q}% -\:
:	 {\)%LLL: 	 

 
s   	A"c                ,   SnUR                  5        Hj  u  pEX"S   R                  U5         R                  SS9S   R                  5       nU Vs/ sH  n[	        U5      PM     nnUSR                  U5      S-   -  nMl     UR                  5       $ s  snf )a  
Combine the message texts for each parent message ID based             on the list of message threads.

Args:
    message_threads (dict): A dictionary where the key is the parent message                 ID and the value is a list of message IDs in ascending order.
    data (pd.DataFrame): A DataFrame containing the conversation data:
        - message.sender_id
        - text
        - date
        - message.id
        - is_reply
        - reply_to_id

Returns:
    str: A combined string of message texts sorted by date.
r'   rm   r   )byr   rM   z.
)itemsisinsort_valuesr   r,   r8   strip)r   r   r{   combined_textr   message_idsmessage_textselems           r   _combine_message_texts,TelegramChatApiLoader._combine_message_texts   s    *  '6&;&;&="I ,',,[9:'0 
 4AA=4SY=MA SXXm4u<<M '> ""$$ Bs   Bc                <   U R                   b9   SSKnUR                  5         [        R                  " U R                  5       5        [        U R                  5      n[        USS9 n[        R                  " U5      nSSS5         SSKnUR                  " W5      nUR                  " U5      nU R                  U5      nU R!                  X5      n	[#        U	5      $ ! [         a    [        S5      ef = f! , (       d  f       N}= f! [         a    [        S5      ef = f)r#   Nr   zy`nest_asyncio` package not found.
                    please install with `pip install nest_asyncio`
                    r$   r%   zf`pandas` package not found. 
                please install with `pip install pandas`
                )re   nest_asyncioapplyasynciorunr}   ImportErrorr   r   r5   r6   r7   pandasjson_normalize	DataFramer   r   r`   )
r   r   r9   r:   r;   pdnormalized_messagesdfr   combined_textss
             r   r7   TelegramChatApiLoader.load   s
    '
#""$D99;<  !f%		!A &	 !//2\\-.33B744_IN++3  !  &%  	 	s#   8C &C4D C14
DD)rg   rf   re   r   rh   )NNNNztelegram_data.json)
re   zOptional[EntityLike]rf   zOptional[int]rg   Optional[str]rh   r   r   r,   )r>   None)r{   r   r>   dict)r   zDict[int, List[int]]r{   r   r>   r,   r=   )r@   rA   rB   rC   rD   r    r}   r   r   r7   rE   r   r   r   rb   rb   S   sy    3 -1 $"&"&-#)# #  	#
  # #0=.9v$%3$%;G$%	$%L",r   rb   )r   r   r>   r,   )r   zUnion[str, List[str]]r>   r?   )
__future__r   r   r6   pathlibr   typingr   r   r   r   r	   langchain_core.documentsr
   )langchain_community.document_loaders.baser   r   r   telethon.hintsr   r   r   r`   rb   TelegramChatLoaderr   r   r   <module>r      sV    "    = = - @)-@Z @0Du,J u,r , r   