
    @hn                    Z    S SK Jr  S SKJr  S SKJr   " S S\5      rSS.     S
S jjrg	)    )annotations)Any)TextSplitterc                  b   ^  \ rS rSrSr   SSS.           S	U 4S jjjjrS
S jrSrU =r$ )SpacyTextSplitter   a=  Splitting text using Spacy package.

Per default, Spacy's `en_core_web_sm` model is used and
its default max_length is 1000000 (it is the length of maximum character
this model takes which can be increased for large files). For a faster, but
potentially less accurate splitting, you can use `pipeline='sentencizer'`.
T)strip_whitespacec               Z   > [         TU ]  " S0 UD6  [        X#S9U l        Xl        X@l        g)z#Initialize the spacy text splitter.
max_lengthN )super__init__"_make_spacy_pipeline_for_splitting
_tokenizer
_separator_strip_whitespace)self	separatorpipeliner   r	   kwargs	__class__s         V/var/www/html/shao/venv/lib/python3.13/site-packages/langchain_text_splitters/spacy.pyr   SpacyTextSplitter.__init__   s1     	"6"<
 $!1    c                   ^  U 4S jT R                  U5      R                   5       nT R                  UT R                  5      $ )z&Split incoming text and return chunks.c              3  t   >#    U H.  nTR                   (       a  UR                  OUR                  v   M0     g 7f)N)r   texttext_with_ws).0sr   s     r   	<genexpr>/SpacyTextSplitter.split_text.<locals>.<genexpr>$   s-      
0 ,,AFF!..@0s   58)r   sents_merge_splitsr   )r   r   splitss   `  r   
split_textSpacyTextSplitter.split_text"   s:    
__T*00
 !!&$//::r   )r   r   r   )z

en_core_web_sm@B )r   strr   r+   r   intr	   boolr   r   returnNone)r   r+   r.   z	list[str])	__name__
__module____qualname____firstlineno____doc__r   r'   __static_attributes____classcell__)r   s   @r   r   r      sk      (#	2 "&22 2 	2 2 2 
2 2"; ;r   r   r*   r   c                    SS K nU S:X  a7  UR                  R                  R	                  5       nUR                  S5        U$ UR                  U SS/S9nXl        U$ ! [         a    Sn[        U5      ef = f)Nr   zCSpacy is not installed, please install it with `pip install spacy`.sentencizernertagger)exclude)spacyImportErrorlangenEnglishadd_pipeloadr   )r   r   r<   msgr8   s        r   r   r   +   s     =  ::==002]+  jjE83DjE!+  S#s   A A5N)r   r+   r   r,   r.   r   )
__future__r   typingr   langchain_text_splitters.baser   r   r   r   r   r   <module>rG      s=    "  6 ;  ;H )2"%r   