
    @hr                    >    S SK Jr  S SKJr  S SKJr   " S S\5      rg)    )annotations)Any)TextSplitterc                  \   ^  \ rS rSrSr  SSS.         S	U 4S jjjjrS
S jrSrU =r$ )NLTKTextSplitter   z"Splitting text using NLTK package.F)use_span_tokenizec                 > [         TU ]  " S0 UD6  Xl        X l        X0l        U R                  (       a  U R                  S:w  a  Sn[        U5      e SSKnU R                  (       a+  UR                  R                  U R                  5      U l	        gUR                  R                  U l	        g! [         a    Sn[        U5      ef = f)zInitialize the NLTK splitter. z6When use_span_tokenize is True, separator should be ''r   NzANLTK is not installed, please install it with `pip install nltk`. )super__init__
_separator	_language_use_span_tokenize
ValueErrornltktokenize_get_punkt_tokenizer
_tokenizersent_tokenizeImportError)self	separatorlanguager	   kwargsmsgr   	__class__s          U/var/www/html/shao/venv/lib/python3.13/site-packages/langchain_text_splitters/nltk.pyr   NLTKTextSplitter.__init__   s     	"6"#!"3""t"'<JCS/!		#&&"&--"D"DT^^"T"&--"="= 	#UCc""	#s   ?B/ B/ /Cc                f   U R                   (       am  [        U R                  R                  U5      5      n/ n[	        U5       H7  u  nu  pVUS:  a  X$S-
     S   nXU XU -   nOXU nUR                  U5        M9     OU R                  XR                  S9nU R                  X0R                  5      $ )z&Split incoming text and return chunks.r      )r   )	r   listr   span_tokenize	enumerateappendr   _merge_splitsr   )	r   textspanssplitsistartendprev_endsentences	            r   
split_textNLTKTextSplitter.split_text&   s     ""66t<=EF#,U#3<Eq5$U|AH#U3doEH##Hh' $4 __TNN_CF!!&//::    )r   r   r   r   )z

english)
r   strr   r4   r	   boolr   r   returnNone)r(   r4   r6   z	list[str])	__name__
__module____qualname____firstlineno____doc__r   r0   __static_attributes____classcell__)r   s   @r   r   r      s\    ,  !#
 #(## #
  # # 
# #6; ;r2   r   N)
__future__r   typingr   langchain_text_splitters.baser   r   r   r2   r   <module>rB      s    "  6-;| -;r2   