
    @h c                        S SK Jr  S SKrS SKJrJrJrJr  S SKJ	r	J
r
   " S S\
5      r        S
S jr " S S	\
5      rg)    )annotationsN)AnyLiteralOptionalUnion)LanguageTextSplitterc                  P   ^  \ rS rSrSr  S       SU 4S jjjrSS jrSrU =r$ )	CharacterTextSplitter	   z(Splitting text that looks at characters.c                >   > [         TU ]  " S0 UD6  Xl        X l        g)Create a new TextSplitter.N )super__init__
_separator_is_separator_regex)self	separatoris_separator_regexkwargs	__class__s       Z/var/www/html/shao/venv/lib/python3.13/site-packages/langchain_text_splitters/character.pyr   CharacterTextSplitter.__init__   s      	"6"##5     c                h  ^  T R                   (       a  T R                  O[        R                  " T R                  5      n[	        XT R
                  S9nSnT R                   =(       a    [        U 4S jU 5       5      nSnT R
                  (       d  U(       d  T R                  nT R                  X65      $ )z=Split into chunks without re-inserting lookaround separators.keep_separator)z(?=z(?<!z(?<=z(?!c              3  X   >#    U H   nTR                   R                  U5      v   M"     g 7f)N)r   
startswith).0pr   s     r   	<genexpr>3CharacterTextSplitter.split_text.<locals>.<genexpr>%   s'      9
3FaDOO&&q))3Fs   '* )r   r   reescape_split_text_with_regex_keep_separatorany_merge_splits)r   textsep_patternsplitslookaround_prefixesis_lookaround	merge_seps   `      r   
split_text CharacterTextSplitter.split_text   s      $77DOORYYt=W 	
 (d.B.B

 =00 
S 9
3F9
 6
 	$$I !!&44r   )r   r   )

F)r   strr   boolr   r   returnNoner,   r5   r7   	list[str])	__name__
__module____qualname____firstlineno____doc__r   r2   __static_attributes____classcell__r   s   @r   r   r   	   sH    2  #(	6	6 !	6 		6
 
	6 	65 5r   r   c                  U(       a  U(       a  [         R                  " SU S3U 5      nUS:X  a3  [        S[        U5      S-
  S5       Vs/ sH  oCU   X4S-      -   PM     snO/[        S[        U5      S5       Vs/ sH  oCU   X4S-      -   PM     snn[        U5      S-  S:X  a  XSSS  -  nUS:X  a	  / UQUS   POUS   /UQnO"[         R                  " X5      nO[	        U 5      nU Vs/ sH  ofS:w  d  M
  UPM     sn$ s  snf s  snf s  snf )	N()endr         r%   )r&   splitrangelenlist)r,   r   r   _splitsir.   ss          r   r(   r(   4   s)    hh9+Q/6G "U* 8=QGq@PRS7TU7T!!*w1u~-7TU<A!S\ST<UV<UqqzGEN2<UV 
 7|a1$"#,& "U* (6'72;'qz+F+  XXi.Fd)v!bAv)) VV *s   C?;D*D	6D	c                     ^  \ rS rSrSr   S	         S
U 4S jjjrSS jrSS jr\      SS j5       r	\
SS j5       rSrU =r$ )RecursiveCharacterTextSplitterO   z~Splitting text by recursively look at characters.

Recursively tries to split by different characters to find one
that works.
c                Z   > [         TU ]  " SSU0UD6  U=(       d    / SQU l        X0l        g)r   r   )r4   
 r%   Nr   )r   r   _separatorsr   )r   
separatorsr   r   r   r   s        r   r   'RecursiveCharacterTextSplitter.__init__V   s.     	AA&A%@)@#5 r   c                0   / nUS   n/ n[        U5       H^  u  pgU R                  (       a  UO[        R                  " U5      nUS:X  a  Un  O*[        R                  " X5      (       d  MU  UnX&S-   S n  O   U R                  (       a  UO[        R                  " U5      n[        XU R                  S9n	/ n
U R                  (       a  SOUnU	 H  nU R                  U5      U R                  :  a  U
R                  U5        M5  U
(       a$  U R                  X5      nUR                  U5        / n
U(       d  UR                  U5        Mz  U R                  X5      nUR                  U5        M     U
(       a"  U R                  X5      nUR                  U5        U$ )z&Split incoming text and return chunks.rI   r%   rG   Nr   )	enumerater   r&   r'   searchr(   r)   _length_function_chunk_sizeappendr+   extend_split_text)r   r,   rX   final_chunksr   new_separatorsrO   _sr   r.   _good_splitsrP   merged_text
other_infos                 r   ra   *RecursiveCharacterTextSplitter._split_textb   se   rN	z*EA#77RYYr]JRx	yy**	!+EG!4 + #'":":Y		)@T
'T-A-A

 //RY
A$$Q'$*:*::##A&"&"4"4\"NK ''4#%L% ''*!%!1!1!!DJ ''
3  ,,\FK,r   c                8    U R                  XR                  5      $ )zSplit the input text into smaller chunks based on predefined separators.

Args:
    text (str): The input text to be split.

Returns:
    List[str]: A list of text chunks obtained after splitting.
)ra   rW   )r   r,   s     r   r2   )RecursiveCharacterTextSplitter.split_text   s     &6&677r   c                :    U R                  U5      nU " SUSS.UD6$ )a  Return an instance of this class based on a specific language.

This method initializes the text splitter with language-specific separators.

Args:
    language (Language): The language to configure the text splitter for.
    **kwargs (Any): Additional keyword arguments to customize the splitter.

Returns:
    RecursiveCharacterTextSplitter: An instance of the text splitter configured
    for the specified language.
T)rX   r   r   )get_separators_for_language)clslanguager   rX   s       r   from_language,RecursiveCharacterTextSplitter.from_language   s)      44X>
LjTLVLLr   c                   U [         R                  [         R                  4;   a  / SQ$ U [         R                  :X  a  / SQ$ U [         R                  :X  a  / SQ$ U [         R
                  :X  a  / SQ$ U [         R                  :X  a  / SQ$ U [         R                  :X  a  / SQ$ U [         R                  :X  a  / SQ$ U [         R                  :X  a  / SQ$ U [         R                  :X  a  / S	Q$ U [         R                  :X  a  / S
Q$ U [         R                  :X  a  / SQ$ U [         R                  :X  a  / SQ$ U [         R                  :X  a  / SQ$ U [         R                  :X  a  / SQ$ U [         R                   :X  a  / SQ$ U [         R"                  :X  a  / SQ$ U [         R$                  :X  a  / SQ$ U [         R&                  :X  a  / SQ$ U [         R(                  :X  a  / SQ$ U [         R*                  :X  a  / SQ$ U [         R,                  :X  a  / SQ$ U [         R.                  :X  a  / SQ$ U [         R0                  :X  a  / SQ$ U [         R2                  :X  a  / SQ$ U [         R4                  :X  a*  SnSU S3SU S3SU S3SU S3SU S3S S!S"S#S$S%S&S'S(S)S*S+S,/$ U [         R6                  ;   a  S-U  S.3n[9        U5      eS-U  S/[;        [         5       3n[9        U5      e)0zRetrieve a list of separators specific to the given language.

Args:
    language (Language): The language for which to get the separators.

Returns:
    List[str]: A list of separators appropriate for the specified language.
)
class z
void z
int z
float z
double 
if 
for 
while 
switch 
case r4   rU   rV   r%   )
func 
var 
const 
type rs   rt   rv   rw   r4   rU   rV   r%   )rr   
public 
protected 	
private 
static rs   rt   ru   rv   rw   r4   rU   rV   r%   )rr   r|   r}   r~   z

internal z
companion z
fun 
val ry   rs   rt   ru   z
when rw   
else r4   rU   rV   r%   )

function rz   
let ry   rr   rs   rt   ru   rv   rw   	
default r4   rU   rV   r%   )
enum 
interface z
namespace r{   rr   r   rz   r   ry   rs   rt   ru   rv   rw   r   r4   rU   rV   r%   )r   rr   rs   	
foreach ru   
do rv   rw   r4   rU   rV   r%   )
z	
message z	
service r   z
option 
import z
syntax r4   rU   rV   r%   )rr   
def z
	def r4   rU   rV   r%   )z
=+
z
-+
z
\*+
z

.. *

r4   rU   rV   r%   )r   rr   rs   
unless ru   rt   r   z
begin z
rescue r4   rU   rV   r%   )r   z
defp z
defmodule z
defprotocol z

defmacro z
defmacrop rs   r   ru   rw   z
cond z
with rt   r   r4   rU   rV   r%   )z
fn rz   r   rs   ru   rt   z
loop 
match rz   r4   rU   rV   r%   )rr   z
object r   r   ry   rs   rt   ru   r   rw   r4   rU   rV   r%   )rx   rr   
struct r   rs   rt   ru   r   rv   rw   r4   rU   rV   r%   )	z
#{1,6} z```
z	
\*\*\*+
z
---+
z
___+
r4   rU   rV   r%   )z
\\chapter{z
\\section{z
\\subsection{z
\\subsubsection{z
\\begin{enumerate}z
\\begin{itemize}z
\\begin{description}z
\\begin{list}z
\\begin{quote}z
\\begin{quotation}z
\\begin{verse}z
\\begin{verbatim}z
\\begin{align}z$$$rV   r%   )z<bodyz<divz<pz<brz<liz<h1z<h2z<h3z<h4z<h5z<h6z<spanz<tablez<trz<tdz<thz<ulz<olz<headerz<footerz<navz<headz<stylez<scriptz<metaz<titler%   )r   r   z
implements z

delegate 
event rr   z

abstract r|   r}   r~   r   z
return rs   z

continue rt   r   ru   rv   z
break rw   r   
try z
throw 	
finally 
catch r4   rU   rV   r%   )z
pragma z
using z

contract r   z	
library z
constructor r{   r   r   z

modifier z
error r   r   rs   rt   ru   z

do while z

assembly r4   rU   rV   r%   )z
IDENTIFICATION DIVISION.z
ENVIRONMENT DIVISION.z
DATA DIVISION.z
PROCEDURE DIVISION.z
WORKING-STORAGE SECTION.z
LINKAGE SECTION.z
FILE SECTION.z
INPUT-OUTPUT SECTION.z
OPEN z
CLOSE z
READ z
WRITE z
IF z
ELSE z
MOVE z	
PERFORM z
UNTIL z	
VARYING z
ACCEPT z	
DISPLAY z

STOP RUN.rU   rV   r%   )
z
local r   rs   rt   ru   z
repeat r4   rU   rV   r%   )z	
main :: z
main = r   z
in r   z
where 
:: z
= 
data z	
newtype r{   r   z
module r   z
qualified z
import qualified rr   z

instance rw   z
| r   z
= {z
, r4   rU   rV   r%   )r   z
param rs   r   rt   ru   rv   rr   r   r   r   r4   rU   rV   r%   z*(?:Public|Private|Friend|Global|Static)\s+z\n(?!End\s)z?Sub\s+z?Function\s+z?Property\s+(?:Get|Let|Set)\s+z?Type\s+z?Enum\s+z\n(?!End\s)If\s+z\nElseIf\s+z	\nElse\s+z\nSelect\s+Case\s+z	\nCase\s+z\nFor\s+z\nDo\s+z
\nWhile\s+z	\nWith\s+z\n\nz\nrV   r%   z	Language z is not implemented yet!z& is not supported! Please choose from )r   CCPPGOJAVAKOTLINJSTSPHPPROTOPYTHONRSTRUBYELIXIRRUSTSCALASWIFTMARKDOWNLATEXHTMLCSHARPSOLCOBOLLUAHASKELL
POWERSHELLVISUALBASIC6_value2member_map_
ValueErrorrM   )rn   vismsgs      r   rl   :RecursiveCharacterTextSplitter.get_separators_for_language   s(    

HLL11 ( x{{" " x}}$ ( x& 2 x{{" ( x{{" 2 x||# $ x~~% & x&
 
 x||#  x}}$ $ x& . x}}$ $ x~~% ( x~~% ( x((( & x~~% . x}}$ > x&# #H x||# : x~~% > x||#   x'''% %L x*** . x,,,?C se7+se<0se#ABse8,se8,#%+ 0 x222hZ'?@CS/!z!GXGWX 	 or   )r   rW   )NTF)
rX   zOptional[list[str]]r   $Union[bool, Literal['start', 'end']]r   r6   r   r   r7   r8   )r,   r5   rX   r:   r7   r:   r9   )rn   r   r   r   r7   rR   )rn   r   r7   r:   )r;   r<   r=   r>   r?   r   ra   r2   classmethodro   staticmethodrl   r@   rA   rB   s   @r   rR   rR   O   s     +/?C#(	
6'
6 =
6 !	
6
 
6 

6 
6(T	8 MM+.M	'M M$ V	 V	r   rR   )r,   r5   r   r5   r   r   r7   r:   )
__future__r   r&   typingr   r   r   r   langchain_text_splitters.baser   r	   r   r(   rR   r   r   r   <module>r      sT    " 	 0 0 @(5L (5V*
**2V**6r
\ r
r   