
    ChL                        S SK Jr  S SKrS SKrS SKJr   S SKJr  S SK
JrJr  S SKJr  S SKJr  S SKJr  S S	KJrJr  \R,                  " \5      r " S
 S\5      r\rg! \ a	    S SK	Jr   NPf = f)    )annotationsN)Path)Self)Tensornn)logging)InputModule)Module)import_from_stringload_dir_pathc                  H  ^  \ rS rSr% S1rSS/rS\S'   Sr S       SU 4S jjjr\	  S         SS	 jj5       r
SSS
 jjrSS jrSSS jjrSSS jjr\	     S             SS jj5       r\S 5       r\SS j5       r\R&                  SS j5       rSrU =r$ )Router   taskdefault_routeallow_empty_keyz	list[str]config_keyszrouter_config.jsonc           
       > [         TU ]  5         Ub  [        U5      S:X  a  [        S5      eUb-  X!;  a(  [        SU S[	        UR                  5       5       35      e[        R                  " UR                  5        VVs0 sH  u  pEU[        R                  " U6 _M     snn5      U l
        U(       a%  Uc"  [        [        UR                  5       5      5      nX l        X0l        gs  snnf )a  
This model allows to create asymmetric SentenceTransformer models that apply different modules depending on the specified route,
such as "query" or "document". Especially useful for models that have different encoders for queries and documents.

Notably, the ``task`` argument of ``model.encode`` can be used to specify which route to use, and
``model.encode_query`` and ``model.encode_document`` are shorthands for using ``task="query"`` and
``task="document"``, respectively. These methods also optionally apply ``prompts`` specific to queries
or documents.

.. note::

    When training models with the :class:`~sentence_transformers.models.Router` module, you must use the
    ``router_mapping`` argument in the :class:`~sentence_transformers.training_args.SentenceTransformerTrainingArguments`
    or :class:`~sentence_transformers.sparse_encoder.training_args.SparseEncoderTrainingArguments` to map the
    training dataset columns to the correct route ("query" or "document"). For example, if your training dataset(s)
    have ``["question", "positive", "negative"]`` columns, then you can use the following mapping::

        args = SparseEncoderTrainingArguments(
            ...,
            router_mapping={
                "question": "query",
                "positive": "document",
                "negative": "document",
            }
        )

    Additionally, it is common to use a different learning rate for the different routes. For this, you should
    use the ``learning_rate_mapping`` argument in the :class:`~sentence_transformers.training_args.SentenceTransformerTrainingArguments`
    or :class:`~sentence_transformers.sparse_encoder.training_args.SparseEncoderTrainingArguments` to map parameter patterns
    to their learning rates. For example, if you want to use a learning rate of ``1e-3`` for an SparseStaticEmbedding module and
    ``2e-5`` for the rest of the model, you can do this::

        args = SparseEncoderTrainingArguments(
            ...,
            learning_rate=2e-5,
            learning_rate_mapping={
                r"SparseStaticEmbedding\.*": 1e-3,
            }
        )

In the below examples, the ``Router`` model is used to create asymmetric models with different encoders for
queries and documents. In these examples, the "query" route is efficient (e.g., using SparseStaticEmbedding),
while the "document" route uses a more complex model (e.g. a Transformers module). This allows for efficient
query encoding while still using a powerful document encoder, but the combinations are not limited to this.

Example:
    ::

        from sentence_transformers import SentenceTransformer
        from sentence_transformers.models import Router, Normalize

        # Use a regular SentenceTransformer for the document embeddings, and a static embedding model for the query embeddings
        document_embedder = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1")
        query_embedder = SentenceTransformer("sentence-transformers/static-retrieval-mrl-en-v1")
        router = Router.for_query_document(
            query_modules=list(query_embedder.children()),
            document_modules=list(document_embedder.children()),
        )
        normalize = Normalize()

        # Create an asymmetric model with different encoders for queries and documents
        model = SentenceTransformer(
            modules=[router, normalize],
        )

        # ... requires more training to align the vector spaces

        # Use the query & document routes
        query_embedding = model.encode_query("What is the capital of France?")
        document_embedding = model.encode_document("Paris is the capital of France.")

    ::

        from sentence_transformers.models import Router
        from sentence_transformers.sparse_encoder import SparseEncoder
        from sentence_transformers.sparse_encoder.models import MLMTransformer, SparseStaticEmbedding, SpladePooling

        # Load an asymmetric model with different encoders for queries and documents
        doc_encoder = MLMTransformer("opensearch-project/opensearch-neural-sparse-encoding-doc-v3-distill")
        router = Router.for_query_document(
            query_modules=[
                SparseStaticEmbedding.from_json(
                    "opensearch-project/opensearch-neural-sparse-encoding-doc-v3-distill",
                    tokenizer=doc_encoder.tokenizer,
                    frozen=True,
                ),
            ],
            document_modules=[
                doc_encoder,
                SpladePooling(pooling_strategy="max", activation_function="log1p_relu"),
            ],
        )

        model = SparseEncoder(modules=[router], similarity_fn_name="dot")

        query = "What's the weather in ny now?"
        document = "Currently New York is rainy."

        query_embed = model.encode_query(query)
        document_embed = model.encode_document(document)

        sim = model.similarity(query_embed, document_embed)
        print(f"Similarity: {sim}")

        # Visualize top tokens for each text
        top_k = 10
        print(f"Top tokens {top_k} for each text:")

        decoded_query = model.decode(query_embed, top_k=top_k)
        decoded_document = model.decode(document_embed)

        for i in range(min(top_k, len(decoded_query))):
            query_token, query_score = decoded_query[i]
            doc_score = next((score for token, score in decoded_document if token == query_token), 0)
            if doc_score != 0:
                print(f"Token: {query_token}, Query score: {query_score:.4f}, Document score: {doc_score:.4f}")

        '''
        Similarity: tensor([[11.1105]], device='cuda:0')
        Top tokens 10 for each text:
        Token: ny, Query score: 5.7729, Document score: 0.8049
        Token: weather, Query score: 4.5684, Document score: 0.9710
        Token: now, Query score: 3.5895, Document score: 0.4720
        Token: ?, Query score: 3.3313, Document score: 0.0286
        Token: what, Query score: 2.7699, Document score: 0.0787
        Token: in, Query score: 0.4989, Document score: 0.0417
        '''

Note:
    These models are not necessarily stronger than non-asymmetric models. Rudimentary experiments indicate
    that non-Router models perform better in many cases.

Args:
    sub_modules: Mapping of route keys to lists of modules. Each key corresponds to a specific task type,
        often "query" or "document", and the list contains the modules to be applied for that task type.
    default_route: The default route to use if no task type is specified. If None, an exception will be thrown
        if no task type is specified. If ``allow_empty_key`` is True, the first key in sub_modules will be used as
        the default route. Defaults to None.
    allow_empty_key: If True, allows the default route to be set to the first key in `sub_modules` if
        ``default_route`` is None. Defaults to True.
Nr   z&The routes dictionary cannot be empty.zDefault route 'z' not found in route keys: )super__init__len
ValueErrorlistkeysr   
ModuleDictitems
Sequentialsub_modulesnextiterr   r   )selfr   r   r   
route_namemodules	__class__s         [/var/www/html/shao/venv/lib/python3.13/site-packages/sentence_transformers/models/Router.pyr   Router.__init__   s    ` 	#k"2a"7EFF$)I}o=XY]^i^n^n^pYqXrstt==LWL]L]L_`L_5HZZ00L_`

 }4 k&6&6&8!9:M*. as    C%
c                (    U " XS.U=(       d    SUS9$ )aI  
Creates a Router model specifically for query and document modules, allowing convenient usage via `model.encode_query`
and `model.encode_document`.

Args:
    query_modules: List of modules to be applied for the "query" task type.
    document_modules: List of modules to be applied for the "document" task type.
    default_route: The default route to use if no task type is specified. If None, an exception will be thrown
        if no task type is specified. If ``allow_empty_key`` is True, the first key in sub_modules will be used as
        the default route. Defaults to None.
    allow_empty_key: If True, allows the default route to be set to the first key in `sub_modules` if
        ``default_route`` is None. Defaults to True.

Returns:
    Router: An instance of the Router model with the specified query and document modules.
)querydocumentr)   )r   r   r    )clsquery_modulesdocument_modulesr   r   s        r%   for_query_documentRouter.for_query_document   s"    0 "/N'5:+
 	
    c           	        Uc  UR                  SU R                  5      nUc'  U R                  (       a  [        S5      e[        S5      eX R                  ;  a2  [        SU S[        U R                  R                  5       5       35      eX#S'   U R                  U    HS  nUR                  5        VVs0 sH+  u  pV[        US5      (       d  M  XTR                  ;   d  M)  XV_M-     nnnU" U40 UD6nMU     U$ s  snnf )Nr   vYou must provide a `router_mapping` argument on the training arguments, or set a default route in the `Router` module.kYou must provide a `task` argument when calling this method, or set a default route in the `Router` module.No route found for task type ''. Available routes: forward_kwargs)
getr   trainingr   r   r   r   r   hasattrr6   )r!   featuresr   kwargsmodulekeyvaluemodule_kwargss           r%   forwardRouter.forward   s   <<<(:(:;D<}} E 
 !E 
 '''06KDQUQaQaQfQfQhLiKjk  v&&t,F #),,."0JC6#34 9<@U@U9U 
"0  
 h8-8H - s   4C7C7C7c                    U R                   R                  5        H:  n[        U5       H(  n[        US5      (       d  M  UR	                  5       s  s  $    M<     g )N get_sentence_embedding_dimension)r   valuesreversedr9   rC   )r!   r   r<   s      r%   rC   'Router.get_sentence_embedding_dimension   sL    ++224K";/6#EFF!BBDD 0 5 r0   c           	     R   0 n0 n0 nU R                   R                  5        H  u  px/ Xg'   [        U5       Hh  u  pU SU	 S[        U
5      R                   3nXU'   [        U
5      R
                   S[        U
5      R                   3X['   Xg   R                  U5        Mj     M     UR                  5        HY  u  p[        R                  R                  U[        U5      5      n[        R                  " USS9   U
R                  " U4SU0UD6  M[     [        [        R                  R                  XR                  5      SSS9 n[         R"                  " UUU R%                  5       S	.US
S9  S S S 5        g ! [         a    U
R                  U5         M  f = f! , (       d  f       g = f)N_.T)exist_oksafe_serializationwutf8)encoding)types	structure
parameters   )indent)r   r   	enumeratetype__name__
__module__appendospathjoinstrmakedirssave	TypeErroropenconfig_file_namejsondumpget_config_dict)r!   output_pathrK   r;   model_lookupmodel_typesmodel_structurenamemodels
module_idxmodelmodel_id
model_pathfOuts                 r%   r^   Router.save   s    ,,224LD$&O!%.v%6!
"V1ZL$u+2F2F1GH).X&+/;+A+A*B!DKDXDXCY(Z%%,,X6	 &7 5  ,113OHk3x=AJKK
T2'

:W:LWPVW	  4 "'',,{,A,ABCRXY]aII(!0"&"6"6"8
  ZY	  '

:&' ZYs   9E6)F6FF
F&c           	     *   [        US   [        5      (       aR  Uc<  [        S U 5       5      n[        U5      S:  a  [	        S5      eUR                  5       nU Vs/ sH  oUU   PM	     nnUc  U R                  nUc'  U R                  (       a  [	        S5      e[	        S5      eX R                  ;  a2  [	        SU S[        U R                  R                  5       5       35      eU R                  U   S   nUR                  " U40 UD6nX'S	'   U$ s  snf )
z-Tokenizes a text and maps tokens to token-idsr   c              3  N   #    U H  oR                  5        H  o"v   M     M     g 7fN)r   ).0textr=   s      r%   	<genexpr>"Router.tokenize.<locals>.<genexpr>$  s     IEDYY[cC[CEs   #%   zYou cannot pass a list of dictionaries with different task types. Please ensure all dictionaries have the same task type key, or pass a single `task` argument.r2   r3   r4   r5   r   )
isinstancedictsetr   r   popr   r8   r   r   r   tokenize)r!   textsr   r;   tasksru   input_module	tokenizeds           r%   r}   Router.tokenize  s0   eAh%%|IEIIu:>$x  yy{ -22ED$ZEE2<%%D<}} E 
 !E  '''06KDQUQaQaQfQfQhLiKjk  ''-a0 ))%:6:	 &/ 3s   Dc           
     X   UUUUS.nU R                   " S	XS.UD6n	U	(       d  U R                   " S	USUS.UD6n	0 n
U	S   R                  5        HC  u  p[        U5      n UR                  " U4S[	        X+5      R                  5       0UDUD6nXU'   ME     0 nU	S   R                  5        H*  u  nn/ UU'   U H  nUU   R                  X   5        M     M,     U " U40 U	S   D6nU$ ! [         a9    [        S	U[	        X+5      R                  5       S.UD6nUR                  U5      n Nf = f)
N)tokencache_folderrevisionlocal_files_only)model_name_or_path	subfolderzconfig.json)r   config_filenamer   rO   r   rP   rQ   r*   )	load_configr   r   loadr   as_posixr_   r   rX   )r+   r   r   r   r   r   r   r;   
hub_kwargsconfigr#   rm   
model_typemodule_classr<   
local_pathrh   key_namemodels_listrl   s                       r%   r   Router.loadF  s    (  0	

 j4Fj_ij__ #5}`imwF $*7O$9$9$; H#5j#AL7%**&26y2K2T2T2VZdhn !'H %< %+K%8%>%>%@!Hk(*OH%')001BC ( &A
 O<vl';<  7* '9T)E^EgEgEimw
 &**:6	7s   .C&&A D)(D)c                    U R                   R                  5        H6  nUS   n[        US5      (       d  M  UR                  c  M*  UR                  s  $    g )Nr   	tokenizer)r   rD   r9   r   )r!   r   r   s      r%   r   Router.tokenizert  sO      ++224K(3AL|[11l6L6L6X#--- 5 r0   c                f   [        5       nU R                  R                  5        H?  nUS   nU(       d  M  [        US5      (       d  M$  UR	                  UR
                  5        MA     U(       d  g [        U5      S:X  a  UR                  5       $ [        R                  SU S35        [        U5      $ )Nr   max_seq_lengthrx   z$Different max_seq_lengths detected: z. Using the maximum value.)r{   r   rD   r9   addr   r   r|   loggerwarning_oncemax)r!   max_seq_lengthsr#   r   s       r%   r   Router.max_seq_length}  s     %''..0G(/
Lw7<1ABB##L$?$?@ 1
 !Q&"&&(("FFWWq rs''r0   c                :   / nU R                   R                  5        H5  u  p4U(       d  M  [        US   S5      (       d  M$  UR                  U5        M7     [	        U5      S:X  a  [
        R                  S5        g U H  nU R                   U   S   nXl        M     g )Nr   r   z2No modules have a max_seq_length attribute to set.)r   r   r9   rX   r   r   warningr   )r!   r>   has_max_seq_length_keysr=   rj   r   s         r%   r   r     s     #%++113KCv'&)-=>>'..s3 4 &'1,NNOP*C(,(8(8(=a(@L*/' +r0   )r   r   r   )NT)r   zdict[str, list[Module]]r   
str | Noner   boolreturnNone)
r,   list[Module]r-   r   r   r   r   r   r   r   rs   )r:   dict[str, Tensor]r   r   r   r   )r   int)T)re   r\   rK   r   )r~   z!list[str] | list[tuple[str, str]]r   r   ) NNNF)r   r\   r   r\   r   zbool | str | Noner   r   r   r   r   r   r   r   )r   r   )rV   rW   __qualname____firstlineno__r6   r   __annotations__ra   r   classmethodr.   r@   rC   r^   r}   r   propertyr   r   setter__static_attributes____classcell__)r$   s   @r%   r   r      sf   XN-/@AKA+ os^/2^/CM^/gk^/	^/ ^/@ 
 %) $
#
 '
 "	

 
 

 
:<B%N  #'#'#!&++ + !	+
 !+ + + 
+ +Z   ( (" 0 0r0   r   )
__future__r   rb   rY   pathlibr   typingr   ImportErrortyping_extensionstorchr   r   transformers.utilsr   (sentence_transformers.models.InputModuler	   #sentence_transformers.models.Moduler
   sentence_transformers.utilr   r   
get_loggerrV   r   r   Asymr*   r0   r%   <module>r      se    "  	 '  & @ 6 H			H	%G0[ G0V q  '&'s   A A-,A-